"""
PostGIS Geo API for Heritage Custodian Map

FastAPI backend providing spatial queries for the bronhouder.nl map.
Mounted at /api/geo/ via Caddy reverse proxy.

Endpoints:
- GET /                   - Health check and geo statistics
- GET /provinces          - Get all provinces as GeoJSON
- GET /municipalities     - Get municipalities (with filters)
- GET /institutions       - Get institutions as GeoJSON (with bbox/type filters)
- GET /institution/:ghcid - Get single institution details
- GET /historical         - Get historical boundaries
- GET /search             - Search institutions by name
- GET /admin/point        - Find admin unit for a point
- GET /nearby             - Find institutions near a point
- GET /stats/by-type      - Institution counts by type
- GET /stats/by-province  - Institution counts by province
"""

import os
import json
import math
from datetime import datetime
from typing import Optional, List, Dict, Any
from contextlib import asynccontextmanager
from decimal import Decimal

from fastapi import FastAPI, HTTPException, Query, APIRouter
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
import asyncpg


# ============================================================================
# Configuration
# ============================================================================

class GeoSettings(BaseModel):
    """PostGIS geo database settings.

    Every field is read from an environment variable once, when this class
    body is evaluated at import time, and falls back to the literal default.
    """

    host: str = os.getenv("GEO_POSTGRES_HOST", "localhost")
    port: int = int(os.getenv("GEO_POSTGRES_PORT", "5432"))
    database: str = os.getenv("GEO_POSTGRES_DB", "glam_geo")
    user: str = os.getenv("GEO_POSTGRES_USER", "glam_api")
    # NOTE(review): a credential is hard-coded as the fallback here. It is
    # kept so existing deployments keep working, but it should be rotated and
    # supplied exclusively through GEO_POSTGRES_PASSWORD.
    password: str = os.getenv("GEO_POSTGRES_PASSWORD", "glam_secret_2025")

    # Server settings
    api_host: str = os.getenv("GEO_API_HOST", "0.0.0.0")
    api_port: int = int(os.getenv("GEO_API_PORT", "8002"))


settings = GeoSettings()


# ============================================================================
# Pydantic Models
# ============================================================================

class GeoStatsResponse(BaseModel):
    """Geo database statistics"""

    status: str
    database: str
    provinces: int
    municipalities: int
    institutions: int
    historical_boundaries: int
    postgres_version: str


class InstitutionDetail(BaseModel):
    """Detailed institution information"""

    ghcid: str
    name: str
    type: str
    type_name: Optional[str]
    lat: Optional[float]
    lon: Optional[float]
    address: Optional[str]
    city: Optional[str]
    province: Optional[str]
    website: Optional[str]
    phone: Optional[str]
    wikidata_id: Optional[str]
    rating: Optional[float]
    total_ratings: Optional[int]
    description: Optional[str]
    reviews: Optional[List[Dict]]
    genealogiewerkbalk: Optional[Dict]


class AdminPoint(BaseModel):
    """Admin unit for a point"""

    province_code: Optional[str]
    province_name: Optional[str]
    municipality_code: Optional[str]
    municipality_name: Optional[str]


class NearbyInstitution(BaseModel):
    """Institution with distance"""

    ghcid: str
    name: str
    type: str
    type_name: Optional[str]
    distance_km: float
    city: Optional[str]
    province: Optional[str]
    rating: Optional[float]


# ============================================================================
# Global State
# ============================================================================

_pool: Optional[asyncpg.Pool] = None
_start_time: datetime = datetime.now()


async def get_pool() -> asyncpg.Pool:
    """Return the module-wide asyncpg pool, creating it on first use.

    The pool connects with the values in ``settings`` and keeps between 2 and
    10 connections open.
    """
    global _pool
    if _pool is None:
        _pool = await asyncpg.create_pool(
            host=settings.host,
            port=settings.port,
            database=settings.database,
            user=settings.user,
            password=settings.password,
            min_size=2,
            max_size=10,
        )
    return _pool


# ============================================================================
# FastAPI App
# ============================================================================

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler.

    Opens the connection pool before the app starts serving and closes it on
    shutdown. The close runs in ``finally`` so the pool is released even when
    the application exits through an exception.
    """
    global _pool
    await get_pool()
    try:
        yield
    finally:
        if _pool:
            await _pool.close()
            _pool = None


app = FastAPI(
    title="PostGIS Geo API",
    description="Spatial REST API for heritage institution map",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS middleware
# NOTE(review): wildcard origins combined with allow_credentials=True lets any
# site make credentialed requests. Left unchanged because the set of legitimate
# front-end origins is not visible here; restrict allow_origins once known.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# ============================================================================
# Helper Functions
# ============================================================================

def serialize_value(val: Any) -> Any:
    """Convert PostgreSQL values to JSON-serializable format.

    ``datetime`` becomes an ISO-8601 string, ``Decimal`` becomes ``float`` and
    ``bytes`` is decoded as UTF-8 (undecodable bytes are replaced). Every other
    value, including ``None``, dicts and lists, is returned unchanged.
    """
    if isinstance(val, datetime):
        return val.isoformat()
    if isinstance(val, Decimal):
        return float(val)
    if isinstance(val, bytes):
        return val.decode('utf-8', errors='replace')
    return val


def row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
    """Convert asyncpg row to dict with serialization"""
    return {key: serialize_value(row[key]) for key in row.keys()}


def _opt_float(val: Any) -> Optional[float]:
    """Return ``float(val)``, or ``None`` only when ``val`` is ``None``.

    A plain truthiness test would also turn a legitimate ``0`` (a coordinate
    on the equator/prime meridian, a zero rating) into ``None``.
    """
    return None if val is None else float(val)


def _load_json(val: Any) -> Any:
    """Decode a json/jsonb column value that asyncpg handed back as text.

    asyncpg returns ``json``/``jsonb`` values as ``str`` unless a custom type
    codec is installed on the connection. Values that are already decoded
    (or ``None``) pass through untouched, so this is safe in both setups.
    """
    if isinstance(val, (str, bytes, bytearray)):
        return json.loads(val)
    return val


def _escape_like(text: str) -> str:
    """Escape LIKE/ILIKE metacharacters so ``text`` is matched literally.

    Backslash is PostgreSQL's default LIKE escape character, so it must be
    escaped first, followed by the ``%`` and ``_`` wildcards.
    """
    return (
        text.replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )


def _geometry_sql(column: str, tolerance: float) -> str:
    """Return the SQL expression for a geometry column, optionally simplified.

    ``column`` and ``tolerance`` are only ever supplied from constants in this
    module (never from request input), which is why they may be interpolated
    into the statement text.
    """
    if tolerance:
        return f"ST_Simplify({column}, {tolerance})"
    return column


def _parse_bbox(bbox: str) -> List[float]:
    """Parse ``minLon,minLat,maxLon,maxLat`` into four finite floats.

    Raises:
        HTTPException: 400 when the value does not consist of exactly four
            comma-separated finite numbers.
    """
    try:
        coords = [float(part) for part in bbox.split(',')]
    except ValueError:
        coords = []
    if len(coords) != 4 or not all(math.isfinite(c) for c in coords):
        raise HTTPException(
            status_code=400,
            detail="Invalid bbox format. Use: minLon,minLat,maxLon,maxLat",
        )
    return coords


# ============================================================================
# API Endpoints
# ============================================================================

@app.get("/", response_model=GeoStatsResponse)
async def get_geo_status() -> GeoStatsResponse:
    """Get geo database status and statistics.

    Returns the row counts of the four geo tables and the first
    comma-separated part of the server's ``version()`` string.
    """
    pool = await get_pool()

    async with pool.acquire() as conn:
        version = await conn.fetchval("SELECT version()")
        provinces = await conn.fetchval("SELECT COUNT(*) FROM provinces")
        municipalities = await conn.fetchval("SELECT COUNT(*) FROM municipalities")
        institutions = await conn.fetchval("SELECT COUNT(*) FROM institutions")
        historical = await conn.fetchval("SELECT COUNT(*) FROM historical_boundaries")

    return GeoStatsResponse(
        status="healthy",
        database=settings.database,
        provinces=provinces or 0,
        municipalities=municipalities or 0,
        institutions=institutions or 0,
        historical_boundaries=historical or 0,
        postgres_version=version.split(',')[0] if version else "unknown",
    )


@app.get("/provinces")
async def get_provinces(
    simplified: bool = Query(True, description="Return simplified geometries")
):
    """Get all provinces as GeoJSON FeatureCollection.

    When ``simplified`` is true the geometries are passed through
    ``ST_Simplify`` with a tolerance of 0.001; otherwise they are returned
    as stored. Features are ordered by province name.
    """
    pool = await get_pool()
    geometry_sql = _geometry_sql("geom", 0.001 if simplified else 0)

    async with pool.acquire() as conn:
        rows = await conn.fetch(f"""
            SELECT
                id,
                province_code,
                iso_code,
                name,
                ST_AsGeoJSON({geometry_sql})::json as geometry,
                ST_X(centroid) as centroid_lon,
                ST_Y(centroid) as centroid_lat,
                area_km2
            FROM provinces
            ORDER BY name
        """)

    features = [
        {
            "type": "Feature",
            "id": row['province_code'],
            # Decoded so the response carries a GeoJSON object, not a string.
            "geometry": _load_json(row['geometry']),
            "properties": {
                "id": row['id'],
                "province_code": row['province_code'],
                "iso_code": row['iso_code'],
                "name": row['name'],
                "centroid_lon": _opt_float(row['centroid_lon']),
                "centroid_lat": _opt_float(row['centroid_lat']),
                "area_km2": _opt_float(row['area_km2']),
            },
        }
        for row in rows
    ]

    return {
        "type": "FeatureCollection",
        "features": features,
    }


@app.get("/municipalities")
async def get_municipalities(
    province: Optional[str] = Query(None, description="Filter by province ISO code (e.g., NH)"),
    simplified: bool = Query(True, description="Return simplified geometries"),
    limit: int = Query(500, ge=1, le=1000, description="Maximum results")
):
    """Get municipalities as GeoJSON FeatureCollection.

    ``province`` is upper-cased and compared with ``provinces.iso_code``.
    Simplified geometries use an ``ST_Simplify`` tolerance of 0.0005.
    Results are ordered by municipality name and capped at ``limit``.
    """
    pool = await get_pool()
    geometry_sql = _geometry_sql("m.geom", 0.0005 if simplified else 0)

    params: List[Any] = []
    where_sql = ""
    if province:
        params.append(province.upper())
        where_sql = f"WHERE p.iso_code = ${len(params)}"
    params.append(limit)

    query = f"""
        SELECT
            m.id,
            m.municipality_code,
            m.name,
            p.iso_code as province_iso,
            p.name as province_name,
            ST_AsGeoJSON({geometry_sql})::json as geometry,
            ST_X(m.centroid) as centroid_lon,
            ST_Y(m.centroid) as centroid_lat,
            m.area_km2
        FROM municipalities m
        LEFT JOIN provinces p ON m.province_id = p.id
        {where_sql}
        ORDER BY m.name
        LIMIT ${len(params)}
    """

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    features = [
        {
            "type": "Feature",
            "id": row['municipality_code'],
            "geometry": _load_json(row['geometry']),
            "properties": {
                "id": row['id'],
                "code": row['municipality_code'],
                "name": row['name'],
                "province_iso": row['province_iso'],
                "province_name": row['province_name'],
                "centroid_lon": _opt_float(row['centroid_lon']),
                "centroid_lat": _opt_float(row['centroid_lat']),
                "area_km2": _opt_float(row['area_km2']),
            },
        }
        for row in rows
    ]

    return {
        "type": "FeatureCollection",
        "features": features,
    }


@app.get("/institutions")
async def get_institutions(
    bbox: Optional[str] = Query(None, description="Bounding box: minLon,minLat,maxLon,maxLat"),
    province: Optional[str] = Query(None, description="Filter by province ISO code"),
    type: Optional[str] = Query(None, description="Filter by institution type (G,L,A,M,O,R,C,U,B,E,S,F,I,X,P,H,D,N,T)"),
    limit: int = Query(2000, ge=1, le=5000, description="Maximum results")
):
    """Get institutions as GeoJSON FeatureCollection.

    Only institutions with a geometry are returned. The optional filters are
    combined with AND; ``province`` and ``type`` are upper-cased before the
    comparison. The response also carries a ``metadata`` object echoing the
    filters, the limit and the number of features.

    Raises:
        HTTPException: 400 when ``bbox`` is not four finite numbers.
    """
    pool = await get_pool()

    # Build WHERE clauses; placeholders are numbered from the params list so
    # the two can never drift apart.
    conditions = ["i.geom IS NOT NULL"]
    params: List[Any] = []

    if bbox:
        first = len(params) + 1
        params.extend(_parse_bbox(bbox))
        conditions.append(
            f"i.geom && ST_MakeEnvelope(${first}, ${first + 1}, "
            f"${first + 2}, ${first + 3}, 4326)"
        )

    if province:
        params.append(province.upper())
        conditions.append(f"p.iso_code = ${len(params)}")

    if type:
        params.append(type.upper())
        conditions.append(f"i.institution_type = ${len(params)}")

    params.append(limit)
    where_clause = " AND ".join(conditions)

    query = f"""
        SELECT
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            ST_X(i.geom) as lon,
            ST_Y(i.geom) as lat,
            i.city,
            p.iso_code as province_iso,
            p.name as province_name,
            i.rating,
            i.total_ratings,
            i.wikidata_id,
            i.website
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE {where_clause}
        ORDER BY i.name
        LIMIT ${len(params)}
    """

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    features = [
        {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [float(row['lon']), float(row['lat'])],
            },
            "properties": {
                "ghcid": row['ghcid'],
                "name": row['name'],
                "type": row['type'],
                "type_name": row['type_name'],
                "city": row['city'],
                "province_iso": row['province_iso'],
                "province": row['province_name'],
                "rating": _opt_float(row['rating']),
                "total_ratings": row['total_ratings'],
                "wikidata_id": row['wikidata_id'],
                "website": row['website'],
            },
        }
        for row in rows
    ]

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "limit": limit,
            "filters": {
                "bbox": bbox,
                "province": province,
                "type": type,
            },
        },
    }


@app.get("/institution/{ghcid}", response_model=InstitutionDetail)
async def get_institution(ghcid: str):
    """Get detailed information for a single institution.

    The institution is looked up by ``ghcid_current``.

    Raises:
        HTTPException: 404 when no institution has that GHCID.
    """
    pool = await get_pool()

    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT
                i.ghcid_current as ghcid,
                i.name,
                i.institution_type as type,
                i.type_name,
                ST_X(i.geom) as lon,
                ST_Y(i.geom) as lat,
                i.address,
                i.city,
                p.name as province,
                i.website,
                i.phone,
                i.wikidata_id,
                i.rating,
                i.total_ratings,
                i.description,
                i.reviews,
                i.genealogiewerkbalk
            FROM institutions i
            LEFT JOIN provinces p ON i.province_id = p.id
            WHERE i.ghcid_current = $1
        """, ghcid)

    if not row:
        raise HTTPException(status_code=404, detail=f"Institution '{ghcid}' not found")

    return InstitutionDetail(
        ghcid=row['ghcid'],
        name=row['name'],
        type=row['type'],
        type_name=row['type_name'],
        lat=_opt_float(row['lat']),
        lon=_opt_float(row['lon']),
        address=row['address'],
        city=row['city'],
        province=row['province'],
        website=row['website'],
        phone=row['phone'],
        wikidata_id=row['wikidata_id'],
        rating=_opt_float(row['rating']),
        total_ratings=row['total_ratings'],
        description=row['description'],
        # presumably json/jsonb columns (TODO confirm against the schema);
        # decoded here because asyncpg returns those as text by default.
        reviews=_load_json(row['reviews']),
        genealogiewerkbalk=_load_json(row['genealogiewerkbalk']),
    )


@app.get("/search")
async def search_institutions(
    q: str = Query(..., min_length=2, description="Search query"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results")
):
    """Search institutions by name.

    First runs a PostgreSQL full-text search over name and description
    (``simple`` configuration), ranked with ``ts_rank_cd``. When that yields
    nothing, falls back to a case-insensitive substring match on the name in
    which ``%`` and ``_`` in ``q`` are matched literally.
    """
    pool = await get_pool()
    type_filter = type.upper() if type else None

    # Full-text search over name + description.
    fts_query = """
        SELECT
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            ST_X(i.geom) as lon,
            ST_Y(i.geom) as lat,
            i.city,
            p.iso_code as province_iso,
            i.rating,
            ts_rank_cd(
                to_tsvector('simple', i.name || ' ' || COALESCE(i.description, '')),
                plainto_tsquery('simple', $1)
            ) as rank
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE to_tsvector('simple', i.name || ' ' || COALESCE(i.description, ''))
              @@ plainto_tsquery('simple', $1)
    """
    fts_params: List[Any] = [q]
    if type_filter:
        fts_params.append(type_filter)
        fts_query += f" AND i.institution_type = ${len(fts_params)}"
    fts_params.append(limit)
    fts_query += f" ORDER BY rank DESC, i.name LIMIT ${len(fts_params)}"

    async with pool.acquire() as conn:
        rows = await conn.fetch(fts_query, *fts_params)

        # If no results from FTS, try ILIKE fallback on the same connection.
        if not rows:
            like_params: List[Any] = [f"%{_escape_like(q)}%"]
            type_sql = ""
            if type_filter:
                like_params.append(type_filter)
                type_sql = f"AND i.institution_type = ${len(like_params)}"
            like_params.append(limit)

            fallback_query = f"""
                SELECT
                    i.ghcid_current as ghcid,
                    i.name,
                    i.institution_type as type,
                    i.type_name,
                    ST_X(i.geom) as lon,
                    ST_Y(i.geom) as lat,
                    i.city,
                    p.iso_code as province_iso,
                    i.rating,
                    0 as rank
                FROM institutions i
                LEFT JOIN provinces p ON i.province_id = p.id
                WHERE i.name ILIKE $1
                {type_sql}
                ORDER BY i.name
                LIMIT ${len(like_params)}
            """
            rows = await conn.fetch(fallback_query, *like_params)

    return {
        "query": q,
        "count": len(rows),
        "results": [
            {
                "ghcid": row['ghcid'],
                "name": row['name'],
                "type": row['type'],
                "type_name": row['type_name'],
                "lon": _opt_float(row['lon']),
                "lat": _opt_float(row['lat']),
                "city": row['city'],
                "province_iso": row['province_iso'],
                "rating": _opt_float(row['rating']),
            }
            for row in rows
        ],
    }


@app.get("/nearby", response_model=List[NearbyInstitution])
async def find_nearby(
    lon: float = Query(..., description="Longitude"),
    lat: float = Query(..., description="Latitude"),
    radius_km: float = Query(10, ge=0.1, le=100, description="Search radius in km"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results")
):
    """Find institutions near a point.

    Selects institutions within ``radius_km`` of (``lon``, ``lat``) using
    ``ST_DWithin`` on geography casts, ordered by distance. Distances are
    reported in kilometres rounded to two decimals.
    """
    pool = await get_pool()

    query = """
        SELECT
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            (ST_Distance(i.geom::geography, ST_SetSRID(ST_Point($1, $2), 4326)::geography) / 1000) as distance_km,
            i.city,
            p.name as province,
            i.rating
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE ST_DWithin(
            i.geom::geography,
            ST_SetSRID(ST_Point($1, $2), 4326)::geography,
            $3 * 1000
        )
    """
    params: List[Any] = [lon, lat, radius_km]

    if type:
        params.append(type.upper())
        query += f" AND i.institution_type = ${len(params)}"

    params.append(limit)
    query += f" ORDER BY distance_km LIMIT ${len(params)}"

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    return [
        NearbyInstitution(
            ghcid=row['ghcid'],
            name=row['name'],
            type=row['type'],
            type_name=row['type_name'],
            distance_km=round(float(row['distance_km']), 2),
            city=row['city'],
            province=row['province'],
            rating=_opt_float(row['rating']),
        )
        for row in rows
    ]


@app.get("/admin/point", response_model=AdminPoint)
async def get_admin_for_point(
    lon: float = Query(..., description="Longitude"),
    lat: float = Query(..., description="Latitude")
):
    """Find which municipality/province contains a point.

    Looks for a municipality containing the point first; if there is none,
    looks for a containing province and leaves the municipality fields null.
    When neither matches, every field of the response is null.
    """
    pool = await get_pool()

    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT
                p.province_code,
                p.name as province_name,
                m.municipality_code,
                m.name as municipality_name
            FROM municipalities m
            JOIN provinces p ON m.province_id = p.id
            WHERE ST_Contains(m.geom, ST_SetSRID(ST_Point($1, $2), 4326))
            LIMIT 1
        """, lon, lat)

        if not row:
            # Try province only, reusing the connection already acquired.
            row = await conn.fetchrow("""
                SELECT
                    province_code,
                    name as province_name,
                    NULL as municipality_code,
                    NULL as municipality_name
                FROM provinces
                WHERE ST_Contains(geom, ST_SetSRID(ST_Point($1, $2), 4326))
                LIMIT 1
            """, lon, lat)

    if not row:
        return AdminPoint(
            province_code=None,
            province_name=None,
            municipality_code=None,
            municipality_name=None,
        )

    return AdminPoint(
        province_code=row['province_code'],
        province_name=row['province_name'],
        municipality_code=row['municipality_code'],
        municipality_name=row['municipality_name'],
    )


@app.get("/historical")
async def get_historical_boundaries(
    year: int = Query(1500, description="Reference year"),
    boundary_type: Optional[str] = Query(None, description="Boundary type filter"),
    simplified: bool = Query(True, description="Return simplified geometries"),
    limit: int = Query(1000, ge=1, le=10000, description="Maximum results")
):
    """Get historical boundaries as GeoJSON.

    Returns boundaries whose ``reference_year`` equals ``year``, optionally
    restricted to one ``boundary_type``, ordered by name. Rows whose geometry
    comes back empty or NULL are left out of the feature list, so the
    reported ``count`` can be lower than the number of matching rows.
    """
    pool = await get_pool()
    geometry_sql = _geometry_sql("geom", 0.001 if simplified else 0)

    conditions = ["reference_year = $1"]
    params: List[Any] = [year]

    if boundary_type:
        params.append(boundary_type)
        conditions.append(f"boundary_type = ${len(params)}")

    params.append(limit)
    where_clause = " AND ".join(conditions)

    query = f"""
        SELECT
            id,
            boundary_code,
            name,
            boundary_type,
            reference_year,
            ST_AsGeoJSON({geometry_sql})::json as geometry,
            ST_X(centroid) as centroid_lon,
            ST_Y(centroid) as centroid_lat,
            area_km2
        FROM historical_boundaries
        WHERE {where_clause}
        ORDER BY name
        LIMIT ${len(params)}
    """

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    features = []
    for row in rows:
        geometry = _load_json(row['geometry'])
        if not geometry:
            continue
        features.append({
            "type": "Feature",
            "id": row['boundary_code'],
            "geometry": geometry,
            "properties": {
                "id": row['id'],
                "code": row['boundary_code'],
                "name": row['name'],
                "type": row['boundary_type'],
                "year": row['reference_year'],
                "centroid_lon": _opt_float(row['centroid_lon']),
                "centroid_lat": _opt_float(row['centroid_lat']),
                "area_km2": _opt_float(row['area_km2']),
            },
        })

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "year": year,
            "boundary_type": boundary_type,
            "count": len(features),
        },
    }


@app.get("/stats/by-type")
async def get_stats_by_type():
    """Get institution counts by type.

    Counts institutions that have a geometry, grouped by institution type and
    type name, with the average rating rounded to two decimals. Ordered by
    count, largest first.
    """
    pool = await get_pool()

    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                institution_type as type,
                type_name,
                COUNT(*) as count,
                ROUND(AVG(rating)::numeric, 2) as avg_rating
            FROM institutions
            WHERE geom IS NOT NULL
            GROUP BY institution_type, type_name
            ORDER BY count DESC
        """)

    return {
        "stats": [
            {
                "type": row['type'],
                "type_name": row['type_name'],
                "count": row['count'],
                "avg_rating": _opt_float(row['avg_rating']),
            }
            for row in rows
        ]
    }

@app.get("/stats/by-province")
async def get_stats_by_province():
    """Get institution counts by province.

    Every province is listed, including those without institutions (the
    LEFT JOIN gives them a count of 0 and a null average rating). Averages
    are rounded to two decimals; results are ordered by count, largest first.
    """
    pool = await get_pool()

    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                p.iso_code,
                p.name as province_name,
                COUNT(i.id) as count,
                ROUND(AVG(i.rating)::numeric, 2) as avg_rating
            FROM provinces p
            LEFT JOIN institutions i ON i.province_id = p.id
            GROUP BY p.id, p.iso_code, p.name
            ORDER BY count DESC
        """)

    return {
        "stats": [
            {
                "province_iso": row['iso_code'],
                "province_name": row['province_name'],
                "count": row['count'],
                # Compare against None explicitly: a truthiness test would
                # report a genuine 0.00 average as null.
                "avg_rating": (
                    float(row['avg_rating'])
                    if row['avg_rating'] is not None
                    else None
                ),
            }
            for row in rows
        ]
    }


# ============================================================================
# Main
# ============================================================================

if __name__ == "__main__":
    import uvicorn

    # NOTE(review): the import string assumes this module is importable as
    # "geo_api"; reload=True enables uvicorn's auto-reloader and is normally
    # a development-only setting — confirm before running this in production.
    uvicorn.run(
        "geo_api:app",
        host=settings.api_host,
        port=settings.api_port,
        reload=True,
    )