# glam/backend/postgres/geo_api.py
# 2025-12-10 23:51:51 +01:00
#
# 1218 lines
# 40 KiB
# Python

"""
PostGIS Geo API for Heritage Custodian Map
FastAPI backend providing spatial queries for bronhouder.nl map
Mounted at /api/geo/ via Caddy reverse proxy.
Endpoints:
- GET / - Health check and geo statistics
- GET /countries - Get all countries as GeoJSON with institution counts
- GET /provinces - Get all provinces as GeoJSON
- GET /municipalities - Get municipalities (with filters)
- GET /institutions - Get institutions as GeoJSON (with bbox/type filters)
- GET /institution/:ghcid - Get single institution details
- GET /historical - Get historical boundaries
- GET /search - Search institutions by name
- GET /admin/point - Find admin unit for a point
- GET /nearby - Find institutions near a point
- GET /stats/by-type - Institution counts by type
- GET /stats/by-province - Institution counts by province
"""
import os
import json
from datetime import datetime
from typing import Optional, List, Dict, Any
from contextlib import asynccontextmanager
from decimal import Decimal
from fastapi import FastAPI, HTTPException, Query, APIRouter
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
import asyncpg
# ============================================================================
# Configuration
# ============================================================================
class GeoSettings(BaseModel):
    """Connection and server settings for the PostGIS geo service.

    Each default is looked up in the process environment when this class
    body executes; the literal given as second argument is the fallback.
    """

    # --- PostgreSQL / PostGIS connection (glam_geo has boundary data) ---
    host: str = os.environ.get("GEO_POSTGRES_HOST", "localhost")
    port: int = int(os.environ.get("GEO_POSTGRES_PORT", "5432"))
    database: str = os.environ.get("GEO_POSTGRES_DB", "glam_geo")
    user: str = os.environ.get("GEO_POSTGRES_USER", "glam_api")
    password: str = os.environ.get("GEO_POSTGRES_PASSWORD", "")

    # --- HTTP server bind address used by the __main__ runner ---
    api_host: str = os.environ.get("GEO_API_HOST", "0.0.0.0")
    api_port: int = int(os.environ.get("GEO_API_PORT", "8002"))


# Module-wide settings instance read by the pool factory and the runner.
settings = GeoSettings()
# ============================================================================
# Pydantic Models
# ============================================================================
class GeoStatsResponse(BaseModel):
    """Geo database statistics returned by the health-check endpoint (GET /)."""
    status: str  # get_geo_status always reports "healthy"
    database: str  # configured database name (settings.database)
    provinces: int  # row count of the provinces table
    municipalities: int  # row count of the municipalities table
    institutions: int  # row count of the institutions table
    historical_boundaries: int  # row count of the historical_boundaries table
    postgres_version: str  # SELECT version() text up to the first comma, or "unknown"
class InstitutionDetail(BaseModel):
    """Detailed institution information.

    Only the identifier, name and type are mandatory. Every ``Optional``
    field carries an explicit ``None`` default so that it may be omitted
    when the model is constructed (under Pydantic v2 an ``Optional``
    annotation without a default is still a required field).
    """

    # Mandatory identity fields
    ghcid: str
    name: str
    type: str

    # Optional descriptive fields
    type_name: Optional[str] = None
    lat: Optional[float] = None
    lon: Optional[float] = None
    address: Optional[str] = None
    city: Optional[str] = None
    province: Optional[str] = None
    website: Optional[str] = None
    phone: Optional[str] = None
    wikidata_id: Optional[str] = None
    rating: Optional[float] = None
    total_ratings: Optional[int] = None
    description: Optional[str] = None
    reviews: Optional[List[Dict]] = None
    genealogiewerkbalk: Optional[Dict] = None
class AdminPoint(BaseModel):
    """Administrative units (province / municipality) found for a point.

    All fields default to ``None`` so an "outside every boundary" answer
    can be built without listing each field; existing callers that pass
    all four keyword arguments are unaffected.
    """

    province_code: Optional[str] = None
    province_name: Optional[str] = None
    municipality_code: Optional[str] = None
    municipality_name: Optional[str] = None
class NearbyInstitution(BaseModel):
    """Institution together with its distance from a query point.

    ``distance_km`` is filled by ``find_nearby`` with a value rounded to
    two decimals. Optional fields default to ``None`` so they may be
    omitted on construction (required-by-default under Pydantic v2
    otherwise).
    """

    ghcid: str
    name: str
    type: str
    type_name: Optional[str] = None
    distance_km: float
    city: Optional[str] = None
    province: Optional[str] = None
    rating: Optional[float] = None
# ============================================================================
# Global State
# ============================================================================
# Shared asyncpg connection pool; stays None until get_pool() creates it and
# is reset to None again by the lifespan handler on shutdown.
_pool: Optional[asyncpg.Pool] = None
# Naive local timestamp taken when this module is imported.
# NOTE(review): not read anywhere in the visible part of this file.
_start_time: datetime = datetime.now()
async def get_pool() -> asyncpg.Pool:
    """Return the module-wide asyncpg pool, building it on the first call.

    Connection parameters come from the module-level ``settings`` object;
    the pool keeps between 2 and 10 connections.
    """
    global _pool

    # Fast path: the pool already exists.
    if _pool is not None:
        return _pool

    connection_options = {
        "host": settings.host,
        "port": settings.port,
        "database": settings.database,
        "user": settings.user,
        "password": settings.password,
    }
    _pool = await asyncpg.create_pool(
        min_size=2,
        max_size=10,
        **connection_options,
    )
    return _pool
# ============================================================================
# FastAPI App
# ============================================================================
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler.

    Opens the connection pool before the application starts serving and
    closes it afterwards. The teardown sits in a ``finally`` clause so the
    pool is also released when an exception or cancellation is delivered
    at the ``yield`` instead of a normal shutdown.
    """
    global _pool
    await get_pool()
    try:
        yield
    finally:
        if _pool is not None:
            await _pool.close()
            # Reset so a later get_pool() call builds a fresh pool.
            _pool = None
# ASGI application object; the pool is opened/closed through `lifespan`.
app = FastAPI(
title="PostGIS Geo API",
description="Spatial REST API for heritage institution map",
version="1.0.0",
lifespan=lifespan,
)
# CORS middleware: every origin, method and header is accepted.
# NOTE(review): a wildcard origin is combined with allow_credentials=True
# here; confirm that credentialed cross-origin requests are really needed,
# otherwise list explicit origins or disable credentials.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# ============================================================================
# Helper Functions
# ============================================================================
def serialize_value(val: Any) -> Any:
    """Convert a PostgreSQL-derived value to a JSON-serializable one.

    Conversions:
      * ``datetime`` / ``date`` / ``time`` -> ISO-8601 string
      * ``Decimal`` -> ``float``
      * ``UUID`` -> canonical string form
      * ``bytes`` -> text decoded as UTF-8 (undecodable bytes replaced)
      * ``dict`` / ``list`` -> same container with every value converted

    Anything else, including ``None``, is returned unchanged.
    """
    # Local imports: the file's import block only provides `datetime`.
    from datetime import date, time
    from uuid import UUID

    # datetime is a subclass of date, so this one test covers all three.
    if isinstance(val, (date, time)):
        return val.isoformat()
    if isinstance(val, Decimal):
        return float(val)
    if isinstance(val, UUID):
        return str(val)
    if isinstance(val, bytes):
        return val.decode('utf-8', errors='replace')
    # Containers may hold Decimals/dates themselves, so convert recursively.
    if isinstance(val, dict):
        return {key: serialize_value(item) for key, item in val.items()}
    if isinstance(val, list):
        return [serialize_value(item) for item in val]
    return val
def row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
    """Build a plain dict from a record, passing each value through serialize_value."""
    converted: Dict[str, Any] = {}
    for column in row.keys():
        converted[column] = serialize_value(row[column])
    return converted
# ============================================================================
# API Endpoints
# ============================================================================
@app.get("/", response_model=GeoStatsResponse)
async def get_geo_status() -> GeoStatsResponse:
    """Report the server version and the row counts of the core geo tables."""
    counted_tables = (
        "provinces",
        "municipalities",
        "institutions",
        "historical_boundaries",
    )
    pool = await get_pool()
    async with pool.acquire() as conn:
        version = await conn.fetchval("SELECT version()")
        # Table names come from the constant tuple above, never from a request.
        totals = {}
        for table in counted_tables:
            totals[table] = await conn.fetchval(f"SELECT COUNT(*) FROM {table}")

    if version:
        # Keep only the part of the version banner before the first comma.
        short_version = version.split(',')[0]
    else:
        short_version = "unknown"

    return GeoStatsResponse(
        status="healthy",
        database=settings.database,
        provinces=totals["provinces"] or 0,
        municipalities=totals["municipalities"] or 0,
        institutions=totals["institutions"] or 0,
        historical_boundaries=totals["historical_boundaries"] or 0,
        postgres_version=short_version,
    )
@app.get("/provinces")
async def get_provinces(
    simplified: bool = Query(True, description="Return simplified geometries")
):
    """Get all provinces as a GeoJSON FeatureCollection.

    Args:
        simplified: when true, geometries are passed through
            ``ST_Simplify`` with a tolerance of 0.001 before export.

    Returns:
        A dict with ``type`` "FeatureCollection" and one feature per row
        of the ``provinces`` table, ordered by name.
    """
    pool = await get_pool()
    # Fixed SQL fragment selected by a bool; no request text enters the query.
    geom_sql = "ST_Simplify(geom, 0.001)" if simplified else "geom"
    async with pool.acquire() as conn:
        rows = await conn.fetch(f"""
            SELECT
                id, province_code, iso_code, name,
                ST_AsGeoJSON({geom_sql})::json as geometry,
                ST_X(centroid) as centroid_lon,
                ST_Y(centroid) as centroid_lat,
                area_km2
            FROM provinces
            ORDER BY name
        """)

    def _as_float(value):
        # Test against None explicitly: 0.0 is a legitimate coordinate.
        return float(value) if value is not None else None

    features = []
    for row in rows:
        geometry = row['geometry']
        # asyncpg hands json columns back as text unless a codec is
        # registered (the pool here registers none), so decode it the same
        # way get_countries does to emit a real GeoJSON object.
        if isinstance(geometry, str):
            geometry = json.loads(geometry)
        features.append({
            "type": "Feature",
            "id": row['province_code'],
            "geometry": geometry,
            "properties": {
                "id": row['id'],
                "province_code": row['province_code'],
                "iso_code": row['iso_code'],
                "name": row['name'],
                "centroid_lon": _as_float(row['centroid_lon']),
                "centroid_lat": _as_float(row['centroid_lat']),
                "area_km2": _as_float(row['area_km2']),
            }
        })
    return {
        "type": "FeatureCollection",
        "features": features
    }
@app.get("/countries")
async def get_countries(
    simplified: bool = Query(True, description="Return simplified geometries"),
    with_counts: bool = Query(False, description="Include institution counts per country"),
):
    """Get all countries as a GeoJSON FeatureCollection.

    Args:
        simplified: when true, geometries are simplified with tolerance
            0.01 (coarser than the province/municipality endpoints).
        with_counts: when true, each country is joined with the number of
            ``custodians`` rows sharing its ISO alpha-2 code; otherwise
            ``institution_count`` is reported as 0.

    Returns:
        A FeatureCollection dict plus a ``metadata`` object carrying the
        feature count, the summed institution count and the number of
        countries having at least one institution.
    """
    pool = await get_pool()
    # Use more aggressive simplification for countries (world view).
    geom_sql = "ST_Simplify(bc.geom, 0.01)" if simplified else "bc.geom"

    # The counting variant only adds one column and one join, so both
    # variants are assembled from a single template.
    count_column = ""
    count_join = ""
    if with_counts:
        count_column = ",\n            COALESCE(counts.institution_count, 0) as institution_count"
        count_join = """
        LEFT JOIN (
            SELECT country_code, COUNT(*) as institution_count
            FROM custodians
            WHERE country_code IS NOT NULL
            GROUP BY country_code
        ) counts ON bc.iso_a2 = counts.country_code"""

    query = f"""
        SELECT
            bc.id,
            bc.iso_a2 as country_code,
            bc.iso_a3,
            bc.country_name as name,
            ST_AsGeoJSON({geom_sql}) as geometry,
            ST_X(bc.centroid) as centroid_lon,
            ST_Y(bc.centroid) as centroid_lat,
            bc.area_km2{count_column}
        FROM boundary_countries bc{count_join}
        WHERE bc.geom IS NOT NULL
        ORDER BY bc.country_name
    """
    async with pool.acquire() as conn:
        rows = await conn.fetch(query)

    def _as_float(value):
        # Test against None explicitly: 0.0 is a legitimate coordinate.
        return float(value) if value is not None else None

    features = []
    total_institutions = 0
    countries_with_data = 0
    for row in rows:
        # Parse geometry from string to dict (ST_AsGeoJSON returns text).
        geometry = row['geometry']
        if geometry is None:
            # Simplification can yield no geometry; skip such countries.
            continue
        if isinstance(geometry, str):
            geometry = json.loads(geometry)
        # Ensure geometry has the structure the map layer needs.
        if not isinstance(geometry, dict) or 'type' not in geometry or 'coordinates' not in geometry:
            continue

        iso_a2 = row['country_code'].strip() if row['country_code'] else None
        iso_a3 = row['iso_a3'].strip() if row['iso_a3'] else None
        institution_count = row['institution_count'] if with_counts else 0

        # Track totals (only meaningful when counts were requested).
        if with_counts:
            total_institutions += institution_count
            if institution_count > 0:
                countries_with_data += 1

        features.append({
            "type": "Feature",
            "id": iso_a2,
            "geometry": geometry,
            # Property names follow what the frontend expects (iso_a2 etc.).
            "properties": {
                "id": row['id'],
                "iso_a2": iso_a2,
                "iso_a3": iso_a3,
                "name": row['name'],
                "institution_count": institution_count,
                "centroid": [
                    _as_float(row['centroid_lon']),
                    _as_float(row['centroid_lat']),
                ],
                "area_km2": _as_float(row['area_km2']),
            }
        })

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "total_institutions": total_institutions,
            "countries_with_data": countries_with_data,
            "type_filter": None,
            "simplified": simplified,
        }
    }
@app.get("/municipalities")
async def get_municipalities(
    province: Optional[str] = Query(None, description="Filter by province ISO code (e.g., NH)"),
    simplified: bool = Query(True, description="Return simplified geometries"),
    limit: int = Query(500, ge=1, le=1000, description="Maximum results")
):
    """Get municipalities as a GeoJSON FeatureCollection.

    Args:
        province: optional province ISO code; upper-cased before matching
            against ``provinces.iso_code``.
        simplified: when true, geometries are simplified with tolerance
            0.0005.
        limit: maximum number of rows returned (1-1000).

    Returns:
        A FeatureCollection dict with one feature per municipality,
        ordered by name.
    """
    pool = await get_pool()
    # Fixed SQL fragment selected by a bool; no request text enters the query.
    geom_sql = "ST_Simplify(m.geom, 0.0005)" if simplified else "m.geom"

    # Positional placeholders are numbered from the parameter list itself
    # so the filter and the limit can never get out of step.
    params: List[Any] = []
    where_sql = ""
    if province:
        params.append(province.upper())
        where_sql = f"WHERE p.iso_code = ${len(params)}"
    params.append(limit)

    query = f"""
        SELECT
            m.id, m.municipality_code, m.name,
            p.iso_code as province_iso, p.name as province_name,
            ST_AsGeoJSON({geom_sql})::json as geometry,
            ST_X(m.centroid) as centroid_lon,
            ST_Y(m.centroid) as centroid_lat,
            m.area_km2
        FROM municipalities m
        LEFT JOIN provinces p ON m.province_id = p.id
        {where_sql}
        ORDER BY m.name
        LIMIT ${len(params)}
    """
    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    def _as_float(value):
        # Test against None explicitly: 0.0 is a legitimate coordinate.
        return float(value) if value is not None else None

    features = []
    for row in rows:
        geometry = row['geometry']
        # asyncpg hands json columns back as text unless a codec is
        # registered (the pool here registers none); decode it as
        # get_countries does so the feature carries a GeoJSON object.
        if isinstance(geometry, str):
            geometry = json.loads(geometry)
        features.append({
            "type": "Feature",
            "id": row['municipality_code'],
            "geometry": geometry,
            "properties": {
                "id": row['id'],
                "code": row['municipality_code'],
                "name": row['name'],
                "province_iso": row['province_iso'],
                "province_name": row['province_name'],
                "centroid_lon": _as_float(row['centroid_lon']),
                "centroid_lat": _as_float(row['centroid_lat']),
                "area_km2": _as_float(row['area_km2']),
            }
        })
    return {
        "type": "FeatureCollection",
        "features": features
    }
@app.get("/institutions")
async def get_institutions(
    bbox: Optional[str] = Query(None, description="Bounding box: minLon,minLat,maxLon,maxLat"),
    province: Optional[str] = Query(None, description="Filter by province ISO code (e.g., NH, ZH)"),
    country: Optional[str] = Query(None, description="Filter by country code (e.g., NL, DE, JP)"),
    type: Optional[str] = Query(None, description="Filter by institution type (G,L,A,M,O,R,C,U,B,E,S,F,I,X,P,H,D,N,T)"),
    limit: int = Query(50000, ge=1, le=100000, description="Maximum results")
):
    """Get institutions as a GeoJSON FeatureCollection from the custodians table.

    Only rows with both ``lat`` and ``lon`` are returned. All filters are
    optional and combined with AND.

    Args:
        bbox: "minLon,minLat,maxLon,maxLat"; must split into exactly four
            floats.
        province: compared (upper-cased) with ``region_code``.
        country: compared (upper-cased) with ``country_code``.
        type: compared (upper-cased) with ``type``. The name shadows the
            builtin but is part of the public query interface.
        limit: maximum number of rows (1-100000).

    Returns:
        A FeatureCollection of Point features plus a ``metadata`` object
        echoing the count, the limit and the applied filters.

    Raises:
        HTTPException: 400 when ``bbox`` cannot be parsed.
    """
    pool = await get_pool()

    # Build WHERE clauses - query custodians table directly.
    conditions = ["lat IS NOT NULL AND lon IS NOT NULL"]
    params: List[Any] = []

    if bbox:
        try:
            min_lon, min_lat, max_lon, max_lat = map(float, bbox.split(','))
        except ValueError:
            # Covers both non-numeric parts and a wrong number of parts.
            raise HTTPException(
                status_code=400,
                detail="Invalid bbox format. Use: minLon,minLat,maxLon,maxLat",
            ) from None
        params.extend([min_lon, min_lat, max_lon, max_lat])
        last = len(params)
        conditions.append(
            f"lon >= ${last - 3} AND lat >= ${last - 2} "
            f"AND lon <= ${last - 1} AND lat <= ${last}"
        )

    # Equality filters; placeholders are numbered from the parameter list.
    for column, value in (
        ("region_code", province),
        ("type", type),
        ("country_code", country),
    ):
        if value:
            params.append(value.upper())
            conditions.append(f"{column} = ${len(params)}")

    params.append(limit)
    where_clause = " AND ".join(conditions)

    # Query custodians table with all rich metadata fields.
    query = f"""
        SELECT
            ghcid,
            name,
            emic_name,
            type,
            type_name,
            lon,
            lat,
            city,
            region as province,
            region_code as province_iso,
            country_code,
            formatted_address,
            street_address,
            postal_code,
            rating,
            total_ratings,
            wikidata_id,
            website,
            phone,
            email,
            isil_code,
            google_place_id,
            description,
            opening_hours,
            reviews,
            photos,
            photo_urls,
            business_status,
            street_view_url,
            founding_year,
            dissolution_year,
            temporal_extent,
            museum_register,
            youtube_channel_url,
            youtube_subscriber_count,
            youtube_video_count,
            youtube_enrichment,
            social_facebook,
            social_twitter,
            social_instagram,
            wikidata_label_en,
            wikidata_description_en
        FROM custodians
        WHERE {where_clause}
        ORDER BY name
        LIMIT ${len(params)}
    """
    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    # Columns copied verbatim into every feature's properties.
    always_columns = (
        "ghcid", "name", "emic_name", "type", "type_name", "city",
        "province", "province_iso", "country_code", "formatted_address",
    )
    trailing_columns = (
        "total_ratings", "wikidata_id", "website", "phone", "email",
        "isil_code", "google_place_id", "description", "business_status",
        "street_view_url", "founding_year", "dissolution_year",
    )
    # Structured columns that are only included when they hold a value.
    # NOTE(review): values are passed through exactly as the driver returns
    # them; confirm whether these arrive decoded or as JSON text.
    optional_columns = (
        "opening_hours", "reviews", "photos", "photo_urls",
        "temporal_extent", "museum_register",
    )
    social_columns = (
        ("facebook", "social_facebook"),
        ("twitter", "social_twitter"),
        ("instagram", "social_instagram"),
    )

    features = []
    for row in rows:
        props = {column: row[column] for column in always_columns}
        # Explicit None test so a stored rating of 0 is not reported as null.
        props["rating"] = float(row['rating']) if row['rating'] is not None else None
        props.update({column: row[column] for column in trailing_columns})

        for column in optional_columns:
            if row[column]:
                props[column] = row[column]

        if row['youtube_enrichment']:
            props["youtube_enrichment"] = row['youtube_enrichment']
        elif row['youtube_channel_url']:
            # Build minimal YouTube data if enrichment not present.
            props["youtube"] = {
                "channel_url": row['youtube_channel_url'],
                "subscriber_count": row['youtube_subscriber_count'],
                "video_count": row['youtube_video_count'],
            }

        # Social media links, grouped under one key when any is present.
        social = {key: row[column] for key, column in social_columns if row[column]}
        if social:
            props["social_media"] = social

        # Wikidata labels.
        if row['wikidata_label_en']:
            props["wikidata_label"] = row['wikidata_label_en']
        if row['wikidata_description_en']:
            props["wikidata_description"] = row['wikidata_description_en']

        features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [float(row['lon']), float(row['lat'])]
            },
            "properties": props
        })

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "limit": limit,
            "filters": {
                "bbox": bbox,
                "province": province,
                # The country filter is applied above, so echo it as well.
                "country": country,
                "type": type
            }
        }
    }
@app.get("/institution/{ghcid}")
async def get_institution(ghcid: str):
    """Get detailed information for a single institution.

    Args:
        ghcid: identifier matched exactly against ``custodians.ghcid``.

    Returns:
        A dict with the core columns always present, and grouped optional
        sections (``temporal``, ``youtube``, ``social_media``,
        ``wikidata``, ``provenance`` and the enrichment columns) included
        only when at least one of their source columns holds a value.
        ``ghcid_details`` is always present.

    Raises:
        HTTPException: 404 when no row matches.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT
                ghcid,
                name,
                emic_name,
                verified_name,
                type,
                type_name,
                lon,
                lat,
                city,
                region as province,
                region_code as province_iso,
                country_code,
                formatted_address,
                street_address,
                postal_code,
                website,
                phone,
                email,
                wikidata_id,
                isil_code,
                google_place_id,
                rating,
                total_ratings,
                description,
                business_status,
                street_view_url,
                google_maps_url,
                opening_hours,
                reviews,
                photos,
                photo_urls,
                founding_year,
                founding_date,
                dissolution_year,
                dissolution_date,
                temporal_extent,
                museum_register,
                youtube_channel_id,
                youtube_channel_url,
                youtube_subscriber_count,
                youtube_video_count,
                youtube_view_count,
                youtube_enrichment,
                social_facebook,
                social_twitter,
                social_instagram,
                social_linkedin,
                social_youtube,
                logo_url,
                wikidata_label_nl,
                wikidata_label_en,
                wikidata_description_nl,
                wikidata_description_en,
                wikidata_types,
                wikidata_inception,
                wikidata_enrichment,
                genealogiewerkbalk,
                nan_isil_enrichment,
                kb_enrichment,
                zcbs_enrichment,
                web_claims,
                ghcid_uuid,
                ghcid_numeric,
                identifiers,
                data_source,
                data_tier,
                provenance
            FROM custodians
            WHERE ghcid = $1
        """, ghcid)

    if not row:
        raise HTTPException(status_code=404, detail=f"Institution '{ghcid}' not found")

    def _as_float(value):
        # Explicit None test: a latitude/longitude of exactly 0 is valid
        # (equator / prime meridian) and must not be turned into null.
        return float(value) if value is not None else None

    def _pick(pairs):
        """Return {output_key: row[column]} for columns holding a truthy value."""
        return {key: row[column] for key, column in pairs if row[column]}

    # Core columns, always present in the response.
    result = {
        "ghcid": row['ghcid'],
        "name": row['name'],
        "emic_name": row['emic_name'],
        "verified_name": row['verified_name'],
        "type": row['type'],
        "type_name": row['type_name'],
        "lat": _as_float(row['lat']),
        "lon": _as_float(row['lon']),
        "city": row['city'],
        "province": row['province'],
        "province_iso": row['province_iso'],
        "country_code": row['country_code'],
        "formatted_address": row['formatted_address'],
        "street_address": row['street_address'],
        "postal_code": row['postal_code'],
        "website": row['website'],
        "phone": row['phone'],
        "email": row['email'],
        "wikidata_id": row['wikidata_id'],
        "isil_code": row['isil_code'],
        "google_place_id": row['google_place_id'],
        "rating": _as_float(row['rating']),
        "total_ratings": row['total_ratings'],
        "description": row['description'],
        "business_status": row['business_status'],
        "street_view_url": row['street_view_url'],
        "google_maps_url": row['google_maps_url'],
    }

    # Structured columns - only include if present.
    # NOTE(review): values are passed through exactly as the driver returns
    # them; confirm whether these arrive decoded or as JSON text.
    result.update(_pick([
        ("opening_hours", "opening_hours"),
        ("reviews", "reviews"),
        ("photos", "photos"),
        ("photo_urls", "photo_urls"),
        ("identifiers", "identifiers"),
    ]))

    # Temporal data; the two date columns are exported as ISO strings.
    temporal = {}
    if row['founding_year']:
        temporal["founding_year"] = row['founding_year']
    if row['founding_date']:
        temporal["founding_date"] = row['founding_date'].isoformat()
    if row['dissolution_year']:
        temporal["dissolution_year"] = row['dissolution_year']
    if row['dissolution_date']:
        temporal["dissolution_date"] = row['dissolution_date'].isoformat()
    if row['temporal_extent']:
        temporal["extent"] = row['temporal_extent']
    if temporal:
        result["temporal"] = temporal

    # Museum register.
    if row['museum_register']:
        result["museum_register"] = row['museum_register']

    # YouTube enrichment.
    youtube = _pick([
        ("channel_id", "youtube_channel_id"),
        ("channel_url", "youtube_channel_url"),
        ("subscriber_count", "youtube_subscriber_count"),
        ("video_count", "youtube_video_count"),
        ("view_count", "youtube_view_count"),
        ("enrichment", "youtube_enrichment"),
    ])
    if youtube:
        result["youtube"] = youtube

    # Social media.
    social = _pick([
        ("facebook", "social_facebook"),
        ("twitter", "social_twitter"),
        ("instagram", "social_instagram"),
        ("linkedin", "social_linkedin"),
        ("youtube", "social_youtube"),
    ])
    if social:
        result["social_media"] = social

    # Wikidata.
    wikidata = _pick([
        ("label_nl", "wikidata_label_nl"),
        ("label_en", "wikidata_label_en"),
        ("description_nl", "wikidata_description_nl"),
        ("description_en", "wikidata_description_en"),
        ("types", "wikidata_types"),
        ("inception", "wikidata_inception"),
        ("enrichment", "wikidata_enrichment"),
    ])
    if wikidata:
        result["wikidata"] = wikidata

    # Logo and other enrichment data.
    result.update(_pick([
        ("logo_url", "logo_url"),
        ("genealogiewerkbalk", "genealogiewerkbalk"),
        ("nan_isil_enrichment", "nan_isil_enrichment"),
        ("kb_enrichment", "kb_enrichment"),
        ("zcbs_enrichment", "zcbs_enrichment"),
        ("web_claims", "web_claims"),
    ]))

    # GHCID details.
    ghcid_data = {"current": row['ghcid']}
    if row['ghcid_uuid']:
        ghcid_data["uuid"] = str(row['ghcid_uuid'])
    if row['ghcid_numeric']:
        ghcid_data["numeric"] = int(row['ghcid_numeric'])
    result["ghcid_details"] = ghcid_data

    # Provenance.
    if row['data_source'] or row['data_tier'] or row['provenance']:
        result["provenance"] = {
            "data_source": row['data_source'],
            "data_tier": row['data_tier'],
            "details": row['provenance'],
        }

    return result
@app.get("/search")
async def search_institutions(
    q: str = Query(..., min_length=2, description="Search query"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results")
):
    """Search institutions by name.

    A full-text query over name and description runs first, ranked with
    ``ts_rank_cd``. When it yields nothing, a case-insensitive substring
    match on the name is tried on the same connection.

    Args:
        q: search text (at least two characters).
        type: optional institution type, upper-cased before matching. The
            name shadows the builtin but is part of the public interface.
        limit: maximum number of results (1-200).

    Returns:
        A dict with the original query, the result count and the results.
    """
    pool = await get_pool()
    type_code = type.upper() if type else None

    # Primary query: PostgreSQL full-text search.
    fts_query = """
        SELECT
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            ST_X(i.geom) as lon,
            ST_Y(i.geom) as lat,
            i.city,
            p.iso_code as province_iso,
            i.rating,
            ts_rank_cd(
                to_tsvector('simple', i.name || ' ' || COALESCE(i.description, '')),
                plainto_tsquery('simple', $1)
            ) as rank
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE to_tsvector('simple', i.name || ' ' || COALESCE(i.description, ''))
            @@ plainto_tsquery('simple', $1)
    """
    fts_params: List[Any] = [q]
    if type_code:
        fts_params.append(type_code)
        fts_query += f" AND i.institution_type = ${len(fts_params)}"
    fts_params.append(limit)
    fts_query += f" ORDER BY rank DESC, i.name LIMIT ${len(fts_params)}"

    # Fallback query: substring match. Backslash is the default LIKE escape
    # character, so escape it and the two wildcards to make the user's text
    # match literally instead of acting as a pattern.
    literal = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    like_params: List[Any] = [f"%{literal}%"]
    type_filter = ""
    if type_code:
        like_params.append(type_code)
        type_filter = f"AND i.institution_type = ${len(like_params)}"
    like_params.append(limit)
    fallback_query = f"""
        SELECT
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            ST_X(i.geom) as lon,
            ST_Y(i.geom) as lat,
            i.city,
            p.iso_code as province_iso,
            i.rating,
            0 as rank
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE i.name ILIKE $1
        {type_filter}
        ORDER BY i.name
        LIMIT ${len(like_params)}
    """

    async with pool.acquire() as conn:
        rows = await conn.fetch(fts_query, *fts_params)
        # If no results from FTS, try the ILIKE fallback on this connection.
        if not rows:
            rows = await conn.fetch(fallback_query, *like_params)

    def _as_float(value):
        # Explicit None test: a coordinate of exactly 0 is valid.
        return float(value) if value is not None else None

    return {
        "query": q,
        "count": len(rows),
        "results": [
            {
                "ghcid": row['ghcid'],
                "name": row['name'],
                "type": row['type'],
                "type_name": row['type_name'],
                "lon": _as_float(row['lon']),
                "lat": _as_float(row['lat']),
                "city": row['city'],
                "province_iso": row['province_iso'],
                "rating": _as_float(row['rating']),
            }
            for row in rows
        ]
    }
@app.get("/nearby", response_model=List[NearbyInstitution])
async def find_nearby(
    lon: float = Query(..., description="Longitude"),
    lat: float = Query(..., description="Latitude"),
    radius_km: float = Query(10, ge=0.1, le=100, description="Search radius in km"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results")
):
    """Find institutions near a point, nearest first.

    Args:
        lon: longitude of the query point.
        lat: latitude of the query point.
        radius_km: search radius (0.1-100); multiplied by 1000 in SQL
            before being handed to ``ST_DWithin``.
        type: optional institution type, upper-cased before matching. The
            name shadows the builtin but is part of the public interface.
        limit: maximum number of results (1-200).

    Returns:
        A list of ``NearbyInstitution`` with ``distance_km`` rounded to
        two decimals.
    """
    pool = await get_pool()
    query = """
        SELECT
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            (ST_Distance(i.geom::geography, ST_SetSRID(ST_Point($1, $2), 4326)::geography) / 1000) as distance_km,
            i.city,
            p.name as province,
            i.rating
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE ST_DWithin(
            i.geom::geography,
            ST_SetSRID(ST_Point($1, $2), 4326)::geography,
            $3 * 1000
        )
    """
    # Placeholders after $3 are numbered from the parameter list itself.
    params: List[Any] = [lon, lat, radius_km]
    if type:
        params.append(type.upper())
        query += f" AND i.institution_type = ${len(params)}"
    params.append(limit)
    query += f" ORDER BY distance_km LIMIT ${len(params)}"

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    return [
        NearbyInstitution(
            ghcid=row['ghcid'],
            name=row['name'],
            type=row['type'],
            type_name=row['type_name'],
            distance_km=round(float(row['distance_km']), 2),
            city=row['city'],
            province=row['province'],
            # Explicit None test so a stored rating of 0 is not reported as null.
            rating=float(row['rating']) if row['rating'] is not None else None,
        )
        for row in rows
    ]
@app.get("/admin/point", response_model=AdminPoint)
async def get_admin_for_point(
    lon: float = Query(..., description="Longitude"),
    lat: float = Query(..., description="Latitude")
):
    """Resolve a point to the municipality and province that contain it.

    The municipality lookup runs first; if it matches nothing, a
    province-only lookup is attempted. When neither matches, every field
    of the answer is ``None``.
    """
    municipality_lookup = """
        SELECT
            p.province_code,
            p.name as province_name,
            m.municipality_code,
            m.name as municipality_name
        FROM municipalities m
        JOIN provinces p ON m.province_id = p.id
        WHERE ST_Contains(m.geom, ST_SetSRID(ST_Point($1, $2), 4326))
        LIMIT 1
    """
    province_lookup = """
        SELECT
            province_code,
            name as province_name,
            NULL as municipality_code,
            NULL as municipality_name
        FROM provinces
        WHERE ST_Contains(geom, ST_SetSRID(ST_Point($1, $2), 4326))
        LIMIT 1
    """
    field_names = (
        "province_code",
        "province_name",
        "municipality_code",
        "municipality_name",
    )

    pool = await get_pool()
    async with pool.acquire() as conn:
        match = await conn.fetchrow(municipality_lookup, lon, lat)

    if not match:
        # Try province only.
        async with pool.acquire() as conn:
            match = await conn.fetchrow(province_lookup, lon, lat)

    if match:
        values = {field: match[field] for field in field_names}
    else:
        values = {field: None for field in field_names}
    return AdminPoint(**values)
@app.get("/historical")
async def get_historical_boundaries(
    year: int = Query(1500, description="Reference year"),
    boundary_type: Optional[str] = Query(None, description="Boundary type filter"),
    simplified: bool = Query(True, description="Return simplified geometries"),
    limit: int = Query(1000, ge=1, le=10000, description="Maximum results")
):
    """Get historical boundaries as GeoJSON.

    Args:
        year: matched exactly against ``reference_year``.
        boundary_type: optional exact match on ``boundary_type``.
        simplified: when true, geometries are simplified with tolerance
            0.001.
        limit: maximum number of rows fetched (1-10000).

    Returns:
        A FeatureCollection dict plus ``metadata`` echoing the filters
        and the feature count. Rows without a geometry are left out, so
        the count can be lower than the number of rows fetched.
    """
    pool = await get_pool()
    # Fixed SQL fragment selected by a bool; no request text enters the query.
    geom_sql = "ST_Simplify(geom, 0.001)" if simplified else "geom"

    # Placeholders are numbered from the parameter list itself.
    params: List[Any] = [year]
    conditions = ["reference_year = $1"]
    if boundary_type:
        params.append(boundary_type)
        conditions.append(f"boundary_type = ${len(params)}")
    params.append(limit)
    where_clause = " AND ".join(conditions)

    query = f"""
        SELECT
            id, boundary_code, name, boundary_type, reference_year,
            ST_AsGeoJSON({geom_sql})::json as geometry,
            ST_X(centroid) as centroid_lon,
            ST_Y(centroid) as centroid_lat,
            area_km2
        FROM historical_boundaries
        WHERE {where_clause}
        ORDER BY name
        LIMIT ${len(params)}
    """
    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    def _as_float(value):
        # Test against None explicitly: 0.0 is a legitimate coordinate.
        return float(value) if value is not None else None

    features = []
    for row in rows:
        geometry = row['geometry']
        if not geometry:
            continue
        # asyncpg hands json columns back as text unless a codec is
        # registered (the pool here registers none); decode it as
        # get_countries does so the feature carries a GeoJSON object.
        if isinstance(geometry, str):
            geometry = json.loads(geometry)
        features.append({
            "type": "Feature",
            "id": row['boundary_code'],
            "geometry": geometry,
            "properties": {
                "id": row['id'],
                "code": row['boundary_code'],
                "name": row['name'],
                "type": row['boundary_type'],
                "year": row['reference_year'],
                "centroid_lon": _as_float(row['centroid_lon']),
                "centroid_lat": _as_float(row['centroid_lat']),
                "area_km2": _as_float(row['area_km2']),
            }
        })
    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "year": year,
            "boundary_type": boundary_type,
            "count": len(features)
        }
    }
@app.get("/stats/by-type")
async def get_stats_by_type():
    """Get institution counts and average rating per institution type.

    Only institutions with a geometry are counted; groups are ordered by
    descending count.

    Returns:
        ``{"stats": [...]}`` with one entry per (type, type_name) group.
        ``avg_rating`` is ``None`` only when the group has no rating.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                institution_type as type,
                type_name,
                COUNT(*) as count,
                ROUND(AVG(rating)::numeric, 2) as avg_rating
            FROM institutions
            WHERE geom IS NOT NULL
            GROUP BY institution_type, type_name
            ORDER BY count DESC
        """)

    stats = []
    for row in rows:
        average = row['avg_rating']
        stats.append({
            "type": row['type'],
            "type_name": row['type_name'],
            "count": row['count'],
            # Explicit None test: an average of 0.00 is a value, not "missing".
            "avg_rating": float(average) if average is not None else None,
        })
    return {"stats": stats}
@app.get("/stats/by-province")
async def get_stats_by_province():
    """Get institution counts and average rating per province.

    Provinces are LEFT JOINed with institutions, so a province without
    any institution is still listed with a count of 0. Rows are ordered
    by descending count.

    Returns:
        ``{"stats": [...]}`` with one entry per province. ``avg_rating``
        is ``None`` only when the province has no rating.
    """
    pool = await get_pool()
    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                p.iso_code,
                p.name as province_name,
                COUNT(i.id) as count,
                ROUND(AVG(i.rating)::numeric, 2) as avg_rating
            FROM provinces p
            LEFT JOIN institutions i ON i.province_id = p.id
            GROUP BY p.id, p.iso_code, p.name
            ORDER BY count DESC
        """)

    stats = []
    for row in rows:
        average = row['avg_rating']
        stats.append({
            "province_iso": row['iso_code'],
            "province_name": row['province_name'],
            "count": row['count'],
            # Explicit None test: an average of 0.00 is a value, not "missing".
            "avg_rating": float(average) if average is not None else None,
        })
    return {"stats": stats}
# ============================================================================
# Main
# ============================================================================
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn, auto-reload enabled.
    import uvicorn

    server_options = {
        "host": settings.api_host,
        "port": settings.api_port,
        "reload": True,
    }
    uvicorn.run("geo_api:app", **server_options)