"""
PostGIS Geo API for Heritage Custodian Map

FastAPI backend providing spatial queries for the bronhouder.nl map.

Mounted at /api/geo/ via Caddy reverse proxy.

Endpoints:
- GET /                   - Health check and geo statistics
- GET /countries          - Get all countries as GeoJSON with institution counts
- GET /provinces          - Get all provinces as GeoJSON
- GET /municipalities     - Get municipalities (with filters)
- GET /institutions       - Get institutions as GeoJSON (with bbox/type filters)
- GET /institution/:ghcid - Get single institution details
- GET /historical         - Get historical boundaries
- GET /search             - Search institutions by name
- GET /admin/point        - Find admin unit for a point
- GET /nearby             - Find institutions near a point
- GET /stats/by-type      - Institution counts by type
- GET /stats/by-province  - Institution counts by province
"""
|
|
|
|
import os
|
|
import json
|
|
from datetime import datetime
|
|
from typing import Optional, List, Dict, Any
|
|
from contextlib import asynccontextmanager
|
|
from decimal import Decimal
|
|
|
|
from fastapi import FastAPI, HTTPException, Query, APIRouter
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.responses import JSONResponse
|
|
from pydantic import BaseModel, Field
|
|
import asyncpg
|
|
|
|
|
|
# ============================================================================
|
|
# Configuration
|
|
# ============================================================================
|
|
|
|
class GeoSettings(BaseModel):
    """PostGIS geo database settings - connects to glam_geo with PostGIS boundaries.

    All values are read from environment variables once, at import time,
    with local-development defaults.
    """
    # Database connection
    host: str = os.getenv("GEO_POSTGRES_HOST", "localhost")
    port: int = int(os.getenv("GEO_POSTGRES_PORT", "5432"))
    database: str = os.getenv("GEO_POSTGRES_DB", "glam_geo")  # glam_geo has boundary data
    user: str = os.getenv("GEO_POSTGRES_USER", "glam_api")
    password: str = os.getenv("GEO_POSTGRES_PASSWORD", "")

    # Server settings
    api_host: str = os.getenv("GEO_API_HOST", "0.0.0.0")
    api_port: int = int(os.getenv("GEO_API_PORT", "8002"))


# Module-level singleton; evaluated once when the module is imported.
settings = GeoSettings()
|
|
|
|
|
|
# ============================================================================
|
|
# Pydantic Models
|
|
# ============================================================================
|
|
|
|
class GeoStatsResponse(BaseModel):
    """Geo database statistics returned by the health-check endpoint (GET /)."""
    status: str                 # "healthy" when all count queries succeed
    database: str               # name of the connected geo database
    provinces: int              # row count of the provinces table
    municipalities: int         # row count of the municipalities table
    institutions: int           # row count of the institutions table
    historical_boundaries: int  # row count of the historical_boundaries table
    postgres_version: str       # first comma-separated segment of SELECT version()
|
|
|
|
|
|
class InstitutionDetail(BaseModel):
    """Detailed institution information.

    Optional fields carry an explicit ``= None`` default: Pydantic v1 treated
    ``Optional[X]`` as optional-with-None, but Pydantic v2 makes such fields
    required unless a default is given. The explicit default keeps the model
    backward-compatible under both versions.
    """
    ghcid: str
    name: str
    type: str
    type_name: Optional[str] = None
    lat: Optional[float] = None
    lon: Optional[float] = None
    address: Optional[str] = None
    city: Optional[str] = None
    province: Optional[str] = None
    website: Optional[str] = None
    phone: Optional[str] = None
    wikidata_id: Optional[str] = None
    rating: Optional[float] = None
    total_ratings: Optional[int] = None
    description: Optional[str] = None
    reviews: Optional[List[Dict]] = None
    genealogiewerkbalk: Optional[Dict] = None
|
|
|
|
|
|
class AdminPoint(BaseModel):
    """Admin unit for a point.

    All fields are None when the point falls outside every known boundary.
    Explicit ``= None`` defaults keep the Optional fields optional under
    Pydantic v2 (v1 inferred the default from ``Optional``).
    """
    province_code: Optional[str] = None
    province_name: Optional[str] = None
    municipality_code: Optional[str] = None
    municipality_name: Optional[str] = None
|
|
|
|
|
|
class NearbyInstitution(BaseModel):
    """Institution with distance from a query point.

    Optional fields default to None explicitly so the model validates under
    both Pydantic v1 and v2 (v2 makes un-defaulted Optional fields required).
    """
    ghcid: str
    name: str
    type: str
    type_name: Optional[str] = None
    distance_km: float
    city: Optional[str] = None
    province: Optional[str] = None
    rating: Optional[float] = None
|
|
|
|
|
|
# ============================================================================
|
|
# Global State
|
|
# ============================================================================
|
|
|
|
# Lazily-created asyncpg connection pool shared by all request handlers.
_pool: Optional[asyncpg.Pool] = None
# Process start timestamp; recorded at import, currently informational only.
_start_time: datetime = datetime.now()
|
|
|
|
|
|
async def get_pool() -> asyncpg.Pool:
    """Return the shared asyncpg connection pool, creating it on first use."""
    global _pool

    # Fast path: pool already initialized (normally done by the lifespan hook).
    if _pool is not None:
        return _pool

    _pool = await asyncpg.create_pool(
        host=settings.host,
        port=settings.port,
        database=settings.database,
        user=settings.user,
        password=settings.password,
        min_size=2,
        max_size=10,
    )
    return _pool
|
|
|
|
|
|
# ============================================================================
|
|
# FastAPI App
|
|
# ============================================================================
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler.

    Warms the asyncpg pool before the app starts serving and closes it on
    shutdown. The teardown is wrapped in try/finally so the pool is released
    even if the application raises while shutting down (the original skipped
    the close in that case, leaking connections).
    """
    global _pool
    await get_pool()
    try:
        yield
    finally:
        if _pool:
            await _pool.close()
            _pool = None
|
|
|
|
|
|
# FastAPI application; the lifespan hook manages the asyncpg pool.
app = FastAPI(
    title="PostGIS Geo API",
    description="Spatial REST API for heritage institution map",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS middleware
# NOTE(review): allow_origins=["*"] together with allow_credentials=True —
# Starlette does not send credentialed responses for a wildcard origin;
# confirm whether credentials are actually required here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
|
# ============================================================================
|
|
# Helper Functions
|
|
# ============================================================================
|
|
|
|
def serialize_value(val: Any) -> Any:
    """Convert a PostgreSQL value into a JSON-serializable Python object.

    datetimes become ISO-8601 strings, Decimals become floats, bytes are
    decoded as UTF-8 (invalid sequences replaced); everything else —
    including dicts, lists, and None — passes through unchanged.
    """
    if val is None:
        return None
    if isinstance(val, datetime):
        return val.isoformat()
    if isinstance(val, Decimal):
        return float(val)
    if isinstance(val, bytes):
        return val.decode('utf-8', errors='replace')
    # dicts, lists, and all remaining primitives are already serializable
    return val
|
|
|
|
|
|
def row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
    """Convert an asyncpg row to a plain dict, serializing each value."""
    result: Dict[str, Any] = {}
    for key in row.keys():
        result[key] = serialize_value(row[key])
    return result
|
|
|
|
|
|
# ============================================================================
|
|
# API Endpoints
|
|
# ============================================================================
|
|
|
|
@app.get("/", response_model=GeoStatsResponse)
async def get_geo_status() -> GeoStatsResponse:
    """Get geo database status and statistics."""
    pool = await get_pool()

    # One connection, sequential queries: server version first, then a row
    # count per table of interest (missing counts default to 0).
    async with pool.acquire() as conn:
        pg_version = await conn.fetchval("SELECT version()")
        counts = {}
        for table in ("provinces", "municipalities", "institutions", "historical_boundaries"):
            counts[table] = await conn.fetchval(f"SELECT COUNT(*) FROM {table}") or 0

    return GeoStatsResponse(
        status="healthy",
        database=settings.database,
        provinces=counts["provinces"],
        municipalities=counts["municipalities"],
        institutions=counts["institutions"],
        historical_boundaries=counts["historical_boundaries"],
        postgres_version=pg_version.split(',')[0] if pg_version else "unknown",
    )
|
|
|
|
|
|
@app.get("/provinces")
async def get_provinces(
    simplified: bool = Query(True, description="Return simplified geometries")
):
    """Get all provinces as GeoJSON FeatureCollection.

    Args:
        simplified: when True (default), geometries are reduced with
            ST_Simplify(geom, 0.001) to shrink the payload for map rendering.

    Returns:
        A GeoJSON FeatureCollection; each feature is keyed by province_code
        and carries centroid coordinates and area in its properties.
    """
    pool = await get_pool()

    tolerance = 0.001 if simplified else 0
    geom_expr = f"ST_Simplify(geom, {tolerance})" if simplified else "geom"

    async with pool.acquire() as conn:
        rows = await conn.fetch(f"""
            SELECT
                id, province_code, iso_code, name,
                ST_AsGeoJSON({geom_expr})::json as geometry,
                ST_X(centroid) as centroid_lon,
                ST_Y(centroid) as centroid_lat,
                area_km2
            FROM provinces
            ORDER BY name
        """)

    features = []
    for row in rows:
        features.append({
            "type": "Feature",
            "id": row['province_code'],
            "geometry": row['geometry'],
            "properties": {
                "id": row['id'],
                "province_code": row['province_code'],
                "iso_code": row['iso_code'],
                "name": row['name'],
                # Explicit None checks: 0.0 is a valid coordinate/area and must
                # not be coerced to null by a truthiness test.
                "centroid_lon": float(row['centroid_lon']) if row['centroid_lon'] is not None else None,
                "centroid_lat": float(row['centroid_lat']) if row['centroid_lat'] is not None else None,
                "area_km2": float(row['area_km2']) if row['area_km2'] is not None else None,
            }
        })

    return {
        "type": "FeatureCollection",
        "features": features
    }
|
|
|
|
|
|
@app.get("/countries")
async def get_countries(
    simplified: bool = Query(True, description="Return simplified geometries"),
    with_counts: bool = Query(False, description="Include institution counts per country"),
):
    """Get all countries as GeoJSON FeatureCollection with optional institution counts.

    Args:
        simplified: when True (default), geometries are simplified with a
            coarse 0.01 tolerance appropriate for a world view.
        with_counts: when True, each country carries the number of custodians
            whose country_code matches its ISO alpha-2 code.

    Returns:
        A GeoJSON FeatureCollection plus a metadata block with totals.
    """
    pool = await get_pool()

    # Use more aggressive simplification for countries (world view)
    tolerance = 0.01 if simplified else 0

    async with pool.acquire() as conn:
        if with_counts:
            # Join with custodians to get counts per country
            geom_expr = f"ST_Simplify(bc.geom, {tolerance})" if simplified else "bc.geom"
            rows = await conn.fetch(f"""
                SELECT
                    bc.id,
                    bc.iso_a2 as country_code,
                    bc.iso_a3,
                    bc.country_name as name,
                    ST_AsGeoJSON({geom_expr}) as geometry,
                    ST_X(bc.centroid) as centroid_lon,
                    ST_Y(bc.centroid) as centroid_lat,
                    bc.area_km2,
                    COALESCE(counts.institution_count, 0) as institution_count
                FROM boundary_countries bc
                LEFT JOIN (
                    SELECT country_code, COUNT(*) as institution_count
                    FROM custodians
                    WHERE country_code IS NOT NULL
                    GROUP BY country_code
                ) counts ON bc.iso_a2 = counts.country_code
                WHERE bc.geom IS NOT NULL
                ORDER BY bc.country_name
            """)
        else:
            geom_expr = f"ST_Simplify(geom, {tolerance})" if simplified else "geom"
            rows = await conn.fetch(f"""
                SELECT
                    id,
                    iso_a2 as country_code,
                    iso_a3,
                    country_name as name,
                    ST_AsGeoJSON({geom_expr}) as geometry,
                    ST_X(centroid) as centroid_lon,
                    ST_Y(centroid) as centroid_lat,
                    area_km2
                FROM boundary_countries
                WHERE geom IS NOT NULL
                ORDER BY country_name
            """)

    features = []
    total_institutions = 0
    countries_with_data = 0

    for row in rows:
        # Parse geometry from string to dict (ST_AsGeoJSON returns text here,
        # since the query does not cast to ::json)
        geometry = row['geometry']
        if geometry is None:
            # Skip countries with no geometry (e.g., Vatican City)
            continue
        if isinstance(geometry, str):
            geometry = json.loads(geometry)

        # Ensure geometry has required GeoJSON structure
        if not isinstance(geometry, dict) or 'type' not in geometry or 'coordinates' not in geometry:
            continue

        # ISO codes may be padded in the source table; empty strings become None.
        iso_a2 = row['country_code'].strip() if row['country_code'] else None
        iso_a3 = row['iso_a3'].strip() if row['iso_a3'] else None
        institution_count = row['institution_count'] if with_counts else 0

        # Track totals
        if with_counts:
            total_institutions += institution_count
            if institution_count > 0:
                countries_with_data += 1

        # Build properties with frontend-expected field names.
        # Explicit None checks for the centroid: longitude/latitude 0.0 is a
        # valid coordinate (Greenwich meridian / equator) and must not be
        # coerced to null by a truthiness test.
        properties = {
            "id": row['id'],
            "iso_a2": iso_a2,  # Frontend expects iso_a2
            "iso_a3": iso_a3,
            "name": row['name'],
            "institution_count": institution_count,
            "centroid": [
                float(row['centroid_lon']) if row['centroid_lon'] is not None else None,
                float(row['centroid_lat']) if row['centroid_lat'] is not None else None,
            ],
            "area_km2": float(row['area_km2']) if row['area_km2'] is not None else None,
        }

        features.append({
            "type": "Feature",
            "id": iso_a2,
            "geometry": geometry,
            "properties": properties
        })

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "total_institutions": total_institutions,
            "countries_with_data": countries_with_data,
            "type_filter": None,
            "simplified": simplified,
        }
    }
|
|
|
|
|
|
@app.get("/municipalities")
async def get_municipalities(
    province: Optional[str] = Query(None, description="Filter by province ISO code (e.g., NH)"),
    simplified: bool = Query(True, description="Return simplified geometries"),
    limit: int = Query(500, ge=1, le=1000, description="Maximum results")
):
    """Get municipalities as GeoJSON FeatureCollection.

    Args:
        province: optional province ISO code filter (case-insensitive).
        simplified: when True (default), simplify geometries with 0.0005 tolerance.
        limit: maximum number of municipalities to return (1-1000).
    """
    pool = await get_pool()

    tolerance = 0.0005 if simplified else 0
    geom_expr = f"ST_Simplify(m.geom, {tolerance})" if simplified else "m.geom"

    # The province filter shifts the positional parameter used for LIMIT.
    query = f"""
        SELECT
            m.id, m.municipality_code, m.name,
            p.iso_code as province_iso, p.name as province_name,
            ST_AsGeoJSON({geom_expr})::json as geometry,
            ST_X(m.centroid) as centroid_lon,
            ST_Y(m.centroid) as centroid_lat,
            m.area_km2
        FROM municipalities m
        LEFT JOIN provinces p ON m.province_id = p.id
        {'WHERE p.iso_code = $1' if province else ''}
        ORDER BY m.name
        LIMIT {'$2' if province else '$1'}
    """

    async with pool.acquire() as conn:
        if province:
            rows = await conn.fetch(query, province.upper(), limit)
        else:
            rows = await conn.fetch(query, limit)

    features = []
    for row in rows:
        features.append({
            "type": "Feature",
            "id": row['municipality_code'],
            "geometry": row['geometry'],
            "properties": {
                "id": row['id'],
                "code": row['municipality_code'],
                "name": row['name'],
                "province_iso": row['province_iso'],
                "province_name": row['province_name'],
                # Explicit None checks so 0.0 coordinates/areas survive.
                "centroid_lon": float(row['centroid_lon']) if row['centroid_lon'] is not None else None,
                "centroid_lat": float(row['centroid_lat']) if row['centroid_lat'] is not None else None,
                "area_km2": float(row['area_km2']) if row['area_km2'] is not None else None,
            }
        })

    return {
        "type": "FeatureCollection",
        "features": features
    }
|
|
|
|
|
|
@app.get("/institutions")
async def get_institutions(
    bbox: Optional[str] = Query(None, description="Bounding box: minLon,minLat,maxLon,maxLat"),
    province: Optional[str] = Query(None, description="Filter by province ISO code (e.g., NH, ZH)"),
    country: Optional[str] = Query(None, description="Filter by country code (e.g., NL, DE, JP)"),
    type: Optional[str] = Query(None, description="Filter by institution type (G,L,A,M,O,R,C,U,B,E,S,F,I,X,P,H,D,N,T)"),
    limit: int = Query(50000, ge=1, le=100000, description="Maximum results")
):
    """Get institutions as GeoJSON FeatureCollection with full metadata from custodians table"""
    pool = await get_pool()

    # Build WHERE clauses - query custodians table directly.
    # Only the clause *templates* are interpolated into the SQL; every
    # user-supplied value travels as a positional parameter ($1, $2, ...),
    # so the query stays safe against injection.
    conditions = ["lat IS NOT NULL AND lon IS NOT NULL"]
    params = []
    param_count = 0

    if bbox:
        try:
            # bbox arrives as "minLon,minLat,maxLon,maxLat"
            min_lon, min_lat, max_lon, max_lat = map(float, bbox.split(','))
            param_count += 4
            conditions.append(f"""
                lon >= ${param_count-3} AND lat >= ${param_count-2}
                AND lon <= ${param_count-1} AND lat <= ${param_count}
            """)
            params.extend([min_lon, min_lat, max_lon, max_lat])
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid bbox format. Use: minLon,minLat,maxLon,maxLat")

    if province:
        param_count += 1
        conditions.append(f"region_code = ${param_count}")
        params.append(province.upper())

    if type:
        param_count += 1
        conditions.append(f"type = ${param_count}")
        params.append(type.upper())

    if country:
        param_count += 1
        conditions.append(f"country_code = ${param_count}")
        params.append(country.upper())

    # Reserve the final positional parameter for LIMIT.
    param_count += 1
    where_clause = " AND ".join(conditions)

    # Query custodians table with all rich metadata fields
    query = f"""
        SELECT
            ghcid,
            name,
            emic_name,
            type,
            type_name,
            lon,
            lat,
            city,
            region as province,
            region_code as province_iso,
            country_code,
            formatted_address,
            street_address,
            postal_code,
            rating,
            total_ratings,
            wikidata_id,
            website,
            phone,
            email,
            isil_code,
            google_place_id,
            description,
            opening_hours,
            reviews,
            photos,
            photo_urls,
            business_status,
            street_view_url,
            founding_year,
            dissolution_year,
            temporal_extent,
            museum_register,
            youtube_channel_url,
            youtube_subscriber_count,
            youtube_video_count,
            youtube_enrichment,
            social_facebook,
            social_twitter,
            social_instagram,
            wikidata_label_en,
            wikidata_description_en
        FROM custodians
        WHERE {where_clause}
        ORDER BY name
        LIMIT ${param_count}
    """

    params.append(limit)

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    features = []
    for row in rows:
        # Build properties with all available metadata
        props = {
            "ghcid": row['ghcid'],
            "name": row['name'],
            "emic_name": row['emic_name'],
            "type": row['type'],
            "type_name": row['type_name'],
            "city": row['city'],
            "province": row['province'],
            "province_iso": row['province_iso'],
            "country_code": row['country_code'],
            "formatted_address": row['formatted_address'],
            # NOTE(review): truthiness check maps a rating of 0 to None;
            # harmless if ratings are always 1-5 — confirm the value range.
            "rating": float(row['rating']) if row['rating'] else None,
            "total_ratings": row['total_ratings'],
            "wikidata_id": row['wikidata_id'],
            "website": row['website'],
            "phone": row['phone'],
            "email": row['email'],
            "isil_code": row['isil_code'],
            "google_place_id": row['google_place_id'],
            "description": row['description'],
            "business_status": row['business_status'],
            "street_view_url": row['street_view_url'],
            "founding_year": row['founding_year'],
            "dissolution_year": row['dissolution_year'],
        }

        # Add JSONB fields (handle potential None values)
        if row['opening_hours']:
            props["opening_hours"] = row['opening_hours']
        if row['reviews']:
            props["reviews"] = row['reviews']
        if row['photos']:
            props["photos"] = row['photos']
        if row['photo_urls']:
            props["photo_urls"] = row['photo_urls']
        if row['temporal_extent']:
            props["temporal_extent"] = row['temporal_extent']
        if row['museum_register']:
            props["museum_register"] = row['museum_register']
        if row['youtube_enrichment']:
            props["youtube_enrichment"] = row['youtube_enrichment']
        elif row['youtube_channel_url']:
            # Build minimal YouTube data if enrichment not present
            props["youtube"] = {
                "channel_url": row['youtube_channel_url'],
                "subscriber_count": row['youtube_subscriber_count'],
                "video_count": row['youtube_video_count'],
            }

        # Social media: only emitted when at least one handle is present.
        social = {}
        if row['social_facebook']:
            social['facebook'] = row['social_facebook']
        if row['social_twitter']:
            social['twitter'] = row['social_twitter']
        if row['social_instagram']:
            social['instagram'] = row['social_instagram']
        if social:
            props["social_media"] = social

        # Wikidata labels
        if row['wikidata_label_en']:
            props["wikidata_label"] = row['wikidata_label_en']
        if row['wikidata_description_en']:
            props["wikidata_description"] = row['wikidata_description_en']

        features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [float(row['lon']), float(row['lat'])]
            },
            "properties": props
        })

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "limit": limit,
            "filters": {
                "bbox": bbox,
                "province": province,
                "type": type
            }
        }
    }
|
|
|
|
|
|
@app.get("/institution/{ghcid}")
async def get_institution(ghcid: str):
    """Get detailed information for a single institution with full metadata"""
    pool = await get_pool()

    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT
                ghcid,
                name,
                emic_name,
                verified_name,
                type,
                type_name,
                lon,
                lat,
                city,
                region as province,
                region_code as province_iso,
                country_code,
                formatted_address,
                street_address,
                postal_code,
                website,
                phone,
                email,
                wikidata_id,
                isil_code,
                google_place_id,
                rating,
                total_ratings,
                description,
                business_status,
                street_view_url,
                google_maps_url,
                opening_hours,
                reviews,
                photos,
                photo_urls,
                founding_year,
                founding_date,
                dissolution_year,
                dissolution_date,
                temporal_extent,
                museum_register,
                youtube_channel_id,
                youtube_channel_url,
                youtube_subscriber_count,
                youtube_video_count,
                youtube_view_count,
                youtube_enrichment,
                social_facebook,
                social_twitter,
                social_instagram,
                social_linkedin,
                social_youtube,
                logo_url,
                wikidata_label_nl,
                wikidata_label_en,
                wikidata_description_nl,
                wikidata_description_en,
                wikidata_types,
                wikidata_inception,
                wikidata_enrichment,
                genealogiewerkbalk,
                nan_isil_enrichment,
                kb_enrichment,
                zcbs_enrichment,
                web_claims,
                ghcid_uuid,
                ghcid_numeric,
                identifiers,
                data_source,
                data_tier,
                provenance
            FROM custodians
            WHERE ghcid = $1
        """, ghcid)

    if not row:
        raise HTTPException(status_code=404, detail=f"Institution '{ghcid}' not found")

    # Build comprehensive response with all metadata.
    # Scalar/core fields are always present (possibly None); grouped and
    # JSONB data below is only included when non-empty.
    result = {
        "ghcid": row['ghcid'],
        "name": row['name'],
        "emic_name": row['emic_name'],
        "verified_name": row['verified_name'],
        "type": row['type'],
        "type_name": row['type_name'],
        # NOTE(review): truthiness checks map lat/lon/rating of exactly 0 to
        # None; coordinates of 0.0 are valid in principle — confirm the data
        # never legitimately contains them.
        "lat": float(row['lat']) if row['lat'] else None,
        "lon": float(row['lon']) if row['lon'] else None,
        "city": row['city'],
        "province": row['province'],
        "province_iso": row['province_iso'],
        "country_code": row['country_code'],
        "formatted_address": row['formatted_address'],
        "street_address": row['street_address'],
        "postal_code": row['postal_code'],
        "website": row['website'],
        "phone": row['phone'],
        "email": row['email'],
        "wikidata_id": row['wikidata_id'],
        "isil_code": row['isil_code'],
        "google_place_id": row['google_place_id'],
        "rating": float(row['rating']) if row['rating'] else None,
        "total_ratings": row['total_ratings'],
        "description": row['description'],
        "business_status": row['business_status'],
        "street_view_url": row['street_view_url'],
        "google_maps_url": row['google_maps_url'],
    }

    # JSONB fields - only include if present
    if row['opening_hours']:
        result["opening_hours"] = row['opening_hours']
    if row['reviews']:
        result["reviews"] = row['reviews']
    if row['photos']:
        result["photos"] = row['photos']
    if row['photo_urls']:
        result["photo_urls"] = row['photo_urls']
    if row['identifiers']:
        result["identifiers"] = row['identifiers']

    # Temporal data, grouped under "temporal" when any field is present
    temporal = {}
    if row['founding_year']:
        temporal["founding_year"] = row['founding_year']
    if row['founding_date']:
        temporal["founding_date"] = row['founding_date'].isoformat() if row['founding_date'] else None
    if row['dissolution_year']:
        temporal["dissolution_year"] = row['dissolution_year']
    if row['dissolution_date']:
        temporal["dissolution_date"] = row['dissolution_date'].isoformat() if row['dissolution_date'] else None
    if row['temporal_extent']:
        temporal["extent"] = row['temporal_extent']
    if temporal:
        result["temporal"] = temporal

    # Museum register
    if row['museum_register']:
        result["museum_register"] = row['museum_register']

    # YouTube enrichment, grouped under "youtube" when any field is present
    youtube = {}
    if row['youtube_channel_id']:
        youtube["channel_id"] = row['youtube_channel_id']
    if row['youtube_channel_url']:
        youtube["channel_url"] = row['youtube_channel_url']
    if row['youtube_subscriber_count']:
        youtube["subscriber_count"] = row['youtube_subscriber_count']
    if row['youtube_video_count']:
        youtube["video_count"] = row['youtube_video_count']
    if row['youtube_view_count']:
        youtube["view_count"] = row['youtube_view_count']
    if row['youtube_enrichment']:
        youtube["enrichment"] = row['youtube_enrichment']
    if youtube:
        result["youtube"] = youtube

    # Social media, grouped under "social_media" when any handle is present
    social = {}
    if row['social_facebook']:
        social["facebook"] = row['social_facebook']
    if row['social_twitter']:
        social["twitter"] = row['social_twitter']
    if row['social_instagram']:
        social["instagram"] = row['social_instagram']
    if row['social_linkedin']:
        social["linkedin"] = row['social_linkedin']
    if row['social_youtube']:
        social["youtube"] = row['social_youtube']
    if social:
        result["social_media"] = social

    # Wikidata labels/descriptions, grouped under "wikidata"
    wikidata = {}
    if row['wikidata_label_nl']:
        wikidata["label_nl"] = row['wikidata_label_nl']
    if row['wikidata_label_en']:
        wikidata["label_en"] = row['wikidata_label_en']
    if row['wikidata_description_nl']:
        wikidata["description_nl"] = row['wikidata_description_nl']
    if row['wikidata_description_en']:
        wikidata["description_en"] = row['wikidata_description_en']
    if row['wikidata_types']:
        wikidata["types"] = row['wikidata_types']
    if row['wikidata_inception']:
        wikidata["inception"] = row['wikidata_inception']
    if row['wikidata_enrichment']:
        wikidata["enrichment"] = row['wikidata_enrichment']
    if wikidata:
        result["wikidata"] = wikidata

    # Logo
    if row['logo_url']:
        result["logo_url"] = row['logo_url']

    # Other enrichment data (passed through as-is)
    if row['genealogiewerkbalk']:
        result["genealogiewerkbalk"] = row['genealogiewerkbalk']
    if row['nan_isil_enrichment']:
        result["nan_isil_enrichment"] = row['nan_isil_enrichment']
    if row['kb_enrichment']:
        result["kb_enrichment"] = row['kb_enrichment']
    if row['zcbs_enrichment']:
        result["zcbs_enrichment"] = row['zcbs_enrichment']
    if row['web_claims']:
        result["web_claims"] = row['web_claims']

    # GHCID details (identifier variants); "current" is always present
    ghcid_data = {"current": row['ghcid']}
    if row['ghcid_uuid']:
        ghcid_data["uuid"] = str(row['ghcid_uuid'])
    if row['ghcid_numeric']:
        ghcid_data["numeric"] = int(row['ghcid_numeric'])
    result["ghcid_details"] = ghcid_data

    # Provenance, only when any provenance field is set
    if row['data_source'] or row['data_tier'] or row['provenance']:
        result["provenance"] = {
            "data_source": row['data_source'],
            "data_tier": row['data_tier'],
            "details": row['provenance'],
        }

    return result
|
|
|
|
|
|
@app.get("/search")
async def search_institutions(
    q: str = Query(..., min_length=2, description="Search query"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results")
):
    """Search institutions by name.

    Runs a full-text search over name + description first; if that returns
    nothing, falls back to a case-insensitive substring (ILIKE) match on the
    name. All user input is passed as positional parameters.
    """
    pool = await get_pool()

    # Use PostgreSQL full-text search with ranking
    query = """
        SELECT
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            ST_X(i.geom) as lon,
            ST_Y(i.geom) as lat,
            i.city,
            p.iso_code as province_iso,
            i.rating,
            ts_rank_cd(
                to_tsvector('simple', i.name || ' ' || COALESCE(i.description, '')),
                plainto_tsquery('simple', $1)
            ) as rank
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE to_tsvector('simple', i.name || ' ' || COALESCE(i.description, ''))
            @@ plainto_tsquery('simple', $1)
    """

    params = [q]
    param_count = 1

    if type:
        param_count += 1
        query += f" AND i.institution_type = ${param_count}"
        params.append(type.upper())

    param_count += 1
    query += f" ORDER BY rank DESC, i.name LIMIT ${param_count}"
    params.append(limit)

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    # If no results from FTS, try ILIKE fallback
    if not rows:
        fallback_query = f"""
            SELECT
                i.ghcid_current as ghcid,
                i.name,
                i.institution_type as type,
                i.type_name,
                ST_X(i.geom) as lon,
                ST_Y(i.geom) as lat,
                i.city,
                p.iso_code as province_iso,
                i.rating,
                0 as rank
            FROM institutions i
            LEFT JOIN provinces p ON i.province_id = p.id
            WHERE i.name ILIKE $1
            {'AND i.institution_type = $2' if type else ''}
            ORDER BY i.name
            LIMIT {'$3' if type else '$2'}
        """

        async with pool.acquire() as conn:
            if type:
                rows = await conn.fetch(fallback_query, f"%{q}%", type.upper(), limit)
            else:
                rows = await conn.fetch(fallback_query, f"%{q}%", limit)

    return {
        "query": q,
        "count": len(rows),
        "results": [
            {
                "ghcid": row['ghcid'],
                "name": row['name'],
                "type": row['type'],
                "type_name": row['type_name'],
                # Explicit None checks: ST_X/ST_Y can legitimately be 0.0
                # (Greenwich meridian / equator); a truthiness test would
                # silently turn those into null.
                "lon": float(row['lon']) if row['lon'] is not None else None,
                "lat": float(row['lat']) if row['lat'] is not None else None,
                "city": row['city'],
                "province_iso": row['province_iso'],
                "rating": float(row['rating']) if row['rating'] is not None else None,
            }
            for row in rows
        ]
    }
|
|
|
|
|
|
@app.get("/nearby", response_model=List[NearbyInstitution])
async def find_nearby(
    lon: float = Query(..., description="Longitude"),
    lat: float = Query(..., description="Latitude"),
    radius_km: float = Query(10, ge=0.1, le=100, description="Search radius in km"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results")
):
    """Find institutions near a point.

    Uses geography-typed ST_DWithin for an indexed radius filter, and
    ST_Distance (metres, divided by 1000) for the reported distance.
    Results are ordered nearest-first.
    """
    pool = await get_pool()

    query = """
        SELECT
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            (ST_Distance(i.geom::geography, ST_SetSRID(ST_Point($1, $2), 4326)::geography) / 1000) as distance_km,
            i.city,
            p.name as province,
            i.rating
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE ST_DWithin(
            i.geom::geography,
            ST_SetSRID(ST_Point($1, $2), 4326)::geography,
            $3 * 1000
        )
    """

    params = [lon, lat, radius_km]
    param_count = 3

    if type:
        param_count += 1
        query += f" AND i.institution_type = ${param_count}"
        params.append(type.upper())

    param_count += 1
    query += f" ORDER BY distance_km LIMIT ${param_count}"
    params.append(limit)

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    return [
        NearbyInstitution(
            ghcid=row['ghcid'],
            name=row['name'],
            type=row['type'],
            type_name=row['type_name'],
            distance_km=round(float(row['distance_km']), 2),
            city=row['city'],
            province=row['province'],
            # Explicit None check so a stored rating of 0 is not dropped.
            rating=float(row['rating']) if row['rating'] is not None else None,
        )
        for row in rows
    ]
|
|
|
|
|
|
@app.get("/admin/point", response_model=AdminPoint)
async def get_admin_for_point(
    lon: float = Query(..., description="Longitude"),
    lat: float = Query(..., description="Latitude")
):
    """Find which municipality/province contains a point"""
    pool = await get_pool()

    # First attempt: municipality lookup (also yields the parent province).
    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT
                p.province_code,
                p.name as province_name,
                m.municipality_code,
                m.name as municipality_name
            FROM municipalities m
            JOIN provinces p ON m.province_id = p.id
            WHERE ST_Contains(m.geom, ST_SetSRID(ST_Point($1, $2), 4326))
            LIMIT 1
        """, lon, lat)

    # Fallback: the point may sit inside a province but outside any
    # municipality polygon.
    if row is None:
        async with pool.acquire() as conn:
            row = await conn.fetchrow("""
                SELECT
                    province_code,
                    name as province_name,
                    NULL as municipality_code,
                    NULL as municipality_name
                FROM provinces
                WHERE ST_Contains(geom, ST_SetSRID(ST_Point($1, $2), 4326))
                LIMIT 1
            """, lon, lat)

    # Nothing matched: every field is None.
    if row is None:
        return AdminPoint(
            province_code=None,
            province_name=None,
            municipality_code=None,
            municipality_name=None
        )

    return AdminPoint(
        province_code=row['province_code'],
        province_name=row['province_name'],
        municipality_code=row['municipality_code'],
        municipality_name=row['municipality_name']
    )
|
|
|
|
|
|
@app.get("/historical")
async def get_historical_boundaries(
    year: int = Query(1500, description="Reference year"),
    boundary_type: Optional[str] = Query(None, description="Boundary type filter"),
    simplified: bool = Query(True, description="Return simplified geometries"),
    limit: int = Query(1000, ge=1, le=10000, description="Maximum results")
):
    """Get historical boundaries as GeoJSON.

    Args:
        year: reference year to match against historical_boundaries.reference_year.
        boundary_type: optional exact match on boundary_type.
        simplified: simplify geometries with 0.001 tolerance when True.
        limit: maximum number of boundaries returned.
    """
    pool = await get_pool()

    tolerance = 0.001 if simplified else 0
    geom_expr = f"ST_Simplify(geom, {tolerance})" if simplified else "geom"

    conditions = ["reference_year = $1"]
    params = [year]
    param_count = 1

    if boundary_type:
        param_count += 1
        conditions.append(f"boundary_type = ${param_count}")
        params.append(boundary_type)

    # Reserve the final positional parameter for LIMIT.
    param_count += 1
    where_clause = " AND ".join(conditions)

    query = f"""
        SELECT
            id, boundary_code, name, boundary_type, reference_year,
            ST_AsGeoJSON({geom_expr})::json as geometry,
            ST_X(centroid) as centroid_lon,
            ST_Y(centroid) as centroid_lat,
            area_km2
        FROM historical_boundaries
        WHERE {where_clause}
        ORDER BY name
        LIMIT ${param_count}
    """
    params.append(limit)

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    features = []
    for row in rows:
        # Skip rows whose geometry is NULL — they cannot form a valid feature.
        if row['geometry']:
            features.append({
                "type": "Feature",
                "id": row['boundary_code'],
                "geometry": row['geometry'],
                "properties": {
                    "id": row['id'],
                    "code": row['boundary_code'],
                    "name": row['name'],
                    "type": row['boundary_type'],
                    "year": row['reference_year'],
                    # Explicit None checks so 0.0 coordinates/areas survive.
                    "centroid_lon": float(row['centroid_lon']) if row['centroid_lon'] is not None else None,
                    "centroid_lat": float(row['centroid_lat']) if row['centroid_lat'] is not None else None,
                    "area_km2": float(row['area_km2']) if row['area_km2'] is not None else None,
                }
            })

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "year": year,
            "boundary_type": boundary_type,
            "count": len(features)
        }
    }
|
|
|
|
|
|
@app.get("/stats/by-type")
async def get_stats_by_type():
    """Get institution counts (and average rating) grouped by institution type."""
    pool = await get_pool()

    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                institution_type as type,
                type_name,
                COUNT(*) as count,
                ROUND(AVG(rating)::numeric, 2) as avg_rating
            FROM institutions
            WHERE geom IS NOT NULL
            GROUP BY institution_type, type_name
            ORDER BY count DESC
        """)

    return {
        "stats": [
            {
                "type": row['type'],
                "type_name": row['type_name'],
                "count": row['count'],
                # Explicit None check: AVG returns NULL only when no ratings
                # exist; an average of exactly 0 should not be dropped.
                "avg_rating": float(row['avg_rating']) if row['avg_rating'] is not None else None
            }
            for row in rows
        ]
    }
|
|
|
|
|
|
@app.get("/stats/by-province")
async def get_stats_by_province():
    """Get institution counts (and average rating) grouped by province.

    Uses a LEFT JOIN so provinces without institutions still appear with
    a count of 0.
    """
    pool = await get_pool()

    async with pool.acquire() as conn:
        rows = await conn.fetch("""
            SELECT
                p.iso_code,
                p.name as province_name,
                COUNT(i.id) as count,
                ROUND(AVG(i.rating)::numeric, 2) as avg_rating
            FROM provinces p
            LEFT JOIN institutions i ON i.province_id = p.id
            GROUP BY p.id, p.iso_code, p.name
            ORDER BY count DESC
        """)

    return {
        "stats": [
            {
                "province_iso": row['iso_code'],
                "province_name": row['province_name'],
                "count": row['count'],
                # Explicit None check: only a NULL average (no ratings) maps
                # to None; a legitimate 0 average is preserved.
                "avg_rating": float(row['avg_rating']) if row['avg_rating'] is not None else None
            }
            for row in rows
        ]
    }
|
|
|
|
|
|
# ============================================================================
|
|
# Main
|
|
# ============================================================================
|
|
|
|
if __name__ == "__main__":
    # Local import: uvicorn is only needed when running this module directly.
    import uvicorn
    uvicorn.run(
        # Import-string form ("module:attr") is required for reload to work.
        "geo_api:app",
        host=settings.api_host,
        port=settings.api_port,
        # NOTE(review): reload=True is a development setting — confirm it is
        # not used for production deployments.
        reload=True,
    )
|