"""
PostGIS Geo API for Heritage Custodian Map

FastAPI backend providing spatial queries for bronhouder.nl map.

Mounted at /api/geo/ via Caddy reverse proxy.

Endpoints:
- GET / - Health check and geo statistics
- GET /countries - Get all countries as GeoJSON with institution counts
- GET /provinces - Get all provinces as GeoJSON
- GET /municipalities - Get municipalities (with filters)
- GET /institutions - Get institutions as GeoJSON (with bbox/type filters)
- GET /institution/:ghcid - Get single institution details
- GET /historical - Get historical boundaries
- GET /search - Search institutions by name
- GET /admin/point - Find admin unit for a point
- GET /nearby - Find institutions near a point
- GET /stats/by-type - Institution counts by type
- GET /stats/by-province - Institution counts by province

Person Endpoints (Beschermers):
- GET /persons - List persons with filters (custodian, type, country)
- GET /persons/count - Get total person count for stats
- GET /persons/search - Search persons by name/headline/custodian
- GET /person/:staff_id - Get single person details
"""
|
|
|
|
import json
import os
from contextlib import asynccontextmanager
from datetime import datetime
from decimal import Decimal
from typing import Any, Dict, List, Optional

import asyncpg
from fastapi import APIRouter, FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
|
|
|
|
|
|
# ============================================================================
|
|
# Configuration
|
|
# ============================================================================
|
|
|
|
class GeoSettings(BaseModel):
    """PostGIS geo database settings - connects to glam_geo with PostGIS boundaries.

    All values come from environment variables (read once at import time)
    with local-development defaults.
    """
    host: str = os.getenv("GEO_POSTGRES_HOST", "localhost")
    port: int = int(os.getenv("GEO_POSTGRES_PORT", "5432"))
    database: str = os.getenv("GEO_POSTGRES_DB", "glam_geo")  # glam_geo has boundary data
    user: str = os.getenv("GEO_POSTGRES_USER", "glam_api")
    password: str = os.getenv("GEO_POSTGRES_PASSWORD", "")

    # Server settings (where uvicorn/this API binds)
    api_host: str = os.getenv("GEO_API_HOST", "0.0.0.0")
    api_port: int = int(os.getenv("GEO_API_PORT", "8002"))


# Module-level singleton used by get_pool() and the status endpoint.
settings = GeoSettings()
|
|
|
|
|
|
# ============================================================================
|
|
# Pydantic Models
|
|
# ============================================================================
|
|
|
|
class GeoStatsResponse(BaseModel):
    """Geo database statistics returned by the health-check endpoint (GET /)."""
    status: str  # "healthy" when the database queries succeed
    database: str  # connected database name (settings.database)
    provinces: int  # row count of the provinces table
    municipalities: int  # row count of the municipalities table
    institutions: int  # row count of the institutions table
    historical_boundaries: int  # row count of the historical_boundaries table
    postgres_version: str  # first comma-separated segment of SELECT version()
|
|
|
|
|
|
class InstitutionDetail(BaseModel):
    """Detailed institution information.

    NOTE(review): declared but the /institution/{ghcid} endpoint returns a
    plain dict rather than this model — confirm whether it is still used.
    """
    ghcid: str  # stable heritage-custodian identifier
    name: str
    type: str  # single-letter institution type code
    type_name: Optional[str]  # human-readable form of `type`
    lat: Optional[float]
    lon: Optional[float]
    address: Optional[str]
    city: Optional[str]
    province: Optional[str]
    website: Optional[str]
    phone: Optional[str]
    wikidata_id: Optional[str]
    rating: Optional[float]  # aggregated review rating
    total_ratings: Optional[int]
    description: Optional[str]
    reviews: Optional[List[Dict]]  # raw review objects (JSONB passthrough)
    genealogiewerkbalk: Optional[Dict]  # genealogy-toolbar enrichment blob
|
|
|
|
|
|
class AdminPoint(BaseModel):
    """Administrative unit(s) containing a point; all fields None when no match."""
    province_code: Optional[str]
    province_name: Optional[str]
    municipality_code: Optional[str]  # None when only a province match was found
    municipality_name: Optional[str]
|
|
|
|
|
|
class NearbyInstitution(BaseModel):
    """Institution with distance from a query point (used by GET /nearby)."""
    ghcid: str
    name: str
    type: str  # single-letter institution type code
    type_name: Optional[str]
    distance_km: float  # geodesic distance, rounded to 2 decimals
    city: Optional[str]
    province: Optional[str]
    rating: Optional[float]
|
|
|
|
|
|
class PersonSummary(BaseModel):
    """Summary person information for list views (GET /persons)."""
    staff_id: str
    name: str
    headline: Optional[str]
    location: Optional[str]
    country_code: Optional[str]
    custodian_slug: Optional[str]  # slug of the employing custodian institution
    custodian_name: Optional[str]
    linkedin_url: Optional[str]
    profile_image_url: Optional[str]
    heritage_relevant: bool = True
    # default_factory gives each instance its own list instead of declaring
    # one shared mutable list object as the class-level default.
    heritage_types: List[str] = Field(default_factory=list)
|
|
|
|
|
|
class PersonDetail(BaseModel):
    """Detailed person information (GET /person/{staff_id})."""
    staff_id: str
    name: str
    headline: Optional[str]
    location: Optional[str]
    country_code: Optional[str]
    custodian_slug: Optional[str]  # slug of the employing custodian institution
    custodian_name: Optional[str]
    linkedin_url: Optional[str]
    profile_image_url: Optional[str]
    heritage_relevant: bool = True
    # default_factory gives each instance its own container instead of
    # declaring shared mutable objects as class-level defaults.
    heritage_types: List[str] = Field(default_factory=list)
    experience: List[Dict] = Field(default_factory=list)
    education: List[Dict] = Field(default_factory=list)
    skills: List[str] = Field(default_factory=list)
    languages: List[Dict] = Field(default_factory=list)
    about: Optional[str]
    connections: Optional[str]  # e.g. "500+"; kept as free text
    extraction_date: Optional[str]
    extraction_method: Optional[str]
    source_file: Optional[str]
|
|
|
|
|
|
# ============================================================================
|
|
# Global State
|
|
# ============================================================================
|
|
|
|
# Lazily-created shared connection pool and process start timestamp.
_pool: Optional[asyncpg.Pool] = None
_start_time: datetime = datetime.now()


async def get_pool() -> asyncpg.Pool:
    """Return the shared asyncpg pool, creating it on first use.

    Connection parameters come from the module-level ``settings``.
    """
    global _pool
    if _pool is None:
        _pool = await asyncpg.create_pool(
            host=settings.host,
            port=settings.port,
            database=settings.database,
            user=settings.user,
            password=settings.password,
            min_size=2,
            max_size=10,
        )
    return _pool
|
|
|
|
|
|
# ============================================================================
|
|
# FastAPI App
|
|
# ============================================================================
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler.

    Warms up the connection pool before serving requests and closes it
    (resetting the module-level handle) on shutdown.
    """
    global _pool
    await get_pool()
    yield
    if _pool:
        await _pool.close()
        _pool = None
|
|
|
|
|
|
app = FastAPI(
    title="PostGIS Geo API",
    description="Spatial REST API for heritage institution map",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS middleware - wide open; the API serves public map data only.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# GZip compression middleware - compresses responses >1KB
# Reduces ~126MB JSON payload to ~20-30MB (70-80% reduction)
app.add_middleware(GZipMiddleware, minimum_size=1000)
|
|
|
|
|
|
# ============================================================================
|
|
# Helper Functions
|
|
# ============================================================================
|
|
|
|
def serialize_value(val: Any) -> Any:
    """Convert a PostgreSQL value into a JSON-serializable Python object.

    datetimes become ISO-8601 strings, Decimals become floats, and raw
    bytes are decoded as UTF-8 (invalid sequences replaced). None, dicts,
    lists and all other values pass through unchanged.
    """
    if isinstance(val, datetime):
        return val.isoformat()
    if isinstance(val, Decimal):
        return float(val)
    if isinstance(val, bytes):
        return val.decode('utf-8', errors='replace')
    # None, containers, and primitives are already JSON-friendly.
    return val
|
|
|
|
|
|
def row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
    """Convert an asyncpg row into a plain dict of JSON-serializable values."""
    result: Dict[str, Any] = {}
    for key in row.keys():
        result[key] = serialize_value(row[key])
    return result
|
|
|
|
|
|
# ============================================================================
|
|
# API Endpoints
|
|
# ============================================================================
|
|
|
|
@app.get("/", response_model=GeoStatsResponse)
async def get_geo_status() -> GeoStatsResponse:
    """Get geo database status and statistics (row counts per core table)."""
    pool = await get_pool()

    async with pool.acquire() as conn:
        version = await conn.fetchval("SELECT version()")
        provinces = await conn.fetchval("SELECT COUNT(*) FROM provinces")
        municipalities = await conn.fetchval("SELECT COUNT(*) FROM municipalities")
        institutions = await conn.fetchval("SELECT COUNT(*) FROM institutions")
        historical = await conn.fetchval("SELECT COUNT(*) FROM historical_boundaries")

    # Keep only the leading "PostgreSQL x.y ..." part of the version banner.
    pg_version = version.split(',')[0] if version else "unknown"

    return GeoStatsResponse(
        status="healthy",
        database=settings.database,
        provinces=provinces or 0,
        municipalities=municipalities or 0,
        institutions=institutions or 0,
        historical_boundaries=historical or 0,
        postgres_version=pg_version,
    )
|
|
|
|
|
|
@app.get("/provinces")
async def get_provinces(
    simplified: bool = Query(True, description="Return simplified geometries")
):
    """Get all provinces as a GeoJSON FeatureCollection.

    When ``simplified`` is true the geometry is reduced with ST_Simplify
    (tolerance 0.001 degrees) to shrink the payload.
    """
    pool = await get_pool()

    tolerance = 0.001 if simplified else 0
    geom_expr = 'ST_Simplify(geom, ' + str(tolerance) + ')' if simplified else 'geom'

    async with pool.acquire() as conn:
        rows = await conn.fetch(f"""
            SELECT
                id, province_code, iso_code, name,
                ST_AsGeoJSON({geom_expr})::json as geometry,
                ST_X(centroid) as centroid_lon,
                ST_Y(centroid) as centroid_lat,
                area_km2
            FROM provinces
            ORDER BY name
        """)

    features = [
        {
            "type": "Feature",
            "id": row['province_code'],
            "geometry": row['geometry'],
            "properties": {
                "id": row['id'],
                "province_code": row['province_code'],
                "iso_code": row['iso_code'],
                "name": row['name'],
                "centroid_lon": float(row['centroid_lon']) if row['centroid_lon'] else None,
                "centroid_lat": float(row['centroid_lat']) if row['centroid_lat'] else None,
                "area_km2": float(row['area_km2']) if row['area_km2'] else None,
            },
        }
        for row in rows
    ]

    return {"type": "FeatureCollection", "features": features}
|
|
|
|
|
|
@app.get("/countries")
async def get_countries(
    simplified: bool = Query(True, description="Return simplified geometries"),
    with_counts: bool = Query(False, description="Include institution counts per country"),
):
    """Get all countries as GeoJSON FeatureCollection with optional institution counts.

    When ``with_counts`` is true, each feature carries the number of
    custodian institutions in that country (joined on ISO alpha-2 code).
    """
    pool = await get_pool()

    # Use more aggressive simplification for countries (world view)
    tolerance = 0.01 if simplified else 0

    async with pool.acquire() as conn:
        if with_counts:
            # Join with custodians to get counts per country
            geom_expr = 'ST_Simplify(bc.geom, ' + str(tolerance) + ')' if simplified else 'bc.geom'
            rows = await conn.fetch(f"""
                SELECT
                    bc.id,
                    bc.iso_a2 as country_code,
                    bc.iso_a3,
                    bc.country_name as name,
                    ST_AsGeoJSON({geom_expr}) as geometry,
                    ST_X(bc.centroid) as centroid_lon,
                    ST_Y(bc.centroid) as centroid_lat,
                    bc.area_km2,
                    COALESCE(counts.institution_count, 0) as institution_count
                FROM boundary_countries bc
                LEFT JOIN (
                    SELECT country_code, COUNT(*) as institution_count
                    FROM custodians
                    WHERE country_code IS NOT NULL
                    GROUP BY country_code
                ) counts ON bc.iso_a2 = counts.country_code
                WHERE bc.geom IS NOT NULL
                ORDER BY bc.country_name
            """)
        else:
            geom_expr = 'ST_Simplify(geom, ' + str(tolerance) + ')' if simplified else 'geom'
            rows = await conn.fetch(f"""
                SELECT
                    id,
                    iso_a2 as country_code,
                    iso_a3,
                    country_name as name,
                    ST_AsGeoJSON({geom_expr}) as geometry,
                    ST_X(centroid) as centroid_lon,
                    ST_Y(centroid) as centroid_lat,
                    area_km2
                FROM boundary_countries
                WHERE geom IS NOT NULL
                ORDER BY country_name
            """)

    features = []
    total_institutions = 0
    countries_with_data = 0

    for row in rows:
        # Parse geometry from string to dict (ST_AsGeoJSON returns text)
        geometry = row['geometry']
        if geometry is None:
            # Skip countries with no geometry (e.g., Vatican City)
            continue
        if isinstance(geometry, str):
            geometry = json.loads(geometry)

        # Ensure geometry has required structure
        if not isinstance(geometry, dict) or 'type' not in geometry or 'coordinates' not in geometry:
            continue

        iso_a2 = row['country_code'].strip() if row['country_code'] else None
        iso_a3 = row['iso_a3'].strip() if row['iso_a3'] else None
        institution_count = row['institution_count'] if with_counts else 0

        # Track totals for the metadata block
        if with_counts:
            total_institutions += institution_count
            if institution_count > 0:
                countries_with_data += 1

        # Build properties with frontend-expected field names.
        # Use `is not None` (not truthiness): a coordinate of exactly 0.0
        # (prime meridian / equator) is valid and must not become None.
        properties = {
            "id": row['id'],
            "iso_a2": iso_a2,  # Frontend expects iso_a2
            "iso_a3": iso_a3,
            "name": row['name'],
            "institution_count": institution_count,
            "centroid": [
                float(row['centroid_lon']) if row['centroid_lon'] is not None else None,
                float(row['centroid_lat']) if row['centroid_lat'] is not None else None,
            ],
            "area_km2": float(row['area_km2']) if row['area_km2'] is not None else None,
        }

        features.append({
            "type": "Feature",
            "id": iso_a2,
            "geometry": geometry,
            "properties": properties,
        })

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "total_institutions": total_institutions,
            "countries_with_data": countries_with_data,
            "type_filter": None,
            "simplified": simplified,
        }
    }
|
|
|
|
|
|
@app.get("/municipalities")
async def get_municipalities(
    province: Optional[str] = Query(None, description="Filter by province ISO code (e.g., NH)"),
    simplified: bool = Query(True, description="Return simplified geometries"),
    limit: int = Query(500, ge=1, le=1000, description="Maximum results")
):
    """Get municipalities as a GeoJSON FeatureCollection.

    Optionally filtered by province ISO code; geometries are simplified
    (tolerance 0.0005 degrees) unless ``simplified`` is false.
    """
    pool = await get_pool()

    tolerance = 0.0005 if simplified else 0
    geom_expr = 'ST_Simplify(m.geom, ' + str(tolerance) + ')' if simplified else 'm.geom'
    # Placeholder numbering shifts by one when the province filter is present.
    where_sql = 'WHERE p.iso_code = $1' if province else ''
    limit_ph = '$2' if province else '$1'

    query = f"""
        SELECT
            m.id, m.municipality_code, m.name,
            p.iso_code as province_iso, p.name as province_name,
            ST_AsGeoJSON({geom_expr})::json as geometry,
            ST_X(m.centroid) as centroid_lon,
            ST_Y(m.centroid) as centroid_lat,
            m.area_km2
        FROM municipalities m
        LEFT JOIN provinces p ON m.province_id = p.id
        {where_sql}
        ORDER BY m.name
        LIMIT {limit_ph}
    """

    async with pool.acquire() as conn:
        if province:
            rows = await conn.fetch(query, province.upper(), limit)
        else:
            rows = await conn.fetch(query, limit)

    features = [
        {
            "type": "Feature",
            "id": row['municipality_code'],
            "geometry": row['geometry'],
            "properties": {
                "id": row['id'],
                "code": row['municipality_code'],
                "name": row['name'],
                "province_iso": row['province_iso'],
                "province_name": row['province_name'],
                "centroid_lon": float(row['centroid_lon']) if row['centroid_lon'] else None,
                "centroid_lat": float(row['centroid_lat']) if row['centroid_lat'] else None,
                "area_km2": float(row['area_km2']) if row['area_km2'] else None,
            },
        }
        for row in rows
    ]

    return {"type": "FeatureCollection", "features": features}
|
|
|
|
|
|
@app.get("/institutions")
async def get_institutions(
    bbox: Optional[str] = Query(None, description="Bounding box: minLon,minLat,maxLon,maxLat"),
    province: Optional[str] = Query(None, description="Filter by province ISO code (e.g., NH, ZH)"),
    country: Optional[str] = Query(None, description="Filter by country code (e.g., NL, DE, JP)"),
    type: Optional[str] = Query(None, description="Filter by institution type (G,L,A,M,O,R,C,U,B,E,S,F,I,X,P,H,D,N,T)"),
    limit: int = Query(50000, ge=1, le=100000, description="Maximum results")
):
    """Get institutions as GeoJSON FeatureCollection with full metadata from custodians table.

    Filters are combined with AND. Only rows with both lat and lon set are
    returned, so the Point geometry is always well-formed.
    """
    pool = await get_pool()

    # Build WHERE clauses - query custodians table directly
    conditions = ["lat IS NOT NULL AND lon IS NOT NULL"]
    params = []
    param_count = 0

    if bbox:
        try:
            min_lon, min_lat, max_lon, max_lat = map(float, bbox.split(','))
            param_count += 4
            conditions.append(f"""
                lon >= ${param_count-3} AND lat >= ${param_count-2}
                AND lon <= ${param_count-1} AND lat <= ${param_count}
            """)
            params.extend([min_lon, min_lat, max_lon, max_lat])
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid bbox format. Use: minLon,minLat,maxLon,maxLat")

    if province:
        param_count += 1
        conditions.append(f"region_code = ${param_count}")
        params.append(province.upper())

    if type:
        param_count += 1
        conditions.append(f"type = ${param_count}")
        params.append(type.upper())

    if country:
        param_count += 1
        conditions.append(f"country_code = ${param_count}")
        params.append(country.upper())

    # Final placeholder is reserved for LIMIT.
    param_count += 1
    where_clause = " AND ".join(conditions)

    # Query custodians table with all rich metadata fields
    query = f"""
        SELECT
            ghcid,
            name,
            emic_name,
            type,
            type_name,
            lon,
            lat,
            city,
            region as province,
            region_code as province_iso,
            country_code,
            formatted_address,
            street_address,
            postal_code,
            rating,
            total_ratings,
            wikidata_id,
            website,
            phone,
            email,
            isil_code,
            google_place_id,
            description,
            opening_hours,
            reviews,
            photos,
            photo_urls,
            business_status,
            street_view_url,
            founding_year,
            dissolution_year,
            temporal_extent,
            museum_register,
            youtube_channel_url,
            youtube_subscriber_count,
            youtube_video_count,
            youtube_enrichment,
            social_facebook,
            social_twitter,
            social_instagram,
            wikidata_label_en,
            wikidata_description_en,
            logo_url,
            web_claims
        FROM custodians
        WHERE {where_clause}
        ORDER BY name
        LIMIT ${param_count}
    """

    params.append(limit)

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    features = []
    for row in rows:
        # Build properties with all available metadata
        props = {
            "ghcid": row['ghcid'],
            "name": row['name'],
            "emic_name": row['emic_name'],
            "type": row['type'],
            "type_name": row['type_name'],
            "city": row['city'],
            "province": row['province'],
            "province_iso": row['province_iso'],
            "country_code": row['country_code'],
            "formatted_address": row['formatted_address'],
            "rating": float(row['rating']) if row['rating'] else None,
            "total_ratings": row['total_ratings'],
            "wikidata_id": row['wikidata_id'],
            "website": row['website'],
            "phone": row['phone'],
            "email": row['email'],
            "isil_code": row['isil_code'],
            "google_place_id": row['google_place_id'],
            "description": row['description'],
            "business_status": row['business_status'],
            "street_view_url": row['street_view_url'],
            "founding_year": row['founding_year'],
            "dissolution_year": row['dissolution_year'],
        }

        # Add JSONB fields (handle potential None values)
        if row['opening_hours']:
            props["opening_hours"] = row['opening_hours']
        if row['reviews']:
            props["reviews"] = row['reviews']
        if row['photos']:
            props["photos"] = row['photos']
        if row['photo_urls']:
            props["photo_urls"] = row['photo_urls']
        if row['temporal_extent']:
            props["temporal_extent"] = row['temporal_extent']
        if row['museum_register']:
            props["museum_register"] = row['museum_register']
        if row['youtube_enrichment']:
            props["youtube_enrichment"] = row['youtube_enrichment']
        elif row['youtube_channel_url']:
            # Build minimal YouTube data if enrichment not present
            props["youtube"] = {
                "channel_url": row['youtube_channel_url'],
                "subscriber_count": row['youtube_subscriber_count'],
                "video_count": row['youtube_video_count'],
            }

        # Social media
        social = {}
        if row['social_facebook']:
            social['facebook'] = row['social_facebook']
        if row['social_twitter']:
            social['twitter'] = row['social_twitter']
        if row['social_instagram']:
            social['instagram'] = row['social_instagram']
        if social:
            props["social_media"] = social

        # Wikidata labels
        if row['wikidata_label_en']:
            props["wikidata_label"] = row['wikidata_label_en']
        if row['wikidata_description_en']:
            props["wikidata_description"] = row['wikidata_description_en']

        # Logo URL from web claims extraction
        if row['logo_url']:
            props["logo_url"] = row['logo_url']

        # Web claims (financial documents, etc.)
        if row['web_claims']:
            props["web_claims"] = row['web_claims']

        features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [float(row['lon']), float(row['lat'])]
            },
            "properties": props
        })

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "limit": limit,
            "filters": {
                "bbox": bbox,
                "province": province,
                "country": country,  # was missing although the filter is applied
                "type": type
            }
        }
    }
|
|
|
|
|
|
@app.get("/institution/{ghcid}")
async def get_institution(ghcid: str):
    """Get detailed information for a single institution with full metadata.

    Raises 404 when no custodian row matches the given ghcid. Optional
    enrichment blocks (temporal, youtube, social_media, wikidata, ...) are
    only included when at least one of their fields is populated.
    """
    pool = await get_pool()

    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT
                ghcid,
                name,
                emic_name,
                verified_name,
                type,
                type_name,
                lon,
                lat,
                city,
                region as province,
                region_code as province_iso,
                country_code,
                formatted_address,
                street_address,
                postal_code,
                website,
                phone,
                email,
                wikidata_id,
                isil_code,
                google_place_id,
                rating,
                total_ratings,
                description,
                business_status,
                street_view_url,
                google_maps_url,
                opening_hours,
                reviews,
                photos,
                photo_urls,
                founding_year,
                founding_date,
                dissolution_year,
                dissolution_date,
                temporal_extent,
                museum_register,
                youtube_channel_id,
                youtube_channel_url,
                youtube_subscriber_count,
                youtube_video_count,
                youtube_view_count,
                youtube_enrichment,
                social_facebook,
                social_twitter,
                social_instagram,
                social_linkedin,
                social_youtube,
                logo_url,
                wikidata_label_nl,
                wikidata_label_en,
                wikidata_description_nl,
                wikidata_description_en,
                wikidata_types,
                wikidata_inception,
                wikidata_enrichment,
                genealogiewerkbalk,
                nan_isil_enrichment,
                kb_enrichment,
                zcbs_enrichment,
                web_claims,
                ghcid_uuid,
                ghcid_numeric,
                identifiers,
                data_source,
                data_tier,
                provenance
            FROM custodians
            WHERE ghcid = $1
        """, ghcid)

    if not row:
        raise HTTPException(status_code=404, detail=f"Institution '{ghcid}' not found")

    # Build comprehensive response with all metadata.
    # lat/lon use `is not None`: a coordinate of exactly 0.0 (equator /
    # prime meridian) is valid and must not be coerced to None.
    result = {
        "ghcid": row['ghcid'],
        "name": row['name'],
        "emic_name": row['emic_name'],
        "verified_name": row['verified_name'],
        "type": row['type'],
        "type_name": row['type_name'],
        "lat": float(row['lat']) if row['lat'] is not None else None,
        "lon": float(row['lon']) if row['lon'] is not None else None,
        "city": row['city'],
        "province": row['province'],
        "province_iso": row['province_iso'],
        "country_code": row['country_code'],
        "formatted_address": row['formatted_address'],
        "street_address": row['street_address'],
        "postal_code": row['postal_code'],
        "website": row['website'],
        "phone": row['phone'],
        "email": row['email'],
        "wikidata_id": row['wikidata_id'],
        "isil_code": row['isil_code'],
        "google_place_id": row['google_place_id'],
        "rating": float(row['rating']) if row['rating'] else None,
        "total_ratings": row['total_ratings'],
        "description": row['description'],
        "business_status": row['business_status'],
        "street_view_url": row['street_view_url'],
        "google_maps_url": row['google_maps_url'],
    }

    # JSONB fields - only include if present
    if row['opening_hours']:
        result["opening_hours"] = row['opening_hours']
    if row['reviews']:
        result["reviews"] = row['reviews']
    if row['photos']:
        result["photos"] = row['photos']
    if row['photo_urls']:
        result["photo_urls"] = row['photo_urls']
    if row['identifiers']:
        result["identifiers"] = row['identifiers']

    # Temporal data
    temporal = {}
    if row['founding_year']:
        temporal["founding_year"] = row['founding_year']
    if row['founding_date']:
        temporal["founding_date"] = row['founding_date'].isoformat() if row['founding_date'] else None
    if row['dissolution_year']:
        temporal["dissolution_year"] = row['dissolution_year']
    if row['dissolution_date']:
        temporal["dissolution_date"] = row['dissolution_date'].isoformat() if row['dissolution_date'] else None
    if row['temporal_extent']:
        temporal["extent"] = row['temporal_extent']
    if temporal:
        result["temporal"] = temporal

    # Museum register
    if row['museum_register']:
        result["museum_register"] = row['museum_register']

    # YouTube enrichment
    youtube = {}
    if row['youtube_channel_id']:
        youtube["channel_id"] = row['youtube_channel_id']
    if row['youtube_channel_url']:
        youtube["channel_url"] = row['youtube_channel_url']
    if row['youtube_subscriber_count']:
        youtube["subscriber_count"] = row['youtube_subscriber_count']
    if row['youtube_video_count']:
        youtube["video_count"] = row['youtube_video_count']
    if row['youtube_view_count']:
        youtube["view_count"] = row['youtube_view_count']
    if row['youtube_enrichment']:
        youtube["enrichment"] = row['youtube_enrichment']
    if youtube:
        result["youtube"] = youtube

    # Social media
    social = {}
    if row['social_facebook']:
        social["facebook"] = row['social_facebook']
    if row['social_twitter']:
        social["twitter"] = row['social_twitter']
    if row['social_instagram']:
        social["instagram"] = row['social_instagram']
    if row['social_linkedin']:
        social["linkedin"] = row['social_linkedin']
    if row['social_youtube']:
        social["youtube"] = row['social_youtube']
    if social:
        result["social_media"] = social

    # Wikidata
    wikidata = {}
    if row['wikidata_label_nl']:
        wikidata["label_nl"] = row['wikidata_label_nl']
    if row['wikidata_label_en']:
        wikidata["label_en"] = row['wikidata_label_en']
    if row['wikidata_description_nl']:
        wikidata["description_nl"] = row['wikidata_description_nl']
    if row['wikidata_description_en']:
        wikidata["description_en"] = row['wikidata_description_en']
    if row['wikidata_types']:
        wikidata["types"] = row['wikidata_types']
    if row['wikidata_inception']:
        wikidata["inception"] = row['wikidata_inception']
    if row['wikidata_enrichment']:
        wikidata["enrichment"] = row['wikidata_enrichment']
    if wikidata:
        result["wikidata"] = wikidata

    # Logo
    if row['logo_url']:
        result["logo_url"] = row['logo_url']

    # Other enrichment data
    if row['genealogiewerkbalk']:
        result["genealogiewerkbalk"] = row['genealogiewerkbalk']
    if row['nan_isil_enrichment']:
        result["nan_isil_enrichment"] = row['nan_isil_enrichment']
    if row['kb_enrichment']:
        result["kb_enrichment"] = row['kb_enrichment']
    if row['zcbs_enrichment']:
        result["zcbs_enrichment"] = row['zcbs_enrichment']
    if row['web_claims']:
        result["web_claims"] = row['web_claims']

    # GHCID details
    ghcid_data = {"current": row['ghcid']}
    if row['ghcid_uuid']:
        ghcid_data["uuid"] = str(row['ghcid_uuid'])
    if row['ghcid_numeric']:
        ghcid_data["numeric"] = int(row['ghcid_numeric'])
    result["ghcid_details"] = ghcid_data

    # Provenance
    if row['data_source'] or row['data_tier'] or row['provenance']:
        result["provenance"] = {
            "data_source": row['data_source'],
            "data_tier": row['data_tier'],
            "details": row['provenance'],
        }

    return result
|
|
|
|
|
|
@app.get("/search")
async def search_institutions(
    q: str = Query(..., min_length=2, description="Search query"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results")
):
    """Search institutions by name.

    Tries PostgreSQL full-text search first; when that yields nothing,
    falls back to a case-insensitive substring (ILIKE) match on the name.
    """
    pool = await get_pool()

    # Use PostgreSQL full-text search with trigram similarity
    query = """
        SELECT
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            ST_X(i.geom) as lon,
            ST_Y(i.geom) as lat,
            i.city,
            p.iso_code as province_iso,
            i.rating,
            ts_rank_cd(
                to_tsvector('simple', i.name || ' ' || COALESCE(i.description, '')),
                plainto_tsquery('simple', $1)
            ) as rank
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE to_tsvector('simple', i.name || ' ' || COALESCE(i.description, ''))
              @@ plainto_tsquery('simple', $1)
    """

    params = [q]
    param_count = 1

    if type:
        param_count += 1
        query += f" AND i.institution_type = ${param_count}"
        params.append(type.upper())

    param_count += 1
    query += f" ORDER BY rank DESC, i.name LIMIT ${param_count}"
    params.append(limit)

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    # If no results from FTS, try ILIKE fallback
    if not rows:
        fallback_query = f"""
            SELECT
                i.ghcid_current as ghcid,
                i.name,
                i.institution_type as type,
                i.type_name,
                ST_X(i.geom) as lon,
                ST_Y(i.geom) as lat,
                i.city,
                p.iso_code as province_iso,
                i.rating,
                0 as rank
            FROM institutions i
            LEFT JOIN provinces p ON i.province_id = p.id
            WHERE i.name ILIKE $1
            {'AND i.institution_type = $2' if type else ''}
            ORDER BY i.name
            LIMIT {'$3' if type else '$2'}
        """

        async with pool.acquire() as conn:
            if type:
                rows = await conn.fetch(fallback_query, f"%{q}%", type.upper(), limit)
            else:
                rows = await conn.fetch(fallback_query, f"%{q}%", limit)

    # lon/lat use `is not None`: coordinates of exactly 0.0 (equator /
    # prime meridian) are valid and must not be coerced to None.
    return {
        "query": q,
        "count": len(rows),
        "results": [
            {
                "ghcid": row['ghcid'],
                "name": row['name'],
                "type": row['type'],
                "type_name": row['type_name'],
                "lon": float(row['lon']) if row['lon'] is not None else None,
                "lat": float(row['lat']) if row['lat'] is not None else None,
                "city": row['city'],
                "province_iso": row['province_iso'],
                "rating": float(row['rating']) if row['rating'] else None,
            }
            for row in rows
        ]
    }
|
|
|
|
|
|
@app.get("/nearby", response_model=List[NearbyInstitution])
async def find_nearby(
    lon: float = Query(..., description="Longitude"),
    lat: float = Query(..., description="Latitude"),
    radius_km: float = Query(10, ge=0.1, le=100, description="Search radius in km"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results")
):
    """Find institutions near a point, ordered by geodesic distance."""
    pool = await get_pool()

    # geography casts make ST_Distance / ST_DWithin operate in meters.
    query = """
        SELECT
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            (ST_Distance(i.geom::geography, ST_SetSRID(ST_Point($1, $2), 4326)::geography) / 1000) as distance_km,
            i.city,
            p.name as province,
            i.rating
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE ST_DWithin(
            i.geom::geography,
            ST_SetSRID(ST_Point($1, $2), 4326)::geography,
            $3 * 1000
        )
    """
    params = [lon, lat, radius_km]
    param_count = 3

    if type:
        param_count += 1
        query += f" AND i.institution_type = ${param_count}"
        params.append(type.upper())

    param_count += 1
    query += f" ORDER BY distance_km LIMIT ${param_count}"
    params.append(limit)

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    results = []
    for row in rows:
        results.append(NearbyInstitution(
            ghcid=row['ghcid'],
            name=row['name'],
            type=row['type'],
            type_name=row['type_name'],
            distance_km=round(float(row['distance_km']), 2),
            city=row['city'],
            province=row['province'],
            rating=float(row['rating']) if row['rating'] else None,
        ))
    return results
|
|
|
|
|
|
@app.get("/admin/point", response_model=AdminPoint)
|
|
async def get_admin_for_point(
|
|
lon: float = Query(..., description="Longitude"),
|
|
lat: float = Query(..., description="Latitude")
|
|
):
|
|
"""Find which municipality/province contains a point"""
|
|
pool = await get_pool()
|
|
|
|
async with pool.acquire() as conn:
|
|
row = await conn.fetchrow("""
|
|
SELECT
|
|
p.province_code,
|
|
p.name as province_name,
|
|
m.municipality_code,
|
|
m.name as municipality_name
|
|
FROM municipalities m
|
|
JOIN provinces p ON m.province_id = p.id
|
|
WHERE ST_Contains(m.geom, ST_SetSRID(ST_Point($1, $2), 4326))
|
|
LIMIT 1
|
|
""", lon, lat)
|
|
|
|
if not row:
|
|
# Try province only
|
|
async with pool.acquire() as conn:
|
|
row = await conn.fetchrow("""
|
|
SELECT
|
|
province_code,
|
|
name as province_name,
|
|
NULL as municipality_code,
|
|
NULL as municipality_name
|
|
FROM provinces
|
|
WHERE ST_Contains(geom, ST_SetSRID(ST_Point($1, $2), 4326))
|
|
LIMIT 1
|
|
""", lon, lat)
|
|
|
|
if not row:
|
|
return AdminPoint(
|
|
province_code=None,
|
|
province_name=None,
|
|
municipality_code=None,
|
|
municipality_name=None
|
|
)
|
|
|
|
return AdminPoint(
|
|
province_code=row['province_code'],
|
|
province_name=row['province_name'],
|
|
municipality_code=row['municipality_code'],
|
|
municipality_name=row['municipality_name']
|
|
)
|
|
|
|
|
|
@app.get("/historical")
|
|
async def get_historical_boundaries(
|
|
year: int = Query(1500, description="Reference year"),
|
|
boundary_type: Optional[str] = Query(None, description="Boundary type filter"),
|
|
simplified: bool = Query(True, description="Return simplified geometries"),
|
|
limit: int = Query(1000, ge=1, le=10000, description="Maximum results")
|
|
):
|
|
"""Get historical boundaries as GeoJSON"""
|
|
pool = await get_pool()
|
|
|
|
tolerance = 0.001 if simplified else 0
|
|
|
|
conditions = ["reference_year = $1"]
|
|
params = [year]
|
|
param_count = 1
|
|
|
|
if boundary_type:
|
|
param_count += 1
|
|
conditions.append(f"boundary_type = ${param_count}")
|
|
params.append(boundary_type)
|
|
|
|
param_count += 1
|
|
where_clause = " AND ".join(conditions)
|
|
|
|
query = f"""
|
|
SELECT
|
|
id, boundary_code, name, boundary_type, reference_year,
|
|
ST_AsGeoJSON(
|
|
{'ST_Simplify(geom, ' + str(tolerance) + ')' if simplified else 'geom'}
|
|
)::json as geometry,
|
|
ST_X(centroid) as centroid_lon,
|
|
ST_Y(centroid) as centroid_lat,
|
|
area_km2
|
|
FROM historical_boundaries
|
|
WHERE {where_clause}
|
|
ORDER BY name
|
|
LIMIT ${param_count}
|
|
"""
|
|
params.append(limit)
|
|
|
|
async with pool.acquire() as conn:
|
|
rows = await conn.fetch(query, *params)
|
|
|
|
features = []
|
|
for row in rows:
|
|
if row['geometry']:
|
|
features.append({
|
|
"type": "Feature",
|
|
"id": row['boundary_code'],
|
|
"geometry": row['geometry'],
|
|
"properties": {
|
|
"id": row['id'],
|
|
"code": row['boundary_code'],
|
|
"name": row['name'],
|
|
"type": row['boundary_type'],
|
|
"year": row['reference_year'],
|
|
"centroid_lon": float(row['centroid_lon']) if row['centroid_lon'] else None,
|
|
"centroid_lat": float(row['centroid_lat']) if row['centroid_lat'] else None,
|
|
"area_km2": float(row['area_km2']) if row['area_km2'] else None,
|
|
}
|
|
})
|
|
|
|
return {
|
|
"type": "FeatureCollection",
|
|
"features": features,
|
|
"metadata": {
|
|
"year": year,
|
|
"boundary_type": boundary_type,
|
|
"count": len(features)
|
|
}
|
|
}
|
|
|
|
|
|
@app.get("/stats/by-type")
|
|
async def get_stats_by_type():
|
|
"""Get institution counts by type"""
|
|
pool = await get_pool()
|
|
|
|
async with pool.acquire() as conn:
|
|
rows = await conn.fetch("""
|
|
SELECT
|
|
institution_type as type,
|
|
type_name,
|
|
COUNT(*) as count,
|
|
ROUND(AVG(rating)::numeric, 2) as avg_rating
|
|
FROM institutions
|
|
WHERE geom IS NOT NULL
|
|
GROUP BY institution_type, type_name
|
|
ORDER BY count DESC
|
|
""")
|
|
|
|
return {
|
|
"stats": [
|
|
{
|
|
"type": row['type'],
|
|
"type_name": row['type_name'],
|
|
"count": row['count'],
|
|
"avg_rating": float(row['avg_rating']) if row['avg_rating'] else None
|
|
}
|
|
for row in rows
|
|
]
|
|
}
|
|
|
|
|
|
@app.get("/stats/by-province")
|
|
async def get_stats_by_province():
|
|
"""Get institution counts by province"""
|
|
pool = await get_pool()
|
|
|
|
async with pool.acquire() as conn:
|
|
rows = await conn.fetch("""
|
|
SELECT
|
|
p.iso_code,
|
|
p.name as province_name,
|
|
COUNT(i.id) as count,
|
|
ROUND(AVG(i.rating)::numeric, 2) as avg_rating
|
|
FROM provinces p
|
|
LEFT JOIN institutions i ON i.province_id = p.id
|
|
GROUP BY p.id, p.iso_code, p.name
|
|
ORDER BY count DESC
|
|
""")
|
|
|
|
return {
|
|
"stats": [
|
|
{
|
|
"province_iso": row['iso_code'],
|
|
"province_name": row['province_name'],
|
|
"count": row['count'],
|
|
"avg_rating": float(row['avg_rating']) if row['avg_rating'] else None
|
|
}
|
|
for row in rows
|
|
]
|
|
}
|
|
|
|
|
|
# ============================================================================
|
|
# Optimized Loading Endpoints (Pagination, Viewport, Lite)
|
|
# ============================================================================
|
|
|
|
@app.get("/institutions/lite")
|
|
async def get_institutions_lite(
|
|
bbox: Optional[str] = Query(None, description="Bounding box: minLon,minLat,maxLon,maxLat"),
|
|
country: Optional[str] = Query(None, description="Filter by country code (e.g., NL, DE, JP)"),
|
|
type: Optional[str] = Query(None, description="Filter by institution type"),
|
|
limit: int = Query(100000, ge=1, le=200000, description="Maximum results")
|
|
):
|
|
"""
|
|
Get lightweight institution data for map markers.
|
|
Returns only essential fields (~5-10MB instead of ~126MB).
|
|
Use /institution/{ghcid} for full details on click.
|
|
"""
|
|
pool = await get_pool()
|
|
|
|
conditions = ["lat IS NOT NULL AND lon IS NOT NULL"]
|
|
params = []
|
|
param_count = 0
|
|
|
|
if bbox:
|
|
try:
|
|
min_lon, min_lat, max_lon, max_lat = map(float, bbox.split(','))
|
|
param_count += 4
|
|
conditions.append(f"""
|
|
lon >= ${param_count-3} AND lat >= ${param_count-2}
|
|
AND lon <= ${param_count-1} AND lat <= ${param_count}
|
|
""")
|
|
params.extend([min_lon, min_lat, max_lon, max_lat])
|
|
except ValueError:
|
|
raise HTTPException(status_code=400, detail="Invalid bbox format")
|
|
|
|
if country:
|
|
param_count += 1
|
|
conditions.append(f"country_code = ${param_count}")
|
|
params.append(country.upper())
|
|
|
|
if type:
|
|
param_count += 1
|
|
conditions.append(f"type = ${param_count}")
|
|
params.append(type.upper())
|
|
|
|
param_count += 1
|
|
where_clause = " AND ".join(conditions)
|
|
|
|
# Minimal fields for markers - dramatically reduces payload
|
|
query = f"""
|
|
SELECT
|
|
ghcid,
|
|
name,
|
|
type,
|
|
lon,
|
|
lat,
|
|
city,
|
|
country_code,
|
|
rating
|
|
FROM custodians
|
|
WHERE {where_clause}
|
|
ORDER BY name
|
|
LIMIT ${param_count}
|
|
"""
|
|
params.append(limit)
|
|
|
|
async with pool.acquire() as conn:
|
|
rows = await conn.fetch(query, *params)
|
|
|
|
features = []
|
|
for row in rows:
|
|
features.append({
|
|
"type": "Feature",
|
|
"geometry": {
|
|
"type": "Point",
|
|
"coordinates": [float(row['lon']), float(row['lat'])]
|
|
},
|
|
"properties": {
|
|
"ghcid": row['ghcid'],
|
|
"name": row['name'],
|
|
"type": row['type'],
|
|
"city": row['city'],
|
|
"country_code": row['country_code'],
|
|
"rating": float(row['rating']) if row['rating'] else None,
|
|
}
|
|
})
|
|
|
|
return {
|
|
"type": "FeatureCollection",
|
|
"features": features,
|
|
"metadata": {
|
|
"count": len(features),
|
|
"mode": "lite",
|
|
"filters": {"bbox": bbox, "country": country, "type": type}
|
|
}
|
|
}
|
|
|
|
|
|
@app.get("/institutions/page")
|
|
async def get_institutions_paginated(
|
|
page: int = Query(1, ge=1, description="Page number (1-indexed)"),
|
|
page_size: int = Query(1000, ge=100, le=5000, description="Items per page"),
|
|
country: Optional[str] = Query(None, description="Filter by country code"),
|
|
type: Optional[str] = Query(None, description="Filter by institution type"),
|
|
sort_by: str = Query("name", description="Sort field: name, city, rating, type"),
|
|
sort_order: str = Query("asc", description="Sort order: asc, desc"),
|
|
):
|
|
"""
|
|
Get paginated institutions with full metadata.
|
|
~1MB per page instead of ~126MB full download.
|
|
Supports cursor-based iteration for large datasets.
|
|
"""
|
|
pool = await get_pool()
|
|
|
|
# Validate sort parameters
|
|
valid_sort_fields = {"name", "city", "rating", "type", "country_code"}
|
|
if sort_by not in valid_sort_fields:
|
|
sort_by = "name"
|
|
sort_direction = "DESC" if sort_order.lower() == "desc" else "ASC"
|
|
|
|
conditions = ["lat IS NOT NULL AND lon IS NOT NULL"]
|
|
params = []
|
|
param_count = 0
|
|
|
|
if country:
|
|
param_count += 1
|
|
conditions.append(f"country_code = ${param_count}")
|
|
params.append(country.upper())
|
|
|
|
if type:
|
|
param_count += 1
|
|
conditions.append(f"type = ${param_count}")
|
|
params.append(type.upper())
|
|
|
|
where_clause = " AND ".join(conditions)
|
|
|
|
# Get total count for pagination metadata
|
|
count_query = f"SELECT COUNT(*) FROM custodians WHERE {where_clause}"
|
|
|
|
# Calculate offset
|
|
offset = (page - 1) * page_size
|
|
param_count += 1
|
|
limit_param = param_count
|
|
param_count += 1
|
|
offset_param = param_count
|
|
|
|
# Full metadata query with pagination
|
|
query = f"""
|
|
SELECT
|
|
ghcid, name, emic_name, type, type_name,
|
|
lon, lat, city, region as province, region_code as province_iso,
|
|
country_code, formatted_address, street_address, postal_code,
|
|
rating, total_ratings, wikidata_id, website, phone, email,
|
|
isil_code, google_place_id, description, opening_hours,
|
|
reviews, photos, photo_urls, business_status, street_view_url,
|
|
founding_year, dissolution_year, temporal_extent, museum_register,
|
|
youtube_channel_url, youtube_subscriber_count, youtube_video_count,
|
|
youtube_enrichment, social_facebook, social_twitter, social_instagram,
|
|
wikidata_label_en, wikidata_description_en
|
|
FROM custodians
|
|
WHERE {where_clause}
|
|
ORDER BY {sort_by} {sort_direction}
|
|
LIMIT ${limit_param} OFFSET ${offset_param}
|
|
"""
|
|
params.extend([page_size, offset])
|
|
|
|
async with pool.acquire() as conn:
|
|
total_count = await conn.fetchval(count_query, *params[:param_count-2] if params else [])
|
|
rows = await conn.fetch(query, *params)
|
|
|
|
total_pages = (total_count + page_size - 1) // page_size
|
|
|
|
features = []
|
|
for row in rows:
|
|
props = {
|
|
"ghcid": row['ghcid'],
|
|
"name": row['name'],
|
|
"emic_name": row['emic_name'],
|
|
"type": row['type'],
|
|
"type_name": row['type_name'],
|
|
"city": row['city'],
|
|
"province": row['province'],
|
|
"province_iso": row['province_iso'],
|
|
"country_code": row['country_code'],
|
|
"formatted_address": row['formatted_address'],
|
|
"rating": float(row['rating']) if row['rating'] else None,
|
|
"total_ratings": row['total_ratings'],
|
|
"wikidata_id": row['wikidata_id'],
|
|
"website": row['website'],
|
|
"phone": row['phone'],
|
|
"email": row['email'],
|
|
"isil_code": row['isil_code'],
|
|
"google_place_id": row['google_place_id'],
|
|
"description": row['description'],
|
|
"business_status": row['business_status'],
|
|
"street_view_url": row['street_view_url'],
|
|
"founding_year": row['founding_year'],
|
|
"dissolution_year": row['dissolution_year'],
|
|
}
|
|
|
|
# Add JSONB fields if present
|
|
if row['opening_hours']:
|
|
props["opening_hours"] = row['opening_hours']
|
|
if row['reviews']:
|
|
props["reviews"] = row['reviews']
|
|
if row['photos']:
|
|
props["photos"] = row['photos']
|
|
if row['photo_urls']:
|
|
props["photo_urls"] = row['photo_urls']
|
|
if row['temporal_extent']:
|
|
props["temporal_extent"] = row['temporal_extent']
|
|
if row['museum_register']:
|
|
props["museum_register"] = row['museum_register']
|
|
if row['youtube_enrichment']:
|
|
props["youtube_enrichment"] = row['youtube_enrichment']
|
|
elif row['youtube_channel_url']:
|
|
props["youtube"] = {
|
|
"channel_url": row['youtube_channel_url'],
|
|
"subscriber_count": row['youtube_subscriber_count'],
|
|
"video_count": row['youtube_video_count'],
|
|
}
|
|
|
|
social = {}
|
|
if row['social_facebook']:
|
|
social['facebook'] = row['social_facebook']
|
|
if row['social_twitter']:
|
|
social['twitter'] = row['social_twitter']
|
|
if row['social_instagram']:
|
|
social['instagram'] = row['social_instagram']
|
|
if social:
|
|
props["social_media"] = social
|
|
|
|
if row['wikidata_label_en']:
|
|
props["wikidata_label"] = row['wikidata_label_en']
|
|
if row['wikidata_description_en']:
|
|
props["wikidata_description"] = row['wikidata_description_en']
|
|
|
|
features.append({
|
|
"type": "Feature",
|
|
"geometry": {
|
|
"type": "Point",
|
|
"coordinates": [float(row['lon']), float(row['lat'])]
|
|
},
|
|
"properties": props
|
|
})
|
|
|
|
return {
|
|
"type": "FeatureCollection",
|
|
"features": features,
|
|
"pagination": {
|
|
"page": page,
|
|
"page_size": page_size,
|
|
"total_count": total_count,
|
|
"total_pages": total_pages,
|
|
"has_next": page < total_pages,
|
|
"has_prev": page > 1,
|
|
"next_page": page + 1 if page < total_pages else None,
|
|
"prev_page": page - 1 if page > 1 else None,
|
|
},
|
|
"metadata": {
|
|
"mode": "paginated",
|
|
"filters": {"country": country, "type": type},
|
|
"sort": {"field": sort_by, "order": sort_order}
|
|
}
|
|
}
|
|
|
|
|
|
@app.get("/institutions/viewport")
|
|
async def get_institutions_viewport(
|
|
bbox: str = Query(..., description="Bounding box: minLon,minLat,maxLon,maxLat (REQUIRED)"),
|
|
zoom: int = Query(10, ge=1, le=20, description="Map zoom level (affects detail)"),
|
|
country: Optional[str] = Query(None, description="Filter by country code"),
|
|
type: Optional[str] = Query(None, description="Filter by institution type"),
|
|
limit: int = Query(2000, ge=100, le=10000, description="Maximum results for viewport"),
|
|
):
|
|
"""
|
|
Get institutions visible in current map viewport.
|
|
Returns lite data at low zoom, full data at high zoom.
|
|
Optimized for map pan/zoom interactions.
|
|
"""
|
|
pool = await get_pool()
|
|
|
|
# Parse bbox
|
|
try:
|
|
min_lon, min_lat, max_lon, max_lat = map(float, bbox.split(','))
|
|
except ValueError:
|
|
raise HTTPException(status_code=400, detail="Invalid bbox format. Use: minLon,minLat,maxLon,maxLat")
|
|
|
|
conditions = [
|
|
"lat IS NOT NULL AND lon IS NOT NULL",
|
|
f"lon >= $1 AND lat >= $2 AND lon <= $3 AND lat <= $4"
|
|
]
|
|
params = [min_lon, min_lat, max_lon, max_lat]
|
|
param_count = 4
|
|
|
|
if country:
|
|
param_count += 1
|
|
conditions.append(f"country_code = ${param_count}")
|
|
params.append(country.upper())
|
|
|
|
if type:
|
|
param_count += 1
|
|
conditions.append(f"type = ${param_count}")
|
|
params.append(type.upper())
|
|
|
|
where_clause = " AND ".join(conditions)
|
|
|
|
# Adaptive detail based on zoom level
|
|
# Low zoom (world/continent view): minimal fields
|
|
# Medium zoom (country view): basic fields
|
|
# High zoom (city view): full fields
|
|
if zoom <= 6:
|
|
# World/continent view - just markers
|
|
select_fields = "ghcid, name, type, lon, lat, country_code"
|
|
detail_level = "minimal"
|
|
elif zoom <= 10:
|
|
# Country view - basic info
|
|
select_fields = "ghcid, name, type, type_name, lon, lat, city, country_code, rating"
|
|
detail_level = "basic"
|
|
elif zoom <= 14:
|
|
# Region view - moderate detail
|
|
select_fields = """
|
|
ghcid, name, emic_name, type, type_name, lon, lat, city,
|
|
region as province, country_code, rating, total_ratings,
|
|
website, wikidata_id, description
|
|
"""
|
|
detail_level = "moderate"
|
|
else:
|
|
# City/street view - full detail
|
|
select_fields = """
|
|
ghcid, name, emic_name, type, type_name, lon, lat, city,
|
|
region as province, region_code as province_iso, country_code,
|
|
formatted_address, street_address, postal_code, rating, total_ratings,
|
|
wikidata_id, website, phone, email, isil_code, google_place_id,
|
|
description, opening_hours, reviews, photos, photo_urls,
|
|
business_status, street_view_url, founding_year, dissolution_year,
|
|
social_facebook, social_twitter, social_instagram,
|
|
wikidata_label_en, wikidata_description_en
|
|
"""
|
|
detail_level = "full"
|
|
|
|
param_count += 1
|
|
query = f"""
|
|
SELECT {select_fields}
|
|
FROM custodians
|
|
WHERE {where_clause}
|
|
ORDER BY rating DESC NULLS LAST, name
|
|
LIMIT ${param_count}
|
|
"""
|
|
params.append(limit)
|
|
|
|
async with pool.acquire() as conn:
|
|
rows = await conn.fetch(query, *params)
|
|
|
|
features = []
|
|
for row in rows:
|
|
row_dict = dict(row)
|
|
props = {"ghcid": row_dict['ghcid'], "name": row_dict['name'], "type": row_dict['type']}
|
|
|
|
# Add fields based on detail level
|
|
if 'type_name' in row_dict:
|
|
props['type_name'] = row_dict['type_name']
|
|
if 'city' in row_dict:
|
|
props['city'] = row_dict['city']
|
|
if 'country_code' in row_dict:
|
|
props['country_code'] = row_dict['country_code']
|
|
if 'rating' in row_dict and row_dict['rating']:
|
|
props['rating'] = float(row_dict['rating'])
|
|
if 'total_ratings' in row_dict:
|
|
props['total_ratings'] = row_dict['total_ratings']
|
|
if 'province' in row_dict:
|
|
props['province'] = row_dict['province']
|
|
if 'province_iso' in row_dict:
|
|
props['province_iso'] = row_dict['province_iso']
|
|
if 'emic_name' in row_dict:
|
|
props['emic_name'] = row_dict['emic_name']
|
|
if 'website' in row_dict:
|
|
props['website'] = row_dict['website']
|
|
if 'wikidata_id' in row_dict:
|
|
props['wikidata_id'] = row_dict['wikidata_id']
|
|
if 'description' in row_dict:
|
|
props['description'] = row_dict['description']
|
|
if 'formatted_address' in row_dict:
|
|
props['formatted_address'] = row_dict['formatted_address']
|
|
if 'street_address' in row_dict:
|
|
props['street_address'] = row_dict['street_address']
|
|
if 'postal_code' in row_dict:
|
|
props['postal_code'] = row_dict['postal_code']
|
|
if 'phone' in row_dict:
|
|
props['phone'] = row_dict['phone']
|
|
if 'email' in row_dict:
|
|
props['email'] = row_dict['email']
|
|
if 'isil_code' in row_dict:
|
|
props['isil_code'] = row_dict['isil_code']
|
|
if 'google_place_id' in row_dict:
|
|
props['google_place_id'] = row_dict['google_place_id']
|
|
if 'business_status' in row_dict:
|
|
props['business_status'] = row_dict['business_status']
|
|
if 'street_view_url' in row_dict:
|
|
props['street_view_url'] = row_dict['street_view_url']
|
|
if 'founding_year' in row_dict:
|
|
props['founding_year'] = row_dict['founding_year']
|
|
if 'dissolution_year' in row_dict:
|
|
props['dissolution_year'] = row_dict['dissolution_year']
|
|
|
|
# JSONB fields at full detail
|
|
if 'opening_hours' in row_dict and row_dict['opening_hours']:
|
|
props['opening_hours'] = row_dict['opening_hours']
|
|
if 'reviews' in row_dict and row_dict['reviews']:
|
|
props['reviews'] = row_dict['reviews']
|
|
if 'photos' in row_dict and row_dict['photos']:
|
|
props['photos'] = row_dict['photos']
|
|
if 'photo_urls' in row_dict and row_dict['photo_urls']:
|
|
props['photo_urls'] = row_dict['photo_urls']
|
|
|
|
# Social media at full detail
|
|
social = {}
|
|
if 'social_facebook' in row_dict and row_dict['social_facebook']:
|
|
social['facebook'] = row_dict['social_facebook']
|
|
if 'social_twitter' in row_dict and row_dict['social_twitter']:
|
|
social['twitter'] = row_dict['social_twitter']
|
|
if 'social_instagram' in row_dict and row_dict['social_instagram']:
|
|
social['instagram'] = row_dict['social_instagram']
|
|
if social:
|
|
props['social_media'] = social
|
|
|
|
if 'wikidata_label_en' in row_dict and row_dict['wikidata_label_en']:
|
|
props['wikidata_label'] = row_dict['wikidata_label_en']
|
|
if 'wikidata_description_en' in row_dict and row_dict['wikidata_description_en']:
|
|
props['wikidata_description'] = row_dict['wikidata_description_en']
|
|
|
|
features.append({
|
|
"type": "Feature",
|
|
"geometry": {
|
|
"type": "Point",
|
|
"coordinates": [float(row_dict['lon']), float(row_dict['lat'])]
|
|
},
|
|
"properties": props
|
|
})
|
|
|
|
return {
|
|
"type": "FeatureCollection",
|
|
"features": features,
|
|
"metadata": {
|
|
"count": len(features),
|
|
"mode": "viewport",
|
|
"detail_level": detail_level,
|
|
"zoom": zoom,
|
|
"bbox": {"min_lon": min_lon, "min_lat": min_lat, "max_lon": max_lon, "max_lat": max_lat},
|
|
"filters": {"country": country, "type": type},
|
|
"limit": limit,
|
|
"truncated": len(features) >= limit
|
|
}
|
|
}
|
|
|
|
|
|
@app.get("/institutions/cluster")
|
|
async def get_institutions_clustered(
|
|
bbox: str = Query(..., description="Bounding box: minLon,minLat,maxLon,maxLat"),
|
|
zoom: int = Query(5, ge=1, le=20, description="Map zoom level"),
|
|
grid_size: Optional[float] = Query(None, description="Grid cell size in degrees (auto if not set)"),
|
|
):
|
|
"""
|
|
Get clustered institution counts for overview maps.
|
|
Returns grid cells with counts instead of individual markers.
|
|
Ideal for world/country views with 10k+ institutions.
|
|
"""
|
|
pool = await get_pool()
|
|
|
|
try:
|
|
min_lon, min_lat, max_lon, max_lat = map(float, bbox.split(','))
|
|
except ValueError:
|
|
raise HTTPException(status_code=400, detail="Invalid bbox format")
|
|
|
|
# Auto-calculate grid size based on zoom
|
|
# Higher zoom = smaller grid cells = more detail
|
|
if grid_size is None:
|
|
if zoom <= 3:
|
|
grid_size = 10.0 # ~1000km cells
|
|
elif zoom <= 5:
|
|
grid_size = 5.0 # ~500km cells
|
|
elif zoom <= 7:
|
|
grid_size = 2.0 # ~200km cells
|
|
elif zoom <= 9:
|
|
grid_size = 1.0 # ~100km cells
|
|
elif zoom <= 11:
|
|
grid_size = 0.5 # ~50km cells
|
|
elif zoom <= 13:
|
|
grid_size = 0.2 # ~20km cells
|
|
else:
|
|
grid_size = 0.1 # ~10km cells
|
|
|
|
# Use PostgreSQL to cluster into grid cells
|
|
query = """
|
|
SELECT
|
|
FLOOR(lon / $5) * $5 + $5/2 as cell_lon,
|
|
FLOOR(lat / $5) * $5 + $5/2 as cell_lat,
|
|
COUNT(*) as count,
|
|
array_agg(DISTINCT type) as types,
|
|
AVG(rating) as avg_rating
|
|
FROM custodians
|
|
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
|
AND lon >= $1 AND lat >= $2 AND lon <= $3 AND lat <= $4
|
|
GROUP BY FLOOR(lon / $5), FLOOR(lat / $5)
|
|
ORDER BY count DESC
|
|
"""
|
|
|
|
async with pool.acquire() as conn:
|
|
rows = await conn.fetch(query, min_lon, min_lat, max_lon, max_lat, grid_size)
|
|
|
|
features = []
|
|
total_count = 0
|
|
for row in rows:
|
|
count = row['count']
|
|
total_count += count
|
|
|
|
features.append({
|
|
"type": "Feature",
|
|
"geometry": {
|
|
"type": "Point",
|
|
"coordinates": [float(row['cell_lon']), float(row['cell_lat'])]
|
|
},
|
|
"properties": {
|
|
"cluster": True,
|
|
"count": count,
|
|
"types": row['types'],
|
|
"avg_rating": round(float(row['avg_rating']), 2) if row['avg_rating'] else None,
|
|
}
|
|
})
|
|
|
|
return {
|
|
"type": "FeatureCollection",
|
|
"features": features,
|
|
"metadata": {
|
|
"mode": "clustered",
|
|
"cluster_count": len(features),
|
|
"total_institutions": total_count,
|
|
"grid_size": grid_size,
|
|
"zoom": zoom,
|
|
"bbox": {"min_lon": min_lon, "min_lat": min_lat, "max_lon": max_lon, "max_lat": max_lat}
|
|
}
|
|
}
|
|
|
|
|
|
# ============================================================================
|
|
# Person Endpoints (Beschermers)
|
|
# ============================================================================
|
|
|
|
@app.get("/persons", response_model=Dict[str, Any])
|
|
async def list_persons(
|
|
custodian_slug: Optional[str] = Query(None, description="Filter by custodian slug"),
|
|
heritage_type: Optional[str] = Query(None, description="Filter by heritage type (A, L, M, etc.)"),
|
|
country_code: Optional[str] = Query(None, description="Filter by country code"),
|
|
heritage_relevant: Optional[bool] = Query(None, description="Filter by heritage relevance (true/false)"),
|
|
limit: int = Query(50, ge=1, le=500, description="Max results to return"),
|
|
offset: int = Query(0, ge=0, description="Offset for pagination"),
|
|
):
|
|
"""
|
|
List persons with optional filters.
|
|
Returns paginated list of heritage professionals (beschermers).
|
|
"""
|
|
pool = await get_pool()
|
|
|
|
# Build query with optional filters
|
|
conditions = []
|
|
params = []
|
|
param_idx = 1
|
|
|
|
if custodian_slug:
|
|
conditions.append(f"custodian_slug = ${param_idx}")
|
|
params.append(custodian_slug)
|
|
param_idx += 1
|
|
|
|
if heritage_type:
|
|
conditions.append(f"${param_idx} = ANY(heritage_types)")
|
|
params.append(heritage_type)
|
|
param_idx += 1
|
|
|
|
if country_code:
|
|
conditions.append(f"country_code = ${param_idx}")
|
|
params.append(country_code)
|
|
param_idx += 1
|
|
|
|
if heritage_relevant is not None:
|
|
conditions.append(f"heritage_relevant = ${param_idx}")
|
|
params.append(heritage_relevant)
|
|
param_idx += 1
|
|
|
|
where_clause = f"WHERE {' AND '.join(conditions)}" if conditions else ""
|
|
|
|
# Add pagination params
|
|
params.extend([limit, offset])
|
|
|
|
query = f"""
|
|
SELECT staff_id, name, headline, location, country_code,
|
|
custodian_slug, custodian_name, linkedin_url, profile_image_url,
|
|
heritage_relevant, heritage_types
|
|
FROM persons
|
|
{where_clause}
|
|
ORDER BY name
|
|
LIMIT ${param_idx} OFFSET ${param_idx + 1}
|
|
"""
|
|
|
|
# Count query
|
|
count_query = f"SELECT COUNT(*) FROM persons {where_clause}"
|
|
|
|
async with pool.acquire() as conn:
|
|
rows = await conn.fetch(query, *params)
|
|
count_params = params[:-2] if params else [] # Remove limit/offset for count
|
|
total = await conn.fetchval(count_query, *count_params) if count_params else await conn.fetchval(count_query)
|
|
|
|
persons = []
|
|
for row in rows:
|
|
persons.append(PersonSummary(
|
|
staff_id=row['staff_id'],
|
|
name=row['name'],
|
|
headline=row['headline'],
|
|
location=row['location'],
|
|
country_code=row['country_code'],
|
|
custodian_slug=row['custodian_slug'],
|
|
custodian_name=row['custodian_name'],
|
|
linkedin_url=row['linkedin_url'],
|
|
profile_image_url=row['profile_image_url'],
|
|
heritage_relevant=row['heritage_relevant'] if row['heritage_relevant'] is not None else True,
|
|
heritage_types=row['heritage_types'] if row['heritage_types'] else [],
|
|
))
|
|
|
|
return {
|
|
"persons": [p.model_dump() for p in persons],
|
|
"total": total,
|
|
"limit": limit,
|
|
"offset": offset,
|
|
"has_more": offset + len(persons) < total,
|
|
}
|
|
|
|
|
|
@app.get("/persons/count")
|
|
async def get_persons_count():
|
|
"""Get total person count for stats display."""
|
|
pool = await get_pool()
|
|
|
|
async with pool.acquire() as conn:
|
|
total = await conn.fetchval("SELECT COUNT(*) FROM persons")
|
|
heritage_relevant = await conn.fetchval("SELECT COUNT(*) FROM persons WHERE heritage_relevant = true")
|
|
|
|
return {
|
|
"total": total,
|
|
"heritage_relevant": heritage_relevant,
|
|
}
|
|
|
|
|
|
@app.get("/persons/search", response_model=Dict[str, Any])
|
|
async def search_persons(
|
|
q: str = Query(..., min_length=2, description="Search query"),
|
|
limit: int = Query(20, ge=1, le=100, description="Max results"),
|
|
):
|
|
"""
|
|
Search persons by name, headline, or custodian name.
|
|
Uses PostgreSQL full-text search.
|
|
"""
|
|
pool = await get_pool()
|
|
|
|
# Use ILIKE for simple search (full-text search can be added later if index exists)
|
|
search_pattern = f"%{q}%"
|
|
|
|
query = """
|
|
SELECT staff_id, name, headline, location, country_code,
|
|
custodian_slug, custodian_name, linkedin_url, profile_image_url,
|
|
heritage_relevant, heritage_types
|
|
FROM persons
|
|
WHERE name ILIKE $1
|
|
OR headline ILIKE $1
|
|
OR custodian_name ILIKE $1
|
|
ORDER BY
|
|
CASE WHEN name ILIKE $2 THEN 0 ELSE 1 END,
|
|
name
|
|
LIMIT $3
|
|
"""
|
|
|
|
async with pool.acquire() as conn:
|
|
rows = await conn.fetch(query, search_pattern, f"{q}%", limit)
|
|
|
|
persons = []
|
|
for row in rows:
|
|
persons.append(PersonSummary(
|
|
staff_id=row['staff_id'],
|
|
name=row['name'],
|
|
headline=row['headline'],
|
|
location=row['location'],
|
|
country_code=row['country_code'],
|
|
custodian_slug=row['custodian_slug'],
|
|
custodian_name=row['custodian_name'],
|
|
linkedin_url=row['linkedin_url'],
|
|
profile_image_url=row['profile_image_url'],
|
|
heritage_relevant=row['heritage_relevant'] if row['heritage_relevant'] is not None else True,
|
|
heritage_types=row['heritage_types'] if row['heritage_types'] else [],
|
|
))
|
|
|
|
return {
|
|
"persons": [p.model_dump() for p in persons],
|
|
"count": len(persons),
|
|
"query": q,
|
|
}
|
|
|
|
|
|
@app.get("/person/{staff_id}", response_model=PersonDetail)
|
|
async def get_person(staff_id: str):
|
|
"""Get detailed information for a single person."""
|
|
pool = await get_pool()
|
|
|
|
query = """
|
|
SELECT staff_id, name, headline, location, country_code,
|
|
custodian_slug, custodian_name, linkedin_url, profile_image_url,
|
|
heritage_relevant, heritage_types, experience, education,
|
|
skills, languages, about, connections,
|
|
extraction_date, extraction_method, source_file
|
|
FROM persons
|
|
WHERE staff_id = $1
|
|
"""
|
|
|
|
async with pool.acquire() as conn:
|
|
row = await conn.fetchrow(query, staff_id)
|
|
|
|
if not row:
|
|
raise HTTPException(status_code=404, detail=f"Person not found: {staff_id}")
|
|
|
|
return PersonDetail(
|
|
staff_id=row['staff_id'],
|
|
name=row['name'],
|
|
headline=row['headline'],
|
|
location=row['location'],
|
|
country_code=row['country_code'],
|
|
custodian_slug=row['custodian_slug'],
|
|
custodian_name=row['custodian_name'],
|
|
linkedin_url=row['linkedin_url'],
|
|
profile_image_url=row['profile_image_url'],
|
|
heritage_relevant=row['heritage_relevant'] if row['heritage_relevant'] is not None else True,
|
|
heritage_types=row['heritage_types'] if row['heritage_types'] else [],
|
|
experience=row['experience'] if row['experience'] else [],
|
|
education=row['education'] if row['education'] else [],
|
|
skills=row['skills'] if row['skills'] else [],
|
|
languages=row['languages'] if row['languages'] else [],
|
|
about=row['about'],
|
|
connections=row['connections'],
|
|
extraction_date=row['extraction_date'].isoformat() if row['extraction_date'] else None,
|
|
extraction_method=row['extraction_method'],
|
|
source_file=row['source_file'],
|
|
)
|
|
|
|
|
|
# ============================================================================
|
|
# Main
|
|
# ============================================================================
|
|
|
|
if __name__ == "__main__":
|
|
import uvicorn
|
|
uvicorn.run(
|
|
"geo_api:app",
|
|
host=settings.api_host,
|
|
port=settings.api_port,
|
|
reload=True,
|
|
)
|