"""
PostGIS Geo API for Heritage Custodian Map
FastAPI backend providing spatial queries for bronhouder.nl map

Mounted at /api/geo/ via Caddy reverse proxy.

Endpoints:
- GET  /                  - Health check and geo statistics
- GET  /countries         - Get all countries as GeoJSON with institution counts
- GET  /provinces         - Get all provinces as GeoJSON
- GET  /municipalities    - Get municipalities (with filters)
- GET  /institutions      - Get institutions as GeoJSON (with bbox/type filters)
- GET  /institution/:ghcid - Get single institution details
- GET  /historical        - Get historical boundaries
- GET  /search            - Search institutions by name
- GET  /admin/point       - Find admin unit for a point
- GET  /nearby            - Find institutions near a point
- GET  /stats/by-type     - Institution counts by type
- GET  /stats/by-province - Institution counts by province

Person Endpoints (Beschermers):
- GET  /persons           - List persons with filters (custodian, type, country)
- GET  /persons/count     - Get total person count for stats
- GET  /persons/search    - Search persons by name/headline/custodian
- GET  /person/:staff_id  - Get single person details
"""

import json
import os
from contextlib import asynccontextmanager
from datetime import date, datetime, time
from decimal import Decimal
from typing import Optional, List, Dict, Any
from uuid import UUID

import asyncpg
from fastapi import FastAPI, HTTPException, Query, APIRouter
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field


# ============================================================================
# Configuration
# ============================================================================

class GeoSettings(BaseModel):
    """Settings for the PostGIS geo database connection and the API server.

    Each default is read from a ``GEO_*`` environment variable while the class
    body executes; the literal next to it is the fallback used when that
    variable is not set.
    """

    # Database connection (glam_geo holds the PostGIS boundary data)
    host: str = os.environ.get("GEO_POSTGRES_HOST", "localhost")
    port: int = int(os.environ.get("GEO_POSTGRES_PORT", "5432"))
    database: str = os.environ.get("GEO_POSTGRES_DB", "glam_geo")
    user: str = os.environ.get("GEO_POSTGRES_USER", "glam_api")
    password: str = os.environ.get("GEO_POSTGRES_PASSWORD", "")

    # Server settings
    api_host: str = os.environ.get("GEO_API_HOST", "0.0.0.0")
    api_port: int = int(os.environ.get("GEO_API_PORT", "8002"))


# Module-wide settings instance, built once when this module is imported.
settings = GeoSettings()


# ============================================================================
# Pydantic Models
# ============================================================================

class GeoStatsResponse(BaseModel):
    """Geo database statistics returned by the health-check endpoint (GET /)."""
    status: str  # "healthy" when produced by get_geo_status()
    database: str  # name of the connected database (settings.database)
    provinces: int  # row count of the provinces table
    municipalities: int  # row count of the municipalities table
    institutions: int  # row count of the institutions table
    historical_boundaries: int  # row count of the historical_boundaries table
    postgres_version: str  # SELECT version() output up to the first comma, or "unknown"


class InstitutionDetail(BaseModel):
    """Detailed institution information.

    Only ``ghcid``, ``name`` and ``type`` are required. Every ``Optional``
    field has an explicit ``None`` default so that it may be omitted under
    both Pydantic 1 and Pydantic 2 (Pydantic 2 treats an ``Optional``
    annotation without a default as a required field).
    """
    ghcid: str
    name: str
    type: str
    type_name: Optional[str] = None
    lat: Optional[float] = None
    lon: Optional[float] = None
    address: Optional[str] = None
    city: Optional[str] = None
    province: Optional[str] = None
    website: Optional[str] = None
    phone: Optional[str] = None
    wikidata_id: Optional[str] = None
    rating: Optional[float] = None
    total_ratings: Optional[int] = None
    description: Optional[str] = None
    reviews: Optional[List[Dict]] = None
    genealogiewerkbalk: Optional[Dict] = None


class AdminPoint(BaseModel):
    """Admin unit (province and municipality) for a point.

    All fields default to ``None`` explicitly, so a partially populated result
    validates under both Pydantic 1 and Pydantic 2.
    """
    province_code: Optional[str] = None
    province_name: Optional[str] = None
    municipality_code: Optional[str] = None
    municipality_name: Optional[str] = None


class NearbyInstitution(BaseModel):
    """Institution together with its distance from a query point.

    ``ghcid``, ``name``, ``type`` and ``distance_km`` are required; the
    ``Optional`` fields default to ``None`` explicitly so they may be omitted
    under both Pydantic 1 and Pydantic 2.
    """
    ghcid: str
    name: str
    type: str
    type_name: Optional[str] = None
    distance_km: float
    city: Optional[str] = None
    province: Optional[str] = None
    rating: Optional[float] = None


class PersonSummary(BaseModel):
    """Summary person information for list views.

    ``staff_id`` and ``name`` are required. The ``Optional`` fields default to
    ``None`` explicitly so they may be omitted under both Pydantic 1 and
    Pydantic 2.
    """
    staff_id: str
    name: str
    headline: Optional[str] = None
    location: Optional[str] = None
    country_code: Optional[str] = None
    custodian_slug: Optional[str] = None
    custodian_name: Optional[str] = None
    linkedin_url: Optional[str] = None
    profile_image_url: Optional[str] = None
    heritage_relevant: bool = True
    heritage_types: List[str] = []


class PersonDetail(BaseModel):
    """Detailed person information.

    ``staff_id`` and ``name`` are required. The ``Optional`` fields default to
    ``None`` explicitly so they may be omitted under both Pydantic 1 and
    Pydantic 2; the list fields default to empty lists.
    """
    staff_id: str
    name: str
    headline: Optional[str] = None
    location: Optional[str] = None
    country_code: Optional[str] = None
    custodian_slug: Optional[str] = None
    custodian_name: Optional[str] = None
    linkedin_url: Optional[str] = None
    profile_image_url: Optional[str] = None
    heritage_relevant: bool = True
    heritage_types: List[str] = []
    experience: List[Dict] = []
    education: List[Dict] = []
    skills: List[str] = []
    languages: List[Dict] = []
    about: Optional[str] = None
    connections: Optional[str] = None
    extraction_date: Optional[str] = None
    extraction_method: Optional[str] = None
    source_file: Optional[str] = None


# ============================================================================
# Global State
# ============================================================================

# Shared asyncpg connection pool; None until get_pool() creates it.
_pool: Optional[asyncpg.Pool] = None
# Naive local timestamp taken when this module is imported.
_start_time: datetime = datetime.now()


async def get_pool() -> asyncpg.Pool:
    """Return the shared connection pool, creating it on first use.

    The pool is stored in the module-level ``_pool`` and connects with the
    credentials held in ``settings``; it keeps between 2 and 10 connections.
    """
    global _pool

    # Fast path: the pool already exists.
    if _pool is not None:
        return _pool

    connection_args = {
        "host": settings.host,
        "port": settings.port,
        "database": settings.database,
        "user": settings.user,
        "password": settings.password,
    }
    _pool = await asyncpg.create_pool(min_size=2, max_size=10, **connection_args)
    return _pool


# ============================================================================
# FastAPI App
# ============================================================================

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler.

    Creates the connection pool before the application starts serving and
    closes it again on shutdown. The cleanup runs in a ``finally`` block so
    the pool is also released when the context is left through an exception
    or cancellation instead of a normal shutdown.
    """
    global _pool

    await get_pool()
    try:
        yield
    finally:
        if _pool is not None:
            await _pool.close()
            # Reset so a later get_pool() call builds a fresh pool.
            _pool = None


# ASGI application; the lifespan handler opens the connection pool on startup
# and closes it on shutdown.
app = FastAPI(
    title="PostGIS Geo API",
    description="Spatial REST API for heritage institution map",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS middleware - every origin, method and header is allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive - confirm this is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# GZip compression middleware - compresses responses >1KB
# Reduces ~126MB JSON payload to ~20-30MB (70-80% reduction)
app.add_middleware(GZipMiddleware, minimum_size=1000)


# ============================================================================
# Helper Functions
# ============================================================================

def serialize_value(val: Any) -> Any:
    """Convert a PostgreSQL value into a JSON-serializable one.

    Conversions:
      * ``date`` / ``datetime`` / ``time`` -> ISO 8601 string
      * ``Decimal`` -> ``float``
      * ``UUID`` -> ``str``
      * ``bytes`` -> UTF-8 text (undecodable bytes are replaced)
      * ``dict`` / ``list`` -> new container with every value converted
        recursively (dict keys are left untouched)

    ``None`` and any other value are returned unchanged.
    """
    if val is None:
        return None
    # datetime is a subclass of date, so timestamps are covered here as well.
    if isinstance(val, (date, time)):
        return val.isoformat()
    if isinstance(val, Decimal):
        return float(val)
    if isinstance(val, UUID):
        return str(val)
    if isinstance(val, bytes):
        return val.decode('utf-8', errors='replace')
    # Containers may themselves hold Decimal/date values, so convert their contents too.
    if isinstance(val, dict):
        return {key: serialize_value(item) for key, item in val.items()}
    if isinstance(val, list):
        return [serialize_value(item) for item in val]
    return val


def row_to_dict(row: asyncpg.Record) -> Dict[str, Any]:
    """Build a plain dict from an asyncpg row, passing each value through serialize_value."""
    converted: Dict[str, Any] = {}
    for column, value in row.items():
        converted[column] = serialize_value(value)
    return converted


# ============================================================================
# API Endpoints
# ============================================================================

@app.get("/", response_model=GeoStatsResponse)
async def get_geo_status() -> GeoStatsResponse:
    """Health check: report the server version and the row count of each core table."""
    counted_tables = ("provinces", "municipalities", "institutions", "historical_boundaries")
    pool = await get_pool()

    row_counts: Dict[str, Any] = {}
    async with pool.acquire() as conn:
        pg_version = await conn.fetchval("SELECT version()")
        # Table names come from the fixed tuple above, never from user input.
        for table in counted_tables:
            row_counts[table] = await conn.fetchval(f"SELECT COUNT(*) FROM {table}")

    if pg_version:
        # Keep only the text before the first comma of the version string.
        short_version = pg_version.split(',')[0]
    else:
        short_version = "unknown"

    return GeoStatsResponse(
        status="healthy",
        database=settings.database,
        provinces=row_counts["provinces"] or 0,
        municipalities=row_counts["municipalities"] or 0,
        institutions=row_counts["institutions"] or 0,
        historical_boundaries=row_counts["historical_boundaries"] or 0,
        postgres_version=short_version,
    )


@app.get("/provinces")
async def get_provinces(
    simplified: bool = Query(True, description="Return simplified geometries")
):
    """Get all provinces as a GeoJSON FeatureCollection.

    Args:
        simplified: When true, geometries are passed through
            ``ST_Simplify(geom, 0.001)`` before being serialized.

    Returns:
        A dict shaped as a GeoJSON FeatureCollection, one Feature per province
        ordered by name. ``centroid_lon``, ``centroid_lat`` and ``area_km2``
        are floats, or ``None`` when the database value is NULL.
    """
    pool = await get_pool()

    # Fixed server-side expression (no user input), so interpolation is safe.
    geom_expr = "ST_Simplify(geom, 0.001)" if simplified else "geom"

    async with pool.acquire() as conn:
        rows = await conn.fetch(f"""
            SELECT 
                id, province_code, iso_code, name,
                ST_AsGeoJSON({geom_expr})::json as geometry,
                ST_X(centroid) as centroid_lon,
                ST_Y(centroid) as centroid_lat,
                area_km2
            FROM provinces
            ORDER BY name
        """)

    features = []
    for row in rows:
        # The driver may hand the json column back as text; decode it so the
        # response carries a geometry object rather than a JSON string
        # (same handling as get_countries).
        geometry = row['geometry']
        if isinstance(geometry, str):
            geometry = json.loads(geometry)

        features.append({
            "type": "Feature",
            "id": row['province_code'],
            "geometry": geometry,
            "properties": {
                "id": row['id'],
                "province_code": row['province_code'],
                "iso_code": row['iso_code'],
                "name": row['name'],
                # Compare against None so a legitimate 0.0 is not turned into null.
                "centroid_lon": float(row['centroid_lon']) if row['centroid_lon'] is not None else None,
                "centroid_lat": float(row['centroid_lat']) if row['centroid_lat'] is not None else None,
                "area_km2": float(row['area_km2']) if row['area_km2'] is not None else None,
            }
        })

    return {
        "type": "FeatureCollection",
        "features": features
    }


@app.get("/countries")
async def get_countries(
    simplified: bool = Query(True, description="Return simplified geometries"),
    with_counts: bool = Query(False, description="Include institution counts per country"),
):
    """Get all countries as a GeoJSON FeatureCollection with optional institution counts.

    Args:
        simplified: When true, geometries are passed through ``ST_Simplify``
            with tolerance 0.01.
        with_counts: When true, each country is joined with the number of rows
            in ``custodians`` sharing its ISO alpha-2 code; otherwise
            ``institution_count`` is reported as 0.

    Returns:
        A dict shaped as a GeoJSON FeatureCollection plus a ``metadata`` block
        (feature count, summed institution count, number of countries with at
        least one institution). Rows whose geometry is missing or malformed
        are skipped and therefore not counted.
    """
    pool = await get_pool()

    def geom_expr(column: str) -> str:
        # Use more aggressive simplification for countries (world view).
        # The tolerance is a fixed constant, never user input.
        return f"ST_Simplify({column}, 0.01)" if simplified else column

    async with pool.acquire() as conn:
        if with_counts:
            # Join with custodians to get counts per country
            rows = await conn.fetch(f"""
                SELECT 
                    bc.id, 
                    bc.iso_a2 as country_code, 
                    bc.iso_a3,
                    bc.country_name as name,
                    ST_AsGeoJSON({geom_expr('bc.geom')}) as geometry,
                    ST_X(bc.centroid) as centroid_lon,
                    ST_Y(bc.centroid) as centroid_lat,
                    bc.area_km2,
                    COALESCE(counts.institution_count, 0) as institution_count
                FROM boundary_countries bc
                LEFT JOIN (
                    SELECT country_code, COUNT(*) as institution_count
                    FROM custodians
                    WHERE country_code IS NOT NULL
                    GROUP BY country_code
                ) counts ON bc.iso_a2 = counts.country_code
                WHERE bc.geom IS NOT NULL
                ORDER BY bc.country_name
            """)
        else:
            rows = await conn.fetch(f"""
                SELECT 
                    id, 
                    iso_a2 as country_code, 
                    iso_a3,
                    country_name as name,
                    ST_AsGeoJSON({geom_expr('geom')}) as geometry,
                    ST_X(centroid) as centroid_lon,
                    ST_Y(centroid) as centroid_lat,
                    area_km2
                FROM boundary_countries
                WHERE geom IS NOT NULL
                ORDER BY country_name
            """)

    features = []
    total_institutions = 0
    countries_with_data = 0

    for row in rows:
        # ST_AsGeoJSON returns text, so decode it into a dict.
        geometry = row['geometry']
        if isinstance(geometry, str):
            geometry = json.loads(geometry)

        # Skip countries with no usable geometry (e.g., Vatican City); a None
        # geometry fails the dict check as well.
        if not isinstance(geometry, dict) or 'type' not in geometry or 'coordinates' not in geometry:
            continue

        iso_a2 = row['country_code'].strip() if row['country_code'] else None
        iso_a3 = row['iso_a3'].strip() if row['iso_a3'] else None
        institution_count = row['institution_count'] if with_counts else 0

        # Track totals
        if with_counts:
            total_institutions += institution_count
            if institution_count > 0:
                countries_with_data += 1

        # Compare against None so a legitimate 0.0 is not turned into null.
        centroid_lon = float(row['centroid_lon']) if row['centroid_lon'] is not None else None
        centroid_lat = float(row['centroid_lat']) if row['centroid_lat'] is not None else None
        area_km2 = float(row['area_km2']) if row['area_km2'] is not None else None

        features.append({
            "type": "Feature",
            "id": iso_a2,
            "geometry": geometry,
            # Field names follow what the frontend expects (iso_a2, ...).
            "properties": {
                "id": row['id'],
                "iso_a2": iso_a2,
                "iso_a3": iso_a3,
                "name": row['name'],
                "institution_count": institution_count,
                "centroid": [centroid_lon, centroid_lat],
                "area_km2": area_km2,
            }
        })

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "total_institutions": total_institutions,
            "countries_with_data": countries_with_data,
            "type_filter": None,
            "simplified": simplified,
        }
    }


@app.get("/municipalities")
async def get_municipalities(
    province: Optional[str] = Query(None, description="Filter by province ISO code (e.g., NH)"),
    simplified: bool = Query(True, description="Return simplified geometries"),
    limit: int = Query(500, ge=1, le=1000, description="Maximum results")
):
    """Get municipalities as a GeoJSON FeatureCollection.

    Args:
        province: Optional province ISO code; upper-cased before it is matched
            against ``provinces.iso_code``.
        simplified: When true, geometries are passed through
            ``ST_Simplify(m.geom, 0.0005)``.
        limit: Maximum number of municipalities returned (1-1000).

    Returns:
        A dict shaped as a GeoJSON FeatureCollection, one Feature per
        municipality ordered by name. ``centroid_lon``, ``centroid_lat`` and
        ``area_km2`` are floats, or ``None`` when the database value is NULL.
    """
    pool = await get_pool()

    # Fixed server-side expression (no user input), so interpolation is safe.
    geom_expr = "ST_Simplify(m.geom, 0.0005)" if simplified else "m.geom"

    # User-supplied values are always bound as parameters; each placeholder
    # number is simply the position of the value just appended.
    params: List[Any] = []
    where_clause = ""
    if province:
        params.append(province.upper())
        where_clause = f"WHERE p.iso_code = ${len(params)}"
    params.append(limit)

    query = f"""
        SELECT 
            m.id, m.municipality_code, m.name,
            p.iso_code as province_iso, p.name as province_name,
            ST_AsGeoJSON({geom_expr})::json as geometry,
            ST_X(m.centroid) as centroid_lon,
            ST_Y(m.centroid) as centroid_lat,
            m.area_km2
        FROM municipalities m
        LEFT JOIN provinces p ON m.province_id = p.id
        {where_clause}
        ORDER BY m.name
        LIMIT ${len(params)}
    """

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    features = []
    for row in rows:
        # The driver may hand the json column back as text; decode it so the
        # response carries a geometry object rather than a JSON string
        # (same handling as get_countries).
        geometry = row['geometry']
        if isinstance(geometry, str):
            geometry = json.loads(geometry)

        features.append({
            "type": "Feature",
            "id": row['municipality_code'],
            "geometry": geometry,
            "properties": {
                "id": row['id'],
                "code": row['municipality_code'],
                "name": row['name'],
                "province_iso": row['province_iso'],
                "province_name": row['province_name'],
                # Compare against None so a legitimate 0.0 is not turned into null.
                "centroid_lon": float(row['centroid_lon']) if row['centroid_lon'] is not None else None,
                "centroid_lat": float(row['centroid_lat']) if row['centroid_lat'] is not None else None,
                "area_km2": float(row['area_km2']) if row['area_km2'] is not None else None,
            }
        })

    return {
        "type": "FeatureCollection",
        "features": features
    }


def _institution_feature(row) -> Dict[str, Any]:
    """Build one GeoJSON Point feature from a custodians row fetched by get_institutions."""
    props = {
        "ghcid": row['ghcid'],
        "name": row['name'],
        "emic_name": row['emic_name'],
        "type": row['type'],
        "type_name": row['type_name'],
        "city": row['city'],
        "province": row['province'],
        "province_iso": row['province_iso'],
        "country_code": row['country_code'],
        "formatted_address": row['formatted_address'],
        # Compare against None so a rating of 0 is not turned into null.
        "rating": float(row['rating']) if row['rating'] is not None else None,
        "total_ratings": row['total_ratings'],
        "wikidata_id": row['wikidata_id'],
        "website": row['website'],
        "phone": row['phone'],
        "email": row['email'],
        "isil_code": row['isil_code'],
        "google_place_id": row['google_place_id'],
        "description": row['description'],
        "business_status": row['business_status'],
        "street_view_url": row['street_view_url'],
        "founding_year": row['founding_year'],
        "dissolution_year": row['dissolution_year'],
    }

    # JSONB fields are only included when they hold something.
    for column in (
        "opening_hours",
        "reviews",
        "photos",
        "photo_urls",
        "temporal_extent",
        "museum_register",
    ):
        if row[column]:
            props[column] = row[column]

    if row['youtube_enrichment']:
        props["youtube_enrichment"] = row['youtube_enrichment']
    elif row['youtube_channel_url']:
        # Build minimal YouTube data if enrichment not present
        props["youtube"] = {
            "channel_url": row['youtube_channel_url'],
            "subscriber_count": row['youtube_subscriber_count'],
            "video_count": row['youtube_video_count'],
        }

    # Social media: one key per network that has a value.
    social = {
        network: row[f"social_{network}"]
        for network in ("facebook", "twitter", "instagram")
        if row[f"social_{network}"]
    }
    if social:
        props["social_media"] = social

    # Wikidata labels, logo URL and web claims, each only when present.
    for out_key, column in (
        ("wikidata_label", "wikidata_label_en"),
        ("wikidata_description", "wikidata_description_en"),
        ("logo_url", "logo_url"),
        ("web_claims", "web_claims"),
    ):
        if row[column]:
            props[out_key] = row[column]

    return {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [float(row['lon']), float(row['lat'])]
        },
        "properties": props
    }


@app.get("/institutions")
async def get_institutions(
    bbox: Optional[str] = Query(None, description="Bounding box: minLon,minLat,maxLon,maxLat"),
    province: Optional[str] = Query(None, description="Filter by province ISO code (e.g., NH, ZH)"),
    country: Optional[str] = Query(None, description="Filter by country code (e.g., NL, DE, JP)"),
    type: Optional[str] = Query(None, description="Filter by institution type (G,L,A,M,O,R,C,U,B,E,S,F,I,X,P,H,D,N,T)"),
    limit: int = Query(50000, ge=1, le=100000, description="Maximum results")
):
    """Get institutions as a GeoJSON FeatureCollection with metadata from the custodians table.

    Only rows with both ``lat`` and ``lon`` set are returned. All filters are
    combined with AND; ``province``, ``type`` and ``country`` are upper-cased
    before comparison.

    Args:
        bbox: Four comma-separated numbers ``minLon,minLat,maxLon,maxLat``.
        province: Value matched against ``region_code``.
        country: Value matched against ``country_code``.
        type: Value matched against the ``type`` column.
        limit: Maximum number of rows (1-100000).

    Returns:
        A dict shaped as a GeoJSON FeatureCollection ordered by name, plus a
        ``metadata`` block echoing the feature count, the limit and every
        filter that was supplied.

    Raises:
        HTTPException: 400 when ``bbox`` is not exactly four numbers.
    """
    pool = await get_pool()

    # Build WHERE clauses - query custodians table directly
    conditions = ["lat IS NOT NULL AND lon IS NOT NULL"]
    params: List[Any] = []

    def bind(value: Any) -> str:
        """Register a query parameter and return its positional placeholder."""
        params.append(value)
        return f"${len(params)}"

    if bbox:
        try:
            # A wrong number of parts fails the unpacking with ValueError too.
            min_lon, min_lat, max_lon, max_lat = map(float, bbox.split(','))
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid bbox format. Use: minLon,minLat,maxLon,maxLat")
        conditions.append(
            f"lon >= {bind(min_lon)} AND lat >= {bind(min_lat)} "
            f"AND lon <= {bind(max_lon)} AND lat <= {bind(max_lat)}"
        )

    if province:
        conditions.append(f"region_code = {bind(province.upper())}")

    if type:
        conditions.append(f"type = {bind(type.upper())}")

    if country:
        conditions.append(f"country_code = {bind(country.upper())}")

    where_clause = " AND ".join(conditions)
    limit_placeholder = bind(limit)

    # Query custodians table with all rich metadata fields
    query = f"""
        SELECT 
            ghcid,
            name,
            emic_name,
            type,
            type_name,
            lon,
            lat,
            city,
            region as province,
            region_code as province_iso,
            country_code,
            formatted_address,
            street_address,
            postal_code,
            rating,
            total_ratings,
            wikidata_id,
            website,
            phone,
            email,
            isil_code,
            google_place_id,
            description,
            opening_hours,
            reviews,
            photos,
            photo_urls,
            business_status,
            street_view_url,
            founding_year,
            dissolution_year,
            temporal_extent,
            museum_register,
            youtube_channel_url,
            youtube_subscriber_count,
            youtube_video_count,
            youtube_enrichment,
            social_facebook,
            social_twitter,
            social_instagram,
            wikidata_label_en,
            wikidata_description_en,
            logo_url,
            web_claims
        FROM custodians
        WHERE {where_clause}
        ORDER BY name
        LIMIT {limit_placeholder}
    """

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    features = [_institution_feature(row) for row in rows]

    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "limit": limit,
            "filters": {
                "bbox": bbox,
                "province": province,
                "type": type,
                # The country filter is applied above, so echo it as well.
                "country": country
            }
        }
    }


@app.get("/institution/{ghcid}")
async def get_institution(ghcid: str):
    """Get detailed information for a single institution with full metadata.

    Args:
        ghcid: Identifier matched exactly against ``custodians.ghcid``.

    Returns:
        A dict with the core columns always present, followed by optional
        sections (``temporal``, ``youtube``, ``social_media``, ``wikidata``,
        enrichment blobs, ``provenance``) that are only added when at least
        one of their source columns holds a value. ``ghcid_details`` is always
        present.

    Raises:
        HTTPException: 404 when no row has the given ``ghcid``.
    """
    pool = await get_pool()

    async with pool.acquire() as conn:
        row = await conn.fetchrow("""
            SELECT 
                ghcid,
                name,
                emic_name,
                verified_name,
                type,
                type_name,
                lon,
                lat,
                city,
                region as province,
                region_code as province_iso,
                country_code,
                formatted_address,
                street_address,
                postal_code,
                website,
                phone,
                email,
                wikidata_id,
                isil_code,
                google_place_id,
                rating,
                total_ratings,
                description,
                business_status,
                street_view_url,
                google_maps_url,
                opening_hours,
                reviews,
                photos,
                photo_urls,
                founding_year,
                founding_date,
                dissolution_year,
                dissolution_date,
                temporal_extent,
                museum_register,
                youtube_channel_id,
                youtube_channel_url,
                youtube_subscriber_count,
                youtube_video_count,
                youtube_view_count,
                youtube_enrichment,
                social_facebook,
                social_twitter,
                social_instagram,
                social_linkedin,
                social_youtube,
                logo_url,
                wikidata_label_nl,
                wikidata_label_en,
                wikidata_description_nl,
                wikidata_description_en,
                wikidata_types,
                wikidata_inception,
                wikidata_enrichment,
                genealogiewerkbalk,
                nan_isil_enrichment,
                kb_enrichment,
                zcbs_enrichment,
                web_claims,
                ghcid_uuid,
                ghcid_numeric,
                identifiers,
                data_source,
                data_tier,
                provenance
            FROM custodians
            WHERE ghcid = $1
        """, ghcid)

    if row is None:
        raise HTTPException(status_code=404, detail=f"Institution '{ghcid}' not found")

    def pick(**columns: str) -> Dict[str, Any]:
        """Return {output_key: row[column]} for every column holding a truthy value."""
        return {key: row[column] for key, column in columns.items() if row[column]}

    # Core fields, always present. Numeric values are compared against None so
    # that a legitimate 0 (e.g. a coordinate on the equator) is not lost.
    result = {
        "ghcid": row['ghcid'],
        "name": row['name'],
        "emic_name": row['emic_name'],
        "verified_name": row['verified_name'],
        "type": row['type'],
        "type_name": row['type_name'],
        "lat": float(row['lat']) if row['lat'] is not None else None,
        "lon": float(row['lon']) if row['lon'] is not None else None,
        "city": row['city'],
        "province": row['province'],
        "province_iso": row['province_iso'],
        "country_code": row['country_code'],
        "formatted_address": row['formatted_address'],
        "street_address": row['street_address'],
        "postal_code": row['postal_code'],
        "website": row['website'],
        "phone": row['phone'],
        "email": row['email'],
        "wikidata_id": row['wikidata_id'],
        "isil_code": row['isil_code'],
        "google_place_id": row['google_place_id'],
        "rating": float(row['rating']) if row['rating'] is not None else None,
        "total_ratings": row['total_ratings'],
        "description": row['description'],
        "business_status": row['business_status'],
        "street_view_url": row['street_view_url'],
        "google_maps_url": row['google_maps_url'],
    }

    # JSONB fields - only include if present
    result.update(pick(
        opening_hours="opening_hours",
        reviews="reviews",
        photos="photos",
        photo_urls="photo_urls",
        identifiers="identifiers",
    ))

    # Temporal data (date columns are emitted as ISO strings)
    temporal = {}
    if row['founding_year']:
        temporal["founding_year"] = row['founding_year']
    if row['founding_date']:
        temporal["founding_date"] = row['founding_date'].isoformat()
    if row['dissolution_year']:
        temporal["dissolution_year"] = row['dissolution_year']
    if row['dissolution_date']:
        temporal["dissolution_date"] = row['dissolution_date'].isoformat()
    if row['temporal_extent']:
        temporal["extent"] = row['temporal_extent']
    if temporal:
        result["temporal"] = temporal

    # Museum register
    if row['museum_register']:
        result["museum_register"] = row['museum_register']

    # YouTube: NULL and zero counts are omitted, so a row without any YouTube
    # data produces no "youtube" section at all.
    youtube = pick(
        channel_id="youtube_channel_id",
        channel_url="youtube_channel_url",
        subscriber_count="youtube_subscriber_count",
        video_count="youtube_video_count",
        view_count="youtube_view_count",
        enrichment="youtube_enrichment",
    )
    if youtube:
        result["youtube"] = youtube

    # Social media
    social = pick(
        facebook="social_facebook",
        twitter="social_twitter",
        instagram="social_instagram",
        linkedin="social_linkedin",
        youtube="social_youtube",
    )
    if social:
        result["social_media"] = social

    # Wikidata
    wikidata = pick(
        label_nl="wikidata_label_nl",
        label_en="wikidata_label_en",
        description_nl="wikidata_description_nl",
        description_en="wikidata_description_en",
        types="wikidata_types",
        inception="wikidata_inception",
        enrichment="wikidata_enrichment",
    )
    if wikidata:
        result["wikidata"] = wikidata

    # Logo and other enrichment data
    result.update(pick(
        logo_url="logo_url",
        genealogiewerkbalk="genealogiewerkbalk",
        nan_isil_enrichment="nan_isil_enrichment",
        kb_enrichment="kb_enrichment",
        zcbs_enrichment="zcbs_enrichment",
        web_claims="web_claims",
    ))

    # GHCID details
    ghcid_data = {"current": row['ghcid']}
    if row['ghcid_uuid']:
        ghcid_data["uuid"] = str(row['ghcid_uuid'])
    if row['ghcid_numeric'] is not None:
        ghcid_data["numeric"] = int(row['ghcid_numeric'])
    result["ghcid_details"] = ghcid_data

    # Provenance
    if row['data_source'] or row['data_tier'] or row['provenance']:
        result["provenance"] = {
            "data_source": row['data_source'],
            "data_tier": row['data_tier'],
            "details": row['provenance'],
        }

    return result


@app.get("/search")
async def search_institutions(
    q: str = Query(..., min_length=2, description="Search query"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results")
):
    """
    Search institutions by name.

    A full-text match over name + description is tried first. When it yields
    no rows, a case-insensitive substring match on the name is used instead.

    Args:
        q: Search text (at least 2 characters).
        type: Optional institution type; upper-cased before comparison.
        limit: Maximum number of rows returned.

    Returns:
        Dict with the original ``query``, the result ``count`` and the list
        of ``results`` (ghcid, name, type, type_name, lon, lat, city,
        province_iso, rating).
    """
    pool = await get_pool()
    type_filter = type.upper() if type else None
    
    # PostgreSQL full-text search ('simple' configuration), ranked by ts_rank_cd
    query = """
        SELECT 
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            ST_X(i.geom) as lon,
            ST_Y(i.geom) as lat,
            i.city,
            p.iso_code as province_iso,
            i.rating,
            ts_rank_cd(
                to_tsvector('simple', i.name || ' ' || COALESCE(i.description, '')),
                plainto_tsquery('simple', $1)
            ) as rank
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE to_tsvector('simple', i.name || ' ' || COALESCE(i.description, '')) 
              @@ plainto_tsquery('simple', $1)
    """
    
    # Placeholder numbers are derived from the parameter list length so the
    # SQL text and the bound values cannot drift apart.
    params = [q]
    if type_filter:
        params.append(type_filter)
        query += f" AND i.institution_type = ${len(params)}"
    params.append(limit)
    query += f" ORDER BY rank DESC, i.name LIMIT ${len(params)}"
    
    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)
    
    # If no results from FTS, try ILIKE fallback
    if not rows:
        # Escape LIKE metacharacters (backslash is the default escape
        # character) so the user's text is matched literally.
        literal = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        fallback_params = [f"%{literal}%"]
        type_clause = ""
        if type_filter:
            fallback_params.append(type_filter)
            type_clause = f"AND i.institution_type = ${len(fallback_params)}"
        fallback_params.append(limit)
        
        fallback_query = f"""
            SELECT 
                i.ghcid_current as ghcid,
                i.name,
                i.institution_type as type,
                i.type_name,
                ST_X(i.geom) as lon,
                ST_Y(i.geom) as lat,
                i.city,
                p.iso_code as province_iso,
                i.rating,
                0 as rank
            FROM institutions i
            LEFT JOIN provinces p ON i.province_id = p.id
            WHERE i.name ILIKE $1
            {type_clause}
            ORDER BY i.name
            LIMIT ${len(fallback_params)}
        """
        
        async with pool.acquire() as conn:
            rows = await conn.fetch(fallback_query, *fallback_params)
    
    return {
        "query": q,
        "count": len(rows),
        "results": [
            {
                "ghcid": row['ghcid'],
                "name": row['name'],
                "type": row['type'],
                "type_name": row['type_name'],
                # Compare against None: 0.0 is a valid coordinate and must
                # not be reported as missing.
                "lon": float(row['lon']) if row['lon'] is not None else None,
                "lat": float(row['lat']) if row['lat'] is not None else None,
                "city": row['city'],
                "province_iso": row['province_iso'],
                "rating": float(row['rating']) if row['rating'] else None,
            }
            for row in rows
        ]
    }


@app.get("/nearby", response_model=List[NearbyInstitution])
async def find_nearby(
    lon: float = Query(..., description="Longitude"),
    lat: float = Query(..., description="Latitude"),
    radius_km: float = Query(10, ge=0.1, le=100, description="Search radius in km"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results")
):
    """
    Find institutions near a point.

    Args:
        lon: Longitude of the search centre.
        lat: Latitude of the search centre.
        radius_km: Search radius in kilometres (fractional values allowed).
        type: Optional institution type; upper-cased before comparison.
        limit: Maximum number of rows returned.

    Returns:
        List of NearbyInstitution ordered by ascending distance, with
        ``distance_km`` rounded to two decimals.
    """
    pool = await get_pool()
    
    # NOTE: the radius parameter is cast explicitly. In a bare `$3 * 1000`
    # PostgreSQL resolves the untyped parameter to the type of the integer
    # literal, so a fractional radius would not survive parameter binding.
    query = """
        SELECT 
            i.ghcid_current as ghcid,
            i.name,
            i.institution_type as type,
            i.type_name,
            (ST_Distance(i.geom::geography, ST_SetSRID(ST_Point($1, $2), 4326)::geography) / 1000) as distance_km,
            i.city,
            p.name as province,
            i.rating
        FROM institutions i
        LEFT JOIN provinces p ON i.province_id = p.id
        WHERE ST_DWithin(
            i.geom::geography,
            ST_SetSRID(ST_Point($1, $2), 4326)::geography,
            $3::double precision * 1000
        )
    """
    
    # Placeholder numbers follow the length of the parameter list.
    params = [lon, lat, radius_km]
    if type:
        params.append(type.upper())
        query += f" AND i.institution_type = ${len(params)}"
    params.append(limit)
    query += f" ORDER BY distance_km LIMIT ${len(params)}"
    
    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)
    
    return [
        NearbyInstitution(
            ghcid=row['ghcid'],
            name=row['name'],
            type=row['type'],
            type_name=row['type_name'],
            distance_km=round(float(row['distance_km']), 2),
            city=row['city'],
            province=row['province'],
            rating=float(row['rating']) if row['rating'] else None,
        )
        for row in rows
    ]


@app.get("/admin/point", response_model=AdminPoint)
async def get_admin_for_point(
    lon: float = Query(..., description="Longitude"),
    lat: float = Query(..., description="Latitude")
):
    """
    Resolve a coordinate to the administrative units that contain it.

    A municipality whose geometry contains the point is looked up first
    (together with its province). If none matches, a containing province is
    looked up on its own. When neither lookup finds anything, an AdminPoint
    with every field set to None is returned.
    """
    pool = await get_pool()
    point_args = (lon, lat)
    field_names = (
        "province_code",
        "province_name",
        "municipality_code",
        "municipality_name",
    )
    
    async with pool.acquire() as conn:
        match = await conn.fetchrow("""
            SELECT 
                p.province_code,
                p.name as province_name,
                m.municipality_code,
                m.name as municipality_name
            FROM municipalities m
            JOIN provinces p ON m.province_id = p.id
            WHERE ST_Contains(m.geom, ST_SetSRID(ST_Point($1, $2), 4326))
            LIMIT 1
        """, *point_args)
    
    if not match:
        # No municipality contains the point: fall back to provinces only
        async with pool.acquire() as conn:
            match = await conn.fetchrow("""
                SELECT 
                    province_code,
                    name as province_name,
                    NULL as municipality_code,
                    NULL as municipality_name
                FROM provinces
                WHERE ST_Contains(geom, ST_SetSRID(ST_Point($1, $2), 4326))
                LIMIT 1
            """, *point_args)
    
    if match:
        return AdminPoint(**{key: match[key] for key in field_names})
    
    # Nothing matched at either level
    return AdminPoint(**dict.fromkeys(field_names))


@app.get("/historical")
async def get_historical_boundaries(
    year: int = Query(1500, description="Reference year"),
    boundary_type: Optional[str] = Query(None, description="Boundary type filter"),
    simplified: bool = Query(True, description="Return simplified geometries"),
    limit: int = Query(1000, ge=1, le=10000, description="Maximum results")
):
    """
    Get historical boundaries as GeoJSON.

    Args:
        year: Reference year the boundaries must match exactly.
        boundary_type: Optional exact-match filter on the boundary type.
        simplified: When true, geometries are passed through ST_Simplify
            with a tolerance of 0.001 before being serialised.
        limit: Maximum number of rows fetched.

    Returns:
        A GeoJSON FeatureCollection. Rows without a geometry are skipped, so
        ``metadata.count`` reflects the number of features actually emitted.
    """
    pool = await get_pool()
    
    # Fixed SQL fragment; no user input is interpolated here.
    geometry_expr = "ST_Simplify(geom, 0.001)" if simplified else "geom"
    
    # Placeholder numbers follow the length of the parameter list.
    conditions = ["reference_year = $1"]
    params = [year]
    if boundary_type:
        params.append(boundary_type)
        conditions.append(f"boundary_type = ${len(params)}")
    params.append(limit)
    
    where_clause = " AND ".join(conditions)
    
    query = f"""
        SELECT 
            id, boundary_code, name, boundary_type, reference_year,
            ST_AsGeoJSON(
                {geometry_expr}
            )::json as geometry,
            ST_X(centroid) as centroid_lon,
            ST_Y(centroid) as centroid_lat,
            area_km2
        FROM historical_boundaries
        WHERE {where_clause}
        ORDER BY name
        LIMIT ${len(params)}
    """
    
    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)
    
    features = []
    for row in rows:
        if not row['geometry']:
            continue
        centroid_lon = row['centroid_lon']
        centroid_lat = row['centroid_lat']
        features.append({
            "type": "Feature",
            "id": row['boundary_code'],
            "geometry": row['geometry'],
            "properties": {
                "id": row['id'],
                "code": row['boundary_code'],
                "name": row['name'],
                "type": row['boundary_type'],
                "year": row['reference_year'],
                # Compare against None: a centroid coordinate of exactly 0.0
                # is valid and must not be reported as missing.
                "centroid_lon": float(centroid_lon) if centroid_lon is not None else None,
                "centroid_lat": float(centroid_lat) if centroid_lat is not None else None,
                "area_km2": float(row['area_km2']) if row['area_km2'] else None,
            }
        })
    
    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "year": year,
            "boundary_type": boundary_type,
            "count": len(features)
        }
    }


@app.get("/stats/by-type")
async def get_stats_by_type():
    """
    Report how many institutions exist per institution type.

    Only institutions that have a geometry are counted. Each entry carries
    the type, its display name, the row count and the average rating rounded
    to two decimals (None when no average is available). Entries are ordered
    by descending count.
    """
    pool = await get_pool()
    
    sql = """
            SELECT 
                institution_type as type,
                type_name,
                COUNT(*) as count,
                ROUND(AVG(rating)::numeric, 2) as avg_rating
            FROM institutions
            WHERE geom IS NOT NULL
            GROUP BY institution_type, type_name
            ORDER BY count DESC
        """
    
    async with pool.acquire() as conn:
        records = await conn.fetch(sql)
    
    stats = []
    for record in records:
        average = record['avg_rating']
        stats.append({
            "type": record['type'],
            "type_name": record['type_name'],
            "count": record['count'],
            "avg_rating": float(average) if average else None
        })
    
    return {"stats": stats}


@app.get("/stats/by-province")
async def get_stats_by_province():
    """
    Report how many institutions exist per province.

    Provinces are LEFT JOINed to institutions, so a province without any
    institution still appears with a count of 0. Each entry carries the
    province ISO code, its name, the institution count and the average
    rating rounded to two decimals (None when no average is available).
    Entries are ordered by descending count.
    """
    pool = await get_pool()
    
    sql = """
            SELECT 
                p.iso_code,
                p.name as province_name,
                COUNT(i.id) as count,
                ROUND(AVG(i.rating)::numeric, 2) as avg_rating
            FROM provinces p
            LEFT JOIN institutions i ON i.province_id = p.id
            GROUP BY p.id, p.iso_code, p.name
            ORDER BY count DESC
        """
    
    async with pool.acquire() as conn:
        records = await conn.fetch(sql)
    
    stats = []
    for record in records:
        average = record['avg_rating']
        stats.append({
            "province_iso": record['iso_code'],
            "province_name": record['province_name'],
            "count": record['count'],
            "avg_rating": float(average) if average else None
        })
    
    return {"stats": stats}


# ============================================================================
# Optimized Loading Endpoints (Pagination, Viewport, Lite)
# ============================================================================

@app.get("/institutions/lite")
async def get_institutions_lite(
    bbox: Optional[str] = Query(None, description="Bounding box: minLon,minLat,maxLon,maxLat"),
    country: Optional[str] = Query(None, description="Filter by country code (e.g., NL, DE, JP)"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(100000, ge=1, le=200000, description="Maximum results")
):
    """
    Return a slim GeoJSON FeatureCollection intended for map markers.

    Only ghcid, name, type, city, country_code, rating and the point
    coordinates are included per feature; rows without coordinates are
    excluded. Full details are available per institution via
    /institution/{ghcid}.

    Raises:
        HTTPException: 400 when ``bbox`` is not four comma-separated numbers.
    """
    pool = await get_pool()
    
    filters = ["lat IS NOT NULL AND lon IS NOT NULL"]
    args = []
    
    if bbox:
        try:
            west, south, east, north = map(float, bbox.split(','))
        except ValueError:
            raise HTTPException(status_code=400, detail="Invalid bbox format")
        args.extend([west, south, east, north])
        last = len(args)
        filters.append(f"""
                lon >= ${last-3} AND lat >= ${last-2} 
                AND lon <= ${last-1} AND lat <= ${last}
            """)
    
    # Equality filters share one shape: upper-case the value, bind it, and
    # reference it by its position in the argument list.
    for column, value in (("country_code", country), ("type", type)):
        if value:
            args.append(value.upper())
            filters.append(f"{column} = ${len(args)}")
    
    args.append(limit)
    limit_slot = len(args)
    where_clause = " AND ".join(filters)
    
    # Marker-only column list keeps the response small
    query = f"""
        SELECT 
            ghcid,
            name,
            type,
            lon,
            lat,
            city,
            country_code,
            rating
        FROM custodians
        WHERE {where_clause}
        ORDER BY name
        LIMIT ${limit_slot}
    """
    
    async with pool.acquire() as conn:
        records = await conn.fetch(query, *args)
    
    features = [
        {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [float(record['lon']), float(record['lat'])]
            },
            "properties": {
                "ghcid": record['ghcid'],
                "name": record['name'],
                "type": record['type'],
                "city": record['city'],
                "country_code": record['country_code'],
                "rating": float(record['rating']) if record['rating'] else None,
            }
        }
        for record in records
    ]
    
    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "mode": "lite",
            "filters": {"bbox": bbox, "country": country, "type": type}
        }
    }


@app.get("/institutions/page")
async def get_institutions_paginated(
    page: int = Query(1, ge=1, description="Page number (1-indexed)"),
    page_size: int = Query(1000, ge=100, le=5000, description="Items per page"),
    country: Optional[str] = Query(None, description="Filter by country code"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    sort_by: str = Query("name", description="Sort field: name, city, rating, type"),
    sort_order: str = Query("asc", description="Sort order: asc, desc"),
):
    """
    Get paginated institutions with full metadata.
    ~1MB per page instead of ~126MB full download.

    Pagination is offset-based (LIMIT/OFFSET). Rows are ordered by the
    requested field with ``ghcid`` as a tiebreaker, so rows that share a sort
    value keep a stable position from one page to the next.

    Args:
        page: 1-indexed page number.
        page_size: Number of rows per page.
        country: Optional country code; upper-cased before comparison.
        type: Optional institution type; upper-cased before comparison.
        sort_by: Sort column; values outside the allowed set fall back to
            ``name``.
        sort_order: ``desc`` (case-insensitive) sorts descending, anything
            else ascending.

    Returns:
        A GeoJSON FeatureCollection with additional ``pagination`` and
        ``metadata`` sections.
    """
    pool = await get_pool()
    
    # Validate sort parameters. sort_by is interpolated into the SQL text, so
    # it must only ever come from this whitelist.
    valid_sort_fields = {"name", "city", "rating", "type", "country_code"}
    if sort_by not in valid_sort_fields:
        sort_by = "name"
    sort_direction = "DESC" if sort_order.lower() == "desc" else "ASC"
    
    conditions = ["lat IS NOT NULL AND lon IS NOT NULL"]
    # Filter values are kept separate from the paging values so the count
    # query can reuse exactly the filter parameters.
    filter_params = []
    
    if country:
        filter_params.append(country.upper())
        conditions.append(f"country_code = ${len(filter_params)}")
    
    if type:
        filter_params.append(type.upper())
        conditions.append(f"type = ${len(filter_params)}")
    
    where_clause = " AND ".join(conditions)
    
    # Get total count for pagination metadata
    count_query = f"SELECT COUNT(*) FROM custodians WHERE {where_clause}"
    
    # Calculate offset and the placeholder numbers for the paging values
    offset = (page - 1) * page_size
    limit_param = len(filter_params) + 1
    offset_param = len(filter_params) + 2
    
    # Full metadata query with pagination. Without a tiebreaker, rows with
    # equal sort keys may be returned in any order, which lets LIMIT/OFFSET
    # pages repeat or skip rows. ghcid is assumed to be unique per custodian
    # (confirm against the schema).
    query = f"""
        SELECT 
            ghcid, name, emic_name, type, type_name,
            lon, lat, city, region as province, region_code as province_iso,
            country_code, formatted_address, street_address, postal_code,
            rating, total_ratings, wikidata_id, website, phone, email,
            isil_code, google_place_id, description, opening_hours,
            reviews, photos, photo_urls, business_status, street_view_url,
            founding_year, dissolution_year, temporal_extent, museum_register,
            youtube_channel_url, youtube_subscriber_count, youtube_video_count,
            youtube_enrichment, social_facebook, social_twitter, social_instagram,
            wikidata_label_en, wikidata_description_en
        FROM custodians
        WHERE {where_clause}
        ORDER BY {sort_by} {sort_direction}, ghcid
        LIMIT ${limit_param} OFFSET ${offset_param}
    """
    
    async with pool.acquire() as conn:
        total_count = await conn.fetchval(count_query, *filter_params)
        rows = await conn.fetch(query, *filter_params, page_size, offset)
    
    # Ceiling division
    total_pages = (total_count + page_size - 1) // page_size
    
    # Optional JSONB columns copied through only when they hold a value
    optional_json_fields = (
        "opening_hours", "reviews", "photos", "photo_urls",
        "temporal_extent", "museum_register",
    )
    
    features = []
    for row in rows:
        props = {
            "ghcid": row['ghcid'],
            "name": row['name'],
            "emic_name": row['emic_name'],
            "type": row['type'],
            "type_name": row['type_name'],
            "city": row['city'],
            "province": row['province'],
            "province_iso": row['province_iso'],
            "country_code": row['country_code'],
            "formatted_address": row['formatted_address'],
            "rating": float(row['rating']) if row['rating'] else None,
            "total_ratings": row['total_ratings'],
            "wikidata_id": row['wikidata_id'],
            "website": row['website'],
            "phone": row['phone'],
            "email": row['email'],
            "isil_code": row['isil_code'],
            "google_place_id": row['google_place_id'],
            "description": row['description'],
            "business_status": row['business_status'],
            "street_view_url": row['street_view_url'],
            "founding_year": row['founding_year'],
            "dissolution_year": row['dissolution_year'],
        }
        
        # Add JSONB fields if present
        for field in optional_json_fields:
            if row[field]:
                props[field] = row[field]
        
        # Prefer the full YouTube enrichment; otherwise expose the basic
        # channel columns when a channel URL exists.
        if row['youtube_enrichment']:
            props["youtube_enrichment"] = row['youtube_enrichment']
        elif row['youtube_channel_url']:
            props["youtube"] = {
                "channel_url": row['youtube_channel_url'],
                "subscriber_count": row['youtube_subscriber_count'],
                "video_count": row['youtube_video_count'],
            }
        
        social = {
            network: row[f"social_{network}"]
            for network in ("facebook", "twitter", "instagram")
            if row[f"social_{network}"]
        }
        if social:
            props["social_media"] = social
        
        if row['wikidata_label_en']:
            props["wikidata_label"] = row['wikidata_label_en']
        if row['wikidata_description_en']:
            props["wikidata_description"] = row['wikidata_description_en']
        
        features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [float(row['lon']), float(row['lat'])]
            },
            "properties": props
        })
    
    return {
        "type": "FeatureCollection",
        "features": features,
        "pagination": {
            "page": page,
            "page_size": page_size,
            "total_count": total_count,
            "total_pages": total_pages,
            "has_next": page < total_pages,
            "has_prev": page > 1,
            "next_page": page + 1 if page < total_pages else None,
            "prev_page": page - 1 if page > 1 else None,
        },
        "metadata": {
            "mode": "paginated",
            "filters": {"country": country, "type": type},
            "sort": {"field": sort_by, "order": sort_order}
        }
    }


@app.get("/institutions/viewport")
async def get_institutions_viewport(
    bbox: str = Query(..., description="Bounding box: minLon,minLat,maxLon,maxLat (REQUIRED)"),
    zoom: int = Query(10, ge=1, le=20, description="Map zoom level (affects detail)"),
    country: Optional[str] = Query(None, description="Filter by country code"),
    type: Optional[str] = Query(None, description="Filter by institution type"),
    limit: int = Query(2000, ge=100, le=10000, description="Maximum results for viewport"),
):
    """
    Return the institutions inside a map viewport as GeoJSON.

    The set of columns selected grows with the zoom level (minimal, basic,
    moderate, full). Results are ordered by rating (highest first, NULLs
    last) and then by name. ``metadata.truncated`` is true when the number of
    features reaches ``limit``.

    Raises:
        HTTPException: 400 when ``bbox`` is not four comma-separated numbers.
    """
    pool = await get_pool()
    
    # Parse bbox
    try:
        min_lon, min_lat, max_lon, max_lat = map(float, bbox.split(','))
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid bbox format. Use: minLon,minLat,maxLon,maxLat")
    
    filters = [
        "lat IS NOT NULL AND lon IS NOT NULL",
        "lon >= $1 AND lat >= $2 AND lon <= $3 AND lat <= $4",
    ]
    args = [min_lon, min_lat, max_lon, max_lat]
    
    # Equality filters: upper-case, bind, reference by position
    for column, value in (("country_code", country), ("type", type)):
        if value:
            args.append(value.upper())
            filters.append(f"{column} = ${len(args)}")
    
    where_clause = " AND ".join(filters)
    
    # Column list depends on zoom: the closer the view, the more columns
    if zoom <= 6:
        # World/continent view - just markers
        detail_level = "minimal"
        select_fields = "ghcid, name, type, lon, lat, country_code"
    elif zoom <= 10:
        # Country view - basic info
        detail_level = "basic"
        select_fields = "ghcid, name, type, type_name, lon, lat, city, country_code, rating"
    elif zoom <= 14:
        # Region view - moderate detail
        detail_level = "moderate"
        select_fields = """
            ghcid, name, emic_name, type, type_name, lon, lat, city,
            region as province, country_code, rating, total_ratings,
            website, wikidata_id, description
        """
    else:
        # City/street view - full detail
        detail_level = "full"
        select_fields = """
            ghcid, name, emic_name, type, type_name, lon, lat, city,
            region as province, region_code as province_iso, country_code,
            formatted_address, street_address, postal_code, rating, total_ratings,
            wikidata_id, website, phone, email, isil_code, google_place_id,
            description, opening_hours, reviews, photos, photo_urls,
            business_status, street_view_url, founding_year, dissolution_year,
            social_facebook, social_twitter, social_instagram,
            wikidata_label_en, wikidata_description_en
        """
    
    args.append(limit)
    limit_slot = len(args)
    query = f"""
        SELECT {select_fields}
        FROM custodians
        WHERE {where_clause}
        ORDER BY rating DESC NULLS LAST, name
        LIMIT ${limit_slot}
    """
    
    async with pool.acquire() as conn:
        records = await conn.fetch(query, *args)
    
    # Columns copied verbatim whenever the query selected them (None values
    # included). They are split in two groups because "rating" sits between
    # them in the emitted property order.
    copied_before_rating = ("type_name", "city", "country_code")
    copied_after_rating = (
        "total_ratings", "province", "province_iso", "emic_name", "website",
        "wikidata_id", "description", "formatted_address", "street_address",
        "postal_code", "phone", "email", "isil_code", "google_place_id",
        "business_status", "street_view_url", "founding_year",
        "dissolution_year",
    )
    # Columns copied only when selected AND holding a truthy value
    copied_if_set = ("opening_hours", "reviews", "photos", "photo_urls")
    social_networks = ("facebook", "twitter", "instagram")
    # (source column, output property) pairs, copied only when truthy
    wikidata_renames = (
        ("wikidata_label_en", "wikidata_label"),
        ("wikidata_description_en", "wikidata_description"),
    )
    
    features = []
    for record in records:
        data = dict(record)
        props = {key: data[key] for key in ("ghcid", "name", "type")}
        
        for key in copied_before_rating:
            if key in data:
                props[key] = data[key]
        
        if data.get('rating'):
            props['rating'] = float(data['rating'])
        
        for key in copied_after_rating:
            if key in data:
                props[key] = data[key]
        
        # JSONB fields at full detail
        for key in copied_if_set:
            if data.get(key):
                props[key] = data[key]
        
        # Social media at full detail
        social = {
            network: data[f"social_{network}"]
            for network in social_networks
            if data.get(f"social_{network}")
        }
        if social:
            props['social_media'] = social
        
        for source_key, target_key in wikidata_renames:
            if data.get(source_key):
                props[target_key] = data[source_key]
        
        features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [float(data['lon']), float(data['lat'])]
            },
            "properties": props
        })
    
    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "count": len(features),
            "mode": "viewport",
            "detail_level": detail_level,
            "zoom": zoom,
            "bbox": {"min_lon": min_lon, "min_lat": min_lat, "max_lon": max_lon, "max_lat": max_lat},
            "filters": {"country": country, "type": type},
            "limit": limit,
            "truncated": len(features) >= limit
        }
    }


@app.get("/institutions/cluster")
async def get_institutions_clustered(
    bbox: str = Query(..., description="Bounding box: minLon,minLat,maxLon,maxLat"),
    zoom: int = Query(5, ge=1, le=20, description="Map zoom level"),
    grid_size: Optional[float] = Query(None, description="Grid cell size in degrees (auto if not set)"),
):
    """
    Get clustered institution counts for overview maps.
    Returns grid cells with counts instead of individual markers.

    Args:
        bbox: ``minLon,minLat,maxLon,maxLat`` of the area to cluster.
        zoom: Map zoom level; used to pick a grid size when ``grid_size`` is
            not given (higher zoom = smaller cells).
        grid_size: Explicit grid cell size in degrees. Must be a positive,
            finite number.

    Returns:
        A GeoJSON FeatureCollection with one Point feature per non-empty grid
        cell (placed at the cell centre) carrying the count, the distinct
        types and the average rating, ordered by descending count.

    Raises:
        HTTPException: 400 when ``bbox`` is malformed or ``grid_size`` is not
            a positive, finite number.
    """
    pool = await get_pool()
    
    try:
        min_lon, min_lat, max_lon, max_lat = map(float, bbox.split(','))
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid bbox format")
    
    if grid_size is None:
        # Auto-calculate grid size based on zoom: (highest zoom, cell size in
        # degrees). The first entry whose zoom bound is not exceeded wins.
        zoom_grid_steps = (
            (3, 10.0),   # ~1000km cells
            (5, 5.0),    # ~500km cells
            (7, 2.0),    # ~200km cells
            (9, 1.0),    # ~100km cells
            (11, 0.5),   # ~50km cells
            (13, 0.2),   # ~20km cells
        )
        grid_size = next(
            (size for max_zoom, size in zoom_grid_steps if zoom <= max_zoom),
            0.1,         # ~10km cells
        )
    elif not (0 < grid_size < float("inf")):
        # The query divides by grid_size, so zero, negative, NaN (all
        # comparisons false) and infinite values are rejected up front
        # instead of surfacing as a database error.
        raise HTTPException(
            status_code=400,
            detail="grid_size must be a positive, finite number of degrees"
        )
    
    # Use PostgreSQL to cluster into grid cells
    query = """
        SELECT 
            FLOOR(lon / $5) * $5 + $5/2 as cell_lon,
            FLOOR(lat / $5) * $5 + $5/2 as cell_lat,
            COUNT(*) as count,
            array_agg(DISTINCT type) as types,
            AVG(rating) as avg_rating
        FROM custodians
        WHERE lat IS NOT NULL AND lon IS NOT NULL
          AND lon >= $1 AND lat >= $2 AND lon <= $3 AND lat <= $4
        GROUP BY FLOOR(lon / $5), FLOOR(lat / $5)
        ORDER BY count DESC
    """
    
    async with pool.acquire() as conn:
        rows = await conn.fetch(query, min_lon, min_lat, max_lon, max_lat, grid_size)
    
    features = [
        {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [float(row['cell_lon']), float(row['cell_lat'])]
            },
            "properties": {
                "cluster": True,
                "count": row['count'],
                "types": row['types'],
                "avg_rating": round(float(row['avg_rating']), 2) if row['avg_rating'] else None,
            }
        }
        for row in rows
    ]
    total_count = sum(row['count'] for row in rows)
    
    return {
        "type": "FeatureCollection",
        "features": features,
        "metadata": {
            "mode": "clustered",
            "cluster_count": len(features),
            "total_institutions": total_count,
            "grid_size": grid_size,
            "zoom": zoom,
            "bbox": {"min_lon": min_lon, "min_lat": min_lat, "max_lon": max_lon, "max_lat": max_lat}
        }
    }


# ============================================================================
# Person Endpoints (Beschermers)
# ============================================================================

@app.get("/persons", response_model=Dict[str, Any])
async def list_persons(
    custodian_slug: Optional[str] = Query(None, description="Filter by custodian slug"),
    heritage_type: Optional[str] = Query(None, description="Filter by heritage type (A, L, M, etc.)"),
    country_code: Optional[str] = Query(None, description="Filter by country code"),
    heritage_relevant: Optional[bool] = Query(None, description="Filter by heritage relevance (true/false)"),
    limit: int = Query(50, ge=1, le=500, description="Max results to return"),
    offset: int = Query(0, ge=0, description="Offset for pagination"),
):
    """
    List persons with optional filters.
    Returns paginated list of heritage professionals (beschermers).

    All supplied filters are combined with AND. String filters that are
    empty are ignored; ``heritage_relevant`` is applied whenever it is not
    None (so ``false`` is a real filter).

    Returns a dict with:
    - persons: list of person summaries for the requested page
    - total: number of rows matching the filters (ignoring limit/offset)
    - limit / offset: the pagination values that were applied
    - has_more: True when rows remain after this page
    """
    pool = await get_pool()

    # Filter values are kept separate from the pagination values so the same
    # list can be passed unchanged to the COUNT query.
    conditions = []
    filter_params = []

    def add_filter(template: str, value: Any) -> None:
        """Register one WHERE condition, numbering its placeholder."""
        filter_params.append(value)
        conditions.append(template.format(idx=len(filter_params)))

    if custodian_slug:
        add_filter("custodian_slug = ${idx}", custodian_slug)

    if heritage_type:
        add_filter("${idx} = ANY(heritage_types)", heritage_type)

    if country_code:
        add_filter("country_code = ${idx}", country_code)

    if heritage_relevant is not None:
        add_filter("heritage_relevant = ${idx}", heritage_relevant)

    where_clause = f"WHERE {' AND '.join(conditions)}" if conditions else ""

    # Pagination placeholders come right after the filter placeholders.
    limit_idx = len(filter_params) + 1

    # staff_id is a tie-breaker: ordering by name alone leaves rows with equal
    # names in an unspecified order, so LIMIT/OFFSET pages could repeat or
    # skip persons between requests.
    query = f"""
        SELECT staff_id, name, headline, location, country_code,
               custodian_slug, custodian_name, linkedin_url, profile_image_url,
               heritage_relevant, heritage_types
        FROM persons
        {where_clause}
        ORDER BY name, staff_id
        LIMIT ${limit_idx} OFFSET ${limit_idx + 1}
    """

    # Count query (same filters, no pagination)
    count_query = f"SELECT COUNT(*) FROM persons {where_clause}"

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *filter_params, limit, offset)
        total = await conn.fetchval(count_query, *filter_params)

    persons = [
        PersonSummary(
            staff_id=row['staff_id'],
            name=row['name'],
            headline=row['headline'],
            location=row['location'],
            country_code=row['country_code'],
            custodian_slug=row['custodian_slug'],
            custodian_name=row['custodian_name'],
            linkedin_url=row['linkedin_url'],
            profile_image_url=row['profile_image_url'],
            # A NULL relevance flag is reported as relevant.
            heritage_relevant=row['heritage_relevant'] if row['heritage_relevant'] is not None else True,
            heritage_types=row['heritage_types'] if row['heritage_types'] else [],
        )
        for row in rows
    ]

    return {
        "persons": [p.model_dump() for p in persons],
        "total": total,
        "limit": limit,
        "offset": offset,
        "has_more": offset + len(persons) < total,
    }


@app.get("/persons/count")
async def get_persons_count():
    """
    Report how many persons exist, for the stats display.

    Returns a dict with the overall row count of ``persons`` under ``total``
    and the count of rows flagged ``heritage_relevant = true`` under
    ``heritage_relevant``.
    """
    pool = await get_pool()

    async with pool.acquire() as conn:
        # Both counts are fetched on the same connection, overall count first.
        counts = {
            "total": await conn.fetchval("SELECT COUNT(*) FROM persons"),
            "heritage_relevant": await conn.fetchval(
                "SELECT COUNT(*) FROM persons WHERE heritage_relevant = true"
            ),
        }

    return counts


@app.get("/persons/search", response_model=Dict[str, Any])
async def search_persons(
    q: str = Query(..., min_length=2, description="Search query"),
    limit: int = Query(20, ge=1, le=100, description="Max results"),
):
    """
    Search persons by name, headline, or custodian name.

    Performs a case-insensitive substring match (ILIKE) against the three
    columns. Persons whose name starts with the query are listed first,
    then results are ordered by name. The query text is matched literally:
    LIKE wildcards (``%``, ``_``) and backslashes in ``q`` are escaped.

    Returns a dict with:
    - persons: list of matching person summaries (at most ``limit``)
    - count: number of persons returned
    - query: the search text as received
    """
    pool = await get_pool()

    # Escape LIKE metacharacters so user input cannot act as a wildcard
    # (e.g. "__" would otherwise match almost every row). Backslash is
    # PostgreSQL's default LIKE escape character and must be doubled first.
    escaped = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    # Use ILIKE for simple search (full-text search can be added later if index exists)
    search_pattern = f"%{escaped}%"   # substring match
    prefix_pattern = f"{escaped}%"    # used only to rank name-prefix hits first

    query = """
        SELECT staff_id, name, headline, location, country_code,
               custodian_slug, custodian_name, linkedin_url, profile_image_url,
               heritage_relevant, heritage_types
        FROM persons
        WHERE name ILIKE $1 
           OR headline ILIKE $1 
           OR custodian_name ILIKE $1
        ORDER BY 
            CASE WHEN name ILIKE $2 THEN 0 ELSE 1 END,
            name
        LIMIT $3
    """

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, search_pattern, prefix_pattern, limit)

    persons = [
        PersonSummary(
            staff_id=row['staff_id'],
            name=row['name'],
            headline=row['headline'],
            location=row['location'],
            country_code=row['country_code'],
            custodian_slug=row['custodian_slug'],
            custodian_name=row['custodian_name'],
            linkedin_url=row['linkedin_url'],
            profile_image_url=row['profile_image_url'],
            # A NULL relevance flag is reported as relevant.
            heritage_relevant=row['heritage_relevant'] if row['heritage_relevant'] is not None else True,
            heritage_types=row['heritage_types'] if row['heritage_types'] else [],
        )
        for row in rows
    ]

    return {
        "persons": [p.model_dump() for p in persons],
        "count": len(persons),
        "query": q,
    }


@app.get("/person/{staff_id}", response_model=PersonDetail)
async def get_person(staff_id: str):
    """
    Fetch the full profile of one person by ``staff_id``.

    Raises:
        HTTPException: 404 when no row in ``persons`` has this ``staff_id``.
    """
    pool = await get_pool()

    sql = """
        SELECT staff_id, name, headline, location, country_code,
               custodian_slug, custodian_name, linkedin_url, profile_image_url,
               heritage_relevant, heritage_types, experience, education,
               skills, languages, about, connections,
               extraction_date, extraction_method, source_file
        FROM persons
        WHERE staff_id = $1
    """

    async with pool.acquire() as conn:
        record = await conn.fetchrow(sql, staff_id)

    if not record:
        raise HTTPException(status_code=404, detail=f"Person not found: {staff_id}")

    # Columns handed to the model exactly as stored.
    passthrough_columns = (
        'staff_id', 'name', 'headline', 'location', 'country_code',
        'custodian_slug', 'custodian_name', 'linkedin_url', 'profile_image_url',
        'about', 'connections', 'extraction_method', 'source_file',
    )
    # Collection columns where a NULL/empty value becomes an empty list.
    list_columns = ('heritage_types', 'experience', 'education', 'skills', 'languages')

    fields = {column: record[column] for column in passthrough_columns}
    for column in list_columns:
        fields[column] = record[column] or []

    # A NULL relevance flag is reported as relevant.
    relevance = record['heritage_relevant']
    fields['heritage_relevant'] = True if relevance is None else relevance

    # The extraction date is emitted as an ISO-8601 string when present.
    extracted_at = record['extraction_date']
    fields['extraction_date'] = extracted_at.isoformat() if extracted_at else None

    return PersonDetail(**fields)


# ============================================================================
# Main
# ============================================================================

if __name__ == "__main__":
    # Direct-execution entry point: serve the app with uvicorn, using the
    # host/port from settings and with auto-reload switched on.
    import uvicorn

    server_options = {
        "host": settings.api_host,
        "port": settings.api_port,
        "reload": True,
    }
    uvicorn.run("geo_api:app", **server_options)