# glam/backend/typedb/main.py
# Last modified: 2025-12-15 22:31:41 +01:00 (1388 lines, 51 KiB, Python)
"""
TypeDB REST API for Heritage Custodian Data
FastAPI backend providing TypeQL query interface for bronhouder.nl
Updated for TypeDB 3.x driver API (no sessions, direct transactions)
Endpoints:
- GET / - Health check and statistics
- GET /status - Server status
- POST /query - Execute TypeQL match query (read-only)
- GET /databases - List all databases
- POST /databases/{name} - Create new database
- GET /schema - Get database schema types
- POST /schema/load - Load TypeQL schema from file
- POST /data/insert - Execute insert query
- POST /database/reset/{name} - Reset (delete/recreate) database
- GET /stats - Get detailed statistics
- POST /sync/postgresql - Sync data from PostgreSQL to TypeDB
"""
import os
import re
from datetime import datetime
from typing import Optional, List, Dict, Any
from contextlib import asynccontextmanager
import asyncio
from concurrent.futures import ThreadPoolExecutor

from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field

# TypeDB 3.x imports
from typedb.driver import TypeDB, TransactionType, Credentials, DriverOptions

# PostgreSQL for sync -- optional dependency; when missing, HAS_PSYCOPG2 is
# False and get_pg_connection() raises a clear RuntimeError instead.
try:
    import psycopg2
    import psycopg2.extras
    HAS_PSYCOPG2 = True
except ImportError:
    HAS_PSYCOPG2 = False
# ============================================================================
# Configuration
# ============================================================================
class Settings(BaseModel):
    """TypeDB / API / PostgreSQL connection settings, read from the environment.

    Defaults are evaluated once when this class body executes (import time),
    so later changes to environment variables have no effect.
    """
    # TypeDB server location
    host: str = os.getenv("TYPEDB_HOST", "localhost")
    port: int = int(os.getenv("TYPEDB_PORT", "1729"))
    database: str = os.getenv("TYPEDB_DATABASE", "glam")
    # TypeDB 3.x authentication
    username: str = os.getenv("TYPEDB_USERNAME", "admin")
    # NOTE(review): insecure default credential -- set TYPEDB_PASSWORD in production
    password: str = os.getenv("TYPEDB_PASSWORD", "password")
    # REST server bind address/port
    api_host: str = os.getenv("API_HOST", "0.0.0.0")
    api_port: int = int(os.getenv("API_PORT", "8003"))
    # PostgreSQL settings for sync (glam_geo database with custodians table)
    pg_host: str = os.getenv("POSTGRES_HOST", "localhost")
    pg_port: int = int(os.getenv("POSTGRES_PORT", "5432"))
    pg_database: str = os.getenv("POSTGRES_DATABASE", "glam_geo")
    pg_user: str = os.getenv("POSTGRES_USER", "glam_api")
    # NOTE(review): hardcoded secret default -- rotate and supply via env
    pg_password: str = os.getenv("POSTGRES_PASSWORD", "glam_secret_2025")


# Module-level singleton used by all endpoints and helpers below.
settings = Settings()
# ============================================================================
# Pydantic Models
# ============================================================================
class QueryRequest(BaseModel):
    """Request body for /query and /data/insert: raw TypeQL plus optional target database."""
    query: str = Field(..., description="TypeQL query to execute")
    database: Optional[str] = Field(None, description="Database name (defaults to 'glam')")
class QueryResponse(BaseModel):
    """Result envelope returned by /query."""
    results: List[Dict[str, Any]]   # one dict per answer row/document
    result_count: int               # len(results)
    execution_time_ms: float        # wall-clock time, rounded to 2 decimals
    query_type: str                 # "match"/"fetch"/"define"/"insert"/"unknown"
class DatabaseInfo(BaseModel):
    """Database metadata (currently just the name)."""
    name: str
class StatusResponse(BaseModel):
    """Server status payload for / and /status."""
    status: str                 # "healthy" or "error"
    databases: List[str]
    default_database: str
    uptime_seconds: float
    typedb_version: str
    # Duplicated fields kept for frontend compatibility
    connected: bool = False
    database: str = ""
    version: str = ""
# ============================================================================
# Global State
# ============================================================================
_driver: Any = None                            # lazily-created TypeDB driver singleton (see get_driver)
_executor = ThreadPoolExecutor(max_workers=4)  # runs blocking driver calls off the event loop
_start_time: datetime = datetime.now()         # process start, used for uptime reporting
def get_driver() -> Any:
    """Return the process-wide TypeDB 3.x driver, creating it on first use."""
    global _driver
    if _driver is not None:
        return _driver
    # TypeDB 3.x requires explicit Credentials and DriverOptions.
    credentials = Credentials(settings.username, settings.password)
    options = DriverOptions(is_tls_enabled=False, tls_root_ca_path=None)
    _driver = TypeDB.driver(f"{settings.host}:{settings.port}", credentials, options)
    return _driver
def close_driver():
    """Release the cached driver connection, if one was created."""
    global _driver
    if _driver is None:
        return
    _driver.close()
    _driver = None
# ============================================================================
# Helper Functions for TypeDB 3.x
# ============================================================================
def serialize_concept_3x(concept: Any) -> Dict[str, Any]:
    """Convert a TypeDB 3.x concept into a JSON-serializable dict.

    Dicts (concept documents from fetch queries) pass through unchanged;
    other concepts are probed duck-typing style for their type label,
    internal ID, attribute value and direct label.
    """
    # Concept documents are already plain dicts.
    if isinstance(concept, dict):
        return concept

    out: Dict[str, Any] = {}

    # Type label, via the concept's type object.
    if hasattr(concept, 'get_type'):
        ctype = concept.get_type()
        if ctype and hasattr(ctype, 'get_label'):
            type_label = str(ctype.get_label())
            out['_type'] = type_label
            out['type'] = type_label

    # Internal ID (IID).
    if hasattr(concept, 'get_iid'):
        iid = concept.get_iid()
        if iid is not None:
            out['_iid'] = str(iid)
            out['id'] = str(iid)

    # Attribute value; get_value may raise on non-attribute concepts.
    if hasattr(concept, 'get_value'):
        try:
            out['value'] = concept.get_value()
        except Exception:
            pass

    # Direct label, present on type concepts.
    if hasattr(concept, 'get_label'):
        try:
            out['label'] = str(concept.get_label())
        except Exception:
            pass

    return out
def execute_read_query(database: str, query: str) -> tuple:
    """Execute a read query in TypeDB 3.x (blocking) and return (results, query_type).

    `results` is a list of JSON-serializable dicts; `query_type` is a
    best-effort textual classification of the query.

    TypeDB 3.x query patterns:
    - match $x isa entity-type;                     -> returns _ConceptRowIterator
    - match $x isa type; fetch { "key": $x.attr };  -> returns _ConceptDocumentIterator
    """
    driver = get_driver()
    results: List[Dict[str, Any]] = []
    query_type = "unknown"
    # Classify from the query text. "fetch" anywhere wins over the prefix
    # checks because fetch queries also start with "match".
    query_stripped = query.strip().lower()
    if "fetch" in query_stripped:
        query_type = "fetch"
    elif query_stripped.startswith("match"):
        query_type = "match"
    elif query_stripped.startswith("define"):
        query_type = "define"
    elif query_stripped.startswith("insert"):
        query_type = "insert"
    # TypeDB 3.x: transactions are opened directly on the driver (no session).
    with driver.transaction(database, TransactionType.READ) as tx:
        # TypeDB 3.x executes via tx.query(...).resolve()
        answer = tx.query(query).resolve()
        if query_type == "fetch":
            # Fetch queries yield concept documents (plain dicts).
            try:
                for doc in answer.as_concept_documents():
                    results.append(doc)
            except Exception as e:
                # Fallback: try to iterate the answer directly.
                try:
                    for item in answer:
                        if isinstance(item, dict):
                            results.append(item)
                        else:
                            results.append({"result": str(item)})
                except Exception:
                    results.append({"error": str(e)})
        else:
            # Match queries yield concept rows keyed by variable name.
            try:
                for row in answer.as_concept_rows():
                    row_dict = {}
                    # column_names() returns an iterator in TypeDB 3.x
                    col_names = list(row.column_names())
                    for var in col_names:
                        concept = row.get(var)
                        if concept:
                            row_dict[var] = serialize_concept_3x(concept)
                    results.append(row_dict)
            except Exception as e:
                # If as_concept_rows fails, fall back to raw iteration.
                try:
                    for item in answer:
                        results.append({"result": str(item)})
                except Exception:
                    results.append({"error": str(e)})
    return results, query_type
def get_databases() -> List[str]:
    """Return the names of all databases on the TypeDB server."""
    all_dbs = get_driver().databases.all()
    return [database.name for database in all_dbs]
def database_exists(database: str) -> bool:
    """True if a database with the given name exists on the server."""
    return get_driver().databases.contains(database)
def get_schema_types(database: str) -> Dict[str, Any]:
    """Get schema types from a database using TypeDB 3.x TypeQL queries.

    Returns a dict with "entity_types", "relation_types" and
    "attribute_types" lists of type labels (the root types themselves are
    excluded).  If the transaction cannot be opened, an "error" key is added
    and whatever was collected so far is returned.
    """
    driver = get_driver()
    # (TypeQL kind keyword / root label to exclude, result key) -- the three
    # original copy-pasted query blocks collapsed into one data-driven loop.
    kinds = [
        ("entity", "entity_types"),
        ("relation", "relation_types"),
        ("attribute", "attribute_types"),
    ]
    schema: Dict[str, Any] = {key: [] for _, key in kinds}
    try:
        # TypeDB 3.x: schema reads use the SCHEMA transaction type.
        with driver.transaction(database, TransactionType.SCHEMA) as tx:
            for keyword, key in kinds:
                # TypeDB 3.x syntax: e.g. "match entity $x;" lists entity types.
                try:
                    result = tx.query(f"match {keyword} $x;").resolve()
                    for row in result.as_concept_rows():
                        concept = row.get('x')
                        if concept and hasattr(concept, 'get_label'):
                            label = str(concept.get_label())
                            # Skip the abstract root type itself.
                            if label != keyword:
                                schema[key].append(label)
                except Exception as e:
                    print(f"Error getting {keyword} types: {e}")
    except Exception as e:
        schema["error"] = str(e)
    return schema
def get_stats_data(database: str) -> Dict[str, Any]:
    """Get database statistics including entity/relation/attribute counts.

    First collects type labels via a SCHEMA transaction, then counts
    instances of each type with one "match $x isa <label>;" query per type in
    a READ transaction.  Partial failures are recorded under "schema_error" /
    "data_error" and per-type counts default to 0.
    """
    driver = get_driver()
    stats: Dict[str, Any] = {
        "database": database,
        "entity_types": [],
        "relation_types": [],
        "attribute_types": [],
        "total_entities": 0,
        "total_relations": 0,
    }
    # Bail out early with an error marker if the database is missing.
    if not driver.databases.contains(database):
        stats["error"] = f"Database '{database}' not found"
        return stats
    # Phase 1: schema types via SCHEMA transaction.
    try:
        with driver.transaction(database, TransactionType.SCHEMA) as tx:
            # Entity types (root 'entity' excluded).
            try:
                result = tx.query("match entity $x;").resolve()
                for row in result.as_concept_rows():
                    concept = row.get('x')
                    if concept:
                        label = str(concept.get_label()) if hasattr(concept, 'get_label') else str(concept)
                        if label != 'entity':
                            is_abstract = False
                            if hasattr(concept, 'is_abstract'):
                                is_abstract = concept.is_abstract()
                            stats["entity_types"].append({
                                "label": label,
                                "abstract": is_abstract,
                                "count": 0
                            })
            except Exception as e:
                print(f"Error getting entity types: {e}")
            # Relation types (root 'relation' excluded).
            try:
                result = tx.query("match relation $x;").resolve()
                for row in result.as_concept_rows():
                    concept = row.get('x')
                    if concept:
                        label = str(concept.get_label()) if hasattr(concept, 'get_label') else str(concept)
                        if label != 'relation':
                            stats["relation_types"].append({
                                "label": label,
                                "roles": [],  # role lookup not implemented; kept for response shape
                                "count": 0
                            })
            except Exception as e:
                print(f"Error getting relation types: {e}")
            # Attribute types (root 'attribute' excluded).
            try:
                result = tx.query("match attribute $x;").resolve()
                for row in result.as_concept_rows():
                    concept = row.get('x')
                    if concept:
                        label = str(concept.get_label()) if hasattr(concept, 'get_label') else str(concept)
                        if label != 'attribute':
                            stats["attribute_types"].append({
                                "label": label,
                                "valueType": "unknown",  # value-type lookup not implemented
                                "count": 0
                            })
            except Exception as e:
                print(f"Error getting attribute types: {e}")
    except Exception as e:
        stats["schema_error"] = str(e)
    # Phase 2: count instances via READ transaction (one query per type --
    # NOTE(review): O(#types) round trips; fine for small schemas).
    try:
        with driver.transaction(database, TransactionType.READ) as tx:
            for et in stats["entity_types"]:
                # Abstract types are reported with count 0 and not queried.
                if et.get("abstract"):
                    et["count"] = 0
                    continue
                try:
                    label = et["label"]
                    result = tx.query(f"match $x isa {label};").resolve()
                    count = sum(1 for _ in result.as_concept_rows())
                    et["count"] = count
                    stats["total_entities"] += count
                except Exception as e:
                    print(f"Error counting {label}: {e}")
                    et["count"] = 0
            for rt in stats["relation_types"]:
                try:
                    label = rt["label"]
                    result = tx.query(f"match $x isa {label};").resolve()
                    count = sum(1 for _ in result.as_concept_rows())
                    rt["count"] = count
                    stats["total_relations"] += count
                except Exception:
                    rt["count"] = 0
            for at in stats["attribute_types"]:
                try:
                    label = at["label"]
                    result = tx.query(f"match $x isa {label};").resolve()
                    count = sum(1 for _ in result.as_concept_rows())
                    at["count"] = count
                except Exception:
                    at["count"] = 0
    except Exception as e:
        stats["data_error"] = str(e)
    return stats
# ============================================================================
# FastAPI App
# ============================================================================
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan: warm up the driver on startup, tear down on shutdown.

    Startup failure is non-fatal: get_driver() re-creates the connection
    lazily on the first request that needs it.
    """
    try:
        get_driver()
    except Exception as e:
        print(f"Warning: Could not connect to TypeDB on startup: {e}")
    yield
    # Shutdown: release the driver, then drain the worker pool.
    close_driver()
    _executor.shutdown(wait=True)
# FastAPI application instance; lifespan handles driver setup/teardown.
app = FastAPI(
    title="TypeDB Heritage API",
    description="REST API for heritage institution TypeQL queries (TypeDB 3.x)",
    version="3.0.0",
    lifespan=lifespan,
)

# CORS middleware -- wide open for development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # NOTE(review): restrict origins for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ============================================================================
# Schema and Data Loading Functions
# ============================================================================
def load_schema_from_file(database: str, filepath: str) -> Dict[str, Any]:
    """Load a TypeQL schema definition from a .tql file into a database.

    Raises FileNotFoundError if the file does not exist; driver errors
    propagate to the caller.
    """
    driver = get_driver()
    with open(filepath, 'r', encoding='utf-8') as handle:
        schema_source = handle.read()
    # TypeDB 3.x: schema changes go through a SCHEMA transaction and must be
    # committed explicitly.
    with driver.transaction(database, TransactionType.SCHEMA) as tx:
        tx.query(schema_source).resolve()
        tx.commit()
    return {"status": "success", "file": filepath, "database": database}
def execute_write_query(database: str, query: str) -> Dict[str, Any]:
    """Execute a write query (insert/delete) in TypeDB 3.x and report a count.

    Returns {"status": "success", "inserted": <best-effort row count>}.
    """
    driver = get_driver()
    outcome: Dict[str, Any] = {"status": "success", "inserted": 0}
    # TypeDB 3.x: data writes use the WRITE transaction type.
    with driver.transaction(database, TransactionType.WRITE) as tx:
        answer = tx.query(query).resolve()
        # Best-effort count; the answer shape varies by query form.
        try:
            if hasattr(answer, 'as_concept_rows'):
                outcome["inserted"] = sum(1 for _ in answer.as_concept_rows())
            elif hasattr(answer, '__iter__'):
                outcome["inserted"] = sum(1 for _ in answer)
        except Exception:
            outcome["inserted"] = 1  # Assume at least one insertion
        tx.commit()
    return outcome
def reset_database(database: str) -> Dict[str, Any]:
    """Drop a database if present, then create it fresh (destroys all data)."""
    driver = get_driver()
    # Best-effort delete: ignore failures so the create below still runs.
    try:
        if driver.databases.contains(database):
            driver.databases.get(database).delete()
    except Exception:
        pass
    driver.databases.create(database)
    return {"status": "success", "database": database, "action": "reset"}
# ============================================================================
# API Endpoints
# ============================================================================
@app.get("/", response_model=StatusResponse)
async def get_root_status() -> StatusResponse:
    """Root endpoint: alias for /status."""
    return await get_status()
@app.get("/status")
async def get_status() -> StatusResponse:
    """Report connectivity, known databases and process uptime."""
    loop = asyncio.get_event_loop()
    version = "3.7.0"
    db_names: List[str] = []
    try:
        # Blocking driver call runs on the thread pool.
        db_names = await loop.run_in_executor(_executor, get_databases)
        is_connected = True
    except Exception as e:
        print(f"Status check error: {e}")
        is_connected = False
    uptime = (datetime.now() - _start_time).total_seconds()
    return StatusResponse(
        status="healthy" if is_connected else "error",
        databases=db_names,
        default_database=settings.database,
        uptime_seconds=uptime,
        typedb_version=version,
        connected=is_connected,
        database=settings.database,
        version=version,
    )
@app.post("/query", response_model=QueryResponse)
async def execute_query(request: QueryRequest) -> QueryResponse:
    """Execute a read-only TypeQL query.

    Supports both match and fetch queries:
    - match $x isa custodian-observation;
    - match $x isa custodian-observation; fetch { "id": $x.id, "name": $x.observed-name };
    """
    loop = asyncio.get_event_loop()
    target_db = request.database or settings.database
    # Security: reject anything that is not a match/fetch read query.
    lowered = request.query.strip().lower()
    if not lowered.startswith(("match", "fetch")):
        raise HTTPException(
            status_code=403,
            detail="Only match/fetch queries are allowed for read operations."
        )
    started = datetime.now()
    try:
        results, query_type = await loop.run_in_executor(
            _executor, execute_read_query, target_db, request.query
        )
        elapsed_ms = (datetime.now() - started).total_seconds() * 1000
        return QueryResponse(
            results=results,
            result_count=len(results),
            execution_time_ms=round(elapsed_ms, 2),
            query_type=query_type,
        )
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
@app.get("/databases", response_model=List[DatabaseInfo])
async def list_databases() -> List[DatabaseInfo]:
    """List all databases on the TypeDB server."""
    loop = asyncio.get_event_loop()
    try:
        names = await loop.run_in_executor(_executor, get_databases)
        return [DatabaseInfo(name=n) for n in names]
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/schema")
async def get_schema(database: Optional[str] = None) -> Dict[str, Any]:
    """Return the schema type listing for a database (404 if it doesn't exist)."""
    loop = asyncio.get_event_loop()
    target = database or settings.database
    try:
        known = await loop.run_in_executor(_executor, get_databases)
        if target not in known:
            raise HTTPException(status_code=404, detail=f"Database '{target}' not found")
        type_listing = await loop.run_in_executor(_executor, get_schema_types, target)
        return {"database": target, "schema": type_listing}
    except HTTPException:
        raise  # keep the 404 status instead of collapsing it into a 500
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/databases/{name}")
async def create_database(name: str) -> Dict[str, str]:
    """Create a new, empty database."""
    loop = asyncio.get_event_loop()

    def _create_db() -> str:
        # Runs on the thread pool: driver calls are blocking.
        get_driver().databases.create(name)
        return name

    try:
        await loop.run_in_executor(_executor, _create_db)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
    return {"status": "created", "database": name}
@app.post("/schema/load")
async def load_schema(filepath: str = "/var/www/backend/typedb/01_custodian_name.tql", database: Optional[str] = None) -> Dict[str, Any]:
    """Load a TypeQL schema from a file on the server into a database.

    Args:
        filepath: Path to a .tql file; must resolve inside
            /var/www/backend/typedb/ (403 otherwise).
        database: Target database; defaults to settings.database.
    """
    loop = asyncio.get_event_loop()
    db = database or settings.database
    # Security fix: resolve symlinks and ".." segments BEFORE the prefix
    # check -- a plain startswith() test would accept paths like
    # "/var/www/backend/typedb/../../etc/passwd" (path traversal).
    resolved = os.path.realpath(filepath)
    if not resolved.startswith("/var/www/backend/typedb/"):
        raise HTTPException(status_code=403, detail="Schema files must be in /var/www/backend/typedb/")
    try:
        result = await loop.run_in_executor(_executor, load_schema_from_file, db, resolved)
        return result
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Schema file not found: {filepath}")
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
@app.post("/data/insert")
async def insert_data(request: QueryRequest) -> Dict[str, Any]:
    """Execute an insert (or match-insert) query to add data.

    Rejects queries that do not start with "insert" / "match ... insert",
    and any query containing a "delete" keyword -- this endpoint is
    insert-only and runs in a WRITE transaction.
    """
    loop = asyncio.get_event_loop()
    database = request.database or settings.database
    q = request.query.strip().lower()
    # Security fix: the old check used a plain substring test for "insert",
    # which let "match ...; delete ...;" queries through whenever "insert"
    # appeared anywhere in the text (e.g. inside a type name) -- and those
    # deletes would execute in the WRITE transaction.  Require "insert" as a
    # keyword and reject any "delete" keyword outright.
    has_insert = re.search(r'\binsert\b', q) is not None
    has_delete = re.search(r'\bdelete\b', q) is not None
    if has_delete or not (q.startswith("insert") or (q.startswith("match") and has_insert)):
        raise HTTPException(
            status_code=403,
            detail="Only insert queries are allowed for this endpoint."
        )
    start_time = datetime.now()
    try:
        result = await loop.run_in_executor(_executor, execute_write_query, database, request.query)
        execution_time = (datetime.now() - start_time).total_seconds() * 1000
        result["execution_time_ms"] = round(execution_time, 2)
        return result
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
@app.post("/database/reset/{name}")
async def reset_db(name: str) -> Dict[str, Any]:
    """Delete and recreate a database (destroys all data in it)."""
    loop = asyncio.get_event_loop()
    try:
        return await loop.run_in_executor(_executor, reset_database, name)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
@app.get("/stats")
async def get_stats(database: Optional[str] = None) -> Dict[str, Any]:
    """Entity/relation/attribute type statistics for a database."""
    loop = asyncio.get_event_loop()
    target = database or settings.database
    try:
        return await loop.run_in_executor(_executor, get_stats_data, target)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/graph/{entity_type}")
async def get_graph_data(entity_type: str, limit: int = 100, database: Optional[str] = None) -> Dict[str, Any]:
    """Get graph data for visualization (nodes + edges) for a specific entity type.

    Uses TypeDB 3.x fetch queries to get entities with their attributes.
    For custodian-observation entities, fetches all key attributes; other
    entity types fall back to an id-only listing.  Edges are currently
    always empty (relations are not traversed yet).
    """
    loop = asyncio.get_event_loop()
    db = database or settings.database
    # Key attributes to fetch for custodian-observation (in priority order):
    # pairs of (typedb-attribute-name, json-key-in-response).
    CUSTODIAN_ATTRS = [
        ('id', 'id'),
        ('observed-name', 'name'),
        ('city', 'city'),
        ('country-code', 'country'),
        ('institution-type', 'institutionType'),
        ('latitude', 'lat'),
        ('longitude', 'lon'),
        ('region', 'region'),
        ('region-code', 'regionCode'),
        ('wikidata-id', 'wikidataId'),
        ('isil-code', 'isilCode'),
        ('website', 'website'),
        ('street-address', 'address'),
        ('description', 'description'),
        ('rating', 'rating'),
    ]

    def _get_graph() -> Dict[str, Any]:
        # Blocking worker executed on the thread pool.
        driver = get_driver()
        nodes: List[Dict[str, Any]] = []
        edges: List[Dict[str, Any]] = []
        if not driver.databases.contains(db):
            return {"nodes": [], "edges": [], "nodeCount": 0, "edgeCount": 0, "error": f"Database '{db}' not found"}
        with driver.transaction(db, TransactionType.READ) as tx:
            if entity_type == "custodian-observation":
                try:
                    # Step 1: collect up to `limit` entity ids.
                    id_query = f"match $x isa {entity_type}, has id $id; limit {limit};"
                    id_result = tx.query(id_query).resolve()
                    entity_ids = []
                    for row in id_result.as_concept_rows():
                        concept = row.get('id')
                        if concept and hasattr(concept, 'get_value'):
                            entity_ids.append(concept.get_value())
                    # Step 2: per entity, query each key attribute separately
                    # (more reliable than optional fetch blocks).
                    # NOTE(review): issues len(entity_ids) * len(CUSTODIAN_ATTRS)
                    # queries per request -- expensive for large limits.
                    for eid in entity_ids:
                        # Escape double quotes for embedding in the TypeQL literal.
                        eid_escaped = eid.replace('"', '\\"')
                        attrs = {"id": eid, "type": entity_type}
                        for attr_name, json_key in CUSTODIAN_ATTRS:
                            if attr_name == 'id':
                                continue  # Already have id
                            try:
                                attr_query = f'match $x isa {entity_type}, has id "{eid_escaped}", has {attr_name} $v; fetch {{ "v": $v }};'
                                attr_result = tx.query(attr_query).resolve()
                                for doc in attr_result.as_concept_documents():
                                    val = doc.get('v')
                                    if val is not None:
                                        attrs[json_key] = val
                                        break  # Take first value
                            except Exception:
                                pass  # Attribute not present for this entity
                        # Use name as label if available, otherwise the id;
                        # truncate long labels for display.
                        label = attrs.get('name', eid)
                        if isinstance(label, str) and len(label) > 40:
                            label = label[:37] + "..."
                        nodes.append({
                            "id": eid,
                            "label": label,
                            "type": "entity",
                            "entityType": entity_type,
                            "attributes": attrs,
                        })
                except Exception as e:
                    print(f"Error with custodian-observation query: {e}")
                    # Fallback to simple ID-only query
                    nodes = _fallback_query(tx, entity_type, limit)
            else:
                # Other entity types: id-only listing.
                nodes = _fallback_query(tx, entity_type, limit)
        return {
            "nodes": nodes,
            "edges": edges,
            "nodeCount": len(nodes),
            "edgeCount": len(edges),
        }

    def _fallback_query(tx, entity_type: str, limit: int) -> List[Dict[str, Any]]:
        """Fallback: simple match query returning just IDs."""
        nodes = []
        try:
            result = tx.query(f"match $x isa {entity_type}, has id $id; limit {limit};").resolve()
            count = 0
            for row in result.as_concept_rows():
                if count >= limit:
                    break
                concept = row.get('id')
                if concept and hasattr(concept, 'get_value'):
                    node_id = concept.get_value()
                    nodes.append({
                        "id": node_id,
                        "label": node_id[:30],
                        "type": "entity",
                        "entityType": entity_type,
                        "attributes": {"id": node_id, "type": entity_type},
                    })
                    count += 1
        except Exception as e:
            print(f"Fallback query failed: {e}")
        return nodes

    try:
        result = await loop.run_in_executor(_executor, _get_graph)
        return result
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# ============================================================================
# RAG-Optimized Search Endpoint
# ============================================================================
class RAGSearchRequest(BaseModel):
    """Request body for /rag/search (RAG-optimized retrieval)."""
    query: str = Field(..., description="Natural language search query")
    search_type: str = Field("semantic", description="Search type: semantic, name, type, location, id")
    limit: int = Field(10, ge=1, le=100, description="Maximum results to return")
    include_coordinates: bool = Field(True, description="Include lat/lon in results")
class RAGSearchResult(BaseModel):
    """Single search result optimized for RAG context; only id/name are guaranteed."""
    id: str
    name: str
    city: str | None = None
    country: str | None = None
    institution_type: str | None = None
    lat: float | None = None
    lon: float | None = None
    wikidata_id: str | None = None
    description: str | None = None
    # Constant 1.0 for now -- no ranking is implemented (see rag_search).
    relevance_score: float = 1.0
class RAGSearchResponse(BaseModel):
    """Response envelope for the /rag/search endpoint."""
    results: List[RAGSearchResult]
    total_count: int        # len(results)
    query_time_ms: float    # wall-clock time, rounded to 2 decimals
    search_type: str        # echoed from the request
    query: str              # echoed from the request
@app.post("/rag/search", response_model=RAGSearchResponse)
async def rag_search(request: RAGSearchRequest, database: Optional[str] = None) -> RAGSearchResponse:
    """Fast search endpoint optimized for RAG context retrieval.

    Uses efficient batch queries to fetch essential attributes for RAG context.
    Supports multiple search strategies:
    - **semantic**: Natural language query (parses type + location patterns)
    - **name**: Search by institution name (partial match)
    - **type**: Search by institution type (M=museum, A=archive, L=library, G=gallery)
    - **location**: Search by city name
    - **id**: Search by exact GHCID
    Examples:
    {"query": "museums in Amsterdam", "search_type": "semantic"}
    {"query": "Rijksmuseum", "search_type": "name"}
    {"query": "M", "search_type": "type"}  # All museums
    {"query": "Rotterdam", "search_type": "location"}
    {"query": "NL-NH-AMS-M-RMA", "search_type": "id"}
    """
    import time
    start_time = time.time()
    loop = asyncio.get_event_loop()
    db = database or settings.database

    def _search() -> List[Dict[str, Any]]:
        # Blocking worker: builds a match query per strategy, collects ids,
        # then fetches essential attributes one query per attribute.
        driver = get_driver()
        results = []
        if not driver.databases.contains(db):
            return []
        query_text = request.query.strip()
        search_type = request.search_type.lower()
        limit = request.limit
        with driver.transaction(db, TransactionType.READ) as tx:
            # Build the appropriate query based on search type.
            # All queries must include 'has id $id' for entity ID extraction.
            if search_type == "id":
                # Exact ID match
                escaped_id = escape_typeql_string(query_text)
                match_query = f'match $x isa custodian-observation, has id $id; $id = "{escaped_id}"'
            elif search_type == "name":
                # Name contains search (lowercased -- assumes names are stored
                # lowercase or "contains" is case-insensitive; TODO confirm)
                escaped_name = escape_typeql_string(query_text.lower())
                match_query = f'match $x isa custodian-observation, has observed-name $name, has id $id; $name contains "{escaped_name}"'
            elif search_type == "type":
                # Institution type filter
                type_code = query_text.upper()[:1]  # First letter only
                match_query = f'match $x isa custodian-observation, has institution-type "{type_code}", has id $id'
            elif search_type == "location":
                # City search
                escaped_city = escape_typeql_string(query_text.lower())
                match_query = f'match $x isa custodian-observation, has city $city, has id $id; $city contains "{escaped_city}"'
            else:  # semantic - parse natural language
                query_lower = query_text.lower()
                # Detect institution type mentions (EN + NL keywords).
                type_code = None
                if any(w in query_lower for w in ['museum', 'musea', 'museums']):
                    type_code = 'M'
                elif any(w in query_lower for w in ['archive', 'archief', 'archives']):
                    type_code = 'A'
                elif any(w in query_lower for w in ['library', 'bibliotheek', 'libraries']):
                    type_code = 'L'
                elif any(w in query_lower for w in ['gallery', 'galerie', 'galleries']):
                    type_code = 'G'
                # Extract a potential city name: simple heuristic, take the
                # text after the last " in ".
                city_pattern = None
                if ' in ' in query_lower:
                    parts = query_lower.split(' in ')
                    if len(parts) > 1:
                        city_pattern = parts[-1].strip()
                # Combine whatever constraints were detected; fall back to a
                # name-contains search when neither type nor city was found.
                if type_code and city_pattern:
                    escaped_city = escape_typeql_string(city_pattern)
                    match_query = f'match $x isa custodian-observation, has institution-type "{type_code}", has city $city, has id $id; $city contains "{escaped_city}"'
                elif type_code:
                    match_query = f'match $x isa custodian-observation, has institution-type "{type_code}", has id $id'
                elif city_pattern:
                    escaped_city = escape_typeql_string(city_pattern)
                    match_query = f'match $x isa custodian-observation, has city $city, has id $id; $city contains "{escaped_city}"'
                else:
                    # Fallback: search in name
                    escaped_q = escape_typeql_string(query_lower)
                    match_query = f'match $x isa custodian-observation, has observed-name $name, has id $id; $name contains "{escaped_q}"'
            # First get all matching entity IDs.
            try:
                # Query already has $id variable, just add limit
                id_query = f"{match_query}; limit {limit};"
                id_result = tx.query(id_query).resolve()
                entity_ids = []
                for row in id_result.as_concept_rows():
                    concept = row.get('id')
                    if concept and hasattr(concept, 'get_value'):
                        entity_ids.append(concept.get_value())
                # Fetch essential attributes for each matched entity.
                for eid in entity_ids:
                    eid_escaped = eid.replace('"', '\\"')
                    # NOTE(review): dead code -- fetch_query is built but never
                    # executed; the bare except guards only a string assignment.
                    # Safe to remove in a follow-up change.
                    try:
                        fetch_query = f'''
                        match $x isa custodian-observation, has id "{eid_escaped}";
                        $x has observed-name $name;
                        fetch {{
                            "id": $x.id,
                            "name": $name
                        }};
                        '''
                        # Note: This fetch approach may not work in TypeDB 3.x
                        # Fall back to individual attribute queries
                    except:
                        pass
                    # Individual attribute fetching (more reliable): one query
                    # per (entity, attribute) pair.
                    attrs = {"id": eid}
                    # Essential attributes for RAG context
                    essential_attrs = [
                        ('observed-name', 'name'),
                        ('city', 'city'),
                        ('country-code', 'country'),
                        ('institution-type', 'institution_type'),
                        ('wikidata-id', 'wikidata_id'),
                        ('description', 'description'),
                    ]
                    if request.include_coordinates:
                        essential_attrs.extend([
                            ('latitude', 'lat'),
                            ('longitude', 'lon'),
                        ])
                    for attr_name, json_key in essential_attrs:
                        try:
                            attr_query = f'match $x isa custodian-observation, has id "{eid_escaped}", has {attr_name} $v; fetch {{ "v": $v }};'
                            attr_result = tx.query(attr_query).resolve()
                            for doc in attr_result.as_concept_documents():
                                val = doc.get('v')
                                if val is not None:
                                    attrs[json_key] = val
                                    break
                        except Exception:
                            pass
                    results.append(attrs)
            except Exception as e:
                print(f"RAG search query failed: {e}")
        return results

    try:
        raw_results = await loop.run_in_executor(_executor, _search)
        # Convert raw attribute dicts to the response model; missing name
        # falls back to the id, relevance is constant (no ranking yet).
        search_results = []
        for r in raw_results:
            search_results.append(RAGSearchResult(
                id=r.get('id', ''),
                name=r.get('name', r.get('id', 'Unknown')),
                city=r.get('city'),
                country=r.get('country'),
                institution_type=r.get('institution_type'),
                lat=r.get('lat'),
                lon=r.get('lon'),
                wikidata_id=r.get('wikidata_id'),
                description=r.get('description'),
                relevance_score=1.0,
            ))
        elapsed_ms = (time.time() - start_time) * 1000
        return RAGSearchResponse(
            results=search_results,
            total_count=len(search_results),
            query_time_ms=round(elapsed_ms, 2),
            search_type=request.search_type,
            query=request.query,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# ============================================================================
# PostgreSQL Sync Functions
# ============================================================================
def escape_typeql_string(value: str) -> str:
    """Escape a string for safe embedding in a double-quoted TypeQL literal."""
    if not value:
        return ""
    # Backslashes first, so the backslashes added for quotes survive intact.
    sanitized = value.replace("\\", "\\\\").replace('"', '\\"')
    # Strip ASCII control characters (including DEL).
    return re.sub(r'[\x00-\x1f\x7f]', '', sanitized)
def get_pg_connection():
    """Open and return a new PostgreSQL connection from the configured settings.

    Raises:
        RuntimeError: if the optional psycopg2 dependency is not installed.
    """
    if not HAS_PSYCOPG2:
        raise RuntimeError("psycopg2 not installed. Run: pip install psycopg2-binary")
    connect_kwargs = {
        "host": settings.pg_host,
        "port": settings.pg_port,
        "database": settings.pg_database,
        "user": settings.pg_user,
        "password": settings.pg_password,
    }
    return psycopg2.connect(**connect_kwargs)
def _custodian_insert_stmt(row, var_index: int) -> str:
    """Build one TypeQL insert statement for a PostgreSQL custodians row.

    All string values (including the GHCID itself, previously interpolated
    unescaped) go through escape_typeql_string; empty strings are omitted.
    Numeric attributes are emitted as bare literals only when non-None.
    """
    raw_ghcid = row['ghcid']
    # TypeQL attribute name -> PostgreSQL column, for string-valued fields.
    string_fields = (
        ('observed-name', 'name'),
        ('city', 'city'),
        ('country-code', 'country_code'),
        ('institution-type', 'type'),
        ('region', 'region'),
        ('region-code', 'region_code'),
        ('wikidata-id', 'wikidata_id'),
        ('isil-code', 'isil_code'),
        ('viaf-id', 'viaf_id'),
        ('google-place-id', 'google_place_id'),
        ('website', 'website'),
        ('email', 'email'),
        ('phone', 'phone'),
        ('street-address', 'street_address'),
        ('postal-code', 'postal_code'),
        ('description', 'description'),
    )
    values = {attr: escape_typeql_string(row[col] or "") for attr, col in string_fields}
    # Fallback: GHCIDs start with a two-letter country code; derive it from the
    # raw id, then escape for safe embedding.
    if not values['country-code'] and raw_ghcid and len(raw_ghcid) >= 2:
        values['country-code'] = escape_typeql_string(raw_ghcid[:2])
    attrs = [f'has id "{escape_typeql_string(raw_ghcid)}"']
    attrs.extend(
        f'has {attr} "{values[attr]}"'
        for attr, _col in string_fields
        if values[attr]
    )
    # Numeric attributes (TypeDB double/long values); None means unknown.
    for attr, col in (
        ('latitude', 'lat'),
        ('longitude', 'lon'),
        ('rating', 'rating'),
        ('founding-year', 'founding_year'),
    ):
        if row[col] is not None:
            attrs.append(f'has {attr} {row[col]}')
    return f'$x{var_index} isa custodian-observation, {", ".join(attrs)};'


def _flush_insert_batch(
    driver,
    database: str,
    batch: List[str],
    result: Dict[str, Any],
    error_label: str,
) -> None:
    """Insert a batch of statements in one WRITE transaction, updating stats.

    On failure the whole batch is lost; the error is recorded in
    result["errors"] with the given label prefix (matching historic messages).
    """
    if not batch:
        return
    try:
        query = "insert " + " ".join(batch)
        with driver.transaction(database, TransactionType.WRITE) as tx:
            tx.query(query).resolve()
            tx.commit()
        result["inserted"] += len(batch)
    except Exception as e:
        result["errors"].append(f"{error_label}: {e}")


def sync_postgresql_to_typedb(
    database: str,
    batch_size: int = 100,
    dry_run: bool = False,
    clear_existing: bool = False,
) -> Dict[str, Any]:
    """Sync institutions from PostgreSQL to TypeDB as custodian-observations.

    Args:
        database: TypeDB database name
        batch_size: Number of records to insert per transaction
        dry_run: If True, only count records without inserting
        clear_existing: If True, delete existing custodian-observations first

    Returns:
        Dict with sync statistics (counts, skipped, errors).
    """
    driver = get_driver()
    result: Dict[str, Any] = {
        "status": "success",
        "database": database,
        "dry_run": dry_run,
        "total_in_postgresql": 0,
        "existing_in_typedb": 0,
        "inserted": 0,
        "skipped": 0,
        "errors": [],
    }
    # Collect IDs already present in TypeDB so we can skip duplicates.
    existing_ids = set()
    try:
        with driver.transaction(database, TransactionType.READ) as tx:
            query_result = tx.query("match $x isa custodian-observation, has id $id;").resolve()
            for row in query_result.as_concept_rows():
                concept = row.get('id')
                if concept and hasattr(concept, 'get_value'):
                    existing_ids.add(concept.get_value())
            result["existing_in_typedb"] = len(existing_ids)
    except Exception as e:
        result["errors"].append(f"Error getting existing IDs: {e}")
    # Optionally wipe all existing observations (never during a dry run).
    if clear_existing and not dry_run:
        try:
            with driver.transaction(database, TransactionType.WRITE) as tx:
                tx.query("match $x isa custodian-observation; delete $x;").resolve()
                tx.commit()
            existing_ids = set()
            result["cleared_existing"] = True
        except Exception as e:
            result["errors"].append(f"Error clearing existing: {e}")
    # Pull the source rows from PostgreSQL.
    try:
        conn = get_pg_connection()
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cur.execute("SELECT COUNT(*) FROM custodians WHERE ghcid IS NOT NULL")
        result["total_in_postgresql"] = cur.fetchone()[0]
        if dry_run:
            result["would_insert"] = result["total_in_postgresql"] - len(existing_ids)
            cur.close()
            conn.close()
            return result
        cur.execute("""
            SELECT ghcid, name, city, region, region_code, country_code, type, type_name,
                   lat, lon, wikidata_id, isil_code, viaf_id, google_place_id,
                   website, email, phone, street_address, postal_code,
                   description, rating, founding_year
            FROM custodians
            WHERE ghcid IS NOT NULL
            ORDER BY ghcid
        """)
        rows = cur.fetchall()
        cur.close()
        conn.close()
    except Exception as e:
        result["status"] = "error"
        result["errors"].append(f"PostgreSQL error: {e}")
        return result
    # Build and execute insert queries in batches of batch_size.
    batch: List[str] = []
    for row in rows:
        if row['ghcid'] in existing_ids:
            result["skipped"] += 1
            continue
        batch.append(_custodian_insert_stmt(row, len(batch)))
        if len(batch) >= batch_size:
            _flush_insert_batch(driver, database, batch, result, "Batch insert error")
            batch = []
    # Flush whatever is left over after the loop.
    _flush_insert_batch(driver, database, batch, result, "Final batch error")
    return result
@app.post("/sync/postgresql")
async def sync_from_postgresql(
    database: Optional[str] = None,
    batch_size: int = Query(default=100, ge=1, le=1000),
    dry_run: bool = Query(default=False),
    clear_existing: bool = Query(default=False),
) -> Dict[str, Any]:
    """Sync institutions from PostgreSQL to TypeDB

    - database: TypeDB database name (defaults to 'glam')
    - batch_size: Number of records per transaction (1-1000)
    - dry_run: If true, only count records without inserting
    - clear_existing: If true, delete existing custodian-observations first
    """
    # Fail fast before touching the event loop or TypeDB.
    if not HAS_PSYCOPG2:
        raise HTTPException(
            status_code=500,
            detail="psycopg2 not installed on server. Run: pip install psycopg2-binary"
        )
    db = database or settings.database
    # get_running_loop() is the supported call inside a coroutine;
    # get_event_loop() is deprecated here since Python 3.10.
    loop = asyncio.get_running_loop()
    try:
        # The sync is blocking (psycopg2 + TypeDB driver), so run it on the
        # shared thread pool to avoid stalling the event loop.
        result = await loop.run_in_executor(
            _executor,
            sync_postgresql_to_typedb,
            db,
            batch_size,
            dry_run,
            clear_existing,
        )
        return result
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# ============================================================================
# Main Entry Point
# ============================================================================
if __name__ == "__main__":
    # Development entry point: serve the API with auto-reload enabled.
    import uvicorn

    uvicorn.run(
        app="main:app",
        host=settings.api_host,
        port=settings.api_port,
        reload=True,
    )