fix(rag): correct SPARQL ontology prefixes for LinkML schema
- Update HeritageSPARQLGenerator docstring with correct prefixes
- Change main class from hc:Custodian to crm:E39_Actor
- Change type property from hcp:institutionType to org:classification
- Update type values from single letters to full names (MUSEUM, ARCHIVE, etc.)
- Add rate-limit handling with exponential backoff for 429 errors
- Fix test_live_rag.py sample queries to use the correct ontology
- Update optimized_models instructions with correct prefixes
This commit is contained in:
parent
7a056fa746
commit
8e97a7beca
4 changed files with 584 additions and 86 deletions
|
|
@ -20,6 +20,7 @@ from __future__ import annotations
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import random
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
@ -32,6 +33,104 @@ from dspy.streaming import StatusMessage, StreamListener, StatusMessageProvider
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# RATE LIMIT HANDLING
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def is_rate_limit_error(error: Exception) -> bool:
    """Check if an exception is a rate limit error (429).

    Handles both direct errors and ExceptionGroups from asyncio.TaskGroup.

    Traversal is iterative with an id-based visited set, so a cyclic
    ``__cause__`` chain (e.g. ``e.__cause__ = e``, which can occur with
    wrapped provider errors) cannot trigger a RecursionError.
    """
    # Worklist over the error itself, any nested ExceptionGroup members,
    # and each explicit __cause__ chain.
    pending = [error]
    seen: set[int] = set()

    while pending:
        current = pending.pop()
        if id(current) in seen:
            # Guard against cyclic cause chains / shared sub-exceptions.
            continue
        seen.add(id(current))

        # Direct rate limit indicators: HTTP 429, a "rate"-mentioning
        # message, or provider error code 1305.
        message = str(current).lower()
        if '429' in message or 'rate' in message or '1305' in message:
            return True

        # Nested exceptions in ExceptionGroup (from asyncio.TaskGroup)
        if hasattr(current, 'exceptions'):
            pending.extend(current.exceptions)

        # Explicit chaining: `raise X from Y`
        if current.__cause__ is not None:
            pending.append(current.__cause__)

    return False


def extract_actual_error(error: Exception) -> Exception:
    """Extract the actual error from an ExceptionGroup if present.

    Searches group members recursively and prefers a rate-limit error
    found at any depth; otherwise returns ``error`` unchanged.
    """
    if hasattr(error, 'exceptions'):
        for sub_exc in error.exceptions:
            # Return rate limit error if found
            if is_rate_limit_error(sub_exc):
                return sub_exc
            # Recursively check nested groups
            actual = extract_actual_error(sub_exc)
            if actual is not sub_exc:
                return actual
    return error
|
||||||
|
|
||||||
|
|
||||||
|
async def call_with_rate_limit_retry(
    func: Callable,
    *args,
    max_retries: int = 3,
    base_delay: float = 2.0,
    max_delay: float = 30.0,
    **kwargs
) -> Any:
    """Call a function with exponential backoff retry on rate limit errors.

    Args:
        func: The function to call (can be sync or async)
        *args: Positional arguments for the function
        max_retries: Maximum number of retry attempts
        base_delay: Initial delay in seconds
        max_delay: Maximum delay in seconds
        **kwargs: Keyword arguments for the function

    Returns:
        The function's return value

    Raises:
        The original exception if max retries exceeded or non-rate-limit error
    """
    last_exception = None

    for attempt in range(max_retries + 1):
        try:
            # Invoke the callable; await the result when it is a coroutine
            # so both sync and async functions are supported.
            outcome = func(*args, **kwargs)
            if asyncio.iscoroutine(outcome):
                outcome = await outcome
            return outcome

        except Exception as exc:
            last_exception = exc
            root_cause = extract_actual_error(exc)
            rate_limited = is_rate_limit_error(exc)

            if not (rate_limited and attempt < max_retries):
                # Give up: either this is not a rate limit, or we have
                # exhausted the retry budget.
                if rate_limited:
                    logger.error(f"Max retries ({max_retries}) exceeded for rate limit")
                raise

            # Exponential backoff with jitter, capped at max_delay.
            wait_for = min(base_delay * (2 ** attempt) + random.uniform(0, 1), max_delay)
            logger.warning(
                f"Rate limited (attempt {attempt + 1}/{max_retries + 1}), "
                f"waiting {wait_for:.1f}s before retry. Error: {root_cause}"
            )
            await asyncio.sleep(wait_for)

    # Should not reach here, but just in case
    raise last_exception if last_exception else RuntimeError("Unexpected retry loop exit")
|
||||||
|
|
||||||
# Semantic cache imports (graceful degradation if not available)
|
# Semantic cache imports (graceful degradation if not available)
|
||||||
SEMANTIC_CACHE_AVAILABLE = False
|
SEMANTIC_CACHE_AVAILABLE = False
|
||||||
get_cache: Optional[Callable[[], Any]] = None
|
get_cache: Optional[Callable[[], Any]] = None
|
||||||
|
|
@ -229,71 +328,95 @@ class HeritageQueryIntent(dspy.Signature):
|
||||||
class HeritageSPARQLGenerator(dspy.Signature):
|
class HeritageSPARQLGenerator(dspy.Signature):
|
||||||
"""Generate SPARQL queries for heritage custodian knowledge graph.
|
"""Generate SPARQL queries for heritage custodian knowledge graph.
|
||||||
|
|
||||||
You are an expert in SPARQL and the Heritage Custodian Ontology.
|
You are an expert in SPARQL and the Heritage Custodian Ontology (based on LinkML schema).
|
||||||
Generate valid SPARQL queries that work with our Oxigraph endpoint.
|
Generate valid SPARQL queries that work with our Oxigraph endpoint.
|
||||||
|
|
||||||
Key prefixes (MUST USE THESE EXACT URIs):
|
REQUIRED PREFIXES (MUST USE THESE EXACT URIs):
|
||||||
- PREFIX hc: <https://nde.nl/ontology/hc/class/>
|
- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
- PREFIX hcp: <https://w3id.org/heritage/custodian/>
|
- PREFIX org: <http://www.w3.org/ns/org#>
|
||||||
- PREFIX ghcid: <https://w3id.org/heritage/custodian/>
|
|
||||||
- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||||
- PREFIX schema: <http://schema.org/>
|
- PREFIX schema: <http://schema.org/>
|
||||||
- PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
- PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
||||||
- PREFIX dct: <http://purl.org/dc/terms/>
|
- PREFIX dcterms: <http://purl.org/dc/terms/>
|
||||||
- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
- PREFIX hc: <https://nde.nl/ontology/hc/>
|
||||||
- PREFIX wdt: <http://www.wikidata.org/prop/direct/>
|
- PREFIX rico: <https://www.ica.org/standards/RiC/ontology#>
|
||||||
|
- PREFIX prov: <http://www.w3.org/ns/prov#>
|
||||||
- PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
|
- PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
|
||||||
|
|
||||||
Key classes:
|
MAIN CLASS (from LinkML Custodian.yaml):
|
||||||
- hc:Custodian - Heritage custodian institution
|
- crm:E39_Actor - Heritage custodian institution (this is THE class for all custodians)
|
||||||
- schema:Place - Geographic location
|
|
||||||
- foaf:OnlineAccount - Social media profile
|
|
||||||
|
|
||||||
Key properties:
|
KEY PROPERTIES:
|
||||||
- skos:prefLabel - Institution name (literal)
|
- dcterms:identifier - Unique identifier (hc_id)
|
||||||
- hcp:institutionType - Type code (M, L, A, G, etc.)
|
- skos:prefLabel - Preferred/display name (literal)
|
||||||
- schema:addressCountry - Country (Wikidata entity)
|
- org:classification - Custodian type (MUSEUM, LIBRARY, ARCHIVE, etc.)
|
||||||
- foaf:homepage - Website URL
|
|
||||||
- crm:P53_has_former_or_current_location - Location link
|
- crm:P53_has_former_or_current_location - Location link
|
||||||
|
- foaf:homepage - Website URL
|
||||||
|
- org:subOrganizationOf - Parent organization
|
||||||
|
- crm:P46_is_composed_of - Collection links
|
||||||
|
- schema:foundingDate - Founding date (xsd:date)
|
||||||
|
|
||||||
TEMPORAL PROPERTIES (for founding/oldest queries):
|
CUSTODIAN TYPE VALUES (use FULL names, not single letters):
|
||||||
- schema:foundingDate - Institution founding date (xsd:date, e.g., "1800-01-01")
|
MUSEUM, LIBRARY, ARCHIVE, GALLERY, OFFICIAL_INSTITUTION, RESEARCH_CENTER,
|
||||||
- hcp:foundingYear - Founding year as integer (xsd:integer, e.g., 1800)
|
COMMERCIAL, UNSPECIFIED, BIO_CUSTODIAN, EDUCATION_PROVIDER, HERITAGE_SOCIETY,
|
||||||
- wdt:P571 - Wikidata inception date (same as schema:foundingDate)
|
FEATURE_CUSTODIAN, INTANGIBLE_HERITAGE_GROUP, MIXED, PERSONAL_COLLECTION,
|
||||||
|
HOLY_SACRED_SITE, DIGITAL_PLATFORM, NON_PROFIT, TASTE_SCENT_HERITAGE
|
||||||
|
|
||||||
Example - Find oldest archives:
|
Example - Find all museums:
|
||||||
```sparql
|
```sparql
|
||||||
PREFIX schema: <http://schema.org/>
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
|
PREFIX org: <http://www.w3.org/ns/org#>
|
||||||
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||||
PREFIX hcp: <https://w3id.org/heritage/custodian/>
|
|
||||||
|
|
||||||
SELECT ?inst ?name ?founded WHERE {
|
SELECT ?custodian ?name WHERE {
|
||||||
?inst a <https://nde.nl/ontology/hc/class/Custodian> ;
|
?custodian a crm:E39_Actor ;
|
||||||
skos:prefLabel ?name ;
|
org:classification "MUSEUM" ;
|
||||||
hcp:institutionType "A" ;
|
skos:prefLabel ?name .
|
||||||
schema:foundingDate ?founded .
|
}
|
||||||
|
LIMIT 100
|
||||||
|
```
|
||||||
|
|
||||||
|
Example - Find custodian by name (case-insensitive search):
|
||||||
|
```sparql
|
||||||
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
|
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||||
|
|
||||||
|
SELECT ?custodian ?name WHERE {
|
||||||
|
?custodian a crm:E39_Actor ;
|
||||||
|
skos:prefLabel ?name .
|
||||||
|
FILTER(CONTAINS(LCASE(STR(?name)), "rijksmuseum"))
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Example - Count custodians by type:
|
||||||
|
```sparql
|
||||||
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
|
PREFIX org: <http://www.w3.org/ns/org#>
|
||||||
|
|
||||||
|
SELECT ?type (COUNT(?custodian) AS ?count) WHERE {
|
||||||
|
?custodian a crm:E39_Actor ;
|
||||||
|
org:classification ?type .
|
||||||
|
}
|
||||||
|
GROUP BY ?type
|
||||||
|
ORDER BY DESC(?count)
|
||||||
|
```
|
||||||
|
|
||||||
|
Example - Find oldest archives by founding date:
|
||||||
|
```sparql
|
||||||
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
|
PREFIX org: <http://www.w3.org/ns/org#>
|
||||||
|
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||||
|
PREFIX schema: <http://schema.org/>
|
||||||
|
|
||||||
|
SELECT ?custodian ?name ?founded WHERE {
|
||||||
|
?custodian a crm:E39_Actor ;
|
||||||
|
org:classification "ARCHIVE" ;
|
||||||
|
skos:prefLabel ?name ;
|
||||||
|
schema:foundingDate ?founded .
|
||||||
}
|
}
|
||||||
ORDER BY ?founded
|
ORDER BY ?founded
|
||||||
LIMIT 10
|
LIMIT 10
|
||||||
```
|
```
|
||||||
|
|
||||||
Example - Find museums founded before 1900:
|
|
||||||
```sparql
|
|
||||||
PREFIX schema: <http://schema.org/>
|
|
||||||
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
|
||||||
PREFIX hcp: <https://w3id.org/heritage/custodian/>
|
|
||||||
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
|
|
||||||
|
|
||||||
SELECT ?inst ?name ?year WHERE {
|
|
||||||
?inst a <https://nde.nl/ontology/hc/class/Custodian> ;
|
|
||||||
skos:prefLabel ?name ;
|
|
||||||
hcp:institutionType "M" ;
|
|
||||||
hcp:foundingYear ?year .
|
|
||||||
FILTER(?year < 1900)
|
|
||||||
}
|
|
||||||
ORDER BY ?year
|
|
||||||
LIMIT 20
|
|
||||||
```
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
question: str = dspy.InputField(desc="Natural language question")
|
question: str = dspy.InputField(desc="Natural language question")
|
||||||
|
|
@ -1530,14 +1653,16 @@ def create_heritage_tools(
|
||||||
JSON string of nearby institutions with distances
|
JSON string of nearby institutions with distances
|
||||||
"""
|
"""
|
||||||
sparql = f"""
|
sparql = f"""
|
||||||
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
|
PREFIX org: <http://www.w3.org/ns/org#>
|
||||||
|
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||||
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
|
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
|
||||||
PREFIX hco: <https://w3id.org/hco/>
|
|
||||||
PREFIX geof: <http://www.opengis.net/def/function/geosparql/>
|
PREFIX geof: <http://www.opengis.net/def/function/geosparql/>
|
||||||
|
|
||||||
SELECT ?inst ?name ?type ?distance WHERE {{
|
SELECT ?inst ?name ?type ?distance WHERE {{
|
||||||
?inst a hco:HeritageCustodian ;
|
?inst a crm:E39_Actor ;
|
||||||
hco:name ?name ;
|
skos:prefLabel ?name ;
|
||||||
hco:institutionType ?type ;
|
org:classification ?type ;
|
||||||
geo:hasGeometry/geo:asWKT ?wkt .
|
geo:hasGeometry/geo:asWKT ?wkt .
|
||||||
|
|
||||||
BIND(geof:distance(?wkt, "POINT({longitude} {latitude})"^^geo:wktLiteral, <http://www.opengis.net/def/uom/OGC/1.0/kilometre>) AS ?distance)
|
BIND(geof:distance(?wkt, "POINT({longitude} {latitude})"^^geo:wktLiteral, <http://www.opengis.net/def/uom/OGC/1.0/kilometre>) AS ?distance)
|
||||||
|
|
@ -1571,27 +1696,30 @@ def create_heritage_tools(
|
||||||
JSON string with full institution details
|
JSON string with full institution details
|
||||||
"""
|
"""
|
||||||
sparql = f"""
|
sparql = f"""
|
||||||
PREFIX hco: <https://w3id.org/hco/>
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
|
PREFIX org: <http://www.w3.org/ns/org#>
|
||||||
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||||
PREFIX dct: <http://purl.org/dc/terms/>
|
PREFIX dcterms: <http://purl.org/dc/terms/>
|
||||||
|
PREFIX schema: <http://schema.org/>
|
||||||
|
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
||||||
|
|
||||||
SELECT * WHERE {{
|
SELECT * WHERE {{
|
||||||
?inst a hco:HeritageCustodian .
|
?inst a crm:E39_Actor .
|
||||||
|
|
||||||
{{
|
{{
|
||||||
?inst hco:ghcid "{identifier}"
|
?inst dcterms:identifier "{identifier}"
|
||||||
}} UNION {{
|
}} UNION {{
|
||||||
?inst hco:isil "{identifier}"
|
?inst dcterms:identifier "{identifier}"
|
||||||
}} UNION {{
|
}} UNION {{
|
||||||
?inst dct:identifier <http://www.wikidata.org/entity/{identifier}>
|
?inst dcterms:identifier <http://www.wikidata.org/entity/{identifier}>
|
||||||
}}
|
}}
|
||||||
|
|
||||||
?inst skos:prefLabel ?name .
|
?inst skos:prefLabel ?name .
|
||||||
OPTIONAL {{ ?inst hco:institutionType ?type }}
|
OPTIONAL {{ ?inst org:classification ?type }}
|
||||||
OPTIONAL {{ ?inst hco:city ?city }}
|
OPTIONAL {{ ?inst crm:P53_has_former_or_current_location ?location }}
|
||||||
OPTIONAL {{ ?inst hco:country ?country }}
|
OPTIONAL {{ ?inst schema:foundingDate ?founded }}
|
||||||
OPTIONAL {{ ?inst hco:foundingDate ?founded }}
|
OPTIONAL {{ ?inst dcterms:description ?desc }}
|
||||||
OPTIONAL {{ ?inst hco:description ?desc }}
|
OPTIONAL {{ ?inst foaf:homepage ?website }}
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
return query_knowledge_graph(sparql)
|
return query_knowledge_graph(sparql)
|
||||||
|
|
@ -1622,15 +1750,15 @@ def create_heritage_tools(
|
||||||
type_filter = f'FILTER(?type = "{institution_type}")'
|
type_filter = f'FILTER(?type = "{institution_type}")'
|
||||||
|
|
||||||
sparql = f"""
|
sparql = f"""
|
||||||
PREFIX hco: <https://w3id.org/hco/>
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
|
PREFIX org: <http://www.w3.org/ns/org#>
|
||||||
|
|
||||||
SELECT ?{group_by} (COUNT(?inst) AS ?count) WHERE {{
|
SELECT ?type (COUNT(?inst) AS ?count) WHERE {{
|
||||||
?inst a hco:HeritageCustodian ;
|
?inst a crm:E39_Actor ;
|
||||||
hco:{group_by} ?{group_by} ;
|
org:classification ?type .
|
||||||
hco:institutionType ?type .
|
|
||||||
{type_filter}
|
{type_filter}
|
||||||
}}
|
}}
|
||||||
GROUP BY ?{group_by}
|
GROUP BY ?type
|
||||||
ORDER BY DESC(?count)
|
ORDER BY DESC(?count)
|
||||||
LIMIT 50
|
LIMIT 50
|
||||||
"""
|
"""
|
||||||
|
|
@ -3264,6 +3392,376 @@ class HeritageRAGPipeline(dspy.Module):
|
||||||
|
|
||||||
return prediction
|
return prediction
|
||||||
|
|
||||||
|
async def forward_streaming(
    self,
    question: str,
    language: str = "nl",
    history: History | None = None,
    include_viz: bool = True,
    skip_cache: bool = False,
    embedding_model: str | None = None,
) -> AsyncIterator[dict]:
    """Execute RAG pipeline with streaming answer generation.

    Yields dictionaries with different types:
    - {"type": "cache_hit", "prediction": ...} (cached answer; generator ends early)
    - {"type": "retrieval_complete", "context": ..., "routing": ..., "retrieved_results": ...}
    - {"type": "token", "content": "..."} (streaming answer tokens)
    - {"type": "status", "message": ...} (status updates surfaced during streaming)
    - {"type": "answer_complete", "prediction": ...} (final result)

    Args:
        question: User's natural language question
        language: Response language (nl, en)
        history: Previous conversation turns for multi-turn context
        include_viz: Whether to include visualization config
        skip_cache: Force bypass cache lookup
        embedding_model: Optional embedding model (minilm_384, openai_1536, bge_768)

    Yields:
        Dictionaries with streaming progress and tokens
    """
    # NOTE(review): redundant with the module-level `import asyncio`; harmless.
    import asyncio

    # Initialize empty history if not provided
    if history is None:
        history = History(messages=[])

    # =================================================================
    # Cache Check - Look for cached response before expensive LLM calls
    # =================================================================
    if SEMANTIC_CACHE_AVAILABLE and not skip_cache:
        try:
            if should_bypass_cache is not None and get_cache is not None:
                if not should_bypass_cache(question):
                    cache = get_cache()
                    cached_response = cache.get_sync(question, language=language)

                    # "_warmup_entry" presumably flags pre-seeded warm-up rows
                    # that must not be served as real answers — verify with
                    # the cache implementation.
                    if cached_response and not cached_response.get("_warmup_entry"):
                        logger.info(f"Cache HIT for streaming query: {question[:50]}...")
                        # Return cached response immediately
                        yield {
                            "type": "cache_hit",
                            "prediction": Prediction(
                                answer=cached_response.get("answer", ""),
                                intent=cached_response.get("intent", "exploration"),
                                entities=cached_response.get("entities", []),
                                sparql=cached_response.get("sparql"),
                                sources_used=cached_response.get("sources_used", []),
                                confidence=cached_response.get("confidence", 0.9),
                                citations=cached_response.get("citations", []),
                                follow_up=cached_response.get("follow_up", []),
                                visualization=cached_response.get("visualization"),
                                cache_hit=True,
                            )
                        }
                        return
        except Exception as e:
            # Cache problems are non-fatal: fall through to the full pipeline.
            logger.warning(f"Cache lookup failed in streaming mode: {e}")

    # =================================================================
    # RETRIEVAL PHASE - Run synchronously in thread pool to not block
    # =================================================================
    detected_query_type = "institution"
    retrieved_results = []
    context_parts = [f"Query: {question}"]
    # `sparql` is never populated in this method; it is carried through to
    # the final Prediction and cache entry for field parity with forward().
    sparql = None
    entities = None

    # Step 1: Route query (with rate limit retry)
    try:
        routing = await call_with_rate_limit_retry(
            self.router,
            question=question,
            language=language,
            history=history,
            max_retries=3,
            base_delay=2.0,
        )
    except Exception as e:
        # Routing is mandatory — re-raise after logging the unwrapped cause.
        actual_error = extract_actual_error(e)
        logger.error(f"Router failed after retries: {actual_error}")
        raise
    resolved_question = getattr(routing, 'resolved_question', question)

    # Small delay between LLM calls to reduce rate limit pressure
    await asyncio.sleep(0.5)

    # Step 2: Extract entities (optional, for context - with rate limit retry)
    try:
        entities = await call_with_rate_limit_retry(
            self.entity_extractor,
            question=question,
            language=language,
            max_retries=2,  # Fewer retries since this is optional
            base_delay=1.5,
        )
    except Exception as e:
        actual_error = extract_actual_error(e)
        logger.warning(f"Entity extraction failed: {actual_error}")
        entities = None

    # Small delay before streaming answer generation
    await asyncio.sleep(0.5)

    # Step 3: Retrieval from databases
    if self.retriever:
        try:
            # Detect if this is a person query
            question_lower = question.lower()
            # Dutch and English keywords that indicate a staff/person question.
            person_indicators = ['wie ', 'who ', 'medewerker', 'staff', 'curator', 'director', 'werkt', 'works',
                                 'employee', 'team', 'directeur', 'conservator', 'archivaris', 'archivist',
                                 'bibliothecaris', 'librarian', 'contactpersoon', 'contact person']
            is_person_query = any(indicator in question_lower for indicator in person_indicators)

            if is_person_query:
                detected_query_type = "person"
                logger.info(f"Detected PERSON query for streaming: {resolved_question[:50]}...")

                # Search for persons
                if hasattr(self.retriever, 'search_persons'):
                    person_results = self.retriever.search_persons(query=resolved_question, k=10, using=embedding_model)

                    if person_results:
                        context_parts.append("\n[RETRIEVED STAFF/PEOPLE - Real data from heritage database]:")
                        for p in person_results:
                            name = getattr(p, 'name', 'Unknown')
                            headline = getattr(p, 'headline', '')
                            custodian = getattr(p, 'custodian_name', '')

                            entry = f"- {name}"
                            if headline:
                                entry += f" ({headline})"
                            if custodian:
                                entry += f" at {custodian}"
                            context_parts.append(entry)

                            retrieved_results.append({
                                "type": "person",
                                "name": name,
                                "headline": headline,
                                "custodian_name": custodian,
                                "score": getattr(p, 'combined_score', 0),
                            })
            else:
                # Institution search
                logger.info(f"Performing INSTITUTION retrieval for streaming: {resolved_question[:50]}...")
                inst_results = self.retriever.search(query=resolved_question, k=10, auto_route=False, using=embedding_model)

                if inst_results:
                    context_parts.append("\n[RETRIEVED INSTITUTIONS - Real data from heritage database]:")
                    for inst in inst_results:
                        # Results may be rich objects (to_dict) or plain
                        # attribute carriers; normalize both into dicts.
                        if hasattr(inst, 'to_dict'):
                            inst_dict = inst.to_dict()
                            name = inst_dict.get('name', 'Unknown')
                            inst_type = inst_dict.get('metadata', {}).get('type', '')
                            city = inst_dict.get('metadata', {}).get('city', '')
                            inst_dict['type'] = 'institution'
                            retrieved_results.append(inst_dict)
                        else:
                            name = getattr(inst, 'name', 'Unknown')
                            inst_type = getattr(inst, 'type', '')
                            city = getattr(inst, 'city', '')
                            retrieved_results.append({
                                "type": "institution",
                                "name": name,
                                "institution_type": inst_type,
                                "city": city,
                            })

                        entry = f"- {name}"
                        if inst_type:
                            entry += f" ({inst_type})"
                        if city:
                            entry += f" in {city}"
                        context_parts.append(entry)
        except Exception as e:
            # Retrieval is best-effort: surface the error inside the prompt
            # context instead of aborting the stream.
            logger.warning(f"Retrieval failed in streaming mode: {e}")
            context_parts.append(f"\n[Retrieval error: {str(e)}]")

    context = "\n".join(context_parts)

    # Yield retrieval complete event
    yield {
        "type": "retrieval_complete",
        "context": context,
        "routing": {
            "intent": routing.intent,
            "sources": routing.sources,
            "resolved_question": resolved_question,
        },
        "retrieved_results": retrieved_results,
        "query_type": detected_query_type,
    }

    # =================================================================
    # ANSWER GENERATION PHASE - Stream tokens using dspy.streamify
    # =================================================================
    answer_text = ""
    confidence = 0.8
    citations = []
    follow_up = []
    streaming_succeeded = False
    retry_count = 0
    max_stream_retries = 2

    while not streaming_succeeded and retry_count <= max_stream_retries:
        try:
            # Create streamified version of the answer generator
            streamified_answer_gen = dspy.streamify(self.answer_gen)

            # Use quality_lm context if available
            lm_context = dspy.settings.context(lm=self.quality_lm) if self.quality_lm else dspy.settings.context()

            with lm_context:
                async for value in streamified_answer_gen(
                    question=resolved_question,
                    context=context,
                    history=history,
                    sources=routing.sources,
                    language=language,
                ):
                    if isinstance(value, dspy.Prediction):
                        # Final prediction - extract all fields
                        answer_text = value.answer
                        confidence = getattr(value, 'confidence', 0.8)
                        citations = getattr(value, 'citations', [])
                        follow_up = getattr(value, 'follow_up', [])
                        streaming_succeeded = True
                    elif isinstance(value, str):
                        # Streaming token
                        yield {"type": "token", "content": value}
                    else:
                        # Handle ModelResponseStream from litellm/DSPy
                        # Token text is in choices[0].delta.content or .reasoning_content
                        token_text = None

                        # Try to extract content from streaming response
                        if hasattr(value, 'choices') and value.choices:
                            delta = getattr(value.choices[0], 'delta', None)
                            if delta:
                                # Check both content and reasoning_content (for GLM models)
                                token_text = getattr(delta, 'content', None) or getattr(delta, 'reasoning_content', None)

                        # Fallback: check for message attribute (StatusMessage)
                        if token_text is None and hasattr(value, 'message'):
                            yield {"type": "status", "message": value.message}
                            continue

                        # Yield extracted token if we got text
                        if token_text:
                            yield {"type": "token", "content": token_text}

            # If we get here, streaming completed
            streaming_succeeded = True

        except Exception as e:
            actual_error = extract_actual_error(e)
            retry_count += 1

            # Check if rate limited and can retry
            if is_rate_limit_error(e) and retry_count <= max_stream_retries:
                # Exponential backoff with jitter (mirrors call_with_rate_limit_retry).
                delay = 2.0 * (2 ** (retry_count - 1)) + random.uniform(0, 1)
                logger.warning(
                    f"Streaming rate limited (attempt {retry_count}/{max_stream_retries + 1}), "
                    f"waiting {delay:.1f}s before retry. Error: {actual_error}"
                )
                await asyncio.sleep(delay)
                continue

            # Not rate limited or max retries exceeded - fall back to sync
            logger.warning(
                f"Streaming answer generation failed after {retry_count} attempts, "
                f"falling back to sync. Error: {actual_error}"
            )
            break

    # Fallback to synchronous generation if streaming failed
    if not streaming_succeeded:
        try:
            # Use rate limit retry for sync fallback too
            lm_context = dspy.settings.context(lm=self.quality_lm) if self.quality_lm else dspy.settings.context()
            with lm_context:
                answer_result = await call_with_rate_limit_retry(
                    self.answer_gen,
                    question=resolved_question,
                    context=context,
                    history=history,
                    sources=routing.sources,
                    language=language,
                    max_retries=3,
                    base_delay=2.0,
                )
            answer_text = answer_result.answer
            confidence = answer_result.confidence
            citations = answer_result.citations
            follow_up = answer_result.follow_up
            # Yield the full answer as one token (fallback behavior)
            logger.info("Sync fallback succeeded - yielding full answer as single token")
            yield {"type": "token", "content": answer_text}
        except Exception as fallback_e:
            actual_error = extract_actual_error(fallback_e)
            logger.exception(f"Fallback answer generation also failed: {actual_error}")
            # Dutch user-facing message: "An error occurred while generating the answer."
            answer_text = "Er is een fout opgetreden bij het genereren van het antwoord."
            yield {"type": "token", "content": answer_text}

    # Step 4: Visualization selection (if needed)
    viz_config = None
    if include_viz:
        try:
            viz_result = self.viz_selector(
                question=question,
                intent=routing.intent,
                schema_fields=["name", "type", "city", "country", "lat", "lon"],
                result_count=len(retrieved_results),
            )
            viz_config = {
                "type": viz_result.viz_type,
                "config": viz_result.config,
                "reasoning": viz_result.reasoning,
            }
        except Exception as e:
            # Visualization is optional; a failure leaves viz_config as None.
            logger.warning(f"Visualization selection failed: {e}")

    # Build final prediction
    prediction = Prediction(
        answer=answer_text,
        intent=routing.intent,
        entities=entities,
        sparql=sparql,
        sources_used=routing.sources,
        confidence=confidence,
        citations=citations,
        follow_up=follow_up,
        visualization=viz_config,
        cache_hit=False,
        resolved_question=resolved_question,
        retrieved_results=retrieved_results,
        query_type=detected_query_type,
        embedding_model_used=embedding_model,
    )

    # Cache the response (fire and forget)
    # Only reasonably confident answers (>= 0.7) are written back.
    if SEMANTIC_CACHE_AVAILABLE and not skip_cache and confidence >= 0.7:
        try:
            if get_cache is not None:
                cache = get_cache()
                response_dict = {
                    "answer": answer_text,
                    "intent": routing.intent,
                    "entities": entities.institutions if hasattr(entities, 'institutions') else [],
                    "sparql": sparql,
                    "sources_used": routing.sources,
                    "confidence": confidence,
                    "citations": citations,
                    "follow_up": follow_up,
                    "visualization": viz_config,
                }
                cache.set_sync(question, response_dict, intent=routing.intent, language=language)
        except Exception as e:
            logger.warning(f"Failed to cache streaming response: {e}")

    # Yield final prediction
    yield {"type": "answer_complete", "prediction": prediction}
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# 7. FACTORY FUNCTIONS
|
# 7. FACTORY FUNCTIONS
|
||||||
|
|
|
||||||
|
|
@ -369,7 +369,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"signature": {
|
"signature": {
|
||||||
"instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX hc: <https://nde.nl/ontology/hc/class/>\n- PREFIX hcp: <https://nde.nl/ontology/hc/>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- hc:Custodian - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- hcp:custodian_type - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
|
"instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX org: <http://www.w3.org/ns/org#>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- crm:E39_Actor - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- org:classification - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
|
||||||
"fields": [
|
"fields": [
|
||||||
{
|
{
|
||||||
"prefix": "Question:",
|
"prefix": "Question:",
|
||||||
|
|
@ -547,7 +547,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"signature": {
|
"signature": {
|
||||||
"instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX hc: <https://nde.nl/ontology/hc/class/>\n- PREFIX hcp: <https://nde.nl/ontology/hc/>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- hc:Custodian - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- hcp:custodian_type - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
|
"instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX org: <http://www.w3.org/ns/org#>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- crm:E39_Actor - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- org:classification - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
|
||||||
"fields": [
|
"fields": [
|
||||||
{
|
{
|
||||||
"prefix": "Question:",
|
"prefix": "Question:",
|
||||||
|
|
@ -1072,4 +1072,4 @@
|
||||||
"cloudpickle": "3.1"
|
"cloudpickle": "3.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -369,7 +369,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"signature": {
|
"signature": {
|
||||||
"instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX hc: <https://nde.nl/ontology/hc/class/>\n- PREFIX hcp: <https://nde.nl/ontology/hc/>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- hc:Custodian - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- hcp:custodian_type - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
|
"instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX org: <http://www.w3.org/ns/org#>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- crm:E39_Actor - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- org:classification - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
|
||||||
"fields": [
|
"fields": [
|
||||||
{
|
{
|
||||||
"prefix": "Question:",
|
"prefix": "Question:",
|
||||||
|
|
@ -547,7 +547,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"signature": {
|
"signature": {
|
||||||
"instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX hc: <https://nde.nl/ontology/hc/class/>\n- PREFIX hcp: <https://nde.nl/ontology/hc/>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- hc:Custodian - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- hcp:custodian_type - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
|
"instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX org: <http://www.w3.org/ns/org#>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- crm:E39_Actor - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- org:classification - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
|
||||||
"fields": [
|
"fields": [
|
||||||
{
|
{
|
||||||
"prefix": "Question:",
|
"prefix": "Question:",
|
||||||
|
|
@ -1072,4 +1072,4 @@
|
||||||
"cloudpickle": "3.1"
|
"cloudpickle": "3.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -49,8 +49,8 @@ def test_sparql_endpoint():
|
||||||
|
|
||||||
# Count custodians
|
# Count custodians
|
||||||
query = """
|
query = """
|
||||||
PREFIX hc: <https://nde.nl/ontology/hc/class/>
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
SELECT (COUNT(*) as ?count) WHERE { ?s a hc:Custodian }
|
SELECT (COUNT(*) as ?count) WHERE { ?s a crm:E39_Actor }
|
||||||
"""
|
"""
|
||||||
|
|
||||||
response = httpx.post(
|
response = httpx.post(
|
||||||
|
|
@ -200,35 +200,35 @@ def run_sample_queries():
|
||||||
|
|
||||||
queries = [
|
queries = [
|
||||||
("Museums by country", """
|
("Museums by country", """
|
||||||
PREFIX hc: <https://nde.nl/ontology/hc/class/>
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
PREFIX hcp: <https://nde.nl/ontology/hc/>
|
PREFIX org: <http://www.w3.org/ns/org#>
|
||||||
PREFIX schema: <http://schema.org/>
|
PREFIX schema: <http://schema.org/>
|
||||||
SELECT ?country (COUNT(?s) as ?count) WHERE {
|
SELECT ?country (COUNT(?s) as ?count) WHERE {
|
||||||
?s a hc:Custodian ;
|
?s a crm:E39_Actor ;
|
||||||
hcp:custodian_type "MUSEUM" ;
|
org:classification "MUSEUM" ;
|
||||||
schema:addressCountry ?country .
|
schema:addressCountry ?country .
|
||||||
} GROUP BY ?country ORDER BY DESC(?count) LIMIT 10
|
} GROUP BY ?country ORDER BY DESC(?count) LIMIT 10
|
||||||
"""),
|
"""),
|
||||||
("Dutch archives with websites", """
|
("Dutch archives with websites", """
|
||||||
PREFIX hc: <https://nde.nl/ontology/hc/class/>
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
PREFIX hcp: <https://nde.nl/ontology/hc/>
|
PREFIX org: <http://www.w3.org/ns/org#>
|
||||||
PREFIX schema: <http://schema.org/>
|
PREFIX schema: <http://schema.org/>
|
||||||
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
||||||
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||||
SELECT ?name ?homepage WHERE {
|
SELECT ?name ?homepage WHERE {
|
||||||
?s a hc:Custodian ;
|
?s a crm:E39_Actor ;
|
||||||
hcp:custodian_type "ARCHIVE" ;
|
org:classification "ARCHIVE" ;
|
||||||
schema:addressCountry "NL" ;
|
schema:addressCountry "NL" ;
|
||||||
skos:prefLabel ?name ;
|
skos:prefLabel ?name ;
|
||||||
foaf:homepage ?homepage .
|
foaf:homepage ?homepage .
|
||||||
} LIMIT 10
|
} LIMIT 10
|
||||||
"""),
|
"""),
|
||||||
("Heritage institutions with social media", """
|
("Heritage institutions with social media", """
|
||||||
PREFIX hc: <https://nde.nl/ontology/hc/class/>
|
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
|
||||||
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
||||||
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||||
SELECT ?name (COUNT(?account) as ?social_count) WHERE {
|
SELECT ?name (COUNT(?account) as ?social_count) WHERE {
|
||||||
?s a hc:Custodian ;
|
?s a crm:E39_Actor ;
|
||||||
skos:prefLabel ?name ;
|
skos:prefLabel ?name ;
|
||||||
foaf:account ?account .
|
foaf:account ?account .
|
||||||
} GROUP BY ?s ?name ORDER BY DESC(?social_count) LIMIT 10
|
} GROUP BY ?s ?name ORDER BY DESC(?social_count) LIMIT 10
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue