diff --git a/backend/rag/dspy_heritage_rag.py b/backend/rag/dspy_heritage_rag.py
index f6fb2a2bee..7deb94cc5a 100644
--- a/backend/rag/dspy_heritage_rag.py
+++ b/backend/rag/dspy_heritage_rag.py
@@ -20,6 +20,7 @@ from __future__ import annotations
 import asyncio
 import json
 import logging
+import random
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from enum import Enum
@@ -32,6 +33,104 @@ from dspy.streaming import StatusMessage, StreamListener, StatusMessageProvider
 
 logger = logging.getLogger(__name__)
 
+
+# =============================================================================
+# RATE LIMIT HANDLING
+# =============================================================================
+
+def is_rate_limit_error(error: Exception) -> bool:
+    """Check if an exception is a rate limit error (429).
+    
+    Handles both direct errors and ExceptionGroups from asyncio.TaskGroup.
+    """
+    error_str = str(error).lower()
+    
+    # Direct rate limit indicators
+    if '429' in error_str or 'rate' in error_str or '1305' in error_str:
+        return True
+    
+    # Check nested exceptions in ExceptionGroup (from asyncio.TaskGroup)
+    if hasattr(error, 'exceptions'):
+        for sub_exc in error.exceptions:
+            if is_rate_limit_error(sub_exc):
+                return True
+    
+    # Check __cause__ chain
+    if error.__cause__ and is_rate_limit_error(error.__cause__):
+        return True
+    
+    return False
+
+
+def extract_actual_error(error: Exception) -> Exception:
+    """Extract the actual error from an ExceptionGroup if present."""
+    if hasattr(error, 'exceptions'):
+        for sub_exc in error.exceptions:
+            # Return rate limit error if found
+            if is_rate_limit_error(sub_exc):
+                return sub_exc
+            # Recursively check nested groups
+            actual = extract_actual_error(sub_exc)
+            if actual is not sub_exc:
+                return actual
+    return error
+
+
+async def call_with_rate_limit_retry(
+    func: Callable,
+    *args,
+    max_retries: int = 3,
+    base_delay: float = 2.0,
+    max_delay: float = 30.0,
+    **kwargs
+) -> Any:
+    """Call a function with exponential backoff retry on rate limit errors.
+    
+    Args:
+        func: The function to call (can be sync or async)
+        *args: Positional arguments for the function
+        max_retries: Maximum number of retry attempts
+        base_delay: Initial delay in seconds
+        max_delay: Maximum delay in seconds
+        **kwargs: Keyword arguments for the function
+        
+    Returns:
+        The function's return value
+        
+    Raises:
+        The original exception if max retries exceeded or non-rate-limit error
+    """
+    last_exception = None
+    
+    for attempt in range(max_retries + 1):
+        try:
+            # Call the function (handle both sync and async)
+            result = func(*args, **kwargs)
+            if asyncio.iscoroutine(result):
+                result = await result
+            return result
+            
+        except Exception as e:
+            last_exception = e
+            actual_error = extract_actual_error(e)
+            
+            if is_rate_limit_error(e) and attempt < max_retries:
+                # Calculate delay with exponential backoff + jitter
+                delay = min(base_delay * (2 ** attempt) + random.uniform(0, 1), max_delay)
+                logger.warning(
+                    f"Rate limited (attempt {attempt + 1}/{max_retries + 1}), "
+                    f"waiting {delay:.1f}s before retry. Error: {actual_error}"
+                )
+                await asyncio.sleep(delay)
+            else:
+                # Not a rate limit error, or max retries exceeded
+                if is_rate_limit_error(e):
+                    logger.error(f"Max retries ({max_retries}) exceeded for rate limit")
+                raise
+    
+    # Should not reach here, but just in case
+    raise last_exception if last_exception else RuntimeError("Unexpected retry loop exit")
+
 # Semantic cache imports (graceful degradation if not available)
 SEMANTIC_CACHE_AVAILABLE = False
 get_cache: Optional[Callable[[], Any]] = None
@@ -229,71 +328,95 @@ class HeritageQueryIntent(dspy.Signature):
 class HeritageSPARQLGenerator(dspy.Signature):
     """Generate SPARQL queries for heritage custodian knowledge graph.
     
-    You are an expert in SPARQL and the Heritage Custodian Ontology.
+    You are an expert in SPARQL and the Heritage Custodian Ontology (based on LinkML schema).
     Generate valid SPARQL queries that work with our Oxigraph endpoint.
     
-    Key prefixes (MUST USE THESE EXACT URIs):
-    - PREFIX hc: <https://nde.nl/ontology/hc/class/>
-    - PREFIX hcp: <https://w3id.org/heritage/custodian/>
-    - PREFIX ghcid: <https://w3id.org/heritage/custodian/>
+    REQUIRED PREFIXES (MUST USE THESE EXACT URIs):
+    - PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+    - PREFIX org: <http://www.w3.org/ns/org#>
     - PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
     - PREFIX schema: <http://schema.org/>
     - PREFIX foaf: <http://xmlns.com/foaf/0.1/>
-    - PREFIX dct: <http://purl.org/dc/terms/>
-    - PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
-    - PREFIX wdt: <http://www.wikidata.org/prop/direct/>
+    - PREFIX dcterms: <http://purl.org/dc/terms/>
+    - PREFIX hc: <https://nde.nl/ontology/hc/>
+    - PREFIX rico: <https://www.ica.org/standards/RiC/ontology#>
+    - PREFIX prov: <http://www.w3.org/ns/prov#>
     - PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
     
-    Key classes:
-    - hc:Custodian - Heritage custodian institution
-    - schema:Place - Geographic location
-    - foaf:OnlineAccount - Social media profile
+    MAIN CLASS (from LinkML Custodian.yaml):
+    - crm:E39_Actor - Heritage custodian institution (this is THE class for all custodians)
     
-    Key properties:
-    - skos:prefLabel - Institution name (literal)
-    - hcp:institutionType - Type code (M, L, A, G, etc.)
-    - schema:addressCountry - Country (Wikidata entity)
-    - foaf:homepage - Website URL
+    KEY PROPERTIES:
+    - dcterms:identifier - Unique identifier (hc_id)
+    - skos:prefLabel - Preferred/display name (literal)
+    - org:classification - Custodian type (MUSEUM, LIBRARY, ARCHIVE, etc.)
     - crm:P53_has_former_or_current_location - Location link
+    - foaf:homepage - Website URL
+    - org:subOrganizationOf - Parent organization
+    - crm:P46_is_composed_of - Collection links
+    - schema:foundingDate - Founding date (xsd:date)
     
-    TEMPORAL PROPERTIES (for founding/oldest queries):
-    - schema:foundingDate - Institution founding date (xsd:date, e.g., "1800-01-01")
-    - hcp:foundingYear - Founding year as integer (xsd:integer, e.g., 1800)
-    - wdt:P571 - Wikidata inception date (same as schema:foundingDate)
+    CUSTODIAN TYPE VALUES (use FULL names, not single letters):
+    MUSEUM, LIBRARY, ARCHIVE, GALLERY, OFFICIAL_INSTITUTION, RESEARCH_CENTER,
+    COMMERCIAL, UNSPECIFIED, BIO_CUSTODIAN, EDUCATION_PROVIDER, HERITAGE_SOCIETY,
+    FEATURE_CUSTODIAN, INTANGIBLE_HERITAGE_GROUP, MIXED, PERSONAL_COLLECTION,
+    HOLY_SACRED_SITE, DIGITAL_PLATFORM, NON_PROFIT, TASTE_SCENT_HERITAGE
     
-    Example - Find oldest archives:
+    Example - Find all museums:
     ```sparql
-    PREFIX schema: <http://schema.org/>
+    PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+    PREFIX org: <http://www.w3.org/ns/org#>
     PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
-    PREFIX hcp: <https://w3id.org/heritage/custodian/>
     
-    SELECT ?inst ?name ?founded WHERE {
-      ?inst a <https://nde.nl/ontology/hc/class/Custodian> ;
-            skos:prefLabel ?name ;
-            hcp:institutionType "A" ;
-            schema:foundingDate ?founded .
+    SELECT ?custodian ?name WHERE {
+      ?custodian a crm:E39_Actor ;
+                 org:classification "MUSEUM" ;
+                 skos:prefLabel ?name .
+    }
+    LIMIT 100
+    ```
+    
+    Example - Find custodian by name (case-insensitive search):
+    ```sparql
+    PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
+    
+    SELECT ?custodian ?name WHERE {
+      ?custodian a crm:E39_Actor ;
+                 skos:prefLabel ?name .
+      FILTER(CONTAINS(LCASE(STR(?name)), "rijksmuseum"))
+    }
+    ```
+    
+    Example - Count custodians by type:
+    ```sparql
+    PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+    PREFIX org: <http://www.w3.org/ns/org#>
+    
+    SELECT ?type (COUNT(?custodian) AS ?count) WHERE {
+      ?custodian a crm:E39_Actor ;
+                 org:classification ?type .
+    }
+    GROUP BY ?type
+    ORDER BY DESC(?count)
+    ```
+    
+    Example - Find oldest archives by founding date:
+    ```sparql
+    PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+    PREFIX org: <http://www.w3.org/ns/org#>
+    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
+    PREFIX schema: <http://schema.org/>
+    
+    SELECT ?custodian ?name ?founded WHERE {
+      ?custodian a crm:E39_Actor ;
+                 org:classification "ARCHIVE" ;
+                 skos:prefLabel ?name ;
+                 schema:foundingDate ?founded .
     }
     ORDER BY ?founded
     LIMIT 10
     ```
-    
-    Example - Find museums founded before 1900:
-    ```sparql
-    PREFIX schema: <http://schema.org/>
-    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
-    PREFIX hcp: <https://w3id.org/heritage/custodian/>
-    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
-    
-    SELECT ?inst ?name ?year WHERE {
-      ?inst a <https://nde.nl/ontology/hc/class/Custodian> ;
-            skos:prefLabel ?name ;
-            hcp:institutionType "M" ;
-            hcp:foundingYear ?year .
-      FILTER(?year < 1900)
-    }
-    ORDER BY ?year
-    LIMIT 20
-    ```
     """
     
     question: str = dspy.InputField(desc="Natural language question")
@@ -1530,14 +1653,16 @@ def create_heritage_tools(
             JSON string of nearby institutions with distances
         """
         sparql = f"""
+        PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+        PREFIX org: <http://www.w3.org/ns/org#>
+        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
         PREFIX geo: <http://www.opengis.net/ont/geosparql#>
-        PREFIX hco: <https://w3id.org/hco/>
         PREFIX geof: <http://www.opengis.net/def/function/geosparql/>
         
         SELECT ?inst ?name ?type ?distance WHERE {{
-            ?inst a hco:HeritageCustodian ;
-                  hco:name ?name ;
-                  hco:institutionType ?type ;
+            ?inst a crm:E39_Actor ;
+                  skos:prefLabel ?name ;
+                  org:classification ?type ;
                   geo:hasGeometry/geo:asWKT ?wkt .
             
             BIND(geof:distance(?wkt, "POINT({longitude} {latitude})"^^geo:wktLiteral, <http://www.opengis.net/def/uom/OGC/1.0/kilometre>) AS ?distance)
@@ -1571,27 +1696,30 @@ def create_heritage_tools(
             JSON string with full institution details
         """
         sparql = f"""
-        PREFIX hco: <https://w3id.org/hco/>
+        PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+        PREFIX org: <http://www.w3.org/ns/org#>
         PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
-        PREFIX dct: <http://purl.org/dc/terms/>
+        PREFIX dcterms: <http://purl.org/dc/terms/>
+        PREFIX schema: <http://schema.org/>
+        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
         
         SELECT * WHERE {{
-            ?inst a hco:HeritageCustodian .
+            ?inst a crm:E39_Actor .
             
             {{
-                ?inst hco:ghcid "{identifier}"
+                ?inst dcterms:identifier "{identifier}"
             }} UNION {{
-                ?inst hco:isil "{identifier}"
+                ?inst dcterms:identifier "{identifier}"
             }} UNION {{
-                ?inst dct:identifier <http://www.wikidata.org/entity/{identifier}>
+                ?inst dcterms:identifier <http://www.wikidata.org/entity/{identifier}>
             }}
             
             ?inst skos:prefLabel ?name .
-            OPTIONAL {{ ?inst hco:institutionType ?type }}
-            OPTIONAL {{ ?inst hco:city ?city }}
-            OPTIONAL {{ ?inst hco:country ?country }}
-            OPTIONAL {{ ?inst hco:foundingDate ?founded }}
-            OPTIONAL {{ ?inst hco:description ?desc }}
+            OPTIONAL {{ ?inst org:classification ?type }}
+            OPTIONAL {{ ?inst crm:P53_has_former_or_current_location ?location }}
+            OPTIONAL {{ ?inst schema:foundingDate ?founded }}
+            OPTIONAL {{ ?inst dcterms:description ?desc }}
+            OPTIONAL {{ ?inst foaf:homepage ?website }}
         }}
         """
         return query_knowledge_graph(sparql)
@@ -1622,15 +1750,15 @@ def create_heritage_tools(
             type_filter = f'FILTER(?type = "{institution_type}")'
         
         sparql = f"""
-        PREFIX hco: <https://w3id.org/hco/>
+        PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+        PREFIX org: <http://www.w3.org/ns/org#>
         
-        SELECT ?{group_by} (COUNT(?inst) AS ?count) WHERE {{
-            ?inst a hco:HeritageCustodian ;
-                  hco:{group_by} ?{group_by} ;
-                  hco:institutionType ?type .
+        SELECT ?type (COUNT(?inst) AS ?count) WHERE {{
+            ?inst a crm:E39_Actor ;
+                  org:classification ?type .
             {type_filter}
         }}
-        GROUP BY ?{group_by}
+        GROUP BY ?type
         ORDER BY DESC(?count)
         LIMIT 50
         """
@@ -3264,6 +3392,376 @@ class HeritageRAGPipeline(dspy.Module):
         
         return prediction
 
+    async def forward_streaming(
+        self,
+        question: str,
+        language: str = "nl",
+        history: History = None,
+        include_viz: bool = True,
+        skip_cache: bool = False,
+        embedding_model: str | None = None,
+    ) -> AsyncIterator[dict]:
+        """Execute RAG pipeline with streaming answer generation.
+        
+        Yields dictionaries with different types:
+        - {"type": "retrieval_complete", "context": ..., "routing": ..., "retrieved_results": ...}
+        - {"type": "token", "content": "..."}  (streaming answer tokens)
+        - {"type": "answer_complete", "prediction": ...}  (final result)
+        
+        Args:
+            question: User's natural language question
+            language: Response language (nl, en)
+            history: Previous conversation turns for multi-turn context
+            include_viz: Whether to include visualization config
+            skip_cache: Force bypass cache lookup
+            embedding_model: Optional embedding model (minilm_384, openai_1536, bge_768)
+            
+        Yields:
+            Dictionaries with streaming progress and tokens
+        """
+        import asyncio
+        
+        # Initialize empty history if not provided
+        if history is None:
+            history = History(messages=[])
+        
+        # =================================================================
+        # Cache Check - Look for cached response before expensive LLM calls
+        # =================================================================
+        if SEMANTIC_CACHE_AVAILABLE and not skip_cache:
+            try:
+                if should_bypass_cache is not None and get_cache is not None:
+                    if not should_bypass_cache(question):
+                        cache = get_cache()
+                        cached_response = cache.get_sync(question, language=language)
+                        
+                        if cached_response and not cached_response.get("_warmup_entry"):
+                            logger.info(f"Cache HIT for streaming query: {question[:50]}...")
+                            # Return cached response immediately
+                            yield {
+                                "type": "cache_hit",
+                                "prediction": Prediction(
+                                    answer=cached_response.get("answer", ""),
+                                    intent=cached_response.get("intent", "exploration"),
+                                    entities=cached_response.get("entities", []),
+                                    sparql=cached_response.get("sparql"),
+                                    sources_used=cached_response.get("sources_used", []),
+                                    confidence=cached_response.get("confidence", 0.9),
+                                    citations=cached_response.get("citations", []),
+                                    follow_up=cached_response.get("follow_up", []),
+                                    visualization=cached_response.get("visualization"),
+                                    cache_hit=True,
+                                )
+                            }
+                            return
+            except Exception as e:
+                logger.warning(f"Cache lookup failed in streaming mode: {e}")
+        
+        # =================================================================
+        # RETRIEVAL PHASE - Run synchronously in thread pool to not block
+        # =================================================================
+        detected_query_type = "institution"
+        retrieved_results = []
+        context_parts = [f"Query: {question}"]
+        sparql = None
+        entities = None
+        
+        # Step 1: Route query (with rate limit retry)
+        try:
+            routing = await call_with_rate_limit_retry(
+                self.router,
+                question=question,
+                language=language,
+                history=history,
+                max_retries=3,
+                base_delay=2.0,
+            )
+        except Exception as e:
+            actual_error = extract_actual_error(e)
+            logger.error(f"Router failed after retries: {actual_error}")
+            raise
+        resolved_question = getattr(routing, 'resolved_question', question)
+        
+        # Small delay between LLM calls to reduce rate limit pressure
+        await asyncio.sleep(0.5)
+        
+        # Step 2: Extract entities (optional, for context - with rate limit retry)
+        try:
+            entities = await call_with_rate_limit_retry(
+                self.entity_extractor,
+                question=question,
+                language=language,
+                max_retries=2,  # Fewer retries since this is optional
+                base_delay=1.5,
+            )
+        except Exception as e:
+            actual_error = extract_actual_error(e)
+            logger.warning(f"Entity extraction failed: {actual_error}")
+            entities = None
+        
+        # Small delay before streaming answer generation
+        await asyncio.sleep(0.5)
+        
+        # Step 3: Retrieval from databases
+        if self.retriever:
+            try:
+                # Detect if this is a person query
+                question_lower = question.lower()
+                person_indicators = ['wie ', 'who ', 'medewerker', 'staff', 'curator', 'director', 'werkt', 'works', 
+                                     'employee', 'team', 'directeur', 'conservator', 'archivaris', 'archivist',
+                                     'bibliothecaris', 'librarian', 'contactpersoon', 'contact person']
+                is_person_query = any(indicator in question_lower for indicator in person_indicators)
+                
+                if is_person_query:
+                    detected_query_type = "person"
+                    logger.info(f"Detected PERSON query for streaming: {resolved_question[:50]}...")
+                    
+                    # Search for persons
+                    if hasattr(self.retriever, 'search_persons'):
+                        person_results = self.retriever.search_persons(query=resolved_question, k=10, using=embedding_model)
+                        
+                        if person_results:
+                            context_parts.append("\n[RETRIEVED STAFF/PEOPLE - Real data from heritage database]:")
+                            for p in person_results:
+                                name = getattr(p, 'name', 'Unknown')
+                                headline = getattr(p, 'headline', '')
+                                custodian = getattr(p, 'custodian_name', '')
+                                
+                                entry = f"- {name}"
+                                if headline:
+                                    entry += f" ({headline})"
+                                if custodian:
+                                    entry += f" at {custodian}"
+                                context_parts.append(entry)
+                                
+                                retrieved_results.append({
+                                    "type": "person",
+                                    "name": name,
+                                    "headline": headline,
+                                    "custodian_name": custodian,
+                                    "score": getattr(p, 'combined_score', 0),
+                                })
+                else:
+                    # Institution search
+                    logger.info(f"Performing INSTITUTION retrieval for streaming: {resolved_question[:50]}...")
+                    inst_results = self.retriever.search(query=resolved_question, k=10, auto_route=False, using=embedding_model)
+                    
+                    if inst_results:
+                        context_parts.append("\n[RETRIEVED INSTITUTIONS - Real data from heritage database]:")
+                        for inst in inst_results:
+                            if hasattr(inst, 'to_dict'):
+                                inst_dict = inst.to_dict()
+                                name = inst_dict.get('name', 'Unknown')
+                                inst_type = inst_dict.get('metadata', {}).get('type', '')
+                                city = inst_dict.get('metadata', {}).get('city', '')
+                                inst_dict['type'] = 'institution'
+                                retrieved_results.append(inst_dict)
+                            else:
+                                name = getattr(inst, 'name', 'Unknown')
+                                inst_type = getattr(inst, 'type', '')
+                                city = getattr(inst, 'city', '')
+                                retrieved_results.append({
+                                    "type": "institution",
+                                    "name": name,
+                                    "institution_type": inst_type,
+                                    "city": city,
+                                })
+                            
+                            entry = f"- {name}"
+                            if inst_type:
+                                entry += f" ({inst_type})"
+                            if city:
+                                entry += f" in {city}"
+                            context_parts.append(entry)
+            except Exception as e:
+                logger.warning(f"Retrieval failed in streaming mode: {e}")
+                context_parts.append(f"\n[Retrieval error: {str(e)}]")
+        
+        context = "\n".join(context_parts)
+        
+        # Yield retrieval complete event
+        yield {
+            "type": "retrieval_complete",
+            "context": context,
+            "routing": {
+                "intent": routing.intent,
+                "sources": routing.sources,
+                "resolved_question": resolved_question,
+            },
+            "retrieved_results": retrieved_results,
+            "query_type": detected_query_type,
+        }
+        
+        # =================================================================
+        # ANSWER GENERATION PHASE - Stream tokens using dspy.streamify
+        # =================================================================
+        answer_text = ""
+        confidence = 0.8
+        citations = []
+        follow_up = []
+        streaming_succeeded = False
+        retry_count = 0
+        max_stream_retries = 2
+        
+        while not streaming_succeeded and retry_count <= max_stream_retries:
+            try:
+                # Create streamified version of the answer generator
+                streamified_answer_gen = dspy.streamify(self.answer_gen)
+                
+                # Use quality_lm context if available
+                lm_context = dspy.settings.context(lm=self.quality_lm) if self.quality_lm else dspy.settings.context()
+                
+                with lm_context:
+                    async for value in streamified_answer_gen(
+                        question=resolved_question,
+                        context=context,
+                        history=history,
+                        sources=routing.sources,
+                        language=language,
+                    ):
+                        if isinstance(value, dspy.Prediction):
+                            # Final prediction - extract all fields
+                            answer_text = value.answer
+                            confidence = getattr(value, 'confidence', 0.8)
+                            citations = getattr(value, 'citations', [])
+                            follow_up = getattr(value, 'follow_up', [])
+                            streaming_succeeded = True
+                        elif isinstance(value, str):
+                            # Streaming token
+                            yield {"type": "token", "content": value}
+                        else:
+                            # Handle ModelResponseStream from litellm/DSPy
+                            # Token text is in choices[0].delta.content or .reasoning_content
+                            token_text = None
+                            
+                            # Try to extract content from streaming response
+                            if hasattr(value, 'choices') and value.choices:
+                                delta = getattr(value.choices[0], 'delta', None)
+                                if delta:
+                                    # Check both content and reasoning_content (for GLM models)
+                                    token_text = getattr(delta, 'content', None) or getattr(delta, 'reasoning_content', None)
+                            
+                            # Fallback: check for message attribute (StatusMessage)
+                            if token_text is None and hasattr(value, 'message'):
+                                yield {"type": "status", "message": value.message}
+                                continue
+                            
+                            # Yield extracted token if we got text
+                            if token_text:
+                                yield {"type": "token", "content": token_text}
+                
+                # If we get here, streaming completed
+                streaming_succeeded = True
+            
+            except Exception as e:
+                actual_error = extract_actual_error(e)
+                retry_count += 1
+                
+                # Check if rate limited and can retry
+                if is_rate_limit_error(e) and retry_count <= max_stream_retries:
+                    delay = 2.0 * (2 ** (retry_count - 1)) + random.uniform(0, 1)
+                    logger.warning(
+                        f"Streaming rate limited (attempt {retry_count}/{max_stream_retries + 1}), "
+                        f"waiting {delay:.1f}s before retry. Error: {actual_error}"
+                    )
+                    await asyncio.sleep(delay)
+                    continue
+                
+                # Not rate limited or max retries exceeded - fall back to sync
+                logger.warning(
+                    f"Streaming answer generation failed after {retry_count} attempts, "
+                    f"falling back to sync. Error: {actual_error}"
+                )
+                break
+        
+        # Fallback to synchronous generation if streaming failed
+        if not streaming_succeeded:
+            try:
+                # Use rate limit retry for sync fallback too
+                lm_context = dspy.settings.context(lm=self.quality_lm) if self.quality_lm else dspy.settings.context()
+                with lm_context:
+                    answer_result = await call_with_rate_limit_retry(
+                        self.answer_gen,
+                        question=resolved_question,
+                        context=context,
+                        history=history,
+                        sources=routing.sources,
+                        language=language,
+                        max_retries=3,
+                        base_delay=2.0,
+                    )
+                answer_text = answer_result.answer
+                confidence = answer_result.confidence
+                citations = answer_result.citations
+                follow_up = answer_result.follow_up
+                # Yield the full answer as one token (fallback behavior)
+                logger.info("Sync fallback succeeded - yielding full answer as single token")
+                yield {"type": "token", "content": answer_text}
+            except Exception as fallback_e:
+                actual_error = extract_actual_error(fallback_e)
+                logger.exception(f"Fallback answer generation also failed: {actual_error}")
+                answer_text = "Er is een fout opgetreden bij het genereren van het antwoord."
+                yield {"type": "token", "content": answer_text}
+        
+        # Step 4: Visualization selection (if needed)
+        viz_config = None
+        if include_viz:
+            try:
+                viz_result = self.viz_selector(
+                    question=question,
+                    intent=routing.intent,
+                    schema_fields=["name", "type", "city", "country", "lat", "lon"],
+                    result_count=len(retrieved_results),
+                )
+                viz_config = {
+                    "type": viz_result.viz_type,
+                    "config": viz_result.config,
+                    "reasoning": viz_result.reasoning,
+                }
+            except Exception as e:
+                logger.warning(f"Visualization selection failed: {e}")
+        
+        # Build final prediction
+        prediction = Prediction(
+            answer=answer_text,
+            intent=routing.intent,
+            entities=entities,
+            sparql=sparql,
+            sources_used=routing.sources,
+            confidence=confidence,
+            citations=citations,
+            follow_up=follow_up,
+            visualization=viz_config,
+            cache_hit=False,
+            resolved_question=resolved_question,
+            retrieved_results=retrieved_results,
+            query_type=detected_query_type,
+            embedding_model_used=embedding_model,
+        )
+        
+        # Cache the response (fire and forget)
+        if SEMANTIC_CACHE_AVAILABLE and not skip_cache and confidence >= 0.7:
+            try:
+                if get_cache is not None:
+                    cache = get_cache()
+                    response_dict = {
+                        "answer": answer_text,
+                        "intent": routing.intent,
+                        "entities": entities.institutions if hasattr(entities, 'institutions') else [],
+                        "sparql": sparql,
+                        "sources_used": routing.sources,
+                        "confidence": confidence,
+                        "citations": citations,
+                        "follow_up": follow_up,
+                        "visualization": viz_config,
+                    }
+                    cache.set_sync(question, response_dict, intent=routing.intent, language=language)
+            except Exception as e:
+                logger.warning(f"Failed to cache streaming response: {e}")
+        
+        # Yield final prediction
+        yield {"type": "answer_complete", "prediction": prediction}
+
 
 # =============================================================================
 # 7. FACTORY FUNCTIONS
diff --git a/backend/rag/optimized_models/heritage_rag_bootstrap_20251211_142813.json b/backend/rag/optimized_models/heritage_rag_bootstrap_20251211_142813.json
index 5119f003a2..27d84a74e7 100644
--- a/backend/rag/optimized_models/heritage_rag_bootstrap_20251211_142813.json
+++ b/backend/rag/optimized_models/heritage_rag_bootstrap_20251211_142813.json
@@ -369,7 +369,7 @@
       }
     ],
     "signature": {
-      "instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX hc: <https://nde.nl/ontology/hc/class/>\n- PREFIX hcp: <https://nde.nl/ontology/hc/>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- hc:Custodian - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- hcp:custodian_type - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
+      "instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX org: <http://www.w3.org/ns/org#>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- crm:E39_Actor - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- org:classification - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
       "fields": [
         {
           "prefix": "Question:",
@@ -547,7 +547,7 @@
       }
     ],
     "signature": {
-      "instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX hc: <https://nde.nl/ontology/hc/class/>\n- PREFIX hcp: <https://nde.nl/ontology/hc/>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- hc:Custodian - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- hcp:custodian_type - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
+      "instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX org: <http://www.w3.org/ns/org#>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- crm:E39_Actor - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- org:classification - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
       "fields": [
         {
           "prefix": "Question:",
@@ -1072,4 +1072,4 @@
       "cloudpickle": "3.1"
     }
   }
-}
+}
\ No newline at end of file
diff --git a/backend/rag/optimized_models/heritage_rag_bootstrap_latest.json b/backend/rag/optimized_models/heritage_rag_bootstrap_latest.json
index 5119f003a2..27d84a74e7 100644
--- a/backend/rag/optimized_models/heritage_rag_bootstrap_latest.json
+++ b/backend/rag/optimized_models/heritage_rag_bootstrap_latest.json
@@ -369,7 +369,7 @@
       }
     ],
     "signature": {
-      "instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX hc: <https://nde.nl/ontology/hc/class/>\n- PREFIX hcp: <https://nde.nl/ontology/hc/>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- hc:Custodian - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- hcp:custodian_type - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
+      "instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX org: <http://www.w3.org/ns/org#>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- crm:E39_Actor - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- org:classification - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
       "fields": [
         {
           "prefix": "Question:",
@@ -547,7 +547,7 @@
       }
     ],
     "signature": {
-      "instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX hc: <https://nde.nl/ontology/hc/class/>\n- PREFIX hcp: <https://nde.nl/ontology/hc/>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- hc:Custodian - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- hcp:custodian_type - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
+      "instructions": "Generate SPARQL queries for heritage custodian knowledge graph.\n\nYou are an expert in SPARQL and the Heritage Custodian Ontology.\nGenerate valid SPARQL queries that work with our Oxigraph endpoint.\n\nKey prefixes (MUST USE THESE EXACT URIs):\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX org: <http://www.w3.org/ns/org#>\n- PREFIX ghcid: <https://w3id.org/heritage/custodian/>\n- PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n- PREFIX schema: <http://schema.org/>\n- PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n- PREFIX dct: <http://purl.org/dc/terms/>\n- PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>\n- PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n\nKey classes:\n- crm:E39_Actor - Heritage custodian institution\n- schema:Place - Geographic location\n- foaf:OnlineAccount - Social media profile\n\nKey properties:\n- skos:prefLabel - Institution name\n- org:classification - Type (MUSEUM, LIBRARY, ARCHIVE, etc.)\n- schema:addressCountry - Country code\n- foaf:homepage - Website\n- crm:P53_has_former_or_current_location - Location link",
       "fields": [
         {
           "prefix": "Question:",
@@ -1072,4 +1072,4 @@
       "cloudpickle": "3.1"
     }
   }
-}
+}
\ No newline at end of file
diff --git a/backend/rag/test_live_rag.py b/backend/rag/test_live_rag.py
index f2f88bf734..1fe189553c 100644
--- a/backend/rag/test_live_rag.py
+++ b/backend/rag/test_live_rag.py
@@ -49,8 +49,8 @@ def test_sparql_endpoint():
     
     # Count custodians
     query = """
-    PREFIX hc: <https://nde.nl/ontology/hc/class/>
-    SELECT (COUNT(*) as ?count) WHERE { ?s a hc:Custodian }
+    PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+    SELECT (COUNT(*) as ?count) WHERE { ?s a crm:E39_Actor }
     """
     
     response = httpx.post(
@@ -200,35 +200,35 @@ def run_sample_queries():
     
     queries = [
         ("Museums by country", """
-PREFIX hc: <https://nde.nl/ontology/hc/class/>
-PREFIX hcp: <https://nde.nl/ontology/hc/>
+PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+PREFIX org: <http://www.w3.org/ns/org#>
 PREFIX schema: <http://schema.org/>
 SELECT ?country (COUNT(?s) as ?count) WHERE {
-  ?s a hc:Custodian ;
-     hcp:custodian_type "MUSEUM" ;
+  ?s a crm:E39_Actor ;
+     org:classification "MUSEUM" ;
      schema:addressCountry ?country .
 } GROUP BY ?country ORDER BY DESC(?count) LIMIT 10
         """),
         ("Dutch archives with websites", """
-PREFIX hc: <https://nde.nl/ontology/hc/class/>
-PREFIX hcp: <https://nde.nl/ontology/hc/>
+PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
+PREFIX org: <http://www.w3.org/ns/org#>
 PREFIX schema: <http://schema.org/>
 PREFIX foaf: <http://xmlns.com/foaf/0.1/>
 PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
 SELECT ?name ?homepage WHERE {
-  ?s a hc:Custodian ;
-     hcp:custodian_type "ARCHIVE" ;
+  ?s a crm:E39_Actor ;
+     org:classification "ARCHIVE" ;
      schema:addressCountry "NL" ;
      skos:prefLabel ?name ;
      foaf:homepage ?homepage .
 } LIMIT 10
         """),
         ("Heritage institutions with social media", """
-PREFIX hc: <https://nde.nl/ontology/hc/class/>
+PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
 PREFIX foaf: <http://xmlns.com/foaf/0.1/>
 PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
 SELECT ?name (COUNT(?account) as ?social_count) WHERE {
-  ?s a hc:Custodian ;
+  ?s a crm:E39_Actor ;
      skos:prefLabel ?name ;
      foaf:account ?account .
 } GROUP BY ?s ?name ORDER BY DESC(?social_count) LIMIT 10