feat(rag): add factual query fast path - skip LLM for count/list queries
- Add ontology cache warming at startup in the lifespan() function
- Add is_factual_query() detection in template_sparql.py (12 templates)
- Add factual_result and sparql_query fields to DSPyQueryResponse
- Skip LLM generation for factual templates (count, list, compare)
- Execute SPARQL directly and return results as a table (~15s → ~2s latency)
- Update ConversationPanel.tsx to render the factual results table
- Add CSS styling for factual results with a green theme

For queries like 'hoeveel archieven zijn er in Den Haag', the SPARQL results ARE the answer - no need for expensive LLM prose generation.
This commit is contained in:
parent
85d9cee82f
commit
0b0ea75070
4 changed files with 1458 additions and 59 deletions
|
|
@@ -683,6 +683,10 @@ class DSPyQueryResponse(BaseModel):
|
|||
template_used: bool = False # Whether template-based SPARQL was used (vs LLM generation)
|
||||
template_id: str | None = None # Which template was used (e.g., "institution_by_city", "person_by_name")
|
||||
|
||||
# Factual query mode - skip LLM generation for count/list queries
|
||||
factual_result: bool = False # True if this is a direct SPARQL result (no LLM prose generation)
|
||||
sparql_query: str | None = None # The SPARQL query that was executed (for transparency)
|
||||
|
||||
|
||||
def extract_llm_response_metadata(
|
||||
lm: Any,
|
||||
|
|
@@ -1833,6 +1837,28 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:
|
|||
except Exception as e:
|
||||
logger.warning(f"Failed to initialize AtomicCacheManager: {e}")
|
||||
|
||||
# === ONTOLOGY CACHE WARMUP: Pre-load KG values to avoid cold-start latency ===
|
||||
# The OntologyLoader queries the Knowledge Graph for valid slot values (cities, regions, types).
|
||||
# These queries can take 1-3 seconds each on first access.
|
||||
# By pre-loading at startup, we eliminate this delay for users.
|
||||
ontology_warmup_start = time.perf_counter()
|
||||
try:
|
||||
from template_sparql import get_ontology_loader
|
||||
|
||||
logger.info("Warming up ontology cache (pre-loading KG values)...")
|
||||
ontology = get_ontology_loader()
|
||||
ontology.load() # Triggers KG queries for institution_types, subregions, cities, etc.
|
||||
|
||||
ontology_warmup_duration = time.perf_counter() - ontology_warmup_start
|
||||
cache_stats = ontology.get_kg_cache_stats()
|
||||
logger.info(
|
||||
f"✅ Ontology cache warmed up in {ontology_warmup_duration:.2f}s "
|
||||
f"({cache_stats['cache_size']} KG queries cached, TTL={cache_stats['ttl_seconds']}s)"
|
||||
)
|
||||
except Exception as e:
|
||||
ontology_warmup_duration = time.perf_counter() - ontology_warmup_start
|
||||
logger.warning(f"Failed to warm up ontology cache: {e}")
|
||||
|
||||
logger.info("Heritage RAG API started")
|
||||
|
||||
yield
|
||||
|
|
@@ -2967,6 +2993,158 @@ async def dspy_query(request: DSPyQueryRequest) -> DSPyQueryResponse:
|
|||
except Exception as e:
|
||||
logger.warning(f"Atomic decomposition failed: {e}")
|
||||
|
||||
# ==========================================================================
|
||||
# FACTUAL QUERY FAST PATH: Skip LLM for count/list queries
|
||||
# ==========================================================================
|
||||
# For factual queries (counts, lists, comparisons), the SPARQL results ARE
|
||||
# the answer. No need for expensive LLM prose generation - just return the
|
||||
# table directly. This can reduce latency from ~15s to ~2s.
|
||||
# ==========================================================================
|
||||
try:
|
||||
from template_sparql import get_template_pipeline, is_factual_query
|
||||
|
||||
template_pipeline = get_template_pipeline()
|
||||
|
||||
# Build conversation history for template context resolution
|
||||
history_for_template = []
|
||||
for turn in request.context:
|
||||
if turn.get("question") and turn.get("answer"):
|
||||
history_for_template.append({
|
||||
"question": turn["question"],
|
||||
"answer": turn["answer"]
|
||||
})
|
||||
|
||||
# Try template matching (this handles follow-up resolution internally)
|
||||
template_result = template_pipeline(
|
||||
question=request.question,
|
||||
language=request.language,
|
||||
history=history_for_template,
|
||||
conversation_state=conversation_state,
|
||||
)
|
||||
|
||||
# Check if this is a factual query that can skip LLM
|
||||
if template_result.matched and is_factual_query(template_result.template_id):
|
||||
logger.info(
|
||||
f"[FACTUAL-QUERY] Template '{template_result.template_id}' is factual - "
|
||||
f"skipping LLM generation (confidence={template_result.confidence:.2f})"
|
||||
)
|
||||
|
||||
# Execute SPARQL directly
|
||||
sparql_query = template_result.sparql
|
||||
sparql_results: list[dict[str, Any]] = []
|
||||
sparql_error: str | None = None
|
||||
|
||||
try:
|
||||
if retriever:
|
||||
client = await retriever._get_sparql_client()
|
||||
response = await client.post(
|
||||
settings.sparql_endpoint,
|
||||
data={"query": sparql_query},
|
||||
headers={"Accept": "application/sparql-results+json"},
|
||||
timeout=30.0,
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
bindings = data.get("results", {}).get("bindings", [])
|
||||
sparql_results = [
|
||||
{k: v.get("value") for k, v in binding.items()}
|
||||
for binding in bindings
|
||||
]
|
||||
else:
|
||||
sparql_error = f"SPARQL returned {response.status_code}"
|
||||
else:
|
||||
sparql_error = "Retriever not available"
|
||||
|
||||
except Exception as e:
|
||||
sparql_error = str(e)
|
||||
logger.warning(f"[FACTUAL-QUERY] SPARQL execution failed: {e}")
|
||||
|
||||
elapsed_ms = (time.time() - start_time) * 1000
|
||||
|
||||
# Generate a simple summary answer based on result type
|
||||
if sparql_error:
|
||||
answer = f"Er is een fout opgetreden bij het uitvoeren van de query: {sparql_error}"
|
||||
elif not sparql_results:
|
||||
answer = "Geen resultaten gevonden."
|
||||
elif template_result.template_id and "count" in template_result.template_id:
|
||||
# Count query - format as count
|
||||
count_value = sparql_results[0].get("count", len(sparql_results))
|
||||
answer = f"Aantal: {count_value}"
|
||||
else:
|
||||
# List query - just indicate result count
|
||||
answer = f"Gevonden: {len(sparql_results)} resultaten. Zie de tabel hieronder."
|
||||
|
||||
# Build response with factual_result=True
|
||||
factual_response = DSPyQueryResponse(
|
||||
question=request.question,
|
||||
resolved_question=getattr(template_result, "resolved_question", None),
|
||||
answer=answer,
|
||||
sources_used=["SPARQL Knowledge Graph"],
|
||||
visualization={"type": "table", "sparql_query": sparql_query},
|
||||
retrieved_results=sparql_results,
|
||||
query_type="factual",
|
||||
query_time_ms=round(elapsed_ms, 2),
|
||||
conversation_turn=len(request.context),
|
||||
cache_hit=False,
|
||||
session_id=session_id,
|
||||
template_used=True,
|
||||
template_id=template_result.template_id,
|
||||
factual_result=True,
|
||||
sparql_query=sparql_query,
|
||||
)
|
||||
|
||||
# Update session with this turn
|
||||
if session_mgr and session_id:
|
||||
try:
|
||||
await session_mgr.add_turn_to_session(
|
||||
session_id=session_id,
|
||||
question=request.question,
|
||||
answer=answer,
|
||||
resolved_question=getattr(template_result, "resolved_question", None),
|
||||
template_id=template_result.template_id,
|
||||
slots=template_result.slots or {},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to update session: {e}")
|
||||
|
||||
# Record metrics
|
||||
if METRICS_AVAILABLE and record_query:
|
||||
try:
|
||||
record_query(
|
||||
endpoint="dspy_query",
|
||||
template_used=True,
|
||||
template_id=template_result.template_id,
|
||||
cache_hit=False,
|
||||
status="success",
|
||||
duration_seconds=elapsed_ms / 1000,
|
||||
intent="factual",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to record metrics: {e}")
|
||||
|
||||
# Cache the response
|
||||
if retriever:
|
||||
await retriever.cache.set_dspy(
|
||||
question=request.question,
|
||||
language=request.language,
|
||||
llm_provider="none", # No LLM used
|
||||
embedding_model=request.embedding_model,
|
||||
response=factual_response.model_dump(),
|
||||
context=request.context if request.context else None,
|
||||
)
|
||||
|
||||
logger.info(f"[FACTUAL-QUERY] Returned {len(sparql_results)} results in {elapsed_ms:.2f}ms (LLM skipped)")
|
||||
return factual_response
|
||||
|
||||
except ImportError as e:
|
||||
logger.debug(f"Template SPARQL not available for factual query detection: {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Factual query detection failed (continuing with full pipeline): {e}")
|
||||
|
||||
# ==========================================================================
|
||||
# FULL RAG PIPELINE: For non-factual queries or when factual detection fails
|
||||
# ==========================================================================
|
||||
try:
|
||||
# Import DSPy pipeline and History
|
||||
import dspy
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@@ -1033,3 +1033,107 @@
|
|||
font-size: 0.625rem;
|
||||
}
|
||||
}
|
||||
|
||||
/* =============================================================================
|
||||
FACTUAL QUERY RESULTS TABLE
|
||||
For direct SPARQL results when LLM generation is skipped (count/list queries)
|
||||
============================================================================= */
|
||||
|
||||
.conversation-panel__factual-results {
|
||||
margin-top: 0.75rem;
|
||||
border: 1px solid #10b981;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
background: white;
|
||||
}
|
||||
|
||||
.conversation-panel__factual-badge {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.5rem 0.75rem;
|
||||
background: linear-gradient(135deg, #dcfce7 0%, #d1fae5 100%);
|
||||
border-bottom: 1px solid #10b981;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 600;
|
||||
color: #166534;
|
||||
}
|
||||
|
||||
.conversation-panel__factual-badge svg {
|
||||
color: #10b981;
|
||||
}
|
||||
|
||||
.conversation-panel__results-table-wrapper {
|
||||
overflow-x: auto;
|
||||
max-height: 400px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.conversation-panel__results-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 0.8125rem;
|
||||
}
|
||||
|
||||
.conversation-panel__results-table th {
|
||||
position: sticky;
|
||||
top: 0;
|
||||
background: #f0fdf4;
|
||||
padding: 0.5rem 0.75rem;
|
||||
text-align: left;
|
||||
font-weight: 600;
|
||||
color: #166534;
|
||||
border-bottom: 2px solid #10b981;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.conversation-panel__results-table td {
|
||||
padding: 0.5rem 0.75rem;
|
||||
border-bottom: 1px solid #e5e7eb;
|
||||
vertical-align: top;
|
||||
max-width: 300px;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
|
||||
.conversation-panel__results-table tr:hover td {
|
||||
background: #f0fdf4;
|
||||
}
|
||||
|
||||
.conversation-panel__results-table a {
|
||||
color: #10b981;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.conversation-panel__results-table a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.conversation-panel__results-truncated {
|
||||
padding: 0.5rem 0.75rem;
|
||||
background: #fef3c7;
|
||||
color: #92400e;
|
||||
font-size: 0.75rem;
|
||||
text-align: center;
|
||||
border-top: 1px solid #fbbf24;
|
||||
}
|
||||
|
||||
/* Responsive: Factual Results Table */
|
||||
@media (max-width: 768px) {
|
||||
.conversation-panel__results-table-wrapper {
|
||||
max-height: 250px;
|
||||
}
|
||||
|
||||
.conversation-panel__results-table {
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
.conversation-panel__results-table th,
|
||||
.conversation-panel__results-table td {
|
||||
padding: 0.375rem 0.5rem;
|
||||
}
|
||||
|
||||
.conversation-panel__results-table td {
|
||||
max-width: 150px;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -213,6 +213,7 @@ interface Message {
|
|||
errorCode?: string;
|
||||
llmProviderUsed?: string; // Which LLM provider generated this response
|
||||
llmResponse?: LLMResponseMetadata; // Full LLM response metadata including chain-of-thought
|
||||
factualResult?: boolean; // True if this is a direct SPARQL result (no LLM prose generation)
|
||||
}
|
||||
|
||||
interface HistoryItem {
|
||||
|
|
@@ -374,6 +375,7 @@ export const ConversationPanel: React.FC<ConversationPanelProps> = ({ onQueryGen
|
|||
sourcesUsed: string[];
|
||||
llmProviderUsed?: string;
|
||||
llmResponse?: LLMResponseMetadata; // Full LLM response with reasoning_content
|
||||
factualResult?: boolean; // True if LLM was skipped for factual query
|
||||
}> => {
|
||||
// Determine API endpoint based on environment
|
||||
const hostname = window.location.hostname;
|
||||
|
|
@@ -414,12 +416,13 @@ export const ConversationPanel: React.FC<ConversationPanelProps> = ({ onQueryGen
|
|||
|
||||
const data = await response.json();
|
||||
return {
|
||||
sparql: data.visualization?.sparql_query || data.sparql, // Get SPARQL from visualization if available
|
||||
sparql: data.visualization?.sparql_query || data.sparql_query || data.sparql, // Get SPARQL from visualization or direct field
|
||||
sparqlResults: data.retrieved_results, // Raw results for debug display
|
||||
answer: data.answer || data.explanation || '',
|
||||
sourcesUsed: data.sources_used || selectedSources,
|
||||
llmProviderUsed: data.llm_provider_used,
|
||||
llmResponse: data.llm_response, // Pass through chain-of-thought metadata
|
||||
factualResult: data.factual_result, // True if LLM was skipped for factual query
|
||||
};
|
||||
};
|
||||
|
||||
|
|
@@ -472,6 +475,7 @@ export const ConversationPanel: React.FC<ConversationPanelProps> = ({ onQueryGen
|
|||
sourcesUsed: result.sourcesUsed,
|
||||
llmProviderUsed: result.llmProviderUsed,
|
||||
llmResponse: result.llmResponse,
|
||||
factualResult: result.factualResult, // Direct SPARQL result flag
|
||||
isLoading: false,
|
||||
}
|
||||
: msg
|
||||
|
|
@@ -965,6 +969,51 @@ export const ConversationPanel: React.FC<ConversationPanelProps> = ({ onQueryGen
|
|||
<>
|
||||
<p>{message.content}</p>
|
||||
|
||||
{/* Factual Query Results Table - shown when LLM was skipped */}
|
||||
{message.factualResult && message.sparqlResults && message.sparqlResults.length > 0 && (
|
||||
<div className="conversation-panel__factual-results">
|
||||
<div className="conversation-panel__factual-badge">
|
||||
<Database size={14} />
|
||||
<span>{language === 'nl' ? 'Direct uit kennisgraaf' : 'Direct from knowledge graph'}</span>
|
||||
</div>
|
||||
<div className="conversation-panel__results-table-wrapper">
|
||||
<table className="conversation-panel__results-table">
|
||||
<thead>
|
||||
<tr>
|
||||
{Object.keys(message.sparqlResults[0]).map(key => (
|
||||
<th key={key}>{key}</th>
|
||||
))}
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{message.sparqlResults.slice(0, 50).map((row, idx) => (
|
||||
<tr key={idx}>
|
||||
{Object.entries(row).map(([key, value]) => (
|
||||
<td key={key}>
|
||||
{typeof value === 'string' && value.startsWith('http') ? (
|
||||
<a href={value} target="_blank" rel="noopener noreferrer" title={value}>
|
||||
{value.split('/').pop() || value}
|
||||
</a>
|
||||
) : (
|
||||
String(value ?? '')
|
||||
)}
|
||||
</td>
|
||||
))}
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
</table>
|
||||
{message.sparqlResults.length > 50 && (
|
||||
<div className="conversation-panel__results-truncated">
|
||||
{language === 'nl'
|
||||
? `Toont 50 van ${message.sparqlResults.length} resultaten`
|
||||
: `Showing 50 of ${message.sparqlResults.length} results`}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Chain-of-Thought Reasoning (GLM 4.7 Interleaved Thinking) */}
|
||||
{message.llmResponse?.reasoning_content && (
|
||||
<details className="conversation-panel__reasoning">
|
||||
|
|
|
|||
Loading…
Reference in a new issue