""" Specificity-aware classifier wrapper. This module provides SpecificityAwareClassifier, which wraps the existing TemplateClassifier and adds specificity scoring to the output. The wrapper: 1. Delegates classification to existing TemplateClassifier 2. Maps SPARQL template → context template via SPARQLToContextMapper 3. Looks up specificity scores via SpecificityLookup 4. Returns ClassificationWithScores with filtered classes This follows Option C (Hybrid) architecture: the existing classifier is preserved unchanged, and we add a thin wrapper layer. """ from __future__ import annotations import logging from typing import Any, Optional from .models import ( ClassificationResult, ClassificationWithScores, ContextTemplate, ) from .mapper import get_sparql_to_context_mapper, SPARQLToContextMapper from .lookup import get_specificity_lookup, SpecificityLookup logger = logging.getLogger(__name__) class SpecificityAwareClassifier: """Wrapper that adds specificity scoring to TemplateClassifier. This class wraps the existing TemplateClassifier (from template_sparql.py) and enriches its output with specificity scores, allowing the RAG pipeline to filter schema classes based on query relevance. Architecture (Option C - Hybrid): ``` User Question ↓ ┌─────────────────────────────────────┐ │ EXISTING: TemplateClassifier │ ← Unchanged │ (Classifies to SPARQL template ID) │ └─────────────────────────────────────┘ ↓ sparql_template_id + slots ┌─────────────────────────────────────┐ │ SPARQLToContextMapper │ ← New (this module) │ (Maps SPARQL → Context templates) │ └─────────────────────────────────────┘ ↓ context_template_id ┌─────────────────────────────────────┐ │ SpecificityLookup │ ← New (lookup.py) │ (Filters classes by score threshold)│ └─────────────────────────────────────┘ ↓ filtered_classes + scores ┌─────────────────────────────────────┐ │ RAG Pipeline / UML Visualization │ └─────────────────────────────────────┘ ``` Usage: from backend.rag.template_sparql import TemplateClassifier from backend.rag.specificity import SpecificityAwareClassifier # Wrap existing classifier base_classifier = TemplateClassifier() classifier = SpecificityAwareClassifier(base_classifier) # Classify with specificity scoring result = classifier.classify_with_scores( question="Welke archieven zijn er in Amsterdam?", threshold=0.6 ) print(result.context_template) # ContextTemplate.ARCHIVE_SEARCH print(result.filtered_classes) # ['Archive', 'CustodianObservation', ...] """ def __init__( self, template_classifier: Optional["TemplateClassifier"] = None, mapper: Optional[SPARQLToContextMapper] = None, lookup: Optional[SpecificityLookup] = None, default_threshold: float = 0.6, ): """Initialize the specificity-aware classifier. Args: template_classifier: Existing TemplateClassifier (lazy-loaded if None) mapper: SPARQL to context mapper (uses singleton if None) lookup: Specificity score lookup (uses singleton if None) default_threshold: Default specificity threshold """ self._template_classifier = template_classifier self._mapper = mapper or get_sparql_to_context_mapper() self._lookup = lookup or get_specificity_lookup() self.default_threshold = default_threshold @property def template_classifier(self) -> "TemplateClassifier": """Lazy-load TemplateClassifier to avoid circular imports.""" if self._template_classifier is None: from backend.rag.template_sparql import TemplateClassifier self._template_classifier = TemplateClassifier() return self._template_classifier def classify_with_scores( self, question: str, language: str = "nl", threshold: Optional[float] = None, conversation_state: Optional[object] = None, ) -> ClassificationWithScores: """Classify question and return result with specificity scores. This is the main entry point for the RAG pipeline. Args: question: User's natural language question language: Language code (nl, en, de, fr) threshold: Specificity threshold (uses default if None) conversation_state: Optional conversation state for context Returns: ClassificationWithScores with filtered classes """ threshold = threshold if threshold is not None else self.default_threshold # Step 1: Classify using existing TemplateClassifier try: match_result = self.template_classifier.forward( question=question, language=language, conversation_state=conversation_state, ) classification = ClassificationResult( template_id=match_result.template_id, confidence=match_result.confidence, reasoning=match_result.reasoning, slots=match_result.slots or {}, ) except Exception as e: logger.warning(f"Template classification failed: {e}") # Fallback to "none" template classification = ClassificationResult( template_id="none", confidence=0.0, reasoning=f"Classification failed: {e}", slots={}, ) # Step 2: Map to context template context_template = self._mapper.map( classification.template_id, classification.slots, ) logger.debug( f"Mapped {classification.template_id} → {context_template.value} " f"(slots: {classification.slots})" ) # Step 3: Get filtered classes filtered_scores = self._lookup.get_filtered_scores(context_template, threshold) filtered_classes = list(filtered_scores.keys()) logger.info( f"Specificity filter: {len(filtered_classes)} classes pass " f"threshold {threshold} for {context_template.value}" ) return ClassificationWithScores( classification=classification, context_template=context_template, filtered_classes=filtered_classes, all_scores=filtered_scores, threshold_used=threshold, ) def get_context_template( self, sparql_template_id: str, slots: Optional[dict[str, str]] = None, ) -> ContextTemplate: """Get context template for a SPARQL template ID. Convenience method for direct mapping without full classification. Args: sparql_template_id: SPARQL template ID slots: Optional slots for refinement Returns: Context template """ return self._mapper.map(sparql_template_id, slots) def get_filtered_classes( self, context_template: ContextTemplate, threshold: Optional[float] = None, ) -> list[str]: """Get filtered classes for a context template. Convenience method for direct lookup without classification. Args: context_template: Context template to filter by threshold: Specificity threshold Returns: List of class names that pass threshold """ threshold = threshold if threshold is not None else self.default_threshold return self._lookup.get_classes_for_template(context_template, threshold) # Singleton instance _classifier_instance: Optional[SpecificityAwareClassifier] = None def get_specificity_aware_classifier() -> SpecificityAwareClassifier: """Get singleton classifier instance.""" global _classifier_instance if _classifier_instance is None: _classifier_instance = SpecificityAwareClassifier() return _classifier_instance