glam/backend/rag/specificity/__init__.py
kempersc 11983014bb Enhance specificity scoring system integration with existing infrastructure
- Updated documentation to clarify integration points with existing components in the RAG pipeline and DSPy framework.
- Added detailed mapping of SPARQL templates to context templates for improved specificity filtering.
- Implemented wrapper patterns around existing classifiers to extend functionality without duplication.
- Introduced new tests for the SpecificityAwareClassifier and SPARQLToContextMapper to ensure proper integration and functionality.
- Enhanced the CustodianRDFConverter to include ISO country and subregion codes from GHCID for better geospatial data handling.
2026-01-05 17:37:49 +01:00

140 lines
4.3 KiB
Python

"""
Specificity score system for Heritage Custodian RAG.
This module provides specificity-aware filtering of LinkML schema classes
based on query relevance. It integrates with the existing TemplateClassifier
from template_sparql.py to provide filtered class lists for RAG retrieval.
Architecture (Option C - Hybrid):
```
            User Question
                  ↓
┌─────────────────────────────────────┐
│ EXISTING: TemplateClassifier        │ ← backend/rag/template_sparql.py:1104
│ (Classifies to SPARQL template ID)  │
└─────────────────────────────────────┘
                  ↓ sparql_template_id + slots
┌─────────────────────────────────────┐
│ NEW: SPARQLToContextMapper          │ ← mapper.py
│ (Maps SPARQL → Context templates)   │
└─────────────────────────────────────┘
                  ↓ context_template_id
┌─────────────────────────────────────┐
│ NEW: SpecificityLookup              │ ← lookup.py
│ (Filters classes by score threshold)│
└─────────────────────────────────────┘
                  ↓ filtered_classes + scores
┌─────────────────────────────────────┐
│ RAG Pipeline / UML Visualization    │
└─────────────────────────────────────┘
```

Usage:
    from backend.rag.specificity import (
        SpecificityAwareClassifier,
        get_specificity_aware_classifier,
        ContextTemplate,
    )

    # Get singleton classifier
    classifier = get_specificity_aware_classifier()

    # Classify with specificity scoring
    # (the sample question is Dutch for
    # "Which archives are there in Amsterdam?")
    result = classifier.classify_with_scores(
        question="Welke archieven zijn er in Amsterdam?",
        threshold=0.6
    )

    print(result.context_template)  # ContextTemplate.ARCHIVE_SEARCH
    print(result.filtered_classes)  # ['Archive', 'CustodianObservation', ...]

See also:
    - docs/plan/specificity_score/ - Planning documentation
    - .opencode/rules/specificity-score-convention.md - Annotation rules
"""
from .models import (
ContextTemplate,
SpecificityScore,
ClassificationResult,
ClassificationWithScores,
INSTITUTION_TYPE_TO_CONTEXT,
)
from .mapper import (
SPARQLToContextMapper,
get_sparql_to_context_mapper,
SPARQL_TO_CONTEXT_MAP,
)
from .lookup import (
SpecificityLookup,
get_specificity_lookup,
get_classes_for_template_cached,
)
from .classifier import (
SpecificityAwareClassifier,
get_specificity_aware_classifier,
)
from .context_selector import (
DynamicContextSelector,
ContextSelectionResult,
get_dynamic_context_selector,
select_context_for_query,
INTENT_TO_CONTEXT_MAP,
ENTITY_TYPE_TO_CONTEXT_MAP,
DEFAULT_THRESHOLDS,
)
from .token_counter import (
count_tokens,
count_tokens_for_context,
compare_context_sizes,
benchmark_all_templates,
format_benchmark_report,
estimate_cost_savings,
quick_benchmark,
ContextSizeComparison,
CostEstimate,
)
# Public surface of the package: exactly the names re-exported by the
# submodule imports in this file, listed in the same order. Kept as a plain
# list literal of strings so that `from <package> import *` and static
# analysis tools see the exported names directly.
__all__ = [
    # --- re-exported from .models ---
    "ContextTemplate",
    "SpecificityScore",
    "ClassificationResult",
    "ClassificationWithScores",
    "INSTITUTION_TYPE_TO_CONTEXT",

    # --- re-exported from .mapper ---
    "SPARQLToContextMapper",
    "get_sparql_to_context_mapper",
    "SPARQL_TO_CONTEXT_MAP",

    # --- re-exported from .lookup ---
    "SpecificityLookup",
    "get_specificity_lookup",
    "get_classes_for_template_cached",

    # --- re-exported from .classifier ---
    "SpecificityAwareClassifier",
    "get_specificity_aware_classifier",

    # --- re-exported from .context_selector ---
    "DynamicContextSelector",
    "ContextSelectionResult",
    "get_dynamic_context_selector",
    "select_context_for_query",
    "INTENT_TO_CONTEXT_MAP",
    "ENTITY_TYPE_TO_CONTEXT_MAP",
    "DEFAULT_THRESHOLDS",

    # --- re-exported from .token_counter ---
    "count_tokens",
    "count_tokens_for_context",
    "compare_context_sizes",
    "benchmark_all_templates",
    "format_benchmark_report",
    "estimate_cost_savings",
    "quick_benchmark",
    "ContextSizeComparison",
    "CostEstimate",
]