- Updated documentation to clarify integration points with existing components in the RAG pipeline and DSPy framework. - Added detailed mapping of SPARQL templates to context templates for improved specificity filtering. - Implemented wrapper patterns around existing classifiers to extend functionality without duplication. - Introduced new tests for the SpecificityAwareClassifier and SPARQLToContextMapper to ensure proper integration and functionality. - Enhanced the CustodianRDFConverter to include ISO country and subregion codes from GHCID for better geospatial data handling.
140 lines
4.3 KiB
Python
140 lines
4.3 KiB
Python
"""
Specificity score system for Heritage Custodian RAG.

This module provides specificity-aware filtering of LinkML schema classes
based on query relevance. It integrates with the existing TemplateClassifier
from template_sparql.py to provide filtered class lists for RAG retrieval.

Architecture (Option C - Hybrid):
```
User Question
       ↓
┌─────────────────────────────────────┐
│ EXISTING: TemplateClassifier        │ ← backend/rag/template_sparql.py:1104
│ (Classifies to SPARQL template ID)  │
└─────────────────────────────────────┘
       ↓ sparql_template_id + slots
┌─────────────────────────────────────┐
│ NEW: SPARQLToContextMapper          │ ← mapper.py
│ (Maps SPARQL → Context templates)   │
└─────────────────────────────────────┘
       ↓ context_template_id
┌─────────────────────────────────────┐
│ NEW: SpecificityLookup              │ ← lookup.py
│ (Filters classes by score threshold)│
└─────────────────────────────────────┘
       ↓ filtered_classes + scores
┌─────────────────────────────────────┐
│ RAG Pipeline / UML Visualization    │
└─────────────────────────────────────┘
```

Usage:
    from backend.rag.specificity import (
        SpecificityAwareClassifier,
        get_specificity_aware_classifier,
        ContextTemplate,
    )

    # Get singleton classifier
    classifier = get_specificity_aware_classifier()

    # Classify with specificity scoring
    result = classifier.classify_with_scores(
        question="Welke archieven zijn er in Amsterdam?",
        threshold=0.6
    )

    print(result.context_template)  # ContextTemplate.ARCHIVE_SEARCH
    print(result.filtered_classes)  # ['Archive', 'CustodianObservation', ...]

See also:
    - docs/plan/specificity_score/ - Planning documentation
    - .opencode/rules/specificity-score-convention.md - Annotation rules
"""
|
|
|
|
from .models import (
|
|
ContextTemplate,
|
|
SpecificityScore,
|
|
ClassificationResult,
|
|
ClassificationWithScores,
|
|
INSTITUTION_TYPE_TO_CONTEXT,
|
|
)
|
|
|
|
from .mapper import (
|
|
SPARQLToContextMapper,
|
|
get_sparql_to_context_mapper,
|
|
SPARQL_TO_CONTEXT_MAP,
|
|
)
|
|
|
|
from .lookup import (
|
|
SpecificityLookup,
|
|
get_specificity_lookup,
|
|
get_classes_for_template_cached,
|
|
)
|
|
|
|
from .classifier import (
|
|
SpecificityAwareClassifier,
|
|
get_specificity_aware_classifier,
|
|
)
|
|
|
|
from .context_selector import (
|
|
DynamicContextSelector,
|
|
ContextSelectionResult,
|
|
get_dynamic_context_selector,
|
|
select_context_for_query,
|
|
INTENT_TO_CONTEXT_MAP,
|
|
ENTITY_TYPE_TO_CONTEXT_MAP,
|
|
DEFAULT_THRESHOLDS,
|
|
)
|
|
|
|
from .token_counter import (
|
|
count_tokens,
|
|
count_tokens_for_context,
|
|
compare_context_sizes,
|
|
benchmark_all_templates,
|
|
format_benchmark_report,
|
|
estimate_cost_savings,
|
|
quick_benchmark,
|
|
ContextSizeComparison,
|
|
CostEstimate,
|
|
)
|
|
|
|
|
|
# Public API of the package. Names are listed in the same order as the
# submodule imports that bring them into this namespace; keep this a plain
# list literal so static tooling can read it.
__all__ = [
    # .models: enums, score/result containers, institution-type mapping
    "ContextTemplate",
    "SpecificityScore",
    "ClassificationResult",
    "ClassificationWithScores",
    "INSTITUTION_TYPE_TO_CONTEXT",

    # .mapper: SPARQL template -> context template mapping
    "SPARQLToContextMapper",
    "get_sparql_to_context_mapper",
    "SPARQL_TO_CONTEXT_MAP",

    # .lookup: specificity lookup and its accessors
    "SpecificityLookup",
    "get_specificity_lookup",
    "get_classes_for_template_cached",

    # .classifier: specificity-aware classifier and its accessor
    "SpecificityAwareClassifier",
    "get_specificity_aware_classifier",

    # .context_selector: dynamic context selection
    "DynamicContextSelector",
    "ContextSelectionResult",
    "get_dynamic_context_selector",
    "select_context_for_query",
    "INTENT_TO_CONTEXT_MAP",
    "ENTITY_TYPE_TO_CONTEXT_MAP",
    "DEFAULT_THRESHOLDS",

    # .token_counter: token counting, benchmarking and cost estimation
    "count_tokens",
    "count_tokens_for_context",
    "compare_context_sizes",
    "benchmark_all_templates",
    "format_benchmark_report",
    "estimate_cost_savings",
    "quick_benchmark",
    "ContextSizeComparison",
    "CostEstimate",
]
|