# Design Patterns for Template-Based SPARQL

## Overview

This document describes the software design patterns used in the template-based SPARQL query generation system. The patterns are chosen to maximize:

- **Extensibility** - Easy to add new templates
- **Testability** - Each component can be tested in isolation
- **Maintainability** - Clear separation of concerns
- **DSPy Compatibility** - Integration with existing DSPy pipeline

## Pattern Summary

| Pattern | Component | Purpose |
|---------|-----------|---------|
| Strategy | TemplateRouter | Select query strategy based on intent |
| Template Method | BaseTemplate | Define template instantiation skeleton |
| Chain of Responsibility | QueryPipeline | Process query through handlers |
| Factory | TemplateFactory | Create template instances |
| Registry | TemplateRegistry | Register and lookup templates |
| Builder | SPARQLBuilder | Construct complex SPARQL queries |

## 1. Strategy Pattern - Template Router

The Strategy pattern allows selecting different query generation strategies at runtime based on question intent.

### Class Diagram

```
                  +-------------------+
                  |   QueryStrategy   |
                  |    (Protocol)     |
                  +-------------------+
                  | + can_handle()    |
                  | + generate_query()|
                  +-------------------+
                            ^
                            |
        +-------------------+-------------------+
        |                   |                   |
+---------------+   +---------------+   +---------------+
| Template      |   | LLMFallback   |   | Hybrid        |
| Strategy      |   | Strategy      |   | Strategy      |
+---------------+   +---------------+   +---------------+
| + template_id |   | + lm_client   |   | + template_st |
| + slots       |   | + schema_ctx  |   | + llm_fallback|
+---------------+   +---------------+   +---------------+
```

### Implementation

```python
from typing import Protocol, Optional
from dataclasses import dataclass

import dspy

# NOTE: QueryIntent, GenerateSPARQL, TemplateRegistry and SlotFiller are not
# defined in this snippet; import them from the project modules that provide
# them.


class QueryStrategy(Protocol):
    """Protocol for query generation strategies."""

    def can_handle(self, intent: QueryIntent) -> bool:
        """Check if this strategy can handle the intent."""
        ...

    def generate_query(self, intent: QueryIntent, slots: dict[str, str]) -> str:
        """Generate SPARQL query from intent and slots."""
        ...


@dataclass
class TemplateStrategy:
    """Generate queries from predefined templates."""

    template_registry: "TemplateRegistry"
    slot_filler: "SlotFiller"

    def can_handle(self, intent: QueryIntent) -> bool:
        """Return True when a template is registered for ``intent.template_id``."""
        # TemplateRegistry.get() raises KeyError for unknown IDs. Asking the
        # registry directly avoids a membership test (``in``), which only
        # works on objects that implement __contains__.
        try:
            self.template_registry.get(intent.template_id)
        except KeyError:
            return False
        return True

    def generate_query(self, intent: QueryIntent, slots: dict[str, str]) -> str:
        """Fill the template selected by the intent with the extracted slots."""
        template = self.template_registry.get(intent.template_id)
        filled_slots = self.slot_filler.fill(template.slots, slots)
        return template.instantiate(filled_slots)


@dataclass
class LLMFallbackStrategy:
    """Fall back to LLM generation when no template matches."""

    lm_client: dspy.LM
    schema_context: str

    def can_handle(self, intent: QueryIntent) -> bool:
        return True  # Always available as fallback

    def generate_query(self, intent: QueryIntent, slots: dict[str, str]) -> str:
        """Generate SPARQL with the LLM; ``slots`` are not used by this strategy."""
        # Use existing DSPy signature for SPARQL generation
        generator = dspy.ChainOfThought(GenerateSPARQL)
        # Run the call with the configured client. Without this the
        # ``lm_client`` field is ignored and DSPy's globally configured LM
        # is used instead.
        with dspy.context(lm=self.lm_client):
            prediction = generator(
                question=intent.original_question,
                schema_context=self.schema_context,
            )
        return prediction.sparql_query


class QueryRouter:
    """Route queries to appropriate strategy."""

    def __init__(self, strategies: list[QueryStrategy]):
        # Order matters: the first strategy whose can_handle() returns True wins.
        self.strategies = strategies

    def route(self, intent: QueryIntent, slots: dict[str, str]) -> str:
        """Return the query produced by the first strategy that accepts the intent.

        Raises:
            ValueError: If no configured strategy can handle the intent.
        """
        for strategy in self.strategies:
            if strategy.can_handle(intent):
                return strategy.generate_query(intent, slots)
        raise ValueError(f"No strategy can handle intent: {intent}")
```

### Usage

```python
# Configure strategies in priority order
router = QueryRouter([
    TemplateStrategy(registry, slot_filler),  # Try templates first
    LLMFallbackStrategy(lm, schema_ctx),      # Fall back to LLM
])

# Route query
sparql = router.route(intent, slots)
```

## 2. Template Method Pattern - Base Template

The Template Method pattern defines the skeleton of template instantiation, allowing subclasses to override specific steps.
### Class Diagram

```
+---------------------------+
|       BaseTemplate        |
+---------------------------+
| # validate_slots()        |  <- Hook method
| # pre_process()           |  <- Hook method
| # fill_template()         |  <- Abstract method
| # post_process()          |  <- Hook method
| + instantiate()           |  <- Template method
+---------------------------+
              ^
              |
      +-------+-------+
      |               |
+---------------+ +------------------+
| SimpleTemplate| | CompositeTemplate|
+---------------+ +------------------+
| + sparql_text | | + sub_templates  |
+---------------+ +------------------+
```

### Implementation

```python
import re
from abc import ABC, abstractmethod
from typing import Optional


class BaseTemplate(ABC):
    """Base class for SPARQL templates using Template Method pattern."""

    def __init__(
        self,
        template_id: str,
        description: str,
        slots: dict[str, SlotDefinition],
    ):
        self.template_id = template_id
        self.description = description
        self.slots = slots

    def instantiate(self, slot_values: dict[str, str]) -> str:
        """Template method - defines the algorithm skeleton.

        Raises:
            ValueError: If a required slot is missing or a value is not allowed.
        """
        # Step 1: Validate slots
        self._validate_slots(slot_values)

        # Step 2: Pre-process slot values
        processed_values = self._pre_process(slot_values)

        # Step 3: Fill template (implemented by subclass)
        query = self._fill_template(processed_values)

        # Step 4: Post-process query
        return self._post_process(query)

    def _validate_slots(self, slot_values: dict[str, str]) -> None:
        """Hook method - can be overridden."""
        for slot_name, definition in self.slots.items():
            if definition.required and slot_name not in slot_values:
                raise ValueError(f"Missing required slot: {slot_name}")

            if slot_name in slot_values and definition.valid_values:
                if slot_values[slot_name] not in definition.valid_values:
                    raise ValueError(
                        f"Invalid value for {slot_name}: {slot_values[slot_name]}"
                    )

    def _pre_process(self, slot_values: dict[str, str]) -> dict[str, str]:
        """Hook method - can be overridden.

        NOTE(review): slot values are substituted into the query text as-is.
        If they can come from untrusted input, this hook is the place to
        escape or reject characters that could alter the query structure.
        """
        return slot_values

    @abstractmethod
    def _fill_template(self, slot_values: dict[str, str]) -> str:
        """Abstract method - must be implemented by subclass."""
        ...

    def _post_process(self, query: str) -> str:
        """Hook method - can be overridden."""
        # Default: clean up whitespace (drop blank lines, trim both ends)
        return re.sub(r'\n\s*\n', '\n', query.strip())


class SimpleTemplate(BaseTemplate):
    """Template with a single SPARQL string."""

    def __init__(
        self,
        template_id: str,
        description: str,
        slots: dict[str, SlotDefinition],
        sparql_template: str,
    ):
        super().__init__(template_id, description, slots)
        self.sparql_template = sparql_template

    def _fill_template(self, slot_values: dict[str, str]) -> str:
        query = self.sparql_template
        for slot_name, value in slot_values.items():
            # Placeholders are written as {{slot_name}} in the template text
            query = query.replace(f"{{{{{slot_name}}}}}", value)
        return query


class CompositeTemplate(BaseTemplate):
    """Template composed of multiple sub-templates."""

    def __init__(
        self,
        template_id: str,
        description: str,
        slots: dict[str, SlotDefinition],
        sub_templates: list[BaseTemplate],
        join_type: str = "AND",  # AND, UNION, OPTIONAL
    ):
        super().__init__(template_id, description, slots)
        self.sub_templates = sub_templates
        self.join_type = join_type

    def _fill_template(self, slot_values: dict[str, str]) -> str:
        """Fill every sub-template that has matching slots and join the results.

        Returns an empty string when no sub-template receives any slot value.
        """
        # Fill each sub-template with only the slots it declares
        clauses = []
        for sub in self.sub_templates:
            relevant_slots = {
                k: v for k, v in slot_values.items()
                if k in sub.slots
            }
            if relevant_slots:
                clauses.append(sub._fill_template(relevant_slots))

        # Nothing matched: return an empty pattern instead of failing on
        # clauses[0] in the OPTIONAL branch below.
        if not clauses:
            return ""

        # Join clauses based on join type
        if self.join_type == "UNION":
            return " UNION ".join(f"{{ {c} }}" for c in clauses)
        if self.join_type == "OPTIONAL":
            # The first clause is mandatory; the remaining ones are optional.
            required, *optional = clauses
            return "\n".join(
                [required, *(f"OPTIONAL {{ {c} }}" for c in optional)]
            )
        # "AND" and any unrecognised join type: plain conjunction
        return "\n".join(clauses)
```

## 3. Chain of Responsibility - Query Pipeline

The Chain of Responsibility pattern allows processing queries through a series of handlers.
### Class Diagram

```
            +-------------------+
            |   QueryHandler    |
            |    (Protocol)     |
            +-------------------+
            | + handle()        |
            | + set_next()      |
            +-------------------+
                      ^
                      |
      +-------+-------+-------+-------+
      |       |       |       |       |
  +-------+ +-------+ +-------+ +-------+
  |Intent | |Slot   | |Template| |Validate|
  |Classif| |Extract| |Fill   | |Query  |
  +-------+ +-------+ +-------+ +-------+
```

### Implementation

```python
from dataclasses import dataclass, field
from typing import Optional, Any, Protocol


class QueryHandler(Protocol):
    """Protocol for query processing handlers."""

    def handle(self, context: QueryContext) -> QueryContext:
        """Process query context and pass to next handler."""
        ...

    def set_next(self, handler: "QueryHandler") -> "QueryHandler":
        """Set the next handler in the chain."""
        ...


@dataclass
class QueryContext:
    """Context passed through the handler chain."""

    original_question: str
    language: str = "nl"
    intent: Optional[QueryIntent] = None
    slots: dict[str, str] = field(default_factory=dict)
    sparql_query: Optional[str] = None
    results: Optional[list[dict]] = None
    errors: list[str] = field(default_factory=list)


class BaseHandler:
    """Base class for handlers."""

    _next: Optional["BaseHandler"] = None

    def set_next(self, handler: "BaseHandler") -> "BaseHandler":
        # Returning the handler just added lets calls be chained:
        # a.set_next(b).set_next(c)
        self._next = handler
        return handler

    def handle(self, context: QueryContext) -> QueryContext:
        context = self._process(context)
        # Stop the chain as soon as any handler has recorded an error
        if self._next and not context.errors:
            return self._next.handle(context)
        return context

    def _process(self, context: QueryContext) -> QueryContext:
        """Override in subclass."""
        return context


class IntentClassificationHandler(BaseHandler):
    """Classify question intent."""

    def __init__(self, classifier: "IntentClassifier"):
        self.classifier = classifier

    def _process(self, context: QueryContext) -> QueryContext:
        try:
            context.intent = self.classifier.classify(
                context.original_question,
                context.language
            )
        except Exception as e:
            context.errors.append(f"Intent classification failed: {e}")
        return context


class SlotExtractionHandler(BaseHandler):
    """Extract slot values from question."""

    def __init__(self, extractor: "SlotExtractor"):
        self.extractor = extractor

    def _process(self, context: QueryContext) -> QueryContext:
        if not context.intent:
            return context

        try:
            context.slots = self.extractor.extract(
                context.original_question,
                context.intent
            )
        except Exception as e:
            context.errors.append(f"Slot extraction failed: {e}")
        return context


class TemplateFillingHandler(BaseHandler):
    """Fill template with slot values."""

    def __init__(self, registry: "TemplateRegistry"):
        self.registry = registry

    def _process(self, context: QueryContext) -> QueryContext:
        # Only a missing intent skips this step. An empty slot dict is still
        # passed on: templates without slots must be fillable, and a template
        # with required slots reports the missing ones via instantiate(),
        # which is recorded as an error below.
        if not context.intent:
            return context

        try:
            template = self.registry.get(context.intent.template_id)
            context.sparql_query = template.instantiate(context.slots)
        except Exception as e:
            context.errors.append(f"Template filling failed: {e}")
        return context


class QueryValidationHandler(BaseHandler):
    """Validate generated SPARQL query."""

    def __init__(self, linter: "SPARQLLinter"):
        self.linter = linter

    def _process(self, context: QueryContext) -> QueryContext:
        if not context.sparql_query:
            return context

        try:
            corrected, result = self.linter.lint_and_correct(context.sparql_query)
            context.sparql_query = corrected
            if not result.valid:
                context.errors.extend(
                    f"SPARQL error: {issue.message}" for issue in result.issues
                )
        except Exception as e:
            context.errors.append(f"Query validation failed: {e}")
        return context
```

### Usage

```python
# Build the handler chain
intent_handler = IntentClassificationHandler(classifier)
slot_handler = SlotExtractionHandler(extractor)
template_handler = TemplateFillingHandler(registry)
validate_handler = QueryValidationHandler(linter)

# Chain handlers
intent_handler.set_next(slot_handler).set_next(template_handler).set_next(validate_handler)

# Process query
context = QueryContext(original_question="Welke archieven zijn er in Drenthe?")
result = intent_handler.handle(context)

if result.errors:
    print(f"Errors: {result.errors}")
else:
    print(f"SPARQL: {result.sparql_query}")
```

## 4. Factory Pattern - Template Factory

The Factory pattern creates template instances from configuration.

### Implementation

```python
from enum import Enum
from typing import Type


class TemplateType(Enum):
    SIMPLE = "simple"
    COMPOSITE = "composite"
    AGGREGATION = "aggregation"


class TemplateFactory:
    """Factory for creating template instances."""

    _creators: dict[TemplateType, Type[BaseTemplate]] = {
        TemplateType.SIMPLE: SimpleTemplate,
        TemplateType.COMPOSITE: CompositeTemplate,
    }

    @classmethod
    def register(cls, template_type: TemplateType, creator: Type[BaseTemplate]):
        """Register a new template type."""
        cls._creators[template_type] = creator

    @classmethod
    def create(cls, config: dict) -> BaseTemplate:
        """Create template from configuration dictionary.

        Raises:
            ValueError: If the configured type is unknown or has no creator.
            KeyError: If the configuration has no "id".
        """
        template_type = TemplateType(config.get("type", "simple"))
        creator = cls._creators.get(template_type)

        if not creator:
            raise ValueError(f"Unknown template type: {template_type}")

        # Parse slot definitions
        slots = {}
        for slot_name, slot_config in config.get("slots", {}).items():
            slots[slot_name] = SlotDefinition(
                name=slot_name,
                required=slot_config.get("required", True),
                valid_values=slot_config.get("valid_values"),
                source=slot_config.get("source"),
            )

        kwargs = {
            "template_id": config["id"],
            "description": config.get("description", ""),
            "slots": slots,
        }
        if template_type is TemplateType.COMPOSITE:
            # CompositeTemplate takes sub-templates and a join type, not a
            # SPARQL string; passing sparql_template would raise TypeError.
            # NOTE(review): the "sub_templates" and "join_type" config keys
            # are assumed here - confirm against the template YAML schema.
            kwargs["sub_templates"] = [
                cls.create(sub_config)
                for sub_config in config.get("sub_templates", [])
            ]
            kwargs["join_type"] = config.get("join_type", "AND")
        else:
            kwargs["sparql_template"] = config.get("sparql_template", "")

        return creator(**kwargs)
```

## 5. Registry Pattern - Template Registry

The Registry pattern provides a central location for template lookup.
### Implementation

```python
import re
from pathlib import Path
from typing import Optional

import yaml


class TemplateRegistry:
    """Registry for SPARQL templates.

    Templates are stored on the class itself, so every instance and every
    classmethod call shares the same registrations.
    """

    _templates: dict[str, BaseTemplate] = {}
    _patterns: dict[str, list[str]] = {}  # template_id -> question patterns

    def __contains__(self, template_id: object) -> bool:
        """Support ``template_id in registry`` on registry instances."""
        return template_id in self._templates

    @classmethod
    def register(cls, template: BaseTemplate, patterns: list[str]):
        """Register a template with its question patterns."""
        cls._templates[template.template_id] = template
        cls._patterns[template.template_id] = patterns

    @classmethod
    def get(cls, template_id: str) -> BaseTemplate:
        """Get template by ID.

        Raises:
            KeyError: If no template is registered under ``template_id``.
        """
        if template_id not in cls._templates:
            raise KeyError(f"Template not found: {template_id}")
        return cls._templates[template_id]

    @classmethod
    def find_by_pattern(cls, question: str) -> Optional[str]:
        """Find template ID matching question pattern.

        Patterns are regular expressions matched case-insensitively from the
        start of the question; the first registered match wins.
        """
        for template_id, patterns in cls._patterns.items():
            for pattern in patterns:
                if re.match(pattern, question, re.IGNORECASE):
                    return template_id
        return None

    @classmethod
    def load_from_yaml(cls, yaml_path: Path):
        """Load templates from YAML configuration."""
        with open(yaml_path, encoding="utf-8") as f:
            # safe_load returns None for an empty file
            config = yaml.safe_load(f) or {}

        for template_config in config.get("templates", []):
            template = TemplateFactory.create(template_config)
            patterns = template_config.get("question_patterns", [])
            cls.register(template, patterns)

    @classmethod
    def list_all(cls) -> list[str]:
        """List all registered template IDs."""
        return list(cls._templates.keys())
```

## 6. Builder Pattern - SPARQL Builder

The Builder pattern constructs complex SPARQL queries step by step.
### Implementation

```python
import re
from typing import Optional


class SPARQLBuilder:
    """Builder for constructing SPARQL queries.

    Variable arguments may be given as bare names ("name") or with the
    leading "?" ("?name"). Anything that is not a bare name (for example an
    expression such as "(COUNT(?x) AS ?n)") is emitted unchanged.
    """

    _BARE_NAME = re.compile(r"\w+")

    def __init__(self):
        self._prefixes: list[str] = []
        self._select_vars: list[str] = []
        self._where_clauses: list[str] = []
        self._filters: list[str] = []
        self._group_by: Optional[str] = None
        self._order_by: Optional[str] = None
        self._limit: Optional[int] = None

    @classmethod
    def _var(cls, term: str) -> str:
        """Prefix a bare variable name with "?"; leave anything else untouched."""
        return f"?{term}" if cls._BARE_NAME.fullmatch(term) else term

    def add_prefix(self, prefix: str, uri: str) -> "SPARQLBuilder":
        self._prefixes.append(f"PREFIX {prefix}: <{uri}>")
        return self

    def add_select(self, *variables: str) -> "SPARQLBuilder":
        self._select_vars.extend(variables)
        return self

    def add_triple(
        self,
        subject: str,
        predicate: str,
        obj: str
    ) -> "SPARQLBuilder":
        self._where_clauses.append(f"{subject} {predicate} {obj} .")
        return self

    def add_filter(self, condition: str) -> "SPARQLBuilder":
        self._filters.append(f"FILTER({condition})")
        return self

    def add_optional(self, clause: str) -> "SPARQLBuilder":
        self._where_clauses.append(f"OPTIONAL {{ {clause} }}")
        return self

    def group_by(self, var: str) -> "SPARQLBuilder":
        self._group_by = var
        return self

    def order_by(self, var: str, desc: bool = False) -> "SPARQLBuilder":
        # Normalise here so that order_by("name") yields "?name"; a bare
        # "name" is not a valid SPARQL order condition.
        term = self._var(var)
        self._order_by = f"DESC({term})" if desc else term
        return self

    def limit(self, n: int) -> "SPARQLBuilder":
        self._limit = n
        return self

    def build(self) -> str:
        """Build the final SPARQL query."""
        parts = []

        # Prefixes
        if self._prefixes:
            parts.append("\n".join(self._prefixes))

        # SELECT ("*" when no variables were added)
        select_vars = " ".join(self._var(v) for v in self._select_vars) or "*"
        parts.append(f"SELECT {select_vars}")

        # WHERE
        where_content = "\n  ".join(self._where_clauses + self._filters)
        parts.append(f"WHERE {{\n  {where_content}\n}}")

        # GROUP BY
        if self._group_by:
            parts.append(f"GROUP BY {self._var(self._group_by)}")

        # ORDER BY
        if self._order_by:
            parts.append(f"ORDER BY {self._order_by}")

        # LIMIT (LIMIT 0 is valid, so test against None rather than truthiness)
        if self._limit is not None:
            parts.append(f"LIMIT {self._limit}")

        return "\n".join(parts)
```

### Usage

```python
query = (
    SPARQLBuilder()
    .add_prefix("hc", "https://nde.nl/ontology/hc/class/")
    .add_prefix("hcp", "https://nde.nl/ontology/hc/")
    .add_prefix("skos", "http://www.w3.org/2004/02/skos/core#")
    .add_select("institution", "name")
    .add_triple("?institution", "a", "hc:Custodian")
    .add_triple("?institution", "hcp:institutionType", '"A"')
    .add_triple("?institution", "skos:prefLabel", "?name")
    .add_filter('CONTAINS(STR(?institution), "NL-DR")')
    .order_by("name")
    .limit(100)
    .build()
)
```

## Integration with Existing Code

### dspy_heritage_rag.py Integration Points

```python
# Add template-based query generation before LLM generation

class HeritageRAGWithTemplates(HeritageRAG):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Initialize template components
        self.template_registry = TemplateRegistry()
        self.template_registry.load_from_yaml(
            Path("data/templates/sparql_templates.yaml")
        )
        self.intent_classifier = IntentClassifier()
        self.slot_extractor = SlotExtractor()

        # Build handler chain
        self.query_pipeline = self._build_pipeline()

    def _build_pipeline(self) -> IntentClassificationHandler:
        intent = IntentClassificationHandler(self.intent_classifier)
        slot = SlotExtractionHandler(self.slot_extractor)
        template = TemplateFillingHandler(self.template_registry)
        # NOTE(review): self.sparql_linter is not set in this class; it is
        # presumably provided by HeritageRAG - confirm.
        validate = QueryValidationHandler(self.sparql_linter)

        intent.set_next(slot).set_next(template).set_next(validate)
        return intent

    async def generate_sparql(self, question: str) -> str:
        # Try template-based generation first
        # NOTE(review): handle() is synchronous; if the classifier or
        # extractor perform blocking calls, this blocks the event loop.
        context = QueryContext(original_question=question)
        result = self.query_pipeline.handle(context)

        if result.sparql_query and not result.errors:
            return result.sparql_query

        # Fall back to LLM generation
        return await super().generate_sparql(question)
```

## Testing Strategy

Each pattern component should be tested independently:

```python
# test_template_method.py
def test_simple_template_instantiation():
    template = SimpleTemplate(
        template_id="test",
        description="Test template",
        slots={"province_code": SlotDefinition(name="province_code")},
        sparql_template="FILTER(CONTAINS(STR(?s), '{{province_code}}'))"
    )

    result = template.instantiate({"province_code": "NL-DR"})
    assert "NL-DR" in result


# test_chain_of_responsibility.py
def test_handler_chain(intent_handler):
    # `intent_handler` is a pytest fixture returning the head of the handler
    # chain, built as in the Chain of Responsibility usage example.
    context = QueryContext(original_question="Welke archieven zijn er in Drenthe?")
    result = intent_handler.handle(context)

    assert result.intent is not None
    assert result.sparql_query is not None
```

## References

- Gamma, E., et al. (1994). Design Patterns: Elements of Reusable Object-Oriented Software.
- Martin, R. C. (2017). Clean Architecture.
- DSPy Documentation: https://dspy-docs.vercel.app/