"""
Tests for Temporal Intent Extraction Module

Tests the TemporalConstraintExtractor and TemporalIntentExtractorModule classes
which enable fast LLM-free extraction of temporal constraints from queries.
"""

import pytest

from .temporal_intent import (
    TemporalConstraint,
    TemporalConstraintExtractor,
    TemporalIntentExtractorModule,
    get_temporal_extractor,
)


class TestTemporalConstraintExtractor:
    """Tests for TemporalConstraintExtractor class.

    Each test feeds one Dutch or English query to ``extract`` and checks the
    fields of the returned constraint (type, dates, template, confidence).
    """

    @pytest.fixture
    def extractor(self):
        """Provide a fresh TemporalConstraintExtractor for each test."""
        return TemporalConstraintExtractor()

    # ===== Timeline/History Queries =====

    def test_timeline_dutch_geschiedenis(self, extractor):
        """Dutch 'geschiedenis' should trigger timeline constraint."""
        constraint = extractor.extract("Wat is de geschiedenis van het Rijksmuseum?")
        assert constraint.constraint_type == "timeline"
        assert constraint.recommended_template == "institution_timeline"
        assert constraint.confidence >= 0.9

    def test_timeline_english_history(self, extractor):
        """English 'history' should trigger timeline constraint."""
        constraint = extractor.extract("Tell me the history of the British Museum")
        assert constraint.constraint_type == "timeline"
        assert constraint.recommended_template == "institution_timeline"

    def test_timeline_tijdlijn(self, extractor):
        """Dutch 'tijdlijn' should trigger timeline constraint."""
        constraint = extractor.extract("Geef me een tijdlijn van het Noord-Hollands Archief")
        assert constraint.constraint_type == "timeline"

    def test_timeline_evolution(self, extractor):
        """English 'evolution' should trigger timeline constraint."""
        constraint = extractor.extract("What was the evolution of this archive?")
        assert constraint.constraint_type == "timeline"

    # ===== Superlative Queries (Oldest/Newest) =====

    def test_oldest_dutch_oudste(self, extractor):
        """Dutch 'oudste' should trigger oldest constraint."""
        constraint = extractor.extract("Wat is het oudste museum in Nederland?")
        assert constraint.constraint_type == "oldest"
        assert constraint.recommended_template == "find_by_founding"
        assert constraint.confidence >= 0.9

    def test_oldest_english(self, extractor):
        """English 'oldest' should trigger oldest constraint."""
        constraint = extractor.extract("What is the oldest library in Amsterdam?")
        assert constraint.constraint_type == "oldest"

    def test_oldest_eerste(self, extractor):
        """Dutch 'eerste' (first) should trigger oldest constraint."""
        constraint = extractor.extract("Welke was de eerste openbare bibliotheek?")
        assert constraint.constraint_type == "oldest"

    def test_oldest_earliest(self, extractor):
        """English 'earliest' should trigger oldest constraint."""
        constraint = extractor.extract("What is the earliest archive in the region?")
        assert constraint.constraint_type == "oldest"

    def test_newest_dutch_nieuwste(self, extractor):
        """Dutch 'nieuwste' should trigger newest constraint."""
        constraint = extractor.extract("Wat is het nieuwste museum?")
        assert constraint.constraint_type == "newest"
        assert constraint.recommended_template == "find_by_founding"

    def test_newest_english_latest(self, extractor):
        """English 'latest' should trigger newest constraint."""
        constraint = extractor.extract("What is the latest museum to open?")
        assert constraint.constraint_type == "newest"

    def test_newest_most_recent(self, extractor):
        """English 'most recent' should trigger newest constraint."""
        constraint = extractor.extract("What is the most recent archive established?")
        assert constraint.constraint_type == "newest"

    # ===== Change Event Keywords =====

    def test_merger_dutch_fusie(self, extractor):
        """Dutch 'fusie' should trigger change_event constraint."""
        constraint = extractor.extract("Wanneer was de fusie van het archief?")
        assert constraint.constraint_type == "change_event"
        assert constraint.reference_event == "merger"
        assert constraint.recommended_template == "events_in_period"

    def test_merger_english(self, extractor):
        """English 'merger' should trigger change_event constraint."""
        constraint = extractor.extract("When did the merger happen?")
        assert constraint.constraint_type == "change_event"
        assert constraint.reference_event == "merger"

    def test_merger_merged(self, extractor):
        """English 'merged' should trigger change_event constraint."""
        constraint = extractor.extract("Which archives merged in 2001?")
        assert constraint.constraint_type == "change_event"

    def test_founding_dutch_opgericht(self, extractor):
        """Dutch 'opgericht' should trigger founding constraint."""
        constraint = extractor.extract("Wanneer is het Rijksmuseum opgericht?")
        assert constraint.constraint_type == "founding"
        assert constraint.recommended_template == "institution_timeline"

    def test_founding_english_founded(self, extractor):
        """English 'founded' should trigger founding constraint."""
        constraint = extractor.extract("When was the library founded?")
        assert constraint.constraint_type == "founding"

    def test_founding_established(self, extractor):
        """English 'established' should trigger founding constraint."""
        constraint = extractor.extract("When was this archive established?")
        assert constraint.constraint_type == "founding"

    def test_closure_dutch_gesloten(self, extractor):
        """Dutch 'gesloten' should trigger closure constraint."""
        constraint = extractor.extract("Wanneer is het museum gesloten?")
        assert constraint.constraint_type == "closure"
        assert constraint.recommended_template == "institution_timeline"

    def test_closure_english_closed(self, extractor):
        """English 'closed' should trigger closure constraint."""
        # Note: "close" (verb form) vs "closed" (past participle)
        # The extractor only has "closed" in CLOSURE_KEYWORDS
        constraint = extractor.extract("When was the archive closed?")
        assert constraint.constraint_type == "closure"

    def test_closure_dissolved(self, extractor):
        """English 'dissolved' should trigger closure constraint."""
        constraint = extractor.extract("When was the organization dissolved?")
        assert constraint.constraint_type == "closure"

    # ===== Year Extraction =====

    def test_single_year_point_in_time(self, extractor):
        """Single year should trigger point_in_time constraint."""
        constraint = extractor.extract("Wat was de status van het museum in 1990?")
        assert constraint.constraint_type == "point_in_time"
        assert constraint.date_start == "1990-01-01"
        assert constraint.date_end == "1990-12-31"
        assert constraint.recommended_template == "point_in_time_state"

    def test_two_years_between(self, extractor):
        """Two years should trigger between constraint."""
        constraint = extractor.extract("Welke veranderingen waren er tussen 1990 en 2000?")
        assert constraint.constraint_type == "between"
        assert constraint.date_start == "1990-01-01"
        assert constraint.date_end == "2000-12-31"
        assert constraint.recommended_template == "events_in_period"

    def test_three_years_uses_first_and_last(self, extractor):
        """Three years should use first and last for range."""
        constraint = extractor.extract("Musea in 1950, 1975 en 2000")
        assert constraint.constraint_type == "between"
        assert constraint.date_start == "1950-01-01"
        assert constraint.date_end == "2000-12-31"

    def test_year_with_before_dutch(self, extractor):
        """Year with Dutch 'voor' should trigger before constraint."""
        constraint = extractor.extract("Welke archieven bestonden voor 1950?")
        assert constraint.constraint_type == "before"
        assert constraint.date_end == "1950-01-01"
        assert constraint.recommended_template == "point_in_time_state"

    def test_year_with_before_english(self, extractor):
        """Year with English 'before' should trigger before constraint."""
        constraint = extractor.extract("Which museums existed before 1900?")
        assert constraint.constraint_type == "before"
        assert constraint.date_end == "1900-01-01"

    def test_year_with_after_dutch(self, extractor):
        """Year with Dutch 'na' should trigger after constraint.

        Note: More specific keywords (like 'opgericht') take precedence.
        We use a neutral query without founding/closure keywords.
        """
        constraint = extractor.extract("Welke veranderingen waren er na 1980?")
        assert constraint.constraint_type == "after"
        assert constraint.date_start == "1980-12-31"
        assert constraint.recommended_template == "point_in_time_state"

    def test_year_with_after_english(self, extractor):
        """Year with English 'after' should trigger after constraint."""
        constraint = extractor.extract("What happened after 2010?")
        assert constraint.constraint_type == "after"
        assert constraint.date_start == "2010-12-31"

    def test_year_with_since(self, extractor):
        """'Since' should trigger after constraint."""
        constraint = extractor.extract("Museums opened since 2000")
        assert constraint.constraint_type == "after"
        assert constraint.date_start == "2000-12-31"

    # ===== Year Extraction Edge Cases =====

    def test_year_1800s(self, extractor):
        """Should extract years from 1800s."""
        constraint = extractor.extract("Archieven uit 1856")
        assert constraint.constraint_type == "point_in_time"
        assert "1856" in constraint.date_start

    def test_year_2020s(self, extractor):
        """Should extract years from 2020s."""
        constraint = extractor.extract("Nieuwe musea in 2023")
        assert constraint.constraint_type == "point_in_time"
        assert "2023" in constraint.date_start

    def test_ignore_numbers_that_are_not_years(self, extractor):
        """Should not extract non-year numbers as years."""
        # Numbers like 500 or 50 should not be treated as years
        constraint = extractor.extract("Het museum heeft 500 werken in de collectie")
        assert constraint.constraint_type == "none"

    # ===== No Temporal Constraint =====

    def test_no_constraint_simple_query(self, extractor):
        """Query without temporal indicators should return none."""
        constraint = extractor.extract("Welke musea zijn er in Amsterdam?")
        assert constraint.constraint_type == "none"
        assert constraint.recommended_template is None

    def test_no_constraint_descriptive_query(self, extractor):
        """Descriptive query should return none."""
        constraint = extractor.extract("Vertel me over de collectie van het Rijksmuseum")
        assert constraint.constraint_type == "none"

    # ===== Word Boundary Matching =====

    def test_na_in_nationaal_not_matched(self, extractor):
        """'na' inside 'nationaal' should NOT trigger after constraint."""
        constraint = extractor.extract("Nationaal Archief in Den Haag")
        # 'nationaal' contains 'na' but it's not a word boundary
        assert constraint.constraint_type == "none"

    def test_na_as_word_is_matched(self, extractor):
        """'na' as standalone word SHOULD trigger after constraint."""
        constraint = extractor.extract("Na de renovatie in 1995 werd het museum heropend")
        assert constraint.constraint_type == "after"
        assert "1995" in constraint.date_start

    def test_voor_in_voorwerpen_not_matched(self, extractor):
        """'voor' inside 'voorwerpen' should NOT trigger before."""
        constraint = extractor.extract("De collectie bevat voorwerpen uit de 18e eeuw")
        # No explicit year, so should be none
        assert constraint.constraint_type == "none"

    def test_voor_as_word_is_matched(self, extractor):
        """'voor' as standalone word SHOULD trigger before constraint."""
        constraint = extractor.extract("Archieven van voor 1900")
        assert constraint.constraint_type == "before"
        assert "1900" in constraint.date_end

    # ===== Template Mapping =====

    def test_template_mapping_point_in_time(self, extractor):
        """point_in_time should map to point_in_time_state template."""
        constraint = extractor.extract("Status in 1990")
        template = extractor.get_template_for_constraint(constraint)
        assert template == "point_in_time_state"

    def test_template_mapping_between(self, extractor):
        """between should map to events_in_period template."""
        constraint = extractor.extract("Veranderingen tussen 1990 en 2000")
        template = extractor.get_template_for_constraint(constraint)
        assert template == "events_in_period"

    def test_template_mapping_oldest(self, extractor):
        """oldest should map to find_by_founding template."""
        constraint = extractor.extract("Het oudste museum")
        template = extractor.get_template_for_constraint(constraint)
        assert template == "find_by_founding"

    def test_template_mapping_timeline(self, extractor):
        """timeline should map to institution_timeline template."""
        constraint = extractor.extract("Geschiedenis van het archief")
        template = extractor.get_template_for_constraint(constraint)
        assert template == "institution_timeline"

    def test_template_mapping_none(self, extractor):
        """none constraint should return None template."""
        constraint = extractor.extract("Welke musea zijn er?")
        template = extractor.get_template_for_constraint(constraint)
        assert template is None

    # ===== Confidence Scoring =====

    def test_high_confidence_timeline(self, extractor):
        """Timeline queries should have high confidence."""
        constraint = extractor.extract("Geschiedenis van het Rijksmuseum")
        assert constraint.confidence >= 0.9

    def test_high_confidence_superlative(self, extractor):
        """Superlative queries should have high confidence."""
        constraint = extractor.extract("Het oudste archief")
        assert constraint.confidence >= 0.9

    def test_moderate_confidence_year_only(self, extractor):
        """Year-only queries should have moderate confidence."""
        constraint = extractor.extract("Musea in 1990")
        assert 0.7 <= constraint.confidence <= 0.9

    def test_lower_confidence_no_constraint(self, extractor):
        """No-constraint queries should have lower confidence."""
        constraint = extractor.extract("Algemene informatie over erfgoed")
        assert constraint.confidence <= 0.75


class TestTemporalConstraintDataclass:
    """Tests for TemporalConstraint dataclass."""

    def test_default_values(self):
        """Test default values of TemporalConstraint."""
        constraint = TemporalConstraint(constraint_type="none")
        assert constraint.date_start is None
        assert constraint.date_end is None
        assert constraint.reference_event is None
        assert constraint.confidence == 0.8
        assert constraint.recommended_template is None

    def test_full_constraint(self):
        """Test TemporalConstraint with all fields."""
        constraint = TemporalConstraint(
            constraint_type="between",
            date_start="1990-01-01",
            date_end="2000-12-31",
            reference_event=None,
            confidence=0.95,
            recommended_template="events_in_period",
        )
        assert constraint.constraint_type == "between"
        assert constraint.date_start == "1990-01-01"
        assert constraint.date_end == "2000-12-31"
        assert constraint.confidence == 0.95
        assert constraint.recommended_template == "events_in_period"


class TestTemporalIntentExtractorModule:
    """Tests for the DSPy module (without actual LLM calls)."""

    def test_module_initialization(self):
        """Test module initializes correctly."""
        module = TemporalIntentExtractorModule(confidence_threshold=0.75)
        assert module.confidence_threshold == 0.75
        assert module.fast_extractor is not None

    def test_high_confidence_uses_fast_extraction(self):
        """High confidence queries should use fast extraction, not LLM.

        NOTE(review): this only inspects the returned constraint; it does not
        by itself prove that no LLM call was made.
        """
        module = TemporalIntentExtractorModule(confidence_threshold=0.75)
        # This query has high confidence (timeline keyword)
        constraint = module.forward("Geschiedenis van het Rijksmuseum")
        # Should use fast extraction result
        assert constraint.constraint_type == "timeline"
        assert constraint.confidence >= 0.75


class TestSingletonInstance:
    """Tests for singleton pattern."""

    def test_get_temporal_extractor_singleton(self):
        """get_temporal_extractor should return same instance."""
        ext1 = get_temporal_extractor()
        ext2 = get_temporal_extractor()
        assert ext1 is ext2

    def test_singleton_is_temporal_constraint_extractor(self):
        """Singleton should be TemporalConstraintExtractor instance."""
        ext = get_temporal_extractor()
        assert isinstance(ext, TemporalConstraintExtractor)


class TestIntegration:
    """Integration tests for full temporal extraction flow."""

    def test_dutch_point_in_time_full_flow(self):
        """Test complete flow for Dutch point-in-time query."""
        extractor = get_temporal_extractor()
        constraint = extractor.extract(
            "Wat was de status van het Rijksmuseum in 1990?"
        )
        assert constraint.constraint_type == "point_in_time"
        assert constraint.date_start == "1990-01-01"
        assert constraint.date_end == "1990-12-31"
        assert constraint.recommended_template == "point_in_time_state"

    def test_english_timeline_full_flow(self):
        """Test complete flow for English timeline query."""
        extractor = get_temporal_extractor()
        constraint = extractor.extract(
            "What is the history of the British Museum?"
        )
        assert constraint.constraint_type == "timeline"
        assert constraint.recommended_template == "institution_timeline"

    def test_date_range_full_flow(self):
        """Test complete flow for date range query."""
        extractor = get_temporal_extractor()
        constraint = extractor.extract(
            "Welke fusies vonden plaats tussen 1990 en 2010?"
        )
        # The query contains both a merger keyword ("fusies") and two years.
        # Merger keywords take precedence over the year range, so the expected
        # result is change_event rather than between.
        assert constraint.constraint_type == "change_event"
        assert constraint.reference_event == "merger"

    def test_superlative_with_location(self):
        """Test superlative query with location."""
        extractor = get_temporal_extractor()
        constraint = extractor.extract(
            "Wat is het oudste archief in Noord-Holland?"
        )
        assert constraint.constraint_type == "oldest"
        assert constraint.recommended_template == "find_by_founding"

    def test_complex_query_multiple_indicators(self):
        """Test query with multiple temporal indicators."""
        extractor = get_temporal_extractor()
        # "geschiedenis" (timeline) and "oudste" (oldest) - timeline wins (checked first)
        constraint = extractor.extract(
            "Vertel me de geschiedenis van de oudste bibliotheek"
        )
        assert constraint.constraint_type == "timeline"

    @pytest.mark.parametrize(
        ("query", "expected_template"),
        [
            ("Geschiedenis van het archief", "institution_timeline"),
            ("Het oudste museum", "find_by_founding"),
            ("Het nieuwste archief", "find_by_founding"),
            ("Status in 1990", "point_in_time_state"),
            ("Voor 1950", "point_in_time_state"),  # Year + before
            ("Na 2000", "point_in_time_state"),  # Year + after
            ("Fusies in de regio", "events_in_period"),
            ("Wanneer opgericht", "institution_timeline"),
            ("Wanneer gesloten", "institution_timeline"),
        ],
    )
    def test_query_templates_for_sparql(self, query, expected_template):
        """Test that all temporal constraints map to valid templates.

        Parametrized so every query is reported on its own; one failing case
        no longer hides the cases listed after it.
        """
        extractor = get_temporal_extractor()
        constraint = extractor.extract(query)
        # Some queries may not yield a constraint at all. Surface those as
        # skips instead of counting them as passes with nothing verified.
        if constraint.constraint_type == "none":
            pytest.skip(f"No temporal constraint extracted for query '{query}'")
        assert constraint.recommended_template == expected_template, (
            f"Query '{query}' expected template '{expected_template}', "
            f"got '{constraint.recommended_template}' "
            f"(constraint_type: {constraint.constraint_type})"
        )


class TestRealWorldQueries:
    """Tests with real-world heritage queries."""

    @pytest.fixture
    def extractor(self):
        """Provide the shared extractor returned by get_temporal_extractor."""
        return get_temporal_extractor()

    def test_noord_hollands_archief_history(self, extractor):
        """Real query about Noord-Hollands Archief history."""
        constraint = extractor.extract(
            "Wat is de geschiedenis van het Noord-Hollands Archief sinds de fusie in 2001?"
        )
        # "geschiedenis" (timeline) is checked before merger/year
        assert constraint.constraint_type == "timeline"

    def test_museum_founding_date(self, extractor):
        """Real query about museum founding."""
        constraint = extractor.extract(
            "Wanneer is het Rijksmuseum in Amsterdam opgericht?"
        )
        assert constraint.constraint_type == "founding"

    def test_archives_before_ww2(self, extractor):
        """Query about archives before WWII."""
        constraint = extractor.extract(
            "Welke gemeentearchieven bestonden voor 1940?"
        )
        assert constraint.constraint_type == "before"
        assert "1940" in constraint.date_end

    def test_oldest_university_library(self, extractor):
        """Query about oldest university library."""
        constraint = extractor.extract(
            "Wat is de oudste universiteitsbibliotheek van Nederland?"
        )
        assert constraint.constraint_type == "oldest"

    def test_museum_closures_pandemic(self, extractor):
        """Query about closures during pandemic."""
        constraint = extractor.extract(
            "Welke musea zijn gesloten tijdens de pandemie in 2020?"
        )
        # "gesloten" (closure) keyword
        assert constraint.constraint_type == "closure"

    def test_digital_archives_recent(self, extractor):
        """Query about recent digital archives."""
        constraint = extractor.extract(
            "Welke digitale archieven zijn na 2015 gelanceerd?"
        )
        assert constraint.constraint_type == "after"
        assert "2015" in constraint.date_start


# Run with: pytest backend/rag/test_temporal_intent.py -v