"""
Unit tests for GHCID collision detection and resolution.

This module tests the GHCIDCollisionDetector class that implements temporal
collision resolution for Global Heritage Custodian Identifiers (GHCIDs).

Test Coverage:
- First batch collision resolution (all get Q-numbers)
- Historical addition collision resolution (only new gets Q-number)
- Q-number assignment (Wikidata preferred, synthetic fallback)
- GHCID history tracking with temporal validity
- PID stability guarantees (published GHCIDs never modified)

References:
- Implementation: src/glam_extractor/identifiers/collision_detector.py
- Specification: docs/PERSISTENT_IDENTIFIERS.md
- Algorithm: docs/plan/global_glam/07-ghcid-collision-resolution.md
"""

import pytest
from datetime import datetime, timezone

from glam_extractor.identifiers.collision_detector import (
    GHCIDCollisionDetector,
    CollisionGroup
)
from glam_extractor.models import HeritageCustodian, Identifier, Provenance, Location


# ---------------------------------------------------------------------------
# Fixture helpers
#
# The tests below only ever use two provenance "flavours" (authoritative CSV
# import vs. NLP extraction from conversations), Wikidata identifiers and
# single-location lists in the Netherlands. Building them through these
# helpers keeps each test focused on the fields that matter for the scenario.
# ---------------------------------------------------------------------------

def _utc(
    year: int,
    month: int,
    day: int,
    hour: int = 0,
    minute: int = 0,
    second: int = 0,
) -> datetime:
    """Return a timezone-aware UTC datetime."""
    return datetime(year, month, day, hour, minute, second, tzinfo=timezone.utc)


def _wikidata(qid: str) -> Identifier:
    """Build an ``Identifier`` with scheme ``"Wikidata"`` and value *qid*."""
    return Identifier(identifier_scheme="Wikidata", identifier_value=qid)


def _locations(city: str) -> list:
    """Return a one-element location list for *city* in the Netherlands."""
    return [Location(city=city, country="NL")]


def _csv_provenance(extraction_date: datetime) -> Provenance:
    """Provenance of a record parsed from the Dutch organisations CSV."""
    return Provenance(
        data_source="DUTCH_ORG_CSV",
        data_tier="TIER_1_AUTHORITATIVE",
        extraction_date=extraction_date,
        extraction_method="CSV parser",
    )


def _nlp_provenance(extraction_date: datetime) -> Provenance:
    """Provenance of a record extracted from conversations by an AI agent."""
    return Provenance(
        data_source="CONVERSATION_NLP",
        data_tier="TIER_4_INFERRED",
        extraction_date=extraction_date,
        extraction_method="AI agent NER",
    )


class TestFirstBatchCollision:
    """
    Test first batch collision scenario.

    Scenario: Multiple institutions discovered simultaneously (same
    extraction_date) generate identical base GHCIDs.

    Expected Behavior:
    - ALL institutions receive Q-number suffixes (fair treatment)
    - No temporal precedence since all extracted on same date
    """

    def test_two_institutions_same_base_ghcid_same_date(self):
        """
        Two institutions with same base GHCID extracted on same date.

        Expected: Both get Q-numbers appended.
        """
        # Arrange
        extraction_date = _utc(2025, 11, 1, 10, 0, 0)

        stedelijk = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/stedelijk-ams",
            name="Stedelijk Museum Amsterdam",
            ghcid="NL-NH-AMS-M-SM",
            ghcid_numeric=123456789012,
            institution_type="MUSEUM",
            identifiers=[_wikidata("Q621531")],
            locations=_locations("Amsterdam"),
            provenance=_nlp_provenance(extraction_date),
        )

        science = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/science-ams",
            name="Science Museum Amsterdam",
            ghcid="NL-NH-AMS-M-SM",
            ghcid_numeric=987654321098,
            institution_type="MUSEUM",
            identifiers=[_wikidata("Q98765432")],
            locations=_locations("Amsterdam"),
            provenance=_nlp_provenance(extraction_date),  # Same date!
        )

        detector = GHCIDCollisionDetector(published_dataset=[])

        # Act
        resolved = detector.resolve_collisions([stedelijk, science])

        # Assert
        assert len(resolved) == 2

        # Both should have Q-numbers
        ghcids = {inst.ghcid for inst in resolved}
        assert "NL-NH-AMS-M-SM-Q621531" in ghcids
        assert "NL-NH-AMS-M-SM-Q98765432" in ghcids

        # Both should have GHCID history entries
        for inst in resolved:
            assert inst.ghcid_history is not None
            assert len(inst.ghcid_history) == 2

            # Current entry (with Q-number)
            current = inst.ghcid_history[0]
            assert current.ghcid.endswith(('-Q621531', '-Q98765432'))
            assert current.valid_to is None  # Current
            assert "first batch collision" in current.reason

            # Base entry (without Q-number)
            base = inst.ghcid_history[1]
            assert base.ghcid == "NL-NH-AMS-M-SM"
            assert base.valid_to == extraction_date  # Immediately superseded

    def test_three_institutions_same_base_ghcid_same_date(self):
        """
        Three institutions with same base GHCID extracted on same date.

        Expected: All three get Q-numbers.
        """
        # Arrange
        extraction_date = _utc(2025, 11, 1, 10, 0, 0)

        wikidata_ids = ["Q111111", "Q222222", "Q333333"]
        names = ["Museum A", "Museum B", "Museum C"]

        institutions = [
            HeritageCustodian(
                id=f"https://w3id.org/heritage/custodian/nl/museum-{i}",
                name=name,
                ghcid="NL-NH-UTR-M-HM",
                ghcid_numeric=100000000000 + i,
                institution_type="MUSEUM",
                identifiers=[_wikidata(qid)],
                locations=_locations("Utrecht"),
                provenance=_nlp_provenance(extraction_date),
            )
            for i, (name, qid) in enumerate(zip(names, wikidata_ids))
        ]

        detector = GHCIDCollisionDetector(published_dataset=[])

        # Act
        resolved = detector.resolve_collisions(institutions)

        # Assert
        assert len(resolved) == 3

        # All should have Q-numbers
        ghcids = {inst.ghcid for inst in resolved}
        assert "NL-NH-UTR-M-HM-Q111111" in ghcids
        assert "NL-NH-UTR-M-HM-Q222222" in ghcids
        assert "NL-NH-UTR-M-HM-Q333333" in ghcids

        # All should have history entries
        for inst in resolved:
            assert len(inst.ghcid_history) == 2
            assert inst.ghcid_history[0].valid_to is None  # Current

    def test_first_batch_collision_uses_synthetic_qnumber_when_no_wikidata(self):
        """
        First batch collision where institutions lack Wikidata identifiers.

        Expected: Synthetic Q-numbers generated from ghcid_numeric hash.
        """
        # Arrange
        extraction_date = _utc(2025, 11, 1, 10, 0, 0)

        # No Wikidata identifiers
        inst1 = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/archive-1",
            name="Archive A",
            ghcid="NL-NH-AMS-A-AA",
            ghcid_numeric=123456789012,  # Will generate synthetic Q-number
            institution_type="ARCHIVE",
            identifiers=[],  # No Wikidata!
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(extraction_date),
        )

        inst2 = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/archive-2",
            name="Archive B",
            ghcid="NL-NH-AMS-A-AA",
            ghcid_numeric=987654321098,  # Different hash
            institution_type="ARCHIVE",
            identifiers=[],
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(extraction_date),
        )

        detector = GHCIDCollisionDetector(published_dataset=[])

        # Act
        resolved = detector.resolve_collisions([inst1, inst2])

        # Assert
        assert len(resolved) == 2

        # Both should have synthetic Q-numbers
        for inst in resolved:
            assert inst.ghcid.startswith("NL-NH-AMS-A-AA-Q")

            # Extract Q-number
            qnum = inst.ghcid.split('-')[-1]
            assert qnum.startswith('Q')
            assert qnum[1:].isdigit()  # Synthetic Q-number is numeric

        # Verify deterministic generation (same hash → same Q-number)
        expected_q1 = f"Q{inst1.ghcid_numeric % 100000000}"
        expected_q2 = f"Q{inst2.ghcid_numeric % 100000000}"

        ghcids = {inst.ghcid for inst in resolved}
        assert f"NL-NH-AMS-A-AA-{expected_q1}" in ghcids
        assert f"NL-NH-AMS-A-AA-{expected_q2}" in ghcids

    def test_detect_collisions_identifies_first_batch(self):
        """
        Test collision detection correctly identifies first batch collisions.

        Expected: CollisionGroup with collision_type="FIRST_BATCH"
        """
        # Arrange
        extraction_date = _utc(2025, 11, 1, 10, 0, 0)

        inst1 = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/lib-1",
            name="Library 1",
            ghcid="NL-NH-AMS-L-LB",
            ghcid_numeric=111111111111,
            institution_type="LIBRARY",
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(extraction_date),
        )

        inst2 = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/lib-2",
            name="Library 2",
            ghcid="NL-NH-AMS-L-LB",
            ghcid_numeric=222222222222,
            institution_type="LIBRARY",
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(extraction_date),  # Same date
        )

        detector = GHCIDCollisionDetector(published_dataset=[])

        # Act
        collisions = detector.detect_collisions([inst1, inst2])

        # Assert
        assert len(collisions) == 1
        assert "NL-NH-AMS-L-LB" in collisions

        collision_group = collisions["NL-NH-AMS-L-LB"]
        assert collision_group.collision_type == "FIRST_BATCH"
        assert collision_group.base_ghcid == "NL-NH-AMS-L-LB"
        assert len(collision_group.institutions) == 2
        assert collision_group.earliest_extraction_date == extraction_date


class TestHistoricalAdditionCollision:
    """
    Test historical addition collision scenario.

    Scenario: New institution discovered AFTER existing GHCID is published.

    Expected Behavior:
    - EXISTING institution keeps base GHCID (PID stability!)
    - ONLY new institution receives Q-number suffix
    """

    def test_new_institution_collides_with_published_ghcid(self):
        """
        New institution added later collides with published base GHCID.

        Expected:
        - Published GHCID unchanged (PID stability)
        - New institution gets Q-number
        """
        # Arrange
        published_date = _utc(2025, 11, 1, 10, 0, 0)
        new_date = _utc(2025, 11, 15, 14, 30, 0)

        # Published institution (already in PID registry)
        hermitage = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/hermitage-ams",
            name="Hermitage Amsterdam",
            ghcid="NL-NH-AMS-M-HM",  # NO Q-number (published first)
            ghcid_numeric=100000000000,
            institution_type="MUSEUM",
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(published_date),
        )

        # New institution discovered later
        historical = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/historical-ams",
            name="Historical Museum Amsterdam",
            ghcid="NL-NH-AMS-M-HM",  # COLLISION with published!
            ghcid_numeric=200000000000,
            institution_type="MUSEUM",
            identifiers=[_wikidata("Q17339437")],
            locations=_locations("Amsterdam"),
            provenance=_nlp_provenance(new_date),  # LATER date
        )

        # Initialize detector with published dataset
        detector = GHCIDCollisionDetector(published_dataset=[hermitage])

        # Act
        resolved = detector.resolve_collisions([historical])

        # Assert
        assert len(resolved) == 1
        new_inst = resolved[0]

        # New institution should have Q-number
        assert new_inst.ghcid == "NL-NH-AMS-M-HM-Q17339437"

        # Published institution UNCHANGED (PID stability)
        assert hermitage.ghcid == "NL-NH-AMS-M-HM"

        # New institution should have history entries
        assert len(new_inst.ghcid_history) == 2

        # Current entry (with Q-number)
        current = new_inst.ghcid_history[0]
        assert current.ghcid == "NL-NH-AMS-M-HM-Q17339437"
        assert current.valid_to is None
        assert "collision with existing" in current.reason
        assert "Hermitage Amsterdam" in current.reason

        # Base entry (without Q-number)
        base = new_inst.ghcid_history[1]
        assert base.ghcid == "NL-NH-AMS-M-HM"
        assert base.valid_to == new_date

    def test_multiple_new_institutions_collide_with_same_published_ghcid(self):
        """
        Multiple new institutions added over time, all colliding with same
        published GHCID.

        Expected: Each new institution gets Q-number, published remains
        unchanged.
        """
        # Arrange
        published_date = _utc(2025, 11, 1, 10, 0, 0)

        # Published institution
        published_inst = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/museum-pub",
            name="Published Museum",
            ghcid="NL-UT-UTR-S-HK",
            ghcid_numeric=100000000000,
            institution_type="COLLECTING_SOCIETY",
            locations=_locations("Utrecht"),
            provenance=_csv_provenance(published_date),
        )

        # New institutions added on different dates
        new_inst1 = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/society-1",
            name="Historical Society 1",
            ghcid="NL-UT-UTR-S-HK",
            ghcid_numeric=200000000000,
            institution_type="COLLECTING_SOCIETY",
            identifiers=[_wikidata("Q111111")],
            locations=_locations("Utrecht"),
            provenance=_nlp_provenance(_utc(2025, 11, 15)),
        )

        new_inst2 = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/society-2",
            name="Historical Society 2",
            ghcid="NL-UT-UTR-S-HK",
            ghcid_numeric=300000000000,
            institution_type="COLLECTING_SOCIETY",
            identifiers=[_wikidata("Q222222")],
            locations=_locations("Utrecht"),
            provenance=_nlp_provenance(_utc(2025, 12, 1)),
        )

        detector = GHCIDCollisionDetector(published_dataset=[published_inst])

        # Act - Process new institutions separately (simulating discovery over time)
        resolved1 = detector.resolve_collisions([new_inst1])
        resolved2 = detector.resolve_collisions([new_inst2])

        # Assert
        assert resolved1[0].ghcid == "NL-UT-UTR-S-HK-Q111111"
        assert resolved2[0].ghcid == "NL-UT-UTR-S-HK-Q222222"

        # Published GHCID still unchanged
        assert published_inst.ghcid == "NL-UT-UTR-S-HK"

    def test_detect_collisions_identifies_historical_addition(self):
        """
        Test collision detection identifies historical additions.

        Expected: CollisionGroup with collision_type="HISTORICAL_ADDITION"
        """
        # Arrange
        date1 = _utc(2025, 11, 1)
        date2 = _utc(2025, 11, 15)

        inst1 = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/inst-1",
            name="Institution 1",
            ghcid="NL-NH-AMS-G-GA",
            ghcid_numeric=111111111111,
            institution_type="GALLERY",
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(date1),  # Earlier
        )

        inst2 = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/inst-2",
            name="Institution 2",
            ghcid="NL-NH-AMS-G-GA",
            ghcid_numeric=222222222222,
            institution_type="GALLERY",
            locations=_locations("Amsterdam"),
            provenance=_nlp_provenance(date2),  # Later
        )

        detector = GHCIDCollisionDetector(published_dataset=[])

        # Act
        collisions = detector.detect_collisions([inst1, inst2])

        # Assert
        assert len(collisions) == 1
        collision_group = collisions["NL-NH-AMS-G-GA"]
        assert collision_group.collision_type == "HISTORICAL_ADDITION"
        assert collision_group.earliest_extraction_date == date1


class TestQNumberAssignment:
    """Test Q-number assignment logic (Wikidata preferred, synthetic fallback)."""

    def test_wikidata_qnumber_preferred_over_synthetic(self):
        """When Wikidata QID exists, it should be used instead of synthetic."""
        # Arrange
        extraction_date = _utc(2025, 11, 1)

        inst = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/rijksmuseum",
            name="Rijksmuseum",
            ghcid="NL-NH-AMS-M-RM",
            # 123456789012 % 100000000 == 56789012, so the synthetic fallback
            # would be Q56789012 if the Wikidata QID were ignored.
            ghcid_numeric=123456789012,
            institution_type="MUSEUM",
            identifiers=[
                Identifier(identifier_scheme="ISIL", identifier_value="NL-AsdRM"),
                _wikidata("Q190804"),  # Should use this!
            ],
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(extraction_date),
        )

        detector = GHCIDCollisionDetector(published_dataset=[])

        # Act
        qnumber = detector._assign_qnumber(inst)

        # Assert
        assert qnumber == "Q190804"  # Wikidata QID, not synthetic

    def test_synthetic_qnumber_when_no_wikidata(self):
        """When no Wikidata QID, synthetic Q-number should be generated."""
        # Arrange
        inst = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/local-archive",
            name="Local Archive",
            ghcid="NL-NH-AMS-A-LA",
            ghcid_numeric=123456789012,
            institution_type="ARCHIVE",
            identifiers=[
                Identifier(identifier_scheme="ISIL", identifier_value="NL-AsdLA")
                # No Wikidata!
            ],
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(_utc(2025, 11, 1)),
        )

        detector = GHCIDCollisionDetector(published_dataset=[])

        # Act
        qnumber = detector._assign_qnumber(inst)

        # Assert
        expected_synthetic = f"Q{inst.ghcid_numeric % 100000000}"
        assert qnumber == expected_synthetic

    def test_extract_wikidata_qid_normalizes_format(self):
        """Wikidata QID extraction should normalize format (ensure Q prefix)."""
        # Arrange
        inst_with_q = HeritageCustodian(
            id="https://example.org/1",
            name="Museum 1",
            institution_type="MUSEUM",
            identifiers=[_wikidata("Q621531")],
            provenance=_csv_provenance(_utc(2025, 11, 1)),
        )

        inst_without_q = HeritageCustodian(
            id="https://example.org/2",
            name="Museum 2",
            institution_type="MUSEUM",
            identifiers=[_wikidata("621531")],  # No Q prefix
            provenance=_csv_provenance(_utc(2025, 11, 1)),
        )

        detector = GHCIDCollisionDetector(published_dataset=[])

        # Act
        qid1 = detector._extract_wikidata_qid(inst_with_q)
        qid2 = detector._extract_wikidata_qid(inst_without_q)

        # Assert
        assert qid1 == "Q621531"
        assert qid2 == "Q621531"  # Normalized


class TestGHCIDHistoryTracking:
    """Test GHCID history entry creation and temporal validity tracking."""

    def test_ghcid_history_tracks_collision_resolution(self):
        """GHCID history should document transition from base to Q-number GHCID."""
        # Arrange
        extraction_date = _utc(2025, 11, 15, 14, 30, 0)

        published = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/pub",
            name="Published Institution",
            ghcid="NL-NH-AMS-M-PM",
            ghcid_numeric=100000000000,
            institution_type="MUSEUM",
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(_utc(2025, 11, 1)),
        )

        new_inst = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/new",
            name="New Museum",
            ghcid="NL-NH-AMS-M-PM",
            ghcid_numeric=200000000000,
            institution_type="MUSEUM",
            identifiers=[_wikidata("Q12345")],
            locations=_locations("Amsterdam"),
            provenance=_nlp_provenance(extraction_date),
        )

        detector = GHCIDCollisionDetector(published_dataset=[published])

        # Act
        resolved = detector.resolve_collisions([new_inst])

        # Assert
        inst = resolved[0]
        assert len(inst.ghcid_history) == 2

        # Entry 1: Current (with Q-number)
        current = inst.ghcid_history[0]
        assert current.ghcid == "NL-NH-AMS-M-PM-Q12345"
        assert current.valid_from == extraction_date
        assert current.valid_to is None  # Still current
        assert current.institution_name == "New Museum"
        assert current.location_city == "Amsterdam"
        assert current.location_country == "NL"

        # Entry 2: Base (without Q-number)
        base = inst.ghcid_history[1]
        assert base.ghcid == "NL-NH-AMS-M-PM"
        assert base.valid_from == extraction_date
        assert base.valid_to == extraction_date  # Immediately superseded

    def test_ghcid_history_handles_missing_location(self):
        """GHCID history should use fallback values when location is missing."""
        # Arrange
        inst = HeritageCustodian(
            id="https://example.org/inst",
            name="Unknown Location Museum",
            ghcid="XX-XX-XXX-M-UM",
            ghcid_numeric=100000000000,
            institution_type="MUSEUM",
            locations=[],  # No location data
            provenance=_nlp_provenance(_utc(2025, 11, 1)),
        )

        detector = GHCIDCollisionDetector(published_dataset=[])

        # Act
        history_entry = detector._create_ghcid_history_entry(
            institution=inst,
            ghcid="XX-XX-XXX-M-UM",
            valid_from=_utc(2025, 11, 1),
            valid_to=None,
            reason="Test entry"
        )

        # Assert
        assert history_entry.location_city == "Unknown"
        assert history_entry.location_country == "Unknown"


class TestPIDStabilityGuarantees:
    """Test PID stability guarantees - published GHCIDs must never change."""

    def test_published_ghcids_never_modified(self):
        """Published GHCIDs must remain unchanged even when collisions occur."""
        # Arrange
        published = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/stable",
            name="Stable Museum",
            ghcid="NL-NH-AMS-M-SM",
            ghcid_numeric=100000000000,
            institution_type="MUSEUM",
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(_utc(2025, 11, 1)),
        )

        original_ghcid = published.ghcid

        new_inst = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/new-collision",
            name="Colliding Museum",
            ghcid="NL-NH-AMS-M-SM",  # Collision!
            ghcid_numeric=200000000000,
            institution_type="MUSEUM",
            identifiers=[_wikidata("Q99999")],
            locations=_locations("Amsterdam"),
            provenance=_nlp_provenance(_utc(2025, 11, 15)),
        )

        detector = GHCIDCollisionDetector(published_dataset=[published])

        # Act
        detector.resolve_collisions([new_inst])

        # Assert - Published GHCID UNCHANGED
        assert published.ghcid == original_ghcid
        assert published.ghcid == "NL-NH-AMS-M-SM"  # No Q-number added

    def test_no_collision_when_ghcid_already_has_qnumber(self):
        """Institutions with Q-numbers in GHCID should not be modified."""
        # Arrange
        inst_with_q = HeritageCustodian(
            id="https://w3id.org/heritage/custodian/nl/with-q",
            name="Museum with Q-number",
            ghcid="NL-NH-AMS-M-MQ-Q621531",  # Already has Q-number
            ghcid_numeric=100000000000,
            institution_type="MUSEUM",
            locations=_locations("Amsterdam"),
            provenance=_csv_provenance(_utc(2025, 11, 1)),
        )

        detector = GHCIDCollisionDetector(published_dataset=[])

        # Act
        resolved = detector.resolve_collisions([inst_with_q])

        # Assert - GHCID unchanged
        assert resolved[0].ghcid == "NL-NH-AMS-M-MQ-Q621531"


if __name__ == "__main__":
    # Propagate pytest's exit code so running this file directly fails the
    # calling shell/CI step when any test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))