glam/tests/identifiers/test_lookups.py
kempersc e5a532a8bc Add comprehensive tests for NLP institution extraction and RDF partnership integration
- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
2025-11-19 23:20:47 +01:00

134 lines
4.8 KiB
Python

"""Tests for GHCID lookup tables."""
import pytest
from glam_extractor.identifiers.lookups import (
get_city_locode,
get_province_code,
get_ghcid_components_for_dutch_city,
)
class TestCityLookups:
    """Checks for ``get_city_locode`` (city name → GeoNames abbreviation)."""

    def test_amsterdam_locode(self):
        """Amsterdam in NL resolves to ``AMS``."""
        assert get_city_locode("Amsterdam", "NL") == "AMS"

    def test_rotterdam_locode(self):
        """Rotterdam in NL resolves to ``ROT``."""
        # Expected value is the first three letters of the city name.
        assert get_city_locode("Rotterdam", "NL") == "ROT"

    def test_the_hague_locode(self):
        """The English name "The Hague" resolves to ``THE``."""
        # GeoNames lists the city under "The Hague" rather than "Den Haag".
        assert get_city_locode("The Hague", "NL") == "THE"

    def test_den_haag_found_via_alias(self):
        """The Dutch name "Den Haag" resolves to the same code as "The Hague"."""
        # "Den Haag" is expected to be aliased onto the GeoNames entry
        # "The Hague", hence the identical abbreviation.
        assert get_city_locode("Den Haag", "NL") == "THE"

    def test_unknown_city(self):
        """A city that is not in the lookup table yields ``None``."""
        assert get_city_locode("UnknownCity", "NL") is None

    def test_global_city_support(self):
        """Cities outside the Netherlands (Paris, Tokyo, etc.) are resolvable too."""
        # Lookups are backed by GeoNames, so every country is supported.
        expectations = (
            ("Paris", "FR", "PAR"),
            ("Tokyo", "JP", "TOK"),
            ("Rio de Janeiro", "BR", "RIO"),
        )
        for city_name, country, abbreviation in expectations:
            assert get_city_locode(city_name, country) == abbreviation
class TestProvinceLookups:
    """Checks for ``get_province_code`` (city name → ISO 3166-2 province code)."""

    def test_amsterdam_province(self):
        """Amsterdam maps to ``NH`` (Noord-Holland)."""
        assert get_province_code("Amsterdam", "NL") == "NH"

    def test_rotterdam_province(self):
        """Rotterdam maps to ``ZH`` (Zuid-Holland)."""
        assert get_province_code("Rotterdam", "NL") == "ZH"

    def test_groningen_province(self):
        """Groningen (city) maps to ``GR`` (Groningen province)."""
        assert get_province_code("Groningen", "NL") == "GR"

    def test_maastricht_province(self):
        """Maastricht maps to ``LI`` (Limburg)."""
        assert get_province_code("Maastricht", "NL") == "LI"

    def test_unknown_city_province(self):
        """A city that is not in the lookup table yields ``None``."""
        assert get_province_code("UnknownCity", "NL") is None
class TestCombinedLookup:
    """Checks for ``get_ghcid_components_for_dutch_city`` (all GHCID parts at once)."""

    def test_amsterdam_components(self):
        """An Amsterdam institution gets the full set of expected components."""
        result = get_ghcid_components_for_dutch_city(
            city="Amsterdam",
            institution_name="Rijksmuseum",
            institution_type="M",
            english_name="State Museum Amsterdam",
        )

        assert result is not None
        # Checked in a fixed order: country, region, city, type, English name.
        expected_pairs = (
            ("country_code", "NL"),
            ("region_code", "NH"),
            ("city_locode", "AMS"),
            ("institution_type", "M"),
            ("english_name", "State Museum Amsterdam"),
        )
        for field, wanted in expected_pairs:
            assert result[field] == wanted

    def test_rotterdam_components(self):
        """A Rotterdam institution without an explicit English name."""
        museum_name = "Museum Boijmans Van Beuningen"
        result = get_ghcid_components_for_dutch_city(
            city="Rotterdam",
            institution_name=museum_name,
            institution_type="M",
        )

        assert result is not None
        assert result["country_code"] == "NL"
        assert result["region_code"] == "ZH"
        # Abbreviation is the first three letters of "Rotterdam".
        assert result["city_locode"] == "ROT"
        # With no english_name supplied, the institution name is used instead.
        assert result["english_name"] == museum_name

    def test_unknown_city_components(self):
        """A city that is not in the lookup table makes the whole lookup return ``None``."""
        result = get_ghcid_components_for_dutch_city(
            city="UnknownCity",
            institution_name="Test Museum",
            institution_type="M",
        )

        assert result is None

    def test_leiden_components(self):
        """A Leiden institution resolves to region ``ZH`` and city code ``LEI``."""
        result = get_ghcid_components_for_dutch_city(
            city="Leiden",
            institution_name="Museum Volkenkunde",
            institution_type="M",
            english_name="National Museum of Ethnology",
        )

        assert result is not None
        assert result["region_code"] == "ZH"
        assert result["city_locode"] == "LEI"