- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive). - Added tests for extracted entities and result handling to validate the extraction process. - Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format. - Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns. - Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
134 lines
4.8 KiB
Python
134 lines
4.8 KiB
Python
"""Tests for GHCID lookup tables."""
|
|
|
|
import pytest
|
|
|
|
from glam_extractor.identifiers.lookups import (
|
|
get_city_locode,
|
|
get_province_code,
|
|
get_ghcid_components_for_dutch_city,
|
|
)
|
|
|
|
|
|
class TestCityLookups:
    """Exercise ``get_city_locode`` (city name + country code -> city abbreviation).

    Each case passes a city name and an ISO country code and checks the
    three-letter abbreviation (or ``None``) that comes back.
    """

    def test_amsterdam_locode(self):
        """Amsterdam in NL maps to ``AMS``."""
        assert get_city_locode("Amsterdam", "NL") == "AMS"

    def test_rotterdam_locode(self):
        """Rotterdam in NL maps to ``ROT``."""
        expected = "ROT"  # the first three letters of the city name
        assert get_city_locode("Rotterdam", "NL") == expected

    def test_the_hague_locode(self):
        """The English name "The Hague" maps to ``THE``."""
        # Per the original author's note, GeoNames lists this city under its
        # English name "The Hague" rather than the Dutch "Den Haag".
        abbreviation = get_city_locode("The Hague", "NL")
        assert abbreviation == "THE"

    def test_den_haag_found_via_alias(self):
        """The Dutch name "Den Haag" yields the same code as "The Hague"."""
        # Per the original author's note, "Den Haag" is mapped onto the
        # English GeoNames entry through an alias.
        abbreviation = get_city_locode("Den Haag", "NL")
        assert abbreviation == "THE"

    def test_unknown_city(self):
        """A city name that cannot be resolved gives ``None``."""
        result = get_city_locode("UnknownCity", "NL")
        assert result is None

    def test_global_city_support(self):
        """Cities outside the Netherlands (Paris, Tokyo, Rio) are resolved too."""
        # (city, country code, expected abbreviation), checked in this order.
        cases = (
            ("Paris", "FR", "PAR"),
            ("Tokyo", "JP", "TOK"),
            ("Rio de Janeiro", "BR", "RIO"),
        )
        for city, country, expected in cases:
            assert get_city_locode(city, country) == expected
|
|
|
|
|
|
class TestProvinceLookups:
    """Exercise ``get_province_code`` (city name + country code -> province code).

    The expected values are the two-letter province parts of the Dutch
    ISO 3166-2 codes, as named in the original test comments.
    """

    def test_amsterdam_province(self):
        """Amsterdam is reported in ``NH`` (Noord-Holland)."""
        assert get_province_code("Amsterdam", "NL") == "NH"

    def test_rotterdam_province(self):
        """Rotterdam is reported in ``ZH`` (Zuid-Holland)."""
        assert get_province_code("Rotterdam", "NL") == "ZH"

    def test_groningen_province(self):
        """The city of Groningen is reported in ``GR`` (Groningen)."""
        assert get_province_code("Groningen", "NL") == "GR"

    def test_maastricht_province(self):
        """Maastricht is reported in ``LI`` (Limburg)."""
        assert get_province_code("Maastricht", "NL") == "LI"

    def test_unknown_city_province(self):
        """A city name that cannot be resolved gives ``None``."""
        result = get_province_code("UnknownCity", "NL")
        assert result is None
|
|
|
|
|
|
class TestCombinedLookup:
    """Exercise ``get_ghcid_components_for_dutch_city``.

    The helper is called with a city plus institution details and is expected
    to return a mapping of GHCID components, or ``None`` for an unknown city.
    """

    def test_amsterdam_components(self):
        """All components are filled in for an Amsterdam institution."""
        result = get_ghcid_components_for_dutch_city(
            city="Amsterdam",
            institution_name="Rijksmuseum",
            institution_type="M",
            english_name="State Museum Amsterdam",
        )

        assert result is not None
        # Checked key by key, in this order.
        expected = {
            "country_code": "NL",
            "region_code": "NH",
            "city_locode": "AMS",
            "institution_type": "M",
            "english_name": "State Museum Amsterdam",
        }
        for key, value in expected.items():
            assert result[key] == value

    def test_rotterdam_components(self):
        """Components for a Rotterdam institution when no English name is given."""
        result = get_ghcid_components_for_dutch_city(
            city="Rotterdam",
            institution_name="Museum Boijmans Van Beuningen",
            institution_type="M",
        )

        assert result is not None
        expected = {
            "country_code": "NL",
            "region_code": "ZH",
            "city_locode": "ROT",  # first three letters of "Rotterdam"
            # With no english_name argument, the institution name is used.
            "english_name": "Museum Boijmans Van Beuningen",
        }
        for key, value in expected.items():
            assert result[key] == value

    def test_unknown_city_components(self):
        """An unresolvable city makes the whole lookup return ``None``."""
        result = get_ghcid_components_for_dutch_city(
            city="UnknownCity",
            institution_name="Test Museum",
            institution_type="M",
        )

        assert result is None

    def test_leiden_components(self):
        """Leiden resolves to region ``ZH`` and city code ``LEI``."""
        result = get_ghcid_components_for_dutch_city(
            city="Leiden",
            institution_name="Museum Volkenkunde",
            institution_type="M",
            english_name="National Museum of Ethnology",
        )

        assert result is not None
        expected = {"region_code": "ZH", "city_locode": "LEI"}
        for key, value in expected.items():
            assert result[key] == value
|