- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive). - Added tests for extracted entities and result handling to validate the extraction process. - Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format. - Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns. - Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
238 lines
6.5 KiB
Python
Executable file
238 lines
6.5 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
Add locations to institutions that are missing them.

For Mexican institutions: Mexico City, or the institution's own home city
(e.g. Mérida for the Biblioteca Virtual de Yucatán).
For international resources: their actual headquarters.
"""
|
|
|
|
import yaml
|
|
from pathlib import Path
|
|
|
|
# Location mappings
|
|
# Shared (city, region, country) triple for the Mexico City based entries.
# A tuple is used on purpose: it is immutable, so sharing it between rows is safe.
_CDMX = ("Mexico City", "Ciudad de México", "MX")

# One row per institution: exact institution name -> (city, region, country).
# Row order is kept as originally written.
_LOCATION_ROWS = {
    # Mexican institutions - Mexico City unless the row says otherwise
    "Instituto Nacional de Antropología e Historia": _CDMX,
    "Sistema de Información Cultural (SIC)": _CDMX,
    "Mexicana - Repositorio del Patrimonio Cultural": _CDMX,
    "Archivo General de la Nación": _CDMX,
    "Memórica México Platform": _CDMX,
    "Biblioteca Nacional de México": _CDMX,
    "Hemeroteca Nacional Digital de México": _CDMX,
    "Museo Nacional de Arte (MUNAL)": _CDMX,
    "Museo Universitario Arte Contemporáneo (MUAC)": _CDMX,
    "Museo Nacional de Antropología": _CDMX,
    "Museo del Templo Mayor": _CDMX,
    "Sistema Bibliotecario UNAM (SIBIUNAM)": _CDMX,
    "Instituto Politécnico Nacional (IPN)": _CDMX,
    "El Colegio de México (COLMEX)": _CDMX,
    "Centro de Investigación y Docencia Económicas (CIDE)": _CDMX,
    "Biblioteca Virtual de Yucatán": ("Mérida", "Yucatán", "MX"),
    "Mexico City Resources": _CDMX,
    "Archivo Histórico Diplomático (SRE)": _CDMX,
    "Mapoteca Manuel Orozco y Berra": _CDMX,
    "Archivo Histórico de la UNAM (AHUNAM)": _CDMX,
    "Centro de Estudios de Historia de México Carso": _CDMX,
    "Memórica México": _CDMX,
    "Mexicana Repository": _CDMX,
    "Red de Humanidades Digitales (RedHD)": _CDMX,
    "Códices de México": _CDMX,
    "Fonoteca Nacional": _CDMX,

    # International resources - actual headquarters
    "WorldCat.org": ("Dublin", "Ohio", "US"),
    "WorldCat Registry": ("Dublin", "Ohio", "US"),
    "HathiTrust Digital Library": ("Ann Arbor", "Michigan", "US"),
    "Internet Archive": ("San Francisco", "California", "US"),
    "Nettie Lee Benson Collection (UT Austin)": ("Austin", "Texas", "US"),
    "Library of Congress Hispanic Reading Room": (
        "Washington", "District of Columbia", "US",
    ),
    "CLACSO Virtual Libraries": ("Buenos Aires", "Buenos Aires", "AR"),
    "Latin American Network Information Center (LANIC)": ("Austin", "Texas", "US"),
}

# Location mappings: institution name -> {"city", "region", "country"}.
# Every value is a freshly built dict, so no two names share one mapping
# object, and the inner key order is always city, region, country.
LOCATION_MAP = {
    name: {"city": city, "region": region, "country": country}
    for name, (city, region, country) in _LOCATION_ROWS.items()
}
|
|
|
|
|
|
def _add_missing_locations(institutions, location_map):
    """Give every location-less institution its mapped location.

    Args:
        institutions: List of institution records as loaded from the YAML
            file; matching records are modified in place.
        location_map: Mapping of institution name to a location dict that
            contains at least ``city`` and ``country``.

    Returns:
        The number of institutions that received a location.
    """
    updated_count = 0

    for inst in institutions:
        # Skip malformed records rather than crashing on ``.get``.
        if not isinstance(inst, dict):
            continue
        # Only fill in records that have no (or an empty) locations entry.
        if inst.get('locations'):
            continue

        name = inst.get('name')
        if not isinstance(name, str) or name not in location_map:
            continue

        # Copy the mapping: attaching the shared object itself would make
        # records with the same name share one dict (which PyYAML writes as
        # an anchor/alias pair) and would let later edits to a record leak
        # back into the lookup table.
        location = dict(location_map[name])
        inst['locations'] = [location]
        updated_count += 1
        print(f"✓ Added location for: {name}")
        print(f" → {location['city']}, {location['country']}")

    return updated_count


def main():
    """Fill in missing institution locations and rewrite the YAML file.

    Loads ``data/instances/latin_american_institutions_AUTHORITATIVE.yaml``
    (relative to the directory two levels above this script), adds a
    ``locations`` list to every record whose ``name`` is a key of
    ``LOCATION_MAP`` and which has no locations yet, then writes the data
    back to the same file behind a fixed comment header.

    Raises:
        SystemExit: If the YAML document is not a top-level list.
    """
    # resolve() keeps the lookup correct when __file__ is a bare relative
    # filename, where ``.parent.parent`` would otherwise collapse to ".".
    project_root = Path(__file__).resolve().parent.parent
    input_file = (
        project_root / "data" / "instances"
        / "latin_american_institutions_AUTHORITATIVE.yaml"
    )

    print(f"Loading institutions from: {input_file}")

    with open(input_file, 'r', encoding='utf-8') as f:
        institutions = yaml.safe_load(f)

    # safe_load returns None for an empty document; fail with a clear message
    # instead of a TypeError from len().
    if not isinstance(institutions, list):
        raise SystemExit(
            f"Expected a top-level list of institutions in {input_file}, "
            f"got {type(institutions).__name__}"
        )

    print(f"Found {len(institutions)} institutions")

    # Add locations
    updated_count = _add_missing_locations(institutions, LOCATION_MAP)

    print()
    print(f"Updated {updated_count} institutions with locations")

    # Serialise BEFORE opening the target for writing: opening with 'w'
    # truncates the file, so a dump error must not be able to happen after it.
    body = yaml.dump(
        institutions,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
        width=100,
    )

    # Keep the header
    header = (
        "---\n"
        "# Latin American GLAM Institutions - GHCID Enhanced\n"
        "# Last updated with locations for national/international institutions\n"
        "# Ready for complete GHCID generation\n"
        "\n"
    )

    # Write back
    print(f"Writing updated YAML to: {input_file}")

    with open(input_file, 'w', encoding='utf-8') as f:
        f.write(header)
        f.write(body)

    print("✅ Done!")


if __name__ == "__main__":
    main()
|