658 lines
20 KiB
Python
658 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Import Argentina heritage institutions from Wikidata into custodian YAML files.
|
|
|
|
Queries Wikidata for museums and archives in Argentina, filters out institutions
|
|
that already exist in custodian files, and creates new YAML files with complete
|
|
GHCID metadata.
|
|
|
|
GLAM Data Extraction Project
|
|
Schema: LinkML v0.2.1
|
|
Country: Argentina (AR)
|
|
Source: Wikidata SPARQL queries
|
|
|
|
Usage:
|
|
python scripts/import_argentina_wikidata_institutions.py [--dry-run]
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sqlite3
|
|
import sys
|
|
import time
|
|
import unicodedata
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any, Optional
|
|
|
|
import requests
|
|
import yaml
|
|
|
|
# Add project root to path for imports
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
|
from glam_extractor.identifiers.ghcid import GHCIDComponents
|
|
|
|
# Constants

# Wikidata Query Service endpoint that receives the SPARQL requests.
SPARQL_ENDPOINT = "https://query.wikidata.org/sparql"
# User-Agent header value sent with every SPARQL request.
USER_AGENT = "GLAM-Argentina-Wikidata-Import/1.0 (https://github.com/glam-project)"
# Two directory levels above this script; root for the data paths below.
BASE_DIR = Path(__file__).parent.parent
# Directory scanned for existing AR-*.yaml files and target for new ones.
CUSTODIAN_DIR = BASE_DIR / "data" / "custodian"
# SQLite database queried (table "cities") for reverse geocoding.
GEONAMES_DB = BASE_DIR / "data" / "reference" / "geonames.db"
# Path to an ISO 3166-2 reference file; not referenced by the code visible in
# this file.
ISO_3166_2_AR = BASE_DIR / "data" / "reference" / "iso_3166_2_ar.json"
|
|
|
|
# Argentina ISO 3166-2 region codes
# Maps province names to the single-letter subdivision suffix. Several
# provinces appear more than once (Spanish/English names, accented and
# unaccented spellings) so that lookups succeed for either form.
# city_label_to_region() also tests these keys as substrings of a label, so
# keys that contain one another (e.g. "Buenos Aires" inside
# "Ciudad Autónoma de Buenos Aires") need care when entries are added.
AR_REGION_CODES = {
    "Salta": "A",
    "Buenos Aires": "B",
    "Buenos Aires Province": "B",
    "Provincia de Buenos Aires": "B",
    "Ciudad Autónoma de Buenos Aires": "C",
    "Ciudad de Buenos Aires": "C",
    "Autonomous City of Buenos Aires": "C",
    "Capital Federal": "C",
    "CABA": "C",
    "San Luis": "D",
    "Entre Ríos": "E",
    "Entre Rios": "E",
    "La Rioja": "F",
    "Santiago del Estero": "G",
    "Chaco": "H",
    "San Juan": "J",
    "Catamarca": "K",
    "La Pampa": "L",
    "Mendoza": "M",
    "Misiones": "N",
    "Formosa": "P",
    "Neuquén": "Q",
    "Neuquen": "Q",
    "Río Negro": "R",
    "Rio Negro": "R",
    "Santa Fe": "S",
    "Tucumán": "T",
    "Tucuman": "T",
    "Chubut": "U",
    "Tierra del Fuego": "V",
    "Corrientes": "W",
    "Córdoba": "X",
    "Cordoba": "X",
    "Jujuy": "Y",
    "Santa Cruz": "Z",
}
|
|
|
|
# GeoNames admin1 code to ISO 3166-2 mapping for Argentina
# Keys are the two-character admin1_code strings read from the GeoNames
# "cities" table; values are the same single-letter codes used in
# AR_REGION_CODES. reverse_geocode_to_region() falls back to "XX" for any
# admin1 code that is missing here.
GEONAMES_ADMIN1_TO_ISO = {
    "01": "B",  # Buenos Aires Province
    "02": "K",  # Catamarca
    "03": "H",  # Chaco
    "04": "U",  # Chubut
    "05": "X",  # Córdoba
    "06": "W",  # Corrientes
    "07": "C",  # Ciudad de Buenos Aires (CABA)
    "08": "E",  # Entre Ríos
    "09": "P",  # Formosa
    "10": "Y",  # Jujuy
    "11": "L",  # La Pampa
    "12": "F",  # La Rioja
    "13": "M",  # Mendoza
    "14": "N",  # Misiones
    "15": "Q",  # Neuquén
    "16": "R",  # Río Negro
    "17": "A",  # Salta
    "18": "J",  # San Juan
    "19": "D",  # San Luis
    "20": "Z",  # Santa Cruz
    "21": "S",  # Santa Fe
    "22": "G",  # Santiago del Estero
    "23": "V",  # Tierra del Fuego
    "24": "T",  # Tucumán
}
|
|
|
|
|
|
def normalize_to_ascii(text: str) -> str:
    """Strip diacritics from *text*.

    The string is decomposed with Unicode NFD so that accented letters become
    a base letter followed by combining marks; every character whose Unicode
    category is "Mn" (nonspacing mark) is then dropped. Characters that have
    no such decomposition are returned unchanged.
    """
    decomposed = unicodedata.normalize("NFD", text)
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
    return "".join(base_chars)
|
|
|
|
|
|
def generate_city_code(city_name: str) -> str:
    """
    Generate an upper-case city code (up to 3 letters) from a city name.

    Rules:
    - Empty or whitespace-only name: "XXX"
    - Single word: first 3 letters
    - Name starting with a Spanish article/preposition (la, el, los, las,
      de, del): those words are dropped everywhere in the name; one remaining
      word gives its first 3 letters, several give their initials (up to 3)
    - Any other multi-word name: first letter of each word (up to 3)

    Diacritics are removed before the code is derived. The result can be
    shorter than 3 characters (e.g. a two-word name yields two initials).
    """
    if not city_name:
        return "XXX"

    # Normalize to ASCII, then split on whitespace.
    words = normalize_to_ascii(city_name).split()

    # A whitespace-only name splits into an empty list; without this guard
    # the words[0] access below raises IndexError.
    if not words:
        return "XXX"

    if len(words) == 1:
        # Single word: first 3 letters
        return words[0][:3].upper()

    spanish_articles = {"la", "el", "los", "las", "de", "del"}
    if words[0].lower() in spanish_articles:
        # Leading article: build the code from the non-article words only.
        remaining = [w for w in words if w.lower() not in spanish_articles]
        if remaining:
            if len(remaining) == 1:
                return remaining[0][:3].upper()
            return "".join(w[0] for w in remaining[:3]).upper()
        # Only articles in the name: fall through to plain initials.

    # Multi-word: initials
    return "".join(w[0] for w in words[:3]).upper()
|
|
|
|
|
|
def extract_abbreviation_from_name(name: str) -> str:
    """
    Build an institution abbreviation from its (emic) name.

    The name is stripped of diacritics and of every character that is not an
    ASCII letter, digit or whitespace. The upper-cased first character of each
    remaining word is concatenated, skipping Spanish articles, prepositions
    and conjunctions. If every word is a skip word, all words are used.
    The result is capped at 10 characters; "UNK" is returned when no word
    is left at all.
    """
    if not name:
        return "UNK"

    # Spanish articles, prepositions and conjunctions that carry no initial.
    stop_words = {
        "el", "la", "los", "las", "un", "una", "unos", "unas",
        "de", "del", "a", "al", "en", "con", "por", "para",
        "sobre", "bajo", "y", "o", "e", "u"
    }

    ascii_only = re.sub(r"[^a-zA-Z0-9\s]", "", normalize_to_ascii(name))
    tokens = ascii_only.split()

    # Prefer the significant words; fall back to every word when none remain.
    chosen = [tok for tok in tokens if tok.lower() not in stop_words] or tokens
    if not chosen:
        return "UNK"

    initials = "".join(tok[0].upper() for tok in chosen)
    return initials[:10]
|
|
|
|
|
|
def query_wikidata_museums() -> list[dict]:
    """Fetch museums located in Argentina from Wikidata.

    Selects items that are an instance of wd:Q33506 (or of any subclass) with
    country wd:Q414, together with optional coordinates (P625), the label of
    the P131 value and a website (P856). Labels are requested in Spanish with
    English as fallback.

    Returns the raw result bindings produced by _execute_sparql(), which is
    an empty list when the request fails.
    """
    sparql = """
    SELECT DISTINCT ?item ?itemLabel ?coords ?cityLabel ?websiteUrl WHERE {
      ?item wdt:P31/wdt:P279* wd:Q33506 . # instance of museum (or subclass)
      ?item wdt:P17 wd:Q414 . # country: Argentina

      OPTIONAL { ?item wdt:P625 ?coords . }
      OPTIONAL { ?item wdt:P131 ?city . }
      OPTIONAL { ?item wdt:P856 ?websiteUrl . }

      SERVICE wikibase:label { bd:serviceParam wikibase:language "es,en" . }
    }
    ORDER BY ?itemLabel
    """
    return _execute_sparql(sparql)
|
|
|
|
|
|
def query_wikidata_archives() -> list[dict]:
    """Fetch archives located in Argentina from Wikidata.

    Selects items that are an instance of wd:Q166118 (or of any subclass)
    with country wd:Q414, together with optional coordinates (P625), the
    label of the P131 value and a website (P856). Labels are requested in
    Spanish with English as fallback.

    Returns the raw result bindings produced by _execute_sparql(), which is
    an empty list when the request fails.
    """
    sparql = """
    SELECT DISTINCT ?item ?itemLabel ?coords ?cityLabel ?websiteUrl WHERE {
      ?item wdt:P31/wdt:P279* wd:Q166118 . # instance of archive (or subclass)
      ?item wdt:P17 wd:Q414 . # country: Argentina

      OPTIONAL { ?item wdt:P625 ?coords . }
      OPTIONAL { ?item wdt:P131 ?city . }
      OPTIONAL { ?item wdt:P856 ?websiteUrl . }

      SERVICE wikibase:label { bd:serviceParam wikibase:language "es,en" . }
    }
    ORDER BY ?itemLabel
    """
    return _execute_sparql(sparql)
|
|
|
|
|
|
def query_wikidata_galleries() -> list[dict]:
    """Fetch art galleries located in Argentina from Wikidata.

    Selects items that are an instance of wd:Q1007870 (or of any subclass)
    with country wd:Q414, together with optional coordinates (P625), the
    label of the P131 value and a website (P856). Labels are requested in
    Spanish with English as fallback.

    Returns the raw result bindings produced by _execute_sparql(), which is
    an empty list when the request fails.
    """
    sparql = """
    SELECT DISTINCT ?item ?itemLabel ?coords ?cityLabel ?websiteUrl WHERE {
      ?item wdt:P31/wdt:P279* wd:Q1007870 . # instance of art gallery (or subclass)
      ?item wdt:P17 wd:Q414 . # country: Argentina

      OPTIONAL { ?item wdt:P625 ?coords . }
      OPTIONAL { ?item wdt:P131 ?city . }
      OPTIONAL { ?item wdt:P856 ?websiteUrl . }

      SERVICE wikibase:label { bd:serviceParam wikibase:language "es,en" . }
    }
    ORDER BY ?itemLabel
    """
    return _execute_sparql(sparql)
|
|
|
|
|
|
def _execute_sparql(query: str) -> list[dict]:
    """Send *query* to SPARQL_ENDPOINT and return its result bindings.

    A one-second pause precedes every request as simple rate limiting. The
    request is an HTTP GET with a 60-second timeout that asks for SPARQL JSON
    results.

    Returns the list found under results -> bindings in the JSON response
    (an empty list when those keys are absent). Any exception raised while
    requesting or decoding is reported on stdout and turned into an empty
    list, so callers cannot tell a failed query from one with no matches.
    """
    request_headers = {
        "User-Agent": USER_AGENT,
        "Accept": "application/sparql-results+json"
    }
    request_params = {"query": query, "format": "json"}

    time.sleep(1.0)  # Rate limiting

    try:
        reply = requests.get(
            SPARQL_ENDPOINT,
            params=request_params,
            headers=request_headers,
            timeout=60,
        )
        reply.raise_for_status()
        payload = reply.json()
        results_section = payload.get("results", {})
        return results_section.get("bindings", [])
    except Exception as e:
        print(f" ❌ SPARQL query failed: {e}")
        return []
|
|
|
|
|
|
def parse_wikidata_result(binding: dict, institution_type: str) -> Optional[dict]:
    """Parse a Wikidata SPARQL result binding into a normalized dict.

    Args:
        binding: One SPARQL JSON binding; each variable maps to a dict that
            holds the actual value under "value". Variables read: "item",
            "itemLabel", "coords", "cityLabel", "websiteUrl".
        institution_type: Type code stored unchanged under "institution_type".

    Returns:
        A dict with "qid", "name" and "institution_type", plus "latitude" and
        "longitude" (always both or neither), "city" and "website" when they
        are available. None when the item has no QID or no real label (the
        label is empty or merely repeats the QID).
    """
    item_uri = binding.get("item", {}).get("value", "")
    # The QID is the last path segment of the entity URI.
    qid = item_uri.split("/")[-1] if item_uri else None

    if not qid or not qid.startswith("Q"):
        return None

    label = binding.get("itemLabel", {}).get("value", "")
    if not label or label == qid:  # Skip if label is just the QID (no label found)
        return None

    result = {
        "qid": qid,
        "name": label,
        "institution_type": institution_type,
    }

    # Parse coordinates, given as WKT "Point(<lon> <lat>)".
    coords_str = binding.get("coords", {}).get("value", "")
    if coords_str and coords_str.startswith("Point("):
        try:
            lon_text, lat_text = coords_str[6:-1].split()
            latitude = float(lat_text)
            longitude = float(lon_text)
        except (ValueError, IndexError):
            # Malformed point: leave both keys out rather than store half of
            # a coordinate pair.
            pass
        else:
            result["latitude"] = latitude
            result["longitude"] = longitude

    # Parse city. An unlabelled entity comes back as its bare QID ("Q" followed
    # by digits); only that shape is rejected, so real names that start with
    # "Q" (e.g. "Quilmes") are kept.
    city = binding.get("cityLabel", {}).get("value", "")
    if city and not re.fullmatch(r"Q\d+", city):
        result["city"] = city

    # Parse website
    website = binding.get("websiteUrl", {}).get("value", "")
    if website:
        result["website"] = website

    return result
|
|
|
|
|
|
def get_existing_qids() -> set[str]:
    """Get set of Wikidata QIDs already in Argentina custodian files.

    Every CUSTODIAN_DIR/AR-*.yaml file is loaded with yaml.safe_load and two
    places are inspected independently:
    wikidata_enrichment.wikidata_entity_id and original_entry.wikidata_id.

    Files that cannot be read or parsed are reported on stdout and skipped.
    Files whose top level is not a mapping, and sections that are missing,
    null or not a mapping, are ignored without failing the scan.
    """
    # (section, key) pairs under which a QID may be recorded.
    qid_locations = (
        ("wikidata_enrichment", "wikidata_entity_id"),
        ("original_entry", "wikidata_id"),
    )

    qids: set[str] = set()
    for filepath in CUSTODIAN_DIR.glob("AR-*.yaml"):
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f)
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
            # Best effort: one broken file must not abort the scan, but it
            # should be visible because its QID cannot be deduplicated.
            print(f" ⚠️ Skipping unreadable custodian file {filepath.name}: {e}")
            continue

        if not isinstance(data, dict):
            continue

        for section, key in qid_locations:
            section_data = data.get(section)
            # A null or malformed section must not prevent checking the
            # other one.
            if not isinstance(section_data, dict):
                continue
            wd_id = section_data.get(key)
            if wd_id and isinstance(wd_id, str):
                qids.add(wd_id)

    return qids
|
|
|
|
|
|
def reverse_geocode_to_region(lat: float, lon: float) -> Optional[tuple[str, str, str]]:
    """
    Reverse geocode coordinates to find region code and city.

    Looks up the nearest populated place (selected GeoNames PPL* feature
    codes, country 'AR') in the "cities" table of GEONAMES_DB.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns: (region_code, city_name, city_code) or None

    region_code is "XX" when the row's admin1 code has no entry in
    GEONAMES_ADMIN1_TO_ISO. None is returned when the database file does not
    exist, no row matches, or the lookup raises (the error is printed).
    """
    import math

    if not GEONAMES_DB.exists():
        return None

    # A degree of longitude is shorter than a degree of latitude by a factor
    # of cos(latitude). Scaling the squared longitude difference by cos^2
    # keeps the nearest-place ranking from over-weighting east-west offsets.
    lon_scale_sq = math.cos(math.radians(lat)) ** 2

    conn = None
    try:
        conn = sqlite3.connect(GEONAMES_DB)
        cursor = conn.cursor()

        # Find nearest city with proper feature codes (not neighborhoods)
        cursor.execute("""
            SELECT name, ascii_name, admin1_code, admin1_name,
                   latitude, longitude,
                   ((latitude - ?) * (latitude - ?) + (longitude - ?) * (longitude - ?) * ?) as distance_sq
            FROM cities
            WHERE country_code = 'AR'
              AND feature_code IN ('PPL', 'PPLA', 'PPLA2', 'PPLA3', 'PPLA4', 'PPLC', 'PPLS', 'PPLG')
            ORDER BY distance_sq
            LIMIT 1
        """, (lat, lat, lon, lon, lon_scale_sq))

        row = cursor.fetchone()

        if row:
            city_name = row[0]
            ascii_name = row[1]
            admin1_code = row[2]

            # Map GeoNames admin1 to ISO 3166-2
            region_code = GEONAMES_ADMIN1_TO_ISO.get(admin1_code, "XX")
            city_code = generate_city_code(ascii_name or city_name)

            return (region_code, city_name, city_code)
    except Exception as e:
        # Deliberately broad: geocoding is best effort and callers fall back
        # to label-based resolution when None is returned.
        print(f" ⚠️ GeoNames lookup failed: {e}")
    finally:
        # Close the connection on every path, including when the query raises.
        if conn is not None:
            conn.close()

    return None
|
|
|
|
|
|
def city_label_to_region(city_label: str) -> Optional[str]:
    """Try to map city label to region code.

    Matching is case-insensitive and proceeds from most to least specific:

    1. the label equals a key of AR_REGION_CODES;
    2. a key of AR_REGION_CODES occurs inside the label, longest key first,
       so "Ciudad Autónoma de Buenos Aires" resolves to "C" instead of being
       captured by the shorter "Buenos Aires" ("B");
    3. a known city name from the local table occurs inside the label.

    Returns the single-letter region code, or None when nothing matches or
    the label is empty.
    """
    if not city_label:
        return None

    label_lower = city_label.lower()

    # 1. Exact match against every region name before any substring test.
    for name, code in AR_REGION_CODES.items():
        if name.lower() == label_lower:
            return code

    # 2. Substring match, longest names first (sorted() is stable, so names
    #    of equal length keep their table order).
    longest_first = sorted(
        AR_REGION_CODES.items(), key=lambda entry: len(entry[0]), reverse=True
    )
    for name, code in longest_first:
        if name.lower() in label_lower:
            return code

    # 3. Known city to region mappings
    city_to_region = {
        "la plata": "B",
        "mar del plata": "B",
        "bahía blanca": "B",
        "bahia blanca": "B",
        "rosario": "S",
        "ushuaia": "V",
        "resistencia": "H",
        "posadas": "N",
        "paraná": "E",
        "parana": "E",
        "san salvador de jujuy": "Y",
        "san miguel de tucumán": "T",
        "san miguel de tucuman": "T",
    }

    for city, region in city_to_region.items():
        if city in label_lower:
            return region

    return None
|
|
|
|
|
|
def create_custodian_yaml(inst: dict, dry_run: bool = False) -> Optional[Path]:
    """Create a custodian YAML file for an institution.

    Resolves a region and city code, derives the GHCID identifiers through
    GHCIDComponents, builds the record dict and writes it to
    CUSTODIAN_DIR/<ghcid>.yaml.

    Args:
        inst: Institution dict with the required keys "qid", "name" and
            "institution_type"; "city", "latitude", "longitude" and "website"
            are used when present.
        dry_run: When True, print the filename that would be written and skip
            the write.

    Returns:
        The target file path (also in dry-run mode), or None when GHCID
        generation raised an exception.
    """
    qid = inst["qid"]
    name = inst["name"]
    inst_type = inst["institution_type"]

    # Determine location; "XX"/"XXX" are the placeholders for "unresolved".
    region_code = "XX"
    city_code = "XXX"
    city_name = inst.get("city", "")

    # Try reverse geocoding first (most accurate)
    if "latitude" in inst and "longitude" in inst:
        geo_result = reverse_geocode_to_region(inst["latitude"], inst["longitude"])
        if geo_result:
            # NOTE(review): this replaces the Wikidata city label with the
            # GeoNames city name, even when the returned region is "XX".
            region_code, city_name, city_code = geo_result

    # Fallback: try city label
    if region_code == "XX" and city_name:
        region = city_label_to_region(city_name)
        if region:
            region_code = region
            # The city code is only regenerated when a region was found.
            city_code = generate_city_code(city_name)

    # Generate abbreviation
    abbreviation = extract_abbreviation_from_name(name)

    # Create GHCID components
    # GHCIDComponents is imported from glam_extractor; its validation rules
    # are not visible here, so any exception is treated as a failed record.
    try:
        components = GHCIDComponents(
            country_code="AR",
            region_code=region_code,
            city_locode=city_code,
            institution_type=inst_type,
            abbreviation=abbreviation,
        )
        ghcid_current = components.to_string()
        ghcid_uuid = str(components.to_uuid())
        ghcid_uuid_sha256 = str(components.to_uuid_sha256())
        ghcid_numeric = components.to_numeric()
    except Exception as e:
        print(f" ❌ GHCID generation failed for {name}: {e}")
        return None

    # Check for collision
    filename = f"{ghcid_current}.yaml"
    filepath = CUSTODIAN_DIR / filename

    if filepath.exists():
        # Collision - append Wikidata QID
        # The numeric part of the QID is set on the components and every
        # derived identifier is recomputed from the updated components.
        # NOTE(review): these calls are outside the try block above, and if
        # the QID-suffixed path also exists it is overwritten by the write
        # below - confirm both are intended.
        components.wikidata_qid = qid.replace("Q", "")
        ghcid_current = components.to_string()
        ghcid_uuid = str(components.to_uuid())
        ghcid_uuid_sha256 = str(components.to_uuid_sha256())
        ghcid_numeric = components.to_numeric()
        filename = f"{ghcid_current}.yaml"
        filepath = CUSTODIAN_DIR / filename

    # One UTC timestamp is shared by all date fields of the record.
    timestamp = datetime.now(timezone.utc).isoformat()

    # Build YAML structure
    data = {
        "original_entry": {
            "name": name,
            "source": "Wikidata SPARQL import",
            "wikidata_id": qid,
        },
        "processing_timestamp": timestamp,
        "ghcid": {
            "ghcid_current": ghcid_current,
            "ghcid_uuid": ghcid_uuid,
            "ghcid_uuid_sha256": ghcid_uuid_sha256,
            "ghcid_numeric": ghcid_numeric,
            # Random UUID4 per call, obtained through an inline import of the
            # uuid module.
            "record_id": str(__import__("uuid").uuid4()),
            "generation_timestamp": timestamp,
            "location_resolution": {
                "method": "WIKIDATA_IMPORT",
                "country_code": "AR",
                "region_code": region_code,
                "city_code": city_code,
                "city_label": city_name or None,
            },
        },
        "custodian_name": {
            "claim_type": "custodian_name",
            "claim_value": name,
            "source_type": "wikidata",
            "emic_name": name,
            "name_language": "es",
        },
        # Expand the one-letter type code; unknown codes become "UNKNOWN".
        "institution_type": {
            "M": "MUSEUM",
            "A": "ARCHIVE",
            "G": "GALLERY",
            "L": "LIBRARY",
        }.get(inst_type, "UNKNOWN"),
        "location": {
            "country": "AR",
            "region_code": region_code,
        },
        "wikidata_enrichment": {
            "wikidata_entity_id": qid,
            "enrichment_date": timestamp,
            "source": "Wikidata SPARQL import",
        },
    }

    # Add optional fields
    if city_name:
        data["location"]["city"] = city_name

    if "latitude" in inst and "longitude" in inst:
        data["location"]["latitude"] = inst["latitude"]
        data["location"]["longitude"] = inst["longitude"]

    if "website" in inst:
        data["website"] = inst["website"]

    if dry_run:
        print(f" [DRY RUN] Would create: {filename}")
        return filepath

    # Write YAML file
    with open(filepath, "w", encoding="utf-8") as f:
        yaml.dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)

    return filepath
|
|
|
|
|
|
def main():
    """Command-line entry point for the Argentina Wikidata import.

    Steps: collect the QIDs already present in custodian files, query
    Wikidata for museums, archives and galleries, parse and deduplicate the
    results by QID (the first type that yields a QID wins), drop institutions
    that already exist, create one YAML file per remaining institution and
    print a summary. With --dry-run no file is written, but every would-be
    file is still counted as created in the summary.
    """
    cli = argparse.ArgumentParser(description="Import Argentina institutions from Wikidata")
    cli.add_argument("--dry-run", action="store_true", help="Don't create files, just report what would be done")
    args = cli.parse_args()

    rule = "=" * 80
    print(rule)
    print("ARGENTINA WIKIDATA INSTITUTION IMPORT")
    print(rule)
    print()

    # Get existing QIDs
    print("📂 Scanning existing custodian files...")
    existing_qids = get_existing_qids()
    print(f" Found {len(existing_qids)} existing Wikidata QIDs")
    print()

    # Query Wikidata, one request per institution type, in a fixed order.
    print("🔍 Querying Wikidata for Argentina institutions...")
    sources = (
        (" Museums...", query_wikidata_museums, "M"),
        (" Archives...", query_wikidata_archives, "A"),
        (" Galleries...", query_wikidata_galleries, "G"),
    )
    fetched = []
    for heading, run_query, type_code in sources:
        print(heading, end=" ", flush=True)
        bindings = run_query()
        print(f"found {len(bindings)} raw results")
        fetched.append((type_code, bindings))
    print()

    # Parse and deduplicate results: the first record seen for a QID is kept.
    institutions = {}
    for type_code, bindings in fetched:
        for binding in bindings:
            parsed = parse_wikidata_result(binding, type_code)
            if not parsed:
                continue
            if parsed["qid"] in institutions:
                continue
            institutions[parsed["qid"]] = parsed

    print(f"📊 Total unique institutions: {len(institutions)}")

    # Filter out existing
    new_institutions = {}
    for known_qid, record in institutions.items():
        if known_qid not in existing_qids:
            new_institutions[known_qid] = record
    print(f" After filtering existing: {len(new_institutions)} new institutions")
    print()

    # Create custodian files
    stats = {
        "created": 0,
        "collisions": 0,
        "errors": 0,
        "by_type": {"M": 0, "A": 0, "G": 0},
    }

    print(
        "🔄 [DRY RUN] Would create the following files:"
        if args.dry_run
        else "🔄 Creating custodian YAML files..."
    )
    print()

    ordered = sorted(new_institutions.items(), key=lambda pair: pair[1]["name"])
    for qid, inst in ordered:
        name = inst["name"]
        inst_type = inst["institution_type"]
        city = inst.get("city", "Unknown")

        print(f" [{inst_type}] {name}")
        print(f" 📍 {city}, QID: {qid}")

        filepath = create_custodian_yaml(inst, dry_run=args.dry_run)

        if not filepath:
            stats["errors"] += 1
        else:
            if not args.dry_run:
                print(f" ✅ Created: {filepath.name}")
            stats["created"] += 1
            stats["by_type"][inst_type] += 1
        print()

    # Summary
    per_type = stats["by_type"]
    print(rule)
    print("IMPORT COMPLETE")
    print(rule)
    print(f"✅ Created: {stats['created']} custodian files")
    print(f" - Museums: {per_type['M']}")
    print(f" - Archives: {per_type['A']}")
    print(f" - Galleries: {per_type['G']}")
    print(f"❌ Errors: {stats['errors']}")
    print()

    # Final count
    if not args.dry_run:
        final_count = len(list(CUSTODIAN_DIR.glob("AR-*.yaml")))
        print(f"📁 Total Argentina custodian files: {final_count}")
|
|
|
|
|
|
# Run the import only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|