glam/scripts/mark_virtual_itinerant_ar.py
2025-12-21 00:01:54 +01:00

154 lines
5.4 KiB
Python

#!/usr/bin/env python3
"""
Mark Argentina AR-XX-XXX files as VIRTUAL or ITINERANT based on their nature.
These institutions intentionally keep XX-XXX codes because they don't have
fixed physical locations.
Usage:
python scripts/mark_virtual_itinerant_ar.py [--dry-run]
"""
import yaml
from pathlib import Path
from datetime import datetime, timezone
# Directory containing the custodian YAML files (relative to the working directory).
CUSTODIAN_DIR = Path("data/custodian")

# Institutions that are digital/virtual or itinerant.
# They intentionally keep their AR-XX-XXX code because they have no fixed
# physical location. Built from (filename, location_type, reason) rows so each
# entry maps filename -> {"location_type": ..., "reason": ...}.
VIRTUAL_OR_ITINERANT = {
    filename: {"location_type": location_type, "reason": reason}
    for filename, location_type, reason in (
        # Digital platforms (VIRTUAL)
        (
            "AR-XX-XXX-A-ADP.yaml",
            "VIRTUAL",
            "National Open Data Portal - datos.gob.ar - web platform with no physical collection location",
        ),
        (
            "AR-XX-XXX-A-RAA.yaml",
            "VIRTUAL",
            "Repositorios de acceso abierto - digital repositories aggregator for Argentine open access materials",
        ),
        (
            "AR-XX-XXX-A-AMT.yaml",
            "VIRTUAL",
            "Archivo de la Memoria Trans - community archive primarily online (archivotrans.ar), preserves trans community history",
        ),
        (
            "AR-XX-XXX-A-PDDH1AVJML.yaml",
            "VIRTUAL",
            "UNESCO Memory of the World heritage - distributed archival network documenting human rights violations 1976-1983",
        ),
        (
            "AR-XX-XXX-A-MMD.yaml",
            "VIRTUAL",
            "Mapeo Mostri Disidente - digital mapping/archive project for LGBTQ+ dissident culture",
        ),
        (
            "AR-XX-XXX-A-PT.yaml",
            "VIRTUAL",
            "Potencia Tortillera - digital archive/collective (blogspot) for lesbian feminist activism and history",
        ),
        # Itinerant museums (no fixed location)
        (
            "AR-XX-XXX-M-MDI.yaml",
            "ITINERANT",
            "Museo del Dibujo y la Ilustración (MuDi) - museo itinerante (itinerant museum) with no fixed location, holds exhibitions at various venues",
        ),
    )
}
def mark_file(filename: str, info: dict, dry_run: bool = False) -> bool:
    """Record the location type of one custodian file.

    Loads ``CUSTODIAN_DIR / filename``, prints a short report and, unless
    ``dry_run`` is set, writes the file back with ``location_type``,
    ``location_type_reason`` and ``location_type_date`` set in its
    ``location`` block (created as ``{'country': 'AR'}`` when missing or
    null). If the record has a ``ghcid.location_resolution`` mapping, that
    mapping is also flagged with ``intentional_xx_xxx`` and
    ``xx_xxx_reason``; otherwise that step is skipped.

    Args:
        filename: Name of the YAML file inside ``CUSTODIAN_DIR``.
        info: Mapping with the keys ``location_type`` and ``reason``.
        dry_run: When true, only report what would change; nothing is written.

    Returns:
        ``True`` if the file was marked (or would be, in a dry run),
        ``False`` if the file does not exist.

    Raises:
        ValueError: If the top-level YAML value is not a mapping (e.g. an
            empty file), or if ``location`` is present but is neither a
            mapping nor null.
    """
    filepath = CUSTODIAN_DIR / filename
    if not filepath.exists():
        print(f" ⚠️ File not found: {filename}")
        return False

    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load returns None for an empty document and scalars/lists for
    # non-mapping documents; fail with a clear message instead of an
    # AttributeError further down.
    if not isinstance(data, dict):
        raise ValueError(
            f"expected a YAML mapping at top level, got {type(data).__name__}"
        )

    # `custodian_name` may be present but null, so .get(key, {}) is not enough.
    custodian_name = data.get('custodian_name')
    if isinstance(custodian_name, dict):
        display_name = custodian_name.get('claim_value', 'Unknown')
    else:
        display_name = 'Unknown'

    print(f"\n 📁 {filename}")
    print(f" Name: {display_name}")
    print(f" Type: {info['location_type']}")
    print(f" Reason: {info['reason']}")

    # Validate the location block before the dry-run exit so a dry run
    # reports the same problems a real run would hit.
    location = data.get('location')
    if location is None:
        location = {'country': 'AR'}
    elif not isinstance(location, dict):
        raise ValueError(
            f"'location' must be a mapping, got {type(location).__name__}"
        )

    if dry_run:
        print(f" [DRY RUN] Would add location_type: {info['location_type']}")
        return True

    # Add location_type to the location block (created above if needed).
    data['location'] = location
    location['location_type'] = info['location_type']
    location['location_type_reason'] = info['reason']
    location['location_type_date'] = datetime.now(timezone.utc).isoformat()

    # Also note in the ghcid resolution why XX-XXX is intentional. Skipped
    # when `ghcid` or its `location_resolution` is absent or not a mapping.
    ghcid = data.get('ghcid')
    resolution = ghcid.get('location_resolution') if isinstance(ghcid, dict) else None
    if isinstance(resolution, dict):
        resolution['intentional_xx_xxx'] = True
        resolution['xx_xxx_reason'] = info['reason']

    # Serialize before opening the file for writing: mode 'w' truncates
    # immediately, so a failure inside yaml.dump must not happen after that.
    serialized = yaml.dump(
        data, allow_unicode=True, sort_keys=False, default_flow_style=False
    )
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)

    print(f" ✅ Marked as {info['location_type']}")
    return True
def _nested_value(data, section, key, default):
    """Return ``data[section][key]``, or ``default`` if either level is missing or not a mapping."""
    if not isinstance(data, dict):
        return default
    inner = data.get(section)
    if not isinstance(inner, dict):
        return default
    return inner.get(key, default)


def main():
    """Mark every file listed in VIRTUAL_OR_ITINERANT and report the rest.

    Reads ``--dry-run`` from ``sys.argv``. Calls ``mark_file`` for each table
    entry, prints a summary, then lists the ``AR-XX-*.yaml`` files in
    ``CUSTODIAN_DIR`` that are not in the table, with their name and Wikidata
    id. A file in that list that cannot be read or parsed is reported and
    skipped rather than aborting the report.
    """
    import sys  # local import: the module header does not import sys

    dry_run = '--dry-run' in sys.argv

    print("=" * 60)
    print("Mark Virtual/Itinerant Argentina Institutions")
    print("=" * 60)
    if dry_run:
        print("\n🔍 DRY RUN MODE - No files will be modified\n")

    marked_count = 0
    error_count = 0
    for filename, info in VIRTUAL_OR_ITINERANT.items():
        # Top-level boundary: one bad file must not stop the batch, so any
        # failure is reported and counted.
        try:
            if mark_file(filename, info, dry_run):
                marked_count += 1
        except Exception as e:
            print(f"\n ❌ ERROR processing {filename}: {e}")
            error_count += 1

    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f" Total to mark: {len(VIRTUAL_OR_ITINERANT)}")
    print(f" Successfully marked: {marked_count}")
    print(f" Errors: {error_count}")
    if dry_run:
        print("\n Run without --dry-run to apply changes.")

    # List remaining unknown AR-XX-XXX files
    print("\n" + "=" * 60)
    print("REMAINING AR-XX-XXX FILES (Need Research)")
    print("=" * 60)

    remaining = sorted(
        path
        for path in CUSTODIAN_DIR.glob("AR-XX-*.yaml")
        if path.name not in VIRTUAL_OR_ITINERANT
    )
    if not remaining:
        print(" None! All AR-XX-XXX files have been categorized.")
        return

    for path in remaining:
        print(f"{path.name}")
        # Load to get the name; an unreadable or malformed file still needs
        # research, so it stays in the list and in the total.
        try:
            with open(path, 'r', encoding='utf-8') as fp:
                data = yaml.safe_load(fp)
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
            print(f" ❌ Could not read file: {e}")
            continue
        name = _nested_value(data, 'custodian_name', 'claim_value', 'Unknown')
        wikidata = _nested_value(data, 'original_entry', 'wikidata_id', 'N/A')
        print(f" Name: {name}")
        print(f" Wikidata: {wikidata}")
    print(f"\n Total needing research: {len(remaining)}")
# Run the script only when executed directly, not when imported.
if __name__ == "__main__":
    main()