#!/usr/bin/env python3
"""
Mark Argentina AR-XX-XXX files as VIRTUAL or ITINERANT based on their nature.

These institutions intentionally keep XX-XXX codes because they don't have
fixed physical locations.

Usage:
    python scripts/mark_virtual_itinerant_ar.py [--dry-run]
"""

import sys
from datetime import datetime, timezone
from pathlib import Path

import yaml

CUSTODIAN_DIR = Path("data/custodian")

# Institutions that are digital/virtual or itinerant
# These should remain AR-XX-XXX because they have no fixed physical location
VIRTUAL_OR_ITINERANT = {
    # Digital Platforms (VIRTUAL)
    "AR-XX-XXX-A-ADP.yaml": {
        "location_type": "VIRTUAL",
        "reason": "National Open Data Portal - datos.gob.ar - web platform with no physical collection location"
    },
    "AR-XX-XXX-A-RAA.yaml": {
        "location_type": "VIRTUAL",
        "reason": "Repositorios de acceso abierto - digital repositories aggregator for Argentine open access materials"
    },
    "AR-XX-XXX-A-AMT.yaml": {
        "location_type": "VIRTUAL",
        "reason": "Archivo de la Memoria Trans - community archive primarily online (archivotrans.ar), preserves trans community history"
    },
    "AR-XX-XXX-A-PDDH1AVJML.yaml": {
        "location_type": "VIRTUAL",
        "reason": "UNESCO Memory of the World heritage - distributed archival network documenting human rights violations 1976-1983"
    },
    "AR-XX-XXX-A-MMD.yaml": {
        "location_type": "VIRTUAL",
        "reason": "Mapeo Mostri Disidente - digital mapping/archive project for LGBTQ+ dissident culture"
    },
    "AR-XX-XXX-A-PT.yaml": {
        "location_type": "VIRTUAL",
        "reason": "Potencia Tortillera - digital archive/collective (blogspot) for lesbian feminist activism and history"
    },
    # Itinerant Museums (no fixed location)
    "AR-XX-XXX-M-MDI.yaml": {
        "location_type": "ITINERANT",
        "reason": "Museo del Dibujo y la Ilustración (MuDi) - museo itinerante (itinerant museum) with no fixed location, holds exhibitions at various venues"
    },
}


def _nested_get(data, outer, inner, default):
    """Return ``data[outer][inner]``, or *default* if a level is missing or not a mapping."""
    if not isinstance(data, dict):
        return default
    section = data.get(outer)
    if not isinstance(section, dict):
        return default
    return section.get(inner, default)


def mark_file(filename: str, info: dict, dry_run: bool = False) -> bool:
    """Add location_type metadata to a custodian file.

    Args:
        filename: Name of the YAML file inside ``CUSTODIAN_DIR``.
        info: Mapping with the keys ``location_type`` and ``reason``.
        dry_run: When true, only report what would change; the file is
            not written.

    Returns:
        True if the file was marked (or would be, in dry-run mode);
        False if the file does not exist.

    Raises:
        ValueError: If the file's top-level YAML value is not a mapping
            (for example an empty file), or if its ``location`` entry is
            neither a mapping nor null.
    """
    filepath = CUSTODIAN_DIR / filename

    if not filepath.exists():
        print(f" ⚠️ File not found: {filename}")
        return False

    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load yields None for an empty document and may yield a list or
    # scalar; fail with a clear message instead of an AttributeError below.
    if not isinstance(data, dict):
        raise ValueError(
            f"expected a YAML mapping at top level, got {type(data).__name__}"
        )

    print(f"\n 📁 {filename}")
    print(f" Name: {_nested_get(data, 'custodian_name', 'claim_value', 'Unknown')}")
    print(f" Type: {info['location_type']}")
    print(f" Reason: {info['reason']}")

    if dry_run:
        print(f" [DRY RUN] Would add location_type: {info['location_type']}")
        return True

    # Add location_type to location block (create if needed).
    # A key that is present but null ("location:") is treated like a missing one.
    location = data.get('location')
    if location is None:
        location = data['location'] = {'country': 'AR'}
    elif not isinstance(location, dict):
        raise ValueError(
            f"expected 'location' to be a mapping, got {type(location).__name__}"
        )

    location['location_type'] = info['location_type']
    location['location_type_reason'] = info['reason']
    location['location_type_date'] = datetime.now(timezone.utc).isoformat()

    # Also add a note to ghcid resolution explaining why XX-XXX is intentional.
    # The note is only added when the resolution block already exists.
    ghcid = data.get('ghcid')
    resolution = ghcid.get('location_resolution') if isinstance(ghcid, dict) else None
    if isinstance(resolution, dict):
        resolution['intentional_xx_xxx'] = True
        resolution['xx_xxx_reason'] = info['reason']

    # Serialize before opening the file for writing: if dumping fails, the
    # existing file is left untouched rather than truncated.
    serialized = yaml.dump(
        data, allow_unicode=True, sort_keys=False, default_flow_style=False
    )
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)

    print(f" ✅ Marked as {info['location_type']}")
    return True


def main():
    """Mark every configured file, then list the AR-XX files still uncategorized.

    Passing ``--dry-run`` on the command line reports the changes without
    writing any file.
    """
    dry_run = '--dry-run' in sys.argv

    print("=" * 60)
    print("Mark Virtual/Itinerant Argentina Institutions")
    print("=" * 60)

    if dry_run:
        print("\n🔍 DRY RUN MODE - No files will be modified\n")

    marked_count = 0
    error_count = 0

    for filename, info in VIRTUAL_OR_ITINERANT.items():
        # Top-level boundary: report the failure and continue with the next file.
        try:
            if mark_file(filename, info, dry_run):
                marked_count += 1
        except Exception as e:
            print(f"\n ❌ ERROR processing {filename}: {e}")
            error_count += 1

    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f" Total to mark: {len(VIRTUAL_OR_ITINERANT)}")
    print(f" Successfully marked: {marked_count}")
    print(f" Errors: {error_count}")

    if dry_run:
        print("\n Run without --dry-run to apply changes.")

    # List remaining unknown AR-XX-XXX files
    print("\n" + "=" * 60)
    print("REMAINING AR-XX-XXX FILES (Need Research)")
    print("=" * 60)

    all_ar_xx = list(CUSTODIAN_DIR.glob("AR-XX-*.yaml"))
    remaining = [f for f in all_ar_xx if f.name not in VIRTUAL_OR_ITINERANT]

    if remaining:
        for f in sorted(remaining):
            # Load to get name; one unreadable file must not abort the report.
            try:
                with open(f, 'r', encoding='utf-8') as fp:
                    data = yaml.safe_load(fp)
            except (OSError, yaml.YAMLError) as e:
                print(f" • {f.name}")
                print(f" ⚠️ Could not read file: {e}")
                continue

            name = _nested_get(data, 'custodian_name', 'claim_value', 'Unknown')
            wikidata = _nested_get(data, 'original_entry', 'wikidata_id', 'N/A')
            print(f" • {f.name}")
            print(f" Name: {name}")
            print(f" Wikidata: {wikidata}")
        print(f"\n Total needing research: {len(remaining)}")
    else:
        print(" None! All AR-XX-XXX files have been categorized.")


if __name__ == "__main__":
    main()