#!/usr/bin/env python3
"""
Mark Argentina AR-XX-XXX files as VIRTUAL or ITINERANT based on their nature.

These institutions intentionally keep XX-XXX codes because they don't have
fixed physical locations.

Usage:
    python scripts/mark_virtual_itinerant_ar.py [--dry-run]
"""
|
|
|
|
import yaml
|
|
from pathlib import Path
|
|
from datetime import datetime, timezone
|
|
|
|
# Directory holding the custodian YAML files; relative, so it is resolved
# against the current working directory at run time.
CUSTODIAN_DIR = Path("data", "custodian")
|
|
|
|
# Custodian files whose AR-XX-XXX identifier is deliberate: the institution is
# either a digital platform (VIRTUAL) or a travelling museum (ITINERANT), so
# there is no fixed physical location to resolve.
# Maps file name -> {"location_type": ..., "reason": ...}; insertion order is
# the order in which the files are processed.
VIRTUAL_OR_ITINERANT = {
    filename: {"location_type": location_type, "reason": reason}
    for filename, location_type, reason in (
        # Digital Platforms (VIRTUAL)
        (
            "AR-XX-XXX-A-ADP.yaml",
            "VIRTUAL",
            "National Open Data Portal - datos.gob.ar - web platform with no physical collection location",
        ),
        (
            "AR-XX-XXX-A-RAA.yaml",
            "VIRTUAL",
            "Repositorios de acceso abierto - digital repositories aggregator for Argentine open access materials",
        ),
        (
            "AR-XX-XXX-A-AMT.yaml",
            "VIRTUAL",
            "Archivo de la Memoria Trans - community archive primarily online (archivotrans.ar), preserves trans community history",
        ),
        (
            "AR-XX-XXX-A-PDDH1AVJML.yaml",
            "VIRTUAL",
            "UNESCO Memory of the World heritage - distributed archival network documenting human rights violations 1976-1983",
        ),
        (
            "AR-XX-XXX-A-MMD.yaml",
            "VIRTUAL",
            "Mapeo Mostri Disidente - digital mapping/archive project for LGBTQ+ dissident culture",
        ),
        (
            "AR-XX-XXX-A-PT.yaml",
            "VIRTUAL",
            "Potencia Tortillera - digital archive/collective (blogspot) for lesbian feminist activism and history",
        ),
        # Itinerant Museums (no fixed location)
        (
            "AR-XX-XXX-M-MDI.yaml",
            "ITINERANT",
            "Museo del Dibujo y la Ilustración (MuDi) - museo itinerante (itinerant museum) with no fixed location, holds exhibitions at various venues",
        ),
    )
}
|
|
|
|
|
|
def mark_file(filename: str, info: dict, dry_run: bool = False) -> bool:
    """Add location_type metadata to one custodian YAML file.

    The file is parsed, annotated and written back in full, so the on-disk
    text is whatever the YAML serializer produces from the parsed data.

    Args:
        filename: File name relative to ``CUSTODIAN_DIR``.
        info: Mapping with the keys ``location_type`` and ``reason``.
        dry_run: When true, report what would change without writing.

    Returns:
        True if the file was marked (or would be, in dry-run mode); False if
        the file does not exist.

    Raises:
        ValueError: If the file does not contain a YAML mapping (for example
            an empty file), since there is nothing to annotate.
    """
    filepath = CUSTODIAN_DIR / filename

    if not filepath.exists():
        print(f" ⚠️ File not found: {filename}")
        return False

    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # An empty document parses to None and a scalar/list document has no keys;
    # fail with an explicit message instead of an opaque AttributeError.
    if not isinstance(data, dict):
        raise ValueError(
            f"expected a YAML mapping at top level, got {type(data).__name__}"
        )

    # 'custodian_name' may be missing or explicitly null in the YAML.
    name_claim = data.get('custodian_name')
    if isinstance(name_claim, dict):
        display_name = name_claim.get('claim_value', 'Unknown')
    else:
        display_name = 'Unknown'

    print(f"\n 📁 {filename}")
    print(f" Name: {display_name}")
    print(f" Type: {info['location_type']}")
    print(f" Reason: {info['reason']}")

    if dry_run:
        print(f" [DRY RUN] Would add location_type: {info['location_type']}")
        return True

    # Add location_type to location block (create if missing or null)
    if data.get('location') is None:
        data['location'] = {'country': 'AR'}

    location = data['location']
    location['location_type'] = info['location_type']
    location['location_type_reason'] = info['reason']
    location['location_type_date'] = datetime.now(timezone.utc).isoformat()

    # Also add a note to ghcid resolution explaining why XX-XXX is intentional.
    # Only done when the resolution block already exists as a mapping.
    ghcid = data.get('ghcid')
    resolution = ghcid.get('location_resolution') if isinstance(ghcid, dict) else None
    if isinstance(resolution, dict):
        resolution['intentional_xx_xxx'] = True
        resolution['xx_xxx_reason'] = info['reason']

    # Serialize before opening the file for writing: if dumping fails, the
    # existing file is left untouched instead of being truncated.
    serialized = yaml.dump(
        data, allow_unicode=True, sort_keys=False, default_flow_style=False
    )
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(serialized)

    print(f" ✅ Marked as {info['location_type']}")
    return True
|
|
|
|
|
|
def main():
    """Mark every listed custodian file, then report what is left.

    Reads ``--dry-run`` from ``sys.argv``. Each entry of
    ``VIRTUAL_OR_ITINERANT`` is passed to ``mark_file``; a failure on one
    entry is reported and counted without stopping the run. Afterwards a
    summary is printed, followed by a listing of the ``AR-XX-*.yaml`` files
    in ``CUSTODIAN_DIR`` that are not in ``VIRTUAL_OR_ITINERANT``.
    """
    import sys

    dry_run = '--dry-run' in sys.argv

    print("=" * 60)
    print("Mark Virtual/Itinerant Argentina Institutions")
    print("=" * 60)

    if dry_run:
        print("\n🔍 DRY RUN MODE - No files will be modified\n")

    marked_count = 0
    error_count = 0

    for filename, info in VIRTUAL_OR_ITINERANT.items():
        # Top-level boundary: report the failure and keep going with the
        # remaining files rather than aborting the whole batch.
        try:
            if mark_file(filename, info, dry_run):
                marked_count += 1
        except Exception as e:
            print(f"\n ❌ ERROR processing {filename}: {e}")
            error_count += 1

    # mark_file returns False only when the file does not exist, so whatever
    # is neither marked nor an error was missing on disk.
    missing_count = len(VIRTUAL_OR_ITINERANT) - marked_count - error_count

    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f" Total to mark: {len(VIRTUAL_OR_ITINERANT)}")
    print(f" Successfully marked: {marked_count}")
    print(f" Not found: {missing_count}")
    print(f" Errors: {error_count}")

    if dry_run:
        print("\n Run without --dry-run to apply changes.")

    # List remaining unknown AR-XX-XXX files
    print("\n" + "=" * 60)
    print("REMAINING AR-XX-XXX FILES (Need Research)")
    print("=" * 60)

    remaining = sorted(
        f for f in CUSTODIAN_DIR.glob("AR-XX-*.yaml")
        if f.name not in VIRTUAL_OR_ITINERANT
    )

    if remaining:
        for f in remaining:
            print(f" • {f.name}")

            # Load to get name; one unreadable or malformed file must not
            # abort the rest of the report.
            try:
                with open(f, 'r', encoding='utf-8') as fp:
                    data = yaml.safe_load(fp)
            except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
                print(f" ❌ Could not read: {e}")
                continue

            # Empty or non-mapping documents carry no metadata to show.
            if not isinstance(data, dict):
                data = {}

            # Either block may be absent or explicitly null.
            name_claim = data.get('custodian_name')
            original_entry = data.get('original_entry')
            name = (
                name_claim.get('claim_value', 'Unknown')
                if isinstance(name_claim, dict) else 'Unknown'
            )
            wikidata = (
                original_entry.get('wikidata_id', 'N/A')
                if isinstance(original_entry, dict) else 'N/A'
            )

            print(f" Name: {name}")
            print(f" Wikidata: {wikidata}")
        print(f"\n Total needing research: {len(remaining)}")
    else:
        print(" None! All AR-XX-XXX files have been categorized.")
|
|
|
|
|
|
# Run the marking workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|