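#!/usr/bin/env python3
"""
Final precise matching of LinkedIn custodians to existing YAML files.

Expanded manual mapping based on research.

Reads the custodian list from ``custodians_to_profile.json``, classifies each
custodian by its LinkedIn slug (matched, international, not a heritage
custodian, or needing a YAML file), prints a report, and writes the result to
``custodian_yaml_matches_final.json``.
"""
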
import json
import os
from datetime import datetime
from pathlib import Path

# Filesystem layout. BASE_DIR is an absolute path on the author's machine;
# every other location is derived from it.
BASE_DIR = Path("/Users/kempersc/apps/glam")
CUSTODIAN_DIR = BASE_DIR / "data" / "custodian"

# Directory shared by the input and output JSON files of this script.
_PARSED_DIR = CUSTODIAN_DIR / "person" / "affiliated" / "parsed"

# Input: JSON document whose 'custodians' list is matched by main().
TO_PROFILE_FILE = _PARSED_DIR / "custodians_to_profile.json"
# Output: JSON document with the classification results written by main().
OUTPUT_FILE = _PARSED_DIR / "custodian_yaml_matches_final.json"

# Comprehensive manual mapping (LinkedIn slug -> YAML filename).
#
# A string value is a filename that main() resolves relative to CUSTODIAN_DIR.
# A value of None marks an organization that is known but deliberately has no
# YAML file because it is not a heritage custodian.
KNOWN_MAPPINGS = {
    # Major Dutch institutions
    "rijksmuseum": "NL-NH-AMS-M-RM.yaml",
    "kb-nationale-bibliotheek": "NL-ZH-DHA-L-KB-kb_nationale_bibliotheek.yaml",
    "van-gogh-museum": "NL-NH-AMS-M-GM-van_gogh_museum.yaml",
    "nationaal-archief": "NL-ZH-DHA-A-NA.yaml",
    "nederlands-instituut-voor-beeld-en-geluid": "NL-NH-HIL-M-NIBG.yaml",
    "nederlands-openluchtmuseum": "NL-GE-ARN-M-NOM.yaml",
    "stedelijk-museum-amsterdam": "NL-NH-AMS-M-SMA.yaml",
    "eye-filmmuseum": "NL-NH-AMS-M-EFM-eye_filmmuseum.yaml",
    "anne-frank-stichting": "NL-NH-AMS-M-AFH.yaml",
    "amsterdam-museum": "NL-NH-AMS-M-AM.yaml",
    "paleis-het-loo": "NL-GE-APE-M-NMPL.yaml",
    "zuiderzeemuseum": "NL-NH-ENK-M-ZM.yaml",
    "huygens-institute": "NL-NH-AMS-R-HI.yaml",
    "niod-institute-for-war-holocaust-and-genocide-studies": "NL-NH-AMS-A-NIWHGS.yaml",
    "kroller-muller-museum": "NL-GE-OTT-M-KMM.yaml",
    "noord-hollands-archief": "NL-NH-HAA-A-NHA.yaml",
    "allard-pierson": "NL-NH-AMS-M-AP.yaml",
    "het-utrechts-archief": "NL-UT-UTR-A-UA.yaml",
    "gelders-archief": "NL-GE-ARN-A-GA.yaml",
    "collectie-overijssel": "NL-OV-ZWO-A-CO-collectie_overijssel.yaml",
    "nieuwe-instituut": "NL-ZH-ROT-A-NI.yaml",

    # More Dutch institutions
    "museum-boijmans-van-beuningen": "NL-ZH-ROT-M-MBVB.yaml",
    "mauritshuis": "NL-ZH-DHA-M-MH.yaml",
    "museum-het-valkhof": "NL-GE-NIJ-M-MHV.yaml",
    "museum-rembrandthuis": "NL-NH-AMS-M-MRH.yaml",

    # Regional archives
    "regionaal-archief-alkmaar": "NL-NH-ALK-A-RAA.yaml",
    "regionaal-archief-tilburg": "NL-NB-TIL-A-RAT.yaml",
    "regionaal-archief-rivierenland": "NL-GE-TIE-A-RAR.yaml",
    "regionaal-archief-nijmegen": "NL-GE-NIJ-A-RAN.yaml",
    "regionaal-archief-zuid-utrecht": "NL-UT-WOU-A-RAZU.yaml",
    "waterlands-archief": "NL-NH-PUR-A-WA.yaml",
    "west-brabants-archief": "NL-NB-BER-A-WBA.yaml",
    "het-flevolands-archief": "NL-FL-LEL-A-FA.yaml",
    "drents-archief": "NL-DR-ASS-A-DA.yaml",
    "westfries-archief": "NL-NH-HOO-A-WFA.yaml",

    # Other Dutch museums/institutions
    "zeeuws-museum": "NL-ZE-MID-M-ZM.yaml",
    "h-art-museum": "NL-NH-AMS-M-HM.yaml",  # formerly Hermitage Amsterdam
    "nationaal-militair-museum": "NL-UT-SOE-M-NMM.yaml",
    "museum-bronbeek": "NL-GE-ARN-M-MB.yaml",
    "airborne-museum": "NL-GE-OOS-M-AM.yaml",
    "nemo-science-museum": "NL-NH-AMS-M-NEMO.yaml",
    "naturalis-biodiversity-center": "NL-ZH-LEI-M-NBC.yaml",
    "museum-arnhem": "NL-GE-ARN-M-MA.yaml",
    "coda-apeldoorn": "NL-GE-APE-M-C.yaml",

    # Libraries
    "tu-delft-library": "NL-ZH-DEL-L-TUD.yaml",
    "de-bblthk": "NL-GE-WAG-L-BBL.yaml",

    # Other organizations (None: known, but not a heritage custodian)
    "raad-voor-cultuur": None,  # Advisory council, not a custodian
    "fonds-voor-cultuurparticipatie": None,  # Fund, not a custodian
    "stichting-kunst-cultuur": None,  # Provincial support org
    "cultuur-oost": None,  # Provincial support org
    "european-cultural-foundation": None,  # Foundation, not a custodian
}

# International institutions (outside NL scope for now).
#
# LinkedIn slugs listed here are reported by main() as 'international' and are
# never looked up in KNOWN_MAPPINGS.
INTERNATIONAL = {
    "national-library-of-australia",
    "internet-archive",
    "yad-vashem-the-world-holocaust-remembrance-center",
    "caen-memorial",
    "the-art-loss-register",
    "arolsen-archives",
    "centre-for-media-communication-and-information-sciences",
    "culture-action-europe",
    "advn-archief-voor-nationale-bewegingen",
}


def main(*, to_profile_file=None, output_file=None, custodian_dir=None,
         known_mappings=None, international=None):
    """Match custodians to YAML files, print a report and save the results.

    Every parameter is an optional keyword-only override; when left as None
    the corresponding module-level constant is used, so ``main()`` behaves as
    the plain script entry point.

    Args:
        to_profile_file: Path of the input JSON. It must contain a
            'custodians' list whose items have a 'slug' key, plus 'name' and
            'heritage_count' for every item that is printed in the report.
            Defaults to TO_PROFILE_FILE.
        output_file: Path the results JSON is written to. Defaults to
            OUTPUT_FILE.
        custodian_dir: Directory that mapped YAML filenames are resolved
            against. Defaults to CUSTODIAN_DIR.
        known_mappings: Mapping of slug -> YAML filename, or None for an
            organization that is not a heritage custodian. Defaults to
            KNOWN_MAPPINGS.
        international: Collection of slugs treated as out of scope. Defaults
            to INTERNATIONAL.

    Returns:
        The results dict with the lists 'matched', 'international',
        'not_heritage_custodian' and 'needs_yaml_creation', a 'summary' dict
        of counts, and a 'generated_at' ISO timestamp.

    Raises:
        KeyError: If the input lacks 'custodians' or a record lacks a key
            that is read for it.
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Defaults are resolved at call time so the module constants stay the
    # single source of truth for a plain `main()` call.
    if to_profile_file is None:
        to_profile_file = TO_PROFILE_FILE
    if output_file is None:
        output_file = OUTPUT_FILE
    if custodian_dir is None:
        custodian_dir = CUSTODIAN_DIR
    if known_mappings is None:
        known_mappings = KNOWN_MAPPINGS
    if international is None:
        international = INTERNATIONAL
    custodian_dir = Path(custodian_dir)

    print("Loading custodians to profile...")
    # Explicit UTF-8: institution names may contain non-ASCII characters and
    # the platform default encoding is not guaranteed to decode them.
    with open(to_profile_file, encoding='utf-8') as f:
        to_profile_data = json.load(f)

    custodians = to_profile_data['custodians']
    print(f"Found {len(custodians)} custodians to match\n")

    results = {
        'matched': [],
        'international': [],
        'not_heritage_custodian': [],
        'needs_yaml_creation': [],
        'summary': {},
        # Naive local time, kept as-is so the output format does not change.
        'generated_at': datetime.now().isoformat()
    }

    for custodian in custodians:
        bucket, entry = _classify_custodian(
            custodian, known_mappings, international, custodian_dir
        )
        results[bucket].append(entry)

    # Summary
    results['summary'] = {
        'total': len(custodians),
        'matched_to_yaml': len(results['matched']),
        'international': len(results['international']),
        'not_heritage_custodian': len(results['not_heritage_custodian']),
        'needs_yaml_creation': len(results['needs_yaml_creation'])
    }

    _print_report(results)

    # Save results
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, default=str)
    print(f"\nResults saved to: {output_file}")

    return results


def _classify_custodian(custodian, known_mappings, international, custodian_dir):
    """Return ``(bucket, entry)``: the results list key and the entry for it."""
    slug = custodian['slug']

    # International slugs are checked first, so a slug present in both tables
    # is reported as international.
    if slug in international:
        return 'international', {
            'custodian': custodian,
            'note': 'International institution - outside NL scope'
        }

    if slug not in known_mappings:
        # Not in mapping - needs investigation or creation
        return 'needs_yaml_creation', {
            'custodian': custodian,
            'note': 'No mapping exists - needs YAML creation or research'
        }

    yaml_file = known_mappings[slug]
    if yaml_file is None:
        return 'not_heritage_custodian', {
            'custodian': custodian,
            'note': 'Organization exists but is not a heritage custodian (fund, council, etc.)'
        }

    yaml_path = custodian_dir / yaml_file
    if yaml_path.exists():
        return 'matched', {
            'custodian': custodian,
            'yaml_file': str(yaml_path),
            'yaml_filename': yaml_file,
            'match_method': 'manual_mapping'
        }

    # A mapping exists but the file it names is missing on disk.
    return 'needs_yaml_creation', {
        'custodian': custodian,
        'expected_file': yaml_file,
        'note': 'Expected YAML file does not exist'
    }


def _print_report(results):
    """Print the summary counts and the per-category listings to stdout."""
    summary = results['summary']
    rule = "=" * 70

    print(rule)
    print("FINAL MATCHING RESULTS")
    print(rule)
    print(f"Total custodians: {summary['total']}")
    print(f"Matched to existing YAML: {summary['matched_to_yaml']}")
    print(f"International (out of scope): {summary['international']}")
    print(f"Not heritage custodians: {summary['not_heritage_custodian']}")
    print(f"Needs YAML creation: {summary['needs_yaml_creation']}")

    print("\n" + rule)
    print("MATCHED TO EXISTING YAML FILES")
    print(rule)
    for i, m in enumerate(results['matched'], 1):
        c = m['custodian']
        print(f"{i:2}. {c['name'][:45]:<45} | {c['heritage_count']:>3} staff | {m['yaml_filename']}")

    print("\n" + rule)
    print("NEEDS YAML CREATION (Dutch heritage institutions)")
    print(rule)
    # Only entries with a positive heritage_count are listed, and the listing
    # is capped at the first 40 of those; the saved JSON keeps all of them.
    to_create = [
        x for x in results['needs_yaml_creation']
        if x['custodian']['heritage_count'] > 0
    ][:40]
    for i, m in enumerate(to_create, 1):
        c = m['custodian']
        loc = c.get('location', {})
        # 'location' is optional and is only used when it is a dict.
        city = loc.get('city', 'Unknown') if isinstance(loc, dict) else 'Unknown'
        print(f"{i:2}. {c['name'][:45]:<45} | {c['heritage_count']:>3} staff | {city}")

    print("\n" + rule)
    print("INTERNATIONAL (outside current scope)")
    print(rule)
    for i, m in enumerate(results['international'], 1):
        c = m['custodian']
        print(f"{i:2}. {c['name'][:50]:<50} | {c['heritage_count']:>3} staff")


# Run the matching only when executed as a script, not when imported.
if __name__ == "__main__":
    main()