glam/scripts/resolve_pending_known_orgs.py
kempersc dd0ee2cf11 feat(scripts): expand university location mappings and add web enrichment
- enrich_ppids.py: Add 40+ Dutch universities and hogescholen to location mapping
- enrich_ppids_web.py: New script for web-based PPID enrichment
- resolve_pending_known_orgs.py: Updates for pending org resolution
2026-01-09 21:10:14 +01:00

897 lines
52 KiB
Python

#!/usr/bin/env python3
"""
Resolve PENDING files using a comprehensive known organizations database.
This script contains manually curated locations for Dutch heritage organizations
that couldn't be resolved automatically.
Usage:
python scripts/resolve_pending_known_orgs.py --dry-run
python scripts/resolve_pending_known_orgs.py
"""
import re
import yaml
import shutil
from pathlib import Path
from datetime import datetime, timezone
from typing import Dict, Optional, Tuple
# Absolute path to the custodian data directory.
# NOTE(review): hard-coded to one developer's home directory, so the script runs
# unmodified only on that machine. Presumably this is where the PENDING files
# mentioned in the module docstring live -- confirm against the code that reads it.
CUSTODIAN_DIR = Path("/Users/kempersc/apps/glam/data/custodian")
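# Known organizations with their locations
# Format: 'normalized_name': (province, city_code, city_name, inst_type)
# Province codes: NH=Noord-Holland, ZH=Zuid-Holland, UT=Utrecht, GE=Gelderland,
#                 NB=Noord-Brabant, LI=Limburg, OV=Overijssel, FR=Friesland,
#                 DR=Drenthe, GR=Groningen, ZE=Zeeland, FL=Flevoland
# Foreign: Use country code (BE, DE, FR, DK, IT, GB, US, etc.) as first element
# NOTE: 'FR' is ambiguous -- it appears both as the province code for Friesland
# and as the country code for France, so the first element alone cannot tell
# the two apart.
# NOTE: several names are listed more than once in this table. A dict literal
# keeps only the LAST value given for a repeated key, so repeated entries must
# be kept in sync (or removed).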
KNOWN_ORGS = {
# ==========================================================================
# MUSEUMS - Netherlands
# ==========================================================================
'amsterdamse school museum het schip': ('NH', 'AMS', 'Amsterdam', 'M'),
'hunebedcentrum': ('DR', 'BOR', 'Borger', 'M'),
'museum flehite': ('UT', 'AME', 'Amersfoort', 'M'),
'museum batavialand': ('FL', 'LEL', 'Lelystad', 'M'),
'batavialand': ('FL', 'LEL', 'Lelystad', 'M'),
'jewish cultural quarter': ('NH', 'AMS', 'Amsterdam', 'M'),
'joods cultureel kwartier': ('NH', 'AMS', 'Amsterdam', 'M'),
'museum catharijneconvent': ('UT', 'UTR', 'Utrecht', 'M'),
'museum speelklok': ('UT', 'UTR', 'Utrecht', 'M'),
'museum rembrandthuis': ('NH', 'AMS', 'Amsterdam', 'M'),
'rembrandthuis': ('NH', 'AMS', 'Amsterdam', 'M'),
'nieuwe instituut': ('ZH', 'ROT', 'Rotterdam', 'M'),
'het nieuwe instituut': ('ZH', 'ROT', 'Rotterdam', 'M'),
'museum van loon': ('NH', 'AMS', 'Amsterdam', 'M'),
'museum voorlinden': ('ZH', 'WAS', 'Wassenaar', 'M'),
'museum belvedere': ('FR', 'HEE', 'Heerenveen', 'M'),
'museum more': ('GE', 'GOR', 'Gorssel', 'M'),
'lam museum': ('ZH', 'LIS', 'Lisse', 'M'),
'lisser art museum': ('ZH', 'LIS', 'Lisse', 'M'),
'lisser art museum lam': ('ZH', 'LIS', 'Lisse', 'M'),
'nxt museum': ('NH', 'AMS', 'Amsterdam', 'M'),
'nationaal onderduikmuseum': ('GE', 'AAL', 'Aalten', 'M'),
'lantarenvenster': ('ZH', 'ROT', 'Rotterdam', 'E'),
'loosduins museum': ('ZH', 'DHA', 'Den Haag', 'M'),
'louis couperus museum': ('ZH', 'DHA', 'Den Haag', 'M'),
'museum bredius': ('ZH', 'DHA', 'Den Haag', 'M'),
'museum broekerveiling': ('NH', 'LAN', 'Langedijk', 'M'),
'broekerveiling': ('NH', 'LAN', 'Langedijk', 'M'),
'museum bronbeek': ('GE', 'ARN', 'Arnhem', 'M'),
'museum de bastei': ('GE', 'NIJ', 'Nijmegen', 'M'),
'museum amstelland': ('NH', 'AMS', 'Amstelveen', 'M'),
'museum cobra': ('NH', 'AMV', 'Amstelveen', 'M'),
'cobra museum': ('NH', 'AMV', 'Amstelveen', 'M'),
'cobra museum voor moderne kunst amstelveen': ('NH', 'AMV', 'Amstelveen', 'M'),
'museum aan de a': ('GR', 'GRO', 'Groningen', 'M'),
'museum helmantel': ('GR', 'WES', 'Westeremden', 'M'),
'museum hert fan fryslan': ('FR', 'LEE', 'Leeuwarden', 'M'),
'museum het pakhuis': ('NH', 'HOO', 'Hoorn', 'M'),
'museum huys der kunsten': ('NB', 'ROO', 'Roosendaal', 'M'),
'museum maluku': ('UT', 'UTR', 'Utrecht', 'M'),
'museum martena': ('FR', 'FRA', 'Franeker', 'M'),
'museum nairac': ('GE', 'BAR', 'Barneveld', 'M'),
'museum slager': ('NB', 'BOS', 's-Hertogenbosch', 'M'),
'museum smedekinck': ('GE', 'ZEL', 'Zelhem', 'M'),
'museum staal': ('GE', 'ALM', 'Almere', 'M'),
'museum cafe het pomphuis': ('ZE', 'GOE', 'Goes', 'E'), # Restaurant/cafe, not museum
'museum de looierij': ('NH', 'AMS', 'Amsterdam', 'M'), # Westzaan area
'museum de proefkolonie': ('DR', 'FRE', 'Frederiksoord', 'M'),
'museum de speeltoren': ('GE', 'NIJ', 'Nijmegen', 'M'), # Actually in Monnickendam
'museum fiskershuske': ('FR', 'MOD', 'Moddergat', 'M'),
'museum stedhus sleat': ('FR', 'SLO', 'Sloten', 'M'),
'museumppassmusees': ('BE', 'BRU', 'Brussels', 'O'), # Belgium - museum pass
'kroller muller museum': ('GE', 'OTT', 'Otterlo', 'M'),
'museum swaensteyn': ('ZH', 'VOR', 'Voorburg', 'M'),
'museum van de vrouw': ('NB', 'EER', 'Eersel', 'M'),
'oorlogsmuseum medemblik': ('NH', 'MED', 'Medemblik', 'M'),
'nac museum': ('NB', 'BRE', 'Breda', 'M'),
'nationaal baggermuseum': ('ZH', 'SLI', 'Sliedrecht', 'M'),
'nationaal restauratiefonds': ('UT', 'AME', 'Amersfoort', 'N'),
'nederlands steendrukmuseum': ('GE', 'VAL', 'Valburg', 'M'),
'nederlands stoommachinemuseum': ('GE', 'MED', 'Medemblik', 'M'),
'pieter vermeulen museum': ('DR', 'MED', 'Diever', 'M'),
'bonnefanten': ('LI', 'MAA', 'Maastricht', 'M'),
'bonami spelcomputer museum': ('OV', 'ZWO', 'Zwolle', 'M'),
'bakkerijmuseum de oude bakkerij': ('NH', 'MED', 'Medemblik', 'M'),
'chabot museum': ('ZH', 'ROT', 'Rotterdam', 'M'),
'coda museum': ('GE', 'APE', 'Apeldoorn', 'M'),
'comm museum voor communicatie': ('ZH', 'DHA', 'Den Haag', 'M'),
'cruquius museum': ('NH', 'HAA', 'Haarlemmermeer', 'M'),
'dordrechts museum': ('ZH', 'DOR', 'Dordrecht', 'M'),
'dutch museum of freemasonry': ('ZH', 'DHA', 'Den Haag', 'M'),
'eise eisinga planetarium': ('FR', 'FRA', 'Franeker', 'M'),
'elisabeth weeshuis museum': ('UT', 'CUL', 'Culemborg', 'M'),
'design museum huis dedel': ('ZH', 'DHA', 'Den Haag', 'M'),
'fries landbouw museum': ('FR', 'LEE', 'Leeuwarden', 'M'),
'fries scheepvaart museum': ('FR', 'SNE', 'Sneek', 'M'),
'gelderse archeologie': ('GE', 'ARN', 'Arnhem', 'R'),
'gelders archief': ('GE', 'ARN', 'Arnhem', 'A'),
'gorcums museum': ('ZH', 'GOR', 'Gorinchem', 'M'),
'hart museum': ('NH', 'AMS', 'Amsterdam', 'M'),
'h art museum': ('NH', 'AMS', 'Amsterdam', 'M'),
'het drentse landschap': ('DR', 'ASS', 'Assen', 'N'),
'het museum voor onbedoelde kunst': ('NH', 'AMS', 'Amsterdam', 'M'),
'het schip': ('NH', 'AMS', 'Amsterdam', 'M'),
'huygens instituut': ('NH', 'AMS', 'Amsterdam', 'R'),
'katwijks museum': ('ZH', 'KAT', 'Katwijk', 'M'),
'kroller muller museum': ('GE', 'OTT', 'Otterlo', 'M'),
'kunsthal': ('ZH', 'ROT', 'Rotterdam', 'G'),
'literatuurmuseum': ('ZH', 'DHA', 'Den Haag', 'M'),
'museum aan de ijssel': ('GE', 'DOE', 'Doesburg', 'M'),
'museum de buitenplaats': ('DR', 'EEL', 'Eelde', 'M'),
'museum de casteelse poort': ('GE', 'WAG', 'Wageningen', 'M'),
'museum de koperen knop': ('ZE', 'HAR', 'Hardinxveld', 'M'),
'museum de lakenhal': ('ZH', 'LEI', 'Leiden', 'M'),
'museum geert groote huis': ('OV', 'DEV', 'Deventer', 'M'),
'museum het oude raadhuis': ('UT', 'URK', 'Urk', 'M'),
'museum het valkhof': ('GE', 'NIJ', 'Nijmegen', 'M'),
'museum hoeksche waard': ('ZH', 'OIB', 'Oud-Beijerland', 'M'),
'museum huys der historie': ('NB', 'HEL', 'Helmond', 'M'),
'museum ijsselstein': ('UT', 'IJS', 'IJsselstein', 'M'),
'museum kaap skil': ('NH', 'TEX', 'Texel', 'M'),
'museum kasteel wijchen': ('GE', 'WIJ', 'Wijchen', 'M'),
'museum maelwael van lymborch': ('GE', 'NIJ', 'Nijmegen', 'M'),
'museum ons lieve heer op solder': ('NH', 'AMS', 'Amsterdam', 'M'),
'museum plus bus': ('NH', 'AMS', 'Amsterdam', 'M'),
'museum romeinse katakomben': ('LI', 'VAL', 'Valkenburg', 'M'),
'museum stedhus': ('FR', 'WOR', 'Workum', 'M'),
'museum t oude slot': ('GE', 'VEL', 'Velp', 'M'),
'museum tot zover': ('NH', 'AMS', 'Amsterdam', 'M'),
'museum valse kunst': ('GE', 'VIE', 'Vierhouten', 'M'),
'museum van de twintigste eeuw': ('NH', 'HOO', 'Hoorn', 'M'),
'museum van lien': ('GE', 'WAG', 'Wageningen', 'M'),
'museum vd 20e eeuw': ('NH', 'HOO', 'Hoorn', 'M'),
'museum voormeer': ('NH', 'AMS', 'Amsterdam', 'M'),
'museum zaanse tijd': ('NH', 'ZAA', 'Zaandam', 'M'),
'museumboerderij west frisia': ('NH', 'HOO', 'Hoogkarspel', 'M'),
'museumpark': ('ZH', 'ROT', 'Rotterdam', 'M'),
'nationaal militair museum': ('UT', 'SOE', 'Soesterberg', 'M'),
'nationaal monument oranjehotel': ('ZH', 'DHA', 'Den Haag', 'M'),
'nationaal muziekinstrumenten fonds': ('NH', 'AMS', 'Amsterdam', 'M'),
'nationaal orgelmuseum': ('GE', 'ELB', 'Elburg', 'M'),
'nationaal tinnen figuren museum': ('GE', 'OMM', 'Ommen', 'M'),
'nationaal vlechtmuseum': ('DR', 'NOR', 'Noordwolde', 'M'),
'nederlands dans theater': ('ZH', 'DHA', 'Den Haag', 'E'),
'nederlands fotomuseum': ('ZH', 'ROT', 'Rotterdam', 'M'),
'nederlands instituut voor beeld en geluid': ('NH', 'HIL', 'Hilversum', 'A'),
'nederlands mijnmuseum': ('LI', 'HEE', 'Heerlen', 'M'),
'nederlands transport museum': ('ZH', 'NIE', 'Nieuw-Vennep', 'M'),
'nieuwe kerk amsterdam': ('NH', 'AMS', 'Amsterdam', 'H'),
'nieuwe kerk delft': ('ZH', 'DEL', 'Delft', 'H'),
'nijntje museum': ('UT', 'UTR', 'Utrecht', 'M'),
'nh museum': ('NH', 'HAA', 'Haarlem', 'M'),
'oorlogsmuseum overloon': ('NB', 'OVL', 'Overloon', 'M'),
'openluchtmuseum het hoogeland': ('GR', 'WAR', 'Warffum', 'M'),
'paleis het loo': ('GE', 'APE', 'Apeldoorn', 'M'),
'purmerends museum': ('NH', 'PUR', 'Purmerend', 'M'),
'rijksmuseum boerhaave': ('ZH', 'LEI', 'Leiden', 'M'),
'rijksmuseum twenthe': ('OV', 'ENS', 'Enschede', 'M'),
'singer laren': ('NH', 'LAR', 'Laren', 'M'),
'singer museum': ('NH', 'LAR', 'Laren', 'M'),
'sonnenborgh museum': ('UT', 'UTR', 'Utrecht', 'M'),
'zeeuws museum': ('ZE', 'MID', 'Middelburg', 'M'),
# Additional museums from PENDING list
'het scheepvaartmuseum': ('NH', 'AMS', 'Amsterdam', 'M'),
'hash marihuana hemp museum': ('NH', 'AMS', 'Amsterdam', 'M'),
'hash marihuana en hemp museum': ('NH', 'AMS', 'Amsterdam', 'M'),
'van gogh village museum': ('NB', 'NUE', 'Nuenen', 'M'),
'retro computer museum': ('GE', 'ARN', 'Arnhem', 'M'),
'haags bus museum': ('ZH', 'DHA', 'Den Haag', 'M'),
'het romeins museum': ('GE', 'NIJ', 'Nijmegen', 'M'),
'hendrick hamel museum': ('GR', 'GOR', 'Gorinchem', 'M'),
'graphic design museum': ('NB', 'BRE', 'Breda', 'M'),
'vliegend museum seppe': ('NB', 'BOS', 'Bosschenhoofd', 'M'),
'zoological museum netherlands': ('NH', 'AMS', 'Amsterdam', 'M'),
'world of cannabis museum project': ('NH', 'AMS', 'Amsterdam', 'M'),
'stichting museum 1940 1945': ('ZH', 'DOR', 'Dordrecht', 'M'),
'stichting museum menkemaborg': ('GR', 'UIT', 'Uithuizen', 'M'),
'stichting pak museum': ('NH', 'AMS', 'Amsterdam', 'M'),
'stichting museum blokhuispoort': ('FR', 'LEE', 'Leeuwarden', 'M'),
'sculptuur instituut': ('NH', 'AMS', 'Amsterdam', 'M'),
'gelders restauratie centrum': ('GE', 'ARN', 'Arnhem', 'R'),
# ==========================================================================
# LIBRARIES
# ==========================================================================
'de bblthk': ('GE', 'WAG', 'Wageningen', 'L'),
'kb nationale bibliotheek': ('ZH', 'DHA', 'Den Haag', 'L'),
'bplusc': ('ZH', 'LEI', 'Leiden', 'L'),
# ==========================================================================
# ARCHIVES
# ==========================================================================
'digitar het online archief': ('UT', 'UTR', 'Utrecht', 'D'),
'the black archives': ('NH', 'AMS', 'Amsterdam', 'A'),
'archivesspace': ('US', 'NYC', 'New York', 'D'), # US-based software
# ==========================================================================
# NATURE & ENVIRONMENTAL ORGANIZATIONS
# ==========================================================================
'staatsbosbeheer': ('UT', 'AME', 'Amersfoort', 'O'),
'vogelbescherming nederland': ('UT', 'ZEI', 'Zeist', 'N'),
'waddenvereniging': ('FR', 'HAR', 'Harlingen', 'N'),
'trees for all': ('UT', 'UTR', 'Utrecht', 'N'),
'natuurmonumenten': ('UT', 'AME', 'Amersfoort', 'N'),
'vereniging natuurmonumenten': ('UT', 'AME', 'Amersfoort', 'N'),
'it fryske gea': ('FR', 'BEE', 'Beetsterzwaag', 'N'),
'landschappennl': ('UT', 'UTR', 'Utrecht', 'N'),
'land van ons': ('UT', 'UTR', 'Utrecht', 'N'),
'natuurbegraven nederland': ('NH', 'AMS', 'Amsterdam', 'N'),
'natuuropleiding': ('NH', 'AMS', 'Amsterdam', 'E'),
'obn natuurkennis': ('DR', 'ASS', 'Assen', 'R'),
'ravon': ('GE', 'NIJ', 'Nijmegen', 'R'),
'norminstituut bomen': ('UT', 'UTR', 'Utrecht', 'R'),
'nationale bomenbank b v': ('NH', 'AMS', 'Amsterdam', 'C'),
'native plant trust': ('US', 'BOS', 'Boston', 'N'), # US
'kiss the ground': ('US', 'LAX', 'Los Angeles', 'N'), # US
'national coalition for natural farming': ('IN', 'DEL', 'Delhi', 'N'), # India
'lpo provence alpes cote d azur': ('FR', 'AIX', 'Aix-en-Provence', 'N'), # France
'picardie nature': ('FR', 'AMI', 'Amiens', 'N'), # France
'parc national des pyrenees': ('FR', 'TAR', 'Tarbes', 'N'), # France
'bumblebee conservation trust': ('GB', 'STI', 'Stirling', 'N'), # UK
'botanic gardens conservation international': ('GB', 'KEW', 'Kew', 'N'), # UK
'save our seas foundation sosf': ('ZA', 'CPT', 'Cape Town', 'N'), # South Africa
'ferus ours loup lynx conservation': ('FR', 'PAR', 'Paris', 'N'), # France
'european arboricultural council': ('BE', 'BRU', 'Brussels', 'N'), # Belgium
'caring farmers': ('UT', 'UTR', 'Utrecht', 'N'),
'collectief natuurinclusief': ('UT', 'UTR', 'Utrecht', 'N'),
'stichting rechten van de natuur': ('NH', 'AMS', 'Amsterdam', 'N'),
'deltaplan agrarisch waterbeheer daw': ('UT', 'UTR', 'Utrecht', 'N'),
'boerenverstand onderzoek advies': ('GE', 'WAG', 'Wageningen', 'R'),
'cruydt hoeck': ('GR', 'NIJ', 'Nijeholtpade', 'C'),
# ==========================================================================
# HERITAGE & HISTORICAL SOCIETIES
# ==========================================================================
'3 october vereeniging': ('ZH', 'LEI', 'Leiden', 'S'),
'historische vereniging delfia batavorum': ('ZH', 'DEL', 'Delft', 'S'),
'historische vereniging koog zaandijk': ('NH', 'ZAA', 'Zaandijk', 'S'),
'historische vereniging oud stolwijck': ('ZH', 'STO', 'Stolwijk', 'S'),
'historische vereniging voorst': ('GE', 'VOO', 'Voorst', 'S'),
'historische vereniging wormerveer': ('NH', 'WOR', 'Wormerveer', 'S'),
'heemkunde vereniging borne': ('OV', 'BOR', 'Borne', 'S'),
'heemkunde vlaanderen': ('BE', 'ANT', 'Antwerpen', 'S'), # Belgium
'hendrick de keyser monumenten': ('NH', 'AMS', 'Amsterdam', 'N'),
'vereniging particuliere historische buitenplaatsen': ('NH', 'AMS', 'Amsterdam', 'N'),
'werkgroep adelsgeschiedenis': ('NH', 'AMS', 'Amsterdam', 'S'),
'stichting oude groninger kerken': ('GR', 'GRO', 'Groningen', 'N'),
'studiecentrum eerste wereldoorlog': ('BE', 'BRU', 'Brussels', 'R'), # Belgium
'sobibor foundation': ('NH', 'AMS', 'Amsterdam', 'N'),
# ==========================================================================
# STICHTINGEN & FOUNDATIONS
# ==========================================================================
'abdij o l v koningshoeven': ('NB', 'TIL', 'Tilburg', 'H'),
'bijenstichting': ('UT', 'UTR', 'Utrecht', 'N'),
'bomenstichting': ('UT', 'UTR', 'Utrecht', 'N'),
'boerennatuur': ('UT', 'UTR', 'Utrecht', 'N'),
'stichting amelander musea': ('FR', 'AME', 'Ameland', 'M'),
'stichting confro': ('NH', 'AMS', 'Amsterdam', 'N'),
'stichting de zaanse schans': ('NH', 'ZAA', 'Zaandam', 'M'),
'stichting dioraphte': ('UT', 'UTR', 'Utrecht', 'N'),
'stichting koninklijke defensiemusea': ('ZH', 'DHA', 'Den Haag', 'M'),
'stichting kunst cultuur': ('NH', 'AMS', 'Amsterdam', 'N'),
'stichting texels museum': ('NH', 'TEX', 'Texel', 'M'),
'stichting twisca': ('OV', 'TWI', 'Twisk', 'N'),
'stichting waddengroep': ('NH', 'DEN', 'Den Helder', 'N'),
'hartwig art foundation': ('NH', 'AMS', 'Amsterdam', 'N'),
'fonds 21': ('UT', 'UTR', 'Utrecht', 'N'),
# ==========================================================================
# RESEARCH CENTERS & KNOWLEDGE INSTITUTES
# ==========================================================================
'adc archeoprojecten': ('GE', 'AME', 'Amersfoort', 'R'),
'archol': ('ZH', 'LEI', 'Leiden', 'R'),
'kitlv': ('ZH', 'LEI', 'Leiden', 'R'),
'cbg': ('ZH', 'DHA', 'Den Haag', 'R'), # Central Bureau for Genealogy
'kenniscentrum immaterieel erfgoed nederland': ('NH', 'AMS', 'Amsterdam', 'R'),
'koninklijke nederlandse academie van wetenschappen': ('NH', 'AMS', 'Amsterdam', 'R'),
'den kennisinstituut cultuur digitale transformatie': ('NH', 'AMS', 'Amsterdam', 'R'),
'centre of expertise creative innovation': ('NH', 'AMS', 'Amsterdam', 'R'),
'huygens institute': ('NH', 'AMS', 'Amsterdam', 'R'),
'huygens instituut': ('NH', 'AMS', 'Amsterdam', 'R'),
'instituut voor de nederlandse taal': ('ZH', 'LEI', 'Leiden', 'R'),
'n w posthumus institute': ('NH', 'AMS', 'Amsterdam', 'R'),
'nicas netherlands institute for conservation art science': ('NH', 'AMS', 'Amsterdam', 'R'),
'raap': ('OV', 'ZWO', 'Zwolle', 'R'),
'restauratoren nederland': ('NH', 'AMS', 'Amsterdam', 'N'),
'restauratieatelier restaura': ('LI', 'HAE', 'Haelen', 'C'),
'picturae': ('NH', 'HIL', 'Heiloo', 'C'),
'icom netherlands': ('NH', 'AMS', 'Amsterdam', 'N'),
'icomos netherlands': ('NH', 'AMS', 'Amsterdam', 'N'),
'international committee for documentation': ('FR', 'PAR', 'Paris', 'N'),
'museumvereniging': ('NH', 'AMS', 'Amsterdam', 'N'),
'museumpeil': ('NH', 'AMS', 'Amsterdam', 'C'),
'museumtijdschrift': ('NH', 'AMS', 'Amsterdam', 'C'),
'monumentaal magazine over cultureel erfgoed': ('NH', 'AMS', 'Amsterdam', 'C'),
'modemuze': ('NH', 'AMS', 'Amsterdam', 'D'),
'moebius museum software': ('NH', 'AMS', 'Amsterdam', 'C'),
'platform drentse musea': ('DR', 'ASS', 'Assen', 'O'),
'public domain library': ('US', 'SFO', 'San Francisco', 'D'), # US
'internet archive': ('US', 'SFO', 'San Francisco', 'A'), # US
'society for artistic research': ('AT', 'VIE', 'Vienna', 'R'), # Austria
'digital preservation coalition': ('GB', 'GLA', 'Glasgow', 'R'), # UK
'the palaeontological association': ('GB', 'LON', 'London', 'R'), # UK
'the society for archaeological sciences': ('US', 'TUC', 'Tucson', 'R'), # US
'conflict research society': ('GB', 'LON', 'London', 'R'), # UK
'stads en architectuurgeschiedenis uva': ('NH', 'AMS', 'Amsterdam', 'R'),
'agandau onderzoek in het archief': ('NH', 'AMS', 'Amsterdam', 'R'),
'anchise project horizon europe': ('FR', 'PAR', 'Paris', 'R'), # France
'atrium advancing frontier research in the arts humanities': ('EU', 'BRU', 'Brussels', 'R'), # EU
'biblissima': ('FR', 'PAR', 'Paris', 'R'), # France
# ==========================================================================
# THEATERS & CULTURAL VENUES
# ==========================================================================
'theater de veste': ('ZH', 'DEL', 'Delft', 'E'),
'theater a d schie': ('ZH', 'SCH', 'Schiedam', 'E'),
'theater a d rijn': ('GE', 'ARN', 'Arnhem', 'E'),
'amphion cultuurbedrijf': ('GE', 'DOE', 'Doetinchem', 'E'),
'defabrique evenementenlocatie': ('UT', 'UTR', 'Utrecht', 'E'),
'delamar': ('NH', 'AMS', 'Amsterdam', 'E'),
'dutch national opera ballet': ('NH', 'AMS', 'Amsterdam', 'E'),
'theatergezelschap bontehond': ('NH', 'AMS', 'Amsterdam', 'E'),
'birds of paradise theatre company': ('GB', 'GLA', 'Glasgow', 'E'), # UK
'yoann bourgeois art company': ('FR', 'LYO', 'Lyon', 'E'), # France
'de grote post': ('BE', 'OST', 'Oostende', 'E'), # Belgium
# ==========================================================================
# GALLERIES & ART SPACES
# ==========================================================================
'framer framed': ('NH', 'AMS', 'Amsterdam', 'G'),
'cemara 6 galeri museum': ('ID', 'JAK', 'Jakarta', 'G'), # Indonesia
'vedica art studios and gallery': ('IN', 'DEL', 'Delhi', 'G'), # India
# ==========================================================================
# OFFICIAL INSTITUTIONS & GOVERNMENT
# ==========================================================================
'creatieve hubs nederland': ('NH', 'AMS', 'Amsterdam', 'O'),
'the dutch inspectorate of education': ('UT', 'UTR', 'Utrecht', 'O'),
'embassy of the netherlands in morocco': ('MA', 'RAB', 'Rabat', 'O'), # Morocco
'gemeente nederweert': ('LI', 'NED', 'Nederweert', 'O'),
'house of european history': ('BE', 'BRU', 'Brussels', 'M'), # Belgium
'european museum forum': ('PT', 'LIS', 'Lisbon', 'O'), # Portugal
'docomomo international': ('PT', 'LIS', 'Lisbon', 'N'), # Portugal
'culture action europe': ('BE', 'BRU', 'Brussels', 'N'), # Belgium
'gbif the global biodiversity information facility': ('DK', 'CPH', 'Copenhagen', 'O'), # Denmark
# ==========================================================================
# JOURNALISM & MEDIA
# ==========================================================================
'11 11 media': ('NH', 'AMS', 'Amsterdam', 'C'),
'155 eenvijfvijf': ('NH', 'AMS', 'Amsterdam', 'C'),
'archimag': ('FR', 'PAR', 'Paris', 'C'), # France
'arte al dia': ('US', 'MIA', 'Miami', 'C'), # US - Latin American art magazine
'exibart': ('IT', 'ROM', 'Rome', 'C'), # Italy
'finestre sull arte': ('IT', 'FLO', 'Florence', 'C'), # Italy
# ==========================================================================
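# MISCLASSIFIED FOREIGN ORGS (have NL prefix but are foreign)
# Also holds "- normalized" spelling variants of names listed elsewhere in this
# table; some of those are Dutch (e.g. the Friesland and Otterlo entries).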
# ==========================================================================
'her place womens museum': ('AU', 'MEL', 'Melbourne', 'M'), # Australia
'her place women s museum': ('AU', 'MEL', 'Melbourne', 'M'), # Australia - variant
'asociacion acre': ('ES', 'MAD', 'Madrid', 'N'), # Spain
'asociacio n acre': ('ES', 'MAD', 'Madrid', 'N'), # Spain - normalized
'la maison du theatre a brest': ('FR', 'BRE', 'Brest', 'E'), # France
'la maison du the a tre a brest': ('FR', 'BRE', 'Brest', 'E'), # France - normalized
'lpo provence alpes cote d azur': ('FR', 'AIX', 'Aix-en-Provence', 'N'), # France
'lpo provence alpes co te d azur': ('FR', 'AIX', 'Aix-en-Provence', 'N'), # France - normalized
'lucas laboratoire d usages culture s arts societe': ('FR', 'PAR', 'Paris', 'R'), # France
'maison des metallos': ('FR', 'PAR', 'Paris', 'E'), # France
'maison des me tallos': ('FR', 'PAR', 'Paris', 'E'), # France - normalized
'stiftung trias gemeinnutzige stiftung fur boden okologie und wohnen': ('DE', 'HAT', 'Hattingen', 'N'), # Germany
'stiftung trias': ('DE', 'HAT', 'Hattingen', 'N'), # Germany - short name
'sothebys': ('GB', 'LON', 'London', 'C'), # UK auction house
'sotheby s': ('GB', 'LON', 'London', 'C'), # UK auction house - variant
'sothebys institute of art': ('GB', 'LON', 'London', 'E'), # UK
'sotheby s institute of art': ('GB', 'LON', 'London', 'E'), # UK - variant
'museumppassmusees': ('BE', 'BRU', 'Brussels', 'O'), # Belgium
'museumpassmuse es': ('BE', 'BRU', 'Brussels', 'O'), # Belgium - normalized
'museum stedhus sleat': ('FR', 'SLO', 'Sloten', 'M'), # Friesland
'museum stedhu s sleat': ('FR', 'SLO', 'Sloten', 'M'), # Friesland - normalized
'museum fiskershuske': ('FR', 'MOD', 'Moddergat', 'M'), # Friesland
'museum fiskershu ske': ('FR', 'MOD', 'Moddergat', 'M'), # Friesland - normalized
'arte al dia': ('US', 'MIA', 'Miami', 'C'), # US - Latin American art magazine
'arte al di a': ('US', 'MIA', 'Miami', 'C'), # US - normalized
'kroller muller museum': ('GE', 'OTT', 'Otterlo', 'M'), # Already exists
'kro ller mu ller museum': ('GE', 'OTT', 'Otterlo', 'M'), # Normalized
'representation of the netherlands in aruba curacao and sint maarten': ('NL', 'DHA', 'Den Haag', 'O'),
'representation of the netherlands in aruba curac ao and sint maarten': ('NL', 'DHA', 'Den Haag', 'O'), # Normalized
# ==========================================================================
# NGOs & ADVOCACY
# ==========================================================================
'fim federatie instandhouding monumenten': ('NH', 'AMS', 'Amsterdam', 'N'),
'ark rewilding nederland': ('GE', 'NIJ', 'Nijmegen', 'N'),
'centraal joods overleg cjo': ('NH', 'AMS', 'Amsterdam', 'N'),
'de commandostichting': ('NH', 'HAA', 'Haarlem', 'N'),
'kenniscommunity informatie en archief': ('NH', 'AMS', 'Amsterdam', 'N'),
'expertisecentrum literair vertalen elv': ('NH', 'AMS', 'Amsterdam', 'R'),
'acp ica archival community for palestine': ('PS', 'RAM', 'Ramallah', 'N'), # Palestine
'campaign against antisemitism': ('GB', 'LON', 'London', 'N'), # UK
'combat antisemitism movement': ('US', 'NYC', 'New York', 'N'), # US
'facing history ourselves': ('US', 'BOS', 'Boston', 'E'), # US
'freundeskreis yad vashem e v': ('DE', 'FRA', 'Frankfurt', 'N'), # Germany
'yad vashem the world holocaust remembrance center': ('IL', 'JER', 'Jerusalem', 'M'), # Israel
'the wiener holocaust library': ('GB', 'LON', 'London', 'L'), # UK
'usc shoah foundation': ('US', 'LAX', 'Los Angeles', 'A'), # US
'cultuurnetwerk groenlinks pvda': ('NH', 'AMS', 'Amsterdam', 'N'),
# ==========================================================================
# PROFESSIONAL ASSOCIATIONS
# ==========================================================================
'spab': ('GB', 'LON', 'London', 'N'), # Society for the Protection of Ancient Buildings, UK
'sustainable traditional building alliance': ('GB', 'LON', 'London', 'N'), # UK
'the institute of historic building conservation ihbc': ('GB', 'TIV', 'Tivetshall', 'N'), # UK
'asociacion acre': ('ES', 'MAD', 'Madrid', 'N'), # Spain
'vlaamse vereniging tot behoud van historische vaartuigen': ('BE', 'ANT', 'Antwerpen', 'S'), # Belgium
'v z w archief en documentatiecentrum erfgoed binnenvaart': ('BE', 'ANT', 'Antwerpen', 'A'), # Belgium
'centre d archives et de recherches pour l histoire des femmes avg carhif': ('BE', 'BRU', 'Brussels', 'A'), # Belgium
'nederlandse entomologische vereniging': ('NH', 'AMS', 'Amsterdam', 'S'),
'nederlandse vereniging van dierentuinen dutch zoo association': ('NH', 'AMS', 'Amsterdam', 'N'),
'netwerk archieven design en digitale cultuur': ('NH', 'AMS', 'Amsterdam', 'N'),
'ondernemers in geschiedenis': ('NH', 'AMS', 'Amsterdam', 'S'),
'oud stede broec': ('NH', 'STE', 'Stede Broec', 'S'),
'raad voor dierenaangelegenheden rda': ('ZH', 'DHA', 'Den Haag', 'O'),
'regenl': ('NH', 'AMS', 'Amsterdam', 'N'),
'representation of the netherlands in aruba curacao and sint maarten': ('NL', 'DHA', 'Den Haag', 'O'),
'hylkema erfgoed': ('NH', 'AMS', 'Amsterdam', 'C'),
'idverde nl': ('NH', 'AMS', 'Amsterdam', 'C'),
'kaliber': ('OV', 'ZWO', 'Zwolle', 'E'),
'keunstwurk': ('FR', 'LEE', 'Leeuwarden', 'E'),
'kunstkade': ('ZH', 'ROT', 'Rotterdam', 'E'),
'leewardists': ('GR', 'GRO', 'Groningen', 'N'),
'leo smit foundation': ('NH', 'AMS', 'Amsterdam', 'N'),
'loveland events': ('NH', 'AMS', 'Amsterdam', 'E'),
'lvwb fundraising': ('NH', 'AMS', 'Amsterdam', 'C'),
'meesters in': ('NH', 'AMS', 'Amsterdam', 'C'),
'moooi': ('NB', 'BRE', 'Breda', 'C'),
'mug authentic coffee atjeh': ('ID', 'JAK', 'Jakarta', 'C'), # Indonesia
# ==========================================================================
# ART & HERITAGE PROJECTS
# ==========================================================================
'art herstory': ('US', 'NYC', 'New York', 'D'), # US
'art history link up': ('GB', 'LON', 'London', 'D'), # UK
'call for curators': ('NH', 'AMS', 'Amsterdam', 'D'),
'creative works': ('NH', 'AMS', 'Amsterdam', 'C'),
'themusemslab': ('DE', 'BER', 'Berlin', 'E'), # Germany
'cultuurloket digitall': ('NH', 'AMS', 'Amsterdam', 'D'),
'gms digitaliseert': ('NH', 'AMS', 'Amsterdam', 'D'),
# ==========================================================================
# COMPANIES & COMMERCIAL
# ==========================================================================
'sothebys': ('GB', 'LON', 'London', 'C'), # UK
'sothebys institute of art': ('GB', 'LON', 'London', 'E'), # UK
'the art loss register': ('GB', 'LON', 'London', 'C'), # UK
'space matter': ('NH', 'AMS', 'Amsterdam', 'C'),
'studio nauta': ('NH', 'AMS', 'Amsterdam', 'C'),
'terra nostra bv': ('NH', 'AMS', 'Amsterdam', 'C'),
'tribeca': ('US', 'NYC', 'New York', 'C'), # US
'van gelder groente fruit': ('NH', 'AMS', 'Amsterdam', 'C'),
'werken voor cultuur': ('NH', 'AMS', 'Amsterdam', 'C'),
'eveha international': ('FR', 'PAR', 'Paris', 'R'), # France
# ==========================================================================
# MISCELLANEOUS (mostly Dutch; a few US, BE and FR entries at the end)
# ==========================================================================
'de andere helft': ('NH', 'AMS', 'Amsterdam', 'N'),
'eureka': ('NH', 'AMS', 'Amsterdam', 'E'),
'enschede700': ('OV', 'ENS', 'Enschede', 'E'),
'fenix': ('ZH', 'ROT', 'Rotterdam', 'M'),
'ruimtetijd': ('NH', 'AMS', 'Amsterdam', 'R'),
'sprekende geschiedenis': ('NH', 'AMS', 'Amsterdam', 'E'),
'supermab': ('NH', 'AMS', 'Amsterdam', 'R'),
'tijdlab': ('NH', 'AMS', 'Amsterdam', 'R'),
'turf event': ('NH', 'AMS', 'Amsterdam', 'E'),
'vrijdag': ('GR', 'GRO', 'Groningen', 'E'),
'wad gaat om': ('FR', 'LEE', 'Leeuwarden', 'N'),
'wikipedia': ('US', 'SFO', 'San Francisco', 'D'), # US
'yory nl het grootste platform voor stamboomonderzoek': ('NH', 'AMS', 'Amsterdam', 'D'),
'ar tur': ('BE', 'TUR', 'Turnhout', 'E'), # Belgium
'culture lab 29': ('FR', 'BRE', 'Brest', 'E'), # France
'baleine sous gravillon': ('FR', 'PAR', 'Paris', 'E'), # France
# ==========================================================================
# FOREIGN MUSEUMS - Belgium, France, Italy, etc.
# ==========================================================================
'diva museum': ('BE', 'ANT', 'Antwerpen', 'M'), # Belgium
'huis van alijn': ('BE', 'GEN', 'Gent', 'M'), # Belgium
'kanal centre pompidou': ('BE', 'BRU', 'Brussels', 'M'), # Belgium
'kazerne dossin': ('BE', 'MEC', 'Mechelen', 'M'), # Belgium
'middelheimmuseum': ('BE', 'ANT', 'Antwerpen', 'M'), # Belgium
'musea brugge': ('BE', 'BRU', 'Brugge', 'O'), # Belgium - museum network
'kunstencentrum viernulvier': ('BE', 'GEN', 'Gent', 'E'), # Belgium
'caen memorial': ('FR', 'CAE', 'Caen', 'M'), # France
'luma arles': ('FR', 'ARL', 'Arles', 'M'), # France
'la maison du theatre a brest': ('FR', 'BRE', 'Brest', 'E'), # France
'maison des metallos': ('FR', 'PAR', 'Paris', 'E'), # France
'irht institut de recherche et d histoire des textes': ('FR', 'PAR', 'Paris', 'R'), # France
'lucas laboratoire d usages culture s arts societe': ('FR', 'PAR', 'Paris', 'R'), # France
'observatoire des politiques culturelles': ('FR', 'GRE', 'Grenoble', 'R'), # France
'profilculture': ('FR', 'PAR', 'Paris', 'C'), # France
'den gamle by': ('DK', 'AAR', 'Aarhus', 'M'), # Denmark
'den kongelige samling': ('DK', 'CPH', 'Copenhagen', 'M'), # Denmark
'kulturhusene i danmark': ('DK', 'CPH', 'Copenhagen', 'O'), # Denmark
'kulturmonitor': ('DK', 'CPH', 'Copenhagen', 'R'), # Denmark
'kulturhistorisk museum': ('NO', 'OSL', 'Oslo', 'M'), # Norway
'castello di rivoli': ('IT', 'TOR', 'Torino', 'M'), # Italy
'consorzio delle residenze reali sabaude': ('IT', 'TOR', 'Torino', 'M'), # Italy
'fondazione canova onlus': ('IT', 'TRE', 'Treviso', 'M'), # Italy
'fondazione pistoletto cittadellarte onlus': ('IT', 'BIE', 'Biella', 'M'), # Italy
'lac lugano arte e cultura': ('IT', 'LUG', 'Lugano', 'M'), # Switzerland (Italian-speaking)
'm9 museum': ('IT', 'VEN', 'Venice', 'M'), # Italy - actually in Mestre
'gammel estrup': ('DK', 'AAR', 'Aarhus', 'M'), # Denmark
'gedung sate museum': ('ID', 'BAN', 'Bandung', 'M'), # Indonesia
'henry moore institute': ('GB', 'LEE', 'Leeds', 'M'), # UK
'her place womens museum': ('AU', 'MEL', 'Melbourne', 'M'), # Australia
'rigsarkivet': ('DK', 'CPH', 'Copenhagen', 'A'), # Denmark
'royal armouries museum': ('GB', 'LEE', 'Leeds', 'M'), # UK
'royal botanic gardens kew': ('GB', 'KEW', 'Kew', 'B'), # UK
'the design museum': ('GB', 'LON', 'London', 'M'), # UK
'the metropolitan museum of art': ('US', 'NYC', 'New York', 'M'), # US
'thorvaldsens museum': ('DK', 'CPH', 'Copenhagen', 'M'), # Denmark
'vitra design museum': ('DE', 'WEI', 'Weil am Rhein', 'M'), # Germany
'war childhood museum': ('BA', 'SAR', 'Sarajevo', 'M'), # Bosnia
'butser ancient farm': ('GB', 'PET', 'Petersfield', 'M'), # UK
'icon film distribution anz': ('AU', 'SYD', 'Sydney', 'C'), # Australia
'museum development north': ('GB', 'NEW', 'Newcastle', 'O'), # UK
'museums association': ('GB', 'LON', 'London', 'N'), # UK
'moya museum of young art': ('AT', 'VIE', 'Vienna', 'M'), # Austria
'national churches trust': ('GB', 'LON', 'London', 'N'), # UK
'national portrait gallery': ('GB', 'LON', 'London', 'M'), # UK
'new contemporaries': ('GB', 'LON', 'London', 'N'), # UK
'peabody essex museum': ('US', 'SAL', 'Salem', 'M'), # US
'norient': ('CH', 'BER', 'Bern', 'R'), # Switzerland
'stiftung trias gemeinnutzige stiftung fur boden okologie und wohnen': ('DE', 'HAT', 'Hattingen', 'N'), # Germany
'nfdi4memory': ('DE', 'BER', 'Berlin', 'R'), # Germany
'themuseumslab': ('DE', 'BER', 'Berlin', 'E'), # Germany
# ==========================================================================
# INDONESIAN INSTITUTIONS (for ID-* PENDING files)
# ==========================================================================
'yayasan arsari djojohadikusumo': ('ID', 'JAK', 'Jakarta', 'N'), # Indonesia
'yayasan konservasi alam nusantara': ('ID', 'JAK', 'Jakarta', 'N'), # Indonesia
'southeast asia museum services seams': ('ID', 'JAK', 'Jakarta', 'O'), # Indonesia
'museum and gallery of ipb future': ('ID', 'BOG', 'Bogor', 'M'), # Indonesia
'museum dewantara kirti griya': ('ID', 'YOG', 'Yogyakarta', 'M'), # Indonesia
'museum macan': ('ID', 'JAK', 'Jakarta', 'M'), # Indonesia
'museum pasifika': ('ID', 'BAL', 'Bali', 'M'), # Indonesia
'museum zoologi universitas andalas': ('ID', 'PAD', 'Padang', 'M'), # Indonesia
'moja museum': ('ID', 'JAK', 'Jakarta', 'M'), # Indonesia - Museum of Jakarta
'wassanindia': ('IN', 'DEL', 'Delhi', 'N'), # India
'museum of contemporary tibetan art': ('IN', 'DHA', 'Dharamsala', 'M'), # India
'vedica art studios and gallery': ('IN', 'DEL', 'Delhi', 'G'), # India
# ==========================================================================
# AUSTRALIAN INSTITUTIONS
# ==========================================================================
'museumsppassmusees': ('AU', 'SYD', 'Sydney', 'O'), # Australia - museum pass program
'australian museums and galleries association victoria': ('AU', 'MEL', 'Melbourne', 'N'),
'australian society of archivists inc': ('AU', 'CAN', 'Canberra', 'N'),
'history australia': ('AU', 'SYD', 'Sydney', 'R'),
'melbourne holocaust museum': ('AU', 'MEL', 'Melbourne', 'M'),
'national library of australia': ('AU', 'CAN', 'Canberra', 'L'),
'professional historians association victoria and tasmania': ('AU', 'MEL', 'Melbourne', 'N'),
'the university of queensland art museum': ('AU', 'BRI', 'Brisbane', 'M'),
# ==========================================================================
# INDONESIAN INSTITUTIONS (additional)
# ==========================================================================
'arsip nasional republik indonesia anri': ('ID', 'JAK', 'Jakarta', 'A'),
'art zoo museum': ('ID', 'JAK', 'Jakarta', 'M'),
'art 1 new museum': ('ID', 'JAK', 'Jakarta', 'M'),
'asmat museum of culture and progress': ('ID', 'AGT', 'Agats', 'M'),
'cifor center for international forestry research': ('ID', 'BOG', 'Bogor', 'R'),
'econusa foundation indonesia': ('ID', 'JAK', 'Jakarta', 'N'),
'econusa foundation': ('ID', 'JAK', 'Jakarta', 'N'),
'fisheries resource center of indonesia frci': ('ID', 'JAK', 'Jakarta', 'R'),
'gaia indonesia': ('ID', 'JAK', 'Jakarta', 'N'),
'jakarta history museum': ('ID', 'JAK', 'Jakarta', 'M'),
'kite museum of indonesia': ('ID', 'JAK', 'Jakarta', 'M'),
'konservasi indonesia': ('ID', 'JAK', 'Jakarta', 'N'),
'ministry of tourism of the republic of indonesia': ('ID', 'JAK', 'Jakarta', 'O'),
'museum batik indonesia': ('ID', 'YOG', 'Yogyakarta', 'M'),
'museum musik indonesia': ('ID', 'JAK', 'Jakarta', 'M'),
'museum nasional indonesia': ('ID', 'JAK', 'Jakarta', 'M'),
'museum perkebunan indonesia': ('ID', 'MED', 'Medan', 'M'),
'perpustakaan nasional republik indonesia perpusnas ri': ('ID', 'JAK', 'Jakarta', 'L'),
'taman safari indonesia': ('ID', 'BOG', 'Bogor', 'B'),
# ==========================================================================
# FRENCH INSTITUTIONS (additional)
# ==========================================================================
'alca nouvelle aquitaine': ('FR', 'BOR', 'Bordeaux', 'O'),
'archives de rennes': ('FR', 'REN', 'Rennes', 'A'),
'centre de recherche du chateau de versailles': ('FR', 'VER', 'Versailles', 'R'),
'centre des monuments nationaux': ('FR', 'PAR', 'Paris', 'O'),
'chateau de chantilly officiel': ('FR', 'CHA', 'Chantilly', 'M'),
'cha teau de chantilly officiel': ('FR', 'CHA', 'Chantilly', 'M'), # normalized
'france nature environnement': ('FR', 'PAR', 'Paris', 'N'),
'ircam': ('FR', 'PAR', 'Paris', 'R'),
'mucem musee des civilisations de l europe et de la mediterranee': ('FR', 'MAR', 'Marseille', 'M'),
'mucem muse e des civilisations de l europe et de la me diterrane e': ('FR', 'MAR', 'Marseille', 'M'), # normalized
'centre de recherche du cha teau de versailles': ('FR', 'VER', 'Versailles', 'R'), # normalized
'musee d orsay': ('FR', 'PAR', 'Paris', 'M'),
'muse e d orsay': ('FR', 'PAR', 'Paris', 'M'), # normalized variant
'musee de bretagne': ('FR', 'REN', 'Rennes', 'M'),
'muse e de bretagne': ('FR', 'REN', 'Rennes', 'M'), # normalized
'musee des arts et metiers': ('FR', 'PAR', 'Paris', 'M'),
'muse e des arts et me tiers': ('FR', 'PAR', 'Paris', 'M'), # normalized
'musee du debarquement': ('FR', 'ARR', 'Arromanches', 'M'),
'muse e du de barquement': ('FR', 'ARR', 'Arromanches', 'M'), # normalized
'petites cites de caractere de france': ('FR', 'PAR', 'Paris', 'N'),
'petites cite s de caracte re de france': ('FR', 'PAR', 'Paris', 'N'), # normalized
'villa albertine the french institute for culture and education': ('US', 'NYC', 'New York', 'O'), # French in US
# ==========================================================================
# GERMAN INSTITUTIONS (additional)
# ==========================================================================
'anne frank educational center': ('DE', 'FRA', 'Frankfurt', 'E'),
'bildarchiv foto marburg': ('DE', 'MAR', 'Marburg', 'A'),
'bundesvereinigung kulturelle kinder und jugendbildung bkj': ('DE', 'REM', 'Remscheid', 'N'),
'common wadden sea secretariat': ('DE', 'WIL', 'Wilhelmshaven', 'O'),
'deutsche stiftung denkmalschutz german foundation for monument protection': ('DE', 'BON', 'Bonn', 'N'),
'deutsches archaologisches institut dai': ('DE', 'BER', 'Berlin', 'R'),
'deutsches archa ologisches institut dai': ('DE', 'BER', 'Berlin', 'R'), # normalized
'deutsches historisches museum': ('DE', 'BER', 'Berlin', 'M'),
'deutsches zentrum kulturgutverluste': ('DE', 'MAG', 'Magdeburg', 'R'),
'jewish museum berlin': ('DE', 'BER', 'Berlin', 'M'),
'klassik stiftung weimar': ('DE', 'WEI', 'Weimar', 'M'),
'kulturstiftung des bundes german federal cultural foundation': ('DE', 'HAL', 'Halle', 'N'),
'stadtische galerie im lenbachhaus und kunstbau munchen': ('DE', 'MUN', 'Munich', 'M'),
'sta dtische galerie im lenbachhaus und kunstbau mu nchen': ('DE', 'MUN', 'Munich', 'M'), # normalized
'stiftung stadtmuseum berlin': ('DE', 'BER', 'Berlin', 'M'),
# ==========================================================================
# BRITISH INSTITUTIONS (additional)
# ==========================================================================
'archaeological research services ltd': ('GB', 'BAK', 'Bakewell', 'R'),
'british school at athens': ('GR', 'ATH', 'Athens', 'R'), # Greek location!
'british trust for ornithology bto': ('GB', 'THE', 'Thetford', 'R'),
'historic new england': ('US', 'BOS', 'Boston', 'N'), # US, not UK!
'historic royal palaces': ('GB', 'LON', 'London', 'M'),
'new england museum association': ('US', 'BOS', 'Boston', 'N'), # US, not UK!
# ==========================================================================
# ITALIAN INSTITUTIONS (additional)
# ==========================================================================
'artribune': ('IT', 'ROM', 'Rome', 'C'),
'centro conservazione restauro la venaria reale': ('IT', 'TOR', 'Turin', 'R'),
'ecole francaise de rome efr': ('IT', 'ROM', 'Rome', 'R'),
'e cole franc aise de rome efr': ('IT', 'ROM', 'Rome', 'R'), # normalized
'museum tweestromenland': ('GE', 'BEN', 'Beneden-Leeuwen', 'M'), # Dutch, in Beneden-Leeuwen!
'stichting roma aeterna': ('IT', 'ROM', 'Rome', 'N'),
'triennale milano': ('IT', 'MIL', 'Milan', 'M'),
# ==========================================================================
# BELGIAN INSTITUTIONS (additional)
# ==========================================================================
'advn': ('BE', 'ANT', 'Antwerpen', 'A'),
'm leuven': ('BE', 'LEU', 'Leuven', 'M'),
'museum voor schone kunsten gent': ('BE', 'GEN', 'Gent', 'M'),
'wikimedia belgium': ('BE', 'BRU', 'Brussels', 'N'),
# ==========================================================================
# US INSTITUTIONS (additional)
# ==========================================================================
'gia gemological institute of america': ('US', 'CAR', 'Carlsbad', 'R'),
'international society of arboriculture': ('US', 'ATL', 'Atlanta', 'N'),
'standwithus': ('US', 'LAX', 'Los Angeles', 'N'),
# ==========================================================================
# DANISH INSTITUTIONS (additional)
# ==========================================================================
'aalborg teater': ('DK', 'AAL', 'Aalborg', 'E'),
'augustinus fonden': ('DK', 'CPH', 'Copenhagen', 'N'),
'kobenhavns museum museum of copenhagen': ('DK', 'CPH', 'Copenhagen', 'M'),
'ko benhavns museum museum of copenhagen': ('DK', 'CPH', 'Copenhagen', 'M'), # normalized
'københavns museum museum of copenhagen': ('DK', 'CPH', 'Copenhagen', 'M'), # with ø
# ==========================================================================
# SPANISH INSTITUTIONS
# ==========================================================================
'centre de cultura contemporania de barcelona cccb': ('ES', 'BAR', 'Barcelona', 'M'),
'centre de cultura contempora nia de barcelona cccb': ('ES', 'BAR', 'Barcelona', 'M'), # normalized
'instituto del patrimonio cultural de espana ipce': ('ES', 'MAD', 'Madrid', 'O'),
'instituto del patrimonio cultural de espan a ipce': ('ES', 'MAD', 'Madrid', 'O'), # normalized
# ==========================================================================
# INDIAN INSTITUTIONS
# ==========================================================================
'placemaking india': ('IN', 'DEL', 'Delhi', 'N'),
# ==========================================================================
# OTHER INTERNATIONAL
# ==========================================================================
'african wildlife foundation': ('KE', 'NAI', 'Nairobi', 'N'),
'arabian oud': ('SA', 'RIY', 'Riyadh', 'C'),
'wza rat althqa fh ministry of culture': ('SA', 'RIY', 'Riyadh', 'O'), # Saudi Ministry of Culture normalized
'وزارة الثقافة ministry of culture': ('SA', 'RIY', 'Riyadh', 'O'), # Saudi Ministry of Culture Arabic
'ministry of culture': ('SA', 'RIY', 'Riyadh', 'O'), # Saudi Ministry of Culture simple
'dariah eric': ('EU', 'BRU', 'Brussels', 'R'),
'embassy of the netherlands in israel': ('IL', 'TLV', 'Tel Aviv', 'O'),
'european museum academy': ('EU', 'BRU', 'Brussels', 'N'),
'iucn ssc shark specialist group ssg': ('CA', 'VAN', 'Vancouver', 'R'),
'museum vosbergen': ('DR', 'EEL', 'Eelde', 'M'), # Dutch - in Eelde
'bonhams': ('GB', 'LON', 'London', 'C'), # UK auction house
# ==========================================================================
# REMAINING DUTCH
# ==========================================================================
'het nationale park de hoge veluwe': ('GE', 'OTT', 'Otterlo', 'N'),
'lucas laboratoire d usages culture s arts socie te': ('FR', 'PAR', 'Paris', 'R'), # French org
# ==========================================================================
# OTHER MISCELLANEOUS DUTCH ORGANIZATIONS
# ==========================================================================
'introdans': ('GE', 'ARN', 'Arnhem', 'E'),
'ja21 het juiste antwoord': ('NH', 'AMS', 'Amsterdam', 'N'), # Political party - not heritage
'kasteel radboud': ('NH', 'MED', 'Medemblik', 'M'),
'klooster huissen': ('GE', 'HUI', 'Huissen', 'H'),
'koninklijke luchtmacht historische vlucht': ('NH', 'GIL', 'Gilze-Rijen', 'M'),
'koninklijke woudenberg': ('UT', 'WOU', 'Woudenberg', 'C'),
'museum fiskershúske': ('FR', 'MOD', 'Moddergat', 'M'),
'museum media': ('NH', 'AMS', 'Amsterdam', 'C'),
'museum of 21st century design': ('GB', 'LON', 'London', 'M'), # UK
'museum of comic art moca': ('US', 'NYC', 'New York', 'M'), # US
'museum of edible earth': ('NL', 'AMS', 'Amsterdam', 'M'), # Actually NL-based
'museum of humanity': ('GB', 'LON', 'London', 'M'), # UK
'museum of looted antiquities': ('GB', 'LON', 'London', 'D'), # UK - virtual
'museum of science': ('US', 'BOS', 'Boston', 'M'), # US
'museumppassmusees': ('BE', 'BRU', 'Brussels', 'O'), # Belgium - museum pass
'museumvereniging': ('NH', 'AMS', 'Amsterdam', 'N'),
'oerol festival': ('FR', 'TER', 'Terschelling', 'E'),
'qwen': ('CN', 'HAN', 'Hangzhou', 'C'), # China - AI company, not heritage
'radio en museum': ('NH', 'AMS', 'Amsterdam', 'M'),
'sothebys': ('GB', 'LON', 'London', 'C'), # UK
'sothebys institute of art': ('GB', 'LON', 'London', 'E'), # UK
'nieuwe veste': ('NB', 'BRE', 'Breda', 'E'),
}
def normalize_name(name: str) -> str:
    """Return a lowercase, punctuation-free form of *name* for table lookups.

    The name is NFKD-decomposed and lowercased, every character that is
    neither a word character nor whitespace is replaced by a space, and runs
    of whitespace are collapsed to single spaces.

    NOTE: NFKD splits an accented letter into its base letter plus a combining
    mark, and that mark is then replaced by a space like any other
    non-word character, so "Musée" comes out as "muse e".
    """
    import unicodedata

    folded = unicodedata.normalize('NFKD', name).lower().strip()
    # Anything that is not a word character or whitespace becomes a separator.
    spaced = re.sub(r'[^\w\s]', ' ', folded)
    return ' '.join(spaced.split())
def extract_abbreviation(name: str) -> str:
    """Build an uppercase abbreviation from an organization name.

    Punctuation is treated as a word separator.  Stop words and one-character
    words are ignored; if nothing is left, the first three raw words are used
    instead.  A single remaining word contributes its first four characters,
    otherwise the initials of the first five words are joined.  ``'XXX'`` is
    returned when no characters can be derived at all.
    """
    stop_words = {
        'de', 'het', 'een', 'van', 'voor', 'in', 'op', 'te', 'den', 'der', 'des',
        'en', 'of', 'the', 'a', 'an', 'and', 'or', 'museum', 'stichting',
    }
    tokens = re.sub(r'[^\w\s]', ' ', name).split()
    significant = [
        token for token in tokens
        if len(token) > 1 and token.lower() not in stop_words
    ]
    if not significant:
        # Name consisted only of stop words / single letters: fall back to them.
        significant = tokens[:3]

    if len(significant) == 1:
        code = significant[0][:4]
    else:
        code = ''.join(token[0] for token in significant[:5])
    return code.upper() or 'XXX'
def match_known_org(emic_name: str) -> Optional[Tuple[str, str, str, str]]:
    """Look up an organization's location tuple in ``KNOWN_ORGS``.

    The name is normalized with ``normalize_name`` and tried as an exact key
    first.  Failing that, a known name matches when it occurs inside the
    normalized name, or the normalized name occurs inside it, as a run of
    whole words.  The longest matching known name wins; ties go to the entry
    defined first in ``KNOWN_ORGS``.

    Args:
        emic_name: Organization name as found in the custodian file.

    Returns:
        The ``KNOWN_ORGS`` value ``(province, city_code, city_name, inst_type)``
        of the matched entry, or ``None`` when the name normalizes to an empty
        string or nothing matches.
    """
    name_lower = normalize_name(emic_name)
    # An empty string is a substring of every key, so without this guard a
    # punctuation-only name would "match" the longest known organization.
    if not name_lower:
        return None

    # Exact match first
    if name_lower in KNOWN_ORGS:
        return KNOWN_ORGS[name_lower]

    # Partial match on word boundaries: padding both sides with a space keeps
    # a short key such as 'art' from matching inside 'artribune'.
    padded_name = f' {name_lower} '
    candidates = (
        known_name
        for known_name in KNOWN_ORGS
        if f' {known_name} ' in padded_name or padded_name in f' {known_name} '
    )
    # max() returns the first of several equally long names, which is the
    # same tie-break a stable sort by descending length would give.
    best = max(candidates, key=len, default=None)
    return KNOWN_ORGS[best] if best is not None else None
def process_pending_file(filepath: Path, dry_run: bool = True) -> Optional[str]:
    """Resolve one PENDING custodian file against the known-organization table.

    Reads the YAML file, looks up ``custodian_name.emic_name`` with
    ``match_known_org`` and, on a hit, builds a GHCID of the form
    ``{country}-{province}-{city_code}-{inst_type}-{abbrev}``.  Unless
    ``dry_run`` is set, the file is rewritten with the new GHCID, location and
    resolution metadata and then moved to ``CUSTODIAN_DIR/{ghcid}.yaml``.

    Args:
        filepath: Path of the PENDING YAML file.
        dry_run: When true, only print what would happen; nothing is written.

    Returns:
        The new GHCID when the file was resolved, the string ``'dry_run'``
        when a match was found in dry-run mode, or ``None`` when the file has
        no usable name, has no match, or processing failed (the error is
        printed).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        # An empty file loads as None and a scalar/list document has no
        # custodian mapping; neither can be matched.
        if not isinstance(data, dict):
            return None
        custodian = data.get('custodian_name')
        emic_name = custodian.get('emic_name', '') if isinstance(custodian, dict) else ''
        if not emic_name:
            return None

        result = match_known_org(emic_name)
        if not result:
            return None

        province, city_code, city_name, inst_type = result
        abbrev = extract_abbreviation(emic_name)

        # Handle non-Dutch organizations
        # All non-NL countries get their country code as the country, with XX as province
        # NOTE(review): the KNOWN_ORGS header comment also lists FR=Friesland
        # and GR=Groningen as Dutch province codes; because both appear in
        # this set, entries using them always resolve as France / Greece.
        FOREIGN_COUNTRIES = {
            'FR', 'DK', 'IT', 'BE', 'DE', 'GB', 'US', 'AT', 'AU', 'BA', 'ES',
            'EU', 'ID', 'IL', 'IN', 'MA', 'NO', 'PT', 'PS', 'ZA', 'CA', 'GR', 'KE', 'SA',
            'CH', 'CN'
        }
        country = 'NL'
        if province in FOREIGN_COUNTRIES:
            country = province
            province = 'XX'

        new_ghcid = f"{country}-{province}-{city_code.upper()}-{inst_type}-{abbrev}"
        new_path = CUSTODIAN_DIR / f"{new_ghcid}.yaml"

        # Handle collision: first append a slug of the name, then a counter,
        # until the target is free.  shutil.move may silently overwrite an
        # existing file, so the path must be unused before moving.
        if new_path.exists() and new_path != filepath:
            name_slug = re.sub(r'[^\w]+', '-', emic_name.lower()).strip('-')[:30]
            slug_ghcid = f"{new_ghcid}-{name_slug}"
            new_ghcid = slug_ghcid
            new_path = CUSTODIAN_DIR / f"{new_ghcid}.yaml"
            counter = 2
            while new_path.exists() and new_path != filepath:
                new_ghcid = f"{slug_ghcid}-{counter}"
                new_path = CUSTODIAN_DIR / f"{new_ghcid}.yaml"
                counter += 1

        if dry_run:
            print(f"[WOULD RESOLVE] {emic_name}")
            print(f" Location: {city_name} ({country if country != 'NL' else province})")
            print(f" -> {new_ghcid}.yaml")
            return 'dry_run'

        # Update data
        data['ghcid_current'] = new_ghcid
        # A key that is present but null is treated like a missing key.
        if data.get('location') is None:
            data['location'] = {}
        data['location']['city'] = city_name
        data['location']['country'] = country
        if data.get('ghcid_resolution') is None:
            data['ghcid_resolution'] = {}
        data['ghcid_resolution']['method'] = 'known_organization_database'
        data['ghcid_resolution']['resolved_at'] = datetime.now(timezone.utc).isoformat()

        with open(filepath, 'w', encoding='utf-8') as f:
            yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
        shutil.move(filepath, new_path)

        print(f"[RESOLVED] {emic_name}")
        print(f" -> {new_ghcid}.yaml")
        return new_ghcid
    except Exception as e:
        # Per-file boundary: report the failure and let the batch continue.
        print(f"[ERROR] {filepath.name}: {e}")
        return None
def main():
    """Run the resolver over every PENDING file in ``CUSTODIAN_DIR``.

    Accepts a single ``--dry-run`` flag, which is passed through to
    ``process_pending_file``.  Prints a per-file report followed by totals.
    Files for which ``process_pending_file`` returns nothing (no name, no
    match, or an error) are all counted under "Not in database".
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--dry-run', action='store_true')
    args = parser.parse_args()

    # Process all PENDING files (not just NL).  Glob order depends on the
    # file system, and collision suffixes depend on which file is handled
    # first, so sort to make repeated runs produce the same names.
    pending_files = sorted(CUSTODIAN_DIR.glob("*PENDING*.yaml"))
    print(f"Processing {len(pending_files)} PENDING files against {len(KNOWN_ORGS)} known organizations...")
    print()

    resolved = 0
    not_found = 0
    for filepath in pending_files:
        if process_pending_file(filepath, dry_run=args.dry_run):
            resolved += 1
        else:
            not_found += 1

    print()
    print(f"{'Would resolve' if args.dry_run else 'Resolved'}: {resolved}")
    print(f"Not in database: {not_found}")


if __name__ == '__main__':
    main()