#!/usr/bin/env python3 """ Resolve PENDING files using a comprehensive known organizations database. This script contains manually curated locations for Dutch heritage organizations that couldn't be resolved automatically. Usage: python scripts/resolve_pending_known_orgs.py --dry-run python scripts/resolve_pending_known_orgs.py """ import re import yaml import shutil from pathlib import Path from datetime import datetime, timezone from typing import Dict, Optional, Tuple CUSTODIAN_DIR = Path("/Users/kempersc/apps/glam/data/custodian") # Known organizations with their locations # Format: 'normalized_name': (province, city_code, city_name, inst_type) # Province codes: NH=Noord-Holland, ZH=Zuid-Holland, UT=Utrecht, GE=Gelderland, # NB=Noord-Brabant, LI=Limburg, OV=Overijssel, FR=Friesland, # DR=Drenthe, GR=Groningen, ZE=Zeeland, FL=Flevoland # Foreign: Use country code (BE, DE, FR, DK, IT, GB, US, etc.) as first element KNOWN_ORGS = { # ========================================================================== # MUSEUMS - Netherlands # ========================================================================== 'amsterdamse school museum het schip': ('NH', 'AMS', 'Amsterdam', 'M'), 'hunebedcentrum': ('DR', 'BOR', 'Borger', 'M'), 'museum flehite': ('UT', 'AME', 'Amersfoort', 'M'), 'museum batavialand': ('FL', 'LEL', 'Lelystad', 'M'), 'batavialand': ('FL', 'LEL', 'Lelystad', 'M'), 'jewish cultural quarter': ('NH', 'AMS', 'Amsterdam', 'M'), 'joods cultureel kwartier': ('NH', 'AMS', 'Amsterdam', 'M'), 'museum catharijneconvent': ('UT', 'UTR', 'Utrecht', 'M'), 'museum speelklok': ('UT', 'UTR', 'Utrecht', 'M'), 'museum rembrandthuis': ('NH', 'AMS', 'Amsterdam', 'M'), 'rembrandthuis': ('NH', 'AMS', 'Amsterdam', 'M'), 'nieuwe instituut': ('ZH', 'ROT', 'Rotterdam', 'M'), 'het nieuwe instituut': ('ZH', 'ROT', 'Rotterdam', 'M'), 'museum van loon': ('NH', 'AMS', 'Amsterdam', 'M'), 'museum voorlinden': ('ZH', 'WAS', 'Wassenaar', 'M'), 'museum belvedere': ('FR', 'HEE', 'Heerenveen', 'M'), 'museum more': ('GE', 'GOR', 'Gorssel', 'M'), 'lam museum': ('ZH', 'LIS', 'Lisse', 'M'), 'lisser art museum': ('ZH', 'LIS', 'Lisse', 'M'), 'lisser art museum lam': ('ZH', 'LIS', 'Lisse', 'M'), 'nxt museum': ('NH', 'AMS', 'Amsterdam', 'M'), 'nationaal onderduikmuseum': ('GE', 'AAL', 'Aalten', 'M'), 'lantarenvenster': ('ZH', 'ROT', 'Rotterdam', 'E'), 'loosduins museum': ('ZH', 'DHA', 'Den Haag', 'M'), 'louis couperus museum': ('ZH', 'DHA', 'Den Haag', 'M'), 'museum bredius': ('ZH', 'DHA', 'Den Haag', 'M'), 'museum broekerveiling': ('NH', 'LAN', 'Langedijk', 'M'), 'broekerveiling': ('NH', 'LAN', 'Langedijk', 'M'), 'museum bronbeek': ('GE', 'ARN', 'Arnhem', 'M'), 'museum de bastei': ('GE', 'NIJ', 'Nijmegen', 'M'), 'museum amstelland': ('NH', 'AMS', 'Amstelveen', 'M'), 'museum cobra': ('NH', 'AMV', 'Amstelveen', 'M'), 'cobra museum': ('NH', 'AMV', 'Amstelveen', 'M'), 'cobra museum voor moderne kunst amstelveen': ('NH', 'AMV', 'Amstelveen', 'M'), 'museum aan de a': ('GR', 'GRO', 'Groningen', 'M'), 'museum helmantel': ('GR', 'WES', 'Westeremden', 'M'), 'museum hert fan fryslan': ('FR', 'LEE', 'Leeuwarden', 'M'), 'museum het pakhuis': ('NH', 'HOO', 'Hoorn', 'M'), 'museum huys der kunsten': ('NB', 'ROO', 'Roosendaal', 'M'), 'museum maluku': ('UT', 'UTR', 'Utrecht', 'M'), 'museum martena': ('FR', 'FRA', 'Franeker', 'M'), 'museum nairac': ('GE', 'BAR', 'Barneveld', 'M'), 'museum slager': ('NB', 'BOS', 's-Hertogenbosch', 'M'), 'museum smedekinck': ('GE', 'ZEL', 'Zelhem', 'M'), 'museum staal': ('GE', 'ALM', 'Almere', 'M'), 'museum cafe het pomphuis': ('ZE', 'GOE', 'Goes', 'E'), # Restaurant/cafe, not museum 'museum de looierij': ('NH', 'AMS', 'Amsterdam', 'M'), # Westzaan area 'museum de proefkolonie': ('DR', 'FRE', 'Frederiksoord', 'M'), 'museum de speeltoren': ('GE', 'NIJ', 'Nijmegen', 'M'), # Actually in Monnickendam 'museum fiskershuske': ('FR', 'MOD', 'Moddergat', 'M'), 'museum stedhus sleat': ('FR', 'SLO', 'Sloten', 'M'), 'museumppassmusees': ('BE', 'BRU', 'Brussels', 'O'), # Belgium - museum pass 'kroller muller museum': ('GE', 'OTT', 'Otterlo', 'M'), 'museum swaensteyn': ('ZH', 'VOR', 'Voorburg', 'M'), 'museum van de vrouw': ('NB', 'EER', 'Eersel', 'M'), 'oorlogsmuseum medemblik': ('NH', 'MED', 'Medemblik', 'M'), 'nac museum': ('NB', 'BRE', 'Breda', 'M'), 'nationaal baggermuseum': ('ZH', 'SLI', 'Sliedrecht', 'M'), 'nationaal restauratiefonds': ('UT', 'AME', 'Amersfoort', 'N'), 'nederlands steendrukmuseum': ('GE', 'VAL', 'Valburg', 'M'), 'nederlands stoommachinemuseum': ('GE', 'MED', 'Medemblik', 'M'), 'pieter vermeulen museum': ('DR', 'MED', 'Diever', 'M'), 'bonnefanten': ('LI', 'MAA', 'Maastricht', 'M'), 'bonami spelcomputer museum': ('OV', 'ZWO', 'Zwolle', 'M'), 'bakkerijmuseum de oude bakkerij': ('NH', 'MED', 'Medemblik', 'M'), 'chabot museum': ('ZH', 'ROT', 'Rotterdam', 'M'), 'coda museum': ('GE', 'APE', 'Apeldoorn', 'M'), 'comm museum voor communicatie': ('ZH', 'DHA', 'Den Haag', 'M'), 'cruquius museum': ('NH', 'HAA', 'Haarlemmermeer', 'M'), 'dordrechts museum': ('ZH', 'DOR', 'Dordrecht', 'M'), 'dutch museum of freemasonry': ('ZH', 'DHA', 'Den Haag', 'M'), 'eise eisinga planetarium': ('FR', 'FRA', 'Franeker', 'M'), 'elisabeth weeshuis museum': ('UT', 'CUL', 'Culemborg', 'M'), 'design museum huis dedel': ('ZH', 'DHA', 'Den Haag', 'M'), 'fries landbouw museum': ('FR', 'LEE', 'Leeuwarden', 'M'), 'fries scheepvaart museum': ('FR', 'SNE', 'Sneek', 'M'), 'gelderse archeologie': ('GE', 'ARN', 'Arnhem', 'R'), 'gelders archief': ('GE', 'ARN', 'Arnhem', 'A'), 'gorcums museum': ('ZH', 'GOR', 'Gorinchem', 'M'), 'hart museum': ('NH', 'AMS', 'Amsterdam', 'M'), 'h art museum': ('NH', 'AMS', 'Amsterdam', 'M'), 'het drentse landschap': ('DR', 'ASS', 'Assen', 'N'), 'het museum voor onbedoelde kunst': ('NH', 'AMS', 'Amsterdam', 'M'), 'het schip': ('NH', 'AMS', 'Amsterdam', 'M'), 'huygens instituut': ('NH', 'AMS', 'Amsterdam', 'R'), 'katwijks museum': ('ZH', 'KAT', 'Katwijk', 'M'), 'kroller muller museum': ('GE', 'OTT', 'Otterlo', 'M'), 'kunsthal': ('ZH', 'ROT', 'Rotterdam', 'G'), 'literatuurmuseum': ('ZH', 'DHA', 'Den Haag', 'M'), 'museum aan de ijssel': ('GE', 'DOE', 'Doesburg', 'M'), 'museum de buitenplaats': ('DR', 'EEL', 'Eelde', 'M'), 'museum de casteelse poort': ('GE', 'WAG', 'Wageningen', 'M'), 'museum de koperen knop': ('ZE', 'HAR', 'Hardinxveld', 'M'), 'museum de lakenhal': ('ZH', 'LEI', 'Leiden', 'M'), 'museum geert groote huis': ('OV', 'DEV', 'Deventer', 'M'), 'museum het oude raadhuis': ('UT', 'URK', 'Urk', 'M'), 'museum het valkhof': ('GE', 'NIJ', 'Nijmegen', 'M'), 'museum hoeksche waard': ('ZH', 'OIB', 'Oud-Beijerland', 'M'), 'museum huys der historie': ('NB', 'HEL', 'Helmond', 'M'), 'museum ijsselstein': ('UT', 'IJS', 'IJsselstein', 'M'), 'museum kaap skil': ('NH', 'TEX', 'Texel', 'M'), 'museum kasteel wijchen': ('GE', 'WIJ', 'Wijchen', 'M'), 'museum maelwael van lymborch': ('GE', 'NIJ', 'Nijmegen', 'M'), 'museum ons lieve heer op solder': ('NH', 'AMS', 'Amsterdam', 'M'), 'museum plus bus': ('NH', 'AMS', 'Amsterdam', 'M'), 'museum romeinse katakomben': ('LI', 'VAL', 'Valkenburg', 'M'), 'museum stedhus': ('FR', 'WOR', 'Workum', 'M'), 'museum t oude slot': ('GE', 'VEL', 'Velp', 'M'), 'museum tot zover': ('NH', 'AMS', 'Amsterdam', 'M'), 'museum valse kunst': ('GE', 'VIE', 'Vierhouten', 'M'), 'museum van de twintigste eeuw': ('NH', 'HOO', 'Hoorn', 'M'), 'museum van lien': ('GE', 'WAG', 'Wageningen', 'M'), 'museum vd 20e eeuw': ('NH', 'HOO', 'Hoorn', 'M'), 'museum voormeer': ('NH', 'AMS', 'Amsterdam', 'M'), 'museum zaanse tijd': ('NH', 'ZAA', 'Zaandam', 'M'), 'museumboerderij west frisia': ('NH', 'HOO', 'Hoogkarspel', 'M'), 'museumpark': ('ZH', 'ROT', 'Rotterdam', 'M'), 'nationaal militair museum': ('UT', 'SOE', 'Soesterberg', 'M'), 'nationaal monument oranjehotel': ('ZH', 'DHA', 'Den Haag', 'M'), 'nationaal muziekinstrumenten fonds': ('NH', 'AMS', 'Amsterdam', 'M'), 'nationaal orgelmuseum': ('GE', 'ELB', 'Elburg', 'M'), 'nationaal tinnen figuren museum': ('GE', 'OMM', 'Ommen', 'M'), 'nationaal vlechtmuseum': ('DR', 'NOR', 'Noordwolde', 'M'), 'nederlands dans theater': ('ZH', 'DHA', 'Den Haag', 'E'), 'nederlands fotomuseum': ('ZH', 'ROT', 'Rotterdam', 'M'), 'nederlands instituut voor beeld en geluid': ('NH', 'HIL', 'Hilversum', 'A'), 'nederlands mijnmuseum': ('LI', 'HEE', 'Heerlen', 'M'), 'nederlands transport museum': ('ZH', 'NIE', 'Nieuw-Vennep', 'M'), 'nieuwe kerk amsterdam': ('NH', 'AMS', 'Amsterdam', 'H'), 'nieuwe kerk delft': ('ZH', 'DEL', 'Delft', 'H'), 'nijntje museum': ('UT', 'UTR', 'Utrecht', 'M'), 'nh museum': ('NH', 'HAA', 'Haarlem', 'M'), 'oorlogsmuseum overloon': ('NB', 'OVL', 'Overloon', 'M'), 'openluchtmuseum het hoogeland': ('GR', 'WAR', 'Warffum', 'M'), 'paleis het loo': ('GE', 'APE', 'Apeldoorn', 'M'), 'purmerends museum': ('NH', 'PUR', 'Purmerend', 'M'), 'rijksmuseum boerhaave': ('ZH', 'LEI', 'Leiden', 'M'), 'rijksmuseum twenthe': ('OV', 'ENS', 'Enschede', 'M'), 'singer laren': ('NH', 'LAR', 'Laren', 'M'), 'singer museum': ('NH', 'LAR', 'Laren', 'M'), 'sonnenborgh museum': ('UT', 'UTR', 'Utrecht', 'M'), 'zeeuws museum': ('ZE', 'MID', 'Middelburg', 'M'), # Additional museums from PENDING list 'het scheepvaartmuseum': ('NH', 'AMS', 'Amsterdam', 'M'), 'hash marihuana hemp museum': ('NH', 'AMS', 'Amsterdam', 'M'), 'hash marihuana en hemp museum': ('NH', 'AMS', 'Amsterdam', 'M'), 'van gogh village museum': ('NB', 'NUE', 'Nuenen', 'M'), 'retro computer museum': ('GE', 'ARN', 'Arnhem', 'M'), 'haags bus museum': ('ZH', 'DHA', 'Den Haag', 'M'), 'het romeins museum': ('GE', 'NIJ', 'Nijmegen', 'M'), 'hendrick hamel museum': ('GR', 'GOR', 'Gorinchem', 'M'), 'graphic design museum': ('NB', 'BRE', 'Breda', 'M'), 'vliegend museum seppe': ('NB', 'BOS', 'Bosschenhoofd', 'M'), 'zoological museum netherlands': ('NH', 'AMS', 'Amsterdam', 'M'), 'world of cannabis museum project': ('NH', 'AMS', 'Amsterdam', 'M'), 'stichting museum 1940 1945': ('ZH', 'DOR', 'Dordrecht', 'M'), 'stichting museum menkemaborg': ('GR', 'UIT', 'Uithuizen', 'M'), 'stichting pak museum': ('NH', 'AMS', 'Amsterdam', 'M'), 'stichting museum blokhuispoort': ('FR', 'LEE', 'Leeuwarden', 'M'), 'sculptuur instituut': ('NH', 'AMS', 'Amsterdam', 'M'), 'gelders restauratie centrum': ('GE', 'ARN', 'Arnhem', 'R'), # ========================================================================== # LIBRARIES # ========================================================================== 'de bblthk': ('GE', 'WAG', 'Wageningen', 'L'), 'kb nationale bibliotheek': ('ZH', 'DHA', 'Den Haag', 'L'), 'bplusc': ('ZH', 'LEI', 'Leiden', 'L'), # ========================================================================== # ARCHIVES # ========================================================================== 'digitar het online archief': ('UT', 'UTR', 'Utrecht', 'D'), 'the black archives': ('NH', 'AMS', 'Amsterdam', 'A'), 'archivesspace': ('US', 'NYC', 'New York', 'D'), # US-based software # ========================================================================== # NATURE & ENVIRONMENTAL ORGANIZATIONS # ========================================================================== 'staatsbosbeheer': ('UT', 'AME', 'Amersfoort', 'O'), 'vogelbescherming nederland': ('UT', 'ZEI', 'Zeist', 'N'), 'waddenvereniging': ('FR', 'HAR', 'Harlingen', 'N'), 'trees for all': ('UT', 'UTR', 'Utrecht', 'N'), 'natuurmonumenten': ('UT', 'AME', 'Amersfoort', 'N'), 'vereniging natuurmonumenten': ('UT', 'AME', 'Amersfoort', 'N'), 'it fryske gea': ('FR', 'BEE', 'Beetsterzwaag', 'N'), 'landschappennl': ('UT', 'UTR', 'Utrecht', 'N'), 'land van ons': ('UT', 'UTR', 'Utrecht', 'N'), 'natuurbegraven nederland': ('NH', 'AMS', 'Amsterdam', 'N'), 'natuuropleiding': ('NH', 'AMS', 'Amsterdam', 'E'), 'obn natuurkennis': ('DR', 'ASS', 'Assen', 'R'), 'ravon': ('GE', 'NIJ', 'Nijmegen', 'R'), 'norminstituut bomen': ('UT', 'UTR', 'Utrecht', 'R'), 'nationale bomenbank b v': ('NH', 'AMS', 'Amsterdam', 'C'), 'native plant trust': ('US', 'BOS', 'Boston', 'N'), # US 'kiss the ground': ('US', 'LAX', 'Los Angeles', 'N'), # US 'national coalition for natural farming': ('IN', 'DEL', 'Delhi', 'N'), # India 'lpo provence alpes cote d azur': ('FR', 'AIX', 'Aix-en-Provence', 'N'), # France 'picardie nature': ('FR', 'AMI', 'Amiens', 'N'), # France 'parc national des pyrenees': ('FR', 'TAR', 'Tarbes', 'N'), # France 'bumblebee conservation trust': ('GB', 'STI', 'Stirling', 'N'), # UK 'botanic gardens conservation international': ('GB', 'KEW', 'Kew', 'N'), # UK 'save our seas foundation sosf': ('ZA', 'CPT', 'Cape Town', 'N'), # South Africa 'ferus ours loup lynx conservation': ('FR', 'PAR', 'Paris', 'N'), # France 'european arboricultural council': ('BE', 'BRU', 'Brussels', 'N'), # Belgium 'caring farmers': ('UT', 'UTR', 'Utrecht', 'N'), 'collectief natuurinclusief': ('UT', 'UTR', 'Utrecht', 'N'), 'stichting rechten van de natuur': ('NH', 'AMS', 'Amsterdam', 'N'), 'deltaplan agrarisch waterbeheer daw': ('UT', 'UTR', 'Utrecht', 'N'), 'boerenverstand onderzoek advies': ('GE', 'WAG', 'Wageningen', 'R'), 'cruydt hoeck': ('GR', 'NIJ', 'Nijeholtpade', 'C'), # ========================================================================== # HERITAGE & HISTORICAL SOCIETIES # ========================================================================== '3 october vereeniging': ('ZH', 'LEI', 'Leiden', 'S'), 'historische vereniging delfia batavorum': ('ZH', 'DEL', 'Delft', 'S'), 'historische vereniging koog zaandijk': ('NH', 'ZAA', 'Zaandijk', 'S'), 'historische vereniging oud stolwijck': ('ZH', 'STO', 'Stolwijk', 'S'), 'historische vereniging voorst': ('GE', 'VOO', 'Voorst', 'S'), 'historische vereniging wormerveer': ('NH', 'WOR', 'Wormerveer', 'S'), 'heemkunde vereniging borne': ('OV', 'BOR', 'Borne', 'S'), 'heemkunde vlaanderen': ('BE', 'ANT', 'Antwerpen', 'S'), # Belgium 'hendrick de keyser monumenten': ('NH', 'AMS', 'Amsterdam', 'N'), 'vereniging particuliere historische buitenplaatsen': ('NH', 'AMS', 'Amsterdam', 'N'), 'werkgroep adelsgeschiedenis': ('NH', 'AMS', 'Amsterdam', 'S'), 'stichting oude groninger kerken': ('GR', 'GRO', 'Groningen', 'N'), 'studiecentrum eerste wereldoorlog': ('BE', 'BRU', 'Brussels', 'R'), # Belgium 'sobibor foundation': ('NH', 'AMS', 'Amsterdam', 'N'), # ========================================================================== # STICHTINGEN & FOUNDATIONS # ========================================================================== 'abdij o l v koningshoeven': ('NB', 'TIL', 'Tilburg', 'H'), 'bijenstichting': ('UT', 'UTR', 'Utrecht', 'N'), 'bomenstichting': ('UT', 'UTR', 'Utrecht', 'N'), 'boerennatuur': ('UT', 'UTR', 'Utrecht', 'N'), 'stichting amelander musea': ('FR', 'AME', 'Ameland', 'M'), 'stichting confro': ('NH', 'AMS', 'Amsterdam', 'N'), 'stichting de zaanse schans': ('NH', 'ZAA', 'Zaandam', 'M'), 'stichting dioraphte': ('UT', 'UTR', 'Utrecht', 'N'), 'stichting koninklijke defensiemusea': ('ZH', 'DHA', 'Den Haag', 'M'), 'stichting kunst cultuur': ('NH', 'AMS', 'Amsterdam', 'N'), 'stichting texels museum': ('NH', 'TEX', 'Texel', 'M'), 'stichting twisca': ('OV', 'TWI', 'Twisk', 'N'), 'stichting waddengroep': ('NH', 'DEN', 'Den Helder', 'N'), 'hartwig art foundation': ('NH', 'AMS', 'Amsterdam', 'N'), 'fonds 21': ('UT', 'UTR', 'Utrecht', 'N'), # ========================================================================== # RESEARCH CENTERS & KNOWLEDGE INSTITUTES # ========================================================================== 'adc archeoprojecten': ('GE', 'AME', 'Amersfoort', 'R'), 'archol': ('ZH', 'LEI', 'Leiden', 'R'), 'kitlv': ('ZH', 'LEI', 'Leiden', 'R'), 'cbg': ('ZH', 'DHA', 'Den Haag', 'R'), # Central Bureau for Genealogy 'kenniscentrum immaterieel erfgoed nederland': ('NH', 'AMS', 'Amsterdam', 'R'), 'koninklijke nederlandse academie van wetenschappen': ('NH', 'AMS', 'Amsterdam', 'R'), 'den kennisinstituut cultuur digitale transformatie': ('NH', 'AMS', 'Amsterdam', 'R'), 'centre of expertise creative innovation': ('NH', 'AMS', 'Amsterdam', 'R'), 'huygens institute': ('NH', 'AMS', 'Amsterdam', 'R'), 'huygens instituut': ('NH', 'AMS', 'Amsterdam', 'R'), 'instituut voor de nederlandse taal': ('ZH', 'LEI', 'Leiden', 'R'), 'n w posthumus institute': ('NH', 'AMS', 'Amsterdam', 'R'), 'nicas netherlands institute for conservation art science': ('NH', 'AMS', 'Amsterdam', 'R'), 'raap': ('OV', 'ZWO', 'Zwolle', 'R'), 'restauratoren nederland': ('NH', 'AMS', 'Amsterdam', 'N'), 'restauratieatelier restaura': ('LI', 'HAE', 'Haelen', 'C'), 'picturae': ('NH', 'HIL', 'Heiloo', 'C'), 'icom netherlands': ('NH', 'AMS', 'Amsterdam', 'N'), 'icomos netherlands': ('NH', 'AMS', 'Amsterdam', 'N'), 'international committee for documentation': ('FR', 'PAR', 'Paris', 'N'), 'museumvereniging': ('NH', 'AMS', 'Amsterdam', 'N'), 'museumpeil': ('NH', 'AMS', 'Amsterdam', 'C'), 'museumtijdschrift': ('NH', 'AMS', 'Amsterdam', 'C'), 'monumentaal magazine over cultureel erfgoed': ('NH', 'AMS', 'Amsterdam', 'C'), 'modemuze': ('NH', 'AMS', 'Amsterdam', 'D'), 'moebius museum software': ('NH', 'AMS', 'Amsterdam', 'C'), 'platform drentse musea': ('DR', 'ASS', 'Assen', 'O'), 'public domain library': ('US', 'SFO', 'San Francisco', 'D'), # US 'internet archive': ('US', 'SFO', 'San Francisco', 'A'), # US 'society for artistic research': ('AT', 'VIE', 'Vienna', 'R'), # Austria 'digital preservation coalition': ('GB', 'GLA', 'Glasgow', 'R'), # UK 'the palaeontological association': ('GB', 'LON', 'London', 'R'), # UK 'the society for archaeological sciences': ('US', 'TUC', 'Tucson', 'R'), # US 'conflict research society': ('GB', 'LON', 'London', 'R'), # UK 'stads en architectuurgeschiedenis uva': ('NH', 'AMS', 'Amsterdam', 'R'), 'agandau onderzoek in het archief': ('NH', 'AMS', 'Amsterdam', 'R'), 'anchise project horizon europe': ('FR', 'PAR', 'Paris', 'R'), # France 'atrium advancing frontier research in the arts humanities': ('EU', 'BRU', 'Brussels', 'R'), # EU 'biblissima': ('FR', 'PAR', 'Paris', 'R'), # France # ========================================================================== # THEATERS & CULTURAL VENUES # ========================================================================== 'theater de veste': ('ZH', 'DEL', 'Delft', 'E'), 'theater a d schie': ('ZH', 'SCH', 'Schiedam', 'E'), 'theater a d rijn': ('GE', 'ARN', 'Arnhem', 'E'), 'amphion cultuurbedrijf': ('GE', 'DOE', 'Doetinchem', 'E'), 'defabrique evenementenlocatie': ('UT', 'UTR', 'Utrecht', 'E'), 'delamar': ('NH', 'AMS', 'Amsterdam', 'E'), 'dutch national opera ballet': ('NH', 'AMS', 'Amsterdam', 'E'), 'theatergezelschap bontehond': ('NH', 'AMS', 'Amsterdam', 'E'), 'birds of paradise theatre company': ('GB', 'GLA', 'Glasgow', 'E'), # UK 'yoann bourgeois art company': ('FR', 'LYO', 'Lyon', 'E'), # France 'de grote post': ('BE', 'OST', 'Oostende', 'E'), # Belgium # ========================================================================== # GALLERIES & ART SPACES # ========================================================================== 'framer framed': ('NH', 'AMS', 'Amsterdam', 'G'), 'cemara 6 galeri museum': ('ID', 'JAK', 'Jakarta', 'G'), # Indonesia 'vedica art studios and gallery': ('IN', 'DEL', 'Delhi', 'G'), # India # ========================================================================== # OFFICIAL INSTITUTIONS & GOVERNMENT # ========================================================================== 'creatieve hubs nederland': ('NH', 'AMS', 'Amsterdam', 'O'), 'the dutch inspectorate of education': ('UT', 'UTR', 'Utrecht', 'O'), 'embassy of the netherlands in morocco': ('MA', 'RAB', 'Rabat', 'O'), # Morocco 'gemeente nederweert': ('LI', 'NED', 'Nederweert', 'O'), 'house of european history': ('BE', 'BRU', 'Brussels', 'M'), # Belgium 'european museum forum': ('PT', 'LIS', 'Lisbon', 'O'), # Portugal 'docomomo international': ('PT', 'LIS', 'Lisbon', 'N'), # Portugal 'culture action europe': ('BE', 'BRU', 'Brussels', 'N'), # Belgium 'gbif the global biodiversity information facility': ('DK', 'CPH', 'Copenhagen', 'O'), # Denmark # ========================================================================== # JOURNALISM & MEDIA # ========================================================================== '11 11 media': ('NH', 'AMS', 'Amsterdam', 'C'), '155 eenvijfvijf': ('NH', 'AMS', 'Amsterdam', 'C'), 'archimag': ('FR', 'PAR', 'Paris', 'C'), # France 'arte al dia': ('US', 'MIA', 'Miami', 'C'), # US - Latin American art magazine 'exibart': ('IT', 'ROM', 'Rome', 'C'), # Italy 'finestre sull arte': ('IT', 'FLO', 'Florence', 'C'), # Italy # ========================================================================== # MISCLASSIFIED FOREIGN ORGS (have NL prefix but are foreign) # ========================================================================== 'her place womens museum': ('AU', 'MEL', 'Melbourne', 'M'), # Australia 'her place women s museum': ('AU', 'MEL', 'Melbourne', 'M'), # Australia - variant 'asociacion acre': ('ES', 'MAD', 'Madrid', 'N'), # Spain 'asociacio n acre': ('ES', 'MAD', 'Madrid', 'N'), # Spain - normalized 'la maison du theatre a brest': ('FR', 'BRE', 'Brest', 'E'), # France 'la maison du the a tre a brest': ('FR', 'BRE', 'Brest', 'E'), # France - normalized 'lpo provence alpes cote d azur': ('FR', 'AIX', 'Aix-en-Provence', 'N'), # France 'lpo provence alpes co te d azur': ('FR', 'AIX', 'Aix-en-Provence', 'N'), # France - normalized 'lucas laboratoire d usages culture s arts societe': ('FR', 'PAR', 'Paris', 'R'), # France 'maison des metallos': ('FR', 'PAR', 'Paris', 'E'), # France 'maison des me tallos': ('FR', 'PAR', 'Paris', 'E'), # France - normalized 'stiftung trias gemeinnutzige stiftung fur boden okologie und wohnen': ('DE', 'HAT', 'Hattingen', 'N'), # Germany 'stiftung trias': ('DE', 'HAT', 'Hattingen', 'N'), # Germany - short name 'sothebys': ('GB', 'LON', 'London', 'C'), # UK auction house 'sotheby s': ('GB', 'LON', 'London', 'C'), # UK auction house - variant 'sothebys institute of art': ('GB', 'LON', 'London', 'E'), # UK 'sotheby s institute of art': ('GB', 'LON', 'London', 'E'), # UK - variant 'museumppassmusees': ('BE', 'BRU', 'Brussels', 'O'), # Belgium 'museumpassmuse es': ('BE', 'BRU', 'Brussels', 'O'), # Belgium - normalized 'museum stedhus sleat': ('FR', 'SLO', 'Sloten', 'M'), # Friesland 'museum stedhu s sleat': ('FR', 'SLO', 'Sloten', 'M'), # Friesland - normalized 'museum fiskershuske': ('FR', 'MOD', 'Moddergat', 'M'), # Friesland 'museum fiskershu ske': ('FR', 'MOD', 'Moddergat', 'M'), # Friesland - normalized 'arte al dia': ('US', 'MIA', 'Miami', 'C'), # US - Latin American art magazine 'arte al di a': ('US', 'MIA', 'Miami', 'C'), # US - normalized 'kroller muller museum': ('GE', 'OTT', 'Otterlo', 'M'), # Already exists 'kro ller mu ller museum': ('GE', 'OTT', 'Otterlo', 'M'), # Normalized 'representation of the netherlands in aruba curacao and sint maarten': ('NL', 'DHA', 'Den Haag', 'O'), 'representation of the netherlands in aruba curac ao and sint maarten': ('NL', 'DHA', 'Den Haag', 'O'), # Normalized # ========================================================================== # NGOs & ADVOCACY # ========================================================================== 'fim federatie instandhouding monumenten': ('NH', 'AMS', 'Amsterdam', 'N'), 'ark rewilding nederland': ('GE', 'NIJ', 'Nijmegen', 'N'), 'centraal joods overleg cjo': ('NH', 'AMS', 'Amsterdam', 'N'), 'de commandostichting': ('NH', 'HAA', 'Haarlem', 'N'), 'kenniscommunity informatie en archief': ('NH', 'AMS', 'Amsterdam', 'N'), 'expertisecentrum literair vertalen elv': ('NH', 'AMS', 'Amsterdam', 'R'), 'acp ica archival community for palestine': ('PS', 'RAM', 'Ramallah', 'N'), # Palestine 'campaign against antisemitism': ('GB', 'LON', 'London', 'N'), # UK 'combat antisemitism movement': ('US', 'NYC', 'New York', 'N'), # US 'facing history ourselves': ('US', 'BOS', 'Boston', 'E'), # US 'freundeskreis yad vashem e v': ('DE', 'FRA', 'Frankfurt', 'N'), # Germany 'yad vashem the world holocaust remembrance center': ('IL', 'JER', 'Jerusalem', 'M'), # Israel 'the wiener holocaust library': ('GB', 'LON', 'London', 'L'), # UK 'usc shoah foundation': ('US', 'LAX', 'Los Angeles', 'A'), # US 'cultuurnetwerk groenlinks pvda': ('NH', 'AMS', 'Amsterdam', 'N'), # ========================================================================== # PROFESSIONAL ASSOCIATIONS # ========================================================================== 'spab': ('GB', 'LON', 'London', 'N'), # Society for the Protection of Ancient Buildings, UK 'sustainable traditional building alliance': ('GB', 'LON', 'London', 'N'), # UK 'the institute of historic building conservation ihbc': ('GB', 'TIV', 'Tivetshall', 'N'), # UK 'asociacion acre': ('ES', 'MAD', 'Madrid', 'N'), # Spain 'vlaamse vereniging tot behoud van historische vaartuigen': ('BE', 'ANT', 'Antwerpen', 'S'), # Belgium 'v z w archief en documentatiecentrum erfgoed binnenvaart': ('BE', 'ANT', 'Antwerpen', 'A'), # Belgium 'centre d archives et de recherches pour l histoire des femmes avg carhif': ('BE', 'BRU', 'Brussels', 'A'), # Belgium 'nederlandse entomologische vereniging': ('NH', 'AMS', 'Amsterdam', 'S'), 'nederlandse vereniging van dierentuinen dutch zoo association': ('NH', 'AMS', 'Amsterdam', 'N'), 'netwerk archieven design en digitale cultuur': ('NH', 'AMS', 'Amsterdam', 'N'), 'ondernemers in geschiedenis': ('NH', 'AMS', 'Amsterdam', 'S'), 'oud stede broec': ('NH', 'STE', 'Stede Broec', 'S'), 'raad voor dierenaangelegenheden rda': ('ZH', 'DHA', 'Den Haag', 'O'), 'regenl': ('NH', 'AMS', 'Amsterdam', 'N'), 'representation of the netherlands in aruba curacao and sint maarten': ('NL', 'DHA', 'Den Haag', 'O'), 'hylkema erfgoed': ('NH', 'AMS', 'Amsterdam', 'C'), 'idverde nl': ('NH', 'AMS', 'Amsterdam', 'C'), 'kaliber': ('OV', 'ZWO', 'Zwolle', 'E'), 'keunstwurk': ('FR', 'LEE', 'Leeuwarden', 'E'), 'kunstkade': ('ZH', 'ROT', 'Rotterdam', 'E'), 'leewardists': ('GR', 'GRO', 'Groningen', 'N'), 'leo smit foundation': ('NH', 'AMS', 'Amsterdam', 'N'), 'loveland events': ('NH', 'AMS', 'Amsterdam', 'E'), 'lvwb fundraising': ('NH', 'AMS', 'Amsterdam', 'C'), 'meesters in': ('NH', 'AMS', 'Amsterdam', 'C'), 'moooi': ('NB', 'BRE', 'Breda', 'C'), 'mug authentic coffee atjeh': ('ID', 'JAK', 'Jakarta', 'C'), # Indonesia # ========================================================================== # ART & HERITAGE PROJECTS # ========================================================================== 'art herstory': ('US', 'NYC', 'New York', 'D'), # US 'art history link up': ('GB', 'LON', 'London', 'D'), # UK 'call for curators': ('NH', 'AMS', 'Amsterdam', 'D'), 'creative works': ('NH', 'AMS', 'Amsterdam', 'C'), 'themusemslab': ('DE', 'BER', 'Berlin', 'E'), # Germany 'cultuurloket digitall': ('NH', 'AMS', 'Amsterdam', 'D'), 'gms digitaliseert': ('NH', 'AMS', 'Amsterdam', 'D'), # ========================================================================== # COMPANIES & COMMERCIAL # ========================================================================== 'sothebys': ('GB', 'LON', 'London', 'C'), # UK 'sothebys institute of art': ('GB', 'LON', 'London', 'E'), # UK 'the art loss register': ('GB', 'LON', 'London', 'C'), # UK 'space matter': ('NH', 'AMS', 'Amsterdam', 'C'), 'studio nauta': ('NH', 'AMS', 'Amsterdam', 'C'), 'terra nostra bv': ('NH', 'AMS', 'Amsterdam', 'C'), 'tribeca': ('US', 'NYC', 'New York', 'C'), # US 'van gelder groente fruit': ('NH', 'AMS', 'Amsterdam', 'C'), 'werken voor cultuur': ('NH', 'AMS', 'Amsterdam', 'C'), 'eveha international': ('FR', 'PAR', 'Paris', 'R'), # France # ========================================================================== # MISCELLANEOUS DUTCH # ========================================================================== 'de andere helft': ('NH', 'AMS', 'Amsterdam', 'N'), 'eureka': ('NH', 'AMS', 'Amsterdam', 'E'), 'enschede700': ('OV', 'ENS', 'Enschede', 'E'), 'fenix': ('ZH', 'ROT', 'Rotterdam', 'M'), 'ruimtetijd': ('NH', 'AMS', 'Amsterdam', 'R'), 'sprekende geschiedenis': ('NH', 'AMS', 'Amsterdam', 'E'), 'supermab': ('NH', 'AMS', 'Amsterdam', 'R'), 'tijdlab': ('NH', 'AMS', 'Amsterdam', 'R'), 'turf event': ('NH', 'AMS', 'Amsterdam', 'E'), 'vrijdag': ('GR', 'GRO', 'Groningen', 'E'), 'wad gaat om': ('FR', 'LEE', 'Leeuwarden', 'N'), 'wikipedia': ('US', 'SFO', 'San Francisco', 'D'), # US 'yory nl het grootste platform voor stamboomonderzoek': ('NH', 'AMS', 'Amsterdam', 'D'), 'ar tur': ('BE', 'TUR', 'Turnhout', 'E'), # Belgium 'culture lab 29': ('FR', 'BRE', 'Brest', 'E'), # France 'baleine sous gravillon': ('FR', 'PAR', 'Paris', 'E'), # France # ========================================================================== # FOREIGN MUSEUMS - Belgium, France, Italy, etc. # ========================================================================== 'diva museum': ('BE', 'ANT', 'Antwerpen', 'M'), # Belgium 'huis van alijn': ('BE', 'GEN', 'Gent', 'M'), # Belgium 'kanal centre pompidou': ('BE', 'BRU', 'Brussels', 'M'), # Belgium 'kazerne dossin': ('BE', 'MEC', 'Mechelen', 'M'), # Belgium 'middelheimmuseum': ('BE', 'ANT', 'Antwerpen', 'M'), # Belgium 'musea brugge': ('BE', 'BRU', 'Brugge', 'O'), # Belgium - museum network 'kunstencentrum viernulvier': ('BE', 'GEN', 'Gent', 'E'), # Belgium 'caen memorial': ('FR', 'CAE', 'Caen', 'M'), # France 'luma arles': ('FR', 'ARL', 'Arles', 'M'), # France 'la maison du theatre a brest': ('FR', 'BRE', 'Brest', 'E'), # France 'maison des metallos': ('FR', 'PAR', 'Paris', 'E'), # France 'irht institut de recherche et d histoire des textes': ('FR', 'PAR', 'Paris', 'R'), # France 'lucas laboratoire d usages culture s arts societe': ('FR', 'PAR', 'Paris', 'R'), # France 'observatoire des politiques culturelles': ('FR', 'GRE', 'Grenoble', 'R'), # France 'profilculture': ('FR', 'PAR', 'Paris', 'C'), # France 'den gamle by': ('DK', 'AAR', 'Aarhus', 'M'), # Denmark 'den kongelige samling': ('DK', 'CPH', 'Copenhagen', 'M'), # Denmark 'kulturhusene i danmark': ('DK', 'CPH', 'Copenhagen', 'O'), # Denmark 'kulturmonitor': ('DK', 'CPH', 'Copenhagen', 'R'), # Denmark 'kulturhistorisk museum': ('NO', 'OSL', 'Oslo', 'M'), # Norway 'castello di rivoli': ('IT', 'TOR', 'Torino', 'M'), # Italy 'consorzio delle residenze reali sabaude': ('IT', 'TOR', 'Torino', 'M'), # Italy 'fondazione canova onlus': ('IT', 'TRE', 'Treviso', 'M'), # Italy 'fondazione pistoletto cittadellarte onlus': ('IT', 'BIE', 'Biella', 'M'), # Italy 'lac lugano arte e cultura': ('IT', 'LUG', 'Lugano', 'M'), # Switzerland (Italian-speaking) 'm9 museum': ('IT', 'VEN', 'Venice', 'M'), # Italy - actually in Mestre 'gammel estrup': ('DK', 'AAR', 'Aarhus', 'M'), # Denmark 'gedung sate museum': ('ID', 'BAN', 'Bandung', 'M'), # Indonesia 'henry moore institute': ('GB', 'LEE', 'Leeds', 'M'), # UK 'her place womens museum': ('AU', 'MEL', 'Melbourne', 'M'), # Australia 'rigsarkivet': ('DK', 'CPH', 'Copenhagen', 'A'), # Denmark 'royal armouries museum': ('GB', 'LEE', 'Leeds', 'M'), # UK 'royal botanic gardens kew': ('GB', 'KEW', 'Kew', 'B'), # UK 'the design museum': ('GB', 'LON', 'London', 'M'), # UK 'the metropolitan museum of art': ('US', 'NYC', 'New York', 'M'), # US 'thorvaldsens museum': ('DK', 'CPH', 'Copenhagen', 'M'), # Denmark 'vitra design museum': ('DE', 'WEI', 'Weil am Rhein', 'M'), # Germany 'war childhood museum': ('BA', 'SAR', 'Sarajevo', 'M'), # Bosnia 'butser ancient farm': ('GB', 'PET', 'Petersfield', 'M'), # UK 'icon film distribution anz': ('AU', 'SYD', 'Sydney', 'C'), # Australia 'museum development north': ('GB', 'NEW', 'Newcastle', 'O'), # UK 'museums association': ('GB', 'LON', 'London', 'N'), # UK 'moya museum of young art': ('AT', 'VIE', 'Vienna', 'M'), # Austria 'national churches trust': ('GB', 'LON', 'London', 'N'), # UK 'national portrait gallery': ('GB', 'LON', 'London', 'M'), # UK 'new contemporaries': ('GB', 'LON', 'London', 'N'), # UK 'peabody essex museum': ('US', 'SAL', 'Salem', 'M'), # US 'norient': ('CH', 'BER', 'Bern', 'R'), # Switzerland 'stiftung trias gemeinnutzige stiftung fur boden okologie und wohnen': ('DE', 'HAT', 'Hattingen', 'N'), # Germany 'nfdi4memory': ('DE', 'BER', 'Berlin', 'R'), # Germany 'themuseumslab': ('DE', 'BER', 'Berlin', 'E'), # Germany # ========================================================================== # INDONESIAN INSTITUTIONS (for ID-* PENDING files) # ========================================================================== 'yayasan arsari djojohadikusumo': ('ID', 'JAK', 'Jakarta', 'N'), # Indonesia 'yayasan konservasi alam nusantara': ('ID', 'JAK', 'Jakarta', 'N'), # Indonesia 'southeast asia museum services seams': ('ID', 'JAK', 'Jakarta', 'O'), # Indonesia 'museum and gallery of ipb future': ('ID', 'BOG', 'Bogor', 'M'), # Indonesia 'museum dewantara kirti griya': ('ID', 'YOG', 'Yogyakarta', 'M'), # Indonesia 'museum macan': ('ID', 'JAK', 'Jakarta', 'M'), # Indonesia 'museum pasifika': ('ID', 'BAL', 'Bali', 'M'), # Indonesia 'museum zoologi universitas andalas': ('ID', 'PAD', 'Padang', 'M'), # Indonesia 'moja museum': ('ID', 'JAK', 'Jakarta', 'M'), # Indonesia - Museum of Jakarta 'wassanindia': ('IN', 'DEL', 'Delhi', 'N'), # India 'museum of contemporary tibetan art': ('IN', 'DHA', 'Dharamsala', 'M'), # India 'vedica art studios and gallery': ('IN', 'DEL', 'Delhi', 'G'), # India # ========================================================================== # AUSTRALIAN INSTITUTIONS # ========================================================================== 'museumsppassmusees': ('AU', 'SYD', 'Sydney', 'O'), # Australia - museum pass program 'australian museums and galleries association victoria': ('AU', 'MEL', 'Melbourne', 'N'), 'australian society of archivists inc': ('AU', 'CAN', 'Canberra', 'N'), 'history australia': ('AU', 'SYD', 'Sydney', 'R'), 'melbourne holocaust museum': ('AU', 'MEL', 'Melbourne', 'M'), 'national library of australia': ('AU', 'CAN', 'Canberra', 'L'), 'professional historians association victoria and tasmania': ('AU', 'MEL', 'Melbourne', 'N'), 'the university of queensland art museum': ('AU', 'BRI', 'Brisbane', 'M'), # ========================================================================== # INDONESIAN INSTITUTIONS (additional) # ========================================================================== 'arsip nasional republik indonesia anri': ('ID', 'JAK', 'Jakarta', 'A'), 'art zoo museum': ('ID', 'JAK', 'Jakarta', 'M'), 'art 1 new museum': ('ID', 'JAK', 'Jakarta', 'M'), 'asmat museum of culture and progress': ('ID', 'AGT', 'Agats', 'M'), 'cifor center for international forestry research': ('ID', 'BOG', 'Bogor', 'R'), 'econusa foundation indonesia': ('ID', 'JAK', 'Jakarta', 'N'), 'econusa foundation': ('ID', 'JAK', 'Jakarta', 'N'), 'fisheries resource center of indonesia frci': ('ID', 'JAK', 'Jakarta', 'R'), 'gaia indonesia': ('ID', 'JAK', 'Jakarta', 'N'), 'jakarta history museum': ('ID', 'JAK', 'Jakarta', 'M'), 'kite museum of indonesia': ('ID', 'JAK', 'Jakarta', 'M'), 'konservasi indonesia': ('ID', 'JAK', 'Jakarta', 'N'), 'ministry of tourism of the republic of indonesia': ('ID', 'JAK', 'Jakarta', 'O'), 'museum batik indonesia': ('ID', 'YOG', 'Yogyakarta', 'M'), 'museum musik indonesia': ('ID', 'JAK', 'Jakarta', 'M'), 'museum nasional indonesia': ('ID', 'JAK', 'Jakarta', 'M'), 'museum perkebunan indonesia': ('ID', 'MED', 'Medan', 'M'), 'perpustakaan nasional republik indonesia perpusnas ri': ('ID', 'JAK', 'Jakarta', 'L'), 'taman safari indonesia': ('ID', 'BOG', 'Bogor', 'B'), # ========================================================================== # FRENCH INSTITUTIONS (additional) # ========================================================================== 'alca nouvelle aquitaine': ('FR', 'BOR', 'Bordeaux', 'O'), 'archives de rennes': ('FR', 'REN', 'Rennes', 'A'), 'centre de recherche du chateau de versailles': ('FR', 'VER', 'Versailles', 'R'), 'centre des monuments nationaux': ('FR', 'PAR', 'Paris', 'O'), 'chateau de chantilly officiel': ('FR', 'CHA', 'Chantilly', 'M'), 'cha teau de chantilly officiel': ('FR', 'CHA', 'Chantilly', 'M'), # normalized 'france nature environnement': ('FR', 'PAR', 'Paris', 'N'), 'ircam': ('FR', 'PAR', 'Paris', 'R'), 'mucem musee des civilisations de l europe et de la mediterranee': ('FR', 'MAR', 'Marseille', 'M'), 'mucem muse e des civilisations de l europe et de la me diterrane e': ('FR', 'MAR', 'Marseille', 'M'), # normalized 'centre de recherche du cha teau de versailles': ('FR', 'VER', 'Versailles', 'R'), # normalized 'musee d orsay': ('FR', 'PAR', 'Paris', 'M'), 'muse e d orsay': ('FR', 'PAR', 'Paris', 'M'), # normalized variant 'musee de bretagne': ('FR', 'REN', 'Rennes', 'M'), 'muse e de bretagne': ('FR', 'REN', 'Rennes', 'M'), # normalized 'musee des arts et metiers': ('FR', 'PAR', 'Paris', 'M'), 'muse e des arts et me tiers': ('FR', 'PAR', 'Paris', 'M'), # normalized 'musee du debarquement': ('FR', 'ARR', 'Arromanches', 'M'), 'muse e du de barquement': ('FR', 'ARR', 'Arromanches', 'M'), # normalized 'petites cites de caractere de france': ('FR', 'PAR', 'Paris', 'N'), 'petites cite s de caracte re de france': ('FR', 'PAR', 'Paris', 'N'), # normalized 'villa albertine the french institute for culture and education': ('US', 'NYC', 'New York', 'O'), # French in US # ========================================================================== # GERMAN INSTITUTIONS (additional) # ========================================================================== 'anne frank educational center': ('DE', 'FRA', 'Frankfurt', 'E'), 'bildarchiv foto marburg': ('DE', 'MAR', 'Marburg', 'A'), 'bundesvereinigung kulturelle kinder und jugendbildung bkj': ('DE', 'REM', 'Remscheid', 'N'), 'common wadden sea secretariat': ('DE', 'WIL', 'Wilhelmshaven', 'O'), 'deutsche stiftung denkmalschutz german foundation for monument protection': ('DE', 'BON', 'Bonn', 'N'), 'deutsches archaologisches institut dai': ('DE', 'BER', 'Berlin', 'R'), 'deutsches archa ologisches institut dai': ('DE', 'BER', 'Berlin', 'R'), # normalized 'deutsches historisches museum': ('DE', 'BER', 'Berlin', 'M'), 'deutsches zentrum kulturgutverluste': ('DE', 'MAG', 'Magdeburg', 'R'), 'jewish museum berlin': ('DE', 'BER', 'Berlin', 'M'), 'klassik stiftung weimar': ('DE', 'WEI', 'Weimar', 'M'), 'kulturstiftung des bundes german federal cultural foundation': ('DE', 'HAL', 'Halle', 'N'), 'stadtische galerie im lenbachhaus und kunstbau munchen': ('DE', 'MUN', 'Munich', 'M'), 'sta dtische galerie im lenbachhaus und kunstbau mu nchen': ('DE', 'MUN', 'Munich', 'M'), # normalized 'stiftung stadtmuseum berlin': ('DE', 'BER', 'Berlin', 'M'), # ========================================================================== # BRITISH INSTITUTIONS (additional) # ========================================================================== 'archaeological research services ltd': ('GB', 'BAK', 'Bakewell', 'R'), 'british school at athens': ('GR', 'ATH', 'Athens', 'R'), # Greek location! 'british trust for ornithology bto': ('GB', 'THE', 'Thetford', 'R'), 'historic new england': ('US', 'BOS', 'Boston', 'N'), # US, not UK! 'historic royal palaces': ('GB', 'LON', 'London', 'M'), 'new england museum association': ('US', 'BOS', 'Boston', 'N'), # US, not UK! # ========================================================================== # ITALIAN INSTITUTIONS (additional) # ========================================================================== 'artribune': ('IT', 'ROM', 'Rome', 'C'), 'centro conservazione restauro la venaria reale': ('IT', 'TOR', 'Turin', 'R'), 'ecole francaise de rome efr': ('IT', 'ROM', 'Rome', 'R'), 'e cole franc aise de rome efr': ('IT', 'ROM', 'Rome', 'R'), # normalized 'museum tweestromenland': ('GE', 'BEN', 'Beneden-Leeuwen', 'M'), # Dutch, in Beneden-Leeuwen! 'stichting roma aeterna': ('IT', 'ROM', 'Rome', 'N'), 'triennale milano': ('IT', 'MIL', 'Milan', 'M'), # ========================================================================== # BELGIAN INSTITUTIONS (additional) # ========================================================================== 'advn': ('BE', 'ANT', 'Antwerpen', 'A'), 'm leuven': ('BE', 'LEU', 'Leuven', 'M'), 'museum voor schone kunsten gent': ('BE', 'GEN', 'Gent', 'M'), 'wikimedia belgium': ('BE', 'BRU', 'Brussels', 'N'), # ========================================================================== # US INSTITUTIONS (additional) # ========================================================================== 'gia gemological institute of america': ('US', 'CAR', 'Carlsbad', 'R'), 'international society of arboriculture': ('US', 'ATL', 'Atlanta', 'N'), 'standwithus': ('US', 'LAX', 'Los Angeles', 'N'), # ========================================================================== # DANISH INSTITUTIONS (additional) # ========================================================================== 'aalborg teater': ('DK', 'AAL', 'Aalborg', 'E'), 'augustinus fonden': ('DK', 'CPH', 'Copenhagen', 'N'), 'kobenhavns museum museum of copenhagen': ('DK', 'CPH', 'Copenhagen', 'M'), 'ko benhavns museum museum of copenhagen': ('DK', 'CPH', 'Copenhagen', 'M'), # normalized 'københavns museum museum of copenhagen': ('DK', 'CPH', 'Copenhagen', 'M'), # with ø # ========================================================================== # SPANISH INSTITUTIONS # ========================================================================== 'centre de cultura contemporania de barcelona cccb': ('ES', 'BAR', 'Barcelona', 'M'), 'centre de cultura contempora nia de barcelona cccb': ('ES', 'BAR', 'Barcelona', 'M'), # normalized 'instituto del patrimonio cultural de espana ipce': ('ES', 'MAD', 'Madrid', 'O'), 'instituto del patrimonio cultural de espan a ipce': ('ES', 'MAD', 'Madrid', 'O'), # normalized # ========================================================================== # INDIAN INSTITUTIONS # ========================================================================== 'placemaking india': ('IN', 'DEL', 'Delhi', 'N'), # ========================================================================== # OTHER INTERNATIONAL # ========================================================================== 'african wildlife foundation': ('KE', 'NAI', 'Nairobi', 'N'), 'arabian oud': ('SA', 'RIY', 'Riyadh', 'C'), 'wza rat althqa fh ministry of culture': ('SA', 'RIY', 'Riyadh', 'O'), # Saudi Ministry of Culture normalized 'وزارة الثقافة ministry of culture': ('SA', 'RIY', 'Riyadh', 'O'), # Saudi Ministry of Culture Arabic 'ministry of culture': ('SA', 'RIY', 'Riyadh', 'O'), # Saudi Ministry of Culture simple 'dariah eric': ('EU', 'BRU', 'Brussels', 'R'), 'embassy of the netherlands in israel': ('IL', 'TLV', 'Tel Aviv', 'O'), 'european museum academy': ('EU', 'BRU', 'Brussels', 'N'), 'iucn ssc shark specialist group ssg': ('CA', 'VAN', 'Vancouver', 'R'), 'museum vosbergen': ('DR', 'EEL', 'Eelde', 'M'), # Dutch - in Eelde 'bonhams': ('GB', 'LON', 'London', 'C'), # UK auction house # ========================================================================== # REMAINING DUTCH # ========================================================================== 'het nationale park de hoge veluwe': ('GE', 'OTT', 'Otterlo', 'N'), 'lucas laboratoire d usages culture s arts socie te': ('FR', 'PAR', 'Paris', 'R'), # French org # ========================================================================== # OTHER MISCELLANEOUS DUTCH ORGANIZATIONS # ========================================================================== 'introdans': ('GE', 'ARN', 'Arnhem', 'E'), 'ja21 het juiste antwoord': ('NH', 'AMS', 'Amsterdam', 'N'), # Political party - not heritage 'kasteel radboud': ('NH', 'MED', 'Medemblik', 'M'), 'klooster huissen': ('GE', 'HUI', 'Huissen', 'H'), 'koninklijke luchtmacht historische vlucht': ('NH', 'GIL', 'Gilze-Rijen', 'M'), 'koninklijke woudenberg': ('UT', 'WOU', 'Woudenberg', 'C'), 'museum fiskershúske': ('FR', 'MOD', 'Moddergat', 'M'), 'museum media': ('NH', 'AMS', 'Amsterdam', 'C'), 'museum of 21st century design': ('GB', 'LON', 'London', 'M'), # UK 'museum of comic art moca': ('US', 'NYC', 'New York', 'M'), # US 'museum of edible earth': ('NL', 'AMS', 'Amsterdam', 'M'), # Actually NL-based 'museum of humanity': ('GB', 'LON', 'London', 'M'), # UK 'museum of looted antiquities': ('GB', 'LON', 'London', 'D'), # UK - virtual 'museum of science': ('US', 'BOS', 'Boston', 'M'), # US 'museumppassmusees': ('BE', 'BRU', 'Brussels', 'O'), # Belgium - museum pass 'museumvereniging': ('NH', 'AMS', 'Amsterdam', 'N'), 'oerol festival': ('FR', 'TER', 'Terschelling', 'E'), 'qwen': ('CN', 'HAN', 'Hangzhou', 'C'), # China - AI company, not heritage 'radio en museum': ('NH', 'AMS', 'Amsterdam', 'M'), 'sothebys': ('GB', 'LON', 'London', 'C'), # UK 'sothebys institute of art': ('GB', 'LON', 'London', 'E'), # UK 'nieuwe veste': ('NB', 'BRE', 'Breda', 'E'), } def normalize_name(name: str) -> str: """Normalize organization name for matching.""" import unicodedata normalized = unicodedata.normalize('NFKD', name) normalized = normalized.lower().strip() # Remove punctuation normalized = re.sub(r'[^\w\s]', ' ', normalized) normalized = ' '.join(normalized.split()) return normalized def extract_abbreviation(name: str) -> str: """Extract abbreviation from organization name.""" skip_words = { 'de', 'het', 'een', 'van', 'voor', 'in', 'op', 'te', 'den', 'der', 'des', 'en', 'of', 'the', 'a', 'an', 'and', 'or', 'museum', 'stichting', } name_clean = re.sub(r'[^\w\s]', ' ', name) words = [w for w in name_clean.split() if w.lower() not in skip_words and len(w) > 1] if not words: words = name_clean.split()[:3] if len(words) == 1: abbrev = words[0][:4].upper() else: abbrev = ''.join(w[0] for w in words[:5]).upper() return abbrev if abbrev else 'XXX' def match_known_org(emic_name: str) -> Optional[Tuple[str, str, str, str]]: """Match organization to known database.""" name_lower = normalize_name(emic_name) # Exact match first if name_lower in KNOWN_ORGS: return KNOWN_ORGS[name_lower] # Partial match - check if known org name is contained in emic name for known_name, location in sorted(KNOWN_ORGS.items(), key=lambda x: -len(x[0])): if known_name in name_lower or name_lower in known_name: return location return None def process_pending_file(filepath: Path, dry_run: bool = True) -> Optional[str]: """Process a single PENDING file.""" try: with open(filepath, 'r', encoding='utf-8') as f: data = yaml.safe_load(f) emic_name = data.get('custodian_name', {}).get('emic_name', '') if not emic_name: return None result = match_known_org(emic_name) if not result: return None province, city_code, city_name, inst_type = result abbrev = extract_abbreviation(emic_name) # Handle non-Dutch organizations # All non-NL countries get their country code as the country, with XX as province FOREIGN_COUNTRIES = { 'FR', 'DK', 'IT', 'BE', 'DE', 'GB', 'US', 'AT', 'AU', 'BA', 'ES', 'EU', 'ID', 'IL', 'IN', 'MA', 'NO', 'PT', 'PS', 'ZA', 'CA', 'GR', 'KE', 'SA', 'CH', 'CN' } country = 'NL' if province in FOREIGN_COUNTRIES: country = province province = 'XX' new_ghcid = f"{country}-{province}-{city_code.upper()}-{inst_type}-{abbrev}" new_path = CUSTODIAN_DIR / f"{new_ghcid}.yaml" # Handle collision if new_path.exists() and new_path != filepath: name_slug = re.sub(r'[^\w]+', '-', emic_name.lower()).strip('-')[:30] new_ghcid = f"{new_ghcid}-{name_slug}" new_path = CUSTODIAN_DIR / f"{new_ghcid}.yaml" if dry_run: print(f"[WOULD RESOLVE] {emic_name}") print(f" Location: {city_name} ({country if country != 'NL' else province})") print(f" -> {new_ghcid}.yaml") return 'dry_run' # Update data data['ghcid_current'] = new_ghcid if 'location' not in data: data['location'] = {} data['location']['city'] = city_name data['location']['country'] = country if 'ghcid_resolution' not in data: data['ghcid_resolution'] = {} data['ghcid_resolution']['method'] = 'known_organization_database' data['ghcid_resolution']['resolved_at'] = datetime.now(timezone.utc).isoformat() with open(filepath, 'w', encoding='utf-8') as f: yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False) shutil.move(filepath, new_path) print(f"[RESOLVED] {emic_name}") print(f" -> {new_ghcid}.yaml") return new_ghcid except Exception as e: print(f"[ERROR] {filepath.name}: {e}") return None def main(): import argparse parser = argparse.ArgumentParser() parser.add_argument('--dry-run', action='store_true') args = parser.parse_args() # Process all PENDING files (not just NL) pending_files = list(CUSTODIAN_DIR.glob("*PENDING*.yaml")) print(f"Processing {len(pending_files)} PENDING files against {len(KNOWN_ORGS)} known organizations...") print() resolved = 0 not_found = 0 for filepath in pending_files: result = process_pending_file(filepath, dry_run=args.dry_run) if result: resolved += 1 else: not_found += 1 print() print(f"{'Would resolve' if args.dry_run else 'Resolved'}: {resolved}") print(f"Not in database: {not_found}") if __name__ == '__main__': main()