glam/scripts/enrich_not_found_with_exa.py

209 lines
10 KiB
Python

#!/usr/bin/env python3
"""
Enrich NDE entries that were not found on Google Maps using Exa search results.
This script manually adds enrichment data from Exa web searches for the 8 entries
that were not found on Google Maps.
"""
import yaml
from pathlib import Path
from datetime import datetime, timezone
# Directory holding the enriched NDE entry YAML files. Every key of
# EXA_ENRICHMENTS is a file name resolved relative to this directory.
# NOTE(review): absolute path into one user's home directory - the script
# only works unchanged on a machine with this exact layout.
ENTRIES_DIR: Path = Path("/Users/kempersc/apps/glam/data/nde/enriched/entries")
# Single timestamp shared by every entry below, so that the whole batch
# carries one consistent "fetch_timestamp" instead of eight values that
# differ by a few microseconds.
# NOTE(review): this is evaluated when the module is imported, not when the
# Exa searches were actually performed - confirm that is the intended meaning.
_FETCH_TIMESTAMP = datetime.now(timezone.utc).isoformat()

# Exa-sourced enrichment data for the 8 "not found" entries.
#
# Maps an entry file name (relative to ENTRIES_DIR) to the mapping that
# update_entry() merges into the top level of that entry's YAML document.
# Each value currently holds a single "exa_enrichment" key. The fields inside
# it vary per entry; only "source", "fetch_timestamp", "name_verified",
# "description" and "notes" are present on all of them.
EXA_ENRICHMENTS = {
    "0032_unknown.yaml": {
        "exa_enrichment": {
            "source": "exa_web_search",
            "fetch_timestamp": _FETCH_TIMESTAMP,
            "name_verified": "Stichting Oudheidkamer Zuidwolde",
            "description": "Local heritage society founded in 1983, now with over 500 members. Operates the Cultuur-Historisch Museum 'De Wemme' in Zuidwolde. Publishes the journal 'Mandiêlig'.",
            "website": "https://www.oudheidkamerzuidwolde.nl/",
            "museum_name": "Cultuur-Historisch Museum De Wemme",
            "museum_website": "https://dewemme.nl/",
            "address": {
                "street": "Tolweg 2",
                "city": "Zuidwolde",
                "postal_code": "7921 JA",
                "country": "Netherlands",
            },
            "contact": {
                "email": "info@oudheidkamerzuidwolde.nl",
                "phone": "0528-372276",
            },
            "founded": "1983",
            "membership_fee": "€15.00/year",
            "notes": "The Oudheidkamer is the heritage society; De Wemme is the museum they operate.",
        },
    },
    "0478_Q110907423.yaml": {
        "exa_enrichment": {
            "source": "exa_web_search",
            "fetch_timestamp": _FETCH_TIMESTAMP,
            "name_verified": "Heemkundegroep Ulendonc",
            "description": "Local heritage group (heemkundekring) in the Netherlands. Part of a network of local heritage societies that focus on collecting, studying, documenting and preserving local history and culture.",
            "type": "Heemkundekring (local heritage society)",
            "notes": "Limited online presence. Heemkundekringen are volunteer-run organizations focused on local history preservation.",
        },
    },
    "0542_Q110907376.yaml": {
        "exa_enrichment": {
            "source": "exa_web_search",
            "fetch_timestamp": _FETCH_TIMESTAMP,
            "name_verified": "Museum De Bewogen Jaren 1939-1950",
            "description": "WWII museum focusing on the period 1939-1950, including the Dutch East Indies. Features military and civilian history exhibits with artifacts, uniforms, and documents from the war years.",
            "website": "https://www.museumdebewogenjaren.nl/",
            "address": {
                "street": "Floreffestraat 1",
                "city": "Hooge Mierde",
                "country": "Netherlands",
            },
            "contact": {
                # NOTE(review): country code followed by a leading "0" looks
                # malformed - verify the number before relying on it.
                "phone": "+31 013-5093133",
                "email": "info@museumdebewogenjaren.nl",
            },
            "founded": "2005",
            "theme": "World War II and Dutch East Indies (1939-1950)",
            "current_exhibition": "Het verzet in de Kempen (Resistance in the Kempen region)",
            "accessibility": "Wheelchair accessible, all exhibits on ground floor",
            "notes": "Originally started as a private collection in the 1970s, became a museum in 2005. Moved to former Rabobank building opposite the church.",
        },
    },
    "0643_Q56459403.yaml": {
        "exa_enrichment": {
            "source": "exa_web_search",
            "fetch_timestamp": _FETCH_TIMESTAMP,
            "name_verified": "Cultuurhistorisch Museum Texel (Museum Waelstee)",
            "description": "Heritage museum chronicling the history of Texel island and its cultural evolution. Features exhibits on maritime heritage, local artifacts, dikes, farming, and traditional crafts.",
            "website": "https://www.cultuurmuseumtexel.nl/",
            "address": {
                "street": "Hogereind 6",
                "city": "Den Burg",
                "island": "Texel",
                "country": "Netherlands",
            },
            "opening_hours": {
                "tuesday_saturday": "10:00 - 17:00",
                "sunday_holidays": "13:30 - 17:00",
                "monday": "Closed",
            },
            "admission": {
                "adults": "€8.00",
                "children_4_12": "€5.00",
                "groups_10_plus_adults": "€7.00",
                "groups_10_plus_children": "€4.00",
            },
            "founded": "1991",
            "exhibits": [
                "Maritime heritage",
                "Dikes and putters",
                "Fight against water",
                "Carriage workshop",
                "Stolp farm",
                "Horse harnesses",
                "Cattle farming",
                "Dairy preparation",
                "Forge",
            ],
            "notes": "Dogs welcome on leash. Also known as Museum Waelstee.",
        },
    },
    "0711_Q110671441.yaml": {
        "exa_enrichment": {
            "source": "exa_web_search",
            "fetch_timestamp": _FETCH_TIMESTAMP,
            "name_verified": "ModeMuze",
            "description": "Online platform and growing network representing 20 Dutch and Flemish museums with unique fashion and jewellery collections. Since 2015, connects diverse fashion collections across time and cultures.",
            "website": "https://modemuze.nl/",
            "type": "Digital platform / museum network",
            "sister_site": "https://sieradenmuze.nl/ (jewelry collections)",
            "partner_museums": "20 Dutch and Flemish museums",
            "projects": ["Unlocking Fashion Heritage (ULFH)", "Digital fitting mirror"],
            "focus": [
                "Fashion heritage",
                "Costume collections",
                "Jewelry collections",
                "Digitization",
            ],
            "notes": "ModeMuze is a digital aggregation platform, not a physical museum. It connects fashion collections from multiple partner museums. The Unlocking Fashion Heritage project explores digital presentation methods including 360-degree photography and photogrammetry.",
        },
    },
    "0896_Q110891813.yaml": {
        "exa_enrichment": {
            "source": "exa_web_search",
            "fetch_timestamp": _FETCH_TIMESTAMP,
            "name_verified": "Historische Muurreclames Zwolle",
            "description": "Organization documenting and preserving historic wall advertisements (muurreclames) in Zwolle, Netherlands. Part of the heritage preservation movement in this historic Hanseatic city.",
            "type": "Heritage documentation project",
            "location": "Zwolle, Netherlands",
            "social_media": "https://www.facebook.com/historischemuurreclameszwolle/",
            "notes": "Zwolle is a medieval fortified city with many historical elements. This organization focuses specifically on documenting painted wall advertisements from the past. Limited online presence beyond Facebook.",
        },
    },
    "0999_unknown.yaml": {
        "exa_enrichment": {
            "source": "exa_web_search",
            "fetch_timestamp": _FETCH_TIMESTAMP,
            "name_verified": "Vincent Erdin",
            "description": "Unclear entry - name appears to be a person rather than an organization. May be related to a personal heritage collection or individual contributor to heritage projects.",
            "type": "Unknown - possibly personal collection or individual",
            "notes": "Limited information available. Search results returned unrelated Vincent van Gogh photography projects. Entry may need manual verification.",
        },
    },
    "1299_Q1441483.yaml": {
        "exa_enrichment": {
            "source": "exa_web_search",
            "fetch_timestamp": _FETCH_TIMESTAMP,
            "name_verified": "Rotterdams Tramweg Museum (RTM)",
            "description": "Tram museum that operates historic trams from the former Rotterdamse Tramweg Maatschappij. From 1898-1966, RTM provided passenger and freight transport between Rotterdam and the South Holland islands with trams, buses and ferries.",
            "website": "https://rtm-ouddorp.nl/",
            "address": {
                "location": "At the foot of the Brouwersdam",
                "city": "Ouddorp",
                "region": "Goeree-Overflakkee",
                "country": "Netherlands",
            },
            "founded_as_museum": "1966 (transport company), 1989 (current location)",
            "activities": [
                "Tram rides through dunes of De Punt van Goeree",
                "Rides over Brouwersdam to Scharendijke",
                "Cafe on site",
            ],
            "collection": "Restored RTM trams from 1898-1966 period",
            "dog_policy": "Dogs welcome on leash",
            "type": "Transport museum / heritage railway",
            "notes": "Operating heritage railway that runs restored trams. Combined museum experience with scenic tram rides through dunes.",
        },
    },
}
def update_entry(filename: str, enrichment_data: dict) -> bool:
    """Merge Exa enrichment data into one entry's YAML file.

    The file is looked up under ENTRIES_DIR, parsed, updated in memory with
    every key of ``enrichment_data`` (existing top-level keys of the same
    name are overwritten), stamped with ``exa_enrichment_status`` and
    ``exa_enrichment_timestamp``, and written back in place.

    Args:
        filename: Name of the entry file, relative to ENTRIES_DIR.
        enrichment_data: Top-level keys to merge into the entry.

    Returns:
        True when the file was rewritten; False when it does not exist or
        its content is not a YAML mapping (nothing is written in that case).
    """
    filepath = ENTRIES_DIR / filename
    if not filepath.exists():
        print(f"ERROR: File not found: {filepath}")
        return False

    # Read existing YAML
    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load yields None for an empty file and a list/scalar for documents
    # that are not mappings. Calling .update() on those would raise and abort
    # the whole batch, so report the file and skip it instead.
    if not isinstance(data, dict):
        print(f"ERROR: Expected a YAML mapping in: {filepath}")
        return False

    # Add Exa enrichment
    data.update(enrichment_data)
    data["exa_enrichment_status"] = "SUCCESS"
    data["exa_enrichment_timestamp"] = datetime.now(timezone.utc).isoformat()

    # Write back; sort_keys=False keeps the entry's existing key order and
    # allow_unicode=True keeps non-ASCII text readable instead of escaped.
    with open(filepath, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    print(f"✅ Updated: {filename}")
    return True
def main():
    """Apply every enrichment in EXA_ENRICHMENTS and print a summary.

    Prints a banner, calls update_entry() once per entry in dictionary
    order, then reports how many of the entries were updated successfully.
    """
    rule = "=" * 60

    print(rule)
    print("EXA ENRICHMENT FOR 'NOT FOUND' ENTRIES")
    print(rule)
    print()

    # Count one for each entry whose update reported success.
    success_count = sum(
        1
        for entry_file, payload in EXA_ENRICHMENTS.items()
        if update_entry(entry_file, payload)
    )

    print()
    print(rule)
    print(f"COMPLETE: {success_count}/{len(EXA_ENRICHMENTS)} entries enriched")
    print(rule)
# Run the enrichment only when executed as a script, not when imported.
if __name__ == "__main__":
    main()