From 85d9cee82fff1914bdb35bcf4b79a76ab0aab9b5 Mon Sep 17 00:00:00 2001 From: kempersc Date: Thu, 8 Jan 2026 13:26:53 +0100 Subject: [PATCH] fix: mark 8 more Google Maps false matches detected via name mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Additional Type I custodian files with obvious name mismatches between KIEN registry entries and Google Maps results. These couldn't be auto-detected via domain mismatch because they lack official websites. Fixes: - Dick Timmerman (person) → carpentry business - Ria Bos (cigar maker) → money transfer agent - Stichting Kracom (Krampuslauf) → Happy Caps retail - Fed. Nederlandse Vertelorganisaties → NET Foundation - Stichting dodenherdenking Alphen → wrong memorial - Sao Joao Rotterdam → Heemraadsplein (location not org) - sport en spel (heritage) → equipment rental - Eiertikken Ommen → restaurant Also adds detection and fix scripts for Google Maps false matches. --- data/custodian/NL-GE-ARN-I-DT.yaml | 114 +++++------ data/custodian/NL-GE-OOS-I-SS.yaml | 90 ++++----- data/custodian/NL-OV-IJS-I-RB.yaml | 60 +++--- data/custodian/NL-OV-OMM-I-EO.yaml | 105 +++++----- data/custodian/NL-UT-UTR-I-FNV.yaml | 67 +++--- data/custodian/NL-ZH-AAD-I-DA.yaml | 61 +++--- data/custodian/NL-ZH-ROT-I-K.yaml | 48 +++-- data/custodian/NL-ZH-ROT-I-SJR.yaml | 106 +++++----- scripts/detect_gmaps_mismatches.py | 224 +++++++++++++++++++++ scripts/fix_gmaps_false_matches.py | 195 ++++++++++++++++++ scripts/fix_moza_false_matches.py | 154 ++++++++++++++ scripts/fix_name_mismatch_false_matches.py | 132 ++++++++++++ 12 files changed, 1030 insertions(+), 326 deletions(-) create mode 100644 scripts/detect_gmaps_mismatches.py create mode 100644 scripts/fix_gmaps_false_matches.py create mode 100644 scripts/fix_moza_false_matches.py create mode 100644 scripts/fix_name_mismatch_false_matches.py diff --git a/data/custodian/NL-GE-ARN-I-DT.yaml b/data/custodian/NL-GE-ARN-I-DT.yaml index 
9c762a96f1..f49631041a 100644 --- a/data/custodian/NL-GE-ARN-I-DT.yaml +++ b/data/custodian/NL-GE-ARN-I-DT.yaml @@ -118,21 +118,29 @@ ghcid: country_code: NL geonames_id: 2759661 google_maps_enrichment: - place_id: ChIJ9w1N5c01xkcRt6VDu57TGfE - name: Timmer & Onderhoudsbedrijf Dik Lubbertsen - formatted_address: Parklaan 14, 3881 CT Putten, Netherlands - fetch_timestamp: '2025-12-06T19:31:45.339848+00:00' - api_status: OK - coordinates: - latitude: 52.2661407 - longitude: 5.6104113 - phone_international: +31 341 358 563 - phone_local: 0341 358 563 - website: http://www.lubbertsen.nl/ - google_place_types: - - point_of_interest - - establishment - business_status: OPERATIONAL + status: FALSE_MATCH + false_match_reason: 'Google Maps returned "Timmer & Onderhoudsbedrijf Dik Lubbertsen" (carpentry business) instead of "Dick + Timmerman" (heritage practitioner (person)). Name mismatch detected during manual review. Per Rule 40: KIEN is authoritative + for Type I intangible heritage custodians.' 
+ original_false_match: + place_id: ChIJ9w1N5c01xkcRt6VDu57TGfE + name: Timmer & Onderhoudsbedrijf Dik Lubbertsen + formatted_address: Parklaan 14, 3881 CT Putten, Netherlands + fetch_timestamp: '2025-12-06T19:31:45.339848+00:00' + api_status: OK + coordinates: + latitude: 52.2661407 + longitude: 5.6104113 + phone_international: +31 341 358 563 + phone_local: 0341 358 563 + website: http://www.lubbertsen.nl/ + google_place_types: + - point_of_interest + - establishment + business_status: OPERATIONAL + correction_timestamp: '2026-01-08T12:24:16Z' + correction_agent: opencode-claude-sonnet-4 + correction_method: manual_name_mismatch_review wikidata_enrichment: status: NOT_FOUND fetch_timestamp: '2025-12-06T19:35:33.940708+00:00' @@ -157,13 +165,11 @@ unesco_enrichment: lon: 5.1475555556 lat: 52.0853333333 url: https://whc.unesco.org/en/list/965 - short_description: The Rietveld Schröder House in Utrecht was commissioned by - Ms Truus Schröder-Schräder, designed by the architect Gerrit Thomas Rietveld, - and built in 1924. This small family house, with its interior, the flexible - spatial arrangement, and the visual and formal qualities, was a manifesto of - the ideals of the De Stijl group of artists and architects in the Netherlands - in the 1920s, and has since been considered one of the icons of the Modern Movement - in architecture. + short_description: The Rietveld Schröder House in Utrecht was commissioned by Ms Truus Schröder-Schräder, designed by + the architect Gerrit Thomas Rietveld, and built in 1924. This small family house, with its interior, the flexible spatial + arrangement, and the visual and formal qualities, was a manifesto of the ideals of the De Stijl group of artists and + architects in the Netherlands in the 1920s, and has since been considered one of the icons of the Modern Movement in + architecture. 
- unesco_id: '739' uuid: 46eedc7a-a087-55dc-b552-49d7cac966a2 name_en: Schokland and Surroundings @@ -180,12 +186,10 @@ unesco_enrichment: lon: 5.771666667 lat: 52.63861111 url: https://whc.unesco.org/en/list/739 - short_description: Schokland was a peninsula that by the 15th century had become - an island. Occupied and then abandoned as the sea encroached, it had to be evacuated - in 1859. But following the draining of the Zuider Zee, it has, since the 1940s, - formed part of the land reclaimed from the sea. Schokland has vestiges of human - habitation going back to prehistoric times. It symbolizes the heroic, age-old - struggle of the people of the Netherlands against the encroachment of the waters. + short_description: Schokland was a peninsula that by the 15th century had become an island. Occupied and then abandoned + as the sea encroached, it had to be evacuated in 1859. But following the draining of the Zuider Zee, it has, since the + 1940s, formed part of the land reclaimed from the sea. Schokland has vestiges of human habitation going back to prehistoric + times. It symbolizes the heroic, age-old struggle of the people of the Netherlands against the encroachment of the waters. unesco_ich_enrichment: country_code: NL total_elements_in_country: 5 @@ -198,11 +202,9 @@ unesco_ich_enrichment: inscription_year: 2023 multinational: false url: https://ich.unesco.org/en/RL/rotterdam-summer-carnival-01870 - description: The Rotterdam Summer Carnival is a multicultural celebration that - unites participants from the Caribbean and European Netherlands, as well as - ethnic minority groups from Central and South America and Africa living in the - Netherlands. The event includes a street parade, a brass band competition and - a... 
+ description: The Rotterdam Summer Carnival is a multicultural celebration that unites participants from the Caribbean + and European Netherlands, as well as ethnic minority groups from Central and South America and Africa living in the + Netherlands. The event includes a street parade, a brass band competition and a... - unesco_ich_id: '1979' name: 'Traditional irrigation: knowledge, technique, and organization' list_type: RL @@ -210,9 +212,8 @@ unesco_ich_enrichment: inscription_year: 2023 multinational: true url: https://ich.unesco.org/en/RL/traditional-irrigation-knowledge-technique-and-organization-01979 - description: Traditional irrigation uses gravity and hand-made constructions such - as channels and ditches to distribute water from naturally-occurring water catchment - points (such as springs, streams and glaciers) to the fields. Practitioners + description: Traditional irrigation uses gravity and hand-made constructions such as channels and ditches to distribute + water from naturally-occurring water catchment points (such as springs, streams and glaciers) to the fields. Practitioners choose specific days and periods to manually divert the water, and the... - unesco_ich_id: '1707' name: Corso culture, flower and fruit parades in the Netherlands @@ -221,11 +222,9 @@ unesco_ich_enrichment: inscription_year: 2021 multinational: false url: https://ich.unesco.org/en/RL/corso-culture-flower-and-fruit-parades-in-the-netherlands-01707 - description: Dating back to the late nineteenth century, a corso is an annual - parade of floats or boats decorated with flowers, fruit, vegetables and, in - some cases, people in costumes. Originating in the south of France and Italy, - the practice spread to the Netherlands in the nineteenth century. The parade - take... + description: Dating back to the late nineteenth century, a corso is an annual parade of floats or boats decorated with + flowers, fruit, vegetables and, in some cases, people in costumes. 
Originating in the south of France and Italy, the + practice spread to the Netherlands in the nineteenth century. The parade take... - unesco_ich_id: '1708' name: Falconry, a living human heritage list_type: RL @@ -233,9 +232,8 @@ unesco_ich_enrichment: inscription_year: 2021 multinational: true url: https://ich.unesco.org/en/RL/falconry-a-living-human-heritage-01708 - description: Falconry is the traditional art and practice of training and flying - falcons (and sometimes eagles, hawks, buzzards and other birds of prey). It - has been practised for over 4000 years. The practice of falconry in early and + description: Falconry is the traditional art and practice of training and flying falcons (and sometimes eagles, hawks, + buzzards and other birds of prey). It has been practised for over 4000 years. The practice of falconry in early and medieval periods of history is documented in many parts of the world. Original... - unesco_ich_id: '1265' name: Craft of the miller operating windmills and watermills @@ -244,9 +242,8 @@ unesco_ich_enrichment: inscription_year: 2017 multinational: false url: https://ich.unesco.org/en/RL/craft-of-the-miller-operating-windmills-and-watermills-01265 - description: The craft of the miller operating windmills and watermills involves - the knowledge and skills necessary to operate a mill and maintain it in a good - state of repair. With a declining number of people earning their livelihood + description: The craft of the miller operating windmills and watermills involves the knowledge and skills necessary to + operate a mill and maintain it in a good state of repair. With a declining number of people earning their livelihood from the craft, millers today also play a key role in transmitting the cultur... location: latitude: 52.2661407 @@ -274,9 +271,8 @@ crawl4ai_enrichment: links_count: 9 open_graph: title: Timmer & Onderhoudsbedrijf Dick Lubbertsen - description: U bent op zoek naar een bedrijf voor uw timmer- of onderhoudsklus? 
- Wij helpen u graag! Dankzij onze brede ervaring kunnen we alle facetten van - een klus voor onze rekening nemen. Hierdoor bespaart u... + description: U bent op zoek naar een bedrijf voor uw timmer- of onderhoudsklus? Wij helpen u graag! Dankzij onze brede + ervaring kunnen we alle facetten van een klus voor onze rekening nemen. Hierdoor bespaart u... image: http://www.lubbertsen.nl/uploads/4/9/5/7/49571465/1435164563.png url: http://www.lubbertsen.nl/ site_name: Timmer & Onderhoudsbedrijf Dick Lubbertsen @@ -304,16 +300,14 @@ timespan: events: - date: '2007' type: founding - description: is opgericht onder auspiciën van de gemeente Renkum en in samenwerking - met lokale instanties + description: is opgericht onder auspiciën van de gemeente Renkum en in samenwerking met lokale instanties timeline_enrichment: timeline_events: - event_type: FOUNDING event_date: '2007' date_precision: year approximate: false - description: is opgericht onder auspiciën van de gemeente Renkum en in samenwerking - met lokale instanties + description: is opgericht onder auspiciën van de gemeente Renkum en in samenwerking met lokale instanties source_urls: - https://www.timmerman-nu.nl/arnhem/ - https://www.werkspot.nl/timmerwerken/timmerman-vakmannen/arnhem @@ -321,13 +315,11 @@ timeline_enrichment: - https://sb-bouw.com/timmerman-arnhem/ - https://www.klusup.nl/timmerman/arnhem linkup_query: '"Dick Timmerman" Arnhem opgericht OR gesticht OR sinds' - linkup_answer: Dick Timmerman uit Arnhem is betrokken bij een groep die is opgericht - rond het zoeken naar vermiste militairen van de Slag om Arnhem. Deze groep, - bestaande uit Hans Timmerman, zijn broer Dick en David van Buggenum, is opgericht - onder auspiciën van de gemeente Renkum en in samenwerking met lokale instanties. - De oprichting van deze groep vond plaats vóór 2007, aangezien er in 2007 en - 2008 al zoekacties werden uitgevoerd. 
Er is geen specifieke oprichtingsdatum - van een timmerbedrijf of ander bedrijf van Dick Timmerman in Arnhem vermeld. + linkup_answer: Dick Timmerman uit Arnhem is betrokken bij een groep die is opgericht rond het zoeken naar vermiste militairen + van de Slag om Arnhem. Deze groep, bestaande uit Hans Timmerman, zijn broer Dick en David van Buggenum, is opgericht + onder auspiciën van de gemeente Renkum en in samenwerking met lokale instanties. De oprichting van deze groep vond plaats + vóór 2007, aangezien er in 2007 en 2008 al zoekacties werden uitgevoerd. Er is geen specifieke oprichtingsdatum van + een timmerbedrijf of ander bedrijf van Dick Timmerman in Arnhem vermeld. fetch_timestamp: '2025-12-15T17:21:48.150538+00:00' archive_path: web/1703/linkup/linkup_founding_20251215T172148Z.json extraction_method: linkup_answer_regex diff --git a/data/custodian/NL-GE-OOS-I-SS.yaml b/data/custodian/NL-GE-OOS-I-SS.yaml index 33db4cab55..740dbb6729 100644 --- a/data/custodian/NL-GE-OOS-I-SS.yaml +++ b/data/custodian/NL-GE-OOS-I-SS.yaml @@ -120,21 +120,29 @@ ghcid: distance_km: 0.0 geonames_id: 2749503 google_maps_enrichment: - place_id: ChIJc3ARj3DHx0cRCYlrOyxPl4U - name: Damu Sport en Spel Verhuur - formatted_address: Kanaal Noord 145, 7317 AB Apeldoorn, Netherlands - fetch_timestamp: '2025-12-06T19:32:03.990098+00:00' - api_status: OK - coordinates: - latitude: 52.221916 - longitude: 5.972760200000001 - phone_international: +31 55 542 3300 - phone_local: 055 542 3300 - website: http://www.damu-verhuur.nl/ - google_place_types: - - point_of_interest - - establishment - business_status: OPERATIONAL + status: FALSE_MATCH + false_match_reason: 'Google Maps returned "Damu Sport en Spel Verhuur" (sports equipment rental business) instead of "sport + en spel" (traditional games heritage organization). Name mismatch detected during manual review. Per Rule 40: KIEN is + authoritative for Type I intangible heritage custodians.' 
+ original_false_match: + place_id: ChIJc3ARj3DHx0cRCYlrOyxPl4U + name: Damu Sport en Spel Verhuur + formatted_address: Kanaal Noord 145, 7317 AB Apeldoorn, Netherlands + fetch_timestamp: '2025-12-06T19:32:03.990098+00:00' + api_status: OK + coordinates: + latitude: 52.221916 + longitude: 5.972760200000001 + phone_international: +31 55 542 3300 + phone_local: 055 542 3300 + website: http://www.damu-verhuur.nl/ + google_place_types: + - point_of_interest + - establishment + business_status: OPERATIONAL + correction_timestamp: '2026-01-08T12:24:17Z' + correction_agent: opencode-claude-sonnet-4 + correction_method: manual_name_mismatch_review wikidata_enrichment: wikidata_entity_id: Q131420360 api_metadata: @@ -166,8 +174,7 @@ wikidata_enrichment: instance_of: &id001 - id: Q59861107 label: temporary art exhibition - description: organized presentation and display of a selection of works of art, - with an expected end date + description: organized presentation and display of a selection of works of art, with an expected end date wikidata_instance_of: *id001 wikidata_location: location: @@ -194,12 +201,10 @@ unesco_enrichment: lon: 5.771666667 lat: 52.63861111 url: https://whc.unesco.org/en/list/739 - short_description: Schokland was a peninsula that by the 15th century had become - an island. Occupied and then abandoned as the sea encroached, it had to be evacuated - in 1859. But following the draining of the Zuider Zee, it has, since the 1940s, - formed part of the land reclaimed from the sea. Schokland has vestiges of human - habitation going back to prehistoric times. It symbolizes the heroic, age-old - struggle of the people of the Netherlands against the encroachment of the waters. + short_description: Schokland was a peninsula that by the 15th century had become an island. Occupied and then abandoned + as the sea encroached, it had to be evacuated in 1859. 
But following the draining of the Zuider Zee, it has, since the + 1940s, formed part of the land reclaimed from the sea. Schokland has vestiges of human habitation going back to prehistoric + times. It symbolizes the heroic, age-old struggle of the people of the Netherlands against the encroachment of the waters. unesco_ich_enrichment: country_code: NL total_elements_in_country: 5 @@ -212,11 +217,9 @@ unesco_ich_enrichment: inscription_year: 2023 multinational: false url: https://ich.unesco.org/en/RL/rotterdam-summer-carnival-01870 - description: The Rotterdam Summer Carnival is a multicultural celebration that - unites participants from the Caribbean and European Netherlands, as well as - ethnic minority groups from Central and South America and Africa living in the - Netherlands. The event includes a street parade, a brass band competition and - a... + description: The Rotterdam Summer Carnival is a multicultural celebration that unites participants from the Caribbean + and European Netherlands, as well as ethnic minority groups from Central and South America and Africa living in the + Netherlands. The event includes a street parade, a brass band competition and a... - unesco_ich_id: '1979' name: 'Traditional irrigation: knowledge, technique, and organization' list_type: RL @@ -224,9 +227,8 @@ unesco_ich_enrichment: inscription_year: 2023 multinational: true url: https://ich.unesco.org/en/RL/traditional-irrigation-knowledge-technique-and-organization-01979 - description: Traditional irrigation uses gravity and hand-made constructions such - as channels and ditches to distribute water from naturally-occurring water catchment - points (such as springs, streams and glaciers) to the fields. Practitioners + description: Traditional irrigation uses gravity and hand-made constructions such as channels and ditches to distribute + water from naturally-occurring water catchment points (such as springs, streams and glaciers) to the fields. 
Practitioners choose specific days and periods to manually divert the water, and the... - unesco_ich_id: '1707' name: Corso culture, flower and fruit parades in the Netherlands @@ -235,11 +237,9 @@ unesco_ich_enrichment: inscription_year: 2021 multinational: false url: https://ich.unesco.org/en/RL/corso-culture-flower-and-fruit-parades-in-the-netherlands-01707 - description: Dating back to the late nineteenth century, a corso is an annual - parade of floats or boats decorated with flowers, fruit, vegetables and, in - some cases, people in costumes. Originating in the south of France and Italy, - the practice spread to the Netherlands in the nineteenth century. The parade - take... + description: Dating back to the late nineteenth century, a corso is an annual parade of floats or boats decorated with + flowers, fruit, vegetables and, in some cases, people in costumes. Originating in the south of France and Italy, the + practice spread to the Netherlands in the nineteenth century. The parade take... - unesco_ich_id: '1708' name: Falconry, a living human heritage list_type: RL @@ -247,9 +247,8 @@ unesco_ich_enrichment: inscription_year: 2021 multinational: true url: https://ich.unesco.org/en/RL/falconry-a-living-human-heritage-01708 - description: Falconry is the traditional art and practice of training and flying - falcons (and sometimes eagles, hawks, buzzards and other birds of prey). It - has been practised for over 4000 years. The practice of falconry in early and + description: Falconry is the traditional art and practice of training and flying falcons (and sometimes eagles, hawks, + buzzards and other birds of prey). It has been practised for over 4000 years. The practice of falconry in early and medieval periods of history is documented in many parts of the world. Original... 
- unesco_ich_id: '1265' name: Craft of the miller operating windmills and watermills @@ -258,9 +257,8 @@ unesco_ich_enrichment: inscription_year: 2017 multinational: false url: https://ich.unesco.org/en/RL/craft-of-the-miller-operating-windmills-and-watermills-01265 - description: The craft of the miller operating windmills and watermills involves - the knowledge and skills necessary to operate a mill and maintain it in a good - state of repair. With a declining number of people earning their livelihood + description: The craft of the miller operating windmills and watermills involves the knowledge and skills necessary to + operate a mill and maintain it in a good state of repair. With a declining number of people earning their livelihood from the craft, millers today also play a key role in transmitting the cultur... location: latitude: 52.221916 @@ -295,8 +293,8 @@ digital_platform_v2: platform_name: 'Sport- en spelmaterialen: bekijk ons complete assortiment Website' platform_url: https://tebe.nl/productgroep/sport-en-spel/ platform_type: DISCOVERY_PORTAL - description: Sport- en spelactiviteiten organiseren? TéBé Evenementen verhuurt - sport- en spelmaterialen. Bekijk of reserveer hier. + description: Sport- en spelactiviteiten organiseren? TéBé Evenementen verhuurt sport- en spelmaterialen. Bekijk of reserveer + hier. 
language: nl og_image: https://tebe.nl/wp-content/uploads/2016/09/Springkussen-Clown-huren-TeBe-2-250x188.jpg favicon: https://tebe.nl/wp-content/themes/tebe/img/icons/favicon.ico @@ -324,8 +322,8 @@ logo_enrichment: - claim_type: logo_url claim_value: https://tebe.nl/wp-content/themes/tebe/img/logo-tebe-evenementen-service-en-partyverhuur-apeldoorn.png source_url: http://www.damu-verhuur.nl - css_selector: '[document] > html.js.flexbox > body.home.wp-singular > div.wrapper:nth-of-type(3) - > header > div.mobile-logo > a > img' + css_selector: '[document] > html.js.flexbox > body.home.wp-singular > div.wrapper:nth-of-type(3) > header > div.mobile-logo + > a > img' retrieved_on: '2025-12-21T20:54:33.883595+00:00' extraction_method: crawl4ai_header_logo detection_confidence: high diff --git a/data/custodian/NL-OV-IJS-I-RB.yaml b/data/custodian/NL-OV-IJS-I-RB.yaml index 7d2fe465f9..8e02e8c12e 100644 --- a/data/custodian/NL-OV-IJS-I-RB.yaml +++ b/data/custodian/NL-OV-IJS-I-RB.yaml @@ -120,25 +120,33 @@ ghcid: distance_km: 0.0 geonames_id: 2753358 google_maps_enrichment: - place_id: ChIJ_ZtDzyDHx0cRIw-S4I8eCik - name: Ria Money Transfer Agent - formatted_address: RepairPhone, Asselsestraat 26, 7311 EM Apeldoorn, Netherlands - fetch_timestamp: '2025-12-06T19:32:56.963775+00:00' - api_status: OK - coordinates: - latitude: 52.214168799999996 - longitude: 5.958730999999999 - phone_international: +31 55 785 0896 - phone_local: 055 785 0896 - website: https://www.riamoneytransfer.com/en-nl/ - google_place_types: - - finance - - point_of_interest - - establishment - business_status: OPERATIONAL - rating: 2.8 - user_rating_count: 6 - photo_count: 1 + status: FALSE_MATCH + false_match_reason: 'Google Maps returned "Ria Money Transfer Agent" (money transfer business) instead of "Ria Bos" (traditional + cigar maker (person)). Name mismatch detected during manual review. Per Rule 40: KIEN is authoritative for Type I intangible + heritage custodians.' 
+ original_false_match: + place_id: ChIJ_ZtDzyDHx0cRIw-S4I8eCik + name: Ria Money Transfer Agent + formatted_address: RepairPhone, Asselsestraat 26, 7311 EM Apeldoorn, Netherlands + fetch_timestamp: '2025-12-06T19:32:56.963775+00:00' + api_status: OK + coordinates: + latitude: 52.214168799999996 + longitude: 5.958730999999999 + phone_international: +31 55 785 0896 + phone_local: 055 785 0896 + website: https://www.riamoneytransfer.com/en-nl/ + google_place_types: + - finance + - point_of_interest + - establishment + business_status: OPERATIONAL + rating: 2.8 + user_rating_count: 6 + photo_count: 1 + correction_timestamp: '2026-01-08T12:24:16Z' + correction_agent: opencode-claude-sonnet-4 + correction_method: manual_name_mismatch_review wikidata_enrichment: wikidata_entity_id: Q97774077 api_metadata: @@ -214,8 +222,7 @@ wikidata_enrichment: instance_of: &id001 - id: Q5 label: human - description: any single member of Homo sapiens, unique extant species of the - genus Homo + description: any single member of Homo sapiens, unique extant species of the genus Homo wikidata_instance_of: *id001 location: latitude: 52.214168799999996 @@ -247,15 +254,12 @@ digital_platform_v2: source_status_code: 200 primary_platform: platform_id: primary_website_riamoneytransfer_com - platform_name: Send Money Online Quickly & Securely to Over 190 Countries | Ria - Money Transfer - Netherlands Website + platform_name: Send Money Online Quickly & Securely to Over 190 Countries | Ria Money Transfer - Netherlands Website platform_url: https://www.riamoneytransfer.com/en-nl/ platform_type: INSTITUTIONAL_WEBSITE - description: Send money online from your mobile device in Netherlands with the - Ria Money Transfer app. Safely and quickly remit money to 190+ countries in - our network, with over 600,000 partner locations. With receiving options including - cash pickup, bank deposit, mobile wallets, and home delivery, we make receiving - money simple. 
+ description: Send money online from your mobile device in Netherlands with the Ria Money Transfer app. Safely and quickly + remit money to 190+ countries in our network, with over 600,000 partner locations. With receiving options including + cash pickup, bank deposit, mobile wallets, and home delivery, we make receiving money simple. language: en og_image: https://www.riamoneytransfer.com/meta-image.png favicon: https://www.riamoneytransfer.com/favicon.ico diff --git a/data/custodian/NL-OV-OMM-I-EO.yaml b/data/custodian/NL-OV-OMM-I-EO.yaml index 9a2c957bf5..ea08b178c0 100644 --- a/data/custodian/NL-OV-OMM-I-EO.yaml +++ b/data/custodian/NL-OV-OMM-I-EO.yaml @@ -103,8 +103,7 @@ ghcid: - ghcid: NL-OV-OMM-I-EO ghcid_numeric: 12419799121087638065 valid_from: '2025-12-14T21:40:38.654382+00:00' - reason: 'Type corrected: intangible_heritage_custodian should use type I (Intangible - Heritage), not U (Unknown)' + reason: 'Type corrected: intangible_heritage_custodian should use type I (Intangible Heritage), not U (Unknown)' - ghcid: NL-OV-OMM-U-EO ghcid_numeric: 14519261499902116425 valid_from: '2025-12-06T23:52:48.611395+00:00' @@ -129,31 +128,39 @@ ghcid: resolution_date: '2025-12-06T23:52:48.611395+00:00' geonames_id: 2759661 google_maps_enrichment: - place_id: ChIJmzW-m0_9x0cReZydPligYBI - name: Restaurant Ekkelenkamp Ommen - formatted_address: Stationsweg 1, 7731 AW Ommen, Netherlands - fetch_timestamp: '2025-12-06T19:31:45.957934+00:00' - api_status: OK - coordinates: - latitude: 52.515932 - longitude: 6.4226101 - phone_international: +31 529 451 782 - phone_local: 0529 451 782 - website: http://www.ekkelenkamp-ommen.nl/ - google_place_types: - - restaurant - - ice_cream_shop - - dessert_shop - - confectionery - - food_store - - food - - point_of_interest - - store - - establishment - business_status: OPERATIONAL - rating: 4.5 - user_rating_count: 2482 - photo_count: 10 + status: FALSE_MATCH + false_match_reason: 'Google Maps returned "Restaurant Ekkelenkamp Ommen" 
(restaurant) instead of "Eiertikken Ommen" (traditional + egg-tapping game heritage). Name mismatch detected during manual review. Per Rule 40: KIEN is authoritative for Type I + intangible heritage custodians.' + original_false_match: + place_id: ChIJmzW-m0_9x0cReZydPligYBI + name: Restaurant Ekkelenkamp Ommen + formatted_address: Stationsweg 1, 7731 AW Ommen, Netherlands + fetch_timestamp: '2025-12-06T19:31:45.957934+00:00' + api_status: OK + coordinates: + latitude: 52.515932 + longitude: 6.4226101 + phone_international: +31 529 451 782 + phone_local: 0529 451 782 + website: http://www.ekkelenkamp-ommen.nl/ + google_place_types: + - restaurant + - ice_cream_shop + - dessert_shop + - confectionery + - food_store + - food + - point_of_interest + - store + - establishment + business_status: OPERATIONAL + rating: 4.5 + user_rating_count: 2482 + photo_count: 10 + correction_timestamp: '2026-01-08T12:24:17Z' + correction_agent: opencode-claude-sonnet-4 + correction_method: manual_name_mismatch_review wikidata_enrichment: status: NOT_FOUND fetch_timestamp: '2025-12-06T19:35:34.794723+00:00' @@ -178,12 +185,10 @@ unesco_enrichment: lon: 5.771666667 lat: 52.63861111 url: https://whc.unesco.org/en/list/739 - short_description: Schokland was a peninsula that by the 15th century had become - an island. Occupied and then abandoned as the sea encroached, it had to be evacuated - in 1859. But following the draining of the Zuider Zee, it has, since the 1940s, - formed part of the land reclaimed from the sea. Schokland has vestiges of human - habitation going back to prehistoric times. It symbolizes the heroic, age-old - struggle of the people of the Netherlands against the encroachment of the waters. + short_description: Schokland was a peninsula that by the 15th century had become an island. Occupied and then abandoned + as the sea encroached, it had to be evacuated in 1859. 
But following the draining of the Zuider Zee, it has, since the + 1940s, formed part of the land reclaimed from the sea. Schokland has vestiges of human habitation going back to prehistoric + times. It symbolizes the heroic, age-old struggle of the people of the Netherlands against the encroachment of the waters. unesco_ich_enrichment: country_code: NL total_elements_in_country: 5 @@ -196,11 +201,9 @@ unesco_ich_enrichment: inscription_year: 2023 multinational: false url: https://ich.unesco.org/en/RL/rotterdam-summer-carnival-01870 - description: The Rotterdam Summer Carnival is a multicultural celebration that - unites participants from the Caribbean and European Netherlands, as well as - ethnic minority groups from Central and South America and Africa living in the - Netherlands. The event includes a street parade, a brass band competition and - a... + description: The Rotterdam Summer Carnival is a multicultural celebration that unites participants from the Caribbean + and European Netherlands, as well as ethnic minority groups from Central and South America and Africa living in the + Netherlands. The event includes a street parade, a brass band competition and a... - unesco_ich_id: '1979' name: 'Traditional irrigation: knowledge, technique, and organization' list_type: RL @@ -208,9 +211,8 @@ unesco_ich_enrichment: inscription_year: 2023 multinational: true url: https://ich.unesco.org/en/RL/traditional-irrigation-knowledge-technique-and-organization-01979 - description: Traditional irrigation uses gravity and hand-made constructions such - as channels and ditches to distribute water from naturally-occurring water catchment - points (such as springs, streams and glaciers) to the fields. Practitioners + description: Traditional irrigation uses gravity and hand-made constructions such as channels and ditches to distribute + water from naturally-occurring water catchment points (such as springs, streams and glaciers) to the fields. 
Practitioners choose specific days and periods to manually divert the water, and the... - unesco_ich_id: '1707' name: Corso culture, flower and fruit parades in the Netherlands @@ -219,11 +221,9 @@ unesco_ich_enrichment: inscription_year: 2021 multinational: false url: https://ich.unesco.org/en/RL/corso-culture-flower-and-fruit-parades-in-the-netherlands-01707 - description: Dating back to the late nineteenth century, a corso is an annual - parade of floats or boats decorated with flowers, fruit, vegetables and, in - some cases, people in costumes. Originating in the south of France and Italy, - the practice spread to the Netherlands in the nineteenth century. The parade - take... + description: Dating back to the late nineteenth century, a corso is an annual parade of floats or boats decorated with + flowers, fruit, vegetables and, in some cases, people in costumes. Originating in the south of France and Italy, the + practice spread to the Netherlands in the nineteenth century. The parade take... - unesco_ich_id: '1708' name: Falconry, a living human heritage list_type: RL @@ -231,9 +231,8 @@ unesco_ich_enrichment: inscription_year: 2021 multinational: true url: https://ich.unesco.org/en/RL/falconry-a-living-human-heritage-01708 - description: Falconry is the traditional art and practice of training and flying - falcons (and sometimes eagles, hawks, buzzards and other birds of prey). It - has been practised for over 4000 years. The practice of falconry in early and + description: Falconry is the traditional art and practice of training and flying falcons (and sometimes eagles, hawks, + buzzards and other birds of prey). It has been practised for over 4000 years. The practice of falconry in early and medieval periods of history is documented in many parts of the world. Original... 
- unesco_ich_id: '1265' name: Craft of the miller operating windmills and watermills @@ -242,9 +241,8 @@ unesco_ich_enrichment: inscription_year: 2017 multinational: false url: https://ich.unesco.org/en/RL/craft-of-the-miller-operating-windmills-and-watermills-01265 - description: The craft of the miller operating windmills and watermills involves - the knowledge and skills necessary to operate a mill and maintain it in a good - state of repair. With a declining number of people earning their livelihood + description: The craft of the miller operating windmills and watermills involves the knowledge and skills necessary to + operate a mill and maintain it in a good state of repair. With a declining number of people earning their livelihood from the craft, millers today also play a key role in transmitting the cultur... location: latitude: 52.515932 @@ -279,8 +277,7 @@ digital_platform_v2: platform_name: Restaurant Ekkelenkamp Ommen | Restaurant en IJssalon Website platform_url: https://www.ekkelenkamp-ommen.nl/ platform_type: INSTITUTIONAL_WEBSITE - description: Kom genieten van lunch, diner, heerlijke koffie en natuurlijk ons - welbekende ijs! + description: Kom genieten van lunch, diner, heerlijke koffie en natuurlijk ons welbekende ijs! 
language: nl og_image: null favicon: https://www.ekkelenkamp-ommen.nl/wp-content/uploads/2025/07/favicon-300x300.png diff --git a/data/custodian/NL-UT-UTR-I-FNV.yaml b/data/custodian/NL-UT-UTR-I-FNV.yaml index 084b6b4172..4f7de45113 100644 --- a/data/custodian/NL-UT-UTR-I-FNV.yaml +++ b/data/custodian/NL-UT-UTR-I-FNV.yaml @@ -107,24 +107,32 @@ ghcid: distance_km: 0.07428445760936703 geonames_id: 2745912 google_maps_enrichment: - place_id: ChIJD_OoDEK4x0cRJVl5FEdQLyM - name: NET Foundation - formatted_address: Laan van Westenenk 12, 7336 AZ Apeldoorn, Netherlands - fetch_timestamp: '2025-12-06T19:33:10.001718+00:00' - api_status: OK - coordinates: - latitude: 52.190509299999995 - longitude: 5.9380245 - phone_international: +31 55 200 0236 - phone_local: 055 200 0236 - website: http://netfoundation.nl/ - google_place_types: - - point_of_interest - - establishment - business_status: OPERATIONAL - rating: 4.8 - user_rating_count: 4 - photo_count: 2 + status: FALSE_MATCH + false_match_reason: 'Google Maps returned "NET Foundation" (different foundation) instead of "Federatie Nederlandse Vertelorganisaties" + (Dutch storytelling federation). Name mismatch detected during manual review. Per Rule 40: KIEN is authoritative for Type + I intangible heritage custodians.' 
+ original_false_match: + place_id: ChIJD_OoDEK4x0cRJVl5FEdQLyM + name: NET Foundation + formatted_address: Laan van Westenenk 12, 7336 AZ Apeldoorn, Netherlands + fetch_timestamp: '2025-12-06T19:33:10.001718+00:00' + api_status: OK + coordinates: + latitude: 52.190509299999995 + longitude: 5.9380245 + phone_international: +31 55 200 0236 + phone_local: 055 200 0236 + website: http://netfoundation.nl/ + google_place_types: + - point_of_interest + - establishment + business_status: OPERATIONAL + rating: 4.8 + user_rating_count: 4 + photo_count: 2 + correction_timestamp: '2026-01-08T12:24:17Z' + correction_agent: opencode-claude-sonnet-4 + correction_method: manual_name_mismatch_review wikidata_enrichment: status: NOT_FOUND fetch_timestamp: '2025-12-06T19:41:17.465971+00:00' @@ -165,15 +173,13 @@ digital_platform_v2: founding_context: Founded at Vertel Event 2019 in Culemborg data_quality_notes: google_maps_status: CONFLATED - google_maps_issue: 'Google Maps returned "NET Foundation" in Apeldoorn (place_id: - ChIJD_OoDEK4x0cRJVl5FEdQLyM), which is a completely different organization. - The correct address is Biltstraat 166, 3572 BP Utrecht, as confirmed by KIEN + google_maps_issue: 'Google Maps returned "NET Foundation" in Apeldoorn (place_id: ChIJD_OoDEK4x0cRJVl5FEdQLyM), which + is a completely different organization. The correct address is Biltstraat 166, 3572 BP Utrecht, as confirmed by KIEN registry. Coordinates in location block are WRONG.' wikidata_status: NOT_FOUND correct_address: Biltstraat 166, 3572 BP Utrecht - coordinate_warning: The location.latitude/longitude values (52.190509, 5.938024) - point to Apeldoorn, not Utrecht. These coordinates are from the conflated Google - Maps result and should NOT be used for mapping this organization. + coordinate_warning: The location.latitude/longitude values (52.190509, 5.938024) point to Apeldoorn, not Utrecht. 
These + coordinates are from the conflated Google Maps result and should NOT be used for mapping this organization. intangible_heritage: heritage_form_name: Verhalen Vertellen heritage_form_name_en: Storytelling @@ -190,11 +196,9 @@ digital_platform_v2: organization_profile: organization_type: federation scope: national - description: The Federatie Nederlandse Vertelorganisaties (Federation of Dutch - Storytelling Organizations) is an umbrella organization uniting major Dutch - storytelling organizations. Founded in 2019, it aims to contribute to the emancipation - of storytelling (verhalen vertellen), both artistically and as applied art, - within Dutch society. + description: The Federatie Nederlandse Vertelorganisaties (Federation of Dutch Storytelling Organizations) is an umbrella + organization uniting major Dutch storytelling organizations. Founded in 2019, it aims to contribute to the emancipation + of storytelling (verhalen vertellen), both artistically and as applied art, within Dutch society. mission_goals: - Recognition of storytelling as a theater genre - Lobbying cultural funds and Ministry of OC&W @@ -259,9 +263,8 @@ digital_platform_v2: provenance: research_date: '2025-12-15T16:30:00+00:00' research_agent: claude-opus-4 - research_notes: Research confirmed Google Maps conflation issue - returned completely - wrong organization (NET Foundation in Apeldoorn). Correct data sourced from - KIEN registry and organization website. Federation founded 2019, unites 8 major + research_notes: Research confirmed Google Maps conflation issue - returned completely wrong organization (NET Foundation + in Apeldoorn). Correct data sourced from KIEN registry and organization website. Federation founded 2019, unites 8 major Dutch storytelling organizations. 
data_sources: - source_type: institutional_registry diff --git a/data/custodian/NL-ZH-AAD-I-DA.yaml b/data/custodian/NL-ZH-AAD-I-DA.yaml index a904148d45..e08e20ab6b 100644 --- a/data/custodian/NL-ZH-AAD-I-DA.yaml +++ b/data/custodian/NL-ZH-AAD-I-DA.yaml @@ -28,8 +28,7 @@ provenance: linkup_timespan: - source_type: linkup_web_search fetch_timestamp: '2025-12-15T22:09:47.438623+00:00' - search_query: '"Dodenherdenking Alphen" Alphen aan den Rijn opgericht OR gesticht - OR sinds' + search_query: '"Dodenherdenking Alphen" Alphen aan den Rijn opgericht OR gesticht OR sinds' source_urls: - https://www.alphens.nl/nieuws/dodenherdenking-2025-in-alphen-aan-den-rijn.html - https://www.alphens.nl/nieuws/dodenherdenking-2024-in-alphen-aan-den-rijn.html @@ -50,8 +49,7 @@ provenance: - Entry created from KIEN (Kenniscentrum Immaterieel Erfgoed Nederland) registry - Intangible heritage custodian organization - 'safeguards slot added 2025-12-05T09:07:10.732807+00:00: linked to 1 IntangibleHeritageForm(s)' - - Location extracted from organization name 'Stichting dodenherdenking Alphen' - - matched place 'Alphen aan den Rijn' (NAME_EXTRACTION_GEONAMES) + - Location extracted from organization name 'Stichting dodenherdenking Alphen' - matched place 'Alphen aan den Rijn' (NAME_EXTRACTION_GEONAMES) - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:19:43Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T07:10:52Z kien_enrichment: @@ -137,23 +135,31 @@ ghcid: distance_km: 0.0 geonames_id: 2759875 google_maps_enrichment: - place_id: ChIJrZf_p9m5xUcRgJU1bZQIuQw - name: Waalsdorpervlakte Bourdon Bell - formatted_address: 2244 AJ Wassenaar, Netherlands - fetch_timestamp: '2025-12-06T19:33:17.179987+00:00' - api_status: OK - coordinates: - latitude: 52.115082099999995 - longitude: 4.3364664 - website: https://www.erepeloton.nl/ - google_place_types: - - monument - - point_of_interest - - establishment - business_status: OPERATIONAL - 
rating: 4.6 - user_rating_count: 628 - photo_count: 10 + status: FALSE_MATCH + false_match_reason: 'Google Maps returned "Waalsdorpervlakte Bourdon Bell" (different memorial in different location) instead + of "Stichting dodenherdenking Alphen" (memorial foundation Alphen aan den Rijn). Name mismatch detected during manual + review. Per Rule 40: KIEN is authoritative for Type I intangible heritage custodians.' + original_false_match: + place_id: ChIJrZf_p9m5xUcRgJU1bZQIuQw + name: Waalsdorpervlakte Bourdon Bell + formatted_address: 2244 AJ Wassenaar, Netherlands + fetch_timestamp: '2025-12-06T19:33:17.179987+00:00' + api_status: OK + coordinates: + latitude: 52.115082099999995 + longitude: 4.3364664 + website: https://www.erepeloton.nl/ + google_place_types: + - monument + - point_of_interest + - establishment + business_status: OPERATIONAL + rating: 4.6 + user_rating_count: 628 + photo_count: 10 + correction_timestamp: '2026-01-08T12:24:17Z' + correction_agent: opencode-claude-sonnet-4 + correction_method: manual_name_mismatch_review wikidata_enrichment: status: NOT_FOUND fetch_timestamp: '2025-12-06T19:41:47.754154+00:00' @@ -191,10 +197,9 @@ digital_platform_v2: platform_name: Home - Erepeloton Waalsdorp Website platform_url: https://erepeloton.nl/ platform_type: DISCOVERY_PORTAL - description: 'Helpt u mee?Het Erepeloton Waalsdorp heeft tot doel jaarlijks een - herdenking te verzorgen die de nagedachtenis aan de slachtoffers waardig is. - Als u deze doelstelling een warm hart toedraagt en u wilt bijdragen aan het - voortbestaan van deze herdenking, kunt u een donatie doen. Doneren + description: 'Helpt u mee?Het Erepeloton Waalsdorp heeft tot doel jaarlijks een herdenking te verzorgen die de nagedachtenis + aan de slachtoffers waardig is. Als u deze doelstelling een warm hart toedraagt en u wilt bijdragen aan het voortbestaan + van deze herdenking, kunt u een donatie doen. Doneren Updates over Erepeloton Waalsdorp? 
Volg onze' language: nl @@ -218,8 +223,7 @@ logo_enrichment: - claim_type: favicon_url claim_value: https://erepeloton.nl/wp-content/uploads/2021/04/cropped-Logo-180x180.jpg source_url: https://www.erepeloton.nl - css_selector: '[document] > html.avada-html-layout-wide.avada-html-header-position-top - > head > link:nth-of-type(13)' + css_selector: '[document] > html.avada-html-layout-wide.avada-html-header-position-top > head > link:nth-of-type(13)' retrieved_on: '2025-12-22T12:01:13.310940+00:00' extraction_method: crawl4ai_link_rel favicon_type: '' @@ -227,8 +231,7 @@ logo_enrichment: - claim_type: og_image_url claim_value: https://erepeloton.nl/wp-content/uploads/2021/04/Logo.jpg source_url: https://www.erepeloton.nl - css_selector: '[document] > html.avada-html-layout-wide.avada-html-header-position-top - > head > meta:nth-of-type(13)' + css_selector: '[document] > html.avada-html-layout-wide.avada-html-header-position-top > head > meta:nth-of-type(13)' retrieved_on: '2025-12-22T12:01:13.310940+00:00' extraction_method: crawl4ai_meta_og summary: diff --git a/data/custodian/NL-ZH-ROT-I-K.yaml b/data/custodian/NL-ZH-ROT-I-K.yaml index 56552964c8..ed3ce87448 100644 --- a/data/custodian/NL-ZH-ROT-I-K.yaml +++ b/data/custodian/NL-ZH-ROT-I-K.yaml @@ -124,26 +124,34 @@ ghcid: distance_km: 0.0 geonames_id: 2747891 google_maps_enrichment: - place_id: ChIJJRL3HLFQzWMRfIKrnP4qQoU - name: Happy Caps - formatted_address: Arnhemseweg 55A, 7331 BB Apeldoorn, Netherlands - fetch_timestamp: '2025-12-06T19:33:21.984508+00:00' - api_status: OK - coordinates: - latitude: 52.205657599999995 - longitude: 5.9628179 - phone_international: +31 85 401 6820 - phone_local: 085 401 6820 - website: https://happy-caps.com/ - google_place_types: - - health - - point_of_interest - - store - - establishment - business_status: OPERATIONAL - rating: 5 - user_rating_count: 44 - photo_count: 6 + status: FALSE_MATCH + false_match_reason: 'Google Maps returned "Happy Caps" (retail store in different 
city (Apeldoorn)) instead of "Stichting + Kracom" (Krampuslauf Rotterdam heritage foundation). Name mismatch detected during manual review. Per Rule 40: KIEN is + authoritative for Type I intangible heritage custodians.' + original_false_match: + place_id: ChIJJRL3HLFQzWMRfIKrnP4qQoU + name: Happy Caps + formatted_address: Arnhemseweg 55A, 7331 BB Apeldoorn, Netherlands + fetch_timestamp: '2025-12-06T19:33:21.984508+00:00' + api_status: OK + coordinates: + latitude: 52.205657599999995 + longitude: 5.9628179 + phone_international: +31 85 401 6820 + phone_local: 085 401 6820 + website: https://happy-caps.com/ + google_place_types: + - health + - point_of_interest + - store + - establishment + business_status: OPERATIONAL + rating: 5 + user_rating_count: 44 + photo_count: 6 + correction_timestamp: '2026-01-08T12:24:16Z' + correction_agent: opencode-claude-sonnet-4 + correction_method: manual_name_mismatch_review wikidata_enrichment: status: NOT_FOUND fetch_timestamp: '2025-12-06T19:42:22.510788+00:00' diff --git a/data/custodian/NL-ZH-ROT-I-SJR.yaml b/data/custodian/NL-ZH-ROT-I-SJR.yaml index e62a0869e6..4750a73001 100644 --- a/data/custodian/NL-ZH-ROT-I-SJR.yaml +++ b/data/custodian/NL-ZH-ROT-I-SJR.yaml @@ -48,8 +48,7 @@ provenance: notes: - Entry created from KIEN (Kenniscentrum Immaterieel Erfgoed Nederland) registry - Intangible heritage custodian organization - - Location extracted from organization name 'Sao Joao Rotterdam' - matched place - 'Rotterdam' (NAME_EXTRACTION_GEONAMES) + - Location extracted from organization name 'Sao Joao Rotterdam' - matched place 'Rotterdam' (NAME_EXTRACTION_GEONAMES) - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:20:34Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T07:13:16Z kien_enrichment: @@ -127,22 +126,30 @@ ghcid: distance_km: 0.0 geonames_id: 2747891 google_maps_enrichment: - place_id: ChIJtYmZWJQ0xEcRXeaDiHmHrMA - name: Heemraadsplein - formatted_address: 
Heemraadsplein, 3023 BD Rotterdam, Netherlands - fetch_timestamp: '2025-12-06T19:33:25.065137+00:00' - api_status: OK - coordinates: - latitude: 51.9128672 - longitude: 4.454419700000001 - google_place_types: - - park - - point_of_interest - - establishment - business_status: OPERATIONAL - rating: 4.1 - user_rating_count: 108 - photo_count: 10 + status: FALSE_MATCH + false_match_reason: 'Google Maps returned "Heemraadsplein" (public square (location, not organization)) instead of "Sao + Joao Rotterdam" (Portuguese heritage festival organization). Name mismatch detected during manual review. Per Rule 40: + KIEN is authoritative for Type I intangible heritage custodians.' + original_false_match: + place_id: ChIJtYmZWJQ0xEcRXeaDiHmHrMA + name: Heemraadsplein + formatted_address: Heemraadsplein, 3023 BD Rotterdam, Netherlands + fetch_timestamp: '2025-12-06T19:33:25.065137+00:00' + api_status: OK + coordinates: + latitude: 51.9128672 + longitude: 4.454419700000001 + google_place_types: + - park + - point_of_interest + - establishment + business_status: OPERATIONAL + rating: 4.1 + user_rating_count: 108 + photo_count: 10 + correction_timestamp: '2026-01-08T12:24:17Z' + correction_agent: opencode-claude-sonnet-4 + correction_method: manual_name_mismatch_review wikidata_enrichment: status: NOT_FOUND fetch_timestamp: '2025-12-06T19:42:27.320456+00:00' @@ -177,39 +184,30 @@ digital_platform_v2: data_quality_notes: wikidata_status: NOT_FOUND google_maps_status: current - google_maps_notes: 'Google Maps found Heemraadsplein (place_id: ChIJtYmZWJQ0xEcRXeaDiHmHrMA). - This is CORRECT - Heemraadsplein is the traditional location where the São João - Baptista celebration takes place. The square was historically called Pracinha - dQuebrôd (the penniless square) by the Cape Verdean community.' - notes: São João Baptista Rotterdam is organized by the SJBR working group, a volunteer - organization of second-generation Cape Verdeans in the Netherlands. 
The celebration - has taken place since the 1970s when Cape Verdean sailors settled in Rotterdam. - Since 2011, the festival has been organized by SJBR in cooperation with Stichting - Bruggenbouwers (which handles grants and sponsorships) and other Cape Verdean - organizations. The celebration is part of the festas de romária (pilgrimage - feasts) tradition from Cape Verde. No dedicated website exists; KIEN page serves - as primary online presence. + google_maps_notes: 'Google Maps found Heemraadsplein (place_id: ChIJtYmZWJQ0xEcRXeaDiHmHrMA). This is CORRECT - Heemraadsplein + is the traditional location where the São João Baptista celebration takes place. The square was historically called + Pracinha dQuebrôd (the penniless square) by the Cape Verdean community.' + notes: São João Baptista Rotterdam is organized by the SJBR working group, a volunteer organization of second-generation + Cape Verdeans in the Netherlands. The celebration has taken place since the 1970s when Cape Verdean sailors settled + in Rotterdam. Since 2011, the festival has been organized by SJBR in cooperation with Stichting Bruggenbouwers (which + handles grants and sponsorships) and other Cape Verdean organizations. The celebration is part of the festas de romária + (pilgrimage feasts) tradition from Cape Verde. No dedicated website exists; KIEN page serves as primary online presence. organization_profile: organization_type: Intangible Heritage Custodian - Cape Verdean Festival Organization scope: regional - description: Werkgroep São João Baptista Rotterdam (SJBR) organizes the annual - celebration of São João Baptista (Saint John the Baptist) for the Cape Verdean - community in Rotterdam and surroundings. 
The celebration, held on the Saturday - before or after June 24, includes a church service at Sint Viktor Church in - Waddinxveen, a procession carrying the statue of Saint John to Heemraadsplein, - traditional Kola San Jon dancing, tambores (drums), the navio (miniature ship) - ritual, and cultural activities including Cape Verdean cuisine, performances, - and storytelling. The celebration honors sailors, asks for protection, peace, - and fertility. Attendees come from Rotterdam, Zaandam, Belgium, Luxembourg, - and the UK. + description: Werkgroep São João Baptista Rotterdam (SJBR) organizes the annual celebration of São João Baptista (Saint + John the Baptist) for the Cape Verdean community in Rotterdam and surroundings. The celebration, held on the Saturday + before or after June 24, includes a church service at Sint Viktor Church in Waddinxveen, a procession carrying the statue + of Saint John to Heemraadsplein, traditional Kola San Jon dancing, tambores (drums), the navio (miniature ship) ritual, + and cultural activities including Cape Verdean cuisine, performances, and storytelling. The celebration honors sailors, + asks for protection, peace, and fertility. Attendees come from Rotterdam, Zaandam, Belgium, Luxembourg, and the UK. heritage_forms: - form_name: São João Baptista Rotterdam form_type: Religious and spiritual practice / Procession / Social practice kien_url: https://www.immaterieelerfgoed.nl/nl/sao-joao-bapista-rotterdam - description: Cape Verdean celebration of the birthday of Saint John the Baptist, - part of the festas de romária (pilgrimage feasts) tradition. Combines Catholic - religious elements with traditional Cape Verdean customs including Kola San - Jon dance, tambores drumming, and navio ship ritual. + description: Cape Verdean celebration of the birthday of Saint John the Baptist, part of the festas de romária (pilgrimage + feasts) tradition. 
Combines Catholic religious elements with traditional Cape Verdean customs including Kola San Jon + dance, tambores drumming, and navio ship ritual. primary_platform: platform_id: kien_page platform_url: https://www.immaterieelerfgoed.nl/nl/page/5331/sao-joao-rotterdam @@ -218,16 +216,14 @@ digital_platform_v2: key_locations: - name: Heemraadsplein type: festival_location - description: Main location for the São João Baptista celebration. Historically - called Pracinha dQuebrôd by the Cape Verdean community. Meeting place for Cape - Verdean sailors since the late 1950s. + description: Main location for the São João Baptista celebration. Historically called Pracinha dQuebrôd by the Cape Verdean + community. Meeting place for Cape Verdean sailors since the late 1950s. google_place_id: ChIJtYmZWJQ0xEcRXeaDiHmHrMA - name: Sint Viktor Church type: religious_venue location: Waddinxveen - description: Church where the statue of Saint John the Baptist is collected at - the start of the celebration, referencing the Cape Verdean tradition of fetching - the statue from the highlands. + description: Church where the statue of Saint John the Baptist is collected at the start of the celebration, referencing + the Cape Verdean tradition of fetching the statue from the highlands. partner_organizations: - name: Stichting Bruggenbouwers role: Coordinator for grants and sponsorship @@ -283,12 +279,10 @@ timeline_enrichment: - https://www.uitagendarotterdam.nl/alle-artikelen/vier-de-kaapverdiaanse-cultuur-bij-sao-joao-op-het-heemraadsplein/ - https://www.lilithmag.nl/agenda/2024/6/22/festival-gidstour-so-joo-pracinha-dquebrod-rotterdam linkup_query: '"Sao Joao Rotterdam" Rotterdam opgericht OR gesticht OR sinds' - linkup_answer: De viering van São João Baptista in Rotterdam wordt sinds eind - jaren zeventig georganiseerd door de Kaapverdiaanse gemeenschap. De werkgroep - São João Baptista Rotterdam (SJBR) organiseert het evenement sinds 2011. 
De - Stichting Sao Joao Baptista Rotterdam zelf is opgericht op 19 december 2017. - Het feest vindt plaats op het Heemraadsplein, ook wel Pracinha d’ Quebrôd genoemd, - een ontmoetingsplek voor Kaapverdiaanse zeelieden sinds eind jaren vijftig. + linkup_answer: De viering van São João Baptista in Rotterdam wordt sinds eind jaren zeventig georganiseerd door de Kaapverdiaanse + gemeenschap. De werkgroep São João Baptista Rotterdam (SJBR) organiseert het evenement sinds 2011. De Stichting Sao + Joao Baptista Rotterdam zelf is opgericht op 19 december 2017. Het feest vindt plaats op het Heemraadsplein, ook wel + Pracinha d’ Quebrôd genoemd, een ontmoetingsplek voor Kaapverdiaanse zeelieden sinds eind jaren vijftig. fetch_timestamp: '2025-12-15T22:49:18.192162+00:00' archive_path: web/1776/linkup/linkup_founding_20251215T224918Z.json extraction_method: linkup_answer_regex diff --git a/scripts/detect_gmaps_mismatches.py b/scripts/detect_gmaps_mismatches.py new file mode 100644 index 0000000000..dcd23e7745 --- /dev/null +++ b/scripts/detect_gmaps_mismatches.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +""" +Detect Google Maps domain mismatches for Type I (Intangible Heritage) custodians. + +Per Rule 40: KIEN Registry is authoritative for Type I custodians. +Google Maps frequently returns false matches for virtual/volunteer organizations. + +This script: +1. Reads all NL-*-I-*.yaml files +2. Compares google_maps_enrichment.website with contact.website +3. 
Reports domain mismatches indicating likely false Google Maps matches +""" + +import os +import sys +from pathlib import Path +from urllib.parse import urlparse +import yaml + +# Add parent dir to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + + +def extract_domain(url: str | None) -> str | None: + """Extract domain from URL, handling common edge cases.""" + if not url: + return None + + # Normalize URL + url = url.strip() + if not url.startswith(('http://', 'https://')): + url = 'https://' + url + + try: + parsed = urlparse(url) + domain = parsed.netloc.lower() + # Remove www. prefix for comparison + if domain.startswith('www.'): + domain = domain[4:] + return domain + except Exception: + return None + + +def load_yaml_file(filepath: Path) -> dict | None: + """Load a YAML file, handling errors gracefully.""" + try: + with open(filepath, 'r', encoding='utf-8') as f: + return yaml.safe_load(f) + except Exception as e: + print(f" Error loading {filepath}: {e}") + return None + + +def check_domain_mismatch(data: dict) -> dict | None: + """ + Check if Google Maps website differs from official contact website. + + Returns mismatch details if found, None otherwise. + """ + # Get Google Maps website + gmaps = data.get('google_maps_enrichment', {}) + gmaps_website = gmaps.get('website') + gmaps_name = gmaps.get('name', '') + gmaps_status = gmaps.get('status', '') + + # Skip if already marked as FALSE_MATCH + if gmaps_status == 'FALSE_MATCH': + return None + + # Get official website from multiple possible sources (priority order) + official_website = None + + # 1. Check custodian_name.official_website + custodian_name = data.get('custodian_name', {}) + if isinstance(custodian_name, dict): + official_website = custodian_name.get('official_website') + + # 2. 
Check original_entry.webadres_organisatie (KIEN source) + if not official_website: + original_entry = data.get('original_entry', {}) + if isinstance(original_entry, dict): + official_website = original_entry.get('webadres_organisatie') + + # 3. Check contact.website + if not official_website: + contact = data.get('contact', {}) + if isinstance(contact, dict): + official_website = contact.get('website') + + # 4. Check digital_platforms for WEBSITE type + if not official_website: + platforms = data.get('digital_platforms', []) or [] + for p in platforms: + if isinstance(p, dict) and p.get('platform_type') == 'WEBSITE': + official_website = p.get('platform_url') + break + + # Extract domains + gmaps_domain = extract_domain(gmaps_website) + official_domain = extract_domain(official_website) + + # Skip if no Google Maps website + if not gmaps_domain: + return None + + # Skip if no official website to compare + if not official_domain: + return { + 'type': 'NO_OFFICIAL_WEBSITE', + 'gmaps_domain': gmaps_domain, + 'gmaps_name': gmaps_name, + 'gmaps_website': gmaps_website, + } + + # Compare domains + if gmaps_domain != official_domain: + return { + 'type': 'DOMAIN_MISMATCH', + 'gmaps_domain': gmaps_domain, + 'official_domain': official_domain, + 'gmaps_name': gmaps_name, + 'gmaps_website': gmaps_website, + 'official_website': official_website, + } + + return None + + +def main(): + """Main function to scan all Type I custodian files.""" + # Find all Type I files + custodian_dir = Path(__file__).parent.parent / 'data' / 'custodian' + type_i_files = list(custodian_dir.glob('NL-*-I-*.yaml')) + + print(f"Scanning {len(type_i_files)} Type I custodian files...\n") + + mismatches = [] + no_gmaps = [] + already_fixed = [] + no_official = [] + ok = [] + + for filepath in sorted(type_i_files): + data = load_yaml_file(filepath) + if not data: + continue + + # Check for Google Maps enrichment + gmaps = data.get('google_maps_enrichment', {}) + if not gmaps: + 
no_gmaps.append(filepath.name) + continue + + # Check if already marked as FALSE_MATCH + if gmaps.get('status') == 'FALSE_MATCH': + already_fixed.append(filepath.name) + continue + + # Check for domain mismatch + result = check_domain_mismatch(data) + + if result: + if result['type'] == 'DOMAIN_MISMATCH': + mismatches.append({ + 'file': filepath.name, + 'custodian_name': data.get('custodian_name', {}).get('emic_name', filepath.stem), + **result + }) + elif result['type'] == 'NO_OFFICIAL_WEBSITE': + no_official.append({ + 'file': filepath.name, + 'custodian_name': data.get('custodian_name', {}).get('emic_name', filepath.stem), + **result + }) + else: + ok.append(filepath.name) + + # Print results + print("=" * 80) + print("DOMAIN MISMATCHES (Likely False Google Maps Matches)") + print("=" * 80) + + if mismatches: + for m in mismatches: + print(f"\n📛 {m['file']}") + print(f" Custodian: {m['custodian_name']}") + print(f" GMaps domain: {m['gmaps_domain']} ({m['gmaps_website']})") + print(f" Official domain: {m['official_domain']} ({m['official_website']})") + print(f" GMaps name: {m['gmaps_name']}") + else: + print("\nNo domain mismatches found!") + + print("\n" + "=" * 80) + print("NO OFFICIAL WEBSITE (Cannot verify Google Maps match)") + print("=" * 80) + + if no_official: + for item in no_official[:10]: # Show first 10 + print(f"\n⚠️ {item['file']}") + print(f" Custodian: {item['custodian_name']}") + print(f" GMaps: {item['gmaps_domain']} ({item['gmaps_website']})") + if len(no_official) > 10: + print(f"\n ... 
and {len(no_official) - 10} more") + + print("\n" + "=" * 80) + print("SUMMARY") + print("=" * 80) + print(f"\n✅ Domain matches OK: {len(ok)}") + print(f"📛 Domain mismatches: {len(mismatches)}") + print(f"⚠️ No official website: {len(no_official)}") + print(f"🔧 Already fixed (FALSE_MATCH): {len(already_fixed)}") + print(f"📭 No Google Maps data: {len(no_gmaps)}") + print(f"\nTotal files scanned: {len(type_i_files)}") + + # Return exit code based on findings + if mismatches: + print(f"\n⚠️ {len(mismatches)} files need review for potential false Google Maps matches!") + return 1 + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/scripts/fix_gmaps_false_matches.py b/scripts/fix_gmaps_false_matches.py new file mode 100644 index 0000000000..792f58e792 --- /dev/null +++ b/scripts/fix_gmaps_false_matches.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 +""" +Fix Google Maps false matches in Type I custodian files. + +This script marks Google Maps enrichment as FALSE_MATCH when the domain +from Google Maps doesn't match the official domain from KIEN registry. + +Per Rule 40: KIEN is TIER_1_AUTHORITATIVE for Type I custodians. +""" + +import yaml +import sys +from pathlib import Path +from urllib.parse import urlparse + + +def extract_domain(url: str) -> str: + """Extract domain from URL, handling None and empty strings.""" + if not url: + return "" + try: + parsed = urlparse(url) + domain = parsed.netloc or parsed.path + domain = domain.lower().replace("www.", "") + return domain + except Exception: + return "" + + +def fix_gmaps_false_match(file_path: Path, dry_run: bool = False) -> dict: + """ + Fix a single file's Google Maps enrichment if it's a false match. + + Returns dict with status info. 
+ """ + with open(file_path, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + result = { + 'file': file_path.name, + 'action': 'skipped', + 'reason': None + } + + # Check if already marked as FALSE_MATCH + gmaps = data.get('google_maps_enrichment', {}) + if gmaps.get('status') == 'FALSE_MATCH': + result['reason'] = 'Already marked as FALSE_MATCH' + return result + + # Get domains to compare + gmaps_website = gmaps.get('website', '') + + # Check contact website first + official_website = data.get('contact', {}).get('website', '') + + # If no contact website, check digital_platforms for WEBSITE type + if not official_website: + for p in data.get('digital_platforms', []): + if p.get('platform_type') == 'WEBSITE': + official_website = p.get('platform_url', '') + break + + gmaps_domain = extract_domain(gmaps_website) + official_domain = extract_domain(official_website) + + if not gmaps_domain or not official_domain: + result['reason'] = 'Missing domain info' + return result + + # Check if domains match + if gmaps_domain == official_domain: + result['reason'] = 'Domains match' + return result + + # Domains don't match - this is a false match + gmaps_name = gmaps.get('name', 'Unknown') + custodian_name = data.get('custodian_name', {}).get('claim_value', 'Unknown') + + # Create the FALSE_MATCH structure + data['google_maps_enrichment'] = { + 'status': 'FALSE_MATCH', + 'false_match_reason': ( + f'Google Maps returned "{gmaps_name}" (website: {gmaps_website}) ' + f'instead of "{custodian_name}" (official website: {official_website}). ' + f'Domain mismatch: {gmaps_domain} vs {official_domain}. ' + 'Per Rule 40: KIEN is authoritative for Type I intangible heritage custodians.' 
+ ), + 'original_false_match': gmaps, + 'correction_timestamp': '2025-01-08T00:00:00Z', + 'correction_agent': 'opencode-claude-sonnet-4' + } + + # Fix location if it has Google Maps coordinates + location = data.get('location', {}) + coord_prov = location.get('coordinate_provenance', {}) + if coord_prov.get('source_type') == 'GOOGLE_MAPS': + # Remove coordinates, keep city info + data['location'] = { + 'city': location.get('city'), + 'region_code': location.get('region_code'), + 'country': location.get('country', 'NL'), + 'geonames_id': location.get('geonames_id'), + 'geonames_name': location.get('geonames_name'), + 'feature_code': location.get('feature_code'), + 'note': ( + 'Coordinates removed due to Google Maps false match. ' + f'Original coordinates were from "{gmaps_name}".' + ), + 'coordinate_provenance_removed': { + 'reason': 'FALSE_MATCH', + 'original_latitude': location.get('latitude'), + 'original_longitude': location.get('longitude'), + }, + 'normalization_timestamp': '2025-01-08T00:00:00Z' + } + + # Add provenance correction + if 'provenance' not in data: + data['provenance'] = {} + if 'corrections' not in data['provenance']: + data['provenance']['corrections'] = [] + + data['provenance']['corrections'].append({ + 'correction_date': '2025-01-08T00:00:00Z', + 'correction_type': 'google_maps_false_match', + 'description': ( + f'Marked Google Maps enrichment as FALSE_MATCH. ' + f'GMaps returned "{gmaps_name}" ({gmaps_domain}) instead of ' + f'"{custodian_name}" ({official_domain}).' 
+ ), + 'corrected_by': 'opencode-claude-sonnet-4' + }) + + if not dry_run: + with open(file_path, 'w', encoding='utf-8') as f: + yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False, width=100) + + result['action'] = 'fixed' + result['gmaps_name'] = gmaps_name + result['gmaps_domain'] = gmaps_domain + result['official_domain'] = official_domain + + return result + + +def main(): + import argparse + parser = argparse.ArgumentParser(description='Fix Google Maps false matches') + parser.add_argument('files', nargs='*', help='Specific files to fix (or all Type I if none)') + parser.add_argument('--dry-run', action='store_true', help='Show what would be fixed without changing files') + args = parser.parse_args() + + custodian_dir = Path("/Users/kempersc/apps/glam/data/custodian") + + if args.files: + files = [custodian_dir / f for f in args.files] + else: + # Find all Type I files + files = list(custodian_dir.glob("*-I-*.yaml")) + + print(f"{'[DRY RUN] ' if args.dry_run else ''}Processing {len(files)} files...") + print() + + fixed = [] + skipped = [] + + for file_path in sorted(files): + if not file_path.exists(): + print(f"⚠️ {file_path.name}: File not found") + continue + + try: + result = fix_gmaps_false_match(file_path, dry_run=args.dry_run) + + if result['action'] == 'fixed': + fixed.append(result) + print(f"✅ {result['file']}") + print(f" GMaps: {result['gmaps_name']} ({result['gmaps_domain']})") + print(f" Official: {result['official_domain']}") + else: + skipped.append(result) + if args.dry_run: + print(f"⏭️ {result['file']}: {result['reason']}") + except Exception as e: + print(f"❌ {file_path.name}: {e}") + + print() + print(f"{'[DRY RUN] ' if args.dry_run else ''}Summary:") + print(f" Fixed: {len(fixed)}") + print(f" Skipped: {len(skipped)}") + + +if __name__ == '__main__': + main() diff --git a/scripts/fix_moza_false_matches.py b/scripts/fix_moza_false_matches.py new file mode 100644 index 0000000000..0bdfa66aed --- /dev/null +++ 
#!/usr/bin/env python3
"""
Fix false matches in NL-GE-TIE-I-M.yaml (Stichting MOZA).

Issues to fix:
1. Google Maps FALSE MATCH: MOZA Makelaardij (real estate) vs Stichting MOZA (heritage)
2. YouTube FALSE MATCH: Wolfgang Amadeus Mozart - Topic vs Stichting MOZA
3. Location coordinates: Wrong Wapenveld coords from Google Maps
"""

from datetime import datetime, timezone
from pathlib import Path

MOZA_FILENAME = 'NL-GE-TIE-I-M.yaml'
CORRECTION_AGENT = 'opencode-claude-sonnet-4'

# YouTube channel metadata preserved in the false-match record; the video list is
# deliberately not part of it.
_YOUTUBE_KEPT_KEYS = (
    'fetch_timestamp',
    'api_endpoint',
    'api_version',
    'channel_id',
    'channel_url',
    'title',
    'subscriber_count',
    'video_count',
    'view_count',
    'status',
)

# Provenance log text, keyed by correction type.
_CORRECTION_DESCRIPTIONS = {
    'google_maps_false_match': (
        'Marked Google Maps enrichment as FALSE_MATCH. Google Maps returned "MOZA Makelaardij" '
        '(real estate agency at moza.nl) instead of "Stichting MOZA" (heritage foundation at moza.nu). '
        'Per Rule 40: KIEN is authoritative for Type I custodians.'
    ),
    'youtube_false_match': (
        'Marked YouTube enrichment as FALSE_MATCH. YouTube search returned "Wolfgang Amadeus Mozart - Topic" '
        'channel instead of Stichting MOZA. This is a name similarity false match (MOZA → Mozart).'
    ),
    'location_coordinates_removed': (
        'Removed incorrect coordinates from location section. Coordinates were from Google Maps '
        'false match (MOZA Makelaardij in Wapenveld). Correct location is Tiel per KIEN registry.'
    ),
}

# Console summary line printed for each correction that was applied.
_APPLIED_MESSAGES = {
    'google_maps_false_match': '- Marked Google Maps as FALSE_MATCH',
    'youtube_false_match': '- Marked YouTube as FALSE_MATCH (removed Mozart videos)',
    'location_coordinates_removed': '- Fixed location coordinates',
}


def str_representer(dumper, data):
    """Represent *data* as a YAML string scalar, folded when it spans several lines.

    Strings containing a newline are emitted with the folded style (``>``);
    every other string uses the dumper's default scalar style.
    """
    if '\n' in data:
        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='>')
    return dumper.represent_scalar('tag:yaml.org,2002:str', data)


def apply_moza_corrections(data, timestamp):
    """Apply the Stichting MOZA corrections to the loaded custodian record in place.

    Each section is corrected at most once: a section that is missing, empty or
    already corrected is left untouched, so running the fix again neither nests
    the false-match records nor duplicates provenance entries.

    Args:
        data: Mapping loaded from the custodian YAML file; modified in place.
        timestamp: Timestamp string stored on every correction that is applied.

    Returns:
        The list of correction types applied, in the order they were applied
        (empty when there was nothing left to fix).
    """
    applied = []

    # 1. Google Maps enrichment: keep the wrong record for audit, flag it as a false match.
    gmaps = data.get('google_maps_enrichment') or {}
    if gmaps and gmaps.get('status') != 'FALSE_MATCH':
        data['google_maps_enrichment'] = {
            'status': 'FALSE_MATCH',
            'false_match_reason': (
                'Google Maps returned "MOZA Makelaardij" (real estate agency at moza.nl in Wapenveld) '
                'instead of "Stichting MOZA" (Molukse heritage foundation at moza.nu in Tiel). '
                'These are completely different organizations - one is a real estate agency, '
                'the other is an intangible heritage custodian for Moluccan neighborhood culture.'
            ),
            'original_false_match': gmaps,
            'correction_timestamp': timestamp,
            'correction_agent': CORRECTION_AGENT,
        }
        applied.append('google_maps_false_match')

    # 2. YouTube enrichment: keep only channel metadata, drop the video list.
    youtube = data.get('youtube_enrichment') or {}
    if youtube and youtube.get('status') != 'FALSE_MATCH':
        videos_removed = youtube.get('videos') or []
        data['youtube_enrichment'] = {
            'status': 'FALSE_MATCH',
            'false_match_reason': (
                'YouTube search returned "Wolfgang Amadeus Mozart - Topic" (classical music auto-generated '
                'channel with 190K subscribers, 5709 videos of Mozart compositions) instead of a channel '
                'for "Stichting MOZA" (Molukse heritage foundation). The search matched "MOZA" to "Mozart" '
                'due to name similarity. No YouTube channel found for Stichting MOZA.'
            ),
            'original_false_match': {key: youtube.get(key) for key in _YOUTUBE_KEPT_KEYS},
            'videos_removed_note': (
                f'Removed {len(videos_removed)} Mozart classical music videos from original data as they have '
                'no relevance to Stichting MOZA. Videos were from YouTube auto-generated Mozart Topic channel.'
            ),
            'correction_timestamp': timestamp,
            'correction_agent': CORRECTION_AGENT,
        }
        applied.append('youtube_false_match')

    # 3. Location: replace the Google Maps derived coordinates with the KIEN city data.
    #    The 'coordinate_provenance_removed' marker shows the section was already
    #    rewritten; rewriting again would overwrite the preserved originals with None.
    location = data.get('location')
    if isinstance(location, dict) and 'coordinate_provenance_removed' not in location:
        coord_prov = location.get('coordinate_provenance') or {}
        data['location'] = {
            'city': 'Tiel',
            'region_code': 'GE',
            'country': 'NL',
            'geonames_id': 2746331,
            'geonames_name': 'Tiel',
            'feature_code': 'PPL',
            'note': (
                'Coordinates removed due to Google Maps false match. '
                'Location derived from KIEN registry (Tiel). '
                'Original false coordinates were from MOZA Makelaardij in Wapenveld.'
            ),
            'coordinate_provenance_removed': {
                'reason': 'FALSE_MATCH - coordinates were from wrong organization',
                'original_latitude': location.get('latitude'),
                'original_longitude': location.get('longitude'),
                'original_source': coord_prov.get('source_type'),
                'original_entity_id': coord_prov.get('entity_id'),
            },
            'normalization_timestamp': timestamp,
        }
        applied.append('location_coordinates_removed')

    # 4. Provenance: log exactly the corrections that were made in this run.
    if applied:
        provenance = data.get('provenance')
        if provenance is None:
            provenance = data['provenance'] = {}
        corrections = provenance.get('corrections')
        if corrections is None:
            corrections = provenance['corrections'] = []
        for correction_type in applied:
            corrections.append({
                'correction_date': timestamp,
                'correction_type': correction_type,
                'description': _CORRECTION_DESCRIPTIONS[correction_type],
                'corrected_by': CORRECTION_AGENT,
            })

    return applied


def fix_moza_file(file_path=None, timestamp=None):
    """Correct the false matches recorded in the Stichting MOZA custodian file.

    Args:
        file_path: YAML file to fix. Defaults to ``data/custodian/NL-GE-TIE-I-M.yaml``
            relative to the repository root (the parent of this script's directory).
        timestamp: Correction timestamp to record. Defaults to the current UTC time
            formatted as ``YYYY-MM-DDTHH:MM:SSZ``.

    Raises:
        ValueError: If the file does not contain a YAML mapping.
    """
    # PyYAML is imported here so the pure helpers above can be imported without it.
    import yaml

    if file_path is None:
        repo_root = Path(__file__).resolve().parent.parent
        file_path = repo_root / 'data' / 'custodian' / MOZA_FILENAME
    file_path = Path(file_path)

    if timestamp is None:
        # Stamp the real time of the correction; a fixed literal goes stale and can
        # end up earlier than the enrichment it corrects.
        timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

    with open(file_path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
    if not isinstance(data, dict):
        raise ValueError(f'{file_path} does not contain a YAML mapping')

    applied = apply_moza_corrections(data, timestamp)
    if not applied:
        print(f"Nothing to fix in {file_path}")
        return

    # Multi-line strings are written in folded style; registered right before dumping.
    yaml.add_representer(str, str_representer)
    with open(file_path, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False, width=100)

    print(f"Fixed {file_path}")
    for correction_type in applied:
        print(_APPLIED_MESSAGES[correction_type])
    print("- Added provenance corrections")


if __name__ == '__main__':
    fix_moza_file()
"""
Fix obvious name mismatch false matches for Type I custodians.

These are files where Google Maps returned a completely different organization
than the KIEN registry entry, but we couldn't auto-detect because there was
no official website to compare domains.

Per Rule 40: KIEN Registry is authoritative for Type I custodians.
"""

from datetime import datetime, timezone
from pathlib import Path

# Files with definite name mismatches (manually verified)
FILES_TO_FIX = {
    "NL-GE-ARN-I-DT.yaml": {
        "kien_name": "Dick Timmerman",
        "kien_type": "heritage practitioner (person)",
        "gmaps_name": "Timmer & Onderhoudsbedrijf Dik Lubbertsen",
        "gmaps_type": "carpentry business"
    },
    "NL-OV-IJS-I-RB.yaml": {
        "kien_name": "Ria Bos",
        "kien_type": "traditional cigar maker (person)",
        "gmaps_name": "Ria Money Transfer Agent",
        "gmaps_type": "money transfer business"
    },
    "NL-ZH-ROT-I-K.yaml": {
        "kien_name": "Stichting Kracom",
        "kien_type": "Krampuslauf Rotterdam heritage foundation",
        "gmaps_name": "Happy Caps",
        "gmaps_type": "retail store in different city (Apeldoorn)"
    },
    "NL-UT-UTR-I-FNV.yaml": {
        "kien_name": "Federatie Nederlandse Vertelorganisaties",
        "kien_type": "Dutch storytelling federation",
        "gmaps_name": "NET Foundation",
        "gmaps_type": "different foundation"
    },
    "NL-ZH-AAD-I-DA.yaml": {
        "kien_name": "Stichting dodenherdenking Alphen",
        "kien_type": "memorial foundation Alphen aan den Rijn",
        "gmaps_name": "Waalsdorpervlakte Bourdon Bell",
        "gmaps_type": "different memorial in different location"
    },
    "NL-ZH-ROT-I-SJR.yaml": {
        "kien_name": "Sao Joao Rotterdam",
        "kien_type": "Portuguese heritage festival organization",
        "gmaps_name": "Heemraadsplein",
        "gmaps_type": "public square (location, not organization)"
    },
    "NL-GE-OOS-I-SS.yaml": {
        "kien_name": "sport en spel",
        "kien_type": "traditional games heritage organization",
        "gmaps_name": "Damu Sport en Spel Verhuur",
        "gmaps_type": "sports equipment rental business"
    },
    "NL-OV-OMM-I-EO.yaml": {
        "kien_name": "Eiertikken Ommen",
        "kien_type": "traditional egg-tapping game heritage",
        "gmaps_name": "Restaurant Ekkelenkamp Ommen",
        "gmaps_type": "restaurant"
    },
}


def build_false_match_record(gmaps, fix_info, timestamp=None):
    """Build the FALSE_MATCH record that replaces a wrong Google Maps enrichment.

    Args:
        gmaps: The original ``google_maps_enrichment`` mapping; stored unchanged
            under ``original_false_match``.
        fix_info: Entry from ``FILES_TO_FIX`` with the keys ``kien_name``,
            ``kien_type``, ``gmaps_name`` and ``gmaps_type``.
        timestamp: Correction timestamp to record. Defaults to the current UTC
            time formatted as ``YYYY-MM-DDTHH:MM:SSZ``.

    Returns:
        A new dict describing the false match.
    """
    if timestamp is None:
        timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

    false_match_reason = (
        f"Google Maps returned \"{fix_info['gmaps_name']}\" ({fix_info['gmaps_type']}) "
        f"instead of \"{fix_info['kien_name']}\" ({fix_info['kien_type']}). "
        f"Name mismatch detected during manual review. "
        f"Per Rule 40: KIEN is authoritative for Type I intangible heritage custodians."
    )
    return {
        'status': 'FALSE_MATCH',
        'false_match_reason': false_match_reason,
        'original_false_match': gmaps,
        'correction_timestamp': timestamp,
        'correction_agent': 'opencode-claude-sonnet-4',
        'correction_method': 'manual_name_mismatch_review'
    }


def fix_gmaps_false_match(filepath: Path, fix_info: dict) -> bool:
    """Mark Google Maps enrichment as FALSE_MATCH for a file.

    Args:
        filepath: Custodian YAML file to rewrite.
        fix_info: Entry from ``FILES_TO_FIX`` describing the mismatch.

    Returns:
        True when the file was rewritten; False when it has no Google Maps
        enrichment (including an empty or non-mapping document) or is already
        marked as a false match.
    """
    # PyYAML is imported here so build_false_match_record can be imported without it.
    import yaml

    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load returns None for an empty document; treat that like "no enrichment"
    # instead of crashing on data.get.
    gmaps = data.get('google_maps_enrichment') if isinstance(data, dict) else None
    if not isinstance(gmaps, dict) or not gmaps:
        print(f"  No Google Maps data in {filepath.name}")
        return False

    if gmaps.get('status') == 'FALSE_MATCH':
        print(f"  Already fixed: {filepath.name}")
        return False

    # Restructure the data
    data['google_maps_enrichment'] = build_false_match_record(gmaps, fix_info)

    # Write back
    with open(filepath, 'w', encoding='utf-8') as f:
        yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False, width=120)

    print(f"  ✓ Fixed: {filepath.name}")
    print(f"    KIEN: {fix_info['kien_name']}")
    print(f"    GMaps (wrong): {fix_info['gmaps_name']}")
    return True


def main():
    """Fix every file listed in ``FILES_TO_FIX`` and print a summary.

    Returns:
        Process exit status: 0 when every listed file was found and processed
        without error, 1 when at least one file was missing or failed.
    """
    custodian_dir = Path(__file__).parent.parent / 'data' / 'custodian'

    print(f"Fixing {len(FILES_TO_FIX)} files with name mismatch false matches...\n")

    fixed = 0
    failed = 0
    for filename, fix_info in FILES_TO_FIX.items():
        filepath = custodian_dir / filename
        if not filepath.exists():
            print(f"  File not found: {filename}")
            failed += 1
            continue

        # One unreadable or malformed file must not abort the rest of the batch;
        # report it and carry on.
        try:
            if fix_gmaps_false_match(filepath, fix_info):
                fixed += 1
        except Exception as exc:
            print(f"  ✗ Error in {filename}: {exc}")
            failed += 1

    print(f"\n✓ Fixed {fixed} files")
    if failed:
        print(f"✗ {failed} files missing or failed")
    return 1 if failed else 0


if __name__ == '__main__':
    # SystemExit works everywhere; the exit() builtin is only injected by the site module.
    raise SystemExit(main())