From 9159ff35db567fbf10e1606d0d5da233036bcbb7 Mon Sep 17 00:00:00 2001 From: kempersc Date: Tue, 30 Dec 2025 03:43:47 +0100 Subject: [PATCH] Add custodian entry for Leica AG with data contamination fixes and location corrections --- data/custodian/CH-SG-HEE-L-LABD.yaml | 206 +++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 data/custodian/CH-SG-HEE-L-LABD.yaml diff --git a/data/custodian/CH-SG-HEE-L-LABD.yaml b/data/custodian/CH-SG-HEE-L-LABD.yaml new file mode 100644 index 0000000000..0fa17f6c2f --- /dev/null +++ b/data/custodian/CH-SG-HEE-L-LABD.yaml @@ -0,0 +1,206 @@ +original_entry: + name: Leica AG ‒ Bibliothek und Dokumentation + institution_type: LIBRARY + source: CH-Annotator (switzerland_isil_ch_annotator.yaml) + identifiers: + - &id001 + identifier_scheme: ISIL + identifier_value: CH-001172-7 + identifier_url: https://www.isil.nb.admin.ch/en/?isil=CH-001172-7 + locations: + - region: SG + country: CH +processing_timestamp: '2025-12-06T23:39:11.889212+00:00' +ghcid: + ghcid_current: CH-SG-HEE-L-LABD + ghcid_original: CH-SG-XXX-L-LABD + ghcid_uuid: 5d87e93d-3ea5-56cd-8848-816caad77808 + ghcid_uuid_sha256: b02cbd79-83ad-83c4-a10e-8ee35d418e13 + ghcid_numeric: 12694729779248464836 + record_id: 18164a6e-37ad-4acb-89c2-6c8f87bc45ee + generation_timestamp: '2025-12-06T23:39:11.889212+00:00' + location_resolution: + method: GEONAMES_DIRECT + country_code: CH + region_code: SG + region_name: St. Gallen + city_code: HEE + city_name: Heerbrugg + geonames_id: 2660395 + feature_code: PPL + latitude: 47.41488 + longitude: 9.62684 + resolution_date: '2025-12-30T00:00:00+00:00' + notes: 'Data contamination fix: Google Places API returned Wetzlar, Germany (Leica Welt) instead of Heerbrugg, Switzerland (Leica Geosystems AG). Corrected using GeoNames and original ISIL registry data (region hint: SG).' 
+ ghcid_history: + - ghcid: CH-SG-XXX-L-LABD + ghcid_numeric: 2323009709213273646 + valid_from: '2025-12-06T23:39:11.889212+00:00' + reason: Initial GHCID from CH-Annotator (switzerland_isil_ch_annotator.yaml) + valid_to: '2025-12-07T16:39:44.774469+00:00' + - ghcid: CH-SH-BAR-L-LABD + ghcid_numeric: 2323009709213273646 + valid_from: '2025-12-07T16:39:44.774469+00:00' + valid_to: '2025-12-30T00:00:00+00:00' + reason: 'CONTAMINATED: Google Places API returned wrong institution (Wetzlar, Germany leica-welt.com)' + - ghcid: CH-SG-HEE-L-LABD + ghcid_numeric: 12694729779248464836 + valid_from: '2025-12-30T00:00:00+00:00' + reason: 'Data contamination fix: Corrected location to Heerbrugg, Canton of St. Gallen, Switzerland based on original ISIL registry data (region hint: SG)' +custodian_name: + claim_type: custodian_name + claim_value: Leica AG ‒ Bibliothek und Dokumentation + source_type: ch_annotator +identifiers: + - identifier_scheme: GHCID + identifier_value: CH-SG-HEE-L-LABD + - identifier_scheme: GHCID_UUID + identifier_value: 5d87e93d-3ea5-56cd-8848-816caad77808 + - identifier_scheme: GHCID_UUID_SHA256 + identifier_value: b02cbd79-83ad-83c4-a10e-8ee35d418e13 + - identifier_scheme: GHCID_NUMERIC + identifier_value: '12694729779248464836' + - identifier_scheme: RECORD_ID + identifier_value: 18164a6e-37ad-4acb-89c2-6c8f87bc45ee + - *id001 +provenance: + data_source: CSV_REGISTRY + data_tier: TIER_1_AUTHORITATIVE + extraction_date: '2025-11-19T09:22:31.452165' + extraction_method: 'Created from CH-Annotator file: switzerland_isil_ch_annotator.yaml' + confidence_score: 0.95 + notes: + - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:07Z + - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:47:34Z + - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:48:58Z + - 'YouTube/Google Maps enrichment 2025-12-09T09:15:52Z: YouTube: not found' + - 'DATA CONTAMINATION FIX 2025-12-30: Google Places 
API returned Wetzlar, Germany (leica-welt.com - Leica Camera AG museum) instead of correct location Heerbrugg, Switzerland (Leica Geosystems AG library). Removed contaminated google_maps_enrichment and logo_enrichment sections. Corrected location using GeoNames (ID 2660395) and original ISIL registry data which indicated SG (St. Gallen) region.' + schema_version: 2.0.0 + enrichment_provenance: + google_maps_enrichment: + content_hash: sha256-KAYY/BYHOjLbgYAPhVGXu8H37D/6M02/Gpm5qUFUFVU= + verified_at: '2025-12-28T19:58:03.221554+00:00' + status: INVALIDATED + invalidation_reason: 'Data contamination: returned Wetzlar, Germany instead of Heerbrugg, Switzerland' + invalidated_at: '2025-12-30T00:00:00+00:00' + logo_enrichment: + status: INVALIDATED + invalidation_reason: 'Based on wrong website (leica-welt.com - Leica Camera AG in Germany)' + invalidated_at: '2025-12-30T00:00:00+00:00' + provenance_schema_version: '2.0' + standards_compliance: + - W3C PROV-O + - W3C SRI (content hashes) + generated_at: '2025-12-30T00:00:00+00:00' +ch_annotator: + convention_id: ch_annotator-v1_7_0 + convention_version: 1.7.0 + entity_classification: + hypernym: GRP + hypernym_label: GROUP + subtype: GRP.HER.LIB + subtype_label: LIBRARY + ontology_class: schema:Library + alternative_classes: + - org:FormalOrganization + - rov:RegisteredOrganization + - glam:HeritageCustodian + extraction_provenance: + namespace: glam + path: /files/switzerland_isil.yaml + timestamp: '2025-11-19T09:22:31.452165' + agent: claude-conversation + context_convention: ch_annotator-v1_7_0 + annotation_provenance: + annotation_agent: opencode-claude-sonnet-4 + annotation_date: '2025-12-06T21:13:14.508879+00:00' + annotation_method: retroactive CH-Annotator application via batch script + source_file: switzerland_isil.yaml + annotation_metadata: + confidence_score: 0.95 + verified: false + verification_date: + verified_by: + entity_claims: + - claim_type: full_name + claim_value: Leica AG ‒ Bibliothek und 
Dokumentation + property_uri: skos:prefLabel + provenance: + namespace: glam + path: /files/switzerland_isil.yaml + timestamp: '2025-11-19T09:22:31.452165' + agent: claude-conversation + context_convention: ch_annotator-v1_7_0 + confidence: 0.95 + - claim_type: institution_type + claim_value: LIBRARY + property_uri: rdf:type + provenance: + namespace: glam + path: /files/switzerland_isil.yaml + timestamp: '2025-11-19T09:22:31.452165' + agent: claude-conversation + context_convention: ch_annotator-v1_7_0 + confidence: 0.95 + integration_note: + created_from: switzerland_isil_ch_annotator.yaml + creation_date: '2025-12-06T23:39:11.889212+00:00' + creation_method: create_custodian_from_ch_annotator.py +# REMOVED: google_maps_enrichment - contained data from Wetzlar, Germany (wrong country) +# REMOVED: logo_enrichment - based on wrong website (leica-welt.com - Leica Camera AG in Germany) +youtube_status: NOT_FOUND +youtube_search_query: Leica AG ‒ Bibliothek und Dokumentation official +youtube_search_timestamp: '2025-12-09T09:15:52.995368+00:00' +location: + latitude: 47.41488 + longitude: 9.62684 + coordinate_provenance: + source_type: GEONAMES_DIRECT + source_path: geonames.db + entity_id: 2660395 + resolution_method: GEONAMES_DIRECT + notes: 'Data contamination fix: Coordinates from GeoNames for Heerbrugg, Switzerland' + city: Heerbrugg + region: St. Gallen + region_code: SG + country: CH + geonames_id: 2660395 + feature_code: PPL + normalization_timestamp: '2025-12-30T00:00:00+00:00' +wikidata_enrichment: + wikidata_id: + enrichment_timestamp: '2025-12-30T00:00:00Z' + enrichment_method: manual_wikidata_lookup + enrichment_note: No Wikidata entry found for Leica Geosystems AG library. Note - Leica Geosystems AG (Wikidata QID unverified - the recorded value was malformed) is headquartered in Heerbrugg, Switzerland. This is different from Leica Camera AG (Q156481) headquartered in Wetzlar, Germany. 
+data_contamination_fix: + fix_date: '2025-12-30T00:00:00+00:00' + fix_agent: opencode-claude-sonnet-4 + contamination_source: Google Places API + contamination_type: wrong_country + wrong_institution: Leica Welt (Leica Camera AG museum/visitor center) + wrong_country: Germany + wrong_coordinates: + latitude: 50.5527416 + longitude: 8.5356831 + wrong_address: Am Leitz-Park 6, 35578 Wetzlar, Germany + wrong_website: https://leica-welt.com/ + correct_institution: Leica AG ‒ Bibliothek und Dokumentation (Leica Geosystems AG) + correct_country: Switzerland + correct_coordinates: + latitude: 47.41488 + longitude: 9.62684 + correct_city: Heerbrugg + correct_region: St. Gallen (SG) + removed_sections: + - google_maps_enrichment + - logo_enrichment + old_ghcid: CH-SH-BAR-L-LABD + new_ghcid: CH-SG-HEE-L-LABD + evidence: + - type: isil_registry + value: ISIL CH-001172-7 specifies region SG (St. Gallen canton) + url: https://www.isil.nb.admin.ch/en/?isil=CH-001172-7 + - type: original_source + value: Original entry locations specified region SG, country CH + - type: corporate_disambiguation + value: Leica Geosystems AG (surveying/geospatial company, HQ in Heerbrugg, CH) is distinct from Leica Camera AG (camera company, HQ in Wetzlar, DE). Google confused the two companies with similar names.