Add custodian entry for Leica AG with data contamination fixes and location corrections

This commit is contained in:
kempersc 2025-12-30 03:43:47 +01:00
parent d64f857aa9
commit 9159ff35db

View file

@ -0,0 +1,206 @@
original_entry:
name: Leica AG Bibliothek und Dokumentation
institution_type: LIBRARY
source: CH-Annotator (switzerland_isil_ch_annotator.yaml)
identifiers:
- &id001
identifier_scheme: ISIL
identifier_value: CH-001172-7
identifier_url: https://www.isil.nb.admin.ch/en/?isil=CH-001172-7
locations:
- region: SG
country: CH
processing_timestamp: '2025-12-06T23:39:11.889212+00:00'
ghcid:
ghcid_current: CH-SG-HEE-L-LABD
ghcid_original: CH-SG-XXX-L-LABD
ghcid_uuid: 5d87e93d-3ea5-56cd-8848-816caad77808
ghcid_uuid_sha256: b02cbd79-83ad-83c4-a10e-8ee35d418e13
ghcid_numeric: 12694729779248464836
record_id: 18164a6e-37ad-4acb-89c2-6c8f87bc45ee
generation_timestamp: '2025-12-06T23:39:11.889212+00:00'
location_resolution:
method: GEONAMES_DIRECT
country_code: CH
region_code: SG
region_name: St. Gallen
city_code: HEE
city_name: Heerbrugg
geonames_id: 2660395
feature_code: PPL
latitude: 47.41488
longitude: 9.62684
resolution_date: '2025-12-30T00:00:00+00:00'
notes: 'Data contamination fix: Google Places API returned Wetzlar, Germany (Leica Welt) instead of Heerbrugg, Switzerland (Leica Geosystems AG). Corrected using GeoNames and original ISIL registry data (region hint: SG).'
ghcid_history:
- ghcid: CH-SG-XXX-L-LABD
ghcid_numeric: 2323009709213273646
valid_from: '2025-12-06T23:39:11.889212+00:00'
reason: Initial GHCID from CH-Annotator (switzerland_isil_ch_annotator.yaml)
valid_to: '2025-12-07T16:39:44.774469+00:00'
- ghcid: CH-SH-BAR-L-LABD
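ghcid_numeric: 2323009709213273646  # NOTE(review): identical to the numeric recorded for CH-SG-XXX-L-LABD above although the GHCID differs - presumably not recomputed at this step; verify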
valid_from: '2025-12-07T16:39:44.774469+00:00'
valid_to: '2025-12-30T00:00:00+00:00'
reason: 'CONTAMINATED: Google Places API returned the wrong institution (Leica Welt in Wetzlar, Germany; leica-welt.com)'
- ghcid: CH-SG-HEE-L-LABD
ghcid_numeric: 12694729779248464836
valid_from: '2025-12-30T00:00:00+00:00'
reason: 'Data contamination fix: Corrected location to Heerbrugg, Canton of St. Gallen, Switzerland based on original ISIL registry data (region hint: SG)'
custodian_name:
claim_type: custodian_name
claim_value: Leica AG Bibliothek und Dokumentation
source_type: ch_annotator
identifiers:
- identifier_scheme: GHCID
identifier_value: CH-SG-HEE-L-LABD
- identifier_scheme: GHCID_UUID
identifier_value: 5d87e93d-3ea5-56cd-8848-816caad77808
- identifier_scheme: GHCID_UUID_SHA256
identifier_value: b02cbd79-83ad-83c4-a10e-8ee35d418e13
- identifier_scheme: GHCID_NUMERIC
identifier_value: '12694729779248464836'
- identifier_scheme: RECORD_ID
identifier_value: 18164a6e-37ad-4acb-89c2-6c8f87bc45ee
- *id001
provenance:
data_source: CSV_REGISTRY
data_tier: TIER_1_AUTHORITATIVE
extraction_date: '2025-11-19T09:22:31.452165'
extraction_method: 'Created from CH-Annotator file: switzerland_isil_ch_annotator.yaml'
confidence_score: 0.95
notes:
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:07Z
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:47:34Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:48:58Z
- 'YouTube/Google Maps enrichment 2025-12-09T09:15:52Z: YouTube: not found'
- 'DATA CONTAMINATION FIX 2025-12-30: Google Places API returned Wetzlar, Germany (leica-welt.com - Leica Camera AG museum) instead of correct location Heerbrugg, Switzerland (Leica Geosystems AG library). Removed contaminated google_maps_enrichment and logo_enrichment sections. Corrected location using GeoNames (ID 2660395) and original ISIL registry data which indicated SG (St. Gallen) region.'
schema_version: 2.0.0
enrichment_provenance:
google_maps_enrichment:
content_hash: sha256-KAYY/BYHOjLbgYAPhVGXu8H37D/6M02/Gpm5qUFUFVU=
verified_at: '2025-12-28T19:58:03.221554+00:00'
status: INVALIDATED
invalidation_reason: 'Data contamination: returned Wetzlar, Germany instead of Heerbrugg, Switzerland'
invalidated_at: '2025-12-30T00:00:00+00:00'
logo_enrichment:
status: INVALIDATED
invalidation_reason: 'Based on wrong website (leica-welt.com - Leica Camera AG in Germany)'
invalidated_at: '2025-12-30T00:00:00+00:00'
provenance_schema_version: '2.0'
standards_compliance:
- W3C PROV-O
- W3C SRI (content hashes)
generated_at: '2025-12-30T00:00:00+00:00'
ch_annotator:
convention_id: ch_annotator-v1_7_0
convention_version: 1.7.0
entity_classification:
hypernym: GRP
hypernym_label: GROUP
subtype: GRP.HER.LIB
subtype_label: LIBRARY
ontology_class: schema:Library
alternative_classes:
- org:FormalOrganization
- rov:RegisteredOrganization
- glam:HeritageCustodian
extraction_provenance:
namespace: glam
path: /files/switzerland_isil.yaml
timestamp: '2025-11-19T09:22:31.452165'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
annotation_provenance:
annotation_agent: opencode-claude-sonnet-4
annotation_date: '2025-12-06T21:13:14.508879+00:00'
annotation_method: retroactive CH-Annotator application via batch script
source_file: switzerland_isil.yaml
annotation_metadata:
confidence_score: 0.95
verified: false
verification_date:
verified_by:
entity_claims:
- claim_type: full_name
claim_value: Leica AG Bibliothek und Dokumentation
property_uri: skos:prefLabel
provenance:
namespace: glam
path: /files/switzerland_isil.yaml
timestamp: '2025-11-19T09:22:31.452165'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 0.95
- claim_type: institution_type
claim_value: LIBRARY
property_uri: rdf:type
provenance:
namespace: glam
path: /files/switzerland_isil.yaml
timestamp: '2025-11-19T09:22:31.452165'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 0.95
integration_note:
created_from: switzerland_isil_ch_annotator.yaml
creation_date: '2025-12-06T23:39:11.889212+00:00'
creation_method: create_custodian_from_ch_annotator.py
# REMOVED: google_maps_enrichment - contained data from Wetzlar, Germany (wrong country)
# REMOVED: logo_enrichment - based on wrong website (leica-welt.com - Leica Camera AG in Germany)
youtube_status: NOT_FOUND
youtube_search_query: Leica AG Bibliothek und Dokumentation official
youtube_search_timestamp: '2025-12-09T09:15:52.995368+00:00'
location:
latitude: 47.41488
longitude: 9.62684
coordinate_provenance:
source_type: GEONAMES_DIRECT
source_path: geonames.db
entity_id: 2660395
resolution_method: GEONAMES_DIRECT
notes: 'Data contamination fix: Coordinates from GeoNames for Heerbrugg, Switzerland'
city: Heerbrugg
region: St. Gallen
region_code: SG
country: CH
geonames_id: 2660395
feature_code: PPL
normalization_timestamp: '2025-12-30T00:00:00+00:00'
wikidata_enrichment:
wikidata_id:
enrichment_timestamp: '2025-12-30T00:00:00Z'
enrichment_method: manual_wikidata_lookup
enrichment_note: No Wikidata entry found for the Leica Geosystems AG library. Note - Leica Geosystems AG (Wikidata QID unverified; the previously recorded value "Q693tried72" is corrupted and must be re-checked) is headquartered in Heerbrugg, Switzerland. It is a different company from Leica Camera AG (Q156481), which is headquartered in Wetzlar, Germany.
data_contamination_fix:
fix_date: '2025-12-30T00:00:00+00:00'
fix_agent: opencode-claude-sonnet-4
contamination_source: Google Places API
contamination_type: wrong_country
wrong_institution: Leica Welt (Leica Camera AG museum/visitor center)
wrong_country: Germany
wrong_coordinates:
latitude: 50.5527416
longitude: 8.5356831
wrong_address: Am Leitz-Park 6, 35578 Wetzlar, Germany
wrong_website: https://leica-welt.com/
correct_institution: Leica AG Bibliothek und Dokumentation (Leica Geosystems AG)
correct_country: Switzerland
correct_coordinates:
latitude: 47.41488
longitude: 9.62684
correct_city: Heerbrugg
correct_region: St. Gallen (SG)
removed_sections:
- google_maps_enrichment
- logo_enrichment
old_ghcid: CH-SH-BAR-L-LABD
new_ghcid: CH-SG-HEE-L-LABD
evidence:
- type: isil_registry
value: ISIL CH-001172-7 specifies region SG (St. Gallen canton)
url: https://www.isil.nb.admin.ch/en/?isil=CH-001172-7
- type: original_source
value: Original entry locations specified region SG, country CH
- type: corporate_disambiguation
value: Leica Geosystems AG (surveying/geospatial company, HQ in Heerbrugg, CH) is distinct from Leica Camera AG (camera company, HQ in Wetzlar, DE). Google confused the two companies with similar names.