glam/data/custodian.backup.20251230/CL-ML-TAL-A-DA.yaml
2025-12-30 23:07:03 +01:00

209 lines
8.2 KiB
YAML

original_entry:
name: Diócesis archives
institution_type: ARCHIVE
source: CH-Annotator (latin_american_institutions_AUTHORITATIVE_ch_annotator.yaml)
identifiers:
- identifier_scheme: GHCID
identifier_value: CL-00-XXX-A-DA
- identifier_scheme: GHCID_NUMERIC
identifier_value: '12366715145533336373'
- &id001
identifier_scheme: OLD_ID
identifier_value: https://w3id.org/heritage/custodian/cl/a-di-cesis-archives-0050
- identifier_scheme: GHCID_UUID
identifier_value: ed96d164-cd5a-5944-aa94-a7ecae976d95
identifier_url: urn:uuid:ed96d164-cd5a-5944-aa94-a7ecae976d95
- identifier_scheme: GHCID_UUID_SHA256
identifier_value: ab9f65e7-37f5-8b35-b96f-f717a460009e
identifier_url: urn:uuid:ab9f65e7-37f5-8b35-b96f-f717a460009e
- identifier_scheme: RECORD_ID
identifier_value: 019a58fd-3227-7881-b0ba-8b237f563ec5
identifier_url: urn:uuid:019a58fd-3227-7881-b0ba-8b237f563ec5
- &id002
identifier_scheme: OLD_ID
identifier_value: 12366715145533336373
locations:
- region: Talca
country: CL
processing_timestamp: '2025-12-06T23:39:01.901519+00:00'
ghcid:
ghcid_current: CL-ML-TAL-A-DA
ghcid_original: CL-TA-XXX-A-DA
ghcid_uuid: 251a1ea5-b2f3-5bf9-9b9a-6ea165c60aa4
ghcid_uuid_sha256: d7cc6022-fb99-8793-5b5a-65071be5262c
ghcid_numeric: 15549909316769806227
record_id: 11ba9af8-ee86-4d8c-a6b4-263e76e21672
generation_timestamp: '2025-12-06T23:39:01.901519+00:00'
location_resolution:
method: GEONAMES_DIRECT
country_code: CL
region_code: ML
region_name: Maule
city_code: TAL
city_name: Talca
geonames_id: 3870294
feature_code: PPLA
latitude: -35.4232
longitude: -71.64974
resolution_date: '2025-12-30T00:00:00+00:00'
notes: 'Data contamination fix: Google Places API returned Santiago de Compostela, Spain instead of Talca, Chile. Corrected using GeoNames and original source data.'
ghcid_history:
- ghcid: CL-TA-XXX-A-DA
ghcid_numeric: 14267124111139144161
valid_from: '2025-12-06T23:39:01.901519+00:00'
reason: Initial GHCID from CH-Annotator (latin_american_institutions_AUTHORITATIVE_ch_annotator.yaml)
valid_to: '2025-12-07T16:40:43.090933+00:00'
- ghcid: CL-AP-GUA-A-DA
ghcid_numeric: 14267124111139144161
valid_from: '2025-12-07T16:40:43.090933+00:00'
valid_to: '2025-12-30T00:00:00+00:00'
reason: 'CONTAMINATED: Google Places API returned wrong institution (Santiago de Compostela, Spain)'
- ghcid: CL-ML-TAL-A-DA
ghcid_numeric: 15549909316769806227
valid_from: '2025-12-30T00:00:00+00:00'
reason: 'Data contamination fix: Corrected location to Talca, Maule Region, Chile based on original source data'
custodian_name:
claim_type: custodian_name
claim_value: Diócesis archives
source_type: ch_annotator
identifiers:
- identifier_scheme: GHCID
identifier_value: CL-ML-TAL-A-DA
- identifier_scheme: GHCID_UUID
identifier_value: 251a1ea5-b2f3-5bf9-9b9a-6ea165c60aa4
- identifier_scheme: GHCID_UUID_SHA256
identifier_value: d7cc6022-fb99-8793-5b5a-65071be5262c
- identifier_scheme: GHCID_NUMERIC
identifier_value: '15549909316769806227'
- identifier_scheme: RECORD_ID
identifier_value: 11ba9af8-ee86-4d8c-a6b4-263e76e21672
- *id001
- *id002
provenance:
data_source: CONVERSATION_NLP
data_tier: TIER_4_INFERRED
extraction_date: '2025-11-06T08:02:44.238055+00:00'
extraction_method: 'Created from CH-Annotator file: latin_american_institutions_AUTHORITATIVE_ch_annotator.yaml'
confidence_score: 0.85
notes:
- 'YouTube/Google Maps enrichment 2025-12-08T20:36:00Z: YouTube: not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:17Z
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:47:59Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:23Z
- 'DATA CONTAMINATION FIX 2025-12-30: Google Places API returned Santiago de Compostela, Spain (ahds.es) instead of correct location Talca, Chile. Removed contaminated google_maps_enrichment and logo_enrichment sections. Corrected location using GeoNames (ID 3870294) and original source data which indicated Talca region.'
schema_version: 2.0.0
provenance_schema_version: '2.0'
standards_compliance:
- W3C PROV-O
- W3C SRI (content hashes)
generated_at: '2025-12-30T00:00:00+00:00'
ch_annotator:
convention_id: ch_annotator-v1_7_0
convention_version: 1.7.0
entity_classification:
hypernym: GRP
hypernym_label: GROUP
subtype: GRP.HER.ARC
subtype_label: ARCHIVE
ontology_class: schema:ArchiveOrganization
alternative_classes:
- org:FormalOrganization
- rov:RegisteredOrganization
- glam:HeritageCustodian
extraction_provenance:
namespace: glam
path: /conversations/edc75d66-ee42-4199-8e22-65b0d2347922
timestamp: '2025-11-06T08:02:44.238055+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
annotation_provenance:
annotation_agent: opencode-claude-sonnet-4
annotation_date: '2025-12-06T21:13:56.173868+00:00'
annotation_method: retroactive CH-Annotator application via batch script
source_file: latin_american_institutions_AUTHORITATIVE.yaml
annotation_metadata:
confidence_score: 0.85
verified: false
verification_date:
verified_by:
entity_claims:
- claim_type: full_name
claim_value: Diócesis archives
property_uri: skos:prefLabel
provenance:
namespace: glam
path: /conversations/edc75d66-ee42-4199-8e22-65b0d2347922
timestamp: '2025-11-06T08:02:44.238055+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 0.85
- claim_type: institution_type
claim_value: ARCHIVE
property_uri: rdf:type
provenance:
namespace: glam
path: /conversations/edc75d66-ee42-4199-8e22-65b0d2347922
timestamp: '2025-11-06T08:02:44.238055+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 0.95
- claim_type: ghcid
claim_value: CL-00-XXX-A-DA
property_uri: glam:ghcid
provenance:
namespace: glam
path: /conversations/edc75d66-ee42-4199-8e22-65b0d2347922
timestamp: '2025-11-06T08:02:44.238055+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 1.0
integration_note:
created_from: latin_american_institutions_AUTHORITATIVE_ch_annotator.yaml
creation_date: '2025-12-06T23:39:01.901519+00:00'
creation_method: create_custodian_from_ch_annotator.py
# REMOVED: google_maps_enrichment - contained data from Santiago de Compostela, Spain (wrong country)
# REMOVED: logo_enrichment - based on wrong website (ahds.es - Spanish diocesan archives)
youtube_status: NOT_FOUND
youtube_search_query: Diócesis archives official
youtube_search_timestamp: '2025-12-08T20:36:00.764633+00:00'
location:
latitude: -35.4232
longitude: -71.64974
coordinate_provenance:
source_type: GEONAMES_DIRECT
source_path: geonames.db
entity_id: 3870294
resolution_method: GEONAMES_DIRECT
notes: 'Data contamination fix: Coordinates from GeoNames for Talca, Chile'
city: Talca
region: Maule
region_code: ML
country: CL
geonames_id: 3870294
feature_code: PPLA
normalization_timestamp: '2025-12-30T00:00:00+00:00'
wikidata_enrichment:
wikidata_id:
enrichment_timestamp: '2025-12-21T00:00:00Z'
enrichment_method: manual_wikidata_lookup
enrichment_note: No Wikidata entry found for Diócesis archives (generic diocesan archives in Chile)
data_contamination_fix:
fix_date: '2025-12-30T00:00:00+00:00'
fix_agent: opencode-claude-sonnet-4
contamination_source: Google Places API
contamination_type: wrong_country
wrong_institution: Archivo Histórico Diocesano de Santiago de Compostela
wrong_country: Spain
wrong_coordinates:
latitude: 42.8814701
longitude: -8.5439332
correct_institution: Diócesis de Talca archives
correct_country: Chile
correct_coordinates:
latitude: -35.4232
longitude: -71.64974
removed_sections:
- google_maps_enrichment
- logo_enrichment
old_ghcid: CL-AP-GUA-A-DA
new_ghcid: CL-ML-TAL-A-DA