glam/data/custodian/CZ-10-PRA-L-AVCPUJK.yaml
kempersc 2137c522db geocode: add coordinates to JP compound cities and CZ files from GeoNames
- JP: Handle Gun/Cho/Machi/Mura compound city names (2615 files)
- CZ: Map city codes to GeoNames entries (667 files)
- Overall coverage: 84.5% → 96.4%
2025-12-09 21:49:40 +01:00

175 lines
6.4 KiB
YAML

original_entry:
name: Akademie věd ČR - Pedagogický ústav J.A.Komenského
institution_type: LIBRARY
source: CH-Annotator (czech_unified_ch_annotator.yaml)
identifiers:
- &id001
identifier_scheme: Sigla
identifier_value: ABB011
locations:
- street_address: Máchova 7
postal_code: 120 00
city: Praha 2
country: CZ
processing_timestamp: '2025-12-06T23:37:23.639904+00:00'
ghcid:
ghcid_current: CZ-10-PRA-L-AVCPUJK
ghcid_original: CZ-XX-PRA-L-AVČPÚJK
ghcid_uuid: 6c0c0023-d809-5af7-9792-3a88280b05cc
ghcid_uuid_sha256: c7d810dd-72ec-871c-87f0-147e22c4f1b9
ghcid_numeric: 14400278351819056924
record_id: b51bbf12-d5c6-492c-9a1a-f19d1d6b61e7
generation_timestamp: '2025-12-06T23:37:23.639904+00:00'
location_resolution:
country_code: CZ
region_code: '10'
city_code: PRA
method: GEONAMES_CITY_LOOKUP
region_name: CZ-10
geonames_id: 8629195
resolution_timestamp: '2025-12-07T11:42:54.224540+00:00'
ghcid_history:
- ghcid: CZ-10-PRA-L-AVCPUJK
ghcid_numeric: 14400278351819056924
valid_from: '2025-12-08T11:21:29.200637+00:00'
reason: 'Normalized diacritics to ASCII per ABBREV-DIACRITICS rule (was: CZ-10-PRA-L-AVČPÚJK)'
- ghcid: CZ-XX-PRA-L-AVČPÚJK
ghcid_numeric: 11911444671767460800
valid_from: '2025-12-06T23:37:23.639904+00:00'
reason: Initial GHCID from CH-Annotator (czech_unified_ch_annotator.yaml)
valid_to: '2025-12-08T11:21:29.200637+00:00'
- ghcid: CZ-10-PRA-L-AVČPÚJK
valid_from: '2025-12-07T11:42:54.224551+00:00'
reason: 'Region resolved via GeoNames city lookup: XX->10 (Praha Klanovice)'
custodian_name:
claim_type: custodian_name
claim_value: Akademie věd ČR - Pedagogický ústav J.A.Komenského
source_type: ch_annotator
identifiers:
- identifier_scheme: GHCID
identifier_value: CZ-10-PRA-L-AVCPUJK
- identifier_scheme: GHCID_UUID
identifier_value: 6c0c0023-d809-5af7-9792-3a88280b05cc
- identifier_scheme: GHCID_UUID_SHA256
identifier_value: c7d810dd-72ec-871c-87f0-147e22c4f1b9
- identifier_scheme: GHCID_NUMERIC
identifier_value: '14400278351819056924'
- identifier_scheme: RECORD_ID
identifier_value: b51bbf12-d5c6-492c-9a1a-f19d1d6b61e7
- *id001
provenance:
data_source: API_SCRAPING
data_tier: TIER_1_AUTHORITATIVE
extraction_date: '2025-11-19T10:14:30.030561+00:00'
extraction_method: 'Created from CH-Annotator file: czech_unified_ch_annotator.yaml'
confidence_score: 0.95
notes:
- 'YouTube/Google Maps enrichment 2025-12-08T19:49:41Z: Maps: rejected by LLM; YouTube: not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:05Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:29Z
ch_annotator:
convention_id: ch_annotator-v1_7_0
convention_version: 1.7.0
entity_classification:
hypernym: GRP
hypernym_label: GROUP
subtype: GRP.HER.LIB
subtype_label: LIBRARY
ontology_class: schema:Library
alternative_classes:
- org:FormalOrganization
- rov:RegisteredOrganization
- glam:HeritageCustodian
extraction_provenance:
namespace: glam
path: /files/czech_unified.yaml
timestamp: '2025-11-19T10:14:30.030561+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
annotation_provenance:
annotation_agent: opencode-claude-sonnet-4
annotation_date: '2025-12-06T21:12:32.389491+00:00'
annotation_method: retroactive CH-Annotator application via batch script
source_file: czech_unified.yaml
annotation_metadata:
confidence_score: 0.95
verified: false
verification_date:
verified_by:
entity_claims:
- claim_type: full_name
claim_value: Akademie věd ČR - Pedagogický ústav J.A.Komenského
property_uri: skos:prefLabel
provenance:
namespace: glam
path: /files/czech_unified.yaml
timestamp: '2025-11-19T10:14:30.030561+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 0.95
- claim_type: institution_type
claim_value: LIBRARY
property_uri: rdf:type
provenance:
namespace: glam
path: /files/czech_unified.yaml
timestamp: '2025-11-19T10:14:30.030561+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 0.95
- claim_type: located_in_city
claim_value: Praha 2
property_uri: schema:addressLocality
provenance:
namespace: glam
path: /files/czech_unified.yaml
timestamp: '2025-11-19T10:14:30.030561+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 0.9
- claim_type: ghcid
claim_value: CZ-ABB011
property_uri: glam:ghcid
provenance:
namespace: glam
path: /files/czech_unified.yaml
timestamp: '2025-11-19T10:14:30.030561+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 1.0
integration_note:
created_from: czech_unified_ch_annotator.yaml
creation_date: '2025-12-06T23:37:23.639904+00:00'
creation_method: create_custodian_from_ch_annotator.py
google_maps_status: NO_MATCH
google_maps_rejected:
candidate_name: Czech Academy of Sciences
rejection_reason: This is a mismatch. The source is a specific research institute within the Czech Academy of Sciences.
The Google Maps candidate represents the overarching parent organization, the Czech Academy of Sciences, located at its
headquarters. The candidate's Google Place types ('point_of_interest', 'establishment') are generic and do not match the
expected types for a heritage institution. Furthermore, the website links to the main academy, not the specific Pedagogical
Institute.
timestamp: '2025-12-08T19:49:41.002827+00:00'
youtube_status: NOT_FOUND
youtube_search_query: Akademie věd ČR - Pedagogický ústav J.A.Komenského official
youtube_search_timestamp: '2025-12-08T19:49:41.371035+00:00'
location:
city: Prague
region: CZ-10
region_code: '10'
country: CZ
postal_code: 120 00
street_address: Máchova 7
geonames_id: 3067696
normalization_timestamp: '2025-12-09T20:40:58.718568+00:00'
latitude: 50.08804
longitude: 14.42076
geonames_name: Prague
feature_code: PPLC
coordinate_provenance:
source_type: GEONAMES_GHCID_CITY_CODE
source_path: data/reference/geonames.db
entity_id: 3067696
city_code: PRA
original_timestamp: '2025-12-09T20:40:58.718556+00:00'