glam/data/custodian.backup.20251230/CZ-10-CES-A-ACT.yaml
2025-12-30 23:07:03 +01:00

264 lines
9.6 KiB
YAML

custodian_name:
claim_type: custodian_name
claim_value: Archiv České televize
source_type: ch_annotator
original_entry:
name: Archiv České televize
institution_type: ARCHIVE
source: CH-Annotator (czech_unified_ch_annotator.yaml)
identifiers:
- &id001
identifier_scheme: ARON_UUID
identifier_value: 0d7b3af0-21ed-41df-9826-93e4b382305d
identifier_url: https://portal.nacr.cz/aron/apu/0d7b3af0-21ed-41df-9826-93e4b382305d
- &id002
identifier_scheme: INSTITUTION_CODE
identifier_value: '610000060'
- &id003
identifier_scheme: Wikidata
identifier_value: Q101473466
identifier_url: https://www.wikidata.org/wiki/Q101473466
locations: []
processing_timestamp: '2025-12-06T23:37:43.795470+00:00'
ghcid:
ghcid_current: CZ-10-CES-A-ACT
ghcid_original: XX-XX-XXX-A-AČT
ghcid_uuid: 840fe82b-3e87-5190-b011-f779ddb14dac
ghcid_uuid_sha256: 4fd21084-3411-882f-a1d3-b4d8721814a1
ghcid_numeric: 5751677834100807727
record_id: 91c11450-cccb-4be5-9ad5-e0d045e63778
generation_timestamp: '2025-12-06T23:37:43.795470+00:00'
location_resolution:
country_code: CZ
region_code: '10'
city_code: CES
method: CH_ANNOTATOR_TOP_SET
resolution_timestamp: '2025-12-07T00:19:33.141899+00:00'
region_name: CZ-10
iso_code_source: CZ-10
city_name: Ceske Budejovice
geonames_id: 3077916
feature_code: PPLA
extracted_toponym: České
latitude: 48.97447
longitude: 14.47434
ghcid_history:
- previous_ghcid_component: CB
new_ghcid_component: CES
change_date: '2025-12-20T19:55:24Z'
reason: 'Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City: Ceske Budejovice'
- ghcid: CZ-10-CB-A-ACT
ghcid_numeric: 5751677834100807727
valid_from: '2025-12-08T11:21:40.560688+00:00'
reason: 'Normalized diacritics to ASCII per ABBREV-DIACRITICS rule (was: CZ-10-CB-A-AČT)'
- ghcid: XX-XX-XXX-A-AČT
ghcid_numeric: 5993941688104191875
valid_from: '2025-12-06T23:37:43.795470+00:00'
reason: Initial GHCID from CH-Annotator (czech_unified_ch_annotator.yaml)
valid_to: '2025-12-08T11:21:40.560688+00:00'
- ghcid: CZ-XX-XXX-A-AČT
valid_from: '2025-12-06T23:54:40.625461+00:00'
reason: 'Country resolved via Wikidata P17: XX→CZ'
- ghcid: CZ-10-XXX-A-AČT
valid_from: '2025-12-07T00:01:59.388577+00:00'
reason: 'Region resolved via Wikidata P131: XX->10 (CZ-10)'
- ghcid: CZ-10-CB-A-AČT
valid_from: '2025-12-07T00:19:33.141905+00:00'
reason: 'Location resolved via CH-Annotator TOP.SET extraction: České -> Ceske Budejovice (GeoNames:3077916)'
identifiers:
- identifier_scheme: GHCID
identifier_value: CZ-10-CB-A-ACT
- identifier_scheme: GHCID_UUID
identifier_value: 840fe82b-3e87-5190-b011-f779ddb14dac
- identifier_scheme: GHCID_UUID_SHA256
identifier_value: 4fd21084-3411-882f-a1d3-b4d8721814a1
- identifier_scheme: GHCID_NUMERIC
identifier_value: '5751677834100807727'
- identifier_scheme: RECORD_ID
identifier_value: 91c11450-cccb-4be5-9ad5-e0d045e63778
- *id001
- *id002
- *id003
provenance:
data_source: API_SCRAPING
data_tier: TIER_1_AUTHORITATIVE
extraction_date: '2025-11-19T11:35:45.743393+00:00'
extraction_method: 'Created from CH-Annotator file: czech_unified_ch_annotator.yaml'
confidence_score: 0.85
notes:
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
- 'Region resolved 2025-12-07T00:01:59Z: XX->10 via Wikidata P131 (CZ-10)'
- 'Location resolved 2025-12-07T00:19:33Z: CH-Annotator TOP.SET extraction ''České'' -> Ceske Budejovice (GeoNames:3077916, Region:31)'
- 'YouTube/Google Maps enrichment 2025-12-08T19:45:08Z: Maps: rejected by LLM; YouTube: not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:03Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:27Z
schema_version: 2.0.0
enrichment_provenance:
wikidata_enrichment:
content_hash: sha256-CosPptw9SDXZrNnBlpvjHhV+LoeEtnlAAu2L6Cc0aHg=
verified_at: '2025-12-28T19:58:20.725362+00:00'
wasDerivedFrom: https://www.wikidata.org/wiki/Q101473466
provenance_schema_version: '2.0'
standards_compliance:
- W3C PROV-O
- W3C SRI (content hashes)
generated_at: '2025-12-28T19:58:20.725380+00:00'
ch_annotator:
convention_id: ch_annotator-v1_7_0
convention_version: 1.7.0
entity_classification:
hypernym: GRP
hypernym_label: GROUP
subtype: GRP.HER.ARC
subtype_label: ARCHIVE
ontology_class: schema:ArchiveOrganization
alternative_classes:
- org:FormalOrganization
- rov:RegisteredOrganization
- glam:HeritageCustodian
extraction_provenance:
namespace: glam
path: /files/czech_unified.yaml
timestamp: '2025-11-19T11:35:45.743393+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
annotation_provenance:
annotation_agent: opencode-claude-sonnet-4
annotation_date: '2025-12-06T21:12:32.389491+00:00'
annotation_method: retroactive CH-Annotator application via batch script
source_file: czech_unified.yaml
annotation_metadata:
confidence_score: 0.85
verified: false
verification_date:
verified_by:
entity_claims:
- claim_type: full_name
claim_value: Archiv České televize
property_uri: skos:prefLabel
provenance:
namespace: glam
path: /files/czech_unified.yaml
timestamp: '2025-11-19T11:35:45.743393+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 0.85
- claim_type: institution_type
claim_value: ARCHIVE
property_uri: rdf:type
provenance:
namespace: glam
path: /files/czech_unified.yaml
timestamp: '2025-11-19T11:35:45.743393+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 0.95
- claim_type: wikidata_id
claim_value: Q101473466
property_uri: owl:sameAs
provenance:
namespace: wikidata
path: /entity/Q101473466
timestamp: '2025-11-19T11:35:45.743393+00:00'
agent: claude-conversation
context_convention: ch_annotator-v1_7_0
confidence: 0.98
- claim_type: location_settlement
claim_value: Ceske Budejovice
property_uri: schema:location
hypernym_code: TOP.SET
hypernym_label: SETTLEMENT
provenance:
namespace: geonames
path: /geonames/3077916
timestamp: '2025-12-07T00:19:33.141907+00:00'
agent: extract_locations_ch_annotator.py
context_convention: ch_annotator-v1_7_0
confidence: 0.85
extraction_source:
field: institution_name
extracted_text: České
method: pattern_matching
integration_note:
created_from: czech_unified_ch_annotator.yaml
creation_date: '2025-12-06T23:37:43.795470+00:00'
creation_method: create_custodian_from_ch_annotator.py
wikidata_enrichment:
wikidata_entity_id: Q101473466
api_metadata:
api_endpoint: https://www.wikidata.org/w/rest.php/wikibase/v1
fetch_timestamp: '2025-12-08T23:41:05.959613+00:00'
user_agent: GLAMDataExtractor/1.1 (glam-data@example.com) Python/httpx
enrichment_version: 2.0_full
properties_found:
- P31
- P159
- P17
- P361
wikidata_labels:
cs: Archiv České televize
wikidata_descriptions:
cs: český specializovaný archiv
wikidata_classification:
instance_of: &id004
- id: Q101470010
label: specialized archives
description: type of archives in Czechia
- id: Q27030766
label: audiovisual archive
description: archive that contains audio-visual materials
wikidata_instance_of: *id004
wikidata_location:
headquarters_location:
id: Q84492526
label: Prague 4
description: municipal district of Prague
country: &id005
id: Q213
label: Czech Republic
description: country in Central Europe
wikidata_country: *id005
wikidata_organization:
part_of:
id: Q341134
label: Česká televize
description: public television broadcaster in Czechia
_provenance:
content_hash:
algorithm: sha256
value: sha256-CosPptw9SDXZrNnBlpvjHhV+LoeEtnlAAu2L6Cc0aHg=
scope: enrichment_section
computed_at: '2025-12-28T19:58:20.725362+00:00'
prov:
wasGeneratedBy:
'@type': prov:Activity
name: wikidata_api_fetch
used: https://www.wikidata.org/w/rest.php/wikibase/v1
wasDerivedFrom: https://www.wikidata.org/wiki/Q101473466
verification:
status: verified
last_verified: '2025-12-28T19:58:20.725372+00:00'
google_maps_status: NO_MATCH
google_maps_rejected:
candidate_name: Česká Televize
rejection_reason: TYPE MATCH FAILED. The candidate's Google Place types ('point_of_interest', 'establishment') do not include any of the expected heritage institution types (museum, library, gallery, archive). The name 'Česká Televize' refers to the national broadcaster, and while it maintains an archive, this specific place likely represents its main headquarters or studios, which is not a publicly-facing heritage custodian institution.
timestamp: '2025-12-08T19:45:07.895514+00:00'
youtube_status: NOT_FOUND
youtube_search_query: Archiv České televize official
youtube_search_timestamp: '2025-12-08T19:45:08.254794+00:00'
location:
latitude: 48.97447
longitude: 14.47434
coordinate_provenance:
source_type: GHCID_RESOLUTION_DIRECT
source_path: ghcid.location_resolution
original_timestamp: '2025-12-07T00:19:33.141899+00:00'
entity_id: 3077916
resolution_method: CH_ANNOTATOR_TOP_SET
city: Ceske Budejovice
region: CZ-10
region_code: '10'
country: CZ
geonames_id: 3077916
feature_code: PPLA
normalization_timestamp: '2025-12-09T06:49:27.485977+00:00'