264 lines
9.6 KiB
YAML
264 lines
9.6 KiB
YAML
custodian_name:
|
|
claim_type: custodian_name
|
|
claim_value: Archiv České televize
|
|
source_type: ch_annotator
|
|
original_entry:
|
|
name: Archiv České televize
|
|
institution_type: ARCHIVE
|
|
source: CH-Annotator (czech_unified_ch_annotator.yaml)
|
|
identifiers:
|
|
- &id001
|
|
identifier_scheme: ARON_UUID
|
|
identifier_value: 0d7b3af0-21ed-41df-9826-93e4b382305d
|
|
identifier_url: https://portal.nacr.cz/aron/apu/0d7b3af0-21ed-41df-9826-93e4b382305d
|
|
- &id002
|
|
identifier_scheme: INSTITUTION_CODE
|
|
identifier_value: '610000060'
|
|
- &id003
|
|
identifier_scheme: Wikidata
|
|
identifier_value: Q101473466
|
|
identifier_url: https://www.wikidata.org/wiki/Q101473466
|
|
locations: []
|
|
processing_timestamp: '2025-12-06T23:37:43.795470+00:00'
|
|
ghcid:
|
|
ghcid_current: CZ-10-CES-A-ACT
|
|
ghcid_original: XX-XX-XXX-A-AČT
|
|
ghcid_uuid: 840fe82b-3e87-5190-b011-f779ddb14dac
|
|
ghcid_uuid_sha256: 4fd21084-3411-882f-a1d3-b4d8721814a1
|
|
ghcid_numeric: 5751677834100807727
|
|
record_id: 91c11450-cccb-4be5-9ad5-e0d045e63778
|
|
generation_timestamp: '2025-12-06T23:37:43.795470+00:00'
|
|
location_resolution:
|
|
country_code: CZ
|
|
region_code: '10'
|
|
city_code: CES
|
|
method: CH_ANNOTATOR_TOP_SET
|
|
resolution_timestamp: '2025-12-07T00:19:33.141899+00:00'
|
|
region_name: CZ-10
|
|
iso_code_source: CZ-10
|
|
city_name: Ceske Budejovice
|
|
geonames_id: 3077916
|
|
feature_code: PPLA
|
|
extracted_toponym: České
|
|
latitude: 48.97447
|
|
longitude: 14.47434
|
|
ghcid_history:
|
|
- previous_ghcid_component: CB
|
|
new_ghcid_component: CES
|
|
change_date: '2025-12-20T19:55:24Z'
|
|
reason: 'Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City: Ceske Budejovice'
|
|
- ghcid: CZ-10-CB-A-ACT
|
|
ghcid_numeric: 5751677834100807727
|
|
valid_from: '2025-12-08T11:21:40.560688+00:00'
|
|
reason: 'Normalized diacritics to ASCII per ABBREV-DIACRITICS rule (was: CZ-10-CB-A-AČT)'
|
|
- ghcid: XX-XX-XXX-A-AČT
|
|
ghcid_numeric: 5993941688104191875
|
|
valid_from: '2025-12-06T23:37:43.795470+00:00'
|
|
reason: Initial GHCID from CH-Annotator (czech_unified_ch_annotator.yaml)
|
|
valid_to: '2025-12-08T11:21:40.560688+00:00'
|
|
- ghcid: CZ-XX-XXX-A-AČT
|
|
valid_from: '2025-12-06T23:54:40.625461+00:00'
|
|
reason: 'Country resolved via Wikidata P17: XX→CZ'
|
|
- ghcid: CZ-10-XXX-A-AČT
|
|
valid_from: '2025-12-07T00:01:59.388577+00:00'
|
|
reason: 'Region resolved via Wikidata P131: XX->10 (CZ-10)'
|
|
- ghcid: CZ-10-CB-A-AČT
|
|
valid_from: '2025-12-07T00:19:33.141905+00:00'
|
|
reason: 'Location resolved via CH-Annotator TOP.SET extraction: České -> Ceske Budejovice (GeoNames:3077916)'
|
|
identifiers:
|
|
- identifier_scheme: GHCID
|
|
identifier_value: CZ-10-CB-A-ACT
|
|
- identifier_scheme: GHCID_UUID
|
|
identifier_value: 840fe82b-3e87-5190-b011-f779ddb14dac
|
|
- identifier_scheme: GHCID_UUID_SHA256
|
|
identifier_value: 4fd21084-3411-882f-a1d3-b4d8721814a1
|
|
- identifier_scheme: GHCID_NUMERIC
|
|
identifier_value: '5751677834100807727'
|
|
- identifier_scheme: RECORD_ID
|
|
identifier_value: 91c11450-cccb-4be5-9ad5-e0d045e63778
|
|
- *id001
|
|
- *id002
|
|
- *id003
|
|
provenance:
|
|
data_source: API_SCRAPING
|
|
data_tier: TIER_1_AUTHORITATIVE
|
|
extraction_date: '2025-11-19T11:35:45.743393+00:00'
|
|
extraction_method: 'Created from CH-Annotator file: czech_unified_ch_annotator.yaml'
|
|
confidence_score: 0.85
|
|
notes:
|
|
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
|
|
- 'Region resolved 2025-12-07T00:01:59Z: XX->10 via Wikidata P131 (CZ-10)'
|
|
- 'Location resolved 2025-12-07T00:19:33Z: CH-Annotator TOP.SET extraction ''České'' -> Ceske Budejovice (GeoNames:3077916, Region:31)'
|
|
- 'YouTube/Google Maps enrichment 2025-12-08T19:45:08Z: Maps: rejected by LLM; YouTube: not found'
|
|
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:03Z
|
|
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:27Z
|
|
schema_version: 2.0.0
|
|
enrichment_provenance:
|
|
wikidata_enrichment:
|
|
content_hash: sha256-CosPptw9SDXZrNnBlpvjHhV+LoeEtnlAAu2L6Cc0aHg=
|
|
verified_at: '2025-12-28T19:58:20.725362+00:00'
|
|
wasDerivedFrom: https://www.wikidata.org/wiki/Q101473466
|
|
provenance_schema_version: '2.0'
|
|
standards_compliance:
|
|
- W3C PROV-O
|
|
- W3C SRI (content hashes)
|
|
generated_at: '2025-12-28T19:58:20.725380+00:00'
|
|
ch_annotator:
|
|
convention_id: ch_annotator-v1_7_0
|
|
convention_version: 1.7.0
|
|
entity_classification:
|
|
hypernym: GRP
|
|
hypernym_label: GROUP
|
|
subtype: GRP.HER.ARC
|
|
subtype_label: ARCHIVE
|
|
ontology_class: schema:ArchiveOrganization
|
|
alternative_classes:
|
|
- org:FormalOrganization
|
|
- rov:RegisteredOrganization
|
|
- glam:HeritageCustodian
|
|
extraction_provenance:
|
|
namespace: glam
|
|
path: /files/czech_unified.yaml
|
|
timestamp: '2025-11-19T11:35:45.743393+00:00'
|
|
agent: claude-conversation
|
|
context_convention: ch_annotator-v1_7_0
|
|
annotation_provenance:
|
|
annotation_agent: opencode-claude-sonnet-4
|
|
annotation_date: '2025-12-06T21:12:32.389491+00:00'
|
|
annotation_method: retroactive CH-Annotator application via batch script
|
|
source_file: czech_unified.yaml
|
|
annotation_metadata:
|
|
confidence_score: 0.85
|
|
verified: false
|
|
verification_date:
|
|
verified_by:
|
|
entity_claims:
|
|
- claim_type: full_name
|
|
claim_value: Archiv České televize
|
|
property_uri: skos:prefLabel
|
|
provenance:
|
|
namespace: glam
|
|
path: /files/czech_unified.yaml
|
|
timestamp: '2025-11-19T11:35:45.743393+00:00'
|
|
agent: claude-conversation
|
|
context_convention: ch_annotator-v1_7_0
|
|
confidence: 0.85
|
|
- claim_type: institution_type
|
|
claim_value: ARCHIVE
|
|
property_uri: rdf:type
|
|
provenance:
|
|
namespace: glam
|
|
path: /files/czech_unified.yaml
|
|
timestamp: '2025-11-19T11:35:45.743393+00:00'
|
|
agent: claude-conversation
|
|
context_convention: ch_annotator-v1_7_0
|
|
confidence: 0.95
|
|
- claim_type: wikidata_id
|
|
claim_value: Q101473466
|
|
property_uri: owl:sameAs
|
|
provenance:
|
|
namespace: wikidata
|
|
path: /entity/Q101473466
|
|
timestamp: '2025-11-19T11:35:45.743393+00:00'
|
|
agent: claude-conversation
|
|
context_convention: ch_annotator-v1_7_0
|
|
confidence: 0.98
|
|
- claim_type: location_settlement
|
|
claim_value: Ceske Budejovice
|
|
property_uri: schema:location
|
|
hypernym_code: TOP.SET
|
|
hypernym_label: SETTLEMENT
|
|
provenance:
|
|
namespace: geonames
|
|
path: /geonames/3077916
|
|
timestamp: '2025-12-07T00:19:33.141907+00:00'
|
|
agent: extract_locations_ch_annotator.py
|
|
context_convention: ch_annotator-v1_7_0
|
|
confidence: 0.85
|
|
extraction_source:
|
|
field: institution_name
|
|
extracted_text: České
|
|
method: pattern_matching
|
|
integration_note:
|
|
created_from: czech_unified_ch_annotator.yaml
|
|
creation_date: '2025-12-06T23:37:43.795470+00:00'
|
|
creation_method: create_custodian_from_ch_annotator.py
|
|
wikidata_enrichment:
|
|
wikidata_entity_id: Q101473466
|
|
api_metadata:
|
|
api_endpoint: https://www.wikidata.org/w/rest.php/wikibase/v1
|
|
fetch_timestamp: '2025-12-08T23:41:05.959613+00:00'
|
|
user_agent: GLAMDataExtractor/1.1 (glam-data@example.com) Python/httpx
|
|
enrichment_version: 2.0_full
|
|
properties_found:
|
|
- P31
|
|
- P159
|
|
- P17
|
|
- P361
|
|
wikidata_labels:
|
|
cs: Archiv České televize
|
|
wikidata_descriptions:
|
|
cs: český specializovaný archiv
|
|
wikidata_classification:
|
|
instance_of: &id004
|
|
- id: Q101470010
|
|
label: specialized archives
|
|
description: type of archives in Czechia
|
|
- id: Q27030766
|
|
label: audiovisual archive
|
|
description: archive that contains audio-visual materials
|
|
wikidata_instance_of: *id004
|
|
wikidata_location:
|
|
headquarters_location:
|
|
id: Q84492526
|
|
label: Prague 4
|
|
description: municipal district of Prague
|
|
country: &id005
|
|
id: Q213
|
|
label: Czech Republic
|
|
description: country in Central Europe
|
|
wikidata_country: *id005
|
|
wikidata_organization:
|
|
part_of:
|
|
id: Q341134
|
|
label: Česká televize
|
|
description: public television broadcaster in Czechia
|
|
_provenance:
|
|
content_hash:
|
|
algorithm: sha256
|
|
value: sha256-CosPptw9SDXZrNnBlpvjHhV+LoeEtnlAAu2L6Cc0aHg=
|
|
scope: enrichment_section
|
|
computed_at: '2025-12-28T19:58:20.725362+00:00'
|
|
prov:
|
|
wasGeneratedBy:
|
|
'@type': prov:Activity
|
|
name: wikidata_api_fetch
|
|
used: https://www.wikidata.org/w/rest.php/wikibase/v1
|
|
wasDerivedFrom: https://www.wikidata.org/wiki/Q101473466
|
|
verification:
|
|
status: verified
|
|
last_verified: '2025-12-28T19:58:20.725372+00:00'
|
|
google_maps_status: NO_MATCH
|
|
google_maps_rejected:
|
|
candidate_name: Česká Televize
|
|
rejection_reason: TYPE MATCH FAILED. The candidate's Google Place types ('point_of_interest', 'establishment') do not include any of the expected heritage institution types (museum, library, gallery, archive). The name 'Česká Televize' refers to the national broadcaster, and while it maintains an archive, this specific place likely represents its main headquarters or studios, which is not a publicly-facing heritage custodian institution.
|
|
timestamp: '2025-12-08T19:45:07.895514+00:00'
|
|
youtube_status: NOT_FOUND
|
|
youtube_search_query: Archiv České televize official
|
|
youtube_search_timestamp: '2025-12-08T19:45:08.254794+00:00'
|
|
location:
|
|
latitude: 48.97447
|
|
longitude: 14.47434
|
|
coordinate_provenance:
|
|
source_type: GHCID_RESOLUTION_DIRECT
|
|
source_path: ghcid.location_resolution
|
|
original_timestamp: '2025-12-07T00:19:33.141899+00:00'
|
|
entity_id: 3077916
|
|
resolution_method: CH_ANNOTATOR_TOP_SET
|
|
city: Ceske Budejovice
|
|
region: CZ-10
|
|
region_code: '10'
|
|
country: CZ
|
|
geonames_id: 3077916
|
|
feature_code: PPLA
|
|
normalization_timestamp: '2025-12-09T06:49:27.485977+00:00'
|