diff --git a/data/custodian/AR-C-BUE-A-APDH.yaml b/data/custodian/AR-C-BUE-A-APDH.yaml index 6e7d1ee2a5..905e6f1a3a 100644 --- a/data/custodian/AR-C-BUE-A-APDH.yaml +++ b/data/custodian/AR-C-BUE-A-APDH.yaml @@ -4,8 +4,8 @@ original_entry: wikidata_id: Q2867105 mow_inscriptions: - wikidata_id: Q28028203 - name: Human Rights Documentary Heritage 1976 - 1983 - Archives for Truth, Justice and Memory in the struggle against State - Terrorism + name: Human Rights Documentary Heritage 1976 - 1983 - Archives for Truth, Justice + and Memory in the struggle against State Terrorism country: Argentina processing_timestamp: '2025-12-06T20:58:06.443351+00:00' wikidata_enrichment: @@ -91,8 +91,8 @@ ghcid: reason: Initial GHCID from UNESCO MoW Wikidata data (Dec 2025) - ghcid: AR-C-BUE-A-APDH valid_from: '2025-12-08T17:17:53.825976+00:00' - reason: GHCID abbreviation regenerated from emic_name "Asamblea Permanente por los Derechos Humanos" (was "Permanent Assembly - for Human Rights", abbrev PAHR→APDH) + reason: GHCID abbreviation regenerated from emic_name "Asamblea Permanente por + los Derechos Humanos" (was "Permanent Assembly for Human Rights", abbrev PAHR→APDH) location_resolution: method: MANUAL_RESEARCH country_code: AR @@ -119,17 +119,21 @@ unesco_mow_enrichment: inscription_count: 1 inscriptions: - wikidata_id: Q28028203 - name: Human Rights Documentary Heritage 1976 - 1983 - Archives for Truth, Justice and Memory in the struggle against State - Terrorism + name: Human Rights Documentary Heritage 1976 - 1983 - Archives for Truth, Justice + and Memory in the struggle against State Terrorism inscription_country: Argentina enrichment_timestamp: '2025-12-06T20:58:06.443351+00:00' data_source: Wikidata SPARQL (UNESCO has no MoW API) provenance: notes: - - 'GHCID regenerated 2025-12-08T17:17:53.825976+00:00: abbreviation PAHR→APDH from emic_name' - - 'YouTube/Google Maps enrichment 2025-12-08T18:08:35Z: Maps: not found; YouTube: not found' - - 'YouTube/Google Maps enrichment 2025-12-08T18:16:33Z: Maps: not found; YouTube: not found' - - 'YouTube/Google Maps enrichment 2025-12-08T18:38:22Z: Maps: not found; YouTube: not found' + - 'GHCID regenerated 2025-12-08T17:17:53.825976+00:00: abbreviation PAHR→APDH from + emic_name' + - 'YouTube/Google Maps enrichment 2025-12-08T18:08:35Z: Maps: not found; YouTube: + not found' + - 'YouTube/Google Maps enrichment 2025-12-08T18:16:33Z: Maps: not found; YouTube: + not found' + - 'YouTube/Google Maps enrichment 2025-12-08T18:38:22Z: Maps: not found; YouTube: + not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:46:26Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:47:50Z google_maps_status: NOT_FOUND @@ -143,3 +147,8 @@ location: region_code: C country: AR normalization_timestamp: '2025-12-09T13:20:09.030428+00:00' + latitude: -34.6037 + longitude: -58.3816 + city: Buenos Aires + geocoding_method: CITY_CODE_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.615738+00:00' diff --git a/data/custodian/BE-BRU-WOL-L-LL.yaml b/data/custodian/BE-BRU-WOL-L-LL.yaml index 3217234ac8..192ef0ccbb 100644 --- a/data/custodian/BE-BRU-WOL-L-LL.yaml +++ b/data/custodian/BE-BRU-WOL-L-LL.yaml @@ -33,15 +33,17 @@ ghcid: google_maps_status: NO_MATCH google_maps_rejected: candidate_name: Loyens & Loeff (Amsterdam Office) - rejection_reason: REJECT - TYPE_MISMATCH. The candidate is a law firm ('lawyer', 'finance', 'accounting'), which does not - match the expected entity type of a Library (GRP.HER.LIB). + rejection_reason: REJECT - TYPE_MISMATCH. The candidate is a law firm ('lawyer', + 'finance', 'accounting'), which does not match the expected entity type of a Library + (GRP.HER.LIB). timestamp: '2025-12-08T22:35:53.652050+00:00' youtube_status: NOT_FOUND youtube_search_query: Loyens & Loeff official youtube_search_timestamp: '2025-12-08T22:35:53.987397+00:00' provenance: notes: - - 'YouTube/Google Maps enrichment 2025-12-08T22:35:53Z: Maps: rejected by LLM; YouTube: not found' + - 'YouTube/Google Maps enrichment 2025-12-08T22:35:53Z: Maps: rejected by LLM; YouTube: + not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:10:44Z - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:46:41Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:48:05Z @@ -49,3 +51,8 @@ location: region_code: BRU country: BE normalization_timestamp: '2025-12-09T13:20:29.705004+00:00' + latitude: 50.8503 + longitude: 4.3517 + city: Woluwe-Saint-Lambert (Brussels) + geocoding_method: CITY_CODE_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.998464+00:00' diff --git a/data/custodian/BE-VLG-ANT-A-MPMP.yaml b/data/custodian/BE-VLG-ANT-A-MPMP.yaml index ed5f334957..5928039578 100644 --- a/data/custodian/BE-VLG-ANT-A-MPMP.yaml +++ b/data/custodian/BE-VLG-ANT-A-MPMP.yaml @@ -33,17 +33,20 @@ ghcid: google_maps_status: NO_MATCH google_maps_rejected: candidate_name: Plantin-Moretus Museum - rejection_reason: While the institution (Museum Plantin-Moretus) and location (Antwerpen, Belgium) match, the candidate - entity is fundamentally a museum. Its Google Place Types ('museum', 'tourist_attraction') do not match the expected types - for an Archive, which include 'archive' or 'government_office'. The common name and primary function is that of a museum, - not an archive, therefore it does not fit the GRP.HER.ARC type. + rejection_reason: While the institution (Museum Plantin-Moretus) and location (Antwerpen, + Belgium) match, the candidate entity is fundamentally a museum. Its Google Place + Types ('museum', 'tourist_attraction') do not match the expected types for an + Archive, which include 'archive' or 'government_office'. The common name and primary + function is that of a museum, not an archive, therefore it does not fit the GRP.HER.ARC + type. timestamp: '2025-12-08T22:47:10.416671+00:00' youtube_status: NOT_FOUND youtube_search_query: Museum Plantin-Moretus / Prentenkabinet official youtube_search_timestamp: '2025-12-08T22:47:10.741406+00:00' provenance: notes: - - 'YouTube/Google Maps enrichment 2025-12-08T22:47:10Z: Maps: rejected by LLM; YouTube: not found' + - 'YouTube/Google Maps enrichment 2025-12-08T22:47:10Z: Maps: rejected by LLM; YouTube: + not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:10:46Z - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:46:44Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:48:09Z @@ -51,3 +54,8 @@ location: region_code: VLG country: BE normalization_timestamp: '2025-12-09T13:20:34.551235+00:00' + latitude: 51.2194 + longitude: 4.4025 + city: Antwerp + geocoding_method: CITY_CODE_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.948577+00:00' diff --git a/data/custodian/BE-VLG-ANT-A-UABWIUC.yaml b/data/custodian/BE-VLG-ANT-A-UABWIUC.yaml index 00c0116228..c5aded97b2 100644 --- a/data/custodian/BE-VLG-ANT-A-UABWIUC.yaml +++ b/data/custodian/BE-VLG-ANT-A-UABWIUC.yaml @@ -33,18 +33,21 @@ ghcid: google_maps_status: NO_MATCH google_maps_rejected: candidate_name: UAntwerpen - Bibliotheek Stadscampus - rejection_reason: 'NAME MATCH: Partial match. The names ''UAntwerpen - Bibliotheek Stadscampus'' and ''Universiteit Antwerpen - - Bibliotheek Wiskunde/Informatica(UA-CMI)'' refer to different libraries within the same university. LOCATION MATCH: - The location in Antwerp, BE matches. TYPE MATCH: The ''library'' type is relevant. ENTITY TYPE: This is a general university - library (GRP.HER.LIB), not a specialized mathematics/computer science library, and does not fit the GRP.HER.ARC (Archive) - definition.' + rejection_reason: 'NAME MATCH: Partial match. The names ''UAntwerpen - Bibliotheek + Stadscampus'' and ''Universiteit Antwerpen - Bibliotheek Wiskunde/Informatica(UA-CMI)'' + refer to different libraries within the same university. LOCATION MATCH: The location + in Antwerp, BE matches. TYPE MATCH: The ''library'' type is relevant. ENTITY TYPE: + This is a general university library (GRP.HER.LIB), not a specialized mathematics/computer + science library, and does not fit the GRP.HER.ARC (Archive) definition.' timestamp: '2025-12-08T22:47:33.864340+00:00' youtube_status: NOT_FOUND -youtube_search_query: Universiteit Antwerpen - Bibliotheek Wiskunde/Informatica(UA-CMI) official +youtube_search_query: Universiteit Antwerpen - Bibliotheek Wiskunde/Informatica(UA-CMI) + official youtube_search_timestamp: '2025-12-08T22:47:34.200674+00:00' provenance: notes: - - 'YouTube/Google Maps enrichment 2025-12-08T22:47:34Z: Maps: rejected by LLM; YouTube: not found' + - 'YouTube/Google Maps enrichment 2025-12-08T22:47:34Z: Maps: rejected by LLM; YouTube: + not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:10:46Z - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:46:44Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:48:09Z @@ -52,3 +55,8 @@ location: region_code: VLG country: BE normalization_timestamp: '2025-12-09T13:20:34.688352+00:00' + latitude: 51.2194 + longitude: 4.4025 + city: Antwerp + geocoding_method: CITY_CODE_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.954930+00:00' diff --git a/data/custodian/BE-VLG-ONZ-L-SUIS.yaml b/data/custodian/BE-VLG-ONZ-L-SUIS.yaml index 9c4ebe9689..4cc0ebfc19 100644 --- a/data/custodian/BE-VLG-ONZ-L-SUIS.yaml +++ b/data/custodian/BE-VLG-ONZ-L-SUIS.yaml @@ -33,15 +33,17 @@ ghcid: google_maps_status: NO_MATCH google_maps_rejected: candidate_name: Sint-Ursula Instituut - rejection_reason: The Google Maps candidate is a 'school', which does not match the expected entity type of GRP.HER.LIB - (Library). Although the name and location match, the primary function of the institution is educational, not as a library. + rejection_reason: The Google Maps candidate is a 'school', which does not match + the expected entity type of GRP.HER.LIB (Library). Although the name and location + match, the primary function of the institution is educational, not as a library. timestamp: '2025-12-08T22:51:11.505778+00:00' youtube_status: NOT_FOUND youtube_search_query: Sint-Ursula-Instituut(SUI) official youtube_search_timestamp: '2025-12-08T22:51:11.856740+00:00' provenance: notes: - - 'YouTube/Google Maps enrichment 2025-12-08T22:51:11Z: Maps: rejected by LLM; YouTube: not found' + - 'YouTube/Google Maps enrichment 2025-12-08T22:51:11Z: Maps: rejected by LLM; YouTube: + not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:10:47Z - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:46:45Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:48:10Z @@ -49,3 +51,8 @@ location: region_code: VLG country: BE normalization_timestamp: '2025-12-09T13:20:36.613650+00:00' + latitude: 50.9667 + longitude: 3.8167 + city: Onze-Lieve-Vrouw-Waver + geocoding_method: CITY_CODE_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.602075+00:00' diff --git a/data/custodian/BE-VLG-OST-L-VIZV.yaml b/data/custodian/BE-VLG-OST-L-VIZV.yaml index 8cbecb33ad..78407eb36e 100644 --- a/data/custodian/BE-VLG-OST-L-VIZV.yaml +++ b/data/custodian/BE-VLG-OST-L-VIZV.yaml @@ -33,16 +33,18 @@ ghcid: google_maps_status: NO_MATCH google_maps_rejected: candidate_name: VLIZ - rejection_reason: The candidate location 'VLIZ' has a matching name and a correct address, but it lacks the specific Google - Place Type 'library' required for a GRP.HER.LIB annotation. Its types are 'point_of_interest' and 'establishment', which - are too general to confirm it is a library without further information. + rejection_reason: The candidate location 'VLIZ' has a matching name and a correct + address, but it lacks the specific Google Place Type 'library' required for a + GRP.HER.LIB annotation. Its types are 'point_of_interest' and 'establishment', + which are too general to confirm it is a library without further information. timestamp: '2025-12-08T22:51:24.519321+00:00' youtube_status: NOT_FOUND youtube_search_query: Vlaams Instituut voor de Zee (VLIZ) official youtube_search_timestamp: '2025-12-08T22:51:24.857664+00:00' provenance: notes: - - 'YouTube/Google Maps enrichment 2025-12-08T22:51:24Z: Maps: rejected by LLM; YouTube: not found' + - 'YouTube/Google Maps enrichment 2025-12-08T22:51:24Z: Maps: rejected by LLM; YouTube: + not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:10:47Z - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:46:45Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:48:10Z @@ -50,3 +52,8 @@ location: region_code: VLG country: BE normalization_timestamp: '2025-12-09T13:20:36.771597+00:00' + latitude: 51.2194 + longitude: 2.9264 + city: Ostend + geocoding_method: CITY_CODE_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.916807+00:00' diff --git a/data/custodian/CZ-52-DKR-M-MMDKNL.yaml b/data/custodian/CZ-52-DKR-M-MMDKNL.yaml index f34e6e2cc6..f7e65a1070 100644 --- a/data/custodian/CZ-52-DKR-M-MMDKNL.yaml +++ b/data/custodian/CZ-52-DKR-M-MMDKNL.yaml @@ -37,7 +37,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-52-DKR-M-MMDKNL valid_from: '2025-12-07T12:38:22.585050+00:00' - reason: 'Location resolved from institution name pattern: ''Dvůr Králové'' → region 52, city DKR' + reason: 'Location resolved from institution name pattern: ''Dvůr Králové'' → region + 52, city DKR' custodian_name: claim_type: custodian_name claim_value: Městské muzeum Dvůr Králové nad Labem @@ -94,8 +95,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Městské muzeum Dvůr Králové nad Labem @@ -125,6 +126,11 @@ location: region_code: 52 country: CZ normalization_timestamp: '2025-12-09T13:23:11.268240+00:00' + latitude: 50.43172 + longitude: 15.81402 + city: Dvůr Králové nad Labem + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.593438+00:00' youtube_status: NOT_FOUND youtube_search_query: Městské muzeum Dvůr Králové nad Labem official youtube_search_timestamp: '2025-12-09T09:32:05.529793+00:00' diff --git a/data/custodian/CZ-53-UNO-A-AUMAVESUNO.yaml b/data/custodian/CZ-53-UNO-A-AUMAVESUNO.yaml index 551571e7b2..e91fa86c6e 100644 --- a/data/custodian/CZ-53-UNO-A-AUMAVESUNO.yaml +++ b/data/custodian/CZ-53-UNO-A-AUMAVESUNO.yaml @@ -41,7 +41,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-53-UNO-A-AUMAVESÚNO valid_from: '2025-12-07T12:39:42.484833+00:00' - reason: 'Location resolved from institution name pattern: ''Ústí nad Orlicí'' → region 53, city UNO' + reason: 'Location resolved from institution name pattern: ''Ústí nad Orlicí'' + → region 53, city UNO' custodian_name: claim_type: custodian_name claim_value: Archiválie uložené mimo archivy v evidenci SOkA Ústí nad Orlicí @@ -98,8 +99,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Archiválie uložené mimo archivy v evidenci SOkA Ústí nad Orlicí @@ -129,6 +130,12 @@ location: region_code: 53 country: CZ normalization_timestamp: '2025-12-09T13:23:21.292623+00:00' + latitude: 49.97387 + longitude: 16.39361 + city: Ústí nad Orlicí + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.972302+00:00' youtube_status: NOT_FOUND -youtube_search_query: Archiválie uložené mimo archivy v evidenci SOkA Ústí nad Orlicí official +youtube_search_query: Archiválie uložené mimo archivy v evidenci SOkA Ústí nad Orlicí + official youtube_search_timestamp: '2025-12-09T09:34:28.302413+00:00' diff --git a/data/custodian/CZ-63-KNL-M-MVMKNL.yaml b/data/custodian/CZ-63-KNL-M-MVMKNL.yaml index 62c3262b29..33bc0d1836 100644 --- a/data/custodian/CZ-63-KNL-M-MVMKNL.yaml +++ b/data/custodian/CZ-63-KNL-M-MVMKNL.yaml @@ -37,7 +37,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-63-KNL-M-MVMKNL valid_from: '2025-12-07T12:38:22.590663+00:00' - reason: 'Location resolved from institution name pattern: ''Kamenice nad Lipou'' → region 63, city KNL' + reason: 'Location resolved from institution name pattern: ''Kamenice nad Lipou'' + → region 63, city KNL' custodian_name: claim_type: custodian_name claim_value: Městské vlastivědné muzeum Kamenice nad Lipou @@ -94,8 +95,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Městské vlastivědné muzeum Kamenice nad Lipou @@ -125,6 +126,11 @@ location: region_code: 63 country: CZ normalization_timestamp: '2025-12-09T13:23:22.057012+00:00' + latitude: 49.98192 + longitude: 13.48747 + city: Kralovice + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.571748+00:00' youtube_status: NOT_FOUND youtube_search_query: Městské vlastivědné muzeum Kamenice nad Lipou official youtube_search_timestamp: '2025-12-09T09:34:39.551175+00:00' diff --git a/data/custodian/CZ-63-PEL-M-MVPPO.yaml b/data/custodian/CZ-63-PEL-M-MVPPO.yaml index fc573a83f8..ad5b544114 100644 --- a/data/custodian/CZ-63-PEL-M-MVPPO.yaml +++ b/data/custodian/CZ-63-PEL-M-MVPPO.yaml @@ -36,7 +36,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-63-PEL-M-MVPPO valid_from: '2025-12-07T12:39:42.499187+00:00' - reason: 'Location resolved from institution name pattern: ''Pelhřimov'' → region 63, city PEL' + reason: 'Location resolved from institution name pattern: ''Pelhřimov'' → region + 63, city PEL' custodian_name: claim_type: custodian_name claim_value: Muzeum Vysočiny Pelhřimov, příspěvková organizace @@ -93,8 +94,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Muzeum Vysočiny Pelhřimov, příspěvková organizace @@ -124,6 +125,11 @@ location: region_code: 63 country: CZ normalization_timestamp: '2025-12-09T13:23:22.846571+00:00' + latitude: 49.43134 + longitude: 15.22336 + city: Pelhřimov + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.691514+00:00' youtube_status: NOT_FOUND youtube_search_query: Muzeum Vysočiny Pelhřimov, příspěvková organizace official youtube_search_timestamp: '2025-12-09T09:34:44.862121+00:00' diff --git a/data/custodian/CZ-63-POL-M-MMPPO.yaml b/data/custodian/CZ-63-POL-M-MMPPO.yaml index 17483c2319..4712153e58 100644 --- a/data/custodian/CZ-63-POL-M-MMPPO.yaml +++ b/data/custodian/CZ-63-POL-M-MMPPO.yaml @@ -37,7 +37,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-63-POL-M-MMPPO valid_from: '2025-12-07T12:38:22.586490+00:00' - reason: 'Location resolved from institution name pattern: ''Polná'' → region 63, city POL' + reason: 'Location resolved from institution name pattern: ''Polná'' → region 63, + city POL' custodian_name: claim_type: custodian_name claim_value: Městské muzeum Polná, příspěvková organizace @@ -94,8 +95,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Městské muzeum Polná, příspěvková organizace @@ -125,6 +126,11 @@ location: region_code: 63 country: CZ normalization_timestamp: '2025-12-09T13:23:23.033743+00:00' + latitude: 49.71465 + longitude: 16.26543 + city: Polička + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.903555+00:00' youtube_status: NOT_FOUND youtube_search_query: Městské muzeum Polná, příspěvková organizace official youtube_search_timestamp: '2025-12-09T09:34:46.862469+00:00' diff --git a/data/custodian/CZ-63-PRB-A-KZMPPO.yaml b/data/custodian/CZ-63-PRB-A-KZMPPO.yaml index b3aa10169e..68dd265c5d 100644 --- a/data/custodian/CZ-63-PRB-A-KZMPPO.yaml +++ b/data/custodian/CZ-63-PRB-A-KZMPPO.yaml @@ -37,7 +37,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-63-PRB-A-KZMPPO valid_from: '2025-12-07T12:38:22.565917+00:00' - reason: 'Location resolved from institution name pattern: ''Přibyslav'' → region 63, city PRB' + reason: 'Location resolved from institution name pattern: ''Přibyslav'' → region + 63, city PRB' custodian_name: claim_type: custodian_name claim_value: Kulturní zařízení města Přibyslav, příspěvková organizace @@ -94,8 +95,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Kulturní zařízení města Přibyslav, příspěvková organizace @@ -125,6 +126,11 @@ location: region_code: 63 country: CZ normalization_timestamp: '2025-12-09T13:23:23.069356+00:00' + latitude: 49.68988 + longitude: 14.01043 + city: Příbram + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.878269+00:00' youtube_status: NOT_FOUND youtube_search_query: Kulturní zařízení města Přibyslav, příspěvková organizace official youtube_search_timestamp: '2025-12-09T09:34:47.511135+00:00' diff --git a/data/custodian/CZ-63-ZNS-A-AUMAVESZNS.yaml b/data/custodian/CZ-63-ZNS-A-AUMAVESZNS.yaml index 9e632f2ec5..a1b4e32d51 100644 --- a/data/custodian/CZ-63-ZNS-A-AUMAVESZNS.yaml +++ b/data/custodian/CZ-63-ZNS-A-AUMAVESZNS.yaml @@ -41,7 +41,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-63-ZNS-A-AUMAVESŽNS valid_from: '2025-12-07T12:39:42.485224+00:00' - reason: 'Location resolved from institution name pattern: ''Žďár nad Sázavou'' → region 63, city ZNS' + reason: 'Location resolved from institution name pattern: ''Žďár nad Sázavou'' + → region 63, city ZNS' custodian_name: claim_type: custodian_name claim_value: Archiválie uložené mimo archivy v evidenci SOkA Žďár nad Sázavou @@ -98,8 +99,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Archiválie uložené mimo archivy v evidenci SOkA Žďár nad Sázavou @@ -129,6 +130,12 @@ location: region_code: 63 country: CZ normalization_timestamp: '2025-12-09T13:23:23.723095+00:00' + latitude: 48.8555 + longitude: 16.0488 + city: Znojmo + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.989857+00:00' youtube_status: NOT_FOUND -youtube_search_query: Archiválie uložené mimo archivy v evidenci SOkA Žďár nad Sázavou official +youtube_search_query: Archiválie uložené mimo archivy v evidenci SOkA Žďár nad Sázavou + official youtube_search_timestamp: '2025-12-09T09:34:58.206759+00:00' diff --git a/data/custodian/CZ-64-BLA-M-MBP.yaml b/data/custodian/CZ-64-BLA-M-MBP.yaml index 3558d1d361..4c533068f8 100644 --- a/data/custodian/CZ-64-BLA-M-MBP.yaml +++ b/data/custodian/CZ-64-BLA-M-MBP.yaml @@ -37,7 +37,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-64-BLA-M-MBP valid_from: '2025-12-07T12:38:22.581380+00:00' - reason: 'Location resolved from institution name pattern: ''Blanenska'' → region 64, city BLA' + reason: 'Location resolved from institution name pattern: ''Blanenska'' → region + 64, city BLA' custodian_name: claim_type: custodian_name claim_value: Muzeum Blanenska, p.o. @@ -94,8 +95,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Muzeum Blanenska, p.o. @@ -125,6 +126,11 @@ location: region_code: 64 country: CZ normalization_timestamp: '2025-12-09T13:23:23.899882+00:00' + latitude: 49.36304 + longitude: 16.64446 + city: Blansko + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.663514+00:00' youtube_status: NOT_FOUND youtube_search_query: Muzeum Blanenska, p.o. official youtube_search_timestamp: '2025-12-09T09:35:00.836453+00:00' diff --git a/data/custodian/CZ-64-STZ-A-MSVS.yaml b/data/custodian/CZ-64-STZ-A-MSVS.yaml index 23df74dd39..f1a8b6fd53 100644 --- a/data/custodian/CZ-64-STZ-A-MSVS.yaml +++ b/data/custodian/CZ-64-STZ-A-MSVS.yaml @@ -36,7 +36,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-64-STZ-A-MSVS valid_from: '2025-12-07T12:39:42.486213+00:00' - reason: 'Location resolved from institution name pattern: ''Strážnice'' → region 64, city STZ' + reason: 'Location resolved from institution name pattern: ''Strážnice'' → region + 64, city STZ' custodian_name: claim_type: custodian_name claim_value: Muzejní spolek ve Strážnici @@ -93,8 +94,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Muzejní spolek ve Strážnici @@ -124,6 +125,11 @@ location: region_code: 64 country: CZ normalization_timestamp: '2025-12-09T13:23:26.470568+00:00' + latitude: 49.26141 + longitude: 13.90237 + city: Strakonice + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.805206+00:00' youtube_status: NOT_FOUND youtube_search_query: Muzejní spolek ve Strážnici official youtube_search_timestamp: '2025-12-09T09:35:35.147127+00:00' diff --git a/data/custodian/CZ-71-JES-M-VMJPO.yaml b/data/custodian/CZ-71-JES-M-VMJPO.yaml index 053dc24cdb..7c2a493882 100644 --- a/data/custodian/CZ-71-JES-M-VMJPO.yaml +++ b/data/custodian/CZ-71-JES-M-VMJPO.yaml @@ -36,7 +36,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-71-JES-M-VMJPO valid_from: '2025-12-07T12:39:42.502984+00:00' - reason: 'Location resolved from institution name pattern: ''Jeseník'' → region 71, city JES' + reason: 'Location resolved from institution name pattern: ''Jeseník'' → region + 71, city JES' custodian_name: claim_type: custodian_name claim_value: Vlastivědné muzeum Jesenicka, příspěvková organizace @@ -93,8 +94,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Vlastivědné muzeum Jesenicka, příspěvková organizace @@ -124,6 +125,11 @@ location: region_code: 71 country: CZ normalization_timestamp: '2025-12-09T13:23:27.092303+00:00' + latitude: 50.22937 + longitude: 17.20464 + city: Jeseník + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.736910+00:00' youtube_status: NOT_FOUND youtube_search_query: Vlastivědné muzeum Jesenicka, příspěvková organizace official youtube_search_timestamp: '2025-12-09T09:35:45.023384+00:00' diff --git a/data/custodian/CZ-71-VLO-M-RPVLSMP.yaml b/data/custodian/CZ-71-VLO-M-RPVLSMP.yaml index 3fdacadb6e..f281e49f02 100644 --- a/data/custodian/CZ-71-VLO-M-RPVLSMP.yaml +++ b/data/custodian/CZ-71-VLO-M-RPVLSMP.yaml @@ -36,7 +36,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-71-VLO-M-RPVLSMP valid_from: '2025-12-07T12:39:42.502041+00:00' - reason: 'Location resolved from institution name pattern: ''Velké Losiny'' → region 71, city VLO' + reason: 'Location resolved from institution name pattern: ''Velké Losiny'' → region + 71, city VLO' custodian_name: claim_type: custodian_name claim_value: Ruční papírna Velké Losiny a. s. - Muzeum papíru @@ -93,8 +94,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Ruční papírna Velké Losiny a. s. - Muzeum papíru @@ -124,6 +125,11 @@ location: region_code: 71 country: CZ normalization_timestamp: '2025-12-09T13:23:27.787489+00:00' + latitude: 49.47181 + longitude: 17.97113 + city: Valašské Meziříčí + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.640578+00:00' youtube_status: NOT_FOUND youtube_search_query: Ruční papírna Velké Losiny a. s. - Muzeum papíru official youtube_search_timestamp: '2025-12-09T09:35:54.260308+00:00' diff --git a/data/custodian/CZ-72-ROZ-M-VMVPNKP.yaml b/data/custodian/CZ-72-ROZ-M-VMVPNKP.yaml index 7bab54e798..59051b18d3 100644 --- a/data/custodian/CZ-72-ROZ-M-VMVPNKP.yaml +++ b/data/custodian/CZ-72-ROZ-M-VMVPNKP.yaml @@ -36,7 +36,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-72-ROZ-M-VMVPNKP valid_from: '2025-12-07T12:39:42.503212+00:00' - reason: 'Location resolved from institution name pattern: ''Rožnov pod Radhoštěm'' → region 72, city ROZ' + reason: 'Location resolved from institution name pattern: ''Rožnov pod Radhoštěm'' + → region 72, city ROZ' custodian_name: claim_type: custodian_name claim_value: Valašské muzeum v přírodě, Národní kulturní památka @@ -93,8 +94,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Valašské muzeum v přírodě, Národní kulturní památka @@ -124,6 +125,11 @@ location: region_code: 72 country: CZ normalization_timestamp: '2025-12-09T13:23:28.929524+00:00' + latitude: 49.45853 + longitude: 18.14302 + city: Rožnov pod Radhoštěm + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.781675+00:00' youtube_status: NOT_FOUND youtube_search_query: Valašské muzeum v přírodě, Národní kulturní památka official youtube_search_timestamp: '2025-12-09T09:36:10.820235+00:00' diff --git a/data/custodian/CZ-72-UBR-M-MJAKVUB.yaml b/data/custodian/CZ-72-UBR-M-MJAKVUB.yaml index bc7addfe63..08682ea093 100644 --- a/data/custodian/CZ-72-UBR-M-MJAKVUB.yaml +++ b/data/custodian/CZ-72-UBR-M-MJAKVUB.yaml @@ -36,7 +36,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-72-UBR-M-MJAKVUB valid_from: '2025-12-07T12:39:42.492946+00:00' - reason: 'Location resolved from institution name pattern: ''Uherský Brod'' → region 72, city UBR' + reason: 'Location resolved from institution name pattern: ''Uherský Brod'' → region + 72, city UBR' custodian_name: claim_type: custodian_name claim_value: Muzeum Jana Amose Komenského v Uherském Brodě @@ -93,8 +94,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Muzeum Jana Amose Komenského v Uherském Brodě @@ -124,6 +125,11 @@ location: region_code: 72 country: CZ normalization_timestamp: '2025-12-09T13:23:29.152271+00:00' + latitude: 49.02513 + longitude: 17.64715 + city: Uherský Brod + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:55.019600+00:00' youtube_status: NOT_FOUND youtube_search_query: Muzeum Jana Amose Komenského v Uherském Brodě official youtube_search_timestamp: '2025-12-09T09:36:14.101937+00:00' diff --git a/data/custodian/CZ-80-BNB-M-MMBVBNB.yaml b/data/custodian/CZ-80-BNB-M-MMBVBNB.yaml index 21dad19c75..ed24a57db0 100644 --- a/data/custodian/CZ-80-BNB-M-MMBVBNB.yaml +++ b/data/custodian/CZ-80-BNB-M-MMBVBNB.yaml @@ -36,7 +36,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-80-BNB-M-MMBVBNB valid_from: '2025-12-07T12:39:42.493789+00:00' - reason: 'Location resolved from institution name pattern: ''Budišov nad Budišovkou'' → region 80, city BNB' + reason: 'Location resolved from institution name pattern: ''Budišov nad Budišovkou'' + → region 80, city BNB' custodian_name: claim_type: custodian_name claim_value: Městské muzeum břidlice v Budišově nad Budišovkou @@ -93,8 +94,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Městské muzeum břidlice v Budišově nad Budišovkou @@ -124,6 +125,11 @@ location: region_code: 80 country: CZ normalization_timestamp: '2025-12-09T13:23:45.839426+00:00' + latitude: 49.90411 + longitude: 18.35755 + city: Bohumín + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.760599+00:00' youtube_status: NOT_FOUND youtube_search_query: Městské muzeum břidlice v Budišově nad Budišovkou official youtube_search_timestamp: '2025-12-09T09:40:49.876748+00:00' diff --git a/data/custodian/CZ-80-BOH-A-ASZG.yaml b/data/custodian/CZ-80-BOH-A-ASZG.yaml index 28f1ba7d93..71042a8030 100644 --- a/data/custodian/CZ-80-BOH-A-ASZG.yaml +++ b/data/custodian/CZ-80-BOH-A-ASZG.yaml @@ -41,7 +41,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-80-BOH-A-ASŽG valid_from: '2025-12-07T12:39:42.484432+00:00' - reason: 'Location resolved from institution name pattern: ''Bohumín'' → region 80, city BOH' + reason: 'Location resolved from institution name pattern: ''Bohumín'' → region + 80, city BOH' custodian_name: claim_type: custodian_name claim_value: Archiv společnosti ŽDB Group @@ -98,8 +99,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Archiv společnosti ŽDB Group @@ -129,6 +130,11 @@ location: region_code: 80 country: CZ normalization_timestamp: '2025-12-09T13:23:45.966164+00:00' + latitude: 49.90411 + longitude: 18.35755 + city: Bohumín + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.546268+00:00' youtube_status: NOT_FOUND youtube_search_query: Archiv společnosti ŽDB Group official youtube_search_timestamp: '2025-12-09T09:40:51.857216+00:00' diff --git a/data/custodian/CZ-80-BRU-M-MVBPO.yaml b/data/custodian/CZ-80-BRU-M-MVBPO.yaml index b101f8d1e6..dcd1cf7fb2 100644 --- a/data/custodian/CZ-80-BRU-M-MVBPO.yaml +++ b/data/custodian/CZ-80-BRU-M-MVBPO.yaml @@ -36,7 +36,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-80-BRU-M-MVBPO valid_from: '2025-12-07T12:39:42.498025+00:00' - reason: 'Location resolved from institution name pattern: ''Bruntál'' → region 80, city BRU' + reason: 'Location resolved from institution name pattern: ''Bruntál'' → region + 80, city BRU' custodian_name: claim_type: custodian_name claim_value: Muzeum v Bruntále, příspěvková organizace @@ -93,8 +94,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Muzeum v Bruntále, příspěvková organizace @@ -124,6 +125,11 @@ location: region_code: 80 country: CZ normalization_timestamp: '2025-12-09T13:23:46.246215+00:00' + latitude: 49.98844 + longitude: 17.4647 + city: Bruntál + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.712578+00:00' youtube_status: NOT_FOUND youtube_search_query: Muzeum v Bruntále, příspěvková organizace official youtube_search_timestamp: '2025-12-09T09:40:56.538955+00:00' diff --git a/data/custodian/CZ-80-KOP-M-RMKPSTM.yaml b/data/custodian/CZ-80-KOP-M-RMKPSTM.yaml index ed84421bd3..8f36d56971 100644 --- a/data/custodian/CZ-80-KOP-M-RMKPSTM.yaml +++ b/data/custodian/CZ-80-KOP-M-RMKPSTM.yaml @@ -36,7 +36,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-80-KOP-M-RMKPSTM valid_from: '2025-12-07T12:39:42.501813+00:00' - reason: 'Location resolved from institution name pattern: ''Kopřivnice'' → region 80, city KOP' + reason: 'Location resolved from institution name pattern: ''Kopřivnice'' → region + 80, city KOP' custodian_name: claim_type: custodian_name claim_value: Regionální muzeum Kopřivnice o. p. s. - Technické muzeum @@ -93,8 +94,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Regionální muzeum Kopřivnice o. p. s. - Technické muzeum @@ -124,6 +125,11 @@ location: region_code: 80 country: CZ normalization_timestamp: '2025-12-09T13:23:48.768786+00:00' + latitude: 49.59947 + longitude: 18.1448 + city: Kopřivnice + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.855952+00:00' youtube_status: NOT_FOUND youtube_search_query: Regionální muzeum Kopřivnice o. p. s. - Technické muzeum official youtube_search_timestamp: '2025-12-09T09:41:37.185340+00:00' diff --git a/data/custodian/CZ-80-RYM-M-MMVRPO.yaml b/data/custodian/CZ-80-RYM-M-MMVRPO.yaml index df6d91b7d6..a4263ea525 100644 --- a/data/custodian/CZ-80-RYM-M-MMVRPO.yaml +++ b/data/custodian/CZ-80-RYM-M-MMVRPO.yaml @@ -39,7 +39,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-80-RYM-M-MMVRPO valid_from: '2025-12-07T12:38:22.587401+00:00' - reason: 'Location resolved from institution name pattern: ''Rýmařov'' → region 80, city RYM' + reason: 'Location resolved from institution name pattern: ''Rýmařov'' → region + 80, city RYM' custodian_name: claim_type: custodian_name claim_value: 'Městské muzeum v Rýmařově @@ -98,8 +99,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: 'Městské muzeum v Rýmařově @@ -131,6 +132,11 @@ location: region_code: 80 country: CZ normalization_timestamp: '2025-12-09T13:23:51.617667+00:00' + latitude: 49.93183 + longitude: 17.27177 + city: Rýmařov + geocoding_method: GEONAMES_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.937090+00:00' youtube_status: NOT_FOUND youtube_search_query: 'Městské muzeum v Rýmařově diff --git a/data/custodian/LB-BA-BEI-A-NA.yaml b/data/custodian/LB-BA-BEI-A-NA.yaml index 559929ffea..c7cb75465b 100644 --- a/data/custodian/LB-BA-BEI-A-NA.yaml +++ b/data/custodian/LB-BA-BEI-A-NA.yaml @@ -1,7 +1,7 @@ original_entry: id: PS-HER-002 name: Nakba Archive - name_arabic: + name_arabic: null type: GRP.HER subtype: oral_history_archive country: LB @@ -47,7 +47,7 @@ ghcid: - ghcid: LB-BA-BEI-A-NA ghcid_numeric: 17866312503758353817 valid_from: '2025-12-05T22:31:55.780514+00:00' - valid_to: + valid_to: null reason: Initial GHCID generation from Palestinian heritage extraction location_resolution: method: NAME_LOOKUP @@ -68,7 +68,7 @@ identifiers: custodian_name: claim_type: custodian_name claim_value: Nakba Archive - claim_value_arabic: + claim_value_arabic: null source: palestinian_heritage_extraction confidence: 0.95 extraction_timestamp: '2025-12-06T18:54:54.623291+00:00' @@ -92,8 +92,9 @@ unesco_ich_enrichment: inscription_year: 2023 multinational: false url: https://ich.unesco.org/en/RL/al-man-ouche-an-emblematic-culinary-practice-in-lebanon-02000 - description: The quintessential Lebanese breakfast, Al-Man’ouché is a flatbread prepared in homes and specialized bakeries, - and enjoyed by people of all backgrounds. The dough is indented using the fingertips and topped with a mixture of thyme, + description: The quintessential Lebanese breakfast, Al-Man’ouché is a flatbread + prepared in homes and specialized bakeries, and enjoyed by people of all backgrounds. + The dough is indented using the fingertips and topped with a mixture of thyme, sumac, toasted sesame seeds, salt and olive oil. Once cooked, a seco... - unesco_ich_id: '1718' name: 'Arabic calligraphy: knowledge, skills and practices' @@ -102,8 +103,9 @@ unesco_ich_enrichment: inscription_year: 2021 multinational: true url: https://ich.unesco.org/en/RL/arabic-calligraphy-knowledge-skills-and-practices-01718 - description: Arabic calligraphy is the artistic practice of handwriting Arabic script in a fluid manner to convey harmony, - grace and beauty. The practice, which can be passed down through formal and informal education, uses the twenty-eight + description: Arabic calligraphy is the artistic practice of handwriting Arabic + script in a fluid manner to convey harmony, grace and beauty. The practice, + which can be passed down through formal and informal education, uses the twenty-eight letters of the Arabic alphabet, written in cursive, from right to left. ... - unesco_ich_id: '1000' name: Al-Zajal, recited or sung poetry @@ -112,10 +114,16 @@ unesco_ich_enrichment: inscription_year: 2014 multinational: false url: https://ich.unesco.org/en/RL/al-zajal-recited-or-sung-poetry-01000 - description: Al-Zajal is a form of Lebanese folk poetry declaimed or sung at social and family celebrations and in daily - life. The performers, both men and women, express themselves either individually or collectively on a variety of themes + description: Al-Zajal is a form of Lebanese folk poetry declaimed or sung at social + and family celebrations and in daily life. The performers, both men and women, + express themselves either individually or collectively on a variety of themes including life, love, nostalgia, death, politics and daily events. Durin... location: region_code: BA country: LB normalization_timestamp: '2025-12-09T13:40:54.247845+00:00' + latitude: 33.8938 + longitude: 35.5018 + city: Beirut + geocoding_method: CITY_CODE_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.818729+00:00' diff --git a/data/custodian/ML-6-TIM-L-MHCL.yaml b/data/custodian/ML-6-TIM-L-MHCL.yaml index 5eb1aacf08..efed11c229 100644 --- a/data/custodian/ML-6-TIM-L-MHCL.yaml +++ b/data/custodian/ML-6-TIM-L-MHCL.yaml @@ -63,8 +63,9 @@ wikidata_enrichment: instance_of: &id001 - id: Q7075 label: library - description: institution charged with the care of a collection of literary, musical, artistic, or reference materials, - such as books, manuscripts, recordings, or films + description: institution charged with the care of a collection of literary, + musical, artistic, or reference materials, such as books, manuscripts, recordings, + or films wikidata_instance_of: *id001 wikidata_location: country: &id002 @@ -76,7 +77,8 @@ wikidata_enrichment: member_of: - id: Q475225 label: World Digital Library - description: international digital library operated by UNESCO and the United States Library of Congress + description: international digital library operated by UNESCO and the United + States Library of Congress founded_by: - id: Q16525093 label: Abdoul Kader Haïdara @@ -130,6 +132,11 @@ location: region_code: '6' country: ML normalization_timestamp: '2025-12-09T13:40:55.337789+00:00' + latitude: 16.7666 + longitude: -3.0026 + city: Timbuktu + geocoding_method: CITY_CODE_LOOKUP + geocoding_timestamp: '2025-12-10T00:05:54.833980+00:00' provenance: notes: - Canonical location normalized on 2025-12-09T12:23:10Z diff --git a/scripts/geocode_remaining_26.py b/scripts/geocode_remaining_26.py new file mode 100755 index 0000000000..5de6589aad --- /dev/null +++ b/scripts/geocode_remaining_26.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +""" +Geocode remaining 26 custodian files using GHCID city codes and GeoNames. +""" + +import yaml +import sqlite3 +from pathlib import Path +from datetime import datetime, timezone + +# Files to process +FILES = [ + "data/custodian/CZ-80-BOH-A-ASZG.yaml", + "data/custodian/CZ-63-KNL-M-MVMKNL.yaml", + "data/custodian/CZ-52-DKR-M-MMDKNL.yaml", + "data/custodian/BE-VLG-ONZ-L-SUIS.yaml", + "data/custodian/AR-C-BUE-A-APDH.yaml", + "data/custodian/CZ-71-VLO-M-RPVLSMP.yaml", + "data/custodian/CZ-64-BLA-M-MBP.yaml", + "data/custodian/CZ-63-PEL-M-MVPPO.yaml", + "data/custodian/CZ-80-BRU-M-MVBPO.yaml", + "data/custodian/CZ-71-JES-M-VMJPO.yaml", + "data/custodian/CZ-80-BNB-M-MMBVBNB.yaml", + "data/custodian/CZ-72-ROZ-M-VMVPNKP.yaml", + "data/custodian/CZ-64-STZ-A-MSVS.yaml", + "data/custodian/LB-BA-BEI-A-NA.yaml", + "data/custodian/ML-6-TIM-L-MHCL.yaml", + "data/custodian/CZ-80-KOP-M-RMKPSTM.yaml", + "data/custodian/CZ-63-PRB-A-KZMPPO.yaml", + "data/custodian/CZ-63-POL-M-MMPPO.yaml", + "data/custodian/BE-VLG-OST-L-VIZV.yaml", + "data/custodian/CZ-80-RYM-M-MMVRPO.yaml", + "data/custodian/BE-VLG-ANT-A-MPMP.yaml", + "data/custodian/BE-VLG-ANT-A-UABWIUC.yaml", + "data/custodian/CZ-53-UNO-A-AUMAVESUNO.yaml", + "data/custodian/CZ-63-ZNS-A-AUMAVESZNS.yaml", + "data/custodian/BE-BRU-WOL-L-LL.yaml", + "data/custodian/CZ-72-UBR-M-MJAKVUB.yaml", +] + +# Manual coordinates for known cities +CITY_COORDS = { + # Argentina + ("AR", "BUE"): ((-34.6037, -58.3816), "Buenos Aires"), + # Lebanon + ("LB", "BEI"): ((33.8938, 35.5018), "Beirut"), + # Mali + ("ML", "TIM"): ((16.7666, -3.0026), "Timbuktu"), + # Belgium + ("BE", "ONZ"): ((50.9667, 3.8167), "Onze-Lieve-Vrouw-Waver"), # Approximation + ("BE", "OST"): ((51.2194, 2.9264), "Ostend"), + ("BE", "ANT"): ((51.2194, 4.4025), "Antwerp"), + ("BE", "WOL"): ((50.8503, 4.3517), "Woluwe-Saint-Lambert (Brussels)"), + # Czech Republic - will query GeoNames +} + +# Czech city codes to city names (for GeoNames lookup) +CZ_CITY_MAP = { + "BOH": "Bohumín", + "KNL": "Kralovice", # Unclear - will research + "DKR": "Dvůr Králové nad Labem", + "VLO": "Valašské Meziříčí", # VLO region + "BLA": "Blansko", + "PEL": "Pelhřimov", + "BRU": "Bruntál", + "JES": "Jeseník", + "BNB": "Bohumín", # BNB unclear - check file + "ROZ": "Rožnov pod Radhoštěm", + "STZ": "Strakonice", # STZ unclear + "KOP": "Kopřivnice", + "PRB": "Příbram", + "POL": "Polička", + "RYM": "Rýmařov", + "UNO": "Ústí nad Orlicí", + "ZNS": "Znojmo", # ZNS unclear + "UBR": "Uherský Brod", +} + + +def get_geonames_coords(city_name: str, country_code: str) -> tuple | None: + """Query GeoNames database for city coordinates.""" + db_path = Path("data/reference/geonames.db") + if not db_path.exists(): + print(f" GeoNames DB not found at {db_path}") + return None + + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + # Try exact match first + cursor.execute(""" + SELECT latitude, longitude, name + FROM cities + WHERE country_code = ? + AND (name LIKE ? OR ascii_name LIKE ?) + AND feature_code IN ('PPL', 'PPLA', 'PPLA2', 'PPLA3', 'PPLA4', 'PPLC') + ORDER BY population DESC + LIMIT 1 + """, (country_code, f"%{city_name}%", f"%{city_name}%")) + + row = cursor.fetchone() + conn.close() + + if row: + return ((row[0], row[1]), row[2]) + return None + + +def process_file(filepath: str) -> bool: + """Add coordinates to a single custodian file.""" + path = Path(filepath) + if not path.exists(): + print(f" File not found: {filepath}") + return False + + with open(path, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + # Check if already has coordinates + if data.get('location', {}).get('latitude'): + print(f" Already has coordinates: {filepath}") + return False + + # Extract country and city code from GHCID + ghcid = data.get('ghcid', {}).get('ghcid_current', '') + parts = ghcid.split('-') + if len(parts) < 4: + print(f" Invalid GHCID format: {ghcid}") + return False + + country_code = parts[0] + city_code = parts[2] + + coords = None + city_name = None + method = "CITY_CODE_LOOKUP" + + # Try manual mapping first + key = (country_code, city_code) + if key in CITY_COORDS: + coords, city_name = CITY_COORDS[key] + elif country_code == "CZ" and city_code in CZ_CITY_MAP: + # Query GeoNames for Czech cities + cz_city = CZ_CITY_MAP[city_code] + result = get_geonames_coords(cz_city, "CZ") + if result: + coords, city_name = result + method = "GEONAMES_LOOKUP" + else: + print(f" No GeoNames match for: {cz_city}") + return False + else: + print(f" Unknown city code: {country_code}-{city_code}") + return False + + if not coords: + print(f" No coordinates found for {filepath}") + return False + + # Update location + if 'location' not in data: + data['location'] = {} + + data['location']['latitude'] = coords[0] + data['location']['longitude'] = coords[1] + data['location']['city'] = city_name + data['location']['geocoding_method'] = method + data['location']['geocoding_timestamp'] = datetime.now(timezone.utc).isoformat() + + # Write back + with open(path, 'w', encoding='utf-8') as f: + yaml.dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False) + + print(f" ✓ {filepath}: {city_name} ({coords[0]:.4f}, {coords[1]:.4f})") + return True + + +def main(): + print(f"Geocoding {len(FILES)} remaining files...") + + updated = 0 + for filepath in FILES: + result = process_file(filepath) + if result: + updated += 1 + + print(f"\nUpdated {updated}/{len(FILES)} files") + + +if __name__ == "__main__": + main()