From cc61d99acf2d4623f7ba1008edb9fb6d4eae0e10 Mon Sep 17 00:00:00 2001 From: kempersc Date: Tue, 9 Dec 2025 21:59:58 +0100 Subject: [PATCH] geocode: add coordinates to BG and EG custodian files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - BG: Add lat/lon from existing GeoNames IDs (28 files) - EG: Map city codes to GeoNames (CAI→Cairo, ALX→Alexandria, etc.) (28 files) - Fix malformed EG-IS-\`A\`-O-SCA.yaml → EG-IS-ISM-O-SCA.yaml - Overall coverage: 96.4% → 96.6% --- data/custodian/BG-BGS-CHE-L-БКНЧДН-1908.yaml | 4 + data/custodian/BG-BGS-CHE-L-БКНЧДН.yaml | 4 + data/custodian/BG-BLG-BEL-L-БПНЧГТ-1885.yaml | 4 + data/custodian/BG-BLG-BEL-L-БПНЧГТ.yaml | 4 + .../BG-BLG-GDE-L-LB-library_bg_0130005.yaml | 4 + data/custodian/BG-BLG-GTO-L-БПНЧЯС-1946.yaml | 4 + data/custodian/BG-BLG-GTO-L-БПНЧЯС.yaml | 4 + .../BG-BLG-HAD-L-LB-library_bg_0130003.yaml | 4 + data/custodian/BG-BLG-LEV-L-ЧБОП.yaml | 4 + data/custodian/BG-BLG-PLE-L-ЧБП-1937.yaml | 4 + data/custodian/BG-BLG-PLE-L-ЧБП.yaml | 4 + .../BG-BLG-PLO-L-LB-library_bg_0130001.yaml | 4 + data/custodian/BG-BLG-PLO-L-LB.yaml | 4 + .../BG-BLG-YAK-L-LB-library_bg_0130004.yaml | 4 + .../BG-KHO-DIM-L-LB-library_bg_2620000.yaml | 4 + .../BG-LOV-ABL-L-LB-library_bg_0130002.yaml | 4 + data/custodian/BG-PAZ-DYU-L-КЦБП-1886.yaml | 4 + data/custodian/BG-PAZ-DYU-L-КЦБП.yaml | 4 + .../BG-PAZ-PAN-L-LB-library_bg_1320000.yaml | 4 + .../BG-PDV-ASE-L-LB-library_bg_1620001.yaml | 4 + .../BG-PDV-GAL-L-LB-library_bg_2420002.yaml | 4 + .../BG-PDV-KAR-L-LB-library_bg_1620000.yaml | 4 + .../BG-RAZ-ISP-L-LB-library_bg_1720000.yaml | 4 + .../BG-SFO-SAM-L-LB-library_bg_2320000.yaml | 4 + .../BG-SLV-KOT-L-LB-library_bg_2020000.yaml | 4 + .../BG-SZR-KAZ-L-LB-library_bg_2420000.yaml | 4 + .../BG-SZR-RAD-L-LB-library_bg_2420001.yaml | 4 + .../BG-VTR-GOR-L-LB-library_bg_0420000.yaml | 4 + ...L-AUL-alexandria_university_libraries.yaml | 6 + data/custodian/EG-ALX-ALX-L-AUL.yaml | 6 + ...-M-GRMA-greco_roman_museum_alexandria.yaml | 6 + data/custodian/EG-ALX-ALX-M-GRMA.yaml | 6 + ...ASS-L-AUL-assiut_university_libraries.yaml | 6 + ...ICC-contemporary_image_collective_cic.yaml | 4 + data/custodian/EG-C-CAI-G-CICC.yaml | 4 + ...CAC-darb_1718_contemporary_art_center.yaml | 4 + data/custodian/EG-C-CAI-G-DCAC.yaml | 4 + .../EG-C-CAI-G-MG-mashrabia_gallery.yaml | 4 + data/custodian/EG-C-CAI-G-MG.yaml | 4 + ...ACOH-palace_of_arts_cairo_opera_house.yaml | 4 + data/custodian/EG-C-CAI-G-PACOH.yaml | 4 + .../EG-C-CAI-G-SG-safarkhan_gallery.yaml | 4 + data/custodian/EG-C-CAI-G-SG.yaml | 4 + ...L-ASUL-ain_shams_university_libraries.yaml | 6 + data/custodian/EG-C-CAI-L-ASUL.yaml | 6 + ...can_university_in_cairo_auc_libraries.yaml | 6 + data/custodian/EG-C-CAI-L-AUCAL.yaml | 6 + ...CAI-L-AUL-al_azhar_university_library.yaml | 6 + ...itish_university_in_egypt_bue_library.yaml | 6 + data/custodian/EG-C-CAI-L-BUEBL.yaml | 6 + ...-CULS-cairo_university_library_system.yaml | 6 + data/custodian/EG-C-CAI-L-CULS.yaml | 6 + ...onal_library_and_archives_dar_al_kutu.yaml | 6 + data/custodian/EG-C-CAI-L-ENLADK.yaml | 6 + ...erman_university_in_cairo_guc_library.yaml | 6 + data/custodian/EG-C-CAI-L-GUCGL.yaml | 6 + ...CAI-L-HUL-helwan_university_libraries.yaml | 6 + data/custodian/EG-C-CAI-L-HUL.yaml | 6 + ...G-C-CAI-L-NUL-nile_university_library.yaml | 6 + data/custodian/EG-C-CAI-L-NUL.yaml | 6 + ...AI-O-EKBE-egyptian_knowledge_bank_ekb.yaml | 6 + data/custodian/EG-C-CAI-O-EKBE.yaml | 6 + ...EG-C-CAI-O-GEM-global_egyptian_museum.yaml | 6 + data/custodian/EG-C-CAI-O-GEM.yaml | 6 + ...R-FIEI-french_institute_of_egypt_ifao.yaml | 4 + data/custodian/EG-C-CAI-R-FIEI.yaml | 4 + ...an_archaeological_institute_cairo_dai.yaml | 4 + data/custodian/EG-C-CAI-R-GAICD.yaml | 4 + data/custodian/EG-C-CAI-R-NFICN.yaml | 5 + ...-NIL-A-NAE-national_archives_of_egypt.yaml | 4 + data/custodian/EG-C-NIL-A-NAE.yaml | 4 + data/custodian/EG-CA-CAI-D-PAG.yaml | 16 +- scripts/geocode_eg_from_city_code.py | 190 ++++++++++++++++++ scripts/geocode_from_geonames_id.py | 160 +++++++++++++++ 74 files changed, 694 insertions(+), 9 deletions(-) create mode 100755 scripts/geocode_eg_from_city_code.py create mode 100755 scripts/geocode_from_geonames_id.py diff --git a/data/custodian/BG-BGS-CHE-L-БКНЧДН-1908.yaml b/data/custodian/BG-BGS-CHE-L-БКНЧДН-1908.yaml index 608b34ff14..bd39432c7f 100644 --- a/data/custodian/BG-BGS-CHE-L-БКНЧДН-1908.yaml +++ b/data/custodian/BG-BGS-CHE-L-БКНЧДН-1908.yaml @@ -145,6 +145,10 @@ location: geonames_id: 732519 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:51.090293+00:00' + latitude: 42.44408 + longitude: 27.63902 + geocoding_timestamp: '2025-12-09T20:51:37.959549+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Библиотека към Народно Читалище „Димо Николов – 1908” official youtube_search_timestamp: '2025-12-09T08:50:29.181062+00:00' diff --git a/data/custodian/BG-BGS-CHE-L-БКНЧДН.yaml b/data/custodian/BG-BGS-CHE-L-БКНЧДН.yaml index 41bb01286f..b55043536f 100644 --- a/data/custodian/BG-BGS-CHE-L-БКНЧДН.yaml +++ b/data/custodian/BG-BGS-CHE-L-БКНЧДН.yaml @@ -145,6 +145,10 @@ location: geonames_id: 732519 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:51.297577+00:00' + latitude: 42.44408 + longitude: 27.63902 + geocoding_timestamp: '2025-12-09T20:51:38.795424+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Библиотека към Народно Читалище „Димо Николов – 1908” official youtube_search_timestamp: '2025-12-09T08:50:29.846396+00:00' diff --git a/data/custodian/BG-BLG-BEL-L-БПНЧГТ-1885.yaml b/data/custodian/BG-BLG-BEL-L-БПНЧГТ-1885.yaml index 3e5169af49..9f92f2991f 100644 --- a/data/custodian/BG-BLG-BEL-L-БПНЧГТ-1885.yaml +++ b/data/custodian/BG-BLG-BEL-L-БПНЧГТ-1885.yaml @@ -145,6 +145,10 @@ location: geonames_id: 733322 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:51.361454+00:00' + latitude: 41.95077 + longitude: 23.55831 + geocoding_timestamp: '2025-12-09T20:51:44.914573+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Библиотека при Народно читалище „Георги Тодоров-1885” official youtube_search_timestamp: '2025-12-09T08:50:30.505307+00:00' diff --git a/data/custodian/BG-BLG-BEL-L-БПНЧГТ.yaml b/data/custodian/BG-BLG-BEL-L-БПНЧГТ.yaml index 3320b906f0..4e8a4202ba 100644 --- a/data/custodian/BG-BLG-BEL-L-БПНЧГТ.yaml +++ b/data/custodian/BG-BLG-BEL-L-БПНЧГТ.yaml @@ -145,6 +145,10 @@ location: geonames_id: 733322 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:51.406917+00:00' + latitude: 41.95077 + longitude: 23.55831 + geocoding_timestamp: '2025-12-09T20:51:39.188678+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Библиотека при Народно читалище „Георги Тодоров-1885” official youtube_search_timestamp: '2025-12-09T08:50:31.187390+00:00' diff --git a/data/custodian/BG-BLG-GDE-L-LB-library_bg_0130005.yaml b/data/custodian/BG-BLG-GDE-L-LB-library_bg_0130005.yaml index 00647fa796..9d8c852685 100644 --- a/data/custodian/BG-BLG-GDE-L-LB-library_bg_0130005.yaml +++ b/data/custodian/BG-BLG-GDE-L-LB-library_bg_0130005.yaml @@ -145,6 +145,10 @@ location: geonames_id: 731108 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:51.640351+00:00' + latitude: 41.56667 + longitude: 23.73333 + geocoding_timestamp: '2025-12-09T20:51:45.009762+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-0130005 official youtube_search_timestamp: '2025-12-09T08:50:31.923614+00:00' diff --git a/data/custodian/BG-BLG-GTO-L-БПНЧЯС-1946.yaml b/data/custodian/BG-BLG-GTO-L-БПНЧЯС-1946.yaml index 3d877c98ce..f7ab95da63 100644 --- a/data/custodian/BG-BLG-GTO-L-БПНЧЯС-1946.yaml +++ b/data/custodian/BG-BLG-GTO-L-БПНЧЯС-1946.yaml @@ -140,6 +140,10 @@ location: geonames_id: 727962 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:51.746784+00:00' + latitude: 41.45498 + longitude: 23.28628 + geocoding_timestamp: '2025-12-09T20:51:41.130295+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Библиотека при Народно читалище „Яне Сандански-1946” official youtube_search_timestamp: '2025-12-09T08:50:32.591939+00:00' diff --git a/data/custodian/BG-BLG-GTO-L-БПНЧЯС.yaml b/data/custodian/BG-BLG-GTO-L-БПНЧЯС.yaml index e14d6e1947..197d41dc2e 100644 --- a/data/custodian/BG-BLG-GTO-L-БПНЧЯС.yaml +++ b/data/custodian/BG-BLG-GTO-L-БПНЧЯС.yaml @@ -140,6 +140,10 @@ location: geonames_id: 727962 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:51.857993+00:00' + latitude: 41.45498 + longitude: 23.28628 + geocoding_timestamp: '2025-12-09T20:51:42.414160+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Библиотека при Народно читалище „Яне Сандански-1946” official youtube_search_timestamp: '2025-12-09T08:50:33.257161+00:00' diff --git a/data/custodian/BG-BLG-HAD-L-LB-library_bg_0130003.yaml b/data/custodian/BG-BLG-HAD-L-LB-library_bg_0130003.yaml index 7bc66cc0de..dd605f32b1 100644 --- a/data/custodian/BG-BLG-HAD-L-LB-library_bg_0130003.yaml +++ b/data/custodian/BG-BLG-HAD-L-LB-library_bg_0130003.yaml @@ -140,6 +140,10 @@ location: geonames_id: 730464 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:51.934972+00:00' + latitude: 41.52222 + longitude: 23.86861 + geocoding_timestamp: '2025-12-09T20:51:37.443529+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-0130003 official youtube_search_timestamp: '2025-12-09T08:50:33.916328+00:00' diff --git a/data/custodian/BG-BLG-LEV-L-ЧБОП.yaml b/data/custodian/BG-BLG-LEV-L-ЧБОП.yaml index 605a1bb7a2..fb847c134c 100644 --- a/data/custodian/BG-BLG-LEV-L-ЧБОП.yaml +++ b/data/custodian/BG-BLG-LEV-L-ЧБОП.yaml @@ -140,6 +140,10 @@ location: geonames_id: 729634 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:51.983209+00:00' + latitude: 41.48782 + longitude: 23.30392 + geocoding_timestamp: '2025-12-09T20:51:40.564263+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Читалищна библиотека „Отец Паисий” official youtube_search_timestamp: '2025-12-09T08:50:34.565585+00:00' diff --git a/data/custodian/BG-BLG-PLE-L-ЧБП-1937.yaml b/data/custodian/BG-BLG-PLE-L-ЧБП-1937.yaml index 864f990462..e5755e8d9e 100644 --- a/data/custodian/BG-BLG-PLE-L-ЧБП-1937.yaml +++ b/data/custodian/BG-BLG-PLE-L-ЧБП-1937.yaml @@ -140,6 +140,10 @@ location: geonames_id: 728205 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.023879+00:00' + latitude: 41.63333 + longitude: 23.96667 + geocoding_timestamp: '2025-12-09T20:51:40.289469+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Читалищна библиотека “Просвета-1937” official youtube_search_timestamp: '2025-12-09T08:50:35.219443+00:00' diff --git a/data/custodian/BG-BLG-PLE-L-ЧБП.yaml b/data/custodian/BG-BLG-PLE-L-ЧБП.yaml index 2559134fc0..1c181abd09 100644 --- a/data/custodian/BG-BLG-PLE-L-ЧБП.yaml +++ b/data/custodian/BG-BLG-PLE-L-ЧБП.yaml @@ -140,6 +140,10 @@ location: geonames_id: 728205 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.063405+00:00' + latitude: 41.63333 + longitude: 23.96667 + geocoding_timestamp: '2025-12-09T20:51:44.661730+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Читалищна библиотека “Просвета-1937” official youtube_search_timestamp: '2025-12-09T08:50:35.873186+00:00' diff --git a/data/custodian/BG-BLG-PLO-L-LB-library_bg_0130001.yaml b/data/custodian/BG-BLG-PLO-L-LB-library_bg_0130001.yaml index a8acd82bf0..687e0d12cd 100644 --- a/data/custodian/BG-BLG-PLO-L-LB-library_bg_0130001.yaml +++ b/data/custodian/BG-BLG-PLO-L-LB-library_bg_0130001.yaml @@ -140,6 +140,10 @@ location: geonames_id: 728195 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.110926+00:00' + latitude: 41.65 + longitude: 23.26667 + geocoding_timestamp: '2025-12-09T20:51:41.806036+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-0130001 official youtube_search_timestamp: '2025-12-09T08:50:36.527702+00:00' diff --git a/data/custodian/BG-BLG-PLO-L-LB.yaml b/data/custodian/BG-BLG-PLO-L-LB.yaml index f733d479cf..ad75bcbeab 100644 --- a/data/custodian/BG-BLG-PLO-L-LB.yaml +++ b/data/custodian/BG-BLG-PLO-L-LB.yaml @@ -140,6 +140,10 @@ location: geonames_id: 728195 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.151286+00:00' + latitude: 41.65 + longitude: 23.26667 + geocoding_timestamp: '2025-12-09T20:51:37.709987+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-0130001 official youtube_search_timestamp: '2025-12-09T08:50:37.188079+00:00' diff --git a/data/custodian/BG-BLG-YAK-L-LB-library_bg_0130004.yaml b/data/custodian/BG-BLG-YAK-L-LB-library_bg_0130004.yaml index 3e272dbc20..04c0426d11 100644 --- a/data/custodian/BG-BLG-YAK-L-LB-library_bg_0130004.yaml +++ b/data/custodian/BG-BLG-YAK-L-LB-library_bg_0130004.yaml @@ -145,6 +145,10 @@ location: geonames_id: 725586 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.218974+00:00' + latitude: 42.02528 + longitude: 23.68417 + geocoding_timestamp: '2025-12-09T20:51:43.041277+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-0130004 official youtube_search_timestamp: '2025-12-09T08:50:37.845289+00:00' diff --git a/data/custodian/BG-KHO-DIM-L-LB-library_bg_2620000.yaml b/data/custodian/BG-KHO-DIM-L-LB-library_bg_2620000.yaml index 2402b3a55b..bb8f8a5de4 100644 --- a/data/custodian/BG-KHO-DIM-L-LB-library_bg_2620000.yaml +++ b/data/custodian/BG-KHO-DIM-L-LB-library_bg_2620000.yaml @@ -145,6 +145,10 @@ location: geonames_id: 732263 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.297290+00:00' + latitude: 42.05 + longitude: 25.6 + geocoding_timestamp: '2025-12-09T20:51:37.558971+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-2620000 official youtube_search_timestamp: '2025-12-09T08:50:38.507775+00:00' diff --git a/data/custodian/BG-LOV-ABL-L-LB-library_bg_0130002.yaml b/data/custodian/BG-LOV-ABL-L-LB-library_bg_0130002.yaml index 0e55f9460a..b1d36a36c0 100644 --- a/data/custodian/BG-LOV-ABL-L-LB-library_bg_0130002.yaml +++ b/data/custodian/BG-LOV-ABL-L-LB-library_bg_0130002.yaml @@ -140,6 +140,10 @@ location: geonames_id: 733738 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.343996+00:00' + latitude: 43.0393 + longitude: 24.69255 + geocoding_timestamp: '2025-12-09T20:51:38.073930+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-0130002 official youtube_search_timestamp: '2025-12-09T08:50:39.168843+00:00' diff --git a/data/custodian/BG-PAZ-DYU-L-КЦБП-1886.yaml b/data/custodian/BG-PAZ-DYU-L-КЦБП-1886.yaml index 1bd77babc0..c2a8309775 100644 --- a/data/custodian/BG-PAZ-DYU-L-КЦБП-1886.yaml +++ b/data/custodian/BG-PAZ-DYU-L-КЦБП-1886.yaml @@ -140,6 +140,10 @@ location: geonames_id: 731750 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.385656+00:00' + latitude: 42.45 + longitude: 24.38333 + geocoding_timestamp: '2025-12-09T20:51:42.560405+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Културен център Библиотека „Просвета-1886” official youtube_search_timestamp: '2025-12-09T08:50:39.828686+00:00' diff --git a/data/custodian/BG-PAZ-DYU-L-КЦБП.yaml b/data/custodian/BG-PAZ-DYU-L-КЦБП.yaml index 931118bbdd..06ff20fbcd 100644 --- a/data/custodian/BG-PAZ-DYU-L-КЦБП.yaml +++ b/data/custodian/BG-PAZ-DYU-L-КЦБП.yaml @@ -140,6 +140,10 @@ location: geonames_id: 731750 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.423305+00:00' + latitude: 42.45 + longitude: 24.38333 + geocoding_timestamp: '2025-12-09T20:51:36.654778+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Културен център Библиотека „Просвета-1886” official youtube_search_timestamp: '2025-12-09T08:50:40.490371+00:00' diff --git a/data/custodian/BG-PAZ-PAN-L-LB-library_bg_1320000.yaml b/data/custodian/BG-PAZ-PAN-L-LB-library_bg_1320000.yaml index 6b595bac29..f6bf21d27f 100644 --- a/data/custodian/BG-PAZ-PAN-L-LB-library_bg_1320000.yaml +++ b/data/custodian/BG-PAZ-PAN-L-LB-library_bg_1320000.yaml @@ -145,6 +145,10 @@ location: geonames_id: 728448 feature_code: PPLA2 normalization_timestamp: '2025-12-09T13:20:52.470400+00:00' + latitude: 42.49518 + longitude: 24.19021 + geocoding_timestamp: '2025-12-09T20:51:41.561084+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-1320000 official youtube_search_timestamp: '2025-12-09T08:50:41.151627+00:00' diff --git a/data/custodian/BG-PDV-ASE-L-LB-library_bg_1620001.yaml b/data/custodian/BG-PDV-ASE-L-LB-library_bg_1620001.yaml index e792863173..ab6edbcf0f 100644 --- a/data/custodian/BG-PDV-ASE-L-LB-library_bg_1620001.yaml +++ b/data/custodian/BG-PDV-ASE-L-LB-library_bg_1620001.yaml @@ -145,6 +145,10 @@ location: geonames_id: 733618 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.524861+00:00' + latitude: 42.01667 + longitude: 24.86667 + geocoding_timestamp: '2025-12-09T20:51:41.932206+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-1620001 official youtube_search_timestamp: '2025-12-09T08:50:41.806284+00:00' diff --git a/data/custodian/BG-PDV-GAL-L-LB-library_bg_2420002.yaml b/data/custodian/BG-PDV-GAL-L-LB-library_bg_2420002.yaml index e8f044a375..b0d5de09f7 100644 --- a/data/custodian/BG-PDV-GAL-L-LB-library_bg_2420002.yaml +++ b/data/custodian/BG-PDV-GAL-L-LB-library_bg_2420002.yaml @@ -140,6 +140,10 @@ location: geonames_id: 730988 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.595161+00:00' + latitude: 42.01667 + longitude: 24.71667 + geocoding_timestamp: '2025-12-09T20:51:42.746953+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-2420002 official youtube_search_timestamp: '2025-12-09T08:50:42.464004+00:00' diff --git a/data/custodian/BG-PDV-KAR-L-LB-library_bg_1620000.yaml b/data/custodian/BG-PDV-KAR-L-LB-library_bg_1620000.yaml index f50e93a84e..80de8e3adb 100644 --- a/data/custodian/BG-PDV-KAR-L-LB-library_bg_1620000.yaml +++ b/data/custodian/BG-PDV-KAR-L-LB-library_bg_1620000.yaml @@ -145,6 +145,10 @@ location: geonames_id: 730565 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.650671+00:00' + latitude: 42.63333 + longitude: 24.8 + geocoding_timestamp: '2025-12-09T20:51:41.469746+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-1620000 official youtube_search_timestamp: '2025-12-09T08:50:43.116946+00:00' diff --git a/data/custodian/BG-RAZ-ISP-L-LB-library_bg_1720000.yaml b/data/custodian/BG-RAZ-ISP-L-LB-library_bg_1720000.yaml index 503877edb8..1d4d187018 100644 --- a/data/custodian/BG-RAZ-ISP-L-LB-library_bg_1720000.yaml +++ b/data/custodian/BG-RAZ-ISP-L-LB-library_bg_1720000.yaml @@ -145,6 +145,10 @@ location: geonames_id: 730866 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.699870+00:00' + latitude: 43.71667 + longitude: 26.83333 + geocoding_timestamp: '2025-12-09T20:51:44.256682+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-1720000 official youtube_search_timestamp: '2025-12-09T08:50:43.771232+00:00' diff --git a/data/custodian/BG-SFO-SAM-L-LB-library_bg_2320000.yaml b/data/custodian/BG-SFO-SAM-L-LB-library_bg_2320000.yaml index 8338de453f..8bb5753d95 100644 --- a/data/custodian/BG-SFO-SAM-L-LB-library_bg_2320000.yaml +++ b/data/custodian/BG-SFO-SAM-L-LB-library_bg_2320000.yaml @@ -140,6 +140,10 @@ location: geonames_id: 727462 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.742143+00:00' + latitude: 42.33855 + longitude: 23.55805 + geocoding_timestamp: '2025-12-09T20:51:35.934662+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-2320000 official youtube_search_timestamp: '2025-12-09T08:50:44.429842+00:00' diff --git a/data/custodian/BG-SLV-KOT-L-LB-library_bg_2020000.yaml b/data/custodian/BG-SLV-KOT-L-LB-library_bg_2020000.yaml index ceef101a67..5b74678fa1 100644 --- a/data/custodian/BG-SLV-KOT-L-LB-library_bg_2020000.yaml +++ b/data/custodian/BG-SLV-KOT-L-LB-library_bg_2020000.yaml @@ -145,6 +145,10 @@ location: geonames_id: 730073 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.788441+00:00' + latitude: 42.88333 + longitude: 26.45 + geocoding_timestamp: '2025-12-09T20:51:37.300285+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-2020000 official youtube_search_timestamp: '2025-12-09T08:50:45.087117+00:00' diff --git a/data/custodian/BG-SZR-KAZ-L-LB-library_bg_2420000.yaml b/data/custodian/BG-SZR-KAZ-L-LB-library_bg_2420000.yaml index 617f3546d1..02157db84d 100644 --- a/data/custodian/BG-SZR-KAZ-L-LB-library_bg_2420000.yaml +++ b/data/custodian/BG-SZR-KAZ-L-LB-library_bg_2420000.yaml @@ -145,6 +145,10 @@ location: geonames_id: 730496 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.868948+00:00' + latitude: 42.61667 + longitude: 25.4 + geocoding_timestamp: '2025-12-09T20:51:38.847068+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-2420000 official youtube_search_timestamp: '2025-12-09T08:50:45.738151+00:00' diff --git a/data/custodian/BG-SZR-RAD-L-LB-library_bg_2420001.yaml b/data/custodian/BG-SZR-RAD-L-LB-library_bg_2420001.yaml index bbb8ce27b2..b7dc308117 100644 --- a/data/custodian/BG-SZR-RAD-L-LB-library_bg_2420001.yaml +++ b/data/custodian/BG-SZR-RAD-L-LB-library_bg_2420001.yaml @@ -140,6 +140,10 @@ location: geonames_id: 727838 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.940065+00:00' + latitude: 42.3 + longitude: 25.93333 + geocoding_timestamp: '2025-12-09T20:51:36.498169+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-2420001 official youtube_search_timestamp: '2025-12-09T08:50:46.394966+00:00' diff --git a/data/custodian/BG-VTR-GOR-L-LB-library_bg_0420000.yaml b/data/custodian/BG-VTR-GOR-L-LB-library_bg_0420000.yaml index bb861d18c0..a57ed3d358 100644 --- a/data/custodian/BG-VTR-GOR-L-LB-library_bg_0420000.yaml +++ b/data/custodian/BG-VTR-GOR-L-LB-library_bg_0420000.yaml @@ -140,6 +140,10 @@ location: geonames_id: 731233 feature_code: PPL normalization_timestamp: '2025-12-09T13:20:52.989667+00:00' + latitude: 43.12778 + longitude: 25.70167 + geocoding_timestamp: '2025-12-09T20:51:43.758232+00:00' + geocoding_method: GEONAMES_ID_LOOKUP youtube_status: NOT_FOUND youtube_search_query: Library BG-0420000 official youtube_search_timestamp: '2025-12-09T08:50:47.052626+00:00' diff --git a/data/custodian/EG-ALX-ALX-L-AUL-alexandria_university_libraries.yaml b/data/custodian/EG-ALX-ALX-L-AUL-alexandria_university_libraries.yaml index bb2dbaf80e..a77ae13bb7 100644 --- a/data/custodian/EG-ALX-ALX-L-AUL-alexandria_university_libraries.yaml +++ b/data/custodian/EG-ALX-ALX-L-AUL-alexandria_university_libraries.yaml @@ -40,6 +40,12 @@ location: region_code: ALX country: EG normalization_timestamp: '2025-12-09T13:32:15.353814+00:00' + city: Alexandria + latitude: 31.20176 + longitude: 29.91582 + geonames_id: 361058 + geocoding_timestamp: '2025-12-09T20:54:53.993254+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-ALX-ALX-L-AUL.yaml b/data/custodian/EG-ALX-ALX-L-AUL.yaml index 9130365a0a..8ad708c4d9 100644 --- a/data/custodian/EG-ALX-ALX-L-AUL.yaml +++ b/data/custodian/EG-ALX-ALX-L-AUL.yaml @@ -40,6 +40,12 @@ location: region_code: ALX country: EG normalization_timestamp: '2025-12-09T13:32:15.368532+00:00' + city: Alexandria + latitude: 31.20176 + longitude: 29.91582 + geonames_id: 361058 + geocoding_timestamp: '2025-12-09T20:54:53.587384+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-ALX-ALX-M-GRMA-greco_roman_museum_alexandria.yaml b/data/custodian/EG-ALX-ALX-M-GRMA-greco_roman_museum_alexandria.yaml index 56c6232cad..ce53d27da5 100644 --- a/data/custodian/EG-ALX-ALX-M-GRMA-greco_roman_museum_alexandria.yaml +++ b/data/custodian/EG-ALX-ALX-M-GRMA-greco_roman_museum_alexandria.yaml @@ -37,6 +37,12 @@ location: region_code: ALX country: EG normalization_timestamp: '2025-12-09T13:32:15.389275+00:00' + city: Alexandria + latitude: 31.20176 + longitude: 29.91582 + geonames_id: 361058 + geocoding_timestamp: '2025-12-09T20:54:53.911959+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-ALX-ALX-M-GRMA.yaml b/data/custodian/EG-ALX-ALX-M-GRMA.yaml index 47c4530306..a21d93f7a5 100644 --- a/data/custodian/EG-ALX-ALX-M-GRMA.yaml +++ b/data/custodian/EG-ALX-ALX-M-GRMA.yaml @@ -37,6 +37,12 @@ location: region_code: ALX country: EG normalization_timestamp: '2025-12-09T13:32:15.403860+00:00' + city: Alexandria + latitude: 31.20176 + longitude: 29.91582 + geonames_id: 361058 + geocoding_timestamp: '2025-12-09T20:54:53.273511+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-AST-ASS-L-AUL-assiut_university_libraries.yaml b/data/custodian/EG-AST-ASS-L-AUL-assiut_university_libraries.yaml index 8f99b9682a..e963cecb1c 100644 --- a/data/custodian/EG-AST-ASS-L-AUL-assiut_university_libraries.yaml +++ b/data/custodian/EG-AST-ASS-L-AUL-assiut_university_libraries.yaml @@ -37,6 +37,12 @@ location: region_code: AST country: EG normalization_timestamp: '2025-12-09T13:32:15.420856+00:00' + city: Assiut + latitude: 27.18096 + longitude: 31.18368 + geonames_id: 359783 + geocoding_timestamp: '2025-12-09T20:54:52.900410+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-G-CICC-contemporary_image_collective_cic.yaml b/data/custodian/EG-C-CAI-G-CICC-contemporary_image_collective_cic.yaml index 1644997150..eeee212fd4 100644 --- a/data/custodian/EG-C-CAI-G-CICC-contemporary_image_collective_cic.yaml +++ b/data/custodian/EG-C-CAI-G-CICC-contemporary_image_collective_cic.yaml @@ -44,6 +44,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:15.529765+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:02.181456+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-G-CICC.yaml b/data/custodian/EG-C-CAI-G-CICC.yaml index 4eaaa84849..f5358c809e 100644 --- a/data/custodian/EG-C-CAI-G-CICC.yaml +++ b/data/custodian/EG-C-CAI-G-CICC.yaml @@ -44,6 +44,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:15.546994+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.879733+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-G-DCAC-darb_1718_contemporary_art_center.yaml b/data/custodian/EG-C-CAI-G-DCAC-darb_1718_contemporary_art_center.yaml index fddfa21970..e7a804d9a3 100644 --- a/data/custodian/EG-C-CAI-G-DCAC-darb_1718_contemporary_art_center.yaml +++ b/data/custodian/EG-C-CAI-G-DCAC-darb_1718_contemporary_art_center.yaml @@ -44,6 +44,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:15.574602+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.747894+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-G-DCAC.yaml b/data/custodian/EG-C-CAI-G-DCAC.yaml index 8cf20578e5..3f7de0f309 100644 --- a/data/custodian/EG-C-CAI-G-DCAC.yaml +++ b/data/custodian/EG-C-CAI-G-DCAC.yaml @@ -44,6 +44,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:15.589395+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:02.029274+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-G-MG-mashrabia_gallery.yaml b/data/custodian/EG-C-CAI-G-MG-mashrabia_gallery.yaml index 92a0525332..ae56aa213f 100644 --- a/data/custodian/EG-C-CAI-G-MG-mashrabia_gallery.yaml +++ b/data/custodian/EG-C-CAI-G-MG-mashrabia_gallery.yaml @@ -44,6 +44,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:15.626735+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.926885+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-G-MG.yaml b/data/custodian/EG-C-CAI-G-MG.yaml index 1278d91232..b549c3accd 100644 --- a/data/custodian/EG-C-CAI-G-MG.yaml +++ b/data/custodian/EG-C-CAI-G-MG.yaml @@ -44,6 +44,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:15.648683+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:02.195743+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-G-PACOH-palace_of_arts_cairo_opera_house.yaml b/data/custodian/EG-C-CAI-G-PACOH-palace_of_arts_cairo_opera_house.yaml index b0eae1ea58..c21fd5594f 100644 --- a/data/custodian/EG-C-CAI-G-PACOH-palace_of_arts_cairo_opera_house.yaml +++ b/data/custodian/EG-C-CAI-G-PACOH-palace_of_arts_cairo_opera_house.yaml @@ -44,6 +44,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:15.664000+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.513913+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-G-PACOH.yaml b/data/custodian/EG-C-CAI-G-PACOH.yaml index c5fdff0976..4d1f0b08ef 100644 --- a/data/custodian/EG-C-CAI-G-PACOH.yaml +++ b/data/custodian/EG-C-CAI-G-PACOH.yaml @@ -44,6 +44,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:15.679180+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.903642+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-G-SG-safarkhan_gallery.yaml b/data/custodian/EG-C-CAI-G-SG-safarkhan_gallery.yaml index 35a2601b57..b22e7033e5 100644 --- a/data/custodian/EG-C-CAI-G-SG-safarkhan_gallery.yaml +++ b/data/custodian/EG-C-CAI-G-SG-safarkhan_gallery.yaml @@ -44,6 +44,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:15.698208+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.476557+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-G-SG.yaml b/data/custodian/EG-C-CAI-G-SG.yaml index 84d1597d72..ca0f927f1c 100644 --- a/data/custodian/EG-C-CAI-G-SG.yaml +++ b/data/custodian/EG-C-CAI-G-SG.yaml @@ -44,6 +44,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:15.716605+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:02.058436+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-ASUL-ain_shams_university_libraries.yaml b/data/custodian/EG-C-CAI-L-ASUL-ain_shams_university_libraries.yaml index 1125ee242a..ce5890e41f 100644 --- a/data/custodian/EG-C-CAI-L-ASUL-ain_shams_university_libraries.yaml +++ b/data/custodian/EG-C-CAI-L-ASUL-ain_shams_university_libraries.yaml @@ -40,6 +40,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.732617+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.664197+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-ASUL.yaml b/data/custodian/EG-C-CAI-L-ASUL.yaml index 5c15b207fd..440de2f0b2 100644 --- a/data/custodian/EG-C-CAI-L-ASUL.yaml +++ b/data/custodian/EG-C-CAI-L-ASUL.yaml @@ -40,6 +40,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.747417+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.478356+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-AUCAL-american_university_in_cairo_auc_libraries.yaml b/data/custodian/EG-C-CAI-L-AUCAL-american_university_in_cairo_auc_libraries.yaml index d561efe946..78c630225a 100644 --- a/data/custodian/EG-C-CAI-L-AUCAL-american_university_in_cairo_auc_libraries.yaml +++ b/data/custodian/EG-C-CAI-L-AUCAL-american_university_in_cairo_auc_libraries.yaml @@ -40,6 +40,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.767017+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:54.132822+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-AUCAL.yaml b/data/custodian/EG-C-CAI-L-AUCAL.yaml index 84be624e99..44e992f2b2 100644 --- a/data/custodian/EG-C-CAI-L-AUCAL.yaml +++ b/data/custodian/EG-C-CAI-L-AUCAL.yaml @@ -40,6 +40,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.782524+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:55.086687+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-AUL-al_azhar_university_library.yaml b/data/custodian/EG-C-CAI-L-AUL-al_azhar_university_library.yaml index 0faf625d18..ca9558bc2d 100644 --- a/data/custodian/EG-C-CAI-L-AUL-al_azhar_university_library.yaml +++ b/data/custodian/EG-C-CAI-L-AUL-al_azhar_university_library.yaml @@ -40,6 +40,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.801850+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:52.932236+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-BUEBL-british_university_in_egypt_bue_library.yaml b/data/custodian/EG-C-CAI-L-BUEBL-british_university_in_egypt_bue_library.yaml index cd835250d2..ef7673a7f4 100644 --- a/data/custodian/EG-C-CAI-L-BUEBL-british_university_in_egypt_bue_library.yaml +++ b/data/custodian/EG-C-CAI-L-BUEBL-british_university_in_egypt_bue_library.yaml @@ -36,6 +36,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.815469+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.121684+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-BUEBL.yaml b/data/custodian/EG-C-CAI-L-BUEBL.yaml index e55f9dd851..db86c7abad 100644 --- a/data/custodian/EG-C-CAI-L-BUEBL.yaml +++ b/data/custodian/EG-C-CAI-L-BUEBL.yaml @@ -36,6 +36,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.838454+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.617048+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-CULS-cairo_university_library_system.yaml b/data/custodian/EG-C-CAI-L-CULS-cairo_university_library_system.yaml index 6155abd864..886644db1a 100644 --- a/data/custodian/EG-C-CAI-L-CULS-cairo_university_library_system.yaml +++ b/data/custodian/EG-C-CAI-L-CULS-cairo_university_library_system.yaml @@ -37,6 +37,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.850521+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.092940+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-CULS.yaml b/data/custodian/EG-C-CAI-L-CULS.yaml index 6450c24b39..8a5a3a35ed 100644 --- a/data/custodian/EG-C-CAI-L-CULS.yaml +++ b/data/custodian/EG-C-CAI-L-CULS.yaml @@ -37,6 +37,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.864345+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.362496+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-ENLADK-egyptian_national_library_and_archives_dar_al_kutu.yaml b/data/custodian/EG-C-CAI-L-ENLADK-egyptian_national_library_and_archives_dar_al_kutu.yaml index a8bd7f5761..801a6cdc59 100644 --- a/data/custodian/EG-C-CAI-L-ENLADK-egyptian_national_library_and_archives_dar_al_kutu.yaml +++ b/data/custodian/EG-C-CAI-L-ENLADK-egyptian_national_library_and_archives_dar_al_kutu.yaml @@ -39,6 +39,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.883449+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.398547+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-ENLADK.yaml b/data/custodian/EG-C-CAI-L-ENLADK.yaml index c85f93befe..b6fea5de92 100644 --- a/data/custodian/EG-C-CAI-L-ENLADK.yaml +++ b/data/custodian/EG-C-CAI-L-ENLADK.yaml @@ -39,6 +39,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.901361+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.222960+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-GUCGL-german_university_in_cairo_guc_library.yaml b/data/custodian/EG-C-CAI-L-GUCGL-german_university_in_cairo_guc_library.yaml index 91430d1976..c2a2f1f5d0 100644 --- a/data/custodian/EG-C-CAI-L-GUCGL-german_university_in_cairo_guc_library.yaml +++ b/data/custodian/EG-C-CAI-L-GUCGL-german_university_in_cairo_guc_library.yaml @@ -37,6 +37,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.913980+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.428143+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-GUCGL.yaml b/data/custodian/EG-C-CAI-L-GUCGL.yaml index eeb5443b8a..935c449ea5 100644 --- a/data/custodian/EG-C-CAI-L-GUCGL.yaml +++ b/data/custodian/EG-C-CAI-L-GUCGL.yaml @@ -37,6 +37,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.929836+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:54.421956+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-HUL-helwan_university_libraries.yaml b/data/custodian/EG-C-CAI-L-HUL-helwan_university_libraries.yaml index 85f114a5f5..351bd3344b 100644 --- a/data/custodian/EG-C-CAI-L-HUL-helwan_university_libraries.yaml +++ b/data/custodian/EG-C-CAI-L-HUL-helwan_university_libraries.yaml @@ -34,6 +34,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.942731+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.152999+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-HUL.yaml b/data/custodian/EG-C-CAI-L-HUL.yaml index 361f55ff05..f6b7eeaeaa 100644 --- a/data/custodian/EG-C-CAI-L-HUL.yaml +++ b/data/custodian/EG-C-CAI-L-HUL.yaml @@ -34,6 +34,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.954581+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:54.976236+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-NUL-nile_university_library.yaml b/data/custodian/EG-C-CAI-L-NUL-nile_university_library.yaml index 7df546e4aa..5604c1dea6 100644 --- a/data/custodian/EG-C-CAI-L-NUL-nile_university_library.yaml +++ b/data/custodian/EG-C-CAI-L-NUL-nile_university_library.yaml @@ -37,6 +37,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.971248+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.504845+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-L-NUL.yaml b/data/custodian/EG-C-CAI-L-NUL.yaml index 1e32a70604..6400872168 100644 --- a/data/custodian/EG-C-CAI-L-NUL.yaml +++ b/data/custodian/EG-C-CAI-L-NUL.yaml @@ -37,6 +37,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:15.984211+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:54.076403+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-O-EKBE-egyptian_knowledge_bank_ekb.yaml b/data/custodian/EG-C-CAI-O-EKBE-egyptian_knowledge_bank_ekb.yaml index 67351e50ad..397f042cea 100644 --- a/data/custodian/EG-C-CAI-O-EKBE-egyptian_knowledge_bank_ekb.yaml +++ b/data/custodian/EG-C-CAI-O-EKBE-egyptian_knowledge_bank_ekb.yaml @@ -39,6 +39,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:16.000376+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:54.941429+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-O-EKBE.yaml b/data/custodian/EG-C-CAI-O-EKBE.yaml index e47475d759..92e05fffed 100644 --- a/data/custodian/EG-C-CAI-O-EKBE.yaml +++ b/data/custodian/EG-C-CAI-O-EKBE.yaml @@ -39,6 +39,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:16.020329+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.005364+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-O-GEM-global_egyptian_museum.yaml b/data/custodian/EG-C-CAI-O-GEM-global_egyptian_museum.yaml index 717ebe7dfd..33d368aee0 100644 --- a/data/custodian/EG-C-CAI-O-GEM-global_egyptian_museum.yaml +++ b/data/custodian/EG-C-CAI-O-GEM-global_egyptian_museum.yaml @@ -39,6 +39,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:16.037081+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.871205+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-O-GEM.yaml b/data/custodian/EG-C-CAI-O-GEM.yaml index 61b4e3672b..2973653bc2 100644 --- a/data/custodian/EG-C-CAI-O-GEM.yaml +++ b/data/custodian/EG-C-CAI-O-GEM.yaml @@ -39,6 +39,12 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:16.052040+00:00' + city: Cairo + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.545252+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-R-FIEI-french_institute_of_egypt_ifao.yaml b/data/custodian/EG-C-CAI-R-FIEI-french_institute_of_egypt_ifao.yaml index 5faa6b2e62..49984f4d0a 100644 --- a/data/custodian/EG-C-CAI-R-FIEI-french_institute_of_egypt_ifao.yaml +++ b/data/custodian/EG-C-CAI-R-FIEI-french_institute_of_egypt_ifao.yaml @@ -47,6 +47,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:16.068156+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.864476+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-R-FIEI.yaml b/data/custodian/EG-C-CAI-R-FIEI.yaml index fb4350ac3f..415898c765 100644 --- a/data/custodian/EG-C-CAI-R-FIEI.yaml +++ b/data/custodian/EG-C-CAI-R-FIEI.yaml @@ -47,6 +47,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:16.085807+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:02.232230+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-R-GAICD-german_archaeological_institute_cairo_dai.yaml b/data/custodian/EG-C-CAI-R-GAICD-german_archaeological_institute_cairo_dai.yaml index 61f66a7b16..8adbd1c214 100644 --- a/data/custodian/EG-C-CAI-R-GAICD-german_archaeological_institute_cairo_dai.yaml +++ b/data/custodian/EG-C-CAI-R-GAICD-german_archaeological_institute_cairo_dai.yaml @@ -47,6 +47,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:16.103659+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.658871+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-R-GAICD.yaml b/data/custodian/EG-C-CAI-R-GAICD.yaml index 88d23dd91a..3cd4d1dd63 100644 --- a/data/custodian/EG-C-CAI-R-GAICD.yaml +++ b/data/custodian/EG-C-CAI-R-GAICD.yaml @@ -47,6 +47,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:16.120962+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.776237+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-CAI-R-NFICN.yaml b/data/custodian/EG-C-CAI-R-NFICN.yaml index b4ad2d09bc..51798a53db 100644 --- a/data/custodian/EG-C-CAI-R-NFICN.yaml +++ b/data/custodian/EG-C-CAI-R-NFICN.yaml @@ -123,6 +123,11 @@ location: region_code: C country: EG normalization_timestamp: '2025-12-09T13:32:16.161958+00:00' + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:53.057618+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP provenance: notes: - Canonical location normalized on 2025-12-09T12:10:56Z diff --git a/data/custodian/EG-C-NIL-A-NAE-national_archives_of_egypt.yaml b/data/custodian/EG-C-NIL-A-NAE-national_archives_of_egypt.yaml index 483d50d27b..f7b978894b 100644 --- a/data/custodian/EG-C-NIL-A-NAE-national_archives_of_egypt.yaml +++ b/data/custodian/EG-C-NIL-A-NAE-national_archives_of_egypt.yaml @@ -41,6 +41,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:16.182402+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.762919+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-C-NIL-A-NAE.yaml b/data/custodian/EG-C-NIL-A-NAE.yaml index 7d7de68413..4252be55ec 100644 --- a/data/custodian/EG-C-NIL-A-NAE.yaml +++ b/data/custodian/EG-C-NIL-A-NAE.yaml @@ -41,6 +41,10 @@ location: country: EG geonames_id: 360630 normalization_timestamp: '2025-12-09T13:32:16.198399+00:00' + latitude: 30.06263 + longitude: 31.24967 + geocoding_timestamp: '2025-12-09T20:52:01.617483+00:00' + geocoding_method: GEONAMES_ID_LOOKUP provenance: notes: - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:12:51Z diff --git a/data/custodian/EG-CA-CAI-D-PAG.yaml b/data/custodian/EG-CA-CAI-D-PAG.yaml index 4dcfc0e0cf..02d8e36dff 100644 --- a/data/custodian/EG-CA-CAI-D-PAG.yaml +++ b/data/custodian/EG-CA-CAI-D-PAG.yaml @@ -52,13 +52,7 @@ custodian_name: source: palestinian_heritage_extraction confidence: 0.9 extraction_timestamp: '2025-12-06T18:54:54.701573+00:00' -description: "Palestinian Archives Gathering (تجمع الأرشيفات الفلسطينية) is a digital\ - \ umbrella \nplatform project led by Rasha Shaheen. The initiative is based at the\ - \ Egyptian Museum \nin Cairo, Egypt.\n\nThe project was featured in UNESCO and ICA\ - \ (International Council on Archives) \ndocumentation on global efforts to safeguard\ - \ and promote documentary heritage, \nparticularly focusing on Palestinian archival\ - \ materials.\n\nThe platform serves as an aggregation and coordination point for\ - \ Palestinian \narchival initiatives across the diaspora.\n" +description: "Palestinian Archives Gathering (تجمع الأرشيفات الفلسطينية) is a digital umbrella \nplatform project led by Rasha Shaheen. The initiative is based at the Egyptian Museum \nin Cairo, Egypt.\n\nThe project was featured in UNESCO and ICA (International Council on Archives) \ndocumentation on global efforts to safeguard and promote documentary heritage, \nparticularly focusing on Palestinian archival materials.\n\nThe platform serves as an aggregation and coordination point for Palestinian \narchival initiatives across the diaspora.\n" founder: Rasha Shaheen ghcid: ghcid_current: EG-CA-CAI-D-PAG @@ -71,8 +65,7 @@ ghcid: - ghcid: EG-CA-CAI-D-PAG valid_from: '2025-12-07T01:40:00.000000+00:00' ghcid_numeric: 9711908441858038101 - reason: Location corrected to Egyptian Museum Cairo based on UNESCO/ICA documentation - and Google Maps verification + reason: Location corrected to Egyptian Museum Cairo based on UNESCO/ICA documentation and Google Maps verification - ghcid: PS-WE-RAM-D-PAGW valid_from: '2025-12-06T19:07:04.674545+00:00' ghcid_numeric: 12132199637978317992 @@ -81,3 +74,8 @@ location: city: Cairo country: EG normalization_timestamp: '2025-12-09T10:54:41.565987+00:00' + latitude: 30.06263 + longitude: 31.24967 + geonames_id: 360630 + geocoding_timestamp: '2025-12-09T20:54:54.808148+00:00' + geocoding_method: EG_CITY_CODE_LOOKUP diff --git a/scripts/geocode_eg_from_city_code.py b/scripts/geocode_eg_from_city_code.py new file mode 100755 index 0000000000..e5d4ab58c6 --- /dev/null +++ b/scripts/geocode_eg_from_city_code.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 +""" +Add lat/lon coordinates to Egyptian custodian files based on city codes. + +City code mapping for Egypt: +- CAI = Cairo +- ALX = Alexandria +- ASS = Assiut +- NIL = Nile (various locations, use Cairo as proxy) +- GIZ = Giza +- LUX = Luxor +""" + +import sqlite3 +from pathlib import Path +from datetime import datetime, timezone +from ruamel.yaml import YAML + +GEONAMES_DB = Path("data/reference/geonames.db") +CUSTODIAN_DIR = Path("data/custodian") + +yaml = YAML() +yaml.preserve_quotes = True +yaml.width = 4096 + +# Egypt city code mappings (city_code -> GeoNames city name) +EG_CITY_MAPPING = { + 'CAI': 'Cairo', + 'ALX': 'Alexandria', + 'ASS': 'Assiut', # Assiut city + 'NIL': 'Cairo', # Default to Cairo for Nile-related + 'GIZ': 'Giza', + 'LUX': 'Luxor', + 'ASW': 'Aswan', + 'POR': 'Port Said', + 'SUE': 'Suez', + 'MAN': 'Mansoura', + 'TAN': 'Tanta', + 'ISM': 'Ismailia', +} + + +def get_coords_for_city(conn: sqlite3.Connection, city_name: str, country_code: str = 'EG') -> tuple[float, float, int] | None: + """Get lat/lon and geonames_id for a city.""" + cursor = conn.execute( + """SELECT latitude, longitude, geonames_id + FROM cities + WHERE country_code = ? + AND (name = ? OR ascii_name = ?) + AND feature_code IN ('PPL', 'PPLA', 'PPLA2', 'PPLA3', 'PPLC') + ORDER BY population DESC + LIMIT 1""", + (country_code, city_name, city_name) + ) + row = cursor.fetchone() + if row: + return row[0], row[1], row[2] + return None + + +def process_file(filepath: Path, conn: sqlite3.Connection) -> bool: + """Process a single custodian file. Returns True if updated.""" + with open(filepath, 'r', encoding='utf-8') as f: + data = yaml.load(f) + + if not data: + return False + + # Check if already has coordinates + location = data.get('location', {}) + if location.get('latitude') and location.get('longitude'): + return False + + # Get city code from location_resolution or GHCID + city_code = None + + ghcid = data.get('ghcid', {}) + loc_res = ghcid.get('location_resolution', {}) + if loc_res.get('city_code'): + city_code = loc_res['city_code'] + + # Also try to extract from ghcid_current (e.g., EG-C-CAI-L-...) + if not city_code and ghcid.get('ghcid_current'): + parts = ghcid['ghcid_current'].split('-') + if len(parts) >= 3 and parts[0] == 'EG': + city_code = parts[2] + + if not city_code: + print(f" No city code found: {filepath.name}") + return False + + # Map city code to city name + city_name = EG_CITY_MAPPING.get(city_code) + if not city_name: + print(f" Unknown city code {city_code}: {filepath.name}") + return False + + # Look up coordinates + result = get_coords_for_city(conn, city_name) + if not result: + print(f" City not found in GeoNames: {city_name} ({city_code}): {filepath.name}") + return False + + lat, lon, geonames_id = result + + # Update location block + if 'location' not in data: + data['location'] = {} + + data['location']['city'] = city_name + data['location']['latitude'] = lat + data['location']['longitude'] = lon + data['location']['geonames_id'] = geonames_id + data['location']['geocoding_timestamp'] = datetime.now(timezone.utc).isoformat() + data['location']['geocoding_method'] = 'EG_CITY_CODE_LOOKUP' + + # Write back + with open(filepath, 'w', encoding='utf-8') as f: + yaml.dump(data, f) + + return True + + +def main(): + import argparse + parser = argparse.ArgumentParser(description='Geocode Egyptian institutions by city code') + parser.add_argument('--dry-run', action='store_true', help='Show what would be done') + args = parser.parse_args() + + conn = sqlite3.connect(GEONAMES_DB) + + # Find EG files + files = list(CUSTODIAN_DIR.glob("EG-*.yaml")) + print(f"Found {len(files)} EG files") + + updated = 0 + skipped = 0 + + for filepath in files: + if not filepath.is_file(): + continue + + try: + with open(filepath, 'r', encoding='utf-8') as f: + data = yaml.load(f) + + if not data: + continue + + location = data.get('location', {}) + if location.get('latitude'): + skipped += 1 + continue + + if args.dry_run: + # Check what would happen + ghcid = data.get('ghcid', {}) + city_code = ghcid.get('location_resolution', {}).get('city_code') + if not city_code and ghcid.get('ghcid_current'): + parts = ghcid['ghcid_current'].split('-') + if len(parts) >= 3 and parts[0] == 'EG': + city_code = parts[2] + + if city_code and city_code in EG_CITY_MAPPING: + city_name = EG_CITY_MAPPING[city_code] + result = get_coords_for_city(conn, city_name) + if result: + print(f"Would update: {filepath.name} -> {city_name} ({result[0]}, {result[1]})") + updated += 1 + else: + print(f" City not in GeoNames: {city_name}") + else: + print(f" Unknown/no city code: {filepath.name} ({city_code})") + else: + if process_file(filepath, conn): + print(f"Updated: {filepath.name}") + updated += 1 + + except Exception as e: + print(f"Error: {filepath.name}: {e}") + + conn.close() + + print(f"\nSummary:") + print(f" Updated: {updated}") + print(f" Skipped (already has coords): {skipped}") + + +if __name__ == "__main__": + main() diff --git a/scripts/geocode_from_geonames_id.py b/scripts/geocode_from_geonames_id.py new file mode 100755 index 0000000000..f6e898477d --- /dev/null +++ b/scripts/geocode_from_geonames_id.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +""" +Add lat/lon coordinates to custodian files that have geonames_id but missing coordinates. + +Usage: + python scripts/geocode_from_geonames_id.py --country BG + python scripts/geocode_from_geonames_id.py --country EG + python scripts/geocode_from_geonames_id.py # Process all countries +""" + +import sqlite3 +import sys +from pathlib import Path +from datetime import datetime, timezone +from ruamel.yaml import YAML + +GEONAMES_DB = Path("data/reference/geonames.db") +CUSTODIAN_DIR = Path("data/custodian") + +yaml = YAML() +yaml.preserve_quotes = True +yaml.width = 4096 + +def get_coords_from_geonames(conn: sqlite3.Connection, geonames_id: int) -> tuple[float, float] | None: + """Get lat/lon for a GeoNames ID.""" + cursor = conn.execute( + "SELECT latitude, longitude FROM cities WHERE geonames_id = ?", + (geonames_id,) + ) + row = cursor.fetchone() + if row: + return row[0], row[1] + return None + + +def process_file(filepath: Path, conn: sqlite3.Connection) -> bool: + """Process a single custodian file. Returns True if updated.""" + with open(filepath, 'r', encoding='utf-8') as f: + data = yaml.load(f) + + if not data: + return False + + # Check if already has coordinates + location = data.get('location', {}) + if location.get('latitude') and location.get('longitude'): + return False # Already has coordinates + + # Try to find geonames_id in location or location_resolution + geonames_id = None + + # Check location block + if location.get('geonames_id'): + geonames_id = location['geonames_id'] + + # Check ghcid.location_resolution + if not geonames_id: + ghcid = data.get('ghcid', {}) + loc_res = ghcid.get('location_resolution', {}) + if loc_res.get('geonames_id'): + geonames_id = loc_res['geonames_id'] + + if not geonames_id: + return False + + # Look up coordinates + coords = get_coords_from_geonames(conn, geonames_id) + if not coords: + print(f" Warning: GeoNames ID {geonames_id} not found in DB for {filepath.name}") + return False + + lat, lon = coords + + # Update location block + if 'location' not in data: + data['location'] = {} + + data['location']['latitude'] = lat + data['location']['longitude'] = lon + data['location']['geocoding_timestamp'] = datetime.now(timezone.utc).isoformat() + data['location']['geocoding_method'] = 'GEONAMES_ID_LOOKUP' + + # Write back + with open(filepath, 'w', encoding='utf-8') as f: + yaml.dump(data, f) + + return True + + +def main(): + import argparse + parser = argparse.ArgumentParser(description='Add coordinates from GeoNames IDs') + parser.add_argument('--country', type=str, help='Country code to process (e.g., BG, EG)') + parser.add_argument('--dry-run', action='store_true', help='Show what would be done without making changes') + args = parser.parse_args() + + if not GEONAMES_DB.exists(): + print(f"Error: GeoNames DB not found at {GEONAMES_DB}") + sys.exit(1) + + conn = sqlite3.connect(GEONAMES_DB) + + # Find files to process + if args.country: + pattern = f"{args.country}-*.yaml" + else: + pattern = "*.yaml" + + files = list(CUSTODIAN_DIR.glob(pattern)) + print(f"Found {len(files)} files matching {pattern}") + + updated = 0 + skipped = 0 + no_geonames = 0 + + for filepath in files: + # Skip subdirectories + if not filepath.is_file(): + continue + + try: + if args.dry_run: + # Just check if it would be updated + with open(filepath, 'r', encoding='utf-8') as f: + data = yaml.load(f) + if data: + location = data.get('location', {}) + if not location.get('latitude'): + geonames_id = location.get('geonames_id') or data.get('ghcid', {}).get('location_resolution', {}).get('geonames_id') + if geonames_id: + coords = get_coords_from_geonames(conn, geonames_id) + if coords: + print(f"Would update: {filepath.name} -> ({coords[0]}, {coords[1]})") + updated += 1 + else: + no_geonames += 1 + else: + no_geonames += 1 + else: + skipped += 1 + else: + if process_file(filepath, conn): + print(f"Updated: {filepath.name}") + updated += 1 + else: + skipped += 1 + except Exception as e: + print(f"Error processing {filepath.name}: {e}") + + conn.close() + + print(f"\nSummary:") + print(f" Updated: {updated}") + print(f" Skipped (already has coords): {skipped}") + if no_geonames: + print(f" No GeoNames ID: {no_geonames}") + + +if __name__ == "__main__": + main()