diff --git a/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json b/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json index a94cb3f9b3..5ea07895fd 100644 --- a/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json +++ b/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json @@ -7056,7 +7056,297 @@ "CZ-10-PRA-A-AUMAVESPZ.yaml", "CZ-10-PRA-A-AUPZSI.yaml", "CZ-10-PRA-A-AZMVP.yaml", - "CZ-10-PRA-A-BAKPR.yaml" + "CZ-10-PRA-A-BAKPR.yaml", + "JP-10-MAE-L-GPAL.yaml", + "JP-10-MAE-L-GPCSC.yaml", + "JP-10-MAE-L-GPL.yaml", + "JP-10-MAE-L-GPLES.yaml", + "JP-10-MAE-L-GUHWL.yaml", + "JP-10-MAE-L-LGPCHS.yaml", + "JP-10-MAE-L-LITCGU.yaml", + "JP-10-MAE-L-M.yaml", + "JP-10-MAE-L-MCL.yaml", + "JP-10-MAE-L-MKGCL.yaml", + "JP-10-MAE-L-ML-maebashikokadaigakufuzoku_library.yaml", + "JP-10-MAE-L-ML.yaml", + "JP-10-MAE-L-MLE.yaml", + "JP-10-MAE-L-MLF.yaml", + "JP-10-MAE-L-MLH.yaml", + "JP-10-MAE-L-MLJ.yaml", + "JP-10-MAE-L-MLK-maebashishiritsu_library_kaigayabunkan.yaml", + "JP-10-MAE-L-MLK-maebashishiritsu_library_kasukawabunkan.yaml", + "JP-10-MAE-L-MLK-maebashishiritsu_library_kiyosatobunkan.yaml", + "JP-10-MAE-L-MLK.yaml", + "JP-10-MAE-L-MLM-maebashishiritsu_library_miyagibunkan.yaml", + "JP-10-MAE-L-MLM.yaml", + "JP-10-MAE-L-MLN.yaml", + "JP-10-MAE-L-MLO.yaml", + "JP-10-MAE-L-MLS-maebashishiritsu_library_sogokyoikupurazabunkan.yaml", + "JP-10-MAE-L-MLS-maebashishiritsu_library_sojabunkan.yaml", + "JP-10-MAE-L-MLS.yaml", + "JP-10-MAE-L-NITGCL.yaml", + "JP-10-MAE-M-ABCFM.yaml", + "JP-10-MAE-M-AM.yaml", + "JP-10-MAE-M-GSMC.yaml", + "JP-10-MAE-M-MCCSCC.yaml", + "JP-10-MAE-M-MCML.yaml", + "JP-10-MAE-M-MCSYM.yaml", + "JP-10-MAE-M-MSKMH.yaml", + "JP-10-MAE-M-MSMH.yaml", + "JP-10-MID-L-KL.yaml", + "JP-10-MID-L-M.yaml", + "JP-10-MID-L-ML-midorishiritsuomama_library.yaml", + "JP-10-MID-L-ML.yaml", + "JP-10-MID-M-IM.yaml", + "JP-10-MID-M-LMCS.yaml", + "JP-10-MID-M-MCOM.yaml", + "JP-10-MOT-L-GL.yaml", + "JP-10-NUM-L-NCL.yaml", + "JP-10-NUM-M-NCHM.yaml", + "JP-10-NUM-M-U.yaml", + "JP-10-ORA-L-CL.yaml", + "JP-10-ORA-L-I.yaml", + "JP-10-ORA-L-MTL.yaml", + "JP-10-ORA-L-OL-orachoritsu_library.yaml", + "JP-10-ORA-L-OL.yaml", + "JP-10-ORA-L-TLIL.yaml", + "JP-10-ORA-M-OMERC.yaml", + "JP-10-OTA-L-AMLO.yaml", + "JP-10-OTA-L-GL.yaml", + "JP-10-OTA-L-KL.yaml", + "JP-10-OTA-L-OL-otashiritsunitta_library.yaml", + "JP-10-OTA-L-OL-otashiritsuojima_library.yaml", + "JP-10-OTA-L-OL-otashiritsuyabuzukahommachi_library.yaml", + "JP-10-OTA-L-OL.yaml", + "JP-10-OTA-M-AMLO.yaml", + "JP-10-OTA-M-EDTMJM.yaml", + "JP-10-OTA-M-GCSM.yaml", + "JP-10-OTA-M-JSI.yaml", + "JP-10-OTA-M-NHM.yaml", + "JP-10-OTA-M-THMM.yaml", + "JP-10-REN-M-TMA.yaml", + "JP-10-SAW-L-GPWSUL.yaml", + "JP-10-SAW-L-TTL.yaml", + "JP-10-SHI-L-S-shibukawashiakaborikominkantoshoshitsu.yaml", + "JP-10-SHI-L-S.yaml", + "JP-10-SHI-L-SCL.yaml", + "JP-10-SHI-L-SL.yaml", + "JP-10-SHI-M-HMA.yaml", + "JP-10-SHI-M-IAM.yaml", + "JP-10-SHI-M-SCAHM.yaml", + "JP-10-SHI-M-SCHHM.yaml", + "JP-10-SHI-M-SCMHKSM.yaml", + "JP-10-SHI-M-TRLM.yaml", + "CZ-10-PRA-A-BANUPKIB.yaml", + "CZ-10-PRA-A-BAVZ.yaml", + "CZ-10-PRA-A-EUACVV.yaml", + "CZ-10-PRA-A-MCPUMCOSA.yaml", + "CZ-10-PRA-A-MUAVCAUTGM.yaml", + "CZ-10-PRA-A-NA.yaml", + "CZ-10-PRA-A-NKPV.yaml", + "CZ-10-PRA-A-NMADTVS.yaml", + "CZ-10-PRA-A-NMAMDH.yaml", + "CZ-10-PRA-A-NTMAAS.yaml", + "CZ-10-PRA-A-NULK.yaml", + "CZ-10-PRA-A-OAZIMAM.yaml", + "CZ-10-PRA-A-SACTU.yaml", + "CZ-10-PRA-A-SACUZK.yaml", + "CZ-10-PRA-A-SAMCP.yaml", + "CZ-10-PRA-A-SANBU.yaml", + "CZ-10-PRA-A-SANUPKIB.yaml", + "CZ-10-PRA-A-SOAVP.yaml", + "CZ-10-PRA-A-UAZK.yaml", + "CZ-10-PRA-A-UDUKAUK.yaml", + "CZ-10-PRA-A-VACKOS.yaml", + "CZ-10-PRA-A-ZI.yaml", + "CZ-10-PRA-E-AGJSSPSJZS.yaml", + "CZ-10-PRA-E-DCKK.yaml", + "CZ-10-PRA-E-GPZK.yaml", + "CZ-10-PRA-E-JSHMPK.yaml", + "CZ-10-PRA-E-JVOSSPT.yaml", + "CZ-10-PRA-E-PKSK.yaml", + "CZ-10-PRA-E-SKICZSP.yaml", + "CZ-10-PRA-E-SPSJT.yaml", + "CZ-10-PRA-E-SPSSSHMPK.yaml", + "CZ-10-PRA-E-SZSK.yaml", + "CZ-10-PRA-E-UK.yaml", + "CZ-10-PRA-E-VOSCSRKS.yaml", + "CZ-10-PRA-E-VOSISSSEMI.yaml", + "CZ-10-PRA-E-VOSONSPSOP.yaml", + "CZ-10-PRA-E-VOSSOSPGEK.yaml", + "CZ-10-PRA-E-VOSSPSDK.yaml", + "CZ-10-PRA-E-VOSSSPSSSS.yaml", + "CZ-10-PRA-E-VOSTRSUSTR.yaml", + "CZ-10-PRA-E-VOSUSUS.yaml", + "CZ-10-PRA-E-VOSZSZSVPA.yaml", + "CZ-10-PRA-E-ZKSKSC.yaml", + "CZ-10-PRA-E-ZSHSK.yaml", + "CZ-10-PRA-G-CFGR.yaml", + "CZ-10-PRA-G-GHMPC.yaml", + "CZ-10-PRA-G-NGVPK.yaml", + "CZ-10-PRA-H-BASVSMK.yaml", + "CZ-10-PRA-H-BDCSAP.yaml", + "CZ-10-PRA-H-BPK.yaml", + "CZ-10-PRA-H-CCHUURUAM.yaml", + "CZ-10-PRA-H-CKACKK.yaml", + "CZ-10-PRA-H-CPRSAFSTVP-ceska_provincie_radu_sv_augustina_farnost_sv_tomas.yaml", + "CZ-10-PRA-H-CPRSAFSTVP.yaml", + "CZ-10-PRA-H-ETSVOSTSK.yaml", + "CZ-10-PRA-H-HITS.yaml", + "CZ-10-PRA-H-KDSDMSJK.yaml", + "CZ-10-PRA-H-KKPSSK.yaml", + "CZ-10-PRA-H-MBTSK.yaml", + "CZ-10-PRA-H-PBFK.yaml", + "CZ-10-PRA-H-ZOVPK.yaml", + "CZ-10-PRA-L-A-aritma.yaml", + "CZ-10-PRA-L-A.yaml", + "CZ-10-PRA-L-AAVSK.yaml", + "CZ-10-PRA-L-AC.yaml", + "CZ-10-PRA-L-ACRSRK.yaml", + "CZ-10-PRA-L-ACSRK.yaml", + "CZ-10-PRA-L-ACVUK.yaml", + "CZ-10-PRA-L-AHS.yaml", + "CZ-10-PRA-L-AHUV.yaml", + "CZ-10-PRA-L-AMSK.yaml", + "CZ-10-PRA-L-AMUVPKA.yaml", + "CZ-10-PRA-L-ANAC-academia_nakladatelstvi_av_cr.yaml", + "CZ-10-PRA-L-ANAC.yaml", + "CZ-10-PRA-L-AP.yaml", + "CZ-10-PRA-L-APS.yaml", + "CZ-10-PRA-L-APSR.yaml", + "CZ-10-PRA-L-APSRSPKDTK.yaml", + "CZ-10-PRA-L-AS.yaml", + "CZ-10-PRA-L-ASPK.yaml", + "CZ-10-PRA-L-ASR-acidotechna_sro.yaml", + "CZ-10-PRA-L-ASR.yaml", + "CZ-10-PRA-L-ATPSK.yaml", + "CZ-10-PRA-L-AUACPVVK.yaml", + "CZ-10-PRA-L-AVCAUPP-akademie_ved_cr_astronomicky_ustav_pobocka_praha.yaml", + "CZ-10-PRA-L-AVCAUPP.yaml", + "CZ-10-PRA-L-AVCEI.yaml", + "CZ-10-PRA-L-AVCFU-akademie_ved_cr_farmakologicky_ustav.yaml", + "CZ-10-PRA-L-AVCFU.yaml", + "CZ-10-PRA-L-AVCLEB.yaml", + "CZ-10-PRA-L-AVCMU-akademie_ved_cr_mikrobiologicky_ustav.yaml", + "CZ-10-PRA-L-AVCMU.yaml", + "CZ-10-PRA-L-AVCPLS-akademie_ved_cr_patentove_a_licencni_sluzby.yaml", + "CZ-10-PRA-L-AVCPLS.yaml", + "CZ-10-PRA-L-AVCPU.yaml", + "CZ-10-PRA-L-AVCPUJK.yaml", + "CZ-10-PRA-L-AVCUAC.yaml", + "CZ-10-PRA-L-AVCUFM.yaml", + "CZ-10-PRA-L-AVCUFR.yaml", + "CZ-10-PRA-L-AVCUMG-akademie_ved_cr_ustav_molekularni_genetiky.yaml", + "CZ-10-PRA-L-AVCUMG.yaml", + "CZ-10-PRA-L-AVCUPCSLON.yaml", + "CZ-10-PRA-L-AVCUPEK-akademie_ved_cr_ustav_pro_elektrotechniku_knihovna.yaml", + "CZ-10-PRA-L-AVCUPEK.yaml", + "CZ-10-PRA-L-AVCVD-akademie_ved_cr_vyvojove_dilny.yaml", + "CZ-10-PRA-L-AVCVD.yaml", + "CZ-10-PRA-L-AVSSK.yaml", + "CZ-10-PRA-L-AVUVPK.yaml", + "CZ-10-PRA-L-AZN.yaml", + "CZ-10-PRA-L-B.yaml", + "CZ-10-PRA-L-BL.yaml", + "CZ-10-PRA-L-BLZ.yaml", + "CZ-10-PRA-L-BMSSR.yaml", + "CZ-10-PRA-L-BOPS.yaml", + "CZ-10-PRA-L-BPSRPNM.yaml", + "CZ-10-PRA-L-BS.yaml", + "CZ-10-PRA-L-BTS.yaml", + "CZ-10-PRA-L-BVCRSR.yaml", + "CZ-10-PRA-L-BZHMPK.yaml", + "CZ-10-PRA-L-BZVPM.yaml", + "CZ-10-PRA-L-C.yaml", + "CZ-10-PRA-L-CA.yaml", + "CZ-10-PRA-L-CAPOC.yaml", + "CZ-10-PRA-L-CAS.yaml", + "CZ-10-PRA-L-CBSK.yaml", + "CZ-10-PRA-L-CBSPAC.yaml", + "CZ-10-PRA-L-CBUUER.yaml", + "CZ-10-PRA-L-CCS.yaml", + "CZ-10-PRA-L-CDOPES.yaml", + "CZ-10-PRA-L-CDS.yaml", + "CZ-10-PRA-L-CDSCLS.yaml", + "CZ-10-PRA-L-CDSCTS.yaml", + "CZ-10-PRA-L-CDSS.yaml", + "CZ-10-PRA-L-CDSVZPOKV.yaml", + "CZ-10-PRA-L-CDZUKCD.yaml", + "CZ-10-PRA-L-CEKJK.yaml", + "CZ-10-PRA-L-CES.yaml", + "CZ-10-PRA-L-CEU.yaml", + "CZ-10-PRA-L-CEUEML.yaml", + "CZ-10-PRA-L-CFU.yaml", + "CZ-10-PRA-L-CFUPVVSV.yaml", + "CZ-10-PRA-L-CGSK.yaml", + "CZ-10-PRA-L-CGUMS.yaml", + "CZ-10-PRA-L-CHVDISK.yaml", + "CZ-10-PRA-L-CKCJ.yaml", + "CZ-10-PRA-L-CKP.yaml", + "CZ-10-PRA-L-CKPAE.yaml", + "CZ-10-PRA-L-CKPPVH.yaml", + "CZ-10-PRA-L-CKS.yaml", + "CZ-10-PRA-L-CKTATZP.yaml", + "CZ-10-PRA-L-CLEO.yaml", + "CZ-10-PRA-L-CLPRSR.yaml", + "CZ-10-PRA-L-CLS.yaml", + "CZ-10-PRA-L-CLVNC.yaml", + "CZ-10-PRA-L-CLVVZ.yaml", + "CZ-10-PRA-L-CMJZS.yaml", + "CZ-10-PRA-L-CNES.yaml", + "CZ-10-PRA-L-CNPS.yaml", + "CZ-10-PRA-L-COL.yaml", + "CZ-10-PRA-L-COPK.yaml", + "CZ-10-PRA-L-COTS.yaml", + "CZ-10-PRA-L-CP.yaml", + "CZ-10-PRA-L-CPDSPPS.yaml", + "CZ-10-PRA-L-CPP.yaml", + "CZ-10-PRA-L-CPRPPP.yaml", + "CZ-10-PRA-L-CPS-ceska_pojistovna_as.yaml", + "CZ-10-PRA-L-CPS.yaml", + "CZ-10-PRA-L-CPSPOZSC.yaml", + "CZ-10-PRA-L-CPSR.yaml", + "CZ-10-PRA-L-CPSVSVV.yaml", + "CZ-10-PRA-L-CPU.yaml", + "CZ-10-PRA-L-CRS.yaml", + "CZ-10-PRA-L-CS-cedok_as.yaml", + "CZ-10-PRA-L-CS-ceskoslovensky_spisovatel.yaml", + "CZ-10-PRA-L-CS-cetos_as.yaml", + "CZ-10-PRA-L-CS-chemapol_as.yaml", + "CZ-10-PRA-L-CS-cokoladovny_as.yaml", + "CZ-10-PRA-L-CS.yaml", + "CZ-10-PRA-L-CSBZSUV-cesky_svaz_bojovniku_za_svobodu_ustredni_vybor.yaml", + "CZ-10-PRA-L-CSBZSUV.yaml", + "JP-10-SHI-M-TYIM.yaml", + "JP-10-TAK-F-KS.yaml", + "JP-10-TAK-L-GPCL.yaml", + "JP-10-TAK-L-IUIJCL.yaml", + "JP-10-TAK-L-JLB.yaml", + "JP-10-TAK-L-NGJCL.yaml", + "JP-10-TAK-L-T-takasakikenkofukushidaigakuyakugakubutoshoshiryosh.yaml", + "JP-10-TAK-L-T.yaml", + "JP-10-TAK-L-TCUEL.yaml", + "JP-10-TAK-L-TL-takasakishiritsugumma_library.yaml", + "JP-10-TAK-L-TL-takasakishiritsuharuna_library.yaml", + "JP-10-TAK-L-TL-takasakishiritsumisato_library.yaml", + "JP-10-TAK-L-TL-takasakishiritsushimmachi_library.yaml", + "JP-10-TAK-L-TL-takasakishiritsuyamatanekinenyoshii_library.yaml", + "JP-10-TAK-L-TL-takasakishokadaigaku_library.yaml", + "JP-10-TAK-L-TL.yaml", + "JP-10-TAK-L-TLB.yaml", + "JP-10-TAK-L-TUHWL.yaml", + "JP-10-TAK-M-KMA.yaml", + "JP-10-TAK-M-MMAG.yaml", + "JP-10-TAK-M-NSC.yaml", + "JP-10-TAK-M-TCDPBG.yaml", + "JP-10-TAK-M-TCFHM.yaml", + "JP-10-TAK-M-TSHFHM.yaml", + "JP-10-TAK-M-TTMA.yaml", + "JP-10-TAK-M-TYSC.yaml", + "JP-10-TAK-M-YKMAT.yaml", + "JP-10-TAK-M-YMH.yaml", + "JP-10-TAK-M-YMLMH.yaml", + "JP-10-TAM-M-THM.yaml" ], - "last_index": 39 + "last_index": 29 } \ No newline at end of file diff --git a/data/custodian/CZ-10-PRA-A-MUAVCAUTGM.yaml b/data/custodian/CZ-10-PRA-A-MUAVCAUTGM.yaml index aa39e6eb20..4130a2db58 100644 --- a/data/custodian/CZ-10-PRA-A-MUAVCAUTGM.yaml +++ b/data/custodian/CZ-10-PRA-A-MUAVCAUTGM.yaml @@ -42,7 +42,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-10-PRA-A-MÚAVČAÚTGM valid_from: '2025-12-07T12:38:22.566656+00:00' - reason: 'Location resolved from institution name pattern: ''Akademie věd'' → region 10, city PRA' + reason: 'Location resolved from institution name pattern: ''Akademie věd'' → region + 10, city PRA' custodian_name: claim_type: custodian_name claim_value: Masarykův ústav Akademie věd ČR - Archiv Ústavu T.G.M @@ -68,8 +69,8 @@ provenance: confidence_score: 0.85 notes: - 'Country resolved 2025-12-06T23:56:12Z: XX→CZ via Wikidata P17' - - 'YouTube/Google Maps enrichment 2025-12-08T19:47:28Z: Maps: Masarykův ústav a Archiv AV ČR (conf: 0.95); YouTube: not - found' + - 'YouTube/Google Maps enrichment 2025-12-08T19:47:28Z: Maps: Masarykův ústav a + Archiv AV ČR (conf: 0.95); YouTube: not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z @@ -151,7 +152,8 @@ google_maps_enrichment: author_uri: https://www.google.com/maps/contrib/111969964317962148590/reviews rating: 5 relative_time_description: 3 years ago - text: Incredible work by the people behind it all. So many well-preserved books. It's great...👍 + text: Incredible work by the people behind it all. So many well-preserved books. + It's great...👍 publish_time: '2022-08-18T17:08:05.122982Z' - author_name: Josef Prajzler author_uri: https://www.google.com/maps/contrib/104183040336437489284/reviews @@ -206,15 +208,19 @@ google_maps_enrichment: is_match: true confidence: 0.95 entity_type: GRP.HER - reasoning: 'NAME MATCH: The names are a very close match. The source ''Masarykův ústav Akademie věd ČR - Archiv Ústavu - T.G.M'' is an expanded form of the candidate ''Masarykův ústav a Archiv AV ČR''. ''Akademie věd ČR'' (Academy of Sciences - of the Czech Republic) is correctly abbreviated as ''AV ČR''. The source name specifies the archive within the institute, - which is consistent with the candidate''s name. LOCATION MATCH: The address in Praha (Prague), Czechia matches the country - code ''CZ''. TYPE MATCH: Although the Google Place types (''point_of_interest'', ''establishment'') are generic, the - source name explicitly contains ''Archiv'' (Archive). The confirmed website (mua.cas.cz) is for the Masaryk Institute - and Archives of the CAS, which describes itself as an archive and research institution. This confirms its function as - a heritage institution. ENTITY TYPE: Based on the explicit name ''Archive'' and the institution''s description as a - research archive, it is a confirmed GRP.HER.' + reasoning: 'NAME MATCH: The names are a very close match. The source ''Masarykův + ústav Akademie věd ČR - Archiv Ústavu T.G.M'' is an expanded form of the candidate + ''Masarykův ústav a Archiv AV ČR''. ''Akademie věd ČR'' (Academy of Sciences + of the Czech Republic) is correctly abbreviated as ''AV ČR''. The source name + specifies the archive within the institute, which is consistent with the candidate''s + name. LOCATION MATCH: The address in Praha (Prague), Czechia matches the country + code ''CZ''. TYPE MATCH: Although the Google Place types (''point_of_interest'', + ''establishment'') are generic, the source name explicitly contains ''Archiv'' + (Archive). The confirmed website (mua.cas.cz) is for the Masaryk Institute and + Archives of the CAS, which describes itself as an archive and research institution. + This confirms its function as a heritage institution. ENTITY TYPE: Based on + the explicit name ''Archive'' and the institution''s description as a research + archive, it is a confirmed GRP.HER.' agent: glm-4.6 verified: true ch_annotator_version: ch_annotator-v1_7_0 @@ -235,3 +241,28 @@ location: street_address: Gabčíkova 2362, Praha 8-Libeň formatted_address: 10, Gabčíkova 2362, 182 00 Praha 8-Libeň, Czechia normalization_timestamp: '2025-12-09T06:49:28.542096+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:57:42.654886+00:00' + source_url: http://www.mua.cas.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.mua.cas.cz/build/favicon/safari-pinned-tab.svg + source_url: http://www.mua.cas.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T20:57:42.654886+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://mua.greendot.cz/build/img/hp-hero.jpg + source_url: http://www.mua.cas.cz + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T20:57:42.654886+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 4 diff --git a/data/custodian/CZ-10-PRA-A-NA.yaml b/data/custodian/CZ-10-PRA-A-NA.yaml index d1427fca73..4d5c2b9dd2 100644 --- a/data/custodian/CZ-10-PRA-A-NA.yaml +++ b/data/custodian/CZ-10-PRA-A-NA.yaml @@ -73,8 +73,8 @@ provenance: confidence_score: 0.85 notes: - 'Country resolved 2025-12-06T23:54:39Z: XX→CZ via Wikidata P17' - - 'YouTube/Google Maps enrichment 2025-12-08T19:47:31Z: Maps: National Archives of the Czech Republic (conf: 1.00); YouTube: - not found' + - 'YouTube/Google Maps enrichment 2025-12-08T19:47:31Z: Maps: National Archives + of the Czech Republic (conf: 1.00); YouTube: not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z ch_annotator: @@ -227,8 +227,9 @@ wikidata_enrichment: description: archives of a country - id: Q7075 label: library - description: institution charged with the care of a collection of literary, musical, artistic, or reference materials, - such as books, manuscripts, recordings, or films + description: institution charged with the care of a collection of literary, + musical, artistic, or reference materials, such as books, manuscripts, recordings, + or films - id: Q27031009 label: public archive description: repository for official documents @@ -237,7 +238,8 @@ wikidata_enrichment: description: type of Czech research organization - id: Q2085381 label: publishing company - description: company that prints and distributes pressed goods or electronic media + description: company that prints and distributes pressed goods or electronic + media wikidata_instance_of: *id005 wikidata_location: country: &id007 @@ -300,8 +302,8 @@ google_maps_enrichment: author_uri: https://www.google.com/maps/contrib/111303903784806922325/reviews rating: 5 relative_time_description: 5 years ago - text: Super welcoming and very helpful archivists (for visitors like myself who are new and struggle with Czech). Thanks - so much, keep up the great work. + text: Super welcoming and very helpful archivists (for visitors like myself who + are new and struggle with Czech). Thanks so much, keep up the great work. publish_time: '2020-09-22T12:20:41.750155Z' - author_name: Mały Oisior author_uri: https://www.google.com/maps/contrib/100968724745138285308/reviews @@ -319,23 +321,29 @@ google_maps_enrichment: author_uri: https://www.google.com/maps/contrib/105362763666969692195/reviews rating: 1 relative_time_description: 3 weeks ago - text: The working hours of the employees of this office are ridiculous... Starting at 9, being gone by 1 pm... Paying - for something like this out of taxes is a joy... + text: The working hours of the employees of this office are ridiculous... Starting + at 9, being gone by 1 pm... Paying for something like this out of taxes is a + joy... publish_time: '2025-11-14T12:59:55.176030204Z' - author_name: David Veselík author_uri: https://www.google.com/maps/contrib/116735262197680877900/reviews rating: 3 relative_time_description: 5 months ago - text: Professional approach when searching for documents on the topic of the study. However, they themselves spoiled the - positive evaluation several times. Unfortunately, it happened to me several times that I received inaccurate information - from the research room staff when searching for information about the fund in the inventories. When I asked whether - there was a fund for police stations and it was accessible in the research room, I was told that it was not. When I - wrote directly to the relevant department, they sent it to me for viewing by email without any problems. It also happened - to me once that a younger research room employee in glasses, who hands over archival materials for the study, was sleeping - in his chair and I was very embarrassed to wake him up so that I wouldn't waste time waiting for him to hand me more - archival materials. It happened at the beginning of January. Other researchers noticed it too. That definitely doesn't - add to trust. I wouldn't expect something like that in an institution like the National Archives. It was clear that - the younger archivist didn't even find it inappropriate and he didn't even need an apology for it. + text: Professional approach when searching for documents on the topic of the study. + However, they themselves spoiled the positive evaluation several times. Unfortunately, + it happened to me several times that I received inaccurate information from + the research room staff when searching for information about the fund in the + inventories. When I asked whether there was a fund for police stations and it + was accessible in the research room, I was told that it was not. When I wrote + directly to the relevant department, they sent it to me for viewing by email + without any problems. It also happened to me once that a younger research room + employee in glasses, who hands over archival materials for the study, was sleeping + in his chair and I was very embarrassed to wake him up so that I wouldn't waste + time waiting for him to hand me more archival materials. It happened at the + beginning of January. Other researchers noticed it too. That definitely doesn't + add to trust. I wouldn't expect something like that in an institution like the + National Archives. It was clear that the younger archivist didn't even find + it inappropriate and he didn't even need an apology for it. publish_time: '2025-06-27T19:06:48.934404281Z' opening_hours: open_now: false @@ -369,10 +377,12 @@ google_maps_enrichment: is_match: true confidence: 1.0 entity_type: GRP.HER - reasoning: 'Perfect match. 1. Name is a direct translation: ''Národní archiv'' (Czech) and ''National Archives of the - Czech Republic'' (English). 2. Location matches: The address is in Prague, Czechia, which is consistent with the source''s - country code ''CZ''. 3. Type matches: The name ''National Archives'' and website confirm it is an archive, a type of - heritage institution. 4. Entity type is correct.' + reasoning: 'Perfect match. 1. Name is a direct translation: ''Národní archiv'' + (Czech) and ''National Archives of the Czech Republic'' (English). 2. Location + matches: The address is in Prague, Czechia, which is consistent with the source''s + country code ''CZ''. 3. Type matches: The name ''National Archives'' and website + confirm it is an archive, a type of heritage institution. 4. Entity type is + correct.' agent: glm-4.6 verified: true ch_annotator_version: ch_annotator-v1_7_0 @@ -393,3 +403,32 @@ location: street_address: Archivní 2257/4, Praha 4-Chodov formatted_address: Archivní 2257/4, 149 00 Praha 4-Chodov, Czechia normalization_timestamp: '2025-12-09T06:49:28.591723+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:57:48.334183+00:00' + source_url: http://www.nacr.cz/eindex.htm + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/logo_na_cs.png + source_url: http://www.nacr.cz/eindex.htm + css_selector: '#wrapper-navbar > header.header > div.header__main:nth-of-type(2) + > nav.navbar.navbar-expand-lg > div.navbar-mobile-top > div.navbar-brand > a + > img' + retrieved_on: '2025-12-23T20:57:48.334183+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Národní archiv + - claim_type: favicon_url + claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/favicon/apple-icon-180x180.png + source_url: http://www.nacr.cz/eindex.htm + css_selector: '[document] > html > head > link:nth-of-type(9)' + retrieved_on: '2025-12-23T20:57:48.334183+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 13 diff --git a/data/custodian/CZ-10-PRA-A-NTMAAS.yaml b/data/custodian/CZ-10-PRA-A-NTMAAS.yaml index be33912b02..dde1a082f9 100644 --- a/data/custodian/CZ-10-PRA-A-NTMAAS.yaml +++ b/data/custodian/CZ-10-PRA-A-NTMAAS.yaml @@ -36,7 +36,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-10-PRA-A-NTMAAS valid_from: '2025-12-07T12:39:42.486868+00:00' - reason: 'Location resolved from institution name pattern: ''Prague'' → region 10, city PRA' + reason: 'Location resolved from institution name pattern: ''Prague'' → region + 10, city PRA' custodian_name: claim_type: custodian_name claim_value: Národní technické muzeum - Archiv architektury a stavitelství @@ -62,8 +63,8 @@ provenance: confidence_score: 0.85 notes: - 'Country resolved 2025-12-06T23:56:13Z: XX→CZ via Wikidata P17' - - 'YouTube/Google Maps enrichment 2025-12-08T19:47:48Z: Maps: Centrum stavitelského dědictví NTM Plasy (conf: 0.90); YouTube: - not found' + - 'YouTube/Google Maps enrichment 2025-12-08T19:47:48Z: Maps: Centrum stavitelského + dědictví NTM Plasy (conf: 0.90); YouTube: not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z @@ -147,8 +148,8 @@ google_maps_enrichment: author_uri: https://www.google.com/maps/contrib/111216860672505384775/reviews rating: 5 relative_time_description: 6 years ago - text: Interessting even for smaller children (5y). Also thanks to a playground inside. Worth to repeat the visit. One - floor dedicated to architect Kaplický. + text: Interessting even for smaller children (5y). Also thanks to a playground + inside. Worth to repeat the visit. One floor dedicated to architect Kaplický. publish_time: '2019-08-09T09:56:04.887896Z' - author_name: Pavel Rec (RudolfII) author_uri: https://www.google.com/maps/contrib/103709696563570701814/reviews @@ -196,18 +197,22 @@ google_maps_enrichment: is_match: true confidence: 0.9 entity_type: GRP.HER - reasoning: The candidate is 'Centrum stavitelského dědictví NTM Plasy', which translates to 'Center for Building Heritage - of the NTM Plasy'. 'NTM' is the standard Czech abbreviation for 'Národní technické muzeum' (National Technical Museum). - The Archiv architektury a stavitelství (Archive of Architecture and Construction) is a specialized department of this - institution. The candidate's name 'Centrum stavitelského dědictví' (Center for Building Heritage) is thematically and - institutionally aligned with the Archive of Architecture and Construction. The Google type 'museum' is a correct match - for a heritage institution. Both are in the Czech Republic. + reasoning: The candidate is 'Centrum stavitelského dědictví NTM Plasy', which + translates to 'Center for Building Heritage of the NTM Plasy'. 'NTM' is the + standard Czech abbreviation for 'Národní technické muzeum' (National Technical + Museum). The Archiv architektury a stavitelství (Archive of Architecture and + Construction) is a specialized department of this institution. The candidate's + name 'Centrum stavitelského dědictví' (Center for Building Heritage) is thematically + and institutionally aligned with the Archive of Architecture and Construction. + The Google type 'museum' is a correct match for a heritage institution. Both + are in the Czech Republic. agent: glm-4.6 verified: true ch_annotator_version: ch_annotator-v1_7_0 google_maps_status: SUCCESS youtube_status: NOT_FOUND -youtube_search_query: Národní technické muzeum - Archiv architektury a stavitelství official +youtube_search_query: Národní technické muzeum - Archiv architektury a stavitelství + official youtube_search_timestamp: '2025-12-08T19:47:48.608047+00:00' location: latitude: 49.9355172 @@ -222,3 +227,22 @@ location: street_address: Pivovarská 5, Plasy formatted_address: Pivovarská 5, 331 01 Plasy, Czechia normalization_timestamp: '2025-12-09T06:49:28.696784+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:58:03.034151+00:00' + source_url: http://muzeum-plasy.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.ntm.cz/file/30dc8e5fefba6ceba5d690d796c861ec/2220/favicon/NTM%20EN%20%C4%8Derven%C3%A1%20negativ.png + source_url: http://muzeum-plasy.cz + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T20:58:03.034151+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-10-PRA-A-NULK.yaml b/data/custodian/CZ-10-PRA-A-NULK.yaml index ed6a610bb8..6eecb1ee82 100644 --- a/data/custodian/CZ-10-PRA-A-NULK.yaml +++ b/data/custodian/CZ-10-PRA-A-NULK.yaml @@ -246,3 +246,22 @@ location: street_address: Zámek 672, Strážnice formatted_address: Zámek 672, 696 62 Strážnice, Czechia normalization_timestamp: '2025-12-09T06:49:28.743251+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:58:08.250911+00:00' + source_url: http://www.nulk.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.nulk.cz/wp-content/uploads/2016/11/cropped-favicon-180x180.png + source_url: http://www.nulk.cz + css_selector: '[document] > html > head > link:nth-of-type(26)' + retrieved_on: '2025-12-23T20:58:08.250911+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-10-PRA-A-SACUZK.yaml b/data/custodian/CZ-10-PRA-A-SACUZK.yaml index b485b4582b..ef777b24ba 100644 --- a/data/custodian/CZ-10-PRA-A-SACUZK.yaml +++ b/data/custodian/CZ-10-PRA-A-SACUZK.yaml @@ -47,7 +47,8 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-10-PRA-A-SAČÚZK valid_from: '2025-12-07T12:54:27.628961+00:00' - reason: 'Region resolved via GeoNames research: XX→10, city: Prague (GeoNames ID: 3067696)' + reason: 'Region resolved via GeoNames research: XX→10, city: Prague (GeoNames + ID: 3067696)' custodian_name: claim_type: custodian_name claim_value: Správní archiv Českého úřadu zeměměřictví a katastru @@ -74,8 +75,8 @@ provenance: notes: - 'Country resolved 2025-12-06T23:56:11Z: XX→CZ via Wikidata P17' - 'Region resolved 2025-12-07: XX→CZ-10 (Prague) via GeoNames research' - - 'YouTube/Google Maps enrichment 2025-12-08T19:48:10Z: Maps: Český úřad zeměměřický a katastrální- Zeměměřická knihovna - (conf: 0.90); YouTube: not found' + - 'YouTube/Google Maps enrichment 2025-12-08T19:48:10Z: Maps: Český úřad zeměměřický + a katastrální- Zeměměřická knihovna (conf: 0.90); YouTube: not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:05Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z @@ -256,11 +257,13 @@ google_maps_enrichment: is_match: true confidence: 0.9 entity_type: GRP.HER - reasoning: The Google Maps candidate 'Český úřad zeměměřický a katastrální- Zeměměřická knihovna' (Land Survey Office - - Surveying Library) is a library ('Zeměměřická knihovna'). The source institution is an administrative archive ('Správní - archiv') for the same parent body, the Czech Land Survey Office. The name of the parent institution matches, the location - in Prague matches, and the Google type 'library' is an expected type for a heritage institution (GRP.HER). The candidate - is the public-facing library of the same administrative body, making it a strong match. + reasoning: The Google Maps candidate 'Český úřad zeměměřický a katastrální- Zeměměřická + knihovna' (Land Survey Office - Surveying Library) is a library ('Zeměměřická + knihovna'). The source institution is an administrative archive ('Správní archiv') + for the same parent body, the Czech Land Survey Office. The name of the parent + institution matches, the location in Prague matches, and the Google type 'library' + is an expected type for a heritage institution (GRP.HER). The candidate is the + public-facing library of the same administrative body, making it a strong match. agent: glm-4.6 verified: true ch_annotator_version: ch_annotator-v1_7_0 @@ -268,3 +271,22 @@ google_maps_status: SUCCESS youtube_status: NOT_FOUND youtube_search_query: Správní archiv Českého úřadu zeměměřictví a katastru official youtube_search_timestamp: '2025-12-08T19:48:10.258212+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:58:18.366792+00:00' + source_url: http://www.cuzk.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.cuzk.cz/App_Themes/CUZK/favicon.png + source_url: http://www.cuzk.cz + css_selector: '#head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T20:58:18.366792+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-10-PRA-A-SAMCP.yaml b/data/custodian/CZ-10-PRA-A-SAMCP.yaml index 0afce94671..038162822f 100644 --- a/data/custodian/CZ-10-PRA-A-SAMCP.yaml +++ b/data/custodian/CZ-10-PRA-A-SAMCP.yaml @@ -70,7 +70,8 @@ provenance: confidence_score: 0.85 notes: - 'Country resolved 2025-12-06T23:56:12Z: XX→CZ via Wikidata P17' - - 'YouTube/Google Maps enrichment 2025-12-08T19:48:15Z: Maps: Prague City Archives (conf: 0.85); YouTube: not found' + - 'YouTube/Google Maps enrichment 2025-12-08T19:48:15Z: Maps: Prague City Archives + (conf: 0.85); YouTube: not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:05Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z @@ -158,7 +159,8 @@ google_maps_enrichment: author_uri: https://www.google.com/maps/contrib/109561705096115465097/reviews rating: 5 relative_time_description: a year ago - text: The building dates back to 1995, it's nice and very interesting. I recommend visiting the archive. + text: The building dates back to 1995, it's nice and very interesting. I recommend + visiting the archive. publish_time: '2024-04-21T15:15:20.793882Z' - author_name: Libor Šedivý author_uri: https://www.google.com/maps/contrib/112581391891260052369/reviews @@ -170,20 +172,25 @@ google_maps_enrichment: author_uri: https://www.google.com/maps/contrib/102671748185462032746/reviews rating: 3 relative_time_description: 6 years ago - text: You need to communicate at least two weeks before the planned visit so that you don't lose out unnecessarily. You - won't get anything at first... In addition, you need to take into account that they will present you with 5 archival - units in one day, i.e. not 5 cartons, but only 5 folders, regardless of whether it is a bookmark with one sheet or a - package with hundreds of documents... The ambition to complete more extensive research requires much more time than - we are used to in other archives. So much luck, patience and research happiness! 🍀 + text: You need to communicate at least two weeks before the planned visit so that + you don't lose out unnecessarily. You won't get anything at first... In addition, + you need to take into account that they will present you with 5 archival units + in one day, i.e. not 5 cartons, but only 5 folders, regardless of whether it + is a bookmark with one sheet or a package with hundreds of documents... The + ambition to complete more extensive research requires much more time than we + are used to in other archives. So much luck, patience and research happiness! + 🍀 publish_time: '2019-12-01T07:46:56.111948Z' - author_name: Miroslav Havel author_uri: https://www.google.com/maps/contrib/109030248799737237070/reviews rating: 5 relative_time_description: 7 years ago - text: A modern archive building built in the 1990s, I was lucky enough to see the facilities. Willing and quirky workers, - the profession of archivist probably requires a certain amount of perspective, a sense of humor and self-irony. If you - come to a professional course, you can look forward not only to insightful information, but also to great comments and - glosses on current events. It's worth it. 😉 + text: A modern archive building built in the 1990s, I was lucky enough to see + the facilities. Willing and quirky workers, the profession of archivist probably + requires a certain amount of perspective, a sense of humor and self-irony. If + you come to a professional course, you can look forward not only to insightful + information, but also to great comments and glosses on current events. It's + worth it. 😉 publish_time: '2018-03-17T09:34:30.117Z' opening_hours: open_now: false @@ -217,16 +224,21 @@ google_maps_enrichment: is_match: true confidence: 0.85 entity_type: GRP.HER - reasoning: 'The source institution ''Správní archiv Městské části Praha 4'' (Administrative Archive of the Prague 4 City - District) is a clear match for the Google Maps candidate ''Prague City Archives''. The reasoning is as follows: 1) NAME - MATCH: The Google name is a translation and a more general description of the specific Czech name. The source name indicates - it is the administrative archive for a specific district of Prague, while the Google name refers to the main city archives, - but their identities can be confirmed via the shared website domain ''ahmp.cz'', which stands for Archiv Hlavního města - Prahy (Archives of the Capital City of Prague). This confirms the candidate is the correct parent institution. 2) LOCATION - MATCH: The source name specifies ''Praha 4'' and the Google Maps address is in ''Praha 4-Chodov'', a perfect location - match. 3) TYPE MATCH: Although the Google Place types (''point_of_interest'', ''establishment'') are generic, the institution - is unambiguously an ''archiv'', which is a heritage institution type. 4) ENTITY TYPE: An administrative archive is a - definitive example of a Heritage Institution (GRP.HER).' + reasoning: 'The source institution ''Správní archiv Městské části Praha 4'' (Administrative + Archive of the Prague 4 City District) is a clear match for the Google Maps + candidate ''Prague City Archives''. The reasoning is as follows: 1) NAME MATCH: + The Google name is a translation and a more general description of the specific + Czech name. The source name indicates it is the administrative archive for a + specific district of Prague, while the Google name refers to the main city archives, + but their identities can be confirmed via the shared website domain ''ahmp.cz'', + which stands for Archiv Hlavního města Prahy (Archives of the Capital City of + Prague). This confirms the candidate is the correct parent institution. 2) LOCATION + MATCH: The source name specifies ''Praha 4'' and the Google Maps address is + in ''Praha 4-Chodov'', a perfect location match. 3) TYPE MATCH: Although the + Google Place types (''point_of_interest'', ''establishment'') are generic, the + institution is unambiguously an ''archiv'', which is a heritage institution + type. 4) ENTITY TYPE: An administrative archive is a definitive example of a + Heritage Institution (GRP.HER).' agent: glm-4.6 verified: true ch_annotator_version: ch_annotator-v1_7_0 @@ -248,3 +260,22 @@ location: street_address: Archivní 1280/6, Praha 4-Chodov formatted_address: Archivní 1280/6, 149 00 Praha 4-Chodov, Czechia normalization_timestamp: '2025-12-09T06:49:28.915410+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:58:23.615923+00:00' + source_url: http://www.ahmp.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.ahmp.cz/img/ahmp_favicon.ico + source_url: http://www.ahmp.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T20:58:23.615923+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-10-PRA-A-SANUPKIB.yaml b/data/custodian/CZ-10-PRA-A-SANUPKIB.yaml index 766546a3de..2de372ced2 100644 --- a/data/custodian/CZ-10-PRA-A-SANUPKIB.yaml +++ b/data/custodian/CZ-10-PRA-A-SANUPKIB.yaml @@ -42,10 +42,12 @@ ghcid: reason: 'Country resolved via Wikidata P17: XX→CZ' - ghcid: CZ-10-PRA-A-SANÚPKIB valid_from: '2025-12-07T12:38:22.569556+00:00' - reason: 'Location resolved from institution name pattern: ''Národního úřadu'' → region 10, city PRA' + reason: 'Location resolved from institution name pattern: ''Národního úřadu'' + → region 10, city PRA' custodian_name: claim_type: custodian_name - claim_value: Specializovaný archiv Národního úřadu pro kybernetickou a informační bezp. + claim_value: Specializovaný archiv Národního úřadu pro kybernetickou a informační + bezp. source_type: ch_annotator identifiers: - identifier_scheme: GHCID @@ -68,8 +70,8 @@ provenance: confidence_score: 0.85 notes: - 'Country resolved 2025-12-06T23:56:13Z: XX→CZ via Wikidata P17' - - 'YouTube/Google Maps enrichment 2025-12-08T19:48:25Z: Maps: National Archives of the Czech Republic (conf: 0.90); YouTube: - not found' + - 'YouTube/Google Maps enrichment 2025-12-08T19:48:25Z: Maps: National Archives + of the Czech Republic (conf: 0.90); YouTube: not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:05Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z @@ -104,7 +106,8 @@ ch_annotator: verified_by: null entity_claims: - claim_type: full_name - claim_value: Specializovaný archiv Národního úřadu pro kybernetickou a informační bezp. + claim_value: Specializovaný archiv Národního úřadu pro kybernetickou a informační + bezp. property_uri: skos:prefLabel provenance: namespace: glam @@ -151,8 +154,8 @@ google_maps_enrichment: author_uri: https://www.google.com/maps/contrib/111303903784806922325/reviews rating: 5 relative_time_description: 5 years ago - text: Super welcoming and very helpful archivists (for visitors like myself who are new and struggle with Czech). Thanks - so much, keep up the great work. + text: Super welcoming and very helpful archivists (for visitors like myself who + are new and struggle with Czech). Thanks so much, keep up the great work. publish_time: '2020-09-22T12:20:41.750155Z' - author_name: Mały Oisior author_uri: https://www.google.com/maps/contrib/100968724745138285308/reviews @@ -170,23 +173,29 @@ google_maps_enrichment: author_uri: https://www.google.com/maps/contrib/105362763666969692195/reviews rating: 1 relative_time_description: 3 weeks ago - text: The working hours of the employees of this office are ridiculous... Starting at 9, being gone by 1 pm... Paying - for something like this out of taxes is a joy... + text: The working hours of the employees of this office are ridiculous... Starting + at 9, being gone by 1 pm... Paying for something like this out of taxes is a + joy... publish_time: '2025-11-14T12:59:55.176030204Z' - author_name: David Veselík author_uri: https://www.google.com/maps/contrib/116735262197680877900/reviews rating: 3 relative_time_description: 5 months ago - text: Professional approach when searching for documents on the topic of the study. However, they themselves spoiled the - positive evaluation several times. Unfortunately, it happened to me several times that I received inaccurate information - from the research room staff when searching for information about the fund in the inventories. When I asked whether - there was a fund for police stations and it was accessible in the research room, I was told that it was not. When I - wrote directly to the relevant department, they sent it to me for viewing by email without any problems. It also happened - to me once that a younger research room employee in glasses, who hands over archival materials for the study, was sleeping - in his chair and I was very embarrassed to wake him up so that I wouldn't waste time waiting for him to hand me more - archival materials. It happened at the beginning of January. Other researchers noticed it too. That definitely doesn't - add to trust. I wouldn't expect something like that in an institution like the National Archives. It was clear that - the younger archivist didn't even find it inappropriate and he didn't even need an apology for it. + text: Professional approach when searching for documents on the topic of the study. + However, they themselves spoiled the positive evaluation several times. Unfortunately, + it happened to me several times that I received inaccurate information from + the research room staff when searching for information about the fund in the + inventories. When I asked whether there was a fund for police stations and it + was accessible in the research room, I was told that it was not. When I wrote + directly to the relevant department, they sent it to me for viewing by email + without any problems. It also happened to me once that a younger research room + employee in glasses, who hands over archival materials for the study, was sleeping + in his chair and I was very embarrassed to wake him up so that I wouldn't waste + time waiting for him to hand me more archival materials. It happened at the + beginning of January. Other researchers noticed it too. That definitely doesn't + add to trust. I wouldn't expect something like that in an institution like the + National Archives. It was clear that the younger archivist didn't even find + it inappropriate and he didn't even need an apology for it. publish_time: '2025-06-27T19:06:48.934404281Z' opening_hours: open_now: false @@ -220,18 +229,22 @@ google_maps_enrichment: is_match: true confidence: 0.9 entity_type: GRP.HER - reasoning: The Google Maps candidate is the National Archives of the Czech Republic, a clear translation of the source - institution's name 'Specializovaný archiv Národního úřadu pro kybernetickou a informační bezp.', which is a specialized - department within the National Archives. The location in Praha (Prague), Czechia, matches the country code (CZ). The - website (nacr.cz) confirms its identity as a national archive. While the Google Place types ('point_of_interest', 'establishment') - are generic and do not explicitly include 'archive', all other evidence points to this being the correct heritage institution - (an archive). Therefore, it is a match for a Heritage Institution. + reasoning: The Google Maps candidate is the National Archives of the Czech Republic, + a clear translation of the source institution's name 'Specializovaný archiv + Národního úřadu pro kybernetickou a informační bezp.', which is a specialized + department within the National Archives. The location in Praha (Prague), Czechia, + matches the country code (CZ). The website (nacr.cz) confirms its identity as + a national archive. While the Google Place types ('point_of_interest', 'establishment') + are generic and do not explicitly include 'archive', all other evidence points + to this being the correct heritage institution (an archive). Therefore, it is + a match for a Heritage Institution. agent: glm-4.6 verified: true ch_annotator_version: ch_annotator-v1_7_0 google_maps_status: SUCCESS youtube_status: NOT_FOUND -youtube_search_query: Specializovaný archiv Národního úřadu pro kybernetickou a informační bezp. official +youtube_search_query: Specializovaný archiv Národního úřadu pro kybernetickou a informační + bezp. official youtube_search_timestamp: '2025-12-08T19:48:25.873936+00:00' location: latitude: 50.0389097 @@ -246,3 +259,38 @@ location: street_address: Archivní 2257/4, Praha 4-Chodov formatted_address: Archivní 2257/4, 149 00 Praha 4-Chodov, Czechia normalization_timestamp: '2025-12-09T06:49:28.982890+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:58:30.883981+00:00' + source_url: http://www.nacr.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/logo_na_en.png + source_url: http://www.nacr.cz + css_selector: '#wrapper-navbar > header.header > div.header__main:nth-of-type(2) + > nav.navbar.navbar-expand-lg > div.navbar-mobile-top > div.navbar-brand > a + > img' + retrieved_on: '2025-12-23T20:58:30.883981+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: National Archives + - claim_type: favicon_url + claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/favicon/apple-icon-180x180.png + source_url: http://www.nacr.cz + css_selector: '[document] > html > head > link:nth-of-type(9)' + retrieved_on: '2025-12-23T20:58:30.883981+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www.nacr.cz/wp-content/uploads/2020/04/podatelna2_22-1.jpg + source_url: http://www.nacr.cz + css_selector: '[document] > html > head > meta:nth-of-type(17)' + retrieved_on: '2025-12-23T20:58:30.883981+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 13 diff --git a/data/custodian/CZ-10-PRA-A-UAZK.yaml b/data/custodian/CZ-10-PRA-A-UAZK.yaml index 8b24c7a121..bb6e736dd1 100644 --- a/data/custodian/CZ-10-PRA-A-UAZK.yaml +++ b/data/custodian/CZ-10-PRA-A-UAZK.yaml @@ -34,7 +34,8 @@ ghcid: city_code: PRA method: WIKIDATA_LOCATION_RESEARCH resolution_timestamp: '2025-12-06T23:54:40.402432+00:00' - resolution_notes: Central Archive of Surveying and Cadastre in Prague, part of ČÚZK + resolution_notes: Central Archive of Surveying and Cadastre in Prague, part of + ČÚZK ghcid_history: - ghcid: CZ-10-PRA-A-UAZK ghcid_numeric: 5974277682822411938 @@ -78,7 +79,8 @@ provenance: confidence_score: 0.85 notes: - 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17' - - 'YouTube/Google Maps enrichment 2025-12-08T19:48:35Z: Maps: rejected by LLM; YouTube: not found' + - 'YouTube/Google Maps enrichment 2025-12-08T19:48:35Z: Maps: rejected by LLM; YouTube: + not found' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:05Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:29Z ch_annotator: @@ -108,8 +110,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Ústřední archiv zeměměřictví a katastru @@ -213,10 +215,12 @@ wikidata_enrichment: google_maps_status: NO_MATCH google_maps_rejected: candidate_name: Central Military Archives - rejection_reason: The Google Maps candidate, 'Central Military Archives', does not match the source institution, 'Ústřední - archiv zeměměřictví a katastru' (Central Archive of Surveying and Cadastre). Although both are archives in Prague, the - names and institutional focus are different. The Google candidate is military-focused, while the source is focused on - geodesy and land registry. Therefore, these are two different institutions. + rejection_reason: The Google Maps candidate, 'Central Military Archives', does not + match the source institution, 'Ústřední archiv zeměměřictví a katastru' (Central + Archive of Surveying and Cadastre). Although both are archives in Prague, the + names and institutional focus are different. The Google candidate is military-focused, + while the source is focused on geodesy and land registry. Therefore, these are + two different institutions. timestamp: '2025-12-08T19:48:35.122961+00:00' youtube_status: NOT_FOUND youtube_search_query: Ústřední archiv zeměměřictví a katastru official @@ -237,3 +241,22 @@ location: entity_id: 3067696 city_code: PRA original_timestamp: '2025-12-09T20:40:58.298985+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:58:41.770641+00:00' + source_url: https://www.cuzk.cz/Urady/Zememericky-urad/Dalsi-informace/UAZK/Prohlizeni-archivalii,-archivni-mapy.aspx + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.cuzk.cz/App_Themes/CUZK/favicon.png + source_url: https://www.cuzk.cz/Urady/Zememericky-urad/Dalsi-informace/UAZK/Prohlizeni-archivalii,-archivni-mapy.aspx + css_selector: '#head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T20:58:41.770641+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-10-PRA-A-UDUKAUK.yaml b/data/custodian/CZ-10-PRA-A-UDUKAUK.yaml index 1097447da8..01c386d795 100644 --- a/data/custodian/CZ-10-PRA-A-UDUKAUK.yaml +++ b/data/custodian/CZ-10-PRA-A-UDUKAUK.yaml @@ -251,3 +251,37 @@ location: original_timestamp: '2025-12-09T15:34:40.266185+00:00' geonames_name: Prague feature_code: PPLC +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:58:47.105716+00:00' + source_url: http://udauk.cuni.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://udauk.cuni.cz/ARCH-1-version1-afoto.jpg + source_url: http://udauk.cuni.cz + css_selector: '[document] > html > body > div.container-fluid.headerHolder > div.container.headerSubHolder + > div.row > header > div.col-md-7 > div.logo > a > img.logoSize.mobileLogoSize' + retrieved_on: '2025-12-23T20:58:47.105716+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Homepage - Ústav dějin Univerzity Karlovy a Archiv Univerzity Karlovy + - claim_type: favicon_url + claim_value: http://udauk.cuni.cz/newlayout/UK-favicon-32x32px.png + source_url: http://udauk.cuni.cz + css_selector: '[document] > html > head > link:nth-of-type(6)' + retrieved_on: '2025-12-23T20:58:47.105716+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://udauk.cuni.cz/newlayout/images/logosoc.png + source_url: http://udauk.cuni.cz + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T20:58:47.105716+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-10-MAE-M-AM.yaml b/data/custodian/JP-10-MAE-M-AM.yaml index cb955e0f66..40d0825d2e 100644 --- a/data/custodian/JP-10-MAE-M-AM.yaml +++ b/data/custodian/JP-10-MAE-M-AM.yaml @@ -259,3 +259,28 @@ wikidata_enrichment: - id: Q11424955 label: Sakakura Associates Architects and Engineers description: '' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:39:53.520689+00:00' + source_url: https://www.artsmaebashi.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://artsmaebashi.jp/cms/wp-content/themes/artsmaebashi/favicon.ico + source_url: https://www.artsmaebashi.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T20:39:53.520689+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.artsmaebashi.jp/cms/wp-content/themes/artsmaebashi/img/artsmaebashi_ogp_image.jpg + source_url: https://www.artsmaebashi.jp + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T20:39:53.520689+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-10-MID-L-KL.yaml b/data/custodian/JP-10-MID-L-KL.yaml index 41b1e33599..85295e3141 100644 --- a/data/custodian/JP-10-MID-L-KL.yaml +++ b/data/custodian/JP-10-MID-L-KL.yaml @@ -205,3 +205,22 @@ location: geonames_id: 6822155 geonames_name: Midori feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:40:42.012851+00:00' + source_url: http://www.kiryu-u.ac.jp/university/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kiryu-u.ac.jp/wp2024/wp-content/themes/design/favicon.ico + source_url: http://www.kiryu-u.ac.jp/university/library + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T20:40:42.012851+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-10-MID-L-M.yaml b/data/custodian/JP-10-MID-L-M.yaml index d01816ab47..8e4a4ac9fd 100644 --- a/data/custodian/JP-10-MID-L-M.yaml +++ b/data/custodian/JP-10-MID-L-M.yaml @@ -204,3 +204,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.midori.gunma.jp/library/ wikidata_official_website: http://www.city.midori.gunma.jp/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:40:50.168438+00:00' + source_url: http://www.city.midori.gunma.jp/library + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png + source_url: http://www.city.midori.gunma.jp/library + css_selector: '#tlogo > p > a > img' + retrieved_on: '2025-12-23T20:40:50.168438+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: みどり市公式サイト + - claim_type: favicon_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: http://www.city.midori.gunma.jp/library + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T20:40:50.168438+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: http://www.city.midori.gunma.jp/library + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T20:40:50.168438+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-MID-L-ML-midorishiritsuomama_library.yaml b/data/custodian/JP-10-MID-L-ML-midorishiritsuomama_library.yaml index a8f06e78c1..da5e0c4cc8 100644 --- a/data/custodian/JP-10-MID-L-ML-midorishiritsuomama_library.yaml +++ b/data/custodian/JP-10-MID-L-ML-midorishiritsuomama_library.yaml @@ -204,3 +204,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.midori.gunma.jp/library/ wikidata_official_website: http://www.city.midori.gunma.jp/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:40:56.212089+00:00' + source_url: http://www.city.midori.gunma.jp/library + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png + source_url: http://www.city.midori.gunma.jp/library + css_selector: '#tlogo > p > a > img' + retrieved_on: '2025-12-23T20:40:56.212089+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: みどり市公式サイト + - claim_type: favicon_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: http://www.city.midori.gunma.jp/library + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T20:40:56.212089+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: http://www.city.midori.gunma.jp/library + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T20:40:56.212089+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-MID-L-ML.yaml b/data/custodian/JP-10-MID-L-ML.yaml index 134c8b3ce5..af6a5bae90 100644 --- a/data/custodian/JP-10-MID-L-ML.yaml +++ b/data/custodian/JP-10-MID-L-ML.yaml @@ -204,3 +204,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.midori.gunma.jp/library/ wikidata_official_website: http://www.city.midori.gunma.jp/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:41:02.206429+00:00' + source_url: http://www.city.midori.gunma.jp/library + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png + source_url: http://www.city.midori.gunma.jp/library + css_selector: '#tlogo > p > a > img' + retrieved_on: '2025-12-23T20:41:02.206429+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: みどり市公式サイト + - claim_type: favicon_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: http://www.city.midori.gunma.jp/library + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T20:41:02.206429+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: http://www.city.midori.gunma.jp/library + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T20:41:02.206429+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-MID-M-IM.yaml b/data/custodian/JP-10-MID-M-IM.yaml index f638bf210a..577951fab2 100644 --- a/data/custodian/JP-10-MID-M-IM.yaml +++ b/data/custodian/JP-10-MID-M-IM.yaml @@ -226,3 +226,36 @@ location: geonames_id: 6822155 geonames_name: Midori feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:41:09.788813+00:00' + source_url: https://www.city.midori.gunma.jp/iwajuku + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/iwajuku/images/header/tlogo.png?a + source_url: https://www.city.midori.gunma.jp/iwajuku + css_selector: '#tlogo > p > a > img' + retrieved_on: '2025-12-23T20:41:09.788813+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 岩宿博物館 + - claim_type: favicon_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.midori.gunma.jp/iwajuku + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T20:41:09.788813+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.midori.gunma.jp/iwajuku + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T20:41:09.788813+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-MID-M-MCOM.yaml b/data/custodian/JP-10-MID-M-MCOM.yaml index d59ed96559..b6152ab8f6 100644 --- a/data/custodian/JP-10-MID-M-MCOM.yaml +++ b/data/custodian/JP-10-MID-M-MCOM.yaml @@ -247,3 +247,36 @@ location: postal_code: 376-0101 street_address: OMAMACHO OMAMA, Midori Shi, Gumma Ken, 376-0101 normalization_timestamp: '2025-12-09T06:54:37.040443+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:41:20.266954+00:00' + source_url: https://www.city.midori.gunma.jp/conodont + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/oomama/images/header/tlogo.png?a + source_url: https://www.city.midori.gunma.jp/conodont + css_selector: '#tlogo > p > a > img' + retrieved_on: '2025-12-23T20:41:20.266954+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: コノドント館 みどり市大間々博物館 + - claim_type: favicon_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.midori.gunma.jp/conodont + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T20:41:20.266954+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.midori.gunma.jp/conodont + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T20:41:20.266954+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-ORA-L-I.yaml b/data/custodian/JP-10-ORA-L-I.yaml index baa713563c..a858a4f861 100644 --- a/data/custodian/JP-10-ORA-L-I.yaml +++ b/data/custodian/JP-10-ORA-L-I.yaml @@ -201,3 +201,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.itakura.gunma.jp/cont/s029000/20140706140658.html wikidata_official_website: http://www.town.itakura.gunma.jp/cont/s029000/20140706140658.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:42:53.398009+00:00' + source_url: http://www.town.itakura.gunma.jp/cont/s029000/20140706140658.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.town.itakura.gunma.jp/favicon2025.ico + source_url: http://www.town.itakura.gunma.jp/cont/s029000/20140706140658.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T20:42:53.398009+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-10-ORA-L-TLIL.yaml b/data/custodian/JP-10-ORA-L-TLIL.yaml index 888d09f1ee..9accc6b076 100644 --- a/data/custodian/JP-10-ORA-L-TLIL.yaml +++ b/data/custodian/JP-10-ORA-L-TLIL.yaml @@ -207,3 +207,37 @@ location: geonames_id: 13132763 geonames_name: Itakura feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:43:19.036274+00:00' + source_url: http://www.toyo.ac.jp/site/library/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.toyo.ac.jp/img/common/img_logo.svg + source_url: http://www.toyo.ac.jp/site/library/index.html + css_selector: '#gheader > div.gnav__frm > h1.gnav__logo > a.gnav__logo-link > + img.gnav__logo-img' + retrieved_on: '2025-12-23T20:43:19.036274+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 東洋大学 + - claim_type: favicon_url + claim_value: http://www.toyo.ac.jp/img/common/favicon.ico + source_url: http://www.toyo.ac.jp/site/library/index.html + css_selector: '[document] > html.js_domload.js_imgload > head > link:nth-of-type(6)' + retrieved_on: '2025-12-23T20:43:19.036274+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.toyo.ac.jp/site/library/ogp.jpg + source_url: http://www.toyo.ac.jp/site/library/index.html + css_selector: '[document] > html.js_domload.js_imgload > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T20:43:19.036274+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-10-OTA-L-KL.yaml b/data/custodian/JP-10-OTA-L-KL.yaml index a8501809ac..b137971767 100644 --- a/data/custodian/JP-10-OTA-L-KL.yaml +++ b/data/custodian/JP-10-OTA-L-KL.yaml @@ -216,3 +216,22 @@ location: geonames_id: 8469289 geonames_name: Ōta feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:43:50.781940+00:00' + source_url: https://opac.kanto-gakuen.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://opac.kanto-gakuen.ac.jp/favicon.ico + source_url: https://opac.kanto-gakuen.ac.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T20:43:50.781940+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-10-OTA-L-OL-otashiritsunitta_library.yaml b/data/custodian/JP-10-OTA-L-OL-otashiritsunitta_library.yaml index 2a45bfad34..de5d7cf0f7 100644 --- a/data/custodian/JP-10-OTA-L-OL-otashiritsunitta_library.yaml +++ b/data/custodian/JP-10-OTA-L-OL-otashiritsunitta_library.yaml @@ -204,3 +204,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www2.lib.ota.gunma.jp/ wikidata_official_website: http://www2.lib.ota.gunma.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:44:00.439944+00:00' + source_url: http://www2.lib.ota.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www2.lib.ota.gunma.jp/css/img/apple-touch-icon.png + source_url: http://www2.lib.ota.gunma.jp + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T20:44:00.439944+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www2.lib.ota.gunma.jp/design_img/og_image.png + source_url: http://www2.lib.ota.gunma.jp + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T20:44:00.439944+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-OTA-L-OL-otashiritsuojima_library.yaml b/data/custodian/JP-10-OTA-L-OL-otashiritsuojima_library.yaml index 7e1f126722..311898d986 100644 --- a/data/custodian/JP-10-OTA-L-OL-otashiritsuojima_library.yaml +++ b/data/custodian/JP-10-OTA-L-OL-otashiritsuojima_library.yaml @@ -204,3 +204,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www2.lib.ota.gunma.jp/ wikidata_official_website: http://www2.lib.ota.gunma.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:44:11.876905+00:00' + source_url: http://www2.lib.ota.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www2.lib.ota.gunma.jp/css/img/apple-touch-icon.png + source_url: http://www2.lib.ota.gunma.jp + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T20:44:11.876905+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www2.lib.ota.gunma.jp/design_img/og_image.png + source_url: http://www2.lib.ota.gunma.jp + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T20:44:11.876905+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-OTA-L-OL-otashiritsuyabuzukahommachi_library.yaml b/data/custodian/JP-10-OTA-L-OL-otashiritsuyabuzukahommachi_library.yaml index 3473c78563..ed4559c963 100644 --- a/data/custodian/JP-10-OTA-L-OL-otashiritsuyabuzukahommachi_library.yaml +++ b/data/custodian/JP-10-OTA-L-OL-otashiritsuyabuzukahommachi_library.yaml @@ -204,3 +204,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www2.lib.ota.gunma.jp/ wikidata_official_website: http://www2.lib.ota.gunma.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:44:25.192738+00:00' + source_url: http://www2.lib.ota.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www2.lib.ota.gunma.jp/css/img/apple-touch-icon.png + source_url: http://www2.lib.ota.gunma.jp + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T20:44:25.192738+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www2.lib.ota.gunma.jp/design_img/og_image.png + source_url: http://www2.lib.ota.gunma.jp + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T20:44:25.192738+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-OTA-L-OL.yaml b/data/custodian/JP-10-OTA-L-OL.yaml index 0d0cdd53ec..2ed832e1ff 100644 --- a/data/custodian/JP-10-OTA-L-OL.yaml +++ b/data/custodian/JP-10-OTA-L-OL.yaml @@ -204,3 +204,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www2.lib.ota.gunma.jp/ wikidata_official_website: http://www2.lib.ota.gunma.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:44:37.130831+00:00' + source_url: http://www2.lib.ota.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www2.lib.ota.gunma.jp/css/img/apple-touch-icon.png + source_url: http://www2.lib.ota.gunma.jp + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T20:44:37.130831+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www2.lib.ota.gunma.jp/design_img/og_image.png + source_url: http://www2.lib.ota.gunma.jp + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T20:44:37.130831+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-OTA-M-JSI.yaml b/data/custodian/JP-10-OTA-M-JSI.yaml index ac45ef4bcf..d06530bf97 100644 --- a/data/custodian/JP-10-OTA-M-JSI.yaml +++ b/data/custodian/JP-10-OTA-M-JSI.yaml @@ -245,3 +245,22 @@ location: postal_code: 379-2301 street_address: YABUZUKACHO, Ota Shi, Gumma Ken, 379-2301 normalization_timestamp: '2025-12-09T06:54:37.451277+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:44:59.775139+00:00' + source_url: https://www.snake-center.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.wixstatic.com/media/e0add7_2ee341a95e2742109c6d64a0062cf331%7Emv2.png/v1/fill/w_180%2Ch_180%2Clg_1%2Cusm_0.66_1.00_0.01/e0add7_2ee341a95e2742109c6d64a0062cf331%7Emv2.png + source_url: https://www.snake-center.com + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T20:44:59.775139+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-10-REN-M-TMA.yaml b/data/custodian/JP-10-REN-M-TMA.yaml index 498ba05058..3ea473bcd0 100644 --- a/data/custodian/JP-10-REN-M-TMA.yaml +++ b/data/custodian/JP-10-REN-M-TMA.yaml @@ -384,3 +384,22 @@ location: geonames_id: 1853412 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:31.061825+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:45:13.968373+00:00' + source_url: https://www.city.takasaki.gunma.jp/docs/2014011000353 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: https://www.city.takasaki.gunma.jp/docs/2014011000353 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T20:45:13.968373+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-10-SAW-L-GPWSUL.yaml b/data/custodian/JP-10-SAW-L-GPWSUL.yaml index d57ec0638b..3d1a738356 100644 --- a/data/custodian/JP-10-SAW-L-GPWSUL.yaml +++ b/data/custodian/JP-10-SAW-L-GPWSUL.yaml @@ -213,3 +213,23 @@ location: geonames_id: 1850746 geonames_name: Tamamura feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:45:22.518565+00:00' + source_url: http://www.gpwu.ac.jp/org/lib + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.gpwu.ac.jp/assets/images/apple-touch-icon.png + source_url: http://www.gpwu.ac.jp/org/lib + css_selector: '[document] > html.font_change_normal.bg_change_normal > head > + link:nth-of-type(10)' + retrieved_on: '2025-12-23T20:45:22.518565+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-10-SHI-M-HMA.yaml b/data/custodian/JP-10-SHI-M-HMA.yaml index 2204118701..de23850555 100644 --- a/data/custodian/JP-10-SHI-M-HMA.yaml +++ b/data/custodian/JP-10-SHI-M-HMA.yaml @@ -246,3 +246,28 @@ wikidata_enrichment: - id: Q317135 label: Arata Isozaki description: Japanese architect (1931-2022) +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:46:06.337035+00:00' + source_url: https://www.haramuseum.or.jp/jp/arc + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.haramuseum.or.jp/jp/arc/wp-content/themes/haramuseum/apple-touch-icon.png + source_url: https://www.haramuseum.or.jp/jp/arc + css_selector: '[document] > html.win.chrome > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T20:46:06.337035+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.haramuseum.or.jp/jp/arc/wp-content/themes/haramuseum/ogp.png + source_url: https://www.haramuseum.or.jp/jp/arc + css_selector: '[document] > html.win.chrome > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T20:46:06.337035+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-SHI-M-TYIM.yaml b/data/custodian/JP-10-SHI-M-TYIM.yaml index e5bc2600cc..b6ecd577c3 100644 --- a/data/custodian/JP-10-SHI-M-TYIM.yaml +++ b/data/custodian/JP-10-SHI-M-TYIM.yaml @@ -241,3 +241,23 @@ wikidata_enrichment: image: Takehisa Yumeji Ikaho Art Museum.JPG commons_category: Takehisa Yumeji Ikaho Memorial wikidata_image: Takehisa Yumeji Ikaho Art Museum.JPG +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:46:37.532898+00:00' + source_url: http://yumeji.or.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://yumeji.or.jp/img/logo.jpg + source_url: http://yumeji.or.jp + css_selector: '[document] > html > body > article > header > table > tbody > tr + > th > a > img' + retrieved_on: '2025-12-23T20:46:37.532898+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 伊香保 観光なら竹久夢二記念館(大正ロマンの森)|公益財団法人 竹久夢二伊香保記念館 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-10-TAK-F-KS.yaml b/data/custodian/JP-10-TAK-F-KS.yaml index f87ed7a6b9..531af69952 100644 --- a/data/custodian/JP-10-TAK-F-KS.yaml +++ b/data/custodian/JP-10-TAK-F-KS.yaml @@ -87,7 +87,8 @@ ghcid: - ghcid: JP-10-TAK-F-KS valid_from: '2025-12-14T12:00:00+00:00' valid_to: null - reason: Initial GHCID for Three Stelae of Kozuke (type F=Feature) replacing deleted JP-10-GP-A-TS.yaml (was Takasaki city hallucination) + reason: Initial GHCID for Three Stelae of Kozuke (type F=Feature) replacing deleted + JP-10-GP-A-TS.yaml (was Takasaki city hallucination) location_resolution: method: WIKIDATA_LOCATION country_code: JP @@ -143,14 +144,17 @@ provenance: data_source: WIKIDATA data_tier: TIER_3_CROWD_SOURCED extraction_date: '2025-12-14T12:00:00+00:00' - extraction_method: Manual Wikidata extraction replacing hallucinated geographic entity + extraction_method: Manual Wikidata extraction replacing hallucinated geographic + entity confidence_score: 0.95 notes: - - 'Created 2025-12-14: Replaces deleted JP-10-GP-A-TS.yaml which was Takasaki city (Q336438) hallucination' - - 'Type: F (Feature) - stone monuments are physical heritage features per GLAMORCUBESFIXPHDNT taxonomy' + - 'Created 2025-12-14: Replaces deleted JP-10-GP-A-TS.yaml which was Takasaki city + (Q336438) hallucination' + - 'Type: F (Feature) - stone monuments are physical heritage features per GLAMORCUBESFIXPHDNT + taxonomy' - 'Comprises three stelae: Tago (Q11431029), Yamanoue (Q11465812), Kanaizawa (Q11646374)' - 'UNESCO Memory of the World inscription: Three Cherished Stelae of Ancient Kozuke' - - 'Japan Special Historic Site designation' + - Japan Special Historic Site designation location: latitude: 36.323111111 longitude: 139.002361111 @@ -164,10 +168,29 @@ location: region_code: '10' country: JP formatted_address: Takasaki, Gunma, Japan -description: >- - The Three Stelae of Kozuke (上野三碑, Kozuke Sanpi) are three ancient stone monuments - located in Takasaki, Gunma Prefecture, Japan. Dating from the 7th-8th centuries CE, - they are written in Classical Chinese and are among the oldest stone inscriptions in - Japan. The three stelae are: Tago Stele (多胡碑, 711 CE), Yamanoue Stele (山上碑, 681 CE), - and Kanaizawa Stele (金井沢碑, 726 CE). They are designated as a Special Historic Site - of Japan and inscribed on the UNESCO Memory of the World Register. +description: 'The Three Stelae of Kozuke (上野三碑, Kozuke Sanpi) are three ancient stone + monuments located in Takasaki, Gunma Prefecture, Japan. Dating from the 7th-8th + centuries CE, they are written in Classical Chinese and are among the oldest stone + inscriptions in Japan. The three stelae are: Tago Stele (多胡碑, 711 CE), Yamanoue + Stele (山上碑, 681 CE), and Kanaizawa Stele (金井沢碑, 726 CE). They are designated as + a Special Historic Site of Japan and inscribed on the UNESCO Memory of the World + Register.' +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:46:44.714095+00:00' + source_url: https://www.city.takasaki.gunma.jp/info/sanpi + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: https://www.city.takasaki.gunma.jp/info/sanpi + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T20:46:44.714095+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-10-TAK-L-T-takasakikenkofukushidaigakuyakugakubutoshoshiryosh.yaml b/data/custodian/JP-10-TAK-L-T-takasakikenkofukushidaigakuyakugakubutoshoshiryosh.yaml index 3bdae8cd8f..13d2a182ac 100644 --- a/data/custodian/JP-10-TAK-L-T-takasakikenkofukushidaigakuyakugakubutoshoshiryosh.yaml +++ b/data/custodian/JP-10-TAK-L-T-takasakikenkofukushidaigakuyakugakubutoshoshiryosh.yaml @@ -205,3 +205,22 @@ location: geonames_id: 1851002 geonames_name: Takasaki feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T20:48:12.056807+00:00' + source_url: http://www.takasaki-u.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://library.takasaki-u.ac.jp/wp/wp-content/themes/tulibtwelve/favicon.ico + source_url: http://www.takasaki-u.ac.jp/library + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T20:48:12.056807+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-10-TAK-L-T.yaml b/data/custodian/JP-10-TAK-L-T.yaml index a89598fb08..971d989e6a 100644 --- a/data/custodian/JP-10-TAK-L-T.yaml +++ b/data/custodian/JP-10-TAK-L-T.yaml @@ -199,3 +199,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.takasaki.gunma.jp/docs/2013122401257/ wikidata_official_website: http://www.city.takasaki.gunma.jp/docs/2013122401257/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:05:42.815900+00:00' + source_url: http://www.city.takasaki.gunma.jp/docs/2013122401257 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: http://www.city.takasaki.gunma.jp/docs/2013122401257 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:05:42.815900+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-10-TAK-L-TL-takasakishiritsugumma_library.yaml b/data/custodian/JP-10-TAK-L-TL-takasakishiritsugumma_library.yaml index a3fa3d3a70..3e9628e0b4 100644 --- a/data/custodian/JP-10-TAK-L-TL-takasakishiritsugumma_library.yaml +++ b/data/custodian/JP-10-TAK-L-TL-takasakishiritsugumma_library.yaml @@ -211,3 +211,28 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.city.takasaki.gunma.jp/ wikidata_official_website: https://lib.city.takasaki.gunma.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:06:00.052914+00:00' + source_url: https://lib.city.takasaki.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: https://lib.city.takasaki.gunma.jp + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T21:06:00.052914+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg + source_url: https://lib.city.takasaki.gunma.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:06:00.052914+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-10-TAK-L-TL-takasakishiritsuharuna_library.yaml b/data/custodian/JP-10-TAK-L-TL-takasakishiritsuharuna_library.yaml index 221c78285c..1eeecd8e76 100644 --- a/data/custodian/JP-10-TAK-L-TL-takasakishiritsuharuna_library.yaml +++ b/data/custodian/JP-10-TAK-L-TL-takasakishiritsuharuna_library.yaml @@ -211,3 +211,28 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.city.takasaki.gunma.jp/ wikidata_official_website: https://lib.city.takasaki.gunma.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:06:09.588778+00:00' + source_url: https://lib.city.takasaki.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: https://lib.city.takasaki.gunma.jp + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T21:06:09.588778+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg + source_url: https://lib.city.takasaki.gunma.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:06:09.588778+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-10-TAK-L-TL-takasakishiritsumisato_library.yaml b/data/custodian/JP-10-TAK-L-TL-takasakishiritsumisato_library.yaml index 9543cd6eb9..1b6a51438a 100644 --- a/data/custodian/JP-10-TAK-L-TL-takasakishiritsumisato_library.yaml +++ b/data/custodian/JP-10-TAK-L-TL-takasakishiritsumisato_library.yaml @@ -211,3 +211,28 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.city.takasaki.gunma.jp/ wikidata_official_website: https://lib.city.takasaki.gunma.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:06:19.001109+00:00' + source_url: https://lib.city.takasaki.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: https://lib.city.takasaki.gunma.jp + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T21:06:19.001109+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg + source_url: https://lib.city.takasaki.gunma.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:06:19.001109+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-10-TAK-L-TL-takasakishiritsushimmachi_library.yaml b/data/custodian/JP-10-TAK-L-TL-takasakishiritsushimmachi_library.yaml index 5da467d6ec..fdfc956412 100644 --- a/data/custodian/JP-10-TAK-L-TL-takasakishiritsushimmachi_library.yaml +++ b/data/custodian/JP-10-TAK-L-TL-takasakishiritsushimmachi_library.yaml @@ -211,3 +211,28 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.city.takasaki.gunma.jp/ wikidata_official_website: https://lib.city.takasaki.gunma.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:06:28.656638+00:00' + source_url: https://lib.city.takasaki.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: https://lib.city.takasaki.gunma.jp + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T21:06:28.656638+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg + source_url: https://lib.city.takasaki.gunma.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:06:28.656638+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-10-TAK-L-TL-takasakishiritsuyamatanekinenyoshii_library.yaml b/data/custodian/JP-10-TAK-L-TL-takasakishiritsuyamatanekinenyoshii_library.yaml index 590457ffd3..6d9f14a5e6 100644 --- a/data/custodian/JP-10-TAK-L-TL-takasakishiritsuyamatanekinenyoshii_library.yaml +++ b/data/custodian/JP-10-TAK-L-TL-takasakishiritsuyamatanekinenyoshii_library.yaml @@ -211,3 +211,28 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.city.takasaki.gunma.jp/ wikidata_official_website: https://lib.city.takasaki.gunma.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:06:38.254797+00:00' + source_url: https://lib.city.takasaki.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: https://lib.city.takasaki.gunma.jp + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T21:06:38.254797+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg + source_url: https://lib.city.takasaki.gunma.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:06:38.254797+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-10-TAK-L-TL-takasakishokadaigaku_library.yaml b/data/custodian/JP-10-TAK-L-TL-takasakishokadaigaku_library.yaml index 706979bf22..926af476c0 100644 --- a/data/custodian/JP-10-TAK-L-TL-takasakishokadaigaku_library.yaml +++ b/data/custodian/JP-10-TAK-L-TL-takasakishokadaigaku_library.yaml @@ -211,3 +211,28 @@ location: geonames_id: 1851002 geonames_name: Takasaki feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:06:46.412557+00:00' + source_url: http://tuc.opac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://tuc.opac.jp/common/images/op4-favicon.ico + source_url: http://tuc.opac.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:06:46.412557+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://tuc.opac.jp/unique/images/logo_0001.png + source_url: http://tuc.opac.jp + css_selector: '[document] > html > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-23T21:06:46.412557+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-10-TAK-L-TL.yaml b/data/custodian/JP-10-TAK-L-TL.yaml index 85a3c69e0c..8c921fc0b7 100644 --- a/data/custodian/JP-10-TAK-L-TL.yaml +++ b/data/custodian/JP-10-TAK-L-TL.yaml @@ -240,3 +240,28 @@ wikidata_enrichment: - id: Q3178370 label: Taisei Corporation description: Japanese corporation +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:06:55.674299+00:00' + source_url: https://lib.city.takasaki.gunma.jp/viewer/info.html?id=28 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: https://lib.city.takasaki.gunma.jp/viewer/info.html?id=28 + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T21:06:55.674299+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg + source_url: https://lib.city.takasaki.gunma.jp/viewer/info.html?id=28 + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:06:55.674299+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-10-TAK-L-TLB.yaml b/data/custodian/JP-10-TAK-L-TLB.yaml index 18bd72dcb9..fe998c024d 100644 --- a/data/custodian/JP-10-TAK-L-TLB.yaml +++ b/data/custodian/JP-10-TAK-L-TLB.yaml @@ -205,3 +205,22 @@ location: geonames_id: 1851002 geonames_name: Takasaki feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:07:02.806581+00:00' + source_url: http://www.takasaki-u.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://library.takasaki-u.ac.jp/wp/wp-content/themes/tulibtwelve/favicon.ico + source_url: http://www.takasaki-u.ac.jp/library + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:07:02.806581+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-10-TAK-L-TUHWL.yaml b/data/custodian/JP-10-TAK-L-TUHWL.yaml index 703874bffb..5401bb350d 100644 --- a/data/custodian/JP-10-TAK-L-TUHWL.yaml +++ b/data/custodian/JP-10-TAK-L-TUHWL.yaml @@ -222,3 +222,22 @@ location: geonames_id: 1851002 geonames_name: Takasaki feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:07:09.585806+00:00' + source_url: http://www.takasaki-u.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://library.takasaki-u.ac.jp/wp/wp-content/themes/tulibtwelve/favicon.ico + source_url: http://www.takasaki-u.ac.jp/library + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:07:09.585806+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-10-TAK-M-KMA.yaml b/data/custodian/JP-10-TAK-M-KMA.yaml index 14938e42f4..538f3f09ca 100644 --- a/data/custodian/JP-10-TAK-M-KMA.yaml +++ b/data/custodian/JP-10-TAK-M-KMA.yaml @@ -236,3 +236,22 @@ wikidata_enrichment: image: Kamitsukeno-sato Museum of Archaeology.jpg commons_category: Kamitsukeno-sato Museum of Archaeology wikidata_image: Kamitsukeno-sato Museum of Archaeology.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:07:15.901936+00:00' + source_url: https://www.city.takasaki.gunma.jp/docs/2014010701664 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: https://www.city.takasaki.gunma.jp/docs/2014010701664 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:07:15.901936+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-10-TAK-M-MMAG.yaml b/data/custodian/JP-10-TAK-M-MMAG.yaml index c149189a71..c09e97c044 100644 --- a/data/custodian/JP-10-TAK-M-MMAG.yaml +++ b/data/custodian/JP-10-TAK-M-MMAG.yaml @@ -314,3 +314,31 @@ location: postal_code: 370-1293 street_address: WATANUKIMACHI, Takasaki Shi, Gumma Ken, 370-1293 normalization_timestamp: '2025-12-09T06:54:38.070396+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:07:28.604419+00:00' + source_url: http://mmag.pref.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://mmag.pref.gunma.jp/cms/wp-content/themes/mmag/img/logo-txt.png + source_url: http://mmag.pref.gunma.jp + css_selector: '#TOP > div.wrap > header.sp > div.sp-nav > div.title > a.menu-logo + > img.contain' + retrieved_on: '2025-12-23T21:07:28.604419+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 群馬県立近代美術館 + - claim_type: favicon_url + claim_value: https://mmag.pref.gunma.jp/cms/wp-content/themes/mmag/favicon.ico + source_url: http://mmag.pref.gunma.jp + css_selector: '[document] > html.fontM > head > link' + retrieved_on: '2025-12-23T21:07:28.604419+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-10-TAK-M-NSC.yaml b/data/custodian/JP-10-TAK-M-NSC.yaml index e771ff015c..88b14935ce 100644 --- a/data/custodian/JP-10-TAK-M-NSC.yaml +++ b/data/custodian/JP-10-TAK-M-NSC.yaml @@ -250,3 +250,28 @@ location: postal_code: 370-3511 street_address: KANEKOMACHI, Takasaki Shi, Gumma Ken, 370-3511 normalization_timestamp: '2025-12-09T06:54:38.092335+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:07:38.385059+00:00' + source_url: https://www.nippon-kinunosato.or.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.nippon-kinunosato.or.jp/icon/safari-pinned-tab.svg + source_url: https://www.nippon-kinunosato.or.jp + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:07:38.385059+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.nippon-kinunosato.or.jp/sakura/wp-content/themes/kinunosato/assets/img/ogp.png + source_url: https://www.nippon-kinunosato.or.jp + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T21:07:38.385059+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 5 diff --git a/data/custodian/JP-10-TAK-M-TCDPBG.yaml b/data/custodian/JP-10-TAK-M-TCDPBG.yaml index 1a234c4235..5de07c5663 100644 --- a/data/custodian/JP-10-TAK-M-TCDPBG.yaml +++ b/data/custodian/JP-10-TAK-M-TCDPBG.yaml @@ -219,3 +219,22 @@ wikidata_enrichment: wikidata_media: image: Malus sieboldii F.jpg wikidata_image: Malus sieboldii F.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:07:45.481419+00:00' + source_url: http://www.city.takasaki.gunma.jp/docs/2017082200011 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: http://www.city.takasaki.gunma.jp/docs/2017082200011 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:07:45.481419+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-10-TAK-M-TTMA.yaml b/data/custodian/JP-10-TAK-M-TTMA.yaml index ea2553e11b..8a5c2b14ed 100644 --- a/data/custodian/JP-10-TAK-M-TTMA.yaml +++ b/data/custodian/JP-10-TAK-M-TTMA.yaml @@ -218,3 +218,22 @@ location: geonames_id: 1851002 geonames_name: Takasaki feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:07:56.267081+00:00' + source_url: http://www.city.takasaki.gunma.jp/docs/2014021900025 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: http://www.city.takasaki.gunma.jp/docs/2014021900025 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:07:56.267081+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-10-TAK-M-YKMAT.yaml b/data/custodian/JP-10-TAK-M-YKMAT.yaml index 927f1d03ee..5954df518f 100644 --- a/data/custodian/JP-10-TAK-M-YKMAT.yaml +++ b/data/custodian/JP-10-TAK-M-YKMAT.yaml @@ -241,3 +241,22 @@ location: postal_code: 370-0862 street_address: KATAOKAMACHI, Takasaki Shi, Gumma Ken, 370-0862 normalization_timestamp: '2025-12-09T06:54:38.205614+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:08:04.330227+00:00' + source_url: https://www.city.takasaki.gunma.jp/docs/2014040100192 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: https://www.city.takasaki.gunma.jp/docs/2014040100192 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:08:04.330227+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-10-TAN-L-KL.yaml b/data/custodian/JP-10-TAN-L-KL.yaml index 9cf71ebcd9..90736af8ee 100644 --- a/data/custodian/JP-10-TAN-L-KL.yaml +++ b/data/custodian/JP-10-TAN-L-KL.yaml @@ -201,3 +201,28 @@ wikidata_enrichment: wikidata_web: official_website: http://town.kanna.gunma.jp/index.php?key=muq1gopqk-751#_751 wikidata_official_website: http://town.kanna.gunma.jp/index.php?key=muq1gopqk-751#_751 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:08:29.930154+00:00' + source_url: http://town.kanna.gunma.jp/index.php?key=muq1gopqk-751#_751 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.town.kanna.gunma.jp/theme/base/img_common/smartphone.png + source_url: http://town.kanna.gunma.jp/index.php?key=muq1gopqk-751#_751 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:08:29.930154+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.town.kanna.gunma.jp/theme/base/img_common/ogp_noimage.png + source_url: http://town.kanna.gunma.jp/index.php?key=muq1gopqk-751#_751 + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T21:08:29.930154+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2