diff --git a/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json b/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json index fa8d43ea65..cca8bbf929 100644 --- a/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json +++ b/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json @@ -9251,7 +9251,537 @@ "JP-11-TOD-A-TAC.yaml", "JP-11-TOD-L-TCL.yaml", "JP-11-TOD-L-TLK.yaml", - "JP-11-TOD-L-TLM.yaml" + "JP-11-TOD-L-TLM.yaml", + "CZ-20-VRD-L-KFVLV.yaml", + "CZ-20-VSE-L-OKV.yaml", + "CZ-20-VSE-L-OKVV-obecni_knihovna_ve_vsevilech.yaml", + "CZ-20-VSE-L-OKVV.yaml", + "CZ-20-VSE-L-VKICB.yaml", + "CZ-20-VYS-L-MKVV.yaml", + "CZ-20-VYS-L-OKH.yaml", + "CZ-20-XAV-L-VUZVOCD.yaml", + "CZ-20-ZAB-L-OKZNL.yaml", + "CZ-20-ZAD-L-OKVZT.yaml", + "CZ-20-ZAJ-L-OKVZ.yaml", + "CZ-20-ZAK-L-OKVZ.yaml", + "CZ-20-ZAL-L-OKK.yaml", + "CZ-20-ZAL-L-OKVZ.yaml", + "CZ-20-ZAL-L-OKZ.yaml", + "CZ-20-ZAS-L-MKZ.yaml", + "CZ-20-ZAS-L-OKS.yaml", + "CZ-20-ZBE-L-OKVR.yaml", + "CZ-20-ZBE-L-OKVS.yaml", + "CZ-20-ZBE-L-OKVZ-obecni_knihovna_ve_zbecne.yaml", + "CZ-20-ZBE-L-OKVZ.yaml", + "CZ-20-ZBR-L-KZ.yaml", + "CZ-20-ZBY-L-MKZ.yaml", + "CZ-20-ZDI-L-OKVS.yaml", + "CZ-20-ZDI-L-OKZ.yaml", + "CZ-20-ZDI-L-TVUPJT.yaml", + "CZ-20-ZEB-L-MKVZ.yaml", + "CZ-20-ZEH-L-OKVC.yaml", + "CZ-20-ZEH-L-OKVD.yaml", + "CZ-20-ZEH-L-OKZ-obecni_knihovna_zehusice.yaml", + "CZ-20-ZEL-L-OKZ.yaml", + "CZ-20-ZIZ-L-MKZ.yaml", + "CZ-20-ZLO-L-KZ.yaml", + "CZ-20-ZLO-L-OKB.yaml", + "CZ-20-ZLO-L-OKS.yaml", + "CZ-20-ZRU-L-MKZNS.yaml", + "CZ-20-ZRU-L-MLKHI.yaml", + "CZ-20-ZRU-L-OKP.yaml", + "CZ-20-ZRU-L-SS.yaml", + "CZ-20-ZVE-L-MLKH.yaml", + "CZ-20-ZVE-L-OKP.yaml", + "CZ-20-ZVE-L-OKZ.yaml", + "CZ-20-ZVO-L-OKP.yaml", + "CZ-20-ZVO-L-OKZ.yaml", + "CZ-31-BEC-M-MMB-mestske_muzeum_bechyne.yaml", + "CZ-31-BLA-M-MMB-mestske_muzeum_blatna.yaml", + "CZ-31-CBU-G-AJG.yaml", + "CZ-31-CEK-A-AUMAVESCK.yaml", + "CZ-31-CEK-A-ESACPS.yaml", + "CZ-31-CEK-A-MDCK.yaml", + "CZ-31-CEK-A-PUCBSSHZCK.yaml", + "CZ-31-CEK-L-MKCK.yaml", + "CZ-31-CEK-M-RMVCK.yaml", + "CZ-31-CES-A-AUMAVESCB.yaml", + "CZ-31-CES-A-PUCBSSZHNV.yaml", + "CZ-31-CES-A-SOACB.yaml", + "CZ-31-CES-L-JPVCP.yaml", + "CZ-31-CES-M-JMVCB.yaml", + "CZ-31-CET-A-SOACK.yaml", + "CZ-31-DAC-M-MMGD.yaml", + "CZ-31-HUS-M-PMJH.yaml", + "CZ-31-JIH-A-SOAJH.yaml", + "CZ-31-JIH-M-MJ.yaml", + "CZ-31-MVO-M-PM-pamatnik_mladovozicka.yaml", + "CZ-31-PIS-A-AUMAVESP-archivalie_ulozene_mimo_archivy_v_evidenci_soka_pi.yaml", + "CZ-31-PIS-A-SOAP-statni_okresni_archiv_pisek.yaml", + "CZ-31-PIS-L-PPPOTS.yaml", + "CZ-31-PIS-L-UPVVERPP.yaml", + "CZ-31-PIS-M-PMVP.yaml", + "CZ-31-PRA-L-MLK.yaml", + "CZ-31-PRA-M-PM-prachaticke_muzeum.yaml", + "CZ-31-PRK-A-AAVUVP.yaml", + "CZ-31-PRK-A-ACVUTVP.yaml", + "CZ-31-PRK-L-UKVPFTVSVU.yaml", + "CZ-31-STR-A-AUMAVESS.yaml", + "CZ-31-STR-A-SOAS.yaml", + "CZ-31-STR-M-MMVS.yaml", + "CZ-31-TAB-A-SOAT-statni_okresni_archiv_tabor.yaml", + "CZ-31-TAB-M-HMVT.yaml", + "CZ-31-TNV-M-MMTNV.yaml", + "CZ-31-TRE-A-AUMAVEST-archivalie_ulozene_mimo_archivy_v_evidenci_soa_tre.yaml", + "CZ-31-TRE-A-AUMAVESTC-archivalie_ulozene_mimo_archivy_v_evidenci_soa_tre.yaml", + "CZ-31-TRE-A-AUMAVESTC.yaml", + "CZ-31-TRE-A-AUMAVESTJ.yaml", + "CZ-31-TRE-A-SOAVT.yaml", + "CZ-31-TRO-M-PJZZT.yaml", + "CZ-31-VOD-M-MMGV.yaml", + "CZ-31-VOD-M-MVBZS.yaml", + "CZ-32-BEL-L-K.yaml", + "CZ-32-BEZ-L-KKHB.yaml", + "CZ-32-BEZ-L-OKB-obecni_knihovna_bezverov.yaml", + "CZ-32-BEZ-L-OKB.yaml", + "CZ-32-BEZ-L-OKV.yaml", + "CZ-32-BLO-L-MKB.yaml", + "CZ-32-BLO-L-MKC.yaml", + "CZ-32-BLO-L-MKL.yaml", + "CZ-32-BLO-L-MKVZ.yaml", + "CZ-32-BLO-L-OKVU.yaml", + "CZ-32-BOR-L-MLKVB.yaml", + "CZ-32-BRA-L-MKB-mistni_knihovna_brasy.yaml", + "CZ-32-BRA-L-MKB.yaml", + "CZ-32-BRA-L-MKUUSK.yaml", + "CZ-32-BRA-L-MKV.yaml", + "CZ-32-BRA-L-SKS.yaml", + "CZ-32-BRI-M-MHB.yaml", + "CZ-32-BUJ-L-MKB.yaml", + "CZ-32-BUK-L-MKB.yaml", + "CZ-32-CAC-L-MKC.yaml", + "CZ-32-CAS-L-MKC.yaml", + "CZ-32-CER-L-MKS.yaml", + "CZ-32-CER-L-MKV.yaml", + "CZ-32-CER-L-MLKC.yaml", + "CZ-32-CER-L-OKVC.yaml", + "CZ-32-CES-L-MKCK.yaml", + "CZ-32-CHE-L-MKC.yaml", + "CZ-32-CHL-L-LSKZOOS.yaml", + "CZ-32-CHL-L-MLKVC.yaml", + "CZ-32-CHO-L-MKVCP.yaml", + "CZ-32-CHO-L-VKVHVC.yaml", + "CZ-32-CHO-M-MMC.yaml", + "CZ-32-CHR-L-MLKVC-mistni_lidova_knihovna_ve_chrici.yaml", + "CZ-32-CHR-L-MLKVC.yaml", + "CZ-32-CHU-L-KMC.yaml", + "CZ-32-CHU-M-MJD.yaml", + "CZ-32-CHV-L-MKVC.yaml", + "CZ-32-CIM-L-MKC.yaml", + "CZ-32-CLE-M-MCL.yaml", + "CZ-32-DAR-L-MKD.yaml", + "CZ-32-DLO-L-MKDV.yaml", + "CZ-32-DNE-L-OKD.yaml", + "CZ-32-DOB-L-MKD-mistni_knihovna_dobriv.yaml", + "CZ-32-DOB-L-MKD.yaml", + "CZ-32-DOB-L-MKV.yaml", + "CZ-32-DOB-L-MKVU.yaml", + "CZ-32-DOB-L-OKD.yaml", + "CZ-32-DOB-L-PNVDKVLI.yaml", + "CZ-32-DOL-L-MLKVDL.yaml", + "CZ-32-DOL-L-OKHB.yaml", + "CZ-32-DOL-L-OKL-obecni_knihovna_loza.yaml", + "CZ-32-DOL-L-OKL.yaml", + "CZ-32-DOL-L-OKM.yaml", + "CZ-32-DOL-L-OKVDB.yaml", + "CZ-32-DOM-L-DNSLK.yaml", + "CZ-32-DOM-L-MKBND.yaml", + "CZ-32-DOM-L-MKC.yaml", + "CZ-32-DOM-L-MKD.yaml", + "CZ-32-DOM-L-MKH.yaml", + "CZ-32-DOM-L-MKM.yaml", + "CZ-32-DOM-L-MKP.yaml", + "CZ-32-DOM-L-MKVU.yaml", + "CZ-32-DOM-L-MKZ.yaml", + "CZ-32-DOM-L-OKD.yaml", + "CZ-32-DRA-L-MKD.yaml", + "CZ-32-DRA-L-MKDU.yaml", + "CZ-32-DRU-L-OKVD.yaml", + "CZ-32-DYS-L-OKD.yaml", + "CZ-32-DYS-L-SESR.yaml", + "CZ-32-EJP-L-KOE.yaml", + "CZ-32-HAL-L-MKH.yaml", + "CZ-32-HER-L-KHH.yaml", + "CZ-32-HLO-L-MKH.yaml", + "CZ-32-HOL-L-MKB.yaml", + "CZ-32-HOL-L-MKH-mistni_knihovna_holoubkov.yaml", + "CZ-32-HOL-L-MKN.yaml", + "CZ-32-HOR-L-MKB-mistni_knihovna_brezany.yaml", + "CZ-32-HOR-L-MKB.yaml", + "CZ-32-HOR-L-MKC-mistni_knihovna_chanovice.yaml", + "CZ-32-HOR-L-MKC.yaml", + "CZ-32-HOR-L-MKH-mistni_knihovna_hejna.yaml", + "CZ-32-HOR-L-MKH-mistni_knihovna_hradesice.yaml", + "CZ-32-HOR-L-MKHB.yaml", + "CZ-32-HOR-L-MKHL.yaml", + "CZ-32-HOR-L-MKHT.yaml", + "CZ-32-HOR-L-MKK.yaml", + "CZ-32-HOR-L-MKKPMKH.yaml", + "CZ-32-HOR-L-MKL.yaml", + "CZ-32-HOR-L-MKM-mistni_knihovna_mysliv.yaml", + "CZ-32-HOR-L-MKM.yaml", + "CZ-32-HOR-L-MKN.yaml", + "CZ-32-HOR-L-MKT.yaml", + "CZ-32-HOR-L-MKTPMKH.yaml", + "CZ-32-HOR-L-MKV-mistni_knihovna_velenovy.yaml", + "CZ-32-HOR-L-MKV.yaml", + "CZ-32-HOR-L-MKVH.yaml", + "CZ-32-HOR-L-MLKK.yaml", + "CZ-32-HOR-L-MLKMB.yaml", + "CZ-32-HOR-L-OKK.yaml", + "CZ-32-HOR-L-OKS.yaml", + "CZ-32-HOR-L-OKVB.yaml", + "CZ-32-HOR-L-OKVH.yaml", + "CZ-32-HOR-L-ZKZ.yaml", + "CZ-32-HOR-M-MMH-mestske_muzeum_horazdovice.yaml", + "CZ-32-HOS-L-MKH.yaml", + "CZ-32-HOS-L-MKVR.yaml", + "CZ-32-HRA-L-FGSR.yaml", + "CZ-32-HRA-L-KFPVH.yaml", + "CZ-32-HRA-L-MKH.yaml", + "CZ-32-HRA-L-OKH.yaml", + "CZ-32-JAN-L-OKS.yaml", + "CZ-32-JIH-L-VVUDPPJ.yaml", + "CZ-32-JNR-M-MMVJ.yaml", + "CZ-32-KAK-L-MKK.yaml", + "CZ-32-KAM-L-MKK.yaml", + "CZ-32-KAM-L-MKKU.yaml", + "CZ-32-KAS-L-MKKH.yaml", + "CZ-32-KAS-L-OKC.yaml", + "CZ-32-KAS-L-OKK.yaml", + "CZ-32-KAS-L-OKP.yaml", + "CZ-32-KAS-L-OKUUK.yaml", + "CZ-32-KAZ-L-ABP.yaml", + "CZ-32-KAZ-L-MKK.yaml", + "CZ-32-KAZ-L-OKC.yaml", + "CZ-32-KAZ-L-OKJ.yaml", + "CZ-32-KAZ-L-OKK.yaml", + "CZ-32-KAZ-L-OKO.yaml", + "CZ-32-KAZ-L-OKR.yaml", + "CZ-32-KDY-L-E.yaml", + "CZ-32-KDY-L-MKC.yaml", + "CZ-32-KDY-L-MKD.yaml", + "CZ-32-KDY-L-MKL.yaml", + "CZ-32-KDY-L-MKM.yaml", + "CZ-32-KDY-L-MKP.yaml", + "CZ-32-KDY-L-MKS-mistni_knihovna_starec.yaml", + "CZ-32-KDY-L-MKVO.yaml", + "CZ-32-KDY-L-MKVS.yaml", + "CZ-32-KDY-L-OKL.yaml", + "CZ-32-KLA-L-K.yaml", + "CZ-32-KLA-L-KNS.yaml", + "CZ-32-KLA-L-MKH.yaml", + "CZ-32-KLA-L-MKK.yaml", + "CZ-32-KLA-L-MKS.yaml", + "CZ-32-KLA-L-MKT.yaml", + "CZ-32-KLA-L-MKV.yaml", + "CZ-32-KLA-L-MKVK.yaml", + "CZ-32-KLA-L-MLKB-mistni_lidova_knihovna_bezdekov.yaml", + "CZ-32-KLA-L-MLKB.yaml", + "CZ-32-KLA-L-MLKD.yaml", + "CZ-32-KLA-L-MLKP.yaml", + "CZ-32-KLA-L-MLKR.yaml", + "CZ-32-KLA-L-MLKS.yaml", + "CZ-32-KLA-L-OKM-obecni_knihovna_myslovice.yaml", + "CZ-32-KLA-L-OKR.yaml", + "CZ-32-KLA-L-OKV.yaml", + "CZ-32-KLA-L-OKVK.yaml", + "CZ-32-KLA-L-SKSR.yaml", + "CZ-32-KLA-M-VMDHVK.yaml", + "CZ-32-KLA-M-VMDKHVKKIO.yaml", + "CZ-32-KLE-L-MKKPC.yaml", + "CZ-32-KOC-L-MLKVK.yaml", + "CZ-32-KOL-L-KICVK.yaml", + "CZ-32-KOL-L-KMK.yaml", + "CZ-32-KOL-L-MKC.yaml", + "CZ-32-KOL-L-MKH.yaml", + "CZ-32-KOL-L-MKZ.yaml", + "CZ-32-KOL-L-OKC.yaml", + "CZ-32-KOL-L-OKT.yaml", + "CZ-32-KOL-L-OKU.yaml", + "CZ-32-KOP-L-MLKVK.yaml", + "CZ-32-KPC-M-BMOP.yaml", + "CZ-32-KRA-L-MKVK.yaml", + "CZ-32-KRA-L-MLKK-mistni_lidova_knihovna_kozojedy_1.yaml", + "CZ-32-KRA-L-MLKK.yaml", + "CZ-32-KRA-L-MLKVH.yaml", + "CZ-32-KRA-L-OKM.yaml", + "CZ-32-KRA-L-OKVL.yaml", + "CZ-32-KRA-M-MGSP.yaml", + "CZ-32-KRA-M-MGSPMTK.yaml", + "CZ-32-KVI-L-MKK.yaml", + "CZ-32-KVL-M-MPSNPCKOS.yaml", + "CZ-32-KYS-L-OKK.yaml", + "CZ-32-LET-L-MKL.yaml", + "CZ-32-LHO-L-MKLPR.yaml", + "CZ-32-LHO-L-MKLUR.yaml", + "CZ-32-LIB-L-VKL.yaml", + "CZ-32-LIN-L-MKL.yaml", + "CZ-32-LIN-L-VUZPODP.yaml", + "CZ-32-LIP-A-AUMAVESCL.yaml", + "CZ-32-LIS-L-OKL.yaml", + "CZ-32-LUZ-L-MKL.yaml", + "CZ-32-MAN-L-MKM.yaml", + "CZ-32-MAN-L-OKM.yaml", + "CZ-32-MAN-L-OKS.yaml", + "CZ-32-MEC-L-MKM-mistni_knihovna_meclov.yaml", + "CZ-32-MEC-L-MKM.yaml", + "CZ-32-MER-L-OUKMUP.yaml", + "CZ-32-MES-L-OKU.yaml", + "CZ-32-MES-L-OKVC.yaml", + "CZ-32-MIR-L-MKVS.yaml", + "CZ-32-MIR-L-OKVV.yaml", + "CZ-32-MRA-L-MKM.yaml", + "CZ-32-MUT-L-OKM.yaml", + "CZ-32-MYT-L-KMMVC.yaml", + "CZ-32-NAL-L-OKNH.yaml", + "CZ-32-NEM-L-MKN.yaml", + "CZ-32-NEM-L-OUNMK.yaml", + "CZ-32-NEP-L-MKC.yaml", + "CZ-32-NEP-L-MKJ.yaml", + "CZ-32-NEP-L-MKN.yaml", + "CZ-32-NEP-L-MKVC-mistni_knihovna_ve_cmelinech.yaml", + "CZ-32-NEP-L-MKVC.yaml", + "CZ-32-NEP-L-MKVP.yaml", + "CZ-32-NEP-L-OKB-obecni_knihovna_brezi.yaml", + "CZ-32-NEP-L-OKM-obecni_knihovna_manovice.yaml", + "CZ-32-NEP-L-OKM.yaml", + "CZ-32-NEP-L-OKOP.yaml", + "CZ-32-NEP-L-OKS-obecni_knihovna_sobesuky.yaml", + "CZ-32-NEP-L-OKS-obecni_knihovna_srby.yaml", + "CZ-32-NEP-L-OKS.yaml", + "CZ-32-NEP-L-OKT-obecni_knihovna_trebcice.yaml", + "CZ-32-NEP-L-OKT.yaml", + "CZ-32-NEP-L-OKV.yaml", + "CZ-32-NEP-L-OKVK.yaml", + "CZ-32-NEP-L-OKZ-obecni_knihovna_zinkovy.yaml", + "CZ-32-NEP-M-MMGN.yaml", + "CZ-32-NEV-L-MKN.yaml", + "CZ-32-NEZ-L-MKNS.yaml", + "CZ-32-NEZ-L-MKVN.yaml", + "CZ-32-NEZ-L-OKM.yaml", + "CZ-32-NEZ-L-OKVN.yaml", + "CZ-32-NYR-L-OKC.yaml", + "CZ-32-NYR-L-OKU.yaml", + "CZ-32-NYR-L-ONSK.yaml", + "CZ-32-OLB-L-OKO.yaml", + "CZ-32-OSE-L-MKVO.yaml", + "CZ-32-PAC-L-MKP.yaml", + "CZ-32-PAS-L-MKP.yaml", + "CZ-32-PEL-A-AUMAVESP-archivalie_ulozene_mimo_archivy_v_evidenci_soka_pe.yaml", + "CZ-32-PER-L-OKK.yaml", + "CZ-32-PET-L-MKPUS.yaml", + "CZ-32-PIL-A-ACP.yaml", + "CZ-32-PIL-A-ASPPS.yaml", + "CZ-32-PIL-A-ASSP.yaml", + "CZ-32-PIL-A-AUMAVESP-archivalie_ulozene_mimo_archivy_v_evidenci_soa_plz.yaml", + "CZ-32-PIL-A-AUMAVESPJ.yaml", + "CZ-32-PIL-A-AUMAVESPR-archivalie_ulozene_mimo_archivy_v_evidenci_soa_plz.yaml", + "CZ-32-PIL-A-AUMAVESPS.yaml", + "CZ-32-PIL-A-POKPUP.yaml", + "CZ-32-PIL-A-PUP.yaml", + "CZ-32-PIL-A-SOAPJSSVB.yaml", + "CZ-32-PIL-A-SOAPSSSVP.yaml", + "CZ-32-PIL-M-MJPVB.yaml", + "CZ-32-PIL-M-MSP.yaml", + "CZ-32-PIL-M-ZMVP.yaml", + "CZ-32-PLA-L-MKCU.yaml", + "CZ-32-PLA-L-MKP.yaml", + "CZ-32-PLA-L-MLKZ.yaml", + "CZ-32-PLA-L-NPUML.yaml", + "CZ-32-PLA-L-OKD.yaml", + "CZ-32-PLA-L-OKH.yaml", + "CZ-32-PLA-L-OKMKP.yaml", + "CZ-32-PLA-L-OKVK.yaml", + "CZ-32-PLZ-E-CGPK.yaml", + "CZ-32-PLZ-G-ZG.yaml", + "CZ-32-PLZ-G-ZGVPPOK.yaml", + "CZ-32-PLZ-H-KKB.yaml", + "CZ-32-PLZ-L-AOPKCRPSCC.yaml", + "CZ-32-PLZ-L-AVCUTPCDM.yaml", + "CZ-32-PLZ-L-CDSOPRVP.yaml", + "CZ-32-PLZ-L-CZEZ.yaml", + "CZ-32-PLZ-L-FNPLK.yaml", + "CZ-32-PLZ-L-HPP.yaml", + "CZ-32-PLZ-L-KHSPKSSVPK.yaml", + "CZ-32-PLZ-L-MKS.yaml", + "CZ-32-PLZ-L-MP.yaml", + "CZ-32-PLZ-L-NPUUOPVPK.yaml", + "CZ-32-PLZ-L-OPS.yaml", + "CZ-32-PLZ-L-PPMP.yaml", + "CZ-32-PLZ-L-PPS.yaml", + "CZ-32-PLZ-L-S.yaml", + "CZ-32-PLZ-L-SJS.yaml", + "CZ-32-PLZ-L-SPS.yaml", + "CZ-32-PLZ-L-SPSK.yaml", + "CZ-32-PLZ-L-SSHTK.yaml", + "CZ-32-PLZ-L-STS.yaml", + "CZ-32-PLZ-L-SVDI.yaml", + "CZ-32-PLZ-L-UKLFVPSVI.yaml", + "CZ-32-PLZ-L-UKRMPPOTK.yaml", + "CZ-32-PLZ-L-VSSEFE.yaml", + "CZ-32-PLZ-L-VZUPSRTK.yaml", + "CZ-32-PLZ-L-ZPS.yaml", + "CZ-32-PLZ-L-ZSL.yaml", + "CZ-32-PLZ-L-ZUVPUK.yaml", + "CZ-32-PLZ-L-ZUVPUKPZK.yaml", + "CZ-32-PLZ-L-ZVK.yaml", + "CZ-32-PLZ-M-ZMVPK.yaml", + "CZ-32-PLZ-O-AMP.yaml", + "CZ-32-POB-L-MKO.yaml", + "CZ-32-POB-L-MKP.yaml", + "CZ-32-POB-L-MKV.yaml", + "CZ-32-POS-L-MKP.yaml", + "CZ-32-PRA-L-MKP.yaml", + "CZ-32-PRA-L-MKPU.yaml", + "CZ-32-PRE-A-DHP.yaml", + "CZ-32-PRE-L-MKL.yaml", + "CZ-32-PRE-L-MKP.yaml", + "CZ-32-PRE-L-MKVD.yaml", + "CZ-32-PRE-L-MKVH.yaml", + "CZ-32-PRE-L-MKVHL.yaml", + "CZ-32-PRE-L-MKVK-mistni_knihovna_v_krasavcich.yaml", + "CZ-32-PRE-L-MKVK.yaml", + "CZ-32-PRE-L-MKVO-mistni_knihovna_v_oplote.yaml", + "CZ-32-PRE-L-MKVO.yaml", + "CZ-32-PRE-L-MKVP.yaml", + "CZ-32-PRE-L-MKVS.yaml", + "CZ-32-PRE-L-MKVZ.yaml", + "CZ-32-PRE-L-OKM.yaml", + "CZ-32-PRE-L-OKP.yaml", + "CZ-32-PRE-L-OKVV.yaml", + "CZ-32-PRI-L-MKP-mistni_knihovna_primda.yaml", + "CZ-32-PRI-L-MKP.yaml", + "CZ-32-PRI-L-MKT-mistni_knihovna_triskolupy.yaml", + "CZ-32-PRI-L-MKT.yaml", + "CZ-32-PRI-L-MKVD.yaml", + "CZ-32-PRO-L-OKVP.yaml", + "CZ-32-PTR-L-OKC.yaml", + "CZ-32-RAD-L-KJPVR.yaml", + "CZ-32-RAD-L-MKVS.yaml", + "CZ-32-RAD-L-OKC.yaml", + "CZ-32-REJ-L-MKR.yaml", + "CZ-32-REN-L-MKO.yaml", + "CZ-32-REN-L-MKV.yaml", + "CZ-32-ROK-A-SOAR-statni_okresni_archiv_rokycany.yaml", + "CZ-32-ROK-L-FRS.yaml", + "CZ-32-ROK-L-HVRPPOK.yaml", + "CZ-32-ROK-L-KRS.yaml", + "CZ-32-ROK-L-MKR-mistni_knihovna_rakova.yaml", + "CZ-32-ROK-L-MKVS.yaml", + "CZ-32-ROK-L-OKVL.yaml", + "CZ-32-ROK-L-RNS.yaml", + "CZ-32-ROK-M-MDBHVRPZMV.yaml", + "CZ-32-SEM-L-OKS.yaml", + "CZ-32-SNS-A-KVSKSS.yaml", + "CZ-32-SOB-L-MKS.yaml", + "CZ-32-SPA-L-MKP.yaml", + "CZ-32-SPA-L-MKSP.yaml", + "CZ-32-SPA-L-MKZU.yaml", + "CZ-32-SRN-L-MKS.yaml", + "CZ-32-STA-L-KCMK.yaml", + "CZ-32-STA-L-MKC.yaml", + "CZ-32-STA-L-MKH.yaml", + "CZ-32-STA-L-MKSS.yaml", + "CZ-32-STA-L-MKVS-mistni_knihovna_ve_stahlavech.yaml", + "CZ-32-STA-L-MKVS.yaml", + "CZ-32-STA-L-OKP.yaml", + "CZ-32-STE-L-OKS.yaml", + "CZ-32-STE-L-OKVC.yaml", + "CZ-32-STO-L-OKH.yaml", + "CZ-32-STO-L-OKK.yaml", + "CZ-32-STO-L-SNLK.yaml", + "CZ-32-STO-L-VKL.yaml", + "CZ-32-STO-L-VKM.yaml", + "CZ-32-STR-L-MKK.yaml", + "CZ-32-STR-L-MKS.yaml", + "CZ-32-STR-L-MKVK.yaml", + "CZ-32-STR-L-MKVO.yaml", + "CZ-32-STR-L-MKVS-mistni_knihovna_v_sytne.yaml", + "CZ-32-STR-L-MKVS.yaml", + "CZ-32-STR-L-MKVT.yaml", + "CZ-32-STR-L-MMKS.yaml", + "CZ-32-STR-L-OKVS.yaml", + "CZ-32-SUL-L-ZKS.yaml", + "CZ-32-SUS-L-KDDS.yaml", + "CZ-32-SUS-L-MKH.yaml", + "CZ-32-SUS-M-MSSK.yaml", + "CZ-32-SUS-M-MSVS.yaml", + "CZ-32-SVI-L-MKVK.yaml", + "CZ-32-SVI-L-OKJ.yaml", + "CZ-32-SVI-L-OKVCP.yaml", + "CZ-32-SVO-L-MKS.yaml", + "CZ-32-TAC-L-MKDU.yaml", + "CZ-32-TAC-L-MKL.yaml", + "CZ-32-TAC-L-MKLUT.yaml", + "CZ-32-TAC-L-MKSTK.yaml", + "CZ-32-TES-L-MKT.yaml", + "CZ-32-TIS-L-OKVT.yaml", + "CZ-32-TLU-L-MKVT.yaml", + "CZ-32-TRE-L-OKCB.yaml", + "CZ-32-TRE-L-OKD.yaml", + "CZ-32-TRE-L-OKH.yaml", + "CZ-32-TRE-L-OKN.yaml", + "JP-11-TOD-L-TLS-todashiritsu_library_shimotodaminamibunshitsu.yaml", + "JP-11-TOD-L-TLS.yaml", + "JP-11-TOD-M-TCM.yaml", + "JP-11-TOD-M-TKHSSGC.yaml", + "JP-11-TOK-L-AGJCL.yaml", + "JP-11-TOK-L-N.yaml", + "JP-11-TOK-L-NLT.yaml", + "JP-11-TOK-L-NUCL.yaml", + "JP-11-TOK-L-SHFFIC.yaml", + "JP-11-TOK-L-TL.yaml", + "JP-11-TOK-L-TLA.yaml", + "JP-11-TOK-L-TLS-tokorozawashiritsutokorozawa_library_shintokorozaw.yaml", + "JP-11-TOK-L-TLS.yaml", + "JP-11-TOK-L-TLT-tokorozawashiritsutokorozawa_library_tomiokabunkan.yaml", + "JP-11-TOK-L-TLT-tokorozawashiritsutokorozawa_library_tsubakiminebu.yaml", + "JP-11-TOK-L-TLT.yaml", + "JP-11-TOK-L-TLY.yaml", + "JP-11-TOK-L-WL.yaml", + "JP-11-TOK-M-TAM.yaml", + "JP-11-TOK-M-TFM.yaml", + "JP-11-TOM-M-FSTHFM.yaml", + "JP-11-TOS-L-KL.yaml", + "JP-11-TOS-L-TPL.yaml", + "JP-11-TSU-L-TL.yaml", + "JP-11-TSU-L-TLF.yaml", + "JP-11-TSU-L-TLH.yaml", + "JP-11-TSU-L-TLK.yaml", + "JP-11-TSU-L-TLM.yaml", + "JP-11-TSU-L-TLN.yaml", + "JP-11-TSU-L-TLO.yaml", + "CZ-32-TRE-L-OKP.yaml", + "CZ-32-TRE-L-OKVN.yaml", + "CZ-32-TRE-L-OKZ.yaml", + "CZ-32-TRN-L-OKK.yaml", + "CZ-32-TRN-L-OKVT.yaml", + "CZ-32-TYM-L-MKT.yaml", + "CZ-32-UNE-L-MKU.yaml", + "CZ-32-UNE-L-OKB.yaml", + "CZ-32-UTE-L-MKU.yaml", + "CZ-32-UTU-L-MKVR.yaml", + "CZ-32-VEJ-L-OKV-obecni_knihovna_vejvanov.yaml", + "CZ-32-VEJ-L-OKV.yaml", + "CZ-32-VEL-L-MKV.yaml", + "CZ-32-VOL-L-MKVV.yaml", + "CZ-32-VRC-L-OKV.yaml", + "CZ-32-VSE-L-MKV.yaml", + "CZ-32-VSE-L-OKV.yaml", + "CZ-32-VSE-L-OKVV.yaml", + "CZ-32-ZAD-L-CZCPZC.yaml", + "CZ-32-ZBI-L-MKC.yaml" ], - "last_index": 69 + "last_index": 19 } \ No newline at end of file diff --git a/data/custodian/AR-A-CAP-L-BPAA.yaml b/data/custodian/AR-A-CAP-L-BPAA.yaml index a60bd631ee..7018e429fe 100644 --- a/data/custodian/AR-A-CAP-L-BPAA.yaml +++ b/data/custodian/AR-A-CAP-L-BPAA.yaml @@ -248,3 +248,28 @@ wikidata_enrichment: description: biblioteca popular en Trelew, Chubut, Argentina enrichment_timestamp: '2025-01-13T00:00:00Z' enrichment_method: manual_wikidata_lookup +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:42:48.152690+00:00' + source_url: https://www.facebook.com/p/Biblioteca-Popular-Agustin-Alvarez-100064524949800/?locale=es_LA + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.xx.fbcdn.net/rsrc.php/y1/r/ay1hV6OlegS.ico + source_url: https://www.facebook.com/p/Biblioteca-Popular-Agustin-Alvarez-100064524949800/?locale=es_LA + css_selector: '#facebook > head > link' + retrieved_on: '2025-12-24T01:42:48.152690+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://scontent.frtm1-1.fna.fbcdn.net/v/t39.30808-1/307753068_467536722073822_346580636744390745_n.jpg?stp=dst-jpg_tt6&cstp=mx227x227&ctp=s227x227&_nc_cat=110&ccb=1-7&_nc_sid=3ab345&_nc_ohc=1_Rx27jI7TUQ7kNvwGwgwPM&_nc_oc=AdlzwO-3m3_i7rK3UyIhT8f0zOV_oPmR_qUlbMGXdrMYDLxAj9iBjOHx2XHiuLwv_mM&_nc_zt=24&_nc_ht=scontent.frtm1-1.fna&_nc_gid=HZU9M6njy1zx67_VhF2Paw&oh=00_AflWa03dyJNVYCLmr3OSYe0h_IK__I7AoAafGgNoupNXeA&oe=69510664 + source_url: https://www.facebook.com/p/Biblioteca-Popular-Agustin-Alvarez-100064524949800/?locale=es_LA + css_selector: '#facebook > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-24T01:42:48.152690+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/AR-A-CHO-L-BPNA.yaml b/data/custodian/AR-A-CHO-L-BPNA.yaml index 91e4e0fc52..d5ea9819ac 100644 --- a/data/custodian/AR-A-CHO-L-BPNA.yaml +++ b/data/custodian/AR-A-CHO-L-BPNA.yaml @@ -277,3 +277,28 @@ wikidata_enrichment: description: biblioteca popular en Choele Choel, Rio Negro, Argentina enrichment_timestamp: '2025-12-22T00:00:00Z' enrichment_method: manual_wikidata_lookup +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:43:01.478026+00:00' + source_url: https://www.facebook.com/biblioteca.nicolasavellaneda.18 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.xx.fbcdn.net/rsrc.php/y1/r/ay1hV6OlegS.ico + source_url: https://www.facebook.com/biblioteca.nicolasavellaneda.18 + css_selector: '#facebook > head > link' + retrieved_on: '2025-12-24T01:43:01.478026+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://scontent.frtm1-3.fna.fbcdn.net/v/t39.30808-1/460638074_4787062361518981_4648199963899850751_n.jpg?stp=dst-jpg_tt6&cstp=mx720x720&ctp=s720x720&_nc_cat=108&ccb=1-7&_nc_sid=3ab345&_nc_ohc=_I1n4SPDT_MQ7kNvwELtrIL&_nc_oc=Adkzf7AFH9T8hTe_43FVwzJVbyNZiAKMn-3FO4bUOqsnB5AF9LNVTXlRUqE_dPPVEsY&_nc_zt=24&_nc_ht=scontent.frtm1-3.fna&_nc_gid=OJ9gjqFc6oDpvmhq2inbaA&oh=00_Afk8yGJTBFLfG-L139Egtt7jXQDZy2lvo-LZUfm2OsDYcQ&oe=69512613 + source_url: https://www.facebook.com/biblioteca.nicolasavellaneda.18 + css_selector: '#facebook > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-24T01:43:01.478026+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/AR-A-COR-L-BPCJR.yaml b/data/custodian/AR-A-COR-L-BPCJR.yaml index 5a011368ae..ef9a7be83f 100644 --- a/data/custodian/AR-A-COR-L-BPCJR.yaml +++ b/data/custodian/AR-A-COR-L-BPCJR.yaml @@ -246,3 +246,28 @@ wikidata_enrichment: description: biblioteca popular en Coronda , Santa Fe, Argentina enrichment_timestamp: '2025-12-22T00:00:00Z' enrichment_method: manual_wikidata_lookup +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:43:10.732967+00:00' + source_url: https://bibliocoronda.ar + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://bibliocoronda.ar/img/logo_bib_blanco.png + source_url: https://bibliocoronda.ar + css_selector: '[document] > html.hydrated > head > link' + retrieved_on: '2025-12-24T01:43:10.732967+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://scontent.fsfn8-1.fna.fbcdn.net/v/t39.30808-6/396723429_742669421237882_8810191542109674778_n.jpg?_nc_cat=101&ccb=1-7&_nc_sid=5f2048&_nc_eui2=AeGjmjIgFcLYx2yaZnWKvfMsJPR0uWD5Zv8k9HS5YPlm__fbrppFtzvRiOpbQQjWWKRr2AQUN0N0TqvmyldgeuUg&_nc_ohc=mD0U1lzlO6EAX8PrwRn&_nc_ht=scontent.fsfn8-1.fna&oh=00_AfDDkW6NrsvcwV1cfwu2LRe_TQxUp4j0nCkdIJZ4Ygnu7Q&oe=6552386B + source_url: https://bibliocoronda.ar + css_selector: '[document] > html.hydrated > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T01:43:10.732967+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/AR-A-GEN-L-BPGC.yaml b/data/custodian/AR-A-GEN-L-BPGC.yaml index 6044b6adbc..c6bcaa922a 100644 --- a/data/custodian/AR-A-GEN-L-BPGC.yaml +++ b/data/custodian/AR-A-GEN-L-BPGC.yaml @@ -216,3 +216,30 @@ wikidata_enrichment: description: biblioteca popular en General Conesa, Rio Negro, Argentina enrichment_timestamp: '2025-12-22T00:00:00Z' enrichment_method: manual_wikidata_lookup +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:43:24.503886+00:00' + source_url: http://biblioconesa.blogspot.com + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://3.bp.blogspot.com/_wd5l544E_JY/S75BonS0-cI/AAAAAAAAAAk/SW6VyfXgRmM/S1600-R/logo+biblioteca+muestra+75%25.jpg + source_url: http://biblioconesa.blogspot.com + css_selector: '#Header1_headerimg' + retrieved_on: '2025-12-24T01:43:24.503886+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Biblioteca Popular General Conesa + - claim_type: favicon_url + claim_value: http://biblioconesa.blogspot.com/favicon.ico + source_url: http://biblioconesa.blogspot.com + css_selector: '[document] > html.v2 > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T01:43:24.503886+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/AR-A-REC-L-BPMGO.yaml b/data/custodian/AR-A-REC-L-BPMGO.yaml index 9291935b28..af823e3d6a 100644 --- a/data/custodian/AR-A-REC-L-BPMGO.yaml +++ b/data/custodian/AR-A-REC-L-BPMGO.yaml @@ -259,3 +259,28 @@ wikidata_enrichment: description: biblioteca popular en Reconquista , Santa Fe, Argentina enrichment_timestamp: '2025-12-22T00:00:00Z' enrichment_method: manual_wikidata_lookup +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:43:49.647147+00:00' + source_url: https://www.facebook.com/biblio.manuelobligado + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.xx.fbcdn.net/rsrc.php/y1/r/ay1hV6OlegS.ico + source_url: https://www.facebook.com/biblio.manuelobligado + css_selector: '#facebook > head > link' + retrieved_on: '2025-12-24T01:43:49.647147+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://scontent.frtm1-1.fna.fbcdn.net/v/t39.30808-1/275184221_4804600952927930_4281457691424041595_n.jpg?stp=dst-jpg_tt6&cstp=mx960x720&ctp=s960x720&_nc_cat=109&ccb=1-7&_nc_sid=3ab345&_nc_ohc=QlO2R1ecJSwQ7kNvwHCxjA_&_nc_oc=AdmQTjDU6VZsooip7eSkD-DmEblIjucCi1Y5s10HKwe6FGev_uPolkEkSRoPEq7p328&_nc_zt=24&_nc_ht=scontent.frtm1-1.fna&_nc_gid=gqb7KdSvXdPxkarGhA1z4g&oh=00_AflgqvJc1l-4XBH3tz3ESEEd1Jef3vWLJgE0QZzu9njS4Q&oe=695113C4 + source_url: https://www.facebook.com/biblio.manuelobligado + css_selector: '#facebook > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-24T01:43:49.647147+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/AR-A-ROS-L-BPEAMR.yaml b/data/custodian/AR-A-ROS-L-BPEAMR.yaml index a0d4882cc3..f4ede503e1 100644 --- a/data/custodian/AR-A-ROS-L-BPEAMR.yaml +++ b/data/custodian/AR-A-ROS-L-BPEAMR.yaml @@ -254,3 +254,28 @@ wikidata_enrichment: enrichment_timestamp: '2025-12-22T00:00:00Z' enrichment_method: manual_wikidata_lookup notes: No Wikidata entry found for this institution +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:43:54.890021+00:00' + source_url: https://www.facebook.com/biblioteca.delconsejodemujeres + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.xx.fbcdn.net/rsrc.php/y1/r/ay1hV6OlegS.ico + source_url: https://www.facebook.com/biblioteca.delconsejodemujeres + css_selector: '#facebook > head > link' + retrieved_on: '2025-12-24T01:43:54.890021+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://scontent.frtm1-1.fna.fbcdn.net/v/t39.30808-1/284115825_3420911908143910_3508003053705555747_n.jpg?stp=dst-jpg_tt6&cstp=mx1080x566&ctp=s1080x566&_nc_cat=110&ccb=1-7&_nc_sid=3ab345&_nc_ohc=WI6v901Ws7sQ7kNvwEPWaEn&_nc_oc=AdkFyhI_rQcmt9KchTdOj7erw3VwL_YLrXVaOSEvGBj3De6vybLvhhD5BGh09PFqImY&_nc_zt=24&_nc_ht=scontent.frtm1-1.fna&_nc_gid=_zD42V9hZn2iSPAumlngtg&oh=00_Afm33tpm8oOkOhR9LE7leyB8wRvmfN46diWdDdPWheentA&oe=695120D6 + source_url: https://www.facebook.com/biblioteca.delconsejodemujeres + css_selector: '#facebook > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-24T01:43:54.890021+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/AR-A-ROS-L-BPPAV.yaml b/data/custodian/AR-A-ROS-L-BPPAV.yaml index c990c64a86..aca0d44381 100644 --- a/data/custodian/AR-A-ROS-L-BPPAV.yaml +++ b/data/custodian/AR-A-ROS-L-BPPAV.yaml @@ -194,3 +194,31 @@ wikidata_enrichment: description: biblioteca popular en Rosario, Santa Fe, Argentina enrichment_timestamp: '2025-12-22T00:00:00Z' enrichment_method: manual_wikidata_lookup +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:44:07.002160+00:00' + source_url: https://www.conabip.gob.ar/bipop/1534 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.conabip.gob.ar/sites/default/files/Logo-2.png + source_url: https://www.conabip.gob.ar/bipop/1534 + css_selector: '#navbar > div.container > div.navbar-header > a.logo.navbar-btn + > img' + retrieved_on: '2025-12-24T01:44:07.002160+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Inicio + - claim_type: favicon_url + claim_value: https://www.conabip.gob.ar/sites/all/themes/cnbp/favicon.ico + source_url: https://www.conabip.gob.ar/bipop/1534 + css_selector: '[document] > html.js.bootstrap-anchors-processed > head > link' + retrieved_on: '2025-12-24T01:44:07.002160+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/AR-A-ROS-L-BPPPED.yaml b/data/custodian/AR-A-ROS-L-BPPPED.yaml index b77586da09..8405b8c6b8 100644 --- a/data/custodian/AR-A-ROS-L-BPPPED.yaml +++ b/data/custodian/AR-A-ROS-L-BPPPED.yaml @@ -262,3 +262,30 @@ wikidata_enrichment: description: biblioteca Popular y Pedagógica, ubicada en Rosario, Santa Fe, Argentina enrichment_timestamp: '2025-12-22T00:00:00Z' enrichment_method: manual_wikidata_lookup +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:44:16.144836+00:00' + source_url: https://bibliotecaediaz.com.ar + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://bibliotecaediaz.com.ar/e107_media/9f7c594d44/icons/180x180_favicon.png + source_url: https://bibliotecaediaz.com.ar + css_selector: '[document] > html.fontawesome-i2svg-active.fontawesome-i2svg-complete + > head > link:nth-of-type(6)' + retrieved_on: '2025-12-24T01:44:16.144836+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://bibliotecaediaz.com.ar/media/img/800x0/2024-08/Dise_o_sin_t_tulo_1_.png + source_url: https://bibliotecaediaz.com.ar + css_selector: '[document] > html.fontawesome-i2svg-active.fontawesome-i2svg-complete + > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-24T01:44:16.144836+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 6 diff --git a/data/custodian/AR-A-SAN-L-BPGSM.yaml b/data/custodian/AR-A-SAN-L-BPGSM.yaml index cb231dfe59..aa93919b3a 100644 --- a/data/custodian/AR-A-SAN-L-BPGSM.yaml +++ b/data/custodian/AR-A-SAN-L-BPGSM.yaml @@ -356,3 +356,22 @@ location: formatted_address: Matheu 3728, B1650CSL Gran Buenos Aires, Provincia de Buenos Aires, Argentina normalization_timestamp: '2025-12-09T06:47:53.648220+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T03:40:19.161227+00:00' + source_url: https://www.mendoza.gov.ar/bibliotecageneralsanmartin + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://mza-dicaws-portal-uploads-media-prod.s3.amazonaws.com/principal/uploads/2024/12/cropped-Gob-AC_icono-del-sitio-azul-180x180.png + source_url: https://www.mendoza.gov.ar/bibliotecageneralsanmartin + css_selector: '[document] > html.js > head > link:nth-of-type(24)' + retrieved_on: '2025-12-24T03:40:19.161227+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/AR-A-TOS-L-BPJ.yaml b/data/custodian/AR-A-TOS-L-BPJ.yaml index 0107c2c8e1..c1783676d0 100644 --- a/data/custodian/AR-A-TOS-L-BPJ.yaml +++ b/data/custodian/AR-A-TOS-L-BPJ.yaml @@ -255,3 +255,28 @@ wikidata_enrichment: enrichment_method: manual_wikidata_lookup notes: 'Note: Google Maps enrichment found wrong library (in Castelar, Buenos Aires). Wikidata Q64337124 is the correct library in Tostado, Santa Fe.' +logo_enrichment: + enrichment_timestamp: '2025-12-24T04:40:13.613072+00:00' + source_url: https://sites.google.com/view/bp9dejuliocast/inicio + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lh3.googleusercontent.com/sitesv/AAzXCkeDB3EklzDqkplGvRhi9Fl_FzqKnuCJuc91cBMGxWd32jWQzorv3doB7HCwe_Sq1GmmneU7D60qcJQzIuVwbUeagsQEcDBz--i6XAq4JACRYwCD4p5uzlnjLQ4gFFj6Iwn-AZtA-ZPb_kjameD-6p2QvC_X-CRnnK26kzLlipLBViX71AGk5OTab-apYzWTnLznnqy7JID3f6jEWcIpaQ + source_url: https://sites.google.com/view/bp9dejuliocast/inicio + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T04:40:13.613072+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://lh3.googleusercontent.com/sitesv/AAzXCkcZqne4SoCxQVtCTe7DTbir-KSU6qvHv9WzTkvDuClgnnRM_Jm_1JKUPOZ4DBdcTtwXGCz1eDMv69ubBbgAOoe4CqrciGI9217tpP6kKuF3Ow2s43wuj6VkJh24BXpQYVnU2q8nwrGlpM-4nLZxjer1Wehi4ZPt1SiscYxF_6a-uurm9iPkXTa0nhk=w16383 + source_url: https://sites.google.com/view/bp9dejuliocast/inicio + css_selector: '[document] > html > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-24T04:40:13.613072+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/AR-B-ALM-L-BPMEA.yaml b/data/custodian/AR-B-ALM-L-BPMEA.yaml index f860d038b0..3c7172af7f 100644 --- a/data/custodian/AR-B-ALM-L-BPMEA.yaml +++ b/data/custodian/AR-B-ALM-L-BPMEA.yaml @@ -292,3 +292,22 @@ wikidata_enrichment: description: biblioteca popular en Adrogué, Buenos Aires, Argentina enrichment_timestamp: '2025-12-22T00:00:00Z' enrichment_method: manual_wikidata_lookup +logo_enrichment: + enrichment_timestamp: '2025-12-24T05:41:15.003104+00:00' + source_url: http://www.biblioadrogue.com.ar + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.biblioadrogue.com.ar/favicon.ico + source_url: http://www.biblioadrogue.com.ar + css_selector: '[document] > html.v2 > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T05:41:15.003104+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/AR-B-JUA-L-BPDFS.yaml b/data/custodian/AR-B-JUA-L-BPDFS.yaml index f19b167c91..b27532dc80 100644 --- a/data/custodian/AR-B-JUA-L-BPDFS.yaml +++ b/data/custodian/AR-B-JUA-L-BPDFS.yaml @@ -268,3 +268,22 @@ location: street_address: Bucarelli 2583 formatted_address: Bucarelli 2583, C1431DRC Cdad. Autónoma de Buenos Aires, Argentina normalization_timestamp: '2025-12-09T06:47:49.176793+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:36.983843+00:00' + source_url: http://biposarmiento.blogspot.com.ar + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://biposarmiento.blogspot.com/favicon.ico + source_url: http://biposarmiento.blogspot.com.ar + css_selector: '[document] > html.v2 > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T10:34:36.983843+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/AR-B-LOB-L-BPLDFS.yaml b/data/custodian/AR-B-LOB-L-BPLDFS.yaml index 8a806604c5..4ca5f7887b 100644 --- a/data/custodian/AR-B-LOB-L-BPLDFS.yaml +++ b/data/custodian/AR-B-LOB-L-BPLDFS.yaml @@ -263,3 +263,28 @@ location: formatted_address: Domingo Faustino Sarmiento 1503, B1663EDK San Miguel, Provincia de Buenos Aires, Argentina normalization_timestamp: '2025-12-09T06:47:49.631334+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:08.508188+00:00' + source_url: https://instagram.com/bibliosarmiento?igshid=MWZjMTM2ODFkZg== + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.cdninstagram.com/rsrc.php/v4/yG/r/De-Dwpd5CHc.png + source_url: https://instagram.com/bibliosarmiento?igshid=MWZjMTM2ODFkZg== + css_selector: '[document] > html._9dls._ar44 > head > link:nth-of-type(6)' + retrieved_on: '2025-12-24T10:35:08.508188+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://scontent.cdninstagram.com/v/t51.2885-19/178662769_165623698812154_31517715601300636_n.jpg?stp=dst-jpg_s100x100_tt6&_nc_cat=100&ccb=7-5&_nc_sid=bf7eb4&efg=eyJ2ZW5jb2RlX3RhZyI6InByb2ZpbGVfcGljLnd3dy43ODIuQzMifQ%3D%3D&_nc_ohc=C0UYtZWB-zkQ7kNvwFbwx_S&_nc_oc=AdlfcZAeWsbQb9nQXFe4DWLpGRUc543YMDk6T6xhIzmnNdLOoeW45-nLPLkKXlSNyxs&_nc_zt=24&_nc_ht=scontent.cdninstagram.com&oh=00_AfkUNxp4JEMEx1M7KTcxSS4o4DUQ1w_jRoixU5fGCNG7fA&oe=695183DA + source_url: https://instagram.com/bibliosarmiento?igshid=MWZjMTM2ODFkZg== + css_selector: '[document] > html._9dls._ar44 > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-24T10:35:08.508188+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 7 diff --git a/data/custodian/AR-B-LOM-L-BPAM.yaml b/data/custodian/AR-B-LOM-L-BPAM.yaml index d0edc0965c..18d2573d85 100644 --- a/data/custodian/AR-B-LOM-L-BPAM.yaml +++ b/data/custodian/AR-B-LOM-L-BPAM.yaml @@ -282,3 +282,28 @@ location: formatted_address: B1832DGB, Italia 44, B1834 Lomas de Zamora, Provincia de Buenos Aires, Argentina normalization_timestamp: '2025-12-09T06:47:49.662574+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:18.245104+00:00' + source_url: https://m.facebook.com/Biblioteca-Popular-Antonio-Mentruyt-321801954821/?locale2=es_LA + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.xx.fbcdn.net/rsrc.php/y1/r/ay1hV6OlegS.ico + source_url: https://m.facebook.com/Biblioteca-Popular-Antonio-Mentruyt-321801954821/?locale2=es_LA + css_selector: '#facebook > head > link' + retrieved_on: '2025-12-24T10:35:18.245104+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://scontent.frtm1-1.fna.fbcdn.net/v/t39.30808-1/326784659_1422617114940773_4284165406403130298_n.jpg?stp=dst-jpg_tt6&cstp=mx1536x1536&ctp=s720x720&_nc_cat=111&ccb=1-7&_nc_sid=3ab345&_nc_ohc=6vhJoNKoC9QQ7kNvwGkPj6k&_nc_oc=AdnY6Zl8W4XK2lrzQBbOcYJxr_A-Yv0VEZncfzzaErnrtTdk6dYaRmhFbqNvRJgVGOU&_nc_zt=24&_nc_ht=scontent.frtm1-1.fna&_nc_gid=xtRiF4i2iiO6rgDGubtqFw&oh=00_Afnhyd8thCVC23Uf7aXk9YJNUvFpF7OFym_upFZxLNqMAg&oe=6951ACBD + source_url: https://m.facebook.com/Biblioteca-Popular-Antonio-Mentruyt-321801954821/?locale2=es_LA + css_selector: '#facebook > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-24T10:35:18.245104+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/AR-B-LUJ-L-BPA.yaml b/data/custodian/AR-B-LUJ-L-BPA.yaml index 3209b1dccf..7ccbb22af4 100644 --- a/data/custodian/AR-B-LUJ-L-BPA.yaml +++ b/data/custodian/AR-B-LUJ-L-BPA.yaml @@ -310,3 +310,28 @@ location: street_address: Luján formatted_address: B6700 Luján, Buenos Aires Province, Argentina normalization_timestamp: '2025-12-09T06:47:49.699042+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:25.736171+00:00' + source_url: https://www.instagram.com/bibliotecaameghinook?igsh=dzZkMjkyczFqMDVh + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.cdninstagram.com/rsrc.php/v4/yG/r/De-Dwpd5CHc.png + source_url: https://www.instagram.com/bibliotecaameghinook?igsh=dzZkMjkyczFqMDVh + css_selector: '[document] > html._9dls._ar44 > head > link:nth-of-type(6)' + retrieved_on: '2025-12-24T10:35:25.736171+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://scontent.cdninstagram.com/v/t51.2885-19/23733778_379798469110577_2105609056893796352_n.jpg?stp=dst-jpg_s100x100_tt6&_nc_cat=107&ccb=7-5&_nc_sid=bf7eb4&efg=eyJ2ZW5jb2RlX3RhZyI6InByb2ZpbGVfcGljLnd3dy44NzUuQzMifQ%3D%3D&_nc_ohc=FbNJ3hIvflwQ7kNvwHnZcYO&_nc_oc=AdmquvRKIXymrktUYj6rwaX1Je2Sw5cM5d2mcrWM4aEFp6fKj0yPeCAoYvQzD9-mVF8&_nc_zt=24&_nc_ht=scontent.cdninstagram.com&oh=00_Afk13zplTKT5hRfvOsNhd0jJrqHqhB97HZWTS8A-hi8otg&oe=69519C21 + source_url: https://www.instagram.com/bibliotecaameghinook?igsh=dzZkMjkyczFqMDVh + css_selector: '[document] > html._9dls._ar44 > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-24T10:35:25.736171+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 7 diff --git a/data/custodian/AR-B-OLA-L-BPAC.yaml b/data/custodian/AR-B-OLA-L-BPAC.yaml index 1ae2f6287f..198d639858 100644 --- a/data/custodian/AR-B-OLA-L-BPAC.yaml +++ b/data/custodian/AR-B-OLA-L-BPAC.yaml @@ -252,3 +252,22 @@ location: formatted_address: COI, Alsina 2659, B7400 Olavarría, Provincia de Buenos Aires, Argentina normalization_timestamp: '2025-12-09T06:47:49.815346+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:33.513901+00:00' + source_url: https://www.instagram.com/biblio_armando_collinet + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.cdninstagram.com/rsrc.php/v4/yG/r/De-Dwpd5CHc.png + source_url: https://www.instagram.com/biblio_armando_collinet + css_selector: '[document] > html._9dls._ar44 > head > link:nth-of-type(6)' + retrieved_on: '2025-12-24T10:36:33.513901+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 7 diff --git a/data/custodian/CZ-32-BRI-M-MHB.yaml b/data/custodian/CZ-32-BRI-M-MHB.yaml index 050224168b..fcdfde6b4f 100644 --- a/data/custodian/CZ-32-BRI-M-MHB.yaml +++ b/data/custodian/CZ-32-BRI-M-MHB.yaml @@ -250,3 +250,22 @@ location: youtube_status: NOT_FOUND youtube_search_query: Muzeum Horní Bříza official youtube_search_timestamp: '2025-12-09T09:30:36.819150+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T05:29:47.897048+00:00' + source_url: https://muzeum.hornibriza.eu + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://muzeum.hornibriza.eu/skins/muzeum.hornibriza.eu_lego2/favicons/safari-pinned-tab.svg + source_url: https://muzeum.hornibriza.eu + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T05:29:47.897048+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-BUK-L-MKB.yaml b/data/custodian/CZ-32-BUK-L-MKB.yaml index 8d20582c77..ec7ed66bdc 100644 --- a/data/custodian/CZ-32-BUK-L-MKB.yaml +++ b/data/custodian/CZ-32-BUK-L-MKB.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-BUK-L-MKB - valid_from: "2025-12-10T09:47:07Z" + valid_from: '2025-12-10T09:47:07Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-BUK-L-MKB valid_from: null - valid_to: "2025-12-10T09:47:07Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:07Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-BUK-L-MKB ghcid_numeric: 6958571939662859999 valid_from: '2025-12-06T23:37:31.309857+00:00' @@ -210,3 +210,28 @@ location: postal_code: 342 01 street_address: Bukovník 60 normalization_timestamp: '2025-12-09T10:53:42.707730+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T05:41:21.389869+00:00' + source_url: https://www.bukovnik.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=ec37056da5 + source_url: https://www.bukovnik.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T05:41:21.389869+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://ec37056da5.clvaw-cdnwnd.com/338f0c5a2b2340598e9609456dc0cd25/200000020-284b2284b4/700/book-2184568_960_720.jpg?ph=ec37056da5 + source_url: https://www.bukovnik.knihovna.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T05:41:21.389869+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-CAC-L-MKC.yaml b/data/custodian/CZ-32-CAC-L-MKC.yaml index 72cd6d3a6a..7ba1d70d19 100644 --- a/data/custodian/CZ-32-CAC-L-MKC.yaml +++ b/data/custodian/CZ-32-CAC-L-MKC.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-CAC-L-MKC - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-CAC-L-MKC valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-CAC-L-MKC ghcid_numeric: 13214319303465419566 valid_from: '2025-12-08T11:21:38.139557+00:00' @@ -216,3 +216,22 @@ location: postal_code: 339 01 street_address: Čachrov 5 normalization_timestamp: '2025-12-09T10:53:42.733847+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T05:41:27.055191+00:00' + source_url: https://www.cachrov.info/mestyscachrov/knihovna.asp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.cachrov.info/mestyscachrov/icon/safari-pinned-tab.svg + source_url: https://www.cachrov.info/mestyscachrov/knihovna.asp + css_selector: '[document] > html.touch-no.focus-within > head > link:nth-of-type(19)' + retrieved_on: '2025-12-24T05:41:27.055191+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-CER-L-OKVC.yaml b/data/custodian/CZ-32-CER-L-OKVC.yaml index 28f178646d..4697a07ec4 100644 --- a/data/custodian/CZ-32-CER-L-OKVC.yaml +++ b/data/custodian/CZ-32-CER-L-OKVC.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-CER-L-OKVC - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-CER-L-OKVC valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-CER-L-OKVC ghcid_numeric: 8921761871371804360 valid_from: '2025-12-08T11:21:32.993613+00:00' @@ -216,3 +216,22 @@ location: postal_code: 349 58 street_address: nám. 1. máje 327 normalization_timestamp: '2025-12-09T10:53:42.868461+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T06:42:16.708708+00:00' + source_url: https://www.cernosin.cz/mesto/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.cernosin.cz/skins/cernosin.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.cernosin.cz/mesto/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T06:42:16.708708+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-CES-L-MKCK.yaml b/data/custodian/CZ-32-CES-L-MKCK.yaml index 865e57383d..e50b0d1bb1 100644 --- a/data/custodian/CZ-32-CES-L-MKCK.yaml +++ b/data/custodian/CZ-32-CES-L-MKCK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-CES-L-MKCK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-CES-L-MKCK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-CES-L-MKCK ghcid_numeric: 8847476567208740741 valid_from: '2025-12-08T11:21:34.941405+00:00' @@ -219,3 +219,22 @@ location: postal_code: 345 32 street_address: Česká Kubice 60 normalization_timestamp: '2025-12-09T10:53:42.901862+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T06:42:24.199553+00:00' + source_url: https://www.kubice.cz/kultura-1/sport-a-volny-cas/obecni-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kubice.cz/skins/kubice.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.kubice.cz/kultura-1/sport-a-volny-cas/obecni-knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T06:42:24.199553+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-CHE-L-MKC.yaml b/data/custodian/CZ-32-CHE-L-MKC.yaml index 921a08b038..c130080718 100644 --- a/data/custodian/CZ-32-CHE-L-MKC.yaml +++ b/data/custodian/CZ-32-CHE-L-MKC.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-CHE-L-MKC - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-CHE-L-MKC valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-CHE-L-MKC ghcid_numeric: 15364932705580963763 valid_from: '2025-12-06T23:37:31.938480+00:00' @@ -214,3 +214,22 @@ location: postal_code: 338 06 street_address: Cheznovice 16 normalization_timestamp: '2025-12-09T10:53:42.936539+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T07:15:43.006238+00:00' + source_url: https://www.cheznovice.eu/mistni-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.cheznovice.eu/wp-content/uploads/2018/03/cropped-znak-CH-180x180.jpg + source_url: https://www.cheznovice.eu/mistni-knihovna + css_selector: '[document] > html > head > link:nth-of-type(19)' + retrieved_on: '2025-12-24T07:15:43.006238+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-CHO-L-VKVHVC.yaml b/data/custodian/CZ-32-CHO-L-VKVHVC.yaml index a5038ddf1f..9473f6e34e 100644 --- a/data/custodian/CZ-32-CHO-L-VKVHVC.yaml +++ b/data/custodian/CZ-32-CHO-L-VKVHVC.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-CHO-L-VKVHVC - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-CHO-L-VKVHVC valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-CHO-L-VKVHVC ghcid_numeric: 15995548786081647935 valid_from: '2025-12-06T23:37:31.509013+00:00' @@ -207,3 +207,30 @@ location: postal_code: 332 14 street_address: ul. Osvobozených politických vězňů 313 normalization_timestamp: '2025-12-09T10:53:43.074559+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T09:45:19.579906+00:00' + source_url: https://knihovnachotesov.webk.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://knihovnachotesov.webk.cz/themes/new/blue/logo3.png + source_url: https://knihovnachotesov.webk.cz + css_selector: '#outpage > header.tmava > a > img.mobile_display_none' + retrieved_on: '2025-12-24T09:45:19.579906+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Na úvodní stranu + - claim_type: favicon_url + claim_value: https://knihovnachotesov.webk.cz/themes/new/favicon.ico + source_url: https://knihovnachotesov.webk.cz + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-24T09:45:19.579906+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-CHV-L-MKVC.yaml b/data/custodian/CZ-32-CHV-L-MKVC.yaml index 58c20c2ccb..449b5b9976 100644 --- a/data/custodian/CZ-32-CHV-L-MKVC.yaml +++ b/data/custodian/CZ-32-CHV-L-MKVC.yaml @@ -210,3 +210,22 @@ location: postal_code: 332 05 street_address: Chválenice 21 normalization_timestamp: '2025-12-09T10:53:43.226689+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:19:52.104986+00:00' + source_url: https://www.chvalenice.cz/obec-107/knihovna-ve-chvalenicich + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.chvalenice.cz/skins/chvalenice.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.chvalenice.cz/obec-107/knihovna-ve-chvalenicich + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:19:52.104986+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-CIM-L-MKC.yaml b/data/custodian/CZ-32-CIM-L-MKC.yaml index 0259c3e92a..fc44e344ab 100644 --- a/data/custodian/CZ-32-CIM-L-MKC.yaml +++ b/data/custodian/CZ-32-CIM-L-MKC.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-CIM-L-MKC - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-CIM-L-MKC valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-CIM-L-MKC ghcid_numeric: 2601963999679758767 valid_from: '2025-12-08T11:21:33.871017+00:00' @@ -215,3 +215,28 @@ location: postal_code: 342 01 street_address: Čimice 20 normalization_timestamp: '2025-12-09T10:53:43.258608+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:19:58.821619+00:00' + source_url: https://www.cimice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=ecce2ee61f + source_url: https://www.cimice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T10:19:58.821619+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://ecce2ee61f.clvaw-cdnwnd.com/a5bb9207122134a4f6c9ee9443d43783/200000003-a46c9a46cc/700/book-1169437_960_720.jpg?ph=ecce2ee61f + source_url: https://www.cimice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T10:19:58.821619+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-DAR-L-MKD.yaml b/data/custodian/CZ-32-DAR-L-MKD.yaml index 59e4abd59c..d3c6f98dc4 100644 --- a/data/custodian/CZ-32-DAR-L-MKD.yaml +++ b/data/custodian/CZ-32-DAR-L-MKD.yaml @@ -36,13 +36,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DAR-L-MKD - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DAR-L-MKD valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DAR-L-MKD ghcid_numeric: 12010678905103653758 valid_from: '2025-12-06T23:37:20.222125+00:00' @@ -100,8 +100,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.95 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Místní knihovna Darmyšl @@ -215,3 +215,22 @@ location: geonames_id: 3077266 geonames_name: Darmyšl feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:31:24.944285+00:00' + source_url: https://www.obecstaresedlo.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obecstaresedlo.cz/skins/obecstaresedlo.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.obecstaresedlo.cz + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:31:24.944285+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-DLO-L-MKDV.yaml b/data/custodian/CZ-32-DLO-L-MKDV.yaml index 98fad35bec..f4d93f0009 100644 --- a/data/custodian/CZ-32-DLO-L-MKDV.yaml +++ b/data/custodian/CZ-32-DLO-L-MKDV.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DLO-L-MKDV - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DLO-L-MKDV valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DLO-L-MKDV ghcid_numeric: 96241799794609438 valid_from: '2025-12-06T23:37:31.364928+00:00' @@ -214,3 +214,28 @@ location: postal_code: 341 91 street_address: Dlouhá Ves 155 normalization_timestamp: '2025-12-09T10:53:43.316167+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:31:33.006446+00:00' + source_url: https://www.dlves.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=f083548ca1 + source_url: https://www.dlves.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T10:31:33.006446+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://f083548ca1.clvaw-cdnwnd.com/d57d328eb6018d3d5b5b36c766d42e93/200000013-407d8407db/700/image-crop-200000001-2.jpeg?ph=f083548ca1 + source_url: https://www.dlves.knihovna.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T10:31:33.006446+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-DNE-L-OKD.yaml b/data/custodian/CZ-32-DNE-L-OKD.yaml index 439380b533..42590b60d9 100644 --- a/data/custodian/CZ-32-DNE-L-OKD.yaml +++ b/data/custodian/CZ-32-DNE-L-OKD.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DNE-L-OKD - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DNE-L-OKD valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DNE-L-OKD ghcid_numeric: 3594806256678443852 valid_from: '2025-12-06T23:37:31.663940+00:00' @@ -207,3 +207,22 @@ location: postal_code: 334 43 street_address: Dnešice 53 normalization_timestamp: '2025-12-09T10:53:43.343309+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:31:40.259356+00:00' + source_url: https://www.dnesice.cz/obec/mistni-knihovna-1 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.dnesice.cz/skins/dnesice.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.dnesice.cz/obec/mistni-knihovna-1 + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:31:40.259356+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-DOB-L-MKD-mistni_knihovna_dobriv.yaml b/data/custodian/CZ-32-DOB-L-MKD-mistni_knihovna_dobriv.yaml index 074ee7bffb..87a3e3a78b 100644 --- a/data/custodian/CZ-32-DOB-L-MKD-mistni_knihovna_dobriv.yaml +++ b/data/custodian/CZ-32-DOB-L-MKD-mistni_knihovna_dobriv.yaml @@ -214,3 +214,22 @@ location: postal_code: 338 44 street_address: Dobřív 305 normalization_timestamp: '2025-12-09T10:53:43.368965+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:31:45.394434+00:00' + source_url: https://dobriv.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://dobriv.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://dobriv.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T10:31:45.394434+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-DOB-L-MKD.yaml b/data/custodian/CZ-32-DOB-L-MKD.yaml index 260e769856..08f3a942cb 100644 --- a/data/custodian/CZ-32-DOB-L-MKD.yaml +++ b/data/custodian/CZ-32-DOB-L-MKD.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DOB-L-MKD - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DOB-L-MKD valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DOB-L-MKD ghcid_numeric: 11513332489639185480 valid_from: '2025-12-06T23:37:20.996537+00:00' @@ -223,3 +223,22 @@ location: postal_code: 334 41 street_address: Stromořadí 439 normalization_timestamp: '2025-12-09T10:53:43.396762+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:31:53.713202+00:00' + source_url: https://dobrany.tritius.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://dobrany.tritius.cz/apple-touch-icon-180x180.png + source_url: https://dobrany.tritius.cz + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T10:31:53.713202+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-DOB-L-MKVU.yaml b/data/custodian/CZ-32-DOB-L-MKVU.yaml index 5d7c116f5d..4f44281be9 100644 --- a/data/custodian/CZ-32-DOB-L-MKVU.yaml +++ b/data/custodian/CZ-32-DOB-L-MKVU.yaml @@ -214,3 +214,22 @@ location: postal_code: 334 41 street_address: Vodní Újezd 66 normalization_timestamp: '2025-12-09T10:53:43.448520+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:32:07.369335+00:00' + source_url: https://www.dobrany.cz/kultura-vzdelani-sport/knihovna/knihovna-vodni-ujezd + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.dobrany.cz/skins/dobrany.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.dobrany.cz/kultura-vzdelani-sport/knihovna/knihovna-vodni-ujezd + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:32:07.369335+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-DOB-L-OKD.yaml b/data/custodian/CZ-32-DOB-L-OKD.yaml index 0559cb9222..81de504370 100644 --- a/data/custodian/CZ-32-DOB-L-OKD.yaml +++ b/data/custodian/CZ-32-DOB-L-OKD.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DOB-L-OKD - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DOB-L-OKD valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DOB-L-OKD ghcid_numeric: 8939432286452181604 valid_from: '2025-12-06T23:37:31.875044+00:00' @@ -207,3 +207,22 @@ location: postal_code: 330 05 street_address: Dobříč 84 normalization_timestamp: '2025-12-09T10:53:43.474648+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:32:14.428903+00:00' + source_url: https://www.dobric.cz/obec/sluzby-v-nasi-obci/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.dobric.cz/skins/dobric.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.dobric.cz/obec/sluzby-v-nasi-obci/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:32:14.428903+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-DOL-L-OKHB.yaml b/data/custodian/CZ-32-DOL-L-OKHB.yaml index bfc6c909f3..ad826fcd78 100644 --- a/data/custodian/CZ-32-DOL-L-OKHB.yaml +++ b/data/custodian/CZ-32-DOL-L-OKHB.yaml @@ -207,3 +207,22 @@ location: postal_code: 331 52 street_address: Horní Bělá 74 normalization_timestamp: '2025-12-09T10:53:43.552691+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:32:30.869462+00:00' + source_url: https://www.hornibela.cz/obec/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.hornibela.cz/skins/hornibela_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.hornibela.cz/obec/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:32:30.869462+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-DOL-L-OKL-obecni_knihovna_loza.yaml b/data/custodian/CZ-32-DOL-L-OKL-obecni_knihovna_loza.yaml index 6edc125121..88a7dad6a3 100644 --- a/data/custodian/CZ-32-DOL-L-OKL-obecni_knihovna_loza.yaml +++ b/data/custodian/CZ-32-DOL-L-OKL-obecni_knihovna_loza.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DOL-L-OKL-obecni_knihovna_loza - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DOL-L-OKL-obecni_knihovna_loza valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DOL-L-OKL-obecni_knihovna_loza ghcid_numeric: 2388943868058303906 valid_from: '2025-12-06T23:37:31.886843+00:00' @@ -211,3 +211,22 @@ location: postal_code: 331 52 street_address: Loza 47 normalization_timestamp: '2025-12-09T10:53:43.579297+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:32:37.267094+00:00' + source_url: https://www.obec-loza.cz/volny-cas-1/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obec-loza.cz/skins/obec-loza.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.obec-loza.cz/volny-cas-1/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:32:37.267094+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-DOL-L-OKL.yaml b/data/custodian/CZ-32-DOL-L-OKL.yaml index 6fb752dc96..1389344967 100644 --- a/data/custodian/CZ-32-DOL-L-OKL.yaml +++ b/data/custodian/CZ-32-DOL-L-OKL.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DOL-L-OKL - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DOL-L-OKL valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DOL-L-OKL ghcid_numeric: 15747625563138240835 valid_from: '2025-12-06T23:37:31.816809+00:00' @@ -207,3 +207,22 @@ location: postal_code: 331 52 street_address: Lhotka 7 normalization_timestamp: '2025-12-09T10:53:43.605325+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:32:43.417825+00:00' + source_url: https://www.nekmir.cz/obec-1/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.nekmir.cz/skins/nekmir/favicons/safari-pinned-tab.svg + source_url: https://www.nekmir.cz/obec-1/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:32:43.417825+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-DOL-L-OKVDB.yaml b/data/custodian/CZ-32-DOL-L-OKVDB.yaml index cdfc29d89e..34075d9966 100644 --- a/data/custodian/CZ-32-DOL-L-OKVDB.yaml +++ b/data/custodian/CZ-32-DOL-L-OKVDB.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DOL-L-OKVDB - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DOL-L-OKVDB valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DOL-L-OKVDB ghcid_numeric: 1867818463170641978 valid_from: '2025-12-06T23:37:31.822237+00:00' @@ -207,3 +207,22 @@ location: postal_code: 331 52 street_address: Dolní Bělá 88 normalization_timestamp: '2025-12-09T10:53:43.656894+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:32:51.586275+00:00' + source_url: https://www.dolnibela.cz/obec/kultura/obecni-knihovna/obecni-knihovna-3cs.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.dolnibela.cz/skins/dolnibela.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.dolnibela.cz/obec/kultura/obecni-knihovna/obecni-knihovna-3cs.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:32:51.586275+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-DOM-L-MKBND.yaml b/data/custodian/CZ-32-DOM-L-MKBND.yaml index b82d3dc977..65f28e09bb 100644 --- a/data/custodian/CZ-32-DOM-L-MKBND.yaml +++ b/data/custodian/CZ-32-DOM-L-MKBND.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DOM-L-MKBND - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DOM-L-MKBND valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DOM-L-MKBND ghcid_numeric: 12675561531441781535 valid_from: '2025-12-06T23:37:17.534488+00:00' @@ -242,3 +242,22 @@ location: postal_code: 344 01 street_address: Pivovarská 10 normalization_timestamp: '2025-12-09T10:53:43.705768+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:32:58.546511+00:00' + source_url: https://domazlice.tritius.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://domazlice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://domazlice.tritius.cz + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T10:32:58.546511+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-DOM-L-MKC.yaml b/data/custodian/CZ-32-DOM-L-MKC.yaml index 58b6138355..2ffffda931 100644 --- a/data/custodian/CZ-32-DOM-L-MKC.yaml +++ b/data/custodian/CZ-32-DOM-L-MKC.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DOM-L-MKC - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DOM-L-MKC valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DOM-L-MKC ghcid_numeric: 12708962376936695398 valid_from: '2025-12-06T23:37:31.125386+00:00' @@ -210,3 +210,22 @@ location: postal_code: 344 01 street_address: Chrastavice 28 normalization_timestamp: '2025-12-09T10:53:43.734031+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:33:06.332724+00:00' + source_url: https://www.chrastavice.cz/obec/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.chrastavice.cz/skins/chrastavice.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.chrastavice.cz/obec/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:33:06.332724+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-DOM-L-MKM.yaml b/data/custodian/CZ-32-DOM-L-MKM.yaml index e85713b4e2..bcebf82dff 100644 --- a/data/custodian/CZ-32-DOM-L-MKM.yaml +++ b/data/custodian/CZ-32-DOM-L-MKM.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DOM-L-MKM - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DOM-L-MKM valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DOM-L-MKM ghcid_numeric: 15798194900776646238 valid_from: '2025-12-06T23:37:30.900401+00:00' @@ -210,3 +210,22 @@ location: postal_code: 344 01 street_address: Milavče 116 normalization_timestamp: '2025-12-09T10:53:43.808815+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:33:15.965280+00:00' + source_url: https://www.milavce.cz/obec/knihovna-milavce + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.milavce.cz/skins/milavce.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.milavce.cz/obec/knihovna-milavce + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:33:15.965280+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-DOM-L-MKP.yaml b/data/custodian/CZ-32-DOM-L-MKP.yaml index 6eab331539..f7dbcadecf 100644 --- a/data/custodian/CZ-32-DOM-L-MKP.yaml +++ b/data/custodian/CZ-32-DOM-L-MKP.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DOM-L-MKP - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DOM-L-MKP valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DOM-L-MKP ghcid_numeric: 16658901692723295361 valid_from: '2025-12-06T23:37:30.857515+00:00' @@ -212,3 +212,22 @@ location: postal_code: 344 01 street_address: Pec normalization_timestamp: '2025-12-09T10:53:43.835049+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:33:22.794514+00:00' + source_url: http://obecpec.cz/knihovna + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://obecpec.cz/wp-content/uploads/2019/03/znak.jpg + source_url: http://obecpec.cz/knihovna + css_selector: '#site-logo > a.custom-logo-link > img.custom-logo' + retrieved_on: '2025-12-24T10:33:22.794514+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Logo for Pec + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/CZ-32-DRA-L-MKD.yaml b/data/custodian/CZ-32-DRA-L-MKD.yaml index ba9979b94f..487f8494f8 100644 --- a/data/custodian/CZ-32-DRA-L-MKD.yaml +++ b/data/custodian/CZ-32-DRA-L-MKD.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DRA-L-MKD - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DRA-L-MKD valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DRA-L-MKD ghcid_numeric: 2707493356005962468 valid_from: '2025-12-06T23:37:31.368940+00:00' @@ -210,3 +210,28 @@ location: postal_code: 342 01 street_address: Dražovice 4 normalization_timestamp: '2025-12-09T10:53:43.940103+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:33:34.765080+00:00' + source_url: https://www.drazovice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=d5ca92e883 + source_url: https://www.drazovice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T10:33:34.765080+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://d5ca92e883.clvaw-cdnwnd.com/046c2f44cd123eaaee4b42ff77eea14c/200000034-22d7e22d81/700/leaves-1076307_960_720%20-%20Copy.jpg?ph=d5ca92e883 + source_url: https://www.drazovice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T10:33:34.765080+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-DRA-L-MKDU.yaml b/data/custodian/CZ-32-DRA-L-MKDU.yaml index babc8f5f57..61d02023f6 100644 --- a/data/custodian/CZ-32-DRA-L-MKDU.yaml +++ b/data/custodian/CZ-32-DRA-L-MKDU.yaml @@ -219,3 +219,22 @@ location: postal_code: 338 08 street_address: Drahoňův Újezd 20 normalization_timestamp: '2025-12-09T10:53:43.968210+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:33:39.562366+00:00' + source_url: https://drahonuv-ujezd.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://drahonuv-ujezd.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://drahonuv-ujezd.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T10:33:39.562366+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-DYS-L-OKD.yaml b/data/custodian/CZ-32-DYS-L-OKD.yaml index 44c0401e7e..cf736ad32d 100644 --- a/data/custodian/CZ-32-DYS-L-OKD.yaml +++ b/data/custodian/CZ-32-DYS-L-OKD.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-DYS-L-OKD - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-DYS-L-OKD valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-DYS-L-OKD ghcid_numeric: 7443059342649418025 valid_from: '2025-12-06T23:37:31.917868+00:00' @@ -220,3 +220,22 @@ location: postal_code: 330 02 street_address: Přátelství 281 normalization_timestamp: '2025-12-09T10:53:44.019675+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:33:49.874388+00:00' + source_url: https://www.obecdysina.cz/zivot-v-obci/obecni-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obecdysina.cz/skins/obecdysina.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.obecdysina.cz/zivot-v-obci/obecni-knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:33:49.874388+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-EJP-L-KOE.yaml b/data/custodian/CZ-32-EJP-L-KOE.yaml index db06346714..d2e101cb6a 100644 --- a/data/custodian/CZ-32-EJP-L-KOE.yaml +++ b/data/custodian/CZ-32-EJP-L-KOE.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-EJP-L-KOE - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-EJP-L-KOE valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-EJP-L-KOE ghcid_numeric: 5535131234777595522 valid_from: '2025-12-06T23:37:31.962926+00:00' @@ -207,3 +207,22 @@ location: postal_code: 337 01 street_address: Ejpovice 24 normalization_timestamp: '2025-12-09T10:53:44.065170+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:33:59.752364+00:00' + source_url: https://merkur.tritius.cz/library/ejpovice + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://merkur.tritius.cz/apple-touch-icon-180x180.png + source_url: https://merkur.tritius.cz/library/ejpovice + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T10:33:59.752364+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-HER-L-KHH.yaml b/data/custodian/CZ-32-HER-L-KHH.yaml index b3bebae9b9..1a71993e5b 100644 --- a/data/custodian/CZ-32-HER-L-KHH.yaml +++ b/data/custodian/CZ-32-HER-L-KHH.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HER-L-KHH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HER-L-KHH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HER-L-KHH ghcid_numeric: 9863982338651154811 valid_from: '2025-12-06T23:37:22.424739+00:00' @@ -210,3 +210,22 @@ location: postal_code: 330 24 street_address: Mírové náměstí 264 normalization_timestamp: '2025-12-09T10:53:44.119086+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:06.808350+00:00' + source_url: http://www.hermanovahut.cz/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.hermanovahut.cz/image.php?nid=21302&oid=11569220 + source_url: http://www.hermanovahut.cz/knihovna + css_selector: '[document] > html > head > link:nth-of-type(22)' + retrieved_on: '2025-12-24T10:34:06.808350+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-HOR-L-MKB-mistni_knihovna_brezany.yaml b/data/custodian/CZ-32-HOR-L-MKB-mistni_knihovna_brezany.yaml index c8fa5dded2..37e114a225 100644 --- a/data/custodian/CZ-32-HOR-L-MKB-mistni_knihovna_brezany.yaml +++ b/data/custodian/CZ-32-HOR-L-MKB-mistni_knihovna_brezany.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKB-mistni_knihovna_brezany - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKB-mistni_knihovna_brezany valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKB-mistni_knihovna_brezany ghcid_numeric: 3247929160712936577 valid_from: '2025-12-06T23:37:31.207641+00:00' @@ -211,3 +211,22 @@ location: postal_code: 341 01 street_address: Břežany 23 normalization_timestamp: '2025-12-09T10:53:44.244577+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:23.187533+00:00' + source_url: http://brezany.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://brezany.knihovna.cz/favicon.svg + source_url: http://brezany.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:34:23.187533+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-MKB.yaml b/data/custodian/CZ-32-HOR-L-MKB.yaml index 88bd6b8c23..593ea08457 100644 --- a/data/custodian/CZ-32-HOR-L-MKB.yaml +++ b/data/custodian/CZ-32-HOR-L-MKB.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKB - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKB valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKB ghcid_numeric: 11890912933346898098 valid_from: '2025-12-06T23:37:31.022543+00:00' @@ -210,3 +210,22 @@ location: postal_code: 346 01 street_address: Blížejov 1 normalization_timestamp: '2025-12-09T10:53:44.268017+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:30.561016+00:00' + source_url: https://www.blizejov.cz/obec-1/knihovny + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.blizejov.cz/skins/blizejov.cz-2_lego2/favicons/apple-touch-icon.png + source_url: https://www.blizejov.cz/obec-1/knihovny + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:34:30.561016+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-HOR-L-MKC-mistni_knihovna_chanovice.yaml b/data/custodian/CZ-32-HOR-L-MKC-mistni_knihovna_chanovice.yaml index 895b0bf58a..c90823b884 100644 --- a/data/custodian/CZ-32-HOR-L-MKC-mistni_knihovna_chanovice.yaml +++ b/data/custodian/CZ-32-HOR-L-MKC-mistni_knihovna_chanovice.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKC-mistni_knihovna_chanovice - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKC-mistni_knihovna_chanovice valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKC-mistni_knihovna_chanovice ghcid_numeric: 17827155887603839303 valid_from: '2025-12-06T23:37:31.269857+00:00' @@ -214,3 +214,22 @@ location: postal_code: 341 01 street_address: Chanovice 1 normalization_timestamp: '2025-12-09T10:53:44.292149+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:36.146048+00:00' + source_url: http://www.chanovice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.chanovice.knihovna.cz/favicon.svg + source_url: http://www.chanovice.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:34:36.146048+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-MKH-mistni_knihovna_hejna.yaml b/data/custodian/CZ-32-HOR-L-MKH-mistni_knihovna_hejna.yaml index 46d173424c..c6871324cc 100644 --- a/data/custodian/CZ-32-HOR-L-MKH-mistni_knihovna_hejna.yaml +++ b/data/custodian/CZ-32-HOR-L-MKH-mistni_knihovna_hejna.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKH-mistni_knihovna_hejna - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKH-mistni_knihovna_hejna valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKH-mistni_knihovna_hejna ghcid_numeric: 10604147390640073876 valid_from: '2025-12-06T23:37:39.765124+00:00' @@ -211,3 +211,22 @@ location: postal_code: 341 01 street_address: Hejná 70 normalization_timestamp: '2025-12-09T10:53:44.342957+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:43.563288+00:00' + source_url: https://horazdovice.tritius.cz/library/hejna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://horazdovice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://horazdovice.tritius.cz/library/hejna + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T10:34:43.563288+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-HOR-L-MKH-mistni_knihovna_hradesice.yaml b/data/custodian/CZ-32-HOR-L-MKH-mistni_knihovna_hradesice.yaml index 3744a8f2d9..1690b51a27 100644 --- a/data/custodian/CZ-32-HOR-L-MKH-mistni_knihovna_hradesice.yaml +++ b/data/custodian/CZ-32-HOR-L-MKH-mistni_knihovna_hradesice.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKH-mistni_knihovna_hradesice - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKH-mistni_knihovna_hradesice valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKH-mistni_knihovna_hradesice ghcid_numeric: 10693386904805539794 valid_from: '2025-12-06T23:37:31.278740+00:00' @@ -211,3 +211,22 @@ location: postal_code: 341 01 street_address: Hradešice 81 normalization_timestamp: '2025-12-09T10:53:44.369497+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:49.094542+00:00' + source_url: https://www.hradesice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.hradesice.knihovna.cz/favicon.svg + source_url: https://www.hradesice.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:34:49.094542+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-MKHB.yaml b/data/custodian/CZ-32-HOR-L-MKHB.yaml index ab5163f472..27f6199531 100644 --- a/data/custodian/CZ-32-HOR-L-MKHB.yaml +++ b/data/custodian/CZ-32-HOR-L-MKHB.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKHB - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKHB valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKHB ghcid_numeric: 2033675898242589417 valid_from: '2025-12-06T23:37:21.872708+00:00' @@ -212,3 +212,22 @@ location: postal_code: 330 12 street_address: Tř. 1. máje 512 normalization_timestamp: '2025-12-09T10:53:44.396364+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:54.587965+00:00' + source_url: https://knihovna.hornibriza.eu + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://knihovna.hornibriza.eu/skins/knihovna.hornibriza.eu_lego2/favicons/safari-pinned-tab.svg + source_url: https://knihovna.hornibriza.eu + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:34:54.587965+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-HOR-L-MKHL.yaml b/data/custodian/CZ-32-HOR-L-MKHL.yaml index 4b01f1e5a6..ab99d9c6d3 100644 --- a/data/custodian/CZ-32-HOR-L-MKHL.yaml +++ b/data/custodian/CZ-32-HOR-L-MKHL.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKHL - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKHL valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKHL ghcid_numeric: 6522128057382904435 valid_from: '2025-12-06T23:37:42.804881+00:00' @@ -209,3 +209,22 @@ location: postal_code: 341 01 street_address: Horažďovická Lhota 12 normalization_timestamp: '2025-12-09T10:53:44.419858+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:00.030159+00:00' + source_url: http://horazdovickalhota.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://horazdovickalhota.knihovna.cz/favicon.svg + source_url: http://horazdovickalhota.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:35:00.030159+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-MKK.yaml b/data/custodian/CZ-32-HOR-L-MKK.yaml index 49993b322b..2788245ac1 100644 --- a/data/custodian/CZ-32-HOR-L-MKK.yaml +++ b/data/custodian/CZ-32-HOR-L-MKK.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKK ghcid_numeric: 17565650561585569499 valid_from: '2025-12-06T23:37:31.238306+00:00' @@ -207,3 +207,22 @@ location: postal_code: 341 01 street_address: Kvášňovice 5 normalization_timestamp: '2025-12-09T10:53:44.478795+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:09.927030+00:00' + source_url: https://kvasnovice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://kvasnovice.knihovna.cz/favicon.svg + source_url: https://kvasnovice.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:35:09.927030+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-MKKPMKH.yaml b/data/custodian/CZ-32-HOR-L-MKKPMKH.yaml index 6e06344cbe..cfc1769cea 100644 --- a/data/custodian/CZ-32-HOR-L-MKKPMKH.yaml +++ b/data/custodian/CZ-32-HOR-L-MKKPMKH.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKKPMKH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKKPMKH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKKPMKH ghcid_numeric: 5929047814979017540 valid_from: '2025-12-06T23:37:42.802013+00:00' @@ -209,3 +209,22 @@ location: postal_code: 341 01 street_address: Komušín 69 normalization_timestamp: '2025-12-09T10:53:44.504948+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:14.862556+00:00' + source_url: http://komusin.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://komusin.knihovna.cz/favicon.svg + source_url: http://komusin.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:35:14.862556+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-MKM-mistni_knihovna_mysliv.yaml b/data/custodian/CZ-32-HOR-L-MKM-mistni_knihovna_mysliv.yaml index 0db3b67e6c..5326cc283f 100644 --- a/data/custodian/CZ-32-HOR-L-MKM-mistni_knihovna_mysliv.yaml +++ b/data/custodian/CZ-32-HOR-L-MKM-mistni_knihovna_mysliv.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKM-mistni_knihovna_mysliv - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKM-mistni_knihovna_mysliv valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKM-mistni_knihovna_mysliv ghcid_numeric: 10408348879099225105 valid_from: '2025-12-06T23:37:31.425055+00:00' @@ -211,3 +211,22 @@ location: postal_code: 341 01 street_address: Myslív 52 normalization_timestamp: '2025-12-09T10:53:44.556773+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:22.235236+00:00' + source_url: https://horazdovice.tritius.cz/Katalog/library/mysliv + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://horazdovice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://horazdovice.tritius.cz/Katalog/library/mysliv + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T10:35:22.235236+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-HOR-L-MKM.yaml b/data/custodian/CZ-32-HOR-L-MKM.yaml index e4a2ce0758..dce831e498 100644 --- a/data/custodian/CZ-32-HOR-L-MKM.yaml +++ b/data/custodian/CZ-32-HOR-L-MKM.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKM - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKM valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKM ghcid_numeric: 12984633398666781793 valid_from: '2025-12-06T23:37:30.837682+00:00' @@ -214,3 +214,22 @@ location: postal_code: 346 01 street_address: Mířkov 77 normalization_timestamp: '2025-12-09T10:53:44.582739+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:29.527898+00:00' + source_url: https://www.mirkov.cz/obec/obecni-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.mirkov.cz/skins/mirkov.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.mirkov.cz/obec/obecni-knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:35:29.527898+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-HOR-L-MKTPMKH.yaml b/data/custodian/CZ-32-HOR-L-MKTPMKH.yaml index 5b07fdf5eb..a16373a492 100644 --- a/data/custodian/CZ-32-HOR-L-MKTPMKH.yaml +++ b/data/custodian/CZ-32-HOR-L-MKTPMKH.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKTPMKH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKTPMKH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKTPMKH ghcid_numeric: 6528215325238426465 valid_from: '2025-12-06T23:37:42.799036+00:00' @@ -209,3 +209,22 @@ location: postal_code: 341 01 street_address: Třebomyslice 1 normalization_timestamp: '2025-12-09T10:53:44.661087+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:42.503567+00:00' + source_url: http://trebomyslice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://trebomyslice.knihovna.cz/favicon.svg + source_url: http://trebomyslice.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:35:42.503567+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-MKV-mistni_knihovna_velenovy.yaml b/data/custodian/CZ-32-HOR-L-MKV-mistni_knihovna_velenovy.yaml index f005088184..9c160ea130 100644 --- a/data/custodian/CZ-32-HOR-L-MKV-mistni_knihovna_velenovy.yaml +++ b/data/custodian/CZ-32-HOR-L-MKV-mistni_knihovna_velenovy.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKV-mistni_knihovna_velenovy - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKV-mistni_knihovna_velenovy valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKV-mistni_knihovna_velenovy ghcid_numeric: 11100699841074095331 valid_from: '2025-12-06T23:37:42.813077+00:00' @@ -209,3 +209,22 @@ location: postal_code: 341 01 street_address: Velenovy 83 normalization_timestamp: '2025-12-09T10:53:44.688144+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:47.963938+00:00' + source_url: http://www.velenovy.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.velenovy.knihovna.cz/favicon.svg + source_url: http://www.velenovy.knihovna.cz + css_selector: '[document] > html.dk_fouc.js > head > link' + retrieved_on: '2025-12-24T10:35:47.963938+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-MKVH.yaml b/data/custodian/CZ-32-HOR-L-MKVH.yaml index f1d63e65e6..b59f92ffbc 100644 --- a/data/custodian/CZ-32-HOR-L-MKVH.yaml +++ b/data/custodian/CZ-32-HOR-L-MKVH.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MKVH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MKVH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MKVH ghcid_numeric: 4995855290197079048 valid_from: '2025-12-06T23:37:31.490108+00:00' @@ -207,3 +207,37 @@ location: postal_code: 334 55 street_address: Horšice 7 normalization_timestamp: '2025-12-09T10:53:44.740740+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:55.992348+00:00' + source_url: https://knihovna-horsice.webnode.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://fe1096bebc.cbaul-cdnwnd.com/9c8f900954822612d2a8d9b4828e3f5e/200000068-9aef79aef8/Sni%CC%81mek%20obrazovky%202022-05-07%20v%C2%A010.00.02.png?ph=fe1096bebc + source_url: https://knihovna-horsice.webnode.cz + css_selector: '#wnd_LogoBlock_186677810 > div.b-l-c.logo-content > div.b-l-image.logo-image + > div.b-l-image-w.logo-image-cell > picture > source > img.wnd-logo-img' + retrieved_on: '2025-12-24T10:35:55.992348+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=fe1096bebc + source_url: https://knihovna-horsice.webnode.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T10:35:55.992348+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://fe1096bebc.cbaul-cdnwnd.com/9c8f900954822612d2a8d9b4828e3f5e/200000009-4114641147/700/bible-g2732317f0_1280.png?ph=fe1096bebc + source_url: https://knihovna-horsice.webnode.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T10:35:55.992348+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-MLKK.yaml b/data/custodian/CZ-32-HOR-L-MLKK.yaml index b349fd95f0..23e882a519 100644 --- a/data/custodian/CZ-32-HOR-L-MLKK.yaml +++ b/data/custodian/CZ-32-HOR-L-MLKK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MLKK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MLKK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MLKK ghcid_numeric: 14903504063015332315 valid_from: '2025-12-06T23:37:31.234492+00:00' @@ -213,3 +213,22 @@ location: postal_code: 341 01 street_address: Kejnice 12 normalization_timestamp: '2025-12-09T10:53:44.768638+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:01.094770+00:00' + source_url: http://kejnice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://kejnice.knihovna.cz/favicon.svg + source_url: http://kejnice.knihovna.cz + css_selector: '[document] > html.dk_fouc.js > head > link' + retrieved_on: '2025-12-24T10:36:01.094770+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-MLKMB.yaml b/data/custodian/CZ-32-HOR-L-MLKMB.yaml index 1a786494d0..5dd90419c1 100644 --- a/data/custodian/CZ-32-HOR-L-MLKMB.yaml +++ b/data/custodian/CZ-32-HOR-L-MLKMB.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-MLKMB - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-MLKMB valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-MLKMB ghcid_numeric: 11651869536502504631 valid_from: '2025-12-06T23:37:31.214372+00:00' @@ -211,3 +211,22 @@ location: postal_code: 341 01 street_address: Malý Bor 146 normalization_timestamp: '2025-12-09T10:53:44.794669+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:06.273457+00:00' + source_url: http://www.maly-bor.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.maly-bor.knihovna.cz/favicon.svg + source_url: http://www.maly-bor.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:36:06.273457+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-OKK.yaml b/data/custodian/CZ-32-HOR-L-OKK.yaml index bb95d902f6..b90a5cfaf6 100644 --- a/data/custodian/CZ-32-HOR-L-OKK.yaml +++ b/data/custodian/CZ-32-HOR-L-OKK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-OKK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-OKK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-OKK ghcid_numeric: 9055859501873734015 valid_from: '2025-12-06T23:37:31.437137+00:00' @@ -211,3 +211,22 @@ location: postal_code: 341 01 street_address: Kovčín 57 normalization_timestamp: '2025-12-09T10:53:44.822578+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:11.630332+00:00' + source_url: https://horazdovice.tritius.cz/Katalog/library/kovcin + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://horazdovice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://horazdovice.tritius.cz/Katalog/library/kovcin + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T10:36:11.630332+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-HOR-L-OKS.yaml b/data/custodian/CZ-32-HOR-L-OKS.yaml index 34177a0257..a1b33a1c1b 100644 --- a/data/custodian/CZ-32-HOR-L-OKS.yaml +++ b/data/custodian/CZ-32-HOR-L-OKS.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-OKS - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-OKS valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-OKS ghcid_numeric: 4755412903110068103 valid_from: '2025-12-06T23:37:31.266514+00:00' @@ -207,3 +207,22 @@ location: postal_code: 341 01 street_address: Svéradice 146 normalization_timestamp: '2025-12-09T10:53:44.848417+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:16.543068+00:00' + source_url: http://www.sveradice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.sveradice.knihovna.cz/favicon.svg + source_url: http://www.sveradice.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:36:16.543068+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-OKVB.yaml b/data/custodian/CZ-32-HOR-L-OKVB.yaml index 8f5e959291..efc4bd34d7 100644 --- a/data/custodian/CZ-32-HOR-L-OKVB.yaml +++ b/data/custodian/CZ-32-HOR-L-OKVB.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOR-L-OKVB - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOR-L-OKVB valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOR-L-OKVB ghcid_numeric: 17088559637005661938 valid_from: '2025-12-06T23:37:31.204626+00:00' @@ -211,3 +211,22 @@ location: postal_code: 341 01 street_address: Velký Bor 71 normalization_timestamp: '2025-12-09T10:53:44.878945+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:21.751394+00:00' + source_url: http://www.velkybor.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.velkybor.knihovna.cz/favicon.svg + source_url: http://www.velkybor.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:36:21.751394+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-L-OKVH.yaml b/data/custodian/CZ-32-HOR-L-OKVH.yaml index 4ca0f38da3..48b663abc1 100644 --- a/data/custodian/CZ-32-HOR-L-OKVH.yaml +++ b/data/custodian/CZ-32-HOR-L-OKVH.yaml @@ -207,3 +207,22 @@ location: postal_code: 341 01 street_address: Velké Hydčice 18 normalization_timestamp: '2025-12-09T10:53:44.906346+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:26.886734+00:00' + source_url: http://www.velkehydcice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.velkehydcice.knihovna.cz/favicon.svg + source_url: http://www.velkehydcice.knihovna.cz + css_selector: '[document] > html.vendor-webkit.browser-chrome > head > link' + retrieved_on: '2025-12-24T10:36:26.886734+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOR-M-MMH-mestske_muzeum_horazdovice.yaml b/data/custodian/CZ-32-HOR-M-MMH-mestske_muzeum_horazdovice.yaml index 728e1e30da..88839c2f83 100644 --- a/data/custodian/CZ-32-HOR-M-MMH-mestske_muzeum_horazdovice.yaml +++ b/data/custodian/CZ-32-HOR-M-MMH-mestske_muzeum_horazdovice.yaml @@ -238,3 +238,22 @@ location: youtube_status: NOT_FOUND youtube_search_query: Městské muzeum Horažďovice official youtube_search_timestamp: '2025-12-09T09:30:39.454023+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:34.030701+00:00' + source_url: http://www.muzeumhd.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.muzeumhd.cz/templates/projekt/favicon96.png + source_url: http://www.muzeumhd.cz + css_selector: '[document] > html.js.rgba > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T10:36:34.030701+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: 96x96 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-HOS-L-MKH.yaml b/data/custodian/CZ-32-HOS-L-MKH.yaml index 7acc4a69ad..3b1af193fc 100644 --- a/data/custodian/CZ-32-HOS-L-MKH.yaml +++ b/data/custodian/CZ-32-HOS-L-MKH.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HOS-L-MKH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HOS-L-MKH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HOS-L-MKH ghcid_numeric: 18037250865401242182 valid_from: '2025-12-06T23:37:21.889291+00:00' @@ -214,3 +214,22 @@ location: postal_code: 345 25 street_address: Dobrohostova 110 normalization_timestamp: '2025-12-09T10:53:44.954909+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:39.721492+00:00' + source_url: https://domazlice.tritius.cz/library/mkhostoun + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://domazlice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://domazlice.tritius.cz/library/mkhostoun + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T10:36:39.721492+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-HOS-L-MKVR.yaml b/data/custodian/CZ-32-HOS-L-MKVR.yaml index 4a13375895..f5ba2b3178 100644 --- a/data/custodian/CZ-32-HOS-L-MKVR.yaml +++ b/data/custodian/CZ-32-HOS-L-MKVR.yaml @@ -200,3 +200,22 @@ location: postal_code: 345 25 street_address: Rybník 15 normalization_timestamp: '2025-12-09T10:53:44.980526+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:47.692325+00:00' + source_url: http://www.rybnik.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.rybnik.cz/skins/rybnik.cz_lego2/favicons/apple-touch-icon.png + source_url: http://www.rybnik.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:36:47.692325+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-HRA-L-KFPVH.yaml b/data/custodian/CZ-32-HRA-L-KFPVH.yaml index 43257f8ba1..d631c07dd3 100644 --- a/data/custodian/CZ-32-HRA-L-KFPVH.yaml +++ b/data/custodian/CZ-32-HRA-L-KFPVH.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HRA-L-KFPVH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HRA-L-KFPVH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HRA-L-KFPVH ghcid_numeric: 16874823455624639507 valid_from: '2025-12-06T23:37:31.376300+00:00' @@ -214,3 +214,28 @@ location: postal_code: 342 01 street_address: Hrádek u Sušice 78 normalization_timestamp: '2025-12-09T10:53:45.031039+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:55.669902+00:00' + source_url: https://www.hradekususice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=51db6cb849 + source_url: https://www.hradekususice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T10:36:55.669902+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://51db6cb849.clvaw-cdnwnd.com/6018a16a2de51e44cdea1a60b79337b2/200000001-058390583c/700/book-1291164_960_720.jpg?ph=51db6cb849 + source_url: https://www.hradekususice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T10:36:55.669902+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-HRA-L-MKH.yaml b/data/custodian/CZ-32-HRA-L-MKH.yaml index de1dcd19ee..ca499ab17d 100644 --- a/data/custodian/CZ-32-HRA-L-MKH.yaml +++ b/data/custodian/CZ-32-HRA-L-MKH.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HRA-L-MKH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HRA-L-MKH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HRA-L-MKH ghcid_numeric: 14024534106400069666 valid_from: '2025-12-06T23:37:21.057615+00:00' @@ -210,3 +210,22 @@ location: postal_code: 338 42 street_address: nám. 8.května 270 normalization_timestamp: '2025-12-09T10:53:45.067528+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:37:00.399987+00:00' + source_url: https://knihovnahradek.webk.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://knihovnahradek.files.webk.cz/logov.png + source_url: https://knihovnahradek.webk.cz + css_selector: '#header_in > a > h1 > img.mobile_display_none' + retrieved_on: '2025-12-24T10:37:00.399987+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Na úvodní stranu + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/CZ-32-HRA-L-OKH.yaml b/data/custodian/CZ-32-HRA-L-OKH.yaml index a28c365095..26f5c47ad0 100644 --- a/data/custodian/CZ-32-HRA-L-OKH.yaml +++ b/data/custodian/CZ-32-HRA-L-OKH.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-HRA-L-OKH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-HRA-L-OKH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-HRA-L-OKH ghcid_numeric: 15838527678284769193 valid_from: '2025-12-06T23:37:31.610658+00:00' @@ -211,3 +211,22 @@ location: postal_code: 332 11 street_address: Hradec 45 normalization_timestamp: '2025-12-09T10:53:45.095108+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:37:06.025249+00:00' + source_url: http://www.hradec.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.hradec.knihovna.cz/favicon.svg + source_url: http://www.hradec.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:37:06.025249+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-JAN-L-OKS.yaml b/data/custodian/CZ-32-JAN-L-OKS.yaml index cf00254843..b35ae4aaf1 100644 --- a/data/custodian/CZ-32-JAN-L-OKS.yaml +++ b/data/custodian/CZ-32-JAN-L-OKS.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-JAN-L-OKS - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-JAN-L-OKS valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-JAN-L-OKS ghcid_numeric: 10541907871809857609 valid_from: '2025-12-06T23:37:31.428175+00:00' @@ -211,3 +211,22 @@ location: postal_code: 340 21 street_address: Soustov 7 normalization_timestamp: '2025-12-09T10:53:45.124449+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:37:11.127835+00:00' + source_url: https://www.dlazov.cz/dlazov/fr.asp?tab=oudlazov&id=710&burl=&pt=XA + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.dlazov.cz/dlazov/icon/safari-pinned-tab.svg + source_url: https://www.dlazov.cz/dlazov/fr.asp?tab=oudlazov&id=710&burl=&pt=XA + css_selector: '[document] > html > head > link:nth-of-type(21)' + retrieved_on: '2025-12-24T10:37:11.127835+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-KAS-L-MKKH.yaml b/data/custodian/CZ-32-KAS-L-MKKH.yaml index b290fb1c03..e61b0c6c6f 100644 --- a/data/custodian/CZ-32-KAS-L-MKKH.yaml +++ b/data/custodian/CZ-32-KAS-L-MKKH.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KAS-L-MKKH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KAS-L-MKKH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KAS-L-MKKH ghcid_numeric: 14355034040526921405 valid_from: '2025-12-06T23:37:21.106095+00:00' @@ -223,3 +223,22 @@ location: postal_code: 341 92 street_address: Náměstí 1 normalization_timestamp: '2025-12-09T10:53:45.229201+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:37:39.493787+00:00' + source_url: https://plk.tritius.cz/library/khory + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://plk.tritius.cz/apple-touch-icon-180x180.png + source_url: https://plk.tritius.cz/library/khory + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T10:37:39.493787+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-KAS-L-OKP.yaml b/data/custodian/CZ-32-KAS-L-OKP.yaml index ee63a9f117..12d2e04367 100644 --- a/data/custodian/CZ-32-KAS-L-OKP.yaml +++ b/data/custodian/CZ-32-KAS-L-OKP.yaml @@ -42,13 +42,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KAS-L-OKP - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KAS-L-OKP valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KAS-L-OKP ghcid_numeric: 4206564392852745139 valid_from: '2025-12-06T23:37:42.755323+00:00' @@ -108,8 +108,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.95 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Obecní knihovna Podhůří @@ -217,3 +217,22 @@ location: geonames_id: 3073780 geonames_name: Kasejovice feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:37:56.381965+00:00' + source_url: https://www.kasejovice.cz/kulturne-spolecenske-centrum + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kasejovice.cz/assets/favicon/apple-icon-180x180.png?7de4705b66f341c89739d9d2f16532c2 + source_url: https://www.kasejovice.cz/kulturne-spolecenske-centrum + css_selector: '[document] > html > head > link:nth-of-type(10)' + retrieved_on: '2025-12-24T10:37:56.381965+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 13 diff --git a/data/custodian/CZ-32-KAZ-L-MKK.yaml b/data/custodian/CZ-32-KAZ-L-MKK.yaml index c0f3232cd5..e4aad90fbe 100644 --- a/data/custodian/CZ-32-KAZ-L-MKK.yaml +++ b/data/custodian/CZ-32-KAZ-L-MKK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KAZ-L-MKK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KAZ-L-MKK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KAZ-L-MKK ghcid_numeric: 2331324600625085335 valid_from: '2025-12-06T23:37:22.450686+00:00' @@ -214,3 +214,22 @@ location: postal_code: 331 51 street_address: Ke Škále 220 normalization_timestamp: '2025-12-09T10:53:45.380027+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:38:07.154341+00:00' + source_url: https://tritius.kaznejov.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://tritius.kaznejov.cz/apple-touch-icon-180x180.png + source_url: https://tritius.kaznejov.cz + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T10:38:07.154341+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-KAZ-L-OKC.yaml b/data/custodian/CZ-32-KAZ-L-OKC.yaml index 4009849dee..ca7e17fb61 100644 --- a/data/custodian/CZ-32-KAZ-L-OKC.yaml +++ b/data/custodian/CZ-32-KAZ-L-OKC.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KAZ-L-OKC - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KAZ-L-OKC valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KAZ-L-OKC ghcid_numeric: 7209447491061331572 valid_from: '2025-12-08T11:21:24.329921+00:00' @@ -212,3 +212,22 @@ location: postal_code: 331 51 street_address: Čivice normalization_timestamp: '2025-12-09T10:53:45.405037+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:38:13.943681+00:00' + source_url: https://www.dobric.cz/obec/sluzby-v-nasi-obci/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.dobric.cz/skins/dobric.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.dobric.cz/obec/sluzby-v-nasi-obci/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:38:13.943681+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-KAZ-L-OKJ.yaml b/data/custodian/CZ-32-KAZ-L-OKJ.yaml index 02b12cc1f5..5767953321 100644 --- a/data/custodian/CZ-32-KAZ-L-OKJ.yaml +++ b/data/custodian/CZ-32-KAZ-L-OKJ.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KAZ-L-OKJ - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KAZ-L-OKJ valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KAZ-L-OKJ ghcid_numeric: 16732585431145824823 valid_from: '2025-12-06T23:37:31.863532+00:00' @@ -207,3 +207,22 @@ location: postal_code: 334 51 street_address: Jarov 24 normalization_timestamp: '2025-12-09T10:53:45.431667+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:38:21.050866+00:00' + source_url: https://www.oujarov.cz/obec-jarov/soucasnost + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.oujarov.cz/skins/oujarov.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.oujarov.cz/obec-jarov/soucasnost + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:38:21.050866+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-KAZ-L-OKO.yaml b/data/custodian/CZ-32-KAZ-L-OKO.yaml index aab60e2f60..2b8a0671c9 100644 --- a/data/custodian/CZ-32-KAZ-L-OKO.yaml +++ b/data/custodian/CZ-32-KAZ-L-OKO.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KAZ-L-OKO - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KAZ-L-OKO valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KAZ-L-OKO ghcid_numeric: 1430253210101297675 valid_from: '2025-12-06T23:37:31.921089+00:00' @@ -211,3 +211,22 @@ location: postal_code: 331 51 street_address: Obora čp. 199 normalization_timestamp: '2025-12-09T10:53:45.481026+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:38:31.906538+00:00' + source_url: https://www.obora-ps.cz/pro-obcany/obecni-knihovna.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://obora-ps.cz/wp-content/uploads/2025/03/cropped-Oboraznakb-180x180.png + source_url: https://www.obora-ps.cz/pro-obcany/obecni-knihovna.html + css_selector: '[document] > html.g-offcanvas-css2 > head > link:nth-of-type(20)' + retrieved_on: '2025-12-24T10:38:31.906538+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-KDY-L-MKL.yaml b/data/custodian/CZ-32-KDY-L-MKL.yaml index 0df7179d2f..d9dcba16aa 100644 --- a/data/custodian/CZ-32-KDY-L-MKL.yaml +++ b/data/custodian/CZ-32-KDY-L-MKL.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KDY-L-MKL - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KDY-L-MKL valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KDY-L-MKL ghcid_numeric: 12481936237569535569 valid_from: '2025-12-06T23:37:43.440567+00:00' @@ -208,3 +208,22 @@ location: postal_code: 345 06 street_address: Libkov 7 normalization_timestamp: '2025-12-09T10:53:45.615583+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:38:46.944648+00:00' + source_url: https://www.libkov.cz/obec/mistni-knihovna/novinky-z-knihovny + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.libkov.cz/skins/libkov.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.libkov.cz/obec/mistni-knihovna/novinky-z-knihovny + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:38:46.944648+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-KLA-L-MKVK.yaml b/data/custodian/CZ-32-KLA-L-MKVK.yaml index d6120d5e7a..03f584dc78 100644 --- a/data/custodian/CZ-32-KLA-L-MKVK.yaml +++ b/data/custodian/CZ-32-KLA-L-MKVK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KLA-L-MKVK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KLA-L-MKVK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KLA-L-MKVK ghcid_numeric: 16189931856255226667 valid_from: '2025-12-06T23:37:32.090520+00:00' @@ -217,3 +217,22 @@ location: postal_code: 349 61 street_address: nám. Republiky 89 normalization_timestamp: '2025-12-09T10:53:45.959497+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:19:22.109749+00:00' + source_url: https://www.kladruby.cz/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kladruby.cz/html/images/favicon.ico + source_url: https://www.kladruby.cz/knihovna + css_selector: '[document] > html > head > link:nth-of-type(6)' + retrieved_on: '2025-12-24T11:19:22.109749+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-KLA-L-MLKB-mistni_lidova_knihovna_bezdekov.yaml b/data/custodian/CZ-32-KLA-L-MLKB-mistni_lidova_knihovna_bezdekov.yaml index e55202b2da..6190aabd22 100644 --- a/data/custodian/CZ-32-KLA-L-MLKB-mistni_lidova_knihovna_bezdekov.yaml +++ b/data/custodian/CZ-32-KLA-L-MLKB-mistni_lidova_knihovna_bezdekov.yaml @@ -211,3 +211,22 @@ location: postal_code: 339 01 street_address: Bezděkov 7 normalization_timestamp: '2025-12-09T10:53:45.987150+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:19:28.050368+00:00' + source_url: https://www.bezdekov.cz/oubezdekov/knihovna.asp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.bezdekov.cz/bezdekov-common/icon/safari-pinned-tab.svg + source_url: https://www.bezdekov.cz/oubezdekov/knihovna.asp + css_selector: '[document] > html.touch-no.focus-within > head > link:nth-of-type(34)' + retrieved_on: '2025-12-24T11:19:28.050368+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-KLA-L-MLKD.yaml b/data/custodian/CZ-32-KLA-L-MLKD.yaml index 3eedb71e79..148cebb8eb 100644 --- a/data/custodian/CZ-32-KLA-L-MLKD.yaml +++ b/data/custodian/CZ-32-KLA-L-MLKD.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KLA-L-MLKD - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KLA-L-MLKD valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KLA-L-MLKD ghcid_numeric: 15791831524879315615 valid_from: '2025-12-06T23:37:31.217846+00:00' @@ -211,3 +211,22 @@ location: postal_code: 339 01 street_address: Dolany 125 normalization_timestamp: '2025-12-09T10:53:46.041072+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:19:42.605101+00:00' + source_url: https://www.obec-dolany.cz/dolany/knihovna.asp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obec-dolany.cz/dolany-common/icon/safari-pinned-tab.svg + source_url: https://www.obec-dolany.cz/dolany/knihovna.asp + css_selector: '[document] > html.touch-no.focus-within > head > link:nth-of-type(34)' + retrieved_on: '2025-12-24T11:19:42.605101+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-KLA-L-OKVK.yaml b/data/custodian/CZ-32-KLA-L-OKVK.yaml index a54454874a..813ab62c79 100644 --- a/data/custodian/CZ-32-KLA-L-OKVK.yaml +++ b/data/custodian/CZ-32-KLA-L-OKVK.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KLA-L-OKVK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KLA-L-OKVK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KLA-L-OKVK ghcid_numeric: 13744963716914387575 valid_from: '2025-12-06T23:37:31.944482+00:00' @@ -210,3 +210,22 @@ location: postal_code: 338 41 street_address: Klabava 17 normalization_timestamp: '2025-12-09T10:53:46.220952+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:20:00.233540+00:00' + source_url: https://klabava.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://klabava.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://klabava.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:20:00.233540+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-KLA-M-VMDKHVKKIO.yaml b/data/custodian/CZ-32-KLA-M-VMDKHVKKIO.yaml index 0b872faeec..8df304caaf 100644 --- a/data/custodian/CZ-32-KLA-M-VMDKHVKKIO.yaml +++ b/data/custodian/CZ-32-KLA-M-VMDKHVKKIO.yaml @@ -42,13 +42,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KLA-M-VMDKHVKKIO - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KLA-M-VMDKHVKKIO valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KLA-M-VMDKHVKKIO ghcid_numeric: 14413374467079472462 valid_from: '2025-12-06T23:37:18.290685+00:00' @@ -215,3 +215,22 @@ location: postal_code: 339 01 street_address: Hostašova 1 normalization_timestamp: '2025-12-09T10:53:46.268139+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:20:17.338904+00:00' + source_url: https://muzeumklatovy.kpsys.cz/#! + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://muzeumklatovy.kpsys.cz/favicon.png?v=2.3.0-32021 + source_url: https://muzeumklatovy.kpsys.cz/#! + css_selector: '#ng-app > head > link:nth-of-type(9)' + retrieved_on: '2025-12-24T11:20:17.338904+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: 256x256 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-KLE-L-MKKPC.yaml b/data/custodian/CZ-32-KLE-L-MKKPC.yaml index 73911a7116..f7fd20e724 100644 --- a/data/custodian/CZ-32-KLE-L-MKKPC.yaml +++ b/data/custodian/CZ-32-KLE-L-MKKPC.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KLE-L-MKKPC - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KLE-L-MKKPC valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KLE-L-MKKPC ghcid_numeric: 7864091254906168865 valid_from: '2025-12-08T11:21:26.523807+00:00' @@ -112,8 +112,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.95 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Místní knihovna Klenčí pod Čerchovem @@ -218,3 +218,22 @@ location: geocoding_timestamp: '2025-12-09T21:40:24.982687+00:00' geocoding_method: CITY_NAME_LOOKUP geonames_matched_name: Klenčí pod Čerchovem +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:20:23.053173+00:00' + source_url: https://www.knihovna-klenci.cz/on-line-katalog + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.knihovna-klenci.cz/favicon.svg + source_url: https://www.knihovna-klenci.cz/on-line-katalog + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:20:23.053173+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-KOL-L-KICVK.yaml b/data/custodian/CZ-32-KOL-L-KICVK.yaml index c54aebed00..3d144e189b 100644 --- a/data/custodian/CZ-32-KOL-L-KICVK.yaml +++ b/data/custodian/CZ-32-KOL-L-KICVK.yaml @@ -43,13 +43,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KOL-L-KICVK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KOL-L-KICVK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KOL-L-KICVK ghcid_numeric: 16773814809099428060 valid_from: '2025-12-06T23:37:23.195815+00:00' @@ -212,3 +212,28 @@ location: country: *id007 postal_code: 341 42 normalization_timestamp: '2025-12-09T10:53:46.345184+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:20:35.770473+00:00' + source_url: http://knihovna-a-informacni-centrum-v-kolinci6.webnode.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=8bf043a3de + source_url: http://knihovna-a-informacni-centrum-v-kolinci6.webnode.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T11:20:35.770473+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://8bf043a3de.cbaul-cdnwnd.com/98f729675da94cf8ea6aa299a21350ca/200000014-2f72e2f72f/700/IMG_20240828_133922%20%281%29.jpeg?ph=8bf043a3de + source_url: http://knihovna-a-informacni-centrum-v-kolinci6.webnode.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T11:20:35.770473+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-KOL-L-KMK.yaml b/data/custodian/CZ-32-KOL-L-KMK.yaml index 25a000dfbc..8fdd524403 100644 --- a/data/custodian/CZ-32-KOL-L-KMK.yaml +++ b/data/custodian/CZ-32-KOL-L-KMK.yaml @@ -48,13 +48,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KOL-L-KMK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KOL-L-KMK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KOL-L-KMK ghcid_numeric: 10636960935897538251 valid_from: '2025-12-06T23:37:21.950801+00:00' @@ -228,3 +228,22 @@ location: postal_code: 345 43 street_address: U Staré fary 142 normalization_timestamp: '2025-12-09T10:53:46.372950+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:20:43.773304+00:00' + source_url: https://domazlice.tritius.cz/library/kolovec + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://domazlice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://domazlice.tritius.cz/library/kolovec + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:20:43.773304+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-KOL-L-MKZ.yaml b/data/custodian/CZ-32-KOL-L-MKZ.yaml index ca6ab86c71..83fb6d4874 100644 --- a/data/custodian/CZ-32-KOL-L-MKZ.yaml +++ b/data/custodian/CZ-32-KOL-L-MKZ.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KOL-L-MKZ - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KOL-L-MKZ valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KOL-L-MKZ ghcid_numeric: 12508885798672402864 valid_from: '2025-12-06T23:37:31.290415+00:00' @@ -211,3 +211,22 @@ location: postal_code: 341 42 street_address: Zavlekov 56 normalization_timestamp: '2025-12-09T10:53:46.450944+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:20:56.236060+00:00' + source_url: https://zavlekov.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://zavlekov.knihovna.cz/favicon.svg + source_url: https://zavlekov.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:20:56.236060+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-KRA-L-MKVK.yaml b/data/custodian/CZ-32-KRA-L-MKVK.yaml index 773f123cf7..ef41f1f9f7 100644 --- a/data/custodian/CZ-32-KRA-L-MKVK.yaml +++ b/data/custodian/CZ-32-KRA-L-MKVK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KRA-L-MKVK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KRA-L-MKVK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KRA-L-MKVK ghcid_numeric: 8260617816044188906 valid_from: '2025-12-06T23:37:19.618365+00:00' @@ -227,3 +227,22 @@ location: postal_code: 331 41 street_address: Markova 2 normalization_timestamp: '2025-12-09T10:53:46.585051+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:21:12.097412+00:00' + source_url: https://tritius.knihovnakralovice.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://tritius.knihovnakralovice.cz/apple-touch-icon-180x180.png + source_url: https://tritius.knihovnakralovice.cz + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:21:12.097412+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-KRA-L-MLKK.yaml b/data/custodian/CZ-32-KRA-L-MLKK.yaml index b5e90cfa18..28b34c22fd 100644 --- a/data/custodian/CZ-32-KRA-L-MLKK.yaml +++ b/data/custodian/CZ-32-KRA-L-MLKK.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KRA-L-MLKK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KRA-L-MLKK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KRA-L-MLKK ghcid_numeric: 12649709534241265374 valid_from: '2025-12-06T23:37:31.765879+00:00' @@ -213,3 +213,22 @@ location: postal_code: 331 41 street_address: Kozojedy 100 normalization_timestamp: '2025-12-09T10:53:46.652331+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:21:19.302498+00:00' + source_url: https://tritius.knihovnakralovice.cz/library/kozojedy + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://tritius.knihovnakralovice.cz/apple-touch-icon-180x180.png + source_url: https://tritius.knihovnakralovice.cz/library/kozojedy + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:21:19.302498+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-KRA-L-MLKVH.yaml b/data/custodian/CZ-32-KRA-L-MLKVH.yaml index f9ac648ab6..08ce412318 100644 --- a/data/custodian/CZ-32-KRA-L-MLKVH.yaml +++ b/data/custodian/CZ-32-KRA-L-MLKVH.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KRA-L-MLKVH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KRA-L-MLKVH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KRA-L-MLKVH ghcid_numeric: 9282684688591635561 valid_from: '2025-12-06T23:37:42.792970+00:00' @@ -209,3 +209,22 @@ location: postal_code: 331 41 street_address: Hradecko 67 normalization_timestamp: '2025-12-09T10:53:46.687471+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:21:24.160918+00:00' + source_url: https://www.knihovnakralovice.cz/cs/pobocka-hradecko + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.knihovnakralovice.cz/favicon.ico + source_url: https://www.knihovnakralovice.cz/cs/pobocka-hradecko + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:21:24.160918+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-KRA-M-MGSP.yaml b/data/custodian/CZ-32-KRA-M-MGSP.yaml index bedad3e6a6..588c0a2214 100644 --- a/data/custodian/CZ-32-KRA-M-MGSP.yaml +++ b/data/custodian/CZ-32-KRA-M-MGSP.yaml @@ -80,7 +80,8 @@ provenance: notes: - 'Country resolved 2025-12-06T23:54:39Z: XX→CZ via Wikidata P17' - 'Region resolved 2025-12-07T00:00:56Z: XX->32 via Wikidata P131 (CZ-32)' - - 'City resolved 2025-12-07T00:31:12Z: XXX->KRA via Wikidata Q86920578 coords (49.9820,13.4875) -> Kralovice (GeoNames:3072941)' + - 'City resolved 2025-12-07T00:31:12Z: XXX->KRA via Wikidata Q86920578 coords (49.9820,13.4875) + -> Kralovice (GeoNames:3072941)' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:11Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:34Z - 'YouTube/Google Maps enrichment 2025-12-09T09:30:42Z: YouTube: not found' @@ -111,8 +112,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Muzeum a galerie severního Plzeňska @@ -184,8 +185,8 @@ wikidata_enrichment: instance_of: &id005 - id: Q33506 label: museum - description: institution that holds artifacts and other objects of scientific, artistic, cultural, historical, or other - importance + description: institution that holds artifacts and other objects of scientific, + artistic, cultural, historical, or other importance wikidata_instance_of: *id005 wikidata_location: headquarters_location: @@ -228,3 +229,22 @@ location: youtube_status: NOT_FOUND youtube_search_query: Muzeum a galerie severního Plzeňska official youtube_search_timestamp: '2025-12-09T09:30:42.798287+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:21:37.988345+00:00' + source_url: http://www.marianskatynice.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.marianskatynice.cz/skins/marianskatynice.cz_lego2/favicons/safari-pinned-tab.svg + source_url: http://www.marianskatynice.cz + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:21:37.988345+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-KVI-L-MKK.yaml b/data/custodian/CZ-32-KVI-L-MKK.yaml index b67b767fe0..44eb9481fd 100644 --- a/data/custodian/CZ-32-KVI-L-MKK.yaml +++ b/data/custodian/CZ-32-KVI-L-MKK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KVI-L-MKK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KVI-L-MKK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KVI-L-MKK ghcid_numeric: 4404900477198165292 valid_from: '2025-12-06T23:37:31.071070+00:00' @@ -214,3 +214,22 @@ location: postal_code: 345 62 street_address: Kvíčovice 50 normalization_timestamp: '2025-12-09T10:53:46.786658+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:21:46.797650+00:00' + source_url: https://www.kvicovice.cz/obec-7/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kvicovice.cz/skins/kvicovice.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.kvicovice.cz/obec-7/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:21:46.797650+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-KYS-L-OKK.yaml b/data/custodian/CZ-32-KYS-L-OKK.yaml index 7dadae7bfc..38528656f2 100644 --- a/data/custodian/CZ-32-KYS-L-OKK.yaml +++ b/data/custodian/CZ-32-KYS-L-OKK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-KYS-L-OKK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-KYS-L-OKK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-KYS-L-OKK ghcid_numeric: 17896279461649070507 valid_from: '2025-12-06T23:37:31.848827+00:00' @@ -211,3 +211,22 @@ location: postal_code: 330 01 street_address: Horní Náves 136 normalization_timestamp: '2025-12-09T10:53:46.814006+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:21:55.526526+00:00' + source_url: https://www.kysice.eu/obec-1/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kysice.eu/skins/kysice.eu_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.kysice.eu/obec-1/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:21:55.526526+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-LET-L-MKL.yaml b/data/custodian/CZ-32-LET-L-MKL.yaml index c2b4b88c62..51b1cae219 100644 --- a/data/custodian/CZ-32-LET-L-MKL.yaml +++ b/data/custodian/CZ-32-LET-L-MKL.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-LET-L-MKL - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-LET-L-MKL valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-LET-L-MKL ghcid_numeric: 2595206414431899652 valid_from: '2025-12-06T23:37:31.736205+00:00' @@ -210,3 +210,22 @@ location: postal_code: 326 00 street_address: Plzeňská 50 normalization_timestamp: '2025-12-09T10:53:46.840256+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:22:01.868829+00:00' + source_url: https://www.obec-letkov.cz/obec-1/mistni-knihovna-letkov + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obec-letkov.cz/skins/letkov_lego/favicons/safari-pinned-tab.svg + source_url: https://www.obec-letkov.cz/obec-1/mistni-knihovna-letkov + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:22:01.868829+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-MAN-L-MKM.yaml b/data/custodian/CZ-32-MAN-L-MKM.yaml index ccdd76f09b..9bfa895ebd 100644 --- a/data/custodian/CZ-32-MAN-L-MKM.yaml +++ b/data/custodian/CZ-32-MAN-L-MKM.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-MAN-L-MKM - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-MAN-L-MKM valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-MAN-L-MKM ghcid_numeric: 13569931343342174497 valid_from: '2025-12-06T23:37:23.577148+00:00' @@ -224,3 +224,22 @@ location: postal_code: 331 62 street_address: Manětín 89 normalization_timestamp: '2025-12-09T10:53:47.046589+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:23:29.049852+00:00' + source_url: https://katalog.manetin.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://katalog.manetin.cz/themes/root/images/vufind-favicon.ico + source_url: https://katalog.manetin.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:23:29.049852+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-MAN-L-OKM.yaml b/data/custodian/CZ-32-MAN-L-OKM.yaml index 4c6ae74ed4..da512fe34d 100644 --- a/data/custodian/CZ-32-MAN-L-OKM.yaml +++ b/data/custodian/CZ-32-MAN-L-OKM.yaml @@ -42,13 +42,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-MAN-L-OKM - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-MAN-L-OKM valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-MAN-L-OKM ghcid_numeric: 8652418067787963421 valid_from: '2025-12-06T23:37:42.778476+00:00' @@ -108,8 +108,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.95 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Obecní knihovna Mezí @@ -214,3 +214,22 @@ location: geonames_id: 3071046 geonames_name: Manětín feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:23:36.110678+00:00' + source_url: https://mezi-katalog.manetin.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://mezi-katalog.manetin.cz/themes/root/images/vufind-favicon.ico + source_url: https://mezi-katalog.manetin.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:23:36.110678+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-MAN-L-OKS.yaml b/data/custodian/CZ-32-MAN-L-OKS.yaml index e29e9dffee..88a997d6ce 100644 --- a/data/custodian/CZ-32-MAN-L-OKS.yaml +++ b/data/custodian/CZ-32-MAN-L-OKS.yaml @@ -42,13 +42,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-MAN-L-OKS - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-MAN-L-OKS valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-MAN-L-OKS ghcid_numeric: 3165869942419319360 valid_from: '2025-12-06T23:37:42.775760+00:00' @@ -108,8 +108,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.95 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Obecní knihovna Stvolny @@ -214,3 +214,22 @@ location: geonames_id: 3071046 geonames_name: Manětín feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:23:40.981383+00:00' + source_url: https://stvolny-katalog.manetin.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://stvolny-katalog.manetin.cz/themes/root/images/vufind-favicon.ico + source_url: https://stvolny-katalog.manetin.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:23:40.981383+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-MEC-L-MKM.yaml b/data/custodian/CZ-32-MEC-L-MKM.yaml index fa9a47020f..af1d1753b1 100644 --- a/data/custodian/CZ-32-MEC-L-MKM.yaml +++ b/data/custodian/CZ-32-MEC-L-MKM.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-MEC-L-MKM - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-MEC-L-MKM valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-MEC-L-MKM ghcid_numeric: 9734417700704518524 valid_from: '2025-12-06T23:37:21.178260+00:00' @@ -218,3 +218,22 @@ location: postal_code: 340 37 street_address: Farní 43 normalization_timestamp: '2025-12-09T10:53:47.150809+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:23:52.097698+00:00' + source_url: https://klatovy.tritius.cz/library/mecin + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://klatovy.tritius.cz/apple-touch-icon-180x180.png + source_url: https://klatovy.tritius.cz/library/mecin + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:23:52.097698+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-MER-L-OUKMUP.yaml b/data/custodian/CZ-32-MER-L-OUKMUP.yaml index f9d920cb40..f05a93a4e0 100644 --- a/data/custodian/CZ-32-MER-L-OUKMUP.yaml +++ b/data/custodian/CZ-32-MER-L-OUKMUP.yaml @@ -36,13 +36,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-MER-L-OUKMUP - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-MER-L-OUKMUP valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-MER-L-OUKMUP ghcid_numeric: 17498656420724007736 valid_from: '2025-12-08T11:21:37.721724+00:00' @@ -105,8 +105,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.95 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Obecní úřad - knihovna Merklín u Přeštic @@ -216,3 +216,22 @@ location: geonames_id: 3070844 geonames_name: Merklín feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:24:00.016359+00:00' + source_url: https://prestice.tritius.cz/library/merklin + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://prestice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://prestice.tritius.cz/library/merklin + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:24:00.016359+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-MES-L-OKU.yaml b/data/custodian/CZ-32-MES-L-OKU.yaml index d100beb4f1..dfc280d9c1 100644 --- a/data/custodian/CZ-32-MES-L-OKU.yaml +++ b/data/custodian/CZ-32-MES-L-OKU.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-MES-L-OKU - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-MES-L-OKU valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-MES-L-OKU ghcid_numeric: 3121694048125655451 valid_from: '2025-12-08T11:21:39.134492+00:00' @@ -216,3 +216,22 @@ location: postal_code: 330 33 street_address: Úlice 50 normalization_timestamp: '2025-12-09T10:53:47.206151+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:24:06.119088+00:00' + source_url: https://www.obeculice.cz/obec/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obeculice.cz/skins/obeculice.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.obeculice.cz/obec/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:24:06.119088+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-MES-L-OKVC.yaml b/data/custodian/CZ-32-MES-L-OKVC.yaml index 7e1a9e4636..92a7f163b1 100644 --- a/data/custodian/CZ-32-MES-L-OKVC.yaml +++ b/data/custodian/CZ-32-MES-L-OKVC.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-MES-L-OKVC - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-MES-L-OKVC valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-MES-L-OKVC ghcid_numeric: 1309835489533712242 valid_from: '2025-12-08T11:21:33.815693+00:00' @@ -216,3 +216,22 @@ location: postal_code: 330 33 street_address: Čeminy 37 normalization_timestamp: '2025-12-09T10:53:47.232925+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:24:13.393101+00:00' + source_url: https://www.ceminy.cz/obec/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.ceminy.cz/skins/ceminy.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.ceminy.cz/obec/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:24:13.393101+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-MIR-L-MKVS.yaml b/data/custodian/CZ-32-MIR-L-MKVS.yaml index c03aa732f1..208337f5bc 100644 --- a/data/custodian/CZ-32-MIR-L-MKVS.yaml +++ b/data/custodian/CZ-32-MIR-L-MKVS.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-MIR-L-MKVS - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-MIR-L-MKVS valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-MIR-L-MKVS ghcid_numeric: 12061310120520097006 valid_from: '2025-12-06T23:37:31.959673+00:00' @@ -210,3 +210,22 @@ location: postal_code: 338 43 street_address: Skořice 78 normalization_timestamp: '2025-12-09T10:53:47.259034+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:24:18.855546+00:00' + source_url: https://skorice.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://skorice.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://skorice.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:24:18.855546+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-MRA-L-MKM.yaml b/data/custodian/CZ-32-MRA-L-MKM.yaml index c1736d8e5b..b2e06ba866 100644 --- a/data/custodian/CZ-32-MRA-L-MKM.yaml +++ b/data/custodian/CZ-32-MRA-L-MKM.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-MRA-L-MKM - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-MRA-L-MKM valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-MRA-L-MKM ghcid_numeric: 2686764781815003307 valid_from: '2025-12-06T23:37:30.789550+00:00' @@ -214,3 +214,30 @@ location: postal_code: 345 01 street_address: Mrákov 135 normalization_timestamp: '2025-12-09T10:53:47.312170+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:24:26.638775+00:00' + source_url: https://knihovnamrakov.webk.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://knihovnamrakov.webk.cz/themes/new/orange/logo2.png + source_url: https://knihovnamrakov.webk.cz + css_selector: '#outpage > header.tmava > a > img.mobile_display_none' + retrieved_on: '2025-12-24T11:24:26.638775+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Na úvodní stranu + - claim_type: favicon_url + claim_value: https://knihovnamrakov.webk.cz/themes/new/favicon.ico + source_url: https://knihovnamrakov.webk.cz + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-24T11:24:26.638775+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-MUT-L-OKM.yaml b/data/custodian/CZ-32-MUT-L-OKM.yaml index 6dce318b77..72fcf45b25 100644 --- a/data/custodian/CZ-32-MUT-L-OKM.yaml +++ b/data/custodian/CZ-32-MUT-L-OKM.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-MUT-L-OKM - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-MUT-L-OKM valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-MUT-L-OKM ghcid_numeric: 4115793928639987860 valid_from: '2025-12-06T23:37:30.806908+00:00' @@ -210,3 +210,22 @@ location: postal_code: 345 25 street_address: Mutěnín 60 normalization_timestamp: '2025-12-09T10:53:47.339674+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:24:35.850799+00:00' + source_url: https://www.mutenin.cz/volny-cas/obecni-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.mutenin.cz/skins/mutenin.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.mutenin.cz/volny-cas/obecni-knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:24:35.850799+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-MYT-L-KMMVC.yaml b/data/custodian/CZ-32-MYT-L-KMMVC.yaml index aa6c5ff735..6f37fe8fcc 100644 --- a/data/custodian/CZ-32-MYT-L-KMMVC.yaml +++ b/data/custodian/CZ-32-MYT-L-KMMVC.yaml @@ -220,3 +220,30 @@ location: geocoding_timestamp: '2025-12-09T21:40:22.193308+00:00' geocoding_method: CITY_NAME_LOOKUP geonames_matched_name: Mýto +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:24:42.157625+00:00' + source_url: https://knihovnamyto.webk.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://knihovnamyto.webk.cz/themes/new/orange/logo1.png + source_url: https://knihovnamyto.webk.cz + css_selector: '#outpage > header.tmava > a > img.mobile_display_none' + retrieved_on: '2025-12-24T11:24:42.157625+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Na úvodní stranu + - claim_type: favicon_url + claim_value: https://knihovnamyto.webk.cz/themes/new/favicon.ico + source_url: https://knihovnamyto.webk.cz + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-24T11:24:42.157625+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-NEM-L-OUNMK.yaml b/data/custodian/CZ-32-NEM-L-OUNMK.yaml index 7d9d744b14..507b3e296c 100644 --- a/data/custodian/CZ-32-NEM-L-OUNMK.yaml +++ b/data/custodian/CZ-32-NEM-L-OUNMK.yaml @@ -214,3 +214,22 @@ location: geocoding_timestamp: '2025-12-09T21:46:20.810707+00:00' geocoding_method: MANUAL_CITY_MAPPING geonames_matched_name: Němčovice +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:25:01.110374+00:00' + source_url: https://www.nemcovice.cz/zivot-v-obci/knihovna/knihy-v-knihovne-nemcovice + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.nemcovice.cz/skins/nemcovice.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.nemcovice.cz/zivot-v-obci/knihovna/knihy-v-knihovne-nemcovice + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:25:01.110374+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-NEP-L-MKN.yaml b/data/custodian/CZ-32-NEP-L-MKN.yaml index 750df76fd1..5bb76e996d 100644 --- a/data/custodian/CZ-32-NEP-L-MKN.yaml +++ b/data/custodian/CZ-32-NEP-L-MKN.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-NEP-L-MKN - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-NEP-L-MKN valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-NEP-L-MKN ghcid_numeric: 320022628832174233 valid_from: '2025-12-06T23:37:21.228139+00:00' @@ -220,3 +220,28 @@ location: postal_code: 335 01 street_address: Nádražní 476 normalization_timestamp: '2025-12-09T10:53:47.528012+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:25:15.574770+00:00' + source_url: https://www.nepomuk.cz/obcan/mestska-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.nepomuk.cz/favicon-apple-touch.png + source_url: https://www.nepomuk.cz/obcan/mestska-knihovna + css_selector: '[document] > html.fonts-loaded > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T11:25:15.574770+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.nepomuk.cz/images/social.png + source_url: https://www.nepomuk.cz/obcan/mestska-knihovna + css_selector: '[document] > html.fonts-loaded > head > meta:nth-of-type(4)' + retrieved_on: '2025-12-24T11:25:15.574770+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/CZ-32-NEP-L-MKVC-mistni_knihovna_ve_cmelinech.yaml b/data/custodian/CZ-32-NEP-L-MKVC-mistni_knihovna_ve_cmelinech.yaml index 7ebbf98d32..b30ae18846 100644 --- a/data/custodian/CZ-32-NEP-L-MKVC-mistni_knihovna_ve_cmelinech.yaml +++ b/data/custodian/CZ-32-NEP-L-MKVC-mistni_knihovna_ve_cmelinech.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-NEP-L-MKVC-mistni_knihovna_ve_cmelinech - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-NEP-L-MKVC-mistni_knihovna_ve_cmelinech valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-NEP-L-MKVC-mistni_knihovna_ve_cmelinech ghcid_numeric: 5824439486703556298 valid_from: '2025-12-08T11:28:08.765694+00:00' @@ -220,3 +220,22 @@ location: postal_code: 335 01 street_address: Čmelíny 32 normalization_timestamp: '2025-12-09T10:53:47.557293+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:25:21.384721+00:00' + source_url: https://www.cmeliny-viska.cz/cs/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.cmeliny-viska.cz/application/views/cmeliny-viska/images/favicon.ico + source_url: https://www.cmeliny-viska.cz/cs/knihovna + css_selector: '[document] > html > head > link:nth-of-type(6)' + retrieved_on: '2025-12-24T11:25:21.384721+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-NEP-L-OKT-obecni_knihovna_trebcice.yaml b/data/custodian/CZ-32-NEP-L-OKT-obecni_knihovna_trebcice.yaml index e051c70b43..babd43de5b 100644 --- a/data/custodian/CZ-32-NEP-L-OKT-obecni_knihovna_trebcice.yaml +++ b/data/custodian/CZ-32-NEP-L-OKT-obecni_knihovna_trebcice.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-NEP-L-OKT-obecni_knihovna_trebcice - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-NEP-L-OKT-obecni_knihovna_trebcice valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-NEP-L-OKT-obecni_knihovna_trebcice ghcid_numeric: 1354416580702409100 valid_from: '2025-12-06T23:37:31.580425+00:00' @@ -210,3 +210,22 @@ location: postal_code: 335 01 street_address: Třebčice 2 normalization_timestamp: '2025-12-09T10:53:47.865596+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:25:48.408179+00:00' + source_url: https://www.trebcice.cz/obec/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.trebcice.cz/skins/trebcice.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.trebcice.cz/obec/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:25:48.408179+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-NEV-L-MKN.yaml b/data/custodian/CZ-32-NEV-L-MKN.yaml index 0c7883ba51..fbfec9b9e0 100644 --- a/data/custodian/CZ-32-NEV-L-MKN.yaml +++ b/data/custodian/CZ-32-NEV-L-MKN.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-NEV-L-MKN - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-NEV-L-MKN valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-NEV-L-MKN ghcid_numeric: 8618668365090453099 valid_from: '2025-12-06T23:37:31.165013+00:00' @@ -210,3 +210,22 @@ location: postal_code: 344 01 street_address: Nevolice 4 normalization_timestamp: '2025-12-09T10:53:48.086028+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:26:05.472437+00:00' + source_url: http://nevolice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://nevolice.knihovna.cz/favicon.svg + source_url: http://nevolice.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:26:05.472437+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-NEZ-L-MKNS.yaml b/data/custodian/CZ-32-NEZ-L-MKNS.yaml index 3f8b015837..890fe894bb 100644 --- a/data/custodian/CZ-32-NEZ-L-MKNS.yaml +++ b/data/custodian/CZ-32-NEZ-L-MKNS.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-NEZ-L-MKNS - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-NEZ-L-MKNS valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-NEZ-L-MKNS ghcid_numeric: 539961279137448809 valid_from: '2025-12-08T11:21:26.502086+00:00' @@ -219,3 +219,28 @@ location: postal_code: 342 01 street_address: Nezdice na Šumavě 173 normalization_timestamp: '2025-12-09T10:53:48.112219+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:26:11.710239+00:00' + source_url: https://www.nezdicenasumave.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=98563d9b32 + source_url: https://www.nezdicenasumave.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T11:26:11.710239+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://98563d9b32.clvaw-cdnwnd.com/2672992140aae7cac14039bebe4f332a/200000062-d303dd3040/700/book-2170910_960_720-0.jpg?ph=98563d9b32 + source_url: https://www.nezdicenasumave.knihovna.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T11:26:11.710239+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-NEZ-L-MKVN.yaml b/data/custodian/CZ-32-NEZ-L-MKVN.yaml index 9b4835930b..f4ebad5869 100644 --- a/data/custodian/CZ-32-NEZ-L-MKVN.yaml +++ b/data/custodian/CZ-32-NEZ-L-MKVN.yaml @@ -213,3 +213,22 @@ location: postal_code: 332 04 street_address: Nezvěstice 76 normalization_timestamp: '2025-12-09T10:53:48.135734+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:26:17.133504+00:00' + source_url: http://www.knihovnanezvestice.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.knihovnanezvestice.cz/favicon.ico + source_url: http://www.knihovnanezvestice.cz + css_selector: '[document] > html > head > link:nth-of-type(9)' + retrieved_on: '2025-12-24T11:26:17.133504+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-NEZ-L-OKM.yaml b/data/custodian/CZ-32-NEZ-L-OKM.yaml index 3455964e95..917445e472 100644 --- a/data/custodian/CZ-32-NEZ-L-OKM.yaml +++ b/data/custodian/CZ-32-NEZ-L-OKM.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-NEZ-L-OKM - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-NEZ-L-OKM valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-NEZ-L-OKM ghcid_numeric: 7072401210211263676 valid_from: '2025-12-06T23:37:27.302613+00:00' @@ -220,3 +220,22 @@ location: postal_code: 332 04 street_address: Milínov 48 normalization_timestamp: '2025-12-09T10:53:48.163044+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:26:24.168951+00:00' + source_url: https://milinov.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://milinov.knihovna.cz/favicon.svg + source_url: https://milinov.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:26:24.168951+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-NEZ-L-OKVN.yaml b/data/custodian/CZ-32-NEZ-L-OKVN.yaml index fc9cacf4c0..1c5268183e 100644 --- a/data/custodian/CZ-32-NEZ-L-OKVN.yaml +++ b/data/custodian/CZ-32-NEZ-L-OKVN.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-NEZ-L-OKVN - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-NEZ-L-OKVN valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-NEZ-L-OKVN ghcid_numeric: 569852810275926717 valid_from: '2025-12-06T23:37:31.683866+00:00' @@ -207,3 +207,22 @@ location: postal_code: 332 04 street_address: Nebílovy 101 normalization_timestamp: '2025-12-09T10:53:48.191731+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:26:32.125667+00:00' + source_url: https://www.obecnebilovy.cz/obec-125/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obecnebilovy.cz/skins/obecnebilovy.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.obecnebilovy.cz/obec-125/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:26:32.125667+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-NYR-L-OKU.yaml b/data/custodian/CZ-32-NYR-L-OKU.yaml index 1151397c3f..64e815d7e8 100644 --- a/data/custodian/CZ-32-NYR-L-OKU.yaml +++ b/data/custodian/CZ-32-NYR-L-OKU.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-NYR-L-OKU - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-NYR-L-OKU valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-NYR-L-OKU ghcid_numeric: 18427857246368617724 valid_from: '2025-12-08T11:21:30.858573+00:00' @@ -212,3 +212,32 @@ location: postal_code: 330 23 street_address: Úherce 94 normalization_timestamp: '2025-12-09T10:53:48.241515+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:26:41.342164+00:00' + source_url: https://obecuherce.cz/w/knihovna + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://obecuherce.cz/w/wp-content/uploads/2020/02/tribrichy.gif + source_url: https://obecuherce.cz/w/knihovna + css_selector: '#header > div.header__inner > div.header__content:nth-of-type(2) + > div.lsvr-container > div.header__content-inner > div.header-logo > a.header-logo__link + > img.header-logo__image' + retrieved_on: '2025-12-24T11:26:41.342164+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Obec Úherce + - claim_type: favicon_url + claim_value: https://obecuherce.cz/w/wp-content/uploads/2020/02/tribrichy2-150x150.gif + source_url: https://obecuherce.cz/w/knihovna + css_selector: '[document] > html > head > link:nth-of-type(18)' + retrieved_on: '2025-12-24T11:26:41.342164+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 32x32 + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/CZ-32-OSE-L-MKVO.yaml b/data/custodian/CZ-32-OSE-L-MKVO.yaml index 1b847f54e2..93bb162750 100644 --- a/data/custodian/CZ-32-OSE-L-MKVO.yaml +++ b/data/custodian/CZ-32-OSE-L-MKVO.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-OSE-L-MKVO - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-OSE-L-MKVO valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-OSE-L-MKVO ghcid_numeric: 4116363679238094229 valid_from: '2025-12-06T23:37:31.641832+00:00' @@ -216,3 +216,22 @@ location: postal_code: 335 46 street_address: Oselce 2 normalization_timestamp: '2025-12-09T10:53:48.308406+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:26:52.594952+00:00' + source_url: https://www.oselce.cz/kontakt + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.oselce.cz/assets/favicon/apple-touch-icon.png?01742a8eb16e43289b923e45e733fb6c + source_url: https://www.oselce.cz/kontakt + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T11:26:52.594952+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-PAC-L-MKP.yaml b/data/custodian/CZ-32-PAC-L-MKP.yaml index a3b3748fd6..637bb80df7 100644 --- a/data/custodian/CZ-32-PAC-L-MKP.yaml +++ b/data/custodian/CZ-32-PAC-L-MKP.yaml @@ -37,13 +37,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PAC-L-MKP - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PAC-L-MKP valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PAC-L-MKP ghcid_numeric: 12104889071559581760 valid_from: '2025-12-06T23:37:23.275889+00:00' @@ -206,3 +206,22 @@ location: country: *id005 postal_code: 341 01 normalization_timestamp: '2025-12-09T10:53:48.334098+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:26:59.176690+00:00' + source_url: https://katalog.knihovna-pacejov.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://katalog.knihovna-pacejov.cz/themes/root/images/vufind-favicon.ico + source_url: https://katalog.knihovna-pacejov.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:26:59.176690+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-PER-L-OKK.yaml b/data/custodian/CZ-32-PER-L-OKK.yaml index b5651aad62..f95e255d04 100644 --- a/data/custodian/CZ-32-PER-L-OKK.yaml +++ b/data/custodian/CZ-32-PER-L-OKK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PER-L-OKK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PER-L-OKK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PER-L-OKK ghcid_numeric: 16910858643174265480 valid_from: '2025-12-06T23:37:31.883716+00:00' @@ -211,3 +211,22 @@ location: postal_code: 330 36 street_address: Křelovice 46 normalization_timestamp: '2025-12-09T10:53:48.386539+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:27:19.822705+00:00' + source_url: https://www.obeckrelovice.cz/obec-krelovice/organizace/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obeckrelovice.cz/skins/obeckrelovice.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.obeckrelovice.cz/obec-krelovice/organizace/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:27:19.822705+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-PET-L-MKPUS.yaml b/data/custodian/CZ-32-PET-L-MKPUS.yaml index 402c2e001d..28a47cc992 100644 --- a/data/custodian/CZ-32-PET-L-MKPUS.yaml +++ b/data/custodian/CZ-32-PET-L-MKPUS.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PET-L-MKPUS - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PET-L-MKPUS valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PET-L-MKPUS ghcid_numeric: 17597603500949005293 valid_from: '2025-12-06T23:37:31.303339+00:00' @@ -214,3 +214,28 @@ location: postal_code: 342 01 street_address: Petrovice u Sušice 34 normalization_timestamp: '2025-12-09T10:53:48.410593+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:27:26.200496+00:00' + source_url: https://www.petroviceususice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=6845e23e75 + source_url: https://www.petroviceususice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T11:27:26.200496+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://6845e23e75.clvaw-cdnwnd.com/a50c6b97b0516f4668c94d03a00e51ec/200000027-ea764ea767/700/books-3786559_960_720.jpg?ph=6845e23e75 + source_url: https://www.petroviceususice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T11:27:26.200496+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-PIL-M-MJPVB.yaml b/data/custodian/CZ-32-PIL-M-MJPVB.yaml index 1b111c0712..5ee2caf914 100644 --- a/data/custodian/CZ-32-PIL-M-MJPVB.yaml +++ b/data/custodian/CZ-32-PIL-M-MJPVB.yaml @@ -239,3 +239,22 @@ location: youtube_status: NOT_FOUND youtube_search_query: Muzeum jižního Plzeňska v Blovicích official youtube_search_timestamp: '2025-12-09T09:30:53.279869+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:27:56.080273+00:00' + source_url: http://www.muzeum-blovice.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.muzeum-blovice.cz/skins/muzeum-blovice.cz_lego2/favicons/safari-pinned-tab.svg + source_url: http://www.muzeum-blovice.cz + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:27:56.080273+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-PIL-M-ZMVP.yaml b/data/custodian/CZ-32-PIL-M-ZMVP.yaml index 8511b75ded..e5c6ad78a2 100644 --- a/data/custodian/CZ-32-PIL-M-ZMVP.yaml +++ b/data/custodian/CZ-32-PIL-M-ZMVP.yaml @@ -80,7 +80,8 @@ provenance: notes: - 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17' - 'Region resolved 2025-12-07T00:01:53Z: XX->32 via Wikidata P131 (CZ-32)' - - 'City resolved 2025-12-07T00:26:51Z: XXX->PIL via Wikidata Q10939685 coords (49.7414,13.3825) -> Pilsen (GeoNames:3068160)' + - 'City resolved 2025-12-07T00:26:51Z: XXX->PIL via Wikidata Q10939685 coords (49.7414,13.3825) + -> Pilsen (GeoNames:3068160)' - Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:11Z - Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:34Z - 'YouTube/Google Maps enrichment 2025-12-09T09:30:54Z: YouTube: not found' @@ -111,8 +112,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.85 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Západočeské muzeum v Plzni @@ -210,8 +211,8 @@ wikidata_enrichment: instance_of: &id005 - id: Q33506 label: museum - description: institution that holds artifacts and other objects of scientific, artistic, cultural, historical, or other - importance + description: institution that holds artifacts and other objects of scientific, + artistic, cultural, historical, or other importance - id: Q43229 label: organization description: social entity established to meet needs or pursue goals @@ -266,3 +267,22 @@ location: youtube_status: NOT_FOUND youtube_search_query: Západočeské muzeum v Plzni official youtube_search_timestamp: '2025-12-09T09:30:54.602239+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:04.182433+00:00' + source_url: http://www.zcm.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.zcm.cz/images/favicon/favicon.svg + source_url: http://www.zcm.cz + css_selector: '[document] > html.g-offcanvas-css3 > head > link:nth-of-type(18)' + retrieved_on: '2025-12-24T11:28:04.182433+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-PLA-L-MKP.yaml b/data/custodian/CZ-32-PLA-L-MKP.yaml index 95a6008293..b658650451 100644 --- a/data/custodian/CZ-32-PLA-L-MKP.yaml +++ b/data/custodian/CZ-32-PLA-L-MKP.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLA-L-MKP - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLA-L-MKP valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLA-L-MKP ghcid_numeric: 1526420810723952257 valid_from: '2025-12-06T23:37:21.272310+00:00' @@ -212,3 +212,22 @@ location: postal_code: 340 34 street_address: Náměstí 180 normalization_timestamp: '2025-12-09T10:53:48.462885+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:11.720693+00:00' + source_url: https://www.planice.cz/organizace/instituce-1/mestska-knihovna-270cs.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.planice.cz/skins/planice_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.planice.cz/organizace/instituce-1/mestska-knihovna-270cs.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:28:11.720693+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-PLA-L-OKMKP.yaml b/data/custodian/CZ-32-PLA-L-OKMKP.yaml index 10e77c4416..ab443524d4 100644 --- a/data/custodian/CZ-32-PLA-L-OKMKP.yaml +++ b/data/custodian/CZ-32-PLA-L-OKMKP.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLA-L-OKMKP - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLA-L-OKMKP valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLA-L-OKMKP ghcid_numeric: 9675510013097604646 valid_from: '2025-12-06T23:37:20.216736+00:00' @@ -223,3 +223,22 @@ location: postal_code: 348 15 street_address: nám. Svobody 56 normalization_timestamp: '2025-12-09T10:53:48.608863+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:28.038864+00:00' + source_url: https://katalog.knihovna-plana.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://katalog.knihovna-plana.cz/themes/root/images/vufind-favicon.ico + source_url: https://katalog.knihovna-plana.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:28:28.038864+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-PLZ-E-CGPK.yaml b/data/custodian/CZ-32-PLZ-E-CGPK.yaml index ebfa98e1de..6874a6a213 100644 --- a/data/custodian/CZ-32-PLZ-E-CGPK.yaml +++ b/data/custodian/CZ-32-PLZ-E-CGPK.yaml @@ -36,13 +36,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLZ-E-CGPK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLZ-E-CGPK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLZ-E-CGPK ghcid_numeric: 2366801478916212173 valid_from: '2025-12-06T23:37:43.018747+00:00' @@ -100,8 +100,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.95 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Církevní gymnázium Plzeň - Knihovna @@ -177,7 +177,8 @@ wikidata_enrichment: instance_of: &id004 - id: Q50379845 label: religious library - description: type of library with collections that focus on religion and related subjects + description: type of library with collections that focus on religion and related + subjects wikidata_instance_of: *id004 wikidata_location: country: &id005 @@ -209,3 +210,28 @@ location: geonames_id: 3065249 geonames_name: Starý Plzenec feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:38.142167+00:00' + source_url: https://www.cirkevni-gymnazium.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.cirkevni-gymnazium.cz/favicon.ico + source_url: https://www.cirkevni-gymnazium.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:28:38.142167+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.cirkevni-gymnazium.cz/obrazek/3/logo-cg-png/ + source_url: https://www.cirkevni-gymnazium.cz + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T11:28:38.142167+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/CZ-32-PLZ-G-ZGVPPOK.yaml b/data/custodian/CZ-32-PLZ-G-ZGVPPOK.yaml index e2f7639784..b412d71c6f 100644 --- a/data/custodian/CZ-32-PLZ-G-ZGVPPOK.yaml +++ b/data/custodian/CZ-32-PLZ-G-ZGVPPOK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLZ-G-ZGVPPOK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLZ-G-ZGVPPOK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLZ-G-ZGVPPOK ghcid_numeric: 11436851496853252106 valid_from: '2025-12-06T23:37:26.585306+00:00' @@ -217,3 +217,22 @@ location: postal_code: 301 00 street_address: Pražská 13 normalization_timestamp: '2025-12-09T10:53:48.698047+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:47.471782+00:00' + source_url: https://www.zpc-galerie.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.zpc-galerie.cz/sites/default/files/favicon.ico + source_url: https://www.zpc-galerie.cz + css_selector: '[document] > html.js > head > link' + retrieved_on: '2025-12-24T11:28:47.471782+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-PLZ-L-AOPKCRPSCC.yaml b/data/custodian/CZ-32-PLZ-L-AOPKCRPSCC.yaml index 2ab42aca01..37c511101d 100644 --- a/data/custodian/CZ-32-PLZ-L-AOPKCRPSCC.yaml +++ b/data/custodian/CZ-32-PLZ-L-AOPKCRPSCC.yaml @@ -42,13 +42,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLZ-L-AOPKCRPSCC - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLZ-L-AOPKCRPSCC valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLZ-L-AOPKCRPSCC ghcid_numeric: 2108666100323919543 valid_from: '2025-12-08T11:21:39.491265+00:00' @@ -225,3 +225,32 @@ location: postal_code: 301 00 street_address: Malá 9 normalization_timestamp: '2025-12-09T10:53:48.745933+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:56.200884+00:00' + source_url: https://ceskyles.aopk.gov.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://ceskyles.aopk.gov.cz/documents/20123/313949/CesLes-bila.svg/0b9c9500-5e8d-328e-b1ac-6072a1e351cd?t=1645009981651 + source_url: https://ceskyles.aopk.gov.cz + css_selector: '#portlet_com_liferay_journal_content_web_portlet_JournalContentPortlet_INSTANCE_logo_chko + > div.portlet-content > div.portlet-content-container > div.portlet-body > div + > div.journal-content-article > a.logo > img.logo__picture' + retrieved_on: '2025-12-24T11:28:56.200884+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Logo CHKO Český les. + - claim_type: favicon_url + claim_value: https://aopk.gov.cz/o/aopk-liferay-theme/images/favicon.ico + source_url: https://ceskyles.aopk.gov.cz + css_selector: '[document] > html.ltr.aopk-lfr > head > link:nth-of-type(9)' + retrieved_on: '2025-12-24T11:28:56.200884+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-PLZ-L-FNPLK.yaml b/data/custodian/CZ-32-PLZ-L-FNPLK.yaml index 734805d6ba..cc55e18567 100644 --- a/data/custodian/CZ-32-PLZ-L-FNPLK.yaml +++ b/data/custodian/CZ-32-PLZ-L-FNPLK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLZ-L-FNPLK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLZ-L-FNPLK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLZ-L-FNPLK ghcid_numeric: 16673836618802302415 valid_from: '2025-12-06T23:37:19.549722+00:00' @@ -214,3 +214,36 @@ location: postal_code: 323 00 street_address: alej Svobody 76 normalization_timestamp: '2025-12-09T10:53:48.838430+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:07.672657+00:00' + source_url: https://ukaz.cuni.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://ukaz.cuni.cz/custom/420CKIS_INST-UKAZ/img/library-logo.png + source_url: https://ukaz.cuni.cz + css_selector: '#logoImage' + retrieved_on: '2025-12-24T11:29:07.672657+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Logo knihovny + - claim_type: favicon_url + claim_value: https://ukaz.cuni.cz/custom/420CKIS_INST-UKAZ/img/favicon.ico + source_url: https://ukaz.cuni.cz + css_selector: '#viewCustomerFavIcon' + retrieved_on: '2025-12-24T11:29:07.672657+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://cuni.primo.exlibrisgroup.com/discovery/custom/420CKIS_INST-UKAZ/img/library-logo.png + source_url: https://ukaz.cuni.cz + css_selector: '#ogImage' + retrieved_on: '2025-12-24T11:29:07.672657+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/CZ-32-PLZ-L-KHSPKSSVPK.yaml b/data/custodian/CZ-32-PLZ-L-KHSPKSSVPK.yaml index d9c217bc50..f3be4d62a5 100644 --- a/data/custodian/CZ-32-PLZ-L-KHSPKSSVPK.yaml +++ b/data/custodian/CZ-32-PLZ-L-KHSPKSSVPK.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLZ-L-KHSPKSSVPK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLZ-L-KHSPKSSVPK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLZ-L-KHSPKSSVPK ghcid_numeric: 16753134515985422138 valid_from: '2025-12-06T23:37:19.554876+00:00' @@ -214,3 +214,32 @@ location: postal_code: 301 00 street_address: Skrétova 15 normalization_timestamp: '2025-12-09T10:53:48.888844+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:15.140568+00:00' + source_url: https://www.khsplzen.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.khsplzen.cz/wp-content/uploads/2022/10/cropped-LOGO-KHS-podelne-3.png + source_url: https://www.khsplzen.cz + css_selector: '#mh-mobile > div.mh-container.mh-container-outer > header.mh-header + > div.mh-container.mh-container-inner > div.mh-custom-header.clearfix > a.mh-header-image-link + > img.mh-header-image' + retrieved_on: '2025-12-24T11:29:15.140568+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Krajská hygienická stanice Plzeňského kraje + - claim_type: favicon_url + claim_value: https://www.khsplzen.cz/wp-content/uploads/2020/11/cropped-header-5-180x180.jpg + source_url: https://www.khsplzen.cz + css_selector: '[document] > html.js.mh-one-sb > head > link:nth-of-type(15)' + retrieved_on: '2025-12-24T11:29:15.140568+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-PLZ-L-MKS.yaml b/data/custodian/CZ-32-PLZ-L-MKS.yaml index 19d2b4d1b8..dceb9c85ef 100644 --- a/data/custodian/CZ-32-PLZ-L-MKS.yaml +++ b/data/custodian/CZ-32-PLZ-L-MKS.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLZ-L-MKS - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLZ-L-MKS valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLZ-L-MKS ghcid_numeric: 16819077430460520469 valid_from: '2025-12-08T11:21:33.571726+00:00' @@ -217,3 +217,22 @@ location: postal_code: 321 00 street_address: Šlovice normalization_timestamp: '2025-12-09T10:53:48.917958+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:21.495004+00:00' + source_url: https://www.dobrany.cz/kultura-vzdelani-sport/knihovna/knihovna-slovice + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.dobrany.cz/skins/dobrany.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.dobrany.cz/kultura-vzdelani-sport/knihovna/knihovna-slovice + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:29:21.495004+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-PLZ-L-NPUUOPVPK.yaml b/data/custodian/CZ-32-PLZ-L-NPUUOPVPK.yaml index 623135a472..896ff6d9b3 100644 --- a/data/custodian/CZ-32-PLZ-L-NPUUOPVPK.yaml +++ b/data/custodian/CZ-32-PLZ-L-NPUUOPVPK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLZ-L-NPUUOPVPK - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLZ-L-NPUUOPVPK valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLZ-L-NPUUOPVPK ghcid_numeric: 5018390144717403564 valid_from: '2025-12-08T11:21:30.622145+00:00' @@ -225,3 +225,22 @@ location: postal_code: 306 37 street_address: Prešovská 7 normalization_timestamp: '2025-12-09T10:53:48.965267+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:31.275983+00:00' + source_url: https://iispp.npu.cz/carmen/library/plzen + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://iispp.npu.cz/carmen/apple-touch-icon-180x180.png + source_url: https://iispp.npu.cz/carmen/library/plzen + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:29:31.275983+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-PLZ-L-UKLFVPSVI.yaml b/data/custodian/CZ-32-PLZ-L-UKLFVPSVI.yaml index b89cdc078e..fceb11e37e 100644 --- a/data/custodian/CZ-32-PLZ-L-UKLFVPSVI.yaml +++ b/data/custodian/CZ-32-PLZ-L-UKLFVPSVI.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLZ-L-UKLFVPSVI - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLZ-L-UKLFVPSVI valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLZ-L-UKLFVPSVI ghcid_numeric: 1793430680675409475 valid_from: '2025-12-06T23:37:19.519821+00:00' @@ -220,3 +220,36 @@ location: postal_code: 323 00 street_address: alej Svobody 76 normalization_timestamp: '2025-12-09T10:53:49.173188+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:56.571533+00:00' + source_url: https://ukaz.cuni.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://ukaz.cuni.cz/custom/420CKIS_INST-UKAZ/img/library-logo.png + source_url: https://ukaz.cuni.cz + css_selector: '#logoImage' + retrieved_on: '2025-12-24T11:29:56.571533+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Logo knihovny + - claim_type: favicon_url + claim_value: https://ukaz.cuni.cz/custom/420CKIS_INST-UKAZ/img/favicon.ico + source_url: https://ukaz.cuni.cz + css_selector: '#viewCustomerFavIcon' + retrieved_on: '2025-12-24T11:29:56.571533+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://cuni.primo.exlibrisgroup.com/discovery/custom/420CKIS_INST-UKAZ/img/library-logo.png + source_url: https://ukaz.cuni.cz + css_selector: '#ogImage' + retrieved_on: '2025-12-24T11:29:56.571533+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/CZ-32-PLZ-L-UKRMPPOTK.yaml b/data/custodian/CZ-32-PLZ-L-UKRMPPOTK.yaml index d622c23747..799d63e02c 100644 --- a/data/custodian/CZ-32-PLZ-L-UKRMPPOTK.yaml +++ b/data/custodian/CZ-32-PLZ-L-UKRMPPOTK.yaml @@ -227,3 +227,28 @@ location: postal_code: 301 00 street_address: Kopeckého sady 11 normalization_timestamp: '2025-12-09T10:53:49.204259+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:01.511663+00:00' + source_url: https://ukr.plzen.eu + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://ukr.plzen.eu/apple-touch-icon.png + source_url: https://ukr.plzen.eu + css_selector: '[document] > html.show--consent > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T11:30:01.511663+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://ukr.plzen.eu/public/images/social.png + source_url: https://ukr.plzen.eu + css_selector: '[document] > html.show--consent > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-24T11:30:01.511663+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/CZ-32-PLZ-L-VZUPSRTK.yaml b/data/custodian/CZ-32-PLZ-L-VZUPSRTK.yaml index 24958efecb..7816f617e9 100644 --- a/data/custodian/CZ-32-PLZ-L-VZUPSRTK.yaml +++ b/data/custodian/CZ-32-PLZ-L-VZUPSRTK.yaml @@ -220,3 +220,37 @@ location: postal_code: 301 00 street_address: Tylova 1581/46 normalization_timestamp: '2025-12-09T10:53:49.252175+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:09.178596+00:00' + source_url: https://www.vzuplzen.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.vzuplzen.cz/wp-content/uploads/2020/04/vzulogo.svg + source_url: https://www.vzuplzen.cz + css_selector: '#site-header-wrap > div.site-header-top > div.container > div.row + > div.site-branding > a.logo-light > img.entered.lazyloaded' + retrieved_on: '2025-12-24T11:30:09.178596+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: VZÚ Plzeň + - claim_type: favicon_url + claim_value: https://www.vzuplzen.cz/wp-content/uploads/2020/04/vzulogo.svg + source_url: https://www.vzuplzen.cz + css_selector: '[document] > html > head > link:nth-of-type(64)' + retrieved_on: '2025-12-24T11:30:09.178596+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.vzuplzen.cz/wp-content/uploads/2020/04/img_7899_editace.jpg + source_url: https://www.vzuplzen.cz + css_selector: '[document] > html > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-24T11:30:09.178596+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/CZ-32-PLZ-M-ZMVPK.yaml b/data/custodian/CZ-32-PLZ-M-ZMVPK.yaml index 6285ae29fe..7929abcb88 100644 --- a/data/custodian/CZ-32-PLZ-M-ZMVPK.yaml +++ b/data/custodian/CZ-32-PLZ-M-ZMVPK.yaml @@ -224,3 +224,22 @@ location: postal_code: 301 50 street_address: Kopeckého sady 2 normalization_timestamp: '2025-12-09T10:53:49.392778+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:31.663206+00:00' + source_url: https://verbis.zcm.cz/#! + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://verbis.zcm.cz/favicon.png?v=2.3.0-32021 + source_url: https://verbis.zcm.cz/#! + css_selector: '#ng-app > head > link:nth-of-type(9)' + retrieved_on: '2025-12-24T11:30:31.663206+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: 256x256 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-PLZ-O-AMP.yaml b/data/custodian/CZ-32-PLZ-O-AMP.yaml index c28ba30934..13d06ebe61 100644 --- a/data/custodian/CZ-32-PLZ-O-AMP.yaml +++ b/data/custodian/CZ-32-PLZ-O-AMP.yaml @@ -52,13 +52,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PLZ-O-AMP - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PLZ-O-AMP valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PLZ-O-AMP ghcid_numeric: 12822286620103458884 valid_from: '2025-12-06T23:35:06.157072+00:00' @@ -253,3 +253,22 @@ location: postal_code: 301 00 street_address: Veleslavínova 19 normalization_timestamp: '2025-12-09T10:53:49.433025+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:38.986731+00:00' + source_url: https://amp.tritius.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://amp.tritius.cz/apple-touch-icon-180x180.png + source_url: https://amp.tritius.cz + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:30:38.986731+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-POB-L-MKP.yaml b/data/custodian/CZ-32-POB-L-MKP.yaml index 04e94ffd2b..b4d872407f 100644 --- a/data/custodian/CZ-32-POB-L-MKP.yaml +++ b/data/custodian/CZ-32-POB-L-MKP.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-POB-L-MKP - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-POB-L-MKP valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-POB-L-MKP ghcid_numeric: 11746328645587576651 valid_from: '2025-12-06T23:37:23.304244+00:00' @@ -220,3 +220,22 @@ location: postal_code: 345 22 street_address: nám. Míru 55 normalization_timestamp: '2025-12-09T10:53:49.492242+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:47.940296+00:00' + source_url: https://www.pobezovice.cz/infocentrum/mestska-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.pobezovice.cz/skins/pobezovice_lego/favicons/safari-pinned-tab.svg + source_url: https://www.pobezovice.cz/infocentrum/mestska-knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:30:47.940296+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-POS-L-MKP.yaml b/data/custodian/CZ-32-POS-L-MKP.yaml index a2b3fcc229..18b8bca30c 100644 --- a/data/custodian/CZ-32-POS-L-MKP.yaml +++ b/data/custodian/CZ-32-POS-L-MKP.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-POS-L-MKP - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-POS-L-MKP valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-POS-L-MKP ghcid_numeric: 12518466827323643678 valid_from: '2025-12-06T23:37:30.926309+00:00' @@ -214,3 +214,22 @@ location: postal_code: 345 35 street_address: Postřekov 270 normalization_timestamp: '2025-12-09T10:53:49.541114+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:57.798229+00:00' + source_url: https://domazlice.tritius.cz/library/postrekov + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://domazlice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://domazlice.tritius.cz/library/postrekov + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:30:57.798229+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-PRE-A-DHP.yaml b/data/custodian/CZ-32-PRE-A-DHP.yaml index 0229384195..ba8f2c34e2 100644 --- a/data/custodian/CZ-32-PRE-A-DHP.yaml +++ b/data/custodian/CZ-32-PRE-A-DHP.yaml @@ -237,3 +237,22 @@ location: youtube_status: NOT_FOUND youtube_search_query: Dům historie Přešticka official youtube_search_timestamp: '2025-12-09T09:30:57.233406+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:07.833276+00:00' + source_url: http://www.dumhistorie.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.dumhistorie.cz/skins/dumhistorie.cz_lego2/favicons/apple-touch-icon.png + source_url: http://www.dumhistorie.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:31:07.833276+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-PRE-L-MKP.yaml b/data/custodian/CZ-32-PRE-L-MKP.yaml index 534eaf2a20..ddadd56751 100644 --- a/data/custodian/CZ-32-PRE-L-MKP.yaml +++ b/data/custodian/CZ-32-PRE-L-MKP.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PRE-L-MKP - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PRE-L-MKP valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PRE-L-MKP ghcid_numeric: 5690544136345718567 valid_from: '2025-12-06T23:37:19.612345+00:00' @@ -228,3 +228,22 @@ location: postal_code: 334 01 street_address: Husova 1079 normalization_timestamp: '2025-12-09T10:53:49.672525+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:15.317932+00:00' + source_url: https://prestice.tritius.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://prestice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://prestice.tritius.cz + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:31:15.317932+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-PRE-L-MKVH.yaml b/data/custodian/CZ-32-PRE-L-MKVH.yaml index 8bda771df5..6c0df3b8f9 100644 --- a/data/custodian/CZ-32-PRE-L-MKVH.yaml +++ b/data/custodian/CZ-32-PRE-L-MKVH.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PRE-L-MKVH - valid_from: "2025-12-10T09:47:08Z" + valid_from: '2025-12-10T09:47:08Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PRE-L-MKVH valid_from: null - valid_to: "2025-12-10T09:47:08Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:08Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PRE-L-MKVH ghcid_numeric: 126891602261530029 valid_from: '2025-12-06T23:37:31.689567+00:00' @@ -209,3 +209,22 @@ location: postal_code: 334 01 street_address: Horušany 70 normalization_timestamp: '2025-12-09T10:53:49.745722+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:27.880178+00:00' + source_url: https://www.sobekury.cz/obec/knihovny + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.sobekury.cz/skins/sobekury2_resp/favicons/safari-pinned-tab.svg + source_url: https://www.sobekury.cz/obec/knihovny + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:31:27.880178+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-PRE-L-MKVP.yaml b/data/custodian/CZ-32-PRE-L-MKVP.yaml index b2b7d430be..ce5b23ce7b 100644 --- a/data/custodian/CZ-32-PRE-L-MKVP.yaml +++ b/data/custodian/CZ-32-PRE-L-MKVP.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PRE-L-MKVP - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PRE-L-MKVP valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PRE-L-MKVP ghcid_numeric: 6648808370566109458 valid_from: '2025-12-06T23:37:31.596274+00:00' @@ -207,3 +207,28 @@ location: postal_code: 334 01 street_address: Příchovice 37 normalization_timestamp: '2025-12-09T10:53:49.894824+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:48.605411+00:00' + source_url: https://www.prichovice.cz/obec-61/knihovna-1 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.prichovice.cz/skins/prichovice2/favicons/safari-pinned-tab.svg + source_url: https://www.prichovice.cz/obec-61/knihovna-1 + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:31:48.605411+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.prichovice.cz/obec-61/www.prichovice.cz/data/editor/181cs_2.png + source_url: https://www.prichovice.cz/obec-61/knihovna-1 + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-24T11:31:48.605411+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 5 diff --git a/data/custodian/CZ-32-PRE-L-MKVS.yaml b/data/custodian/CZ-32-PRE-L-MKVS.yaml index 7821ea0bc0..a9519b52c0 100644 --- a/data/custodian/CZ-32-PRE-L-MKVS.yaml +++ b/data/custodian/CZ-32-PRE-L-MKVS.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PRE-L-MKVS - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PRE-L-MKVS valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PRE-L-MKVS ghcid_numeric: 3108156399207870293 valid_from: '2025-12-06T23:37:31.726953+00:00' @@ -207,3 +207,22 @@ location: postal_code: 334 01 street_address: Soběkury 47 normalization_timestamp: '2025-12-09T10:53:49.928130+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:58.906173+00:00' + source_url: https://www.sobekury.cz/obec/knihovny + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.sobekury.cz/skins/sobekury2_resp/favicons/safari-pinned-tab.svg + source_url: https://www.sobekury.cz/obec/knihovny + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:31:58.906173+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-PRI-L-MKP-mistni_knihovna_primda.yaml b/data/custodian/CZ-32-PRI-L-MKP-mistni_knihovna_primda.yaml index 1869bc3ca6..c1fbf6f895 100644 --- a/data/custodian/CZ-32-PRI-L-MKP-mistni_knihovna_primda.yaml +++ b/data/custodian/CZ-32-PRI-L-MKP-mistni_knihovna_primda.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PRI-L-MKP-mistni_knihovna_primda - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PRI-L-MKP-mistni_knihovna_primda valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PRI-L-MKP-mistni_knihovna_primda ghcid_numeric: 16016268298609517835 valid_from: '2025-12-06T23:37:32.141556+00:00' @@ -207,3 +207,22 @@ location: postal_code: 348 06 street_address: náměstí Jindřicha Kolowrata čp. 112 normalization_timestamp: '2025-12-09T10:53:50.089431+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:32:14.442383+00:00' + source_url: https://www.mestoprimda.cz/mesto/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.mestoprimda.cz/skins/primda_lego/favicons/safari-pinned-tab.svg + source_url: https://www.mestoprimda.cz/mesto/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:32:14.442383+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-PRI-L-MKT.yaml b/data/custodian/CZ-32-PRI-L-MKT.yaml index 2db755aeae..372f805908 100644 --- a/data/custodian/CZ-32-PRI-L-MKT.yaml +++ b/data/custodian/CZ-32-PRI-L-MKT.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PRI-L-MKT - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PRI-L-MKT valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PRI-L-MKT ghcid_numeric: 7813799942313227044 valid_from: '2025-12-06T23:37:32.129032+00:00' @@ -207,3 +207,22 @@ location: postal_code: 348 06 street_address: Třemešné 16 normalization_timestamp: '2025-12-09T10:53:50.202104+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:32:24.709860+00:00' + source_url: https://www.obectremesne.cz/kultura-a-sport/mistni-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obectremesne.cz/skins/tremesne_lego/favicons/safari-pinned-tab.svg + source_url: https://www.obectremesne.cz/kultura-a-sport/mistni-knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:32:24.709860+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-PRO-L-OKVP.yaml b/data/custodian/CZ-32-PRO-L-OKVP.yaml index c4cc5b598a..3a4db17905 100644 --- a/data/custodian/CZ-32-PRO-L-OKVP.yaml +++ b/data/custodian/CZ-32-PRO-L-OKVP.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PRO-L-OKVP - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PRO-L-OKVP valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PRO-L-OKVP ghcid_numeric: 4272587850748301176 valid_from: '2025-12-06T23:37:43.680868+00:00' @@ -216,3 +216,22 @@ location: postal_code: 349 01 street_address: Prostiboř 37 normalization_timestamp: '2025-12-09T10:53:50.278969+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:32:34.030218+00:00' + source_url: https://www.obecprostibor.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obecprostibor.cz/skins/prostibor_lego/favicons/safari-pinned-tab.svg + source_url: https://www.obecprostibor.cz + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:32:34.030218+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-PTR-L-OKC.yaml b/data/custodian/CZ-32-PTR-L-OKC.yaml index 482ae43743..62f7577e17 100644 --- a/data/custodian/CZ-32-PTR-L-OKC.yaml +++ b/data/custodian/CZ-32-PTR-L-OKC.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-PTR-L-OKC - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-PTR-L-OKC valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-PTR-L-OKC ghcid_numeric: 9477696656011143728 valid_from: '2025-12-06T23:37:43.320662+00:00' @@ -208,3 +208,22 @@ location: postal_code: 330 11 street_address: Chotiná 42 normalization_timestamp: '2025-12-09T10:53:50.413406+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:32:40.760748+00:00' + source_url: https://www.hromnice.cz/pro-obcany/knihovny + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.hromnice.cz/skins/hromnice.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.hromnice.cz/pro-obcany/knihovny + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:32:40.760748+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-RAD-L-MKVS.yaml b/data/custodian/CZ-32-RAD-L-MKVS.yaml index 3214cb45d3..dd42391a9b 100644 --- a/data/custodian/CZ-32-RAD-L-MKVS.yaml +++ b/data/custodian/CZ-32-RAD-L-MKVS.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-RAD-L-MKVS - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-RAD-L-MKVS valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-RAD-L-MKVS ghcid_numeric: 17941461726185137983 valid_from: '2025-12-06T23:37:31.974908+00:00' @@ -210,3 +210,22 @@ location: postal_code: 338 28 street_address: Skomelno 94 normalization_timestamp: '2025-12-09T10:53:50.503582+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:32:50.320433+00:00' + source_url: https://skomelno.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://skomelno.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://skomelno.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:32:50.320433+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-RAD-L-OKC.yaml b/data/custodian/CZ-32-RAD-L-OKC.yaml index 326463696a..278d2f4d89 100644 --- a/data/custodian/CZ-32-RAD-L-OKC.yaml +++ b/data/custodian/CZ-32-RAD-L-OKC.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-RAD-L-OKC - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-RAD-L-OKC valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-RAD-L-OKC ghcid_numeric: 9083214297968148168 valid_from: '2025-12-06T23:37:32.056012+00:00' @@ -207,3 +207,22 @@ location: postal_code: 338 28 street_address: Chomle 19 normalization_timestamp: '2025-12-09T10:53:50.528262+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:32:55.022201+00:00' + source_url: https://chomle.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://chomle.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://chomle.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:32:55.022201+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-REJ-L-MKR.yaml b/data/custodian/CZ-32-REJ-L-MKR.yaml index 6c2a69f30e..a9ee081533 100644 --- a/data/custodian/CZ-32-REJ-L-MKR.yaml +++ b/data/custodian/CZ-32-REJ-L-MKR.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-REJ-L-MKR - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-REJ-L-MKR valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-REJ-L-MKR ghcid_numeric: 608870935286548198 valid_from: '2025-12-06T23:37:43.504600+00:00' @@ -208,3 +208,28 @@ location: postal_code: 341 92 street_address: Náměstí Svobody 1 normalization_timestamp: '2025-12-09T10:53:50.552882+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:33:00.958861+00:00' + source_url: https://www.rejstejn.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=3b83343bfe + source_url: https://www.rejstejn.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T11:33:00.958861+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://3b83343bfe.clvaw-cdnwnd.com/dc2c7944a20f02d42d1ded11692397e1/200000000-2041b2041d/700/books-3786559_960_720.jpg?ph=3b83343bfe + source_url: https://www.rejstejn.knihovna.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T11:33:00.958861+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-ROK-A-SOAR-statni_okresni_archiv_rokycany.yaml b/data/custodian/CZ-32-ROK-A-SOAR-statni_okresni_archiv_rokycany.yaml index 61e43c8dff..a792602160 100644 --- a/data/custodian/CZ-32-ROK-A-SOAR-statni_okresni_archiv_rokycany.yaml +++ b/data/custodian/CZ-32-ROK-A-SOAR-statni_okresni_archiv_rokycany.yaml @@ -261,3 +261,22 @@ location: youtube_status: NOT_FOUND youtube_search_query: Státní okresní archiv Rokycany official youtube_search_timestamp: '2025-12-09T09:30:57.912188+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:33:09.861321+00:00' + source_url: http://www.soaplzen.cz/soka-ro + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.soaplzen.cz/sites/default/files/easybreeze_favicon.ico + source_url: http://www.soaplzen.cz/soka-ro + css_selector: '[document] > html.js > head > link' + retrieved_on: '2025-12-24T11:33:09.861321+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-ROK-L-HVRPPOK.yaml b/data/custodian/CZ-32-ROK-L-HVRPPOK.yaml index 90bfa8ee1c..8de515612e 100644 --- a/data/custodian/CZ-32-ROK-L-HVRPPOK.yaml +++ b/data/custodian/CZ-32-ROK-L-HVRPPOK.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-ROK-L-HVRPPOK - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-ROK-L-HVRPPOK valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-ROK-L-HVRPPOK ghcid_numeric: 17700910522146903999 valid_from: '2025-12-06T23:37:19.821458+00:00' @@ -211,3 +211,22 @@ location: postal_code: 337 01 street_address: Voldušská 721 normalization_timestamp: '2025-12-09T10:53:50.686790+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:33:19.308569+00:00' + source_url: https://www.hvr.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.hvr.cz/wp-content/uploads/2024/05/cropped-Logo_čtvercové_bez_názvu-180x180.png + source_url: https://www.hvr.cz + css_selector: '[document] > html.js.js > head > link:nth-of-type(34)' + retrieved_on: '2025-12-24T11:33:19.308569+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-ROK-L-MKR-mistni_knihovna_rakova.yaml b/data/custodian/CZ-32-ROK-L-MKR-mistni_knihovna_rakova.yaml index a5c7a90d94..d4905736ef 100644 --- a/data/custodian/CZ-32-ROK-L-MKR-mistni_knihovna_rakova.yaml +++ b/data/custodian/CZ-32-ROK-L-MKR-mistni_knihovna_rakova.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-ROK-L-MKR-mistni_knihovna_rakova - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-ROK-L-MKR-mistni_knihovna_rakova valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-ROK-L-MKR-mistni_knihovna_rakova ghcid_numeric: 1169046435197881957 valid_from: '2025-12-06T23:37:32.000720+00:00' @@ -210,3 +210,22 @@ location: postal_code: 337 01 street_address: Raková 68 normalization_timestamp: '2025-12-09T10:53:50.796974+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:33:26.230062+00:00' + source_url: https://rakova.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://rakova.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://rakova.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:33:26.230062+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-ROK-L-MKVS.yaml b/data/custodian/CZ-32-ROK-L-MKVS.yaml index 9ffbccafcd..13eac05694 100644 --- a/data/custodian/CZ-32-ROK-L-MKVS.yaml +++ b/data/custodian/CZ-32-ROK-L-MKVS.yaml @@ -34,13 +34,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-ROK-L-MKVS - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-ROK-L-MKVS valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-ROK-L-MKVS ghcid_numeric: 4948421485260498122 valid_from: '2025-12-06T23:37:31.969023+00:00' @@ -202,3 +202,22 @@ location: postal_code: 337 01 street_address: Sirá 52 normalization_timestamp: '2025-12-09T10:53:50.819408+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:39:32.709379+00:00' + source_url: https://www.obec-sira.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obec-sira.cz/skins/obec-sira.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.obec-sira.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:39:32.709379+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-ROK-L-OKVL.yaml b/data/custodian/CZ-32-ROK-L-OKVL.yaml index 04bdfd184a..0bc0f19104 100644 --- a/data/custodian/CZ-32-ROK-L-OKVL.yaml +++ b/data/custodian/CZ-32-ROK-L-OKVL.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-ROK-L-OKVL - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-ROK-L-OKVL valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-ROK-L-OKVL ghcid_numeric: 13563558227743610149 valid_from: '2025-12-06T23:37:31.972112+00:00' @@ -210,3 +210,22 @@ location: postal_code: 337 01 street_address: Litohlavy 64 normalization_timestamp: '2025-12-09T10:53:50.860271+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:39:38.629515+00:00' + source_url: https://litohlavy.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://litohlavy.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://litohlavy.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:39:38.629515+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-ROK-M-MDBHVRPZMV.yaml b/data/custodian/CZ-32-ROK-M-MDBHVRPZMV.yaml index 5d5cc31f2d..5d43cfbb5a 100644 --- a/data/custodian/CZ-32-ROK-M-MDBHVRPZMV.yaml +++ b/data/custodian/CZ-32-ROK-M-MDBHVRPZMV.yaml @@ -45,13 +45,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-ROK-M-MDBHVRPZMV - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-ROK-M-MDBHVRPZMV valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-ROK-M-MDBHVRPZMV ghcid_numeric: 13661799999652014382 valid_from: '2025-12-06T23:37:22.909040+00:00' @@ -218,3 +218,22 @@ location: postal_code: 337 01 street_address: Malé nám. 123/I normalization_timestamp: '2025-12-09T10:53:50.910391+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:39:47.286612+00:00' + source_url: https://www.zcm.cz/o-muzeu/objekty/muzeum-dr-bohuslava-horaka-v-rokycanech + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.zcm.cz/images/favicon/favicon.svg + source_url: https://www.zcm.cz/o-muzeu/objekty/muzeum-dr-bohuslava-horaka-v-rokycanech + css_selector: '[document] > html.g-offcanvas-css3 > head > link:nth-of-type(15)' + retrieved_on: '2025-12-24T11:39:47.286612+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-SOB-L-MKS.yaml b/data/custodian/CZ-32-SOB-L-MKS.yaml index 689b699c2e..eb4f7b6776 100644 --- a/data/custodian/CZ-32-SOB-L-MKS.yaml +++ b/data/custodian/CZ-32-SOB-L-MKS.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-SOB-L-MKS - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-SOB-L-MKS valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-SOB-L-MKS ghcid_numeric: 3297881439685956112 valid_from: '2025-12-06T23:37:31.334775+00:00' @@ -210,3 +210,28 @@ location: postal_code: 342 01 street_address: Soběšice 146 normalization_timestamp: '2025-12-09T10:53:50.955858+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:39:57.708713+00:00' + source_url: https://www.sobesice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=8fd0d98184 + source_url: https://www.sobesice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T11:39:57.708713+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://8fd0d98184.clvaw-cdnwnd.com/b77bb31ce427712c8905259996fb2d84/200000079-0cac00cac2/700/leaves-1076307_960_720-9.jpg?ph=8fd0d98184 + source_url: https://www.sobesice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T11:39:57.708713+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-SPA-L-MKSP.yaml b/data/custodian/CZ-32-SPA-L-MKSP.yaml index 90a4685a0b..896c684df7 100644 --- a/data/custodian/CZ-32-SPA-L-MKSP.yaml +++ b/data/custodian/CZ-32-SPA-L-MKSP.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-SPA-L-MKSP - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-SPA-L-MKSP valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-SPA-L-MKSP ghcid_numeric: 14318853444016584504 valid_from: '2025-12-06T23:37:21.368464+00:00' @@ -216,3 +216,22 @@ location: postal_code: 335 61 street_address: Náměstí Svobody 132 normalization_timestamp: '2025-12-09T10:53:51.006698+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:40:08.097265+00:00' + source_url: https://blovice.tritius.cz/library/spaleneporici + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://blovice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://blovice.tritius.cz/library/spaleneporici + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:40:08.097265+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-SRN-L-MKS.yaml b/data/custodian/CZ-32-SRN-L-MKS.yaml index 205a64c575..6a219a8fa1 100644 --- a/data/custodian/CZ-32-SRN-L-MKS.yaml +++ b/data/custodian/CZ-32-SRN-L-MKS.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-SRN-L-MKS - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-SRN-L-MKS valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-SRN-L-MKS ghcid_numeric: 18282133526043512266 valid_from: '2025-12-06T23:37:31.361762+00:00' @@ -214,3 +214,28 @@ location: postal_code: 341 92 street_address: Srní 113 normalization_timestamp: '2025-12-09T10:53:51.063360+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:40:20.101641+00:00' + source_url: https://www.srni.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=8d4c91e338 + source_url: https://www.srni.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T11:40:20.101641+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + - claim_type: og_image_url + claim_value: https://8d4c91e338.clvaw-cdnwnd.com/afdec2479b2049337745cb1d1e0be389/200000041-82ced82cef/700/book-2363734_960_720-9.jpg?ph=8d4c91e338 + source_url: https://www.srni.knihovna.cz + css_selector: '[document] > html.js.sizes > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-24T11:40:20.101641+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/CZ-32-STA-L-KCMK.yaml b/data/custodian/CZ-32-STA-L-KCMK.yaml index e1ffb94a1a..49fc77d915 100644 --- a/data/custodian/CZ-32-STA-L-KCMK.yaml +++ b/data/custodian/CZ-32-STA-L-KCMK.yaml @@ -42,13 +42,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STA-L-KCMK - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STA-L-KCMK valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STA-L-KCMK ghcid_numeric: 12344620405106662775 valid_from: '2025-12-06T23:37:21.371778+00:00' @@ -108,8 +108,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.95 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: K-Centrum, městská knihovna @@ -219,3 +219,22 @@ location: geonames_id: 3065249 geonames_name: Starý Plzenec feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:40:28.116275+00:00' + source_url: https://plzenec.tritius.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://plzenec.tritius.cz/apple-touch-icon-180x180.png + source_url: https://plzenec.tritius.cz + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:40:28.116275+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-STA-L-MKC.yaml b/data/custodian/CZ-32-STA-L-MKC.yaml index 1a98d21f4c..6face6f442 100644 --- a/data/custodian/CZ-32-STA-L-MKC.yaml +++ b/data/custodian/CZ-32-STA-L-MKC.yaml @@ -215,3 +215,22 @@ location: postal_code: 345 61 street_address: Čermná 49 normalization_timestamp: '2025-12-09T10:53:51.114447+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:40:33.733078+00:00' + source_url: https://domazlice.tritius.cz/library/cermna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://domazlice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://domazlice.tritius.cz/library/cermna + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:40:33.733078+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-STA-L-MKSS.yaml b/data/custodian/CZ-32-STA-L-MKSS.yaml index 35fa881769..26783e7e6a 100644 --- a/data/custodian/CZ-32-STA-L-MKSS.yaml +++ b/data/custodian/CZ-32-STA-L-MKSS.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STA-L-MKSS - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STA-L-MKSS valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STA-L-MKSS ghcid_numeric: 1727226569689161664 valid_from: '2025-12-06T23:37:32.110748+00:00' @@ -212,3 +212,28 @@ location: postal_code: 348 01 street_address: Staré Sedliště normalization_timestamp: '2025-12-09T10:53:51.166897+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:40:42.574793+00:00' + source_url: https://www.ssedliste.cz/kultura-1/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.ssedliste.cz/skins/ssedliste.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.ssedliste.cz/kultura-1/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:40:42.574793+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.ssedliste.cz/data/editor/309cs_1.jpg + source_url: https://www.ssedliste.cz/kultura-1/knihovna + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-24T11:40:42.574793+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 5 diff --git a/data/custodian/CZ-32-STA-L-MKVS-mistni_knihovna_ve_stahlavech.yaml b/data/custodian/CZ-32-STA-L-MKVS-mistni_knihovna_ve_stahlavech.yaml index cf8fac61eb..9eb130a857 100644 --- a/data/custodian/CZ-32-STA-L-MKVS-mistni_knihovna_ve_stahlavech.yaml +++ b/data/custodian/CZ-32-STA-L-MKVS-mistni_knihovna_ve_stahlavech.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STA-L-MKVS-mistni_knihovna_ve_stahlavech - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STA-L-MKVS-mistni_knihovna_ve_stahlavech valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STA-L-MKVS-mistni_knihovna_ve_stahlavech ghcid_numeric: 7579435895080714591 valid_from: '2025-12-08T11:28:08.739688+00:00' @@ -220,3 +220,22 @@ location: postal_code: 332 03 street_address: Palackého 440 normalization_timestamp: '2025-12-09T10:53:51.187702+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:40:48.260020+00:00' + source_url: https://blovice.tritius.cz/library/stahlavy + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://blovice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://blovice.tritius.cz/library/stahlavy + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:40:48.260020+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-STA-L-OKP.yaml b/data/custodian/CZ-32-STA-L-OKP.yaml index 9a6a76e505..7dc6c96ba5 100644 --- a/data/custodian/CZ-32-STA-L-OKP.yaml +++ b/data/custodian/CZ-32-STA-L-OKP.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STA-L-OKP - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STA-L-OKP valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STA-L-OKP ghcid_numeric: 11409208215811470415 valid_from: '2025-12-06T23:37:31.161406+00:00' @@ -214,3 +214,22 @@ location: postal_code: 345 61 street_address: Puclice 1 normalization_timestamp: '2025-12-09T10:53:51.244073+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:41:02.385215+00:00' + source_url: https://domazlice.tritius.cz/library/puclice + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://domazlice.tritius.cz/apple-touch-icon-180x180.png + source_url: https://domazlice.tritius.cz/library/puclice + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:41:02.385215+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-STE-L-OKS.yaml b/data/custodian/CZ-32-STE-L-OKS.yaml index 0679b6368d..242db79e8b 100644 --- a/data/custodian/CZ-32-STE-L-OKS.yaml +++ b/data/custodian/CZ-32-STE-L-OKS.yaml @@ -226,3 +226,22 @@ location: geonames_id: 3065191 geonames_name: Štěnovice feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:41:09.852885+00:00' + source_url: https://www.knihovna-stenovice.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.knihovna-stenovice.cz/skins/knihovnastenovice/favicons/safari-pinned-tab.svg + source_url: https://www.knihovna-stenovice.cz + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:41:09.852885+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-STE-L-OKVC.yaml b/data/custodian/CZ-32-STE-L-OKVC.yaml index b3a07c1048..1b9e07d1cd 100644 --- a/data/custodian/CZ-32-STE-L-OKVC.yaml +++ b/data/custodian/CZ-32-STE-L-OKVC.yaml @@ -34,13 +34,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STE-L-OKVC - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STE-L-OKVC valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STE-L-OKVC ghcid_numeric: 18389057022155218450 valid_from: '2025-12-08T11:21:31.891845+00:00' @@ -207,3 +207,22 @@ location: postal_code: 332 09 street_address: Čižice 128 normalization_timestamp: '2025-12-09T10:53:51.293902+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:41:17.291378+00:00' + source_url: https://www.obec-cizice.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obec-cizice.cz/skins/cizice_lego/favicons/safari-pinned-tab.svg + source_url: https://www.obec-cizice.cz + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:41:17.291378+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-STO-L-OKH.yaml b/data/custodian/CZ-32-STO-L-OKH.yaml index b3a58e710a..ae38bb08bf 100644 --- a/data/custodian/CZ-32-STO-L-OKH.yaml +++ b/data/custodian/CZ-32-STO-L-OKH.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STO-L-OKH - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STO-L-OKH valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STO-L-OKH ghcid_numeric: 10760871362860174655 valid_from: '2025-12-06T23:37:31.719594+00:00' @@ -211,3 +211,22 @@ location: postal_code: 333 01 street_address: Honezovice 86 normalization_timestamp: '2025-12-09T10:53:51.316919+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:41:22.295431+00:00' + source_url: http://www.honezovice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.honezovice.knihovna.cz/favicon.svg + source_url: http://www.honezovice.knihovna.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:41:22.295431+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-STO-L-VKL.yaml b/data/custodian/CZ-32-STO-L-VKL.yaml index cde8383fa8..cba9585df7 100644 --- a/data/custodian/CZ-32-STO-L-VKL.yaml +++ b/data/custodian/CZ-32-STO-L-VKL.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STO-L-VKL - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STO-L-VKL valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STO-L-VKL ghcid_numeric: 13148564914134135928 valid_from: '2025-12-06T23:37:31.502847+00:00' @@ -207,3 +207,30 @@ location: postal_code: 333 01 street_address: Losina 29 normalization_timestamp: '2025-12-09T10:53:51.389094+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:41:31.155945+00:00' + source_url: https://knihovnachotesov.webk.cz + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://knihovnachotesov.webk.cz/themes/new/blue/logo3.png + source_url: https://knihovnachotesov.webk.cz + css_selector: '#outpage > header.tmava > a > img.mobile_display_none' + retrieved_on: '2025-12-24T11:41:31.155945+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Na úvodní stranu + - claim_type: favicon_url + claim_value: https://knihovnachotesov.webk.cz/themes/new/favicon.ico + source_url: https://knihovnachotesov.webk.cz + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-24T11:41:31.155945+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-STO-L-VKM.yaml b/data/custodian/CZ-32-STO-L-VKM.yaml index 40e054327f..cbf9294860 100644 --- a/data/custodian/CZ-32-STO-L-VKM.yaml +++ b/data/custodian/CZ-32-STO-L-VKM.yaml @@ -34,13 +34,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STO-L-VKM - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STO-L-VKM valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STO-L-VKM ghcid_numeric: 2958425798743055731 valid_from: '2025-12-06T23:37:31.505964+00:00' @@ -202,3 +202,22 @@ location: postal_code: 333 01 street_address: Mantov 97 normalization_timestamp: '2025-12-09T10:53:51.413180+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:41:36.740397+00:00' + source_url: https://www.obec-chotesov.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obec-chotesov.cz/skins/obec-chotesov.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.obec-chotesov.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:41:36.740397+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-STR-L-MKK.yaml b/data/custodian/CZ-32-STR-L-MKK.yaml index cb96e6bea0..f6ce4c8fe2 100644 --- a/data/custodian/CZ-32-STR-L-MKK.yaml +++ b/data/custodian/CZ-32-STR-L-MKK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STR-L-MKK - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STR-L-MKK valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STR-L-MKK ghcid_numeric: 14024095298071020845 valid_from: '2025-12-06T23:37:32.154558+00:00' @@ -217,3 +217,22 @@ location: postal_code: 349 01 street_address: Kostelec 2 normalization_timestamp: '2025-12-09T10:53:51.441729+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:41:42.896588+00:00' + source_url: https://www.obeckostelec.cz/obec/sluzby-v-obci/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obeckostelec.cz/skins/obeckostelec_lego/favicons/safari-pinned-tab.svg + source_url: https://www.obeckostelec.cz/obec/sluzby-v-obci/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:41:42.896588+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-STR-L-MKVS.yaml b/data/custodian/CZ-32-STR-L-MKVS.yaml index 0b70b1b7bf..9dc24e1070 100644 --- a/data/custodian/CZ-32-STR-L-MKVS.yaml +++ b/data/custodian/CZ-32-STR-L-MKVS.yaml @@ -211,3 +211,22 @@ location: postal_code: 349 01 street_address: Svojšín 135 normalization_timestamp: '2025-12-09T10:53:51.577883+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:41:59.991308+00:00' + source_url: https://www.svojsin.cz/urad/sluzby + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.svojsin.cz/skins/svojsin.cz_lego/favicons/safari-pinned-tab.svg + source_url: https://www.svojsin.cz/urad/sluzby + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:41:59.991308+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-STR-L-MMKS.yaml b/data/custodian/CZ-32-STR-L-MMKS.yaml index 30cec13cc6..d2c89d207a 100644 --- a/data/custodian/CZ-32-STR-L-MMKS.yaml +++ b/data/custodian/CZ-32-STR-L-MMKS.yaml @@ -36,13 +36,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STR-L-MMKS - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STR-L-MMKS valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STR-L-MMKS ghcid_numeric: 14365364173759054488 valid_from: '2025-12-06T23:37:20.212539+00:00' @@ -100,8 +100,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.95 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: MKS - Městská knihovna Stříbro @@ -220,3 +220,22 @@ location: geonames_id: 3064919 geonames_name: Stříbro feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:42:07.149220+00:00' + source_url: https://tritius.knihovna-stribro.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://tritius.knihovna-stribro.cz/apple-touch-icon-180x180.png + source_url: https://tritius.knihovna-stribro.cz + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:42:07.149220+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 14 diff --git a/data/custodian/CZ-32-STR-L-OKVS.yaml b/data/custodian/CZ-32-STR-L-OKVS.yaml index 46f0776060..6f2025bc94 100644 --- a/data/custodian/CZ-32-STR-L-OKVS.yaml +++ b/data/custodian/CZ-32-STR-L-OKVS.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-STR-L-OKVS - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-STR-L-OKVS valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-STR-L-OKVS ghcid_numeric: 8470522486643658349 valid_from: '2025-12-06T23:37:31.518797+00:00' @@ -210,3 +210,22 @@ location: postal_code: 332 07 street_address: Střížovice 66 normalization_timestamp: '2025-12-09T10:53:51.654826+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:42:14.273543+00:00' + source_url: https://www.strizovice.eu + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.strizovice.eu/skins/strizovice.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.strizovice.eu + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:42:14.273543+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-SUL-L-ZKS.yaml b/data/custodian/CZ-32-SUL-L-ZKS.yaml index 95afabc88b..f8a09f8a65 100644 --- a/data/custodian/CZ-32-SUL-L-ZKS.yaml +++ b/data/custodian/CZ-32-SUL-L-ZKS.yaml @@ -34,13 +34,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-SUL-L-ZKS - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-SUL-L-ZKS valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-SUL-L-ZKS ghcid_numeric: 4149589204521112366 valid_from: '2025-12-06T23:37:32.105166+00:00' @@ -202,3 +202,22 @@ location: postal_code: 349 74 street_address: Sulislav 59 normalization_timestamp: '2025-12-09T10:53:51.678285+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:42:19.690789+00:00' + source_url: https://www.sulislav.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.sulislav.cz/skins/sulislav.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.sulislav.cz + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:42:19.690789+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-SUS-M-MSSK.yaml b/data/custodian/CZ-32-SUS-M-MSSK.yaml index b0d606cc84..712622ee4d 100644 --- a/data/custodian/CZ-32-SUS-M-MSSK.yaml +++ b/data/custodian/CZ-32-SUS-M-MSSK.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-SUS-M-MSSK - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-SUS-M-MSSK valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-SUS-M-MSSK ghcid_numeric: 8788242252217447668 valid_from: '2025-12-08T11:21:34.831193+00:00' @@ -215,3 +215,22 @@ location: postal_code: 342 01 street_address: nám. Svobody 40 normalization_timestamp: '2025-12-09T10:53:51.749120+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:42:33.886541+00:00' + source_url: https://www.muzeumsumavy.cz/muzeumsumavy/knihovna.asp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.muzeumsumavy.cz/muzeumsumavy/icon/safari-pinned-tab.svg + source_url: https://www.muzeumsumavy.cz/muzeumsumavy/knihovna.asp + css_selector: '[document] > html.touch-no > head > link:nth-of-type(20)' + retrieved_on: '2025-12-24T11:42:33.886541+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-SVI-L-MKVK.yaml b/data/custodian/CZ-32-SVI-L-MKVK.yaml index 6b5433efed..60f08a0e42 100644 --- a/data/custodian/CZ-32-SVI-L-MKVK.yaml +++ b/data/custodian/CZ-32-SVI-L-MKVK.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-SVI-L-MKVK - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-SVI-L-MKVK valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-SVI-L-MKVK ghcid_numeric: 6705064075962958854 valid_from: '2025-12-06T23:37:31.557462+00:00' @@ -207,3 +207,22 @@ location: postal_code: 340 12 street_address: Kbel 37 normalization_timestamp: '2025-12-09T10:53:51.776739+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:42:43.374884+00:00' + source_url: https://www.obec-kbel.cz/obec/obecni-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obec-kbel.cz/skins/obeckbel_lego/favicons/safari-pinned-tab.svg + source_url: https://www.obec-kbel.cz/obec/obecni-knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:42:43.374884+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-TAC-L-MKSTK.yaml b/data/custodian/CZ-32-TAC-L-MKSTK.yaml index de66f890b6..2cdb0cd52b 100644 --- a/data/custodian/CZ-32-TAC-L-MKSTK.yaml +++ b/data/custodian/CZ-32-TAC-L-MKSTK.yaml @@ -180,3 +180,22 @@ wikidata_enrichment: enrichment_version: 2.1.0 instance_of: - Q114617264 +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:43:05.009708+00:00' + source_url: https://katalog.mkstc.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://katalog.mkstc.cz/themes/root/images/vufind-favicon.ico + source_url: https://katalog.mkstc.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:43:05.009708+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-TIS-L-OKVT.yaml b/data/custodian/CZ-32-TIS-L-OKVT.yaml index ed276897e6..86fd2bd85f 100644 --- a/data/custodian/CZ-32-TIS-L-OKVT.yaml +++ b/data/custodian/CZ-32-TIS-L-OKVT.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-TIS-L-OKVT - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-TIS-L-OKVT valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-TIS-L-OKVT ghcid_numeric: 10094349205435303362 valid_from: '2025-12-06T23:37:32.161170+00:00' @@ -209,3 +209,22 @@ location: postal_code: 348 01 street_address: Tisová 5 normalization_timestamp: '2025-12-09T10:53:51.999412+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:43:15.345855+00:00' + source_url: https://www.tisova.eu/obec/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tisova.eu/skins/tisova.eu_lego2/favicons/apple-touch-icon.png + source_url: https://www.tisova.eu/obec/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:43:15.345855+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-TRE-L-OKCB.yaml b/data/custodian/CZ-32-TRE-L-OKCB.yaml index e0cc1dbcea..5a471d5ed3 100644 --- a/data/custodian/CZ-32-TRE-L-OKCB.yaml +++ b/data/custodian/CZ-32-TRE-L-OKCB.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-TRE-L-OKCB - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-TRE-L-OKCB valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-TRE-L-OKCB ghcid_numeric: 12175512125046336816 valid_from: '2025-12-08T11:21:27.294593+00:00' @@ -216,3 +216,22 @@ location: postal_code: 330 11 street_address: Česká Bříza 160 normalization_timestamp: '2025-12-09T10:53:52.043843+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:43:56.599464+00:00' + source_url: https://www.ceska-briza.cz/obec/sluzby-obcanum-1/knihovna-verejna-13cs.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.ceska-briza.cz/skins/ceska-briza.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.ceska-briza.cz/obec/sluzby-obcanum-1/knihovna-verejna-13cs.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:43:56.599464+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-TRE-L-OKD.yaml b/data/custodian/CZ-32-TRE-L-OKD.yaml index 19deb7fd74..578e96cb60 100644 --- a/data/custodian/CZ-32-TRE-L-OKD.yaml +++ b/data/custodian/CZ-32-TRE-L-OKD.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-TRE-L-OKD - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-TRE-L-OKD valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-TRE-L-OKD ghcid_numeric: 2784380042664742820 valid_from: '2025-12-06T23:37:31.857652+00:00' @@ -214,3 +214,22 @@ location: postal_code: 330 11 street_address: Dolany 66 normalization_timestamp: '2025-12-09T10:53:52.076606+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:44:04.922683+00:00' + source_url: https://www.dolany-ps.cz/organizace-a-sluzby/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.dolany-ps.cz/skins/dolany-ps.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.dolany-ps.cz/organizace-a-sluzby/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:44:04.922683+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-TRE-L-OKH.yaml b/data/custodian/CZ-32-TRE-L-OKH.yaml index 0dcde97e39..23e791f896 100644 --- a/data/custodian/CZ-32-TRE-L-OKH.yaml +++ b/data/custodian/CZ-32-TRE-L-OKH.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-TRE-L-OKH - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-TRE-L-OKH valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-TRE-L-OKH ghcid_numeric: 3347655891535984919 valid_from: '2025-12-06T23:37:31.902917+00:00' @@ -209,3 +209,22 @@ location: postal_code: 330 11 street_address: Hromnice 16 normalization_timestamp: '2025-12-09T10:53:52.103149+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:44:12.847169+00:00' + source_url: https://www.hromnice.cz/pro-obcany/knihovny + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.hromnice.cz/skins/hromnice.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.hromnice.cz/pro-obcany/knihovny + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:44:12.847169+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-TRE-L-OKP.yaml b/data/custodian/CZ-32-TRE-L-OKP.yaml index e9c7ee5e6b..6f69427f93 100644 --- a/data/custodian/CZ-32-TRE-L-OKP.yaml +++ b/data/custodian/CZ-32-TRE-L-OKP.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-TRE-L-OKP - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-TRE-L-OKP valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-TRE-L-OKP ghcid_numeric: 9971091532608127487 valid_from: '2025-12-06T23:37:31.869078+00:00' @@ -207,3 +207,22 @@ location: postal_code: 330 11 street_address: Příšov 36 normalization_timestamp: '2025-12-09T10:53:52.155047+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:44:22.194151+00:00' + source_url: https://www.obec-prisov.cz/obec/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.obec-prisov.cz/skins/obec-prisov.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.obec-prisov.cz/obec/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:44:22.194151+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-TRE-L-OKVN.yaml b/data/custodian/CZ-32-TRE-L-OKVN.yaml index 4deabbdfbc..5825736b47 100644 --- a/data/custodian/CZ-32-TRE-L-OKVN.yaml +++ b/data/custodian/CZ-32-TRE-L-OKVN.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-TRE-L-OKVN - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-TRE-L-OKVN valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-TRE-L-OKVN ghcid_numeric: 5404639988829149399 valid_from: '2025-12-06T23:37:31.831114+00:00' @@ -207,3 +207,22 @@ location: postal_code: 330 11 street_address: Nevřeň 73 normalization_timestamp: '2025-12-09T10:53:52.178414+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:44:28.194554+00:00' + source_url: https://www.nevren.cz/obecni-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.nevren.cz/image.php?nid=18306&oid=8164381&width=32 + source_url: https://www.nevren.cz/obecni-knihovna + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-24T11:44:28.194554+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-TRE-L-OKZ.yaml b/data/custodian/CZ-32-TRE-L-OKZ.yaml index 8b1da94403..3adab2749c 100644 --- a/data/custodian/CZ-32-TRE-L-OKZ.yaml +++ b/data/custodian/CZ-32-TRE-L-OKZ.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-TRE-L-OKZ - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-TRE-L-OKZ valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-TRE-L-OKZ ghcid_numeric: 14475517998472739149 valid_from: '2025-12-08T11:21:26.173192+00:00' @@ -212,3 +212,22 @@ location: postal_code: 330 11 street_address: Žichlice 81 normalization_timestamp: '2025-12-09T10:53:52.209552+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:44:35.156406+00:00' + source_url: https://www.hromnice.cz/pro-obcany/knihovny + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.hromnice.cz/skins/hromnice.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.hromnice.cz/pro-obcany/knihovny + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:44:35.156406+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-TRN-L-OKK.yaml b/data/custodian/CZ-32-TRN-L-OKK.yaml index 784a32ac83..4f89492dd2 100644 --- a/data/custodian/CZ-32-TRN-L-OKK.yaml +++ b/data/custodian/CZ-32-TRN-L-OKK.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-TRN-L-OKK - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-TRN-L-OKK valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-TRN-L-OKK ghcid_numeric: 12272791870427884566 valid_from: '2025-12-06T23:37:31.889825+00:00' @@ -211,3 +211,22 @@ location: postal_code: 330 13 street_address: Krašovice 49 normalization_timestamp: '2025-12-09T10:53:52.235866+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:44:41.014186+00:00' + source_url: https://www.krasovice.cz/obec-krasovice/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.krasovice.cz/skins/krasovice.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.krasovice.cz/obec-krasovice/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:44:41.014186+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-TRN-L-OKVT.yaml b/data/custodian/CZ-32-TRN-L-OKVT.yaml index c67114d4c3..283a227a62 100644 --- a/data/custodian/CZ-32-TRN-L-OKVT.yaml +++ b/data/custodian/CZ-32-TRN-L-OKVT.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-TRN-L-OKVT - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-TRN-L-OKVT valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-TRN-L-OKVT ghcid_numeric: 16699320076876399573 valid_from: '2025-12-06T23:37:31.807916+00:00' @@ -213,3 +213,28 @@ location: postal_code: 330 13 street_address: Trnová normalization_timestamp: '2025-12-09T10:53:52.265362+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:44:49.151944+00:00' + source_url: https://www.trnova.cz/obec-7/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.trnova.cz/skins/trnova.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.trnova.cz/obec-7/knihovna + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:44:49.151944+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.trnova.cz/data/editor/200cs_1.jpg + source_url: https://www.trnova.cz/obec-7/knihovna + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-24T11:44:49.151944+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 5 diff --git a/data/custodian/CZ-32-TYM-L-MKT.yaml b/data/custodian/CZ-32-TYM-L-MKT.yaml index 2a3601ce9c..5209427e52 100644 --- a/data/custodian/CZ-32-TYM-L-MKT.yaml +++ b/data/custodian/CZ-32-TYM-L-MKT.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-TYM-L-MKT - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-TYM-L-MKT valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-TYM-L-MKT ghcid_numeric: 8438583246317889276 valid_from: '2025-12-06T23:37:31.482847+00:00' @@ -207,3 +207,22 @@ location: postal_code: 332 01 street_address: Tymákov 40 normalization_timestamp: '2025-12-09T10:53:52.282402+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:44:57.043480+00:00' + source_url: https://www.tymakov.cz/obec/organizace/mistni-knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tymakov.cz/skins/tymakov.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.tymakov.cz/obec/organizace/mistni-knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:44:57.043480+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-UTU-L-MKVR.yaml b/data/custodian/CZ-32-UTU-L-MKVR.yaml index 102ba5c83d..bc4f52f111 100644 --- a/data/custodian/CZ-32-UTU-L-MKVR.yaml +++ b/data/custodian/CZ-32-UTU-L-MKVR.yaml @@ -202,3 +202,22 @@ location: postal_code: 332 09 street_address: Robčice normalization_timestamp: '2025-12-09T10:53:52.398387+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:45:12.470694+00:00' + source_url: https://www.utusice.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.utusice.cz/skins/utusice.cz_lego2/favicons/safari-pinned-tab.svg + source_url: https://www.utusice.cz + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:45:12.470694+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-VEJ-L-OKV-obecni_knihovna_vejvanov.yaml b/data/custodian/CZ-32-VEJ-L-OKV-obecni_knihovna_vejvanov.yaml index dde03e4837..e38b9bfb6c 100644 --- a/data/custodian/CZ-32-VEJ-L-OKV-obecni_knihovna_vejvanov.yaml +++ b/data/custodian/CZ-32-VEJ-L-OKV-obecni_knihovna_vejvanov.yaml @@ -43,13 +43,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-VEJ-L-OKV-obecni_knihovna_vejvanov - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-VEJ-L-OKV-obecni_knihovna_vejvanov valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-VEJ-L-OKV-obecni_knihovna_vejvanov ghcid_numeric: 6300400203095474139 valid_from: '2025-12-06T23:37:26.376578+00:00' @@ -215,3 +215,22 @@ location: country: *id007 postal_code: 338 28 normalization_timestamp: '2025-12-09T10:53:52.424380+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:45:19.411318+00:00' + source_url: https://www.vejvanov.cz/zivot-v-obci/knihovna + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.vejvanov.cz/skins/vejvanov.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.vejvanov.cz/zivot-v-obci/knihovna + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:45:19.411318+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-VEL-L-MKV.yaml b/data/custodian/CZ-32-VEL-L-MKV.yaml index fc631180a8..81de3c4097 100644 --- a/data/custodian/CZ-32-VEL-L-MKV.yaml +++ b/data/custodian/CZ-32-VEL-L-MKV.yaml @@ -213,3 +213,22 @@ location: geocoding_timestamp: '2025-12-09T21:40:08.904210+00:00' geocoding_method: CITY_NAME_LOOKUP geonames_matched_name: Velhartice +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:45:30.790740+00:00' + source_url: https://velhartice.knihovna.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://duyn491kcolsw.cloudfront.net/files/2d/2di/2div3h.svg?ph=302de62eb1 + source_url: https://velhartice.knihovna.cz + css_selector: '[document] > html.js.sizes > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T11:45:30.790740+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: any + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/CZ-32-VOL-L-MKVV.yaml b/data/custodian/CZ-32-VOL-L-MKVV.yaml index 311b702ddc..ab09973ced 100644 --- a/data/custodian/CZ-32-VOL-L-MKVV.yaml +++ b/data/custodian/CZ-32-VOL-L-MKVV.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-VOL-L-MKVV - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-VOL-L-MKVV valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-VOL-L-MKVV ghcid_numeric: 12870416339149295877 valid_from: '2025-12-06T23:37:31.956813+00:00' @@ -214,3 +214,22 @@ location: postal_code: 338 22 street_address: Volduchy normalization_timestamp: '2025-12-09T10:53:52.492220+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:45:35.879015+00:00' + source_url: https://volduchy.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://volduchy.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://volduchy.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:45:35.879015+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-VSE-L-OKVV.yaml b/data/custodian/CZ-32-VSE-L-OKVV.yaml index fdd7af4679..252381b402 100644 --- a/data/custodian/CZ-32-VSE-L-OKVV.yaml +++ b/data/custodian/CZ-32-VSE-L-OKVV.yaml @@ -44,13 +44,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-VSE-L-OKVV - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-VSE-L-OKVV valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-VSE-L-OKVV ghcid_numeric: 4251386703204187097 valid_from: '2025-12-06T23:37:31.825123+00:00' @@ -211,3 +211,22 @@ location: postal_code: 330 16 street_address: Všeruby 1 normalization_timestamp: '2025-12-09T10:53:52.595101+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:45:48.946179+00:00' + source_url: https://www.vseruby-mesto.cz/sluzby/knihovna-vseruby + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.vseruby-mesto.cz/skins/vseruby-mesto.cz_lego2/favicons/apple-touch-icon.png + source_url: https://www.vseruby-mesto.cz/sluzby/knihovna-vseruby + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:45:48.946179+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/CZ-32-ZBI-L-MKL.yaml b/data/custodian/CZ-32-ZBI-L-MKL.yaml index fd4ff0cf1d..6896b1c0a5 100644 --- a/data/custodian/CZ-32-ZBI-L-MKL.yaml +++ b/data/custodian/CZ-32-ZBI-L-MKL.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-ZBI-L-MKL - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-ZBI-L-MKL valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-ZBI-L-MKL ghcid_numeric: 10824331360859835146 valid_from: '2025-12-06T23:37:31.980890+00:00' @@ -210,3 +210,22 @@ location: postal_code: 337 08 street_address: Líšná normalization_timestamp: '2025-12-09T10:53:52.709984+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:47:26.279393+00:00' + source_url: https://lisna.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lisna.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://lisna.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:47:26.279393+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-ZBI-L-MKT.yaml b/data/custodian/CZ-32-ZBI-L-MKT.yaml index 80e80e1461..dd77a99aea 100644 --- a/data/custodian/CZ-32-ZBI-L-MKT.yaml +++ b/data/custodian/CZ-32-ZBI-L-MKT.yaml @@ -34,13 +34,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-ZBI-L-MKT - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-ZBI-L-MKT valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-ZBI-L-MKT ghcid_numeric: 7295381984414048618 valid_from: '2025-12-06T23:37:31.977903+00:00' @@ -202,3 +202,22 @@ location: postal_code: 338 08 street_address: Týček 95 normalization_timestamp: '2025-12-09T10:53:52.766412+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:47:33.679332+00:00' + source_url: http://www.tycek.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.tycek.cz/skins/tycek.cz_lego3/favicons/safari-pinned-tab.svg + source_url: http://www.tycek.cz + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T11:47:33.679332+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 5 diff --git a/data/custodian/CZ-32-ZBI-L-OKT.yaml b/data/custodian/CZ-32-ZBI-L-OKT.yaml index 066ae073da..fc27513cc3 100644 --- a/data/custodian/CZ-32-ZBI-L-OKT.yaml +++ b/data/custodian/CZ-32-ZBI-L-OKT.yaml @@ -38,13 +38,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-ZBI-L-OKT - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-ZBI-L-OKT valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-ZBI-L-OKT ghcid_numeric: 7261007358961130915 valid_from: '2025-12-06T23:37:32.065073+00:00' @@ -210,3 +210,22 @@ location: postal_code: 338 08 street_address: Terešov 108 normalization_timestamp: '2025-12-09T10:53:52.844787+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:47:46.418997+00:00' + source_url: https://teresov.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://teresov.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://teresov.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:47:46.418997+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/CZ-32-ZBI-L-OKVL.yaml b/data/custodian/CZ-32-ZBI-L-OKVL.yaml index 3ec20c3a56..c5970caf7e 100644 --- a/data/custodian/CZ-32-ZBI-L-OKVL.yaml +++ b/data/custodian/CZ-32-ZBI-L-OKVL.yaml @@ -41,13 +41,13 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: CZ-32-ZBI-L-OKVL - valid_from: "2025-12-10T09:47:09Z" + valid_from: '2025-12-10T09:47:09Z' valid_to: null - reason: "Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ" + reason: Corrected region code from CZ-PL to CZ-32 (Plzeň (Plzeňský)) per ISO 3166-2:CZ - ghcid: CZ-PL-ZBI-L-OKVL valid_from: null - valid_to: "2025-12-10T09:47:09Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:47:09Z' + reason: Previous GHCID with incorrect region code - ghcid: CZ-PL-ZBI-L-OKVL ghcid_numeric: 9449205815812347505 valid_from: '2025-12-06T23:37:32.062221+00:00' @@ -207,3 +207,22 @@ location: postal_code: 338 08 street_address: Lhotka 64 normalization_timestamp: '2025-12-09T10:53:52.870975+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:47:51.564764+00:00' + source_url: https://lhotka-ostrovec.katalog.rokyknih.cz + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lhotka-ostrovec.katalog.rokyknih.cz/themes/root/images/vufind-favicon.ico + source_url: https://lhotka-ostrovec.katalog.rokyknih.cz + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:47:51.564764+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-10-TAN-M-KDC.yaml b/data/custodian/JP-10-TAN-M-KDC.yaml index 12ba112594..372b762ed0 100644 --- a/data/custodian/JP-10-TAN-M-KDC.yaml +++ b/data/custodian/JP-10-TAN-M-KDC.yaml @@ -243,3 +243,36 @@ wikidata_enrichment: commons_category: Kanna Dinosaur Center image: Kanna Dinosaur Center 1.jpg wikidata_image: Kanna Dinosaur Center 1.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:08:46.175800+00:00' + source_url: https://dino-nakasato.org + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://dino-nakasato.org/wp-content/uploads/2018/12/dino-logo01.png + source_url: https://dino-nakasato.org + css_selector: '#header-l > p.sitename > a > img.sitename-bottom' + retrieved_on: '2025-12-23T21:08:46.175800+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 神流町恐竜センター + - claim_type: favicon_url + claim_value: https://dino-nakasato.org/wp-content/uploads/2018/12/cropped-web-icon-180x180.png + source_url: https://dino-nakasato.org + css_selector: '[document] > html.s-navi-right > head > link:nth-of-type(31)' + retrieved_on: '2025-12-23T21:08:46.175800+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://dino-nakasato.org/wp-content/uploads/2018/12/top01.jpg + source_url: https://dino-nakasato.org + css_selector: '[document] > html.s-navi-right > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-23T21:08:46.175800+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-10-TAT-L-MMLKJC.yaml b/data/custodian/JP-10-TAT-L-MMLKJC.yaml index 68c1ac37f2..57f9f31509 100644 --- a/data/custodian/JP-10-TAT-L-MMLKJC.yaml +++ b/data/custodian/JP-10-TAT-L-MMLKJC.yaml @@ -205,3 +205,22 @@ location: geonames_id: 1850559 geonames_name: Tatebayashi feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:08:55.686878+00:00' + source_url: https://opac.kanto-gakuen.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://opac.kanto-gakuen.ac.jp/favicon.ico + source_url: https://opac.kanto-gakuen.ac.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:08:55.686878+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-10-TAT-M-GMAT.yaml b/data/custodian/JP-10-TAT-M-GMAT.yaml index afcb7395c1..af0f4cbad0 100644 --- a/data/custodian/JP-10-TAT-M-GMAT.yaml +++ b/data/custodian/JP-10-TAT-M-GMAT.yaml @@ -258,3 +258,37 @@ wikidata_enrichment: - id: Q11602062 label: 第一工房 description: '' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:09:12.585244+00:00' + source_url: http://www.gmat.pref.gunma.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://gmat.pref.gunma.jp/wp/wp-content/themes/gmat/images/site-logo.svg + source_url: http://www.gmat.pref.gunma.jp + css_selector: '#header > div.header_container.outer_max:nth-of-type(2) > div.header_logo_container + > h1 > a > img' + retrieved_on: '2025-12-23T21:09:12.585244+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 群馬県立館林美術館 + - claim_type: favicon_url + claim_value: https://gmat.pref.gunma.jp/wp/wp-content/themes/gmat/images/favicon.ico + source_url: http://www.gmat.pref.gunma.jp + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T21:09:12.585244+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://gmat.pref.gunma.jp/wp/wp-content/uploads/2023/01/ogp.jpg + source_url: http://www.gmat.pref.gunma.jp + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T21:09:12.585244+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-10-TOM-L-TCL.yaml b/data/custodian/JP-10-TOM-L-TCL.yaml index 77e5fc63b9..430765de41 100644 --- a/data/custodian/JP-10-TOM-L-TCL.yaml +++ b/data/custodian/JP-10-TOM-L-TCL.yaml @@ -204,3 +204,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library-tomioka.annexis.jp/ wikidata_official_website: http://www.library-tomioka.annexis.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:09:52.250662+00:00' + source_url: http://www.library-tomioka.annexis.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.library-tomioka.annexis.jp/css/img/apple-touch-icon.png + source_url: http://www.library-tomioka.annexis.jp + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:09:52.250662+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www.library-tomioka.annexis.jp/design_img/ + source_url: http://www.library-tomioka.annexis.jp + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T21:09:52.250662+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-TOM-M-GMNH.yaml b/data/custodian/JP-10-TOM-M-GMNH.yaml index 81b9d9af41..e6ef3e3636 100644 --- a/data/custodian/JP-10-TOM-M-GMNH.yaml +++ b/data/custodian/JP-10-TOM-M-GMNH.yaml @@ -282,3 +282,22 @@ location: postal_code: 370-2345 street_address: KAMIKUROIWA, Tomioka Shi, Gumma Ken, 370-2345 normalization_timestamp: '2025-12-09T06:54:38.462863+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:10:02.379961+00:00' + source_url: http://www.gmnh.pref.gunma.jp/en + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.gmnh.pref.gunma.jp/wp-content/uploads/cropped-512-180x180.png + source_url: http://www.gmnh.pref.gunma.jp/en + css_selector: '[document] > html > head > link:nth-of-type(12)' + retrieved_on: '2025-12-23T21:10:02.379961+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-10-TOM-M-TSM.yaml b/data/custodian/JP-10-TOM-M-TSM.yaml index 107dee85e3..0f17958e93 100644 --- a/data/custodian/JP-10-TOM-M-TSM.yaml +++ b/data/custodian/JP-10-TOM-M-TSM.yaml @@ -385,3 +385,38 @@ wikidata_enrichment: - id: Q17217830 label: Edmond Auguste Bastien description: フランスの技術者 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:10:26.968558+00:00' + source_url: https://www.tomioka-silk.jp/_tomioka-silk-mill + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.tomioka-silk.jp/files/user/assets/imgs/common/logo.svg?v=1709039147 + source_url: https://www.tomioka-silk.jp/_tomioka-silk-mill + css_selector: '#header > div.header__drawerMenu:nth-of-type(2) > div.header__drawerMenuWrap + > nav.drawerMenu > div.drawerMenu__inner:nth-of-type(2) > div.drawerMenu__logo + > a > img.sp-hidden' + retrieved_on: '2025-12-23T21:10:26.968558+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 富岡製糸場 + - claim_type: favicon_url + claim_value: https://www.tomioka-silk.jp/files/user/assets/imgs/favicon.svg?v=1708495689 + source_url: https://www.tomioka-silk.jp/_tomioka-silk-mill + css_selector: '[document] > html.chrome.notouch > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:10:26.968558+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.tomioka-silk.jp/files/user/assets/img/logo.png + source_url: https://www.tomioka-silk.jp/_tomioka-silk-mill + css_selector: '[document] > html.chrome.notouch > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T21:10:26.968558+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-10-TON-L-K-kawabamurabunkakaikantoshoshitsu.yaml b/data/custodian/JP-10-TON-L-K-kawabamurabunkakaikantoshoshitsu.yaml index ec9f3ec67b..d791963183 100644 --- a/data/custodian/JP-10-TON-L-K-kawabamurabunkakaikantoshoshitsu.yaml +++ b/data/custodian/JP-10-TON-L-K-kawabamurabunkakaikantoshoshitsu.yaml @@ -201,3 +201,37 @@ wikidata_enrichment: wikidata_web: official_website: http://www.vill.kawaba.gunma.jp/gyosei/f_kyoiku/tosyo.html wikidata_official_website: http://www.vill.kawaba.gunma.jp/gyosei/f_kyoiku/tosyo.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:10:36.054953+00:00' + source_url: http://www.vill.kawaba.gunma.jp/gyosei/f_kyoiku/tosyo.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.vill.kawaba.gunma.jp/common/images/img-logo.png + source_url: http://www.vill.kawaba.gunma.jp/gyosei/f_kyoiku/tosyo.html + css_selector: '#top-head > div.siteHeader__inner:nth-of-type(2) > p.siteHeader__logo + > a.siteHeader__navbarBrand > img' + retrieved_on: '2025-12-23T21:10:36.054953+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 川場村 + - claim_type: favicon_url + claim_value: http://www.vill.kawaba.gunma.jp/common/images/apple-touch-icon.png + source_url: http://www.vill.kawaba.gunma.jp/gyosei/f_kyoiku/tosyo.html + css_selector: '[document] > html.fontsizeS > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T21:10:36.054953+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.vill.kawaba.gunma.jp/common/images/ogp.png + source_url: http://www.vill.kawaba.gunma.jp/gyosei/f_kyoiku/tosyo.html + css_selector: '[document] > html.fontsizeS > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T21:10:36.054953+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-10-TON-M-TIM.yaml b/data/custodian/JP-10-TON-M-TIM.yaml index 218523e3a3..b83730b381 100644 --- a/data/custodian/JP-10-TON-M-TIM.yaml +++ b/data/custodian/JP-10-TON-M-TIM.yaml @@ -252,3 +252,22 @@ location: postal_code: 379-1619 street_address: TANIGAWA, Tone Gun Minakami Machi, Gumma Ken, 379-1619 normalization_timestamp: '2025-12-09T06:54:38.701376+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:10:57.619868+00:00' + source_url: http://tenichi-museum.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://tenichi-museum.com/wp-content/themes/tenichi_museum/favicon.ico + source_url: http://tenichi-museum.com + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:10:57.619868+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-10-YOS-M-TMM.yaml b/data/custodian/JP-10-YOS-M-TMM.yaml index 4527828e73..0f970ccfd2 100644 --- a/data/custodian/JP-10-YOS-M-TMM.yaml +++ b/data/custodian/JP-10-YOS-M-TMM.yaml @@ -254,3 +254,22 @@ location: geonames_id: 1848194 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:31.212765+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:11:06.599902+00:00' + source_url: http://www.city.takasaki.gunma.jp/docs/2013121900164 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.takasaki.gunma.jp/apple-touch-icon.png + source_url: http://www.city.takasaki.gunma.jp/docs/2013121900164 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:11:06.599902+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-AGE-L-ALA.yaml b/data/custodian/JP-11-AGE-L-ALA.yaml index bbd5ff609e..c8757bc7a1 100644 --- a/data/custodian/JP-11-AGE-L-ALA.yaml +++ b/data/custodian/JP-11-AGE-L-ALA.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ageo.lg.jp/site/library/ wikidata_official_website: http://www.city.ageo.lg.jp/site/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:11:17.195218+00:00' + source_url: http://www.city.ageo.lg.jp/site/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.ageo.lg.jp/apple-touch-icon.png + source_url: http://www.city.ageo.lg.jp/site/library + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:11:17.195218+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-AGE-L-ALH-ageoshi_library_haraichikominkantoshoshitsu.yaml b/data/custodian/JP-11-AGE-L-ALH-ageoshi_library_haraichikominkantoshoshitsu.yaml index 19c0b724c7..bb33759ca4 100644 --- a/data/custodian/JP-11-AGE-L-ALH-ageoshi_library_haraichikominkantoshoshitsu.yaml +++ b/data/custodian/JP-11-AGE-L-ALH-ageoshi_library_haraichikominkantoshoshitsu.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ageo.lg.jp/site/library/ wikidata_official_website: http://www.city.ageo.lg.jp/site/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:11:24.705015+00:00' + source_url: http://www.city.ageo.lg.jp/site/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.ageo.lg.jp/apple-touch-icon.png + source_url: http://www.city.ageo.lg.jp/site/library + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:11:24.705015+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-AGE-L-ALH.yaml b/data/custodian/JP-11-AGE-L-ALH.yaml index 52910b64e5..512a099f03 100644 --- a/data/custodian/JP-11-AGE-L-ALH.yaml +++ b/data/custodian/JP-11-AGE-L-ALH.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ageo.lg.jp/site/library/ wikidata_official_website: http://www.city.ageo.lg.jp/site/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:11:31.783728+00:00' + source_url: http://www.city.ageo.lg.jp/site/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.ageo.lg.jp/apple-touch-icon.png + source_url: http://www.city.ageo.lg.jp/site/library + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:11:31.783728+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-AGE-L-ALK-ageoshi_library_kamihirakominkantoshoshitsu.yaml b/data/custodian/JP-11-AGE-L-ALK-ageoshi_library_kamihirakominkantoshoshitsu.yaml index aaf7b68dc9..bd80db3800 100644 --- a/data/custodian/JP-11-AGE-L-ALK-ageoshi_library_kamihirakominkantoshoshitsu.yaml +++ b/data/custodian/JP-11-AGE-L-ALK-ageoshi_library_kamihirakominkantoshoshitsu.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ageo.lg.jp/site/library/ wikidata_official_website: http://www.city.ageo.lg.jp/site/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:11:40.508612+00:00' + source_url: http://www.city.ageo.lg.jp/site/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.ageo.lg.jp/apple-touch-icon.png + source_url: http://www.city.ageo.lg.jp/site/library + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:11:40.508612+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-AGE-L-ALK.yaml b/data/custodian/JP-11-AGE-L-ALK.yaml index 697be8bdaf..ecdb5d3fe1 100644 --- a/data/custodian/JP-11-AGE-L-ALK.yaml +++ b/data/custodian/JP-11-AGE-L-ALK.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ageo.lg.jp/site/library/ wikidata_official_website: http://www.city.ageo.lg.jp/site/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:11:47.724834+00:00' + source_url: http://www.city.ageo.lg.jp/site/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.ageo.lg.jp/apple-touch-icon.png + source_url: http://www.city.ageo.lg.jp/site/library + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:11:47.724834+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-AGE-L-ALO-ageoshi_library_oishibunkan.yaml b/data/custodian/JP-11-AGE-L-ALO-ageoshi_library_oishibunkan.yaml index 9a81bf9573..933fbc907f 100644 --- a/data/custodian/JP-11-AGE-L-ALO-ageoshi_library_oishibunkan.yaml +++ b/data/custodian/JP-11-AGE-L-ALO-ageoshi_library_oishibunkan.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ageo.lg.jp/site/library/ wikidata_official_website: http://www.city.ageo.lg.jp/site/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:11:54.600654+00:00' + source_url: http://www.city.ageo.lg.jp/site/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.ageo.lg.jp/apple-touch-icon.png + source_url: http://www.city.ageo.lg.jp/site/library + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:11:54.600654+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-AGE-L-ALO.yaml b/data/custodian/JP-11-AGE-L-ALO.yaml index 90be863fb5..37021096ff 100644 --- a/data/custodian/JP-11-AGE-L-ALO.yaml +++ b/data/custodian/JP-11-AGE-L-ALO.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ageo.lg.jp/site/library/ wikidata_official_website: http://www.city.ageo.lg.jp/site/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:12:03.226713+00:00' + source_url: http://www.city.ageo.lg.jp/site/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.ageo.lg.jp/apple-touch-icon.png + source_url: http://www.city.ageo.lg.jp/site/library + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:12:03.226713+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-AGE-L-ALT.yaml b/data/custodian/JP-11-AGE-L-ALT.yaml index 2a5257854f..9ded37c92d 100644 --- a/data/custodian/JP-11-AGE-L-ALT.yaml +++ b/data/custodian/JP-11-AGE-L-ALT.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ageo.lg.jp/site/library/ wikidata_official_website: http://www.city.ageo.lg.jp/site/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:12:11.655101+00:00' + source_url: http://www.city.ageo.lg.jp/site/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.ageo.lg.jp/apple-touch-icon.png + source_url: http://www.city.ageo.lg.jp/site/library + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:12:11.655101+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-AKA-M-UMAM.yaml b/data/custodian/JP-11-AKA-M-UMAM.yaml index 544cd31555..31f0591131 100644 --- a/data/custodian/JP-11-AKA-M-UMAM.yaml +++ b/data/custodian/JP-11-AKA-M-UMAM.yaml @@ -1321,3 +1321,31 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/fnW-qmPuruw/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:12:25.602122+00:00' + source_url: https://www.umam.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.umam.jp/wp-content/themes/umam/assets/img/common/head_logo.svg + source_url: https://www.umam.jp + css_selector: '#js-header > div.header--top > div.container.header--inner > h1.header--logo + > a > img' + retrieved_on: '2025-12-23T21:12:25.602122+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + - claim_type: favicon_url + claim_value: https://www.umam.jp/wp-content/uploads/2021/12/favicons.png + source_url: https://www.umam.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T21:12:25.602122+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 32x32 + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-ASA-L-TLAL.yaml b/data/custodian/JP-11-ASA-L-TLAL.yaml index e96a446f6c..b9483c976f 100644 --- a/data/custodian/JP-11-ASA-L-TLAL.yaml +++ b/data/custodian/JP-11-ASA-L-TLAL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-ASA-L-TLAL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-ASA-L-TLAL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-ASA-L-TLAL ghcid_numeric: 11228366313432506843 valid_from: '2025-12-06T23:38:55.160139+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: TOYODAIGAKUFUZOKU Library ASAKA Library @@ -204,3 +205,37 @@ location: geonames_id: 1907299 geonames_name: Asaka feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:12:37.892838+00:00' + source_url: http://www.toyo.ac.jp/site/library/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.toyo.ac.jp/img/common/img_logo.svg + source_url: http://www.toyo.ac.jp/site/library/index.html + css_selector: '#gheader > div.gnav__frm > h1.gnav__logo > a.gnav__logo-link > + img.gnav__logo-img' + retrieved_on: '2025-12-23T21:12:37.892838+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 東洋大学 + - claim_type: favicon_url + claim_value: http://www.toyo.ac.jp/img/common/favicon.ico + source_url: http://www.toyo.ac.jp/site/library/index.html + css_selector: '[document] > html.js_domload.js_imgload > head > link:nth-of-type(6)' + retrieved_on: '2025-12-23T21:12:37.892838+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.toyo.ac.jp/site/library/ogp.jpg + source_url: http://www.toyo.ac.jp/site/library/index.html + css_selector: '[document] > html.js_domload.js_imgload > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T21:12:37.892838+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-ASA-M-ACM.yaml b/data/custodian/JP-11-ASA-M-ACM.yaml index a2fc71ece7..867e189b5f 100644 --- a/data/custodian/JP-11-ASA-M-ACM.yaml +++ b/data/custodian/JP-11-ASA-M-ACM.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-ASA-M-ACM - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-ASA-M-ACM valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-ASA-M-ACM ghcid_numeric: 11563568222836089840 valid_from: '2025-12-06T23:38:32.533530+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: ASAKA CITY MUSEUM @@ -225,3 +226,28 @@ location: postal_code: 351-0007 street_address: OKA, Asaka Shi, Saitama Ken, 351-0007 normalization_timestamp: '2025-12-09T11:30:40.762158+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:12:46.583352+00:00' + source_url: https://www.city.asaka.lg.jp/soshiki/42/museum.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.asaka.lg.jp/img/apple-touch-icon.png + source_url: https://www.city.asaka.lg.jp/soshiki/42/museum.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:12:46.583352+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.asaka.lg.jp/img/asaka-city.png + source_url: https://www.city.asaka.lg.jp/soshiki/42/museum.html + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T21:12:46.583352+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-CHI-L-HL.yaml b/data/custodian/JP-11-CHI-L-HL.yaml index 891278ecf2..965c47f570 100644 --- a/data/custodian/JP-11-CHI-L-HL.yaml +++ b/data/custodian/JP-11-CHI-L-HL.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.vill.higashichichibu.saitama.jp/soshiki/09/tosyokann.html wikidata_official_website: http://www.vill.higashichichibu.saitama.jp/soshiki/09/tosyokann.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:13:31.279102+00:00' + source_url: http://www.vill.higashichichibu.saitama.jp/soshiki/09/tosyokann.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.vill.higashichichibu.saitama.jp/img/icon/apple-touch-icon.png + source_url: http://www.vill.higashichichibu.saitama.jp/soshiki/09/tosyokann.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:13:31.279102+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-CHI-L-M.yaml b/data/custodian/JP-11-CHI-L-M.yaml index ee16955f90..65a8c29012 100644 --- a/data/custodian/JP-11-CHI-L-M.yaml +++ b/data/custodian/JP-11-CHI-L-M.yaml @@ -201,3 +201,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.minano.saitama.jp/section/kyoiku/5.htm wikidata_official_website: http://www.town.minano.saitama.jp/section/kyoiku/5.htm +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:13:41.655282+00:00' + source_url: http://www.town.minano.saitama.jp/section/kyoiku/5.htm + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.minano.saitama.jp/wp-content/themes/minano/favicon.ico + source_url: http://www.town.minano.saitama.jp/section/kyoiku/5.htm + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:13:41.655282+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-CHI-L-N.yaml b/data/custodian/JP-11-CHI-L-N.yaml index 3bb0256189..76d2826ef0 100644 --- a/data/custodian/JP-11-CHI-L-N.yaml +++ b/data/custodian/JP-11-CHI-L-N.yaml @@ -202,3 +202,23 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.nagatoro.saitama.jp/ wikidata_official_website: http://www.town.nagatoro.saitama.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:13:49.691276+00:00' + source_url: http://www.town.nagatoro.saitama.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.gstatic.com/images/branding/googlelogo/1x/googlelogo_color_42x16dp.png + source_url: http://www.town.nagatoro.saitama.jp + css_selector: '#google_language_translator > div.skiptranslate.goog-te-gadget + > span > a.VIpgJd-ZVi9od-l4eHX-hSRGPd > img' + retrieved_on: '2025-12-23T21:13:49.691276+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Google Translate + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-CHI-L-YL.yaml b/data/custodian/JP-11-CHI-L-YL.yaml index c506a74de1..f1a47b0b35 100644 --- a/data/custodian/JP-11-CHI-L-YL.yaml +++ b/data/custodian/JP-11-CHI-L-YL.yaml @@ -202,3 +202,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.yokoze.saitama.jp/soshiki/kyouiku/choumin/toshokan.html wikidata_official_website: http://www.town.yokoze.saitama.jp/soshiki/kyouiku/choumin/toshokan.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:14:05.174553+00:00' + source_url: http://www.town.yokoze.saitama.jp/soshiki/kyouiku/choumin/toshokan.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.yokoze.saitama.jp/wp-content/uploads/2020/03/cropped-favicon-180x180.jpg + source_url: http://www.town.yokoze.saitama.jp/soshiki/kyouiku/choumin/toshokan.html + css_selector: '[document] > html.no-touchevents.scriptdefer > head > link:nth-of-type(22)' + retrieved_on: '2025-12-23T21:14:05.174553+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.town.yokoze.saitama.jp/wp-content/uploads/2020/03/yokoze_ogp.png + source_url: http://www.town.yokoze.saitama.jp/soshiki/kyouiku/choumin/toshokan.html + css_selector: '[document] > html.no-touchevents.scriptdefer > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T21:14:05.174553+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-11-CHI-M-NMLMH.yaml b/data/custodian/JP-11-CHI-M-NMLMH.yaml index e3e3c0617c..e0058abfb5 100644 --- a/data/custodian/JP-11-CHI-M-NMLMH.yaml +++ b/data/custodian/JP-11-CHI-M-NMLMH.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-CHI-M-NMLMH - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-CHI-M-NMLMH valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-CHI-M-NMLMH ghcid_numeric: 7189388730573825394 valid_from: '2025-12-06T23:38:32.648656+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: NAGATORO-MACHI LOCAL MATERIAL HALL @@ -231,3 +232,23 @@ location: postal_code: 369-1305 street_address: NAGATORO, Chichibu Gun Nagatoro Machi, Saitama Ken, 369-1305 normalization_timestamp: '2025-12-09T11:32:25.076970+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:18:15.610540+00:00' + source_url: https://www.town.nagatoro.saitama.jp/nagatoro-2/siryokan + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.gstatic.com/images/branding/googlelogo/1x/googlelogo_color_42x16dp.png + source_url: https://www.town.nagatoro.saitama.jp/nagatoro-2/siryokan + css_selector: '#google_language_translator > div.skiptranslate.goog-te-gadget + > span > a.VIpgJd-ZVi9od-l4eHX-hSRGPd > img' + retrieved_on: '2025-12-23T21:18:15.610540+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Google Translate + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-CHI-M-SMNH.yaml b/data/custodian/JP-11-CHI-M-SMNH.yaml index 46c5b8938a..34509c65ca 100644 --- a/data/custodian/JP-11-CHI-M-SMNH.yaml +++ b/data/custodian/JP-11-CHI-M-SMNH.yaml @@ -673,3 +673,22 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/srDQ9JLOk10/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:18:27.776093+00:00' + source_url: http://www.shizen.spec.ed.jp/?page_id=164 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.shizen.spec.ed.jp/nc_favicon.ico?1547712870 + source_url: http://www.shizen.spec.ed.jp/?page_id=164 + css_selector: '[document] > html.ng-scope > head > link' + retrieved_on: '2025-12-23T21:18:27.776093+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-FUJ-L-BL.yaml b/data/custodian/JP-11-FUJ-L-BL.yaml index d765ffaffa..f2ab897a43 100644 --- a/data/custodian/JP-11-FUJ-L-BL.yaml +++ b/data/custodian/JP-11-FUJ-L-BL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-FUJ-L-BL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-FUJ-L-BL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-FUJ-L-BL ghcid_numeric: 6566134099849852200 valid_from: '2025-12-06T23:38:54.803664+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: BUNKYOGAKUINDAIGAKUFUJIMINO Library @@ -196,7 +197,8 @@ wikidata_enrichment: description: organisatie uit Japan - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: http://www.u-bunkyo.ac.jp/center/library/fujimino.html wikidata_official_website: http://www.u-bunkyo.ac.jp/center/library/fujimino.html @@ -218,3 +220,20 @@ location: geonames_id: 6822159 geonames_name: Fujimino feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:18:40.354415+00:00' + source_url: http://www.u-bunkyo.ac.jp/center/library/fujimino.html + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://www.bgu.ac.jp/bgu_sys/wp-content/uploads/sites/11/2022/08/ogp.png + source_url: http://www.u-bunkyo.ac.jp/center/library/fujimino.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T21:18:40.354415+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-FUJ-L-FCCL.yaml b/data/custodian/JP-11-FUJ-L-FCCL.yaml index a8440a84df..fcb96fedf8 100644 --- a/data/custodian/JP-11-FUJ-L-FCCL.yaml +++ b/data/custodian/JP-11-FUJ-L-FCCL.yaml @@ -206,3 +206,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.fujimi.saitama.jp wikidata_official_website: http://www.lib.fujimi.saitama.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:18:50.713499+00:00' + source_url: http://www.lib.fujimi.saitama.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.fujimi.saitama.jp/themes/lib_theme/images/logo.png + source_url: http://www.lib.fujimi.saitama.jp + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T21:18:50.713499+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 富士見市立図書館 + - claim_type: favicon_url + claim_value: https://www.lib.fujimi.saitama.jp/themes/lib_theme/favicon.ico + source_url: http://www.lib.fujimi.saitama.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:18:50.713499+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-FUJ-L-FLF.yaml b/data/custodian/JP-11-FUJ-L-FLF.yaml index 942cc666ce..f8035c6b7e 100644 --- a/data/custodian/JP-11-FUJ-L-FLF.yaml +++ b/data/custodian/JP-11-FUJ-L-FLF.yaml @@ -205,3 +205,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.fujimi.saitama.jp wikidata_official_website: http://www.lib.fujimi.saitama.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:19:31.892037+00:00' + source_url: http://www.lib.fujimi.saitama.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.fujimi.saitama.jp/themes/lib_theme/images/logo.png + source_url: http://www.lib.fujimi.saitama.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-23T21:19:31.892037+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 富士見市立図書館 + - claim_type: favicon_url + claim_value: https://www.lib.fujimi.saitama.jp/themes/lib_theme/favicon.ico + source_url: http://www.lib.fujimi.saitama.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:19:31.892037+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-FUJ-L-FLT.yaml b/data/custodian/JP-11-FUJ-L-FLT.yaml index b615bc558b..4316fc9561 100644 --- a/data/custodian/JP-11-FUJ-L-FLT.yaml +++ b/data/custodian/JP-11-FUJ-L-FLT.yaml @@ -205,3 +205,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.fujimi.saitama.jp/ wikidata_official_website: http://www.lib.fujimi.saitama.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:19:42.269309+00:00' + source_url: http://www.lib.fujimi.saitama.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.fujimi.saitama.jp/themes/lib_theme/images/logo.png + source_url: http://www.lib.fujimi.saitama.jp + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T21:19:42.269309+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 富士見市立図書館 + - claim_type: favicon_url + claim_value: https://www.lib.fujimi.saitama.jp/themes/lib_theme/favicon.ico + source_url: http://www.lib.fujimi.saitama.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:19:42.269309+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-FUJ-L-TTOML.yaml b/data/custodian/JP-11-FUJ-L-TTOML.yaml index 570a456661..b76a789560 100644 --- a/data/custodian/JP-11-FUJ-L-TTOML.yaml +++ b/data/custodian/JP-11-FUJ-L-TTOML.yaml @@ -213,3 +213,20 @@ wikidata_enrichment: - https://www.town.tara.lg.jp/koukyou/_1305.html - http://www2.tosyo-saga.jp/tara2/opac/top.do wikidata_official_website: *id007 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:20:23.471965+00:00' + source_url: https://www.town.tara.lg.jp/koukyou/_1305.html + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://www.town.tara.lg.jp/library/common/img_ogp.jpg + source_url: https://www.town.tara.lg.jp/koukyou/_1305.html + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T21:20:23.471965+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-FUJ-M-FMKHFM.yaml b/data/custodian/JP-11-FUJ-M-FMKHFM.yaml index ecf72d0bb4..550574c5dc 100644 --- a/data/custodian/JP-11-FUJ-M-FMKHFM.yaml +++ b/data/custodian/JP-11-FUJ-M-FMKHFM.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-FUJ-M-FMKHFM - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-FUJ-M-FMKHFM valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-FUJ-M-FMKHFM ghcid_numeric: 13376578648821354060 valid_from: '2025-12-06T23:38:32.601381+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: FUJIMINO MUNICIPAL KAMIFUKUOKA HISTORICAL AND FOLKLORE MUSEUM @@ -250,3 +251,36 @@ location: postal_code: 356-0022 street_address: NAGAMIYA, Fujimino Shi, Saitama Ken, 356-0022 normalization_timestamp: '2025-12-09T11:31:03.468934+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:20:33.042541+00:00' + source_url: https://www.city.fujimino.saitama.jp/soshikiichiran/kamifukuokarekishiminzokushiryokan/gakugeikakari/2550.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.fujimino.saitama.jp/theme/base/img_common/renewal_v2/pc_header_logo.png + source_url: https://www.city.fujimino.saitama.jp/soshikiichiran/kamifukuokarekishiminzokushiryokan/gakugeikakari/2550.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T21:20:33.042541+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: ふじみ野市 + - claim_type: favicon_url + claim_value: https://www.city.fujimino.saitama.jp/smartphone.png + source_url: https://www.city.fujimino.saitama.jp/soshikiichiran/kamifukuokarekishiminzokushiryokan/gakugeikakari/2550.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:20:33.042541+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.fujimino.saitama.jp/material/images/group/3/rekimin.jpg + source_url: https://www.city.fujimino.saitama.jp/soshikiichiran/kamifukuokarekishiminzokushiryokan/gakugeikakari/2550.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T21:20:33.042541+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-FUJ-M-MSMM.yaml b/data/custodian/JP-11-FUJ-M-MSMM.yaml index 7657080cf2..8f3406d850 100644 --- a/data/custodian/JP-11-FUJ-M-MSMM.yaml +++ b/data/custodian/JP-11-FUJ-M-MSMM.yaml @@ -239,3 +239,28 @@ location: postal_code: 354-0011 street_address: MIZUKO, Fujimi Shi, Saitama Ken, 354-0011 normalization_timestamp: '2025-12-09T11:31:49.848094+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:20:44.464532+00:00' + source_url: https://www.city.fujimi.saitama.jp/madoguchi_shisetsu/02shisetsu/shiryoukan/mizukokaiduka + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.fujimi.saitama.jp/images/apple-touch-icon.png + source_url: https://www.city.fujimi.saitama.jp/madoguchi_shisetsu/02shisetsu/shiryoukan/mizukokaiduka + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T21:20:44.464532+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.fujimi.saitama.jp/images/ogp-logo.png + source_url: https://www.city.fujimi.saitama.jp/madoguchi_shisetsu/02shisetsu/shiryoukan/mizukokaiduka + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:20:44.464532+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-FUJ-M-TCHFM.yaml b/data/custodian/JP-11-FUJ-M-TCHFM.yaml index 9e4767ed36..746c1a456c 100644 --- a/data/custodian/JP-11-FUJ-M-TCHFM.yaml +++ b/data/custodian/JP-11-FUJ-M-TCHFM.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-FUJ-M-TCHFM - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-FUJ-M-TCHFM valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-FUJ-M-TCHFM ghcid_numeric: 11292249157193815588 valid_from: '2025-12-06T23:38:40.089428+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: TARA-CHO HISTORICAL FOLK MUSEUM @@ -213,3 +214,20 @@ location: postal_code: 849-1602 street_address: TARA, Fujitsu Gun Tara Cho, Saga Ken, 849-1602 normalization_timestamp: '2025-12-09T11:32:05.816883+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:20:51.321470+00:00' + source_url: https://www.town.tara.lg.jp/koukyou/_1318.html + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://www.town.tara.lg.jp/library/common/img_ogp.jpg + source_url: https://www.town.tara.lg.jp/koukyou/_1318.html + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T21:20:51.321470+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-FUK-L-SITL.yaml b/data/custodian/JP-11-FUK-L-SITL.yaml index fffd8d13ee..45e467f7f5 100644 --- a/data/custodian/JP-11-FUK-L-SITL.yaml +++ b/data/custodian/JP-11-FUK-L-SITL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-FUK-L-SITL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-FUK-L-SITL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-FUK-L-SITL ghcid_numeric: 657064676052421111 valid_from: '2025-12-06T23:38:54.627721+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saitama Institute of Technology, Library @@ -190,7 +191,8 @@ wikidata_enrichment: description: organisatie uit Japan - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: https://www.sit.ac.jp/toshokan/ wikidata_official_website: https://www.sit.ac.jp/toshokan/ @@ -212,3 +214,28 @@ location: geonames_id: 8183228 geonames_name: Fukaya feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:21:30.785796+00:00' + source_url: https://www.sit.ac.jp/toshokan + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.sit.ac.jp/common/image/app-icon.png + source_url: https://www.sit.ac.jp/toshokan + css_selector: '[document] > html.js > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:21:30.785796+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.sit.ac.jp/common/image/sns-icon.jpg + source_url: https://www.sit.ac.jp/toshokan + css_selector: '[document] > html.js > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T21:21:30.785796+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-FUK-L-TUL.yaml b/data/custodian/JP-11-FUK-L-TUL.yaml index 15303fb81d..cda60e847a 100644 --- a/data/custodian/JP-11-FUK-L-TUL.yaml +++ b/data/custodian/JP-11-FUK-L-TUL.yaml @@ -205,3 +205,28 @@ location: geonames_id: 8183228 geonames_name: Fukaya feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:21:44.936155+00:00' + source_url: http://www.tohto.ac.jp/campuslife/campus/library/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.tohto.ac.jp/wp-content/assets/img/icon/apple-touch-icon.png + source_url: http://www.tohto.ac.jp/campuslife/campus/library/index.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:21:44.936155+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: http://www.tohto.ac.jp/wp-content/assets/img/common/ogp.jpg + source_url: http://www.tohto.ac.jp/campuslife/campus/library/index.html + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T21:21:44.936155+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-FUK-M-FMA.yaml b/data/custodian/JP-11-FUK-M-FMA.yaml index 40d414b189..d5d2284317 100644 --- a/data/custodian/JP-11-FUK-M-FMA.yaml +++ b/data/custodian/JP-11-FUK-M-FMA.yaml @@ -386,3 +386,28 @@ location: geonames_id: 1863917 feature_code: PPLA2 normalization_timestamp: '2025-12-09T06:53:31.305566+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:21:54.827056+00:00' + source_url: https://www.city.fukuyama.hiroshima.jp/site/fukuyama-museum + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.fukuyama.hiroshima.jp/favicon.ico + source_url: https://www.city.fukuyama.hiroshima.jp/site/fukuyama-museum + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:21:54.827056+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.fukuyama.hiroshima.jp/img/ogp-fukuyama.png + source_url: https://www.city.fukuyama.hiroshima.jp/site/fukuyama-museum + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:21:54.827056+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-FUK-M-FML.yaml b/data/custodian/JP-11-FUK-M-FML.yaml index 0168ed70dd..bf5cf5a152 100644 --- a/data/custodian/JP-11-FUK-M-FML.yaml +++ b/data/custodian/JP-11-FUK-M-FML.yaml @@ -424,3 +424,28 @@ location: geonames_id: 1863917 feature_code: PPLA2 normalization_timestamp: '2025-12-09T06:53:31.369644+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:22:02.506959+00:00' + source_url: https://www.city.fukuyama.hiroshima.jp/site/bungakukan + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.fukuyama.hiroshima.jp/favicon.ico + source_url: https://www.city.fukuyama.hiroshima.jp/site/bungakukan + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:22:02.506959+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.fukuyama.hiroshima.jp/img/ogp-fukuyama.png + source_url: https://www.city.fukuyama.hiroshima.jp/site/bungakukan + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:22:02.506959+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-FUK-M-HRPM.yaml b/data/custodian/JP-11-FUK-M-HRPM.yaml index 2b8b1fc4cb..7405569483 100644 --- a/data/custodian/JP-11-FUK-M-HRPM.yaml +++ b/data/custodian/JP-11-FUK-M-HRPM.yaml @@ -370,3 +370,28 @@ location: geonames_id: 1863917 feature_code: PPLA2 normalization_timestamp: '2025-12-09T06:53:31.424102+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:22:11.329520+00:00' + source_url: https://www.city.fukuyama.hiroshima.jp/soshiki/jinkenheiwa + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.fukuyama.hiroshima.jp/favicon.ico + source_url: https://www.city.fukuyama.hiroshima.jp/soshiki/jinkenheiwa + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:22:11.329520+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.fukuyama.hiroshima.jp/img/ogp-fukuyama.png + source_url: https://www.city.fukuyama.hiroshima.jp/soshiki/jinkenheiwa + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:22:11.329520+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-GYO-L-M.yaml b/data/custodian/JP-11-GYO-L-M.yaml index 88b98cb3b5..4ea9bc9990 100644 --- a/data/custodian/JP-11-GYO-L-M.yaml +++ b/data/custodian/JP-11-GYO-L-M.yaml @@ -205,3 +205,22 @@ location: geonames_id: 1863482 geonames_name: Gyōda feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:24:37.974045+00:00' + source_url: http://www.iot.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.iot.ac.jp/library/wp-content/themes/stinger8-child/images/logo.png + source_url: http://www.iot.ac.jp/library + css_selector: '#header-l > p.sitename > a > img' + retrieved_on: '2025-12-23T21:24:37.974045+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-GYO-M-GCM.yaml b/data/custodian/JP-11-GYO-M-GCM.yaml index 959bd50726..b84638102c 100644 --- a/data/custodian/JP-11-GYO-M-GCM.yaml +++ b/data/custodian/JP-11-GYO-M-GCM.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-GYO-M-GCM - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-GYO-M-GCM valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-GYO-M-GCM ghcid_numeric: 13872364634387157514 valid_from: '2025-12-06T23:38:32.424483+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: GYODA CITY MUSEUM @@ -255,3 +256,36 @@ location: postal_code: 361-0052 street_address: HOMMARU, Gyoda Shi, Saitama Ken, 361-0052 normalization_timestamp: '2025-12-09T11:30:56.655858+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:24:53.243084+00:00' + source_url: https://www.city.gyoda.lg.jp/kyoiku/iinkai/sisetu/hakubutukan.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.gyoda.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: https://www.city.gyoda.lg.jp/kyoiku/iinkai/sisetu/hakubutukan.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T21:24:53.243084+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 行田市 Gyoda City + - claim_type: favicon_url + claim_value: https://www.city.gyoda.lg.jp/theme/base/img_common/smartphone.png + source_url: https://www.city.gyoda.lg.jp/kyoiku/iinkai/sisetu/hakubutukan.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:24:53.243084+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.gyoda.lg.jp/theme/base/img_common/ogp_noimage.png + source_url: https://www.city.gyoda.lg.jp/kyoiku/iinkai/sisetu/hakubutukan.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T21:24:53.243084+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-GYO-M-MSABM.yaml b/data/custodian/JP-11-GYO-M-MSABM.yaml index dc955e9b57..3d4f83f9a5 100644 --- a/data/custodian/JP-11-GYO-M-MSABM.yaml +++ b/data/custodian/JP-11-GYO-M-MSABM.yaml @@ -273,3 +273,22 @@ wikidata_enrichment: image: Saitamaprefecturalmusuemofsakitamaancientburialmound-outside-may2011.jpg commons_category: Saitama Prefectural Museum of the Sakitama Ancient Burial Mounds wikidata_image: Saitamaprefecturalmusuemofsakitamaancientburialmound-outside-may2011.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:25:08.681777+00:00' + source_url: http://www.sakitama-muse.spec.ed.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.sakitama-muse.spec.ed.jp/nc_favicon.ico?1547712870 + source_url: http://www.sakitama-muse.spec.ed.jp + css_selector: '[document] > html.ng-scope > head > link' + retrieved_on: '2025-12-23T21:25:08.681777+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-HAN-L-HCL.yaml b/data/custodian/JP-11-HAN-L-HCL.yaml index 50e91c17b4..54c208f381 100644 --- a/data/custodian/JP-11-HAN-L-HCL.yaml +++ b/data/custodian/JP-11-HAN-L-HCL.yaml @@ -206,3 +206,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.hanno-lib.jp/ wikidata_official_website: http://www.hanno-lib.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:25:20.808759+00:00' + source_url: http://www.hanno-lib.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.hanno-lib.jp/img/og_ico.png + source_url: http://www.hanno-lib.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:25:20.808759+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-HAN-L-HL.yaml b/data/custodian/JP-11-HAN-L-HL.yaml index ebf6451b33..761558da70 100644 --- a/data/custodian/JP-11-HAN-L-HL.yaml +++ b/data/custodian/JP-11-HAN-L-HL.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.hanno-lib.jp/institution/kodomo.html wikidata_official_website: http://www.hanno-lib.jp/institution/kodomo.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:25:29.030487+00:00' + source_url: http://www.hanno-lib.jp/institution/kodomo.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.hanno-lib.jp/img/og_ico.png + source_url: http://www.hanno-lib.jp/institution/kodomo.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:25:29.030487+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-HAN-L-SUMC.yaml b/data/custodian/JP-11-HAN-L-SUMC.yaml index 760d7ba098..53fa467608 100644 --- a/data/custodian/JP-11-HAN-L-SUMC.yaml +++ b/data/custodian/JP-11-HAN-L-SUMC.yaml @@ -214,3 +214,20 @@ location: geonames_id: 1863183 geonames_name: Hannō feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:26:09.180467+00:00' + source_url: http://www.surugadai.ac.jp/mediacenter + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: http://www.surugadai.ac.jp/mediacenter/common/img/ogimg_media.png + source_url: http://www.surugadai.ac.jp/mediacenter + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T21:26:09.180467+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-HAN-M-SA.yaml b/data/custodian/JP-11-HAN-M-SA.yaml index 37e48dabb1..e0dd2c7522 100644 --- a/data/custodian/JP-11-HAN-M-SA.yaml +++ b/data/custodian/JP-11-HAN-M-SA.yaml @@ -1323,3 +1323,31 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/A04o3TlO9oE/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:26:20.528596+00:00' + source_url: https://www.parks.or.jp/suizokukan + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.parks.or.jp/suizokukan/img/site_logo.png + source_url: https://www.parks.or.jp/suizokukan + css_selector: '[document] > html > body.top > header > div.header_inner > div.header_sub + > p.site_logo.pc > a > img' + retrieved_on: '2025-12-23T21:26:20.528596+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: さいたまの公園 + - claim_type: favicon_url + claim_value: https://www.parks.or.jp/favicon.ico + source_url: https://www.parks.or.jp/suizokukan + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:26:20.528596+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-HID-L-LSWSJC.yaml b/data/custodian/JP-11-HID-L-LSWSJC.yaml index b447f2ae8d..e0a8f451a0 100644 --- a/data/custodian/JP-11-HID-L-LSWSJC.yaml +++ b/data/custodian/JP-11-HID-L-LSWSJC.yaml @@ -211,3 +211,28 @@ location: geonames_id: 6822161 geonames_name: Hidaka feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:27:04.865043+00:00' + source_url: http://www.saijo.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.saijo.ac.jp/favicon.ico + source_url: http://www.saijo.ac.jp/library + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:27:04.865043+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.saijo.ac.jp/ogimage.png + source_url: http://www.saijo.ac.jp/library + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T21:27:04.865043+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-HID-L-SMUFHMC.yaml b/data/custodian/JP-11-HID-L-SMUFHMC.yaml index bc42c2d74a..8237b96984 100644 --- a/data/custodian/JP-11-HID-L-SMUFHMC.yaml +++ b/data/custodian/JP-11-HID-L-SMUFHMC.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-HID-L-SMUFHMC - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-HID-L-SMUFHMC valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-HID-L-SMUFHMC ghcid_numeric: 15687691070802458812 valid_from: '2025-12-06T23:38:54.607145+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saitama Medical University Faculty of Health & Medical Care @@ -204,3 +205,22 @@ location: geonames_id: 6822161 geonames_name: Hidaka feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:27:12.778749+00:00' + source_url: http://www.saitama-med.ac.jp/smulibrary/instruction.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.saitama-med.ac.jp/opac/images/cyan/favicon.ico + source_url: http://www.saitama-med.ac.jp/smulibrary/instruction.html + css_selector: '[document] > html > head > link:nth-of-type(9)' + retrieved_on: '2025-12-23T21:27:12.778749+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-HIG-L-DBU6AL.yaml b/data/custodian/JP-11-HIG-L-DBU6AL.yaml index eaaffffbdd..5c5100464e 100644 --- a/data/custodian/JP-11-HIG-L-DBU6AL.yaml +++ b/data/custodian/JP-11-HIG-L-DBU6AL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-HIG-L-DBU6AL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-HIG-L-DBU6AL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-HIG-L-DBU6AL ghcid_numeric: 7902938626425818057 valid_from: '2025-12-06T23:38:55.002260+00:00' @@ -205,3 +206,28 @@ geocoding: resolved_place: 大東文化大学 resolved_city: Higashimatsuyama Shi timestamp: '2025-12-09T23:13:33.001854+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:27:21.914604+00:00' + source_url: http://www.daito.ac.jp/research/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.daito.ac.jp/assets/img/apple-touch-icon.png + source_url: http://www.daito.ac.jp/research/library + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T21:27:21.914604+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.daito.ac.jp/assets/img/img_logo_ogp.jpg + source_url: http://www.daito.ac.jp/research/library + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T21:27:21.914604+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-HIG-L-GL.yaml b/data/custodian/JP-11-HIG-L-GL.yaml index 81fce5b633..0b57667538 100644 --- a/data/custodian/JP-11-HIG-L-GL.yaml +++ b/data/custodian/JP-11-HIG-L-GL.yaml @@ -208,3 +208,28 @@ wikidata_enrichment: wikidata_web: official_website: https://www.genkai-library.com/ wikidata_official_website: https://www.genkai-library.com/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:27:27.358484+00:00' + source_url: https://www.genkai-library.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://u.jimcdn.com/cms/o/sc40ef1de32f3b025/img/favicon.png?t=1653532143 + source_url: https://www.genkai-library.com + css_selector: '[document] > html.j-feature-js.j-feature-no-touch > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:27:27.358484+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://image.jimcdn.com/app/cms/image/transf/none/path/sc40ef1de32f3b025/backgroundarea/i3ff1607fb14b5d2b/version/1673923227/image.jpg + source_url: https://www.genkai-library.com + css_selector: '[document] > html.j-feature-js.j-feature-no-touch > head > meta:nth-of-type(18)' + retrieved_on: '2025-12-23T21:27:27.358484+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-HIG-M-HB.yaml b/data/custodian/JP-11-HIG-M-HB.yaml index 248914feeb..5660cf7e88 100644 --- a/data/custodian/JP-11-HIG-M-HB.yaml +++ b/data/custodian/JP-11-HIG-M-HB.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-HIG-M-HB - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-HIG-M-HB valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-HIG-M-HB ghcid_numeric: 12188837978169936986 valid_from: '2025-12-06T23:38:32.471163+00:00' @@ -232,3 +233,28 @@ geocoding: resolved_place: Oya resolved_city: Higashimatsuyama Shi timestamp: '2025-12-09T23:13:33.112446+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:27:49.736620+00:00' + source_url: https://www.seibu-la.co.jp/higashimatsuyama-pg + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.higashimatsuyama-botan.com/img/apple-touch-icon.png + source_url: https://www.seibu-la.co.jp/higashimatsuyama-pg + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:27:49.736620+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.higashimatsuyama-botan.com/img/ogimage.jpg + source_url: https://www.seibu-la.co.jp/higashimatsuyama-pg + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T21:27:49.736620+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-HIG-M-HUM.yaml b/data/custodian/JP-11-HIG-M-HUM.yaml index b12bcd7e90..36477dcf16 100644 --- a/data/custodian/JP-11-HIG-M-HUM.yaml +++ b/data/custodian/JP-11-HIG-M-HUM.yaml @@ -785,3 +785,22 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/N3tOsFgZGR0/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:27:56.480258+00:00' + source_url: https://www.digital-museum.hiroshima-u.ac.jp/~humuseum + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.digital-museum.hiroshima-u.ac.jp/img/icon/favicon.ico + source_url: https://www.digital-museum.hiroshima-u.ac.jp/~humuseum + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T21:27:56.480258+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-HIG-M-MGHP.yaml b/data/custodian/JP-11-HIG-M-MGHP.yaml index 32fcd3bada..e9c90a50c8 100644 --- a/data/custodian/JP-11-HIG-M-MGHP.yaml +++ b/data/custodian/JP-11-HIG-M-MGHP.yaml @@ -265,3 +265,30 @@ wikidata_enrichment: image: Maruki museum.jpg commons_category: Maruki Gallery for the Hiroshima Panels wikidata_image: Maruki museum.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:28:04.466145+00:00' + source_url: https://marukigallery.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://marukigallery.jp/wp-content/themes/maruki/img/safari-pinned-tab.svg + source_url: https://marukigallery.jp + css_selector: '[document] > html.wf-loading.wf-a-otf-ryumin-pr6n-n3-loading > + head > link:nth-of-type(12)' + retrieved_on: '2025-12-23T21:28:04.466145+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://marukigallery.jp/wp-content/uploads/2020/01/maruki_gallery.jpg + source_url: https://marukigallery.jp + css_selector: '[document] > html.wf-loading.wf-a-otf-ryumin-pr6n-n3-loading > + head > meta:nth-of-type(16)' + retrieved_on: '2025-12-23T21:28:04.466145+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 5 diff --git a/data/custodian/JP-11-HIG-M-PMS.yaml b/data/custodian/JP-11-HIG-M-PMS.yaml index 77463dddb2..bf3fe60353 100644 --- a/data/custodian/JP-11-HIG-M-PMS.yaml +++ b/data/custodian/JP-11-HIG-M-PMS.yaml @@ -264,3 +264,22 @@ location: postal_code: 355-0065 street_address: IWADONO, Higashimatsuyama Shi, Saitama Ken, 355-0065 normalization_timestamp: '2025-12-09T11:30:30.358651+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:28:10.182797+00:00' + source_url: https://www.saitama-peacemuseum.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.parastorage.com/client/pfavico.ico + source_url: https://www.saitama-peacemuseum.com + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:28:10.182797+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: 192x192 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-HIG-M-SCSZ.yaml b/data/custodian/JP-11-HIG-M-SCSZ.yaml index 9f95853ba0..72b2d434e9 100644 --- a/data/custodian/JP-11-HIG-M-SCSZ.yaml +++ b/data/custodian/JP-11-HIG-M-SCSZ.yaml @@ -970,3 +970,31 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/mVJvoyR1wfg/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:28:22.802441+00:00' + source_url: https://www.parks.or.jp/sczoo + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.parks.or.jp/sczoo/img/site_logo.png + source_url: https://www.parks.or.jp/sczoo + css_selector: '[document] > html > body.top > header > div.header_inner > div.header_sub + > p.site_logo.pc > a > img' + retrieved_on: '2025-12-23T21:28:22.802441+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: さいたまの公園 + - claim_type: favicon_url + claim_value: https://www.parks.or.jp/favicon.ico + source_url: https://www.parks.or.jp/sczoo + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:28:22.802441+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-HIK-L-CL.yaml b/data/custodian/JP-11-HIK-L-CL.yaml index 7bf09386e5..68378c792a 100644 --- a/data/custodian/JP-11-HIK-L-CL.yaml +++ b/data/custodian/JP-11-HIK-L-CL.yaml @@ -202,3 +202,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.ranzan.saitama.jp/0000000154.html wikidata_official_website: http://www.town.ranzan.saitama.jp/0000000154.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:28:31.304441+00:00' + source_url: http://www.town.ranzan.saitama.jp/0000000154.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.town.ranzan.saitama.jp/design_img/favicon.ico + source_url: http://www.town.ranzan.saitama.jp/0000000154.html + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T21:28:31.304441+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.town.ranzan.saitama.jp/design_img/og_image.png + source_url: http://www.town.ranzan.saitama.jp/0000000154.html + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T21:28:31.304441+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-KAR-L-KL-karatsushiochi_library.yaml b/data/custodian/JP-11-KAR-L-KL-karatsushiochi_library.yaml index a61e590ef3..ed6c857f23 100644 --- a/data/custodian/JP-11-KAR-L-KL-karatsushiochi_library.yaml +++ b/data/custodian/JP-11-KAR-L-KL-karatsushiochi_library.yaml @@ -213,3 +213,22 @@ wikidata_enrichment: wikidata_media: image: Karatsu City Ouchi Library.jpg wikidata_image: Karatsu City Ouchi Library.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:48:21.844725+00:00' + source_url: http://www.city.karatsu.lg.jp/kindai-toshokan/kyoiku/toshokan/ochi/shisetsu.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.karatsu.lg.jp/apple-touch-icon.png + source_url: http://www.city.karatsu.lg.jp/kindai-toshokan/kyoiku/toshokan/ochi/shisetsu.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:48:21.844725+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-KAR-M-FKB.yaml b/data/custodian/JP-11-KAR-M-FKB.yaml index 23ec8dac1d..3216574f18 100644 --- a/data/custodian/JP-11-KAR-M-FKB.yaml +++ b/data/custodian/JP-11-KAR-M-FKB.yaml @@ -246,3 +246,22 @@ location: postal_code: 847-0047 street_address: HOMMACHI, Karatsu Shi, Saga Ken, 847-0047 normalization_timestamp: '2025-12-09T11:30:51.739774+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:48:28.563875+00:00' + source_url: http://karatsu-bank.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://karatsu-bank.jp/img/favicon.ico + source_url: http://karatsu-bank.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:48:28.563875+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAR-M-SPNCM.yaml b/data/custodian/JP-11-KAR-M-SPNCM.yaml index 1215dd5414..f1392906a5 100644 --- a/data/custodian/JP-11-KAR-M-SPNCM.yaml +++ b/data/custodian/JP-11-KAR-M-SPNCM.yaml @@ -267,3 +267,37 @@ wikidata_enrichment: - id: Q85877124 label: Maekawa Associates, Architects & Engineers description: '' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:49:12.480929+00:00' + source_url: https://saga-museum.jp/nagoya + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://saga-museum.jp/all-common/img/logo_saga.png + source_url: https://saga-museum.jp/nagoya + css_selector: '#page > header > div.header_top > div.container > div.header_top_left + > ul.yoko_list.yoko_list_line > li > a.externalLink > img' + retrieved_on: '2025-12-23T21:49:12.480929+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 佐賀県 + - claim_type: favicon_url + claim_value: https://saga-museum.jp/nagoya/common/img/apple-touch-icon.png + source_url: https://saga-museum.jp/nagoya + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:49:12.480929+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://saga-museum.jp/nagoya/common/img/apple-touch-icon.png + source_url: https://saga-museum.jp/nagoya + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T21:49:12.480929+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-KAS-L-KCSL.yaml b/data/custodian/JP-11-KAS-L-KCSL.yaml index d1e0234ae0..8a3f65134e 100644 --- a/data/custodian/JP-11-KAS-L-KCSL.yaml +++ b/data/custodian/JP-11-KAS-L-KCSL.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://kasukabetrc.jp/showalib/ wikidata_official_website: http://kasukabetrc.jp/showalib/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:49:31.206537+00:00' + source_url: https://www.lib.kasukabe.saitama.jp/shisetsu/syouwa.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.lib.kasukabe.saitama.jp/themes/lib_theme/favicon.ico + source_url: https://www.lib.kasukabe.saitama.jp/shisetsu/syouwa.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:49:31.206537+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAS-L-KL-kasukabeshiritsutakesato_library.yaml b/data/custodian/JP-11-KAS-L-KL-kasukabeshiritsutakesato_library.yaml index 3458f0fd11..7e88ad2761 100644 --- a/data/custodian/JP-11-KAS-L-KL-kasukabeshiritsutakesato_library.yaml +++ b/data/custodian/JP-11-KAS-L-KL-kasukabeshiritsutakesato_library.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.kasukabe.saitama.jp/ wikidata_official_website: http://www.lib.kasukabe.saitama.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:49:39.424546+00:00' + source_url: https://www.lib.kasukabe.saitama.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.lib.kasukabe.saitama.jp/themes/lib_theme/favicon.ico + source_url: https://www.lib.kasukabe.saitama.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:49:39.424546+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAS-L-KL.yaml b/data/custodian/JP-11-KAS-L-KL.yaml index c323bed269..8020a744ca 100644 --- a/data/custodian/JP-11-KAS-L-KL.yaml +++ b/data/custodian/JP-11-KAS-L-KL.yaml @@ -201,3 +201,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.kasukabe.saitama.jp wikidata_official_website: http://www.lib.kasukabe.saitama.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:49:50.981508+00:00' + source_url: https://www.lib.kasukabe.saitama.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.lib.kasukabe.saitama.jp/themes/lib_theme/favicon.ico + source_url: https://www.lib.kasukabe.saitama.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:49:50.981508+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAS-L-KUL.yaml b/data/custodian/JP-11-KAS-L-KUL.yaml index c19c9420d6..cc3510214a 100644 --- a/data/custodian/JP-11-KAS-L-KUL.yaml +++ b/data/custodian/JP-11-KAS-L-KUL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KAS-L-KUL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KAS-L-KUL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KAS-L-KUL ghcid_numeric: 7701384354506172443 valid_from: '2025-12-06T23:38:54.649410+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Kyoei University Library @@ -190,7 +191,8 @@ wikidata_enrichment: description: organisatie uit Japan - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: http://library.kyoei.ac.jp/ wikidata_official_website: http://library.kyoei.ac.jp/ @@ -212,3 +214,22 @@ location: geonames_id: 1859884 geonames_name: Kasukabe feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:50:10.515054+00:00' + source_url: http://library.kyoei.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://library.kyoei.ac.jp/tpl/wp-content/uploads/kyouei_fcon.png + source_url: http://library.kyoei.ac.jp + css_selector: '[document] > html > head > link:nth-of-type(27)' + retrieved_on: '2025-12-23T21:50:10.515054+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 32x32 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAS-M-KHM.yaml b/data/custodian/JP-11-KAS-M-KHM.yaml index 65742bfd03..64eb973fbb 100644 --- a/data/custodian/JP-11-KAS-M-KHM.yaml +++ b/data/custodian/JP-11-KAS-M-KHM.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KAS-M-KHM - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KAS-M-KHM valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KAS-M-KHM ghcid_numeric: 1938724268985197884 valid_from: '2025-12-06T23:38:32.483335+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: THE KASUKABE HISTORY MUSEUM @@ -236,3 +237,36 @@ location: postal_code: 344-0062 street_address: KASUKABEHIGASHI, Kasukabe Shi, Saitama Ken, 344-0062 normalization_timestamp: '2025-12-09T11:32:16.906420+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:50:19.186518+00:00' + source_url: https://www.city.kasukabe.lg.jp/soshikikarasagasu/bunkazaihogoka/gyomuannai/8/2/1/1/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.kasukabe.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: https://www.city.kasukabe.lg.jp/soshikikarasagasu/bunkazaihogoka/gyomuannai/8/2/1/1/index.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T21:50:19.186518+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 春日部市 KASUKABE CITY + - claim_type: favicon_url + claim_value: https://www.city.kasukabe.lg.jp/theme/base/img_common/smartphone.png + source_url: https://www.city.kasukabe.lg.jp/soshikikarasagasu/bunkazaihogoka/gyomuannai/8/2/1/1/index.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:50:19.186518+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.kasukabe.lg.jp/theme/base/img_common/ogp_noimage.png + source_url: https://www.city.kasukabe.lg.jp/soshikikarasagasu/bunkazaihogoka/gyomuannai/8/2/1/1/index.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T21:50:19.186518+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-KAS-M-YM.yaml b/data/custodian/JP-11-KAS-M-YM.yaml index e2a1bef935..5562fc33fc 100644 --- a/data/custodian/JP-11-KAS-M-YM.yaml +++ b/data/custodian/JP-11-KAS-M-YM.yaml @@ -251,3 +251,22 @@ location: postal_code: 849-1321 street_address: FURUEDA, Kashima Shi, Saga Ken, 849-1321 normalization_timestamp: '2025-12-09T11:31:42.339386+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:50:29.499042+00:00' + source_url: https://www.yutokusan.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.yutokusan.jp/apple-touch-icon.png + source_url: https://www.yutokusan.jp + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:50:29.499042+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsukawagoeekihigashiguchi_library.yaml b/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsukawagoeekihigashiguchi_library.yaml index 1d96bc26c4..ccc68c3e09 100644 --- a/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsukawagoeekihigashiguchi_library.yaml +++ b/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsukawagoeekihigashiguchi_library.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.city.kawagoe.saitama.jp/shisetsu/higashiguchi.html wikidata_official_website: http://www.lib.city.kawagoe.saitama.jp/shisetsu/higashiguchi.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:50:36.359283+00:00' + source_url: https://www.lib.city.kawagoe.saitama.jp/locations/#locations_higashi_a + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.lib.city.kawagoe.saitama.jp/site/wp-content/uploads/2022/08/cropped-tokimo-180x180.png + source_url: https://www.lib.city.kawagoe.saitama.jp/locations/#locations_higashi_a + css_selector: '[document] > html.js > head > link:nth-of-type(29)' + retrieved_on: '2025-12-23T21:50:36.359283+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsunishi_library.yaml b/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsunishi_library.yaml index 6559a0bc95..4d12769ccf 100644 --- a/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsunishi_library.yaml +++ b/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsunishi_library.yaml @@ -201,3 +201,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.city.kawagoe.saitama.jp/shisetsu/nishi.html wikidata_official_website: http://www.lib.city.kawagoe.saitama.jp/shisetsu/nishi.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:50:42.900373+00:00' + source_url: https://www.lib.city.kawagoe.saitama.jp/locations/#locations_nishi_a + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.lib.city.kawagoe.saitama.jp/site/wp-content/uploads/2022/08/cropped-tokimo-180x180.png + source_url: https://www.lib.city.kawagoe.saitama.jp/locations/#locations_nishi_a + css_selector: '[document] > html.js > head > link:nth-of-type(29)' + retrieved_on: '2025-12-23T21:50:42.900373+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsutakashina_library.yaml b/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsutakashina_library.yaml index 0651db91fc..dc916af4af 100644 --- a/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsutakashina_library.yaml +++ b/data/custodian/JP-11-KAW-L-KL-kawagoeshiritsutakashina_library.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.city.kawagoe.saitama.jp/shisetsu/takashina.html wikidata_official_website: http://www.lib.city.kawagoe.saitama.jp/shisetsu/takashina.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:50:50.256257+00:00' + source_url: https://www.lib.city.kawagoe.saitama.jp/locations/#locations_takashina_a + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.lib.city.kawagoe.saitama.jp/site/wp-content/uploads/2022/08/cropped-tokimo-180x180.png + source_url: https://www.lib.city.kawagoe.saitama.jp/locations/#locations_takashina_a + css_selector: '[document] > html.js > head > link:nth-of-type(29)' + retrieved_on: '2025-12-23T21:50:50.256257+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuchuo_library.yaml b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuchuo_library.yaml index 71a8b78363..afa5bb9d93 100644 --- a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuchuo_library.yaml +++ b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuchuo_library.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/tyuuoutosyokan.html wikidata_official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/tyuuoutosyokan.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:50:57.896113+00:00' + source_url: https://www.kawaguchi-lib.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kawaguchi-lib.jp/img/favicon.ico + source_url: https://www.kawaguchi-lib.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:50:57.896113+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuhatogaya_library.yaml b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuhatogaya_library.yaml index c4609f0b5f..57cc9ee2b2 100644 --- a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuhatogaya_library.yaml +++ b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuhatogaya_library.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/hatogaya.html wikidata_official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/hatogaya.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:51:04.585257+00:00' + source_url: https://www.kawaguchi-lib.jp/lib06_hato.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kawaguchi-lib.jp/img/favicon.ico + source_url: https://www.kawaguchi-lib.jp/lib06_hato.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:51:04.585257+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsumaekawa_library.yaml b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsumaekawa_library.yaml index 113a282e2d..c95c3b7529 100644 --- a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsumaekawa_library.yaml +++ b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsumaekawa_library.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/maekawatosyokan.html wikidata_official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/maekawatosyokan.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:51:11.307816+00:00' + source_url: https://www.kawaguchi-lib.jp/lib02_maekawa.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kawaguchi-lib.jp/img/favicon.ico + source_url: https://www.kawaguchi-lib.jp/lib02_maekawa.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:51:11.307816+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsushingo_library.yaml b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsushingo_library.yaml index 890eea4ff9..6af574c53d 100644 --- a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsushingo_library.yaml +++ b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsushingo_library.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/singoutosyokan.html wikidata_official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/singoutosyokan.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:51:18.610650+00:00' + source_url: https://www.kawaguchi-lib.jp/lib03_shin.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kawaguchi-lib.jp/img/favicon.ico + source_url: https://www.kawaguchi-lib.jp/lib03_shin.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:51:18.610650+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsutozuka_library.yaml b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsutozuka_library.yaml index fe8669200c..80e59ac05e 100644 --- a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsutozuka_library.yaml +++ b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsutozuka_library.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/todukatosyokan.html wikidata_official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/todukatosyokan.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:51:26.079179+00:00' + source_url: https://www.kawaguchi-lib.jp/lib05_tostuka.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kawaguchi-lib.jp/img/favicon.ico + source_url: https://www.kawaguchi-lib.jp/lib05_tostuka.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:51:26.079179+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuyokozone_library.yaml b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuyokozone_library.yaml index 0e7cdc1d08..07e81ac123 100644 --- a/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuyokozone_library.yaml +++ b/data/custodian/JP-11-KAW-L-KL-kawaguchishiritsuyokozone_library.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/yokozonetosyokan.html wikidata_official_website: http://www.kawaguchi-lib.jp/docshp/images/usr_doc/yokozonetosyokan.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:51:34.382076+00:00' + source_url: https://www.kawaguchi-lib.jp/lib04_yoko.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kawaguchi-lib.jp/img/favicon.ico + source_url: https://www.kawaguchi-lib.jp/lib04_yoko.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:51:34.382076+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAW-L-KL.yaml b/data/custodian/JP-11-KAW-L-KL.yaml index d239fb16a3..6608486829 100644 --- a/data/custodian/JP-11-KAW-L-KL.yaml +++ b/data/custodian/JP-11-KAW-L-KL.yaml @@ -201,3 +201,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.city.kawagoe.saitama.jp/shisetsu/index.html wikidata_official_website: http://www.lib.city.kawagoe.saitama.jp/shisetsu/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:51:40.994773+00:00' + source_url: https://www.lib.city.kawagoe.saitama.jp/locations/#locations_chuo_a + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.lib.city.kawagoe.saitama.jp/site/wp-content/uploads/2022/08/cropped-tokimo-180x180.png + source_url: https://www.lib.city.kawagoe.saitama.jp/locations/#locations_chuo_a + css_selector: '[document] > html.js > head > link:nth-of-type(29)' + retrieved_on: '2025-12-23T21:51:40.994773+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-11-KAW-L-MSBCLMSIL.yaml b/data/custodian/JP-11-KAW-L-MSBCLMSIL.yaml index 0309157393..05cf02ea3c 100644 --- a/data/custodian/JP-11-KAW-L-MSBCLMSIL.yaml +++ b/data/custodian/JP-11-KAW-L-MSBCLMSIL.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KAW-L-MSBCLMSIL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KAW-L-MSBCLMSIL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KAW-L-MSBCLMSIL ghcid_numeric: 17621705826381634040 valid_from: '2025-12-06T23:38:57.999585+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Megmilk Snow Brand Co., LTD. Milk Science Institute Library @@ -151,3 +152,28 @@ location: geonames_id: 1859740 geonames_name: Kawagoe feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:51:50.364784+00:00' + source_url: http://www.meg-snow.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.meg-snow.com/favicon.svg + source_url: http://www.meg-snow.com + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T21:51:50.364784+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.meg-snow.com/assets/images/common/ogp.png + source_url: http://www.meg-snow.com + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T21:51:50.364784+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-11-KAW-L-SGUKJCIMC.yaml b/data/custodian/JP-11-KAW-L-SGUKJCIMC.yaml index 2a2e570c17..00e44b1a3c 100644 --- a/data/custodian/JP-11-KAW-L-SGUKJCIMC.yaml +++ b/data/custodian/JP-11-KAW-L-SGUKJCIMC.yaml @@ -37,20 +37,22 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KAW-L-SGUKJCIMC - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KAW-L-SGUKJCIMC valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KAW-L-SGUKJCIMC ghcid_numeric: 4445517660592422958 valid_from: '2025-12-06T23:38:54.652221+00:00' reason: Initial GHCID from CH-Annotator (japan_complete_ch_annotator.yaml) custodian_name: claim_type: custodian_name - claim_value: Saitama Gakuen University Kawaguchi Junior College Information Media Center + claim_value: Saitama Gakuen University Kawaguchi Junior College Information Media + Center source_type: ch_annotator identifiers: - identifier_scheme: GHCID @@ -101,11 +103,12 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name - claim_value: Saitama Gakuen University Kawaguchi Junior College Information Media Center + claim_value: Saitama Gakuen University Kawaguchi Junior College Information Media + Center property_uri: skos:prefLabel provenance: namespace: glam @@ -165,7 +168,8 @@ wikidata_enrichment: wikidata_labels: en: Saitama Gakuen University Kawaguchi Junior College Information Media Center ja: 埼玉学園大学・川口短期大学情報メディアセンター - wikidata_label_en: Saitama Gakuen University Kawaguchi Junior College Information Media Center + wikidata_label_en: Saitama Gakuen University Kawaguchi Junior College Information + Media Center wikidata_label_ja: 埼玉学園大学・川口短期大学情報メディアセンター wikidata_classification: instance_of: &id004 @@ -190,7 +194,8 @@ wikidata_enrichment: description: organisatie uit Japan - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: http://www.media.saigaku.ac.jp wikidata_official_website: http://www.media.saigaku.ac.jp @@ -212,3 +217,22 @@ location: geonames_id: 1859730 geonames_name: Kawaguchi feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:52:04.465355+00:00' + source_url: http://www.media.saigaku.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.media.saigaku.ac.jp/wp-content/uploads/2016/01/cropped-bbb-180x180.png + source_url: http://www.media.saigaku.ac.jp + css_selector: '[document] > html.no-js > head > link:nth-of-type(24)' + retrieved_on: '2025-12-23T21:52:04.465355+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/JP-11-KAW-L-SLSB.yaml b/data/custodian/JP-11-KAW-L-SLSB.yaml index 791d288984..ddbb68efdc 100644 --- a/data/custodian/JP-11-KAW-L-SLSB.yaml +++ b/data/custodian/JP-11-KAW-L-SLSB.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KAW-L-SLSB - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KAW-L-SLSB valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KAW-L-SLSB ghcid_numeric: 4022296224172505995 valid_from: '2025-12-06T23:38:54.604635+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAITAMAIKADAIGAKUFUZOKU Library SOGOIRYOSENTA-BUNKAN @@ -204,3 +205,22 @@ location: geonames_id: 1859740 geonames_name: Kawagoe feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:52:19.297804+00:00' + source_url: http://www.saitama-med.ac.jp/smulibrary/instruction.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.saitama-med.ac.jp/opac/images/cyan/favicon.ico + source_url: http://www.saitama-med.ac.jp/smulibrary/instruction.html + css_selector: '[document] > html > head > link:nth-of-type(9)' + retrieved_on: '2025-12-23T21:52:19.297804+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAW-L-SUMC.yaml b/data/custodian/JP-11-KAW-L-SUMC.yaml index 6c60f4e653..1d075a2211 100644 --- a/data/custodian/JP-11-KAW-L-SUMC.yaml +++ b/data/custodian/JP-11-KAW-L-SUMC.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KAW-L-SUMC - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KAW-L-SUMC valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KAW-L-SUMC ghcid_numeric: 16689471265673480423 valid_from: '2025-12-06T23:38:54.643780+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Shobi University Media Center @@ -196,7 +197,8 @@ wikidata_enrichment: description: organisatie uit Japan - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: http://www.shobi-u.ac.jp/mediacenter/ wikidata_official_website: http://www.shobi-u.ac.jp/mediacenter/ @@ -218,3 +220,22 @@ location: geonames_id: 1859740 geonames_name: Kawagoe feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:52:28.726901+00:00' + source_url: http://www.shobi-u.ac.jp/mediacenter + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.shobi-u.ac.jp/img/module/common/favicon.ico + source_url: http://www.shobi-u.ac.jp/mediacenter + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T21:52:28.726901+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAW-L-TLKL.yaml b/data/custodian/JP-11-KAW-L-TLKL.yaml index fad079cf20..fb93e70cac 100644 --- a/data/custodian/JP-11-KAW-L-TLKL.yaml +++ b/data/custodian/JP-11-KAW-L-TLKL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KAW-L-TLKL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KAW-L-TLKL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KAW-L-TLKL ghcid_numeric: 15146159583868490581 valid_from: '2025-12-06T23:38:55.162838+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: TOYODAIGAKUFUZOKU Library KAWAGOE Library @@ -204,3 +205,37 @@ location: geonames_id: 1859740 geonames_name: Kawagoe feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:52:57.789607+00:00' + source_url: http://www.toyo.ac.jp/site/library/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.toyo.ac.jp/img/common/img_logo.svg + source_url: http://www.toyo.ac.jp/site/library/index.html + css_selector: '#gheader > div.gnav__frm > h1.gnav__logo > a.gnav__logo-link > + img.gnav__logo-img' + retrieved_on: '2025-12-23T21:52:57.789607+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 東洋大学 + - claim_type: favicon_url + claim_value: http://www.toyo.ac.jp/img/common/favicon.ico + source_url: http://www.toyo.ac.jp/site/library/index.html + css_selector: '[document] > html.js_domload.js_ajax_load > head > link:nth-of-type(6)' + retrieved_on: '2025-12-23T21:52:57.789607+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.toyo.ac.jp/site/library/ogp.jpg + source_url: http://www.toyo.ac.jp/site/library/index.html + css_selector: '[document] > html.js_domload.js_ajax_load > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T21:52:57.789607+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-KAW-M-KCM.yaml b/data/custodian/JP-11-KAW-M-KCM.yaml index 91901829ea..4524d603ef 100644 --- a/data/custodian/JP-11-KAW-M-KCM.yaml +++ b/data/custodian/JP-11-KAW-M-KCM.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KAW-M-KCM - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KAW-M-KCM valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KAW-M-KCM ghcid_numeric: 5443637870790967439 valid_from: '2025-12-06T23:38:32.359311+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: KAWAGOE CITY MUSEUM @@ -274,3 +275,36 @@ location: postal_code: 350-0053 street_address: KURUWAMACHI, Kawagoe Shi, Saitama Ken, 350-0053 normalization_timestamp: '2025-12-09T11:32:31.695472+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:53:21.247412+00:00' + source_url: https://www.city.kawagoe.saitama.jp/welcome/kankospot/hommarugotenzone/hakubutsukan/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.kawagoe.saitama.jp/welcome/kankospot/hommarugotenzone/_template_/_site_/_default_/_res/museum/images/header/tlogo.png + source_url: https://www.city.kawagoe.saitama.jp/welcome/kankospot/hommarugotenzone/hakubutsukan/index.html + css_selector: '#tlogo > h1 > a > img' + retrieved_on: '2025-12-23T21:53:21.247412+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 川越市立博物館トップページ + - claim_type: favicon_url + claim_value: https://www.city.kawagoe.saitama.jp/welcome/kankospot/hommarugotenzone/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.kawagoe.saitama.jp/welcome/kankospot/hommarugotenzone/hakubutsukan/index.html + css_selector: '[document] > html > head > link:nth-of-type(13)' + retrieved_on: '2025-12-23T21:53:21.247412+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.kawagoe.saitama.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.kawagoe.saitama.jp/welcome/kankospot/hommarugotenzone/hakubutsukan/index.html + css_selector: '[document] > html > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-23T21:53:21.247412+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-KAW-M-KFM.yaml b/data/custodian/JP-11-KAW-M-KFM.yaml index 1c01a0853b..aa801a745d 100644 --- a/data/custodian/JP-11-KAW-M-KFM.yaml +++ b/data/custodian/JP-11-KAW-M-KFM.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KAW-M-KFM - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KAW-M-KFM valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KAW-M-KFM ghcid_numeric: 6398592696379052522 valid_from: '2025-12-06T23:38:32.375714+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: KAWAGOE FESTIVAL MUSEUM @@ -249,3 +250,22 @@ location: postal_code: 350-0062 street_address: MOTOMACHI, Kawagoe Shi, Saitama Ken, 350-0062 normalization_timestamp: '2025-12-09T11:31:07.536658+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:53:28.496693+00:00' + source_url: https://kawagoematsuri.jp/matsurimuseum + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kawagoematsuri.jp/apple-touch-icon.png + source_url: https://kawagoematsuri.jp/matsurimuseum + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T21:53:28.496693+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAW-M-YMA.yaml b/data/custodian/JP-11-KAW-M-YMA.yaml index 9bc531618e..ba859d0beb 100644 --- a/data/custodian/JP-11-KAW-M-YMA.yaml +++ b/data/custodian/JP-11-KAW-M-YMA.yaml @@ -152,3 +152,28 @@ location: geonames_id: 1859740 geonames_name: Kawagoe feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:53:58.846417+00:00' + source_url: https://www.koedo-kameya.com/sp.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.koedo-kameya.com/wp-content/uploads/kameya.png + source_url: https://www.koedo-kameya.com/sp.html + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T21:53:58.846417+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.koedo-kameya.com/wp-content/uploads/logo_ogp.jpg + source_url: https://www.koedo-kameya.com/sp.html + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T21:53:58.846417+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-KAZ-L-HIUL.yaml b/data/custodian/JP-11-KAZ-L-HIUL.yaml index 56d1760d15..47eb8f6866 100644 --- a/data/custodian/JP-11-KAZ-L-HIUL.yaml +++ b/data/custodian/JP-11-KAZ-L-HIUL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KAZ-L-HIUL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KAZ-L-HIUL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KAZ-L-HIUL ghcid_numeric: 1012517973054958727 valid_from: '2025-12-06T23:38:54.638130+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Heisei International University Library @@ -190,7 +191,8 @@ wikidata_enrichment: description: organisatie uit Japan - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: http://www.hiu.ac.jp/education/hiu_library/ wikidata_official_website: http://www.hiu.ac.jp/education/hiu_library/ @@ -212,3 +214,23 @@ location: geonames_id: 1859586 geonames_name: Kazo feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:54:07.072345+00:00' + source_url: http://www.hiu.ac.jp/education/hiu_library + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.hiu.ac.jp/hiu_wp/wp-content/themes/hiu/common/img/hd_logo.svg + source_url: http://www.hiu.ac.jp/education/hiu_library + css_selector: '[document] > html > body > header > div.hd_inner > div.hd_logo + > a > img' + retrieved_on: '2025-12-23T21:54:07.072345+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 平成国際大学 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-KAZ-L-KL-kazoshiritsudoyonofurusatootone_library.yaml b/data/custodian/JP-11-KAZ-L-KL-kazoshiritsudoyonofurusatootone_library.yaml index 61d95a12d9..ecd4f7eafa 100644 --- a/data/custodian/JP-11-KAZ-L-KL-kazoshiritsudoyonofurusatootone_library.yaml +++ b/data/custodian/JP-11-KAZ-L-KL-kazoshiritsudoyonofurusatootone_library.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.kazo.saitama.jp/access_otone.html wikidata_official_website: http://www.library.kazo.saitama.jp/access_otone.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:54:14.760022+00:00' + source_url: https://www.library.kazo.saitama.jp/access/otone.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library.kazo.saitama.jp/themes/lib_theme/images/logo.png + source_url: https://www.library.kazo.saitama.jp/access/otone.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T21:54:14.760022+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 加須市立図書館 + - claim_type: favicon_url + claim_value: https://www.library.kazo.saitama.jp/themes/lib_theme/favicon.ico + source_url: https://www.library.kazo.saitama.jp/access/otone.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:54:14.760022+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAZ-L-KL-kazoshiritsukisai_library.yaml b/data/custodian/JP-11-KAZ-L-KL-kazoshiritsukisai_library.yaml index ffc2952bd2..20f0c9de7f 100644 --- a/data/custodian/JP-11-KAZ-L-KL-kazoshiritsukisai_library.yaml +++ b/data/custodian/JP-11-KAZ-L-KL-kazoshiritsukisai_library.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.kazo.saitama.jp/access_kisai.html wikidata_official_website: http://www.library.kazo.saitama.jp/access_kisai.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:54:21.820495+00:00' + source_url: https://www.library.kazo.saitama.jp/access/kisai.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library.kazo.saitama.jp/themes/lib_theme/images/logo.png + source_url: https://www.library.kazo.saitama.jp/access/kisai.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T21:54:21.820495+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 加須市立図書館 + - claim_type: favicon_url + claim_value: https://www.library.kazo.saitama.jp/themes/lib_theme/favicon.ico + source_url: https://www.library.kazo.saitama.jp/access/kisai.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:54:21.820495+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAZ-L-KL-kazoshiritsukitakawabe_library.yaml b/data/custodian/JP-11-KAZ-L-KL-kazoshiritsukitakawabe_library.yaml index 09279cf474..35b95df81b 100644 --- a/data/custodian/JP-11-KAZ-L-KL-kazoshiritsukitakawabe_library.yaml +++ b/data/custodian/JP-11-KAZ-L-KL-kazoshiritsukitakawabe_library.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.kazo.saitama.jp/access_kitakawabe.html wikidata_official_website: http://www.library.kazo.saitama.jp/access_kitakawabe.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:54:29.492469+00:00' + source_url: https://www.library.kazo.saitama.jp/access/kitakawabe.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library.kazo.saitama.jp/themes/lib_theme/images/logo.png + source_url: https://www.library.kazo.saitama.jp/access/kitakawabe.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T21:54:29.492469+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 加須市立図書館 + - claim_type: favicon_url + claim_value: https://www.library.kazo.saitama.jp/themes/lib_theme/favicon.ico + source_url: https://www.library.kazo.saitama.jp/access/kitakawabe.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:54:29.492469+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KAZ-L-KL.yaml b/data/custodian/JP-11-KAZ-L-KL.yaml index 83cafeed5d..bf7e0deab3 100644 --- a/data/custodian/JP-11-KAZ-L-KL.yaml +++ b/data/custodian/JP-11-KAZ-L-KL.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.kazo.saitama.jp/access_kazo.html wikidata_official_website: http://www.library.kazo.saitama.jp/access_kazo.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:54:36.849624+00:00' + source_url: https://www.library.kazo.saitama.jp/access + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library.kazo.saitama.jp/themes/lib_theme/images/logo.png + source_url: https://www.library.kazo.saitama.jp/access + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T21:54:36.849624+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 加須市立図書館 + - claim_type: favicon_url + claim_value: https://www.library.kazo.saitama.jp/themes/lib_theme/favicon.ico + source_url: https://www.library.kazo.saitama.jp/access + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:54:36.849624+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KIS-L-YL.yaml b/data/custodian/JP-11-KIS-L-YL.yaml index 0350b8a593..6da55e63d4 100644 --- a/data/custodian/JP-11-KIS-L-YL.yaml +++ b/data/custodian/JP-11-KIS-L-YL.yaml @@ -222,3 +222,28 @@ wikidata_enrichment: wikidata_media: image: Fukudomi Yuaikan.jpg wikidata_image: Fukudomi Yuaikan.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:54:59.744416+00:00' + source_url: http://www.town.shiroishi.lg.jp/jyuumin/manabu/toshokan.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.town.shiroishi.lg.jp/library/common/webclipicon.png + source_url: http://www.town.shiroishi.lg.jp/jyuumin/manabu/toshokan.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T21:54:59.744416+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.town.shiroishi.lg.jp/library/common/fblogo.jpg + source_url: http://www.town.shiroishi.lg.jp/jyuumin/manabu/toshokan.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T21:54:59.744416+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-KIT-L-KCCSL.yaml b/data/custodian/JP-11-KIT-L-KCCSL.yaml index fa637cb141..13a81b7312 100644 --- a/data/custodian/JP-11-KIT-L-KCCSL.yaml +++ b/data/custodian/JP-11-KIT-L-KCCSL.yaml @@ -211,3 +211,30 @@ wikidata_enrichment: wikidata_web: official_website: http://kitamoto-sdh.co.jp/library/index.html wikidata_official_website: http://kitamoto-sdh.co.jp/library/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:55:10.905058+00:00' + source_url: https://www.library.kitamoto.saitama.jp/facility/kodomo.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library.kitamoto.saitama.jp/themes/lib_theme/images/logo.png + source_url: https://www.library.kitamoto.saitama.jp/facility/kodomo.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T21:55:10.905058+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 北本市立図書館 + - claim_type: favicon_url + claim_value: https://www.library.kitamoto.saitama.jp/themes/lib_theme/favicon.ico + source_url: https://www.library.kitamoto.saitama.jp/facility/kodomo.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:55:10.905058+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KIT-L-KL.yaml b/data/custodian/JP-11-KIT-L-KL.yaml index 98d9f536b4..4242a0d0f6 100644 --- a/data/custodian/JP-11-KIT-L-KL.yaml +++ b/data/custodian/JP-11-KIT-L-KL.yaml @@ -212,3 +212,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.kitamoto.saitama.jp/toshokan/ wikidata_official_website: http://www.library.kitamoto.saitama.jp/toshokan/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:55:18.747182+00:00' + source_url: https://www.library.kitamoto.saitama.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library.kitamoto.saitama.jp/themes/lib_theme/images/logo.png + source_url: https://www.library.kitamoto.saitama.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-23T21:55:18.747182+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 北本市立図書館 + - claim_type: favicon_url + claim_value: https://www.library.kitamoto.saitama.jp/themes/lib_theme/favicon.ico + source_url: https://www.library.kitamoto.saitama.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T21:55:18.747182+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KIT-L-M-matsubushimachitasedaikoryugakusyukantoshoshitsu.yaml b/data/custodian/JP-11-KIT-L-M-matsubushimachitasedaikoryugakusyukantoshoshitsu.yaml index 224461f1ee..0017efee7a 100644 --- a/data/custodian/JP-11-KIT-L-M-matsubushimachitasedaikoryugakusyukantoshoshitsu.yaml +++ b/data/custodian/JP-11-KIT-L-M-matsubushimachitasedaikoryugakusyukantoshoshitsu.yaml @@ -203,3 +203,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.matsubushi.lg.jp/www/contents/1398470251723/index.html wikidata_official_website: http://www.town.matsubushi.lg.jp/www/contents/1398470251723/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:55:26.498427+00:00' + source_url: http://www.town.matsubushi.lg.jp/www/genre/1382593457259/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.town.matsubushi.lg.jp/www/css/images/favicon.ico + source_url: http://www.town.matsubushi.lg.jp/www/genre/1382593457259/index.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T21:55:26.498427+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www/images/logo.jpg + source_url: http://www.town.matsubushi.lg.jp/www/genre/1382593457259/index.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T21:55:26.498427+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-KIT-L-SL.yaml b/data/custodian/JP-11-KIT-L-SL.yaml index 430fcce2a0..3c5875801b 100644 --- a/data/custodian/JP-11-KIT-L-SL.yaml +++ b/data/custodian/JP-11-KIT-L-SL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KIT-L-SL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KIT-L-SL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KIT-L-SL ghcid_numeric: 14139896260681556093 valid_from: '2025-12-06T23:38:42.749420+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SUGITOCHORITSU Library @@ -191,8 +192,9 @@ wikidata_enrichment: instance_of: &id004 - id: Q7075 label: library - description: institution charged with the care of a collection of literary, musical, artistic, or reference materials, - such as books, manuscripts, recordings, or films + description: institution charged with the care of a collection of literary, + musical, artistic, or reference materials, such as books, manuscripts, recordings, + or films wikidata_instance_of: *id004 wikidata_location: coordinates: &id007 @@ -227,3 +229,22 @@ location: postal_code: 345-0042 street_address: 477-8 OJIMA, Kitakatsushika Gun Sugito Machi, Saitama Ken, 345-0042 normalization_timestamp: '2025-12-09T11:30:37.586641+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:55:39.824264+00:00' + source_url: https://www.town.sugito.lg.jp/site/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.sugito.lg.jp/apple-touch-icon.png + source_url: https://www.town.sugito.lg.jp/site/library + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:55:39.824264+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-KIT-M-MPGKMMNPU.yaml b/data/custodian/JP-11-KIT-M-MPGKMMNPU.yaml index 43d935dd40..dc20f70e6b 100644 --- a/data/custodian/JP-11-KIT-M-MPGKMMNPU.yaml +++ b/data/custodian/JP-11-KIT-M-MPGKMMNPU.yaml @@ -32,20 +32,22 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KIT-M-MPGKMMNPU - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KIT-M-MPGKMMNPU valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KIT-M-MPGKMMNPU ghcid_numeric: 9643747082078054408 valid_from: '2025-12-06T23:38:32.611547+00:00' reason: Initial GHCID from CH-Annotator (japan_complete_ch_annotator.yaml) custodian_name: claim_type: custodian_name - claim_value: Medicinal Plant Garden and Kimura Memorial Museum, Nihon Pharmaceutical University + claim_value: Medicinal Plant Garden and Kimura Memorial Museum, Nihon Pharmaceutical + University source_type: ch_annotator identifiers: - identifier_scheme: GHCID @@ -96,11 +98,12 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name - claim_value: Medicinal Plant Garden and Kimura Memorial Museum, Nihon Pharmaceutical University + claim_value: Medicinal Plant Garden and Kimura Memorial Museum, Nihon Pharmaceutical + University property_uri: skos:prefLabel provenance: namespace: glam @@ -153,3 +156,22 @@ location: geonames_id: 1861641 geonames_name: Ina feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:56:04.720351+00:00' + source_url: https://www.nichiyaku.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.nichiyaku.ac.jp/images/common/apple-touch-icon-180x180.png + source_url: https://www.nichiyaku.ac.jp + css_selector: '[document] > html > head > link:nth-of-type(22)' + retrieved_on: '2025-12-23T21:56:04.720351+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 10 diff --git a/data/custodian/JP-11-KOD-L-K-kamikawamachifureaisentatoshoshitsu.yaml b/data/custodian/JP-11-KOD-L-K-kamikawamachifureaisentatoshoshitsu.yaml index a601208b49..091813a5fd 100644 --- a/data/custodian/JP-11-KOD-L-K-kamikawamachifureaisentatoshoshitsu.yaml +++ b/data/custodian/JP-11-KOD-L-K-kamikawamachifureaisentatoshoshitsu.yaml @@ -201,3 +201,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.kamikawa.saitama.jp/shisetu/toshositu/tosho02.html wikidata_official_website: http://www.town.kamikawa.saitama.jp/shisetu/toshositu/tosho02.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:56:11.869217+00:00' + source_url: http://www.town.kamikawa.saitama.jp/shisetsuannai/1444.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.town.kamikawa.saitama.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.town.kamikawa.saitama.jp/shisetsuannai/1444.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T21:56:11.869217+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 人を育てて まちが育つ 未来につなぐ 住みよい神川 埼玉県 神川町 Kamikawa Town Official Site + - claim_type: favicon_url + claim_value: http://www.town.kamikawa.saitama.jp/smartphone.png + source_url: http://www.town.kamikawa.saitama.jp/shisetsuannai/1444.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:56:11.869217+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.town.kamikawa.saitama.jp/material/images/group/1/sp_search_logo.png + source_url: http://www.town.kamikawa.saitama.jp/shisetsuannai/1444.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T21:56:11.869217+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-KOD-L-K.yaml b/data/custodian/JP-11-KOD-L-K.yaml index aa6f181db0..30a52e377c 100644 --- a/data/custodian/JP-11-KOD-L-K.yaml +++ b/data/custodian/JP-11-KOD-L-K.yaml @@ -201,3 +201,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.kamikawa.saitama.jp/shisetu/toshositu/tosho01.html wikidata_official_website: http://www.town.kamikawa.saitama.jp/shisetu/toshositu/tosho01.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:56:19.514603+00:00' + source_url: http://www.town.kamikawa.saitama.jp/shisetsuannai/1444.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.town.kamikawa.saitama.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.town.kamikawa.saitama.jp/shisetsuannai/1444.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T21:56:19.514603+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 人を育てて まちが育つ 未来につなぐ 住みよい神川 埼玉県 神川町 Kamikawa Town Official Site + - claim_type: favicon_url + claim_value: http://www.town.kamikawa.saitama.jp/smartphone.png + source_url: http://www.town.kamikawa.saitama.jp/shisetsuannai/1444.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:56:19.514603+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.town.kamikawa.saitama.jp/material/images/group/1/sp_search_logo.png + source_url: http://www.town.kamikawa.saitama.jp/shisetsuannai/1444.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T21:56:19.514603+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-KOD-L-ML.yaml b/data/custodian/JP-11-KOD-L-ML.yaml index 9cb763675d..92e357b311 100644 --- a/data/custodian/JP-11-KOD-L-ML.yaml +++ b/data/custodian/JP-11-KOD-L-ML.yaml @@ -202,3 +202,37 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.saitama-misato.lg.jp/life/study/library.html wikidata_official_website: http://www.town.saitama-misato.lg.jp/life/study/library.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:56:35.317929+00:00' + source_url: https://www.town.saitama-misato.lg.jp/category/13-0-0-0-0-0-0-0-0-0.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.town.saitama-misato.lg.jp/css/img/head_logo_pc.png + source_url: https://www.town.saitama-misato.lg.jp/category/13-0-0-0-0-0-0-0-0-0.html + css_selector: '#body > div.all > div.design > header.main_header > div.head > + div.head_in > div.h_main > h1.h1 > img' + retrieved_on: '2025-12-23T21:56:35.317929+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 美しい里の町 埼玉県美里町 + - claim_type: favicon_url + claim_value: https://www.town.saitama-misato.lg.jp/category/design_img/favicon.ico + source_url: https://www.town.saitama-misato.lg.jp/category/13-0-0-0-0-0-0-0-0-0.html + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T21:56:35.317929+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.town.saitama-misato.lg.jp/design_img/ + source_url: https://www.town.saitama-misato.lg.jp/category/13-0-0-0-0-0-0-0-0-0.html + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T21:56:35.317929+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-KOD-L-T.yaml b/data/custodian/JP-11-KOD-L-T.yaml index 54b5650604..f9c61bb8af 100644 --- a/data/custodian/JP-11-KOD-L-T.yaml +++ b/data/custodian/JP-11-KOD-L-T.yaml @@ -201,3 +201,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.kamikawa.saitama.jp/shisetu/toshositu/tosho03.html wikidata_official_website: http://www.town.kamikawa.saitama.jp/shisetu/toshositu/tosho03.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:56:42.734086+00:00' + source_url: http://www.town.kamikawa.saitama.jp/soshiki/kominkan/1731.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.town.kamikawa.saitama.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.town.kamikawa.saitama.jp/soshiki/kominkan/1731.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T21:56:42.734086+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 人を育てて まちが育つ 未来につなぐ 住みよい神川 埼玉県 神川町 Kamikawa Town Official Site + - claim_type: favicon_url + claim_value: http://www.town.kamikawa.saitama.jp/smartphone.png + source_url: http://www.town.kamikawa.saitama.jp/soshiki/kominkan/1731.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T21:56:42.734086+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.town.kamikawa.saitama.jp/material/images/group/1/sp_search_logo.png + source_url: http://www.town.kamikawa.saitama.jp/soshiki/kominkan/1731.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T21:56:42.734086+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-KON-L-KL-kawasato_library.yaml b/data/custodian/JP-11-KON-L-KL-kawasato_library.yaml index ec060cb86c..96f51741ef 100644 --- a/data/custodian/JP-11-KON-L-KL-kawasato_library.yaml +++ b/data/custodian/JP-11-KON-L-KL-kawasato_library.yaml @@ -205,3 +205,23 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.kounosu.saitama.jp/ wikidata_official_website: http://lib.city.kounosu.saitama.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:56:51.548160+00:00' + source_url: https://lib.city.kounosu.saitama.jp/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://lib.city.kounosu.saitama.jp/img/common/logo.png + source_url: https://lib.city.kounosu.saitama.jp/index.html + css_selector: '#header > header > div.container > div.d-flex.justify-content-between + > a > img.toplogo.w-md-100' + retrieved_on: '2025-12-23T21:56:51.548160+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鴻巣市立図書館 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-KON-L-KL-konosushiritsufukiage_library.yaml b/data/custodian/JP-11-KON-L-KL-konosushiritsufukiage_library.yaml index 725f1c516c..a695a1f1d9 100644 --- a/data/custodian/JP-11-KON-L-KL-konosushiritsufukiage_library.yaml +++ b/data/custodian/JP-11-KON-L-KL-konosushiritsufukiage_library.yaml @@ -205,3 +205,23 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.kounosu.saitama.jp wikidata_official_website: http://lib.city.kounosu.saitama.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:56:57.045196+00:00' + source_url: https://lib.city.kounosu.saitama.jp/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://lib.city.kounosu.saitama.jp/img/common/logo.png + source_url: https://lib.city.kounosu.saitama.jp/index.html + css_selector: '#header > header > div.container > div.d-flex.justify-content-between + > a > img.toplogo.w-md-100' + retrieved_on: '2025-12-23T21:56:57.045196+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鴻巣市立図書館 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-KON-L-KL.yaml b/data/custodian/JP-11-KON-L-KL.yaml index cd12de5199..17af771a79 100644 --- a/data/custodian/JP-11-KON-L-KL.yaml +++ b/data/custodian/JP-11-KON-L-KL.yaml @@ -205,3 +205,23 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.kounosu.saitama.jp wikidata_official_website: http://lib.city.kounosu.saitama.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:57:03.300509+00:00' + source_url: https://lib.city.kounosu.saitama.jp/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://lib.city.kounosu.saitama.jp/img/common/logo.png + source_url: https://lib.city.kounosu.saitama.jp/index.html + css_selector: '#header > header > div.container > div.d-flex.justify-content-between + > a > img.toplogo.w-md-100' + retrieved_on: '2025-12-23T21:57:03.300509+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鴻巣市立図書館 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-KOS-L-KCL.yaml b/data/custodian/JP-11-KOS-L-KCL.yaml index 335b1098da..6ce1fdaf70 100644 --- a/data/custodian/JP-11-KOS-L-KCL.yaml +++ b/data/custodian/JP-11-KOS-L-KCL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KOS-L-KCL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KOS-L-KCL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KOS-L-KCL ghcid_numeric: 4146725335847529269 valid_from: '2025-12-06T23:38:42.577671+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Koshigaya City Library @@ -193,8 +194,9 @@ wikidata_enrichment: instance_of: &id004 - id: Q7075 label: library - description: institution charged with the care of a collection of literary, musical, artistic, or reference materials, - such as books, manuscripts, recordings, or films + description: institution charged with the care of a collection of literary, + musical, artistic, or reference materials, such as books, manuscripts, recordings, + or films wikidata_instance_of: *id004 wikidata_location: country: &id005 @@ -231,3 +233,22 @@ location: geonames_id: 1858729 geonames_name: Koshigaya feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:57:21.118435+00:00' + source_url: https://lib.city.koshigaya.saitama.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.city.koshigaya.saitama.jp/apple-touch-icon.png + source_url: https://lib.city.koshigaya.saitama.jp + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T21:57:21.118435+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-11-KOS-L-STJCL.yaml b/data/custodian/JP-11-KOS-L-STJCL.yaml index d82b5953f5..68f7302b95 100644 --- a/data/custodian/JP-11-KOS-L-STJCL.yaml +++ b/data/custodian/JP-11-KOS-L-STJCL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KOS-L-STJCL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KOS-L-STJCL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KOS-L-STJCL ghcid_numeric: 14557978647595188825 valid_from: '2025-12-06T23:38:58.449023+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saitama toho junior college library @@ -187,7 +188,8 @@ wikidata_enrichment: member_of: - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: http://www.saitamatoho.jp/library wikidata_official_website: http://www.saitamatoho.jp/library @@ -209,3 +211,23 @@ location: geonames_id: 1858729 geonames_name: Koshigaya feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T21:57:39.518224+00:00' + source_url: http://www.saitamatoho.jp/library + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.saitamatoho.jp/images/common/logo.png + source_url: http://www.saitamatoho.jp/library + css_selector: '[document] > html > body > header > div.header-inner:nth-of-type(18) + > h1.header-logo > a > img.u-pc' + retrieved_on: '2025-12-23T21:57:39.518224+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 埼玉東萌短期大学 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-KUB-M-SAM.yaml b/data/custodian/JP-11-KUB-M-SAM.yaml index e052746f70..63a399b935 100644 --- a/data/custodian/JP-11-KUB-M-SAM.yaml +++ b/data/custodian/JP-11-KUB-M-SAM.yaml @@ -416,3 +416,28 @@ location: geonames_id: 1858553 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:31.917494+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:03:01.560854+00:00' + source_url: https://www.simose-museum.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.simose-museum.jp/img/icon.png + source_url: https://www.simose-museum.jp + css_selector: '[document] > html.loaded > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T22:03:01.560854+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://simose-museum.jp/img/ogp.png + source_url: https://www.simose-museum.jp + css_selector: '[document] > html.loaded > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T22:03:01.560854+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-KUK-A-KK-kukishi_koubunshokan.yaml b/data/custodian/JP-11-KUK-A-KK-kukishi_koubunshokan.yaml index 83be2b7bf2..d7d02ea8a1 100644 --- a/data/custodian/JP-11-KUK-A-KK-kukishi_koubunshokan.yaml +++ b/data/custodian/JP-11-KUK-A-KK-kukishi_koubunshokan.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KUK-A-KK-kukishi_koubunshokan - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KUK-A-KK-kukishi_koubunshokan valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KUK-A-KK-kukishi_koubunshokan ghcid_numeric: 15004612586312600804 valid_from: '2025-12-06T23:38:29.562115+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: KUKISHI KOUBUNSHOKAN @@ -151,3 +152,22 @@ location: geonames_id: 1858445 geonames_name: Kukichūō feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:03:09.855588+00:00' + source_url: https://www.city.kuki.lg.jp/shisetsu/shiyakusyo/kobunsho.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.kuki.lg.jp/_template_/_site_/_default_/_res/favicon.ico + source_url: https://www.city.kuki.lg.jp/shisetsu/shiyakusyo/kobunsho.html + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T22:03:09.855588+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KUK-A-KK.yaml b/data/custodian/JP-11-KUK-A-KK.yaml index cad34cec41..202af35165 100644 --- a/data/custodian/JP-11-KUK-A-KK.yaml +++ b/data/custodian/JP-11-KUK-A-KK.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KUK-A-KK - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KUK-A-KK valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KUK-A-KK ghcid_numeric: 12908457717511834568 valid_from: '2025-12-06T23:35:50.102300+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: KUKISHI KOUBUNSHOKAN @@ -151,3 +152,22 @@ location: geonames_id: 1858445 geonames_name: Kukichūō feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:03:18.479006+00:00' + source_url: https://www.city.kuki.lg.jp/shisetsu/shiyakusyo/kobunsho.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.kuki.lg.jp/_template_/_site_/_default_/_res/favicon.ico + source_url: https://www.city.kuki.lg.jp/shisetsu/shiyakusyo/kobunsho.html + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T22:03:18.479006+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KUK-M-CMK.yaml b/data/custodian/JP-11-KUK-M-CMK.yaml index 72bc61bd62..c1c2773ca2 100644 --- a/data/custodian/JP-11-KUK-M-CMK.yaml +++ b/data/custodian/JP-11-KUK-M-CMK.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KUK-M-CMK - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KUK-M-CMK valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KUK-M-CMK ghcid_numeric: 11144982790899539422 valid_from: '2025-12-06T23:38:32.569186+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: COMMUNITY MUSEUM OF KUKI @@ -224,7 +225,8 @@ wikidata_enrichment: founded_by: - id: Q911522 label: Washimiya - description: dissolved municipality in Kitakatsushika district, Saitama prefecture, Japan + description: dissolved municipality in Kitakatsushika district, Saitama prefecture, + Japan wikidata_web: official_website: https://www.city.kuki.lg.jp/miryoku/rekishi_bunkazai/kyodoshiryokan/ wikidata_official_website: https://www.city.kuki.lg.jp/miryoku/rekishi_bunkazai/kyodoshiryokan/ @@ -244,3 +246,28 @@ location: postal_code: 340-0217 street_address: WASHINOMIYA, Kuki Shi, Saitama Ken, 340-0217 normalization_timestamp: '2025-12-09T11:31:31.184131+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:04:20.551590+00:00' + source_url: https://www.city.kuki.lg.jp/miryoku/rekishi_bunkazai/kyodoshiryokan/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.kuki.lg.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.kuki.lg.jp/miryoku/rekishi_bunkazai/kyodoshiryokan/index.html + css_selector: '[document] > html > head > link:nth-of-type(9)' + retrieved_on: '2025-12-23T22:04:20.551590+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.kuki.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.kuki.lg.jp/miryoku/rekishi_bunkazai/kyodoshiryokan/index.html + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T22:04:20.551590+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-KUK-M-KSP.yaml b/data/custodian/JP-11-KUK-M-KSP.yaml index 9178461c9e..002285eaa1 100644 --- a/data/custodian/JP-11-KUK-M-KSP.yaml +++ b/data/custodian/JP-11-KUK-M-KSP.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KUK-M-KSP - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KUK-M-KSP valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KUK-M-KSP ghcid_numeric: 14775107617642797981 valid_from: '2025-12-06T23:38:32.572207+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: KUKI SOUGOUBUNKAKAIKAN PLANETARIUM @@ -151,3 +152,30 @@ location: geonames_id: 1858445 geonames_name: Kukichūō feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:04:30.086376+00:00' + source_url: https://www.kuki-bunka.jp/sogobunka/planetarium + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.kuki-bunka.jp/wp-content/themes/talltrees_v_2/images/logo.png + source_url: https://www.kuki-bunka.jp/sogobunka/planetarium + css_selector: '#top > div.content_inner.clearfix > p.logo > a > img' + retrieved_on: '2025-12-23T22:04:30.086376+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: Tall Trees 久喜 + - claim_type: favicon_url + claim_value: https://www.kuki-bunka.jp/wp-content/themes/talltrees_v_2/images/favicon.ico + source_url: https://www.kuki-bunka.jp/sogobunka/planetarium + css_selector: '[document] > html.js.webkit > head > link' + retrieved_on: '2025-12-23T22:04:30.086376+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-KUM-L-RL.yaml b/data/custodian/JP-11-KUM-L-RL.yaml index 8fae8c6aba..92aabc5ea0 100644 --- a/data/custodian/JP-11-KUM-L-RL.yaml +++ b/data/custodian/JP-11-KUM-L-RL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KUM-L-RL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KUM-L-RL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KUM-L-RL ghcid_numeric: 14418000575374759612 valid_from: '2025-12-06T23:38:55.323336+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: RISSHODAIGAKUJOHOMEDEIASENTAKUMAGAYA Library @@ -204,3 +205,22 @@ location: geonames_id: 1858428 geonames_name: Kumagaya feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:05:02.310495+00:00' + source_url: http://www.ris.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.ris.ac.jp/WSR/logo.png + source_url: http://www.ris.ac.jp/library + css_selector: '[document] > html > body > header > div.inner > h1 > a > img' + retrieved_on: '2025-12-23T22:05:02.310495+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 立正大学図書館サイト + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-KUM-L-SL.yaml b/data/custodian/JP-11-KUM-L-SL.yaml index a1b1174560..a8cd30a1e8 100644 --- a/data/custodian/JP-11-KUM-L-SL.yaml +++ b/data/custodian/JP-11-KUM-L-SL.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-KUM-L-SL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-KUM-L-SL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-KUM-L-SL ghcid_numeric: 1760980764687028532 valid_from: '2025-12-06T23:38:59.519775+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAITAMAKENRITSUKUMAGAYATENJI Library @@ -151,3 +152,24 @@ location: geonames_id: 1858428 geonames_name: Kumagaya feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:05:15.819107+00:00' + source_url: http://www.normanet.ne.jp/~kumatten + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.normanet.ne.jp/wp-content/uploads/2024/03/アセット-1@2x.png + source_url: http://www.normanet.ne.jp/~kumatten + css_selector: '[document] > html > body.home.blog > div.wp-site-blocks > header.wp-block-template-part + > div.wp-block-group.alignwide > div.wp-block-group.alignfull > div.wp-block-group.site-title-conteiner + > div.wp-block-site-logo > a.custom-logo-link > img.custom-logo' + retrieved_on: '2025-12-23T22:05:15.819107+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 障害者情報ネットワークノーマネット + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-MIN-L-ML.yaml b/data/custodian/JP-11-MIN-L-ML.yaml index 50728da567..483d5f3b22 100644 --- a/data/custodian/JP-11-MIN-L-ML.yaml +++ b/data/custodian/JP-11-MIN-L-ML.yaml @@ -230,3 +230,28 @@ location: postal_code: 345-0801 street_address: 1139 MOMMA, Minamisaitama Gun Miyashiro Machi, Saitama Ken, 345-0801 normalization_timestamp: '2025-12-09T11:32:22.168076+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:06:19.485524+00:00' + source_url: https://lib-miyashiro.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib-miyashiro.jp/wpfiles/wp-content/uploads/2021/08/cropped-favicon-1-180x180.png + source_url: https://lib-miyashiro.jp + css_selector: '[document] > html > head > link:nth-of-type(49)' + retrieved_on: '2025-12-23T22:06:19.485524+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://lib-miyashiro.jp/wpfiles/wp-content/uploads/2021/08/default-1-1024x408.png + source_url: https://lib-miyashiro.jp + css_selector: '[document] > html > head > meta:nth-of-type(21)' + retrieved_on: '2025-12-23T22:06:19.485524+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-11-MIN-L-NITL.yaml b/data/custodian/JP-11-MIN-L-NITL.yaml index 6d0c7fcb34..dbc80aec8a 100644 --- a/data/custodian/JP-11-MIN-L-NITL.yaml +++ b/data/custodian/JP-11-MIN-L-NITL.yaml @@ -20,7 +20,8 @@ original_entry: city: Minamisaitama Gun Miyashiro Machi region: Saitama Ken postal_code: 345-8501 - street_address: 4-1 GAKUENDAI, Minamisaitama Gun Miyashiro Machi, Saitama Ken, 345-8501 + street_address: 4-1 GAKUENDAI, Minamisaitama Gun Miyashiro Machi, Saitama Ken, + 345-8501 processing_timestamp: '2025-12-06T23:38:54.617515+00:00' ghcid: ghcid_current: JP-11-MIN-L-NITL @@ -37,13 +38,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-MIN-L-NITL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-MIN-L-NITL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-MIN-L-NITL ghcid_numeric: 10600405391416075973 valid_from: '2025-12-06T23:38:54.617515+00:00' @@ -101,8 +103,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Nippon Institute of Technology Library @@ -217,3 +219,22 @@ location: geonames_id: 11809254 geonames_name: Miyashiro feature_code: PPLX +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:06:27.771103+00:00' + source_url: https://library.nit.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://library.nit.ac.jp/nc_favicon.ico?1653452542 + source_url: https://library.nit.ac.jp + css_selector: '[document] > html.ng-scope > head > link' + retrieved_on: '2025-12-23T22:06:27.771103+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-MIN-M-MITNIT.yaml b/data/custodian/JP-11-MIN-M-MITNIT.yaml index bd66f346f9..74778d4720 100644 --- a/data/custodian/JP-11-MIN-M-MITNIT.yaml +++ b/data/custodian/JP-11-MIN-M-MITNIT.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-MIN-M-MITNIT - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-MIN-M-MITNIT valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-MIN-M-MITNIT ghcid_numeric: 10520244838649592070 valid_from: '2025-12-06T23:38:32.668355+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: MUSEUM OF INDUSTRIAL TECHNOLOGY, NIPPON INSTITUTE OF TECHNOLOGY @@ -153,3 +154,22 @@ location: geonames_id: 11809254 geonames_name: Miyashiro feature_code: PPLX +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:06:36.369449+00:00' + source_url: https://museum.nit.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://museum.nit.ac.jp/images/common/logo.png + source_url: https://museum.nit.ac.jp + css_selector: '#home > header > div.header-inner.w-1200 > h1 > a > img' + retrieved_on: '2025-12-23T22:06:36.369449+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 工業技術博物館 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-MIN-M-MTM.yaml b/data/custodian/JP-11-MIN-M-MTM.yaml index 5b6efcfb76..1c2708a385 100644 --- a/data/custodian/JP-11-MIN-M-MTM.yaml +++ b/data/custodian/JP-11-MIN-M-MTM.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-MIN-M-MTM - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-MIN-M-MTM valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-MIN-M-MTM ghcid_numeric: 12926755921507861811 valid_from: '2025-12-06T23:38:32.663455+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: MIYASHIRO TOWN MUSEUM @@ -153,3 +154,28 @@ location: geonames_id: 11809254 geonames_name: Miyashiro feature_code: PPLX +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:06:47.142260+00:00' + source_url: https://www.town.miyashiro.lg.jp/category/10-0-0-0-0.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.miyashiro.lg.jp/category/design_img/favicon.ico + source_url: https://www.town.miyashiro.lg.jp/category/10-0-0-0-0.html + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T22:06:47.142260+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.town.miyashiro.lg.jp/design_img/ + source_url: https://www.town.miyashiro.lg.jp/category/10-0-0-0-0.html + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T22:06:47.142260+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-MIN-M-TZP.yaml b/data/custodian/JP-11-MIN-M-TZP.yaml index 2706a32453..af6f8dea9b 100644 --- a/data/custodian/JP-11-MIN-M-TZP.yaml +++ b/data/custodian/JP-11-MIN-M-TZP.yaml @@ -1027,3 +1027,36 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/o6K5KJ2FrSo/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:06:59.015773+00:00' + source_url: https://www.tobuzoo.com + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.tobuzoo.com/_assets/images/common/logo_zoo_xmas.png + source_url: https://www.tobuzoo.com + css_selector: '#js-header-logo > a > img' + retrieved_on: '2025-12-23T22:06:59.015773+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 東武動物公園 + - claim_type: favicon_url + claim_value: https://www.tobuzoo.com/_assets/images/common/apple-touch-icon.png + source_url: https://www.tobuzoo.com + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T22:06:59.015773+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.tobuzoo.com/_assets/images/common/ogimage.png + source_url: https://www.tobuzoo.com + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T22:06:59.015773+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-MIS-L-MCL.yaml b/data/custodian/JP-11-MIS-L-MCL.yaml index 801203bfc3..827e6c9861 100644 --- a/data/custodian/JP-11-MIS-L-MCL.yaml +++ b/data/custodian/JP-11-MIS-L-MCL.yaml @@ -205,3 +205,20 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.misato.saitama.jp/ wikidata_official_website: http://www.lib.misato.saitama.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:07:09.115766+00:00' + source_url: https://www.lib.misato.saitama.jp/index.html + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://www.lib.misato.saitama.jp/images/ogimage.png + source_url: https://www.lib.misato.saitama.jp/index.html + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T22:07:09.115766+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-MIS-L-MHL.yaml b/data/custodian/JP-11-MIS-L-MHL.yaml index fdd1237f33..c9e1722c51 100644 --- a/data/custodian/JP-11-MIS-L-MHL.yaml +++ b/data/custodian/JP-11-MIS-L-MHL.yaml @@ -205,3 +205,20 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.misato.saitama.jp/ wikidata_official_website: http://www.lib.misato.saitama.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:07:18.495265+00:00' + source_url: https://www.lib.misato.saitama.jp/index.html + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://www.lib.misato.saitama.jp/images/ogimage.png + source_url: https://www.lib.misato.saitama.jp/index.html + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T22:07:18.495265+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-MIS-L-MWL.yaml b/data/custodian/JP-11-MIS-L-MWL.yaml index fbc144d879..56cdeb5df9 100644 --- a/data/custodian/JP-11-MIS-L-MWL.yaml +++ b/data/custodian/JP-11-MIS-L-MWL.yaml @@ -205,3 +205,20 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.misato.saitama.jp/ wikidata_official_website: http://www.lib.misato.saitama.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:07:27.750322+00:00' + source_url: https://www.lib.misato.saitama.jp/index.html + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://www.lib.misato.saitama.jp/images/ogimage.png + source_url: https://www.lib.misato.saitama.jp/index.html + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T22:07:27.750322+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-MIY-L-KTL-kamimine_town_library.yaml b/data/custodian/JP-11-MIY-L-KTL-kamimine_town_library.yaml index 754dc41386..88d8c9107e 100644 --- a/data/custodian/JP-11-MIY-L-KTL-kamimine_town_library.yaml +++ b/data/custodian/JP-11-MIY-L-KTL-kamimine_town_library.yaml @@ -211,3 +211,20 @@ wikidata_enrichment: wikidata_media: image: Kamimine Town Furusato Gakkan.jpg wikidata_image: Kamimine Town Furusato Gakkan.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:07:43.740386+00:00' + source_url: https://kamimine.milib.jp/toshow/asp/index.aspx + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://kamimine.milib.jp/toshow/asp/shared/img/snsThumbnail.png + source_url: https://kamimine.milib.jp/toshow/asp/index.aspx + css_selector: '[document] > html > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-23T22:07:43.740386+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-MIY-L-MTL.yaml b/data/custodian/JP-11-MIY-L-MTL.yaml index 6ebfae1cc4..09ab003aea 100644 --- a/data/custodian/JP-11-MIY-L-MTL.yaml +++ b/data/custodian/JP-11-MIY-L-MTL.yaml @@ -209,3 +209,30 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.town.miyaki.lg.jp/ wikidata_official_website: http://lib.town.miyaki.lg.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:07:56.819384+00:00' + source_url: http://lib.town.miyaki.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://lib.town.miyaki.lg.jp/img/logo.png + source_url: http://lib.town.miyaki.lg.jp + css_selector: '[document] > html > body > header > h1.logo > img' + retrieved_on: '2025-12-23T22:07:56.819384+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: みやき町立図書館 + - claim_type: favicon_url + claim_value: http://lib.town.miyaki.lg.jp/img/favicon.ico + source_url: http://lib.town.miyaki.lg.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T22:07:56.819384+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-MIY-M-ISTH.yaml b/data/custodian/JP-11-MIY-M-ISTH.yaml index 716aea2bdc..3440091f0d 100644 --- a/data/custodian/JP-11-MIY-M-ISTH.yaml +++ b/data/custodian/JP-11-MIY-M-ISTH.yaml @@ -361,3 +361,22 @@ location: geonames_id: 1856798 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:31.990944+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:08:05.450197+00:00' + source_url: http://www.itsukushimajinja.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.itsukushimajinja.jp/jp/images/000_common/icon/favicon.ico + source_url: http://www.itsukushimajinja.jp + css_selector: '[document] > html.Windows.Pc > head > link' + retrieved_on: '2025-12-23T22:08:05.450197+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-NAK-M-ISMH.yaml b/data/custodian/JP-11-NAK-M-ISMH.yaml index 3fcfb6c56e..4f5a4b09a9 100644 --- a/data/custodian/JP-11-NAK-M-ISMH.yaml +++ b/data/custodian/JP-11-NAK-M-ISMH.yaml @@ -362,3 +362,28 @@ location: geonames_id: 1855809 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:32.031174+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:08:27.556950+00:00' + source_url: https://kanko-innoshima.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://kanko-innoshima.jp/wp/wp-content/uploads/2017/07/cropped-sign-180x180.png + source_url: https://kanko-innoshima.jp + css_selector: '[document] > html > head > link:nth-of-type(27)' + retrieved_on: '2025-12-23T22:08:27.556950+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://kanko-innoshima.jp/wp/wp-content/uploads/2017/03/hassakun.png + source_url: https://kanko-innoshima.jp + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T22:08:27.556950+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-11-NII-L-FL.yaml b/data/custodian/JP-11-NII-L-FL.yaml index 13aed7d658..24f3f5870f 100644 --- a/data/custodian/JP-11-NII-L-FL.yaml +++ b/data/custodian/JP-11-NII-L-FL.yaml @@ -200,3 +200,20 @@ wikidata_enrichment: wikidata_web: official_website: https://www.lib.niiza.saitama.jp/contents?1&pid=32 wikidata_official_website: https://www.lib.niiza.saitama.jp/contents?1&pid=32 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:08:46.394341+00:00' + source_url: https://www.lib.niiza.saitama.jp/contents?1&pid=32 + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://www.lib.niiza.saitama.jp/images/ogimage.png + source_url: https://www.lib.niiza.saitama.jp/contents?1&pid=32 + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T22:08:46.394341+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-NII-L-JL.yaml b/data/custodian/JP-11-NII-L-JL.yaml index 61e76fcf61..f85feaf62a 100644 --- a/data/custodian/JP-11-NII-L-JL.yaml +++ b/data/custodian/JP-11-NII-L-JL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-NII-L-JL - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-NII-L-JL valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-NII-L-JL ghcid_numeric: 1538740627176284476 valid_from: '2025-12-06T23:38:54.635550+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: JUMONJIGAKUENJOSHIDAIGAKUTOSHOJOHOSENTA Library @@ -204,3 +205,28 @@ location: geonames_id: 6822138 geonames_name: Niiza feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:08:55.471159+00:00' + source_url: http://www.jumonji-u.ac.jp/outline/center/lib-it/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.jumonji-u.ac.jp/common/image/app-icon.png + source_url: http://www.jumonji-u.ac.jp/outline/center/lib-it/index.html + css_selector: '[document] > html.js > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T22:08:55.471159+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.jumonji-u.ac.jp/common/image/sns-icon.jpg + source_url: http://www.jumonji-u.ac.jp/outline/center/lib-it/index.html + css_selector: '[document] > html.js > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T22:08:55.471159+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-NII-L-NL.yaml b/data/custodian/JP-11-NII-L-NL.yaml index 79cd3a0994..fd780f0aa1 100644 --- a/data/custodian/JP-11-NII-L-NL.yaml +++ b/data/custodian/JP-11-NII-L-NL.yaml @@ -201,3 +201,20 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.niiza.saitama.jp wikidata_official_website: http://www.lib.niiza.saitama.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:09:05.841658+00:00' + source_url: https://www.lib.niiza.saitama.jp/contents?3&pid=31 + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://www.lib.niiza.saitama.jp/images/ogimage.png + source_url: https://www.lib.niiza.saitama.jp/contents?3&pid=31 + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T22:09:05.841658+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-NII-M-AUAKMM.yaml b/data/custodian/JP-11-NII-M-AUAKMM.yaml index e245545e47..7dbbdc4673 100644 --- a/data/custodian/JP-11-NII-M-AUAKMM.yaml +++ b/data/custodian/JP-11-NII-M-AUAKMM.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-NII-M-AUAKMM - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-NII-M-AUAKMM valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-NII-M-AUAKMM ghcid_numeric: 9652165022558405931 valid_from: '2025-12-06T23:38:32.552057+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: ATOMI UNIVERSITY ATOMI KAKEI MEMORIAL MUSEUM @@ -151,3 +152,28 @@ location: geonames_id: 6822138 geonames_name: Niiza feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:10:20.134050+00:00' + source_url: https://www.atomi.ac.jp/univ/museum + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.atomi.ac.jp/univ/img/common/apple-touch-icon.png + source_url: https://www.atomi.ac.jp/univ/museum + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T22:10:20.134050+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.atomi.ac.jp/univ/resources/img/common/ogimage.png + source_url: https://www.atomi.ac.jp/univ/museum + css_selector: '[document] > html > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-23T22:10:20.134050+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-NIS-M-ACM.yaml b/data/custodian/JP-11-NIS-M-ACM.yaml index 97badf2ff3..d4c4a092db 100644 --- a/data/custodian/JP-11-NIS-M-ACM.yaml +++ b/data/custodian/JP-11-NIS-M-ACM.yaml @@ -270,3 +270,28 @@ location: postal_code: 844-0004 street_address: ODARU, Nishimatsura Gun Arita Cho, Saga Ken, 844-0004 normalization_timestamp: '2025-12-09T11:30:52.819828+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:11:39.046037+00:00' + source_url: https://www.town.arita.lg.jp/main/828.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.arita.lg.jp/dynamic/favicon.ico + source_url: https://www.town.arita.lg.jp/main/828.html + css_selector: '#ctl00_Head1 > link:nth-of-type(14)' + retrieved_on: '2025-12-23T22:11:39.046037+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.town.arita.lg.jp/dynamic/common/images/ogp/og_image.png + source_url: https://www.town.arita.lg.jp/main/828.html + css_selector: '#ctl00_Head1 > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T22:11:39.046037+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-NIS-M-APP.yaml b/data/custodian/JP-11-NIS-M-APP.yaml index 2a78672a61..f6e3e84c6b 100644 --- a/data/custodian/JP-11-NIS-M-APP.yaml +++ b/data/custodian/JP-11-NIS-M-APP.yaml @@ -243,3 +243,30 @@ wikidata_enrichment: image: Zwinger Palace (Arita Porcelain Park).jpg commons_category: Arita Porcelain Park wikidata_image: Zwinger Palace (Arita Porcelain Park).jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:11:45.631613+00:00' + source_url: https://www.arita-touki.com + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.arita-touki.com/wp-content/themes/arita-touki/images/common/logo_sp.png + source_url: https://www.arita-touki.com + css_selector: '#header > div.head > div.pn.front > a > img' + retrieved_on: '2025-12-23T22:11:45.631613+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 株式会社香蘭社 有田ポーセリンパーク + - claim_type: favicon_url + claim_value: https://www.arita-touki.com/wp-content/themes/arita-touki/apple-touch-icon.png + source_url: https://www.arita-touki.com + css_selector: '[document] > html.js.csstransitions > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T22:11:45.631613+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 152x152 + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-11-NIS-M-CP.yaml b/data/custodian/JP-11-NIS-M-CP.yaml index 9811485de0..a27b94ab86 100644 --- a/data/custodian/JP-11-NIS-M-CP.yaml +++ b/data/custodian/JP-11-NIS-M-CP.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-NIS-M-CP - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-NIS-M-CP valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-NIS-M-CP ghcid_numeric: 13259454459481359025 valid_from: '2025-12-06T23:38:40.084695+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: CHINA ON THE PARK @@ -153,3 +154,30 @@ location: geonames_id: 1865103 geonames_name: Arita feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:11:53.255733+00:00' + source_url: https://www.fukagawa-seiji.co.jp/shops/chinaonthepark + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.fukagawa-seiji.co.jp/assets/favicon.svg + source_url: https://www.fukagawa-seiji.co.jp/shops/chinaonthepark + css_selector: '[document] > html.Chrome.wf-adobe-garamond-pro-n4-active > head + > link:nth-of-type(2)' + retrieved_on: '2025-12-23T22:11:53.255733+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.fukagawa-seiji.co.jp/assets/ogp.png + source_url: https://www.fukagawa-seiji.co.jp/shops/chinaonthepark + css_selector: '[document] > html.Chrome.wf-adobe-garamond-pro-n4-active > head + > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T22:11:53.255733+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-NIS-M-HCWM.yaml b/data/custodian/JP-11-NIS-M-HCWM.yaml index 7d45ff74c0..a4428c24b1 100644 --- a/data/custodian/JP-11-NIS-M-HCWM.yaml +++ b/data/custodian/JP-11-NIS-M-HCWM.yaml @@ -407,3 +407,22 @@ location: geonames_id: 1855300 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:32.081358+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:12:02.964329+00:00' + source_url: https://www.water.city.hiroshima.lg.jp/site/siryou + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.water.city.hiroshima.lg.jp/apple-touch-icon.png + source_url: https://www.water.city.hiroshima.lg.jp/site/siryou + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T22:12:02.964329+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-NIS-M-HFMA.yaml b/data/custodian/JP-11-NIS-M-HFMA.yaml index 485fcb5b29..a3760d3cd5 100644 --- a/data/custodian/JP-11-NIS-M-HFMA.yaml +++ b/data/custodian/JP-11-NIS-M-HFMA.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-NIS-M-HFMA - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-NIS-M-HFMA valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-NIS-M-HFMA ghcid_numeric: 3887953209171894081 valid_from: '2025-12-06T23:38:40.069147+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: HISTORY AND FOLKLORE MUSEUM OF ARITA @@ -276,3 +277,30 @@ location: postal_code: 844-0001 street_address: IZUMIYAMA, Nishimatsura Gun Arita Cho, Saga Ken, 844-0001 normalization_timestamp: '2025-12-09T11:31:08.461756+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:12:15.007976+00:00' + source_url: https://www.town.arita.lg.jp/main/169.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.arita.lg.jp/dynamic/rekishi/favicon.ico + source_url: https://www.town.arita.lg.jp/main/169.html + css_selector: '[document] > html.fontawesome-i2svg-active.fontawesome-i2svg-complete + > head > link:nth-of-type(10)' + retrieved_on: '2025-12-23T22:12:15.007976+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.town.arita.lg.jp/dynamic/rekishi/common/images/ogp/og_image.png + source_url: https://www.town.arita.lg.jp/main/169.html + css_selector: '[document] > html.fontawesome-i2svg-active.fontawesome-i2svg-complete + > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T22:12:15.007976+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-NIS-M-HFMAW.yaml b/data/custodian/JP-11-NIS-M-HFMAW.yaml index e56b30108c..1719559b38 100644 --- a/data/custodian/JP-11-NIS-M-HFMAW.yaml +++ b/data/custodian/JP-11-NIS-M-HFMAW.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-NIS-M-HFMAW - valid_from: "2025-12-10T09:43:54Z" + valid_from: '2025-12-10T09:43:54Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-NIS-M-HFMAW valid_from: null - valid_to: "2025-12-10T09:43:54Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:54Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-NIS-M-HFMAW ghcid_numeric: 16706623277202826625 valid_from: '2025-12-06T23:38:40.082019+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: HISTORY AND FOLKLORE MUSEUM OF ARITA (WEST) @@ -215,3 +216,28 @@ location: geonames_id: 1865103 geonames_name: Arita feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:12:28.070893+00:00' + source_url: https://www.town.arita.lg.jp/main/3081.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.arita.lg.jp/dynamic/favicon.ico + source_url: https://www.town.arita.lg.jp/main/3081.html + css_selector: '#ctl00_Head1 > link:nth-of-type(14)' + retrieved_on: '2025-12-23T22:12:28.070893+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.town.arita.lg.jp/dynamic/common/images/ogp/og_image.png + source_url: https://www.town.arita.lg.jp/main/3081.html + css_selector: '#ctl00_Head1 > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T22:12:28.070893+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-NIS-M-KCM.yaml b/data/custodian/JP-11-NIS-M-KCM.yaml index b28cbfd59e..1a2bc2f332 100644 --- a/data/custodian/JP-11-NIS-M-KCM.yaml +++ b/data/custodian/JP-11-NIS-M-KCM.yaml @@ -262,3 +262,37 @@ wikidata_enrichment: - id: Q9109104 label: Yoshichika Uchida description: Japanese architect +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:12:45.166180+00:00' + source_url: https://saga-museum.jp/ceramic + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://saga-museum.jp/all-common/img/logo_saga.png + source_url: https://saga-museum.jp/ceramic + css_selector: '#page > header > div.header_top > div.container > div.header_top_left + > ul.yoko_list.yoko_list_line > li > a.externalLink > img' + retrieved_on: '2025-12-23T22:12:45.166180+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 佐賀県 + - claim_type: favicon_url + claim_value: https://saga-museum.jp/ceramic/common/img/apple-touch-icon.png + source_url: https://saga-museum.jp/ceramic + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T22:12:45.166180+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://saga-museum.jp/ceramic/common/img/apple-touch-icon.png + source_url: https://saga-museum.jp/ceramic + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T22:12:45.166180+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-OKE-L-OCL.yaml b/data/custodian/JP-11-OKE-L-OCL.yaml index a1b3d0bbab..c15f9b3dcb 100644 --- a/data/custodian/JP-11-OKE-L-OCL.yaml +++ b/data/custodian/JP-11-OKE-L-OCL.yaml @@ -205,3 +205,30 @@ wikidata_enrichment: wikidata_web: official_website: https://www.okegawa-library.jp/TOSHOW/asp/index.aspx wikidata_official_website: https://www.okegawa-library.jp/TOSHOW/asp/index.aspx +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:13:28.687782+00:00' + source_url: https://www.okegawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.okegawa-library.jp/themes/lib_theme/images/logo.png + source_url: https://www.okegawa-library.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-23T22:13:28.687782+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 桶川市図書館 + - claim_type: favicon_url + claim_value: https://www.okegawa-library.jp/themes/lib_theme/favicon.ico + source_url: https://www.okegawa-library.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T22:13:28.687782+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-OKE-L-OKL.yaml b/data/custodian/JP-11-OKE-L-OKL.yaml index ef16bb95c8..1dc156b51f 100644 --- a/data/custodian/JP-11-OKE-L-OKL.yaml +++ b/data/custodian/JP-11-OKE-L-OKL.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-OKE-L-OKL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-OKE-L-OKL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-OKE-L-OKL ghcid_numeric: 16349975457109521423 valid_from: '2025-12-06T23:38:52.872564+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: OKEGAWA-KAWATAYA Library @@ -151,3 +152,30 @@ location: geonames_id: 1854371 geonames_name: Okegawa feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:13:39.001474+00:00' + source_url: https://www.okegawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.okegawa-library.jp/themes/lib_theme/images/logo.png + source_url: https://www.okegawa-library.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-23T22:13:39.001474+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 桶川市図書館 + - claim_type: favicon_url + claim_value: https://www.okegawa-library.jp/themes/lib_theme/favicon.ico + source_url: https://www.okegawa-library.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T22:13:39.001474+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-OKE-L-OL-okegawashiritusakata_library.yaml b/data/custodian/JP-11-OKE-L-OL-okegawashiritusakata_library.yaml index a70dbaa963..0c67b4f851 100644 --- a/data/custodian/JP-11-OKE-L-OL-okegawashiritusakata_library.yaml +++ b/data/custodian/JP-11-OKE-L-OL-okegawashiritusakata_library.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-OKE-L-OL-okegawashiritusakata_library - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-OKE-L-OL-okegawashiritusakata_library valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-OKE-L-OL-okegawashiritusakata_library ghcid_numeric: 15123271162023875596 valid_from: '2025-12-06T23:38:52.874891+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: OkegawashirituSakata Library @@ -151,3 +152,30 @@ location: geonames_id: 1854371 geonames_name: Okegawa feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:13:47.804060+00:00' + source_url: https://www.okegawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.okegawa-library.jp/themes/lib_theme/images/logo.png + source_url: https://www.okegawa-library.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-23T22:13:47.804060+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 桶川市図書館 + - claim_type: favicon_url + claim_value: https://www.okegawa-library.jp/themes/lib_theme/favicon.ico + source_url: https://www.okegawa-library.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T22:13:47.804060+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-OKE-L-OL.yaml b/data/custodian/JP-11-OKE-L-OL.yaml index 41f95007dc..f457882fa4 100644 --- a/data/custodian/JP-11-OKE-L-OL.yaml +++ b/data/custodian/JP-11-OKE-L-OL.yaml @@ -206,3 +206,30 @@ wikidata_enrichment: wikidata_web: official_website: https://www.okegawa-library.jp/TOSHOW/asp/index.aspx wikidata_official_website: https://www.okegawa-library.jp/TOSHOW/asp/index.aspx +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:13:57.522084+00:00' + source_url: https://www.okegawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.okegawa-library.jp/themes/lib_theme/images/logo.png + source_url: https://www.okegawa-library.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-23T22:13:57.522084+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 桶川市図書館 + - claim_type: favicon_url + claim_value: https://www.okegawa-library.jp/themes/lib_theme/favicon.ico + source_url: https://www.okegawa-library.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T22:13:57.522084+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-OKE-M-OCFHMH.yaml b/data/custodian/JP-11-OKE-M-OCFHMH.yaml index dc6b5b8a62..07cb320b13 100644 --- a/data/custodian/JP-11-OKE-M-OCFHMH.yaml +++ b/data/custodian/JP-11-OKE-M-OCFHMH.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-OKE-M-OCFHMH - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-OKE-M-OCFHMH valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-OKE-M-OCFHMH ghcid_numeric: 12241254824419276294 valid_from: '2025-12-06T23:38:32.562472+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Okegawa city folk historical material hall @@ -219,3 +220,36 @@ location: postal_code: 363-0027 street_address: KAWATAYA, Okegawa Shi, Saitama Ken, 363-0027 normalization_timestamp: '2025-12-09T11:30:37.763549+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:14:06.298532+00:00' + source_url: https://www.city.okegawa.lg.jp/shiminkatsudo/shogaigakushu/lifelonglearningcenter/3347.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.okegawa.lg.jp/theme/base/img_sub/pc_header_logo.png + source_url: https://www.city.okegawa.lg.jp/shiminkatsudo/shogaigakushu/lifelonglearningcenter/3347.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T22:14:06.298532+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 埼玉県 桶川市 Okegawa City Official WebSite + - claim_type: favicon_url + claim_value: https://www.city.okegawa.lg.jp/smartphone.png + source_url: https://www.city.okegawa.lg.jp/shiminkatsudo/shogaigakushu/lifelonglearningcenter/3347.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T22:14:06.298532+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.okegawa.lg.jp/material/images/group/3/notfound_20200625.jpg + source_url: https://www.city.okegawa.lg.jp/shiminkatsudo/shogaigakushu/lifelonglearningcenter/3347.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T22:14:06.298532+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-ONO-M-OHM.yaml b/data/custodian/JP-11-ONO-M-OHM.yaml index 2040a28ae0..4014644d00 100644 --- a/data/custodian/JP-11-ONO-M-OHM.yaml +++ b/data/custodian/JP-11-ONO-M-OHM.yaml @@ -452,3 +452,22 @@ location: geonames_id: 1853992 feature_code: PPLA2 normalization_timestamp: '2025-12-09T06:53:32.129514+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:14:15.037972+00:00' + source_url: https://www.city.onomichi.hiroshima.jp/soshiki/7/4035.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.onomichi.hiroshima.jp/apple-touch-icon.png + source_url: https://www.city.onomichi.hiroshima.jp/soshiki/7/4035.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T22:14:15.037972+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-OSA-M-HM.yaml b/data/custodian/JP-11-OSA-M-HM.yaml index bbeb984460..fdcd5ed961 100644 --- a/data/custodian/JP-11-OSA-M-HM.yaml +++ b/data/custodian/JP-11-OSA-M-HM.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-OSA-M-HM - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-OSA-M-HM valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-OSA-M-HM ghcid_numeric: 10840501989842240417 valid_from: '2025-12-06T23:38:32.661181+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: HACHIGATAJYO MUSEUM @@ -153,3 +154,22 @@ location: geonames_id: 1848243 geonames_name: Yorii feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:14:29.588526+00:00' + source_url: https://www.town.yorii.saitama.jp/site/rekishikan + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.yorii.saitama.jp/apple-touch-icon.png + source_url: https://www.town.yorii.saitama.jp/site/rekishikan + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T22:14:29.588526+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-SAG-A-SPGA-saga_prefectural_government_archives.yaml b/data/custodian/JP-11-SAG-A-SPGA-saga_prefectural_government_archives.yaml index b1bcec92a1..3e2a6af6c1 100644 --- a/data/custodian/JP-11-SAG-A-SPGA-saga_prefectural_government_archives.yaml +++ b/data/custodian/JP-11-SAG-A-SPGA-saga_prefectural_government_archives.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-A-SPGA-saga_prefectural_government_archives - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-A-SPGA-saga_prefectural_government_archives valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-A-SPGA-saga_prefectural_government_archives ghcid_numeric: 11394472083572261265 valid_from: '2025-12-06T23:38:29.675662+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saga Prefectural Government Archives @@ -151,3 +152,28 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:14:36.605059+00:00' + source_url: https://www.pref.saga.lg.jp/kiji00327911/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.pref.saga.lg.jp/dynamic/favicon.ico + source_url: https://www.pref.saga.lg.jp/kiji00327911/index.html + css_selector: '#ctl00_Head1 > link:nth-of-type(11)' + retrieved_on: '2025-12-23T22:14:36.605059+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.pref.saga.lg.jp/dynamic/common/images/ogp/og_image.gif + source_url: https://www.pref.saga.lg.jp/kiji00327911/index.html + css_selector: '#ctl00_Head1 > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T22:14:36.605059+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAG-A-SPGA.yaml b/data/custodian/JP-11-SAG-A-SPGA.yaml index bc6e281790..6d7562e701 100644 --- a/data/custodian/JP-11-SAG-A-SPGA.yaml +++ b/data/custodian/JP-11-SAG-A-SPGA.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-A-SPGA - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-A-SPGA valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-A-SPGA ghcid_numeric: 160045087370444001 valid_from: '2025-12-06T23:35:50.248843+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saga Prefectural Government Archives @@ -151,3 +152,28 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:14:44.760364+00:00' + source_url: https://www.pref.saga.lg.jp/kiji00327911/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.pref.saga.lg.jp/dynamic/favicon.ico + source_url: https://www.pref.saga.lg.jp/kiji00327911/index.html + css_selector: '#ctl00_Head1 > link:nth-of-type(11)' + retrieved_on: '2025-12-23T22:14:44.760364+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.pref.saga.lg.jp/dynamic/common/images/ogp/og_image.gif + source_url: https://www.pref.saga.lg.jp/kiji00327911/index.html + css_selector: '#ctl00_Head1 > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T22:14:44.760364+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAG-L-S.yaml b/data/custodian/JP-11-SAG-L-S.yaml index c7864660cc..4a5fdac520 100644 --- a/data/custodian/JP-11-SAG-L-S.yaml +++ b/data/custodian/JP-11-SAG-L-S.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-L-S - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-L-S valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-L-S ghcid_numeric: 3950836815932115724 valid_from: '2025-12-06T23:38:59.437885+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAGAKENKYOIKUSENTATOSHOSHIRYOSHITSU @@ -151,3 +152,22 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:15:06.390577+00:00' + source_url: https://www.saga-ed.jp/%E3%82%BB%E3%83%B3%E3%82%BF%E3%83%BC%E5%88%A9%E7%94%A8%E3%81%AE%E7%9A%86%E6%A7%98%E3%81%B8/%E5%9B%B3%E6%9B%B8%E8%B3%87%E6%96%99%E5%AE%A4 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.saga-ed.jp/wp-content/uploads/2020/06/spec.png + source_url: https://www.saga-ed.jp/%E3%82%BB%E3%83%B3%E3%82%BF%E3%83%BC%E5%88%A9%E7%94%A8%E3%81%AE%E7%9A%86%E6%A7%98%E3%81%B8/%E5%9B%B3%E6%9B%B8%E8%B3%87%E6%96%99%E5%AE%A4 + css_selector: '[document] > html > head > link:nth-of-type(29)' + retrieved_on: '2025-12-23T22:15:06.390577+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAG-L-SCPL.yaml b/data/custodian/JP-11-SAG-L-SCPL.yaml index 7c9df637c0..2c79f773b5 100644 --- a/data/custodian/JP-11-SAG-L-SCPL.yaml +++ b/data/custodian/JP-11-SAG-L-SCPL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-L-SCPL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-L-SCPL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-L-SCPL ghcid_numeric: 2916782218939642450 valid_from: '2025-12-06T23:38:47.900853+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saga City Public Library @@ -275,3 +276,22 @@ location: postal_code: 840-0815 street_address: 3-2-15 TENJIN, Saga Shi, Saga Ken, 840-0815 normalization_timestamp: '2025-12-09T11:30:36.790371+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:15:15.414051+00:00' + source_url: https://www.lib.saga.saga.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.saga.saga.jp/top/images/7ea519c507537628fccfbddadd59702e0b654f2e.png + source_url: https://www.lib.saga.saga.jp + css_selector: '#TOP > header > div.wrap.header_wrap > div.title > a > img.headerFreeLogo:nth-of-type(2)' + retrieved_on: '2025-12-23T22:15:15.414051+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-SAG-L-SL.yaml b/data/custodian/JP-11-SAG-L-SL.yaml index f086bca124..579ae8f6d4 100644 --- a/data/custodian/JP-11-SAG-L-SL.yaml +++ b/data/custodian/JP-11-SAG-L-SL.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-L-SL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-L-SL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-L-SL ghcid_numeric: 12125489200035075272 valid_from: '2025-12-06T23:38:59.664914+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAGAKENRITSUTENJI Library @@ -151,3 +152,28 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:15:27.125576+00:00' + source_url: http://sagaten.sakura.ne.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://sagaten.jp/acms/wp-content/themes/sagaten/favicon/apple-touch-icon.png + source_url: http://sagaten.sakura.ne.jp + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T22:15:27.125576+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://sagaten.jp/images/ogp.png + source_url: http://sagaten.sakura.ne.jp + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T22:15:27.125576+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SAG-L-SLF.yaml b/data/custodian/JP-11-SAG-L-SLF.yaml index 519194043c..44c9925c1f 100644 --- a/data/custodian/JP-11-SAG-L-SLF.yaml +++ b/data/custodian/JP-11-SAG-L-SLF.yaml @@ -222,3 +222,22 @@ wikidata_enrichment: wikidata_media: image: Saga Forester Fuji.jpg wikidata_image: Saga Forester Fuji.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:15:35.706377+00:00' + source_url: https://www.lib.saga.saga.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.saga.saga.jp/top/images/7ea519c507537628fccfbddadd59702e0b654f2e.png + source_url: https://www.lib.saga.saga.jp + css_selector: '#TOP > header > div.wrap.header_wrap > div.title > a > img.headerFreeLogo:nth-of-type(2)' + retrieved_on: '2025-12-23T22:15:35.706377+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-SAG-L-SLH.yaml b/data/custodian/JP-11-SAG-L-SLH.yaml index fcbf7d6db1..0e0422736b 100644 --- a/data/custodian/JP-11-SAG-L-SLH.yaml +++ b/data/custodian/JP-11-SAG-L-SLH.yaml @@ -222,3 +222,22 @@ wikidata_enrichment: wikidata_media: image: Higashiyoka Cultural Hall.jpg wikidata_image: Higashiyoka Cultural Hall.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:15:44.050357+00:00' + source_url: https://www.lib.saga.saga.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.saga.saga.jp/top/images/7ea519c507537628fccfbddadd59702e0b654f2e.png + source_url: https://www.lib.saga.saga.jp + css_selector: '#TOP > header > div.wrap.header_wrap > div.title > a > img.headerFreeLogo:nth-of-type(2)' + retrieved_on: '2025-12-23T22:15:44.050357+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-SAG-L-SLI.yaml b/data/custodian/JP-11-SAG-L-SLI.yaml index c347af40e0..a141eed0e1 100644 --- a/data/custodian/JP-11-SAG-L-SLI.yaml +++ b/data/custodian/JP-11-SAG-L-SLI.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-L-SLI - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-L-SLI valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-L-SLI ghcid_numeric: 8418939588844516389 valid_from: '2025-12-06T23:38:53.829046+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAGADAIGAKUFUZOKU Library IGAKUBUNKAN @@ -215,3 +216,22 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:32:02.392733+00:00' + source_url: http://www.lib.saga-u.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib.saga-u.ac.jp/static/icon_512.png + source_url: http://www.lib.saga-u.ac.jp + css_selector: '[document] > html.is-loading > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T22:32:02.392733+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 512x512 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-SAG-L-SLK-sagashiritsu_library_kubotakan.yaml b/data/custodian/JP-11-SAG-L-SLK-sagashiritsu_library_kubotakan.yaml index 30df92e397..9aef6f3102 100644 --- a/data/custodian/JP-11-SAG-L-SLK-sagashiritsu_library_kubotakan.yaml +++ b/data/custodian/JP-11-SAG-L-SLK-sagashiritsu_library_kubotakan.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-L-SLK-sagashiritsu_library_kubotakan - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-L-SLK-sagashiritsu_library_kubotakan valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-L-SLK-sagashiritsu_library_kubotakan ghcid_numeric: 17480515578541266208 valid_from: '2025-12-06T23:38:52.895558+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAGASHIRITSU Library KUBOTAKAN @@ -223,3 +224,22 @@ location: postal_code: 849-0203 street_address: 3331-3 KUBOTACHO SHINDEN, Saga Shi, Saga Ken, 849-0203 normalization_timestamp: '2025-12-09T11:30:39.084709+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:32:11.536385+00:00' + source_url: https://www.lib.saga.saga.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.saga.saga.jp/top/images/7ea519c507537628fccfbddadd59702e0b654f2e.png + source_url: https://www.lib.saga.saga.jp + css_selector: '#TOP > header > div.wrap.header_wrap > div.title > a > img.headerFreeLogo:nth-of-type(2)' + retrieved_on: '2025-12-23T22:32:11.536385+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-SAG-L-SLK.yaml b/data/custodian/JP-11-SAG-L-SLK.yaml index 7d29fc0e3e..b382275c1b 100644 --- a/data/custodian/JP-11-SAG-L-SLK.yaml +++ b/data/custodian/JP-11-SAG-L-SLK.yaml @@ -218,3 +218,22 @@ wikidata_enrichment: wikidata_web: official_website: https://www.lib.saga.saga.jp/?page_id=113 wikidata_official_website: https://www.lib.saga.saga.jp/?page_id=113 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:32:20.927814+00:00' + source_url: https://www.lib.saga.saga.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.saga.saga.jp/top/images/7ea519c507537628fccfbddadd59702e0b654f2e.png + source_url: https://www.lib.saga.saga.jp + css_selector: '#TOP > header > div.wrap.header_wrap > div.title > a > img.headerFreeLogo:nth-of-type(2)' + retrieved_on: '2025-12-23T22:32:20.927814+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-SAG-L-SLM-sagashiritsu_library_mitsusekan.yaml b/data/custodian/JP-11-SAG-L-SLM-sagashiritsu_library_mitsusekan.yaml index f728a751b1..bfce0108ec 100644 --- a/data/custodian/JP-11-SAG-L-SLM-sagashiritsu_library_mitsusekan.yaml +++ b/data/custodian/JP-11-SAG-L-SLM-sagashiritsu_library_mitsusekan.yaml @@ -218,3 +218,22 @@ wikidata_enrichment: wikidata_web: official_website: https://www.lib.saga.saga.jp/?page_id=112 wikidata_official_website: https://www.lib.saga.saga.jp/?page_id=112 +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:32:30.260404+00:00' + source_url: https://www.lib.saga.saga.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.saga.saga.jp/top/images/7ea519c507537628fccfbddadd59702e0b654f2e.png + source_url: https://www.lib.saga.saga.jp + css_selector: '#TOP > header > div.wrap.header_wrap > div.title > a > img.headerFreeLogo:nth-of-type(2)' + retrieved_on: '2025-12-23T22:32:30.260404+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-SAG-L-SLM.yaml b/data/custodian/JP-11-SAG-L-SLM.yaml index 150248cf4c..b7bb7dffcb 100644 --- a/data/custodian/JP-11-SAG-L-SLM.yaml +++ b/data/custodian/JP-11-SAG-L-SLM.yaml @@ -222,3 +222,22 @@ wikidata_enrichment: wikidata_media: image: Morodomicho Community Center.jpg wikidata_image: Morodomicho Community Center.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:32:39.489620+00:00' + source_url: https://www.lib.saga.saga.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.saga.saga.jp/top/images/7ea519c507537628fccfbddadd59702e0b654f2e.png + source_url: https://www.lib.saga.saga.jp + css_selector: '#TOP > header > div.wrap.header_wrap > div.title > a > img.headerFreeLogo:nth-of-type(2)' + retrieved_on: '2025-12-23T22:32:39.489620+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-SAG-L-SLY.yaml b/data/custodian/JP-11-SAG-L-SLY.yaml index dd98f363ea..0fec77d1bc 100644 --- a/data/custodian/JP-11-SAG-L-SLY.yaml +++ b/data/custodian/JP-11-SAG-L-SLY.yaml @@ -223,3 +223,22 @@ wikidata_enrichment: wikidata_media: image: Wellness Yamato.jpg wikidata_image: Wellness Yamato.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:32:48.483348+00:00' + source_url: https://www.lib.saga.saga.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.lib.saga.saga.jp/top/images/7ea519c507537628fccfbddadd59702e0b654f2e.png + source_url: https://www.lib.saga.saga.jp + css_selector: '#TOP > header > div.wrap.header_wrap > div.title > a > img.headerFreeLogo:nth-of-type(2)' + retrieved_on: '2025-12-23T22:32:48.483348+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-SAG-L-SPCGELL.yaml b/data/custodian/JP-11-SAG-L-SPCGELL.yaml index f159d47db2..8c2e30021a 100644 --- a/data/custodian/JP-11-SAG-L-SPCGELL.yaml +++ b/data/custodian/JP-11-SAG-L-SPCGELL.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-L-SPCGELL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-L-SPCGELL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-L-SPCGELL ghcid_numeric: 18116058790063041688 valid_from: '2025-12-06T23:38:59.798140+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saga Prefectural Center of gender equality and lifelong learning @@ -151,3 +152,22 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:33:02.312565+00:00' + source_url: http://www.avance.or.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.avance.or.jp/_public/favicon.ico?dummy=1551918921 + source_url: http://www.avance.or.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T22:33:02.312565+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAG-L-SPL.yaml b/data/custodian/JP-11-SAG-L-SPL.yaml index fdc4f18675..1e6701ddbd 100644 --- a/data/custodian/JP-11-SAG-L-SPL.yaml +++ b/data/custodian/JP-11-SAG-L-SPL.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-L-SPL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-L-SPL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-L-SPL ghcid_numeric: 3590658434741170887 valid_from: '2025-12-06T23:38:47.898241+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saga Prefectural Library @@ -262,3 +263,22 @@ location: postal_code: 840-0041 street_address: 2-1-41 JONAI, Saga Shi, Saga Ken, 840-0041 normalization_timestamp: '2025-12-09T11:32:23.990639+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:33:14.448095+00:00' + source_url: http://www.tosyo-saga.jp/kentosyo + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.tosyo-saga.jp/nc_favicon.ico?1751950834 + source_url: http://www.tosyo-saga.jp/kentosyo + css_selector: '[document] > html.ng-scope > head > link' + retrieved_on: '2025-12-23T22:33:14.448095+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAG-L-SSLK.yaml b/data/custodian/JP-11-SAG-L-SSLK.yaml index cf29546248..d04deca744 100644 --- a/data/custodian/JP-11-SAG-L-SSLK.yaml +++ b/data/custodian/JP-11-SAG-L-SSLK.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-L-SSLK - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-L-SSLK valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-L-SSLK ghcid_numeric: 5824807797417335212 valid_from: '2025-12-06T23:38:59.753641+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAGAKENIRYOSENTAKOSEIKANTOSHOJOHOKONA(SAGAKENRITSU Library KOSEIKAMBUNSHITSU) @@ -151,3 +152,38 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:33:23.296562+00:00' + source_url: http://www.koseikan.jp/about/management/library/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.koseikan.jp/images/index/index_logo_pc.png + source_url: http://www.koseikan.jp/about/management/library/index.html + css_selector: '[document] > html.js.flexbox > body > div.l-wrapper > main.l-indexpage + > div.l-indexpage__inner > div.l-indexpage__menu > div.l-indexpage__menu__inner + > nav.p-indexpageMenu > h1.p-indexpageMenu__logo > img.u-pcOnly' + retrieved_on: '2025-12-23T22:33:23.296562+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 好生館 + - claim_type: favicon_url + claim_value: http://www.koseikan.jp/apple-touch-icon.png + source_url: http://www.koseikan.jp/about/management/library/index.html + css_selector: '[document] > html.js.flexbox > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T22:33:23.296562+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www.koseikan.jp/images/ogp/koseikan_ogp.png + source_url: http://www.koseikan.jp/about/management/library/index.html + css_selector: '[document] > html.js.flexbox > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T22:33:23.296562+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SAG-L-SUL.yaml b/data/custodian/JP-11-SAG-L-SUL.yaml index 0e713ffca0..bbf6c454cb 100644 --- a/data/custodian/JP-11-SAG-L-SUL.yaml +++ b/data/custodian/JP-11-SAG-L-SUL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-L-SUL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-L-SUL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-L-SUL ghcid_numeric: 658898420425441209 valid_from: '2025-12-06T23:38:53.826630+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saga University Library @@ -203,7 +204,8 @@ wikidata_enrichment: member_of: - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories has_parts: - id: Q111735502 label: SAGADAIGAKUFUZOKU Library IGAKUBUNKAN @@ -227,3 +229,22 @@ location: postal_code: 840-8502 street_address: 1 HONJOMACHI, Saga Shi, Saga Ken, 840-8502 normalization_timestamp: '2025-12-09T11:30:57.225500+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:33:36.619246+00:00' + source_url: http://www.lib.saga-u.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib.saga-u.ac.jp/static/icon_512.png + source_url: http://www.lib.saga-u.ac.jp + css_selector: '[document] > html.is-loading > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T22:33:36.619246+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 512x512 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-SAG-L-SWSJCL.yaml b/data/custodian/JP-11-SAG-L-SWSJCL.yaml index 98187d4cc4..ff3be2b120 100644 --- a/data/custodian/JP-11-SAG-L-SWSJCL.yaml +++ b/data/custodian/JP-11-SAG-L-SWSJCL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-L-SWSJCL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-L-SWSJCL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-L-SWSJCL ghcid_numeric: 13804932252654620688 valid_from: '2025-12-06T23:38:57.323508+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saga Women's Junior College Library @@ -189,7 +190,8 @@ wikidata_enrichment: member_of: - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: http://opac.asahigakuen.ac.jp/OPAC4/opac/Top wikidata_official_website: http://opac.asahigakuen.ac.jp/OPAC4/opac/Top @@ -211,3 +213,28 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:33:46.239815+00:00' + source_url: http://opac.asahigakuen.ac.jp/OPAC4/opac/Top + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://opac.asahigakuen.ac.jp/opac4/common/images/op4-favicon.ico + source_url: http://opac.asahigakuen.ac.jp/OPAC4/opac/Top + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T22:33:46.239815+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://opac.asahigakuen.ac.jp/opac4/common/images/ogp-image.jpg + source_url: http://opac.asahigakuen.ac.jp/OPAC4/opac/Top + css_selector: '[document] > html > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-23T22:33:46.239815+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAG-M-OMM.yaml b/data/custodian/JP-11-SAG-M-OMM.yaml index 677dfa9224..08fb105bbf 100644 --- a/data/custodian/JP-11-SAG-M-OMM.yaml +++ b/data/custodian/JP-11-SAG-M-OMM.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-M-OMM - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-M-OMM valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-M-OMM ghcid_numeric: 16935389971268001127 valid_from: '2025-12-06T23:38:39.986259+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: OKUMA MEMORIAL MUSEUM @@ -226,3 +227,28 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:35:18.069704+00:00' + source_url: https://www.okuma-museum.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.okuma-museum.jp/wp/wp-content/themes/okuma-museum/images/apple-touch-icon-precomposed.png + source_url: https://www.okuma-museum.jp + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T22:35:18.069704+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.okuma-museum.jp/wp/wp-content/themes/okuma-museum/images/okuma_ogp.png + source_url: https://www.okuma-museum.jp + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T22:35:18.069704+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-11-SAG-M-SCCM.yaml b/data/custodian/JP-11-SAG-M-SCCM.yaml index 06e94529fa..cdb9dc1d06 100644 --- a/data/custodian/JP-11-SAG-M-SCCM.yaml +++ b/data/custodian/JP-11-SAG-M-SCCM.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-M-SCCM - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-M-SCCM valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-M-SCCM ghcid_numeric: 5567243809595415575 valid_from: '2025-12-06T23:38:39.988728+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: THE SAGA CITY CULTURAL MUSEUM @@ -281,3 +282,28 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:35:25.393945+00:00' + source_url: https://www.sagarekimin.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.sagarekimin.jp/images/favicon.ico + source_url: https://www.sagarekimin.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T22:35:25.393945+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.sagarekimin.jp/images/ogimage.png + source_url: https://www.sagarekimin.jp + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T22:35:25.393945+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAG-M-SCHM.yaml b/data/custodian/JP-11-SAG-M-SCHM.yaml index 82c5eeba4e..55a5f52db8 100644 --- a/data/custodian/JP-11-SAG-M-SCHM.yaml +++ b/data/custodian/JP-11-SAG-M-SCHM.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-M-SCHM - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-M-SCHM valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-M-SCHM ghcid_numeric: 2690739331851984724 valid_from: '2025-12-06T23:38:39.978950+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAGA CASTLE HISTORY MUSEUM @@ -253,3 +254,37 @@ location: postal_code: 840-0041 street_address: JONAI, Saga Shi, Saga Ken, 840-0041 normalization_timestamp: '2025-12-09T11:30:47.258429+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:35:38.331412+00:00' + source_url: https://saga-museum.jp/sagajou + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://saga-museum.jp/all-common/img/logo_saga.png + source_url: https://saga-museum.jp/sagajou + css_selector: '#page > header > div.header_top > div.container > div.header_top_left + > ul.yoko_list.yoko_list_line > li > a.externalLink > img' + retrieved_on: '2025-12-23T22:35:38.331412+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 佐賀県 + - claim_type: favicon_url + claim_value: https://saga-museum.jp/sagajou/common/img/apple-touch-icon.png + source_url: https://saga-museum.jp/sagajou + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T22:35:38.331412+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://saga-museum.jp/sagajou/common/img/apple-touch-icon.png + source_url: https://saga-museum.jp/sagajou + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T22:35:38.331412+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SAG-M-SHG.yaml b/data/custodian/JP-11-SAG-M-SHG.yaml index 2d8d0ea5a0..3d46bbf5ed 100644 --- a/data/custodian/JP-11-SAG-M-SHG.yaml +++ b/data/custodian/JP-11-SAG-M-SHG.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-M-SHG - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-M-SHG valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-M-SHG ghcid_numeric: 3787845251354799406 valid_from: '2025-12-06T23:38:39.976662+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAGASHI HOSHIZORA GAKUSHUKAN @@ -151,3 +152,22 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:35:44.577789+00:00' + source_url: https://saga-hoshizora.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://i0.wp.com/saga-hoshizora.com/wp-content/uploads/2018/05/cropped-438c530bba9a09c6ecbe1f3b77e71bc6-9-1.png?fit=180%2C180&ssl=1 + source_url: https://saga-hoshizora.com + css_selector: '[document] > html.js.svg > head > link:nth-of-type(32)' + retrieved_on: '2025-12-23T22:35:44.577789+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-11-SAG-M-SNSKCNR.yaml b/data/custodian/JP-11-SAG-M-SNSKCNR.yaml index 38fabb735d..1ecb542f0a 100644 --- a/data/custodian/JP-11-SAG-M-SNSKCNR.yaml +++ b/data/custodian/JP-11-SAG-M-SNSKCNR.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-M-SNSKCNR - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-M-SNSKCNR valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-M-SNSKCNR ghcid_numeric: 18031388419514516732 valid_from: '2025-12-06T23:38:39.998625+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAGAKEN NOUGYO SHIKEN KENKYU CENTER NOUGYO REKISHISHIRYOUKAN @@ -164,7 +165,8 @@ wikidata_enrichment: wikidata_labels: ja: 農業歴史資料館 en: Agricultural History Museum, Saga Prefectural Agricultural Research Center - wikidata_label_en: Agricultural History Museum, Saga Prefectural Agricultural Research Center + wikidata_label_en: Agricultural History Museum, Saga Prefectural Agricultural Research + Center wikidata_label_ja: 農業歴史資料館 wikidata_descriptions: en: museum in Saga, Japan @@ -210,3 +212,28 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:35:51.834370+00:00' + source_url: https://www.pref.saga.lg.jp/kiji00322235/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.pref.saga.lg.jp/dynamic/favicon.ico + source_url: https://www.pref.saga.lg.jp/kiji00322235/index.html + css_selector: '#ctl00_Head1 > link:nth-of-type(11)' + retrieved_on: '2025-12-23T22:35:51.834370+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.pref.saga.lg.jp/dynamic/common/UploadFileOutput.ashx?c_id=3&id=22235&set_pic=1&set_file_field=0&folder=hpkiji&file=3_22235_digest1_NWTW3QGX&ext=pdf&updflWid=1200&updflHei=630 + source_url: https://www.pref.saga.lg.jp/kiji00322235/index.html + css_selector: '#ctl00_Head1 > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T22:35:51.834370+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAG-M-SPM.yaml b/data/custodian/JP-11-SAG-M-SPM.yaml index ea91d08cfd..f5150fee4a 100644 --- a/data/custodian/JP-11-SAG-M-SPM.yaml +++ b/data/custodian/JP-11-SAG-M-SPM.yaml @@ -293,3 +293,37 @@ wikidata_enrichment: - id: Q245188 label: modern architecture description: type of architecture +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:36:03.071140+00:00' + source_url: https://saga-museum.jp/museum + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://saga-museum.jp/all-common/img/logo_saga.png + source_url: https://saga-museum.jp/museum + css_selector: '#page > header > div.header_top > div.container > div.header_top_left + > ul.yoko_list.yoko_list_line > li > a.externalLink > img' + retrieved_on: '2025-12-23T22:36:03.071140+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 佐賀県 + - claim_type: favicon_url + claim_value: https://saga-museum.jp/museum/common/img/apple-touch-icon.png + source_url: https://saga-museum.jp/museum + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T22:36:03.071140+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://saga-museum.jp/museum/common/img/apple-touch-icon.png + source_url: https://saga-museum.jp/museum + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T22:36:03.071140+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SAG-M-STMNDHM.yaml b/data/custodian/JP-11-SAG-M-STMNDHM.yaml index 23203c252f..36b3d9ee19 100644 --- a/data/custodian/JP-11-SAG-M-STMNDHM.yaml +++ b/data/custodian/JP-11-SAG-M-STMNDHM.yaml @@ -256,3 +256,22 @@ location: geonames_id: 1853303 geonames_name: Saga feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:47:06.972551+00:00' + source_url: https://sano-mietsu-historymuseum.city.saga.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://sano-mietsu-historymuseum.city.saga.lg.jp/wp-content/uploads/2022/01/cropped-favicon-180x180.png + source_url: https://sano-mietsu-historymuseum.city.saga.lg.jp + css_selector: '[document] > html.js.svg > head > link:nth-of-type(38)' + retrieved_on: '2025-12-23T22:47:06.972551+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-11-SAG-M-SUAMS.yaml b/data/custodian/JP-11-SAG-M-SUAMS.yaml index dfd952efee..770518cf00 100644 --- a/data/custodian/JP-11-SAG-M-SUAMS.yaml +++ b/data/custodian/JP-11-SAG-M-SUAMS.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAG-M-SUAMS - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAG-M-SUAMS valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAG-M-SUAMS ghcid_numeric: 5914526208649480198 valid_from: '2025-12-06T23:38:40.001187+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: THE SAGA UNIVERSITY ART MUSEUM (SUAM) @@ -209,7 +210,8 @@ wikidata_enrichment: description: museum run within a university - id: Q207694 label: art museum - description: building or space for the exhibition of art (for institution, use Q3196771) + description: building or space for the exhibition of art (for institution, use + Q3196771) wikidata_instance_of: *id005 wikidata_location: coordinates: &id008 @@ -257,3 +259,28 @@ location: postal_code: 840-8502 street_address: HONJOMACHI HONJO, Saga Shi, Saga Ken, 840-8502 normalization_timestamp: '2025-12-09T11:31:16.227625+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:36:27.756641+00:00' + source_url: https://museum.saga-u.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://museum.saga-u.ac.jp/cms/wp-content/themes/suam/apple-touch-icon.png + source_url: https://museum.saga-u.ac.jp + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T22:36:27.756641+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://museum.saga-u.ac.jp/cms/wp-content/themes/suam/ogp.png + source_url: https://museum.saga-u.ac.jp + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T22:36:27.756641+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SAI-A-SAC-saitamashi_archives_center.yaml b/data/custodian/JP-11-SAI-A-SAC-saitamashi_archives_center.yaml index 47c022cc6d..9c59593b82 100644 --- a/data/custodian/JP-11-SAI-A-SAC-saitamashi_archives_center.yaml +++ b/data/custodian/JP-11-SAI-A-SAC-saitamashi_archives_center.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-A-SAC-saitamashi_archives_center - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-A-SAC-saitamashi_archives_center valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-A-SAC-saitamashi_archives_center ghcid_numeric: 12486167168659056308 valid_from: '2025-12-06T23:38:29.544210+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAITAMASHI ARCHIVES CENTER @@ -153,3 +154,28 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:36:39.668549+00:00' + source_url: https://www.city.saitama.jp/006/015/037/001/p036238.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saitama.jp/share/imgs/favicon.ico + source_url: https://www.city.saitama.jp/006/015/037/001/p036238.html + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T22:36:39.668549+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.saitama.jp/006/015/037/001/サムネイル画像のURL + source_url: https://www.city.saitama.jp/006/015/037/001/p036238.html + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T22:36:39.668549+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-A-SAC.yaml b/data/custodian/JP-11-SAI-A-SAC.yaml index cdd8f941d7..0c28969836 100644 --- a/data/custodian/JP-11-SAI-A-SAC.yaml +++ b/data/custodian/JP-11-SAI-A-SAC.yaml @@ -154,3 +154,28 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:36:45.627226+00:00' + source_url: https://www.city.saitama.jp/006/015/037/001/p036238.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saitama.jp/share/imgs/favicon.ico + source_url: https://www.city.saitama.jp/006/015/037/001/p036238.html + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T22:36:45.627226+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.saitama.jp/006/015/037/001/サムネイル画像のURL + source_url: https://www.city.saitama.jp/006/015/037/001/p036238.html + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T22:36:45.627226+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-A-SPA.yaml b/data/custodian/JP-11-SAI-A-SPA.yaml index 080c78d4f5..8b08c15f5b 100644 --- a/data/custodian/JP-11-SAI-A-SPA.yaml +++ b/data/custodian/JP-11-SAI-A-SPA.yaml @@ -236,3 +236,22 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:36:54.758118+00:00' + source_url: http://www.monjo.spec.ed.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.monjo.spec.ed.jp/nc_favicon.ico?1547712870 + source_url: http://www.monjo.spec.ed.jp + css_selector: '[document] > html.ng-scope > head > link' + retrieved_on: '2025-12-23T22:36:54.758118+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-L-KL.yaml b/data/custodian/JP-11-SAI-L-KL.yaml index 35e7a9879e..fa6d08cb3d 100644 --- a/data/custodian/JP-11-SAI-L-KL.yaml +++ b/data/custodian/JP-11-SAI-L-KL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-L-KL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-L-KL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-L-KL ghcid_numeric: 5370232120464118864 valid_from: '2025-12-06T23:38:56.887601+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: KOKUSAIGAKUINSAITAMATANKIDAIGAKUFUZOKU Library @@ -206,3 +207,22 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:37:20.153688+00:00' + source_url: http://sc.kgef.ac.jp/toshokan.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://sc.kgef.ac.jp/wp-content/uploads/2023/12/cropped-logo-icon600x600-180x180.jpg + source_url: http://sc.kgef.ac.jp/toshokan.html + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T22:37:20.153688+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-11-SAI-L-ML.yaml b/data/custodian/JP-11-SAI-L-ML.yaml index 6da1d5a590..3a3ea35300 100644 --- a/data/custodian/JP-11-SAI-L-ML.yaml +++ b/data/custodian/JP-11-SAI-L-ML.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-L-ML - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-L-ML valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-L-ML ghcid_numeric: 14565256984373054653 valid_from: '2025-12-06T23:38:55.412547+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: MEJIRODAIGAKUIWATSUKI Library @@ -206,3 +207,22 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:37:30.148917+00:00' + source_url: http://www.mejiro.ac.jp/library/iwatsuki/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.mejiro.ac.jp/images/common/favicon.ico + source_url: http://www.mejiro.ac.jp/library/iwatsuki/index.html + css_selector: '[document] > html > head > link:nth-of-type(13)' + retrieved_on: '2025-12-23T22:37:30.148917+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-L-NL-ningensogokagakudaigakuhokeniryogakubu_library.yaml b/data/custodian/JP-11-SAI-L-NL-ningensogokagakudaigakuhokeniryogakubu_library.yaml index ab5ed04678..53e4b6c75c 100644 --- a/data/custodian/JP-11-SAI-L-NL-ningensogokagakudaigakuhokeniryogakubu_library.yaml +++ b/data/custodian/JP-11-SAI-L-NL-ningensogokagakudaigakuhokeniryogakubu_library.yaml @@ -20,7 +20,8 @@ original_entry: city: Saitama Shi Iwatsuki Ku region: Saitama Ken postal_code: 339-0052 - street_address: 354-3 OTA SHINSHOJI KURUWA, Saitama Shi Iwatsuki Ku, Saitama Ken, 339-0052 + street_address: 354-3 OTA SHINSHOJI KURUWA, Saitama Shi Iwatsuki Ku, Saitama Ken, + 339-0052 processing_timestamp: '2025-12-06T23:38:59.730471+00:00' ghcid: ghcid_current: JP-11-SAI-L-NL-ningensogokagakudaigakuhokeniryogakubu_library @@ -37,13 +38,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-L-NL-ningensogokagakudaigakuhokeniryogakubu_library - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-L-NL-ningensogokagakudaigakuhokeniryogakubu_library valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-L-NL-ningensogokagakudaigakuhokeniryogakubu_library ghcid_numeric: 4115852862298247038 valid_from: '2025-12-06T23:38:59.730471+00:00' @@ -101,8 +103,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: NINGENSOGOKAGAKUDAIGAKUHOKENIRYOGAKUBU Library @@ -192,7 +194,8 @@ location: region_code: 11 country: JP postal_code: 339-0052 - street_address: 354-3 OTA SHINSHOJI KURUWA, Saitama Shi Iwatsuki Ku, Saitama Ken, 339-0052 + street_address: 354-3 OTA SHINSHOJI KURUWA, Saitama Shi Iwatsuki Ku, Saitama Ken, + 339-0052 normalization_timestamp: '2025-12-09T20:21:32.119989+00:00' latitude: 35.90807 longitude: 139.65657 @@ -206,3 +209,28 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:37:38.375190+00:00' + source_url: http://www.human.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.human.ac.jp/wp/wp-content/themes/uhas/images/favicon/apple-touch-icon.png + source_url: http://www.human.ac.jp/library + css_selector: '[document] > html._device-pc._os-mac > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T22:37:38.375190+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: http://www.human.ac.jp/wp/wp-content/themes/uhas/images/common/OGP.jpg + source_url: http://www.human.ac.jp/library + css_selector: '[document] > html._device-pc._os-mac > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T22:37:38.375190+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 6 diff --git a/data/custodian/JP-11-SAI-L-NL.yaml b/data/custodian/JP-11-SAI-L-NL.yaml index fe95b3eb8b..6154d40136 100644 --- a/data/custodian/JP-11-SAI-L-NL.yaml +++ b/data/custodian/JP-11-SAI-L-NL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-L-NL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-L-NL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-L-NL ghcid_numeric: 5997221323897974948 valid_from: '2025-12-06T23:38:54.646559+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: NINGENSOGOKAGAKUDAIGAKU Library @@ -206,3 +207,28 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:37:47.795526+00:00' + source_url: http://www.human.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.human.ac.jp/wp/wp-content/themes/uhas/images/favicon/apple-touch-icon.png + source_url: http://www.human.ac.jp/library + css_selector: '[document] > html._device-pc._os-mac > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T22:37:47.795526+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: http://www.human.ac.jp/wp/wp-content/themes/uhas/images/common/OGP.jpg + source_url: http://www.human.ac.jp/library + css_selector: '[document] > html._device-pc._os-mac > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T22:37:47.795526+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 6 diff --git a/data/custodian/JP-11-SAI-L-S-saitamakendanjokyodosankakusuishinsentajohoraibura.yaml b/data/custodian/JP-11-SAI-L-S-saitamakendanjokyodosankakusuishinsentajohoraibura.yaml index 191e4d89c2..29b5348393 100644 --- a/data/custodian/JP-11-SAI-L-S-saitamakendanjokyodosankakusuishinsentajohoraibura.yaml +++ b/data/custodian/JP-11-SAI-L-S-saitamakendanjokyodosankakusuishinsentajohoraibura.yaml @@ -154,3 +154,22 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:38:01.468825+00:00' + source_url: http://www.pref.saitama.lg.jp/withyou/library/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.pref.saitama.lg.jp/shared/images/favicon/apple-touch-icon-precomposed.png + source_url: http://www.pref.saitama.lg.jp/withyou/library/index.html + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-23T22:38:01.468825+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-SAI-L-SATTAA.yaml b/data/custodian/JP-11-SAI-L-SATTAA.yaml index 3055b469ec..5662d5dff7 100644 --- a/data/custodian/JP-11-SAI-L-SATTAA.yaml +++ b/data/custodian/JP-11-SAI-L-SATTAA.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-L-SATTAA - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-L-SATTAA valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-L-SATTAA ghcid_numeric: 9340036876251629910 valid_from: '2025-12-06T23:38:58.651149+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saitama Arts Theater, Theater Arts Archives @@ -153,3 +154,28 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:38:12.789395+00:00' + source_url: http://www.saf.or.jp/arthall/facilities/sa_inforoom + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.saf.or.jp/wp-content/uploads/2020/07/cropped-favicon-180x180.png + source_url: http://www.saf.or.jp/arthall/facilities/sa_inforoom + css_selector: '[document] > html > head > link:nth-of-type(18)' + retrieved_on: '2025-12-23T22:38:12.789395+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.saf.or.jp/arthall/images/detail_no_photo.gif + source_url: http://www.saf.or.jp/arthall/facilities/sa_inforoom + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T22:38:12.789395+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-11-SAI-L-SL.yaml b/data/custodian/JP-11-SAI-L-SL.yaml index 7dd65a9674..1bb867017d 100644 --- a/data/custodian/JP-11-SAI-L-SL.yaml +++ b/data/custodian/JP-11-SAI-L-SL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-L-SL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-L-SL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-L-SL ghcid_numeric: 11143717315417654229 valid_from: '2025-12-06T23:38:54.932189+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SHIBAURAKOGYODAIGAKUGAKUJUTSUJOHOSENTAOMIYA Library @@ -206,3 +207,22 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T22:38:26.599948+00:00' + source_url: http://lib.shibaura-it.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://lib.shibaura-it.ac.jp/files/images/favicon.ico + source_url: http://lib.shibaura-it.ac.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T22:38:26.599948+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-L-SPAL.yaml b/data/custodian/JP-11-SAI-L-SPAL.yaml index 54c8cfcf29..228141f187 100644 --- a/data/custodian/JP-11-SAI-L-SPAL.yaml +++ b/data/custodian/JP-11-SAI-L-SPAL.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-L-SPAL - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-L-SPAL valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-L-SPAL ghcid_numeric: 14975083963014749738 valid_from: '2025-12-06T23:38:57.759421+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Saitama Prefectural Assembly Library @@ -153,3 +154,22 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:33:52.360385+00:00' + source_url: http://www.pref.saitama.lg.jp/soshiki/f1606/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.pref.saitama.lg.jp/shared/site_rn/images/favicon/apple-touch-icon-precomposed.png + source_url: http://www.pref.saitama.lg.jp/soshiki/f1606/index.html + css_selector: '[document] > html > head > link:nth-of-type(6)' + retrieved_on: '2025-12-24T01:33:52.360385+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-SAI-M-IK.yaml b/data/custodian/JP-11-SAI-M-IK.yaml index df4237b2d7..65b76fae1e 100644 --- a/data/custodian/JP-11-SAI-M-IK.yaml +++ b/data/custodian/JP-11-SAI-M-IK.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-M-IK - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-M-IK valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-M-IK ghcid_numeric: 2574153907511075444 valid_from: '2025-12-06T23:38:32.345222+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: IWATSUKI KYOUDOSHIRYOUKAN @@ -257,3 +258,28 @@ location: postal_code: 339-0057 street_address: HONCHO, Saitama Shi Iwatsuki Ku, Saitama Ken, 339-0057 normalization_timestamp: '2025-12-09T11:30:48.344697+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:34:36.777031+00:00' + source_url: https://www.city.saitama.jp/004/005/004/005/iwatsuki/001/p009074.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saitama.jp/share/imgs/favicon.ico + source_url: https://www.city.saitama.jp/004/005/004/005/iwatsuki/001/p009074.html + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-24T01:34:36.777031+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.saitama.jp/004/005/004/005/iwatsuki/001/サムネイル画像のURL + source_url: https://www.city.saitama.jp/004/005/004/005/iwatsuki/001/p009074.html + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T01:34:36.777031+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-M-OBAMS.yaml b/data/custodian/JP-11-SAI-M-OBAMS.yaml index ed88739d4b..8af1496bce 100644 --- a/data/custodian/JP-11-SAI-M-OBAMS.yaml +++ b/data/custodian/JP-11-SAI-M-OBAMS.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-M-OBAMS - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-M-OBAMS valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-M-OBAMS ghcid_numeric: 14789886801235549011 valid_from: '2025-12-06T23:38:32.286590+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: THE OMIYA BONSAI ART MUSEUM, SAITAMA @@ -212,7 +213,8 @@ wikidata_enrichment: instance_of: &id005 - id: Q207694 label: art museum - description: building or space for the exhibition of art (for institution, use Q3196771) + description: building or space for the exhibition of art (for institution, use + Q3196771) main_subject: - id: Q64365 label: bonsai @@ -261,3 +263,30 @@ location: postal_code: 331-0804 street_address: TOROCHO, Saitama Shi Kita Ku, Saitama Ken, 331-0804 normalization_timestamp: '2025-12-09T11:32:34.010342+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:34:50.730610+00:00' + source_url: https://www.bonsai-art-museum.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.bonsai-art-museum.jp/wp-content/themes/bonsai2023_2/img_en/common/logo.png + source_url: https://www.bonsai-art-museum.jp + css_selector: '[document] > html > body > header > h1.enH1 > a > img' + retrieved_on: '2025-12-24T01:34:50.730610+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + - claim_type: favicon_url + claim_value: https://www.bonsai-art-museum.jp/safari-pinned-tab.svg + source_url: https://www.bonsai-art-museum.jp + css_selector: '[document] > html > head > link:nth-of-type(21)' + retrieved_on: '2025-12-24T01:34:50.730610+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/JP-11-SAI-M-OHBFMKK.yaml b/data/custodian/JP-11-SAI-M-OHBFMKK.yaml index 81a43b0b70..7aa02da80c 100644 --- a/data/custodian/JP-11-SAI-M-OHBFMKK.yaml +++ b/data/custodian/JP-11-SAI-M-OHBFMKK.yaml @@ -233,3 +233,28 @@ wikidata_enrichment: image: Minuma Classic museum.jpg commons_category: Old House of the Bando Family (Minuma Kurashikku-Kan) wikidata_image: Minuma Classic museum.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:34:59.882494+00:00' + source_url: https://www.city.saitama.jp/004/005/004/005/004 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saitama.jp/share/imgs/favicon.ico + source_url: https://www.city.saitama.jp/004/005/004/005/004 + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-24T01:34:59.882494+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.saitama.jp/004/005/004/005/サムネイル画像のURL + source_url: https://www.city.saitama.jp/004/005/004/005/004 + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T01:34:59.882494+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-M-RM.yaml b/data/custodian/JP-11-SAI-M-RM.yaml index 50c70053cb..93deff6b54 100644 --- a/data/custodian/JP-11-SAI-M-RM.yaml +++ b/data/custodian/JP-11-SAI-M-RM.yaml @@ -298,3 +298,30 @@ wikidata_enrichment: - id: Q11309024 label: JR East Design description: 東京都渋谷区に本社を置く建築設計事務所 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:35:07.275143+00:00' + source_url: https://www.railway-museum.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.railway-museum.jp/img/common/logo.png + source_url: https://www.railway-museum.jp + css_selector: '#wrapper > header > div.header_top > div.left > h1 > a > img' + retrieved_on: '2025-12-24T01:35:07.275143+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鉄道博物館 THE RAILWAY MUSEUM + - claim_type: favicon_url + claim_value: https://www.railway-museum.jp/favicon.ico + source_url: https://www.railway-museum.jp + css_selector: '[document] > html > head > link:nth-of-type(12)' + retrieved_on: '2025-12-24T01:35:07.275143+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-M-SCST.yaml b/data/custodian/JP-11-SAI-M-SCST.yaml index 2de5aca4f2..373537c9d0 100644 --- a/data/custodian/JP-11-SAI-M-SCST.yaml +++ b/data/custodian/JP-11-SAI-M-SCST.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-M-SCST - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-M-SCST valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-M-SCST ghcid_numeric: 3396574622103730788 valid_from: '2025-12-06T23:38:32.308721+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAITAMA CITY SPACE THEATER @@ -153,3 +154,30 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:35:34.318526+00:00' + source_url: http://www.ucyugekijo.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.ucyugekijo.jp/wp/wp-content/themes/ucyugekijo/images/logo.svg + source_url: http://www.ucyugekijo.jp + css_selector: '#header > div.st-header__logo:nth-of-type(2) > a > img' + retrieved_on: '2025-12-24T01:35:34.318526+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: さいたま市宇宙劇場 + - claim_type: favicon_url + claim_value: https://www.ucyugekijo.jp/wp/wp-content/themes/ucyugekijo/images/favicon.ico + source_url: http://www.ucyugekijo.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T01:35:34.318526+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-M-SMCAM.yaml b/data/custodian/JP-11-SAI-M-SMCAM.yaml index 62741aaa2b..7c1695b891 100644 --- a/data/custodian/JP-11-SAI-M-SMCAM.yaml +++ b/data/custodian/JP-11-SAI-M-SMCAM.yaml @@ -231,3 +231,28 @@ wikidata_enrichment: wikidata_media: image: Saitama Saitama Municipal Manga Hall 1.JPG wikidata_image: Saitama Saitama Municipal Manga Hall 1.JPG +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:35:42.776267+00:00' + source_url: https://www.city.saitama.jp/004/005/002/003/001 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saitama.jp/share/imgs/favicon.ico + source_url: https://www.city.saitama.jp/004/005/002/003/001 + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-24T01:35:42.776267+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.saitama.jp/004/005/002/003/サムネイル画像のURL + source_url: https://www.city.saitama.jp/004/005/002/003/001 + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T01:35:42.776267+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-M-SMUM.yaml b/data/custodian/JP-11-SAI-M-SMUM.yaml index 791a2f9dcd..9b96a94c34 100644 --- a/data/custodian/JP-11-SAI-M-SMUM.yaml +++ b/data/custodian/JP-11-SAI-M-SMUM.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-M-SMUM - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-M-SMUM valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-M-SMUM ghcid_numeric: 14104077033628014497 valid_from: '2025-12-06T23:38:32.330019+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAITAMA MUNICIPAL URAWA MUSEUM @@ -213,8 +214,8 @@ wikidata_enrichment: instance_of: &id005 - id: Q33506 label: museum - description: institution that holds artifacts and other objects of scientific, artistic, cultural, historical, or other - importance + description: institution that holds artifacts and other objects of scientific, + artistic, cultural, historical, or other importance wikidata_instance_of: *id005 wikidata_location: coordinates: &id008 @@ -261,3 +262,28 @@ location: postal_code: 336-0911 street_address: MIMURO, Saitama Shi Midori Ku, Saitama Ken, 336-0911 normalization_timestamp: '2025-12-09T11:31:11.120725+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:35:50.382968+00:00' + source_url: https://www.city.saitama.jp/004/005/004/005/002/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saitama.jp/share/imgs/favicon.ico + source_url: https://www.city.saitama.jp/004/005/004/005/002/index.html + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-24T01:35:50.382968+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.saitama.jp/004/005/004/005/002/サムネイル画像のURL + source_url: https://www.city.saitama.jp/004/005/004/005/002/index.html + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T01:35:50.382968+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-M-SOKD.yaml b/data/custodian/JP-11-SAI-M-SOKD.yaml index cccbe24378..e96e53be2e 100644 --- a/data/custodian/JP-11-SAI-M-SOKD.yaml +++ b/data/custodian/JP-11-SAI-M-SOKD.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-M-SOKD - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-M-SOKD valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-M-SOKD ghcid_numeric: 9299280899799194757 valid_from: '2025-12-06T23:38:32.339227+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAITAMASHI OSAKIKOUEN KODOMO DOUBUTSUEN @@ -205,8 +206,8 @@ wikidata_enrichment: description: park in a city or other incorporated place - id: Q43501 label: zoo - description: 'collection of assorted wild animal species kept for purposes like: study, conservation and, or, commercial - exhibition' + description: 'collection of assorted wild animal species kept for purposes like: + study, conservation and, or, commercial exhibition' wikidata_instance_of: *id004 wikidata_location: coordinates: &id007 @@ -247,3 +248,28 @@ location: postal_code: 336-0974 street_address: OSAKI, Saitama Shi Midori Ku, Saitama Ken, 336-0974 normalization_timestamp: '2025-12-09T11:30:48.583942+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:35:57.519694+00:00' + source_url: https://www.city.saitama.jp/004/001/003/001/p002130.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saitama.jp/share/imgs/favicon.ico + source_url: https://www.city.saitama.jp/004/001/003/001/p002130.html + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-24T01:35:57.519694+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.saitama.jp/004/001/003/001/サムネイル画像のURL + source_url: https://www.city.saitama.jp/004/001/003/001/p002130.html + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T01:35:57.519694+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-M-UAM.yaml b/data/custodian/JP-11-SAI-M-UAM.yaml index ffbaf4a46c..e559e3d3f5 100644 --- a/data/custodian/JP-11-SAI-M-UAM.yaml +++ b/data/custodian/JP-11-SAI-M-UAM.yaml @@ -536,3 +536,22 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/Op9e7VoSLC0/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:36:06.178348+00:00' + source_url: https://www.city.saitama.jp/urawa-art-museum + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saitama.jp/share/imgs/favicon-urawa.ico + source_url: https://www.city.saitama.jp/urawa-art-museum + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T01:36:06.178348+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-M-UMTA.yaml b/data/custodian/JP-11-SAI-M-UMTA.yaml index 1dda13b585..11a0955e4a 100644 --- a/data/custodian/JP-11-SAI-M-UMTA.yaml +++ b/data/custodian/JP-11-SAI-M-UMTA.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-M-UMTA - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-M-UMTA valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-M-UMTA ghcid_numeric: 9602204586211868900 valid_from: '2025-12-06T23:38:32.334239+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Urawa Museum of Traditional Architecture @@ -244,3 +245,28 @@ location: postal_code: 336-0925 street_address: SHIMOYAMAGUCHISHINDEN, Saitama Shi Midori Ku, Saitama Ken, 336-0925 normalization_timestamp: '2025-12-09T11:30:48.081311+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:36:13.247183+00:00' + source_url: https://www.city.saitama.jp/004/005/004/005/003/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saitama.jp/share/imgs/favicon.ico + source_url: https://www.city.saitama.jp/004/005/004/005/003/index.html + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-24T01:36:13.247183+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.saitama.jp/004/005/004/005/003/サムネイル画像のURL + source_url: https://www.city.saitama.jp/004/005/004/005/003/index.html + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T01:36:13.247183+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAI-M-YLHM.yaml b/data/custodian/JP-11-SAI-M-YLHM.yaml index 2eb1abae2a..3ced62b944 100644 --- a/data/custodian/JP-11-SAI-M-YLHM.yaml +++ b/data/custodian/JP-11-SAI-M-YLHM.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAI-M-YLHM - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAI-M-YLHM valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAI-M-YLHM ghcid_numeric: 9408312537163090612 valid_from: '2025-12-06T23:38:32.314624+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: YONO LOCAL HISTORY MUSEUM @@ -153,3 +154,28 @@ location: geonames_id: 6940394 geonames_name: Saitama feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:36:21.009808+00:00' + source_url: https://www.city.saitama.jp/004/005/004/005/yono/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saitama.jp/share/imgs/favicon.ico + source_url: https://www.city.saitama.jp/004/005/004/005/yono/index.html + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-24T01:36:21.009808+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.saitama.jp/004/005/004/005/yono/サムネイル画像のURL + source_url: https://www.city.saitama.jp/004/005/004/005/yono/index.html + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T01:36:21.009808+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAK-L-JUMML.yaml b/data/custodian/JP-11-SAK-L-JUMML.yaml index b774a63b11..fd2b27f91a 100644 --- a/data/custodian/JP-11-SAK-L-JUMML.yaml +++ b/data/custodian/JP-11-SAK-L-JUMML.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAK-L-JUMML - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAK-L-JUMML valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAK-L-JUMML ghcid_numeric: 14381522842950447650 valid_from: '2025-12-06T23:38:54.609914+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Josai University Mizuta Memorial Library @@ -190,7 +191,8 @@ wikidata_enrichment: description: organisatie uit Japan - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: http://libopac.josai.ac.jp wikidata_official_website: http://libopac.josai.ac.jp @@ -212,3 +214,28 @@ location: geonames_id: 1853209 geonames_name: Sakado feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:36:29.123459+00:00' + source_url: http://libopac.josai.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.josai.ac.jp/common/image/app-icon.png + source_url: http://libopac.josai.ac.jp + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T01:36:29.123459+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.josai.ac.jp/common/image/sns-icon.jpg + source_url: http://libopac.josai.ac.jp + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-24T01:36:29.123459+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SAK-L-LKNU.yaml b/data/custodian/JP-11-SAK-L-LKNU.yaml index 9f5cf61836..a897860d99 100644 --- a/data/custodian/JP-11-SAK-L-LKNU.yaml +++ b/data/custodian/JP-11-SAK-L-LKNU.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAK-L-LKNU - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAK-L-LKNU valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAK-L-LKNU ghcid_numeric: 1507328155590184808 valid_from: '2025-12-06T23:38:54.625161+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Library of Kagawa Nutrition University @@ -189,7 +190,8 @@ wikidata_enrichment: part_of: id: Q6346396 label: Kagawa Nutrition University - description: private university in Japan with campuses in Saitama Prefecture and Tokyo + description: private university in Japan with campuses in Saitama Prefecture + and Tokyo member_of: - id: Q56061971 label: Japan Association of Private University Libraries @@ -215,3 +217,22 @@ location: geonames_id: 1853209 geonames_name: Sakado feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:36:35.657047+00:00' + source_url: http://www.eiyo.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.eiyo.ac.jp/icons/webclip.png + source_url: http://www.eiyo.ac.jp/library + css_selector: '[document] > html.w-mod-js.wf-notosansjp-n4-active > head > link:nth-of-type(6)' + retrieved_on: '2025-12-24T01:36:35.657047+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-SAK-L-ML.yaml b/data/custodian/JP-11-SAK-L-ML.yaml index af2e220887..a4a12060d9 100644 --- a/data/custodian/JP-11-SAK-L-ML.yaml +++ b/data/custodian/JP-11-SAK-L-ML.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAK-L-ML - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAK-L-ML valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAK-L-ML ghcid_numeric: 710715265794629921 valid_from: '2025-12-06T23:38:54.726256+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: MEIKAIDAIGAKUSHIGAKUBUMEDEIASENTA( Library ) @@ -182,7 +183,8 @@ wikidata_enrichment: part_of: id: Q6810046 label: Meikai University - description: Higher education institution in Urayasu City, Chiba Prefecture, Japan + description: Higher education institution in Urayasu City, Chiba Prefecture, + Japan wikidata_web: official_website: https://opac-dent.meikai.ac.jp/ wikidata_official_website: https://opac-dent.meikai.ac.jp/ @@ -204,3 +206,22 @@ location: geonames_id: 1853209 geonames_name: Sakado feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:36:45.230033+00:00' + source_url: https://opac-dent.meikai.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://opac-dent.meikai.ac.jp/favicon.ico + source_url: https://opac-dent.meikai.ac.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T01:36:45.230033+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAK-M-MPGJU.yaml b/data/custodian/JP-11-SAK-M-MPGJU.yaml index c6856667e5..1f51ee34b5 100644 --- a/data/custodian/JP-11-SAK-M-MPGJU.yaml +++ b/data/custodian/JP-11-SAK-M-MPGJU.yaml @@ -215,3 +215,28 @@ wikidata_enrichment: wikidata_web: official_website: https://www.josai.ac.jp/yakuyou/index.html wikidata_official_website: https://www.josai.ac.jp/yakuyou/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:37:13.473762+00:00' + source_url: https://www.josai.ac.jp/yakuyou + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.josai.ac.jp/common/image/app-icon.png + source_url: https://www.josai.ac.jp/yakuyou + css_selector: '[document] > html.js > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T01:37:13.473762+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.josai.ac.jp/common/image/sns-icon.jpg + source_url: https://www.josai.ac.jp/yakuyou + css_selector: '[document] > html.js > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-24T01:37:13.473762+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SAK-M-SCFHM.yaml b/data/custodian/JP-11-SAK-M-SCFHM.yaml index d9716ea341..f51587d521 100644 --- a/data/custodian/JP-11-SAK-M-SCFHM.yaml +++ b/data/custodian/JP-11-SAK-M-SCFHM.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAK-M-SCFHM - valid_from: "2025-12-10T09:43:55Z" + valid_from: '2025-12-10T09:43:55Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAK-M-SCFHM valid_from: null - valid_to: "2025-12-10T09:43:55Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:55Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAK-M-SCFHM ghcid_numeric: 8712144546425500276 valid_from: '2025-12-06T23:38:32.590592+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAKADO CITY FOLK HISTORICAL MUSEUM @@ -174,8 +175,8 @@ wikidata_enrichment: instance_of: &id004 - id: Q33506 label: museum - description: institution that holds artifacts and other objects of scientific, artistic, cultural, historical, or other - importance + description: institution that holds artifacts and other objects of scientific, + artistic, cultural, historical, or other importance wikidata_instance_of: *id004 wikidata_location: coordinates: &id007 @@ -217,3 +218,22 @@ location: postal_code: 350-0212 street_address: ISHII, Sakado Shi, Saitama Ken, 350-0212 normalization_timestamp: '2025-12-09T11:30:56.117121+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:37:21.654102+00:00' + source_url: https://www.city.sakado.lg.jp/soshiki/47/501.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.sakado.lg.jp/apple-touch-icon.png + source_url: https://www.city.sakado.lg.jp/soshiki/47/501.html + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-24T01:37:21.654102+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-SAT-M-SCM.yaml b/data/custodian/JP-11-SAT-M-SCM.yaml index 8882c5604a..b840f4e21b 100644 --- a/data/custodian/JP-11-SAT-M-SCM.yaml +++ b/data/custodian/JP-11-SAT-M-SCM.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAT-M-SCM - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAT-M-SCM valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAT-M-SCM ghcid_numeric: 1365079235388610347 valid_from: '2025-12-06T23:38:32.595448+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SATTE CITY MUSEUM @@ -151,3 +152,36 @@ location: geonames_id: 1852849 geonames_name: Satte feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:37:34.087212+00:00' + source_url: https://www.city.satte.lg.jp/sitetop/soshiki/kyoudomuseum/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.satte.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: https://www.city.satte.lg.jp/sitetop/soshiki/kyoudomuseum/index.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-24T01:37:34.087212+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 埼玉県 幸手市 Satte City + - claim_type: favicon_url + claim_value: https://www.city.satte.lg.jp/theme/base/img_common/smartphone.png + source_url: https://www.city.satte.lg.jp/sitetop/soshiki/kyoudomuseum/index.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T01:37:34.087212+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.satte.lg.jp/theme/base/img_common/ogp_noimage.png + source_url: https://www.city.satte.lg.jp/sitetop/soshiki/kyoudomuseum/index.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-24T01:37:34.087212+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SAY-L-BUHL.yaml b/data/custodian/JP-11-SAY-L-BUHL.yaml index 63fd94fa21..57c61767f9 100644 --- a/data/custodian/JP-11-SAY-L-BUHL.yaml +++ b/data/custodian/JP-11-SAY-L-BUHL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAY-L-BUHL - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAY-L-BUHL valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAY-L-BUHL ghcid_numeric: 5981178323481043770 valid_from: '2025-12-06T23:38:54.640984+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Bunri University of Hospitality Library @@ -204,3 +205,28 @@ location: geonames_id: 1907146 geonames_name: Sayama feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:37:41.298920+00:00' + source_url: http://www.bunri-c.ac.jp/univ/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.bunri-c.ac.jp/dcms_media/other/icon.ico + source_url: http://www.bunri-c.ac.jp/univ/library + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T01:37:41.298920+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://bunri-c-s.cms2.jp/dcms_media/image/thumbnail.jpg + source_url: http://www.bunri-c.ac.jp/univ/library + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-24T01:37:41.298920+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-SAY-L-MML.yaml b/data/custodian/JP-11-SAY-L-MML.yaml index 253356b6e7..f7c88ef3d0 100644 --- a/data/custodian/JP-11-SAY-L-MML.yaml +++ b/data/custodian/JP-11-SAY-L-MML.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAY-L-MML - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAY-L-MML valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAY-L-MML ghcid_numeric: 15891514047000374066 valid_from: '2025-12-06T23:38:54.662813+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: MUSASHINOGAKUINDAIGAKU・MUSASHINOTANKIDAIGAKU Library @@ -204,3 +205,28 @@ location: geonames_id: 1907146 geonames_name: Sayama feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:37:49.718485+00:00' + source_url: http://www.musashino.ac.jp/mgu/campus_life/institution/campus.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.musashino.ac.jp/mgu/wp/wp-content/themes/musashino_mgu/images/favicon/apple-touch-icon.png + source_url: http://www.musashino.ac.jp/mgu/campus_life/institution/campus.html + css_selector: '[document] > html._device-pc._os-mac > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T01:37:49.718485+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www.musashino.ac.jp/mgu/wp/wp-content/themes/musashino_mgu/images/common/ogp.jpg + source_url: http://www.musashino.ac.jp/mgu/campus_life/institution/campus.html + css_selector: '[document] > html._device-pc._os-mac > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T01:37:49.718485+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 5 diff --git a/data/custodian/JP-11-SAY-L-SL-sayamashiritsusayamadai_library.yaml b/data/custodian/JP-11-SAY-L-SL-sayamashiritsusayamadai_library.yaml index fedca89500..f1987eab04 100644 --- a/data/custodian/JP-11-SAY-L-SL-sayamashiritsusayamadai_library.yaml +++ b/data/custodian/JP-11-SAY-L-SL-sayamashiritsusayamadai_library.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.sayama.saitama.jp/shisetsuannai/bunkashisetsu/sayama/index.html wikidata_official_website: http://www.city.sayama.saitama.jp/shisetsuannai/bunkashisetsu/sayama/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:37:58.041137+00:00' + source_url: https://www.sayama-friendship.net + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.sayama-friendship.net/header_logo.png + source_url: https://www.sayama-friendship.net + css_selector: '#header > p:nth-of-type(2) > a > img' + retrieved_on: '2025-12-24T01:37:58.041137+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 狭山台図書館|施設案内やイベントのご紹介 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-SAY-L-SL.yaml b/data/custodian/JP-11-SAY-L-SL.yaml index c09bac1ef6..770baef32c 100644 --- a/data/custodian/JP-11-SAY-L-SL.yaml +++ b/data/custodian/JP-11-SAY-L-SL.yaml @@ -201,3 +201,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.sayama.saitama.jp/shisetsuannai/bunkashisetsu/chuou/index.html wikidata_official_website: http://www.city.sayama.saitama.jp/shisetsuannai/bunkashisetsu/chuou/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:38:06.463240+00:00' + source_url: https://www.city.sayama.saitama.jp/shisei/shisetsu/bunkashisetsu/library/chuou/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.sayama.saitama.jp/images/apple-touch-icon.png + source_url: https://www.city.sayama.saitama.jp/shisei/shisetsu/bunkashisetsu/library/chuou/index.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T01:38:06.463240+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-SAY-L-TL.yaml b/data/custodian/JP-11-SAY-L-TL.yaml index 10672a24f0..4c8ee703e9 100644 --- a/data/custodian/JP-11-SAY-L-TL.yaml +++ b/data/custodian/JP-11-SAY-L-TL.yaml @@ -205,3 +205,38 @@ location: geonames_id: 1907146 geonames_name: Sayama feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:38:14.281441+00:00' + source_url: http://www.tokyo-kasei.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.tokyo-kasei.ac.jp/common/image/logo.png + source_url: http://www.tokyo-kasei.ac.jp/library + css_selector: '[document] > html > body.isPC.is-scrolling > div.wrapper-new:nth-of-type(4) + > nav.global-nav-new > div.global-nav-new__header > div.global-nav-new__logo + > a > img' + retrieved_on: '2025-12-24T01:38:14.281441+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 東京家政大学 + - claim_type: favicon_url + claim_value: http://www.tokyo-kasei.ac.jp/common/image/touch-icon.png + source_url: http://www.tokyo-kasei.ac.jp/library + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T01:38:14.281441+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.tokyo-kasei.ac.jp/common/image/og_image.jpg + source_url: http://www.tokyo-kasei.ac.jp/library + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-24T01:38:14.281441+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SAY-M-SCCSZ.yaml b/data/custodian/JP-11-SAY-M-SCCSZ.yaml index 9da653f115..d3ffc63a6e 100644 --- a/data/custodian/JP-11-SAY-M-SCCSZ.yaml +++ b/data/custodian/JP-11-SAY-M-SCCSZ.yaml @@ -936,3 +936,31 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/S6y7WkWJbR8/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:38:22.246964+00:00' + source_url: https://www.parks.or.jp/chikozan/zoo + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.parks.or.jp/chikozan/img/site_logo.png + source_url: https://www.parks.or.jp/chikozan/zoo + css_selector: '[document] > html > body > header > div.header_inner > div.header_sub + > p.site_logo.pc > a > img' + retrieved_on: '2025-12-24T01:38:22.246964+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: さいたまの公園 + - claim_type: favicon_url + claim_value: https://www.parks.or.jp/favicon.ico + source_url: https://www.parks.or.jp/chikozan/zoo + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T01:38:22.246964+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SAY-M-ST.yaml b/data/custodian/JP-11-SAY-M-ST.yaml index dec4eff929..9cad2c1e99 100644 --- a/data/custodian/JP-11-SAY-M-ST.yaml +++ b/data/custodian/JP-11-SAY-M-ST.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SAY-M-ST - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SAY-M-ST valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SAY-M-ST ghcid_numeric: 13642675765446092456 valid_from: '2025-12-06T23:38:32.489830+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAYAMASHI TOSHIRYOKKASHOKUBUTSUEN @@ -238,3 +239,31 @@ location: postal_code: 350-1335 street_address: KASHIWABARA, Sayama Shi, Saitama Ken, 350-1335 normalization_timestamp: '2025-12-09T11:30:58.871243+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:38:31.357789+00:00' + source_url: http://www.parks.or.jp/chikozan/botanical-garden + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.parks.or.jp/chikozan/img/site_logo.png + source_url: http://www.parks.or.jp/chikozan/botanical-garden + css_selector: '[document] > html > body > header > div.header_inner > div.header_sub + > p.site_logo.pc > a > img' + retrieved_on: '2025-12-24T01:38:31.357789+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: さいたまの公園 + - claim_type: favicon_url + claim_value: http://www.parks.or.jp/favicon.ico + source_url: http://www.parks.or.jp/chikozan/botanical-garden + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T01:38:31.357789+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-SHI-L-SCL.yaml b/data/custodian/JP-11-SHI-L-SCL.yaml index b586df1b9d..3dbd4edbe3 100644 --- a/data/custodian/JP-11-SHI-L-SCL.yaml +++ b/data/custodian/JP-11-SHI-L-SCL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SHI-L-SCL - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SHI-L-SCL valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SHI-L-SCL ghcid_numeric: 2593598293605017877 valid_from: '2025-12-06T23:38:42.746901+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Shiraoka City Library @@ -194,8 +195,9 @@ wikidata_enrichment: instance_of: &id004 - id: Q7075 label: library - description: institution charged with the care of a collection of literary, musical, artistic, or reference materials, - such as books, manuscripts, recordings, or films + description: institution charged with the care of a collection of literary, + musical, artistic, or reference materials, such as books, manuscripts, recordings, + or films wikidata_instance_of: *id004 wikidata_location: coordinates: &id007 @@ -230,3 +232,36 @@ location: postal_code: 349-0292 street_address: 432 SENDANO, Shiraoka Shi, Saitama Ken, 349-0292 normalization_timestamp: '2025-12-09T11:31:37.657642+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:38:39.544773+00:00' + source_url: https://www.city.shiraoka.lg.jp/soshiki/kyouikubu/syougaigakusyuuka/36/32/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.shiraoka.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: https://www.city.shiraoka.lg.jp/soshiki/kyouikubu/syougaigakusyuuka/36/32/index.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-24T01:38:39.544773+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 白岡市 Shiraoka City + - claim_type: favicon_url + claim_value: https://www.city.shiraoka.lg.jp/theme/base/img_common/smartphone.png + source_url: https://www.city.shiraoka.lg.jp/soshiki/kyouikubu/syougaigakusyuuka/36/32/index.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T01:38:39.544773+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.shiraoka.lg.jp/material/images/group/31/toshokan03.jpg + source_url: https://www.city.shiraoka.lg.jp/soshiki/kyouikubu/syougaigakusyuuka/36/32/index.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-24T01:38:39.544773+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SHI-M-SSCR.yaml b/data/custodian/JP-11-SHI-M-SSCR.yaml index 2cf7d9726b..6ab1573733 100644 --- a/data/custodian/JP-11-SHI-M-SSCR.yaml +++ b/data/custodian/JP-11-SHI-M-SSCR.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-SHI-M-SSCR - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-SHI-M-SSCR valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-SHI-M-SSCR ghcid_numeric: 3389555529950122243 valid_from: '2025-12-06T23:38:32.609121+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SHIRAOKASHI SHOUGAIGAKUSHU CENTER REKISHISHIRYOUTENJISHITSU @@ -151,3 +152,36 @@ location: geonames_id: 1851883 geonames_name: Shiraoka feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:38:57.777955+00:00' + source_url: http://www.city.shiraoka.lg.jp/11876.htm + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.shiraoka.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.shiraoka.lg.jp/11876.htm + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-24T01:38:57.777955+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 白岡市 Shiraoka City + - claim_type: favicon_url + claim_value: http://www.city.shiraoka.lg.jp/theme/base/img_common/smartphone.png + source_url: http://www.city.shiraoka.lg.jp/11876.htm + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T01:38:57.777955+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.shiraoka.lg.jp/theme/base/img_common/ogp_noimage.png + source_url: http://www.city.shiraoka.lg.jp/11876.htm + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-24T01:38:57.777955+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SOK-L-DUL.yaml b/data/custodian/JP-11-SOK-L-DUL.yaml index d5ec232525..cbde5af416 100644 --- a/data/custodian/JP-11-SOK-L-DUL.yaml +++ b/data/custodian/JP-11-SOK-L-DUL.yaml @@ -210,3 +210,28 @@ location: geonames_id: 1851604 geonames_name: Sōka feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:39:05.995854+00:00' + source_url: http://www.dokkyo.ac.jp/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.dokkyo.ac.jp/images/app.png?ver=2020 + source_url: http://www.dokkyo.ac.jp/library + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T01:39:05.995854+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.dokkyo.ac.jp/images/ogimg.png + source_url: http://www.dokkyo.ac.jp/library + css_selector: '[document] > html > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-24T01:39:05.995854+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-SOK-L-SCL.yaml b/data/custodian/JP-11-SOK-L-SCL.yaml index 29e837f6c4..af95f82ada 100644 --- a/data/custodian/JP-11-SOK-L-SCL.yaml +++ b/data/custodian/JP-11-SOK-L-SCL.yaml @@ -206,3 +206,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.city.soka.saitama.jp wikidata_official_website: http://www.lib.city.soka.saitama.jp +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:39:15.583226+00:00' + source_url: https://www.lib.city.soka.saitama.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.lib.city.soka.saitama.jp/themes/lib_theme/favicon.ico + source_url: https://www.lib.city.soka.saitama.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T01:39:15.583226+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TAK-L-TLR.yaml b/data/custodian/JP-11-TAK-L-TLR.yaml index 5850ef67fd..2c25ac0c7e 100644 --- a/data/custodian/JP-11-TAK-L-TLR.yaml +++ b/data/custodian/JP-11-TAK-L-TLR.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TAK-L-TLR - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TAK-L-TLR valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TAK-L-TLR ghcid_numeric: 6539710957107000111 valid_from: '2025-12-06T23:38:47.927964+00:00' @@ -80,7 +81,8 @@ provenance: extraction_method: 'Created from CH-Annotator file: japan_complete_ch_annotator.yaml' confidence_score: 0.98 notes: - - Removed incorrect wikidata_enrichment on 2025-12-08T08:18:48.709468+00:00. Re-enrichment required with proper matching. + - Removed incorrect wikidata_enrichment on 2025-12-08T08:18:48.709468+00:00. Re-enrichment + required with proper matching. - Canonical location normalized on 2025-12-09T11:31:13Z ch_annotator: convention_id: ch_annotator-v1_7_0 @@ -109,8 +111,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: TAKEOSHI Library ・REKISHISHIRYOKAN @@ -268,3 +270,22 @@ location: postal_code: 843-0022 street_address: 5304-1 TAKEOCHO TAKEO, Takeo Shi, Saga Ken, 843-0022 normalization_timestamp: '2025-12-09T11:31:13.329190+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:39:46.646820+00:00' + source_url: https://takeo.city-library.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://takeo.city-library.jp/themes/library/images/favicon/favicon.svg + source_url: https://takeo.city-library.jp + css_selector: '[document] > html.no-js.sr > head > link' + retrieved_on: '2025-12-24T01:39:46.646820+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/svg+xml + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-11-TAK-M-SPSSM.yaml b/data/custodian/JP-11-TAK-M-SPSSM.yaml index ae6b24b5a9..5009a6d41f 100644 --- a/data/custodian/JP-11-TAK-M-SPSSM.yaml +++ b/data/custodian/JP-11-TAK-M-SPSSM.yaml @@ -2287,3 +2287,23 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/yFQqwX3qPaw/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:39:56.482383+00:00' + source_url: https://www.yumeginga.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.yumeginga.jp/_public/favicon.ico?dummy=1723171938 + source_url: https://www.yumeginga.jp + css_selector: '[document] > html.wf-carbon-i7-active.wf-carbon-n7-active > head + > link' + retrieved_on: '2025-12-24T01:39:56.482383+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TAK-M-TCLHM.yaml b/data/custodian/JP-11-TAK-M-TCLHM.yaml index 6fb2e24187..ef16b9be06 100644 --- a/data/custodian/JP-11-TAK-M-TCLHM.yaml +++ b/data/custodian/JP-11-TAK-M-TCLHM.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TAK-M-TCLHM - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TAK-M-TCLHM valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TAK-M-TCLHM ghcid_numeric: 18240643783862006207 valid_from: '2025-12-06T23:38:40.039005+00:00' @@ -109,8 +110,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: TAKEO CITY LIBRARY・HISTORICAL MUSEUM @@ -270,3 +271,30 @@ location: postal_code: 843-0022 street_address: TAKEOCHO TAKEO, Takeo Shi, Saga Ken, 843-0022 normalization_timestamp: '2025-12-09T11:30:23.515880+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:40:04.768580+00:00' + source_url: http://www.city.takeo.lg.jp/rekisi/his-top.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.takeo.lg.jp/rekisi/img/common/logo.png + source_url: http://www.city.takeo.lg.jp/rekisi/his-top.html + css_selector: '#globalHeader > h1 > a > img' + retrieved_on: '2025-12-24T01:40:04.768580+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 武雄市歴史資料館 + - claim_type: favicon_url + claim_value: http://www.city.takeo.lg.jp/favicon.ico + source_url: http://www.city.takeo.lg.jp/rekisi/his-top.html + css_selector: '[document] > html > head > link:nth-of-type(8)' + retrieved_on: '2025-12-24T01:40:04.768580+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TAK-M-TCLM.yaml b/data/custodian/JP-11-TAK-M-TCLM.yaml index 6e078e0d6e..f770ca835d 100644 --- a/data/custodian/JP-11-TAK-M-TCLM.yaml +++ b/data/custodian/JP-11-TAK-M-TCLM.yaml @@ -215,3 +215,22 @@ location: postal_code: 846-0031 street_address: TAKUMACHI, Taku Shi, Saga Ken, 846-0031 normalization_timestamp: '2025-12-09T11:32:00.474874+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:40:12.266443+00:00' + source_url: https://www.city.taku.lg.jp/soshiki/20/2207.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.taku.lg.jp/img/apple-touch-icon.png + source_url: https://www.city.taku.lg.jp/soshiki/20/2207.html + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-24T01:40:12.266443+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-TOD-A-TAC-todashi_archives_center.yaml b/data/custodian/JP-11-TOD-A-TAC-todashi_archives_center.yaml index 253a6b78e2..2a38118b5e 100644 --- a/data/custodian/JP-11-TOD-A-TAC-todashi_archives_center.yaml +++ b/data/custodian/JP-11-TOD-A-TAC-todashi_archives_center.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TOD-A-TAC-todashi_archives_center - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TOD-A-TAC-todashi_archives_center valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TOD-A-TAC-todashi_archives_center ghcid_numeric: 9040406419864521637 valid_from: '2025-12-06T23:38:29.551617+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: TODASHI ARCHIVES CENTER @@ -151,3 +152,36 @@ location: geonames_id: 2127794 geonames_name: Toda feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:40:27.640509+00:00' + source_url: https://www.city.toda.saitama.jp/soshiki/377/hakubutsu-archives.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.toda.saitama.jp/img/common/logo.svg + source_url: https://www.city.toda.saitama.jp/soshiki/377/hakubutsu-archives.html + css_selector: '#logo > p > a > img' + retrieved_on: '2025-12-24T01:40:27.640509+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 戸田市 TodaCity + - claim_type: favicon_url + claim_value: https://www.city.toda.saitama.jp/apple-touch-icon.png + source_url: https://www.city.toda.saitama.jp/soshiki/377/hakubutsu-archives.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T01:40:27.640509+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.toda.saitama.jp/ogp.png + source_url: https://www.city.toda.saitama.jp/soshiki/377/hakubutsu-archives.html + css_selector: '[document] > html > head > meta:nth-of-type(5)' + retrieved_on: '2025-12-24T01:40:27.640509+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-TOD-A-TAC.yaml b/data/custodian/JP-11-TOD-A-TAC.yaml index 82fc49c6a4..29c285252a 100644 --- a/data/custodian/JP-11-TOD-A-TAC.yaml +++ b/data/custodian/JP-11-TOD-A-TAC.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TOD-A-TAC - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TOD-A-TAC valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TOD-A-TAC ghcid_numeric: 11071697224715410785 valid_from: '2025-12-06T23:35:50.100088+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: TODASHI ARCHIVES CENTER @@ -151,3 +152,36 @@ location: geonames_id: 2127794 geonames_name: Toda feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:40:34.829146+00:00' + source_url: https://www.city.toda.saitama.jp/soshiki/377/hakubutsu-archives.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.toda.saitama.jp/img/common/logo.svg + source_url: https://www.city.toda.saitama.jp/soshiki/377/hakubutsu-archives.html + css_selector: '#logo > p > a > img' + retrieved_on: '2025-12-24T01:40:34.829146+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 戸田市 TodaCity + - claim_type: favicon_url + claim_value: https://www.city.toda.saitama.jp/apple-touch-icon.png + source_url: https://www.city.toda.saitama.jp/soshiki/377/hakubutsu-archives.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T01:40:34.829146+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.toda.saitama.jp/ogp.png + source_url: https://www.city.toda.saitama.jp/soshiki/377/hakubutsu-archives.html + css_selector: '[document] > html > head > meta:nth-of-type(5)' + retrieved_on: '2025-12-24T01:40:34.829146+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-TOD-L-TCL.yaml b/data/custodian/JP-11-TOD-L-TCL.yaml index 6a672e71f5..b5545cfb10 100644 --- a/data/custodian/JP-11-TOD-L-TCL.yaml +++ b/data/custodian/JP-11-TOD-L-TCL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TOD-L-TCL - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TOD-L-TCL valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TOD-L-TCL ghcid_numeric: 733168471166724572 valid_from: '2025-12-06T23:38:42.589859+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Toda City Library @@ -253,3 +254,30 @@ location: postal_code: 335-0021 street_address: 1707 NIIZO, Toda Shi, Saitama Ken, 335-0021 normalization_timestamp: '2025-12-09T11:32:17.421351+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:40:41.132028+00:00' + source_url: https://library.toda.saitama.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://library.toda.saitama.jp/img/common/logo.png + source_url: https://library.toda.saitama.jp + css_selector: '#wrap > header > h1 > a > img' + retrieved_on: '2025-12-24T01:40:41.132028+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 戸田市立図書館 + - claim_type: favicon_url + claim_value: https://library.toda.saitama.jp/img/apple-touch-icon.png + source_url: https://library.toda.saitama.jp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T01:40:41.132028+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-TOD-L-TLK.yaml b/data/custodian/JP-11-TOD-L-TLK.yaml index 04261e54e3..378c6c4529 100644 --- a/data/custodian/JP-11-TOD-L-TLK.yaml +++ b/data/custodian/JP-11-TOD-L-TLK.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: https://library.toda.saitama.jp/annaizu/annaizu05.htm wikidata_official_website: https://library.toda.saitama.jp/annaizu/annaizu05.htm +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:40:48.532630+00:00' + source_url: https://library.toda.saitama.jp/annaizu/annaizu05.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://library.toda.saitama.jp/img/common/logo.png + source_url: https://library.toda.saitama.jp/annaizu/annaizu05.html + css_selector: '#wrap > header > h1 > a > img' + retrieved_on: '2025-12-24T01:40:48.532630+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 戸田市立図書館 + - claim_type: favicon_url + claim_value: https://library.toda.saitama.jp/img/apple-touch-icon.png + source_url: https://library.toda.saitama.jp/annaizu/annaizu05.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T01:40:48.532630+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-TOD-L-TLM.yaml b/data/custodian/JP-11-TOD-L-TLM.yaml index f650c7311f..871d164e77 100644 --- a/data/custodian/JP-11-TOD-L-TLM.yaml +++ b/data/custodian/JP-11-TOD-L-TLM.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: https://library.toda.saitama.jp/annaizu/annaizu07.htm wikidata_official_website: https://library.toda.saitama.jp/annaizu/annaizu07.htm +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:40:55.395927+00:00' + source_url: https://library.toda.saitama.jp/annaizu/annaizu07.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://library.toda.saitama.jp/img/common/logo.png + source_url: https://library.toda.saitama.jp/annaizu/annaizu07.html + css_selector: '#wrap > header > h1 > a > img' + retrieved_on: '2025-12-24T01:40:55.395927+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 戸田市立図書館 + - claim_type: favicon_url + claim_value: https://library.toda.saitama.jp/img/apple-touch-icon.png + source_url: https://library.toda.saitama.jp/annaizu/annaizu07.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T01:40:55.395927+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-TOD-L-TLS-todashiritsu_library_shimotodaminamibunshitsu.yaml b/data/custodian/JP-11-TOD-L-TLS-todashiritsu_library_shimotodaminamibunshitsu.yaml index 75895e7986..a53bb4946c 100644 --- a/data/custodian/JP-11-TOD-L-TLS-todashiritsu_library_shimotodaminamibunshitsu.yaml +++ b/data/custodian/JP-11-TOD-L-TLS-todashiritsu_library_shimotodaminamibunshitsu.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: https://library.toda.saitama.jp/annaizu/annaizu08.htm wikidata_official_website: https://library.toda.saitama.jp/annaizu/annaizu08.htm +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:41:02.916307+00:00' + source_url: https://library.toda.saitama.jp/annaizu/annaizu08.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://library.toda.saitama.jp/img/common/logo.png + source_url: https://library.toda.saitama.jp/annaizu/annaizu08.html + css_selector: '#wrap > header > h1 > a > img' + retrieved_on: '2025-12-24T01:41:02.916307+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 戸田市立図書館 + - claim_type: favicon_url + claim_value: https://library.toda.saitama.jp/img/apple-touch-icon.png + source_url: https://library.toda.saitama.jp/annaizu/annaizu08.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T01:41:02.916307+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-TOD-L-TLS.yaml b/data/custodian/JP-11-TOD-L-TLS.yaml index b11a64aba8..09221bdfb0 100644 --- a/data/custodian/JP-11-TOD-L-TLS.yaml +++ b/data/custodian/JP-11-TOD-L-TLS.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: https://library.toda.saitama.jp/annaizu/annaizu06.htm wikidata_official_website: https://library.toda.saitama.jp/annaizu/annaizu06.htm +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:41:09.876349+00:00' + source_url: https://library.toda.saitama.jp/annaizu/annaizu06.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://library.toda.saitama.jp/img/common/logo.png + source_url: https://library.toda.saitama.jp/annaizu/annaizu06.html + css_selector: '#wrap > header > h1 > a > img' + retrieved_on: '2025-12-24T01:41:09.876349+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 戸田市立図書館 + - claim_type: favicon_url + claim_value: https://library.toda.saitama.jp/img/apple-touch-icon.png + source_url: https://library.toda.saitama.jp/annaizu/annaizu06.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T01:41:09.876349+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-TOD-M-TCM.yaml b/data/custodian/JP-11-TOD-M-TCM.yaml index 1eb2f6cab1..f6cb77b468 100644 --- a/data/custodian/JP-11-TOD-M-TCM.yaml +++ b/data/custodian/JP-11-TOD-M-TCM.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TOD-M-TCM - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TOD-M-TCM valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TOD-M-TCM ghcid_numeric: 4157221002030016126 valid_from: '2025-12-06T23:38:32.515849+00:00' @@ -108,8 +109,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: TODA CITY MUSEUM @@ -262,3 +263,36 @@ location: postal_code: 335-0021 street_address: NIIZO, Toda Shi, Saitama Ken, 335-0021 normalization_timestamp: '2025-12-09T11:32:04.569616+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:41:17.501143+00:00' + source_url: https://www.city.toda.saitama.jp/soshiki/377 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.toda.saitama.jp/img/common/logo.svg + source_url: https://www.city.toda.saitama.jp/soshiki/377 + css_selector: '#logo > p > a > img' + retrieved_on: '2025-12-24T01:41:17.501143+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 戸田市 TodaCity + - claim_type: favicon_url + claim_value: https://www.city.toda.saitama.jp/apple-touch-icon.png + source_url: https://www.city.toda.saitama.jp/soshiki/377 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T01:41:17.501143+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.toda.saitama.jp/ogp.png + source_url: https://www.city.toda.saitama.jp/soshiki/377 + css_selector: '[document] > html > head > meta:nth-of-type(5)' + retrieved_on: '2025-12-24T01:41:17.501143+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-TOD-M-TKHSSGC.yaml b/data/custodian/JP-11-TOD-M-TKHSSGC.yaml index 4b66a12421..6e4e5918b6 100644 --- a/data/custodian/JP-11-TOD-M-TKHSSGC.yaml +++ b/data/custodian/JP-11-TOD-M-TKHSSGC.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TOD-M-TKHSSGC - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TOD-M-TKHSSGC valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TOD-M-TKHSSGC ghcid_numeric: 2973301787012383302 valid_from: '2025-12-06T23:38:32.519199+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: TODASHIRITSU KYODO HAKUBUTSUKAN SAIKO SHIZEN GAKUSHU CENTER @@ -151,3 +152,36 @@ location: geonames_id: 2127794 geonames_name: Toda feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:41:24.725424+00:00' + source_url: https://www.city.toda.saitama.jp/soshiki/378 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.toda.saitama.jp/img/common/logo.svg + source_url: https://www.city.toda.saitama.jp/soshiki/378 + css_selector: '#logo > p > a > img' + retrieved_on: '2025-12-24T01:41:24.725424+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 戸田市 TodaCity + - claim_type: favicon_url + claim_value: https://www.city.toda.saitama.jp/apple-touch-icon.png + source_url: https://www.city.toda.saitama.jp/soshiki/378 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T01:41:24.725424+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.toda.saitama.jp/ogp.png + source_url: https://www.city.toda.saitama.jp/soshiki/378 + css_selector: '[document] > html > head > meta:nth-of-type(5)' + retrieved_on: '2025-12-24T01:41:24.725424+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-TOK-L-NLT.yaml b/data/custodian/JP-11-TOK-L-NLT.yaml index b525017eb5..ddf1a7f0f2 100644 --- a/data/custodian/JP-11-TOK-L-NLT.yaml +++ b/data/custodian/JP-11-TOK-L-NLT.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TOK-L-NLT - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TOK-L-NLT valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TOK-L-NLT ghcid_numeric: 9692518509007281297 valid_from: '2025-12-06T23:38:54.686052+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: NIHONDAIGAKUGEIJUTSUGAKUBU Library (TOKOROZAWA) @@ -204,3 +205,28 @@ location: geonames_id: 1850181 geonames_name: Tokorozawa feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:41:46.435157+00:00' + source_url: http://www.art.nihon-u.ac.jp/library/use.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.art.nihon-u.ac.jp/img-com/favicon.ico + source_url: http://www.art.nihon-u.ac.jp/library/use.html + css_selector: '[document] > html > head > link:nth-of-type(17)' + retrieved_on: '2025-12-24T01:41:46.435157+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.art.nihon-u.ac.jp/upload/logo.png + source_url: http://www.art.nihon-u.ac.jp/library/use.html + css_selector: '[document] > html > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-24T01:41:46.435157+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-TOK-L-NUCL.yaml b/data/custodian/JP-11-TOK-L-NUCL.yaml index 5da27218f9..fade38767e 100644 --- a/data/custodian/JP-11-TOK-L-NUCL.yaml +++ b/data/custodian/JP-11-TOK-L-NUCL.yaml @@ -41,13 +41,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TOK-L-NUCL - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TOK-L-NUCL valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TOK-L-NUCL ghcid_numeric: 17158232313807929930 valid_from: '2025-12-06T23:38:54.670394+00:00' @@ -106,8 +107,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Nihon University Central Library @@ -224,3 +225,28 @@ location: geonames_id: 1850181 geonames_name: Tokorozawa feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:41:53.109319+00:00' + source_url: https://www.nihon-u.ac.jp/facilities/library/guide + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.nihon-u.ac.jp/assets/img/favicon.ico + source_url: https://www.nihon-u.ac.jp/facilities/library/guide + css_selector: '[document] > html.js_ajaxload.menu_active > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T01:41:53.109319+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 32x32 + - claim_type: og_image_url + claim_value: https://www.nihon-u.ac.jp/assets/img/ogp.png + source_url: https://www.nihon-u.ac.jp/facilities/library/guide + css_selector: '[document] > html.js_ajaxload.menu_active > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-24T01:41:53.109319+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-TOK-L-SHFFIC.yaml b/data/custodian/JP-11-TOK-L-SHFFIC.yaml index 904274a173..717ee8df10 100644 --- a/data/custodian/JP-11-TOK-L-SHFFIC.yaml +++ b/data/custodian/JP-11-TOK-L-SHFFIC.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TOK-L-SHFFIC - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TOK-L-SHFFIC valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TOK-L-SHFFIC ghcid_numeric: 9530617663061379712 valid_from: '2025-12-06T23:38:59.852654+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SAYAMA HILL FLORA & FAUNA INTERACTION CENTER @@ -151,3 +152,36 @@ location: geonames_id: 1850181 geonames_name: Tokorozawa feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:42:04.951514+00:00' + source_url: http://www.ikifure.info + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://ikifure.info/info/wp-content/themes/ikifure/img/logo1.jpg + source_url: http://www.ikifure.info + css_selector: '#header > div.header-inner > div.site:nth-of-type(2) > a > img' + retrieved_on: '2025-12-24T01:42:04.951514+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 狭山丘陵いきものふれあいの里センター + - claim_type: favicon_url + claim_value: https://ikifure.info/info/wp-content/uploads/2016/09/logo-1-1.jpg + source_url: http://www.ikifure.info + css_selector: '[document] > html.fa-events-icons-ready > head > link:nth-of-type(16)' + retrieved_on: '2025-12-24T01:42:04.951514+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 32x32 + - claim_type: og_image_url + claim_value: https://ikifure.info/info/wp-content/themes/ikifure/picnic-top.jpg + source_url: http://www.ikifure.info + css_selector: '[document] > html.fa-events-icons-ready > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-24T01:42:04.951514+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-TOK-L-TL.yaml b/data/custodian/JP-11-TOK-L-TL.yaml index 372eb409b9..9d729ce5f1 100644 --- a/data/custodian/JP-11-TOK-L-TL.yaml +++ b/data/custodian/JP-11-TOK-L-TL.yaml @@ -214,3 +214,22 @@ wikidata_enrichment: commons_category: Tokorozawa City Library Main Building image: Tokorozawa City Library 20090722.jpg wikidata_image: Tokorozawa City Library 20090722.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:42:16.335021+00:00' + source_url: https://www.tokorozawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tokorozawa-library.jp/img/favicon/favicon.ico + source_url: https://www.tokorozawa-library.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T01:42:16.335021+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TOK-L-TLA.yaml b/data/custodian/JP-11-TOK-L-TLA.yaml index ef8c3a7e0b..9319f55314 100644 --- a/data/custodian/JP-11-TOK-L-TLA.yaml +++ b/data/custodian/JP-11-TOK-L-TLA.yaml @@ -215,3 +215,22 @@ wikidata_enrichment: wikidata_web: official_website: https://www.tokorozawa-library.jp/contents/map/map_06.html wikidata_official_website: https://www.tokorozawa-library.jp/contents/map/map_06.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:42:23.667251+00:00' + source_url: https://www.tokorozawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tokorozawa-library.jp/img/favicon/favicon.ico + source_url: https://www.tokorozawa-library.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T01:42:23.667251+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TOK-L-TLS-tokorozawashiritsutokorozawa_library_shintokorozaw.yaml b/data/custodian/JP-11-TOK-L-TLS-tokorozawashiritsutokorozawa_library_shintokorozaw.yaml index 27b8b2e6f8..12e4247525 100644 --- a/data/custodian/JP-11-TOK-L-TLS-tokorozawashiritsutokorozawa_library_shintokorozaw.yaml +++ b/data/custodian/JP-11-TOK-L-TLS-tokorozawashiritsutokorozawa_library_shintokorozaw.yaml @@ -215,3 +215,22 @@ wikidata_enrichment: wikidata_web: official_website: https://www.tokorozawa-library.jp/contents/map/map_08.html wikidata_official_website: https://www.tokorozawa-library.jp/contents/map/map_08.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:42:32.155087+00:00' + source_url: https://www.tokorozawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tokorozawa-library.jp/img/favicon/favicon.ico + source_url: https://www.tokorozawa-library.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T01:42:32.155087+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TOK-L-TLS.yaml b/data/custodian/JP-11-TOK-L-TLS.yaml index 7c5553dd84..9b23f59ee8 100644 --- a/data/custodian/JP-11-TOK-L-TLS.yaml +++ b/data/custodian/JP-11-TOK-L-TLS.yaml @@ -215,3 +215,22 @@ wikidata_enrichment: wikidata_web: official_website: https://www.tokorozawa-library.jp/contents/map/map_04.html wikidata_official_website: https://www.tokorozawa-library.jp/contents/map/map_04.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T01:42:39.690278+00:00' + source_url: https://www.tokorozawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tokorozawa-library.jp/img/favicon/favicon.ico + source_url: https://www.tokorozawa-library.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T01:42:39.690278+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TOK-L-TLT-tokorozawashiritsutokorozawa_library_tomiokabunkan.yaml b/data/custodian/JP-11-TOK-L-TLT-tokorozawashiritsutokorozawa_library_tomiokabunkan.yaml index 74cfac4783..f8ba5ed433 100644 --- a/data/custodian/JP-11-TOK-L-TLT-tokorozawashiritsutokorozawa_library_tomiokabunkan.yaml +++ b/data/custodian/JP-11-TOK-L-TLT-tokorozawashiritsutokorozawa_library_tomiokabunkan.yaml @@ -215,3 +215,22 @@ wikidata_enrichment: wikidata_web: official_website: https://www.tokorozawa-library.jp/contents/map/map_05.html wikidata_official_website: https://www.tokorozawa-library.jp/contents/map/map_05.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:33:36.117396+00:00' + source_url: https://www.tokorozawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tokorozawa-library.jp/img/favicon/favicon.ico + source_url: https://www.tokorozawa-library.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:33:36.117396+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TOK-L-TLT-tokorozawashiritsutokorozawa_library_tsubakiminebu.yaml b/data/custodian/JP-11-TOK-L-TLT-tokorozawashiritsutokorozawa_library_tsubakiminebu.yaml index 8f43399dbe..fb5e84ab08 100644 --- a/data/custodian/JP-11-TOK-L-TLT-tokorozawashiritsutokorozawa_library_tsubakiminebu.yaml +++ b/data/custodian/JP-11-TOK-L-TLT-tokorozawashiritsutokorozawa_library_tsubakiminebu.yaml @@ -215,3 +215,22 @@ wikidata_enrichment: wikidata_web: official_website: https://www.tokorozawa-library.jp/contents/map/map_03.html wikidata_official_website: https://www.tokorozawa-library.jp/contents/map/map_03.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:33:46.177164+00:00' + source_url: https://www.tokorozawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tokorozawa-library.jp/img/favicon/favicon.ico + source_url: https://www.tokorozawa-library.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:33:46.177164+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TOK-L-TLT.yaml b/data/custodian/JP-11-TOK-L-TLT.yaml index dc228d2efc..d051176f9e 100644 --- a/data/custodian/JP-11-TOK-L-TLT.yaml +++ b/data/custodian/JP-11-TOK-L-TLT.yaml @@ -211,3 +211,22 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.tokorozawa.saitama.jp/lib_map/map_02.html wikidata_official_website: http://lib.city.tokorozawa.saitama.jp/lib_map/map_02.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:33:56.365474+00:00' + source_url: https://www.tokorozawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tokorozawa-library.jp/img/favicon/favicon.ico + source_url: https://www.tokorozawa-library.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:33:56.365474+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TOK-L-TLY.yaml b/data/custodian/JP-11-TOK-L-TLY.yaml index ccf7ab6c1e..3a39f78f39 100644 --- a/data/custodian/JP-11-TOK-L-TLY.yaml +++ b/data/custodian/JP-11-TOK-L-TLY.yaml @@ -215,3 +215,22 @@ wikidata_enrichment: wikidata_web: official_website: https://www.tokorozawa-library.jp/contents/map/map_07.html wikidata_official_website: https://www.tokorozawa-library.jp/contents/map/map_07.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:04.294188+00:00' + source_url: https://www.tokorozawa-library.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tokorozawa-library.jp/img/favicon/favicon.ico + source_url: https://www.tokorozawa-library.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:34:04.294188+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TOM-M-FSTHFM.yaml b/data/custodian/JP-11-TOM-M-FSTHFM.yaml index 6d809d0beb..08c4e407a7 100644 --- a/data/custodian/JP-11-TOM-M-FSTHFM.yaml +++ b/data/custodian/JP-11-TOM-M-FSTHFM.yaml @@ -434,3 +434,20 @@ location: geonames_id: 1850066 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:32.210555+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:33.265492+00:00' + source_url: http://www.tomo-rekimin.org + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://tomo-rekimin.org/site/wp-content/uploads/2024/02/toukan-150x150.png + source_url: http://www.tomo-rekimin.org + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-24T10:34:33.265492+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-11-TOS-L-KL.yaml b/data/custodian/JP-11-TOS-L-KL.yaml index 9b300a3de3..a6f35486a6 100644 --- a/data/custodian/JP-11-TOS-L-KL.yaml +++ b/data/custodian/JP-11-TOS-L-KL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-TOS-L-KL - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-TOS-L-KL valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-TOS-L-KL ghcid_numeric: 6706775949435070972 valid_from: '2025-12-06T23:38:57.328543+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: KYUSHURYUKOKUTANKIDAIGAKU Library @@ -206,3 +207,28 @@ location: geonames_id: 1849904 geonames_name: Tosu feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:41.942081+00:00' + source_url: http://www.k-ryukoku.ac.jp/institution/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.k-ryukoku.ac.jp/apple-touch-icon.png + source_url: http://www.k-ryukoku.ac.jp/institution/library + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T10:34:41.942081+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.k-ryukoku.ac.jp/image/ogp.jpg + source_url: http://www.k-ryukoku.ac.jp/institution/library + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-24T10:34:41.942081+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-TOS-L-TPL.yaml b/data/custodian/JP-11-TOS-L-TPL.yaml index 143007303e..17775d0a4c 100644 --- a/data/custodian/JP-11-TOS-L-TPL.yaml +++ b/data/custodian/JP-11-TOS-L-TPL.yaml @@ -210,3 +210,22 @@ wikidata_enrichment: wikidata_media: image: Tosu City Library 2024.jpg wikidata_image: Tosu City Library 2024.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:34:53.104325+00:00' + source_url: http://www.city.tosu.lg.jp/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.tosu.lg.jp/apple-touch-icon.png + source_url: http://www.city.tosu.lg.jp/library + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T10:34:53.104325+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-11-TSU-L-TL.yaml b/data/custodian/JP-11-TSU-L-TL.yaml index b8c0573cbf..46d86b6cf7 100644 --- a/data/custodian/JP-11-TSU-L-TL.yaml +++ b/data/custodian/JP-11-TSU-L-TL.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html wikidata_official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:00.452801+00:00' + source_url: https://www.tsurugashima-lib.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/images/logo.png + source_url: https://www.tsurugashima-lib.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-24T10:35:00.452801+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鶴ヶ島市立図書館 + - claim_type: favicon_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/favicon.ico + source_url: https://www.tsurugashima-lib.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T10:35:00.452801+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TSU-L-TLF.yaml b/data/custodian/JP-11-TSU-L-TLF.yaml index c5cc1df9ed..6ecd51558c 100644 --- a/data/custodian/JP-11-TSU-L-TLF.yaml +++ b/data/custodian/JP-11-TSU-L-TLF.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html wikidata_official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:07.886868+00:00' + source_url: https://www.tsurugashima-lib.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/images/logo.png + source_url: https://www.tsurugashima-lib.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-24T10:35:07.886868+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鶴ヶ島市立図書館 + - claim_type: favicon_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/favicon.ico + source_url: https://www.tsurugashima-lib.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T10:35:07.886868+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TSU-L-TLH.yaml b/data/custodian/JP-11-TSU-L-TLH.yaml index 2373c4be63..e10bd05e90 100644 --- a/data/custodian/JP-11-TSU-L-TLH.yaml +++ b/data/custodian/JP-11-TSU-L-TLH.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html wikidata_official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:15.013483+00:00' + source_url: https://www.tsurugashima-lib.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/images/logo.png + source_url: https://www.tsurugashima-lib.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-24T10:35:15.013483+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鶴ヶ島市立図書館 + - claim_type: favicon_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/favicon.ico + source_url: https://www.tsurugashima-lib.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T10:35:15.013483+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TSU-L-TLK.yaml b/data/custodian/JP-11-TSU-L-TLK.yaml index 0567c348f5..86cdd1189b 100644 --- a/data/custodian/JP-11-TSU-L-TLK.yaml +++ b/data/custodian/JP-11-TSU-L-TLK.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html wikidata_official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:22.323671+00:00' + source_url: https://www.tsurugashima-lib.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/images/logo.png + source_url: https://www.tsurugashima-lib.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-24T10:35:22.323671+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鶴ヶ島市立図書館 + - claim_type: favicon_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/favicon.ico + source_url: https://www.tsurugashima-lib.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T10:35:22.323671+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TSU-L-TLM.yaml b/data/custodian/JP-11-TSU-L-TLM.yaml index a9d9f43290..70ae29586d 100644 --- a/data/custodian/JP-11-TSU-L-TLM.yaml +++ b/data/custodian/JP-11-TSU-L-TLM.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html wikidata_official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:29.771504+00:00' + source_url: https://www.tsurugashima-lib.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/images/logo.png + source_url: https://www.tsurugashima-lib.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-24T10:35:29.771504+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鶴ヶ島市立図書館 + - claim_type: favicon_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/favicon.ico + source_url: https://www.tsurugashima-lib.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T10:35:29.771504+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TSU-L-TLN.yaml b/data/custodian/JP-11-TSU-L-TLN.yaml index 9fbac3b313..a6295989d6 100644 --- a/data/custodian/JP-11-TSU-L-TLN.yaml +++ b/data/custodian/JP-11-TSU-L-TLN.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html wikidata_official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:37.323232+00:00' + source_url: https://www.tsurugashima-lib.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/images/logo.png + source_url: https://www.tsurugashima-lib.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-24T10:35:37.323232+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鶴ヶ島市立図書館 + - claim_type: favicon_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/favicon.ico + source_url: https://www.tsurugashima-lib.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T10:35:37.323232+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-TSU-L-TLO.yaml b/data/custodian/JP-11-TSU-L-TLO.yaml index d9e4433d84..0674780821 100644 --- a/data/custodian/JP-11-TSU-L-TLO.yaml +++ b/data/custodian/JP-11-TSU-L-TLO.yaml @@ -200,3 +200,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html wikidata_official_website: http://www.city.tsurugashima.lg.jp/page/dir000018.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:44.517445+00:00' + source_url: https://www.tsurugashima-lib.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/images/logo.png + source_url: https://www.tsurugashima-lib.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-24T10:35:44.517445+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鶴ヶ島市立図書館 + - claim_type: favicon_url + claim_value: https://www.tsurugashima-lib.jp/themes/lib_theme/favicon.ico + source_url: https://www.tsurugashima-lib.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T10:35:44.517445+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-URE-L-UL-ureshinoshiureshino_library.yaml b/data/custodian/JP-11-URE-L-UL-ureshinoshiureshino_library.yaml index 5b6c6aef27..c03c0ee2d7 100644 --- a/data/custodian/JP-11-URE-L-UL-ureshinoshiureshino_library.yaml +++ b/data/custodian/JP-11-URE-L-UL-ureshinoshiureshino_library.yaml @@ -213,3 +213,22 @@ wikidata_enrichment: wikidata_web: official_website: https://www.city.ureshino.lg.jp/toshokan.html wikidata_official_website: https://www.city.ureshino.lg.jp/toshokan.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:50.350288+00:00' + source_url: https://www.city.ureshino.lg.jp/toshokan.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.ureshino.lg.jp/_public/favicon.ico?dummy=1567657558 + source_url: https://www.city.ureshino.lg.jp/toshokan.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:35:50.350288+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-URE-L-UL.yaml b/data/custodian/JP-11-URE-L-UL.yaml index dd036532ec..4701faeae2 100644 --- a/data/custodian/JP-11-URE-L-UL.yaml +++ b/data/custodian/JP-11-URE-L-UL.yaml @@ -211,3 +211,22 @@ wikidata_enrichment: wikidata_media: image: Shiota Library and Museum of History and Folklore.jpg wikidata_image: Shiota Library and Museum of History and Folklore.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:35:56.454559+00:00' + source_url: https://www.city.ureshino.lg.jp/toshokan.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.ureshino.lg.jp/_public/favicon.ico?dummy=1567657558 + source_url: https://www.city.ureshino.lg.jp/toshokan.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:35:56.454559+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-URE-M-SYM.yaml b/data/custodian/JP-11-URE-M-SYM.yaml index 73f9ae55a7..95f31eb3a4 100644 --- a/data/custodian/JP-11-URE-M-SYM.yaml +++ b/data/custodian/JP-11-URE-M-SYM.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-URE-M-SYM - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-URE-M-SYM valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-URE-M-SYM ghcid_numeric: 10025814110241076504 valid_from: '2025-12-06T23:38:40.053844+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: SHIDA YAKINOSATO MUSEUM @@ -221,3 +222,28 @@ location: geonames_id: 11013738 geonames_name: Ureshino feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:03.141540+00:00' + source_url: https://shidayakinosato.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://cdn.goope.jp/82306/191227190039-5e05d64750dd0.ico + source_url: https://shidayakinosato.com + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:36:03.141540+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://cdn.goope.jp/82306/190405122316rs3h.png + source_url: https://shidayakinosato.com + css_selector: '[document] > html > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-24T10:36:03.141540+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-11-WAK-L-RL.yaml b/data/custodian/JP-11-WAK-L-RL.yaml index 0abf8540f8..d98fedda5b 100644 --- a/data/custodian/JP-11-WAK-L-RL.yaml +++ b/data/custodian/JP-11-WAK-L-RL.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-WAK-L-RL - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-WAK-L-RL valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-WAK-L-RL ghcid_numeric: 5065595170072502231 valid_from: '2025-12-06T23:38:58.011377+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: RIKEN library @@ -151,3 +152,22 @@ location: geonames_id: 1907300 geonames_name: Wako feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:21.821162+00:00' + source_url: http://www.riken.jp/outreach/resources/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.riken.jp/apple-touch-icon.png + source_url: http://www.riken.jp/outreach/resources/library + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T10:36:21.821162+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-WAK-L-WCLSBL.yaml b/data/custodian/JP-11-WAK-L-WCLSBL.yaml index 5be3922a63..c83594f2e4 100644 --- a/data/custodian/JP-11-WAK-L-WCLSBL.yaml +++ b/data/custodian/JP-11-WAK-L-WCLSBL.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-WAK-L-WCLSBL - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-WAK-L-WCLSBL valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-WAK-L-WCLSBL ghcid_numeric: 14244861528036080035 valid_from: '2025-12-06T23:38:52.816557+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Wako City Library, Shimoniikura Branch Library @@ -151,3 +152,23 @@ location: geonames_id: 1907300 geonames_name: Wako feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:36.688627+00:00' + source_url: https://www.wakolib.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.wakolib.jp/top/images/logo.png + source_url: https://www.wakolib.jp + css_selector: '#TOP > header > div.header_outer > div.wrap.header_wrap > div.title + > a > img.title__logo.title__logo--def' + retrieved_on: '2025-12-24T10:36:36.688627+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 和光市図書館 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-WAK-L-WPL.yaml b/data/custodian/JP-11-WAK-L-WPL.yaml index 7277c3b77a..2230d94f0e 100644 --- a/data/custodian/JP-11-WAK-L-WPL.yaml +++ b/data/custodian/JP-11-WAK-L-WPL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-WAK-L-WPL - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-WAK-L-WPL valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-WAK-L-WPL ghcid_numeric: 4493325600586452756 valid_from: '2025-12-06T23:38:42.623151+00:00' @@ -103,8 +104,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Wako Public Library @@ -198,8 +199,9 @@ wikidata_enrichment: instance_of: &id004 - id: Q7075 label: library - description: institution charged with the care of a collection of literary, musical, artistic, or reference materials, - such as books, manuscripts, recordings, or films + description: institution charged with the care of a collection of literary, + musical, artistic, or reference materials, such as books, manuscripts, recordings, + or films wikidata_instance_of: *id004 wikidata_location: coordinates: &id007 @@ -237,3 +239,23 @@ location: postal_code: 351-0114 street_address: 31-1 HONCHO, Wako Shi, Saitama Ken, 351-0114 normalization_timestamp: '2025-12-09T11:32:34.207277+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:45.311097+00:00' + source_url: https://www.wakolib.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.wakolib.jp/top/images/logo.png + source_url: https://www.wakolib.jp + css_selector: '#TOP > header > div.header_outer > div.wrap.header_wrap > div.title + > a > img.title__logo.title__logo--def' + retrieved_on: '2025-12-24T10:36:45.311097+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 和光市図書館 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-WAK-M-NTCTM.yaml b/data/custodian/JP-11-WAK-M-NTCTM.yaml index 24515bb627..6e32ee2e2a 100644 --- a/data/custodian/JP-11-WAK-M-NTCTM.yaml +++ b/data/custodian/JP-11-WAK-M-NTCTM.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-WAK-M-NTCTM - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-WAK-M-NTCTM valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-WAK-M-NTCTM ghcid_numeric: 7575678626839888028 valid_from: '2025-12-06T23:38:32.541992+00:00' @@ -102,8 +103,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: NATIONAL TAX COLLEGE TAX MUSEUM @@ -213,3 +214,22 @@ location: geonames_id: 1907300 geonames_name: Wako feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:36:50.602999+00:00' + source_url: https://www.nta.go.jp/about/organization/ntc/sozei/index.htm + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.nta.go.jp/template/img/template/logo.png + source_url: https://www.nta.go.jp/about/organization/ntc/sozei/index.htm + css_selector: '#logo > h1 > img' + retrieved_on: '2025-12-24T10:36:50.602999+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 国税庁 National Tax Agency 法人番号7000012050002 + summary: + total_claims: 1 + has_primary_logo: true + has_favicon: false + has_og_image: false + favicon_count: 0 diff --git a/data/custodian/JP-11-WAR-L-WCL.yaml b/data/custodian/JP-11-WAR-L-WCL.yaml index 2409b797d7..c437feb495 100644 --- a/data/custodian/JP-11-WAR-L-WCL.yaml +++ b/data/custodian/JP-11-WAR-L-WCL.yaml @@ -241,3 +241,28 @@ location: geonames_id: 11612339 geonames_name: Warabi feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:37:00.638060+00:00' + source_url: https://www.city.warabi.saitama.jp/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.warabi.saitama.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.warabi.saitama.jp/library + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:37:00.638060+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.warabi.saitama.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.warabi.saitama.jp/library + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-24T10:37:00.638060+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-WAR-L-WLK.yaml b/data/custodian/JP-11-WAR-L-WLK.yaml index 7565e39191..9e2862fac9 100644 --- a/data/custodian/JP-11-WAR-L-WLK.yaml +++ b/data/custodian/JP-11-WAR-L-WLK.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.warabi.saitama.jp/hp/page000007000/hpg000006965.htm wikidata_official_website: http://www.city.warabi.saitama.jp/hp/page000007000/hpg000006965.htm +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:37:09.359915+00:00' + source_url: https://www.city.warabi.saitama.jp/shisetsu/gakko/1003610.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.warabi.saitama.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.warabi.saitama.jp/shisetsu/gakko/1003610.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:37:09.359915+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.warabi.saitama.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.warabi.saitama.jp/shisetsu/gakko/1003610.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-24T10:37:09.359915+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-WAR-L-WLN.yaml b/data/custodian/JP-11-WAR-L-WLN.yaml index 68f39214d1..9a673a64d0 100644 --- a/data/custodian/JP-11-WAR-L-WLN.yaml +++ b/data/custodian/JP-11-WAR-L-WLN.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.warabi.saitama.jp/hp/page000007000/hpg000006966.htm wikidata_official_website: http://www.city.warabi.saitama.jp/hp/page000007000/hpg000006966.htm +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:37:17.343275+00:00' + source_url: https://www.city.warabi.saitama.jp/shisetsu/gakko/1003611.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.warabi.saitama.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.warabi.saitama.jp/shisetsu/gakko/1003611.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:37:17.343275+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.warabi.saitama.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.warabi.saitama.jp/shisetsu/gakko/1003611.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-24T10:37:17.343275+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-WAR-L-WLT.yaml b/data/custodian/JP-11-WAR-L-WLT.yaml index 5729c54bbf..ad59f2dd5b 100644 --- a/data/custodian/JP-11-WAR-L-WLT.yaml +++ b/data/custodian/JP-11-WAR-L-WLT.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.warabi.saitama.jp/hp/page000007000/hpg000006964.htm wikidata_official_website: http://www.city.warabi.saitama.jp/hp/page000007000/hpg000006964.htm +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:37:27.058358+00:00' + source_url: https://www.city.warabi.saitama.jp/shisetsu/gakko/1003609.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.warabi.saitama.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.warabi.saitama.jp/shisetsu/gakko/1003609.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:37:27.058358+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.warabi.saitama.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.warabi.saitama.jp/shisetsu/gakko/1003609.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-24T10:37:27.058358+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-WAR-M-WMM.yaml b/data/custodian/JP-11-WAR-M-WMM.yaml index b807c448cb..0169bed8cf 100644 --- a/data/custodian/JP-11-WAR-M-WMM.yaml +++ b/data/custodian/JP-11-WAR-M-WMM.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-WAR-M-WMM - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-WAR-M-WMM valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-WAR-M-WMM ghcid_numeric: 4152117785056502622 valid_from: '2025-12-06T23:38:32.512655+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: WARABI MUNICIPAL MUSEUM @@ -151,3 +152,28 @@ location: geonames_id: 11612339 geonames_name: Warabi feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:38:08.006936+00:00' + source_url: https://www.city.warabi.saitama.jp/shogaigakushu/bunka/shiryokan/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.warabi.saitama.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.warabi.saitama.jp/shogaigakushu/bunka/shiryokan/index.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-24T10:38:08.006936+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.warabi.saitama.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.warabi.saitama.jp/shogaigakushu/bunka/shiryokan/index.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-24T10:38:08.006936+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-YAS-A-YS-yashioshiritsu_shiryoukan.yaml b/data/custodian/JP-11-YAS-A-YS-yashioshiritsu_shiryoukan.yaml index 0424f9eacb..170d7fec09 100644 --- a/data/custodian/JP-11-YAS-A-YS-yashioshiritsu_shiryoukan.yaml +++ b/data/custodian/JP-11-YAS-A-YS-yashioshiritsu_shiryoukan.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-11-YAS-A-YS-yashioshiritsu_shiryoukan - valid_from: "2025-12-10T09:43:56Z" + valid_from: '2025-12-10T09:43:56Z' valid_to: null - reason: "Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per ISO 3166-2:JP" + reason: Corrected region code from JP-SA (abbreviation) to JP-11 (Saitama) per + ISO 3166-2:JP - ghcid: JP-SA-YAS-A-YS-yashioshiritsu_shiryoukan valid_from: null - valid_to: "2025-12-10T09:43:56Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:56Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-SA-YAS-A-YS-yashioshiritsu_shiryoukan ghcid_numeric: 18422948106411132518 valid_from: '2025-12-06T23:38:29.564892+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: YASHIOSHIRITSU SHIRYOUKAN @@ -151,3 +152,37 @@ location: geonames_id: 1848499 geonames_name: Yashio feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:38:16.814653+00:00' + source_url: http://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.yashio.lg.jp/images/logo.png + source_url: http://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + css_selector: '#base > div.base.top-page > div.baseall > header.header > div.base-width + > div.header-wp > h1.header-heading > a.header-heading-link > img' + retrieved_on: '2025-12-24T10:38:16.814653+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 八潮市・YashioCity + - claim_type: favicon_url + claim_value: http://www.city.yashio.lg.jp/images/apple-touch-icon.png + source_url: http://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T10:38:16.814653+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yashio.lg.jp/images/ogp.png + source_url: http://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-24T10:38:16.814653+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-YAS-A-YS.yaml b/data/custodian/JP-11-YAS-A-YS.yaml index d152d5de09..d14c2dc9cd 100644 --- a/data/custodian/JP-11-YAS-A-YS.yaml +++ b/data/custodian/JP-11-YAS-A-YS.yaml @@ -152,3 +152,37 @@ location: geonames_id: 1848499 geonames_name: Yashio feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:38:24.461911+00:00' + source_url: http://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.yashio.lg.jp/images/logo.png + source_url: http://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + css_selector: '#base > div.base.top-page > div.baseall > header.header > div.base-width + > div.header-wp > h1.header-heading > a.header-heading-link > img' + retrieved_on: '2025-12-24T10:38:24.461911+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 八潮市・YashioCity + - claim_type: favicon_url + claim_value: http://www.city.yashio.lg.jp/images/apple-touch-icon.png + source_url: http://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T10:38:24.461911+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yashio.lg.jp/images/ogp.png + source_url: http://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-24T10:38:24.461911+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-YAS-M-YCMM.yaml b/data/custodian/JP-11-YAS-M-YCMM.yaml index 4b2b1aa681..f567217601 100644 --- a/data/custodian/JP-11-YAS-M-YCMM.yaml +++ b/data/custodian/JP-11-YAS-M-YCMM.yaml @@ -430,3 +430,37 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/HUZdzXw2Ag8/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:38:47.588360+00:00' + source_url: https://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.yashio.lg.jp/images/logo.png + source_url: https://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + css_selector: '#base > div.base.top-page > div.baseall > header.header > div.base-width + > div.header-wp > h1.header-heading > a.header-heading-link > img' + retrieved_on: '2025-12-24T10:38:47.588360+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 八潮市・YashioCity + - claim_type: favicon_url + claim_value: https://www.city.yashio.lg.jp/images/apple-touch-icon.png + source_url: https://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T10:38:47.588360+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yashio.lg.jp/images/ogp.png + source_url: https://www.city.yashio.lg.jp/kurashi/shisetsuguide/shiryokan/index.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-24T10:38:47.588360+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-11-YOS-L-Y-yoshikawashiasahichikusentatoshoshitsu.yaml b/data/custodian/JP-11-YOS-L-Y-yoshikawashiasahichikusentatoshoshitsu.yaml index e6aa2f7a37..2134525421 100644 --- a/data/custodian/JP-11-YOS-L-Y-yoshikawashiasahichikusentatoshoshitsu.yaml +++ b/data/custodian/JP-11-YOS-L-Y-yoshikawashiasahichikusentatoshoshitsu.yaml @@ -199,3 +199,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.yoshikawa-oasis-tosho.info/library/ wikidata_official_website: http://www.yoshikawa-oasis-tosho.info/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-24T10:38:55.848615+00:00' + source_url: https://www.yoshikawa-oasis-tosho.info/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.yoshikawa-oasis-tosho.info/favicon.ico + source_url: https://www.yoshikawa-oasis-tosho.info/library + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T10:38:55.848615+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-YOS-L-Y-yoshikawashichuokominkantoshoshitsu.yaml b/data/custodian/JP-11-YOS-L-Y-yoshikawashichuokominkantoshoshitsu.yaml index c51c0c7b86..764b48912e 100644 --- a/data/custodian/JP-11-YOS-L-Y-yoshikawashichuokominkantoshoshitsu.yaml +++ b/data/custodian/JP-11-YOS-L-Y-yoshikawashichuokominkantoshoshitsu.yaml @@ -199,3 +199,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.yoshikawa-oasis-tosho.info/library/ wikidata_official_website: http://www.yoshikawa-oasis-tosho.info/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:27:34.725728+00:00' + source_url: https://www.yoshikawa-oasis-tosho.info/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.yoshikawa-oasis-tosho.info/favicon.ico + source_url: https://www.yoshikawa-oasis-tosho.info/library + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:27:34.725728+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-YOS-L-Y.yaml b/data/custodian/JP-11-YOS-L-Y.yaml index f6eccc8433..df48d60c05 100644 --- a/data/custodian/JP-11-YOS-L-Y.yaml +++ b/data/custodian/JP-11-YOS-L-Y.yaml @@ -199,3 +199,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.yoshikawa-oasis-tosho.info/library/ wikidata_official_website: http://www.yoshikawa-oasis-tosho.info/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:27:44.080984+00:00' + source_url: https://www.yoshikawa-oasis-tosho.info/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.yoshikawa-oasis-tosho.info/favicon.ico + source_url: https://www.yoshikawa-oasis-tosho.info/library + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:27:44.080984+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-11-YOS-L-YCL.yaml b/data/custodian/JP-11-YOS-L-YCL.yaml index 2fe10b7e02..560d5c069d 100644 --- a/data/custodian/JP-11-YOS-L-YCL.yaml +++ b/data/custodian/JP-11-YOS-L-YCL.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.yoshikawa-oasis-tosho.info/library/ wikidata_official_website: http://www.yoshikawa-oasis-tosho.info/library/ +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:27:55.185581+00:00' + source_url: https://www.yoshikawa-oasis-tosho.info/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.yoshikawa-oasis-tosho.info/favicon.ico + source_url: https://www.yoshikawa-oasis-tosho.info/library + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:27:55.185581+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-ABI-L-ACL.yaml b/data/custodian/JP-12-ABI-L-ACL.yaml index e025de33c8..239dd13bba 100644 --- a/data/custodian/JP-12-ABI-L-ACL.yaml +++ b/data/custodian/JP-12-ABI-L-ACL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-12-ABI-L-ACL - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO 3166-2:JP" + reason: Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO + 3166-2:JP - ghcid: JP-CH-ABI-L-ACL valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-CH-ABI-L-ACL ghcid_numeric: 17720456709233169366 valid_from: '2025-12-06T23:38:43.008792+00:00' @@ -232,3 +233,30 @@ location: postal_code: 270-1147 street_address: 26-4 WAKAMATSU, Abiko Shi, Chiba Ken, 270-1147 normalization_timestamp: '2025-12-09T10:55:27.100014+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:03.142208+00:00' + source_url: https://www.library.city.abiko.chiba.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library.city.abiko.chiba.jp/themes/lib_theme/images/logo.png + source_url: https://www.library.city.abiko.chiba.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-24T11:28:03.142208+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 我孫子市民図書館 + - claim_type: favicon_url + claim_value: https://www.library.city.abiko.chiba.jp/themes/lib_theme/favicon.ico + source_url: https://www.library.city.abiko.chiba.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T11:28:03.142208+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-ABI-L-ACLFB.yaml b/data/custodian/JP-12-ABI-L-ACLFB.yaml index 45ed9bcec8..5718505b13 100644 --- a/data/custodian/JP-12-ABI-L-ACLFB.yaml +++ b/data/custodian/JP-12-ABI-L-ACLFB.yaml @@ -204,3 +204,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.abiko.chiba.jp/ wikidata_official_website: http://www.library.city.abiko.chiba.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:08.852120+00:00' + source_url: https://www.library.city.abiko.chiba.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library.city.abiko.chiba.jp/themes/lib_theme/images/logo.png + source_url: https://www.library.city.abiko.chiba.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-24T11:28:08.852120+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 我孫子市民図書館 + - claim_type: favicon_url + claim_value: https://www.library.city.abiko.chiba.jp/themes/lib_theme/favicon.ico + source_url: https://www.library.city.abiko.chiba.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T11:28:08.852120+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-ABI-L-ACLKB.yaml b/data/custodian/JP-12-ABI-L-ACLKB.yaml index 6cb931691e..5b79cf0f14 100644 --- a/data/custodian/JP-12-ABI-L-ACLKB.yaml +++ b/data/custodian/JP-12-ABI-L-ACLKB.yaml @@ -204,3 +204,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.abiko.chiba.jp/ wikidata_official_website: http://www.library.city.abiko.chiba.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:14.495028+00:00' + source_url: https://www.library.city.abiko.chiba.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library.city.abiko.chiba.jp/themes/lib_theme/images/logo.png + source_url: https://www.library.city.abiko.chiba.jp + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-24T11:28:14.495028+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 我孫子市民図書館 + - claim_type: favicon_url + claim_value: https://www.library.city.abiko.chiba.jp/themes/lib_theme/favicon.ico + source_url: https://www.library.city.abiko.chiba.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-24T11:28:14.495028+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-ABI-L-CL.yaml b/data/custodian/JP-12-ABI-L-CL.yaml index 1ca60ed29e..b5f7e3ceb2 100644 --- a/data/custodian/JP-12-ABI-L-CL.yaml +++ b/data/custodian/JP-12-ABI-L-CL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-12-ABI-L-CL - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO 3166-2:JP" + reason: Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO + 3166-2:JP - ghcid: JP-CH-ABI-L-CL valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-CH-ABI-L-CL ghcid_numeric: 16396862059347377039 valid_from: '2025-12-06T23:38:54.746651+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: CHUOGAKUINDAIGAKU Library @@ -204,3 +205,28 @@ location: geonames_id: 2113164 geonames_name: Abiko feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:23.967183+00:00' + source_url: http://www.cgu.ac.jp/faculty/tabid/66/Default.aspx + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.cgu.ac.jp/apple-touch-icon.png + source_url: http://www.cgu.ac.jp/faculty/tabid/66/Default.aspx + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T11:28:23.967183+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.cgu.ac.jp/ogp.png + source_url: http://www.cgu.ac.jp/faculty/tabid/66/Default.aspx + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-24T11:28:23.967183+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-12-ABI-L-YIO.yaml b/data/custodian/JP-12-ABI-L-YIO.yaml index 6d837d142f..d98c97f642 100644 --- a/data/custodian/JP-12-ABI-L-YIO.yaml +++ b/data/custodian/JP-12-ABI-L-YIO.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-12-ABI-L-YIO - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO 3166-2:JP" + reason: Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO + 3166-2:JP - ghcid: JP-CH-ABI-L-YIO valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-CH-ABI-L-YIO ghcid_numeric: 7642129889605395749 valid_from: '2025-12-06T23:38:58.679200+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Yamashina Institute for Ornithology @@ -151,3 +152,22 @@ location: geonames_id: 2113164 geonames_name: Abiko feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:36.891502+00:00' + source_url: http://www.yamashina.or.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.yamashina.or.jp/hp/favicon.ico + source_url: http://www.yamashina.or.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:28:36.891502+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-ASA-L-APL.yaml b/data/custodian/JP-12-ASA-L-APL.yaml index 0a21a4ae6d..971c4d05be 100644 --- a/data/custodian/JP-12-ASA-L-APL.yaml +++ b/data/custodian/JP-12-ASA-L-APL.yaml @@ -204,3 +204,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library-asahi-chiba.jp/ wikidata_official_website: http://www.library-asahi-chiba.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:44.034042+00:00' + source_url: https://www.library-asahi-chiba.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library-asahi-chiba.jp/common/favicon-asahi.png + source_url: https://www.library-asahi-chiba.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:28:44.034042+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/png + favicon_sizes: 32x32 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-ASA-L-CPEL.yaml b/data/custodian/JP-12-ASA-L-CPEL.yaml index a75209bb99..6fbcd94ea4 100644 --- a/data/custodian/JP-12-ASA-L-CPEL.yaml +++ b/data/custodian/JP-12-ASA-L-CPEL.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-12-ASA-L-CPEL - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO 3166-2:JP" + reason: Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO + 3166-2:JP - ghcid: JP-CH-ASA-L-CPEL valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-CH-ASA-L-CPEL ghcid_numeric: 18238927871217540372 valid_from: '2025-12-06T23:38:42.757145+00:00' @@ -260,3 +261,22 @@ location: postal_code: 289-2521 street_address: 349 HA, Asahi Shi, Chiba Ken, 289-2521 normalization_timestamp: '2025-12-09T10:55:27.252333+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:28:57.073798+00:00' + source_url: https://www.library.pref.chiba.lg.jp/guide/east/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.pref.chiba.lg.jp/apple-touch-icon.png + source_url: https://www.library.pref.chiba.lg.jp/guide/east/index.html + css_selector: '[document] > html > head > link:nth-of-type(6)' + retrieved_on: '2025-12-24T11:28:57.073798+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 192x192 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-12-ASH-M-HCM.yaml b/data/custodian/JP-12-ASH-M-HCM.yaml index cc516068c9..7394612f6e 100644 --- a/data/custodian/JP-12-ASH-M-HCM.yaml +++ b/data/custodian/JP-12-ASH-M-HCM.yaml @@ -387,3 +387,22 @@ location: geonames_id: 2130612 feature_code: PPLA2 normalization_timestamp: '2025-12-09T06:53:32.257790+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:13.002280+00:00' + source_url: https://www.city.ashibetsu.hokkaido.jp/shigai/kinenkan + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.ashibetsu.hokkaido.jp/img/favicon.ico + source_url: https://www.city.ashibetsu.hokkaido.jp/shigai/kinenkan + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:29:13.002280+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-ATS-M-HAOC.yaml b/data/custodian/JP-12-ATS-M-HAOC.yaml index 19c5104192..5af43c6bc4 100644 --- a/data/custodian/JP-12-ATS-M-HAOC.yaml +++ b/data/custodian/JP-12-ATS-M-HAOC.yaml @@ -435,3 +435,28 @@ location: geonames_id: 2130594 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:32.308211+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:23.002154+00:00' + source_url: http://www.domaibun.or.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.domaibun.or.jp/files/favicon/favicon.ico?cache=1766575757 + source_url: http://www.domaibun.or.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:29:23.002154+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.domaibun.or.jp/files/site_config/capture.jpg + source_url: http://www.domaibun.or.jp + css_selector: '[document] > html > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-24T11:29:23.002154+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-12-ATS-M-HVH.yaml b/data/custodian/JP-12-ATS-M-HVH.yaml index 630f3f0179..549133b0ae 100644 --- a/data/custodian/JP-12-ATS-M-HVH.yaml +++ b/data/custodian/JP-12-ATS-M-HVH.yaml @@ -487,3 +487,28 @@ youtube_enrichment: api_version: v3 error: 'Channel not found: UCOBaueaOr9vyp-lciPsrPTw' status: FAILED +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:31.203385+00:00' + source_url: https://www.kaitaku.or.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kaitaku.or.jp/assets/img/apple-touch-icon.png + source_url: https://www.kaitaku.or.jp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-24T11:29:31.203385+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.kaitaku.or.jp/assets/img/og-image.jpg + source_url: https://www.kaitaku.or.jp + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-24T11:29:31.203385+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-12-AWA-M-MMM.yaml b/data/custodian/JP-12-AWA-M-MMM.yaml index 4216735418..bbec2df384 100644 --- a/data/custodian/JP-12-AWA-M-MMM.yaml +++ b/data/custodian/JP-12-AWA-M-MMM.yaml @@ -258,3 +258,22 @@ location: postal_code: 299-1908 street_address: YOSHIHAMA, Awa Gun Kyonan Machi, Chiba Ken, 299-1908 normalization_timestamp: '2025-12-09T10:55:27.323365+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:40.491709+00:00' + source_url: https://www.town.kyonan.chiba.jp/site/hishikawamoronobukinenkan + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.kyonan.chiba.jp/apple-touch-icon.png + source_url: https://www.town.kyonan.chiba.jp/site/hishikawamoronobukinenkan + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-24T11:29:40.491709+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-12-CHI-A-CPA-chiba_prefectural_archives.yaml b/data/custodian/JP-12-CHI-A-CPA-chiba_prefectural_archives.yaml index d9c9ea1257..d76f8a75be 100644 --- a/data/custodian/JP-12-CHI-A-CPA-chiba_prefectural_archives.yaml +++ b/data/custodian/JP-12-CHI-A-CPA-chiba_prefectural_archives.yaml @@ -224,3 +224,30 @@ wikidata_enrichment: image: ChibaPrefecturalArchives.jpg commons_category: Chiba Prefectural Archives wikidata_image: ChibaPrefecturalArchives.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:50.978976+00:00' + source_url: http://www.pref.chiba.lg.jp/bunshokan + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.pref.chiba.lg.jp/shared/images/favicon/apple-touch-icon-precomposed.png + source_url: http://www.pref.chiba.lg.jp/bunshokan + css_selector: '[document] > html.wf-a-otf-ud-shin-go-pr6n-n3-active.wf-active + > head > link:nth-of-type(4)' + retrieved_on: '2025-12-24T11:29:50.978976+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.pref.chiba.lg.jp/shared/images/sns/logo.png + source_url: http://www.pref.chiba.lg.jp/bunshokan + css_selector: '[document] > html.wf-a-otf-ud-shin-go-pr6n-n3-active.wf-active + > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-24T11:29:50.978976+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-12-CHI-A-CPA.yaml b/data/custodian/JP-12-CHI-A-CPA.yaml index f5e368593a..5673f99746 100644 --- a/data/custodian/JP-12-CHI-A-CPA.yaml +++ b/data/custodian/JP-12-CHI-A-CPA.yaml @@ -224,3 +224,30 @@ wikidata_enrichment: image: ChibaPrefecturalArchives.jpg commons_category: Chiba Prefectural Archives wikidata_image: ChibaPrefecturalArchives.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:29:58.854733+00:00' + source_url: http://www.pref.chiba.lg.jp/bunshokan + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.pref.chiba.lg.jp/shared/images/favicon/apple-touch-icon-precomposed.png + source_url: http://www.pref.chiba.lg.jp/bunshokan + css_selector: '[document] > html.wf-a-otf-ud-shin-go-pr6n-n3-active.wf-active + > head > link:nth-of-type(4)' + retrieved_on: '2025-12-24T11:29:58.854733+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.pref.chiba.lg.jp/shared/images/sns/logo.png + source_url: http://www.pref.chiba.lg.jp/bunshokan + css_selector: '[document] > html.wf-a-otf-ud-shin-go-pr6n-n3-active.wf-active + > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-24T11:29:58.854733+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-12-CHI-L-CCCL.yaml b/data/custodian/JP-12-CHI-L-CCCL.yaml index dc0c34e535..11c1c19703 100644 --- a/data/custodian/JP-12-CHI-L-CCCL.yaml +++ b/data/custodian/JP-12-CHI-L-CCCL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-12-CHI-L-CCCL - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO 3166-2:JP" + reason: Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO + 3166-2:JP - ghcid: JP-CH-CHI-L-CCCL valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-CH-CHI-L-CCCL ghcid_numeric: 3853656989179429213 valid_from: '2025-12-06T23:38:42.785589+00:00' @@ -246,3 +247,22 @@ location: postal_code: 260-0045 street_address: 3-7-7 BENTEN, Chiba Shi Chuo Ku, Chiba Ken, 260-0045 normalization_timestamp: '2025-12-09T10:55:27.408448+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:09.123438+00:00' + source_url: https://www.library.city.chiba.jp/facilities/chuou/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/chuou/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:30:09.123438+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCHL.yaml b/data/custodian/JP-12-CHI-L-CCHL.yaml index 3578d897bd..e3c8080c08 100644 --- a/data/custodian/JP-12-CHI-L-CCHL.yaml +++ b/data/custodian/JP-12-CHI-L-CCHL.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/hanamigawa/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/hanamigawa/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:16.722993+00:00' + source_url: https://www.library.city.chiba.jp/facilities/hanamigawa/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/hanamigawa/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:30:16.722993+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCHLHDA.yaml b/data/custodian/JP-12-CHI-L-CCHLHDA.yaml index d18c2a7063..018d1bab54 100644 --- a/data/custodian/JP-12-CHI-L-CCHLHDA.yaml +++ b/data/custodian/JP-12-CHI-L-CCHLHDA.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/hanamigawa-danchi/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/hanamigawa-danchi/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:24.187247+00:00' + source_url: https://www.library.city.chiba.jp/facilities/hanamigawa-danchi/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/hanamigawa-danchi/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:30:24.187247+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCIL.yaml b/data/custodian/JP-12-CHI-L-CCIL.yaml index 38a2cfa95c..9f42eb01b9 100644 --- a/data/custodian/JP-12-CHI-L-CCIL.yaml +++ b/data/custodian/JP-12-CHI-L-CCIL.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/inage/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/inage/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:31.603108+00:00' + source_url: https://www.library.city.chiba.jp/facilities/inage/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/inage/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:30:31.603108+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCLLCRRR.yaml b/data/custodian/JP-12-CHI-L-CCLLCRRR.yaml index aa286d7108..f77c95c50f 100644 --- a/data/custodian/JP-12-CHI-L-CCLLCRRR.yaml +++ b/data/custodian/JP-12-CHI-L-CCLLCRRR.yaml @@ -32,13 +32,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-12-CHI-L-CCLLCRRR - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO 3166-2:JP" + reason: Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO + 3166-2:JP - ghcid: JP-CH-CHI-L-CCLLCRRR valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-CH-CHI-L-CCLLCRRR ghcid_numeric: 18280829317482739101 valid_from: '2025-12-06T23:38:58.069917+00:00' @@ -96,8 +97,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Chiba City Lifelong Learning Center Research & Reference Room @@ -153,3 +154,22 @@ location: geonames_id: 2113015 geonames_name: Chiba feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:42.598426+00:00' + source_url: http://chiba-gakushu.jp/rent/rent_01_11.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://chiba-gakushu.jp/wordpress/wp-content/uploads/2022/05/cropped-favicon-180x180.png + source_url: http://chiba-gakushu.jp/rent/rent_01_11.html + css_selector: '[document] > html > head > link:nth-of-type(20)' + retrieved_on: '2025-12-24T11:30:42.598426+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-12-CHI-L-CCML-chiba_city_midori_library.yaml b/data/custodian/JP-12-CHI-L-CCML-chiba_city_midori_library.yaml index 884e0b6376..7567c2cef8 100644 --- a/data/custodian/JP-12-CHI-L-CCML-chiba_city_midori_library.yaml +++ b/data/custodian/JP-12-CHI-L-CCML-chiba_city_midori_library.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/midori/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/midori/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:50.023826+00:00' + source_url: https://www.library.city.chiba.jp/facilities/midori/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/midori/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:30:50.023826+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCML-chiba_city_miyako_library.yaml b/data/custodian/JP-12-CHI-L-CCML-chiba_city_miyako_library.yaml index d361564b5a..05bab06c85 100644 --- a/data/custodian/JP-12-CHI-L-CCML-chiba_city_miyako_library.yaml +++ b/data/custodian/JP-12-CHI-L-CCML-chiba_city_miyako_library.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/miyako/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/miyako/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:30:57.559230+00:00' + source_url: https://www.library.city.chiba.jp/facilities/miyako/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/miyako/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:30:57.559230+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCML.yaml b/data/custodian/JP-12-CHI-L-CCML.yaml index 407c870edc..b0347e0b60 100644 --- a/data/custodian/JP-12-CHI-L-CCML.yaml +++ b/data/custodian/JP-12-CHI-L-CCML.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/mihama/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/mihama/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:05.217745+00:00' + source_url: https://www.library.city.chiba.jp/facilities/mihama/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/mihama/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:31:05.217745+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCMLAA.yaml b/data/custodian/JP-12-CHI-L-CCMLAA.yaml index f8a7ab3f81..8c169ce821 100644 --- a/data/custodian/JP-12-CHI-L-CCMLAA.yaml +++ b/data/custodian/JP-12-CHI-L-CCMLAA.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/midori-asumigaoka/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/midori-asumigaoka/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:14.082485+00:00' + source_url: https://www.library.city.chiba.jp/facilities/midori-asumigaoka/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/midori-asumigaoka/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:31:14.082485+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCMLSA.yaml b/data/custodian/JP-12-CHI-L-CCMLSA.yaml index 05fc944f59..cf3e590a8e 100644 --- a/data/custodian/JP-12-CHI-L-CCMLSA.yaml +++ b/data/custodian/JP-12-CHI-L-CCMLSA.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/miyako-shirahata/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/miyako-shirahata/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:21.548964+00:00' + source_url: https://www.library.city.chiba.jp/facilities/miyako-shirahata/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/miyako-shirahata/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:31:21.548964+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCMLTA.yaml b/data/custodian/JP-12-CHI-L-CCMLTA.yaml index 764c4192ff..ec18977604 100644 --- a/data/custodian/JP-12-CHI-L-CCMLTA.yaml +++ b/data/custodian/JP-12-CHI-L-CCMLTA.yaml @@ -206,3 +206,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/midori-toke/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/midori-toke/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:29.046395+00:00' + source_url: https://www.library.city.chiba.jp/facilities/midori-toke/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/midori-toke/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:31:29.046395+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCMLUA.yaml b/data/custodian/JP-12-CHI-L-CCMLUA.yaml index 95fe11db9f..b9b44598bc 100644 --- a/data/custodian/JP-12-CHI-L-CCMLUA.yaml +++ b/data/custodian/JP-12-CHI-L-CCMLUA.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/mihama-utase/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/mihama-utase/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:36.798272+00:00' + source_url: https://www.library.city.chiba.jp/facilities/mihama-utase/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/mihama-utase/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:31:36.798272+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCWL.yaml b/data/custodian/JP-12-CHI-L-CCWL.yaml index 9c52ef92f9..5b72921c10 100644 --- a/data/custodian/JP-12-CHI-L-CCWL.yaml +++ b/data/custodian/JP-12-CHI-L-CCWL.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/wakaba/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/wakaba/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:44.178656+00:00' + source_url: https://www.library.city.chiba.jp/facilities/wakaba/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/wakaba/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:31:44.178656+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCWLIA.yaml b/data/custodian/JP-12-CHI-L-CCWLIA.yaml index 968d8e632b..50a1bdf46b 100644 --- a/data/custodian/JP-12-CHI-L-CCWLIA.yaml +++ b/data/custodian/JP-12-CHI-L-CCWLIA.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/wakaba-izumi/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/wakaba-izumi/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:51.758902+00:00' + source_url: https://www.library.city.chiba.jp/facilities/wakaba-izumi/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/wakaba-izumi/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:31:51.758902+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CCWLNTA.yaml b/data/custodian/JP-12-CHI-L-CCWLNTA.yaml index a91ce84673..d74faf25af 100644 --- a/data/custodian/JP-12-CHI-L-CCWLNTA.yaml +++ b/data/custodian/JP-12-CHI-L-CCWLNTA.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library.city.chiba.jp/facilities/wakaba-nishituga/index.html wikidata_official_website: http://www.library.city.chiba.jp/facilities/wakaba-nishituga/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:31:59.272210+00:00' + source_url: https://www.library.city.chiba.jp/facilities/wakaba-nishituga/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library.city.chiba.jp/common/images/favicon.ico + source_url: https://www.library.city.chiba.jp/facilities/wakaba-nishituga/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-24T11:31:59.272210+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-12-CHI-L-CKUGL.yaml b/data/custodian/JP-12-CHI-L-CKUGL.yaml index 407254fd44..efb6a56e05 100644 --- a/data/custodian/JP-12-CHI-L-CKUGL.yaml +++ b/data/custodian/JP-12-CHI-L-CKUGL.yaml @@ -218,3 +218,28 @@ location: geonames_id: 2113015 geonames_name: Chiba feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:32:10.170271+00:00' + source_url: http://lib.cku.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.cku.ac.jp/wp/wp-content/themes/cku/images/common/apple-touch-icon.png + source_url: http://lib.cku.ac.jp + css_selector: '[document] > html.js._device-pc > head > link:nth-of-type(7)' + retrieved_on: '2025-12-24T11:32:10.170271+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www.cku.ac.jp/wp/wp-content/themes/cku/images/common/ogp.jpg + source_url: http://lib.cku.ac.jp + css_selector: '[document] > html.js._device-pc > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-24T11:32:10.170271+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 6 diff --git a/data/custodian/JP-12-CHI-L-CL-chibameitokutankidaigaku_library.yaml b/data/custodian/JP-12-CHI-L-CL-chibameitokutankidaigaku_library.yaml index 4775a7356f..3e076ba854 100644 --- a/data/custodian/JP-12-CHI-L-CL-chibameitokutankidaigaku_library.yaml +++ b/data/custodian/JP-12-CHI-L-CL-chibameitokutankidaigaku_library.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-12-CHI-L-CL-chibameitokutankidaigaku_library - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO 3166-2:JP" + reason: Corrected region code from JP-CH (abbreviation) to JP-12 (Chiba) per ISO + 3166-2:JP - ghcid: JP-CH-CHI-L-CL-chibameitokutankidaigaku_library valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-CH-CHI-L-CL-chibameitokutankidaigaku_library ghcid_numeric: 13059231585613291105 valid_from: '2025-12-06T23:38:56.911443+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: CHIBAMEITOKUTANKIDAIGAKU Library @@ -206,3 +207,22 @@ location: geonames_id: 2113015 geonames_name: Chiba feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-24T11:32:22.039025+00:00' + source_url: http://www.chibameitoku.ac.jp/tandai/students/facilities.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.chibameitoku.ac.jp/tandai/contents/wp-content/uploads/2025/05/cropped-favicon-180x180.png + source_url: http://www.chibameitoku.ac.jp/tandai/students/facilities.html + css_selector: '[document] > html.js > head > link:nth-of-type(20)' + retrieved_on: '2025-12-24T11:32:22.039025+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3