From 38292d191853975f0673914b358cfc5379c76fb1 Mon Sep 17 00:00:00 2001 From: kempersc Date: Tue, 23 Dec 2025 20:56:21 +0100 Subject: [PATCH] enrich: logo enrichment for JP custodians (1350 processed, 10746 remaining) --- backend/rag/main.py | 33 ++- .../.logo_enrichment_crawl4ai_checkpoint.json | 222 +++++++++++++++++- ...PMA-fukuoka_prefectural_museum_of_art.yaml | 25 ++ data/custodian/JP-07-KAM-M-IMZP.yaml | 25 ++ data/custodian/JP-07-KOK-M-KNOHM.yaml | 33 +++ data/custodian/JP-07-KUR-M-HMKPM.yaml | 25 ++ data/custodian/JP-07-NIS-A-FCPL.yaml | 25 ++ data/custodian/JP-07-SHA-M-KUM.yaml | 19 ++ data/custodian/JP-07-TAG-M-TSSRH.yaml | 25 ++ data/custodian/JP-08-AIZ-M-SRA.yaml | 25 ++ ...BAN-L-BL-bandoshiritsusashima_library.yaml | 19 ++ data/custodian/JP-08-BAN-L-BL.yaml | 19 ++ data/custodian/JP-08-BAN-M-INM.yaml | 27 +++ data/custodian/JP-08-CHI-M-HIMH.yaml | 19 ++ data/custodian/JP-08-FUJ-L-KL.yaml | 19 ++ data/custodian/JP-08-HIG-L-IL.yaml | 19 ++ data/custodian/JP-08-HIG-L-K.yaml | 25 ++ data/custodian/JP-08-HIG-L-O.yaml | 19 ++ data/custodian/JP-08-HIG-M-IPOA.yaml | 34 +++ data/custodian/JP-08-HIT-L-HBIUL.yaml | 51 +++- ...-L-HL-hitachinakashiritsuchuo_library.yaml | 19 ++ ...hitachinakashiritsunakaminato_library.yaml | 19 ++ ...-L-HL-hitachinakashiritsusano_library.yaml | 19 ++ ...8-HIT-L-HL-hitachishiritsujuo_library.yaml | 36 ++- ...HIT-L-HL-hitachishiritsunambu_library.yaml | 36 ++- data/custodian/JP-08-HIT-L-HL.yaml | 36 ++- data/custodian/JP-08-HIT-L-HLT.yaml | 19 ++ data/custodian/JP-08-HIT-L-ICUL.yaml | 41 +++- data/custodian/JP-08-HIT-M-HCCSS.yaml | 34 ++- data/custodian/JP-08-HIT-M-HCM.yaml | 42 +++- data/custodian/JP-08-HIT-M-NG.yaml | 28 ++- data/custodian/JP-08-HOK-L-HLA.yaml | 19 ++ data/custodian/JP-08-HOK-L-HLT.yaml | 19 ++ data/custodian/JP-08-HOK-L-HPL.yaml | 19 ++ data/custodian/JP-08-INA-L-ABIUL.yaml | 38 +++ ...shikishisakuragawakominkantoshoshitsu.yaml | 19 ++ ...nashikishishintonekominkantoshoshitsu.yaml | 19 ++ data/custodian/JP-08-INA-L-I.yaml | 19 ++ data/custodian/JP-08-INA-L-IPUHSL.yaml | 38 ++- data/custodian/JP-08-INA-L-M.yaml | 33 +++ ...hiokashijonanchikukominkantoshoshitsu.yaml | 25 ++ data/custodian/JP-08-ISH-L-I.yaml | 25 ++ data/custodian/JP-08-ISH-L-ICCL.yaml | 34 ++- data/custodian/JP-08-ISH-M-IF.yaml | 27 +++ ...itakoshiritsuitakokominkantoshoshitsu.yaml | 19 ++ ...koshiritsunobukatakominkantoshoshitsu.yaml | 19 ++ ...takoshiritsuouharakominkantoshoshitsu.yaml | 19 ++ ...takoshiritsutsuchikominkantoshoshitsu.yaml | 19 ++ data/custodian/JP-08-ITA-L-I.yaml | 19 ++ data/custodian/JP-08-ITA-L-IPL.yaml | 19 ++ data/custodian/JP-08-JOS-L-J.yaml | 25 ++ data/custodian/JP-08-JOS-M-JSMFH.yaml | 25 ++ ...kamisushiwakamatsukominkantoshoshitsu.yaml | 19 ++ ...-K-kamisushiyatabekominkantoshoshitsu.yaml | 19 ++ data/custodian/JP-08-KAM-L-K.yaml | 19 ++ ...8-KAM-L-KL-kamisushiritsuzumo_library.yaml | 19 ++ data/custodian/JP-08-KAM-L-KL.yaml | 19 ++ ...-KAS-L-KL-kasamashiritsuiwama_library.yaml | 17 ++ ...KAS-L-KL-kasamashiritsutomobe_library.yaml | 17 ++ data/custodian/JP-08-KAS-L-KL.yaml | 17 ++ data/custodian/JP-08-KAS-L-KLO.yaml | 27 +++ data/custodian/JP-08-KAS-L-KPL.yaml | 36 ++- data/custodian/JP-08-KIT-L-KCL.yaml | 19 ++ data/custodian/JP-08-KIT-M-KCMA.yaml | 19 ++ data/custodian/JP-08-KIT-M-TMMAI.yaml | 19 ++ ...OG-L-K-kogashichuokominkantoshoshitsu.yaml | 33 +++ ...-L-K-kogashinakatakominkantoshoshitsu.yaml | 33 +++ ...-K-kogashitsutsumikominkantoshoshitsu.yaml | 33 +++ ...KOG-L-K-kogashiyusentasowatoshoshitsu.yaml | 33 +++ data/custodian/JP-08-KOG-L-K.yaml | 33 +++ .../JP-08-KOG-L-KL-kogashisanwa_library.yaml | 33 +++ data/custodian/JP-08-KOG-L-KL.yaml | 33 +++ data/custodian/JP-08-KOG-M-KCMH.yaml | 42 +++- data/custodian/JP-08-KOO-M-KSK.yaml | 25 ++ data/custodian/JP-08-KUJ-L-D.yaml | 19 ++ data/custodian/JP-08-MIT-L-IUL.yaml | 43 +++- ...-MIT-L-ML-mitoshiritsuchihara_library.yaml | 27 +++ ...-08-MIT-L-ML-mitoshiritsumiwa_library.yaml | 27 +++ ...08-MIT-L-ML-mitoshiritsuseibu_library.yaml | 27 +++ ...-08-MIT-L-ML-mitoshiritsutobu_library.yaml | 27 +++ ...IT-L-ML-mitoshiritsutsunezumi_library.yaml | 27 +++ data/custodian/JP-08-MIT-L-ML.yaml | 27 +++ data/custodian/JP-08-MIT-M-IPMH.yaml | 25 ++ data/custodian/JP-08-MIT-M-MBP.yaml | 19 ++ data/custodian/JP-08-MIT-M-TM.yaml | 28 ++- data/custodian/JP-08-MIT-M-TUMM.yaml | 34 ++- ...nakashichuokominkantoshoetsuranshitsu.yaml | 25 ++ data/custodian/JP-08-NAK-L-TL.yaml | 19 ++ ...namegatashikitaurakominkantoshoshitsu.yaml | 25 ++ data/custodian/JP-08-NAM-L-N.yaml | 25 ++ data/custodian/JP-08-NAM-L-NL.yaml | 25 ++ ...mitamashihatorifureaisentatoshoshitsu.yaml | 25 ++ data/custodian/JP-08-OMI-L-O.yaml | 25 ++ ...-08-OMI-L-OL-omitamashitamari_library.yaml | 25 ++ data/custodian/JP-08-OMI-L-OL.yaml | 25 ++ data/custodian/JP-08-RYU-L-RL.yaml | 38 ++- data/custodian/JP-08-RYU-M-RCFHMM.yaml | 19 ++ data/custodian/JP-08-SAK-L-M.yaml | 25 ++ ...agawashiyamatochuokominkantoshoshitsu.yaml | 25 ++ data/custodian/JP-08-SAK-L-S.yaml | 25 ++ data/custodian/JP-08-SAS-L-G.yaml | 25 ++ data/custodian/JP-08-SAS-M-SMHFM.yaml | 19 ++ data/custodian/JP-08-SHI-M-APMJ.yaml | 19 ++ data/custodian/JP-08-SHI-M-SCM.yaml | 19 ++ data/custodian/JP-08-TOR-L-T.yaml | 17 ++ data/custodian/JP-08-TOR-L-TUAUTL.yaml | 32 ++- data/custodian/JP-08-TOR-M-TMC.yaml | 28 ++- data/custodian/NL-GE-DOE-M-FMD.yaml | 7 + data/custodian/NL-GE-DOE-M-GM.yaml | 7 + data/custodian/NL-GE-DOE-R-PF.yaml | 7 + data/custodian/NL-GE-DRU-M-SNMN.yaml | 7 + data/custodian/NL-GE-ELB-A-NVA.yaml | 8 + data/custodian/NL-GE-ELB-M-NVM.yaml | 8 + data/custodian/NL-GE-ELS-M-HME.yaml | 7 + data/custodian/NL-GE-GRO-M-LM.yaml | 8 + data/custodian/NL-GE-HAR-M-KMM.yaml | 8 + .../NL-GE-HAR-M-M-museum_19391945.yaml | 7 + data/custodian/NL-GE-HED-M-HM.yaml | 7 + data/custodian/NL-GE-HEE-M-MGHH.yaml | 8 + data/custodian/NL-GE-HEN-M-AM.yaml | 7 + data/custodian/NL-GE-LIC-M-SMW.yaml | 7 + data/custodian/NL-GE-LIE-M-EK.yaml | 7 + data/custodian/NL-GE-LUN-M-ML.yaml | 7 + data/custodian/NL-GE-NIJ-A-LA.yaml | 7 + data/custodian/NL-GE-NIJ-M-CDCK.yaml | 7 + data/custodian/NL-GE-NIJ-M-RMM.yaml | 7 + data/custodian/NL-GE-NIJ-M-USRSVM.yaml | 7 + data/custodian/NL-GE-OEN-M-MO.yaml | 8 + .../NL-GE-RUU-M-MM-het_mag_museum.yaml | 7 + data/custodian/NL-GE-TER-M-KKPUM.yaml | 7 + data/custodian/NL-GE-THA-M-NAM.yaml | 7 + data/custodian/NL-GE-TIE-I-M.yaml | 8 + data/custodian/NL-GE-TIE-M-FST.yaml | 7 + data/custodian/NL-GE-VAA-M-MVH.yaml | 7 + data/custodian/NL-GE-VEL-M-HVKMM.yaml | 8 + data/custodian/NL-GE-VOR-M-MAS.yaml | 7 + data/custodian/NL-GE-WAG-M-IWSI.yaml | 8 + data/custodian/NL-GE-WEH-M-MUOA.yaml | 8 + data/custodian/NL-GE-WIJ-I-M.yaml | 8 + data/custodian/NL-GE-WIJ-M-VMW.yaml | 8 + data/custodian/NL-GE-WOE-M-MHW.yaml | 8 + data/custodian/NL-GE-XXX-M-MVV.yaml | 9 + data/custodian/NL-GE-ZEV-M-CLM.yaml | 8 + ...-OVZ-oudheidkundige_vereniging_zuwent.yaml | 8 + data/custodian/NL-GE-ZUT-L-BZ.yaml | 9 + data/custodian/NL-GE-ZUT-M-MBH.yaml | 8 + data/custodian/NL-GE-ZUT-M-SMZ.yaml | 8 + data/custodian/NL-GR-GRO-E-H.yaml | 7 + data/custodian/NL-GR-GRO-M-GM.yaml | 8 + data/custodian/NL-GR-GRO-M-KG.yaml | 8 + data/custodian/NL-GR-GRO-M-LG.yaml | 8 + data/custodian/NL-GR-GRO-M-MV.yaml | 8 + data/custodian/NL-GR-GRO-R-CCC.yaml | 7 + data/custodian/NL-GR-GRO-R-KKS.yaml | 7 + data/custodian/NL-GR-GRO-S-VGM.yaml | 7 + data/custodian/NL-GR-HAR-B-HBH.yaml | 8 + data/custodian/NL-GR-LEE-M-MJSL.yaml | 7 + data/custodian/NL-GR-NIE-M-BTOM.yaml | 7 + data/custodian/NL-GR-OML-M-MCTB.yaml | 7 + data/custodian/NL-GR-ONS-M-RES.yaml | 7 + data/custodian/NL-GR-VEE-M-NNTTM.yaml | 8 + data/custodian/NL-LI-BIL-M-OMB.yaml | 7 + data/custodian/NL-LI-HAA-M-MCHBV.yaml | 7 + data/custodian/NL-LI-HEE-M-RM.yaml | 7 + data/custodian/NL-LI-HOR-M-MDK.yaml | 8 + data/custodian/NL-LI-KER-M-VMK.yaml | 7 + data/custodian/NL-LI-MAA-E-JEA.yaml | 8 + data/custodian/NL-LI-MAA-M-VMM.yaml | 8 + data/custodian/NL-LI-OOS-L-CO.yaml | 8 + data/custodian/NL-LI-SIM-M-ZLSM.yaml | 8 + data/custodian/NL-LI-SIT-L-BLS.yaml | 7 + data/custodian/NL-LI-SIT-M-FM.yaml | 7 + data/custodian/NL-LI-SIT-M-HND.yaml | 7 + data/custodian/NL-LI-SIT-M-ND.yaml | 7 + data/custodian/NL-LI-STE-M-MS.yaml | 8 + .../NL-LI-SWA-M-MA-museum_asselt.yaml | 8 + data/custodian/NL-LI-VAA-M-MV.yaml | 8 + .../NL-LI-VEN-L-BV-biblionu_venray.yaml | 8 + .../NL-LI-VEN-L-BV-de_bibliotheek_venray.yaml | 8 + data/custodian/NL-LI-VEN-M-FMMV.yaml | 7 + .../NL-LI-VEN-M-MJ-museum_jocas.yaml | 7 + data/custodian/NL-LI-VEN-M-MPV.yaml | 8 + data/custodian/NL-LI-VEN-M-MWD.yaml | 8 + data/custodian/NL-LI-VEN-M-PMV.yaml | 7 + data/custodian/NL-NB-AAR-M-BKM.yaml | 7 + data/custodian/NL-NB-BAK-M-MDT.yaml | 7 + data/custodian/NL-NB-BER-M-MDS.yaml | 7 + data/custodian/NL-NB-BES-M-BWAMH.yaml | 7 + data/custodian/NL-NB-BES-M-KMB.yaml | 7 + data/custodian/NL-NB-BES-M-NMB.yaml | 7 + data/custodian/NL-NB-BOR-M-BOSB.yaml | 8 + .../NL-NB-BOX-M-MC-museum_canonije.yaml | 7 + ...NB-BOX-M-MV-stichting_museum_vekemans.yaml | 8 + data/custodian/NL-NB-BOZ-A-GAR.yaml | 7 + data/custodian/NL-NB-BRE-L-VA.yaml | 7 + data/custodian/NL-NB-BRE-M-BM.yaml | 7 + .../NL-NB-BRE-M-PM-princenhaags_museum.yaml | 7 + data/custodian/NL-NB-CEN-R-NCSCNN.yaml | 7 + data/custodian/NL-NB-CUI-M-MC.yaml | 7 + .../NL-NB-EIN-M-CM-crypto_museum.yaml | 8 + data/custodian/NL-NB-EIN-M-EMM.yaml | 7 + data/custodian/NL-NB-EIN-M-LM.yaml | 7 + data/custodian/NL-NB-ETL-M-AZKM.yaml | 7 + data/custodian/NL-NB-ETL-M-VOM.yaml | 7 + data/custodian/NL-NB-GEE-M-MDR.yaml | 8 + data/custodian/NL-NB-GEM-M-MAW.yaml | 7 + data/custodian/NL-NB-HEL-M-EM.yaml | 7 + scripts/sync/qdrant_person_sync.py | 23 +- 208 files changed, 3604 insertions(+), 101 deletions(-) diff --git a/backend/rag/main.py b/backend/rag/main.py index 6404fc1e55..512ffe8271 100644 --- a/backend/rag/main.py +++ b/backend/rag/main.py @@ -568,16 +568,31 @@ def extract_llm_response_metadata( if finish_reason is None and isinstance(choice, dict): finish_reason = choice.get("finish_reason") - # Extract usage statistics - usage = last_entry.get("usage", {}) - prompt_tokens = usage.get("prompt_tokens") - completion_tokens = usage.get("completion_tokens") - total_tokens = usage.get("total_tokens") - - # Check for cached_tokens (some providers include this) + # Extract usage statistics - handle both dict and object types + # (DSPy/OpenAI SDK may return CompletionUsage objects instead of dicts) + usage = last_entry.get("usage") + prompt_tokens = None + completion_tokens = None + total_tokens = None cached_tokens = None - if "prompt_tokens_details" in usage: - cached_tokens = usage["prompt_tokens_details"].get("cached_tokens") + + if usage is not None: + if hasattr(usage, "prompt_tokens"): + # It's an object (e.g., CompletionUsage from OpenAI SDK) + prompt_tokens = getattr(usage, "prompt_tokens", None) + completion_tokens = getattr(usage, "completion_tokens", None) + total_tokens = getattr(usage, "total_tokens", None) + prompt_details = getattr(usage, "prompt_tokens_details", None) + if prompt_details is not None: + cached_tokens = getattr(prompt_details, "cached_tokens", None) + elif isinstance(usage, dict): + # It's a plain dict + prompt_tokens = usage.get("prompt_tokens") + completion_tokens = usage.get("completion_tokens") + total_tokens = usage.get("total_tokens") + prompt_details = usage.get("prompt_tokens_details") + if isinstance(prompt_details, dict): + cached_tokens = prompt_details.get("cached_tokens") # Extract model info model = last_entry.get("response_model") or last_entry.get("model") diff --git a/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json b/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json index 9d4f27c4bd..ec46522dd3 100644 --- a/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json +++ b/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json @@ -6656,7 +6656,227 @@ "JP-06-FUK-L-FL.yaml", "JP-06-FUK-M-FCHM.yaml", "JP-06-HAR-M-FJ.yaml", - "JP-06-KIT-M-FCSM.yaml" + "JP-06-KIT-M-FCSM.yaml", + "JP-06-KOI-L-FPL-fukui_prefectural_library.yaml", + "JP-06-OBA-M-YTMM.yaml", + "JP-06-OTS-H-OT.yaml", + "JP-07-AMA-M-AHM.yaml", + "JP-07-FUK-M-FPMA-fukuoka_prefectural_museum_of_art.yaml", + "JP-07-JON-L-KL.yaml", + "JP-07-KAM-M-IMZP.yaml", + "JP-07-KOK-M-KNOHM.yaml", + "JP-07-KUR-M-HMKPM.yaml", + "JP-07-MAT-M-MKSU.yaml", + "JP-07-NIS-A-FCPL.yaml", + "JP-07-OKI-M-TM.yaml", + "JP-07-ORA-L-TL.yaml", + "JP-07-SHA-M-KUM.yaml", + "JP-07-TAG-M-TSSRH.yaml", + "JP-07-UMI-M-UT.yaml", + "JP-08-AIZ-M-SRA.yaml", + "JP-08-AKI-M-MMBE.yaml", + "JP-08-BAN-L-BL-bandoshiritsusashima_library.yaml", + "JP-08-BAN-L-BL.yaml", + "JP-08-BAN-M-BPM.yaml", + "JP-08-BAN-M-INM.yaml", + "JP-08-CHI-L-CL-chikuseishiritsuakeno_library.yaml", + "JP-08-CHI-L-CL.yaml", + "JP-08-CHI-L-CLK.yaml", + "JP-08-CHI-L-CLS.yaml", + "JP-08-CHI-M-HIMH.yaml", + "JP-08-CHI-M-HMA.yaml", + "JP-08-CHI-M-SMA.yaml", + "JP-08-FUJ-L-KL.yaml", + "JP-08-HIG-L-IL.yaml", + "JP-08-HIG-L-ITICIP.yaml", + "JP-08-HIG-L-K.yaml", + "JP-08-HIG-L-N.yaml", + "JP-08-HIG-L-O.yaml", + "JP-08-HIG-L-SL.yaml", + "JP-08-HIG-M-GKT.yaml", + "JP-08-HIG-M-IPOA.yaml", + "JP-08-HIG-M-OMBM.yaml", + "JP-08-HIG-M-OMM.yaml", + "JP-08-HIG-M-OWCSSM.yaml", + "JP-08-HIT-A-HCA-hitachiomiya_city_archives.yaml", + "JP-08-HIT-A-HCA.yaml", + "JP-08-HIT-L-H-hitachiomiyashigozenyamakominkantoshoshitsu.yaml", + "JP-08-HIT-L-H-hitachiomiyashimiwakominkantoshoshitsu.yaml", + "JP-08-HIT-L-H-hitachiomiyashiogawasogosentatoshoshitsu.yaml", + "JP-08-HIT-L-H-hitachiomiyashiyamagatakominkantoshoshitsu.yaml", + "JP-08-HIT-L-H.yaml", + "JP-08-HIT-L-HBIUL.yaml", + "JP-08-HIT-L-HL-hitachinakashiritsuchuo_library.yaml", + "JP-08-HIT-L-HL-hitachinakashiritsunakaminato_library.yaml", + "JP-08-HIT-L-HL-hitachinakashiritsusano_library.yaml", + "JP-08-HIT-L-HL-hitachishiritsujuo_library.yaml", + "JP-08-HIT-L-HL-hitachishiritsunambu_library.yaml", + "JP-08-HIT-L-HL-hitachishiritsutaga_library.yaml", + "JP-08-HIT-L-HL.yaml", + "JP-08-HIT-L-HLK.yaml", + "JP-08-HIT-L-HLS-hitachiotashiritsu_library_satomibunshitsu.yaml", + "JP-08-HIT-L-HLS.yaml", + "JP-08-HIT-L-HLT.yaml", + "JP-08-HIT-L-HPL.yaml", + "JP-08-HIT-L-ICUL.yaml", + "JP-08-HIT-L-IL.yaml", + "JP-08-HIT-M-HCCSS.yaml", + "JP-08-HIT-M-HCLHM.yaml", + "JP-08-HIT-M-HCM.yaml", + "JP-08-HIT-M-HMCC.yaml", + "JP-08-HIT-M-HSHFMH.yaml", + "JP-08-HIT-M-NG.yaml", + "JP-08-HIT-M-NMM.yaml", + "JP-08-HIT-M-NWM.yaml", + "JP-08-HOK-L-HLA.yaml", + "JP-08-HOK-L-HLT.yaml", + "JP-08-HOK-L-HPL.yaml", + "JP-08-INA-L-ABIUL.yaml", + "JP-08-INA-L-AL.yaml", + "JP-08-INA-L-I-inashikishisakuragawakominkantoshoshitsu.yaml", + "JP-08-INA-L-I-inashikishishintonekominkantoshoshitsu.yaml", + "JP-08-INA-L-I.yaml", + "JP-08-INA-L-ICCL.yaml", + "JP-08-INA-L-IPUHSL.yaml", + "JP-08-INA-L-K.yaml", + "JP-08-INA-L-M.yaml", + "JP-08-INA-L-TDIL.yaml", + "JP-08-INA-L-TLH.yaml", + "JP-08-INA-M-ICHFCM.yaml", + "JP-08-INA-M-TKM.yaml", + "JP-08-ISH-L-I-ishiokashijonanchikukominkantoshoshitsu.yaml", + "JP-08-ISH-L-I.yaml", + "JP-08-ISH-L-ICCL.yaml", + "JP-08-ISH-L-ICYL.yaml", + "JP-08-ISH-M-IF.yaml", + "JP-08-ISH-M-IFM.yaml", + "JP-08-ISH-M-TRGB.yaml", + "JP-08-ITA-L-I-itakoshiritsuitakokominkantoshoshitsu.yaml", + "JP-08-ITA-L-I-itakoshiritsunobukatakominkantoshoshitsu.yaml", + "JP-08-ITA-L-I-itakoshiritsuouharakominkantoshoshitsu.yaml", + "JP-08-ITA-L-I-itakoshiritsutsuchikominkantoshoshitsu.yaml", + "JP-08-ITA-L-I.yaml", + "JP-08-ITA-L-IPL.yaml", + "JP-08-JOS-L-J.yaml", + "JP-08-JOS-L-JPL.yaml", + "JP-08-JOS-M-JSMFH.yaml", + "JP-08-KAM-L-K-kamisushiwakamatsukominkantoshoshitsu.yaml", + "JP-08-KAM-L-K-kamisushiyatabekominkantoshoshitsu.yaml", + "JP-08-KAM-L-K.yaml", + "JP-08-KAM-L-KFSNRDAJFR.yaml", + "JP-08-KAM-L-KL-kamisushiritsuzumo_library.yaml", + "JP-08-KAM-L-KL.yaml", + "JP-08-KAM-M-KMHF.yaml", + "JP-08-KAS-L-KL-kasamashiritsuiwama_library.yaml", + "JP-08-KAS-L-KL-kasamashiritsutomobe_library.yaml", + "JP-08-KAS-L-KL-kasumigaurashiritsu_library.yaml", + "JP-08-KAS-L-KL.yaml", + "JP-08-KAS-L-KLC.yaml", + "JP-08-KAS-L-KLO.yaml", + "JP-08-KAS-L-KPL.yaml", + "JP-08-KAS-M-ICAM.yaml", + "JP-08-KAS-M-KCFHMH.yaml", + "JP-08-KAS-M-KCMH.yaml", + "JP-08-KAS-M-KCSRRP.yaml", + "JP-08-KAS-M-KIAM.yaml", + "JP-08-KAS-M-TKMM.yaml", + "JP-08-KIT-L-KCL.yaml", + "JP-08-KIT-L-TPL.yaml", + "JP-08-KIT-M-KCFHMM.yaml", + "JP-08-KIT-M-KCMA.yaml", + "JP-08-KIT-M-KVW.yaml", + "JP-08-KIT-M-TMMAI.yaml", + "JP-08-KIT-M-TR.yaml", + "JP-08-KOG-L-K-kogashichuokominkantoshoshitsu.yaml", + "JP-08-KOG-L-K-kogashinakatakominkantoshoshitsu.yaml", + "JP-08-KOG-L-K-kogashitsutsumikominkantoshoshitsu.yaml", + "JP-08-KOG-L-K-kogashiyusentasowatoshoshitsu.yaml", + "JP-08-KOG-L-K.yaml", + "JP-08-KOG-L-KCML.yaml", + "JP-08-KOG-L-KL-kogashisanwa_library.yaml", + "JP-08-KOG-L-KL.yaml", + "JP-08-KOG-M-KCMH.yaml", + "JP-08-KOG-M-KMAM.yaml", + "JP-08-KOG-M-MMPIIFTNMF.yaml", + "JP-08-KOG-M-SLMH.yaml", + "JP-08-KOO-M-KSK.yaml", + "JP-08-KUJ-L-D.yaml", + "JP-08-MIT-A-IPAM-ibaraki_prefectural_archives_and_museum.yaml", + "JP-08-MIT-A-IPAM.yaml", + "JP-08-MIT-L-I.yaml", + "JP-08-MIT-L-IL.yaml", + "JP-08-MIT-L-IUL.yaml", + "JP-08-MIT-L-ML-mitoshiritsuchihara_library.yaml", + "JP-08-MIT-L-ML-mitoshiritsumiwa_library.yaml", + "JP-08-MIT-L-ML-mitoshiritsuseibu_library.yaml", + "JP-08-MIT-L-ML-mitoshiritsutobu_library.yaml", + "JP-08-MIT-L-ML-mitoshiritsutsunezumi_library.yaml", + "JP-08-MIT-L-ML.yaml", + "JP-08-MIT-L-TUMITC.yaml", + "JP-08-MIT-M-HNMM.yaml", + "JP-08-MIT-M-IPMH.yaml", + "JP-08-MIT-M-JGC.yaml", + "JP-08-MIT-M-JHMM.yaml", + "JP-08-MIT-M-KPK.yaml", + "JP-08-MIT-M-KSM.yaml", + "JP-08-MIT-M-MBP.yaml", + "JP-08-MIT-M-MCM.yaml", + "JP-08-MIT-M-MMAI.yaml", + "JP-08-MIT-M-TM.yaml", + "JP-08-MIT-M-TUMM.yaml", + "JP-08-MOR-L-AGHLDRDMTI.yaml", + "JP-08-MOR-L-M-moriyashigoshukominkantoshoshitsu.yaml", + "JP-08-MOR-L-M-moriyashikitamoriyakominkantoshoshitsu.yaml", + "JP-08-MOR-L-M-moriyashikoyakominkantoshoshitsu.yaml", + "JP-08-MOR-L-M.yaml", + "JP-08-NAK-L-IRMRDCDJAE.yaml", + "JP-08-NAK-L-IWSJCL.yaml", + "JP-08-NAK-L-N-nakashichuokominkantoshoetsuranshitsu.yaml", + "JP-08-NAK-L-N.yaml", + "JP-08-NAK-L-NCPL.yaml", + "JP-08-NAK-L-TL.yaml", + "JP-08-NAK-M-IMNS.yaml", + "JP-08-NAK-M-IPBG.yaml", + "JP-08-NAK-M-NSHFMH.yaml", + "JP-08-NAK-M-TPRP.yaml", + "JP-08-NAK-M-TVM.yaml", + "JP-08-NAM-L-N-namegatashikitaurakominkantoshoshitsu.yaml", + "JP-08-NAM-L-N.yaml", + "JP-08-NAM-L-NL.yaml", + "JP-08-OGA-M-KSMLCIC.yaml", + "JP-08-OMI-L-O-omitamashihatorifureaisentatoshoshitsu.yaml", + "JP-08-OMI-L-O.yaml", + "JP-08-OMI-L-OL-omitamashitamari_library.yaml", + "JP-08-OMI-L-OL.yaml", + "JP-08-OMI-M-OCMH.yaml", + "JP-08-OMI-M-OML.yaml", + "JP-08-RYU-L-RL.yaml", + "JP-08-RYU-L-RPLH.yaml", + "JP-08-RYU-M-RCFHMM.yaml", + "JP-08-SAK-L-M.yaml", + "JP-08-SAK-L-S-sakuragawashiyamatochuokominkantoshoshitsu.yaml", + "JP-08-SAK-L-S.yaml", + "JP-08-SAK-M-GAM.yaml", + "JP-08-SAK-M-IRM.yaml", + "JP-08-SAK-M-MDHM.yaml", + "JP-08-SAK-M-THART.yaml", + "JP-08-SAS-L-G.yaml", + "JP-08-SAS-L-S.yaml", + "JP-08-SAS-M-SMHFM.yaml", + "JP-08-SHI-L-SPL.yaml", + "JP-08-SHI-M-APMJ.yaml", + "JP-08-SHI-M-SCM.yaml", + "JP-08-SHI-M-SMM.yaml", + "JP-08-TAK-M-TCFHM.yaml", + "JP-08-TOR-L-T.yaml", + "JP-08-TOR-L-TL-torideshiritsufujishiro_library.yaml", + "JP-08-TOR-L-TL.yaml", + "JP-08-TOR-L-TUAUTL.yaml", + "JP-08-TOR-M-TMC.yaml", + "JP-08-TSU-A-UTA-university_of_tsukuba_archives.yaml", + "JP-08-TSU-A-UTA.yaml", + "JP-08-TSU-L-AFFRITCTOA.yaml", + "JP-08-TSU-L-APIL.yaml" ], "last_index": 9 } \ No newline at end of file diff --git a/data/custodian/JP-07-FUK-M-FPMA-fukuoka_prefectural_museum_of_art.yaml b/data/custodian/JP-07-FUK-M-FPMA-fukuoka_prefectural_museum_of_art.yaml index ed709c271d..841da584b9 100644 --- a/data/custodian/JP-07-FUK-M-FPMA-fukuoka_prefectural_museum_of_art.yaml +++ b/data/custodian/JP-07-FUK-M-FPMA-fukuoka_prefectural_museum_of_art.yaml @@ -842,3 +842,28 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/AB5LqTtUsrM/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:08:47.594980+00:00' + source_url: https://fukuoka-kenbi.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://fukuoka-kenbi.jp/bundles/favicon/apple-touch-icon.png + source_url: https://fukuoka-kenbi.jp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:08:47.594980+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://fukuoka-kenbi.jp/bundles/images/og2.jpg + source_url: https://fukuoka-kenbi.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T17:08:47.594980+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 4 diff --git a/data/custodian/JP-07-KAM-M-IMZP.yaml b/data/custodian/JP-07-KAM-M-IMZP.yaml index 3c1615c1ce..4e9ac9f950 100644 --- a/data/custodian/JP-07-KAM-M-IMZP.yaml +++ b/data/custodian/JP-07-KAM-M-IMZP.yaml @@ -1056,3 +1056,28 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/0SNm-NLWZGc/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:09:04.555899+00:00' + source_url: https://www.itozu-zoo.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.itozu-zoo.jp/com/img/com-hc-logo.png + source_url: https://www.itozu-zoo.jp + css_selector: '[document] > html > body > header.hc > div.inner > h1 > a > img' + retrieved_on: '2025-12-23T17:09:04.555899+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 到津の森公園 + - claim_type: og_image_url + claim_value: http://www.itozu-zoo.jp/img/facebook-thumb.jpg + source_url: https://www.itozu-zoo.jp + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T17:09:04.555899+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-07-KOK-M-KNOHM.yaml b/data/custodian/JP-07-KOK-M-KNOHM.yaml index 07d70816ec..0551230d1d 100644 --- a/data/custodian/JP-07-KOK-M-KNOHM.yaml +++ b/data/custodian/JP-07-KOK-M-KNOHM.yaml @@ -474,3 +474,36 @@ location: geonames_id: 9888403 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:27.855909+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:09:12.756797+00:00' + source_url: https://www.city.kasuga.fukuoka.jp/miryoku/history/historymuseum/1002240/1002243.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.kasuga.fukuoka.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png + source_url: https://www.city.kasuga.fukuoka.jp/miryoku/history/historymuseum/1002240/1002243.html + css_selector: '#tlogo > p > a > img' + retrieved_on: '2025-12-23T17:09:12.756797+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 春日市 みんなで春をつくろう + - claim_type: favicon_url + claim_value: https://www.city.kasuga.fukuoka.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.kasuga.fukuoka.jp/miryoku/history/historymuseum/1002240/1002243.html + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T17:09:12.756797+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.kasuga.fukuoka.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.kasuga.fukuoka.jp/miryoku/history/historymuseum/1002240/1002243.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:09:12.756797+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-07-KUR-M-HMKPM.yaml b/data/custodian/JP-07-KUR-M-HMKPM.yaml index c533c1e00a..6fe05e4347 100644 --- a/data/custodian/JP-07-KUR-M-HMKPM.yaml +++ b/data/custodian/JP-07-KUR-M-HMKPM.yaml @@ -425,3 +425,28 @@ location: geonames_id: 9892661 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:27.945767+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:09:22.749578+00:00' + source_url: https://www.town.hirokawa.fukuoka.jp/soshiki/kyoikuiinkai_jimukyoku/5/3/8/1/1189.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.hirokawa.fukuoka.jp/theme/base/img_common/smartphone.png + source_url: https://www.town.hirokawa.fukuoka.jp/soshiki/kyoikuiinkai_jimukyoku/5/3/8/1/1189.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:09:22.749578+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.town.hirokawa.fukuoka.jp/theme/base/img_common/ogp_noimage.png + source_url: https://www.town.hirokawa.fukuoka.jp/soshiki/kyoikuiinkai_jimukyoku/5/3/8/1/1189.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T17:09:22.749578+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-07-NIS-A-FCPL.yaml b/data/custodian/JP-07-NIS-A-FCPL.yaml index 3709c90712..fb8edc24ed 100644 --- a/data/custodian/JP-07-NIS-A-FCPL.yaml +++ b/data/custodian/JP-07-NIS-A-FCPL.yaml @@ -299,3 +299,28 @@ location: geonames_id: 9888329 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:28.110962+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:09:42.727751+00:00' + source_url: http://toshokan.city.fukuoka.lg.jp/index.php + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://toshokan.city.fukuoka.lg.jp/favicon.ico + source_url: http://toshokan.city.fukuoka.lg.jp/index.php + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T17:09:42.727751+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://toshokan.city.fukuoka.lg.jp/img/og.jpg + source_url: http://toshokan.city.fukuoka.lg.jp/index.php + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T17:09:42.727751+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-07-SHA-M-KUM.yaml b/data/custodian/JP-07-SHA-M-KUM.yaml index 829b4b9607..f312ebef38 100644 --- a/data/custodian/JP-07-SHA-M-KUM.yaml +++ b/data/custodian/JP-07-SHA-M-KUM.yaml @@ -301,3 +301,22 @@ location: geonames_id: 9888259 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:28.199814+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:10:06.012820+00:00' + source_url: http://www.museum.kyushu-u.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.museum.kyushu-u.ac.jp/img_file/favicon.ico + source_url: http://www.museum.kyushu-u.ac.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T17:10:06.012820+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-07-TAG-M-TSSRH.yaml b/data/custodian/JP-07-TAG-M-TSSRH.yaml index aed1ae43dd..bdaa65259c 100644 --- a/data/custodian/JP-07-TAG-M-TSSRH.yaml +++ b/data/custodian/JP-07-TAG-M-TSSRH.yaml @@ -300,3 +300,28 @@ location: geonames_name: Tagawa feature_code: PPLA2 normalization_timestamp: '2025-12-09T06:53:28.269805+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:10:18.438702+00:00' + source_url: https://www.joho.tagawa.fukuoka.jp/list00784.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.joho.tagawa.fukuoka.jp/dynamic/favicon.ico + source_url: https://www.joho.tagawa.fukuoka.jp/list00784.html + css_selector: '#ctl00_Head1 > link:nth-of-type(14)' + retrieved_on: '2025-12-23T17:10:18.438702+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.joho.tagawa.fukuoka.jp/dynamic/common/images/ogp/og_image.png + source_url: https://www.joho.tagawa.fukuoka.jp/list00784.html + css_selector: '#ctl00_Head1 > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T17:10:18.438702+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-08-AIZ-M-SRA.yaml b/data/custodian/JP-08-AIZ-M-SRA.yaml index 88fb791107..e88eef0432 100644 --- a/data/custodian/JP-08-AIZ-M-SRA.yaml +++ b/data/custodian/JP-08-AIZ-M-SRA.yaml @@ -463,3 +463,28 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/DBH5qr-KKgE/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:10:28.416036+00:00' + source_url: https://bukeyashiki.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://bukeyashiki.com/wp-content/uploads/2024/04/cropped-favicon-black-bukeyashiki-180x180.png + source_url: https://bukeyashiki.com + css_selector: '[document] > html > head > link:nth-of-type(17)' + retrieved_on: '2025-12-23T17:10:28.416036+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://bukeyashiki.com/wp-content/uploads/2024/04/saigotanomo-01-1.jpg + source_url: https://bukeyashiki.com + css_selector: '[document] > html > head > meta:nth-of-type(16)' + retrieved_on: '2025-12-23T17:10:28.416036+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-08-BAN-L-BL-bandoshiritsusashima_library.yaml b/data/custodian/JP-08-BAN-L-BL-bandoshiritsusashima_library.yaml index 2b51586e19..437440f437 100644 --- a/data/custodian/JP-08-BAN-L-BL-bandoshiritsusashima_library.yaml +++ b/data/custodian/JP-08-BAN-L-BL-bandoshiritsusashima_library.yaml @@ -204,3 +204,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.bando.lg.jp/page/dir000587.html wikidata_official_website: http://www.city.bando.lg.jp/page/dir000587.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:11:11.590395+00:00' + source_url: http://www.city.bando.lg.jp/page/dir000587.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.bando.lg.jp/web_clip_icon.png + source_url: http://www.city.bando.lg.jp/page/dir000587.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:11:11.590395+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-BAN-L-BL.yaml b/data/custodian/JP-08-BAN-L-BL.yaml index cb1c3c641b..1e2bd6360a 100644 --- a/data/custodian/JP-08-BAN-L-BL.yaml +++ b/data/custodian/JP-08-BAN-L-BL.yaml @@ -204,3 +204,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.bando.lg.jp/sp/page/dir000589.html wikidata_official_website: http://www.city.bando.lg.jp/sp/page/dir000589.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:11:19.370656+00:00' + source_url: http://www.city.bando.lg.jp/sp/page/dir000589.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.bando.lg.jp/sp/web_clip_icon.png + source_url: http://www.city.bando.lg.jp/sp/page/dir000589.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:11:19.370656+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-BAN-M-INM.yaml b/data/custodian/JP-08-BAN-M-INM.yaml index 7ac0dc80f4..04f2e5ed39 100644 --- a/data/custodian/JP-08-BAN-M-INM.yaml +++ b/data/custodian/JP-08-BAN-M-INM.yaml @@ -960,3 +960,30 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/NMh7IrCMioc/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:11:28.034750+00:00' + source_url: https://www.nat.museum.ibk.ed.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.nat.museum.ibk.ed.jp/favicon.ico + source_url: https://www.nat.museum.ibk.ed.jp + css_selector: '[document] > html.wf-vdl-v7marugothic-n5-active.wf-noto-sans-cjk-jp-n4-active + > head > link' + retrieved_on: '2025-12-23T17:11:28.034750+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.nat.museum.ibk.ed.jp/ogp.png + source_url: https://www.nat.museum.ibk.ed.jp + css_selector: '[document] > html.wf-vdl-v7marugothic-n5-active.wf-noto-sans-cjk-jp-n4-active + > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:11:28.034750+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-08-CHI-M-HIMH.yaml b/data/custodian/JP-08-CHI-M-HIMH.yaml index 1ccd58e55b..1a452f3e4b 100644 --- a/data/custodian/JP-08-CHI-M-HIMH.yaml +++ b/data/custodian/JP-08-CHI-M-HIMH.yaml @@ -248,3 +248,22 @@ location: geonames_id: 6822097 geonames_name: Chikusei feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:12:06.262220+00:00' + source_url: https://www.itayahazan.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.itayahazan.jp/web_clip_icon.png + source_url: https://www.itayahazan.jp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:12:06.262220+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-FUJ-L-KL.yaml b/data/custodian/JP-08-FUJ-L-KL.yaml index 89b3fec643..ed8a99208c 100644 --- a/data/custodian/JP-08-FUJ-L-KL.yaml +++ b/data/custodian/JP-08-FUJ-L-KL.yaml @@ -283,3 +283,22 @@ location: geonames_id: 2112939 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:28.513443+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:12:19.128319+00:00' + source_url: https://www.town.kunimi.fukushima.jp/site/kangetsudai + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.kunimi.fukushima.jp/img/favicon.ico + source_url: https://www.town.kunimi.fukushima.jp/site/kangetsudai + css_selector: '[document] > html > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T17:12:19.128319+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-HIG-L-IL.yaml b/data/custodian/JP-08-HIG-L-IL.yaml index 860068723f..aa4eb0e8ec 100644 --- a/data/custodian/JP-08-HIG-L-IL.yaml +++ b/data/custodian/JP-08-HIG-L-IL.yaml @@ -203,3 +203,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.t-ibaraki.jp wikidata_official_website: http://www.lib.t-ibaraki.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:12:29.396093+00:00' + source_url: http://www.lib.t-ibaraki.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.lib.t-ibaraki.jp/themes/lib_theme/favicon.ico + source_url: http://www.lib.t-ibaraki.jp + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T17:12:29.396093+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-HIG-L-K.yaml b/data/custodian/JP-08-HIG-L-K.yaml index 1a06f5fb2c..e112503442 100644 --- a/data/custodian/JP-08-HIG-L-K.yaml +++ b/data/custodian/JP-08-HIG-L-K.yaml @@ -203,3 +203,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.shirosato.lg.jp/page/page001450.html wikidata_official_website: http://www.town.shirosato.lg.jp/page/page001450.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:12:40.171097+00:00' + source_url: http://www.town.shirosato.lg.jp/page/page001450.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.town.shirosato.lg.jp/web_clip_icon.png + source_url: http://www.town.shirosato.lg.jp/page/page001450.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:12:40.171097+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.town.shirosato.lg.jp/data/img/26-1266229965_565.JPG + source_url: http://www.town.shirosato.lg.jp/page/page001450.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:12:40.171097+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-HIG-L-O.yaml b/data/custodian/JP-08-HIG-L-O.yaml index 1747b153b1..f6b95c8c1e 100644 --- a/data/custodian/JP-08-HIG-L-O.yaml +++ b/data/custodian/JP-08-HIG-L-O.yaml @@ -204,3 +204,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.oarai.lg.jp/~syougai/syogai/info-752-278_3.html wikidata_official_website: http://www.town.oarai.lg.jp/~syougai/syogai/info-752-278_3.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:12:48.782056+00:00' + source_url: http://www.town.oarai.lg.jp/~syougai/syogai/info-752-278_3.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.oarai.lg.jp/wp/wp-content/uploads/2021/03/cropped-favicon-180x180.jpg + source_url: http://www.town.oarai.lg.jp/~syougai/syogai/info-752-278_3.html + css_selector: '[document] > html.fontS.white > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:12:48.782056+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/JP-08-HIG-M-IPOA.yaml b/data/custodian/JP-08-HIG-M-IPOA.yaml index 970d3e23e0..6da80f5d89 100644 --- a/data/custodian/JP-08-HIG-M-IPOA.yaml +++ b/data/custodian/JP-08-HIG-M-IPOA.yaml @@ -260,3 +260,37 @@ wikidata_enrichment: image: Aqua World.jpg commons_category: Aqua World Ibaraki Prefectural Oarai Aquarium wikidata_image: Aqua World.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:13:32.566006+00:00' + source_url: https://www.aquaworld-oarai.com + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.aquaworld-oarai.com/cms/wp-content/themes/aqua_hmr/images/logo.png + source_url: https://www.aquaworld-oarai.com + css_selector: '#inner_header_parts > div.header_top > div.site-branding > h1.site-title + > a > img' + retrieved_on: '2025-12-23T17:13:32.566006+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: アクアワールド茨城県大洗水族館 + - claim_type: favicon_url + claim_value: https://www.aquaworld-oarai.com/cms/wp-content/uploads/fbrfg/apple-touch-icon.png + source_url: https://www.aquaworld-oarai.com + css_selector: '[document] > html > head > link:nth-of-type(34)' + retrieved_on: '2025-12-23T17:13:32.566006+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www.aquaworld-oarai.com/cms/wp-content/uploads/2022/08/ogp.jpg + source_url: https://www.aquaworld-oarai.com + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:13:32.566006+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 4 diff --git a/data/custodian/JP-08-HIT-L-HBIUL.yaml b/data/custodian/JP-08-HIT-L-HBIUL.yaml index 4b1d847752..f5931dca4a 100644 --- a/data/custodian/JP-08-HIT-L-HBIUL.yaml +++ b/data/custodian/JP-08-HIT-L-HBIUL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-HIT-L-HBIUL - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-HIT-L-HBIUL valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-HIT-L-HBIUL ghcid_numeric: 5832817409698474045 valid_from: '2025-12-06T23:38:53.207834+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Hitachi Branch of Ibaraki University Library @@ -204,3 +205,41 @@ location: geonames_id: 2112708 geonames_name: Hitachi feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:14:42.305207+00:00' + source_url: http://www.lib.ibaraki.ac.jp/guide/kougaku.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.lib.ibaraki.ac.jp/assets/images/logo-library.svg + source_url: http://www.lib.ibaraki.ac.jp/guide/kougaku.html + css_selector: '[document] > html.js-focus-visible.wf-source-han-sans-japanese-n7-active + > body > main.container-lower > div.container-lower__inner > header.container-lower__header + > div.container-lower__major > div.breadcrumb > ul.breadcrumb__inner.container-lower__bread + > li > a.breadcrumb__home > img.breadcrumb__logo' + retrieved_on: '2025-12-23T17:14:42.305207+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 茨城大学図書館 + - claim_type: favicon_url + claim_value: http://www.lib.ibaraki.ac.jp/apple-touch-icon-180x180.png + source_url: http://www.lib.ibaraki.ac.jp/guide/kougaku.html + css_selector: '[document] > html.js-focus-visible.wf-source-han-sans-japanese-n7-active + > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:14:42.305207+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.lib.ibaraki.ac.jp/ogp.jpg + source_url: http://www.lib.ibaraki.ac.jp/guide/kougaku.html + css_selector: '[document] > html.js-focus-visible.wf-source-han-sans-japanese-n7-active + > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T17:14:42.305207+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsuchuo_library.yaml b/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsuchuo_library.yaml index 237c327873..0895a037cb 100644 --- a/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsuchuo_library.yaml +++ b/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsuchuo_library.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.hitachinaka.ibaraki.jp wikidata_official_website: http://www.lib.hitachinaka.ibaraki.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:14:54.103098+00:00' + source_url: http://www.lib.hitachinaka.ibaraki.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib.hitachinaka.ibaraki.jp/apple-touch-icon.png + source_url: http://www.lib.hitachinaka.ibaraki.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:14:54.103098+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsunakaminato_library.yaml b/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsunakaminato_library.yaml index 240dae26e4..3785289d78 100644 --- a/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsunakaminato_library.yaml +++ b/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsunakaminato_library.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.hitachinaka.ibaraki.jp/ wikidata_official_website: http://www.lib.hitachinaka.ibaraki.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:15:04.761951+00:00' + source_url: http://www.lib.hitachinaka.ibaraki.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib.hitachinaka.ibaraki.jp/apple-touch-icon.png + source_url: http://www.lib.hitachinaka.ibaraki.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:15:04.761951+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsusano_library.yaml b/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsusano_library.yaml index 9d017f5bfe..0a67df0906 100644 --- a/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsusano_library.yaml +++ b/data/custodian/JP-08-HIT-L-HL-hitachinakashiritsusano_library.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.hitachinaka.ibaraki.jp wikidata_official_website: http://www.lib.hitachinaka.ibaraki.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:15:16.286006+00:00' + source_url: http://www.lib.hitachinaka.ibaraki.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib.hitachinaka.ibaraki.jp/apple-touch-icon.png + source_url: http://www.lib.hitachinaka.ibaraki.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:15:16.286006+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-HIT-L-HL-hitachishiritsujuo_library.yaml b/data/custodian/JP-08-HIT-L-HL-hitachishiritsujuo_library.yaml index 87a69930a2..216d6c4a46 100644 --- a/data/custodian/JP-08-HIT-L-HL-hitachishiritsujuo_library.yaml +++ b/data/custodian/JP-08-HIT-L-HL-hitachishiritsujuo_library.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-HIT-L-HL-hitachishiritsujuo_library - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-HIT-L-HL-hitachishiritsujuo_library valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-HIT-L-HL-hitachishiritsujuo_library ghcid_numeric: 12907750541195458816 valid_from: '2025-12-06T23:38:42.039454+00:00' @@ -237,3 +238,30 @@ location: postal_code: 319-1304 street_address: 202-1 JUOCHO TOMOBE, Hitachi Shi, Ibaraki Ken, 319-1304 normalization_timestamp: '2025-12-09T10:56:29.805366+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:15:27.901951+00:00' + source_url: http://www.city.hitachi.lg.jp/lib + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.hitachi.lg.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png + source_url: http://www.city.hitachi.lg.jp/lib + css_selector: '#tlogo > p > a > img' + retrieved_on: '2025-12-23T17:15:27.901951+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 日立市トップページ + - claim_type: favicon_url + claim_value: https://www.city.hitachi.lg.jp/_template_/_site_/_default_/_res/favicon.ico + source_url: http://www.city.hitachi.lg.jp/lib + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:15:27.901951+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-HIT-L-HL-hitachishiritsunambu_library.yaml b/data/custodian/JP-08-HIT-L-HL-hitachishiritsunambu_library.yaml index ab54b61aaf..3b54f20c27 100644 --- a/data/custodian/JP-08-HIT-L-HL-hitachishiritsunambu_library.yaml +++ b/data/custodian/JP-08-HIT-L-HL-hitachishiritsunambu_library.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-HIT-L-HL-hitachishiritsunambu_library - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-HIT-L-HL-hitachishiritsunambu_library valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-HIT-L-HL-hitachishiritsunambu_library ghcid_numeric: 4201558236335047041 valid_from: '2025-12-06T23:38:51.085999+00:00' @@ -236,3 +237,30 @@ location: postal_code: 319-1222 street_address: 3-24-1 KUJICHO, Hitachi Shi, Ibaraki Ken, 319-1222 normalization_timestamp: '2025-12-09T10:56:29.834779+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:15:35.023624+00:00' + source_url: http://www.city.hitachi.lg.jp/lib/008/004/p025529.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.hitachi.lg.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png + source_url: http://www.city.hitachi.lg.jp/lib/008/004/p025529.html + css_selector: '#tlogo > p > a > img' + retrieved_on: '2025-12-23T17:15:35.023624+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 日立市トップページ + - claim_type: favicon_url + claim_value: https://www.city.hitachi.lg.jp/_template_/_site_/_default_/_res/favicon.ico + source_url: http://www.city.hitachi.lg.jp/lib/008/004/p025529.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:15:35.023624+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-HIT-L-HL.yaml b/data/custodian/JP-08-HIT-L-HL.yaml index 6e9b0157c7..d26798f038 100644 --- a/data/custodian/JP-08-HIT-L-HL.yaml +++ b/data/custodian/JP-08-HIT-L-HL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-HIT-L-HL - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-HIT-L-HL valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-HIT-L-HL ghcid_numeric: 15152615725784543380 valid_from: '2025-12-06T23:38:42.033742+00:00' @@ -237,3 +238,30 @@ location: postal_code: 317-0073 street_address: 1-21-1 SAIWAICHO, Hitachi Shi, Ibaraki Ken, 317-0073 normalization_timestamp: '2025-12-09T10:56:30.077042+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:15:42.961322+00:00' + source_url: http://www.city.hitachi.lg.jp/lib + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.hitachi.lg.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png + source_url: http://www.city.hitachi.lg.jp/lib + css_selector: '#tlogo > p > a > img' + retrieved_on: '2025-12-23T17:15:42.961322+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 日立市トップページ + - claim_type: favicon_url + claim_value: https://www.city.hitachi.lg.jp/_template_/_site_/_default_/_res/favicon.ico + source_url: http://www.city.hitachi.lg.jp/lib + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:15:42.961322+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-HIT-L-HLT.yaml b/data/custodian/JP-08-HIT-L-HLT.yaml index 12879cdb7c..33f02e5d90 100644 --- a/data/custodian/JP-08-HIT-L-HLT.yaml +++ b/data/custodian/JP-08-HIT-L-HLT.yaml @@ -206,3 +206,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib.hitachinaka.ibaraki.jp/ wikidata_official_website: http://www.lib.hitachinaka.ibaraki.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:16:13.342272+00:00' + source_url: http://www.lib.hitachinaka.ibaraki.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib.hitachinaka.ibaraki.jp/apple-touch-icon.png + source_url: http://www.lib.hitachinaka.ibaraki.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:16:13.342272+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-HIT-L-ICUL.yaml b/data/custodian/JP-08-HIT-L-ICUL.yaml index 207af9a2f8..867d35b977 100644 --- a/data/custodian/JP-08-HIT-L-ICUL.yaml +++ b/data/custodian/JP-08-HIT-L-ICUL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-HIT-L-ICUL - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-HIT-L-ICUL valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-HIT-L-ICUL ghcid_numeric: 13278198200379618335 valid_from: '2025-12-06T23:38:54.512609+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Ibaraki Christian University Library @@ -190,7 +191,8 @@ wikidata_enrichment: description: organisatie uit Japan - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: http://www.icc.ac.jp/lib/ wikidata_official_website: http://www.icc.ac.jp/lib/ @@ -212,3 +214,28 @@ location: geonames_id: 2112708 geonames_name: Hitachi feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:16:34.252323+00:00' + source_url: http://www.icc.ac.jp/lib + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.icc.ac.jp/tncui5000000006q-img/apple-touch-icon-180x180.png + source_url: http://www.icc.ac.jp/lib + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T17:16:34.252323+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.icc.ac.jp/tncui5000000006q-img/ogp_2021_spring.jpg + source_url: http://www.icc.ac.jp/lib + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T17:16:34.252323+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-HIT-M-HCCSS.yaml b/data/custodian/JP-08-HIT-M-HCCSS.yaml index 1a425b809d..6d4c3310c2 100644 --- a/data/custodian/JP-08-HIT-M-HCCSS.yaml +++ b/data/custodian/JP-08-HIT-M-HCCSS.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-HIT-M-HCCSS - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-HIT-M-HCCSS valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-HIT-M-HCCSS ghcid_numeric: 10527612468706142728 valid_from: '2025-12-06T23:38:31.595494+00:00' @@ -226,3 +227,28 @@ location: postal_code: 317-0073 street_address: SAIWAICHO, Hitachi Shi, Ibaraki Ken, 317-0073 normalization_timestamp: '2025-12-09T10:56:30.291111+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:16:45.608050+00:00' + source_url: http://www.civic.jp/science + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.civic.jp/web_clip_icon.png + source_url: http://www.civic.jp/science + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:16:45.608050+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.civic.jp/web_clip_icon.png + source_url: http://www.civic.jp/science + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:16:45.608050+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-HIT-M-HCM.yaml b/data/custodian/JP-08-HIT-M-HCM.yaml index 43bc3f0ec8..f2aed93213 100644 --- a/data/custodian/JP-08-HIT-M-HCM.yaml +++ b/data/custodian/JP-08-HIT-M-HCM.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-HIT-M-HCM - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-HIT-M-HCM valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-HIT-M-HCM ghcid_numeric: 10992953195208404806 valid_from: '2025-12-06T23:38:31.589937+00:00' @@ -247,3 +248,36 @@ location: postal_code: 317-0055 street_address: MIYATACHO, Hitachi Shi, Ibaraki Ken, 317-0055 normalization_timestamp: '2025-12-09T10:56:30.343997+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:19:16.287434+00:00' + source_url: https://www.city.hitachi.lg.jp/museum + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.hitachi.lg.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png + source_url: https://www.city.hitachi.lg.jp/museum + css_selector: '#tlogo > p > a > img' + retrieved_on: '2025-12-23T17:19:16.287434+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 日立市トップページ + - claim_type: favicon_url + claim_value: https://www.city.hitachi.lg.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.hitachi.lg.jp/museum + css_selector: '[document] > html > head > link:nth-of-type(9)' + retrieved_on: '2025-12-23T17:19:16.287434+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.hitachi.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.hitachi.lg.jp/museum + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T17:19:16.287434+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-HIT-M-NG.yaml b/data/custodian/JP-08-HIT-M-NG.yaml index 699fb45534..776ed9cd69 100644 --- a/data/custodian/JP-08-HIT-M-NG.yaml +++ b/data/custodian/JP-08-HIT-M-NG.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-HIT-M-NG - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-HIT-M-NG valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-HIT-M-NG ghcid_numeric: 7483789558077479572 valid_from: '2025-12-06T23:38:31.632773+00:00' @@ -251,3 +252,22 @@ location: postal_code: 313-0007 street_address: ARAJUKUCHO, Hitachiota Shi, Ibaraki Ken, 313-0007 normalization_timestamp: '2025-12-09T10:56:30.419275+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:19:36.679251+00:00' + source_url: http://www.tokugawa.gr.jp/seizanso + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.tokugawa.gr.jp/wp-content/uploads/2025/10/cropped-tokugawaaoi-180x180.png + source_url: http://www.tokugawa.gr.jp/seizanso + css_selector: '[document] > html.js.canvas > head > link:nth-of-type(33)' + retrieved_on: '2025-12-23T17:19:36.679251+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/JP-08-HOK-L-HLA.yaml b/data/custodian/JP-08-HOK-L-HLA.yaml index 0edaf6cba7..ddb84ed35e 100644 --- a/data/custodian/JP-08-HOK-L-HLA.yaml +++ b/data/custodian/JP-08-HOK-L-HLA.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib-hokota.jp/index.jsp wikidata_official_website: http://www.lib-hokota.jp/index.jsp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:19:47.993629+00:00' + source_url: http://www.lib-hokota.jp/index.jsp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib-hokota.jp/apple-touch-icon.png + source_url: http://www.lib-hokota.jp/index.jsp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:19:47.993629+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-HOK-L-HLT.yaml b/data/custodian/JP-08-HOK-L-HLT.yaml index 0270fdb431..bf4c6f7bf6 100644 --- a/data/custodian/JP-08-HOK-L-HLT.yaml +++ b/data/custodian/JP-08-HOK-L-HLT.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib-hokota.jp/index.jsp wikidata_official_website: http://www.lib-hokota.jp/index.jsp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:19:54.593999+00:00' + source_url: http://www.lib-hokota.jp/index.jsp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib-hokota.jp/apple-touch-icon.png + source_url: http://www.lib-hokota.jp/index.jsp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:19:54.593999+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-HOK-L-HPL.yaml b/data/custodian/JP-08-HOK-L-HPL.yaml index 5c98a0c112..1ae0e83013 100644 --- a/data/custodian/JP-08-HOK-L-HPL.yaml +++ b/data/custodian/JP-08-HOK-L-HPL.yaml @@ -203,3 +203,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.lib-hokota.jp wikidata_official_website: http://www.lib-hokota.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:20:06.652680+00:00' + source_url: http://www.lib-hokota.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib-hokota.jp/apple-touch-icon.png + source_url: http://www.lib-hokota.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:20:06.652680+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-INA-L-ABIUL.yaml b/data/custodian/JP-08-INA-L-ABIUL.yaml index 723c4a80a2..857c563393 100644 --- a/data/custodian/JP-08-INA-L-ABIUL.yaml +++ b/data/custodian/JP-08-INA-L-ABIUL.yaml @@ -207,3 +207,41 @@ location: geonames_id: 2113115 geonames_name: Ami feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:20:14.891867+00:00' + source_url: http://www.lib.ibaraki.ac.jp/guide/nougaku.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.lib.ibaraki.ac.jp/assets/images/logo-library.svg + source_url: http://www.lib.ibaraki.ac.jp/guide/nougaku.html + css_selector: '[document] > html.js-focus-visible.wf-source-han-sans-japanese-n7-active + > body > main.container-lower > div.container-lower__inner > header.container-lower__header + > div.container-lower__major > div.breadcrumb > ul.breadcrumb__inner.container-lower__bread + > li > a.breadcrumb__home > img.breadcrumb__logo' + retrieved_on: '2025-12-23T17:20:14.891867+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 茨城大学図書館 + - claim_type: favicon_url + claim_value: http://www.lib.ibaraki.ac.jp/apple-touch-icon-180x180.png + source_url: http://www.lib.ibaraki.ac.jp/guide/nougaku.html + css_selector: '[document] > html.js-focus-visible.wf-source-han-sans-japanese-n7-active + > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:20:14.891867+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.lib.ibaraki.ac.jp/ogp.jpg + source_url: http://www.lib.ibaraki.ac.jp/guide/nougaku.html + css_selector: '[document] > html.js-focus-visible.wf-source-han-sans-japanese-n7-active + > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T17:20:14.891867+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-08-INA-L-I-inashikishisakuragawakominkantoshoshitsu.yaml b/data/custodian/JP-08-INA-L-I-inashikishisakuragawakominkantoshoshitsu.yaml index dafbe668bb..1f68e7ce33 100644 --- a/data/custodian/JP-08-INA-L-I-inashikishisakuragawakominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-INA-L-I-inashikishisakuragawakominkantoshoshitsu.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.inashiki.lg.jp/index.php?code=678 wikidata_official_website: http://www.city.inashiki.lg.jp/index.php?code=678 +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:20:30.102260+00:00' + source_url: http://www.city.inashiki.lg.jp/index.php?code=678 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.inashiki.lg.jp/web_clip_icon.png + source_url: http://www.city.inashiki.lg.jp/index.php?code=678 + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:20:30.102260+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-INA-L-I-inashikishishintonekominkantoshoshitsu.yaml b/data/custodian/JP-08-INA-L-I-inashikishishintonekominkantoshoshitsu.yaml index 03897ae935..bc0f6e5d85 100644 --- a/data/custodian/JP-08-INA-L-I-inashikishishintonekominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-INA-L-I-inashikishishintonekominkantoshoshitsu.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.inashiki.lg.jp/page/dir000911.html wikidata_official_website: http://www.city.inashiki.lg.jp/page/dir000911.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:20:37.545301+00:00' + source_url: http://www.city.inashiki.lg.jp/page/dir000911.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.inashiki.lg.jp/web_clip_icon.png + source_url: http://www.city.inashiki.lg.jp/page/dir000911.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:20:37.545301+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-INA-L-I.yaml b/data/custodian/JP-08-INA-L-I.yaml index e3ef71b045..a59a542181 100644 --- a/data/custodian/JP-08-INA-L-I.yaml +++ b/data/custodian/JP-08-INA-L-I.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.inashiki.lg.jp/index.php?code=677 wikidata_official_website: http://www.city.inashiki.lg.jp/index.php?code=677 +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:20:45.580612+00:00' + source_url: http://www.city.inashiki.lg.jp/index.php?code=677 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.inashiki.lg.jp/web_clip_icon.png + source_url: http://www.city.inashiki.lg.jp/index.php?code=677 + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:20:45.580612+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-INA-L-IPUHSL.yaml b/data/custodian/JP-08-INA-L-IPUHSL.yaml index 55ca4d0edb..52198688bb 100644 --- a/data/custodian/JP-08-INA-L-IPUHSL.yaml +++ b/data/custodian/JP-08-INA-L-IPUHSL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-INA-L-IPUHSL - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-INA-L-IPUHSL valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-INA-L-IPUHSL ghcid_numeric: 10607131817429094390 valid_from: '2025-12-06T23:38:53.929985+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Ibaraki Prefectural University of Health Sciences, Library @@ -206,3 +207,28 @@ location: geonames_id: 2113115 geonames_name: Ami feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:20:55.944763+00:00' + source_url: http://www.lib.ipu.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib.ipu.ac.jp/web_clip_icon.png + source_url: http://www.lib.ipu.ac.jp + css_selector: '[document] > html.sr > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:20:55.944763+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.ipu.ac.jp/web_clip_icon.png + source_url: http://www.lib.ipu.ac.jp + css_selector: '[document] > html.sr > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:20:55.944763+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-INA-L-M.yaml b/data/custodian/JP-08-INA-L-M.yaml index 859c4a9c13..ca5b226fbe 100644 --- a/data/custodian/JP-08-INA-L-M.yaml +++ b/data/custodian/JP-08-INA-L-M.yaml @@ -202,3 +202,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.vill.miho.lg.jp/page/page000551.html wikidata_official_website: http://www.vill.miho.lg.jp/page/page000551.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:21:06.290343+00:00' + source_url: http://www.vill.miho.lg.jp/page/page000551.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.vill.miho.lg.jp/skin/common/img/header/logo_title@2x.png + source_url: http://www.vill.miho.lg.jp/page/page000551.html + css_selector: '#logoTitle > a > img' + retrieved_on: '2025-12-23T17:21:06.290343+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 美浦村公式ホームページ + - claim_type: favicon_url + claim_value: http://www.vill.miho.lg.jp/web_clip_icon.png + source_url: http://www.vill.miho.lg.jp/page/page000551.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:21:06.290343+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.vill.miho.lg.jp/web_clip_icon.png + source_url: http://www.vill.miho.lg.jp/page/page000551.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:21:06.290343+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-ISH-L-I-ishiokashijonanchikukominkantoshoshitsu.yaml b/data/custodian/JP-08-ISH-L-I-ishiokashijonanchikukominkantoshoshitsu.yaml index 1843162fb2..5ae40f22b5 100644 --- a/data/custodian/JP-08-ISH-L-I-ishiokashijonanchikukominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-ISH-L-I-ishiokashijonanchikukominkantoshoshitsu.yaml @@ -200,3 +200,28 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.ishioka.lg.jp/page/page000013.html wikidata_official_website: http://lib.city.ishioka.lg.jp/page/page000013.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:21:30.821269+00:00' + source_url: http://lib.city.ishioka.lg.jp/page/page000013.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://lib.city.ishioka.lg.jp/web_clip_icon.png + source_url: http://lib.city.ishioka.lg.jp/page/page000013.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:21:30.821269+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://lib.city.ishioka.lg.jp/data/img/1707208817_9.jpg + source_url: http://lib.city.ishioka.lg.jp/page/page000013.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T17:21:30.821269+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-ISH-L-I.yaml b/data/custodian/JP-08-ISH-L-I.yaml index 34f14f7ed6..86ac45bab1 100644 --- a/data/custodian/JP-08-ISH-L-I.yaml +++ b/data/custodian/JP-08-ISH-L-I.yaml @@ -200,3 +200,28 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.ishioka.lg.jp/page/page000013.html wikidata_official_website: http://lib.city.ishioka.lg.jp/page/page000013.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:21:37.804173+00:00' + source_url: http://lib.city.ishioka.lg.jp/page/page000013.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://lib.city.ishioka.lg.jp/web_clip_icon.png + source_url: http://lib.city.ishioka.lg.jp/page/page000013.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:21:37.804173+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://lib.city.ishioka.lg.jp/data/img/1707208817_9.jpg + source_url: http://lib.city.ishioka.lg.jp/page/page000013.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T17:21:37.804173+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-ISH-L-ICCL.yaml b/data/custodian/JP-08-ISH-L-ICCL.yaml index 6b5b77dca6..393dc1c54b 100644 --- a/data/custodian/JP-08-ISH-L-ICCL.yaml +++ b/data/custodian/JP-08-ISH-L-ICCL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-ISH-L-ICCL - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-ISH-L-ICCL valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-ISH-L-ICCL ghcid_numeric: 3965822687242136515 valid_from: '2025-12-06T23:38:42.050790+00:00' @@ -231,3 +232,28 @@ location: postal_code: 315-0017 street_address: 1-6-31 WAKAMIYA, Ishioka Shi, Ibaraki Ken, 315-0017 normalization_timestamp: '2025-12-09T10:56:31.095417+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:21:44.900494+00:00' + source_url: http://lib.city.ishioka.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://lib.city.ishioka.lg.jp/web_clip_icon.png + source_url: http://lib.city.ishioka.lg.jp + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:21:44.900494+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://lib.city.ishioka.lg.jp/web_clip_icon.png + source_url: http://lib.city.ishioka.lg.jp + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T17:21:44.900494+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-ISH-M-IF.yaml b/data/custodian/JP-08-ISH-M-IF.yaml index 736d07d79c..9818c419c1 100644 --- a/data/custodian/JP-08-ISH-M-IF.yaml +++ b/data/custodian/JP-08-ISH-M-IF.yaml @@ -233,3 +233,30 @@ location: postal_code: 315-0153 street_address: SHIMOAOYAGI, Ishioka Shi, Ibaraki Ken, 315-0153 normalization_timestamp: '2025-12-09T10:56:31.160547+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:21:58.117434+00:00' + source_url: http://flowerpark.or.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.flowerpark.or.jp/assets/img/top/fp_appletouchicon.png + source_url: http://flowerpark.or.jp + css_selector: '[document] > html.wf-roboto-n5-active.wf-cormorantgaramond-n6-active + > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:21:58.117434+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.flowerpark.or.jp/assets/img/top/fp_ogp.jpg + source_url: http://flowerpark.or.jp + css_selector: '[document] > html.wf-roboto-n5-active.wf-cormorantgaramond-n6-active + > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T17:21:58.117434+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-ITA-L-I-itakoshiritsuitakokominkantoshoshitsu.yaml b/data/custodian/JP-08-ITA-L-I-itakoshiritsuitakokominkantoshoshitsu.yaml index aeda002b20..b688839bbe 100644 --- a/data/custodian/JP-08-ITA-L-I-itakoshiritsuitakokominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-ITA-L-I-itakoshiritsuitakokominkantoshoshitsu.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.itako.ed.jp/default.asp wikidata_official_website: https://lib.itako.ed.jp/default.asp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:22:08.100899+00:00' + source_url: https://lib.itako.ed.jp/default.asp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.itako.ed.jp/apple-touch-icon.png + source_url: https://lib.itako.ed.jp/default.asp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:22:08.100899+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-ITA-L-I-itakoshiritsunobukatakominkantoshoshitsu.yaml b/data/custodian/JP-08-ITA-L-I-itakoshiritsunobukatakominkantoshoshitsu.yaml index 0bb8891397..6054612e83 100644 --- a/data/custodian/JP-08-ITA-L-I-itakoshiritsunobukatakominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-ITA-L-I-itakoshiritsunobukatakominkantoshoshitsu.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.itako.ed.jp/default.asp wikidata_official_website: https://lib.itako.ed.jp/default.asp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:22:13.591645+00:00' + source_url: https://lib.itako.ed.jp/default.asp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.itako.ed.jp/apple-touch-icon.png + source_url: https://lib.itako.ed.jp/default.asp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:22:13.591645+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-ITA-L-I-itakoshiritsuouharakominkantoshoshitsu.yaml b/data/custodian/JP-08-ITA-L-I-itakoshiritsuouharakominkantoshoshitsu.yaml index fae2a34879..4517519ba1 100644 --- a/data/custodian/JP-08-ITA-L-I-itakoshiritsuouharakominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-ITA-L-I-itakoshiritsuouharakominkantoshoshitsu.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.itako.ed.jp/default.asp wikidata_official_website: https://lib.itako.ed.jp/default.asp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:22:19.037932+00:00' + source_url: https://lib.itako.ed.jp/default.asp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.itako.ed.jp/apple-touch-icon.png + source_url: https://lib.itako.ed.jp/default.asp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:22:19.037932+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-ITA-L-I-itakoshiritsutsuchikominkantoshoshitsu.yaml b/data/custodian/JP-08-ITA-L-I-itakoshiritsutsuchikominkantoshoshitsu.yaml index dcbb43d934..80f7d1ebdb 100644 --- a/data/custodian/JP-08-ITA-L-I-itakoshiritsutsuchikominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-ITA-L-I-itakoshiritsutsuchikominkantoshoshitsu.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.itako.ed.jp/default.asp wikidata_official_website: https://lib.itako.ed.jp/default.asp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:22:24.581444+00:00' + source_url: https://lib.itako.ed.jp/default.asp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.itako.ed.jp/apple-touch-icon.png + source_url: https://lib.itako.ed.jp/default.asp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:22:24.581444+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-ITA-L-I.yaml b/data/custodian/JP-08-ITA-L-I.yaml index eef789ac6b..f5e9ee10c7 100644 --- a/data/custodian/JP-08-ITA-L-I.yaml +++ b/data/custodian/JP-08-ITA-L-I.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.itako.ed.jp/default.asp wikidata_official_website: https://lib.itako.ed.jp/default.asp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:22:30.070380+00:00' + source_url: https://lib.itako.ed.jp/default.asp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.itako.ed.jp/apple-touch-icon.png + source_url: https://lib.itako.ed.jp/default.asp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:22:30.070380+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-ITA-L-IPL.yaml b/data/custodian/JP-08-ITA-L-IPL.yaml index cccd9e2015..177b1bb3ee 100644 --- a/data/custodian/JP-08-ITA-L-IPL.yaml +++ b/data/custodian/JP-08-ITA-L-IPL.yaml @@ -206,3 +206,22 @@ wikidata_enrichment: wikidata_web: official_website: https://lib.itako.ed.jp/default.asp wikidata_official_website: https://lib.itako.ed.jp/default.asp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:22:36.041578+00:00' + source_url: https://lib.itako.ed.jp/default.asp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.itako.ed.jp/apple-touch-icon.png + source_url: https://lib.itako.ed.jp/default.asp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:22:36.041578+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-JOS-L-J.yaml b/data/custodian/JP-08-JOS-L-J.yaml index 72b9eedafe..ce2f1537bd 100644 --- a/data/custodian/JP-08-JOS-L-J.yaml +++ b/data/custodian/JP-08-JOS-L-J.yaml @@ -200,3 +200,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.joso.lg.jp/shigai/kanko/chiiki/1421553530228.html wikidata_official_website: http://www.city.joso.lg.jp/shigai/kanko/chiiki/1421553530228.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:22:44.006351+00:00' + source_url: http://www.city.joso.lg.jp/shigai/kanko/chiiki/1421553530228.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.joso.lg.jp/shigai/kanko/chiiki/web_clip_icon.png + source_url: http://www.city.joso.lg.jp/shigai/kanko/chiiki/1421553530228.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:22:44.006351+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.joso.lg.jp/web_clip_icon.png + source_url: http://www.city.joso.lg.jp/shigai/kanko/chiiki/1421553530228.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:22:44.006351+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-JOS-M-JSMFH.yaml b/data/custodian/JP-08-JOS-M-JSMFH.yaml index 80b078a71b..35a6109ede 100644 --- a/data/custodian/JP-08-JOS-M-JSMFH.yaml +++ b/data/custodian/JP-08-JOS-M-JSMFH.yaml @@ -236,3 +236,28 @@ wikidata_enrichment: image: 常総市地域交流センター(豊田城) (3739493330).jpg commons_category: Joso City Regional Exchange Center wikidata_image: 常総市地域交流センター(豊田城) (3739493330).jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:22:58.012822+00:00' + source_url: http://www.city.joso.lg.jp/shigai/kanko/chiiki + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.joso.lg.jp/shigai/kanko/web_clip_icon.png + source_url: http://www.city.joso.lg.jp/shigai/kanko/chiiki + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:22:58.012822+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.joso.lg.jp/web_clip_icon.png + source_url: http://www.city.joso.lg.jp/shigai/kanko/chiiki + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:22:58.012822+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-KAM-L-K-kamisushiwakamatsukominkantoshoshitsu.yaml b/data/custodian/JP-08-KAM-L-K-kamisushiwakamatsukominkantoshoshitsu.yaml index 5ed7d8e5a2..3373f04e5a 100644 --- a/data/custodian/JP-08-KAM-L-K-kamisushiwakamatsukominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-KAM-L-K-kamisushiwakamatsukominkantoshoshitsu.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kamisu-tosho.jp/ wikidata_official_website: http://www.kamisu-tosho.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:23:10.152905+00:00' + source_url: http://www.kamisu-tosho.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.kamisu-tosho.jp/apple-touch-icon.png + source_url: http://www.kamisu-tosho.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:23:10.152905+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-KAM-L-K-kamisushiyatabekominkantoshoshitsu.yaml b/data/custodian/JP-08-KAM-L-K-kamisushiyatabekominkantoshoshitsu.yaml index 21c1d081d2..148c897335 100644 --- a/data/custodian/JP-08-KAM-L-K-kamisushiyatabekominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-KAM-L-K-kamisushiyatabekominkantoshoshitsu.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kamisu-tosho.jp/ wikidata_official_website: http://www.kamisu-tosho.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:23:20.195756+00:00' + source_url: http://www.kamisu-tosho.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.kamisu-tosho.jp/apple-touch-icon.png + source_url: http://www.kamisu-tosho.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:23:20.195756+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-KAM-L-K.yaml b/data/custodian/JP-08-KAM-L-K.yaml index eaab31b9be..063b0a1a74 100644 --- a/data/custodian/JP-08-KAM-L-K.yaml +++ b/data/custodian/JP-08-KAM-L-K.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kamisu-tosho.jp/ wikidata_official_website: http://www.kamisu-tosho.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:23:31.066796+00:00' + source_url: http://www.kamisu-tosho.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.kamisu-tosho.jp/apple-touch-icon.png + source_url: http://www.kamisu-tosho.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:23:31.066796+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-KAM-L-KL-kamisushiritsuzumo_library.yaml b/data/custodian/JP-08-KAM-L-KL-kamisushiritsuzumo_library.yaml index 9cd0107372..c2e0cf9b24 100644 --- a/data/custodian/JP-08-KAM-L-KL-kamisushiritsuzumo_library.yaml +++ b/data/custodian/JP-08-KAM-L-KL-kamisushiritsuzumo_library.yaml @@ -204,3 +204,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kamisu-tosho.jp wikidata_official_website: http://www.kamisu-tosho.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:23:43.746448+00:00' + source_url: http://www.kamisu-tosho.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.kamisu-tosho.jp/apple-touch-icon.png + source_url: http://www.kamisu-tosho.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:23:43.746448+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-KAM-L-KL.yaml b/data/custodian/JP-08-KAM-L-KL.yaml index 144b2b9476..2b1873a95d 100644 --- a/data/custodian/JP-08-KAM-L-KL.yaml +++ b/data/custodian/JP-08-KAM-L-KL.yaml @@ -204,3 +204,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.kamisu-tosho.jp/ wikidata_official_website: http://www.kamisu-tosho.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:23:55.056243+00:00' + source_url: http://www.kamisu-tosho.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.kamisu-tosho.jp/apple-touch-icon.png + source_url: http://www.kamisu-tosho.jp + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T17:23:55.056243+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-KAS-L-KL-kasamashiritsuiwama_library.yaml b/data/custodian/JP-08-KAS-L-KL-kasamashiritsuiwama_library.yaml index 81e1de5c7f..b39ac83bb4 100644 --- a/data/custodian/JP-08-KAS-L-KL-kasamashiritsuiwama_library.yaml +++ b/data/custodian/JP-08-KAS-L-KL-kasamashiritsuiwama_library.yaml @@ -204,3 +204,20 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.kasama.ibaraki.jp/ wikidata_official_website: http://lib.city.kasama.ibaraki.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:24:27.308377+00:00' + source_url: http://lib.city.kasama.ibaraki.jp + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: http://localhost/TOSHOW/asp/shared/img/snsThumbnail.png + source_url: http://lib.city.kasama.ibaraki.jp + css_selector: '[document] > html > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-23T17:24:27.308377+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-08-KAS-L-KL-kasamashiritsutomobe_library.yaml b/data/custodian/JP-08-KAS-L-KL-kasamashiritsutomobe_library.yaml index 925febde91..d99025bf31 100644 --- a/data/custodian/JP-08-KAS-L-KL-kasamashiritsutomobe_library.yaml +++ b/data/custodian/JP-08-KAS-L-KL-kasamashiritsutomobe_library.yaml @@ -204,3 +204,20 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.kasama.ibaraki.jp/ wikidata_official_website: http://lib.city.kasama.ibaraki.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:24:49.477077+00:00' + source_url: http://lib.city.kasama.ibaraki.jp + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: http://localhost/TOSHOW/asp/shared/img/snsThumbnail.png + source_url: http://lib.city.kasama.ibaraki.jp + css_selector: '[document] > html > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-23T17:24:49.477077+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-08-KAS-L-KL.yaml b/data/custodian/JP-08-KAS-L-KL.yaml index 7e440ab147..c59dc072c5 100644 --- a/data/custodian/JP-08-KAS-L-KL.yaml +++ b/data/custodian/JP-08-KAS-L-KL.yaml @@ -204,3 +204,20 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.kasama.ibaraki.jp/ wikidata_official_website: http://lib.city.kasama.ibaraki.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:25:10.894182+00:00' + source_url: http://lib.city.kasama.ibaraki.jp + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: http://localhost/TOSHOW/asp/shared/img/snsThumbnail.png + source_url: http://lib.city.kasama.ibaraki.jp + css_selector: '[document] > html > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-23T17:25:10.894182+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-08-KAS-L-KLO.yaml b/data/custodian/JP-08-KAS-L-KLO.yaml index a82105eab1..15319c67dd 100644 --- a/data/custodian/JP-08-KAS-L-KLO.yaml +++ b/data/custodian/JP-08-KAS-L-KLO.yaml @@ -205,3 +205,30 @@ wikidata_enrichment: wikidata_web: official_website: http://opac.city.kashima.ibaraki.jp/facility/ohno.html wikidata_official_website: http://opac.city.kashima.ibaraki.jp/facility/ohno.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:25:25.206928+00:00' + source_url: http://opac.city.kashima.ibaraki.jp/facility/ohno.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://opac.city.kashima.ibaraki.jp/themes/lib_theme/images/logo.png + source_url: http://opac.city.kashima.ibaraki.jp/facility/ohno.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T17:25:25.206928+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鹿嶋市立中央図書館 + - claim_type: favicon_url + claim_value: https://opac.city.kashima.ibaraki.jp/themes/lib_theme/favicon.ico + source_url: http://opac.city.kashima.ibaraki.jp/facility/ohno.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T17:25:25.206928+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-KAS-L-KPL.yaml b/data/custodian/JP-08-KAS-L-KPL.yaml index 6ff31e4dba..1bcdec799e 100644 --- a/data/custodian/JP-08-KAS-L-KPL.yaml +++ b/data/custodian/JP-08-KAS-L-KPL.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-KAS-L-KPL - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-KAS-L-KPL valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-KAS-L-KPL ghcid_numeric: 392916093464918445 valid_from: '2025-12-06T23:38:42.095297+00:00' @@ -230,3 +231,30 @@ location: postal_code: 314-0031 street_address: 2398-1 KYUCHU, Kashima Shi, Ibaraki Ken, 314-0031 normalization_timestamp: '2025-12-09T10:56:31.762238+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:25:33.554267+00:00' + source_url: http://opac.city.kashima.ibaraki.jp/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://opac.city.kashima.ibaraki.jp/themes/lib_theme/images/logo.png + source_url: http://opac.city.kashima.ibaraki.jp/index.html + css_selector: '#header_logo > a.active > img' + retrieved_on: '2025-12-23T17:25:33.554267+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鹿嶋市立中央図書館 + - claim_type: favicon_url + claim_value: https://opac.city.kashima.ibaraki.jp/themes/lib_theme/favicon.ico + source_url: http://opac.city.kashima.ibaraki.jp/index.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T17:25:33.554267+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-KIT-L-KCL.yaml b/data/custodian/JP-08-KIT-L-KCL.yaml index 09b43e1fb1..821a15f04b 100644 --- a/data/custodian/JP-08-KIT-L-KCL.yaml +++ b/data/custodian/JP-08-KIT-L-KCL.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.kitaibaraki.lg.jp/ wikidata_official_website: http://lib.city.kitaibaraki.lg.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:25:55.212326+00:00' + source_url: http://lib.city.kitaibaraki.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://lib.city.kitaibaraki.lg.jp/favicon.ico + source_url: http://lib.city.kitaibaraki.lg.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T17:25:55.212326+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-KIT-M-KCMA.yaml b/data/custodian/JP-08-KIT-M-KCMA.yaml index c68f41a9e0..1d3971a445 100644 --- a/data/custodian/JP-08-KIT-M-KCMA.yaml +++ b/data/custodian/JP-08-KIT-M-KCMA.yaml @@ -480,3 +480,22 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/D4uhk8cgsZ4/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:26:12.875562+00:00' + source_url: http://www.kcmofa.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.kcmofa.com/wp/wp-content/uploads/2018/11/cropped-kcmoa-180x180.jpg + source_url: http://www.kcmofa.com + css_selector: '[document] > html > head > link:nth-of-type(19)' + retrieved_on: '2025-12-23T17:26:12.875562+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-KIT-M-TMMAI.yaml b/data/custodian/JP-08-KIT-M-TMMAI.yaml index 4224fc6d78..3d7b8d9cab 100644 --- a/data/custodian/JP-08-KIT-M-TMMAI.yaml +++ b/data/custodian/JP-08-KIT-M-TMMAI.yaml @@ -250,3 +250,22 @@ wikidata_enrichment: - id: Q11394499 label: Hiroshi Naito description: Japanese architect +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:26:31.246939+00:00' + source_url: http://www.tenshin.museum.ibk.ed.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.tenshin.museum.ibk.ed.jp/apple-touch-icon.png + source_url: http://www.tenshin.museum.ibk.ed.jp + css_selector: '[document] > html > head > link:nth-of-type(10)' + retrieved_on: '2025-12-23T17:26:31.246939+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-KOG-L-K-kogashichuokominkantoshoshitsu.yaml b/data/custodian/JP-08-KOG-L-K-kogashichuokominkantoshoshitsu.yaml index 04e521a3a3..85985855b7 100644 --- a/data/custodian/JP-08-KOG-L-K-kogashichuokominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-KOG-L-K-kogashichuokominkantoshoshitsu.yaml @@ -200,3 +200,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html wikidata_official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:26:41.917068+00:00' + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.ibaraki-koga.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T17:26:41.917068+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 古河市 koga city こがでくらすと KOGA KURASU + - claim_type: favicon_url + claim_value: http://www.city.ibaraki-koga.lg.jp/smartphone.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:26:41.917068+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.ibaraki-koga.lg.jp/material/images/group/1/poster_notfound_05.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:26:41.917068+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-KOG-L-K-kogashinakatakominkantoshoshitsu.yaml b/data/custodian/JP-08-KOG-L-K-kogashinakatakominkantoshoshitsu.yaml index 98b35eaad8..02bfde97f5 100644 --- a/data/custodian/JP-08-KOG-L-K-kogashinakatakominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-KOG-L-K-kogashinakatakominkantoshoshitsu.yaml @@ -200,3 +200,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html wikidata_official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:26:50.475093+00:00' + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.ibaraki-koga.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T17:26:50.475093+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 古河市 koga city こがでくらすと KOGA KURASU + - claim_type: favicon_url + claim_value: http://www.city.ibaraki-koga.lg.jp/smartphone.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:26:50.475093+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.ibaraki-koga.lg.jp/material/images/group/1/poster_notfound_05.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:26:50.475093+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-KOG-L-K-kogashitsutsumikominkantoshoshitsu.yaml b/data/custodian/JP-08-KOG-L-K-kogashitsutsumikominkantoshoshitsu.yaml index 70f110c916..ab168e2adb 100644 --- a/data/custodian/JP-08-KOG-L-K-kogashitsutsumikominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-KOG-L-K-kogashitsutsumikominkantoshoshitsu.yaml @@ -200,3 +200,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html wikidata_official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:26:59.095398+00:00' + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.ibaraki-koga.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T17:26:59.095398+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 古河市 koga city こがでくらすと KOGA KURASU + - claim_type: favicon_url + claim_value: http://www.city.ibaraki-koga.lg.jp/smartphone.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:26:59.095398+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.ibaraki-koga.lg.jp/material/images/group/1/poster_notfound_05.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:26:59.095398+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-KOG-L-K-kogashiyusentasowatoshoshitsu.yaml b/data/custodian/JP-08-KOG-L-K-kogashiyusentasowatoshoshitsu.yaml index 7cc0e807f4..17379b48c9 100644 --- a/data/custodian/JP-08-KOG-L-K-kogashiyusentasowatoshoshitsu.yaml +++ b/data/custodian/JP-08-KOG-L-K-kogashiyusentasowatoshoshitsu.yaml @@ -200,3 +200,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html wikidata_official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:27:07.603610+00:00' + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.ibaraki-koga.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T17:27:07.603610+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 古河市 koga city こがでくらすと KOGA KURASU + - claim_type: favicon_url + claim_value: http://www.city.ibaraki-koga.lg.jp/smartphone.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:27:07.603610+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.ibaraki-koga.lg.jp/material/images/group/1/poster_notfound_05.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:27:07.603610+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-KOG-L-K.yaml b/data/custodian/JP-08-KOG-L-K.yaml index 9d50d84007..f77ef7d851 100644 --- a/data/custodian/JP-08-KOG-L-K.yaml +++ b/data/custodian/JP-08-KOG-L-K.yaml @@ -200,3 +200,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ibaraki-koga.lg.jp/0000003340.html wikidata_official_website: http://www.city.ibaraki-koga.lg.jp/0000003340.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:27:16.151119+00:00' + source_url: http://www.city.ibaraki-koga.lg.jp/0000003340.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.ibaraki-koga.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000003340.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T17:27:16.151119+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 古河市 koga city こがでくらすと KOGA KURASU + - claim_type: favicon_url + claim_value: http://www.city.ibaraki-koga.lg.jp/smartphone.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000003340.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:27:16.151119+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.ibaraki-koga.lg.jp/material/images/group/1/poster_notfound_05.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000003340.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:27:16.151119+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-KOG-L-KL-kogashisanwa_library.yaml b/data/custodian/JP-08-KOG-L-KL-kogashisanwa_library.yaml index 02d75cc579..39153c7a92 100644 --- a/data/custodian/JP-08-KOG-L-KL-kogashisanwa_library.yaml +++ b/data/custodian/JP-08-KOG-L-KL-kogashisanwa_library.yaml @@ -199,3 +199,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html wikidata_official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:27:26.750028+00:00' + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.ibaraki-koga.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T17:27:26.750028+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 古河市 koga city こがでくらすと KOGA KURASU + - claim_type: favicon_url + claim_value: http://www.city.ibaraki-koga.lg.jp/smartphone.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:27:26.750028+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.ibaraki-koga.lg.jp/material/images/group/1/poster_notfound_05.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:27:26.750028+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-KOG-L-KL.yaml b/data/custodian/JP-08-KOG-L-KL.yaml index 2aea64d7b0..41fa429a09 100644 --- a/data/custodian/JP-08-KOG-L-KL.yaml +++ b/data/custodian/JP-08-KOG-L-KL.yaml @@ -199,3 +199,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html wikidata_official_website: http://www.city.ibaraki-koga.lg.jp/0000000701.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:27:35.187966+00:00' + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.ibaraki-koga.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T17:27:35.187966+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 古河市 koga city こがでくらすと KOGA KURASU + - claim_type: favicon_url + claim_value: http://www.city.ibaraki-koga.lg.jp/smartphone.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:27:35.187966+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.ibaraki-koga.lg.jp/material/images/group/1/poster_notfound_05.png + source_url: http://www.city.ibaraki-koga.lg.jp/0000000701.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:27:35.187966+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-KOG-M-KCMH.yaml b/data/custodian/JP-08-KOG-M-KCMH.yaml index 9df507467c..920834109f 100644 --- a/data/custodian/JP-08-KOG-M-KCMH.yaml +++ b/data/custodian/JP-08-KOG-M-KCMH.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-KOG-M-KCMH - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-KOG-M-KCMH valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-KOG-M-KCMH ghcid_numeric: 3752762643115101494 valid_from: '2025-12-06T23:38:31.606988+00:00' @@ -279,3 +280,36 @@ location: postal_code: 306-0033 street_address: CHUOCHO, Koga Shi, Ibaraki Ken, 306-0033 normalization_timestamp: '2025-12-09T10:56:32.253869+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:27:43.402427+00:00' + source_url: https://www.city.ibaraki-koga.lg.jp/lifetop/kogameguri/history_cultual_property/3_1/7639.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.ibaraki-koga.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: https://www.city.ibaraki-koga.lg.jp/lifetop/kogameguri/history_cultual_property/3_1/7639.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T17:27:43.402427+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 古河市 koga city こがでくらすと KOGA KURASU + - claim_type: favicon_url + claim_value: https://www.city.ibaraki-koga.lg.jp/smartphone.png + source_url: https://www.city.ibaraki-koga.lg.jp/lifetop/kogameguri/history_cultual_property/3_1/7639.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:27:43.402427+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.ibaraki-koga.lg.jp/material/images/group/1/poster_notfound_05.png + source_url: https://www.city.ibaraki-koga.lg.jp/lifetop/kogameguri/history_cultual_property/3_1/7639.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T17:27:43.402427+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-KOO-M-KSK.yaml b/data/custodian/JP-08-KOO-M-KSK.yaml index de67b8d4f3..8d0d8327ab 100644 --- a/data/custodian/JP-08-KOO-M-KSK.yaml +++ b/data/custodian/JP-08-KOO-M-KSK.yaml @@ -253,3 +253,28 @@ location: geonames_id: 7602711 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:28.746587+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:28:03.371858+00:00' + source_url: https://www.bunka-manabi.or.jp/kaiseikan + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.bunka-manabi.or.jp/kaiseikan/favicon.ico + source_url: https://www.bunka-manabi.or.jp/kaiseikan + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T17:28:03.371858+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.bunka-manabi.or.jp/kaiseikan/sns.png + source_url: https://www.bunka-manabi.or.jp/kaiseikan + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T17:28:03.371858+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-08-KUJ-L-D.yaml b/data/custodian/JP-08-KUJ-L-D.yaml index b0340294ff..cb722e53f9 100644 --- a/data/custodian/JP-08-KUJ-L-D.yaml +++ b/data/custodian/JP-08-KUJ-L-D.yaml @@ -202,3 +202,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.daigo.ibaraki.jp/page/page000219.html wikidata_official_website: http://www.town.daigo.ibaraki.jp/page/page000219.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:28:11.120312+00:00' + source_url: http://www.town.daigo.ibaraki.jp/page/page000219.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.town.daigo.ibaraki.jp/page/web_clip_icon.png + source_url: http://www.town.daigo.ibaraki.jp/page/page000219.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:28:11.120312+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-MIT-L-IUL.yaml b/data/custodian/JP-08-MIT-L-IUL.yaml index 60f58a093b..0c99d6be07 100644 --- a/data/custodian/JP-08-MIT-L-IUL.yaml +++ b/data/custodian/JP-08-MIT-L-IUL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-MIT-L-IUL - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-MIT-L-IUL valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-MIT-L-IUL ghcid_numeric: 5279681389086876576 valid_from: '2025-12-06T23:38:53.205349+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Ibaraki University Library @@ -187,7 +188,8 @@ wikidata_enrichment: member_of: - id: Q56056912 label: Japan Consortium for Open Access Repository - description: promotes open access and open science in Japan with knowledge dissemination via digital repositories + description: promotes open access and open science in Japan with knowledge dissemination + via digital repositories wikidata_web: official_website: http://www.lib.ibaraki.ac.jp wikidata_official_website: http://www.lib.ibaraki.ac.jp @@ -209,3 +211,30 @@ location: geonames_id: 2111901 geonames_name: Mito feature_code: PPLA +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:28:27.283649+00:00' + source_url: http://www.lib.ibaraki.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib.ibaraki.ac.jp/apple-touch-icon-180x180.png + source_url: http://www.lib.ibaraki.ac.jp + css_selector: '[document] > html.wf-source-han-sans-japanese-n7-active.wf-source-han-sans-japanese-n4-active + > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T17:28:27.283649+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.lib.ibaraki.ac.jp/ogp.jpg + source_url: http://www.lib.ibaraki.ac.jp + css_selector: '[document] > html.wf-source-han-sans-japanese-n7-active.wf-source-han-sans-japanese-n4-active + > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T17:28:27.283649+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-08-MIT-L-ML-mitoshiritsuchihara_library.yaml b/data/custodian/JP-08-MIT-L-ML-mitoshiritsuchihara_library.yaml index f0f2a0fecc..3b0cb12f4d 100644 --- a/data/custodian/JP-08-MIT-L-ML-mitoshiritsuchihara_library.yaml +++ b/data/custodian/JP-08-MIT-L-ML-mitoshiritsuchihara_library.yaml @@ -204,3 +204,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library-mito.jp/contents/tosyokan/uchihara.html wikidata_official_website: http://www.library-mito.jp/contents/tosyokan/uchihara.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:28:35.894689+00:00' + source_url: http://www.library-mito.jp/contents/tosyokan/uchihara.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library-mito.jp/themes/lib_theme/images/logo.png + source_url: http://www.library-mito.jp/contents/tosyokan/uchihara.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T17:28:35.894689+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 水戸市立図書館 + - claim_type: favicon_url + claim_value: https://www.library-mito.jp/themes/lib_theme/favicon.ico + source_url: http://www.library-mito.jp/contents/tosyokan/uchihara.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T17:28:35.894689+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-MIT-L-ML-mitoshiritsumiwa_library.yaml b/data/custodian/JP-08-MIT-L-ML-mitoshiritsumiwa_library.yaml index 63aca2b7e2..bc37e6b746 100644 --- a/data/custodian/JP-08-MIT-L-ML-mitoshiritsumiwa_library.yaml +++ b/data/custodian/JP-08-MIT-L-ML-mitoshiritsumiwa_library.yaml @@ -204,3 +204,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library-mito.jp/contents/tosyokan/miwa.html wikidata_official_website: http://www.library-mito.jp/contents/tosyokan/miwa.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:28:43.807391+00:00' + source_url: http://www.library-mito.jp/contents/tosyokan/miwa.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library-mito.jp/themes/lib_theme/images/logo.png + source_url: http://www.library-mito.jp/contents/tosyokan/miwa.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T17:28:43.807391+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 水戸市立図書館 + - claim_type: favicon_url + claim_value: https://www.library-mito.jp/themes/lib_theme/favicon.ico + source_url: http://www.library-mito.jp/contents/tosyokan/miwa.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T17:28:43.807391+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-MIT-L-ML-mitoshiritsuseibu_library.yaml b/data/custodian/JP-08-MIT-L-ML-mitoshiritsuseibu_library.yaml index 97679c917e..7a9ab4145e 100644 --- a/data/custodian/JP-08-MIT-L-ML-mitoshiritsuseibu_library.yaml +++ b/data/custodian/JP-08-MIT-L-ML-mitoshiritsuseibu_library.yaml @@ -238,3 +238,30 @@ wikidata_enrichment: image: Seibu Library in Mito.jpg commons_category: Mito City Seibu Library wikidata_image: Seibu Library in Mito.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:28:50.316829+00:00' + source_url: http://www.library-mito.jp/contents/tosyokan/seibu.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library-mito.jp/themes/lib_theme/images/logo.png + source_url: http://www.library-mito.jp/contents/tosyokan/seibu.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T17:28:50.316829+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 水戸市立図書館 + - claim_type: favicon_url + claim_value: https://www.library-mito.jp/themes/lib_theme/favicon.ico + source_url: http://www.library-mito.jp/contents/tosyokan/seibu.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T17:28:50.316829+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-MIT-L-ML-mitoshiritsutobu_library.yaml b/data/custodian/JP-08-MIT-L-ML-mitoshiritsutobu_library.yaml index 07baaa5c91..3e72d398b3 100644 --- a/data/custodian/JP-08-MIT-L-ML-mitoshiritsutobu_library.yaml +++ b/data/custodian/JP-08-MIT-L-ML-mitoshiritsutobu_library.yaml @@ -204,3 +204,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library-mito.jp/contents/tosyokan/toubu.html wikidata_official_website: http://www.library-mito.jp/contents/tosyokan/toubu.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:44:01.761086+00:00' + source_url: http://www.library-mito.jp/contents/tosyokan/toubu.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library-mito.jp/themes/lib_theme/images/logo.png + source_url: http://www.library-mito.jp/contents/tosyokan/toubu.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T19:44:01.761086+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 水戸市立図書館 + - claim_type: favicon_url + claim_value: https://www.library-mito.jp/themes/lib_theme/favicon.ico + source_url: http://www.library-mito.jp/contents/tosyokan/toubu.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T19:44:01.761086+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-MIT-L-ML-mitoshiritsutsunezumi_library.yaml b/data/custodian/JP-08-MIT-L-ML-mitoshiritsutsunezumi_library.yaml index 21b8b277f7..4aaaa7a105 100644 --- a/data/custodian/JP-08-MIT-L-ML-mitoshiritsutsunezumi_library.yaml +++ b/data/custodian/JP-08-MIT-L-ML-mitoshiritsutsunezumi_library.yaml @@ -204,3 +204,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.library-mito.jp/contents/tosyokan/tsunezumi.html wikidata_official_website: http://www.library-mito.jp/contents/tosyokan/tsunezumi.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:44:11.242150+00:00' + source_url: http://www.library-mito.jp/contents/tosyokan/tsunezumi.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library-mito.jp/themes/lib_theme/images/logo.png + source_url: http://www.library-mito.jp/contents/tosyokan/tsunezumi.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T19:44:11.242150+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 水戸市立図書館 + - claim_type: favicon_url + claim_value: https://www.library-mito.jp/themes/lib_theme/favicon.ico + source_url: http://www.library-mito.jp/contents/tosyokan/tsunezumi.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T19:44:11.242150+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-MIT-L-ML.yaml b/data/custodian/JP-08-MIT-L-ML.yaml index d28201a63b..ff7b6626a1 100644 --- a/data/custodian/JP-08-MIT-L-ML.yaml +++ b/data/custodian/JP-08-MIT-L-ML.yaml @@ -220,3 +220,30 @@ wikidata_enrichment: wikidata_media: image: Mito City Central Library & Mito City Museum02.jpg wikidata_image: Mito City Central Library & Mito City Museum02.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:44:21.343628+00:00' + source_url: http://www.library-mito.jp/contents/tosyokan/chuou.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.library-mito.jp/themes/lib_theme/images/logo.png + source_url: http://www.library-mito.jp/contents/tosyokan/chuou.html + css_selector: '#header_logo > a > img' + retrieved_on: '2025-12-23T19:44:21.343628+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 水戸市立図書館 + - claim_type: favicon_url + claim_value: https://www.library-mito.jp/themes/lib_theme/favicon.ico + source_url: http://www.library-mito.jp/contents/tosyokan/chuou.html + css_selector: '[document] > html > head.notranslate > link' + retrieved_on: '2025-12-23T19:44:21.343628+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-MIT-M-IPMH.yaml b/data/custodian/JP-08-MIT-M-IPMH.yaml index ae7f7eccfc..857901499c 100644 --- a/data/custodian/JP-08-MIT-M-IPMH.yaml +++ b/data/custodian/JP-08-MIT-M-IPMH.yaml @@ -271,3 +271,28 @@ wikidata_enrichment: image: Main-building-of-ibaraki-prefectural-museum-of-history.jpeg commons_category: Ibaraki Prefectural Museum of History wikidata_image: Main-building-of-ibaraki-prefectural-museum-of-history.jpeg +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:44:46.387193+00:00' + source_url: https://rekishikan-ibk.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://rekishikan-ibk.jp/apple-touch-icon.png + source_url: https://rekishikan-ibk.jp + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T19:44:46.387193+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://rekishikan-ibk.jp/ogp.png + source_url: https://rekishikan-ibk.jp + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T19:44:46.387193+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-MIT-M-MBP.yaml b/data/custodian/JP-08-MIT-M-MBP.yaml index 074ccd2eaa..a66f12a22b 100644 --- a/data/custodian/JP-08-MIT-M-MBP.yaml +++ b/data/custodian/JP-08-MIT-M-MBP.yaml @@ -236,3 +236,22 @@ wikidata_enrichment: commons_category: Mito Botanical Park image: 水戸市植物公園(開園20周年).jpg wikidata_image: 水戸市植物公園(開園20周年).jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:45:05.803895+00:00' + source_url: https://www.mito-botanical-park.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.mito-botanical-park.com/apple-touch-icon.png + source_url: https://www.mito-botanical-park.com + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T19:45:05.803895+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-MIT-M-TM.yaml b/data/custodian/JP-08-MIT-M-TM.yaml index 6a02b596a0..d4d3a77198 100644 --- a/data/custodian/JP-08-MIT-M-TM.yaml +++ b/data/custodian/JP-08-MIT-M-TM.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-MIT-M-TM - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-MIT-M-TM valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-MIT-M-TM ghcid_numeric: 12779366248301792590 valid_from: '2025-12-06T23:38:31.577236+00:00' @@ -259,3 +260,22 @@ location: postal_code: 310-0912 street_address: MIGAWA, Mito Shi, Ibaraki Ken, 310-0912 normalization_timestamp: '2025-12-09T10:56:32.868805+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:45:40.573220+00:00' + source_url: http://www.tokugawa.gr.jp/guide + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.tokugawa.gr.jp/wp-content/uploads/2025/10/cropped-tokugawaaoi-180x180.png + source_url: http://www.tokugawa.gr.jp/guide + css_selector: '[document] > html.js.canvas > head > link:nth-of-type(33)' + retrieved_on: '2025-12-23T19:45:40.573220+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 4 diff --git a/data/custodian/JP-08-MIT-M-TUMM.yaml b/data/custodian/JP-08-MIT-M-TUMM.yaml index 77e480b984..a7d1b25232 100644 --- a/data/custodian/JP-08-MIT-M-TUMM.yaml +++ b/data/custodian/JP-08-MIT-M-TUMM.yaml @@ -39,13 +39,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-MIT-M-TUMM - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-MIT-M-TUMM valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-MIT-M-TUMM ghcid_numeric: 3073007632065009158 valid_from: '2025-12-06T23:38:31.582425+00:00' @@ -220,3 +221,28 @@ location: postal_code: 310-8585 street_address: MIWA, Mito Shi, Ibaraki Ken, 310-8585 normalization_timestamp: '2025-12-09T10:56:32.898773+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:45:48.444777+00:00' + source_url: https://www.tokiwa.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tokiwa.ac.jp/common/image/app-icon.png + source_url: https://www.tokiwa.ac.jp + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T19:45:48.444777+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.tokiwa.ac.jp/common/image/sns-icon.jpg + source_url: https://www.tokiwa.ac.jp + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T19:45:48.444777+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-NAK-L-N-nakashichuokominkantoshoetsuranshitsu.yaml b/data/custodian/JP-08-NAK-L-N-nakashichuokominkantoshoetsuranshitsu.yaml index ca174e1c6a..7e44f6e926 100644 --- a/data/custodian/JP-08-NAK-L-N-nakashichuokominkantoshoetsuranshitsu.yaml +++ b/data/custodian/JP-08-NAK-L-N-nakashichuokominkantoshoetsuranshitsu.yaml @@ -200,3 +200,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.naka.lg.jp/page/page000787.html wikidata_official_website: http://www.city.naka.lg.jp/page/page000787.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:46:27.559154+00:00' + source_url: http://www.city.naka.lg.jp/page/page000787.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.naka.lg.jp/web_clip_icon.png + source_url: http://www.city.naka.lg.jp/page/page000787.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T19:46:27.559154+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.naka.lg.jp/data/img/1362444827_27.jpg + source_url: http://www.city.naka.lg.jp/page/page000787.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T19:46:27.559154+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-NAK-L-TL.yaml b/data/custodian/JP-08-NAK-L-TL.yaml index 32765b06b3..800d6df1e3 100644 --- a/data/custodian/JP-08-NAK-L-TL.yaml +++ b/data/custodian/JP-08-NAK-L-TL.yaml @@ -203,3 +203,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.tosyo.vill.tokai.ibaraki.jp/ wikidata_official_website: http://www.tosyo.vill.tokai.ibaraki.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:46:48.426643+00:00' + source_url: http://www.tosyo.vill.tokai.ibaraki.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tosyo.vill.tokai.ibaraki.jp/favicon.ico + source_url: http://www.tosyo.vill.tokai.ibaraki.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T19:46:48.426643+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-NAM-L-N-namegatashikitaurakominkantoshoshitsu.yaml b/data/custodian/JP-08-NAM-L-N-namegatashikitaurakominkantoshoshitsu.yaml index caad90f440..79d428dca7 100644 --- a/data/custodian/JP-08-NAM-L-N-namegatashikitaurakominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-NAM-L-N-namegatashikitaurakominkantoshoshitsu.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.namegata.ibaraki.jp/page/page000422.html wikidata_official_website: http://www.city.namegata.ibaraki.jp/page/page000422.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:47:10.160807+00:00' + source_url: http://www.city.namegata.ibaraki.jp/page/page000422.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.namegata.ibaraki.jp/page/web_clip_icon.png + source_url: http://www.city.namegata.ibaraki.jp/page/page000422.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T19:47:10.160807+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.namegata.ibaraki.jp/web_clip_icon.png + source_url: http://www.city.namegata.ibaraki.jp/page/page000422.html + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T19:47:10.160807+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-NAM-L-N.yaml b/data/custodian/JP-08-NAM-L-N.yaml index db5436739e..d641da107a 100644 --- a/data/custodian/JP-08-NAM-L-N.yaml +++ b/data/custodian/JP-08-NAM-L-N.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.namegata.ibaraki.jp/page/page000422.html wikidata_official_website: http://www.city.namegata.ibaraki.jp/page/page000422.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:47:19.194270+00:00' + source_url: http://www.city.namegata.ibaraki.jp/page/page000422.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.namegata.ibaraki.jp/page/web_clip_icon.png + source_url: http://www.city.namegata.ibaraki.jp/page/page000422.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T19:47:19.194270+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.namegata.ibaraki.jp/web_clip_icon.png + source_url: http://www.city.namegata.ibaraki.jp/page/page000422.html + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T19:47:19.194270+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-NAM-L-NL.yaml b/data/custodian/JP-08-NAM-L-NL.yaml index 4adabd04d4..2a7fc14170 100644 --- a/data/custodian/JP-08-NAM-L-NL.yaml +++ b/data/custodian/JP-08-NAM-L-NL.yaml @@ -204,3 +204,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.namegata.ibaraki.jp/page/page002449.html wikidata_official_website: http://www.city.namegata.ibaraki.jp/page/page002449.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:47:27.313339+00:00' + source_url: http://www.city.namegata.ibaraki.jp/page/page002449.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.namegata.ibaraki.jp/web_clip_icon.png + source_url: http://www.city.namegata.ibaraki.jp/page/page002449.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T19:47:27.313339+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.namegata.ibaraki.jp/web_clip_icon.png + source_url: http://www.city.namegata.ibaraki.jp/page/page002449.html + css_selector: '[document] > html > head > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T19:47:27.313339+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-OMI-L-O-omitamashihatorifureaisentatoshoshitsu.yaml b/data/custodian/JP-08-OMI-L-O-omitamashihatorifureaisentatoshoshitsu.yaml index f16a96c634..30297c6e9b 100644 --- a/data/custodian/JP-08-OMI-L-O-omitamashihatorifureaisentatoshoshitsu.yaml +++ b/data/custodian/JP-08-OMI-L-O-omitamashihatorifureaisentatoshoshitsu.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.omitama.lg.jp/ wikidata_official_website: http://lib.city.omitama.lg.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:47:45.987931+00:00' + source_url: http://lib.city.omitama.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://lib.city.omitama.lg.jp/apple-touch-icon.png + source_url: http://lib.city.omitama.lg.jp + css_selector: '[document] > html > head > link:nth-of-type(15)' + retrieved_on: '2025-12-23T19:47:45.987931+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://lib.city.omitama.lg.jp/manage/contents/upload/58a52e076ccf5.jpg + source_url: http://lib.city.omitama.lg.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T19:47:45.987931+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-08-OMI-L-O.yaml b/data/custodian/JP-08-OMI-L-O.yaml index 42072c7f89..3b9b8cc9a8 100644 --- a/data/custodian/JP-08-OMI-L-O.yaml +++ b/data/custodian/JP-08-OMI-L-O.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.omitama.lg.jp/ wikidata_official_website: http://lib.city.omitama.lg.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:47:56.640622+00:00' + source_url: http://lib.city.omitama.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://lib.city.omitama.lg.jp/apple-touch-icon.png + source_url: http://lib.city.omitama.lg.jp + css_selector: '[document] > html > head > link:nth-of-type(15)' + retrieved_on: '2025-12-23T19:47:56.640622+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://lib.city.omitama.lg.jp/manage/contents/upload/58a52e076ccf5.jpg + source_url: http://lib.city.omitama.lg.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T19:47:56.640622+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-08-OMI-L-OL-omitamashitamari_library.yaml b/data/custodian/JP-08-OMI-L-OL-omitamashitamari_library.yaml index e53f625e56..b6bd9f61bd 100644 --- a/data/custodian/JP-08-OMI-L-OL-omitamashitamari_library.yaml +++ b/data/custodian/JP-08-OMI-L-OL-omitamashitamari_library.yaml @@ -204,3 +204,28 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.omitama.lg.jp wikidata_official_website: http://lib.city.omitama.lg.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:48:06.859515+00:00' + source_url: http://lib.city.omitama.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://lib.city.omitama.lg.jp/apple-touch-icon.png + source_url: http://lib.city.omitama.lg.jp + css_selector: '[document] > html > head > link:nth-of-type(15)' + retrieved_on: '2025-12-23T19:48:06.859515+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://lib.city.omitama.lg.jp/manage/contents/upload/58a52e076ccf5.jpg + source_url: http://lib.city.omitama.lg.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T19:48:06.859515+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-08-OMI-L-OL.yaml b/data/custodian/JP-08-OMI-L-OL.yaml index 6c7c3404c2..33b43357e8 100644 --- a/data/custodian/JP-08-OMI-L-OL.yaml +++ b/data/custodian/JP-08-OMI-L-OL.yaml @@ -204,3 +204,28 @@ wikidata_enrichment: wikidata_web: official_website: http://lib.city.omitama.lg.jp wikidata_official_website: http://lib.city.omitama.lg.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:48:16.977359+00:00' + source_url: http://lib.city.omitama.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://lib.city.omitama.lg.jp/apple-touch-icon.png + source_url: http://lib.city.omitama.lg.jp + css_selector: '[document] > html > head > link:nth-of-type(15)' + retrieved_on: '2025-12-23T19:48:16.977359+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://lib.city.omitama.lg.jp/manage/contents/upload/58a52e076ccf5.jpg + source_url: http://lib.city.omitama.lg.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T19:48:16.977359+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-08-RYU-L-RL.yaml b/data/custodian/JP-08-RYU-L-RL.yaml index 71bc0bec65..9d55460336 100644 --- a/data/custodian/JP-08-RYU-L-RL.yaml +++ b/data/custodian/JP-08-RYU-L-RL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-RYU-L-RL - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-RYU-L-RL valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-RYU-L-RL ghcid_numeric: 9910797045164632222 valid_from: '2025-12-06T23:38:54.515110+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: RYUTSUKEIZAIDAIGAKU Library @@ -215,3 +216,28 @@ location: geonames_id: 2111258 geonames_name: Ryūgasaki feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:48:33.270251+00:00' + source_url: http://www.rku.ac.jp/campuslife/library + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.rku.ac.jp/favicon.ico + source_url: http://www.rku.ac.jp/campuslife/library + css_selector: '[document] > html > body > div.wrapper > link:nth-of-type(21)' + retrieved_on: '2025-12-23T19:48:33.270251+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.rku.ac.jp/assets/img/ogp_img.png + source_url: http://www.rku.ac.jp/campuslife/library + css_selector: '[document] > html > body > div.wrapper > meta:nth-of-type(10)' + retrieved_on: '2025-12-23T19:48:33.270251+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-08-RYU-M-RCFHMM.yaml b/data/custodian/JP-08-RYU-M-RCFHMM.yaml index 2e2934edc6..c46f6a7b07 100644 --- a/data/custodian/JP-08-RYU-M-RCFHMM.yaml +++ b/data/custodian/JP-08-RYU-M-RCFHMM.yaml @@ -206,3 +206,22 @@ wikidata_enrichment: wikidata_web: official_website: https://www.ryureki.org/ wikidata_official_website: https://www.ryureki.org/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:48:40.490214+00:00' + source_url: https://www.ryureki.org + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://u.jimcdn.com/cms/o/s53baff9ff90c3aa1/img/favicon.ico?t=1381146665 + source_url: https://www.ryureki.org + css_selector: '[document] > html.j-feature-js.j-feature-no-touch > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T19:48:40.490214+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-08-SAK-L-M.yaml b/data/custodian/JP-08-SAK-L-M.yaml index 08c90c3bb2..2a7b88b293 100644 --- a/data/custodian/JP-08-SAK-L-M.yaml +++ b/data/custodian/JP-08-SAK-L-M.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.sakuragawa.lg.jp/index.php?code=1882 wikidata_official_website: http://www.city.sakuragawa.lg.jp/index.php?code=1882 +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:48:48.453252+00:00' + source_url: http://www.city.sakuragawa.lg.jp/index.php?code=1882 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.sakuragawa.lg.jp/web_clip_icon.png + source_url: http://www.city.sakuragawa.lg.jp/index.php?code=1882 + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T19:48:48.453252+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.sakuragawa.lg.jp/web_clip_icon.png + source_url: http://www.city.sakuragawa.lg.jp/index.php?code=1882 + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T19:48:48.453252+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-SAK-L-S-sakuragawashiyamatochuokominkantoshoshitsu.yaml b/data/custodian/JP-08-SAK-L-S-sakuragawashiyamatochuokominkantoshoshitsu.yaml index c3d5ad98a1..8300b44141 100644 --- a/data/custodian/JP-08-SAK-L-S-sakuragawashiyamatochuokominkantoshoshitsu.yaml +++ b/data/custodian/JP-08-SAK-L-S-sakuragawashiyamatochuokominkantoshoshitsu.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.sakuragawa.lg.jp/index.php?code=1884 wikidata_official_website: http://www.city.sakuragawa.lg.jp/index.php?code=1884 +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:48:56.074285+00:00' + source_url: http://www.city.sakuragawa.lg.jp/index.php?code=1884 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.sakuragawa.lg.jp/web_clip_icon.png + source_url: http://www.city.sakuragawa.lg.jp/index.php?code=1884 + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T19:48:56.074285+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.sakuragawa.lg.jp/web_clip_icon.png + source_url: http://www.city.sakuragawa.lg.jp/index.php?code=1884 + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T19:48:56.074285+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-SAK-L-S.yaml b/data/custodian/JP-08-SAK-L-S.yaml index d810991a16..56d0753e01 100644 --- a/data/custodian/JP-08-SAK-L-S.yaml +++ b/data/custodian/JP-08-SAK-L-S.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.sakuragawa.lg.jp/index.php?code=1886 wikidata_official_website: http://www.city.sakuragawa.lg.jp/index.php?code=1886 +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:49:03.688923+00:00' + source_url: http://www.city.sakuragawa.lg.jp/index.php?code=1886 + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.sakuragawa.lg.jp/web_clip_icon.png + source_url: http://www.city.sakuragawa.lg.jp/index.php?code=1886 + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T19:49:03.688923+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.sakuragawa.lg.jp/web_clip_icon.png + source_url: http://www.city.sakuragawa.lg.jp/index.php?code=1886 + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T19:49:03.688923+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-SAS-L-G.yaml b/data/custodian/JP-08-SAS-L-G.yaml index e39649990b..d86bba28e5 100644 --- a/data/custodian/JP-08-SAS-L-G.yaml +++ b/data/custodian/JP-08-SAS-L-G.yaml @@ -202,3 +202,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.goka.lg.jp/page/page000178.html wikidata_official_website: http://www.town.goka.lg.jp/page/page000178.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:49:19.777101+00:00' + source_url: http://www.town.goka.lg.jp/page/page000178.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.town.goka.lg.jp/web_clip_icon.png + source_url: http://www.town.goka.lg.jp/page/page000178.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T19:49:19.777101+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.town.goka.lg.jp/web_clip_icon.png + source_url: http://www.town.goka.lg.jp/page/page000178.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T19:49:19.777101+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-08-SAS-M-SMHFM.yaml b/data/custodian/JP-08-SAS-M-SMHFM.yaml index 0607e005c3..cd65888c00 100644 --- a/data/custodian/JP-08-SAS-M-SMHFM.yaml +++ b/data/custodian/JP-08-SAS-M-SMHFM.yaml @@ -235,3 +235,22 @@ location: postal_code: 306-0431 street_address: NISHIIZUMIDA, Sashima Gun Sakai Machi, Ibaraki Ken, 306-0431 normalization_timestamp: '2025-12-09T10:56:33.723631+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:49:36.020135+00:00' + source_url: https://www.town.ibaraki-sakai.lg.jp/page/page000738.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.town.ibaraki-sakai.lg.jp/page/web_clip_icon.png + source_url: https://www.town.ibaraki-sakai.lg.jp/page/page000738.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T19:49:36.020135+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-SHI-M-APMJ.yaml b/data/custodian/JP-08-SHI-M-APMJ.yaml index fd147f4c1a..ad0e32b83d 100644 --- a/data/custodian/JP-08-SHI-M-APMJ.yaml +++ b/data/custodian/JP-08-SHI-M-APMJ.yaml @@ -428,3 +428,22 @@ location: geonames_id: 2111025 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:28.921675+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:49:48.519036+00:00' + source_url: http://am-j.or.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://am-j.or.jp/wp-content/uploads/2020/10/cropped-amj-favicon-180x180.png + source_url: http://am-j.or.jp + css_selector: '[document] > html > head > link:nth-of-type(24)' + retrieved_on: '2025-12-23T19:49:48.519036+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-08-SHI-M-SCM.yaml b/data/custodian/JP-08-SHI-M-SCM.yaml index 202f23da4b..772f86bbad 100644 --- a/data/custodian/JP-08-SHI-M-SCM.yaml +++ b/data/custodian/JP-08-SHI-M-SCM.yaml @@ -237,3 +237,22 @@ location: geonames_id: 2111030 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:28.984438+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:49:55.929465+00:00' + source_url: https://www.city.shirakawa.fukushima.jp/page/page000394.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.shirakawa.fukushima.jp/web_clip_icon.png + source_url: https://www.city.shirakawa.fukushima.jp/page/page000394.html + css_selector: '[document] > html > head > link:nth-of-type(2)' + retrieved_on: '2025-12-23T19:49:55.929465+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-TOR-L-T.yaml b/data/custodian/JP-08-TOR-L-T.yaml index 48666ac016..0c9860794c 100644 --- a/data/custodian/JP-08-TOR-L-T.yaml +++ b/data/custodian/JP-08-TOR-L-T.yaml @@ -205,3 +205,20 @@ wikidata_enrichment: wikidata_web: official_website: https://www.toride-toshokan.jp/TOSHOW/html/usageguide.html wikidata_official_website: https://www.toride-toshokan.jp/TOSHOW/html/usageguide.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:55:03.033932+00:00' + source_url: https://www.toride-toshokan.jp/TOSHOW/html/usageguide.html + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: http://localhost/TOSHOW/asp/shared/img/snsThumbnail.png + source_url: https://www.toride-toshokan.jp/TOSHOW/html/usageguide.html + css_selector: '[document] > html > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-23T19:55:03.033932+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-08-TOR-L-TUAUTL.yaml b/data/custodian/JP-08-TOR-L-TUAUTL.yaml index 8df8140f30..48143381c1 100644 --- a/data/custodian/JP-08-TOR-L-TUAUTL.yaml +++ b/data/custodian/JP-08-TOR-L-TUAUTL.yaml @@ -37,13 +37,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-TOR-L-TUAUTL - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-TOR-L-TUAUTL valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-TOR-L-TUAUTL ghcid_numeric: 3789275110163182681 valid_from: '2025-12-06T23:38:53.362547+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: Tokyo University of Arts University Toride Library @@ -204,3 +205,22 @@ location: geonames_id: 2110729 geonames_name: Toride feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:53:35.604803+00:00' + source_url: http://www.lib.geidai.ac.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.lib.geidai.ac.jp/static/icon_512.png + source_url: http://www.lib.geidai.ac.jp + css_selector: '[document] > html.pc.chrome > head > link:nth-of-type(7)' + retrieved_on: '2025-12-23T19:53:35.604803+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 512x512 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-08-TOR-M-TMC.yaml b/data/custodian/JP-08-TOR-M-TMC.yaml index db62883500..c295d288fe 100644 --- a/data/custodian/JP-08-TOR-M-TMC.yaml +++ b/data/custodian/JP-08-TOR-M-TMC.yaml @@ -43,13 +43,14 @@ ghcid: method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-08-TOR-M-TMC - valid_from: "2025-12-10T09:43:39Z" + valid_from: '2025-12-10T09:43:39Z' valid_to: null - reason: "Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per ISO 3166-2:JP" + reason: Corrected region code from JP-IB (abbreviation) to JP-08 (Ibaraki) per + ISO 3166-2:JP - ghcid: JP-IB-TOR-M-TMC valid_from: null - valid_to: "2025-12-10T09:43:39Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:39Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-IB-TOR-M-TMC ghcid_numeric: 10088489509224250471 valid_from: '2025-12-06T23:38:31.655901+00:00' @@ -224,3 +225,22 @@ location: postal_code: 302-0007 street_address: YOSHIDA, Toride Shi, Ibaraki Ken, 302-0007 normalization_timestamp: '2025-12-09T10:56:33.902031+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T19:53:45.016263+00:00' + source_url: https://www.city.toride.ibaraki.jp/maibun/shisetsu/001.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.toride.ibaraki.jp/shared/images/favicon/apple-touch-icon-precomposed.png + source_url: https://www.city.toride.ibaraki.jp/maibun/shisetsu/001.html + css_selector: '[document] > html > head > link:nth-of-type(14)' + retrieved_on: '2025-12-23T19:53:45.016263+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/NL-GE-DOE-M-FMD.yaml b/data/custodian/NL-GE-DOE-M-FMD.yaml index 43184f53a5..2741e2b075 100644 --- a/data/custodian/NL-GE-DOE-M-FMD.yaml +++ b/data/custodian/NL-GE-DOE-M-FMD.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Fotografica Museum Doesburg + description: museum in Doesburg, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-DOE-M-GM.yaml b/data/custodian/NL-GE-DOE-M-GM.yaml index 0c852aa227..3b9f8e015c 100644 --- a/data/custodian/NL-GE-DOE-M-GM.yaml +++ b/data/custodian/NL-GE-DOE-M-GM.yaml @@ -73,3 +73,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: null + label: De Graafschap HET Museum + description: museum in Doetinchem, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-DOE-R-PF.yaml b/data/custodian/NL-GE-DOE-R-PF.yaml index d00ccaa12c..8e3357c191 100644 --- a/data/custodian/NL-GE-DOE-R-PF.yaml +++ b/data/custodian/NL-GE-DOE-R-PF.yaml @@ -109,3 +109,10 @@ provenance: - 'Location resolution method: EXA_WEB_SEARCH' - Address and contact details verified via official website peutjut-fonds.nl - Foundation maintains Peutjut cemetery in Aceh, Indonesia (Dutch colonial heritage) +wikidata_enrichment: + wikidata_id: null + label: Stichting Peutjut-Fonds + description: foundation for maintenance of Peutjut military cemetery in Doesburg, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-DRU-M-SNMN.yaml b/data/custodian/NL-GE-DRU-M-SNMN.yaml index bec72afb75..17b1344767 100644 --- a/data/custodian/NL-GE-DRU-M-SNMN.yaml +++ b/data/custodian/NL-GE-DRU-M-SNMN.yaml @@ -73,3 +73,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: null + label: The Sint-Nicolas Museum NL + description: museum in Druten, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-ELB-A-NVA.yaml b/data/custodian/NL-GE-ELB-A-NVA.yaml index 6c2c48b6f0..62a68e2e9e 100644 --- a/data/custodian/NL-GE-ELB-A-NVA.yaml +++ b/data/custodian/NL-GE-ELB-A-NVA.yaml @@ -101,3 +101,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: Q111190981 + wikidata_url: https://www.wikidata.org/wiki/Q111190981 + label: Streekarchivariaat Noordwest-Veluwe + description: archief met vestigingen in Elburg, Ermelo, Harderwijk, Nunspeet en Oldebroek + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Noord-Veluws Archief is part of Streekarchivariaat Noordwest-Veluwe regional archive network diff --git a/data/custodian/NL-GE-ELB-M-NVM.yaml b/data/custodian/NL-GE-ELB-M-NVM.yaml index 2ee5261ace..fd4fae000f 100644 --- a/data/custodian/NL-GE-ELB-M-NVM.yaml +++ b/data/custodian/NL-GE-ELB-M-NVM.yaml @@ -148,3 +148,11 @@ identifiers: - identifier_scheme: Wikidata identifier_value: Q18089004 identifier_url: https://www.wikidata.org/wiki/Q18089004 +wikidata_enrichment: + wikidata_id: Q18089004 + wikidata_url: https://www.wikidata.org/wiki/Q18089004 + label: Noord-Veluws Museum + description: museum in Nunspeet, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Wikidata ID already present in identifiers section diff --git a/data/custodian/NL-GE-ELS-M-HME.yaml b/data/custodian/NL-GE-ELS-M-HME.yaml index 1a09921622..7feb06f2cd 100644 --- a/data/custodian/NL-GE-ELS-M-HME.yaml +++ b/data/custodian/NL-GE-ELS-M-HME.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Historisch Museum Elspeet + description: museum in Elspeet, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-GRO-M-LM.yaml b/data/custodian/NL-GE-GRO-M-LM.yaml index 11b82281c6..fb2f73e450 100644 --- a/data/custodian/NL-GE-GRO-M-LM.yaml +++ b/data/custodian/NL-GE-GRO-M-LM.yaml @@ -97,3 +97,11 @@ provenance: - 'Location resolution method: EXA_WEB_SEARCH' - Museum renamed to Freedom Museum in September 2019 - LinkedIn website field was incorrect (showed Lochems Oorlogsmuseum instead of Freedom Museum) +wikidata_enrichment: + wikidata_id: Q1869397 + wikidata_url: https://www.wikidata.org/wiki/Q1869397 + label: Vrijheidsmuseum + description: museum in Groesbeek, Gelderland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Museum renamed from Nationaal Bevrijdingsmuseum 1944-1945 to Freedom Museum (Vrijheidsmuseum) in September 2019 diff --git a/data/custodian/NL-GE-HAR-M-KMM.yaml b/data/custodian/NL-GE-HAR-M-KMM.yaml index 05732d8529..d8dc1d776a 100644 --- a/data/custodian/NL-GE-HAR-M-KMM.yaml +++ b/data/custodian/NL-GE-HAR-M-KMM.yaml @@ -339,3 +339,11 @@ identifiers: - identifier_scheme: Wikidata identifier_value: Q1051928 identifier_url: https://www.wikidata.org/wiki/Q1051928 +wikidata_enrichment: + wikidata_id: Q1051928 + wikidata_url: https://www.wikidata.org/wiki/Q1051928 + label: Kröller-Müller Museum + description: kunstmuseum en beeldentuin in Otterlo + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Wikidata ID already present in identifiers section diff --git a/data/custodian/NL-GE-HAR-M-M-museum_19391945.yaml b/data/custodian/NL-GE-HAR-M-M-museum_19391945.yaml index 12996c3907..277db33215 100644 --- a/data/custodian/NL-GE-HAR-M-M-museum_19391945.yaml +++ b/data/custodian/NL-GE-HAR-M-M-museum_19391945.yaml @@ -84,3 +84,10 @@ provenance: - Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction was extracting wrong company's data - 'Location enriched on 2025-12-17 via Exa web search: Harreveld, Gelderland' +wikidata_enrichment: + wikidata_id: null + label: Museum 1939-1945 + description: museum in Harreveld, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution (also known as Museum Opdat wij niet Vergeten) diff --git a/data/custodian/NL-GE-HED-M-HM.yaml b/data/custodian/NL-GE-HED-M-HM.yaml index be8607092b..32b82fffd4 100644 --- a/data/custodian/NL-GE-HED-M-HM.yaml +++ b/data/custodian/NL-GE-HED-M-HM.yaml @@ -97,3 +97,10 @@ provenance: - 'Emic name corrected: "Historisch museum" → "Historisch Museum Hedel"' - 'Location resolved: Voorstraat 2, 5321 GH Hedel (Gelderland)' - Province corrected from OV to GE based on actual location in municipality Maasdriel +wikidata_enrichment: + wikidata_id: Q42298583 + wikidata_url: https://www.wikidata.org/wiki/Q42298583 + label: Historisch museum + description: museum in Hedel, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-GE-HEE-M-MGHH.yaml b/data/custodian/NL-GE-HEE-M-MGHH.yaml index 0f57a0c06c..89406f16bb 100644 --- a/data/custodian/NL-GE-HEE-M-MGHH.yaml +++ b/data/custodian/NL-GE-HEE-M-MGHH.yaml @@ -86,3 +86,11 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - 'Location enriched on 2025-12-17 via Exa web search: Heerde, Gelderland' +wikidata_enrichment: + wikidata_id: Q372829 + wikidata_url: https://www.wikidata.org/wiki/Q372829 + label: Geelvinck Hinlopen Huis + description: museum in Amsterdam + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Wikidata shows location as Amsterdam; file indicates Heerde - may be satellite location or data discrepancy diff --git a/data/custodian/NL-GE-HEN-M-AM.yaml b/data/custodian/NL-GE-HEN-M-AM.yaml index 6ad2709446..353e26d7c4 100644 --- a/data/custodian/NL-GE-HEN-M-AM.yaml +++ b/data/custodian/NL-GE-HEN-M-AM.yaml @@ -91,3 +91,10 @@ provenance: - 'Location resolution method: EXA_WEB_SEARCH' - LinkedIn website field was incorrect (showed Jenevermuseum) - Museum is permanently closed but retained as heritage record +wikidata_enrichment: + wikidata_id: null + label: Achterhoeks Museum 1940-1945 + description: museum (permanently closed) in Hengelo (GE), Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-LIC-M-SMW.yaml b/data/custodian/NL-GE-LIC-M-SMW.yaml index b50badf9e2..6252aaeb4a 100644 --- a/data/custodian/NL-GE-LIC-M-SMW.yaml +++ b/data/custodian/NL-GE-LIC-M-SMW.yaml @@ -76,3 +76,10 @@ identifiers: - identifier_scheme: Wikidata identifier_value: Q2006131 identifier_url: https://www.wikidata.org/wiki/Q2006131 +wikidata_enrichment: + wikidata_id: null + label: Stenen Museum Winkeltje + description: museum in Lichtenvoorde, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this museum; Q2006131 in identifiers refers to building "'t Zand 2" not the museum itself diff --git a/data/custodian/NL-GE-LIE-M-EK.yaml b/data/custodian/NL-GE-LIE-M-EK.yaml index 98542a176b..29202ce42f 100644 --- a/data/custodian/NL-GE-LIE-M-EK.yaml +++ b/data/custodian/NL-GE-LIE-M-EK.yaml @@ -101,3 +101,10 @@ provenance: - Location verified via official website, WhichMuseum, and AroundUs on 2025-12-17 - Open-air museum established 1936, opened 1966 - Abbreviation uses EK (Erve Kots) as proper noun, skipping articles +wikidata_enrichment: + wikidata_id: Q2746942 + wikidata_url: https://www.wikidata.org/wiki/Q2746942 + label: Erve Kots + description: museumgebouw in Gelderland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-GE-LUN-M-ML.yaml b/data/custodian/NL-GE-LUN-M-ML.yaml index 4b20a1f000..f8306f9303 100644 --- a/data/custodian/NL-GE-LUN-M-ML.yaml +++ b/data/custodian/NL-GE-LUN-M-ML.yaml @@ -114,3 +114,10 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 3 +wikidata_enrichment: + wikidata_id: Q98907719 + wikidata_url: https://www.wikidata.org/wiki/Q98907719 + label: Museum Lunteren + description: museum in Lunteren, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-GE-NIJ-A-LA.yaml b/data/custodian/NL-GE-NIJ-A-LA.yaml index c5ba82f340..8abce4615e 100644 --- a/data/custodian/NL-GE-NIJ-A-LA.yaml +++ b/data/custodian/NL-GE-NIJ-A-LA.yaml @@ -108,3 +108,10 @@ provenance: - Created from unmatched LinkedIn company profile - Province corrected from UT to GE - actual location is Nijmegen (Gelderland) - City resolved from XXX to NIJ (Nijmegen) via FLORON website +wikidata_enrichment: + wikidata_id: null + label: Het Levend Archief + description: archive and research center in Nijmegen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found; also known as FLORON (Floristisch Onderzoek Nederland) diff --git a/data/custodian/NL-GE-NIJ-M-CDCK.yaml b/data/custodian/NL-GE-NIJ-M-CDCK.yaml index 0a75c55541..e8ab1e3e3e 100644 --- a/data/custodian/NL-GE-NIJ-M-CDCK.yaml +++ b/data/custodian/NL-GE-NIJ-M-CDCK.yaml @@ -119,3 +119,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: Q13742228 + wikidata_url: https://www.wikidata.org/wiki/Q13742228 + label: Katholiek Documentatie Centrum + description: documentation centre in Nijmegen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-GE-NIJ-M-RMM.yaml b/data/custodian/NL-GE-NIJ-M-RMM.yaml index 195a40dee2..8803f24377 100644 --- a/data/custodian/NL-GE-NIJ-M-RMM.yaml +++ b/data/custodian/NL-GE-NIJ-M-RMM.yaml @@ -119,3 +119,10 @@ provenance: - Historical location was Griegstraat, 6521 GA Nijmegen - GeoNames ID 2750053 for Nijmegen (admin1=03 → Gelderland) - LinkedIn follower count (5.6M) appears to be data anomaly +wikidata_enrichment: + wikidata_id: null + label: R-evolutie Museum voor Modelvliegtuigen + description: museum (permanently closed) in Nijmegen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-NIJ-M-USRSVM.yaml b/data/custodian/NL-GE-NIJ-M-USRSVM.yaml index a492026c84..46646e77fe 100644 --- a/data/custodian/NL-GE-NIJ-M-USRSVM.yaml +++ b/data/custodian/NL-GE-NIJ-M-USRSVM.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: null + label: UMC Saint Radboud Steunstichting Vrienden Museum + description: museum in Nijmegen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found; affiliated with Museum voor Anatomie en Pathologie at Radboudumc diff --git a/data/custodian/NL-GE-OEN-M-MO.yaml b/data/custodian/NL-GE-OEN-M-MO.yaml index 4851a9519b..060e375938 100644 --- a/data/custodian/NL-GE-OEN-M-MO.yaml +++ b/data/custodian/NL-GE-OEN-M-MO.yaml @@ -73,3 +73,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q109382770 + wikidata_url: https://www.wikidata.org/wiki/Q109382770 + label: Cultuur Historisch Museum Oene + description: Cultuurhistorisch Museum in Oene, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Wikidata uses name "Cultuur Historisch Museum Oene" rather than "Museum Oene" diff --git a/data/custodian/NL-GE-RUU-M-MM-het_mag_museum.yaml b/data/custodian/NL-GE-RUU-M-MM-het_mag_museum.yaml index 0a846c7786..faa2422c02 100644 --- a/data/custodian/NL-GE-RUU-M-MM-het_mag_museum.yaml +++ b/data/custodian/NL-GE-RUU-M-MM-het_mag_museum.yaml @@ -82,3 +82,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - 'Location enriched on 2025-12-17 via Exa web search: Ruurlo, Gelderland' +wikidata_enrichment: + wikidata_id: null + label: het MAG museum + description: museum in Ruurlo, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-TER-M-KKPUM.yaml b/data/custodian/NL-GE-TER-M-KKPUM.yaml index fad61f7146..bb24312dd0 100644 --- a/data/custodian/NL-GE-TER-M-KKPUM.yaml +++ b/data/custodian/NL-GE-TER-M-KKPUM.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: null + label: Kamers op Kolkenstein pop-up-museum + description: pop-up museum in Terwolde, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-THA-M-NAM.yaml b/data/custodian/NL-GE-THA-M-NAM.yaml index 8643a0929f..e20bf99ad4 100644 --- a/data/custodian/NL-GE-THA-M-NAM.yaml +++ b/data/custodian/NL-GE-THA-M-NAM.yaml @@ -132,3 +132,10 @@ provenance: - City resolved from XXX to 't Harde - Emic name corrected from abbreviation to full Dutch name - Enriched with museum website data 2025-12-20 +wikidata_enrichment: + wikidata_id: null + label: Nederlands Artillerie Museum + description: military museum in 't Harde, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found; also searched as "Nederlands Artilleriemuseum" diff --git a/data/custodian/NL-GE-TIE-I-M.yaml b/data/custodian/NL-GE-TIE-I-M.yaml index 80547ecbc9..93f0ab7653 100644 --- a/data/custodian/NL-GE-TIE-I-M.yaml +++ b/data/custodian/NL-GE-TIE-I-M.yaml @@ -2186,3 +2186,11 @@ logo_enrichment: has_favicon: true has_og_image: true favicon_count: 1 + +wikidata_enrichment: + wikidata_id: null + label: Stichting MOZA + description: cultural organization in Tiel, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found; file contains erroneous Mozart (Q254) YouTube data from previous enrichment diff --git a/data/custodian/NL-GE-TIE-M-FST.yaml b/data/custodian/NL-GE-TIE-M-FST.yaml index 975584f40b..87a963e142 100644 --- a/data/custodian/NL-GE-TIE-M-FST.yaml +++ b/data/custodian/NL-GE-TIE-M-FST.yaml @@ -133,3 +133,10 @@ provenance: - City code TIE resolved from Tiel address on streekmuseumtiel.nl - Name corrected from "Stichting PAK museum" to "Flipje en Streekmuseum Tiel" - Abbreviation changed from PM (PAK museum) to FST (Flipje en Streekmuseum Tiel) +wikidata_enrichment: + wikidata_id: Q13636575 + wikidata_url: https://www.wikidata.org/wiki/Q13636575 + label: Flipje & Streekmuseum + description: museum in Tiel, Gelderland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-GE-VAA-M-MVH.yaml b/data/custodian/NL-GE-VAA-M-MVH.yaml index da7dcde370..ea5f9fdc4e 100644 --- a/data/custodian/NL-GE-VAA-M-MVH.yaml +++ b/data/custodian/NL-GE-VAA-M-MVH.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: null + label: Museum Vaassen Historie + description: local history museum in Vaassen, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-VEL-M-HVKMM.yaml b/data/custodian/NL-GE-VEL-M-HVKMM.yaml index 84f6fef580..0b875aa5c3 100644 --- a/data/custodian/NL-GE-VEL-M-HVKMM.yaml +++ b/data/custodian/NL-GE-VEL-M-HVKMM.yaml @@ -73,3 +73,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q1051928 + wikidata_url: https://www.wikidata.org/wiki/Q1051928 + label: Kröller Müller Museum + description: kunstmuseum en beeldentuin in Otterlo + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: LinkedIn name has typos "Kröller - Möller"; correct spelling is "Kröller-Müller Museum" diff --git a/data/custodian/NL-GE-VOR-M-MAS.yaml b/data/custodian/NL-GE-VOR-M-MAS.yaml index df5cfb2896..f25dbcc153 100644 --- a/data/custodian/NL-GE-VOR-M-MAS.yaml +++ b/data/custodian/NL-GE-VOR-M-MAS.yaml @@ -107,3 +107,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: EXA_WEB_SEARCH' - City code VOR resolved from Vorden address on museumgidsnederland.nl +wikidata_enrichment: + wikidata_id: null + label: Museum voor Achterhoekse Schilderkunst + description: regional art museum in Vorden, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-WAG-M-IWSI.yaml b/data/custodian/NL-GE-WAG-M-IWSI.yaml index c36da1ddea..74c24eca97 100644 --- a/data/custodian/NL-GE-WAG-M-IWSI.yaml +++ b/data/custodian/NL-GE-WAG-M-IWSI.yaml @@ -135,3 +135,11 @@ provenance: - 'Location resolution method: EXA_WEB_SEARCH' - Includes World Soil Museum as part of ISRIC - Located on Wageningen University & Research campus +wikidata_enrichment: + wikidata_id: Q16151477 + wikidata_url: https://www.wikidata.org/wiki/Q16151477 + label: Wereldbodemmuseum + description: museum in Wageningen, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Wikidata entry is for "World Soil Museum" (Wereldbodemmuseum), which is part of ISRIC diff --git a/data/custodian/NL-GE-WEH-M-MUOA.yaml b/data/custodian/NL-GE-WEH-M-MUOA.yaml index b53ff84bb4..0fd436c38f 100644 --- a/data/custodian/NL-GE-WEH-M-MUOA.yaml +++ b/data/custodian/NL-GE-WEH-M-MUOA.yaml @@ -126,3 +126,11 @@ provenance: - Museum is permanently closed (date unknown) - LinkedIn website URL is INCORRECT (points to Jenevermuseum Schiedam) - Heritage staff data appears to be garbage from LinkedIn scraping + +wikidata_enrichment: + wikidata_id: null + label: Museum Ut Olde Ambacht + description: museum in Wehl, Gelderland, Netherlands (permanently closed) + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-WIJ-I-M.yaml b/data/custodian/NL-GE-WIJ-I-M.yaml index c9707243f4..eff25be1bf 100644 --- a/data/custodian/NL-GE-WIJ-I-M.yaml +++ b/data/custodian/NL-GE-WIJ-I-M.yaml @@ -333,3 +333,11 @@ linkup_enrichment: extraction_method: linkup_answer_regex extraction_timestamp: '2025-12-16T20:25:12.662298+00:00' data_tier: TIER_4_INFERRED + +wikidata_enrichment: + wikidata_id: null + label: Stichting MUHABBAT + description: intangible heritage organization in Wijchen, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-WIJ-M-VMW.yaml b/data/custodian/NL-GE-WIJ-M-VMW.yaml index 45e47a7e94..93b5509e9c 100644 --- a/data/custodian/NL-GE-WIJ-M-VMW.yaml +++ b/data/custodian/NL-GE-WIJ-M-VMW.yaml @@ -70,3 +70,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' + +wikidata_enrichment: + wikidata_id: null + label: Vrienden Museum Wijchen + description: museum support organization in Wijchen, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-WOE-M-MHW.yaml b/data/custodian/NL-GE-WOE-M-MHW.yaml index 589247280c..6ccb82308c 100644 --- a/data/custodian/NL-GE-WOE-M-MHW.yaml +++ b/data/custodian/NL-GE-WOE-M-MHW.yaml @@ -74,3 +74,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' + +wikidata_enrichment: + wikidata_id: null + label: Museum Hoge Woerd + description: museum in Woerd, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-XXX-M-MVV.yaml b/data/custodian/NL-GE-XXX-M-MVV.yaml index 222e7a9812..5e524e97ee 100644 --- a/data/custodian/NL-GE-XXX-M-MVV.yaml +++ b/data/custodian/NL-GE-XXX-M-MVV.yaml @@ -160,3 +160,12 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 3 + +wikidata_enrichment: + wikidata_id: Q2466114 + wikidata_url: https://www.wikidata.org/wiki/Q2466114 + label: Museum voor de Verpleegkunde + description: museum (online-only) + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: First online-only museum in Nederlands Museumregister diff --git a/data/custodian/NL-GE-ZEV-M-CLM.yaml b/data/custodian/NL-GE-ZEV-M-CLM.yaml index 3860f94a8d..35f212a16f 100644 --- a/data/custodian/NL-GE-ZEV-M-CLM.yaml +++ b/data/custodian/NL-GE-ZEV-M-CLM.yaml @@ -70,3 +70,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' + +wikidata_enrichment: + wikidata_id: Q56461154 + wikidata_url: https://www.wikidata.org/wiki/Q56461154 + label: Liemers Museum + description: museum in Zevenaar + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-GE-ZIE-S-OVZ-oudheidkundige_vereniging_zuwent.yaml b/data/custodian/NL-GE-ZIE-S-OVZ-oudheidkundige_vereniging_zuwent.yaml index 355c52abc7..8f0d765967 100644 --- a/data/custodian/NL-GE-ZIE-S-OVZ-oudheidkundige_vereniging_zuwent.yaml +++ b/data/custodian/NL-GE-ZIE-S-OVZ-oudheidkundige_vereniging_zuwent.yaml @@ -640,3 +640,11 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 1 + +wikidata_enrichment: + wikidata_id: Q98895207 + wikidata_url: https://www.wikidata.org/wiki/Q98895207 + label: Vereniging Voor Oudheidkunde Lichtenvoorde + description: historical society in Lichtenvoorde, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-GE-ZUT-L-BZ.yaml b/data/custodian/NL-GE-ZUT-L-BZ.yaml index 4d214f6190..c5e50ae449 100644 --- a/data/custodian/NL-GE-ZUT-L-BZ.yaml +++ b/data/custodian/NL-GE-ZUT-L-BZ.yaml @@ -669,3 +669,12 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 1 + +wikidata_enrichment: + wikidata_id: Q123299703 + wikidata_url: https://www.wikidata.org/wiki/Q123299703 + label: BIJ de Bieb + description: bibliotheekorganisatie in de gemeenten Brummen, Lochem, Voorst en Zutphen, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Library organization now branded as "BIJ de Bieb"; originally known as Bibliotheek Berkel & IJssel diff --git a/data/custodian/NL-GE-ZUT-M-MBH.yaml b/data/custodian/NL-GE-ZUT-M-MBH.yaml index 52bfef0c02..a62b49a968 100644 --- a/data/custodian/NL-GE-ZUT-M-MBH.yaml +++ b/data/custodian/NL-GE-ZUT-M-MBH.yaml @@ -87,3 +87,11 @@ provenance: - Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction was extracting wrong company's data - 'Location enriched on 2025-12-17 via Exa web search: Zutphen, Gelderland' + +wikidata_enrichment: + wikidata_id: null + label: Museum De Brandkas van Henny + description: museum in Zutphen, Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-ZUT-M-SMZ.yaml b/data/custodian/NL-GE-ZUT-M-SMZ.yaml index 8dcbd72a94..4940efe4d4 100644 --- a/data/custodian/NL-GE-ZUT-M-SMZ.yaml +++ b/data/custodian/NL-GE-ZUT-M-SMZ.yaml @@ -92,3 +92,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' + +wikidata_enrichment: + wikidata_id: Q2736515 + wikidata_url: https://www.wikidata.org/wiki/Q2736515 + label: Stedelijk Museum Zutphen + description: cultuurhistorisch museum in Gelderland over geschiedenis van Zutphen en de Graafschap + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-GR-GRO-E-H.yaml b/data/custodian/NL-GR-GRO-E-H.yaml index 893e106376..60289bb516 100644 --- a/data/custodian/NL-GR-GRO-E-H.yaml +++ b/data/custodian/NL-GR-GRO-E-H.yaml @@ -94,3 +94,10 @@ provenance: - Created from unmatched LinkedIn company profile - City resolved from XXX to GRO (Groningen) - Research project at University of Groningen +wikidata_enrichment: + wikidata_id: null + label: HAICu + description: educational/research project in Groningen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GR-GRO-M-GM.yaml b/data/custodian/NL-GR-GRO-M-GM.yaml index a010f635f8..d6afa1e409 100644 --- a/data/custodian/NL-GR-GRO-M-GM.yaml +++ b/data/custodian/NL-GR-GRO-M-GM.yaml @@ -72,3 +72,11 @@ identifiers: - identifier_scheme: Wikidata identifier_value: Q2882945 identifier_url: https://www.wikidata.org/wiki/Q2882945 +wikidata_enrichment: + wikidata_id: Q2882945 + wikidata_url: https://www.wikidata.org/wiki/Q2882945 + label: GRID Grafisch Museum Groningen + description: GRID Grafisch Museum Groningen is een museum over grafische kunst, ontwerp en druktechniek. Met exposities, workshops en een werkende drukkerij brengt het de geschiedenis en toekomst van grafisch vakmanschap tot leven. + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-GR-GRO-M-KG.yaml b/data/custodian/NL-GR-GRO-M-KG.yaml index 77d2471d8c..61f277cfd8 100644 --- a/data/custodian/NL-GR-GRO-M-KG.yaml +++ b/data/custodian/NL-GR-GRO-M-KG.yaml @@ -117,3 +117,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q105369059 + wikidata_url: https://www.wikidata.org/wiki/Q105369059 + label: Kunstpunt Groningen + description: kunstorganisatie in Groningen + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-GR-GRO-M-LG.yaml b/data/custodian/NL-GR-GRO-M-LG.yaml index 0eb8fcabfc..9f9c429415 100644 --- a/data/custodian/NL-GR-GRO-M-LG.yaml +++ b/data/custodian/NL-GR-GRO-M-LG.yaml @@ -104,3 +104,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q90374206 + wikidata_url: https://www.wikidata.org/wiki/Q90374206 + label: Landschapsbeheer Groningen + description: landschapsbeheerorganisatie in Groningen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Dutch description in Wikidata diff --git a/data/custodian/NL-GR-GRO-M-MV.yaml b/data/custodian/NL-GR-GRO-M-MV.yaml index 76631e2e32..6142d9f3b0 100644 --- a/data/custodian/NL-GR-GRO-M-MV.yaml +++ b/data/custodian/NL-GR-GRO-M-MV.yaml @@ -80,3 +80,11 @@ identifiers: - identifier_scheme: Wikidata identifier_value: Q29564376 identifier_url: https://www.wikidata.org/wiki/Q29564376 +wikidata_enrichment: + wikidata_id: Q29564376 + wikidata_url: https://www.wikidata.org/wiki/Q29564376 + label: Museum Vosbergen + description: museum in Eelde, Drenthe + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-GR-GRO-R-CCC.yaml b/data/custodian/NL-GR-GRO-R-CCC.yaml index e41c8f336c..c8c74d2848 100644 --- a/data/custodian/NL-GR-GRO-R-CCC.yaml +++ b/data/custodian/NL-GR-GRO-R-CCC.yaml @@ -97,3 +97,10 @@ provenance: - City resolved from XXX to GRO (Groningen) - Institution type corrected from M (Museum) to R (Research) - This is a research project, not a museum +wikidata_enrichment: + wikidata_id: null + label: Creating Cultures of Care + description: research organization in Groningen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GR-GRO-R-KKS.yaml b/data/custodian/NL-GR-GRO-R-KKS.yaml index 48dd1616d3..2641823167 100644 --- a/data/custodian/NL-GR-GRO-R-KKS.yaml +++ b/data/custodian/NL-GR-GRO-R-KKS.yaml @@ -108,3 +108,10 @@ provenance: - Created from unmatched LinkedIn company profile - City resolved from XXX to GRO (Groningen) - Part of Hanze University of Applied Sciences +wikidata_enrichment: + wikidata_id: null + label: Kenniscentrum Kunst & Samenleving + description: research organization in Groningen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GR-GRO-S-VGM.yaml b/data/custodian/NL-GR-GRO-S-VGM.yaml index cbf6c1391d..127c58e8d3 100644 --- a/data/custodian/NL-GR-GRO-S-VGM.yaml +++ b/data/custodian/NL-GR-GRO-S-VGM.yaml @@ -98,3 +98,10 @@ provenance: - City resolved from XXX to GRO (Groningen) - Institution type corrected from M (Museum) to S (Society/Friends organization) - This is a friends/membership organization, not a museum itself +wikidata_enrichment: + wikidata_id: null + label: Vrienden van het Groninger Museum + description: support organization in Groningen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GR-HAR-B-HBH.yaml b/data/custodian/NL-GR-HAR-B-HBH.yaml index 31ae57f480..65b84395df 100644 --- a/data/custodian/NL-GR-HAR-B-HBH.yaml +++ b/data/custodian/NL-GR-HAR-B-HBH.yaml @@ -83,3 +83,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: Q1980263 + wikidata_url: https://www.wikidata.org/wiki/Q1980263 + label: Hortus Haren + description: botanische tuin in Haren + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-GR-LEE-M-MJSL.yaml b/data/custodian/NL-GR-LEE-M-MJSL.yaml index f44bd44c2c..3770d687b6 100644 --- a/data/custodian/NL-GR-LEE-M-MJSL.yaml +++ b/data/custodian/NL-GR-LEE-M-MJSL.yaml @@ -73,3 +73,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Museum Joodse Schooltje Leek + description: museum in Leek, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GR-NIE-M-BTOM.yaml b/data/custodian/NL-GR-NIE-M-BTOM.yaml index e7cb80056c..6766bb9c5f 100644 --- a/data/custodian/NL-GR-NIE-M-BTOM.yaml +++ b/data/custodian/NL-GR-NIE-M-BTOM.yaml @@ -82,3 +82,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - 'Location enriched on 2025-12-17 via Exa web search: Niezijl, Groningen' +wikidata_enrichment: + wikidata_id: null + label: Blik Trommel en Oudheden Museum + description: museum in Niezijl, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GR-OML-M-MCTB.yaml b/data/custodian/NL-GR-OML-M-MCTB.yaml index fb1e699981..efe8361490 100644 --- a/data/custodian/NL-GR-OML-M-MCTB.yaml +++ b/data/custodian/NL-GR-OML-M-MCTB.yaml @@ -87,3 +87,10 @@ provenance: - Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction was extracting wrong company's data - 'Location enriched on 2025-12-17 via Exa web search: Ommelanderwijk, Groningen' +wikidata_enrichment: + wikidata_id: null + label: Museum Collectie ter Borg + description: museum in Ommelanderwijk, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GR-ONS-M-RES.yaml b/data/custodian/NL-GR-ONS-M-RES.yaml index d42e6a5210..fe3589bc53 100644 --- a/data/custodian/NL-GR-ONS-M-RES.yaml +++ b/data/custodian/NL-GR-ONS-M-RES.yaml @@ -158,3 +158,10 @@ provenance: - Museum permanently closed May 21, 2019 after death of founder Klaas Ritsema - Website domain now parked (no longer active) - GeoNames ID 2749577 for Onstwedde (admin1=04 → Groningen) +wikidata_enrichment: + wikidata_id: null + label: Radio en Speelgoedmuseum + description: museum in Onstwedde, Netherlands (permanently closed May 2019) + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GR-VEE-M-NNTTM.yaml b/data/custodian/NL-GR-VEE-M-NNTTM.yaml index 90a32d75e0..2597fce4be 100644 --- a/data/custodian/NL-GR-VEE-M-NNTTM.yaml +++ b/data/custodian/NL-GR-VEE-M-NNTTM.yaml @@ -79,3 +79,11 @@ identifiers: - identifier_scheme: Wikidata identifier_value: Q18326395 identifier_url: https://www.wikidata.org/wiki/Q18326395 +wikidata_enrichment: + wikidata_id: Q18326395 + wikidata_url: https://www.wikidata.org/wiki/Q18326395 + label: Noord-Nederlands Trein & Tram Museum + description: museum in Menterwolde, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-LI-BIL-M-OMB.yaml b/data/custodian/NL-LI-BIL-M-OMB.yaml index a8f4cd9cad..cc99f2fb7d 100644 --- a/data/custodian/NL-LI-BIL-M-OMB.yaml +++ b/data/custodian/NL-LI-BIL-M-OMB.yaml @@ -77,3 +77,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Online Museum De Bilt + description: museum in De Bilt, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-LI-HAA-M-MCHBV.yaml b/data/custodian/NL-LI-HAA-M-MCHBV.yaml index 80a5e4d2fb..6a7c738f04 100644 --- a/data/custodian/NL-LI-HAA-M-MCHBV.yaml +++ b/data/custodian/NL-LI-HAA-M-MCHBV.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Museum Catering Den Haag B.V. + description: museum catering service in Den Haag, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-LI-HEE-M-RM.yaml b/data/custodian/NL-LI-HEE-M-RM.yaml index 5b9e00ed18..3b514335d4 100644 --- a/data/custodian/NL-LI-HEE-M-RM.yaml +++ b/data/custodian/NL-LI-HEE-M-RM.yaml @@ -503,3 +503,10 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 2 +wikidata_enrichment: + wikidata_id: null + label: Het Romeins Museum + description: museum in Heerlen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-LI-HOR-M-MDK.yaml b/data/custodian/NL-LI-HOR-M-MDK.yaml index 9de6f477a7..7cae798281 100644 --- a/data/custodian/NL-LI-HOR-M-MDK.yaml +++ b/data/custodian/NL-LI-HOR-M-MDK.yaml @@ -108,3 +108,11 @@ provenance: - Location resolved via web research - museum located in Horst, Limburg - STIDOC is umbrella organization for 8 textile associations; museum is primary heritage custodian - GeoNames ID 2753591 for Horst (admin1=05 → Limburg) +wikidata_enrichment: + wikidata_id: Q103849450 + wikidata_url: https://www.wikidata.org/wiki/Q103849450 + label: Museum De kantfabriek + description: museum in Horst (Limburg) + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-LI-KER-M-VMK.yaml b/data/custodian/NL-LI-KER-M-VMK.yaml index 385f39ecfe..82fd8b741f 100644 --- a/data/custodian/NL-LI-KER-M-VMK.yaml +++ b/data/custodian/NL-LI-KER-M-VMK.yaml @@ -118,3 +118,10 @@ provenance: - Location resolved via web research on 2025-12-18 - Kirchroa is Kerkraads dialect name for Kerkrade - Opening hours use 11:11 as nod to carnival tradition (11 November = start of carnival season) +wikidata_enrichment: + wikidata_id: null + label: Vasteloavends Museum Kirchroa + description: carnival museum in Kerkrade, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-LI-MAA-E-JEA.yaml b/data/custodian/NL-LI-MAA-E-JEA.yaml index 51ca882376..dc37f8a1f0 100644 --- a/data/custodian/NL-LI-MAA-E-JEA.yaml +++ b/data/custodian/NL-LI-MAA-E-JEA.yaml @@ -124,3 +124,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: Q1682433 + wikidata_url: https://www.wikidata.org/wiki/Q1682433 + label: Jan van Eyck Academie + description: kunstacademie in Maastricht, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-LI-MAA-M-VMM.yaml b/data/custodian/NL-LI-MAA-M-VMM.yaml index db75158821..731c89bf4e 100644 --- a/data/custodian/NL-LI-MAA-M-VMM.yaml +++ b/data/custodian/NL-LI-MAA-M-VMM.yaml @@ -70,3 +70,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q111036461 + wikidata_url: https://www.wikidata.org/wiki/Q111036461 + label: Vestingmuseum Maastricht + description: museum in Maastricht, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-LI-OOS-L-CO.yaml b/data/custodian/NL-LI-OOS-L-CO.yaml index e1836f35d4..e48c995d56 100644 --- a/data/custodian/NL-LI-OOS-L-CO.yaml +++ b/data/custodian/NL-LI-OOS-L-CO.yaml @@ -181,3 +181,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q110892467 + wikidata_url: https://www.wikidata.org/wiki/Q110892467 + label: Stichting Cultuur Oost + description: cultural support organization in Gelderland, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Dutch description in Wikidata; description inferred from context diff --git a/data/custodian/NL-LI-SIM-M-ZLSM.yaml b/data/custodian/NL-LI-SIM-M-ZLSM.yaml index f62cd05aa2..80a5552e57 100644 --- a/data/custodian/NL-LI-SIM-M-ZLSM.yaml +++ b/data/custodian/NL-LI-SIM-M-ZLSM.yaml @@ -95,3 +95,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: Q2382332 + wikidata_url: https://www.wikidata.org/wiki/Q2382332 + label: Zuid-Limburgse Stoomtrein Maatschappij + description: museum spoorwegbedrijf in Nederlands Limburg + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-LI-SIT-L-BLS.yaml b/data/custodian/NL-LI-SIT-L-BLS.yaml index e831d51a73..b73e6462fb 100644 --- a/data/custodian/NL-LI-SIT-L-BLS.yaml +++ b/data/custodian/NL-LI-SIT-L-BLS.yaml @@ -625,3 +625,10 @@ logo_enrichment: has_favicon: true has_og_image: true favicon_count: 1 +wikidata_enrichment: + wikidata_id: null + label: Bibliotheek Ligne Sittard + description: library in Sittard, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution; part of De Domijnen organization diff --git a/data/custodian/NL-LI-SIT-M-FM.yaml b/data/custodian/NL-LI-SIT-M-FM.yaml index e1f0db293d..76c8f56ad1 100644 --- a/data/custodian/NL-LI-SIT-M-FM.yaml +++ b/data/custodian/NL-LI-SIT-M-FM.yaml @@ -82,3 +82,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - 'Location enriched on 2025-12-17 via Exa web search: Sittard, Limburg' +wikidata_enrichment: + wikidata_id: null + label: Fortuna Museum + description: museum in Sittard, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution; museum of Fortuna Sittard football club diff --git a/data/custodian/NL-LI-SIT-M-HND.yaml b/data/custodian/NL-LI-SIT-M-HND.yaml index fee8ee10cc..dc6835f764 100644 --- a/data/custodian/NL-LI-SIT-M-HND.yaml +++ b/data/custodian/NL-LI-SIT-M-HND.yaml @@ -160,3 +160,10 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 2 +wikidata_enrichment: + wikidata_id: null + label: Het Nieuwe Domein + description: art museum in Sittard, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found; umbrella organization De Domijnen (Q59962272) exists but is different entity diff --git a/data/custodian/NL-LI-SIT-M-ND.yaml b/data/custodian/NL-LI-SIT-M-ND.yaml index 24c96baba4..b25aa0043d 100644 --- a/data/custodian/NL-LI-SIT-M-ND.yaml +++ b/data/custodian/NL-LI-SIT-M-ND.yaml @@ -462,3 +462,10 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 2 +wikidata_enrichment: + wikidata_id: null + label: Het Nieuwe Domein + description: art museum in Sittard, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found; duplicate record - same institution as NL-LI-SIT-M-HND.yaml diff --git a/data/custodian/NL-LI-STE-M-MS.yaml b/data/custodian/NL-LI-STE-M-MS.yaml index cb45f0c637..579a5ac27d 100644 --- a/data/custodian/NL-LI-STE-M-MS.yaml +++ b/data/custodian/NL-LI-STE-M-MS.yaml @@ -70,3 +70,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q27915944 + wikidata_url: https://www.wikidata.org/wiki/Q27915944 + label: Streekmuseum Stevensweert/Ohé en Laak + description: museum in Stevensweert + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-LI-SWA-M-MA-museum_asselt.yaml b/data/custodian/NL-LI-SWA-M-MA-museum_asselt.yaml index b18bc1271d..5a65b70805 100644 --- a/data/custodian/NL-LI-SWA-M-MA-museum_asselt.yaml +++ b/data/custodian/NL-LI-SWA-M-MA-museum_asselt.yaml @@ -73,3 +73,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: Q111081384 + wikidata_url: https://www.wikidata.org/wiki/Q111081384 + label: Museum Asselt + description: museum in Swalmen, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Dutch description in Wikidata; description inferred from location diff --git a/data/custodian/NL-LI-VAA-M-MV.yaml b/data/custodian/NL-LI-VAA-M-MV.yaml index a59cf8ed2c..5720a5e905 100644 --- a/data/custodian/NL-LI-VAA-M-MV.yaml +++ b/data/custodian/NL-LI-VAA-M-MV.yaml @@ -70,3 +70,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q41990282 + wikidata_url: https://www.wikidata.org/wiki/Q41990282 + label: Museum Vaals + description: museum in Vaals, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-LI-VEN-L-BV-biblionu_venray.yaml b/data/custodian/NL-LI-VEN-L-BV-biblionu_venray.yaml index fcdc909af0..bfccece5aa 100644 --- a/data/custodian/NL-LI-VEN-L-BV-biblionu_venray.yaml +++ b/data/custodian/NL-LI-VEN-L-BV-biblionu_venray.yaml @@ -651,3 +651,11 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 1 +wikidata_enrichment: + wikidata_id: Q59962336 + wikidata_url: https://www.wikidata.org/wiki/Q59962336 + label: BiblioNu + description: bibliotheekorganisatie in de gemeenten Horst aan de Maas en Venray, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-LI-VEN-L-BV-de_bibliotheek_venray.yaml b/data/custodian/NL-LI-VEN-L-BV-de_bibliotheek_venray.yaml index 67ad526519..4ad65d84f6 100644 --- a/data/custodian/NL-LI-VEN-L-BV-de_bibliotheek_venray.yaml +++ b/data/custodian/NL-LI-VEN-L-BV-de_bibliotheek_venray.yaml @@ -703,3 +703,11 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 1 +wikidata_enrichment: + wikidata_id: Q59962336 + wikidata_url: https://www.wikidata.org/wiki/Q59962336 + label: BiblioNu + description: bibliotheekorganisatie in de gemeenten Horst aan de Maas en Venray, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Duplicate file - same Wikidata entity as NL-LI-VEN-L-BV-biblionu_venray.yaml diff --git a/data/custodian/NL-LI-VEN-M-FMMV.yaml b/data/custodian/NL-LI-VEN-M-FMMV.yaml index be9a62109c..a0d8c8391b 100644 --- a/data/custodian/NL-LI-VEN-M-FMMV.yaml +++ b/data/custodian/NL-LI-VEN-M-FMMV.yaml @@ -84,3 +84,10 @@ provenance: - Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction was extracting wrong company's data - 'Location enriched on 2025-12-17 via Exa web search: Venlo, Limburg' +wikidata_enrichment: + wikidata_id: null + label: Frans Maas Museum Verzameling + description: museum in Venlo, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-LI-VEN-M-MJ-museum_jocas.yaml b/data/custodian/NL-LI-VEN-M-MJ-museum_jocas.yaml index 07e3f709d2..e572b4620c 100644 --- a/data/custodian/NL-LI-VEN-M-MJ-museum_jocas.yaml +++ b/data/custodian/NL-LI-VEN-M-MJ-museum_jocas.yaml @@ -87,3 +87,10 @@ provenance: - Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction was extracting wrong company's data - 'Location enriched on 2025-12-17 via Exa web search: Venlo, Limburg' +wikidata_enrichment: + wikidata_id: null + label: Museum JoCas + description: museum in Venlo, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-LI-VEN-M-MPV.yaml b/data/custodian/NL-LI-VEN-M-MPV.yaml index f2b73b2a02..fee0810a31 100644 --- a/data/custodian/NL-LI-VEN-M-MPV.yaml +++ b/data/custodian/NL-LI-VEN-M-MPV.yaml @@ -73,3 +73,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q23198204 + wikidata_url: https://www.wikidata.org/wiki/Q23198204 + label: Museum Psychiatrie Venray + description: museum in Venray, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-LI-VEN-M-MWD.yaml b/data/custodian/NL-LI-VEN-M-MWD.yaml index 9ad43a19da..90bf01b57d 100644 --- a/data/custodian/NL-LI-VEN-M-MWD.yaml +++ b/data/custodian/NL-LI-VEN-M-MWD.yaml @@ -100,3 +100,11 @@ provenance: - Correct website is museumwasrol.nl - Location verified via web search on 2025-12-17 - Only open for groups by appointment (10-25 people) +wikidata_enrichment: + wikidata_id: Q2691822 + wikidata_url: https://www.wikidata.org/wiki/Q2691822 + label: Museum van Wasrol tot DVD + description: museum in Venlo + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-LI-VEN-M-PMV.yaml b/data/custodian/NL-LI-VEN-M-PMV.yaml index f8df8f0a50..a7b7615149 100644 --- a/data/custodian/NL-LI-VEN-M-PMV.yaml +++ b/data/custodian/NL-LI-VEN-M-PMV.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Politie museum Venlo + description: museum in Venlo, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-AAR-M-BKM.yaml b/data/custodian/NL-NB-AAR-M-BKM.yaml index 65f20e0b92..d6f52a4644 100644 --- a/data/custodian/NL-NB-AAR-M-BKM.yaml +++ b/data/custodian/NL-NB-AAR-M-BKM.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: null + label: Bies Kunstgalerij & Museum + description: museum in Aarle-Rixtel, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-BAK-M-MDT.yaml b/data/custodian/NL-NB-BAK-M-MDT.yaml index 42f912ef99..793d67b26c 100644 --- a/data/custodian/NL-NB-BAK-M-MDT.yaml +++ b/data/custodian/NL-NB-BAK-M-MDT.yaml @@ -114,3 +114,10 @@ provenance: - Address verified via Land van de Peel tourism site - LinkedIn website URL was INCORRECT (pointed to Jenevermuseum Schiedam) - Original LinkedIn import had data quality issues +wikidata_enrichment: + wikidata_id: null + label: Museum De Tolbrug + description: museum in Bakel, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-BER-M-MDS.yaml b/data/custodian/NL-NB-BER-M-MDS.yaml index 4fc406291c..90699c2810 100644 --- a/data/custodian/NL-NB-BER-M-MDS.yaml +++ b/data/custodian/NL-NB-BER-M-MDS.yaml @@ -103,3 +103,10 @@ provenance: - Created from unmatched LinkedIn company profile - Address verified via VisitBrabant and Museumgids Nederland on 2025-12-18 - Museum founded 2010, has Cultural ANBI status +wikidata_enrichment: + wikidata_id: null + label: Museum De Sigarenmaker + description: museum in Bergeijk, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-BES-M-BWAMH.yaml b/data/custodian/NL-NB-BES-M-BWAMH.yaml index daeac41c93..2c71e8317f 100644 --- a/data/custodian/NL-NB-BES-M-BWAMH.yaml +++ b/data/custodian/NL-NB-BES-M-BWAMH.yaml @@ -74,3 +74,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Best Western Apollo Museum Hotel + description: museum in Best, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-BES-M-KMB.yaml b/data/custodian/NL-NB-BES-M-KMB.yaml index 42d296aa18..8667029522 100644 --- a/data/custodian/NL-NB-BES-M-KMB.yaml +++ b/data/custodian/NL-NB-BES-M-KMB.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Klompen Museum Best + description: museum in Best, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-BES-M-NMB.yaml b/data/custodian/NL-NB-BES-M-NMB.yaml index 0afaec1d9e..0c9afb708c 100644 --- a/data/custodian/NL-NB-BES-M-NMB.yaml +++ b/data/custodian/NL-NB-BES-M-NMB.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Norton Museum Best + description: museum in Best, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-BOR-M-BOSB.yaml b/data/custodian/NL-NB-BOR-M-BOSB.yaml index 8e60eb41ac..9ad32d4200 100644 --- a/data/custodian/NL-NB-BOR-M-BOSB.yaml +++ b/data/custodian/NL-NB-BOR-M-BOSB.yaml @@ -140,3 +140,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q893960 + wikidata_url: https://www.wikidata.org/wiki/Q893960 + label: Borneo Orangutan Survival Foundation + description: Indonesian orangutan conservation organization (may be misclassified in NL dataset) + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Q893960 is Indonesian organization - verify if this NL entry is correct or duplicate diff --git a/data/custodian/NL-NB-BOX-M-MC-museum_canonije.yaml b/data/custodian/NL-NB-BOX-M-MC-museum_canonije.yaml index 9dce6065de..6f601fa7c4 100644 --- a/data/custodian/NL-NB-BOX-M-MC-museum_canonije.yaml +++ b/data/custodian/NL-NB-BOX-M-MC-museum_canonije.yaml @@ -87,3 +87,10 @@ provenance: - Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction was extracting wrong company's data - 'Location enriched on 2025-12-17 via Exa web search: Boxtel, Noord-Brabant' +wikidata_enrichment: + wikidata_id: null + label: Museum Canonije + description: museum in Boxtel, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-BOX-M-MV-stichting_museum_vekemans.yaml b/data/custodian/NL-NB-BOX-M-MV-stichting_museum_vekemans.yaml index e8b6b73aeb..8bcdd3a228 100644 --- a/data/custodian/NL-NB-BOX-M-MV-stichting_museum_vekemans.yaml +++ b/data/custodian/NL-NB-BOX-M-MV-stichting_museum_vekemans.yaml @@ -70,3 +70,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: Q2119394 + wikidata_url: https://www.wikidata.org/wiki/Q2119394 + label: Museum Vekemans + description: museum in Noord-Brabant + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-NB-BOZ-A-GAR.yaml b/data/custodian/NL-NB-BOZ-A-GAR.yaml index 1addeb65c6..a1438b85fa 100644 --- a/data/custodian/NL-NB-BOZ-A-GAR.yaml +++ b/data/custodian/NL-NB-BOZ-A-GAR.yaml @@ -565,3 +565,10 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 3 +wikidata_enrichment: + wikidata_id: null + label: Gemeentearchief Roosendaal + description: archive in Bergen op Zoom, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-BRE-L-VA.yaml b/data/custodian/NL-NB-BRE-L-VA.yaml index 4192ecdca6..b1d0ed138f 100644 --- a/data/custodian/NL-NB-BRE-L-VA.yaml +++ b/data/custodian/NL-NB-BRE-L-VA.yaml @@ -99,3 +99,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolved: Menenstraat 13, 4826AP Breda (Noord-Brabant)' - Province corrected from OV to NB based on KVK registration +wikidata_enrichment: + wikidata_id: null + label: Stichting Visie AmateurTheater + description: library in Breda, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-BRE-M-BM.yaml b/data/custodian/NL-NB-BRE-M-BM.yaml index 0f17c925d2..2c8abe3338 100644 --- a/data/custodian/NL-NB-BRE-M-BM.yaml +++ b/data/custodian/NL-NB-BRE-M-BM.yaml @@ -86,3 +86,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - 'Location enriched on 2025-12-17 via Exa web search: Breda, Noord-Brabant' +wikidata_enrichment: + wikidata_id: null + label: Bierreclame Museum + description: museum in Breda, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-BRE-M-PM-princenhaags_museum.yaml b/data/custodian/NL-NB-BRE-M-PM-princenhaags_museum.yaml index 594d6ea0f1..9c5e987ac1 100644 --- a/data/custodian/NL-NB-BRE-M-PM-princenhaags_museum.yaml +++ b/data/custodian/NL-NB-BRE-M-PM-princenhaags_museum.yaml @@ -76,3 +76,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - Location enriched from institution name on 2025-12-16 +wikidata_enrichment: + wikidata_id: null + label: Princenhaags Museum + description: museum in Breda, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-CEN-R-NCSCNN.yaml b/data/custodian/NL-NB-CEN-R-NCSCNN.yaml index e74cce11af..138da7cce9 100644 --- a/data/custodian/NL-NB-CEN-R-NCSCNN.yaml +++ b/data/custodian/NL-NB-CEN-R-NCSCNN.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Nationaal Cyber Security Centrum (NCSC-NL) + description: research center in Centrum, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-CUI-M-MC.yaml b/data/custodian/NL-NB-CUI-M-MC.yaml index f9e575bf0e..941e0dc2e8 100644 --- a/data/custodian/NL-NB-CUI-M-MC.yaml +++ b/data/custodian/NL-NB-CUI-M-MC.yaml @@ -84,3 +84,10 @@ provenance: - Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction was extracting wrong company's data - 'Location enriched on 2025-12-17 via Exa web search: Cuijk, Noord-Brabant' +wikidata_enrichment: + wikidata_id: null + label: Museum Ceuclum + description: museum in Cuijk, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-EIN-M-CM-crypto_museum.yaml b/data/custodian/NL-NB-EIN-M-CM-crypto_museum.yaml index 4f6544de81..6210e3798a 100644 --- a/data/custodian/NL-NB-EIN-M-CM-crypto_museum.yaml +++ b/data/custodian/NL-NB-EIN-M-CM-crypto_museum.yaml @@ -88,3 +88,11 @@ provenance: - Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction was extracting wrong company's data - 'Location enriched on 2025-12-17 via Exa web search: Eindhoven, Noord-Brabant' +wikidata_enrichment: + wikidata_id: Q125662744 + wikidata_url: https://www.wikidata.org/wiki/Q125662744 + label: Cryptography Museum + description: null + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-NB-EIN-M-EMM.yaml b/data/custodian/NL-NB-EIN-M-EMM.yaml index 059b3cbe26..d49263418d 100644 --- a/data/custodian/NL-NB-EIN-M-EMM.yaml +++ b/data/custodian/NL-NB-EIN-M-EMM.yaml @@ -76,3 +76,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - Location enriched from institution name on 2025-12-16 +wikidata_enrichment: + wikidata_id: null + label: Stichting Eindhovens Muziek Museum + description: museum in Eindhoven, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-EIN-M-LM.yaml b/data/custodian/NL-NB-EIN-M-LM.yaml index 8704b34803..96936f87f6 100644 --- a/data/custodian/NL-NB-EIN-M-LM.yaml +++ b/data/custodian/NL-NB-EIN-M-LM.yaml @@ -104,3 +104,10 @@ provenance: - Part of international Living Museum network (originated USA) - Run by Stichting Neos (homeless/domestic violence organization) - 040 is Eindhoven area code, confirming location +wikidata_enrichment: + wikidata_id: null + label: Living Museum 040 + description: museum in Eindhoven, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-ETL-M-AZKM.yaml b/data/custodian/NL-NB-ETL-M-AZKM.yaml index d09baa5217..9dad658196 100644 --- a/data/custodian/NL-NB-ETL-M-AZKM.yaml +++ b/data/custodian/NL-NB-ETL-M-AZKM.yaml @@ -82,3 +82,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - 'Location enriched on 2025-12-17 via Exa web search: Etten-Leur, Noord-Brabant' +wikidata_enrichment: + wikidata_id: null + label: Ambachtelijke zagerij en klompenmakerij museum + description: museum in Etten-Leur, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-ETL-M-VOM.yaml b/data/custodian/NL-NB-ETL-M-VOM.yaml index db07b8ee9e..bfdd2335f3 100644 --- a/data/custodian/NL-NB-ETL-M-VOM.yaml +++ b/data/custodian/NL-NB-ETL-M-VOM.yaml @@ -105,3 +105,10 @@ provenance: - Location verified via web research - Achter de Molen 108, 4873 GZ Etten-Leur - Upgraded from NL-XX-XXX-M-OM-het_van_osch_museum to NL-NB-ETL-M-VOM - LinkedIn follower count (5.6M) flagged as bad data - museum is small private collection +wikidata_enrichment: + wikidata_id: null + label: Het Van Osch Museum + description: museum in Etten-Leur, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-GEE-M-MDR.yaml b/data/custodian/NL-NB-GEE-M-MDR.yaml index 89c45169f2..f3351c9148 100644 --- a/data/custodian/NL-NB-GEE-M-MDR.yaml +++ b/data/custodian/NL-NB-GEE-M-MDR.yaml @@ -87,3 +87,11 @@ provenance: - Museum opened in 1981, renovated in 1997 - Registered by Brabantse Museumstichting in 2004 - Geertruidenberg is the oldest city in Holland (city rights 1213) +wikidata_enrichment: + wikidata_id: Q1842735 + wikidata_url: https://www.wikidata.org/wiki/Q1842735 + label: Museum De Roos + description: museum in Noord-Brabant + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: null diff --git a/data/custodian/NL-NB-GEM-M-MAW.yaml b/data/custodian/NL-NB-GEM-M-MAW.yaml index 7dff6bfaac..55826fd3c5 100644 --- a/data/custodian/NL-NB-GEM-M-MAW.yaml +++ b/data/custodian/NL-NB-GEM-M-MAW.yaml @@ -84,3 +84,10 @@ provenance: - Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction was extracting wrong company's data - 'Location enriched on 2025-12-17 via Exa web search: Gemert, Noord-Brabant' +wikidata_enrichment: + wikidata_id: null + label: Museum van alles wa + description: museum in Gemert, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-NB-HEL-M-EM.yaml b/data/custodian/NL-NB-HEL-M-EM.yaml index 4f3f32cd84..970d6efdf7 100644 --- a/data/custodian/NL-NB-HEL-M-EM.yaml +++ b/data/custodian/NL-NB-HEL-M-EM.yaml @@ -85,3 +85,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - 'Location enriched on 2025-12-17 via Exa web search: Helmond, Noord-Brabant' +wikidata_enrichment: + wikidata_id: null + label: Edah Museum + description: museum in Helmond, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/scripts/sync/qdrant_person_sync.py b/scripts/sync/qdrant_person_sync.py index b9923fd2f8..41d57e12c1 100644 --- a/scripts/sync/qdrant_person_sync.py +++ b/scripts/sync/qdrant_person_sync.py @@ -426,12 +426,25 @@ class QdrantPersonSyncer(BaseSyncer): return {"status": "unavailable", "error": str(e)} def _list_staff_files(self) -> list[Path]: - """List all staff JSON files in bu/ directory.""" + """List all staff JSON files from multiple directories.""" + staff_files = [] + + # Check bu/ directory (legacy location) bu_dir = self.person_dir / "bu" - if not bu_dir.exists(): - self.logger.warning(f"Staff directory not found: {bu_dir}") - return [] - return sorted(bu_dir.glob("*_staff_*.json")) + if bu_dir.exists(): + staff_files.extend(bu_dir.glob("*_staff_*.json")) + + # Check affiliated/parsed/ directory (current location) + affiliated_dir = self.person_dir / "affiliated" / "parsed" + if affiliated_dir.exists(): + staff_files.extend(affiliated_dir.glob("*_staff_*.json")) + else: + self.logger.warning(f"Staff directory not found: {affiliated_dir}") + + if not staff_files: + self.logger.warning("No staff files found in any directory") + + return sorted(set(staff_files)) # Deduplicate and sort def _list_entity_files(self) -> list[Path]: """List all entity profile JSON files."""