From 5e8a432ef022527c7e33fc01da23483d04a48587 Mon Sep 17 00:00:00 2001 From: kempersc Date: Tue, 23 Dec 2025 18:08:45 +0100 Subject: [PATCH] enrich japanese and dutch custodians --- backend/rag/main.py | 174 ++++++++++++++++++ .../.logo_enrichment_crawl4ai_checkpoint.json | 152 ++++++++++++++- data/custodian/JP-05-AMA-M-KFHM.yaml | 27 +++ data/custodian/JP-05-DAI-M-AAC.yaml | 32 +++- data/custodian/JP-05-EBI-M-IC.yaml | 27 +++ data/custodian/JP-05-HOR-M-MV.yaml | 17 ++ data/custodian/JP-05-ISH-M-IJM.yaml | 19 ++ data/custodian/JP-05-KAT-L-KL.yaml | 19 ++ data/custodian/JP-05-KAT-L-KLI.yaml | 19 ++ data/custodian/JP-05-KAT-L-KLO.yaml | 33 ++++ data/custodian/JP-05-KAT-L-KLS.yaml | 19 ++ ...KAZ-L-KL-kosakachoritsukosaka_library.yaml | 33 ++++ data/custodian/JP-05-KAZ-M-OSCC.yaml | 50 ++++- ...kitaakitashiaikawakominkantoshoshitsu.yaml | 25 +++ data/custodian/JP-05-KIT-L-K.yaml | 25 +++ ...05-KIT-L-KL-kamikoanisonritsu_library.yaml | 28 +++ ...IT-L-KL-kitaakitashimoriyoshi_library.yaml | 25 +++ data/custodian/JP-05-KIT-L-KL.yaml | 25 +++ data/custodian/JP-05-KIT-M-KK.yaml | 33 ++++ data/custodian/JP-05-KUM-M-KMA.yaml | 19 ++ data/custodian/JP-05-MAT-M-ASRO.yaml | 25 +++ data/custodian/JP-05-MAT-M-EUM.yaml | 25 +++ data/custodian/JP-05-MIN-L-AO.yaml | 19 ++ data/custodian/JP-05-MIN-L-HL.yaml | 33 ++++ data/custodian/JP-05-MIY-M-OMA.yaml | 27 +++ data/custodian/JP-05-NIK-L-NLN.yaml | 33 ++++ data/custodian/JP-05-NIK-L-NPL.yaml | 33 ++++ data/custodian/JP-05-NIK-M-KM.yaml | 54 +++++- data/custodian/JP-05-NIK-M-TM.yaml | 38 +++- data/custodian/JP-05-NYU-M-TLM.yaml | 19 ++ data/custodian/JP-05-ODA-L-AL.yaml | 36 +++- data/custodian/JP-05-OGA-L-OL.yaml | 33 ++++ data/custodian/JP-05-OGA-L-W.yaml | 33 ++++ data/custodian/JP-05-OGA-M-NM.yaml | 25 +++ data/custodian/JP-05-OGA-M-OW.yaml | 54 +++++- data/custodian/JP-05-SEM-L-SL.yaml | 26 +++ data/custodian/JP-05-SEM-M-KKM.yaml | 26 +++ data/custodian/JP-05-SEM-M-OAM.yaml | 38 +++- data/custodian/JP-05-TAM-M-TMMAI.yaml | 27 +++ data/custodian/JP-05-UWA-M-UCHM.yaml | 19 ++ data/custodian/JP-05-YAM-L-F.yaml | 27 +++ ...OK-L-Y-yokoteshiritsutaiyutoshoshitsu.yaml | 25 +++ data/custodian/JP-05-YOK-L-Y.yaml | 33 ++++ ...YOK-L-YL-yokoteshiritsumasuda_library.yaml | 33 ++++ ...-YOK-L-YL-yokoteshiritsuomori_library.yaml | 33 ++++ data/custodian/JP-05-YOK-L-YL.yaml | 33 ++++ data/custodian/JP-05-YOK-L-YLCL.yaml | 33 ++++ data/custodian/JP-05-YOK-M-HTS.yaml | 38 ++++ data/custodian/JP-05-YOK-M-OLMH.yaml | 46 ++++- data/custodian/JP-05-YOK-M-TCHFM.yaml | 19 ++ data/custodian/JP-05-YOK-M-YILMH.yaml | 33 ++++ data/custodian/JP-05-YOK-M-YMMM.yaml | 19 ++ data/custodian/JP-05-YUR-L-AH.yaml | 36 +++- data/custodian/JP-05-YUR-M-YMLMM.yaml | 50 ++++- data/custodian/JP-05-YUZ-L-Y.yaml | 19 ++ data/custodian/JP-05-YUZ-L-YE.yaml | 19 ++ ...YUZ-L-YL-yuzawashiritsuogachi_library.yaml | 19 ++ data/custodian/JP-05-YUZ-L-YL.yaml | 19 ++ data/custodian/JP-05-YUZ-M-ISMM.yaml | 36 +++- data/custodian/JP-06-ABA-M-FPIAFSM.yaml | 34 ++++ data/custodian/JP-06-FUK-L-FL.yaml | 25 +++ data/custodian/JP-06-KIT-M-FCSM.yaml | 19 ++ ...6-KOI-L-FPL-fukui_prefectural_library.yaml | 19 ++ data/custodian/NL-DR-WES-M-MVP.yaml | 7 + data/custodian/NL-DR-ZUI-M-DM.yaml | 7 + data/custodian/NL-FL-ALM-M-DDAMAD.yaml | 7 + data/custodian/NL-FL-NAG-M-MN.yaml | 7 + data/custodian/NL-FR-ALD-M-MWA.yaml | 7 + data/custodian/NL-FR-BOE-M-MBK.yaml | 7 + data/custodian/NL-FR-BUR-M-MBO.yaml | 8 + data/custodian/NL-FR-DOK-L-BNF.yaml | 7 + data/custodian/NL-FR-DOK-M-MDB.yaml | 8 + .../NL-FR-GRO-M-MM-mineralogisch_museum.yaml | 7 + data/custodian/NL-FR-HAR-M-ODS.yaml | 8 + data/custodian/NL-FR-HAR-M-OMS.yaml | 9 + data/custodian/NL-FR-HIN-M-MH.yaml | 7 + data/custodian/NL-FR-KOL-M-MMA.yaml | 7 + data/custodian/NL-FR-LEE-M-FLMD.yaml | 8 + data/custodian/NL-FR-LEE-M-LML.yaml | 7 + data/custodian/NL-FR-LEE-M-SMF.yaml | 7 + data/custodian/NL-FR-LEM-M-ML.yaml | 7 + data/custodian/NL-FR-LEM-M-MSS.yaml | 8 + data/custodian/NL-FR-LEM-M-TLIMM.yaml | 7 + data/custodian/NL-FR-MOD-M-MF.yaml | 8 + data/custodian/NL-FR-RIE-M-RPM.yaml | 8 + data/custodian/NL-FR-RIE-M-RPMK.yaml | 7 + .../NL-FR-RYP-M-SM-stihl_museum.yaml | 7 + data/custodian/NL-FR-TER-M-BMT.yaml | 7 + data/custodian/NL-FR-WAR-M-MW.yaml | 7 + data/custodian/NL-FR-WOL-L-BW.yaml | 9 +- data/custodian/NL-FR-WOR-M-JH.yaml | 7 + data/custodian/NL-GE-AAL-M-SMK.yaml | 7 + data/custodian/NL-GE-APE-E-K.yaml | 7 + data/custodian/NL-GE-APE-M-A.yaml | 8 + data/custodian/NL-GE-APE-M-MBB.yaml | 7 + data/custodian/NL-GE-APE-M-MZH.yaml | 7 + data/custodian/NL-GE-ARN-A-SIFA.yaml | 7 + data/custodian/NL-GE-ARN-I-GT.yaml | 7 + data/custodian/NL-GE-ARN-I-KIEN.yaml | 7 + .../NL-GE-ARN-I-S-de_stoelenmatter.yaml | 7 + data/custodian/NL-GE-ARN-M-HMGJ.yaml | 7 + data/custodian/NL-GE-ARN-M-MMKA.yaml | 8 + data/custodian/NL-GE-ARN-M-TBM.yaml | 7 + data/custodian/NL-GE-ARN-M-VVMA.yaml | 7 + data/custodian/NL-GE-BAR-M-MGG.yaml | 7 + data/custodian/NL-GE-CEN-R-CFMCIRZUB.yaml | 9 + data/custodian/NL-GE-CUL-M-JRM.yaml | 7 + .../schemas/20251121/linkml/manifest.json | 2 +- .../uml/CustodianTypeIndicator3D.tsx | 3 + frontend/src/pages/LinkMLViewerPage.tsx | 31 +++- scripts/sync/oxigraph_person_sync.py | 38 +++- 111 files changed, 2378 insertions(+), 110 deletions(-) diff --git a/backend/rag/main.py b/backend/rag/main.py index 611f129a15..6404fc1e55 100644 --- a/backend/rag/main.py +++ b/backend/rag/main.py @@ -448,6 +448,41 @@ class DSPyQueryRequest(BaseModel): ) +class LLMResponseMetadata(BaseModel): + """LLM response provenance metadata (aligned with LinkML LLMResponse schema). + + Captures GLM 4.7 Interleaved Thinking chain-of-thought reasoning and + full API response metadata for audit trails and debugging. + + See: schemas/20251121/linkml/modules/classes/LLMResponse.yaml + """ + # Core response content + content: str | None = None # The final LLM response text + reasoning_content: str | None = None # GLM 4.7 Interleaved Thinking chain-of-thought + + # Model identification + model: str | None = None # Model identifier (e.g., 'glm-4.7', 'claude-3-opus') + provider: str | None = None # Provider enum: zai, anthropic, openai, huggingface, groq + + # Request tracking + request_id: str | None = None # Provider-assigned request ID + created: str | None = None # ISO 8601 timestamp of response generation + + # Token usage (for cost estimation and monitoring) + prompt_tokens: int | None = None # Tokens in input prompt + completion_tokens: int | None = None # Tokens in response (content + reasoning) + total_tokens: int | None = None # Total tokens used + cached_tokens: int | None = None # Tokens served from provider cache + + # Response metadata + finish_reason: str | None = None # stop, length, tool_calls, content_filter + latency_ms: int | None = None # Response latency in milliseconds + + # GLM 4.7 Thinking Mode configuration + thinking_mode: str | None = None # enabled, disabled, interleaved, preserved + clear_thinking: bool | None = None # False = Preserved Thinking enabled + + class DSPyQueryResponse(BaseModel): """DSPy RAG query response.""" question: str @@ -470,6 +505,127 @@ class DSPyQueryResponse(BaseModel): # Cache tracking cache_hit: bool = False # Whether response was served from cache + + # LLM response provenance (GLM 4.7 Thinking Mode support) + llm_response: LLMResponseMetadata | None = None # Full LLM response metadata including reasoning_content + + +def extract_llm_response_metadata( + lm: Any, + provider: str | None = None, + latency_ms: int | None = None, +) -> LLMResponseMetadata | None: + """Extract LLM response metadata from DSPy LM history. + + DSPy stores the raw API response in lm.history[-1]["response"], which includes: + - choices[0].message.content (final response text) + - choices[0].message.reasoning_content (GLM 4.7 Interleaved Thinking) + - usage.prompt_tokens, completion_tokens, total_tokens + - model, created, id, finish_reason + + This enables capturing GLM 4.7's chain-of-thought reasoning for provenance. + + Args: + lm: DSPy LM instance with history attribute + provider: LLM provider name (zai, anthropic, openai, etc.) + latency_ms: Response latency in milliseconds + + Returns: + LLMResponseMetadata or None if history is empty + """ + try: + # Check if LM has history + if not hasattr(lm, "history") or not lm.history: + logger.debug("No LM history available for metadata extraction") + return None + + # Get the last history entry (most recent LLM call) + last_entry = lm.history[-1] + response = last_entry.get("response") + + if response is None: + logger.debug("No response in LM history entry") + return None + + # Extract content and reasoning_content from the response + content = None + reasoning_content = None + finish_reason = None + + if hasattr(response, "choices") and response.choices: + choice = response.choices[0] + if hasattr(choice, "message"): + message = choice.message + content = getattr(message, "content", None) + # GLM 4.7 Interleaved Thinking - check for reasoning_content + reasoning_content = getattr(message, "reasoning_content", None) + elif isinstance(choice, dict): + content = choice.get("text") or choice.get("message", {}).get("content") + reasoning_content = choice.get("message", {}).get("reasoning_content") + + # Extract finish_reason + finish_reason = getattr(choice, "finish_reason", None) + if finish_reason is None and isinstance(choice, dict): + finish_reason = choice.get("finish_reason") + + # Extract usage statistics + usage = last_entry.get("usage", {}) + prompt_tokens = usage.get("prompt_tokens") + completion_tokens = usage.get("completion_tokens") + total_tokens = usage.get("total_tokens") + + # Check for cached_tokens (some providers include this) + cached_tokens = None + if "prompt_tokens_details" in usage: + cached_tokens = usage["prompt_tokens_details"].get("cached_tokens") + + # Extract model info + model = last_entry.get("response_model") or last_entry.get("model") + request_id = getattr(response, "id", None) + created = getattr(response, "created", None) + + # Convert unix timestamp to ISO 8601 if needed + created_str = None + if created: + if isinstance(created, (int, float)): + import datetime + created_str = datetime.datetime.fromtimestamp(created, tz=datetime.timezone.utc).isoformat() + else: + created_str = str(created) + + # Determine thinking mode (GLM 4.7 specific) + thinking_mode = None + if reasoning_content: + # If we got reasoning_content, the model used interleaved thinking + thinking_mode = "interleaved" + + metadata = LLMResponseMetadata( + content=content, + reasoning_content=reasoning_content, + model=model, + provider=provider, + request_id=request_id, + created=created_str, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=total_tokens, + cached_tokens=cached_tokens, + finish_reason=finish_reason, + latency_ms=latency_ms, + thinking_mode=thinking_mode, + ) + + if reasoning_content: + logger.info( + f"Captured GLM 4.7 reasoning_content ({len(reasoning_content)} chars) " + f"from {provider}/{model}" + ) + + return metadata + + except Exception as e: + logger.warning(f"Failed to extract LLM response metadata: {e}") + return None # Cache Client @@ -2292,6 +2448,13 @@ async def dspy_query(request: DSPyQueryRequest) -> DSPyQueryResponse: retrieved_results = getattr(result, "retrieved_results", None) query_type = getattr(result, "query_type", None) + # Extract LLM response metadata from DSPy history (GLM 4.7 reasoning_content support) + llm_response_metadata = extract_llm_response_metadata( + lm=lm, + provider=llm_provider_used, + latency_ms=int(elapsed_ms), + ) + # Build response object response = DSPyQueryResponse( question=request.question, @@ -2312,6 +2475,8 @@ async def dspy_query(request: DSPyQueryRequest) -> DSPyQueryResponse: llm_provider_used=llm_provider_used, llm_model_used=llm_model_used, cache_hit=False, + # LLM response provenance (GLM 4.7 Thinking Mode chain-of-thought) + llm_response=llm_response_metadata, ) # Cache the successful response for future requests @@ -2767,6 +2932,13 @@ async def stream_dspy_query_response( retrieved_results = getattr(result, "retrieved_results", None) query_type = getattr(result, "query_type", None) + # Extract LLM response metadata from DSPy history (GLM 4.7 reasoning_content support) + llm_response_metadata = extract_llm_response_metadata( + lm=lm, + provider=llm_provider_used, + latency_ms=int(elapsed_ms), + ) + response = DSPyQueryResponse( question=request.question, resolved_question=getattr(result, "resolved_question", None), @@ -2784,6 +2956,8 @@ async def stream_dspy_query_response( llm_provider_used=llm_provider_used, llm_model_used=llm_model_used, cache_hit=False, + # LLM response provenance (GLM 4.7 Thinking Mode chain-of-thought) + llm_response=llm_response_metadata, ) # Cache the response diff --git a/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json b/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json index 3059e7790c..9d4f27c4bd 100644 --- a/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json +++ b/data/custodian/.logo_enrichment_crawl4ai_checkpoint.json @@ -6506,7 +6506,157 @@ "JP-05-AKI-M-AO.yaml", "JP-05-AKI-M-APCH.yaml", "JP-05-AKI-M-MIMAU.yaml", - "JP-05-AKI-M-NSLC.yaml" + "JP-05-AKI-M-NSLC.yaml", + "JP-05-AKI-M-OKFH.yaml", + "JP-05-AKI-M-SCFM.yaml", + "JP-05-AMA-M-KFHM.yaml", + "JP-05-DAI-A-DCA.yaml", + "JP-05-DAI-L-DL-daisenshiritsukamioka_library.yaml", + "JP-05-DAI-L-DL-daisenshiritsukyowa_library.yaml", + "JP-05-DAI-L-DL-daisenshiritsunakasen_library.yaml", + "JP-05-DAI-L-DL-daisenshiritsunangai_library.yaml", + "JP-05-DAI-L-DL-daisenshiritsunishisemboku_library.yaml", + "JP-05-DAI-L-DL-daisenshiritsuota_library.yaml", + "JP-05-DAI-L-DL-daisenshiritsusemboku_library.yaml", + "JP-05-DAI-L-DL.yaml", + "JP-05-DAI-M-AAC.yaml", + "JP-05-DAI-M-APMAS.yaml", + "JP-05-DAI-M-DSKT.yaml", + "JP-05-DAI-M-HHS.yaml", + "JP-05-DAI-M-HTCPM.yaml", + "JP-05-EBI-M-IC.yaml", + "JP-05-GOR-L-OCL.yaml", + "JP-05-HOJ-M-S.yaml", + "JP-05-HOR-M-MV.yaml", + "JP-05-ISH-M-IJM.yaml", + "JP-05-KAT-L-KL.yaml", + "JP-05-KAT-L-KLI.yaml", + "JP-05-KAT-L-KLO.yaml", + "JP-05-KAT-L-KLS.yaml", + "JP-05-KAT-M-KKIS.yaml", + "JP-05-KAZ-L-KL-kazunoshiritsutateyamabunkokeishotowada_library.yaml", + "JP-05-KAZ-L-KL-kosakachoritsukosaka_library.yaml", + "JP-05-KAZ-L-KL.yaml", + "JP-05-KAZ-M-KCHFM.yaml", + "JP-05-KAZ-M-KCMHM.yaml", + "JP-05-KAZ-M-KPMH.yaml", + "JP-05-KAZ-M-OSCC.yaml", + "JP-05-KIT-L-K-kitaakitashiaikawakominkantoshoshitsu.yaml", + "JP-05-KIT-L-K.yaml", + "JP-05-KIT-L-KL-kamikoanisonritsu_library.yaml", + "JP-05-KIT-L-KL-kitaakitashimoriyoshi_library.yaml", + "JP-05-KIT-L-KL.yaml", + "JP-05-KIT-M-ALTM.yaml", + "JP-05-KIT-M-HUMM.yaml", + "JP-05-KIT-M-KK.yaml", + "JP-05-KIT-M-MS.yaml", + "JP-05-KUM-M-KMA.yaml", + "JP-05-MAN-M-SSM.yaml", + "JP-05-MAT-M-ASRO.yaml", + "JP-05-MAT-M-EUM.yaml", + "JP-05-MIN-L-AO.yaml", + "JP-05-MIN-L-G.yaml", + "JP-05-MIN-L-HL.yaml", + "JP-05-MIN-L-I.yaml", + "JP-05-MIN-L-O.yaml", + "JP-05-MIN-M-GMFM.yaml", + "JP-05-MIN-M-IMHMH.yaml", + "JP-05-MIY-M-OMA.yaml", + "JP-05-NIK-L-NLK.yaml", + "JP-05-NIK-L-NLN.yaml", + "JP-05-NIK-L-NPL.yaml", + "JP-05-NIK-M-CHVC.yaml", + "JP-05-NIK-M-CSFSM.yaml", + "JP-05-NIK-M-KM.yaml", + "JP-05-NIK-M-KSTH.yaml", + "JP-05-NIK-M-NLM.yaml", + "JP-05-NIK-M-TM.yaml", + "JP-05-NOS-L-A.yaml", + "JP-05-NOS-L-N.yaml", + "JP-05-NOS-L-NL.yaml", + "JP-05-NOS-M-IMH.yaml", + "JP-05-NOS-M-NCCSM.yaml", + "JP-05-NOS-M-NEP.yaml", + "JP-05-NYU-M-TLM.yaml", + "JP-05-ODA-L-AL.yaml", + "JP-05-ODA-L-OKL.yaml", + "JP-05-ODA-L-OL-odateshiritsuhinai_library.yaml", + "JP-05-ODA-L-OL-odateshiritsutashiro_library.yaml", + "JP-05-ODA-L-OL.yaml", + "JP-05-ODA-M-MAD.yaml", + "JP-05-ODA-M-OMH.yaml", + "JP-05-OGA-L-H.yaml", + "JP-05-OGA-L-OL.yaml", + "JP-05-OGA-L-UL.yaml", + "JP-05-OGA-L-W.yaml", + "JP-05-OGA-M-HHM.yaml", + "JP-05-OGA-M-HNM.yaml", + "JP-05-OGA-M-NM.yaml", + "JP-05-OGA-M-OCGLC.yaml", + "JP-05-OGA-M-OW.yaml", + "JP-05-OGA-M-UMHM.yaml", + "JP-05-SEM-L-M.yaml", + "JP-05-SEM-L-SG.yaml", + "JP-05-SEM-L-SL.yaml", + "JP-05-SEM-M-HMAM.yaml", + "JP-05-SEM-M-KKM.yaml", + "JP-05-SEM-M-LTKTM.yaml", + "JP-05-SEM-M-MCMH.yaml", + "JP-05-SEM-M-MTMHF.yaml", + "JP-05-SEM-M-NMHML.yaml", + "JP-05-SEM-M-OAM.yaml", + "JP-05-SUM-M-NCHHM.yaml", + "JP-05-TAM-M-TMMAI.yaml", + "JP-05-UWA-M-UCHM.yaml", + "JP-05-YAM-L-F.yaml", + "JP-05-YAM-L-H-happochominehamachikubunkakoryusentahoeikantoshosh.yaml", + "JP-05-YAM-L-H.yaml", + "JP-05-YAM-L-M-mitanechokotokakominkantoshoshitsu.yaml", + "JP-05-YAM-L-M-mitanechoyamamotokominkantoshoshitsu.yaml", + "JP-05-YAM-L-M.yaml", + "JP-05-YOK-A-YCMA-yokote_city_modern_archives.yaml", + "JP-05-YOK-A-YCMA.yaml", + "JP-05-YOK-L-Y-yokoteshiritsutaiyutoshoshitsu.yaml", + "JP-05-YOK-L-Y.yaml", + "JP-05-YOK-L-YL-yokoteshiritsuhiraka_library.yaml", + "JP-05-YOK-L-YL-yokoteshiritsujumonji_library.yaml", + "JP-05-YOK-L-YL-yokoteshiritsumasuda_library.yaml", + "JP-05-YOK-L-YL-yokoteshiritsuomori_library.yaml", + "JP-05-YOK-L-YL.yaml", + "JP-05-YOK-L-YLCL.yaml", + "JP-05-YOK-M-HMYC.yaml", + "JP-05-YOK-M-HTS.yaml", + "JP-05-YOK-M-JHMER.yaml", + "JP-05-YOK-M-OLMH.yaml", + "JP-05-YOK-M-TCHFM.yaml", + "JP-05-YOK-M-YILMH.yaml", + "JP-05-YOK-M-YMMM.yaml", + "JP-05-YUR-L-AH.yaml", + "JP-05-YUR-L-D.yaml", + "JP-05-YUR-L-Y-yurihonjoshichokaikominkantoshoshitsu.yaml", + "JP-05-YUR-L-Y-yurihonjoshihigashiyurikominkantoshoshitsu.yaml", + "JP-05-YUR-L-Y-yurihonjoshiyashimakominkantoshoshitsu.yaml", + "JP-05-YUR-L-Y.yaml", + "JP-05-YUR-L-YL-yurihonjoshiiwaki_library.yaml", + "JP-05-YUR-L-YL-yurihonjoshiyuri_library.yaml", + "JP-05-YUR-L-YL.yaml", + "JP-05-YUR-M-CWTM.yaml", + "JP-05-YUR-M-IFMH.yaml", + "JP-05-YUR-M-KCAMYS.yaml", + "JP-05-YUR-M-OHFMH.yaml", + "JP-05-YUR-M-YMLMM.yaml", + "JP-05-YUR-M-YSYLCPI.yaml", + "JP-05-YUZ-L-Y.yaml", + "JP-05-YUZ-L-YE.yaml", + "JP-05-YUZ-L-YL-yuzawashiritsuogachi_library.yaml", + "JP-05-YUZ-L-YL.yaml", + "JP-05-YUZ-M-ISMM.yaml", + "JP-05-YUZ-M-JYYKT.yaml", + "JP-06-ABA-M-FPIAFSM.yaml", + "JP-06-FUK-L-FL.yaml", + "JP-06-FUK-M-FCHM.yaml", + "JP-06-HAR-M-FJ.yaml", + "JP-06-KIT-M-FCSM.yaml" ], "last_index": 9 } \ No newline at end of file diff --git a/data/custodian/JP-05-AMA-M-KFHM.yaml b/data/custodian/JP-05-AMA-M-KFHM.yaml index eaf9c3ed31..b80fd20f60 100644 --- a/data/custodian/JP-05-AMA-M-KFHM.yaml +++ b/data/custodian/JP-05-AMA-M-KFHM.yaml @@ -480,3 +480,30 @@ location: geonames_id: 6417058 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:25.911492+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:28:17.289828+00:00' + source_url: https://www.city.imabari.ehime.jp/museum/santou + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.imabari.ehime.jp/museum/santou/img/logo.gif + source_url: https://www.city.imabari.ehime.jp/museum/santou + css_selector: '#museum_logo > a > img' + retrieved_on: '2025-12-23T16:28:17.289828+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 上浦歴史民俗資料館(村上三島記念館) + - claim_type: favicon_url + claim_value: https://www.city.imabari.ehime.jp/museum/favicon.ico + source_url: https://www.city.imabari.ehime.jp/museum/santou + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T16:28:17.289828+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-DAI-M-AAC.yaml b/data/custodian/JP-05-DAI-M-AAC.yaml index a72d5421d5..11752137f6 100644 --- a/data/custodian/JP-05-DAI-M-AAC.yaml +++ b/data/custodian/JP-05-DAI-M-AAC.yaml @@ -38,18 +38,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:31.102635+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: DAI method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-DAI-M-AAC - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-DAI-M-AAC valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-DAI-M-AAC ghcid_numeric: 7831768514642479968 valid_from: '2025-12-06T23:38:31.102635+00:00' @@ -217,8 +218,27 @@ location: source_path: wikidata_enrichment.wikidata_coordinates city: Daisen Shi region: Akita Ken - region_code: 05 + region_code: 5 country: *id006 postal_code: 014-0802 street_address: HOTTA, Daisen Shi, Akita Ken, 014-0802 normalization_timestamp: '2025-12-09T10:55:21.771540+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:29:36.704689+00:00' + source_url: https://common3.pref.akita.lg.jp/maibun + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://common3.pref.akita.lg.jp/apple-touch-icon-180x180.png + source_url: https://common3.pref.akita.lg.jp/maibun + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T16:29:36.704689+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-05-EBI-M-IC.yaml b/data/custodian/JP-05-EBI-M-IC.yaml index 0180e00898..3a98e42316 100644 --- a/data/custodian/JP-05-EBI-M-IC.yaml +++ b/data/custodian/JP-05-EBI-M-IC.yaml @@ -441,3 +441,30 @@ location: geonames_id: 6416231 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:26.080731+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:29:55.234034+00:00' + source_url: https://www.city.imabari.ehime.jp/museum/imabarijo + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.imabari.ehime.jp/museum/imabarijo/img/logo.gif + source_url: https://www.city.imabari.ehime.jp/museum/imabarijo + css_selector: '#museum_logo > a > img' + retrieved_on: '2025-12-23T16:29:55.234034+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 今治城 + - claim_type: favicon_url + claim_value: https://www.city.imabari.ehime.jp/museum/favicon.ico + source_url: https://www.city.imabari.ehime.jp/museum/imabarijo + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T16:29:55.234034+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-HOR-M-MV.yaml b/data/custodian/JP-05-HOR-M-MV.yaml index 0053555c3b..70b4aca145 100644 --- a/data/custodian/JP-05-HOR-M-MV.yaml +++ b/data/custodian/JP-05-HOR-M-MV.yaml @@ -351,3 +351,20 @@ location: geonames_id: 1926141 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:26.249401+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:30:12.263185+00:00' + source_url: https://www.miuraz.co.jp/miurart + extraction_method: crawl4ai + claims: + - claim_type: og_image_url + claim_value: https://www.miuraz.co.jp/miurart/common/images/ogp.jpg + source_url: https://www.miuraz.co.jp/miurart + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T16:30:12.263185+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-05-ISH-M-IJM.yaml b/data/custodian/JP-05-ISH-M-IJM.yaml index 9a6215d70d..356fb553e0 100644 --- a/data/custodian/JP-05-ISH-M-IJM.yaml +++ b/data/custodian/JP-05-ISH-M-IJM.yaml @@ -423,3 +423,22 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/A8wWHGGK03A/hqdefault_live.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:30:19.721950+00:00' + source_url: https://itami-kinenkan.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://itami-kinenkan.jp/favicon.ico + source_url: https://itami-kinenkan.jp + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T16:30:19.721950+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-KAT-L-KL.yaml b/data/custodian/JP-05-KAT-L-KL.yaml index f2969971b9..5e58c5292d 100644 --- a/data/custodian/JP-05-KAT-L-KL.yaml +++ b/data/custodian/JP-05-KAT-L-KL.yaml @@ -199,3 +199,22 @@ wikidata_enrichment: wikidata_web: official_website: http://library.city.katagami.akita.jp wikidata_official_website: http://library.city.katagami.akita.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:30:26.959687+00:00' + source_url: http://library.city.katagami.akita.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://library.city.katagami.akita.jp/favicon.ico + source_url: http://library.city.katagami.akita.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T16:30:26.959687+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-KAT-L-KLI.yaml b/data/custodian/JP-05-KAT-L-KLI.yaml index db3b3e30a6..e652607d0e 100644 --- a/data/custodian/JP-05-KAT-L-KLI.yaml +++ b/data/custodian/JP-05-KAT-L-KLI.yaml @@ -200,3 +200,22 @@ wikidata_enrichment: wikidata_web: official_website: http://library.city.katagami.akita.jp/ wikidata_official_website: http://library.city.katagami.akita.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:30:33.904124+00:00' + source_url: http://library.city.katagami.akita.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://library.city.katagami.akita.jp/favicon.ico + source_url: http://library.city.katagami.akita.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T16:30:33.904124+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-KAT-L-KLO.yaml b/data/custodian/JP-05-KAT-L-KLO.yaml index 8bcbbdc3b3..3503eb39fb 100644 --- a/data/custodian/JP-05-KAT-L-KLO.yaml +++ b/data/custodian/JP-05-KAT-L-KLO.yaml @@ -199,3 +199,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.katagami.lg.jp/index.cfm/7 wikidata_official_website: http://www.city.katagami.lg.jp/index.cfm/7 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:30:43.017693+00:00' + source_url: http://www.city.katagami.lg.jp/index.cfm/7 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.katagami.lg.jp/theme/base/img_common/header_logo.png + source_url: http://www.city.katagami.lg.jp/index.cfm/7 + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T16:30:43.017693+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 潟上市 Katagami City + - claim_type: favicon_url + claim_value: http://www.city.katagami.lg.jp/theme/base/img_common/smartphone.png + source_url: http://www.city.katagami.lg.jp/index.cfm/7 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:30:43.017693+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.katagami.lg.jp/theme/base/img_common/ogp_noimage.png + source_url: http://www.city.katagami.lg.jp/index.cfm/7 + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T16:30:43.017693+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-KAT-L-KLS.yaml b/data/custodian/JP-05-KAT-L-KLS.yaml index 780ace6ad7..46d7e69260 100644 --- a/data/custodian/JP-05-KAT-L-KLS.yaml +++ b/data/custodian/JP-05-KAT-L-KLS.yaml @@ -199,3 +199,22 @@ wikidata_enrichment: wikidata_web: official_website: http://library.city.katagami.akita.jp/ wikidata_official_website: http://library.city.katagami.akita.jp/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:30:50.218350+00:00' + source_url: http://library.city.katagami.akita.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://library.city.katagami.akita.jp/favicon.ico + source_url: http://library.city.katagami.akita.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T16:30:50.218350+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-KAZ-L-KL-kosakachoritsukosaka_library.yaml b/data/custodian/JP-05-KAZ-L-KL-kosakachoritsukosaka_library.yaml index 660b7417be..2a085c651a 100644 --- a/data/custodian/JP-05-KAZ-L-KL-kosakachoritsukosaka_library.yaml +++ b/data/custodian/JP-05-KAZ-L-KL-kosakachoritsukosaka_library.yaml @@ -201,3 +201,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.kosaka.akita.jp/tosho/index.html wikidata_official_website: http://www.town.kosaka.akita.jp/tosho/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:31:09.599571+00:00' + source_url: http://www.town.kosaka.akita.jp/tosho/index.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.town.kosaka.akita.jp/theme/base/img_sub/header_logo_pc.png + source_url: http://www.town.kosaka.akita.jp/tosho/index.html + css_selector: '#header > div.header-in > p.header-logo > a > img' + retrieved_on: '2025-12-23T16:31:09.599571+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 秋田県小坂町 Kosaka Town Official Site ひとと自然と文化を未来につなぐ 魅力あふれるまち + - claim_type: favicon_url + claim_value: http://www.town.kosaka.akita.jp/theme/base/img_common/smartphone.png + source_url: http://www.town.kosaka.akita.jp/tosho/index.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:31:09.599571+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.town.kosaka.akita.jp/theme/base/img_common/ogp_noimage.png + source_url: http://www.town.kosaka.akita.jp/tosho/index.html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T16:31:09.599571+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-KAZ-M-OSCC.yaml b/data/custodian/JP-05-KAZ-M-OSCC.yaml index cfb36ced1f..f31462f880 100644 --- a/data/custodian/JP-05-KAZ-M-OSCC.yaml +++ b/data/custodian/JP-05-KAZ-M-OSCC.yaml @@ -32,18 +32,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:31.078680+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: KAZ method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-KAZ-M-OSCC - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-KAZ-M-OSCC valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-KAZ-M-OSCC ghcid_numeric: 13434133056760519923 valid_from: '2025-12-06T23:38:31.078680+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: OYU STONE CIRCLE CENTER @@ -200,7 +201,7 @@ wikidata_enrichment: location: city: Kazuno Shi region: Akita Ken - region_code: 05 + region_code: 5 country: JP postal_code: 018-5421 street_address: TOWADA OYU, Kazuno Shi, Akita Ken, 018-5421 @@ -215,3 +216,36 @@ location: geonames_id: 11612632 geonames_name: Kazuno feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:31:36.815238+00:00' + source_url: https://www.city.kazuno.akita.jp/kanko_bunka_sports/bunkazai/7/5593.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.kazuno.lg.jp/theme/base/img_common/pc_header_logo.png + source_url: https://www.city.kazuno.akita.jp/kanko_bunka_sports/bunkazai/7/5593.html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T16:31:36.815238+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 鹿角市(かづのし)世界遺産のまち + - claim_type: favicon_url + claim_value: https://www.city.kazuno.lg.jp/theme/base/img_common/smartphone.png + source_url: https://www.city.kazuno.akita.jp/kanko_bunka_sports/bunkazai/7/5593.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:31:36.815238+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.kazuno.lg.jp/theme/base/img_common/ogp_noimage.png + source_url: https://www.city.kazuno.akita.jp/kanko_bunka_sports/bunkazai/7/5593.html + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T16:31:36.815238+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-KIT-L-K-kitaakitashiaikawakominkantoshoshitsu.yaml b/data/custodian/JP-05-KIT-L-K-kitaakitashiaikawakominkantoshoshitsu.yaml index 26e44f8f4d..c4ff0e51fc 100644 --- a/data/custodian/JP-05-KIT-L-K-kitaakitashiaikawakominkantoshoshitsu.yaml +++ b/data/custodian/JP-05-KIT-L-K-kitaakitashiaikawakominkantoshoshitsu.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/ wikidata_official_website: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:31:46.454355+00:00' + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.kitaakita.akita.jp/assets/front/img/apple-touch-icon.png + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T16:31:46.454355+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.kitaakita.akita.jp/uploads/common/og.png + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:31:46.454355+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-05-KIT-L-K.yaml b/data/custodian/JP-05-KIT-L-K.yaml index 626adfea59..cb3c31ff59 100644 --- a/data/custodian/JP-05-KIT-L-K.yaml +++ b/data/custodian/JP-05-KIT-L-K.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/ wikidata_official_website: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/ +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:31:55.240922+00:00' + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.kitaakita.akita.jp/assets/front/img/apple-touch-icon.png + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T16:31:55.240922+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.kitaakita.akita.jp/uploads/common/og.png + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:31:55.240922+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-05-KIT-L-KL-kamikoanisonritsu_library.yaml b/data/custodian/JP-05-KIT-L-KL-kamikoanisonritsu_library.yaml index c546736182..c1c5f511d1 100644 --- a/data/custodian/JP-05-KIT-L-KL-kamikoanisonritsu_library.yaml +++ b/data/custodian/JP-05-KIT-L-KL-kamikoanisonritsu_library.yaml @@ -202,3 +202,31 @@ wikidata_enrichment: wikidata_web: official_website: https://www.vill.kamikoani.akita.jp/forms/div/divinfolist.aspx?div_id=175 wikidata_official_website: https://www.vill.kamikoani.akita.jp/forms/div/divinfolist.aspx?div_id=175 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:32:03.826483+00:00' + source_url: https://www.vill.kamikoani.akita.jp/forms/div/divinfolist.aspx?div_id=175 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.vill.kamikoani.akita.jp/div/admin/image/logo/header/logo.png + source_url: https://www.vill.kamikoani.akita.jp/forms/div/divinfolist.aspx?div_id=175 + css_selector: '[document] > html > body.fontchangetarget > form > header.header + > div.navbar.container > div.navbar__logo2 > a > img' + retrieved_on: '2025-12-23T16:32:03.826483+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 上小阿仁村 + - claim_type: favicon_url + claim_value: https://www.vill.kamikoani.akita.jp/div/admin/image/icon/favicon/favicon.ico + source_url: https://www.vill.kamikoani.akita.jp/forms/div/divinfolist.aspx?div_id=175 + css_selector: '[document] > html > head > link:nth-of-type(37)' + retrieved_on: '2025-12-23T16:32:03.826483+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-KIT-L-KL-kitaakitashimoriyoshi_library.yaml b/data/custodian/JP-05-KIT-L-KL-kitaakitashimoriyoshi_library.yaml index 243f03431f..f717caaa05 100644 --- a/data/custodian/JP-05-KIT-L-KL-kitaakitashimoriyoshi_library.yaml +++ b/data/custodian/JP-05-KIT-L-KL-kitaakitashimoriyoshi_library.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/index.html wikidata_official_website: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:32:12.834770+00:00' + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.kitaakita.akita.jp/assets/front/img/apple-touch-icon.png + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/index.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T16:32:12.834770+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.kitaakita.akita.jp/uploads/common/og.png + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/index.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:32:12.834770+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-05-KIT-L-KL.yaml b/data/custodian/JP-05-KIT-L-KL.yaml index fa442de858..676e49ad44 100644 --- a/data/custodian/JP-05-KIT-L-KL.yaml +++ b/data/custodian/JP-05-KIT-L-KL.yaml @@ -205,3 +205,28 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/library.html wikidata_official_website: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/library.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:32:21.943314+00:00' + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/library.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.kitaakita.akita.jp/assets/front/img/apple-touch-icon.png + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/library.html + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T16:32:21.943314+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.kitaakita.akita.jp/uploads/common/og.png + source_url: http://www.city.kitaakita.akita.jp/koukyoushisetu/bunka_hukushi/tosyokan/library.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:32:21.943314+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-05-KIT-M-KK.yaml b/data/custodian/JP-05-KIT-M-KK.yaml index 2a18e6c696..cb9c4443a9 100644 --- a/data/custodian/JP-05-KIT-M-KK.yaml +++ b/data/custodian/JP-05-KIT-M-KK.yaml @@ -1155,3 +1155,36 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/AhvXckvWS94/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:32:42.286885+00:00' + source_url: http://hahaha.akita.jp/wp/kumakuma + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://hahaha.akita.jp/wp/kumakuma/wp-content/uploads/2020/05/logokuma.png + source_url: http://hahaha.akita.jp/wp/kumakuma + css_selector: '#logo > a > img' + retrieved_on: '2025-12-23T16:32:42.286885+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: くまくま園 公式ホームページ + - claim_type: favicon_url + claim_value: http://hahaha.akita.jp/wp/kumakuma/wp-content/uploads/2019/04/cropped-DSF5649-180x180.jpg + source_url: http://hahaha.akita.jp/wp/kumakuma + css_selector: '[document] > html.no-js > head > link:nth-of-type(27)' + retrieved_on: '2025-12-23T16:32:42.286885+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://hahaha.akita.jp/wp/kumakuma/wp-content/uploads/2019/04/7218.jpg + source_url: http://hahaha.akita.jp/wp/kumakuma + css_selector: '[document] > html.no-js > head > meta:nth-of-type(14)' + retrieved_on: '2025-12-23T16:32:42.286885+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-05-KUM-M-KMA.yaml b/data/custodian/JP-05-KUM-M-KMA.yaml index 2f857cf10b..44deaa3d8d 100644 --- a/data/custodian/JP-05-KUM-M-KMA.yaml +++ b/data/custodian/JP-05-KUM-M-KMA.yaml @@ -720,3 +720,22 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/uGw13ovqmI4/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:32:54.969217+00:00' + source_url: https://www.kumakogen.jp/site/muse + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.kumakogen.jp/apple-touch-icon.png + source_url: https://www.kumakogen.jp/site/muse + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T16:32:54.969217+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 3 diff --git a/data/custodian/JP-05-MAT-M-ASRO.yaml b/data/custodian/JP-05-MAT-M-ASRO.yaml index bab0201d93..a5d223dba1 100644 --- a/data/custodian/JP-05-MAT-M-ASRO.yaml +++ b/data/custodian/JP-05-MAT-M-ASRO.yaml @@ -243,3 +243,28 @@ location: geonames_id: 1926100 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:26.596703+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:33:17.535887+00:00' + source_url: http://www.morinokuni.or.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.morinokuni.or.jp/files/favicon/favicon.ico?cache=20251224013253 + source_url: http://www.morinokuni.or.jp + css_selector: '[document] > html > head > link:nth-of-type(15)' + retrieved_on: '2025-12-23T16:33:17.535887+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://morinokuni.or.jp/css/public/pc/image/ogimage.jpg + source_url: http://www.morinokuni.or.jp + css_selector: '[document] > html > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T16:33:17.535887+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-05-MAT-M-EUM.yaml b/data/custodian/JP-05-MAT-M-EUM.yaml index 538f256841..575dd6e7fd 100644 --- a/data/custodian/JP-05-MAT-M-EUM.yaml +++ b/data/custodian/JP-05-MAT-M-EUM.yaml @@ -369,3 +369,28 @@ location: geonames_id: 1926099 feature_code: PPLA normalization_timestamp: '2025-12-09T06:53:26.660999+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:33:27.766117+00:00' + source_url: https://www.ehime-u.ac.jp/about/ehime-u-museum + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.ehime-u.ac.jp/wp-content/themes/ehime-university/assets/images/favicons/apple-touch-icon-180x180.png + source_url: https://www.ehime-u.ac.jp/about/ehime-u-museum + css_selector: '[document] > html > head > link:nth-of-type(11)' + retrieved_on: '2025-12-23T16:33:27.766117+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www.ehime-u.ac.jp/wp-content/uploads/2022/03/愛媛大学ミュージアム外観.jpg + source_url: https://www.ehime-u.ac.jp/about/ehime-u-museum + css_selector: '[document] > html > head > meta:nth-of-type(15)' + retrieved_on: '2025-12-23T16:33:27.766117+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 39 diff --git a/data/custodian/JP-05-MIN-L-AO.yaml b/data/custodian/JP-05-MIN-L-AO.yaml index 7d473bf31b..faedfc473a 100644 --- a/data/custodian/JP-05-MIN-L-AO.yaml +++ b/data/custodian/JP-05-MIN-L-AO.yaml @@ -207,3 +207,22 @@ location: geonames_id: 1854678 geonames_name: Ōgata feature_code: PPL +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:33:36.471637+00:00' + source_url: https://libwww.akita-pu.ac.jp/drupal + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://libwww.akita-pu.ac.jp/opac/images/cyan/favicon.ico + source_url: https://libwww.akita-pu.ac.jp/drupal + css_selector: '[document] > html > head > link:nth-of-type(9)' + retrieved_on: '2025-12-23T16:33:36.471637+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-MIN-L-HL.yaml b/data/custodian/JP-05-MIN-L-HL.yaml index 3a80eb343e..4f4a1ee01b 100644 --- a/data/custodian/JP-05-MIN-L-HL.yaml +++ b/data/custodian/JP-05-MIN-L-HL.yaml @@ -200,3 +200,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.hachirogata.akita.jp/g.html?seq=107 wikidata_official_website: http://www.town.hachirogata.akita.jp/g.html?seq=107 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:33:48.329312+00:00' + source_url: http://www.town.hachirogata.akita.jp/g.html?seq=107 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.town.hachirogata.akita.jp/_template_/_site_/_default_/_res/design/images/header/hachirogata_logo.png + source_url: http://www.town.hachirogata.akita.jp/g.html?seq=107 + css_selector: '#tlogo > h1 > a > img' + retrieved_on: '2025-12-23T16:33:48.329312+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 八郎潟町公式サイトトップページ + - claim_type: favicon_url + claim_value: http://www.town.hachirogata.akita.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed_.png + source_url: http://www.town.hachirogata.akita.jp/g.html?seq=107 + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T16:33:48.329312+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.town.hachirogata.akita.jp/_template_/_site_/_default_/_res/images/sns/ogimage_.png + source_url: http://www.town.hachirogata.akita.jp/g.html?seq=107 + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:33:48.329312+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-MIY-M-OMA.yaml b/data/custodian/JP-05-MIY-M-OMA.yaml index b9c0aa1f22..385ee671ff 100644 --- a/data/custodian/JP-05-MIY-M-OMA.yaml +++ b/data/custodian/JP-05-MIY-M-OMA.yaml @@ -403,3 +403,30 @@ location: geonames_id: 1926087 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:26.728705+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:34:05.716767+00:00' + source_url: https://www.city.imabari.ehime.jp/museum/omishima + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.imabari.ehime.jp/museum/omishima/img/logo.gif + source_url: https://www.city.imabari.ehime.jp/museum/omishima + css_selector: '#museum_logo > a > img' + retrieved_on: '2025-12-23T16:34:05.716767+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 大三島美術館 + - claim_type: favicon_url + claim_value: https://www.city.imabari.ehime.jp/museum/favicon.ico + source_url: https://www.city.imabari.ehime.jp/museum/omishima + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T16:34:05.716767+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-NIK-L-NLN.yaml b/data/custodian/JP-05-NIK-L-NLN.yaml index 088e4a4023..6244508d04 100644 --- a/data/custodian/JP-05-NIK-L-NLN.yaml +++ b/data/custodian/JP-05-NIK-L-NLN.yaml @@ -204,3 +204,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.nikaho.akita.jp/life/detail.html?id=201 wikidata_official_website: http://www.city.nikaho.akita.jp/life/detail.html?id=201 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:34:17.852076+00:00' + source_url: http://www.city.nikaho.akita.jp/life/detail.html?id=201 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.nikaho.akita.jp/theme/base/s-admin/img_top/pc_header_logo.png + source_url: http://www.city.nikaho.akita.jp/life/detail.html?id=201 + css_selector: '#sp-header > div.box.clearfix > a > img.header-logo' + retrieved_on: '2025-12-23T16:34:17.852076+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: にかほ市 + - claim_type: favicon_url + claim_value: http://www.city.nikaho.akita.jp/theme/base/img_common/smartphone.png + source_url: http://www.city.nikaho.akita.jp/life/detail.html?id=201 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:34:17.852076+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.nikaho.akita.jp/theme/base/img_common/ogp_noimage.png + source_url: http://www.city.nikaho.akita.jp/life/detail.html?id=201 + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T16:34:17.852076+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-NIK-L-NPL.yaml b/data/custodian/JP-05-NIK-L-NPL.yaml index 57a36c8c5d..18678907ae 100644 --- a/data/custodian/JP-05-NIK-L-NPL.yaml +++ b/data/custodian/JP-05-NIK-L-NPL.yaml @@ -207,3 +207,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.nikaho.akita.jp/life/detail.html?id=201 wikidata_official_website: http://www.city.nikaho.akita.jp/life/detail.html?id=201 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:34:26.027443+00:00' + source_url: http://www.city.nikaho.akita.jp/life/detail.html?id=201 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.nikaho.akita.jp/theme/base/s-admin/img_top/pc_header_logo.png + source_url: http://www.city.nikaho.akita.jp/life/detail.html?id=201 + css_selector: '#sp-header > div.box.clearfix > a > img.header-logo' + retrieved_on: '2025-12-23T16:34:26.027443+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: にかほ市 + - claim_type: favicon_url + claim_value: http://www.city.nikaho.akita.jp/theme/base/img_common/smartphone.png + source_url: http://www.city.nikaho.akita.jp/life/detail.html?id=201 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:34:26.027443+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.nikaho.akita.jp/theme/base/img_common/ogp_noimage.png + source_url: http://www.city.nikaho.akita.jp/life/detail.html?id=201 + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T16:34:26.027443+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-NIK-M-KM.yaml b/data/custodian/JP-05-NIK-M-KM.yaml index 7ae08de45e..b72044dcb6 100644 --- a/data/custodian/JP-05-NIK-M-KM.yaml +++ b/data/custodian/JP-05-NIK-M-KM.yaml @@ -32,18 +32,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:31.121722+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: NIK method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-NIK-M-KM - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-NIK-M-KM valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-NIK-M-KM ghcid_numeric: 5835839064655281895 valid_from: '2025-12-06T23:38:31.121722+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: KISAKATA MUSEUM @@ -171,8 +172,8 @@ wikidata_enrichment: instance_of: &id004 - id: Q33506 label: museum - description: institution that holds artifacts and other objects of scientific, artistic, cultural, historical, or other - importance + description: institution that holds artifacts and other objects of scientific, + artistic, cultural, historical, or other importance wikidata_instance_of: *id004 wikidata_location: country: &id005 @@ -196,7 +197,7 @@ wikidata_enrichment: location: city: Nikaho Shi region: Akita Ken - region_code: 05 + region_code: 5 country: JP postal_code: 018-0104 street_address: KISAKATAMACHI KITSUNEMORI, Nikaho Shi, Akita Ken, 018-0104 @@ -211,3 +212,36 @@ location: geonames_id: 6822198 geonames_name: Nikaho feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:34:44.950203+00:00' + source_url: https://www.city.nikaho.akita.jp/life/detail.html?id=210 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.nikaho.akita.jp/theme/base/s-admin/img_top/pc_header_logo.png + source_url: https://www.city.nikaho.akita.jp/life/detail.html?id=210 + css_selector: '#sp-header > div.box.clearfix > a > img.header-logo' + retrieved_on: '2025-12-23T16:34:44.950203+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: にかほ市 + - claim_type: favicon_url + claim_value: https://www.city.nikaho.akita.jp/theme/base/img_common/smartphone.png + source_url: https://www.city.nikaho.akita.jp/life/detail.html?id=210 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:34:44.950203+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.nikaho.akita.jp/theme/base/img_common/ogp_noimage.png + source_url: https://www.city.nikaho.akita.jp/life/detail.html?id=210 + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T16:34:44.950203+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-NIK-M-TM.yaml b/data/custodian/JP-05-NIK-M-TM.yaml index 0779051486..c67889aa26 100644 --- a/data/custodian/JP-05-NIK-M-TM.yaml +++ b/data/custodian/JP-05-NIK-M-TM.yaml @@ -34,18 +34,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:31.131423+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: NIK method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-NIK-M-TM - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-NIK-M-TM valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-NIK-M-TM ghcid_numeric: 13141746719298254165 valid_from: '2025-12-06T23:38:31.131423+00:00' @@ -232,8 +233,33 @@ location: source_path: wikidata_enrichment.wikidata_coordinates city: Nikaho Shi region: Akita Ken - region_code: 05 + region_code: 5 country: *id005 postal_code: 018-0402 street_address: HIRASAWA, Nikaho Shi, Akita Ken, 018-0402 normalization_timestamp: '2025-12-09T10:55:22.600756+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:34:59.980614+00:00' + source_url: https://www.tdk.com/museum + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.tdk.com/themes/custom/tdkcom/src/favicons/TDK_Favicon_180x180_BT.png + source_url: https://www.tdk.com/museum + css_selector: '[document] > html.no-touchevents.inputtypes-search > head > link:nth-of-type(9)' + retrieved_on: '2025-12-23T16:34:59.980614+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 180x180 + - claim_type: og_image_url + claim_value: https://www.tdk.com/system/files/dam/image/TDK_logo_blue_1200_630.png + source_url: https://www.tdk.com/museum + css_selector: '[document] > html.no-touchevents.inputtypes-search > head > meta:nth-of-type(6)' + retrieved_on: '2025-12-23T16:34:59.980614+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 6 diff --git a/data/custodian/JP-05-NYU-M-TLM.yaml b/data/custodian/JP-05-NYU-M-TLM.yaml index 3b1412da5f..ce50e0ce22 100644 --- a/data/custodian/JP-05-NYU-M-TLM.yaml +++ b/data/custodian/JP-05-NYU-M-TLM.yaml @@ -241,3 +241,22 @@ location: geonames_id: 1926070 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:26.783150+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:35:31.879456+00:00' + source_url: https://www.city.saijo.ehime.jp/soshiki/syakaikyoiku/kyodo-index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.saijo.ehime.jp/apple-touch-icon.png + source_url: https://www.city.saijo.ehime.jp/soshiki/syakaikyoiku/kyodo-index.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:35:31.879456+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-05-ODA-L-AL.yaml b/data/custodian/JP-05-ODA-L-AL.yaml index ab42235de2..c38b8ac9b1 100644 --- a/data/custodian/JP-05-ODA-L-AL.yaml +++ b/data/custodian/JP-05-ODA-L-AL.yaml @@ -32,18 +32,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:54.486214+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: ODA method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-ODA-L-AL - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-ODA-L-AL valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-ODA-L-AL ghcid_numeric: 1915600020121425453 valid_from: '2025-12-06T23:38:54.486214+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: AKITAKANGOFUKUSHIDAIGAKUFUZOKU Library @@ -200,7 +201,7 @@ wikidata_enrichment: location: city: Odate Shi region: Akita Ken - region_code: 05 + region_code: 5 country: JP postal_code: 017-0046 street_address: 2-3-4 SHIMIZU, Odate Shi, Akita Ken, 017-0046 @@ -215,3 +216,22 @@ location: geonames_id: 2128787 geonames_name: Ōdate feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:35:43.736346+00:00' + source_url: http://www.well.ac.jp/library/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.well.ac.jp/assets/themes/custom/apple-touch-icon.png + source_url: http://www.well.ac.jp/library/index.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:35:43.736346+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-05-OGA-L-OL.yaml b/data/custodian/JP-05-OGA-L-OL.yaml index af7df63802..6be5fbd416 100644 --- a/data/custodian/JP-05-OGA-L-OL.yaml +++ b/data/custodian/JP-05-OGA-L-OL.yaml @@ -204,3 +204,36 @@ wikidata_enrichment: wikidata_media: image: Oga City Library and Funakawako Community Centre.jpg wikidata_image: Oga City Library and Funakawako Community Centre.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:36:29.058213+00:00' + source_url: http://www.city.oga.akita.jp/index.cfm/12 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.oga.akita.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.oga.akita.jp/index.cfm/12 + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T16:36:29.058213+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 男鹿市 OGA CITY + - claim_type: favicon_url + claim_value: http://www.city.oga.akita.jp/theme/base/img_common/smartphone.png + source_url: http://www.city.oga.akita.jp/index.cfm/12 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:36:29.058213+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.oga.akita.jp/theme/base/img_common/ogp_noimage.png + source_url: http://www.city.oga.akita.jp/index.cfm/12 + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T16:36:29.058213+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-OGA-L-W.yaml b/data/custodian/JP-05-OGA-L-W.yaml index a3002c0d46..bda0debdb2 100644 --- a/data/custodian/JP-05-OGA-L-W.yaml +++ b/data/custodian/JP-05-OGA-L-W.yaml @@ -199,3 +199,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.oga.akita.jp/index.cfm/12 wikidata_official_website: http://www.city.oga.akita.jp/index.cfm/12 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:36:47.882466+00:00' + source_url: http://www.city.oga.akita.jp/index.cfm/12 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.oga.akita.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.oga.akita.jp/index.cfm/12 + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T16:36:47.882466+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 男鹿市 OGA CITY + - claim_type: favicon_url + claim_value: http://www.city.oga.akita.jp/theme/base/img_common/smartphone.png + source_url: http://www.city.oga.akita.jp/index.cfm/12 + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:36:47.882466+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.oga.akita.jp/theme/base/img_common/ogp_noimage.png + source_url: http://www.city.oga.akita.jp/index.cfm/12 + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T16:36:47.882466+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-OGA-M-NM.yaml b/data/custodian/JP-05-OGA-M-NM.yaml index 85977953f5..8ee6f795ab 100644 --- a/data/custodian/JP-05-OGA-M-NM.yaml +++ b/data/custodian/JP-05-OGA-M-NM.yaml @@ -223,3 +223,28 @@ wikidata_enrichment: image: Entrance of Namahage Museum, Oga, Akita.JPG commons_category: Namahage Museum wikidata_image: Entrance of Namahage Museum, Oga, Akita.JPG +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:37:00.224317+00:00' + source_url: https://namahage.co.jp/namahagekan + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://namahage.co.jp/namahagekan/img/logo.svg + source_url: https://namahage.co.jp/namahagekan + css_selector: '#logo > h1 > a > img' + retrieved_on: '2025-12-23T16:37:00.224317+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: なまはげ館 + - claim_type: og_image_url + claim_value: http://namahage.co.jp/namahagekan/img/og_image.jpg + source_url: https://namahage.co.jp/namahagekan + css_selector: '[document] > html > head > meta:nth-of-type(7)' + retrieved_on: '2025-12-23T16:37:00.224317+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-05-OGA-M-OW.yaml b/data/custodian/JP-05-OGA-M-OW.yaml index df7c41638a..f026450259 100644 --- a/data/custodian/JP-05-OGA-M-OW.yaml +++ b/data/custodian/JP-05-OGA-M-OW.yaml @@ -32,18 +32,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:31.056462+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: OGA method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-OGA-M-OW - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-OGA-M-OW valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-OGA-M-OW ghcid_numeric: 6360991033694890579 valid_from: '2025-12-06T23:38:31.056462+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: OGASHI WAKAMIFURUSATOSHIRYOUKAN @@ -171,8 +172,8 @@ wikidata_enrichment: instance_of: &id004 - id: Q33506 label: museum - description: institution that holds artifacts and other objects of scientific, artistic, cultural, historical, or other - importance + description: institution that holds artifacts and other objects of scientific, + artistic, cultural, historical, or other importance wikidata_instance_of: *id004 wikidata_location: country: &id005 @@ -196,7 +197,7 @@ wikidata_enrichment: location: city: Oga Shi region: Akita Ken - region_code: 05 + region_code: 5 country: JP postal_code: 010-0401 street_address: NOISHI, Oga Shi, Akita Ken, 010-0401 @@ -211,3 +212,36 @@ location: geonames_id: 6822201 geonames_name: Oga feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:37:12.493621+00:00' + source_url: http://www.city.oga.akita.jp/index.cfm/14,1484,52,html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.oga.akita.jp/theme/base/img_common/pc_header_logo.png + source_url: http://www.city.oga.akita.jp/index.cfm/14,1484,52,html + css_selector: '#header-logo > a > img' + retrieved_on: '2025-12-23T16:37:12.493621+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 男鹿市 OGA CITY + - claim_type: favicon_url + claim_value: http://www.city.oga.akita.jp/theme/base/img_common/smartphone.png + source_url: http://www.city.oga.akita.jp/index.cfm/14,1484,52,html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:37:12.493621+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: http://www.city.oga.akita.jp/theme/base/img_common/ogp_noimage.png + source_url: http://www.city.oga.akita.jp/index.cfm/14,1484,52,html + css_selector: '[document] > html > head > meta:nth-of-type(8)' + retrieved_on: '2025-12-23T16:37:12.493621+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-SEM-L-SL.yaml b/data/custodian/JP-05-SEM-L-SL.yaml index 246482dd4b..73a27ab6f3 100644 --- a/data/custodian/JP-05-SEM-L-SL.yaml +++ b/data/custodian/JP-05-SEM-L-SL.yaml @@ -204,3 +204,29 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.semboku.akita.jp/citizens/12_03.html wikidata_official_website: http://www.city.semboku.akita.jp/citizens/12_03.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:37:39.177585+00:00' + source_url: http://www.city.semboku.akita.jp/citizens/12_03.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.semboku.akita.jp/assets/img/common/logo_site.png + source_url: http://www.city.semboku.akita.jp/citizens/12_03.html + css_selector: '#top > header.header > div.header-inner.container > div.logo__btn__wrap + > div.header-logo > a > img' + retrieved_on: '2025-12-23T16:37:39.177585+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 仙北市 + - claim_type: og_image_url + claim_value: https://www.city.semboku.akita.jp/ogp.jpg + source_url: http://www.city.semboku.akita.jp/citizens/12_03.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:37:39.177585+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-05-SEM-M-KKM.yaml b/data/custodian/JP-05-SEM-M-KKM.yaml index 094bd6201d..7cc238594a 100644 --- a/data/custodian/JP-05-SEM-M-KKM.yaml +++ b/data/custodian/JP-05-SEM-M-KKM.yaml @@ -244,3 +244,29 @@ wikidata_enrichment: - id: Q11436810 label: Hiroshi Ōe description: Japanese architect (1913-1989) +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:37:49.960825+00:00' + source_url: https://www.city.semboku.akita.jp/sightseeing/densyo + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.semboku.akita.jp/assets/img/common/logo_site.png + source_url: https://www.city.semboku.akita.jp/sightseeing/densyo + css_selector: '#top > header.header > div.header-inner.container > div.logo__btn__wrap + > div.header-logo > a > img' + retrieved_on: '2025-12-23T16:37:49.960825+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 仙北市 + - claim_type: og_image_url + claim_value: https://www.city.semboku.akita.jp/ogp.jpg + source_url: https://www.city.semboku.akita.jp/sightseeing/densyo + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:37:49.960825+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: false + has_og_image: true + favicon_count: 0 diff --git a/data/custodian/JP-05-SEM-M-OAM.yaml b/data/custodian/JP-05-SEM-M-OAM.yaml index e145b03b52..47e278f39d 100644 --- a/data/custodian/JP-05-SEM-M-OAM.yaml +++ b/data/custodian/JP-05-SEM-M-OAM.yaml @@ -34,18 +34,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:31.138776+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: SEM method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-SEM-M-OAM - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-SEM-M-OAM valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-SEM-M-OAM ghcid_numeric: 3015213229431430773 valid_from: '2025-12-06T23:38:31.138776+00:00' @@ -242,8 +243,33 @@ location: source_path: wikidata_enrichment.wikidata_coordinates city: Semboku Shi region: Akita Ken - region_code: 05 + region_code: 5 country: *id006 postal_code: 014-0326 street_address: KAKUNODATEMACHI YAMANEMACHI, Semboku Shi, Akita Ken, 014-0326 normalization_timestamp: '2025-12-09T10:55:23.214529+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:57:02.350663+00:00' + source_url: https://www.museomura.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://static.wixstatic.com/media/9baae4_94da8f8b5eb44a29bf8956317f0679d4%7Emv2.jpg/v1/fill/w_180%2Ch_180%2Clg_1%2Cusm_0.66_1.00_0.01/9baae4_94da8f8b5eb44a29bf8956317f0679d4%7Emv2.jpg + source_url: https://www.museomura.com + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:57:02.350663+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/jpeg + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://static.wixstatic.com/media/9baae4_8fe18281c0d84ad8ba9151372a14569c~mv2.jpg/v1/fill/w_2500,h_951,al_c/9baae4_8fe18281c0d84ad8ba9151372a14569c~mv2.jpg + source_url: https://www.museomura.com + css_selector: '[document] > html > head > meta:nth-of-type(14)' + retrieved_on: '2025-12-23T16:57:02.350663+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 3 diff --git a/data/custodian/JP-05-TAM-M-TMMAI.yaml b/data/custodian/JP-05-TAM-M-TMMAI.yaml index 191f613935..f62fafedd6 100644 --- a/data/custodian/JP-05-TAM-M-TMMAI.yaml +++ b/data/custodian/JP-05-TAM-M-TMMAI.yaml @@ -379,3 +379,30 @@ location: geonames_id: 8626922 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:26.980178+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:58:06.626526+00:00' + source_url: https://www.city.imabari.ehime.jp/museum/tamagawa + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.imabari.ehime.jp/museum/tamagawa/img/logo.gif + source_url: https://www.city.imabari.ehime.jp/museum/tamagawa + css_selector: '#museum_logo > a > img' + retrieved_on: '2025-12-23T16:58:06.626526+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 玉川近代美術館 + - claim_type: favicon_url + claim_value: https://www.city.imabari.ehime.jp/museum/favicon.ico + source_url: https://www.city.imabari.ehime.jp/museum/tamagawa + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T16:58:06.626526+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-UWA-M-UCHM.yaml b/data/custodian/JP-05-UWA-M-UCHM.yaml index 3a3649e618..4dd8d977f1 100644 --- a/data/custodian/JP-05-UWA-M-UCHM.yaml +++ b/data/custodian/JP-05-UWA-M-UCHM.yaml @@ -381,3 +381,22 @@ location: geonames_id: 1926020 feature_code: PPLA2 normalization_timestamp: '2025-12-09T06:53:27.085239+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:58:14.746964+00:00' + source_url: https://www.city.uwajima.ehime.jp/site/siryoukan/rekishitop.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.uwajima.ehime.jp/apple-touch-icon.png + source_url: https://www.city.uwajima.ehime.jp/site/siryoukan/rekishitop.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:58:14.746964+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-05-YAM-L-F.yaml b/data/custodian/JP-05-YAM-L-F.yaml index 9d1ddfa8f0..6d937a21d4 100644 --- a/data/custodian/JP-05-YAM-L-F.yaml +++ b/data/custodian/JP-05-YAM-L-F.yaml @@ -203,3 +203,30 @@ wikidata_enrichment: wikidata_web: official_website: http://www.town.fujisato.akita.jp/c.html?seq=85 wikidata_official_website: http://www.town.fujisato.akita.jp/c.html?seq=85 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:58:25.203841+00:00' + source_url: http://www.town.fujisato.akita.jp/c.html?seq=85 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.town.fujisato.akita.jp/up/images/fujisato/image/logo.png + source_url: http://www.town.fujisato.akita.jp/c.html?seq=85 + css_selector: '#logo > a.home > img.common' + retrieved_on: '2025-12-23T16:58:25.203841+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 藤里町 + - claim_type: favicon_url + claim_value: http://www.town.fujisato.akita.jp/favicon.ico + source_url: http://www.town.fujisato.akita.jp/c.html?seq=85 + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T16:58:25.203841+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/vnd.microsoft.icon + favicon_sizes: '' + summary: + total_claims: 2 + has_primary_logo: true + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-YOK-L-Y-yokoteshiritsutaiyutoshoshitsu.yaml b/data/custodian/JP-05-YOK-L-Y-yokoteshiritsutaiyutoshoshitsu.yaml index 7361aee441..b4f5417fb0 100644 --- a/data/custodian/JP-05-YOK-L-Y-yokoteshiritsutaiyutoshoshitsu.yaml +++ b/data/custodian/JP-05-YOK-L-Y-yokoteshiritsutaiyutoshoshitsu.yaml @@ -203,3 +203,28 @@ wikidata_enrichment: - http://www.city.yokote.lg.jp/sub01/cat100168.html - https://www.city.yokote.lg.jp/shisetsu/1001527/1004013.html wikidata_official_website: *id006 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:58:48.498140+00:00' + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T16:58:48.498140+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yokote.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:58:48.498140+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-YOK-L-Y.yaml b/data/custodian/JP-05-YOK-L-Y.yaml index 1816973e55..7195ac1e7c 100644 --- a/data/custodian/JP-05-YOK-L-Y.yaml +++ b/data/custodian/JP-05-YOK-L-Y.yaml @@ -203,3 +203,36 @@ wikidata_enrichment: - http://www.city.yokote.lg.jp/sub01/cat100168.html - https://www.city.yokote.lg.jp/kurashi/1001140/1001251/1005858/1005936.html wikidata_official_website: *id006 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:58:55.961999+00:00' + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/design/images/header/yokote_logo2.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '#logo2 > a > img' + retrieved_on: '2025-12-23T16:58:55.961999+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 横手市公式サイトトップページ + - claim_type: favicon_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T16:58:55.961999+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yokote.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:58:55.961999+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-YOK-L-YL-yokoteshiritsumasuda_library.yaml b/data/custodian/JP-05-YOK-L-YL-yokoteshiritsumasuda_library.yaml index 16d1cee353..e754495a98 100644 --- a/data/custodian/JP-05-YOK-L-YL-yokoteshiritsumasuda_library.yaml +++ b/data/custodian/JP-05-YOK-L-YL-yokoteshiritsumasuda_library.yaml @@ -225,3 +225,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.yokote.lg.jp/sub01/cat100168.html wikidata_official_website: http://www.city.yokote.lg.jp/sub01/cat100168.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:59:07.532121+00:00' + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/design/images/header/yokote_logo2.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '#logo2 > a > img' + retrieved_on: '2025-12-23T16:59:07.532121+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 横手市公式サイトトップページ + - claim_type: favicon_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T16:59:07.532121+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yokote.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:59:07.532121+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-YOK-L-YL-yokoteshiritsuomori_library.yaml b/data/custodian/JP-05-YOK-L-YL-yokoteshiritsuomori_library.yaml index 9b708785bb..2b9df1fd09 100644 --- a/data/custodian/JP-05-YOK-L-YL-yokoteshiritsuomori_library.yaml +++ b/data/custodian/JP-05-YOK-L-YL-yokoteshiritsuomori_library.yaml @@ -225,3 +225,36 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city.yokote.lg.jp/sub01/cat100168.html wikidata_official_website: http://www.city.yokote.lg.jp/sub01/cat100168.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:59:15.097221+00:00' + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/design/images/header/yokote_logo2.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '#logo2 > a > img' + retrieved_on: '2025-12-23T16:59:15.097221+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 横手市公式サイトトップページ + - claim_type: favicon_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T16:59:15.097221+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yokote.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:59:15.097221+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-YOK-L-YL.yaml b/data/custodian/JP-05-YOK-L-YL.yaml index 369d9c1091..f26e9b1f6a 100644 --- a/data/custodian/JP-05-YOK-L-YL.yaml +++ b/data/custodian/JP-05-YOK-L-YL.yaml @@ -229,3 +229,36 @@ wikidata_enrichment: - http://www.city.yokote.lg.jp/sub01/cat100168.html - https://www.city.yokote.lg.jp/kurashi/1001140/1001251/ wikidata_official_website: *id006 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:59:21.904895+00:00' + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/design/images/header/yokote_logo2.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '#logo2 > a > img' + retrieved_on: '2025-12-23T16:59:21.904895+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 横手市公式サイトトップページ + - claim_type: favicon_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T16:59:21.904895+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yokote.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:59:21.904895+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-YOK-L-YLCL.yaml b/data/custodian/JP-05-YOK-L-YLCL.yaml index ba1f36c1cc..036c21686b 100644 --- a/data/custodian/JP-05-YOK-L-YLCL.yaml +++ b/data/custodian/JP-05-YOK-L-YLCL.yaml @@ -232,3 +232,36 @@ wikidata_enrichment: wikidata_media: image: Yokote municipal Omonogawa library.jpg wikidata_image: Yokote municipal Omonogawa library.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:59:29.674209+00:00' + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/design/images/header/yokote_logo2.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '#logo2 > a > img' + retrieved_on: '2025-12-23T16:59:29.674209+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 横手市公式サイトトップページ + - claim_type: favicon_url + claim_value: http://www.city.yokote.lg.jp/sub01/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T16:59:29.674209+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yokote.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: http://www.city.yokote.lg.jp/sub01/cat100168.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:59:29.674209+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-YOK-M-HTS.yaml b/data/custodian/JP-05-YOK-M-HTS.yaml index 74b1a04dcf..b0ca9f3905 100644 --- a/data/custodian/JP-05-YOK-M-HTS.yaml +++ b/data/custodian/JP-05-YOK-M-HTS.yaml @@ -351,3 +351,41 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/20YO3JAXQ8A/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:59:39.335966+00:00' + source_url: https://akitafurusatomura.co.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://akitafurusatomura.co.jp/wp-content/uploads/2023/11/header-logo.png + source_url: https://akitafurusatomura.co.jp + css_selector: '[document] > html.wf-a-otf-ud-shin-maru-go-pr6n-n3-active.wf-a-otf-ud-shin-go-pr6n-n3-active + > body.home.wp-singular > header.elementor.elementor-313 > header.elementor-element.elementor-element-f9ac638 + > div.elementor-element.elementor-element-0d496d0 > div.elementor-element.elementor-element-f5d359b + > div.elementor-widget-container > a > img.attachment-full.size-full' + retrieved_on: '2025-12-23T16:59:39.335966+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + - claim_type: favicon_url + claim_value: https://akitafurusatomura.co.jp/wp-content/uploads/2023/11/akitafurusatomura-favicon-300x300.png + source_url: https://akitafurusatomura.co.jp + css_selector: '[document] > html.wf-a-otf-ud-shin-maru-go-pr6n-n3-active.wf-a-otf-ud-shin-go-pr6n-n3-active + > head > link:nth-of-type(57)' + retrieved_on: '2025-12-23T16:59:39.335966+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 192x192 + - claim_type: og_image_url + claim_value: https://akitafurusatomura.co.jp/wp-content/uploads/2025/08/yosakoi.jpg + source_url: https://akitafurusatomura.co.jp + css_selector: '[document] > html.wf-a-otf-ud-shin-maru-go-pr6n-n3-active.wf-a-otf-ud-shin-go-pr6n-n3-active + > head > meta:nth-of-type(13)' + retrieved_on: '2025-12-23T16:59:39.335966+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-YOK-M-OLMH.yaml b/data/custodian/JP-05-YOK-M-OLMH.yaml index 780e568826..ae678a2eb7 100644 --- a/data/custodian/JP-05-YOK-M-OLMH.yaml +++ b/data/custodian/JP-05-YOK-M-OLMH.yaml @@ -32,18 +32,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:31.037693+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: YOK method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-YOK-M-OLMH - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-YOK-M-OLMH valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-YOK-M-OLMH ghcid_numeric: 14027401722154785391 valid_from: '2025-12-06T23:38:31.037693+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: OMONOGAWA LOCAL MATERIAL HALL @@ -182,8 +183,8 @@ wikidata_enrichment: instance_of: &id004 - id: Q33506 label: museum - description: institution that holds artifacts and other objects of scientific, artistic, cultural, historical, or other - importance + description: institution that holds artifacts and other objects of scientific, + artistic, cultural, historical, or other importance wikidata_instance_of: *id004 wikidata_location: country: &id005 @@ -209,7 +210,7 @@ wikidata_enrichment: location: city: Yokote Shi region: Akita Ken - region_code: 05 + region_code: 5 country: JP postal_code: 013-0208 street_address: OMONOGAWAMACHI NUMADATE, Yokote Shi, Akita Ken, 013-0208 @@ -224,3 +225,28 @@ location: geonames_id: 2110506 geonames_name: Yokote feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:59:47.673944+00:00' + source_url: https://www.city.yokote.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.yokote.lg.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.yokote.lg.jp + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T16:59:47.673944+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yokote.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.yokote.lg.jp + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T16:59:47.673944+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-YOK-M-TCHFM.yaml b/data/custodian/JP-05-YOK-M-TCHFM.yaml index 15d1ff34eb..918b213fec 100644 --- a/data/custodian/JP-05-YOK-M-TCHFM.yaml +++ b/data/custodian/JP-05-YOK-M-TCHFM.yaml @@ -336,3 +336,22 @@ location: geonames_id: 1926016 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:27.156836+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T16:59:55.061927+00:00' + source_url: https://www.city.toon.ehime.jp/soshiki/23/2431.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city.toon.ehime.jp/img/icon/apple-touch-icon.png + source_url: https://www.city.toon.ehime.jp/soshiki/23/2431.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T16:59:55.061927+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-05-YOK-M-YILMH.yaml b/data/custodian/JP-05-YOK-M-YILMH.yaml index d3d3074bad..e12d1a0bb0 100644 --- a/data/custodian/JP-05-YOK-M-YILMH.yaml +++ b/data/custodian/JP-05-YOK-M-YILMH.yaml @@ -229,3 +229,36 @@ wikidata_enrichment: wikidata_media: image: Ishizaka Yojiro Literature Museum.jpg wikidata_image: Ishizaka Yojiro Literature Museum.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:00:02.008254+00:00' + source_url: https://www.city.yokote.lg.jp/shogai/page000349.html + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.yokote.lg.jp/shogai/_template_/_site_/_default_/_res/design/images/header/yokote_logo2.png + source_url: https://www.city.yokote.lg.jp/shogai/page000349.html + css_selector: '#logo2 > a > img' + retrieved_on: '2025-12-23T17:00:02.008254+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 横手市公式サイトトップページ + - claim_type: favicon_url + claim_value: https://www.city.yokote.lg.jp/shogai/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png + source_url: https://www.city.yokote.lg.jp/shogai/page000349.html + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T17:00:02.008254+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yokote.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.yokote.lg.jp/shogai/page000349.html + css_selector: '[document] > html > head > meta:nth-of-type(9)' + retrieved_on: '2025-12-23T17:00:02.008254+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-YOK-M-YMMM.yaml b/data/custodian/JP-05-YOK-M-YMMM.yaml index 791ecc17a5..1efb8db7ce 100644 --- a/data/custodian/JP-05-YOK-M-YMMM.yaml +++ b/data/custodian/JP-05-YOK-M-YMMM.yaml @@ -242,3 +242,22 @@ wikidata_enrichment: image: Yokote-Masuda Manga Museum 20190503.jpg commons_category: Yokote-Masuda Manga Museum wikidata_image: Yokote-Masuda Manga Museum 20190503.jpg +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:00:11.269636+00:00' + source_url: https://manga-museum.com + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://manga-museum.com/wp-content/uploads/2019/02/favicon.ico + source_url: https://manga-museum.com + css_selector: '[document] > html.wf-inactive.wf-inactive > head > link:nth-of-type(22)' + retrieved_on: '2025-12-23T17:00:11.269636+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 32x32 + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-YUR-L-AH.yaml b/data/custodian/JP-05-YUR-L-AH.yaml index 50532412ef..6e97047cf1 100644 --- a/data/custodian/JP-05-YUR-L-AH.yaml +++ b/data/custodian/JP-05-YUR-L-AH.yaml @@ -32,18 +32,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:53.911187+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: YUR method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-YUR-L-AH - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-YUR-L-AH valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-YUR-L-AH ghcid_numeric: 14962919741502556453 valid_from: '2025-12-06T23:38:53.911187+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: AKITAKENRITSUDAIGAKUTOSHOJOHOSENTA(HONJOKYAMPASU) @@ -189,7 +190,7 @@ wikidata_enrichment: location: city: Yurihonjo Shi region: Akita Ken - region_code: 05 + region_code: 5 country: JP postal_code: 015-0055 street_address: 84-4 TSUCHIYA EBINOKUCHI, Yurihonjo Shi, Akita Ken, 015-0055 @@ -204,3 +205,22 @@ location: geonames_id: 6822202 geonames_name: Yurihonjō feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:00:22.961004+00:00' + source_url: https://libwww.akita-pu.ac.jp/drupal + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://libwww.akita-pu.ac.jp/opac/images/cyan/favicon.ico + source_url: https://libwww.akita-pu.ac.jp/drupal + css_selector: '[document] > html > head > link:nth-of-type(9)' + retrieved_on: '2025-12-23T17:00:22.961004+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-05-YUR-M-YMLMM.yaml b/data/custodian/JP-05-YUR-M-YMLMM.yaml index 80656b483b..383700c216 100644 --- a/data/custodian/JP-05-YUR-M-YMLMM.yaml +++ b/data/custodian/JP-05-YUR-M-YMLMM.yaml @@ -32,18 +32,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:31.081154+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: YUR method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-YUR-M-YMLMM - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-YUR-M-YMLMM valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-YUR-M-YMLMM ghcid_numeric: 17602580394321561584 valid_from: '2025-12-06T23:38:31.081154+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: YURIHONJO MUNICIPAL LOCAL MATERIAL MUSEUM @@ -207,7 +208,7 @@ wikidata_enrichment: location: city: Yurihonjo Shi region: Akita Ken - region_code: 05 + region_code: 5 country: JP postal_code: 015-0011 street_address: ISHIWAKI, Yurihonjo Shi, Akita Ken, 015-0011 @@ -222,3 +223,36 @@ location: geonames_id: 6822202 geonames_name: Yurihonjō feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:05:02.539697+00:00' + source_url: https://www.city.yurihonjo.lg.jp/bunka-sport/bunka/c1323/5830 + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://www.city.yurihonjo.lg.jp/bunka-sport/bunka/c1323/_template_/_site_/_default_/_res/design/images/header/header-logo.png + source_url: https://www.city.yurihonjo.lg.jp/bunka-sport/bunka/c1323/5830 + css_selector: '#tlogo > h1 > a > img' + retrieved_on: '2025-12-23T17:05:02.539697+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: 由利本荘市公式サイトトップページ + - claim_type: favicon_url + claim_value: https://www.city.yurihonjo.lg.jp/bunka-sport/bunka/c1323/_template_/_site_/_default_/_res/images/apple-touch-icon.png?202406 + source_url: https://www.city.yurihonjo.lg.jp/bunka-sport/bunka/c1323/5830 + css_selector: '[document] > html > head > link:nth-of-type(5)' + retrieved_on: '2025-12-23T17:05:02.539697+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://www.city.yurihonjo.lg.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png + source_url: https://www.city.yurihonjo.lg.jp/bunka-sport/bunka/c1323/5830 + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T17:05:02.539697+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-05-YUZ-L-Y.yaml b/data/custodian/JP-05-YUZ-L-Y.yaml index 926bfca0ac..4bd9eab42c 100644 --- a/data/custodian/JP-05-YUZ-L-Y.yaml +++ b/data/custodian/JP-05-YUZ-L-Y.yaml @@ -205,3 +205,22 @@ wikidata_enrichment: - http://www.city-yuzawa.jp/ - http://www.city-yuzawa.jp/shisetsu023/553 wikidata_official_website: *id006 +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:05:13.248293+00:00' + source_url: http://www.city-yuzawa.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city-yuzawa.jp/apple-touch-icon.png + source_url: http://www.city-yuzawa.jp + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:05:13.248293+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-05-YUZ-L-YE.yaml b/data/custodian/JP-05-YUZ-L-YE.yaml index 0b5f2d480e..aabecad2db 100644 --- a/data/custodian/JP-05-YUZ-L-YE.yaml +++ b/data/custodian/JP-05-YUZ-L-YE.yaml @@ -199,3 +199,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city-yuzawa.jp wikidata_official_website: http://www.city-yuzawa.jp +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:05:18.929614+00:00' + source_url: http://www.city-yuzawa.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city-yuzawa.jp/apple-touch-icon.png + source_url: http://www.city-yuzawa.jp + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:05:18.929614+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-05-YUZ-L-YL-yuzawashiritsuogachi_library.yaml b/data/custodian/JP-05-YUZ-L-YL-yuzawashiritsuogachi_library.yaml index cf7d340a8e..4c69ed492b 100644 --- a/data/custodian/JP-05-YUZ-L-YL-yuzawashiritsuogachi_library.yaml +++ b/data/custodian/JP-05-YUZ-L-YL-yuzawashiritsuogachi_library.yaml @@ -199,3 +199,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city-yuzawa.jp/shisetsu0422/index.html wikidata_official_website: http://www.city-yuzawa.jp/shisetsu0422/index.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:05:25.849672+00:00' + source_url: http://www.city-yuzawa.jp/shisetsu0422/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city-yuzawa.jp/apple-touch-icon.png + source_url: http://www.city-yuzawa.jp/shisetsu0422/index.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:05:25.849672+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-05-YUZ-L-YL.yaml b/data/custodian/JP-05-YUZ-L-YL.yaml index ed817807e4..6eb9c866e5 100644 --- a/data/custodian/JP-05-YUZ-L-YL.yaml +++ b/data/custodian/JP-05-YUZ-L-YL.yaml @@ -199,3 +199,22 @@ wikidata_enrichment: wikidata_web: official_website: http://www.city-yuzawa.jp/shisetsu0421/520.html wikidata_official_website: http://www.city-yuzawa.jp/shisetsu0421/520.html +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:05:32.209495+00:00' + source_url: http://www.city-yuzawa.jp/shisetsu0421/520.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://www.city-yuzawa.jp/apple-touch-icon.png + source_url: http://www.city-yuzawa.jp/shisetsu0421/520.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:05:32.209495+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-05-YUZ-M-ISMM.yaml b/data/custodian/JP-05-YUZ-M-ISMM.yaml index 2222103f0a..b0487ba28e 100644 --- a/data/custodian/JP-05-YUZ-M-ISMM.yaml +++ b/data/custodian/JP-05-YUZ-M-ISMM.yaml @@ -32,18 +32,19 @@ ghcid: generation_timestamp: '2025-12-06T23:38:31.066995+00:00' location_resolution: country_code: JP - region_code: 05 + region_code: 5 city_code: YUZ method: CH_ANNOTATOR_SOURCE ghcid_history: - ghcid: JP-05-YUZ-M-ISMM - valid_from: "2025-12-10T09:43:29Z" + valid_from: '2025-12-10T09:43:29Z' valid_to: null - reason: "Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO 3166-2:JP" + reason: Corrected region code from JP-AK (abbreviation) to JP-05 (Akita) per ISO + 3166-2:JP - ghcid: JP-AK-YUZ-M-ISMM valid_from: null - valid_to: "2025-12-10T09:43:29Z" - reason: "Previous GHCID with incorrect region code" + valid_to: '2025-12-10T09:43:29Z' + reason: Previous GHCID with incorrect region code - ghcid: JP-AK-YUZ-M-ISMM ghcid_numeric: 4849865788719712149 valid_from: '2025-12-06T23:38:31.066995+00:00' @@ -101,8 +102,8 @@ ch_annotator: annotation_metadata: confidence_score: 0.98 verified: false - verification_date: - verified_by: + verification_date: null + verified_by: null entity_claims: - claim_type: full_name claim_value: INNAI SILVER-MINE MUSEUM @@ -215,7 +216,7 @@ wikidata_enrichment: location: city: Yuzawa Shi region: Akita Ken - region_code: 05 + region_code: 5 country: JP postal_code: 019-0111 street_address: KAMIINNAI, Yuzawa Shi, Akita Ken, 019-0111 @@ -230,3 +231,22 @@ location: geonames_id: 2110460 geonames_name: Yuzawa feature_code: PPLA2 +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:05:37.960769+00:00' + source_url: https://www.city-yuzawa.jp/soshiki/90/2913.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.city-yuzawa.jp/apple-touch-icon.png + source_url: https://www.city-yuzawa.jp/soshiki/90/2913.html + css_selector: '[document] > html > head > link:nth-of-type(3)' + retrieved_on: '2025-12-23T17:05:37.960769+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 2 diff --git a/data/custodian/JP-06-ABA-M-FPIAFSM.yaml b/data/custodian/JP-06-ABA-M-FPIAFSM.yaml index e1b6a8208f..bed8ea8d56 100644 --- a/data/custodian/JP-06-ABA-M-FPIAFSM.yaml +++ b/data/custodian/JP-06-ABA-M-FPIAFSM.yaml @@ -546,3 +546,37 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/A3aHuXI0GEw/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:05:57.288773+00:00' + source_url: http://asakura-museum.pref.fukui.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: logo_url + claim_value: https://asakura-museum.pref.fukui.lg.jp/theme/ver2022/img/common/logo/type1-white.png + source_url: http://asakura-museum.pref.fukui.lg.jp + css_selector: '#HeaderMenuArea > div.PageWidthSetter.Wide > div.SiteHeaderBlock + > div.BlockHeader.bottom > h1.SiteLogo > a > img' + retrieved_on: '2025-12-23T17:05:57.288773+00:00' + extraction_method: crawl4ai_header_logo + detection_confidence: high + alt_text: '' + - claim_type: favicon_url + claim_value: http://asakura-museum.pref.fukui.lg.jp/theme/ver2022/img/common/icon/sp_favicon.png?_=20220921 + source_url: http://asakura-museum.pref.fukui.lg.jp + css_selector: '[document] > html.chrome.chrome134 > head > link:nth-of-type(8)' + retrieved_on: '2025-12-23T17:05:57.288773+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: 144x144 + - claim_type: og_image_url + claim_value: https://asakura-museum.pref.fukui.lg.jp/theme/ver2022/img/common/ogp.png + source_url: http://asakura-museum.pref.fukui.lg.jp + css_selector: '[document] > html.chrome.chrome134 > head > meta:nth-of-type(12)' + retrieved_on: '2025-12-23T17:05:57.288773+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 3 + has_primary_logo: true + has_favicon: true + has_og_image: true + favicon_count: 2 diff --git a/data/custodian/JP-06-FUK-L-FL.yaml b/data/custodian/JP-06-FUK-L-FL.yaml index 1b4933fec8..8e84c0936e 100644 --- a/data/custodian/JP-06-FUK-L-FL.yaml +++ b/data/custodian/JP-06-FUK-L-FL.yaml @@ -358,3 +358,28 @@ location: geonames_id: 9865215 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:27.239068+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:06:10.256942+00:00' + source_url: http://toshokan.city.fukuoka.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://toshokan.city.fukuoka.lg.jp/favicon.ico + source_url: http://toshokan.city.fukuoka.lg.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T17:06:10.256942+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: image/x-icon + favicon_sizes: '' + - claim_type: og_image_url + claim_value: https://toshokan.city.fukuoka.lg.jp/img/og.jpg + source_url: http://toshokan.city.fukuoka.lg.jp + css_selector: '[document] > html > head > meta:nth-of-type(11)' + retrieved_on: '2025-12-23T17:06:10.256942+00:00' + extraction_method: crawl4ai_meta_og + summary: + total_claims: 2 + has_primary_logo: false + has_favicon: true + has_og_image: true + favicon_count: 1 diff --git a/data/custodian/JP-06-KIT-M-FCSM.yaml b/data/custodian/JP-06-KIT-M-FCSM.yaml index 647795c22c..3714fbe5ac 100644 --- a/data/custodian/JP-06-KIT-M-FCSM.yaml +++ b/data/custodian/JP-06-KIT-M-FCSM.yaml @@ -632,3 +632,22 @@ youtube_enrichment: comments: [] thumbnail_url: https://i.ytimg.com/vi/up0K74Tz10Q/hqdefault.jpg status: SUCCESS +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:08:07.511730+00:00' + source_url: http://info.pref.fukui.jp/koreki/index.html + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: http://info.pref.fukui.jp/koreki/img/fukuiicon.png + source_url: http://info.pref.fukui.jp/koreki/index.html + css_selector: '[document] > html > head > link:nth-of-type(4)' + retrieved_on: '2025-12-23T17:08:07.511730+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/JP-06-KOI-L-FPL-fukui_prefectural_library.yaml b/data/custodian/JP-06-KOI-L-FPL-fukui_prefectural_library.yaml index 82d84567f0..c139d2cd57 100644 --- a/data/custodian/JP-06-KOI-L-FPL-fukui_prefectural_library.yaml +++ b/data/custodian/JP-06-KOI-L-FPL-fukui_prefectural_library.yaml @@ -443,3 +443,22 @@ location: geonames_id: 9865219 feature_code: PPL normalization_timestamp: '2025-12-09T06:53:27.456012+00:00' +logo_enrichment: + enrichment_timestamp: '2025-12-23T17:08:16.365626+00:00' + source_url: https://www.library-archives.pref.fukui.lg.jp + extraction_method: crawl4ai + claims: + - claim_type: favicon_url + claim_value: https://www.library-archives.pref.fukui.lg.jp/favicon.ico + source_url: https://www.library-archives.pref.fukui.lg.jp + css_selector: '[document] > html > head > link' + retrieved_on: '2025-12-23T17:08:16.365626+00:00' + extraction_method: crawl4ai_link_rel + favicon_type: '' + favicon_sizes: '' + summary: + total_claims: 1 + has_primary_logo: false + has_favicon: true + has_og_image: false + favicon_count: 1 diff --git a/data/custodian/NL-DR-WES-M-MVP.yaml b/data/custodian/NL-DR-WES-M-MVP.yaml index 3c6b7ef4be..d806d2b84b 100644 --- a/data/custodian/NL-DR-WES-M-MVP.yaml +++ b/data/custodian/NL-DR-WES-M-MVP.yaml @@ -101,3 +101,10 @@ provenance: - CRITICAL - LinkedIn had WRONG website linked (museumstevensweert.nl instead of papierknipmuseum.nl) - Address verified via correct museum website research on 2025-12-17 - Founded in 1950s by Wiecher Lever, moved to Westerbork in 1965 +wikidata_enrichment: + wikidata_id: Q19832258 + wikidata_url: https://www.wikidata.org/wiki/Q19832258 + label: Museum van Papierknipkunst + description: Museum van Papierknipkunst + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-DR-ZUI-M-DM.yaml b/data/custodian/NL-DR-ZUI-M-DM.yaml index 29c6b0d6e8..bcd791cac4 100644 --- a/data/custodian/NL-DR-ZUI-M-DM.yaml +++ b/data/custodian/NL-DR-ZUI-M-DM.yaml @@ -100,3 +100,10 @@ provenance: - Province corrected from GR (Groningen) to DR (Drenthe) - City resolved from XXX to ZUI (Zuidlaren) - LinkedIn location field was incorrect - actual address is in Drenthe +wikidata_enrichment: + wikidata_id: null + label: Doe Museum + description: museum in Zuidlaren, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FL-ALM-M-DDAMAD.yaml b/data/custodian/NL-FL-ALM-M-DDAMAD.yaml index 00e0806801..6e4506d97b 100644 --- a/data/custodian/NL-FL-ALM-M-DDAMAD.yaml +++ b/data/custodian/NL-FL-ALM-M-DDAMAD.yaml @@ -83,3 +83,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - Location enriched from institution name on 2025-12-16 +wikidata_enrichment: + wikidata_id: null + label: Dutch Digital Art Museum Almere (DDAMA) + description: museum in Almere, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FL-NAG-M-MN.yaml b/data/custodian/NL-FL-NAG-M-MN.yaml index 5e52c82773..89d1ceb70e 100644 --- a/data/custodian/NL-FL-NAG-M-MN.yaml +++ b/data/custodian/NL-FL-NAG-M-MN.yaml @@ -74,3 +74,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Museum Nagele + description: museum in Nagele, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FR-ALD-M-MWA.yaml b/data/custodian/NL-FR-ALD-M-MWA.yaml index 1cea507eff..737427924d 100644 --- a/data/custodian/NL-FR-ALD-M-MWA.yaml +++ b/data/custodian/NL-FR-ALD-M-MWA.yaml @@ -73,3 +73,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Museum De Waach Aldeboarn + description: museum in Aldeboarn, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FR-BOE-M-MBK.yaml b/data/custodian/NL-FR-BOE-M-MBK.yaml index 00fc6fc1c2..09bf4367a6 100644 --- a/data/custodian/NL-FR-BOE-M-MBK.yaml +++ b/data/custodian/NL-FR-BOE-M-MBK.yaml @@ -74,3 +74,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Museum Boer Kip + description: museum in Boer, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FR-BUR-M-MBO.yaml b/data/custodian/NL-FR-BUR-M-MBO.yaml index 36feaa22fa..55e84fb4a9 100644 --- a/data/custodian/NL-FR-BUR-M-MBO.yaml +++ b/data/custodian/NL-FR-BUR-M-MBO.yaml @@ -81,3 +81,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Museum Buren en Oranje + description: museum in Buren (Friesland), Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution. Note - Q98893871 is Museum Buren + en Oranje in Buren (Gelderland), which is a different institution. diff --git a/data/custodian/NL-FR-DOK-L-BNF.yaml b/data/custodian/NL-FR-DOK-L-BNF.yaml index c284f84e14..353c5e24b0 100644 --- a/data/custodian/NL-FR-DOK-L-BNF.yaml +++ b/data/custodian/NL-FR-DOK-L-BNF.yaml @@ -870,3 +870,10 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 1 +wikidata_enrichment: + wikidata_id: null + label: Bibliotheken Noord Fryslân + description: library in Dokkum, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FR-DOK-M-MDB.yaml b/data/custodian/NL-FR-DOK-M-MDB.yaml index 57aef30121..465afc2e32 100644 --- a/data/custodian/NL-FR-DOK-M-MDB.yaml +++ b/data/custodian/NL-FR-DOK-M-MDB.yaml @@ -73,3 +73,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q1863317 + wikidata_url: https://www.wikidata.org/wiki/Q1863317 + label: Museum Dokkum + description: museum in Friesland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: LinkedIn emic_name contains artifact "Naar beginpagina" (homepage link text) - actual name is Museum Dokkum diff --git a/data/custodian/NL-FR-GRO-M-MM-mineralogisch_museum.yaml b/data/custodian/NL-FR-GRO-M-MM-mineralogisch_museum.yaml index 5474bcd256..bdc45c9b5c 100644 --- a/data/custodian/NL-FR-GRO-M-MM-mineralogisch_museum.yaml +++ b/data/custodian/NL-FR-GRO-M-MM-mineralogisch_museum.yaml @@ -82,3 +82,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - 'Location enriched on 2025-12-17 via Exa web search: Grou, Friesland' +wikidata_enrichment: + wikidata_id: Q20622462 + wikidata_url: https://www.wikidata.org/wiki/Q20622462 + label: Mineralogisch Museum + description: museum in Grou, Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-FR-HAR-M-ODS.yaml b/data/custodian/NL-FR-HAR-M-ODS.yaml index 608d604f3b..00712b2553 100644 --- a/data/custodian/NL-FR-HAR-M-ODS.yaml +++ b/data/custodian/NL-FR-HAR-M-ODS.yaml @@ -117,3 +117,11 @@ provenance: notes: - Enriched from institutional website with verified address - Emic name corrected from English to Dutch +wikidata_enrichment: + wikidata_id: Q12012586 + wikidata_url: https://www.wikidata.org/wiki/Q12012586 + label: De Spitkeet + description: museum in Friesland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Wikidata uses short name "De Spitkeet", full name is "Openluchtmuseum De Spitkeet" diff --git a/data/custodian/NL-FR-HAR-M-OMS.yaml b/data/custodian/NL-FR-HAR-M-OMS.yaml index 7c6151c6ff..61209e859f 100644 --- a/data/custodian/NL-FR-HAR-M-OMS.yaml +++ b/data/custodian/NL-FR-HAR-M-OMS.yaml @@ -110,3 +110,12 @@ provenance: - Created from unmatched LinkedIn company profile - Location verified via web research - De Dunen 3, 9281 KT Harkema - Upgraded from NL-XX-XXX-M-OAMS to NL-FR-HAR-M-OMS + - POTENTIAL DUPLICATE - See also NL-FR-HAR-M-ODS.yaml (same museum) +wikidata_enrichment: + wikidata_id: Q12012586 + wikidata_url: https://www.wikidata.org/wiki/Q12012586 + label: De Spitkeet + description: museum in Friesland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Wikidata uses short name "De Spitkeet", full name is "Openluchtmuseum De Spitkeet" diff --git a/data/custodian/NL-FR-HIN-M-MH.yaml b/data/custodian/NL-FR-HIN-M-MH.yaml index 9a9174c11f..0e604559fa 100644 --- a/data/custodian/NL-FR-HIN-M-MH.yaml +++ b/data/custodian/NL-FR-HIN-M-MH.yaml @@ -92,3 +92,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: Q141355 + wikidata_url: https://www.wikidata.org/wiki/Q141355 + label: Museum Hindeloopen + description: museum in Súdwest-Fryslân + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-FR-KOL-M-MMA.yaml b/data/custodian/NL-FR-KOL-M-MMA.yaml index 362600f634..062f52c347 100644 --- a/data/custodian/NL-FR-KOL-M-MMA.yaml +++ b/data/custodian/NL-FR-KOL-M-MMA.yaml @@ -97,3 +97,10 @@ provenance: - Address verified via official website on 2025-12-17 - 'Previous location resolution method: UNRESOLVED' - 'Current location resolution method: VERIFIED_ADDRESS' +wikidata_enrichment: + wikidata_id: Q2447114 + wikidata_url: https://www.wikidata.org/wiki/Q2447114 + label: Kollumer Museum Mr. Andreae + description: Streekmuseum in Friesland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-FR-LEE-M-FLMD.yaml b/data/custodian/NL-FR-LEE-M-FLMD.yaml index 45eccf17d7..260bf8de83 100644 --- a/data/custodian/NL-FR-LEE-M-FLMD.yaml +++ b/data/custodian/NL-FR-LEE-M-FLMD.yaml @@ -79,3 +79,11 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - Location enriched from institution name on 2025-12-16 +wikidata_enrichment: + wikidata_id: Q2869457 + wikidata_url: https://www.wikidata.org/wiki/Q2869457 + label: Frysk Letterkundich Museum en Dokumintaasjesintrum + description: voormalig museum in Leeuwarden + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Wikidata indicates this is a former museum ("voormalig museum") diff --git a/data/custodian/NL-FR-LEE-M-LML.yaml b/data/custodian/NL-FR-LEE-M-LML.yaml index 147ea1da93..82e60987dd 100644 --- a/data/custodian/NL-FR-LEE-M-LML.yaml +++ b/data/custodian/NL-FR-LEE-M-LML.yaml @@ -77,3 +77,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: null + label: The Living Museum Leeuwarden + description: museum in Leeuwarden, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FR-LEE-M-SMF.yaml b/data/custodian/NL-FR-LEE-M-SMF.yaml index 0b4c3261ed..93ef45e557 100644 --- a/data/custodian/NL-FR-LEE-M-SMF.yaml +++ b/data/custodian/NL-FR-LEE-M-SMF.yaml @@ -114,3 +114,10 @@ provenance: - Created from unmatched LinkedIn company profile - Enriched with location and contact details from official website - Location resolved via GeoNames to Leeuwarden, Friesland +wikidata_enrichment: + wikidata_id: null + label: Scouting Museum Fryslân + description: museum in Leeuwarden, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FR-LEM-M-ML.yaml b/data/custodian/NL-FR-LEM-M-ML.yaml index ca094224db..ca112aa8ec 100644 --- a/data/custodian/NL-FR-LEM-M-ML.yaml +++ b/data/custodian/NL-FR-LEM-M-ML.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Museum Lemmer + description: museum in Lemmer, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FR-LEM-M-MSS.yaml b/data/custodian/NL-FR-LEM-M-MSS.yaml index 721b551452..a5be4f0b4d 100644 --- a/data/custodian/NL-FR-LEM-M-MSS.yaml +++ b/data/custodian/NL-FR-LEM-M-MSS.yaml @@ -76,3 +76,11 @@ identifiers: - identifier_scheme: Wikidata identifier_value: Q13137168 identifier_url: https://www.wikidata.org/wiki/Q13137168 +wikidata_enrichment: + wikidata_id: Q13137168 + wikidata_url: https://www.wikidata.org/wiki/Q13137168 + label: Museum Sloten + description: museum in Sloten + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Museum Stedhûs Sleat is the Frisian name; also known as Museum Sloten diff --git a/data/custodian/NL-FR-LEM-M-TLIMM.yaml b/data/custodian/NL-FR-LEM-M-TLIMM.yaml index a4a4bb8424..69b72f92e3 100644 --- a/data/custodian/NL-FR-LEM-M-TLIMM.yaml +++ b/data/custodian/NL-FR-LEM-M-TLIMM.yaml @@ -128,3 +128,10 @@ provenance: - Private museum dedicated to Indian motorcycles - Operated by Tony "Indian" Leenes - LinkedIn follower count of 5.6M is anomalous data error +wikidata_enrichment: + wikidata_id: null + label: Tony Leenes Indian Motorcycle Museum + description: private motorcycle museum in Lemmer, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FR-MOD-M-MF.yaml b/data/custodian/NL-FR-MOD-M-MF.yaml index a5b2be4612..29ee67db16 100644 --- a/data/custodian/NL-FR-MOD-M-MF.yaml +++ b/data/custodian/NL-FR-MOD-M-MF.yaml @@ -124,3 +124,11 @@ provenance: - Created from unmatched LinkedIn company profile - 'Original location resolution method: PROVINCE_FROM_CITY_FIELD' - '2025-12-20: City resolved XXX→MOD, website corrected, KvK and parent organization added based on official website research' +wikidata_enrichment: + wikidata_id: Q7477577 + wikidata_url: https://www.wikidata.org/wiki/Q7477577 + label: Museum 't Fiskershúske + description: bouwwerk in Dongeradeel + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Open-air museum about traditional Wadden Sea fishing culture diff --git a/data/custodian/NL-FR-RIE-M-RPM.yaml b/data/custodian/NL-FR-RIE-M-RPM.yaml index 8a563dcfc1..0412d25d8f 100644 --- a/data/custodian/NL-FR-RIE-M-RPM.yaml +++ b/data/custodian/NL-FR-RIE-M-RPM.yaml @@ -73,3 +73,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q2743903 + wikidata_url: https://www.wikidata.org/wiki/Q2743903 + label: Rien Poortvlietmuseum + description: museum over de beeldend kunstenaar Rien Poortvliet + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Museum dedicated to artist Rien Poortvliet; location in GHCID may be incorrect (Rien is a village, not where this museum is) diff --git a/data/custodian/NL-FR-RIE-M-RPMK.yaml b/data/custodian/NL-FR-RIE-M-RPMK.yaml index 7f797a52a1..0cf3252edb 100644 --- a/data/custodian/NL-FR-RIE-M-RPMK.yaml +++ b/data/custodian/NL-FR-RIE-M-RPMK.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Stichting Rien Poortvliet Museum Korendijk + description: foundation for Rien Poortvliet museum + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No separate Wikidata entry for the foundation; may be related to Q2743903 (Rien Poortvlietmuseum) diff --git a/data/custodian/NL-FR-RYP-M-SM-stihl_museum.yaml b/data/custodian/NL-FR-RYP-M-SM-stihl_museum.yaml index ec0b656c4f..dfd8f128c4 100644 --- a/data/custodian/NL-FR-RYP-M-SM-stihl_museum.yaml +++ b/data/custodian/NL-FR-RYP-M-SM-stihl_museum.yaml @@ -105,3 +105,10 @@ provenance: - Location resolved from institutional website showing address in Ryptsjerk - Private museum run by Siem Terpstra, visits by appointment only - Not affiliated with official STIHL Brand World in Germany +wikidata_enrichment: + wikidata_id: null + label: Stihl Museum + description: private chainsaw museum in Ryptsjerk, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this private museum diff --git a/data/custodian/NL-FR-TER-M-BMT.yaml b/data/custodian/NL-FR-TER-M-BMT.yaml index a942f9ab50..db9c89dad4 100644 --- a/data/custodian/NL-FR-TER-M-BMT.yaml +++ b/data/custodian/NL-FR-TER-M-BMT.yaml @@ -76,3 +76,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - Location enriched from institution name on 2025-12-16 +wikidata_enrichment: + wikidata_id: Q79317785 + wikidata_url: https://www.wikidata.org/wiki/Q79317785 + label: Bunker Museum Terschelling + description: museum op Terschelling + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-FR-WAR-M-MW.yaml b/data/custodian/NL-FR-WAR-M-MW.yaml index ba9c3ee042..b4eaeb23d8 100644 --- a/data/custodian/NL-FR-WAR-M-MW.yaml +++ b/data/custodian/NL-FR-WAR-M-MW.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: null + label: Museum Warten + description: museum in Warten, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-FR-WOL-L-BW.yaml b/data/custodian/NL-FR-WOL-L-BW.yaml index dcb4007137..6f5e8eae29 100644 --- a/data/custodian/NL-FR-WOL-L-BW.yaml +++ b/data/custodian/NL-FR-WOL-L-BW.yaml @@ -536,8 +536,13 @@ ch_annotator: integrated_from: netherlands_complete_ch_annotator.yaml integration_date: '2025-12-06T23:20:45.782206+00:00' match_type: name -wikidata_enrichment_status: NOT_FOUND -wikidata_search_timestamp: '2025-12-08T08:52:13.003305+00:00' +wikidata_enrichment: + wikidata_id: null + label: Bibliotheek Wolvega + description: library in Wolvega, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution. Previous search 2025-12-08 also not found. location: latitude: 52.8771493 longitude: 6.0055689 diff --git a/data/custodian/NL-FR-WOR-M-JH.yaml b/data/custodian/NL-FR-WOR-M-JH.yaml index 137a3f40d7..90d9944891 100644 --- a/data/custodian/NL-FR-WOR-M-JH.yaml +++ b/data/custodian/NL-FR-WOR-M-JH.yaml @@ -82,3 +82,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Location resolution method: UNRESOLVED' - 'Location enriched on 2025-12-17 via Exa web search: Workum, Friesland' +wikidata_enrichment: + wikidata_id: Q2786191 + wikidata_url: https://www.wikidata.org/wiki/Q2786191 + label: Jopie Huisman Museum + description: museum in Súdwest-Fryslân + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-GE-AAL-M-SMK.yaml b/data/custodian/NL-GE-AAL-M-SMK.yaml index 59cc1f254d..16680bf3f9 100644 --- a/data/custodian/NL-GE-AAL-M-SMK.yaml +++ b/data/custodian/NL-GE-AAL-M-SMK.yaml @@ -114,3 +114,10 @@ provenance: - Location resolved via GeoNames to Aalten, Gelderland - LinkedIn website shortlink (lnkd.in/ezz5r9nF) is broken - Museum may be for sale per saabmuseumforsale.nl +wikidata_enrichment: + wikidata_id: null + label: Saab Museum Kempink + description: private automobile museum in Aalten, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-APE-E-K.yaml b/data/custodian/NL-GE-APE-E-K.yaml index 96f335a265..79141d492d 100644 --- a/data/custodian/NL-GE-APE-E-K.yaml +++ b/data/custodian/NL-GE-APE-E-K.yaml @@ -241,3 +241,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: Q26258118 + wikidata_url: https://www.wikidata.org/wiki/Q26258118 + label: Kadaster + description: zelfstandig bestuursorgaan in Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup diff --git a/data/custodian/NL-GE-APE-M-A.yaml b/data/custodian/NL-GE-APE-M-A.yaml index 6be06f0c19..b0063d3309 100644 --- a/data/custodian/NL-GE-APE-M-A.yaml +++ b/data/custodian/NL-GE-APE-M-A.yaml @@ -213,6 +213,14 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: PROVINCE_FROM_CITY_FIELD' +wikidata_enrichment: + wikidata_id: Q618187 + wikidata_url: https://www.wikidata.org/wiki/Q618187 + label: Apenheul + description: dierentuin in Nederland + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Wikidata ID already present in identifiers section identifiers: - identifier_scheme: Wikidata identifier_value: Q618187 diff --git a/data/custodian/NL-GE-APE-M-MBB.yaml b/data/custodian/NL-GE-APE-M-MBB.yaml index c811fdde91..9a2504cf9f 100644 --- a/data/custodian/NL-GE-APE-M-MBB.yaml +++ b/data/custodian/NL-GE-APE-M-MBB.yaml @@ -145,3 +145,10 @@ provenance: - Created from unmatched LinkedIn company profile - 'Original location resolution method: PROVINCE_FROM_CITY_FIELD' - '2025-12-20: Province corrected FL→GE, city resolved XXX→APE. Distributed museum with visitor locations documented separately.' +wikidata_enrichment: + wikidata_id: null + label: Museum Bescherming Bevolking + description: civil protection museum in Apeldoorn, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-APE-M-MZH.yaml b/data/custodian/NL-GE-APE-M-MZH.yaml index 51b088c197..5a4858bf4b 100644 --- a/data/custodian/NL-GE-APE-M-MZH.yaml +++ b/data/custodian/NL-GE-APE-M-MZH.yaml @@ -97,3 +97,10 @@ provenance: - Location verified via web search - museum website confirms Schotweg 63, 7312 AB Apeldoorn - Small private museum, open by appointment only - Founded 2001 by Jan Bark +wikidata_enrichment: + wikidata_id: null + label: Museum in de Zevende Hemel + description: private museum in Apeldoorn, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-ARN-A-SIFA.yaml b/data/custodian/NL-GE-ARN-A-SIFA.yaml index 614c98db1f..0d1a791b39 100644 --- a/data/custodian/NL-GE-ARN-A-SIFA.yaml +++ b/data/custodian/NL-GE-ARN-A-SIFA.yaml @@ -192,3 +192,10 @@ timespan: sources: - 'Linkup web search: https://www.yumpu.com/nl/document/view/31673067/stichting-indisch-thee-indisch-thee-familie-archief' notes: 'Found via pattern: full_date_nl' +wikidata_enrichment: + wikidata_id: null + label: Stichting Indisch Familie Archief + description: archive in Arnhem, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-ARN-I-GT.yaml b/data/custodian/NL-GE-ARN-I-GT.yaml index 897f8e755b..3d68db9728 100644 --- a/data/custodian/NL-GE-ARN-I-GT.yaml +++ b/data/custodian/NL-GE-ARN-I-GT.yaml @@ -315,3 +315,10 @@ logo_enrichment: has_favicon: true has_og_image: true favicon_count: 3 +wikidata_enrichment: + wikidata_id: null + label: Gemeente Texel + description: intangible heritage custodian in Den Burg, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Q9966 (Texel island/municipality) not used - describes geographic entity, not the heritage custodian function diff --git a/data/custodian/NL-GE-ARN-I-KIEN.yaml b/data/custodian/NL-GE-ARN-I-KIEN.yaml index 8033a45b94..77213a8198 100644 --- a/data/custodian/NL-GE-ARN-I-KIEN.yaml +++ b/data/custodian/NL-GE-ARN-I-KIEN.yaml @@ -255,3 +255,10 @@ timespan: sources: - 'Linkup web search: https://www.immaterieelerfgoed.nl/nl/kenniscentrum' notes: 'Found via pattern: sinds' +wikidata_enrichment: + wikidata_id: null + label: Kenniscentrum Immaterieel Erfgoed Nederland + description: intangible heritage knowledge center in Arnhem, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-ARN-I-S-de_stoelenmatter.yaml b/data/custodian/NL-GE-ARN-I-S-de_stoelenmatter.yaml index c2638c5a07..b53c0c9a90 100644 --- a/data/custodian/NL-GE-ARN-I-S-de_stoelenmatter.yaml +++ b/data/custodian/NL-GE-ARN-I-S-de_stoelenmatter.yaml @@ -380,3 +380,10 @@ logo_enrichment: has_favicon: true has_og_image: false favicon_count: 1 +wikidata_enrichment: + wikidata_id: null + label: De Stoelenmatter + description: intangible heritage custodian (chair caning) in Zundert, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-ARN-M-HMGJ.yaml b/data/custodian/NL-GE-ARN-M-HMGJ.yaml index d4d98f545f..1c7c5fa01c 100644 --- a/data/custodian/NL-GE-ARN-M-HMGJ.yaml +++ b/data/custodian/NL-GE-ARN-M-HMGJ.yaml @@ -70,3 +70,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: null + label: Historisch Museum Grenadiers en Jagers + description: museum in Arnhem, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-ARN-M-MMKA.yaml b/data/custodian/NL-GE-ARN-M-MMKA.yaml index 45943f2f98..c40d46bfe2 100644 --- a/data/custodian/NL-GE-ARN-M-MMKA.yaml +++ b/data/custodian/NL-GE-ARN-M-MMKA.yaml @@ -70,3 +70,11 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' +wikidata_enrichment: + wikidata_id: Q2114028 + wikidata_url: https://www.wikidata.org/wiki/Q2114028 + label: Museum Arnhem + description: museum in Arnhem + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: Wikidata label is "Museum Arnhem" (current name), LinkedIn name "Museum voor Moderne Kunst Arnhem" may be former name diff --git a/data/custodian/NL-GE-ARN-M-TBM.yaml b/data/custodian/NL-GE-ARN-M-TBM.yaml index bf50c8bf0c..26d8e6c6f6 100644 --- a/data/custodian/NL-GE-ARN-M-TBM.yaml +++ b/data/custodian/NL-GE-ARN-M-TBM.yaml @@ -123,3 +123,10 @@ provenance: - Only trolleybus museum in the Netherlands - Arnhem is the only Dutch city with trolleybus service - Located at Connexxion bus depot +wikidata_enrichment: + wikidata_id: null + label: Trolley-Bus Museum + description: museum in Arnhem, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-ARN-M-VVMA.yaml b/data/custodian/NL-GE-ARN-M-VVMA.yaml index ae5fddb6a3..c8834bcbe1 100644 --- a/data/custodian/NL-GE-ARN-M-VVMA.yaml +++ b/data/custodian/NL-GE-ARN-M-VVMA.yaml @@ -71,3 +71,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: null + label: Vereniging Vrienden Museum Arnhem + description: museum friends association in Arnhem, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-BAR-M-MGG.yaml b/data/custodian/NL-GE-BAR-M-MGG.yaml index e219e514b4..79d4e30098 100644 --- a/data/custodian/NL-GE-BAR-M-MGG.yaml +++ b/data/custodian/NL-GE-BAR-M-MGG.yaml @@ -84,3 +84,10 @@ provenance: - Reverted incorrect location enrichment on 2025-12-17 - LinkedIn HTML extraction was extracting wrong company's data - 'Location enriched on 2025-12-17 via Exa web search: Barneveld, Gelderland' +wikidata_enrichment: + wikidata_id: null + label: Museum De Grote Glind + description: museum in Barneveld, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/data/custodian/NL-GE-CEN-R-CFMCIRZUB.yaml b/data/custodian/NL-GE-CEN-R-CFMCIRZUB.yaml index 843497400b..f6afabbb8c 100644 --- a/data/custodian/NL-GE-CEN-R-CFMCIRZUB.yaml +++ b/data/custodian/NL-GE-CEN-R-CFMCIRZUB.yaml @@ -157,3 +157,12 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: CITY_INFERRED_FROM_NAME' + - 'DATA QUALITY ISSUE: This is a German institution (University of Bremen) incorrectly placed in Netherlands files' +wikidata_enrichment: + wikidata_id: Q191230 + wikidata_url: https://www.wikidata.org/wiki/Q191230 + label: Centre for Media, Communication and Information Research + description: research center at the University of Bremen + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: WARNING - This is a German institution at University of Bremen, NOT a Netherlands institution. File is incorrectly placed in NL collection. diff --git a/data/custodian/NL-GE-CUL-M-JRM.yaml b/data/custodian/NL-GE-CUL-M-JRM.yaml index 1b79b1e5b5..ecda2829ff 100644 --- a/data/custodian/NL-GE-CUL-M-JRM.yaml +++ b/data/custodian/NL-GE-CUL-M-JRM.yaml @@ -77,3 +77,10 @@ provenance: notes: - Created from unmatched LinkedIn company profile - 'Location resolution method: GEONAMES_LOOKUP' +wikidata_enrichment: + wikidata_id: null + label: Jan van Riebeeck Museum + description: museum in Culemborg, Netherlands + enrichment_timestamp: '2025-01-13T00:00:00Z' + enrichment_method: manual_wikidata_lookup + notes: No Wikidata entry found for this institution diff --git a/frontend/public/schemas/20251121/linkml/manifest.json b/frontend/public/schemas/20251121/linkml/manifest.json index b77f85a747..a8ea872d2f 100644 --- a/frontend/public/schemas/20251121/linkml/manifest.json +++ b/frontend/public/schemas/20251121/linkml/manifest.json @@ -1,5 +1,5 @@ { - "generated": "2025-12-23T14:47:28.725Z", + "generated": "2025-12-23T16:58:31.474Z", "version": "1.0.0", "categories": [ { diff --git a/frontend/src/components/uml/CustodianTypeIndicator3D.tsx b/frontend/src/components/uml/CustodianTypeIndicator3D.tsx index 1a91d4bf2e..5707191f59 100644 --- a/frontend/src/components/uml/CustodianTypeIndicator3D.tsx +++ b/frontend/src/components/uml/CustodianTypeIndicator3D.tsx @@ -826,6 +826,9 @@ export const CustodianTypeIndicator3D: React.FC = // Face click handling (raycasting) - only for onFaceClick callback const handleClick = useCallback((e: React.MouseEvent) => { + // Prevent event from bubbling up to parent elements (important for navigation stability) + e.stopPropagation(); + // If expandable, toggle expansion if (expandable) { handleExpansionClick(); diff --git a/frontend/src/pages/LinkMLViewerPage.tsx b/frontend/src/pages/LinkMLViewerPage.tsx index 5ce59f25df..342f7e9bc4 100644 --- a/frontend/src/pages/LinkMLViewerPage.tsx +++ b/frontend/src/pages/LinkMLViewerPage.tsx @@ -748,9 +748,12 @@ const LinkMLViewerPage: React.FC = () => { }); }, []); - // Handle URL parameters for deep linking - const handleUrlParams = useCallback((cats: SchemaCategory[]) => { - const classParam = searchParams.get('class'); + // Track if initialization has already happened (prevents re-init on URL param changes) + const isInitializedRef = useRef(false); + + // Handle URL parameters for deep linking (only used on initial mount) + const handleUrlParams = useCallback((cats: SchemaCategory[], currentSearchParams: URLSearchParams) => { + const classParam = currentSearchParams.get('class'); if (classParam) { setHighlightedClass(classParam); @@ -766,10 +769,18 @@ const LinkMLViewerPage: React.FC = () => { setExpandedSections(prev => new Set([...prev, 'classes'])); } } - }, [searchParams]); + }, []); - // Initialize schema file list from manifest + // Initialize schema file list from manifest - RUNS ONLY ONCE on mount + // Note: Does NOT depend on searchParams to prevent re-initialization when + // custodian filter changes the URL. Deep linking for ?class= is handled + // by reading searchParams directly inside the effect on initial mount only. useEffect(() => { + // Skip if already initialized (prevents re-init on searchParams changes from filter) + if (isInitializedRef.current) { + return; + } + const initializeSchemas = async () => { setIsLoading(true); try { @@ -783,14 +794,17 @@ const LinkMLViewerPage: React.FC = () => { setCategories(cats); - // Check URL params first for deep linking - handleUrlParams(cats); + // Check URL params for deep linking (read searchParams directly, don't depend on it) + handleUrlParams(cats, searchParams); // Select main schema by default if no URL param set the schema const classParam = searchParams.get('class'); if (!classParam && cats[0]?.files.length > 0) { setSelectedSchema(cats[0].files[0]); } + + // Mark as initialized to prevent re-running + isInitializedRef.current = true; } catch (err) { setError(t('failedToInit')); console.error(err); @@ -800,7 +814,8 @@ const LinkMLViewerPage: React.FC = () => { }; initializeSchemas(); - }, [handleUrlParams, searchParams]); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); // Empty deps - run only on mount // Scroll to highlighted class when it changes useEffect(() => { diff --git a/scripts/sync/oxigraph_person_sync.py b/scripts/sync/oxigraph_person_sync.py index efa638b611..6f00d7b1f1 100644 --- a/scripts/sync/oxigraph_person_sync.py +++ b/scripts/sync/oxigraph_person_sync.py @@ -334,8 +334,11 @@ class PersonRDFConverter: if ht: self.graph.add((person_uri, HC.heritageType, Literal(ht))) - # Link to custodian (employer) - custodian_name = source_staff.get('custodian') + # Link to custodian (employer) - check multiple schema locations + custodian_name = ( + source_staff.get('custodian') or + data.get('network_context', {}).get('source_custodian') + ) if custodian_name: custodian_uri = self._resolve_custodian_uri(custodian_name) if custodian_uri: @@ -345,8 +348,9 @@ class PersonRDFConverter: # Also store as literal for cases where lookup fails self.graph.add((person_uri, HC.custodianName, Literal(custodian_name))) - # Skills (handle null values) - for skill in (profile_data.get('skills') or []): + # Skills (handle null values) - check multiple schema locations + skills = profile_data.get('skills') or data.get('skills') or [] + for skill in skills: if skill: self.graph.add((person_uri, SCHEMA.knowsAbout, Literal(skill))) @@ -359,8 +363,15 @@ class PersonRDFConverter: if lang_name: self.graph.add((person_uri, SCHEMA.knowsLanguage, Literal(lang_name))) - # Experience (as structured data) - handle both 'experience' and 'career_history' keys - experience = profile_data.get('experience') or profile_data.get('career_history') or [] + # Experience (as structured data) - handle multiple schema variants: + # 1. profile_data.experience / profile_data.career_history (standard) + # 2. professional_experience (alternative schema) + experience = ( + profile_data.get('experience') or + profile_data.get('career_history') or + data.get('professional_experience') or + [] + ) if experience: for i, exp in enumerate(experience): if not isinstance(exp, dict): @@ -371,8 +382,10 @@ class PersonRDFConverter: if exp.get('title'): self.graph.add((exp_node, SCHEMA.roleName, Literal(exp['title']))) - if exp.get('company'): - self.graph.add((exp_node, SCHEMA.name, Literal(exp['company']))) + # Handle both 'company' (Schema A) and 'organization' (Schema B) + company = exp.get('company') or exp.get('organization') + if company: + self.graph.add((exp_node, SCHEMA.name, Literal(company))) # Handle both date_range and duration fields, also construct from start_date/end_date duration = exp.get('date_range') or exp.get('duration') @@ -393,7 +406,8 @@ class PersonRDFConverter: self.graph.add((person_uri, SCHEMA.hasOccupation, exp_node)) # Education (handle null and both schema variants) - education = profile_data.get('education') or [] + # Schema A: profile_data.education, Schema B: root education + education = profile_data.get('education') or data.get('education') or [] if education: for edu in education: if not isinstance(edu, dict): @@ -776,12 +790,16 @@ class OxigraphPersonSyncer(BaseSyncer): if uri: processed += 1 else: + self.logger.debug(f"No person extracted from entity: {filepath.name}") failed += 1 else: # staff uris = self.converter.add_persons_from_staff_list(data, filepath) - if uris: + # Empty staff list is not a failure - it's just a file with no persons + # (e.g., LinkedIn company page with 0 visible employees) + if uris is not None: # Explicitly check for None (error) vs [] (empty) processed += 1 else: + self.logger.debug(f"Failed to process staff file: {filepath.name}") failed += 1 except json.JSONDecodeError as e: