enrich: logo enrichment progress (CZ: 220, JP: 1600)

This commit is contained in:
kempersc 2025-12-23 22:08:43 +01:00
parent 4f6ca92084
commit ce1f80d024
48 changed files with 1598 additions and 125 deletions

View file

@ -7056,7 +7056,297 @@
"CZ-10-PRA-A-AUMAVESPZ.yaml",
"CZ-10-PRA-A-AUPZSI.yaml",
"CZ-10-PRA-A-AZMVP.yaml",
"CZ-10-PRA-A-BAKPR.yaml"
"CZ-10-PRA-A-BAKPR.yaml",
"JP-10-MAE-L-GPAL.yaml",
"JP-10-MAE-L-GPCSC.yaml",
"JP-10-MAE-L-GPL.yaml",
"JP-10-MAE-L-GPLES.yaml",
"JP-10-MAE-L-GUHWL.yaml",
"JP-10-MAE-L-LGPCHS.yaml",
"JP-10-MAE-L-LITCGU.yaml",
"JP-10-MAE-L-M.yaml",
"JP-10-MAE-L-MCL.yaml",
"JP-10-MAE-L-MKGCL.yaml",
"JP-10-MAE-L-ML-maebashikokadaigakufuzoku_library.yaml",
"JP-10-MAE-L-ML.yaml",
"JP-10-MAE-L-MLE.yaml",
"JP-10-MAE-L-MLF.yaml",
"JP-10-MAE-L-MLH.yaml",
"JP-10-MAE-L-MLJ.yaml",
"JP-10-MAE-L-MLK-maebashishiritsu_library_kaigayabunkan.yaml",
"JP-10-MAE-L-MLK-maebashishiritsu_library_kasukawabunkan.yaml",
"JP-10-MAE-L-MLK-maebashishiritsu_library_kiyosatobunkan.yaml",
"JP-10-MAE-L-MLK.yaml",
"JP-10-MAE-L-MLM-maebashishiritsu_library_miyagibunkan.yaml",
"JP-10-MAE-L-MLM.yaml",
"JP-10-MAE-L-MLN.yaml",
"JP-10-MAE-L-MLO.yaml",
"JP-10-MAE-L-MLS-maebashishiritsu_library_sogokyoikupurazabunkan.yaml",
"JP-10-MAE-L-MLS-maebashishiritsu_library_sojabunkan.yaml",
"JP-10-MAE-L-MLS.yaml",
"JP-10-MAE-L-NITGCL.yaml",
"JP-10-MAE-M-ABCFM.yaml",
"JP-10-MAE-M-AM.yaml",
"JP-10-MAE-M-GSMC.yaml",
"JP-10-MAE-M-MCCSCC.yaml",
"JP-10-MAE-M-MCML.yaml",
"JP-10-MAE-M-MCSYM.yaml",
"JP-10-MAE-M-MSKMH.yaml",
"JP-10-MAE-M-MSMH.yaml",
"JP-10-MID-L-KL.yaml",
"JP-10-MID-L-M.yaml",
"JP-10-MID-L-ML-midorishiritsuomama_library.yaml",
"JP-10-MID-L-ML.yaml",
"JP-10-MID-M-IM.yaml",
"JP-10-MID-M-LMCS.yaml",
"JP-10-MID-M-MCOM.yaml",
"JP-10-MOT-L-GL.yaml",
"JP-10-NUM-L-NCL.yaml",
"JP-10-NUM-M-NCHM.yaml",
"JP-10-NUM-M-U.yaml",
"JP-10-ORA-L-CL.yaml",
"JP-10-ORA-L-I.yaml",
"JP-10-ORA-L-MTL.yaml",
"JP-10-ORA-L-OL-orachoritsu_library.yaml",
"JP-10-ORA-L-OL.yaml",
"JP-10-ORA-L-TLIL.yaml",
"JP-10-ORA-M-OMERC.yaml",
"JP-10-OTA-L-AMLO.yaml",
"JP-10-OTA-L-GL.yaml",
"JP-10-OTA-L-KL.yaml",
"JP-10-OTA-L-OL-otashiritsunitta_library.yaml",
"JP-10-OTA-L-OL-otashiritsuojima_library.yaml",
"JP-10-OTA-L-OL-otashiritsuyabuzukahommachi_library.yaml",
"JP-10-OTA-L-OL.yaml",
"JP-10-OTA-M-AMLO.yaml",
"JP-10-OTA-M-EDTMJM.yaml",
"JP-10-OTA-M-GCSM.yaml",
"JP-10-OTA-M-JSI.yaml",
"JP-10-OTA-M-NHM.yaml",
"JP-10-OTA-M-THMM.yaml",
"JP-10-REN-M-TMA.yaml",
"JP-10-SAW-L-GPWSUL.yaml",
"JP-10-SAW-L-TTL.yaml",
"JP-10-SHI-L-S-shibukawashiakaborikominkantoshoshitsu.yaml",
"JP-10-SHI-L-S.yaml",
"JP-10-SHI-L-SCL.yaml",
"JP-10-SHI-L-SL.yaml",
"JP-10-SHI-M-HMA.yaml",
"JP-10-SHI-M-IAM.yaml",
"JP-10-SHI-M-SCAHM.yaml",
"JP-10-SHI-M-SCHHM.yaml",
"JP-10-SHI-M-SCMHKSM.yaml",
"JP-10-SHI-M-TRLM.yaml",
"CZ-10-PRA-A-BANUPKIB.yaml",
"CZ-10-PRA-A-BAVZ.yaml",
"CZ-10-PRA-A-EUACVV.yaml",
"CZ-10-PRA-A-MCPUMCOSA.yaml",
"CZ-10-PRA-A-MUAVCAUTGM.yaml",
"CZ-10-PRA-A-NA.yaml",
"CZ-10-PRA-A-NKPV.yaml",
"CZ-10-PRA-A-NMADTVS.yaml",
"CZ-10-PRA-A-NMAMDH.yaml",
"CZ-10-PRA-A-NTMAAS.yaml",
"CZ-10-PRA-A-NULK.yaml",
"CZ-10-PRA-A-OAZIMAM.yaml",
"CZ-10-PRA-A-SACTU.yaml",
"CZ-10-PRA-A-SACUZK.yaml",
"CZ-10-PRA-A-SAMCP.yaml",
"CZ-10-PRA-A-SANBU.yaml",
"CZ-10-PRA-A-SANUPKIB.yaml",
"CZ-10-PRA-A-SOAVP.yaml",
"CZ-10-PRA-A-UAZK.yaml",
"CZ-10-PRA-A-UDUKAUK.yaml",
"CZ-10-PRA-A-VACKOS.yaml",
"CZ-10-PRA-A-ZI.yaml",
"CZ-10-PRA-E-AGJSSPSJZS.yaml",
"CZ-10-PRA-E-DCKK.yaml",
"CZ-10-PRA-E-GPZK.yaml",
"CZ-10-PRA-E-JSHMPK.yaml",
"CZ-10-PRA-E-JVOSSPT.yaml",
"CZ-10-PRA-E-PKSK.yaml",
"CZ-10-PRA-E-SKICZSP.yaml",
"CZ-10-PRA-E-SPSJT.yaml",
"CZ-10-PRA-E-SPSSSHMPK.yaml",
"CZ-10-PRA-E-SZSK.yaml",
"CZ-10-PRA-E-UK.yaml",
"CZ-10-PRA-E-VOSCSRKS.yaml",
"CZ-10-PRA-E-VOSISSSEMI.yaml",
"CZ-10-PRA-E-VOSONSPSOP.yaml",
"CZ-10-PRA-E-VOSSOSPGEK.yaml",
"CZ-10-PRA-E-VOSSPSDK.yaml",
"CZ-10-PRA-E-VOSSSPSSSS.yaml",
"CZ-10-PRA-E-VOSTRSUSTR.yaml",
"CZ-10-PRA-E-VOSUSUS.yaml",
"CZ-10-PRA-E-VOSZSZSVPA.yaml",
"CZ-10-PRA-E-ZKSKSC.yaml",
"CZ-10-PRA-E-ZSHSK.yaml",
"CZ-10-PRA-G-CFGR.yaml",
"CZ-10-PRA-G-GHMPC.yaml",
"CZ-10-PRA-G-NGVPK.yaml",
"CZ-10-PRA-H-BASVSMK.yaml",
"CZ-10-PRA-H-BDCSAP.yaml",
"CZ-10-PRA-H-BPK.yaml",
"CZ-10-PRA-H-CCHUURUAM.yaml",
"CZ-10-PRA-H-CKACKK.yaml",
"CZ-10-PRA-H-CPRSAFSTVP-ceska_provincie_radu_sv_augustina_farnost_sv_tomas.yaml",
"CZ-10-PRA-H-CPRSAFSTVP.yaml",
"CZ-10-PRA-H-ETSVOSTSK.yaml",
"CZ-10-PRA-H-HITS.yaml",
"CZ-10-PRA-H-KDSDMSJK.yaml",
"CZ-10-PRA-H-KKPSSK.yaml",
"CZ-10-PRA-H-MBTSK.yaml",
"CZ-10-PRA-H-PBFK.yaml",
"CZ-10-PRA-H-ZOVPK.yaml",
"CZ-10-PRA-L-A-aritma.yaml",
"CZ-10-PRA-L-A.yaml",
"CZ-10-PRA-L-AAVSK.yaml",
"CZ-10-PRA-L-AC.yaml",
"CZ-10-PRA-L-ACRSRK.yaml",
"CZ-10-PRA-L-ACSRK.yaml",
"CZ-10-PRA-L-ACVUK.yaml",
"CZ-10-PRA-L-AHS.yaml",
"CZ-10-PRA-L-AHUV.yaml",
"CZ-10-PRA-L-AMSK.yaml",
"CZ-10-PRA-L-AMUVPKA.yaml",
"CZ-10-PRA-L-ANAC-academia_nakladatelstvi_av_cr.yaml",
"CZ-10-PRA-L-ANAC.yaml",
"CZ-10-PRA-L-AP.yaml",
"CZ-10-PRA-L-APS.yaml",
"CZ-10-PRA-L-APSR.yaml",
"CZ-10-PRA-L-APSRSPKDTK.yaml",
"CZ-10-PRA-L-AS.yaml",
"CZ-10-PRA-L-ASPK.yaml",
"CZ-10-PRA-L-ASR-acidotechna_sro.yaml",
"CZ-10-PRA-L-ASR.yaml",
"CZ-10-PRA-L-ATPSK.yaml",
"CZ-10-PRA-L-AUACPVVK.yaml",
"CZ-10-PRA-L-AVCAUPP-akademie_ved_cr_astronomicky_ustav_pobocka_praha.yaml",
"CZ-10-PRA-L-AVCAUPP.yaml",
"CZ-10-PRA-L-AVCEI.yaml",
"CZ-10-PRA-L-AVCFU-akademie_ved_cr_farmakologicky_ustav.yaml",
"CZ-10-PRA-L-AVCFU.yaml",
"CZ-10-PRA-L-AVCLEB.yaml",
"CZ-10-PRA-L-AVCMU-akademie_ved_cr_mikrobiologicky_ustav.yaml",
"CZ-10-PRA-L-AVCMU.yaml",
"CZ-10-PRA-L-AVCPLS-akademie_ved_cr_patentove_a_licencni_sluzby.yaml",
"CZ-10-PRA-L-AVCPLS.yaml",
"CZ-10-PRA-L-AVCPU.yaml",
"CZ-10-PRA-L-AVCPUJK.yaml",
"CZ-10-PRA-L-AVCUAC.yaml",
"CZ-10-PRA-L-AVCUFM.yaml",
"CZ-10-PRA-L-AVCUFR.yaml",
"CZ-10-PRA-L-AVCUMG-akademie_ved_cr_ustav_molekularni_genetiky.yaml",
"CZ-10-PRA-L-AVCUMG.yaml",
"CZ-10-PRA-L-AVCUPCSLON.yaml",
"CZ-10-PRA-L-AVCUPEK-akademie_ved_cr_ustav_pro_elektrotechniku_knihovna.yaml",
"CZ-10-PRA-L-AVCUPEK.yaml",
"CZ-10-PRA-L-AVCVD-akademie_ved_cr_vyvojove_dilny.yaml",
"CZ-10-PRA-L-AVCVD.yaml",
"CZ-10-PRA-L-AVSSK.yaml",
"CZ-10-PRA-L-AVUVPK.yaml",
"CZ-10-PRA-L-AZN.yaml",
"CZ-10-PRA-L-B.yaml",
"CZ-10-PRA-L-BL.yaml",
"CZ-10-PRA-L-BLZ.yaml",
"CZ-10-PRA-L-BMSSR.yaml",
"CZ-10-PRA-L-BOPS.yaml",
"CZ-10-PRA-L-BPSRPNM.yaml",
"CZ-10-PRA-L-BS.yaml",
"CZ-10-PRA-L-BTS.yaml",
"CZ-10-PRA-L-BVCRSR.yaml",
"CZ-10-PRA-L-BZHMPK.yaml",
"CZ-10-PRA-L-BZVPM.yaml",
"CZ-10-PRA-L-C.yaml",
"CZ-10-PRA-L-CA.yaml",
"CZ-10-PRA-L-CAPOC.yaml",
"CZ-10-PRA-L-CAS.yaml",
"CZ-10-PRA-L-CBSK.yaml",
"CZ-10-PRA-L-CBSPAC.yaml",
"CZ-10-PRA-L-CBUUER.yaml",
"CZ-10-PRA-L-CCS.yaml",
"CZ-10-PRA-L-CDOPES.yaml",
"CZ-10-PRA-L-CDS.yaml",
"CZ-10-PRA-L-CDSCLS.yaml",
"CZ-10-PRA-L-CDSCTS.yaml",
"CZ-10-PRA-L-CDSS.yaml",
"CZ-10-PRA-L-CDSVZPOKV.yaml",
"CZ-10-PRA-L-CDZUKCD.yaml",
"CZ-10-PRA-L-CEKJK.yaml",
"CZ-10-PRA-L-CES.yaml",
"CZ-10-PRA-L-CEU.yaml",
"CZ-10-PRA-L-CEUEML.yaml",
"CZ-10-PRA-L-CFU.yaml",
"CZ-10-PRA-L-CFUPVVSV.yaml",
"CZ-10-PRA-L-CGSK.yaml",
"CZ-10-PRA-L-CGUMS.yaml",
"CZ-10-PRA-L-CHVDISK.yaml",
"CZ-10-PRA-L-CKCJ.yaml",
"CZ-10-PRA-L-CKP.yaml",
"CZ-10-PRA-L-CKPAE.yaml",
"CZ-10-PRA-L-CKPPVH.yaml",
"CZ-10-PRA-L-CKS.yaml",
"CZ-10-PRA-L-CKTATZP.yaml",
"CZ-10-PRA-L-CLEO.yaml",
"CZ-10-PRA-L-CLPRSR.yaml",
"CZ-10-PRA-L-CLS.yaml",
"CZ-10-PRA-L-CLVNC.yaml",
"CZ-10-PRA-L-CLVVZ.yaml",
"CZ-10-PRA-L-CMJZS.yaml",
"CZ-10-PRA-L-CNES.yaml",
"CZ-10-PRA-L-CNPS.yaml",
"CZ-10-PRA-L-COL.yaml",
"CZ-10-PRA-L-COPK.yaml",
"CZ-10-PRA-L-COTS.yaml",
"CZ-10-PRA-L-CP.yaml",
"CZ-10-PRA-L-CPDSPPS.yaml",
"CZ-10-PRA-L-CPP.yaml",
"CZ-10-PRA-L-CPRPPP.yaml",
"CZ-10-PRA-L-CPS-ceska_pojistovna_as.yaml",
"CZ-10-PRA-L-CPS.yaml",
"CZ-10-PRA-L-CPSPOZSC.yaml",
"CZ-10-PRA-L-CPSR.yaml",
"CZ-10-PRA-L-CPSVSVV.yaml",
"CZ-10-PRA-L-CPU.yaml",
"CZ-10-PRA-L-CRS.yaml",
"CZ-10-PRA-L-CS-cedok_as.yaml",
"CZ-10-PRA-L-CS-ceskoslovensky_spisovatel.yaml",
"CZ-10-PRA-L-CS-cetos_as.yaml",
"CZ-10-PRA-L-CS-chemapol_as.yaml",
"CZ-10-PRA-L-CS-cokoladovny_as.yaml",
"CZ-10-PRA-L-CS.yaml",
"CZ-10-PRA-L-CSBZSUV-cesky_svaz_bojovniku_za_svobodu_ustredni_vybor.yaml",
"CZ-10-PRA-L-CSBZSUV.yaml",
"JP-10-SHI-M-TYIM.yaml",
"JP-10-TAK-F-KS.yaml",
"JP-10-TAK-L-GPCL.yaml",
"JP-10-TAK-L-IUIJCL.yaml",
"JP-10-TAK-L-JLB.yaml",
"JP-10-TAK-L-NGJCL.yaml",
"JP-10-TAK-L-T-takasakikenkofukushidaigakuyakugakubutoshoshiryosh.yaml",
"JP-10-TAK-L-T.yaml",
"JP-10-TAK-L-TCUEL.yaml",
"JP-10-TAK-L-TL-takasakishiritsugumma_library.yaml",
"JP-10-TAK-L-TL-takasakishiritsuharuna_library.yaml",
"JP-10-TAK-L-TL-takasakishiritsumisato_library.yaml",
"JP-10-TAK-L-TL-takasakishiritsushimmachi_library.yaml",
"JP-10-TAK-L-TL-takasakishiritsuyamatanekinenyoshii_library.yaml",
"JP-10-TAK-L-TL-takasakishokadaigaku_library.yaml",
"JP-10-TAK-L-TL.yaml",
"JP-10-TAK-L-TLB.yaml",
"JP-10-TAK-L-TUHWL.yaml",
"JP-10-TAK-M-KMA.yaml",
"JP-10-TAK-M-MMAG.yaml",
"JP-10-TAK-M-NSC.yaml",
"JP-10-TAK-M-TCDPBG.yaml",
"JP-10-TAK-M-TCFHM.yaml",
"JP-10-TAK-M-TSHFHM.yaml",
"JP-10-TAK-M-TTMA.yaml",
"JP-10-TAK-M-TYSC.yaml",
"JP-10-TAK-M-YKMAT.yaml",
"JP-10-TAK-M-YMH.yaml",
"JP-10-TAK-M-YMLMH.yaml",
"JP-10-TAM-M-THM.yaml"
],
"last_index": 39
"last_index": 29
}

View file

@ -42,7 +42,8 @@ ghcid:
reason: 'Country resolved via Wikidata P17: XX→CZ'
- ghcid: CZ-10-PRA-A-MÚAVČAÚTGM
valid_from: '2025-12-07T12:38:22.566656+00:00'
reason: 'Location resolved from institution name pattern: ''Akademie věd'' → region 10, city PRA'
reason: 'Location resolved from institution name pattern: ''Akademie věd'' → region
10, city PRA'
custodian_name:
claim_type: custodian_name
claim_value: Masarykův ústav Akademie věd ČR - Archiv Ústavu T.G.M
@ -68,8 +69,8 @@ provenance:
confidence_score: 0.85
notes:
- 'Country resolved 2025-12-06T23:56:12Z: XX→CZ via Wikidata P17'
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:28Z: Maps: Masarykův ústav a Archiv AV ČR (conf: 0.95); YouTube: not
found'
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:28Z: Maps: Masarykův ústav a
Archiv AV ČR (conf: 0.95); YouTube: not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
@ -151,7 +152,8 @@ google_maps_enrichment:
author_uri: https://www.google.com/maps/contrib/111969964317962148590/reviews
rating: 5
relative_time_description: 3 years ago
text: Incredible work by the people behind it all. So many well-preserved books. It's great...👍
text: Incredible work by the people behind it all. So many well-preserved books.
It's great...👍
publish_time: '2022-08-18T17:08:05.122982Z'
- author_name: Josef Prajzler
author_uri: https://www.google.com/maps/contrib/104183040336437489284/reviews
@ -206,15 +208,19 @@ google_maps_enrichment:
is_match: true
confidence: 0.95
entity_type: GRP.HER
reasoning: 'NAME MATCH: The names are a very close match. The source ''Masarykův ústav Akademie věd ČR - Archiv Ústavu
T.G.M'' is an expanded form of the candidate ''Masarykův ústav a Archiv AV ČR''. ''Akademie věd ČR'' (Academy of Sciences
of the Czech Republic) is correctly abbreviated as ''AV ČR''. The source name specifies the archive within the institute,
which is consistent with the candidate''s name. LOCATION MATCH: The address in Praha (Prague), Czechia matches the country
code ''CZ''. TYPE MATCH: Although the Google Place types (''point_of_interest'', ''establishment'') are generic, the
source name explicitly contains ''Archiv'' (Archive). The confirmed website (mua.cas.cz) is for the Masaryk Institute
and Archives of the CAS, which describes itself as an archive and research institution. This confirms its function as
a heritage institution. ENTITY TYPE: Based on the explicit name ''Archive'' and the institution''s description as a
research archive, it is a confirmed GRP.HER.'
reasoning: 'NAME MATCH: The names are a very close match. The source ''Masarykův
ústav Akademie věd ČR - Archiv Ústavu T.G.M'' is an expanded form of the candidate
''Masarykův ústav a Archiv AV ČR''. ''Akademie věd ČR'' (Academy of Sciences
of the Czech Republic) is correctly abbreviated as ''AV ČR''. The source name
specifies the archive within the institute, which is consistent with the candidate''s
name. LOCATION MATCH: The address in Praha (Prague), Czechia matches the country
code ''CZ''. TYPE MATCH: Although the Google Place types (''point_of_interest'',
''establishment'') are generic, the source name explicitly contains ''Archiv''
(Archive). The confirmed website (mua.cas.cz) is for the Masaryk Institute and
Archives of the CAS, which describes itself as an archive and research institution.
This confirms its function as a heritage institution. ENTITY TYPE: Based on
the explicit name ''Archive'' and the institution''s description as a research
archive, it is a confirmed GRP.HER.'
agent: glm-4.6
verified: true
ch_annotator_version: ch_annotator-v1_7_0
@ -235,3 +241,28 @@ location:
street_address: Gabčíkova 2362, Praha 8-Libeň
formatted_address: 10, Gabčíkova 2362, 182 00 Praha 8-Libeň, Czechia
normalization_timestamp: '2025-12-09T06:49:28.542096+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:57:42.654886+00:00'
source_url: http://www.mua.cas.cz
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.mua.cas.cz/build/favicon/safari-pinned-tab.svg
source_url: http://www.mua.cas.cz
css_selector: '[document] > html > head > link:nth-of-type(7)'
retrieved_on: '2025-12-23T20:57:42.654886+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: https://mua.greendot.cz/build/img/hp-hero.jpg
source_url: http://www.mua.cas.cz
css_selector: '[document] > html > head > meta:nth-of-type(10)'
retrieved_on: '2025-12-23T20:57:42.654886+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 4

View file

@ -73,8 +73,8 @@ provenance:
confidence_score: 0.85
notes:
- 'Country resolved 2025-12-06T23:54:39Z: XX→CZ via Wikidata P17'
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:31Z: Maps: National Archives of the Czech Republic (conf: 1.00); YouTube:
not found'
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:31Z: Maps: National Archives
of the Czech Republic (conf: 1.00); YouTube: not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
ch_annotator:
@ -227,8 +227,9 @@ wikidata_enrichment:
description: archives of a country
- id: Q7075
label: library
description: institution charged with the care of a collection of literary, musical, artistic, or reference materials,
such as books, manuscripts, recordings, or films
description: institution charged with the care of a collection of literary,
musical, artistic, or reference materials, such as books, manuscripts, recordings,
or films
- id: Q27031009
label: public archive
description: repository for official documents
@ -237,7 +238,8 @@ wikidata_enrichment:
description: type of Czech research organization
- id: Q2085381
label: publishing company
description: company that prints and distributes pressed goods or electronic media
description: company that prints and distributes pressed goods or electronic
media
wikidata_instance_of: *id005
wikidata_location:
country: &id007
@ -300,8 +302,8 @@ google_maps_enrichment:
author_uri: https://www.google.com/maps/contrib/111303903784806922325/reviews
rating: 5
relative_time_description: 5 years ago
text: Super welcoming and very helpful archivists (for visitors like myself who are new and struggle with Czech). Thanks
so much, keep up the great work.
text: Super welcoming and very helpful archivists (for visitors like myself who
are new and struggle with Czech). Thanks so much, keep up the great work.
publish_time: '2020-09-22T12:20:41.750155Z'
- author_name: Mały Oisior
author_uri: https://www.google.com/maps/contrib/100968724745138285308/reviews
@ -319,23 +321,29 @@ google_maps_enrichment:
author_uri: https://www.google.com/maps/contrib/105362763666969692195/reviews
rating: 1
relative_time_description: 3 weeks ago
text: The working hours of the employees of this office are ridiculous... Starting at 9, being gone by 1 pm... Paying
for something like this out of taxes is a joy...
text: The working hours of the employees of this office are ridiculous... Starting
at 9, being gone by 1 pm... Paying for something like this out of taxes is a
joy...
publish_time: '2025-11-14T12:59:55.176030204Z'
- author_name: David Veselík
author_uri: https://www.google.com/maps/contrib/116735262197680877900/reviews
rating: 3
relative_time_description: 5 months ago
text: Professional approach when searching for documents on the topic of the study. However, they themselves spoiled the
positive evaluation several times. Unfortunately, it happened to me several times that I received inaccurate information
from the research room staff when searching for information about the fund in the inventories. When I asked whether
there was a fund for police stations and it was accessible in the research room, I was told that it was not. When I
wrote directly to the relevant department, they sent it to me for viewing by email without any problems. It also happened
to me once that a younger research room employee in glasses, who hands over archival materials for the study, was sleeping
in his chair and I was very embarrassed to wake him up so that I wouldn't waste time waiting for him to hand me more
archival materials. It happened at the beginning of January. Other researchers noticed it too. That definitely doesn't
add to trust. I wouldn't expect something like that in an institution like the National Archives. It was clear that
the younger archivist didn't even find it inappropriate and he didn't even need an apology for it.
text: Professional approach when searching for documents on the topic of the study.
However, they themselves spoiled the positive evaluation several times. Unfortunately,
it happened to me several times that I received inaccurate information from
the research room staff when searching for information about the fund in the
inventories. When I asked whether there was a fund for police stations and it
was accessible in the research room, I was told that it was not. When I wrote
directly to the relevant department, they sent it to me for viewing by email
without any problems. It also happened to me once that a younger research room
employee in glasses, who hands over archival materials for the study, was sleeping
in his chair and I was very embarrassed to wake him up so that I wouldn't waste
time waiting for him to hand me more archival materials. It happened at the
beginning of January. Other researchers noticed it too. That definitely doesn't
add to trust. I wouldn't expect something like that in an institution like the
National Archives. It was clear that the younger archivist didn't even find
it inappropriate and he didn't even need an apology for it.
publish_time: '2025-06-27T19:06:48.934404281Z'
opening_hours:
open_now: false
@ -369,10 +377,12 @@ google_maps_enrichment:
is_match: true
confidence: 1.0
entity_type: GRP.HER
reasoning: 'Perfect match. 1. Name is a direct translation: ''Národní archiv'' (Czech) and ''National Archives of the
Czech Republic'' (English). 2. Location matches: The address is in Prague, Czechia, which is consistent with the source''s
country code ''CZ''. 3. Type matches: The name ''National Archives'' and website confirm it is an archive, a type of
heritage institution. 4. Entity type is correct.'
reasoning: 'Perfect match. 1. Name is a direct translation: ''Národní archiv''
(Czech) and ''National Archives of the Czech Republic'' (English). 2. Location
matches: The address is in Prague, Czechia, which is consistent with the source''s
country code ''CZ''. 3. Type matches: The name ''National Archives'' and website
confirm it is an archive, a type of heritage institution. 4. Entity type is
correct.'
agent: glm-4.6
verified: true
ch_annotator_version: ch_annotator-v1_7_0
@ -393,3 +403,32 @@ location:
street_address: Archivní 2257/4, Praha 4-Chodov
formatted_address: Archivní 2257/4, 149 00 Praha 4-Chodov, Czechia
normalization_timestamp: '2025-12-09T06:49:28.591723+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:57:48.334183+00:00'
source_url: http://www.nacr.cz/eindex.htm
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/logo_na_cs.png
source_url: http://www.nacr.cz/eindex.htm
css_selector: '#wrapper-navbar > header.header > div.header__main:nth-of-type(2)
> nav.navbar.navbar-expand-lg > div.navbar-mobile-top > div.navbar-brand > a
> img'
retrieved_on: '2025-12-23T20:57:48.334183+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: Národní archiv
- claim_type: favicon_url
claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/favicon/apple-icon-180x180.png
source_url: http://www.nacr.cz/eindex.htm
css_selector: '[document] > html > head > link:nth-of-type(9)'
retrieved_on: '2025-12-23T20:57:48.334183+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
summary:
total_claims: 2
has_primary_logo: true
has_favicon: true
has_og_image: false
favicon_count: 13

View file

@ -36,7 +36,8 @@ ghcid:
reason: 'Country resolved via Wikidata P17: XX→CZ'
- ghcid: CZ-10-PRA-A-NTMAAS
valid_from: '2025-12-07T12:39:42.486868+00:00'
reason: 'Location resolved from institution name pattern: ''Prague'' → region 10, city PRA'
reason: 'Location resolved from institution name pattern: ''Prague'' → region
10, city PRA'
custodian_name:
claim_type: custodian_name
claim_value: Národní technické muzeum - Archiv architektury a stavitelství
@ -62,8 +63,8 @@ provenance:
confidence_score: 0.85
notes:
- 'Country resolved 2025-12-06T23:56:13Z: XX→CZ via Wikidata P17'
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:48Z: Maps: Centrum stavitelského dědictví NTM Plasy (conf: 0.90); YouTube:
not found'
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:48Z: Maps: Centrum stavitelského
dědictví NTM Plasy (conf: 0.90); YouTube: not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
@ -147,8 +148,8 @@ google_maps_enrichment:
author_uri: https://www.google.com/maps/contrib/111216860672505384775/reviews
rating: 5
relative_time_description: 6 years ago
text: Interessting even for smaller children (5y). Also thanks to a playground inside. Worth to repeat the visit. One
floor dedicated to architect Kaplický.
text: Interessting even for smaller children (5y). Also thanks to a playground
inside. Worth to repeat the visit. One floor dedicated to architect Kaplický.
publish_time: '2019-08-09T09:56:04.887896Z'
- author_name: Pavel Rec (RudolfII)
author_uri: https://www.google.com/maps/contrib/103709696563570701814/reviews
@ -196,18 +197,22 @@ google_maps_enrichment:
is_match: true
confidence: 0.9
entity_type: GRP.HER
reasoning: The candidate is 'Centrum stavitelského dědictví NTM Plasy', which translates to 'Center for Building Heritage
of the NTM Plasy'. 'NTM' is the standard Czech abbreviation for 'Národní technické muzeum' (National Technical Museum).
The Archiv architektury a stavitelství (Archive of Architecture and Construction) is a specialized department of this
institution. The candidate's name 'Centrum stavitelského dědictví' (Center for Building Heritage) is thematically and
institutionally aligned with the Archive of Architecture and Construction. The Google type 'museum' is a correct match
for a heritage institution. Both are in the Czech Republic.
reasoning: The candidate is 'Centrum stavitelského dědictví NTM Plasy', which
translates to 'Center for Building Heritage of the NTM Plasy'. 'NTM' is the
standard Czech abbreviation for 'Národní technické muzeum' (National Technical
Museum). The Archiv architektury a stavitelství (Archive of Architecture and
Construction) is a specialized department of this institution. The candidate's
name 'Centrum stavitelského dědictví' (Center for Building Heritage) is thematically
and institutionally aligned with the Archive of Architecture and Construction.
The Google type 'museum' is a correct match for a heritage institution. Both
are in the Czech Republic.
agent: glm-4.6
verified: true
ch_annotator_version: ch_annotator-v1_7_0
google_maps_status: SUCCESS
youtube_status: NOT_FOUND
youtube_search_query: Národní technické muzeum - Archiv architektury a stavitelství official
youtube_search_query: Národní technické muzeum - Archiv architektury a stavitelství
official
youtube_search_timestamp: '2025-12-08T19:47:48.608047+00:00'
location:
latitude: 49.9355172
@ -222,3 +227,22 @@ location:
street_address: Pivovarská 5, Plasy
formatted_address: Pivovarská 5, 331 01 Plasy, Czechia
normalization_timestamp: '2025-12-09T06:49:28.696784+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:58:03.034151+00:00'
source_url: http://muzeum-plasy.cz
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.ntm.cz/file/30dc8e5fefba6ceba5d690d796c861ec/2220/favicon/NTM%20EN%20%C4%8Derven%C3%A1%20negativ.png
source_url: http://muzeum-plasy.cz
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-23T20:58:03.034151+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: image/png
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -246,3 +246,22 @@ location:
street_address: Zámek 672, Strážnice
formatted_address: Zámek 672, 696 62 Strážnice, Czechia
normalization_timestamp: '2025-12-09T06:49:28.743251+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:58:08.250911+00:00'
source_url: http://www.nulk.cz
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://www.nulk.cz/wp-content/uploads/2016/11/cropped-favicon-180x180.png
source_url: http://www.nulk.cz
css_selector: '[document] > html > head > link:nth-of-type(26)'
retrieved_on: '2025-12-23T20:58:08.250911+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 3

View file

@ -47,7 +47,8 @@ ghcid:
reason: 'Country resolved via Wikidata P17: XX→CZ'
- ghcid: CZ-10-PRA-A-SAČÚZK
valid_from: '2025-12-07T12:54:27.628961+00:00'
reason: 'Region resolved via GeoNames research: XX→10, city: Prague (GeoNames ID: 3067696)'
reason: 'Region resolved via GeoNames research: XX→10, city: Prague (GeoNames
ID: 3067696)'
custodian_name:
claim_type: custodian_name
claim_value: Správní archiv Českého úřadu zeměměřictví a katastru
@ -74,8 +75,8 @@ provenance:
notes:
- 'Country resolved 2025-12-06T23:56:11Z: XX→CZ via Wikidata P17'
- 'Region resolved 2025-12-07: XX→CZ-10 (Prague) via GeoNames research'
- 'YouTube/Google Maps enrichment 2025-12-08T19:48:10Z: Maps: Český úřad zeměměřický a katastrální- Zeměměřická knihovna
(conf: 0.90); YouTube: not found'
- 'YouTube/Google Maps enrichment 2025-12-08T19:48:10Z: Maps: Český úřad zeměměřický
a katastrální- Zeměměřická knihovna (conf: 0.90); YouTube: not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:05Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
@ -256,11 +257,13 @@ google_maps_enrichment:
is_match: true
confidence: 0.9
entity_type: GRP.HER
reasoning: The Google Maps candidate 'Český úřad zeměměřický a katastrální- Zeměměřická knihovna' (Land Survey Office
- Surveying Library) is a library ('Zeměměřická knihovna'). The source institution is an administrative archive ('Správní
archiv') for the same parent body, the Czech Land Survey Office. The name of the parent institution matches, the location
in Prague matches, and the Google type 'library' is an expected type for a heritage institution (GRP.HER). The candidate
is the public-facing library of the same administrative body, making it a strong match.
reasoning: The Google Maps candidate 'Český úřad zeměměřický a katastrální- Zeměměřická
knihovna' (Land Survey Office - Surveying Library) is a library ('Zeměměřická
knihovna'). The source institution is an administrative archive ('Správní archiv')
for the same parent body, the Czech Land Survey Office. The name of the parent
institution matches, the location in Prague matches, and the Google type 'library'
is an expected type for a heritage institution (GRP.HER). The candidate is the
public-facing library of the same administrative body, making it a strong match.
agent: glm-4.6
verified: true
ch_annotator_version: ch_annotator-v1_7_0
@ -268,3 +271,22 @@ google_maps_status: SUCCESS
youtube_status: NOT_FOUND
youtube_search_query: Správní archiv Českého úřadu zeměměřictví a katastru official
youtube_search_timestamp: '2025-12-08T19:48:10.258212+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:58:18.366792+00:00'
source_url: http://www.cuzk.cz
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.cuzk.cz/App_Themes/CUZK/favicon.png
source_url: http://www.cuzk.cz
css_selector: '#head > link:nth-of-type(5)'
retrieved_on: '2025-12-23T20:58:18.366792+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: image/png
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -70,7 +70,8 @@ provenance:
confidence_score: 0.85
notes:
- 'Country resolved 2025-12-06T23:56:12Z: XX→CZ via Wikidata P17'
- 'YouTube/Google Maps enrichment 2025-12-08T19:48:15Z: Maps: Prague City Archives (conf: 0.85); YouTube: not found'
- 'YouTube/Google Maps enrichment 2025-12-08T19:48:15Z: Maps: Prague City Archives
(conf: 0.85); YouTube: not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:05Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
@ -158,7 +159,8 @@ google_maps_enrichment:
author_uri: https://www.google.com/maps/contrib/109561705096115465097/reviews
rating: 5
relative_time_description: a year ago
text: The building dates back to 1995, it's nice and very interesting. I recommend visiting the archive.
text: The building dates back to 1995, it's nice and very interesting. I recommend
visiting the archive.
publish_time: '2024-04-21T15:15:20.793882Z'
- author_name: Libor Šedivý
author_uri: https://www.google.com/maps/contrib/112581391891260052369/reviews
@ -170,20 +172,25 @@ google_maps_enrichment:
author_uri: https://www.google.com/maps/contrib/102671748185462032746/reviews
rating: 3
relative_time_description: 6 years ago
text: You need to communicate at least two weeks before the planned visit so that you don't lose out unnecessarily. You
won't get anything at first... In addition, you need to take into account that they will present you with 5 archival
units in one day, i.e. not 5 cartons, but only 5 folders, regardless of whether it is a bookmark with one sheet or a
package with hundreds of documents... The ambition to complete more extensive research requires much more time than
we are used to in other archives. So much luck, patience and research happiness! 🍀
text: You need to communicate at least two weeks before the planned visit so that
you don't lose out unnecessarily. You won't get anything at first... In addition,
you need to take into account that they will present you with 5 archival units
in one day, i.e. not 5 cartons, but only 5 folders, regardless of whether it
is a bookmark with one sheet or a package with hundreds of documents... The
ambition to complete more extensive research requires much more time than we
are used to in other archives. So much luck, patience and research happiness!
🍀
publish_time: '2019-12-01T07:46:56.111948Z'
- author_name: Miroslav Havel
author_uri: https://www.google.com/maps/contrib/109030248799737237070/reviews
rating: 5
relative_time_description: 7 years ago
text: A modern archive building built in the 1990s, I was lucky enough to see the facilities. Willing and quirky workers,
the profession of archivist probably requires a certain amount of perspective, a sense of humor and self-irony. If you
come to a professional course, you can look forward not only to insightful information, but also to great comments and
glosses on current events. It's worth it. 😉
text: A modern archive building built in the 1990s, I was lucky enough to see
the facilities. Willing and quirky workers, the profession of archivist probably
requires a certain amount of perspective, a sense of humor and self-irony. If
you come to a professional course, you can look forward not only to insightful
information, but also to great comments and glosses on current events. It's
worth it. 😉
publish_time: '2018-03-17T09:34:30.117Z'
opening_hours:
open_now: false
@ -217,16 +224,21 @@ google_maps_enrichment:
is_match: true
confidence: 0.85
entity_type: GRP.HER
reasoning: 'The source institution ''Správní archiv Městské části Praha 4'' (Administrative Archive of the Prague 4 City
District) is a clear match for the Google Maps candidate ''Prague City Archives''. The reasoning is as follows: 1) NAME
MATCH: The Google name is a translation and a more general description of the specific Czech name. The source name indicates
it is the administrative archive for a specific district of Prague, while the Google name refers to the main city archives,
but their identities can be confirmed via the shared website domain ''ahmp.cz'', which stands for Archiv Hlavního města
Prahy (Archives of the Capital City of Prague). This confirms the candidate is the correct parent institution. 2) LOCATION
MATCH: The source name specifies ''Praha 4'' and the Google Maps address is in ''Praha 4-Chodov'', a perfect location
match. 3) TYPE MATCH: Although the Google Place types (''point_of_interest'', ''establishment'') are generic, the institution
is unambiguously an ''archiv'', which is a heritage institution type. 4) ENTITY TYPE: An administrative archive is a
definitive example of a Heritage Institution (GRP.HER).'
reasoning: 'The source institution ''Správní archiv Městské části Praha 4'' (Administrative
Archive of the Prague 4 City District) is a clear match for the Google Maps
candidate ''Prague City Archives''. The reasoning is as follows: 1) NAME MATCH:
The Google name is a translation and a more general description of the specific
Czech name. The source name indicates it is the administrative archive for a
specific district of Prague, while the Google name refers to the main city archives,
but their identities can be confirmed via the shared website domain ''ahmp.cz'',
which stands for Archiv Hlavního města Prahy (Archives of the Capital City of
Prague). This confirms the candidate is the correct parent institution. 2) LOCATION
MATCH: The source name specifies ''Praha 4'' and the Google Maps address is
in ''Praha 4-Chodov'', a perfect location match. 3) TYPE MATCH: Although the
Google Place types (''point_of_interest'', ''establishment'') are generic, the
institution is unambiguously an ''archiv'', which is a heritage institution
type. 4) ENTITY TYPE: An administrative archive is a definitive example of a
Heritage Institution (GRP.HER).'
agent: glm-4.6
verified: true
ch_annotator_version: ch_annotator-v1_7_0
@ -248,3 +260,22 @@ location:
street_address: Archivní 1280/6, Praha 4-Chodov
formatted_address: Archivní 1280/6, 149 00 Praha 4-Chodov, Czechia
normalization_timestamp: '2025-12-09T06:49:28.915410+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:58:23.615923+00:00'
source_url: http://www.ahmp.cz
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.ahmp.cz/img/ahmp_favicon.ico
source_url: http://www.ahmp.cz
css_selector: '[document] > html > head > link'
retrieved_on: '2025-12-23T20:58:23.615923+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -42,10 +42,12 @@ ghcid:
reason: 'Country resolved via Wikidata P17: XX→CZ'
- ghcid: CZ-10-PRA-A-SANÚPKIB
valid_from: '2025-12-07T12:38:22.569556+00:00'
reason: 'Location resolved from institution name pattern: ''Národního úřadu'' → region 10, city PRA'
reason: 'Location resolved from institution name pattern: ''Národního úřadu''
→ region 10, city PRA'
custodian_name:
claim_type: custodian_name
claim_value: Specializovaný archiv Národního úřadu pro kybernetickou a informační bezp.
claim_value: Specializovaný archiv Národního úřadu pro kybernetickou a informační
bezp.
source_type: ch_annotator
identifiers:
- identifier_scheme: GHCID
@ -68,8 +70,8 @@ provenance:
confidence_score: 0.85
notes:
- 'Country resolved 2025-12-06T23:56:13Z: XX→CZ via Wikidata P17'
- 'YouTube/Google Maps enrichment 2025-12-08T19:48:25Z: Maps: National Archives of the Czech Republic (conf: 0.90); YouTube:
not found'
- 'YouTube/Google Maps enrichment 2025-12-08T19:48:25Z: Maps: National Archives
of the Czech Republic (conf: 0.90); YouTube: not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:05Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
@ -104,7 +106,8 @@ ch_annotator:
verified_by: null
entity_claims:
- claim_type: full_name
claim_value: Specializovaný archiv Národního úřadu pro kybernetickou a informační bezp.
claim_value: Specializovaný archiv Národního úřadu pro kybernetickou a informační
bezp.
property_uri: skos:prefLabel
provenance:
namespace: glam
@ -151,8 +154,8 @@ google_maps_enrichment:
author_uri: https://www.google.com/maps/contrib/111303903784806922325/reviews
rating: 5
relative_time_description: 5 years ago
text: Super welcoming and very helpful archivists (for visitors like myself who are new and struggle with Czech). Thanks
so much, keep up the great work.
text: Super welcoming and very helpful archivists (for visitors like myself who
are new and struggle with Czech). Thanks so much, keep up the great work.
publish_time: '2020-09-22T12:20:41.750155Z'
- author_name: Mały Oisior
author_uri: https://www.google.com/maps/contrib/100968724745138285308/reviews
@ -170,23 +173,29 @@ google_maps_enrichment:
author_uri: https://www.google.com/maps/contrib/105362763666969692195/reviews
rating: 1
relative_time_description: 3 weeks ago
text: The working hours of the employees of this office are ridiculous... Starting at 9, being gone by 1 pm... Paying
for something like this out of taxes is a joy...
text: The working hours of the employees of this office are ridiculous... Starting
at 9, being gone by 1 pm... Paying for something like this out of taxes is a
joy...
publish_time: '2025-11-14T12:59:55.176030204Z'
- author_name: David Veselík
author_uri: https://www.google.com/maps/contrib/116735262197680877900/reviews
rating: 3
relative_time_description: 5 months ago
text: Professional approach when searching for documents on the topic of the study. However, they themselves spoiled the
positive evaluation several times. Unfortunately, it happened to me several times that I received inaccurate information
from the research room staff when searching for information about the fund in the inventories. When I asked whether
there was a fund for police stations and it was accessible in the research room, I was told that it was not. When I
wrote directly to the relevant department, they sent it to me for viewing by email without any problems. It also happened
to me once that a younger research room employee in glasses, who hands over archival materials for the study, was sleeping
in his chair and I was very embarrassed to wake him up so that I wouldn't waste time waiting for him to hand me more
archival materials. It happened at the beginning of January. Other researchers noticed it too. That definitely doesn't
add to trust. I wouldn't expect something like that in an institution like the National Archives. It was clear that
the younger archivist didn't even find it inappropriate and he didn't even need an apology for it.
text: Professional approach when searching for documents on the topic of the study.
However, they themselves spoiled the positive evaluation several times. Unfortunately,
it happened to me several times that I received inaccurate information from
the research room staff when searching for information about the fund in the
inventories. When I asked whether there was a fund for police stations and it
was accessible in the research room, I was told that it was not. When I wrote
directly to the relevant department, they sent it to me for viewing by email
without any problems. It also happened to me once that a younger research room
employee in glasses, who hands over archival materials for the study, was sleeping
in his chair and I was very embarrassed to wake him up so that I wouldn't waste
time waiting for him to hand me more archival materials. It happened at the
beginning of January. Other researchers noticed it too. That definitely doesn't
add to trust. I wouldn't expect something like that in an institution like the
National Archives. It was clear that the younger archivist didn't even find
it inappropriate and he didn't even need an apology for it.
publish_time: '2025-06-27T19:06:48.934404281Z'
opening_hours:
open_now: false
@ -220,18 +229,22 @@ google_maps_enrichment:
is_match: true
confidence: 0.9
entity_type: GRP.HER
reasoning: The Google Maps candidate is the National Archives of the Czech Republic, a clear translation of the source
institution's name 'Specializovaný archiv Národního úřadu pro kybernetickou a informační bezp.', which is a specialized
department within the National Archives. The location in Praha (Prague), Czechia, matches the country code (CZ). The
website (nacr.cz) confirms its identity as a national archive. While the Google Place types ('point_of_interest', 'establishment')
are generic and do not explicitly include 'archive', all other evidence points to this being the correct heritage institution
(an archive). Therefore, it is a match for a Heritage Institution.
reasoning: The Google Maps candidate is the National Archives of the Czech Republic,
a clear translation of the source institution's name 'Specializovaný archiv
Národního úřadu pro kybernetickou a informační bezp.', which is a specialized
department within the National Archives. The location in Praha (Prague), Czechia,
matches the country code (CZ). The website (nacr.cz) confirms its identity as
a national archive. While the Google Place types ('point_of_interest', 'establishment')
are generic and do not explicitly include 'archive', all other evidence points
to this being the correct heritage institution (an archive). Therefore, it is
a match for a Heritage Institution.
agent: glm-4.6
verified: true
ch_annotator_version: ch_annotator-v1_7_0
google_maps_status: SUCCESS
youtube_status: NOT_FOUND
youtube_search_query: Specializovaný archiv Národního úřadu pro kybernetickou a informační bezp. official
youtube_search_query: Specializovaný archiv Národního úřadu pro kybernetickou a informační
bezp. official
youtube_search_timestamp: '2025-12-08T19:48:25.873936+00:00'
location:
latitude: 50.0389097
@ -246,3 +259,38 @@ location:
street_address: Archivní 2257/4, Praha 4-Chodov
formatted_address: Archivní 2257/4, 149 00 Praha 4-Chodov, Czechia
normalization_timestamp: '2025-12-09T06:49:28.982890+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:58:30.883981+00:00'
source_url: http://www.nacr.cz
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/logo_na_en.png
source_url: http://www.nacr.cz
css_selector: '#wrapper-navbar > header.header > div.header__main:nth-of-type(2)
> nav.navbar.navbar-expand-lg > div.navbar-mobile-top > div.navbar-brand > a
> img'
retrieved_on: '2025-12-23T20:58:30.883981+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: National Archives
- claim_type: favicon_url
claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/favicon/apple-icon-180x180.png
source_url: http://www.nacr.cz
css_selector: '[document] > html > head > link:nth-of-type(9)'
retrieved_on: '2025-12-23T20:58:30.883981+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://www.nacr.cz/wp-content/uploads/2020/04/podatelna2_22-1.jpg
source_url: http://www.nacr.cz
css_selector: '[document] > html > head > meta:nth-of-type(17)'
retrieved_on: '2025-12-23T20:58:30.883981+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 3
has_primary_logo: true
has_favicon: true
has_og_image: true
favicon_count: 13

View file

@ -34,7 +34,8 @@ ghcid:
city_code: PRA
method: WIKIDATA_LOCATION_RESEARCH
resolution_timestamp: '2025-12-06T23:54:40.402432+00:00'
resolution_notes: Central Archive of Surveying and Cadastre in Prague, part of ČÚZK
resolution_notes: Central Archive of Surveying and Cadastre in Prague, part of
ČÚZK
ghcid_history:
- ghcid: CZ-10-PRA-A-UAZK
ghcid_numeric: 5974277682822411938
@ -78,7 +79,8 @@ provenance:
confidence_score: 0.85
notes:
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
- 'YouTube/Google Maps enrichment 2025-12-08T19:48:35Z: Maps: rejected by LLM; YouTube: not found'
- 'YouTube/Google Maps enrichment 2025-12-08T19:48:35Z: Maps: rejected by LLM; YouTube:
not found'
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:05Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:29Z
ch_annotator:
@ -108,8 +110,8 @@ ch_annotator:
annotation_metadata:
confidence_score: 0.85
verified: false
verification_date:
verified_by:
verification_date: null
verified_by: null
entity_claims:
- claim_type: full_name
claim_value: Ústřední archiv zeměměřictví a katastru
@ -213,10 +215,12 @@ wikidata_enrichment:
google_maps_status: NO_MATCH
google_maps_rejected:
candidate_name: Central Military Archives
rejection_reason: The Google Maps candidate, 'Central Military Archives', does not match the source institution, 'Ústřední
archiv zeměměřictví a katastru' (Central Archive of Surveying and Cadastre). Although both are archives in Prague, the
names and institutional focus are different. The Google candidate is military-focused, while the source is focused on
geodesy and land registry. Therefore, these are two different institutions.
rejection_reason: The Google Maps candidate, 'Central Military Archives', does not
match the source institution, 'Ústřední archiv zeměměřictví a katastru' (Central
Archive of Surveying and Cadastre). Although both are archives in Prague, the
names and institutional focus are different. The Google candidate is military-focused,
while the source is focused on geodesy and land registry. Therefore, these are
two different institutions.
timestamp: '2025-12-08T19:48:35.122961+00:00'
youtube_status: NOT_FOUND
youtube_search_query: Ústřední archiv zeměměřictví a katastru official
@ -237,3 +241,22 @@ location:
entity_id: 3067696
city_code: PRA
original_timestamp: '2025-12-09T20:40:58.298985+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:58:41.770641+00:00'
source_url: https://www.cuzk.cz/Urady/Zememericky-urad/Dalsi-informace/UAZK/Prohlizeni-archivalii,-archivni-mapy.aspx
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://www.cuzk.cz/App_Themes/CUZK/favicon.png
source_url: https://www.cuzk.cz/Urady/Zememericky-urad/Dalsi-informace/UAZK/Prohlizeni-archivalii,-archivni-mapy.aspx
css_selector: '#head > link:nth-of-type(5)'
retrieved_on: '2025-12-23T20:58:41.770641+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: image/png
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -251,3 +251,37 @@ location:
original_timestamp: '2025-12-09T15:34:40.266185+00:00'
geonames_name: Prague
feature_code: PPLC
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:58:47.105716+00:00'
source_url: http://udauk.cuni.cz
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: http://udauk.cuni.cz/ARCH-1-version1-afoto.jpg
source_url: http://udauk.cuni.cz
css_selector: '[document] > html > body > div.container-fluid.headerHolder > div.container.headerSubHolder
> div.row > header > div.col-md-7 > div.logo > a > img.logoSize.mobileLogoSize'
retrieved_on: '2025-12-23T20:58:47.105716+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: Homepage - Ústav dějin Univerzity Karlovy a Archiv Univerzity Karlovy
- claim_type: favicon_url
claim_value: http://udauk.cuni.cz/newlayout/UK-favicon-32x32px.png
source_url: http://udauk.cuni.cz
css_selector: '[document] > html > head > link:nth-of-type(6)'
retrieved_on: '2025-12-23T20:58:47.105716+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: image/x-icon
favicon_sizes: ''
- claim_type: og_image_url
claim_value: https://udauk.cuni.cz/newlayout/images/logosoc.png
source_url: http://udauk.cuni.cz
css_selector: '[document] > html > head > meta:nth-of-type(6)'
retrieved_on: '2025-12-23T20:58:47.105716+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 3
has_primary_logo: true
has_favicon: true
has_og_image: true
favicon_count: 1

View file

@ -259,3 +259,28 @@ wikidata_enrichment:
- id: Q11424955
label: Sakakura Associates Architects and Engineers
description: ''
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:39:53.520689+00:00'
source_url: https://www.artsmaebashi.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://artsmaebashi.jp/cms/wp-content/themes/artsmaebashi/favicon.ico
source_url: https://www.artsmaebashi.jp
css_selector: '[document] > html > head > link'
retrieved_on: '2025-12-23T20:39:53.520689+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: http://www.artsmaebashi.jp/cms/wp-content/themes/artsmaebashi/img/artsmaebashi_ogp_image.jpg
source_url: https://www.artsmaebashi.jp
css_selector: '[document] > html > head > meta:nth-of-type(10)'
retrieved_on: '2025-12-23T20:39:53.520689+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 1

View file

@ -205,3 +205,22 @@ location:
geonames_id: 6822155
geonames_name: Midori
feature_code: PPLA2
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:40:42.012851+00:00'
source_url: http://www.kiryu-u.ac.jp/university/library
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://www.kiryu-u.ac.jp/wp2024/wp-content/themes/design/favicon.ico
source_url: http://www.kiryu-u.ac.jp/university/library
css_selector: '[document] > html > head > link'
retrieved_on: '2025-12-23T20:40:42.012851+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: image/vnd.microsoft.icon
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -204,3 +204,36 @@ wikidata_enrichment:
wikidata_web:
official_website: http://www.city.midori.gunma.jp/library/
wikidata_official_website: http://www.city.midori.gunma.jp/library/
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:40:50.168438+00:00'
source_url: http://www.city.midori.gunma.jp/library
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png
source_url: http://www.city.midori.gunma.jp/library
css_selector: '#tlogo > p > a > img'
retrieved_on: '2025-12-23T20:40:50.168438+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: みどり市公式サイト
- claim_type: favicon_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png
source_url: http://www.city.midori.gunma.jp/library
css_selector: '[document] > html > head > link:nth-of-type(4)'
retrieved_on: '2025-12-23T20:40:50.168438+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png
source_url: http://www.city.midori.gunma.jp/library
css_selector: '[document] > html > head > meta:nth-of-type(9)'
retrieved_on: '2025-12-23T20:40:50.168438+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 3
has_primary_logo: true
has_favicon: true
has_og_image: true
favicon_count: 2

View file

@ -204,3 +204,36 @@ wikidata_enrichment:
wikidata_web:
official_website: http://www.city.midori.gunma.jp/library/
wikidata_official_website: http://www.city.midori.gunma.jp/library/
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:40:56.212089+00:00'
source_url: http://www.city.midori.gunma.jp/library
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png
source_url: http://www.city.midori.gunma.jp/library
css_selector: '#tlogo > p > a > img'
retrieved_on: '2025-12-23T20:40:56.212089+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: みどり市公式サイト
- claim_type: favicon_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png
source_url: http://www.city.midori.gunma.jp/library
css_selector: '[document] > html > head > link:nth-of-type(4)'
retrieved_on: '2025-12-23T20:40:56.212089+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png
source_url: http://www.city.midori.gunma.jp/library
css_selector: '[document] > html > head > meta:nth-of-type(9)'
retrieved_on: '2025-12-23T20:40:56.212089+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 3
has_primary_logo: true
has_favicon: true
has_og_image: true
favicon_count: 2

View file

@ -204,3 +204,36 @@ wikidata_enrichment:
wikidata_web:
official_website: http://www.city.midori.gunma.jp/library/
wikidata_official_website: http://www.city.midori.gunma.jp/library/
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:41:02.206429+00:00'
source_url: http://www.city.midori.gunma.jp/library
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/design/images/header/tlogo.png
source_url: http://www.city.midori.gunma.jp/library
css_selector: '#tlogo > p > a > img'
retrieved_on: '2025-12-23T20:41:02.206429+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: みどり市公式サイト
- claim_type: favicon_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png
source_url: http://www.city.midori.gunma.jp/library
css_selector: '[document] > html > head > link:nth-of-type(4)'
retrieved_on: '2025-12-23T20:41:02.206429+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png
source_url: http://www.city.midori.gunma.jp/library
css_selector: '[document] > html > head > meta:nth-of-type(9)'
retrieved_on: '2025-12-23T20:41:02.206429+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 3
has_primary_logo: true
has_favicon: true
has_og_image: true
favicon_count: 2

View file

@ -226,3 +226,36 @@ location:
geonames_id: 6822155
geonames_name: Midori
feature_code: PPLA2
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:41:09.788813+00:00'
source_url: https://www.city.midori.gunma.jp/iwajuku
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/iwajuku/images/header/tlogo.png?a
source_url: https://www.city.midori.gunma.jp/iwajuku
css_selector: '#tlogo > p > a > img'
retrieved_on: '2025-12-23T20:41:09.788813+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: 岩宿博物館
- claim_type: favicon_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png
source_url: https://www.city.midori.gunma.jp/iwajuku
css_selector: '[document] > html > head > link:nth-of-type(8)'
retrieved_on: '2025-12-23T20:41:09.788813+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png
source_url: https://www.city.midori.gunma.jp/iwajuku
css_selector: '[document] > html > head > meta:nth-of-type(10)'
retrieved_on: '2025-12-23T20:41:09.788813+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 3
has_primary_logo: true
has_favicon: true
has_og_image: true
favicon_count: 2

View file

@ -247,3 +247,36 @@ location:
postal_code: 376-0101
street_address: OMAMACHO OMAMA, Midori Shi, Gumma Ken, 376-0101
normalization_timestamp: '2025-12-09T06:54:37.040443+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:41:20.266954+00:00'
source_url: https://www.city.midori.gunma.jp/conodont
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/oomama/images/header/tlogo.png?a
source_url: https://www.city.midori.gunma.jp/conodont
css_selector: '#tlogo > p > a > img'
retrieved_on: '2025-12-23T20:41:20.266954+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: コノドント館 みどり市大間々博物館
- claim_type: favicon_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/apple-touch-icon-precomposed.png
source_url: https://www.city.midori.gunma.jp/conodont
css_selector: '[document] > html > head > link:nth-of-type(8)'
retrieved_on: '2025-12-23T20:41:20.266954+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: https://www.city.midori.gunma.jp/_template_/_site_/_default_/_res/images/sns/ogimage.png
source_url: https://www.city.midori.gunma.jp/conodont
css_selector: '[document] > html > head > meta:nth-of-type(10)'
retrieved_on: '2025-12-23T20:41:20.266954+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 3
has_primary_logo: true
has_favicon: true
has_og_image: true
favicon_count: 2

View file

@ -201,3 +201,22 @@ wikidata_enrichment:
wikidata_web:
official_website: http://www.town.itakura.gunma.jp/cont/s029000/20140706140658.html
wikidata_official_website: http://www.town.itakura.gunma.jp/cont/s029000/20140706140658.html
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:42:53.398009+00:00'
source_url: http://www.town.itakura.gunma.jp/cont/s029000/20140706140658.html
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.town.itakura.gunma.jp/favicon2025.ico
source_url: http://www.town.itakura.gunma.jp/cont/s029000/20140706140658.html
css_selector: '[document] > html > head > link'
retrieved_on: '2025-12-23T20:42:53.398009+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -207,3 +207,37 @@ location:
geonames_id: 13132763
geonames_name: Itakura
feature_code: PPL
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:43:19.036274+00:00'
source_url: http://www.toyo.ac.jp/site/library/index.html
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: http://www.toyo.ac.jp/img/common/img_logo.svg
source_url: http://www.toyo.ac.jp/site/library/index.html
css_selector: '#gheader > div.gnav__frm > h1.gnav__logo > a.gnav__logo-link >
img.gnav__logo-img'
retrieved_on: '2025-12-23T20:43:19.036274+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: 東洋大学
- claim_type: favicon_url
claim_value: http://www.toyo.ac.jp/img/common/favicon.ico
source_url: http://www.toyo.ac.jp/site/library/index.html
css_selector: '[document] > html.js_domload.js_imgload > head > link:nth-of-type(6)'
retrieved_on: '2025-12-23T20:43:19.036274+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: http://www.toyo.ac.jp/site/library/ogp.jpg
source_url: http://www.toyo.ac.jp/site/library/index.html
css_selector: '[document] > html.js_domload.js_imgload > head > meta:nth-of-type(6)'
retrieved_on: '2025-12-23T20:43:19.036274+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 3
has_primary_logo: true
has_favicon: true
has_og_image: true
favicon_count: 1

View file

@ -216,3 +216,22 @@ location:
geonames_id: 8469289
geonames_name: Ōta
feature_code: PPLA2
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:43:50.781940+00:00'
source_url: https://opac.kanto-gakuen.ac.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://opac.kanto-gakuen.ac.jp/favicon.ico
source_url: https://opac.kanto-gakuen.ac.jp
css_selector: '[document] > html > head > link'
retrieved_on: '2025-12-23T20:43:50.781940+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -204,3 +204,28 @@ wikidata_enrichment:
wikidata_web:
official_website: http://www2.lib.ota.gunma.jp/
wikidata_official_website: http://www2.lib.ota.gunma.jp/
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:44:00.439944+00:00'
source_url: http://www2.lib.ota.gunma.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www2.lib.ota.gunma.jp/css/img/apple-touch-icon.png
source_url: http://www2.lib.ota.gunma.jp
css_selector: '[document] > html > head > link:nth-of-type(8)'
retrieved_on: '2025-12-23T20:44:00.439944+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://www2.lib.ota.gunma.jp/design_img/og_image.png
source_url: http://www2.lib.ota.gunma.jp
css_selector: '[document] > html > head > meta:nth-of-type(6)'
retrieved_on: '2025-12-23T20:44:00.439944+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 2

View file

@ -204,3 +204,28 @@ wikidata_enrichment:
wikidata_web:
official_website: http://www2.lib.ota.gunma.jp/
wikidata_official_website: http://www2.lib.ota.gunma.jp/
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:44:11.876905+00:00'
source_url: http://www2.lib.ota.gunma.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www2.lib.ota.gunma.jp/css/img/apple-touch-icon.png
source_url: http://www2.lib.ota.gunma.jp
css_selector: '[document] > html > head > link:nth-of-type(8)'
retrieved_on: '2025-12-23T20:44:11.876905+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://www2.lib.ota.gunma.jp/design_img/og_image.png
source_url: http://www2.lib.ota.gunma.jp
css_selector: '[document] > html > head > meta:nth-of-type(6)'
retrieved_on: '2025-12-23T20:44:11.876905+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 2

View file

@ -204,3 +204,28 @@ wikidata_enrichment:
wikidata_web:
official_website: http://www2.lib.ota.gunma.jp/
wikidata_official_website: http://www2.lib.ota.gunma.jp/
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:44:25.192738+00:00'
source_url: http://www2.lib.ota.gunma.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www2.lib.ota.gunma.jp/css/img/apple-touch-icon.png
source_url: http://www2.lib.ota.gunma.jp
css_selector: '[document] > html > head > link:nth-of-type(8)'
retrieved_on: '2025-12-23T20:44:25.192738+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://www2.lib.ota.gunma.jp/design_img/og_image.png
source_url: http://www2.lib.ota.gunma.jp
css_selector: '[document] > html > head > meta:nth-of-type(6)'
retrieved_on: '2025-12-23T20:44:25.192738+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 2

View file

@ -204,3 +204,28 @@ wikidata_enrichment:
wikidata_web:
official_website: http://www2.lib.ota.gunma.jp/
wikidata_official_website: http://www2.lib.ota.gunma.jp/
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:44:37.130831+00:00'
source_url: http://www2.lib.ota.gunma.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www2.lib.ota.gunma.jp/css/img/apple-touch-icon.png
source_url: http://www2.lib.ota.gunma.jp
css_selector: '[document] > html > head > link:nth-of-type(8)'
retrieved_on: '2025-12-23T20:44:37.130831+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://www2.lib.ota.gunma.jp/design_img/og_image.png
source_url: http://www2.lib.ota.gunma.jp
css_selector: '[document] > html > head > meta:nth-of-type(6)'
retrieved_on: '2025-12-23T20:44:37.130831+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 2

View file

@ -245,3 +245,22 @@ location:
postal_code: 379-2301
street_address: YABUZUKACHO, Ota Shi, Gumma Ken, 379-2301
normalization_timestamp: '2025-12-09T06:54:37.451277+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:44:59.775139+00:00'
source_url: https://www.snake-center.com
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://static.wixstatic.com/media/e0add7_2ee341a95e2742109c6d64a0062cf331%7Emv2.png/v1/fill/w_180%2Ch_180%2Clg_1%2Cusm_0.66_1.00_0.01/e0add7_2ee341a95e2742109c6d64a0062cf331%7Emv2.png
source_url: https://www.snake-center.com
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-23T20:44:59.775139+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: image/png
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 3

View file

@ -384,3 +384,22 @@ location:
geonames_id: 1853412
feature_code: PPL
normalization_timestamp: '2025-12-09T06:53:31.061825+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:45:13.968373+00:00'
source_url: https://www.city.takasaki.gunma.jp/docs/2014011000353
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://www.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: https://www.city.takasaki.gunma.jp/docs/2014011000353
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-23T20:45:13.968373+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 2

View file

@ -213,3 +213,23 @@ location:
geonames_id: 1850746
geonames_name: Tamamura
feature_code: PPL
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:45:22.518565+00:00'
source_url: http://www.gpwu.ac.jp/org/lib
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.gpwu.ac.jp/assets/images/apple-touch-icon.png
source_url: http://www.gpwu.ac.jp/org/lib
css_selector: '[document] > html.font_change_normal.bg_change_normal > head >
link:nth-of-type(10)'
retrieved_on: '2025-12-23T20:45:22.518565+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 3

View file

@ -246,3 +246,28 @@ wikidata_enrichment:
- id: Q317135
label: Arata Isozaki
description: Japanese architect (1931-2022)
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:46:06.337035+00:00'
source_url: https://www.haramuseum.or.jp/jp/arc
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://www.haramuseum.or.jp/jp/arc/wp-content/themes/haramuseum/apple-touch-icon.png
source_url: https://www.haramuseum.or.jp/jp/arc
css_selector: '[document] > html.win.chrome > head > link:nth-of-type(2)'
retrieved_on: '2025-12-23T20:46:06.337035+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: https://www.haramuseum.or.jp/jp/arc/wp-content/themes/haramuseum/ogp.png
source_url: https://www.haramuseum.or.jp/jp/arc
css_selector: '[document] > html.win.chrome > head > meta:nth-of-type(7)'
retrieved_on: '2025-12-23T20:46:06.337035+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 2

View file

@ -241,3 +241,23 @@ wikidata_enrichment:
image: Takehisa Yumeji Ikaho Art Museum.JPG
commons_category: Takehisa Yumeji Ikaho Memorial
wikidata_image: Takehisa Yumeji Ikaho Art Museum.JPG
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:46:37.532898+00:00'
source_url: http://yumeji.or.jp
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: https://yumeji.or.jp/img/logo.jpg
source_url: http://yumeji.or.jp
css_selector: '[document] > html > body > article > header > table > tbody > tr
> th > a > img'
retrieved_on: '2025-12-23T20:46:37.532898+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: 伊香保 観光なら竹久夢二記念館(大正ロマンの森)|公益財団法人 竹久夢二伊香保記念館
summary:
total_claims: 1
has_primary_logo: true
has_favicon: false
has_og_image: false
favicon_count: 0

View file

@ -87,7 +87,8 @@ ghcid:
- ghcid: JP-10-TAK-F-KS
valid_from: '2025-12-14T12:00:00+00:00'
valid_to: null
reason: Initial GHCID for Three Stelae of Kozuke (type F=Feature) replacing deleted JP-10-GP-A-TS.yaml (was Takasaki city hallucination)
reason: Initial GHCID for Three Stelae of Kozuke (type F=Feature) replacing deleted
JP-10-GP-A-TS.yaml (was Takasaki city hallucination)
location_resolution:
method: WIKIDATA_LOCATION
country_code: JP
@ -143,14 +144,17 @@ provenance:
data_source: WIKIDATA
data_tier: TIER_3_CROWD_SOURCED
extraction_date: '2025-12-14T12:00:00+00:00'
extraction_method: Manual Wikidata extraction replacing hallucinated geographic entity
extraction_method: Manual Wikidata extraction replacing hallucinated geographic
entity
confidence_score: 0.95
notes:
- 'Created 2025-12-14: Replaces deleted JP-10-GP-A-TS.yaml which was Takasaki city (Q336438) hallucination'
- 'Type: F (Feature) - stone monuments are physical heritage features per GLAMORCUBESFIXPHDNT taxonomy'
- 'Created 2025-12-14: Replaces deleted JP-10-GP-A-TS.yaml which was Takasaki city
(Q336438) hallucination'
- 'Type: F (Feature) - stone monuments are physical heritage features per GLAMORCUBESFIXPHDNT
taxonomy'
- 'Comprises three stelae: Tago (Q11431029), Yamanoue (Q11465812), Kanaizawa (Q11646374)'
- 'UNESCO Memory of the World inscription: Three Cherished Stelae of Ancient Kozuke'
- 'Japan Special Historic Site designation'
- Japan Special Historic Site designation
location:
latitude: 36.323111111
longitude: 139.002361111
@ -164,10 +168,29 @@ location:
region_code: '10'
country: JP
formatted_address: Takasaki, Gunma, Japan
description: >-
The Three Stelae of Kozuke (上野三碑, Kozuke Sanpi) are three ancient stone monuments
located in Takasaki, Gunma Prefecture, Japan. Dating from the 7th-8th centuries CE,
they are written in Classical Chinese and are among the oldest stone inscriptions in
Japan. The three stelae are: Tago Stele (多胡碑, 711 CE), Yamanoue Stele (山上碑, 681 CE),
and Kanaizawa Stele (金井沢碑, 726 CE). They are designated as a Special Historic Site
of Japan and inscribed on the UNESCO Memory of the World Register.
description: 'The Three Stelae of Kozuke (上野三碑, Kozuke Sanpi) are three ancient stone
monuments located in Takasaki, Gunma Prefecture, Japan. Dating from the 7th-8th
centuries CE, they are written in Classical Chinese and are among the oldest stone
inscriptions in Japan. The three stelae are: Tago Stele (多胡碑, 711 CE), Yamanoue
Stele (山上碑, 681 CE), and Kanaizawa Stele (金井沢碑, 726 CE). They are designated as
a Special Historic Site of Japan and inscribed on the UNESCO Memory of the World
Register.'
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:46:44.714095+00:00'
source_url: https://www.city.takasaki.gunma.jp/info/sanpi
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://www.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: https://www.city.takasaki.gunma.jp/info/sanpi
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-23T20:46:44.714095+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 2

View file

@ -205,3 +205,22 @@ location:
geonames_id: 1851002
geonames_name: Takasaki
feature_code: PPLA2
logo_enrichment:
enrichment_timestamp: '2025-12-23T20:48:12.056807+00:00'
source_url: http://www.takasaki-u.ac.jp/library
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://library.takasaki-u.ac.jp/wp/wp-content/themes/tulibtwelve/favicon.ico
source_url: http://www.takasaki-u.ac.jp/library
css_selector: '[document] > html > head > link'
retrieved_on: '2025-12-23T20:48:12.056807+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: image/x-icon
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -199,3 +199,22 @@ wikidata_enrichment:
wikidata_web:
official_website: http://www.city.takasaki.gunma.jp/docs/2013122401257/
wikidata_official_website: http://www.city.takasaki.gunma.jp/docs/2013122401257/
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:05:42.815900+00:00'
source_url: http://www.city.takasaki.gunma.jp/docs/2013122401257
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: http://www.city.takasaki.gunma.jp/docs/2013122401257
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-23T21:05:42.815900+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 2

View file

@ -211,3 +211,28 @@ wikidata_enrichment:
wikidata_web:
official_website: https://lib.city.takasaki.gunma.jp/
wikidata_official_website: https://lib.city.takasaki.gunma.jp/
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:06:00.052914+00:00'
source_url: https://lib.city.takasaki.gunma.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: https://lib.city.takasaki.gunma.jp
css_selector: '[document] > html > head > link:nth-of-type(7)'
retrieved_on: '2025-12-23T21:06:00.052914+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg
source_url: https://lib.city.takasaki.gunma.jp
css_selector: '[document] > html > head > meta:nth-of-type(9)'
retrieved_on: '2025-12-23T21:06:00.052914+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 3

View file

@ -211,3 +211,28 @@ wikidata_enrichment:
wikidata_web:
official_website: https://lib.city.takasaki.gunma.jp/
wikidata_official_website: https://lib.city.takasaki.gunma.jp/
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:06:09.588778+00:00'
source_url: https://lib.city.takasaki.gunma.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: https://lib.city.takasaki.gunma.jp
css_selector: '[document] > html > head > link:nth-of-type(7)'
retrieved_on: '2025-12-23T21:06:09.588778+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg
source_url: https://lib.city.takasaki.gunma.jp
css_selector: '[document] > html > head > meta:nth-of-type(9)'
retrieved_on: '2025-12-23T21:06:09.588778+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 3

View file

@ -211,3 +211,28 @@ wikidata_enrichment:
wikidata_web:
official_website: https://lib.city.takasaki.gunma.jp/
wikidata_official_website: https://lib.city.takasaki.gunma.jp/
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:06:19.001109+00:00'
source_url: https://lib.city.takasaki.gunma.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: https://lib.city.takasaki.gunma.jp
css_selector: '[document] > html > head > link:nth-of-type(7)'
retrieved_on: '2025-12-23T21:06:19.001109+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg
source_url: https://lib.city.takasaki.gunma.jp
css_selector: '[document] > html > head > meta:nth-of-type(9)'
retrieved_on: '2025-12-23T21:06:19.001109+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 3

View file

@ -211,3 +211,28 @@ wikidata_enrichment:
wikidata_web:
official_website: https://lib.city.takasaki.gunma.jp/
wikidata_official_website: https://lib.city.takasaki.gunma.jp/
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:06:28.656638+00:00'
source_url: https://lib.city.takasaki.gunma.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: https://lib.city.takasaki.gunma.jp
css_selector: '[document] > html > head > link:nth-of-type(7)'
retrieved_on: '2025-12-23T21:06:28.656638+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg
source_url: https://lib.city.takasaki.gunma.jp
css_selector: '[document] > html > head > meta:nth-of-type(9)'
retrieved_on: '2025-12-23T21:06:28.656638+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 3

View file

@ -211,3 +211,28 @@ wikidata_enrichment:
wikidata_web:
official_website: https://lib.city.takasaki.gunma.jp/
wikidata_official_website: https://lib.city.takasaki.gunma.jp/
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:06:38.254797+00:00'
source_url: https://lib.city.takasaki.gunma.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: https://lib.city.takasaki.gunma.jp
css_selector: '[document] > html > head > link:nth-of-type(7)'
retrieved_on: '2025-12-23T21:06:38.254797+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg
source_url: https://lib.city.takasaki.gunma.jp
css_selector: '[document] > html > head > meta:nth-of-type(9)'
retrieved_on: '2025-12-23T21:06:38.254797+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 3

View file

@ -211,3 +211,28 @@ location:
geonames_id: 1851002
geonames_name: Takasaki
feature_code: PPLA2
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:06:46.412557+00:00'
source_url: http://tuc.opac.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://tuc.opac.jp/common/images/op4-favicon.ico
source_url: http://tuc.opac.jp
css_selector: '[document] > html > head > link'
retrieved_on: '2025-12-23T21:06:46.412557+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: image/vnd.microsoft.icon
favicon_sizes: ''
- claim_type: og_image_url
claim_value: https://tuc.opac.jp/unique/images/logo_0001.png
source_url: http://tuc.opac.jp
css_selector: '[document] > html > head > meta:nth-of-type(12)'
retrieved_on: '2025-12-23T21:06:46.412557+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 1

View file

@ -240,3 +240,28 @@ wikidata_enrichment:
- id: Q3178370
label: Taisei Corporation
description: Japanese corporation
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:06:55.674299+00:00'
source_url: https://lib.city.takasaki.gunma.jp/viewer/info.html?id=28
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://lib.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: https://lib.city.takasaki.gunma.jp/viewer/info.html?id=28
css_selector: '[document] > html > head > link:nth-of-type(7)'
retrieved_on: '2025-12-23T21:06:55.674299+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: 180x180
- claim_type: og_image_url
claim_value: https://lib.city.takasaki.gunma.jp/manage/contents/upload/5912be70d92ef.jpg
source_url: https://lib.city.takasaki.gunma.jp/viewer/info.html?id=28
css_selector: '[document] > html > head > meta:nth-of-type(9)'
retrieved_on: '2025-12-23T21:06:55.674299+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 3

View file

@ -205,3 +205,22 @@ location:
geonames_id: 1851002
geonames_name: Takasaki
feature_code: PPLA2
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:07:02.806581+00:00'
source_url: http://www.takasaki-u.ac.jp/library
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://library.takasaki-u.ac.jp/wp/wp-content/themes/tulibtwelve/favicon.ico
source_url: http://www.takasaki-u.ac.jp/library
css_selector: '[document] > html > head > link'
retrieved_on: '2025-12-23T21:07:02.806581+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: image/x-icon
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -222,3 +222,22 @@ location:
geonames_id: 1851002
geonames_name: Takasaki
feature_code: PPLA2
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:07:09.585806+00:00'
source_url: http://www.takasaki-u.ac.jp/library
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://library.takasaki-u.ac.jp/wp/wp-content/themes/tulibtwelve/favicon.ico
source_url: http://www.takasaki-u.ac.jp/library
css_selector: '[document] > html > head > link'
retrieved_on: '2025-12-23T21:07:09.585806+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: image/x-icon
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -236,3 +236,22 @@ wikidata_enrichment:
image: Kamitsukeno-sato Museum of Archaeology.jpg
commons_category: Kamitsukeno-sato Museum of Archaeology
wikidata_image: Kamitsukeno-sato Museum of Archaeology.jpg
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:07:15.901936+00:00'
source_url: https://www.city.takasaki.gunma.jp/docs/2014010701664
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://www.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: https://www.city.takasaki.gunma.jp/docs/2014010701664
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-23T21:07:15.901936+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 2

View file

@ -314,3 +314,31 @@ location:
postal_code: 370-1293
street_address: WATANUKIMACHI, Takasaki Shi, Gumma Ken, 370-1293
normalization_timestamp: '2025-12-09T06:54:38.070396+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:07:28.604419+00:00'
source_url: http://mmag.pref.gunma.jp
extraction_method: crawl4ai
claims:
- claim_type: logo_url
claim_value: https://mmag.pref.gunma.jp/cms/wp-content/themes/mmag/img/logo-txt.png
source_url: http://mmag.pref.gunma.jp
css_selector: '#TOP > div.wrap > header.sp > div.sp-nav > div.title > a.menu-logo
> img.contain'
retrieved_on: '2025-12-23T21:07:28.604419+00:00'
extraction_method: crawl4ai_header_logo
detection_confidence: high
alt_text: 群馬県立近代美術館
- claim_type: favicon_url
claim_value: https://mmag.pref.gunma.jp/cms/wp-content/themes/mmag/favicon.ico
source_url: http://mmag.pref.gunma.jp
css_selector: '[document] > html.fontM > head > link'
retrieved_on: '2025-12-23T21:07:28.604419+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 2
has_primary_logo: true
has_favicon: true
has_og_image: false
favicon_count: 1

View file

@ -250,3 +250,28 @@ location:
postal_code: 370-3511
street_address: KANEKOMACHI, Takasaki Shi, Gumma Ken, 370-3511
normalization_timestamp: '2025-12-09T06:54:38.092335+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:07:38.385059+00:00'
source_url: https://www.nippon-kinunosato.or.jp
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://www.nippon-kinunosato.or.jp/icon/safari-pinned-tab.svg
source_url: https://www.nippon-kinunosato.or.jp
css_selector: '[document] > html > head > link:nth-of-type(5)'
retrieved_on: '2025-12-23T21:07:38.385059+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: https://www.nippon-kinunosato.or.jp/sakura/wp-content/themes/kinunosato/assets/img/ogp.png
source_url: https://www.nippon-kinunosato.or.jp
css_selector: '[document] > html > head > meta:nth-of-type(10)'
retrieved_on: '2025-12-23T21:07:38.385059+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 5

View file

@ -219,3 +219,22 @@ wikidata_enrichment:
wikidata_media:
image: Malus sieboldii F.jpg
wikidata_image: Malus sieboldii F.jpg
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:07:45.481419+00:00'
source_url: http://www.city.takasaki.gunma.jp/docs/2017082200011
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: http://www.city.takasaki.gunma.jp/docs/2017082200011
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-23T21:07:45.481419+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 2

View file

@ -218,3 +218,22 @@ location:
geonames_id: 1851002
geonames_name: Takasaki
feature_code: PPLA2
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:07:56.267081+00:00'
source_url: http://www.city.takasaki.gunma.jp/docs/2014021900025
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: http://www.city.takasaki.gunma.jp/docs/2014021900025
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-23T21:07:56.267081+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 2

View file

@ -241,3 +241,22 @@ location:
postal_code: 370-0862
street_address: KATAOKAMACHI, Takasaki Shi, Gumma Ken, 370-0862
normalization_timestamp: '2025-12-09T06:54:38.205614+00:00'
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:08:04.330227+00:00'
source_url: https://www.city.takasaki.gunma.jp/docs/2014040100192
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: https://www.city.takasaki.gunma.jp/apple-touch-icon.png
source_url: https://www.city.takasaki.gunma.jp/docs/2014040100192
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-23T21:08:04.330227+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 2

View file

@ -201,3 +201,28 @@ wikidata_enrichment:
wikidata_web:
official_website: http://town.kanna.gunma.jp/index.php?key=muq1gopqk-751#_751
wikidata_official_website: http://town.kanna.gunma.jp/index.php?key=muq1gopqk-751#_751
logo_enrichment:
enrichment_timestamp: '2025-12-23T21:08:29.930154+00:00'
source_url: http://town.kanna.gunma.jp/index.php?key=muq1gopqk-751#_751
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.town.kanna.gunma.jp/theme/base/img_common/smartphone.png
source_url: http://town.kanna.gunma.jp/index.php?key=muq1gopqk-751#_751
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-23T21:08:29.930154+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
- claim_type: og_image_url
claim_value: http://www.town.kanna.gunma.jp/theme/base/img_common/ogp_noimage.png
source_url: http://town.kanna.gunma.jp/index.php?key=muq1gopqk-751#_751
css_selector: '[document] > html > head > meta:nth-of-type(8)'
retrieved_on: '2025-12-23T21:08:29.930154+00:00'
extraction_method: crawl4ai_meta_og
summary:
total_claims: 2
has_primary_logo: false
has_favicon: true
has_og_image: true
favicon_count: 2