enrich: logo enrichment progress (JP: 1500, CZ: 40 started)
This commit is contained in:
parent
8036eb5a3f
commit
4f6ca92084
38 changed files with 1314 additions and 252 deletions
|
|
@ -7016,7 +7016,47 @@
|
|||
"JP-10-KIT-M-MESM.yaml",
|
||||
"JP-10-MAE-A-GPA.yaml",
|
||||
"JP-10-MAE-L-GLI.yaml",
|
||||
"JP-10-MAE-L-GLM.yaml"
|
||||
"JP-10-MAE-L-GLM.yaml",
|
||||
"CZ-10-BRA-L-BSS.yaml",
|
||||
"CZ-10-BRE-A-LAPNP.yaml",
|
||||
"CZ-10-BRN-A-NPUB.yaml",
|
||||
"CZ-10-BRN-L-MZKSTK.yaml",
|
||||
"CZ-10-CEL-L-SSTK.yaml",
|
||||
"CZ-10-CES-A-ACNB.yaml",
|
||||
"CZ-10-CES-A-ACSSD.yaml",
|
||||
"CZ-10-CES-A-ACT.yaml",
|
||||
"CZ-10-DOL-L-VUVSR.yaml",
|
||||
"CZ-10-DOM-A-ANK.yaml",
|
||||
"CZ-10-HUS-L-URSK.yaml",
|
||||
"CZ-10-JIC-L-SDS.yaml",
|
||||
"CZ-10-JIL-L-BVUBVLS.yaml",
|
||||
"CZ-10-LIB-L-SLSR.yaml",
|
||||
"CZ-10-LIT-A-AHMP.yaml",
|
||||
"CZ-10-PAN-L-VPBSOISK.yaml",
|
||||
"CZ-10-PET-A-ANM.yaml",
|
||||
"CZ-10-PET-A-ANTM.yaml",
|
||||
"CZ-10-PET-A-MUAAVCRVV.yaml",
|
||||
"CZ-10-PET-A-NFA.yaml",
|
||||
"CZ-10-PNM-A-ACR.yaml",
|
||||
"CZ-10-PRA-A-ABIS.yaml",
|
||||
"CZ-10-PRA-A-AKPR.yaml",
|
||||
"CZ-10-PRA-A-AMVC.yaml",
|
||||
"CZ-10-PRA-A-ANBU.yaml",
|
||||
"CZ-10-PRA-A-ANBUS.yaml",
|
||||
"CZ-10-PRA-A-ANG.yaml",
|
||||
"CZ-10-PRA-A-APCR.yaml",
|
||||
"CZ-10-PRA-A-APH.yaml",
|
||||
"CZ-10-PRA-A-APS.yaml",
|
||||
"CZ-10-PRA-A-ASMVCR.yaml",
|
||||
"CZ-10-PRA-A-AUACVV.yaml",
|
||||
"CZ-10-PRA-A-AUMAVENA.yaml",
|
||||
"CZ-10-PRA-A-AUMAVESPR-archivalie_ulozene_mimo_archivy_v_evidenci_soa_pra.yaml",
|
||||
"CZ-10-PRA-A-AUMAVESPR.yaml",
|
||||
"CZ-10-PRA-A-AUMAVESPV.yaml",
|
||||
"CZ-10-PRA-A-AUMAVESPZ.yaml",
|
||||
"CZ-10-PRA-A-AUPZSI.yaml",
|
||||
"CZ-10-PRA-A-AZMVP.yaml",
|
||||
"CZ-10-PRA-A-BAKPR.yaml"
|
||||
],
|
||||
"last_index": 59
|
||||
"last_index": 39
|
||||
}
|
||||
|
|
@ -80,9 +80,10 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:39Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-07T00:00:20Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'City resolved 2025-12-07T00:35:14Z: XXX->BRE via Wikidata Q52679463 coords (50.0860,14.3893) -> Brevnov (GeoNames:3078748)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:44:46Z: Maps: Muzeum literatury - Památník národního písemnictví (conf:
|
||||
0.95); YouTube: not found'
|
||||
- 'City resolved 2025-12-07T00:35:14Z: XXX->BRE via Wikidata Q52679463 coords (50.0860,14.3893)
|
||||
-> Brevnov (GeoNames:3078748)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:44:46Z: Maps: Muzeum literatury
|
||||
- Památník národního písemnictví (conf: 0.95); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:03Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:27Z
|
||||
ch_annotator:
|
||||
|
|
@ -248,24 +249,29 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/111854463313628190722/reviews
|
||||
rating: 5
|
||||
relative_time_description: a year ago
|
||||
text: Great exposition of the history of Czech literature. Several interactive installations. A great section about oppression
|
||||
and censorship of the communiat period (just keep in mind that Czechslovakia experienced a "mild" version of communist
|
||||
occupation; in Ukraine, for example, it was way way worse). We came 1 hour before closing time and it was not enough,
|
||||
I would recommend to allocate at least 2 hours for your visit.
|
||||
text: Great exposition of the history of Czech literature. Several interactive
|
||||
installations. A great section about oppression and censorship of the communiat
|
||||
period (just keep in mind that Czechslovakia experienced a "mild" version of
|
||||
communist occupation; in Ukraine, for example, it was way way worse). We came
|
||||
1 hour before closing time and it was not enough, I would recommend to allocate
|
||||
at least 2 hours for your visit.
|
||||
publish_time: '2024-04-20T20:58:03.274534Z'
|
||||
- author_name: Evgeniia
|
||||
author_uri: https://www.google.com/maps/contrib/115318432713391005078/reviews
|
||||
rating: 4
|
||||
relative_time_description: 6 months ago
|
||||
text: Exposition about the Czech literature. Free entry. Cafe inside. WC inside as well
|
||||
text: Exposition about the Czech literature. Free entry. Cafe inside. WC inside
|
||||
as well
|
||||
publish_time: '2025-06-07T13:48:13.315370Z'
|
||||
- author_name: Julka Borghouts
|
||||
author_uri: https://www.google.com/maps/contrib/106169637951493716953/reviews
|
||||
rating: 4
|
||||
relative_time_description: 2 years ago
|
||||
text: It was interesting but I think it was because our guide who was the former director of the mudeum could say a lot
|
||||
of interesting facts. It's about a 2-3 hours visit to see everything after that you can have a nice walk/rest in the
|
||||
park. Would recommend ( idk for children because it was not interactive at all but there were cool things to see).
|
||||
text: It was interesting but I think it was because our guide who was the former
|
||||
director of the mudeum could say a lot of interesting facts. It's about a 2-3
|
||||
hours visit to see everything after that you can have a nice walk/rest in the
|
||||
park. Would recommend ( idk for children because it was not interactive at all
|
||||
but there were cool things to see).
|
||||
publish_time: '2023-08-25T16:47:39.617735Z'
|
||||
- author_name: Tetiana Yazlovetska
|
||||
author_uri: https://www.google.com/maps/contrib/111299840988431556455/reviews
|
||||
|
|
@ -279,10 +285,12 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/102002753176059146376/reviews
|
||||
rating: 3
|
||||
relative_time_description: 3 years ago
|
||||
text: I went there with my 2 years old toddler. So start was very unpleasant I was unable to take the potty to my small
|
||||
backpack (just in case) and water too. Which is big No No. But there wasn't a single person so my daughter was really
|
||||
good time. I wasn't able to read everything but somehow I had the feeling of chaotic curators work. So who knows?! There
|
||||
was everything and nothing special. But the price is super 100 CZK so perfect ❤️ and it's stroller friendly entry.
|
||||
text: I went there with my 2 years old toddler. So start was very unpleasant I
|
||||
was unable to take the potty to my small backpack (just in case) and water too.
|
||||
Which is big No No. But there wasn't a single person so my daughter was really
|
||||
good time. I wasn't able to read everything but somehow I had the feeling of
|
||||
chaotic curators work. So who knows?! There was everything and nothing special.
|
||||
But the price is super 100 CZK so perfect ❤️ and it's stroller friendly entry.
|
||||
publish_time: '2022-12-03T19:36:43.940881Z'
|
||||
opening_hours:
|
||||
open_now: false
|
||||
|
|
@ -316,11 +324,13 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 0.95
|
||||
entity_type: GRP.HER
|
||||
reasoning: The Google Maps candidate 'Muzeum literatury - Památník národního písemnictví' is a direct and descriptive
|
||||
name for the source institution 'Literární archiv Památníku národního písemnictví', both referencing the 'Památník národního
|
||||
písemnictví' (Museum of National Literature). The location in Prague, Czechia, matches the expected location for the
|
||||
national institution. The Google Place type 'museum' is a perfect match for the expected GRP.HER entity type. The business
|
||||
is operational and its website confirms the match.
|
||||
reasoning: The Google Maps candidate 'Muzeum literatury - Památník národního písemnictví'
|
||||
is a direct and descriptive name for the source institution 'Literární archiv
|
||||
Památníku národního písemnictví', both referencing the 'Památník národního písemnictví'
|
||||
(Museum of National Literature). The location in Prague, Czechia, matches the
|
||||
expected location for the national institution. The Google Place type 'museum'
|
||||
is a perfect match for the expected GRP.HER entity type. The business is operational
|
||||
and its website confirms the match.
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
ch_annotator_version: ch_annotator-v1_7_0
|
||||
|
|
@ -343,3 +353,28 @@ location:
|
|||
formatted_address: Pelléova 44/22, 160 00 Praha 6-Bubeneč, Czechia
|
||||
geonames_id: 3078748
|
||||
normalization_timestamp: '2025-12-09T06:49:27.270377+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:18:12.679007+00:00'
|
||||
source_url: http://www.pamatniknarodnihopisemnictvi.cz/o-literarnim-archivu
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.pamatniknarodnihopisemnictvi.cz/build/images/favicons/apple-touch-icon.4ae46d90.png
|
||||
source_url: http://www.pamatniknarodnihopisemnictvi.cz/o-literarnim-archivu
|
||||
css_selector: '[document] > html > head > link:nth-of-type(8)'
|
||||
retrieved_on: '2025-12-23T20:18:12.679007+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: 180x180
|
||||
- claim_type: og_image_url
|
||||
claim_value: https://img.pamatniknarodnihopisemnictvi.cz/userimages/og_image_scheme/1/e9ec123bd4b4af5b99049f8d9309363c_large.png
|
||||
source_url: http://www.pamatniknarodnihopisemnictvi.cz/o-literarnim-archivu
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(12)'
|
||||
retrieved_on: '2025-12-23T20:18:12.679007+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 4
|
||||
|
|
|
|||
|
|
@ -43,10 +43,11 @@ ghcid:
|
|||
latitude: 48.97447
|
||||
longitude: 14.47434
|
||||
ghcid_history:
|
||||
- previous_ghcid_component: "CB"
|
||||
new_ghcid_component: "CES"
|
||||
change_date: "2025-12-20T19:55:24Z"
|
||||
reason: "Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City: Ceske Budejovice"
|
||||
- previous_ghcid_component: CB
|
||||
new_ghcid_component: CES
|
||||
change_date: '2025-12-20T19:55:24Z'
|
||||
reason: 'Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City:
|
||||
Ceske Budejovice'
|
||||
- ghcid: CZ-10-CB-A-ACNB
|
||||
ghcid_numeric: 14327918484041920941
|
||||
valid_from: '2025-12-08T11:21:41.224062+00:00'
|
||||
|
|
@ -64,7 +65,8 @@ ghcid:
|
|||
reason: 'Region resolved via Wikidata P131: XX->10 (CZ-10)'
|
||||
- ghcid: CZ-10-CB-A-AČNB
|
||||
valid_from: '2025-12-07T00:19:32.878169+00:00'
|
||||
reason: 'Location resolved via CH-Annotator TOP.SET extraction: České -> Ceske Budejovice (GeoNames:3077916)'
|
||||
reason: 'Location resolved via CH-Annotator TOP.SET extraction: České -> Ceske
|
||||
Budejovice (GeoNames:3077916)'
|
||||
custodian_name:
|
||||
claim_type: custodian_name
|
||||
claim_value: Archiv České národní banky
|
||||
|
|
@ -93,10 +95,10 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-07T00:04:46Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'Location resolved 2025-12-07T00:19:32Z: CH-Annotator TOP.SET extraction ''České'' -> Ceske Budejovice (GeoNames:3077916,
|
||||
Region:31)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:44:56Z: Maps: State Regional Archive Ceske Budejovice (conf: 0.90); YouTube:
|
||||
not found'
|
||||
- 'Location resolved 2025-12-07T00:19:32Z: CH-Annotator TOP.SET extraction ''České''
|
||||
-> Ceske Budejovice (GeoNames:3077916, Region:31)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:44:56Z: Maps: State Regional Archive
|
||||
Ceske Budejovice (conf: 0.90); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:03Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:27Z
|
||||
ch_annotator:
|
||||
|
|
@ -216,7 +218,8 @@ wikidata_enrichment:
|
|||
headquarters_location:
|
||||
id: Q973974
|
||||
label: Prague 1
|
||||
description: administrative district, municipal district and municipal part of Prague
|
||||
description: administrative district, municipal district and municipal part
|
||||
of Prague
|
||||
country: &id006
|
||||
id: Q213
|
||||
label: Czech Republic
|
||||
|
|
@ -240,7 +243,8 @@ google_maps_enrichment:
|
|||
coordinates:
|
||||
latitude: 48.9764677
|
||||
longitude: 14.4845555
|
||||
formatted_address: 40, Rudolfovská tř. 70, 370 01 České Budějovice-České Budějovice 1, Czechia
|
||||
formatted_address: 40, Rudolfovská tř. 70, 370 01 České Budějovice-České Budějovice
|
||||
1, Czechia
|
||||
short_address: Rudolfovská tř. 70, České Budějovice 1
|
||||
phone_local: 386 701 214
|
||||
phone_international: +420 386 701 214
|
||||
|
|
@ -256,17 +260,19 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/106223614438373881734/reviews
|
||||
rating: 5
|
||||
relative_time_description: 9 months ago
|
||||
text: Hello, we attended Eleonore's lecture yesterday and it was great. Beautiful. I have never experienced such a beautiful
|
||||
lecture.
|
||||
text: Hello, we attended Eleonore's lecture yesterday and it was great. Beautiful.
|
||||
I have never experienced such a beautiful lecture.
|
||||
publish_time: '2025-02-26T09:05:14.246458Z'
|
||||
- author_name: Jakub Bouček (Opravdové příběhy)
|
||||
author_uri: https://www.google.com/maps/contrib/109262906392895391758/reviews
|
||||
rating: 5
|
||||
relative_time_description: 7 years ago
|
||||
text: 'State archives are one of the places where you can find real information about the history of a particular region,
|
||||
and the České Budějovice archive is a good example of this.
|
||||
text: 'State archives are one of the places where you can find real information
|
||||
about the history of a particular region, and the České Budějovice archive is
|
||||
a good example of this.
|
||||
|
||||
So if you want to find out details about the history of the city and its surroundings, the archive is the right place.'
|
||||
So if you want to find out details about the history of the city and its surroundings,
|
||||
the archive is the right place.'
|
||||
publish_time: '2018-03-31T13:23:16.449Z'
|
||||
- author_name: Hana Havlova
|
||||
author_uri: https://www.google.com/maps/contrib/109085353083285723508/reviews
|
||||
|
|
@ -315,12 +321,15 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 0.9
|
||||
entity_type: GRP.HER
|
||||
reasoning: '1. NAME MATCH: Partial but strong match. The source is ''Archiv České národní banky'' (Archive of the Czech
|
||||
National Bank), while the candidate is ''State Regional Archive Ceske Budejovice''. Although the names differ, both
|
||||
are archives, and the source Wikidata item points to ''Státní oblastní archiv v Českých Budějovicích'', which translates
|
||||
directly to the candidate''s name. 2. LOCATION MATCH: Perfect match. Both are in České Budějovice, Czechia. 3. TYPE
|
||||
MATCH: The Google Place types (''point_of_interest'', ''establishment'') are generic, but the name ''Archive'' and the
|
||||
website (ceskearchivy.cz) confirm it is an archive, which is a valid heritage type. 4. ENTITY TYPE: The institution
|
||||
reasoning: '1. NAME MATCH: Partial but strong match. The source is ''Archiv České
|
||||
národní banky'' (Archive of the Czech National Bank), while the candidate is
|
||||
''State Regional Archive Ceske Budejovice''. Although the names differ, both
|
||||
are archives, and the source Wikidata item points to ''Státní oblastní archiv
|
||||
v Českých Budějovicích'', which translates directly to the candidate''s name.
|
||||
2. LOCATION MATCH: Perfect match. Both are in České Budějovice, Czechia. 3.
|
||||
TYPE MATCH: The Google Place types (''point_of_interest'', ''establishment'')
|
||||
are generic, but the name ''Archive'' and the website (ceskearchivy.cz) confirm
|
||||
it is an archive, which is a valid heritage type. 4. ENTITY TYPE: The institution
|
||||
is an archive, which falls under the definition of a heritage institution (GRP.HER).'
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
|
|
@ -342,7 +351,41 @@ location:
|
|||
region_code: '10'
|
||||
country: CZ
|
||||
street_address: Rudolfovská tř. 70, České Budějovice 1
|
||||
formatted_address: 40, Rudolfovská tř. 70, 370 01 České Budějovice-České Budějovice 1, Czechia
|
||||
formatted_address: 40, Rudolfovská tř. 70, 370 01 České Budějovice-České Budějovice
|
||||
1, Czechia
|
||||
geonames_id: 3077916
|
||||
feature_code: PPLA
|
||||
normalization_timestamp: '2025-12-09T06:49:27.380425+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:18:24.032330+00:00'
|
||||
source_url: https://www.ceskearchivy.cz/statni-okresni-archivy/ceske-budejovice/soka-cb-uvod
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: logo_url
|
||||
claim_value: https://www.ceskearchivy.cz/images/INST_logo.png
|
||||
source_url: https://www.ceskearchivy.cz/statni-okresni-archivy/ceske-budejovice/soka-cb-uvod
|
||||
css_selector: '#mod-custom206 > p > a > img'
|
||||
retrieved_on: '2025-12-23T20:18:24.032330+00:00'
|
||||
extraction_method: crawl4ai_header_logo
|
||||
detection_confidence: high
|
||||
alt_text: ''
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.ceskearchivy.cz/favicon.ico
|
||||
source_url: https://www.ceskearchivy.cz/statni-okresni-archivy/ceske-budejovice/soka-cb-uvod
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:18:24.032330+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: image/vnd.microsoft.icon
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: https://www.ceskearchivy.cz/administrator/cache/preview/80b931d60f4be56fcd0c341aab8b9bc2.jpg
|
||||
source_url: https://www.ceskearchivy.cz/statni-okresni-archivy/ceske-budejovice/soka-cb-uvod
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:18:24.032330+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 3
|
||||
has_primary_logo: true
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -80,8 +80,10 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-07T00:01:20Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'City resolved 2025-12-07T00:28:15Z: XXX->LIT via Wikidata Q19672898 coords (50.0400,14.4949) -> Litochleby (GeoNames:3071686)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:45:16Z: Maps: Prague City Archives (conf: 1.00); YouTube: not found'
|
||||
- 'City resolved 2025-12-07T00:28:15Z: XXX->LIT via Wikidata Q19672898 coords (50.0400,14.4949)
|
||||
-> Litochleby (GeoNames:3071686)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:45:16Z: Maps: Prague City Archives
|
||||
(conf: 1.00); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:03Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:27Z
|
||||
ch_annotator:
|
||||
|
|
@ -203,7 +205,8 @@ wikidata_enrichment:
|
|||
description: accumulation of historical records of a town or city
|
||||
- id: Q2085381
|
||||
label: publishing company
|
||||
description: company that prints and distributes pressed goods or electronic media
|
||||
description: company that prints and distributes pressed goods or electronic
|
||||
media
|
||||
wikidata_instance_of: *id005
|
||||
wikidata_location:
|
||||
country: &id006
|
||||
|
|
@ -262,7 +265,8 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/109561705096115465097/reviews
|
||||
rating: 5
|
||||
relative_time_description: a year ago
|
||||
text: The building dates back to 1995, it's nice and very interesting. I recommend visiting the archive.
|
||||
text: The building dates back to 1995, it's nice and very interesting. I recommend
|
||||
visiting the archive.
|
||||
publish_time: '2024-04-21T15:15:20.793882Z'
|
||||
- author_name: Libor Šedivý
|
||||
author_uri: https://www.google.com/maps/contrib/112581391891260052369/reviews
|
||||
|
|
@ -274,20 +278,25 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/102671748185462032746/reviews
|
||||
rating: 3
|
||||
relative_time_description: 6 years ago
|
||||
text: You need to communicate at least two weeks before the planned visit so that you don't lose out unnecessarily. You
|
||||
won't get anything at first... In addition, you need to take into account that they will present you with 5 archival
|
||||
units in one day, i.e. not 5 cartons, but only 5 folders, regardless of whether it is a bookmark with one sheet or a
|
||||
package with hundreds of documents... The ambition to complete more extensive research requires much more time than
|
||||
we are used to in other archives. So much luck, patience and research happiness! 🍀
|
||||
text: You need to communicate at least two weeks before the planned visit so that
|
||||
you don't lose out unnecessarily. You won't get anything at first... In addition,
|
||||
you need to take into account that they will present you with 5 archival units
|
||||
in one day, i.e. not 5 cartons, but only 5 folders, regardless of whether it
|
||||
is a bookmark with one sheet or a package with hundreds of documents... The
|
||||
ambition to complete more extensive research requires much more time than we
|
||||
are used to in other archives. So much luck, patience and research happiness!
|
||||
🍀
|
||||
publish_time: '2019-12-01T07:46:56.111948Z'
|
||||
- author_name: Miroslav Havel
|
||||
author_uri: https://www.google.com/maps/contrib/109030248799737237070/reviews
|
||||
rating: 5
|
||||
relative_time_description: 7 years ago
|
||||
text: A modern archive building built in the 1990s, I was lucky enough to see the facilities. Willing and quirky workers,
|
||||
the profession of archivist probably requires a certain amount of perspective, a sense of humor and self-irony. If you
|
||||
come to a professional course, you can look forward not only to insightful information, but also to great comments and
|
||||
glosses on current events. It's worth it. 😉
|
||||
text: A modern archive building built in the 1990s, I was lucky enough to see
|
||||
the facilities. Willing and quirky workers, the profession of archivist probably
|
||||
requires a certain amount of perspective, a sense of humor and self-irony. If
|
||||
you come to a professional course, you can look forward not only to insightful
|
||||
information, but also to great comments and glosses on current events. It's
|
||||
worth it. 😉
|
||||
publish_time: '2018-03-17T09:34:30.117Z'
|
||||
opening_hours:
|
||||
open_now: false
|
||||
|
|
@ -321,10 +330,12 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 1.0
|
||||
entity_type: GRP.HER
|
||||
reasoning: Strong match confirmed by name translation, identical official website, and correct location. The name 'Archiv
|
||||
hlavního města Prahy' translates to 'Prague City Archives'. The source city is Prague, and the candidate address is
|
||||
in Praha 4 (Prague 4), Czechia. The provided website 'http://www.ahmp.cz/' matches the institution's official domain.
|
||||
While Google Place types are generic, the context and website confirm it is an archive, fitting the GRP.HER entity type.
|
||||
reasoning: Strong match confirmed by name translation, identical official website,
|
||||
and correct location. The name 'Archiv hlavního města Prahy' translates to 'Prague
|
||||
City Archives'. The source city is Prague, and the candidate address is in Praha
|
||||
4 (Prague 4), Czechia. The provided website 'http://www.ahmp.cz/' matches the
|
||||
institution's official domain. While Google Place types are generic, the context
|
||||
and website confirm it is an archive, fitting the GRP.HER entity type.
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
ch_annotator_version: ch_annotator-v1_7_0
|
||||
|
|
@ -347,3 +358,22 @@ location:
|
|||
formatted_address: Archivní 1280/6, 149 00 Praha 4-Chodov, Czechia
|
||||
geonames_id: 3071686
|
||||
normalization_timestamp: '2025-12-09T06:49:27.606055+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:18:48.316223+00:00'
|
||||
source_url: http://www.ahmp.cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.ahmp.cz/img/ahmp_favicon.ico
|
||||
source_url: http://www.ahmp.cz
|
||||
css_selector: '[document] > html > head > link'
|
||||
retrieved_on: '2025-12-23T20:18:48.316223+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 1
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -39,10 +39,11 @@ ghcid:
|
|||
city_label: Pelc Tyrolka
|
||||
geonames_id: 3068455
|
||||
ghcid_history:
|
||||
- previous_ghcid_component: "PT"
|
||||
new_ghcid_component: "PET"
|
||||
change_date: "2025-12-20T19:55:24Z"
|
||||
reason: "Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City: Pelc Tyrolka"
|
||||
- previous_ghcid_component: PT
|
||||
new_ghcid_component: PET
|
||||
change_date: '2025-12-20T19:55:24Z'
|
||||
reason: 'Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City:
|
||||
Pelc Tyrolka'
|
||||
- ghcid: XX-XX-XXX-A-ANM
|
||||
ghcid_numeric: 18249419148031109659
|
||||
valid_from: '2025-12-06T23:37:44.753389+00:00'
|
||||
|
|
@ -84,7 +85,8 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-06T23:59:55Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'City resolved 2025-12-07T00:31:35Z: XXX->PT via Wikidata Q25228907 coords (50.1100,14.4347) -> Pelc Tyrolka (GeoNames:3068455)'
|
||||
- 'City resolved 2025-12-07T00:31:35Z: XXX->PT via Wikidata Q25228907 coords (50.1100,14.4347)
|
||||
-> Pelc Tyrolka (GeoNames:3068455)'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:08Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:31Z
|
||||
- 'YouTube/Google Maps enrichment 2025-12-09T09:29:38Z: YouTube: not found'
|
||||
|
|
@ -115,8 +117,8 @@ ch_annotator:
|
|||
annotation_metadata:
|
||||
confidence_score: 0.85
|
||||
verified: false
|
||||
verification_date:
|
||||
verified_by:
|
||||
verification_date: null
|
||||
verified_by: null
|
||||
entity_claims:
|
||||
- claim_type: full_name
|
||||
claim_value: Archiv Národního muzea
|
||||
|
|
@ -189,8 +191,8 @@ wikidata_enrichment:
|
|||
instance_of: &id005
|
||||
- id: Q53566456
|
||||
label: museum archive
|
||||
description: archive established by a museum to collect, organize, preserve, and provide access to its organizational
|
||||
records
|
||||
description: archive established by a museum to collect, organize, preserve,
|
||||
and provide access to its organizational records
|
||||
- id: Q101470010
|
||||
label: specialized archives
|
||||
description: type of archives in Czechia
|
||||
|
|
@ -244,3 +246,28 @@ location:
|
|||
youtube_status: NOT_FOUND
|
||||
youtube_search_query: Archiv Národního muzea official
|
||||
youtube_search_timestamp: '2025-12-09T09:29:38.113936+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:18:56.138622+00:00'
|
||||
source_url: http://www.nm.cz/Studovny-a-badatelny/Archiv-Narodniho-muzea-studovna
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.nm.cz/file/ad1e3d8b659d4c5536c61a5d693fed81/4/favicon/nmicon.png
|
||||
source_url: http://www.nm.cz/Studovny-a-badatelny/Archiv-Narodniho-muzea-studovna
|
||||
css_selector: '[document] > html > head > link:nth-of-type(5)'
|
||||
retrieved_on: '2025-12-23T20:18:56.138622+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: image/png
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: https://www.nm.cz/file/0df9b1550064a10e390fd23d2a739d27/3293/Archiv_foto_terezim.jpg.jpg
|
||||
source_url: http://www.nm.cz/Studovny-a-badatelny/Archiv-Narodniho-muzea-studovna
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(4)'
|
||||
retrieved_on: '2025-12-23T20:18:56.138622+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -39,10 +39,11 @@ ghcid:
|
|||
city_label: Pelc Tyrolka
|
||||
geonames_id: 3068455
|
||||
ghcid_history:
|
||||
- previous_ghcid_component: "PT"
|
||||
new_ghcid_component: "PET"
|
||||
change_date: "2025-12-20T19:55:24Z"
|
||||
reason: "Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City: Pelc Tyrolka"
|
||||
- previous_ghcid_component: PT
|
||||
new_ghcid_component: PET
|
||||
change_date: '2025-12-20T19:55:24Z'
|
||||
reason: 'Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City:
|
||||
Pelc Tyrolka'
|
||||
- ghcid: XX-XX-XXX-A-ANTM
|
||||
ghcid_numeric: 9067919020428215504
|
||||
valid_from: '2025-12-06T23:37:44.282267+00:00'
|
||||
|
|
@ -84,7 +85,8 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:38Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-07T00:04:25Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'City resolved 2025-12-07T00:32:04Z: XXX->PT via Wikidata Q101474199 coords (50.1100,14.4347) -> Pelc Tyrolka (GeoNames:3068455)'
|
||||
- 'City resolved 2025-12-07T00:32:04Z: XXX->PT via Wikidata Q101474199 coords (50.1100,14.4347)
|
||||
-> Pelc Tyrolka (GeoNames:3068455)'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:08Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:31Z
|
||||
- 'YouTube/Google Maps enrichment 2025-12-09T09:29:38Z: YouTube: not found'
|
||||
|
|
@ -115,8 +117,8 @@ ch_annotator:
|
|||
annotation_metadata:
|
||||
confidence_score: 0.85
|
||||
verified: false
|
||||
verification_date:
|
||||
verified_by:
|
||||
verification_date: null
|
||||
verified_by: null
|
||||
entity_claims:
|
||||
- claim_type: full_name
|
||||
claim_value: Archiv Národního technického muzea
|
||||
|
|
@ -181,8 +183,8 @@ wikidata_enrichment:
|
|||
description: type of archives in Czechia
|
||||
- id: Q53566456
|
||||
label: museum archive
|
||||
description: archive established by a museum to collect, organize, preserve, and provide access to its organizational
|
||||
records
|
||||
description: archive established by a museum to collect, organize, preserve,
|
||||
and provide access to its organizational records
|
||||
wikidata_instance_of: *id005
|
||||
wikidata_location:
|
||||
headquarters_location:
|
||||
|
|
@ -222,3 +224,22 @@ location:
|
|||
youtube_status: NOT_FOUND
|
||||
youtube_search_query: Archiv Národního technického muzea official
|
||||
youtube_search_timestamp: '2025-12-09T09:29:38.777578+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:02.872590+00:00'
|
||||
source_url: https://www.ntm.cz/archiv-knihovna/archiv-ntm
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.ntm.cz/file/30dc8e5fefba6ceba5d690d796c861ec/2220/favicon/NTM%20EN%20%C4%8Derven%C3%A1%20negativ.png
|
||||
source_url: https://www.ntm.cz/archiv-knihovna/archiv-ntm
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:19:02.872590+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: image/png
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 1
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -279,3 +279,28 @@ youtube_status: NOT_FOUND
|
|||
youtube_search_query: Masarykův ústav a Archiv Akademie věd České republiky v.v.i.
|
||||
official
|
||||
youtube_search_timestamp: '2025-12-09T09:29:39.442991+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:08.054373+00:00'
|
||||
source_url: http://www.mua.cas.cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.mua.cas.cz/build/favicon/safari-pinned-tab.svg
|
||||
source_url: http://www.mua.cas.cz
|
||||
css_selector: '[document] > html > head > link:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:19:08.054373+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: https://mua.greendot.cz/build/img/hp-hero.jpg
|
||||
source_url: http://www.mua.cas.cz
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(10)'
|
||||
retrieved_on: '2025-12-23T20:19:08.054373+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 4
|
||||
|
|
|
|||
|
|
@ -39,10 +39,11 @@ ghcid:
|
|||
city_label: Pelc Tyrolka
|
||||
geonames_id: 3068455
|
||||
ghcid_history:
|
||||
- previous_ghcid_component: "PT"
|
||||
new_ghcid_component: "PET"
|
||||
change_date: "2025-12-20T19:57:18Z"
|
||||
reason: "Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City: Pelc Tyrolka"
|
||||
- previous_ghcid_component: PT
|
||||
new_ghcid_component: PET
|
||||
change_date: '2025-12-20T19:57:18Z'
|
||||
reason: 'Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City:
|
||||
Pelc Tyrolka'
|
||||
- ghcid: XX-XX-XXX-A-NFA
|
||||
ghcid_numeric: 15166324295331575978
|
||||
valid_from: '2025-12-06T23:37:43.718883+00:00'
|
||||
|
|
@ -295,3 +296,28 @@ location:
|
|||
youtube_status: NOT_FOUND
|
||||
youtube_search_query: Národní filmový archiv official
|
||||
youtube_search_timestamp: '2025-12-09T09:29:40.114231+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:13.185638+00:00'
|
||||
source_url: https://nfa.cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://nfa.cz/safari-pinned-tab.svg
|
||||
source_url: https://nfa.cz
|
||||
css_selector: '[document] > html.no-js.show--consent > head > link:nth-of-type(6)'
|
||||
retrieved_on: '2025-12-23T20:19:13.185638+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: https://nfa.cz/dokumenty/74703/image-thumb__74703__OGImage/zastupny-obrazek-open-graph.jpg
|
||||
source_url: https://nfa.cz
|
||||
css_selector: '[document] > html.no-js.show--consent > head > meta:nth-of-type(11)'
|
||||
retrieved_on: '2025-12-23T20:19:13.185638+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 5
|
||||
|
|
|
|||
|
|
@ -35,10 +35,11 @@ ghcid:
|
|||
city_label: Praha-Nove Mesto
|
||||
geonames_id: 11839017
|
||||
ghcid_history:
|
||||
- previous_ghcid_component: "PM"
|
||||
new_ghcid_component: "PNM"
|
||||
change_date: "2025-12-20T19:55:24Z"
|
||||
reason: "Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City: Praha-Nove Mesto"
|
||||
- previous_ghcid_component: PM
|
||||
new_ghcid_component: PNM
|
||||
change_date: '2025-12-20T19:55:24Z'
|
||||
reason: 'Fixed 2-letter city code to proper 3-letter code per AGENTS.md. City:
|
||||
Praha-Nove Mesto'
|
||||
- ghcid: CZ-10-PM-A-ACR
|
||||
ghcid_numeric: 5300371129343583721
|
||||
valid_from: '2025-12-08T11:21:33.063067+00:00'
|
||||
|
|
@ -56,7 +57,8 @@ ghcid:
|
|||
reason: 'Region resolved via Wikidata P131: XX->10 (CZ-10)'
|
||||
- ghcid: CZ-10-PM-A-AČR
|
||||
valid_from: '2025-12-07T00:27:25.913910+00:00'
|
||||
reason: 'City resolved via Wikidata Q28563975 coordinates: XXX->PM (Praha-Nove Mesto)'
|
||||
reason: 'City resolved via Wikidata Q28563975 coordinates: XXX->PM (Praha-Nove
|
||||
Mesto)'
|
||||
custodian_name:
|
||||
claim_type: custodian_name
|
||||
claim_value: Archiv Českého rozhlasu
|
||||
|
|
@ -84,8 +86,10 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:39Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-07T00:00:58Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'City resolved 2025-12-07T00:27:25Z: XXX->PM via Wikidata Q28563975 coords (50.0742,14.4428) -> Praha-Nove Mesto (GeoNames:11839017)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:45:47Z: Maps: rejected by LLM; YouTube: not found'
|
||||
- 'City resolved 2025-12-07T00:27:25Z: XXX->PM via Wikidata Q28563975 coords (50.0742,14.4428)
|
||||
-> Praha-Nove Mesto (GeoNames:11839017)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:45:47Z: Maps: rejected by LLM; YouTube:
|
||||
not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:03Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:27Z
|
||||
ch_annotator:
|
||||
|
|
@ -115,8 +119,8 @@ ch_annotator:
|
|||
annotation_metadata:
|
||||
confidence_score: 0.85
|
||||
verified: false
|
||||
verification_date:
|
||||
verified_by:
|
||||
verification_date: null
|
||||
verified_by: null
|
||||
entity_claims:
|
||||
- claim_type: full_name
|
||||
claim_value: Archiv Českého rozhlasu
|
||||
|
|
@ -195,7 +199,8 @@ wikidata_enrichment:
|
|||
headquarters_location:
|
||||
id: Q2444636
|
||||
label: Prague 2
|
||||
description: administrative district, municipal district and municipal part of Prague
|
||||
description: administrative district, municipal district and municipal part
|
||||
of Prague
|
||||
wikidata_country: *id005
|
||||
wikidata_located_in: *id006
|
||||
wikidata_organization:
|
||||
|
|
@ -214,10 +219,12 @@ wikidata_enrichment:
|
|||
google_maps_status: NO_MATCH
|
||||
google_maps_rejected:
|
||||
candidate_name: Český rozhlas
|
||||
rejection_reason: The Google Maps candidate 'Český rozhlas' is the main broadcasting corporation (Czech Radio), not the
|
||||
specific 'Archiv Českého rozhlasu' (Czech Radio Archives). Although the archive is part of this organization and located
|
||||
at the same address, they are distinct entities. The Google entry's types ('point_of_interest', 'establishment') do not
|
||||
specify an archive or other heritage institution, leading to a mismatch with the target entity.
|
||||
rejection_reason: The Google Maps candidate 'Český rozhlas' is the main broadcasting
|
||||
corporation (Czech Radio), not the specific 'Archiv Českého rozhlasu' (Czech Radio
|
||||
Archives). Although the archive is part of this organization and located at the
|
||||
same address, they are distinct entities. The Google entry's types ('point_of_interest',
|
||||
'establishment') do not specify an archive or other heritage institution, leading
|
||||
to a mismatch with the target entity.
|
||||
timestamp: '2025-12-08T19:45:46.803336+00:00'
|
||||
youtube_status: NOT_FOUND
|
||||
youtube_search_query: Archiv Českého rozhlasu official
|
||||
|
|
@ -237,3 +244,28 @@ location:
|
|||
original_timestamp: '2025-12-09T15:34:38.871222+00:00'
|
||||
geonames_name: Praha-Nové Město
|
||||
feature_code: PPL
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:18.778759+00:00'
|
||||
source_url: https://informace.rozhlas.cz/sluzby-archivu-7965113
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://informace.rozhlas.cz/sites/all/themes/custom/e7/apple-touch-icon-precomposed-144x144.png
|
||||
source_url: https://informace.rozhlas.cz/sluzby-archivu-7965113
|
||||
css_selector: '[document] > html.js.show--consent > head > link:nth-of-type(4)'
|
||||
retrieved_on: '2025-12-23T20:19:18.778759+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: 144x144
|
||||
- claim_type: og_image_url
|
||||
claim_value: https://portal.rozhlas.cz/sites/default/files/images/02923537.jpeg
|
||||
source_url: https://informace.rozhlas.cz/sluzby-archivu-7965113
|
||||
css_selector: '[document] > html.js.show--consent > head > meta:nth-of-type(15)'
|
||||
retrieved_on: '2025-12-23T20:19:18.778759+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 5
|
||||
|
|
|
|||
|
|
@ -75,9 +75,12 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:39Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-07T00:00:05Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'City resolved 2025-12-07T00:25:25Z: XXX->PRA via Wikidata Q46996293 coords (50.0913,14.4037) -> Prague (GeoNames:3067696)'
|
||||
- Removed incorrect wikidata_enrichment on 2025-12-08T08:18:45.447081+00:00. Re-enrichment required with proper matching.
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:45:54Z: Maps: rejected by LLM; YouTube: not found'
|
||||
- 'City resolved 2025-12-07T00:25:25Z: XXX->PRA via Wikidata Q46996293 coords (50.0913,14.4037)
|
||||
-> Prague (GeoNames:3067696)'
|
||||
- Removed incorrect wikidata_enrichment on 2025-12-08T08:18:45.447081+00:00. Re-enrichment
|
||||
required with proper matching.
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:45:54Z: Maps: rejected by LLM; YouTube:
|
||||
not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:27Z
|
||||
ch_annotator:
|
||||
|
|
@ -107,8 +110,8 @@ ch_annotator:
|
|||
annotation_metadata:
|
||||
confidence_score: 0.85
|
||||
verified: false
|
||||
verification_date:
|
||||
verified_by:
|
||||
verification_date: null
|
||||
verified_by: null
|
||||
entity_claims:
|
||||
- claim_type: full_name
|
||||
claim_value: Archiv Kanceláře prezidenta republiky
|
||||
|
|
@ -210,10 +213,11 @@ wikidata_enrichment:
|
|||
google_maps_status: NO_MATCH
|
||||
google_maps_rejected:
|
||||
candidate_name: Archiv Pražského hradu
|
||||
rejection_reason: The source institution 'Archiv Kanceláře prezidenta republiky' (Archive of the Office of the President
|
||||
of the Republic) and the Google Maps candidate 'Archiv Pražského hradu' (Prague Castle Archive) are two distinct entities.
|
||||
The former is the presidential archive, while the latter is the archive of Prague Castle. While both are archives located
|
||||
in Prague, they are not the same institution.
|
||||
rejection_reason: The source institution 'Archiv Kanceláře prezidenta republiky'
|
||||
(Archive of the Office of the President of the Republic) and the Google Maps candidate
|
||||
'Archiv Pražského hradu' (Prague Castle Archive) are two distinct entities. The
|
||||
former is the presidential archive, while the latter is the archive of Prague
|
||||
Castle. While both are archives located in Prague, they are not the same institution.
|
||||
timestamp: '2025-12-08T19:45:54.624391+00:00'
|
||||
youtube_status: NOT_FOUND
|
||||
youtube_search_query: Archiv Kanceláře prezidenta republiky official
|
||||
|
|
@ -233,3 +237,22 @@ location:
|
|||
original_timestamp: '2025-12-09T15:34:38.958575+00:00'
|
||||
geonames_name: Prague
|
||||
feature_code: PPLC
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:26.299080+00:00'
|
||||
source_url: http://www.prazskyhradarchiv.cz/archivKPR/cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.prazskyhradarchiv.cz/img/safari-pinned-tab.svg
|
||||
source_url: http://www.prazskyhradarchiv.cz/archivKPR/cz
|
||||
css_selector: '[document] > html.js > head > link:nth-of-type(5)'
|
||||
retrieved_on: '2025-12-23T20:19:26.299080+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 1
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 4
|
||||
|
|
|
|||
|
|
@ -41,7 +41,8 @@ ghcid:
|
|||
reason: 'Country resolved via Wikidata P17: XX→CZ'
|
||||
- ghcid: CZ-10-PRA-A-AMVČ
|
||||
valid_from: '2025-12-07T12:39:42.482491+00:00'
|
||||
reason: 'Location resolved from institution name pattern: ''Prague'' → region 10, city PRA'
|
||||
reason: 'Location resolved from institution name pattern: ''Prague'' → region
|
||||
10, city PRA'
|
||||
custodian_name:
|
||||
claim_type: custodian_name
|
||||
claim_value: Archiv Ministerstva vnitra ČR
|
||||
|
|
@ -67,7 +68,8 @@ provenance:
|
|||
confidence_score: 0.85
|
||||
notes:
|
||||
- 'Country resolved 2025-12-06T23:56:11Z: XX→CZ via Wikidata P17'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:45:59Z: Maps: National Archive (conf: 0.80); YouTube: not found'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:45:59Z: Maps: National Archive (conf:
|
||||
0.80); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:27Z
|
||||
|
|
@ -150,15 +152,16 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/107425710254765644916/reviews
|
||||
rating: 5
|
||||
relative_time_description: 10 months ago
|
||||
text: The staff are always very friendly. There's even a lady who speaks English very well. Even without the language,
|
||||
you can communicate. Everyone is helpful and extremely customer-friendly. I enjoy coming here.
|
||||
text: The staff are always very friendly. There's even a lady who speaks English
|
||||
very well. Even without the language, you can communicate. Everyone is helpful
|
||||
and extremely customer-friendly. I enjoy coming here.
|
||||
publish_time: '2025-02-06T21:43:14.918876Z'
|
||||
- author_name: Barunka G.
|
||||
author_uri: https://www.google.com/maps/contrib/102003756317069132015/reviews
|
||||
rating: 4
|
||||
relative_time_description: 5 years ago
|
||||
text: In the research room of the 1st department (Milady Horákové Street), they are very helpful and willing to help you
|
||||
search for archival materials.
|
||||
text: In the research room of the 1st department (Milady Horákové Street), they
|
||||
are very helpful and willing to help you search for archival materials.
|
||||
publish_time: '2020-08-12T16:42:30.619293Z'
|
||||
- author_name: Kamila Svobodova
|
||||
author_uri: https://www.google.com/maps/contrib/109299302074939194601/reviews
|
||||
|
|
@ -210,12 +213,15 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 0.8
|
||||
entity_type: GRP.HER
|
||||
reasoning: 'NAME MATCH: The candidate name ''National Archive'' is a general but plausible English translation for the
|
||||
source ''Archiv Ministerstva vnitra ČR''. The website nacr.cz confirms it is the National Archives of the Czech Republic,
|
||||
which aligns with the source being a ministry archive. LOCATION MATCH: The address is in Praha (Prague), Czechia, matching
|
||||
the expected country ''CZ''. TYPE MATCH: The Google Place type ''library'' is an acceptable heritage type. The website
|
||||
confirms it is an archive institution. ENTITY TYPE: It is a heritage institution (archive). Confidence is not 1.0 due
|
||||
to the generic English name and lack of a more direct name translation, but the evidence strongly supports a match.'
|
||||
reasoning: 'NAME MATCH: The candidate name ''National Archive'' is a general but
|
||||
plausible English translation for the source ''Archiv Ministerstva vnitra ČR''.
|
||||
The website nacr.cz confirms it is the National Archives of the Czech Republic,
|
||||
which aligns with the source being a ministry archive. LOCATION MATCH: The address
|
||||
is in Praha (Prague), Czechia, matching the expected country ''CZ''. TYPE MATCH:
|
||||
The Google Place type ''library'' is an acceptable heritage type. The website
|
||||
confirms it is an archive institution. ENTITY TYPE: It is a heritage institution
|
||||
(archive). Confidence is not 1.0 due to the generic English name and lack of
|
||||
a more direct name translation, but the evidence strongly supports a match.'
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
ch_annotator_version: ch_annotator-v1_7_0
|
||||
|
|
@ -236,3 +242,38 @@ location:
|
|||
street_address: M. Horákové 5, Praha 6-Hradčany
|
||||
formatted_address: 133, M. Horákové 5, 160 00 Praha 6-Hradčany, Czechia
|
||||
normalization_timestamp: '2025-12-09T06:49:27.868521+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:31.631386+00:00'
|
||||
source_url: http://www.nacr.cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: logo_url
|
||||
claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/logo_na_en.png
|
||||
source_url: http://www.nacr.cz
|
||||
css_selector: '#wrapper-navbar > header.header > div.header__main:nth-of-type(2)
|
||||
> nav.navbar.navbar-expand-lg > div.navbar-mobile-top > div.navbar-brand > a
|
||||
> img'
|
||||
retrieved_on: '2025-12-23T20:19:31.631386+00:00'
|
||||
extraction_method: crawl4ai_header_logo
|
||||
detection_confidence: high
|
||||
alt_text: National Archives
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/favicon/apple-icon-180x180.png
|
||||
source_url: http://www.nacr.cz
|
||||
css_selector: '[document] > html > head > link:nth-of-type(9)'
|
||||
retrieved_on: '2025-12-23T20:19:31.631386+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: 180x180
|
||||
- claim_type: og_image_url
|
||||
claim_value: https://www.nacr.cz/wp-content/uploads/2020/04/podatelna2_22-1.jpg
|
||||
source_url: http://www.nacr.cz
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(17)'
|
||||
retrieved_on: '2025-12-23T20:19:31.631386+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 3
|
||||
has_primary_logo: true
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 13
|
||||
|
|
|
|||
|
|
@ -30,7 +30,8 @@ ghcid:
|
|||
city_code: PRA
|
||||
method: WIKIDATA_LOCATION_RESEARCH
|
||||
resolution_timestamp: '2025-12-06T23:54:40.395769+00:00'
|
||||
resolution_notes: National Security Authority Archive in Prague (national security office)
|
||||
resolution_notes: National Security Authority Archive in Prague (national security
|
||||
office)
|
||||
ghcid_history:
|
||||
- ghcid: CZ-10-PRA-A-ANBU
|
||||
ghcid_numeric: 16528725694186928927
|
||||
|
|
@ -73,8 +74,10 @@ provenance:
|
|||
confidence_score: 0.85
|
||||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
|
||||
- Removed incorrect wikidata_enrichment on 2025-12-08T08:18:45.454454+00:00. Re-enrichment required with proper matching.
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:08Z: Maps: National Security Authority (conf: 1.00); YouTube: not found'
|
||||
- Removed incorrect wikidata_enrichment on 2025-12-08T08:18:45.454454+00:00. Re-enrichment
|
||||
required with proper matching.
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:08Z: Maps: National Security
|
||||
Authority (conf: 1.00); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:27Z
|
||||
ch_annotator:
|
||||
|
|
@ -217,10 +220,12 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/105428557309487306330/reviews
|
||||
rating: 4
|
||||
relative_time_description: 6 years ago
|
||||
text: Beer was already being brewed here at the end of the 18th century, and the Košířský brewery was successful. At the
|
||||
end of the 19th century, its annual output was 40,000 hl. After the First World War, things got worse and worse, and
|
||||
competition grew stronger. The end came in 1934... The buildings themselves survived the change of owners and are in
|
||||
good condition today. They represent a nice example of industrial buildings of the time, even with a preserved factory
|
||||
text: Beer was already being brewed here at the end of the 18th century, and the
|
||||
Košířský brewery was successful. At the end of the 19th century, its annual
|
||||
output was 40,000 hl. After the First World War, things got worse and worse,
|
||||
and competition grew stronger. The end came in 1934... The buildings themselves
|
||||
survived the change of owners and are in good condition today. They represent
|
||||
a nice example of industrial buildings of the time, even with a preserved factory
|
||||
chimney.
|
||||
publish_time: '2019-10-17T15:05:18.213430Z'
|
||||
- author_name: Ivo Novotný
|
||||
|
|
@ -273,12 +278,15 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 1.0
|
||||
entity_type: GRP.HER
|
||||
reasoning: 'The Google Maps place is a match. 1. NAME MATCH: ''National Security Authority'' is the English translation
|
||||
of ''Národní bezpečnostního úřadu''. The source name, ''Archiv Národního bezpečnostního úřadu'', identifies this place
|
||||
as the archive of that authority. The website ''nbu.cz'' confirms the identity. 2. LOCATION MATCH: The address is in
|
||||
Praha, Czechia, matching the source country (CZ). 3. TYPE MATCH: While the Google Place type is ''local_government_office'',
|
||||
this is not grounds for rejection. The source explicitly identifies the institution as an ''Archiv'', which falls under
|
||||
the GRP.HER definition. 4. ENTITY TYPE: The institution is an archive, a type of heritage custodian.'
|
||||
reasoning: 'The Google Maps place is a match. 1. NAME MATCH: ''National Security
|
||||
Authority'' is the English translation of ''Národní bezpečnostního úřadu''.
|
||||
The source name, ''Archiv Národního bezpečnostního úřadu'', identifies this
|
||||
place as the archive of that authority. The website ''nbu.cz'' confirms the
|
||||
identity. 2. LOCATION MATCH: The address is in Praha, Czechia, matching the
|
||||
source country (CZ). 3. TYPE MATCH: While the Google Place type is ''local_government_office'',
|
||||
this is not grounds for rejection. The source explicitly identifies the institution
|
||||
as an ''Archiv'', which falls under the GRP.HER definition. 4. ENTITY TYPE:
|
||||
The institution is an archive, a type of heritage custodian.'
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
ch_annotator_version: ch_annotator-v1_7_0
|
||||
|
|
@ -299,3 +307,22 @@ location:
|
|||
street_address: Na Popelce 16/2, Praha 5
|
||||
formatted_address: Na Popelce 16/2, 150 06 Praha 5, Czechia
|
||||
normalization_timestamp: '2025-12-09T06:49:27.913673+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:36.697627+00:00'
|
||||
source_url: http://www.nbu.cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.nbu.cz/templates/nbu/favicon.ico
|
||||
source_url: http://www.nbu.cz
|
||||
css_selector: '[document] > html > head > link:nth-of-type(2)'
|
||||
retrieved_on: '2025-12-23T20:19:36.697627+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: image/vnd.microsoft.icon
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 1
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -41,7 +41,8 @@ ghcid:
|
|||
reason: 'Country resolved via Wikidata P17: XX→CZ'
|
||||
- ghcid: CZ-10-PRA-A-ANBÚS
|
||||
valid_from: '2025-12-07T12:39:42.484117+00:00'
|
||||
reason: 'Location resolved from institution name pattern: ''Prague'' → region 10, city PRA'
|
||||
reason: 'Location resolved from institution name pattern: ''Prague'' → region
|
||||
10, city PRA'
|
||||
custodian_name:
|
||||
claim_type: custodian_name
|
||||
claim_value: Archiv Národního bezpečnostního úřadu - specializovaný
|
||||
|
|
@ -67,8 +68,9 @@ provenance:
|
|||
confidence_score: 0.85
|
||||
notes:
|
||||
- 'Country resolved 2025-12-06T23:56:12Z: XX→CZ via Wikidata P17'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:12Z: Maps: Ústav pro studium totalitních režimů - Archiv bezpečnostních
|
||||
složek - Badatelna Na Struze (conf: 0.95); YouTube: not found'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:12Z: Maps: Ústav pro studium
|
||||
totalitních režimů - Archiv bezpečnostních složek - Badatelna Na Struze (conf:
|
||||
0.95); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:27Z
|
||||
|
|
@ -128,7 +130,8 @@ ch_annotator:
|
|||
creation_method: create_custodian_from_ch_annotator.py
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJz-raT_CUC0cRzgWQGob4kwc
|
||||
name: Ústav pro studium totalitních režimů - Archiv bezpečnostních složek - Badatelna Na Struze
|
||||
name: Ústav pro studium totalitních režimů - Archiv bezpečnostních složek - Badatelna
|
||||
Na Struze
|
||||
fetch_timestamp: '2025-12-08T19:46:09.193225+00:00'
|
||||
api_status: OK
|
||||
coordinates:
|
||||
|
|
@ -150,36 +153,41 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/103022670456700368685/reviews
|
||||
rating: 5
|
||||
relative_time_description: 6 years ago
|
||||
text: An office where people with passion for the cause work. Experts who can advise and help with research as much as
|
||||
possible. Safety rules are bearable and understandable at the given place. An agreed meeting at a specific time in which
|
||||
a specialist in the given issue will also be dedicated to you. This is how every office should work.
|
||||
text: An office where people with passion for the cause work. Experts who can
|
||||
advise and help with research as much as possible. Safety rules are bearable
|
||||
and understandable at the given place. An agreed meeting at a specific time
|
||||
in which a specialist in the given issue will also be dedicated to you. This
|
||||
is how every office should work.
|
||||
publish_time: '2018-12-11T20:04:25.713896388Z'
|
||||
- author_name: Salziger Reiter
|
||||
author_uri: https://www.google.com/maps/contrib/109315899389057527570/reviews
|
||||
rating: 5
|
||||
relative_time_description: a year ago
|
||||
text: Very helpful and accommodating employees. The request was processed quite quickly and the handover took place as
|
||||
agreed.
|
||||
text: Very helpful and accommodating employees. The request was processed quite
|
||||
quickly and the handover took place as agreed.
|
||||
publish_time: '2024-08-03T15:09:18.204573Z'
|
||||
- author_name: Michala Pickova
|
||||
author_uri: https://www.google.com/maps/contrib/105350789437390454077/reviews
|
||||
rating: 5
|
||||
relative_time_description: 7 years ago
|
||||
text: An official miracle. Pleasant people from the gatekeeper to the research room, extremely helpful and willing, the
|
||||
ladies in the research room were very nice. More such miracles ☺
|
||||
text: An official miracle. Pleasant people from the gatekeeper to the research
|
||||
room, extremely helpful and willing, the ladies in the research room were very
|
||||
nice. More such miracles ☺
|
||||
publish_time: '2018-07-17T12:48:37.042Z'
|
||||
- author_name: Michaela Blaháková
|
||||
author_uri: https://www.google.com/maps/contrib/103600310747938101580/reviews
|
||||
rating: 5
|
||||
relative_time_description: 2 years ago
|
||||
text: Communication was perfect, everyone was very nice and helpful, nothing was a problem and on the contrary they advised
|
||||
and helped with everything. Thank you very much.
|
||||
text: Communication was perfect, everyone was very nice and helpful, nothing was
|
||||
a problem and on the contrary they advised and helped with everything. Thank
|
||||
you very much.
|
||||
publish_time: '2022-12-09T21:22:32.106182Z'
|
||||
- author_name: Vasil “Ben Lee Meier” Mohorita
|
||||
author_uri: https://www.google.com/maps/contrib/108576684499626840750/reviews
|
||||
rating: 5
|
||||
relative_time_description: 5 years ago
|
||||
text: Thank you very much for your help in finding and subsequently publishing information and facts about Operation Benjamin...
|
||||
text: Thank you very much for your help in finding and subsequently publishing
|
||||
information and facts about Operation Benjamin...
|
||||
publish_time: '2020-10-02T21:21:44.310941Z'
|
||||
opening_hours:
|
||||
open_now: false
|
||||
|
|
@ -213,11 +221,14 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 0.95
|
||||
entity_type: GRP.HER
|
||||
reasoning: The source name is 'Archiv Národního bezpečnostního úřadu' (Archive of the National Security Office). The candidate
|
||||
name, 'Ústav pro studium totalitních režimů - Archiv bezpečnostních složek - Badatelna Na Struze', refers to the successor
|
||||
institution, which took over the archival responsibilities. 'Archiv bezpečnostních složek' (Archive of the Security
|
||||
Forces) is the modern name for the same collection. The location is correct (Prague, Czechia). While the generic Google
|
||||
types lack 'archive', the website and detailed name confirm it is an archive. The entity type is a correct match.
|
||||
reasoning: The source name is 'Archiv Národního bezpečnostního úřadu' (Archive
|
||||
of the National Security Office). The candidate name, 'Ústav pro studium totalitních
|
||||
režimů - Archiv bezpečnostních složek - Badatelna Na Struze', refers to the
|
||||
successor institution, which took over the archival responsibilities. 'Archiv
|
||||
bezpečnostních složek' (Archive of the Security Forces) is the modern name for
|
||||
the same collection. The location is correct (Prague, Czechia). While the generic
|
||||
Google types lack 'archive', the website and detailed name confirm it is an
|
||||
archive. The entity type is a correct match.
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
ch_annotator_version: ch_annotator-v1_7_0
|
||||
|
|
@ -238,3 +249,32 @@ location:
|
|||
street_address: Na Struze 229, Nové Město
|
||||
formatted_address: 3, Na Struze 229, Nové Město, 110 00 Praha-Praha 1, Czechia
|
||||
normalization_timestamp: '2025-12-09T06:49:27.953513+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:42.765327+00:00'
|
||||
source_url: http://www.abscr.cz/cs/provoz-badatelen#struha
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: logo_url
|
||||
claim_value: https://www.abscr.cz/wp-content/themes/ustrcr/img/logo.png
|
||||
source_url: http://www.abscr.cz/cs/provoz-badatelen#struha
|
||||
css_selector: '[document] > html.no-js > body.wp-singular.page-template-default
|
||||
> div.container:nth-of-type(2) > header.header.cf > div.header__between.cf:nth-of-type(2)
|
||||
> p.header__logo > a > img'
|
||||
retrieved_on: '2025-12-23T20:19:42.765327+00:00'
|
||||
extraction_method: crawl4ai_header_logo
|
||||
detection_confidence: high
|
||||
alt_text: Archiv bezpečnostních složek
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.abscr.cz/wp-content/themes/ustrcr/img/favicon.ico
|
||||
source_url: http://www.abscr.cz/cs/provoz-badatelen#struha
|
||||
css_selector: '[document] > html.no-js > head > link'
|
||||
retrieved_on: '2025-12-23T20:19:42.765327+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: true
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -80,8 +80,10 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-07T00:04:43Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'City resolved 2025-12-07T00:27:41Z: XXX->PRA via Wikidata Q101475944 coords (50.0875,14.4214) -> Prague (GeoNames:3067696)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:20Z: Maps: Czech Police Museum (conf: 1.00); YouTube: not found'
|
||||
- 'City resolved 2025-12-07T00:27:41Z: XXX->PRA via Wikidata Q101475944 coords (50.0875,14.4214)
|
||||
-> Prague (GeoNames:3067696)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:20Z: Maps: Czech Police Museum
|
||||
(conf: 1.00); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
|
||||
ch_annotator:
|
||||
|
|
@ -208,17 +210,20 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/110876883927358051472/reviews
|
||||
rating: 3
|
||||
relative_time_description: 2 months ago
|
||||
text: '1. I will start with the only & the biggest drawback in the Museum. There are NO English audio guides, NO information
|
||||
display in English. It become completely impossible to translate each and every wonderful thing kept in the museum.
|
||||
text: '1. I will start with the only & the biggest drawback in the Museum. There
|
||||
are NO English audio guides, NO information display in English. It become completely
|
||||
impossible to translate each and every wonderful thing kept in the museum.
|
||||
|
||||
|
||||
2. However, I do compliment the efforts which have gone behind in collecting & setting up each and every artefact with
|
||||
such care & diligence. The aspects of homicide, forensics, uniforms, weapons, equipment’s used & being presently used
|
||||
by the forces & shown for display is just commendable.
|
||||
2. However, I do compliment the efforts which have gone behind in collecting
|
||||
& setting up each and every artefact with such care & diligence. The aspects
|
||||
of homicide, forensics, uniforms, weapons, equipment’s used & being presently
|
||||
used by the forces & shown for display is just commendable.
|
||||
|
||||
|
||||
3. If the museum is visited with due interest, then 2 hours are required, ticket per person is very minimal, paid parking
|
||||
is available, washroom is available within the facility.
|
||||
3. If the museum is visited with due interest, then 2 hours are required, ticket
|
||||
per person is very minimal, paid parking is available, washroom is available
|
||||
within the facility.
|
||||
|
||||
|
||||
A must visit place.'
|
||||
|
|
@ -227,31 +232,36 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/107353925454189365150/reviews
|
||||
rating: 5
|
||||
relative_time_description: 3 months ago
|
||||
text: Large museum, all possible aspects of police activities and history from the end of 18th century are covered. There
|
||||
are some interactive parts (including musical instruments, Identikit computer tool, etc), although most of the exposition
|
||||
are old-fashioned glass showcases. The building itself is also interesting, it's former Augustines monastery.
|
||||
text: Large museum, all possible aspects of police activities and history from
|
||||
the end of 18th century are covered. There are some interactive parts (including
|
||||
musical instruments, Identikit computer tool, etc), although most of the exposition
|
||||
are old-fashioned glass showcases. The building itself is also interesting,
|
||||
it's former Augustines monastery.
|
||||
publish_time: '2025-08-14T13:30:50.341602510Z'
|
||||
- author_name: Francis
|
||||
author_uri: https://www.google.com/maps/contrib/112502021501125017989/reviews
|
||||
rating: 5
|
||||
relative_time_description: 5 months ago
|
||||
text: Interesting museum. The museum is huge and it will take you around an hour to get through. It’s also cheap to access.
|
||||
The museum covers a substantial amount of history. The only disappointment is that it’s not friendly to English speakers
|
||||
text: Interesting museum. The museum is huge and it will take you around an hour
|
||||
to get through. It’s also cheap to access. The museum covers a substantial amount
|
||||
of history. The only disappointment is that it’s not friendly to English speakers
|
||||
as mostly everything is in Czech only. Still worth a visit though.
|
||||
publish_time: '2025-07-03T08:19:55.748706435Z'
|
||||
- author_name: grace frances
|
||||
author_uri: https://www.google.com/maps/contrib/102980876546458200665/reviews
|
||||
rating: 5
|
||||
relative_time_description: 4 months ago
|
||||
text: This was amazing! There was so so much to see and there were interactive areas of the museum. Me and my friends
|
||||
had a really fun time and i would highly recommend.
|
||||
text: This was amazing! There was so so much to see and there were interactive
|
||||
areas of the museum. Me and my friends had a really fun time and i would highly
|
||||
recommend.
|
||||
publish_time: '2025-08-05T12:35:18.729156215Z'
|
||||
- author_name: Gordon Crawford
|
||||
author_uri: https://www.google.com/maps/contrib/117476350871498178843/reviews
|
||||
rating: 5
|
||||
relative_time_description: 3 months ago
|
||||
text: Really big museum so to do it justice give yourself plenty of time. Well laid out. Lots of motorcycles, uniforms
|
||||
and all aspects of the history of the police.
|
||||
text: Really big museum so to do it justice give yourself plenty of time. Well
|
||||
laid out. Lots of motorcycles, uniforms and all aspects of the history of the
|
||||
police.
|
||||
publish_time: '2025-08-21T18:57:16.423398324Z'
|
||||
opening_hours:
|
||||
open_now: false
|
||||
|
|
@ -285,11 +295,13 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 1.0
|
||||
entity_type: GRP.HER
|
||||
reasoning: 'NAME MATCH: The Google Place name ''Czech Police Museum'' is a direct English translation of the source name
|
||||
''Archiv Policie České republiky'' (Archive of the Police of the Czech Republic), as confirmed by the museum''s website
|
||||
which states it is part of the police archives. TYPE MATCH: The Google Place type ''museum'' is in the list of expected
|
||||
types for GRP.HER. LOCATION MATCH: The address is in Praha (Prague), Czechia, matching the country ''CZ'' of the source.
|
||||
ENTITY TYPE: It is a museum, a type of heritage custodian.'
|
||||
reasoning: 'NAME MATCH: The Google Place name ''Czech Police Museum'' is a direct
|
||||
English translation of the source name ''Archiv Policie České republiky'' (Archive
|
||||
of the Police of the Czech Republic), as confirmed by the museum''s website
|
||||
which states it is part of the police archives. TYPE MATCH: The Google Place
|
||||
type ''museum'' is in the list of expected types for GRP.HER. LOCATION MATCH:
|
||||
The address is in Praha (Prague), Czechia, matching the country ''CZ'' of the
|
||||
source. ENTITY TYPE: It is a museum, a type of heritage custodian.'
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
ch_annotator_version: ch_annotator-v1_7_0
|
||||
|
|
@ -312,3 +324,23 @@ location:
|
|||
formatted_address: Ke Karlovu 453/1, Nové Město, 120 00 Praha-Praha 2, Czechia
|
||||
geonames_id: 3067696
|
||||
normalization_timestamp: '2025-12-09T06:49:28.022072+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:50.538752+00:00'
|
||||
source_url: https://www.muzeumpolicie.cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.muzeumpolicie.cz/img/SERVER_logo.png
|
||||
source_url: https://www.muzeumpolicie.cz
|
||||
css_selector: '[document] > html.fontawesome-i2svg-active.fontawesome-i2svg-complete
|
||||
> head > link:nth-of-type(6)'
|
||||
retrieved_on: '2025-12-23T20:19:50.538752+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: image/x-icon
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 1
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -79,9 +79,11 @@ provenance:
|
|||
confidence_score: 0.85
|
||||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:39Z: XX→CZ via Wikidata P17'
|
||||
- 'City resolved 2025-12-07T00:35:11Z: XXX->PRA via Wikidata Q46996155 coords (50.0902,14.3987) -> Prague (GeoNames:3067696)'
|
||||
- 'City resolved 2025-12-07T00:35:11Z: XXX->PRA via Wikidata Q46996155 coords (50.0902,14.3987)
|
||||
-> Prague (GeoNames:3067696)'
|
||||
- 'Region resolved 2025-12-07T11:29:19Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:23Z: Maps: Archiv Pražského hradu (conf: 1.00); YouTube: not found'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:23Z: Maps: Archiv Pražského hradu
|
||||
(conf: 1.00); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
|
||||
ch_annotator:
|
||||
|
|
@ -256,20 +258,22 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/105730745460531884055/reviews
|
||||
rating: 4
|
||||
relative_time_description: 5 years ago
|
||||
text: 'The archives of prague castle with all documents are preserved here. The artifacts and scrolls were found in the
|
||||
1920 and preserved.
|
||||
text: 'The archives of prague castle with all documents are preserved here. The
|
||||
artifacts and scrolls were found in the 1920 and preserved.
|
||||
|
||||
|
||||
All the repairs , restoration about the place is maintained and organised well.
|
||||
|
||||
|
||||
The tour is very interesting with the guide explaining all details about this place.'
|
||||
The tour is very interesting with the guide explaining all details about this
|
||||
place.'
|
||||
publish_time: '2020-08-14T16:07:35.186666Z'
|
||||
- author_name: Andy Stewart
|
||||
author_uri: https://www.google.com/maps/contrib/109939887611010111342/reviews
|
||||
rating: 5
|
||||
relative_time_description: 7 years ago
|
||||
text: Keep saying this, but so many beautifully kept historic buildings in one area, another one well worth visiting.
|
||||
text: Keep saying this, but so many beautifully kept historic buildings in one
|
||||
area, another one well worth visiting.
|
||||
publish_time: '2018-10-17T19:56:38.574233136Z'
|
||||
- author_name: Kevin Lu
|
||||
author_uri: https://www.google.com/maps/contrib/111795433458884676995/reviews
|
||||
|
|
@ -281,15 +285,15 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/117295849426126661636/reviews
|
||||
rating: 5
|
||||
relative_time_description: 2 years ago
|
||||
text: The castle's documents are kept in the archives. The exterior is breathtaking. It is elegantly decorated. The entrance
|
||||
overlooks the square.
|
||||
text: The castle's documents are kept in the archives. The exterior is breathtaking.
|
||||
It is elegantly decorated. The entrance overlooks the square.
|
||||
publish_time: '2023-01-03T21:16:17.193792Z'
|
||||
- author_name: Davide Nardoni
|
||||
author_uri: https://www.google.com/maps/contrib/111964551332611513527/reviews
|
||||
rating: 5
|
||||
relative_time_description: 2 years ago
|
||||
text: The Prague Castle archives are kept here. A modern building with no architectural impact, located to the side of
|
||||
the famous Prague Cathedral.
|
||||
text: The Prague Castle archives are kept here. A modern building with no architectural
|
||||
impact, located to the side of the famous Prague Cathedral.
|
||||
publish_time: '2022-12-31T08:30:56.561469Z'
|
||||
photo_count: 10
|
||||
photos_metadata:
|
||||
|
|
@ -313,8 +317,9 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 1.0
|
||||
entity_type: GRP.HER
|
||||
reasoning: Excellent match. The names are identical, the location is in the expected city and country (Czechia), and the
|
||||
Google Place type 'library' is a perfect match for the expected heritage institution type.
|
||||
reasoning: Excellent match. The names are identical, the location is in the expected
|
||||
city and country (Czechia), and the Google Place type 'library' is a perfect
|
||||
match for the expected heritage institution type.
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
ch_annotator_version: ch_annotator-v1_7_0
|
||||
|
|
@ -337,3 +342,28 @@ location:
|
|||
formatted_address: III. nádvoří 119 08, 119 00 Praha 1-Hrad, Czechia
|
||||
geonames_id: 3067696
|
||||
normalization_timestamp: '2025-12-09T06:49:28.061633+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:55.330200+00:00'
|
||||
source_url: https://www.prazskyhradarchiv.cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.prazskyhradarchiv.cz/img/safari-pinned-tab.svg
|
||||
source_url: https://www.prazskyhradarchiv.cz
|
||||
css_selector: '[document] > html.js > head > link:nth-of-type(5)'
|
||||
retrieved_on: '2025-12-23T20:19:55.330200+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: https://www.prazskyhradarchiv.cz/img/social-media-logo.png
|
||||
source_url: https://www.prazskyhradarchiv.cz
|
||||
css_selector: '[document] > html.js > head > meta:nth-of-type(14)'
|
||||
retrieved_on: '2025-12-23T20:19:55.330200+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 4
|
||||
|
|
|
|||
|
|
@ -80,9 +80,10 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-07T00:00:23Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'City resolved 2025-12-07T00:31:16Z: XXX->PRA via Wikidata Q55025755 coords (50.0875,14.4214) -> Prague (GeoNames:3067696)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:31Z: Maps: Parlamentní knihovna a Archiv Poslanecké sněmovny (conf: 1.00);
|
||||
YouTube: not found'
|
||||
- 'City resolved 2025-12-07T00:31:16Z: XXX->PRA via Wikidata Q55025755 coords (50.0875,14.4214)
|
||||
-> Prague (GeoNames:3067696)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:31Z: Maps: Parlamentní knihovna
|
||||
a Archiv Poslanecké sněmovny (conf: 1.00); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
|
||||
ch_annotator:
|
||||
|
|
@ -261,12 +262,14 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 1.0
|
||||
entity_type: GRP.HER
|
||||
reasoning: 'NAME MATCH: The Google Maps name ''Parlamentní knihovna a Archiv Poslanecké sněmovny'' (Parliamentary Library
|
||||
and Archive of the Chamber of Deputies) explicitly contains the source institution''s name ''Archiv Poslanecké sněmovny''.
|
||||
LOCATION MATCH: Both are in the Czech Republic (CZ), with the Google Maps address specifying Prague. TYPE MATCH: Although
|
||||
Google''s types (''government_office'') are not in the expected list, the name confirms it is an ''archiv'' (archive)
|
||||
and ''knihovna'' (library), which are heritage institution types. ENTITY TYPE: The entity is an archive, which is a
|
||||
type of Heritage Custodian (GRP.HER).'
|
||||
reasoning: 'NAME MATCH: The Google Maps name ''Parlamentní knihovna a Archiv Poslanecké
|
||||
sněmovny'' (Parliamentary Library and Archive of the Chamber of Deputies) explicitly
|
||||
contains the source institution''s name ''Archiv Poslanecké sněmovny''. LOCATION
|
||||
MATCH: Both are in the Czech Republic (CZ), with the Google Maps address specifying
|
||||
Prague. TYPE MATCH: Although Google''s types (''government_office'') are not
|
||||
in the expected list, the name confirms it is an ''archiv'' (archive) and ''knihovna''
|
||||
(library), which are heritage institution types. ENTITY TYPE: The entity is
|
||||
an archive, which is a type of Heritage Custodian (GRP.HER).'
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
ch_annotator_version: ch_annotator-v1_7_0
|
||||
|
|
@ -289,3 +292,22 @@ location:
|
|||
formatted_address: Komunardů 1634/44, 170 00 Praha 7-Holešovice, Czechia
|
||||
geonames_id: 3067696
|
||||
normalization_timestamp: '2025-12-09T06:49:28.101039+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:00.532753+00:00'
|
||||
source_url: https://www.psp.cz/sqw/hp.sqw
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.psp.cz/favicon.ico
|
||||
source_url: https://www.psp.cz/sqw/hp.sqw
|
||||
css_selector: '[document] > html > head > link'
|
||||
retrieved_on: '2025-12-23T20:20:00.532753+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: image/x-icon
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 1
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -73,8 +73,10 @@ provenance:
|
|||
confidence_score: 0.85
|
||||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:38Z: XX→CZ via Wikidata P17'
|
||||
- Removed incorrect wikidata_enrichment on 2025-12-08T08:18:45.461153+00:00. Re-enrichment required with proper matching.
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:35Z: Maps: National Archive (conf: 0.90); YouTube: not found'
|
||||
- Removed incorrect wikidata_enrichment on 2025-12-08T08:18:45.461153+00:00. Re-enrichment
|
||||
required with proper matching.
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:46:35Z: Maps: National Archive (conf:
|
||||
0.90); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
|
||||
ch_annotator:
|
||||
|
|
@ -197,15 +199,16 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/107425710254765644916/reviews
|
||||
rating: 5
|
||||
relative_time_description: 10 months ago
|
||||
text: The staff are always very friendly. There's even a lady who speaks English very well. Even without the language,
|
||||
you can communicate. Everyone is helpful and extremely customer-friendly. I enjoy coming here.
|
||||
text: The staff are always very friendly. There's even a lady who speaks English
|
||||
very well. Even without the language, you can communicate. Everyone is helpful
|
||||
and extremely customer-friendly. I enjoy coming here.
|
||||
publish_time: '2025-02-06T21:43:14.918876Z'
|
||||
- author_name: Barunka G.
|
||||
author_uri: https://www.google.com/maps/contrib/102003756317069132015/reviews
|
||||
rating: 4
|
||||
relative_time_description: 5 years ago
|
||||
text: In the research room of the 1st department (Milady Horákové Street), they are very helpful and willing to help you
|
||||
search for archival materials.
|
||||
text: In the research room of the 1st department (Milady Horákové Street), they
|
||||
are very helpful and willing to help you search for archival materials.
|
||||
publish_time: '2020-08-12T16:42:30.619293Z'
|
||||
- author_name: Kamila Svobodova
|
||||
author_uri: https://www.google.com/maps/contrib/109299302074939194601/reviews
|
||||
|
|
@ -257,11 +260,13 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 0.9
|
||||
entity_type: GRP.HER
|
||||
reasoning: 'High confidence match. The names refer to the same institution: ''Achivní správa Ministerstva vnitra České
|
||||
republiky'' (Archive Administration of the Ministry of the Interior of the Czech Republic) is commonly translated as
|
||||
''National Archive'', as confirmed by its website. The location is in Prague, Czechia, matching the source country.
|
||||
The Google place type ''library'' is a strong proxy for an archive and fits the expected heritage institution types.
|
||||
The source is an archive, and archives are a core heritage custodian type (glam:HeritageCustodian).'
|
||||
reasoning: 'High confidence match. The names refer to the same institution: ''Achivní
|
||||
správa Ministerstva vnitra České republiky'' (Archive Administration of the
|
||||
Ministry of the Interior of the Czech Republic) is commonly translated as ''National
|
||||
Archive'', as confirmed by its website. The location is in Prague, Czechia,
|
||||
matching the source country. The Google place type ''library'' is a strong proxy
|
||||
for an archive and fits the expected heritage institution types. The source
|
||||
is an archive, and archives are a core heritage custodian type (glam:HeritageCustodian).'
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
ch_annotator_version: ch_annotator-v1_7_0
|
||||
|
|
@ -282,3 +287,38 @@ location:
|
|||
street_address: M. Horákové 5, Praha 6-Hradčany
|
||||
formatted_address: 133, M. Horákové 5, 160 00 Praha 6-Hradčany, Czechia
|
||||
normalization_timestamp: '2025-12-09T06:49:28.148618+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:05.833479+00:00'
|
||||
source_url: http://www.nacr.cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: logo_url
|
||||
claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/logo_na_en.png
|
||||
source_url: http://www.nacr.cz
|
||||
css_selector: '#wrapper-navbar > header.header > div.header__main:nth-of-type(2)
|
||||
> nav.navbar.navbar-expand-lg > div.navbar-mobile-top > div.navbar-brand > a
|
||||
> img'
|
||||
retrieved_on: '2025-12-23T20:20:05.833479+00:00'
|
||||
extraction_method: crawl4ai_header_logo
|
||||
detection_confidence: high
|
||||
alt_text: National Archives
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.nacr.cz/wp-content/themes/narodni_archiv/img/favicon/apple-icon-180x180.png
|
||||
source_url: http://www.nacr.cz
|
||||
css_selector: '[document] > html > head > link:nth-of-type(9)'
|
||||
retrieved_on: '2025-12-23T20:20:05.833479+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: 180x180
|
||||
- claim_type: og_image_url
|
||||
claim_value: https://www.nacr.cz/wp-content/uploads/2020/04/podatelna2_22-1.jpg
|
||||
source_url: http://www.nacr.cz
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(17)'
|
||||
retrieved_on: '2025-12-23T20:20:05.833479+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 3
|
||||
has_primary_logo: true
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 13
|
||||
|
|
|
|||
|
|
@ -80,9 +80,10 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-07T00:01:05Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'City resolved 2025-12-07T00:37:04Z: XXX->PRA via Wikidata Q101475934 coords (50.0875,14.4214) -> Prague (GeoNames:3067696)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:00Z: Maps: Office for Foreign Relations and Information (conf: 0.90);
|
||||
YouTube: not found'
|
||||
- 'City resolved 2025-12-07T00:37:04Z: XXX->PRA via Wikidata Q101475934 coords (50.0875,14.4214)
|
||||
-> Prague (GeoNames:3067696)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:00Z: Maps: Office for Foreign
|
||||
Relations and Information (conf: 0.90); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
|
||||
ch_annotator:
|
||||
|
|
@ -223,10 +224,12 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 0.9
|
||||
entity_type: GRP.HER
|
||||
reasoning: The name is a direct translation (Archiv Úřadu pro zahraniční styky a informace -> Office for Foreign Relations
|
||||
and Information). The location is a perfect match, being in the same district of Prague, Czech Republic. The website
|
||||
(uzsi.cz) confirms the identity. The name 'Archiv...' explicitly states it is an archive, and the Wikidata ID Q101475934
|
||||
corresponds to the National Security Archive. While the Google Place types are generic, the contextual evidence strongly
|
||||
reasoning: The name is a direct translation (Archiv Úřadu pro zahraniční styky
|
||||
a informace -> Office for Foreign Relations and Information). The location is
|
||||
a perfect match, being in the same district of Prague, Czech Republic. The website
|
||||
(uzsi.cz) confirms the identity. The name 'Archiv...' explicitly states it is
|
||||
an archive, and the Wikidata ID Q101475934 corresponds to the National Security
|
||||
Archive. While the Google Place types are generic, the contextual evidence strongly
|
||||
supports this being an archive, which is a type of heritage institution.
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
|
|
@ -250,3 +253,22 @@ location:
|
|||
formatted_address: Střelničná 1673/10, 182 00 Praha 8-Kobylisy, Czechia
|
||||
geonames_id: 3067696
|
||||
normalization_timestamp: '2025-12-09T06:49:28.326287+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:29.242351+00:00'
|
||||
source_url: https://www.uzsi.cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.uzsi.cz/favicon.png
|
||||
source_url: https://www.uzsi.cz
|
||||
css_selector: '[document] > html > head > link'
|
||||
retrieved_on: '2025-12-23T20:20:29.242351+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 1
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -80,9 +80,10 @@ provenance:
|
|||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
|
||||
- 'Region resolved 2025-12-07T00:02:00Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- 'City resolved 2025-12-07T00:31:18Z: XXX->PRA via Wikidata Q101493927 coords (50.0875,14.4214) -> Prague (GeoNames:3067696)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:03Z: Maps: Archiv Židovského muzea v Praze (conf: 1.00); YouTube: not
|
||||
found'
|
||||
- 'City resolved 2025-12-07T00:31:18Z: XXX->PRA via Wikidata Q101493927 coords (50.0875,14.4214)
|
||||
-> Prague (GeoNames:3067696)'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:03Z: Maps: Archiv Židovského
|
||||
muzea v Praze (conf: 1.00); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
|
||||
ch_annotator:
|
||||
|
|
@ -182,14 +183,16 @@ wikidata_enrichment:
|
|||
instance_of: &id004
|
||||
- id: Q12161242
|
||||
label: private archive
|
||||
description: archival collection or institution that is not accessible to the public
|
||||
description: archival collection or institution that is not accessible to the
|
||||
public
|
||||
- id: Q53566456
|
||||
label: museum archive
|
||||
description: archive established by a museum to collect, organize, preserve, and provide access to its organizational
|
||||
records
|
||||
description: archive established by a museum to collect, organize, preserve,
|
||||
and provide access to its organizational records
|
||||
- id: Q1307560
|
||||
label: Jewish museum
|
||||
description: type of museum that documents the history and culture of the Jewish people
|
||||
description: type of museum that documents the history and culture of the Jewish
|
||||
people
|
||||
wikidata_instance_of: *id004
|
||||
wikidata_location:
|
||||
headquarters_location:
|
||||
|
|
@ -242,7 +245,8 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/115657411803981337484/reviews
|
||||
rating: 5
|
||||
relative_time_description: 7 years ago
|
||||
text: Peace and prosperity, to those far and near (inscription on the synagogue wall)
|
||||
text: Peace and prosperity, to those far and near (inscription on the synagogue
|
||||
wall)
|
||||
publish_time: '2018-08-21T16:08:34.751790Z'
|
||||
- author_name: Stanka Černáková
|
||||
author_uri: https://www.google.com/maps/contrib/113668783615557513346/reviews
|
||||
|
|
@ -290,8 +294,9 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 1.0
|
||||
entity_type: GRP.HER
|
||||
reasoning: The candidate's name exactly matches the source institution. The location is in Prague, Czechia, matching the
|
||||
expected country. The Google Place type 'museum' is an expected type for a heritage institution. The website confirms
|
||||
reasoning: The candidate's name exactly matches the source institution. The location
|
||||
is in Prague, Czechia, matching the expected country. The Google Place type
|
||||
'museum' is an expected type for a heritage institution. The website confirms
|
||||
this is the archive of the Jewish Museum in Prague, solidifying the match.
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
|
|
@ -315,3 +320,28 @@ location:
|
|||
formatted_address: 32, Stroupežnického 290, Anděl, 150 00 Praha-Praha 5, Czechia
|
||||
geonames_id: 3067696
|
||||
normalization_timestamp: '2025-12-09T06:49:28.367270+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:34.817680+00:00'
|
||||
source_url: https://www.jewishmuseum.cz/sbirky-a-vyzkum/sbirky-a-fondy/archiv-sbirky-a-fondy
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://c.jewishmuseum.cz/images/design/favicon.ico
|
||||
source_url: https://www.jewishmuseum.cz/sbirky-a-vyzkum/sbirky-a-fondy/archiv-sbirky-a-fondy
|
||||
css_selector: '[document] > html.show--consent > head > link'
|
||||
retrieved_on: '2025-12-23T20:20:34.817680+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: https://c.jewishmuseum.cz/images/design/2013/zmp-logo-fb.png
|
||||
source_url: https://www.jewishmuseum.cz/sbirky-a-vyzkum/sbirky-a-fondy/archiv-sbirky-a-fondy
|
||||
css_selector: '[document] > html.show--consent > head > meta:nth-of-type(9)'
|
||||
retrieved_on: '2025-12-23T20:20:34.817680+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -74,10 +74,13 @@ provenance:
|
|||
confidence_score: 0.85
|
||||
notes:
|
||||
- 'Country resolved 2025-12-06T23:54:40Z: XX→CZ via Wikidata P17'
|
||||
- 'City resolved 2025-12-07T00:36:54Z: XXX->PRA via Wikidata Q46996293 coords (50.0913,14.4037) -> Prague (GeoNames:3067696)'
|
||||
- 'City resolved 2025-12-07T00:36:54Z: XXX->PRA via Wikidata Q46996293 coords (50.0913,14.4037)
|
||||
-> Prague (GeoNames:3067696)'
|
||||
- 'Region resolved 2025-12-07T11:30:40Z: XX->10 via Wikidata P131 (CZ-10)'
|
||||
- Removed incorrect wikidata_enrichment on 2025-12-08T08:18:45.468069+00:00. Re-enrichment required with proper matching.
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:07Z: Maps: Archiv Pražského hradu (conf: 0.95); YouTube: not found'
|
||||
- Removed incorrect wikidata_enrichment on 2025-12-08T08:18:45.468069+00:00. Re-enrichment
|
||||
required with proper matching.
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:07Z: Maps: Archiv Pražského hradu
|
||||
(conf: 0.95); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
|
||||
ch_annotator:
|
||||
|
|
@ -234,20 +237,22 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/105730745460531884055/reviews
|
||||
rating: 4
|
||||
relative_time_description: 5 years ago
|
||||
text: 'The archives of prague castle with all documents are preserved here. The artifacts and scrolls were found in the
|
||||
1920 and preserved.
|
||||
text: 'The archives of prague castle with all documents are preserved here. The
|
||||
artifacts and scrolls were found in the 1920 and preserved.
|
||||
|
||||
|
||||
All the repairs , restoration about the place is maintained and organised well.
|
||||
|
||||
|
||||
The tour is very interesting with the guide explaining all details about this place.'
|
||||
The tour is very interesting with the guide explaining all details about this
|
||||
place.'
|
||||
publish_time: '2020-08-14T16:07:35.186666Z'
|
||||
- author_name: Andy Stewart
|
||||
author_uri: https://www.google.com/maps/contrib/109939887611010111342/reviews
|
||||
rating: 5
|
||||
relative_time_description: 7 years ago
|
||||
text: Keep saying this, but so many beautifully kept historic buildings in one area, another one well worth visiting.
|
||||
text: Keep saying this, but so many beautifully kept historic buildings in one
|
||||
area, another one well worth visiting.
|
||||
publish_time: '2018-10-17T19:56:38.574233136Z'
|
||||
- author_name: Kevin Lu
|
||||
author_uri: https://www.google.com/maps/contrib/111795433458884676995/reviews
|
||||
|
|
@ -259,15 +264,15 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/117295849426126661636/reviews
|
||||
rating: 5
|
||||
relative_time_description: 2 years ago
|
||||
text: The castle's documents are kept in the archives. The exterior is breathtaking. It is elegantly decorated. The entrance
|
||||
overlooks the square.
|
||||
text: The castle's documents are kept in the archives. The exterior is breathtaking.
|
||||
It is elegantly decorated. The entrance overlooks the square.
|
||||
publish_time: '2023-01-03T21:16:17.193792Z'
|
||||
- author_name: Davide Nardoni
|
||||
author_uri: https://www.google.com/maps/contrib/111964551332611513527/reviews
|
||||
rating: 5
|
||||
relative_time_description: 2 years ago
|
||||
text: The Prague Castle archives are kept here. A modern building with no architectural impact, located to the side of
|
||||
the famous Prague Cathedral.
|
||||
text: The Prague Castle archives are kept here. A modern building with no architectural
|
||||
impact, located to the side of the famous Prague Cathedral.
|
||||
publish_time: '2022-12-31T08:30:56.561469Z'
|
||||
photo_count: 10
|
||||
photos_metadata:
|
||||
|
|
@ -291,12 +296,15 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 0.95
|
||||
entity_type: GRP.HER
|
||||
reasoning: The names refer to the same institution. The source name is 'Bezpečnostní archiv Kanceláře prezidenta republiky'
|
||||
(Security Archive of the Office of the President of the Republic) and the Google Maps name is 'Archiv Pražského hradu'
|
||||
(Archive of Prague Castle). Research confirms that the Bezpečnostní archiv is the legal successor to the Archive of
|
||||
Prague Castle and is located within the Prague Castle complex. The location is a correct match (Praha 1, Czechia). The
|
||||
Google Place type 'library' is consistent with the expected heritage institution types. The entity is a heritage custodian
|
||||
institution. Therefore, it is a high-confidence match.
|
||||
reasoning: The names refer to the same institution. The source name is 'Bezpečnostní
|
||||
archiv Kanceláře prezidenta republiky' (Security Archive of the Office of the
|
||||
President of the Republic) and the Google Maps name is 'Archiv Pražského hradu'
|
||||
(Archive of Prague Castle). Research confirms that the Bezpečnostní archiv is
|
||||
the legal successor to the Archive of Prague Castle and is located within the
|
||||
Prague Castle complex. The location is a correct match (Praha 1, Czechia). The
|
||||
Google Place type 'library' is consistent with the expected heritage institution
|
||||
types. The entity is a heritage custodian institution. Therefore, it is a high-confidence
|
||||
match.
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
ch_annotator_version: ch_annotator-v1_7_0
|
||||
|
|
@ -319,3 +327,22 @@ location:
|
|||
formatted_address: III. nádvoří 119 08, 119 00 Praha 1-Hrad, Czechia
|
||||
geonames_id: 3067696
|
||||
normalization_timestamp: '2025-12-09T06:49:28.406971+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:39.662727+00:00'
|
||||
source_url: http://www.prazskyhradarchiv.cz/archivKPR/cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.prazskyhradarchiv.cz/img/safari-pinned-tab.svg
|
||||
source_url: http://www.prazskyhradarchiv.cz/archivKPR/cz
|
||||
css_selector: '[document] > html.js > head > link:nth-of-type(5)'
|
||||
retrieved_on: '2025-12-23T20:20:39.662727+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 1
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 4
|
||||
|
|
|
|||
|
|
@ -36,7 +36,8 @@ ghcid:
|
|||
reason: 'Country resolved via Wikidata P17: XX→CZ'
|
||||
- ghcid: CZ-10-PRA-A-BAVZ
|
||||
valid_from: '2025-12-07T12:39:42.485462+00:00'
|
||||
reason: 'Location resolved from institution name pattern: ''Prague'' → region 10, city PRA'
|
||||
reason: 'Location resolved from institution name pattern: ''Prague'' → region
|
||||
10, city PRA'
|
||||
custodian_name:
|
||||
claim_type: custodian_name
|
||||
claim_value: Bezpečnostní archiv Vojenského zpravodajství
|
||||
|
|
@ -62,7 +63,8 @@ provenance:
|
|||
confidence_score: 0.85
|
||||
notes:
|
||||
- 'Country resolved 2025-12-06T23:56:12Z: XX→CZ via Wikidata P17'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:15Z: Maps: Central Military Archives (conf: 0.80); YouTube: not found'
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T19:47:15Z: Maps: Central Military Archives
|
||||
(conf: 0.80); YouTube: not found'
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:11:19Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:48:04Z
|
||||
- Canonical location added via normalize_custodian_files.py on 2025-12-09T06:49:28Z
|
||||
|
|
@ -156,22 +158,25 @@ google_maps_enrichment:
|
|||
author_uri: https://www.google.com/maps/contrib/110616501951342355701/reviews
|
||||
rating: 5
|
||||
relative_time_description: 6 years ago
|
||||
text: I visited my former colleagues at the reception desk here. The archive has beautiful plant decorations in front
|
||||
of the entrance and in the lobby. It is peaceful and quiet. Many people come here to do research.
|
||||
text: I visited my former colleagues at the reception desk here. The archive has
|
||||
beautiful plant decorations in front of the entrance and in the lobby. It is
|
||||
peaceful and quiet. Many people come here to do research.
|
||||
publish_time: '2019-10-04T22:45:28.959462Z'
|
||||
- author_name: Vasil “Ben Lee Meier” Mohorita
|
||||
author_uri: https://www.google.com/maps/contrib/108576684499626840750/reviews
|
||||
rating: 5
|
||||
relative_time_description: 3 years ago
|
||||
text: The first time I was in that area was for my cousin's swearing-in ceremony. Around 1967. Then I went there on the
|
||||
occasion of the commemorations of November 17, 1939. Today I go there to the army archives.
|
||||
text: The first time I was in that area was for my cousin's swearing-in ceremony.
|
||||
Around 1967. Then I went there on the occasion of the commemorations of November
|
||||
17, 1939. Today I go there to the army archives.
|
||||
publish_time: '2022-01-11T21:48:03.858557Z'
|
||||
- author_name: Diiinka
|
||||
author_uri: https://www.google.com/maps/contrib/101993642396958778679/reviews
|
||||
rating: 2
|
||||
relative_time_description: 3 years ago
|
||||
text: There is no coffee machine in the archive, nor any facilities outside the research room. The reception staff is
|
||||
reluctant and uninformed. The archival materials in the cardboard boxes were numbered, but they were very confusingly
|
||||
text: There is no coffee machine in the archive, nor any facilities outside the
|
||||
research room. The reception staff is reluctant and uninformed. The archival
|
||||
materials in the cardboard boxes were numbered, but they were very confusingly
|
||||
shuffled.
|
||||
publish_time: '2022-02-05T13:49:32.848868Z'
|
||||
photo_count: 10
|
||||
|
|
@ -196,10 +201,12 @@ google_maps_enrichment:
|
|||
is_match: true
|
||||
confidence: 0.8
|
||||
entity_type: GRP.HER
|
||||
reasoning: The Google Maps candidate 'Central Military Archives' is a likely translation or official English name for
|
||||
the source institution 'Bezpečnostní archiv Vojenského zpravodajství'. The location in Praha (CZ) is a match. The website
|
||||
(vuapraha.cz) corresponds to the Vojenský ústřední archiv (Central Military Archive), confirming the institutional identity.
|
||||
Although Google lacks a specific 'archive' type, the candidate is a military archive, which is a type of heritage institution
|
||||
reasoning: The Google Maps candidate 'Central Military Archives' is a likely translation
|
||||
or official English name for the source institution 'Bezpečnostní archiv Vojenského
|
||||
zpravodajství'. The location in Praha (CZ) is a match. The website (vuapraha.cz)
|
||||
corresponds to the Vojenský ústřední archiv (Central Military Archive), confirming
|
||||
the institutional identity. Although Google lacks a specific 'archive' type,
|
||||
the candidate is a military archive, which is a type of heritage institution
|
||||
(GRP.HER).
|
||||
agent: glm-4.6
|
||||
verified: true
|
||||
|
|
@ -221,3 +228,22 @@ location:
|
|||
street_address: Pilotů 217/12, Praha 6
|
||||
formatted_address: Pilotů 217/12, 161 00 Praha 6, Czechia
|
||||
normalization_timestamp: '2025-12-09T06:49:28.469190+00:00'
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:47.483767+00:00'
|
||||
source_url: https://www.vuapraha.cz
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.vuapraha.cz/wp-content/themes/iq-theme/dist/img/safari-pinned-tab.svg
|
||||
source_url: https://www.vuapraha.cz
|
||||
css_selector: '[document] > html > head > link:nth-of-type(4)'
|
||||
retrieved_on: '2025-12-23T20:20:47.483767+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 1
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 7
|
||||
|
|
|
|||
|
|
@ -211,3 +211,22 @@ location:
|
|||
geonames_id: 1857843
|
||||
geonames_name: Maebashi
|
||||
feature_code: PPLA
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:17:47.555064+00:00'
|
||||
source_url: http://www.media.gunma-u.ac.jp
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.media.gunma-u.ac.jp/assets/templates/media/favicon.ico
|
||||
source_url: http://www.media.gunma-u.ac.jp
|
||||
css_selector: '[document] > html > head > link:nth-of-type(2)'
|
||||
retrieved_on: '2025-12-23T20:17:47.555064+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 1
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -207,3 +207,30 @@ location:
|
|||
geonames_id: 1857843
|
||||
geonames_name: Maebashi
|
||||
feature_code: PPLA
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:18:16.281140+00:00'
|
||||
source_url: http://www.maebashi-it.ac.jp/library
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: logo_url
|
||||
claim_value: https://www.maebashi-it.ac.jp/images/logo.gif
|
||||
source_url: http://www.maebashi-it.ac.jp/library
|
||||
css_selector: '#h_logo > a > img'
|
||||
retrieved_on: '2025-12-23T20:18:16.281140+00:00'
|
||||
extraction_method: crawl4ai_header_logo
|
||||
detection_confidence: high
|
||||
alt_text: 前橋工科大学
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.maebashi-it.ac.jp/favicon.ico
|
||||
source_url: http://www.maebashi-it.ac.jp/library
|
||||
css_selector: '[document] > html > head > link'
|
||||
retrieved_on: '2025-12-23T20:18:16.281140+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: image/vnd.microsoft.icon
|
||||
favicon_sizes: ''
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: true
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 1
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007096.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007096.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:18:33.146679+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007096.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007096.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:18:33.146679+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007096.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:18:33.146679+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007087.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007087.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:18:42.071653+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007087.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007087.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:18:42.071653+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007087.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:18:42.071653+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007092.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007092.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:18:51.243654+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007092.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007092.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:18:51.243654+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007092.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:18:51.243654+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007088.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007088.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:00.185807+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007088.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007088.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:19:00.185807+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007088.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:19:00.185807+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007095.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007095.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:08.942928+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007095.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007095.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:19:08.942928+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007095.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:19:08.942928+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007091.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007091.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:17.937119+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007091.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007091.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:19:17.937119+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007091.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:19:17.937119+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007085.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007085.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:26.935729+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007085.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007085.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:19:26.935729+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007085.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:19:26.935729+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007094.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007094.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:35.422766+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007094.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007094.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:19:35.422766+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007094.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:19:35.422766+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007089.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007089.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:44.147868+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007089.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007089.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:19:44.147868+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007089.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:19:44.147868+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007090.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007090.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:19:52.929296+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007090.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007090.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:19:52.929296+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007090.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:19:52.929296+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007093.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007093.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:01.788226+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007093.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007093.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:20:01.788226+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007093.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:20:01.788226+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007097.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007097.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:10.619515+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007097.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007097.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:20:10.619515+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007097.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:20:10.619515+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -205,3 +205,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007750.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007750.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:19.504053+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007750.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007750.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:20:19.504053+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007750.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:20:19.504053+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -204,3 +204,28 @@ wikidata_enrichment:
|
|||
wikidata_web:
|
||||
official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007086.html
|
||||
wikidata_official_website: http://www.city.maebashi.gunma.jp/shisetsu/425/p007086.html
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:28.730915+00:00'
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007086.html
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: favicon_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/smartphone.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007086.html
|
||||
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
||||
retrieved_on: '2025-12-23T20:20:28.730915+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: ''
|
||||
- claim_type: og_image_url
|
||||
claim_value: http://www.city.maebashi.gunma.jp/material/images/group/10/banner.png
|
||||
source_url: http://www.city.maebashi.gunma.jp/shisetsu/425/p007086.html
|
||||
css_selector: '[document] > html > head > meta:nth-of-type(7)'
|
||||
retrieved_on: '2025-12-23T20:20:28.730915+00:00'
|
||||
extraction_method: crawl4ai_meta_og
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: false
|
||||
has_favicon: true
|
||||
has_og_image: true
|
||||
favicon_count: 2
|
||||
|
|
|
|||
|
|
@ -217,3 +217,30 @@ location:
|
|||
geonames_id: 1857843
|
||||
geonames_name: Maebashi
|
||||
feature_code: PPLA
|
||||
logo_enrichment:
|
||||
enrichment_timestamp: '2025-12-23T20:20:40.682320+00:00'
|
||||
source_url: https://www.lib.gunma-ct.ac.jp/index.htm
|
||||
extraction_method: crawl4ai
|
||||
claims:
|
||||
- claim_type: logo_url
|
||||
claim_value: https://www.gunma-ct.ac.jp/cms/wp-content/themes/kosen/images/common/header_logo.svg
|
||||
source_url: https://www.lib.gunma-ct.ac.jp/index.htm
|
||||
css_selector: '#mainmenu_sm_logo > a > img'
|
||||
retrieved_on: '2025-12-23T20:20:40.682320+00:00'
|
||||
extraction_method: crawl4ai_header_logo
|
||||
detection_confidence: high
|
||||
alt_text: 群馬工業高等専門学校
|
||||
- claim_type: favicon_url
|
||||
claim_value: https://www.gunma-ct.ac.jp/cms/wp-content/uploads/2024/08/favicon.png
|
||||
source_url: https://www.lib.gunma-ct.ac.jp/index.htm
|
||||
css_selector: '[document] > html > head > link:nth-of-type(18)'
|
||||
retrieved_on: '2025-12-23T20:20:40.682320+00:00'
|
||||
extraction_method: crawl4ai_link_rel
|
||||
favicon_type: ''
|
||||
favicon_sizes: 192x192
|
||||
summary:
|
||||
total_claims: 2
|
||||
has_primary_logo: true
|
||||
has_favicon: true
|
||||
has_og_image: false
|
||||
favicon_count: 2
|
||||
|
|
|
|||
Loading…
Reference in a new issue