445 lines
15 KiB
YAML
445 lines
15 KiB
YAML
original_entry:
|
|
organisatie: Club van Suikerzakjesverzamelaars in Nederland
|
|
collectie_nederland: ja
|
|
reference:
|
|
- label: https://www.suikerzak.nl/
|
|
type:
|
|
- S
|
|
entry_index: 1021
|
|
processing_timestamp: '2025-11-27T16:43:59.817431+00:00'
|
|
enrichment_status: skipped
|
|
provenance:
|
|
schema_version: 1.0.0
|
|
generated_at: '2025-11-28T23:47:30.547276+00:00'
|
|
sources:
|
|
original_entry:
|
|
- source_type: nde_csv_registry
|
|
data_tier: TIER_1_AUTHORITATIVE
|
|
claims_extracted:
|
|
- organisatie
|
|
- collectie_nederland
|
|
- reference
|
|
- type
|
|
google_maps:
|
|
- source_type: google_maps_api
|
|
fetch_timestamp: '2025-11-28T09:58:58.954001+00:00'
|
|
api_endpoint: https://maps.googleapis.com/maps/api/place/textsearch
|
|
place_id: ChIJWzo_OP9mxkcRP9DdY57bTtM
|
|
claims_extracted:
|
|
- coordinates
|
|
- formatted_address
|
|
- address_components
|
|
- business_status
|
|
- website
|
|
- phone
|
|
- rating
|
|
- reviews
|
|
- opening_hours
|
|
genealogiewerkbalk:
|
|
- source_type: genealogiewerkbalk_registry
|
|
fetch_timestamp: '2025-12-03T13:51:42.061162+00:00'
|
|
data_url: https://docs.google.com/spreadsheets/d/1rS_Z5L6L2vvfGLS6eHI8wfyiwB-KUfHEr7W1VNY3rpg/export?format=csv
|
|
match_method: google_maps_admin2
|
|
claims_extracted:
|
|
- municipality_name
|
|
- municipality_code
|
|
- municipal_archive_name
|
|
- municipal_archive_website
|
|
- municipal_archive_isil
|
|
- province_name
|
|
- province_code
|
|
- provincial_archive_name
|
|
- provincial_archive_website
|
|
linkup_timespan:
|
|
- source_type: linkup_web_search
|
|
fetch_timestamp: '2025-12-15T21:33:47.899348+00:00'
|
|
search_query: '"Club van Suikerzakjesverzamelaars in Nederland" Houten opgericht
|
|
OR gesticht OR sinds'
|
|
source_urls:
|
|
- https://www.harrydietz.nl/index.php/suikerzakjes
|
|
- https://nl.wikipedia.org/wiki/Suikerzakje
|
|
- http://nl.wikisage.org/wiki/Club_van_Suikerzakjesverzamelaars_in_Nederland
|
|
- https://nl.wikipedia.org/wiki/Suikerfabriek
|
|
- https://www.tabakshistorie.nl/nl/algemeen/bestaande-fabriekspanden-eo/31/
|
|
claims_extracted:
|
|
- timespan_begin
|
|
data_tier: TIER_4_INFERRED
|
|
archive_path: data/custodian/web/1021/linkup/linkup_founding_20251215T213347Z.json
|
|
data_tier_summary:
|
|
TIER_1_AUTHORITATIVE:
|
|
- original_entry (NDE CSV)
|
|
TIER_2_VERIFIED:
|
|
- wikidata_api
|
|
- google_maps_api
|
|
- genealogiewerkbalk_registry
|
|
TIER_3_CROWD_SOURCED: []
|
|
TIER_4_INFERRED:
|
|
- website_scrape
|
|
- exa_web_search
|
|
notes:
|
|
- Provenance tracking added retroactively
|
|
- claim_level_provenance available in sources section
|
|
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:19:24Z
|
|
- Canonical location added via normalize_custodian_files.py on 2025-12-09T07:09:49Z
|
|
skip_reason: no_wikidata_id
|
|
google_maps_enrichment:
|
|
place_id: ChIJWzo_OP9mxkcRP9DdY57bTtM
|
|
name: Club van Suikerzakjesverzamelaars in Nederland
|
|
fetch_timestamp: '2025-11-28T09:58:58.954001+00:00'
|
|
api_status: OK
|
|
coordinates:
|
|
latitude: 52.0339982
|
|
longitude: 5.1726966999999995
|
|
formatted_address: De Fuik 29, 3995 BJ Houten
|
|
short_address: De Fuik 29, Houten
|
|
address_components:
|
|
- long_name: '29'
|
|
short_name: '29'
|
|
types:
|
|
- street_number
|
|
- long_name: De Fuik
|
|
short_name: De Fuik
|
|
types:
|
|
- route
|
|
- long_name: Houten
|
|
short_name: Houten
|
|
types:
|
|
- locality
|
|
- political
|
|
- long_name: Houten
|
|
short_name: Houten
|
|
types:
|
|
- administrative_area_level_2
|
|
- political
|
|
- long_name: Utrecht
|
|
short_name: UT
|
|
types:
|
|
- administrative_area_level_1
|
|
- political
|
|
- long_name: Nederland
|
|
short_name: NL
|
|
types:
|
|
- country
|
|
- political
|
|
- long_name: 3995 BJ
|
|
short_name: 3995 BJ
|
|
types:
|
|
- postal_code
|
|
phone_local: 06 22111485
|
|
phone_international: +31 6 22111485
|
|
website: http://www.suikerzak.nl/
|
|
google_place_types:
|
|
- point_of_interest
|
|
- store
|
|
- establishment
|
|
primary_type: store
|
|
business_status: OPERATIONAL
|
|
opening_hours:
|
|
open_now: true
|
|
periods:
|
|
- open:
|
|
day: 0
|
|
hour: 0
|
|
minute: 0
|
|
weekday_text:
|
|
- 'maandag: 24 uur geopend'
|
|
- 'dinsdag: 24 uur geopend'
|
|
- 'woensdag: 24 uur geopend'
|
|
- 'donderdag: 24 uur geopend'
|
|
- 'vrijdag: 24 uur geopend'
|
|
- 'zaterdag: 24 uur geopend'
|
|
- 'zondag: 24 uur geopend'
|
|
rating: 5
|
|
total_ratings: 1
|
|
reviews:
|
|
- author_name: P.J V Krieken
|
|
author_uri: https://www.google.com/maps/contrib/103613345661508403588/reviews
|
|
rating: 5
|
|
relative_time_description: 7 maanden geleden
|
|
text: Leuke site en dat helemaal opgezet vj hobby en met een catalogus van alle
|
|
bekend zijnde suikerzakjes. (Ned.erfgoed )
|
|
publish_time: '2025-04-16T14:08:41.123310Z'
|
|
google_maps_url: https://maps.google.com/?cid=15226348863513022527&g_mp=Cidnb29nbGUubWFwcy5wbGFjZXMudjEuUGxhY2VzLlNlYXJjaFRleHQQAhgEIAA
|
|
street_view_url: https://maps.googleapis.com/maps/api/streetview?size=600x400&location=52.0339982,5.1726966999999995&key=AIzaSyAHuazNth9ZvfRFYk5-v49CwXwhABH8Ri0
|
|
icon_mask_uri: https://maps.gstatic.com/mapfiles/place_api/icons/v2/shopping_pinlet
|
|
icon_background_color: '#4B96F3'
|
|
utc_offset_minutes: 60
|
|
google_maps_status: SUCCESS
|
|
google_maps_search_query: Club van Suikerzakjesverzamelaars in Nederland, Netherlands
|
|
web_enrichment:
|
|
web_archives:
|
|
- url: https://www.suikerzak.nl/
|
|
directory: web/1021/suikerzak.nl
|
|
pages_archived: 13
|
|
archive_method: wget_warc_deep
|
|
warc_file: archive.warc.gz
|
|
warc_size_bytes: 109629
|
|
warc_format: ISO 28500
|
|
full_site_archive_timestamp: '2025-11-29T17:46:40.278592+00:00'
|
|
ghcid:
|
|
ghcid_current: NL-UT-HOU-S-CSN
|
|
ghcid_original: NL-UT-HOU-S-CSN
|
|
ghcid_uuid: 1754a45a-8bf0-569e-9c6b-8c1b65e63c70
|
|
ghcid_uuid_sha256: 5cc2a09d-0cfb-8ad5-a5c6-5a3e8d4a69e3
|
|
ghcid_numeric: 6684081393359330005
|
|
record_id: 019adf94-bae2-7a75-a550-1a466f6b1663
|
|
generation_timestamp: '2025-12-02T14:56:45.615377+00:00'
|
|
ghcid_history:
|
|
- ghcid: NL-UT-HOU-S-CSN
|
|
ghcid_numeric: 6684081393359330005
|
|
valid_from: '2025-12-02T14:56:45.615377+00:00'
|
|
valid_to: null
|
|
reason: GHCID regenerated with Google Maps locality fix (Dec 2025)
|
|
location_resolution:
|
|
method: GOOGLE_MAPS_LOCALITY
|
|
google_maps_locality: Houten
|
|
geonames_id: 2753557
|
|
geonames_name: Houten
|
|
feature_code: PPL
|
|
population: 1335
|
|
admin1_code: 09
|
|
region_code: UT
|
|
country_code: NL
|
|
source_coordinates:
|
|
latitude: 52.0339982
|
|
longitude: 5.1726966999999995
|
|
source: google_maps
|
|
geonames_id: 2753557
|
|
identifiers:
|
|
- identifier_scheme: GHCID
|
|
identifier_value: NL-UT-HOU-S-CSN
|
|
- identifier_scheme: GHCID_UUID
|
|
identifier_value: 1754a45a-8bf0-569e-9c6b-8c1b65e63c70
|
|
identifier_url: urn:uuid:1754a45a-8bf0-569e-9c6b-8c1b65e63c70
|
|
- identifier_scheme: GHCID_UUID_SHA256
|
|
identifier_value: 5cc2a09d-0cfb-8ad5-a5c6-5a3e8d4a69e3
|
|
identifier_url: urn:uuid:5cc2a09d-0cfb-8ad5-a5c6-5a3e8d4a69e3
|
|
- identifier_scheme: GHCID_NUMERIC
|
|
identifier_value: '6684081393359330005'
|
|
- identifier_scheme: RECORD_ID
|
|
identifier_value: 019adf94-bae2-7a75-a550-1a466f6b1663
|
|
identifier_url: urn:uuid:019adf94-bae2-7a75-a550-1a466f6b1663
|
|
custodian_name:
|
|
claim_type: custodian_name
|
|
claim_value: Club van Suikerzakjesverzamelaars in Nederland
|
|
source: web_title_tag
|
|
confidence: 0.8
|
|
consensus_method: true
|
|
sources_checked: 3
|
|
sources_matched: 3
|
|
extraction_timestamp: '2025-12-02T13:08:44.258272+00:00'
|
|
matching_sources:
|
|
- source: google_maps
|
|
name: Club van Suikerzakjesverzamelaars in Nederland
|
|
score: 1.0
|
|
- source: original_entry
|
|
name: Club van Suikerzakjesverzamelaars in Nederland
|
|
score: 1.0
|
|
genealogiewerkbalk_enrichment:
|
|
source: Genealogiewerkbalk.nl Municipality Archives Registry
|
|
source_url: https://www.genealogiewerkbalk.nl/archieven.html
|
|
data_url: https://docs.google.com/spreadsheets/d/1rS_Z5L6L2vvfGLS6eHI8wfyiwB-KUfHEr7W1VNY3rpg/export?format=csv
|
|
data_tier: TIER_2_VERIFIED
|
|
enrichment_timestamp: '2025-12-03T13:51:42.061149+00:00'
|
|
match_method: google_maps_admin2
|
|
match_confidence: 0.95
|
|
municipality:
|
|
name: Houten
|
|
code: '321'
|
|
municipal_archive:
|
|
name: Regionaal Archief Zuid-Utrecht
|
|
website: https://www.razu.nl/
|
|
isil: null
|
|
isil_note: geenRAZU
|
|
province:
|
|
name: Utrecht
|
|
code: '26'
|
|
provincial_archive:
|
|
name: Het Utrechts Archief
|
|
website: https://hetutrechtsarchief.nl/
|
|
extra_info: Kijk voor meer info over archieven in Utrecht op https://www.landschaperfgoedutrecht.nl/erfgoed/historische-verenigingen-en-archieven/archieven/
|
|
digital_platforms:
|
|
- platform_name: Club van Suikerzakjesverzamelaars in Nederland Website
|
|
platform_url: http://www.suikerzak.nl/
|
|
platform_type: WEBSITE
|
|
platform_category:
|
|
- Organizational website
|
|
digital_collections: Organizational website
|
|
technology_stack: Standard web technology
|
|
data_standards:
|
|
- HTML5
|
|
user_services: General information, Contact
|
|
sustainability_model: Institutional funding
|
|
enrichment_timestamp: '2025-12-05T11:21:46.303886+00:00'
|
|
source_method: automated_extraction
|
|
wikidata_enrichment:
|
|
status: NOT_FOUND
|
|
fetch_timestamp: '2025-12-06T19:41:06.655662+00:00'
|
|
search_query: Club van Suikerzakjesverzamelaars in Nederland
|
|
location:
|
|
latitude: 52.0339982
|
|
longitude: 5.1726966999999995
|
|
coordinate_provenance:
|
|
source_type: GOOGLE_MAPS
|
|
source_path: google_maps_enrichment.coordinates
|
|
original_timestamp: '2025-11-28T09:58:58.954001+00:00'
|
|
entity_id: ChIJWzo_OP9mxkcRP9DdY57bTtM
|
|
api_endpoint: https://maps.googleapis.com/maps/api/place/textsearch
|
|
city: Houten
|
|
region: Utrecht
|
|
region_code: UT
|
|
country: NL
|
|
postal_code: 3995 BJ
|
|
street_address: De Fuik 29, Houten
|
|
formatted_address: De Fuik 29, 3995 BJ Houten
|
|
geonames_id: 2753557
|
|
geonames_name: Houten
|
|
feature_code: PPL
|
|
normalization_timestamp: '2025-12-09T07:09:49.067750+00:00'
|
|
web_person_claims:
|
|
extraction_timestamp: '2025-12-12T21:55:11.464101+00:00'
|
|
extraction_method: ch_annotator_entity_extraction_v2
|
|
filtering_applied: true
|
|
source_annotations:
|
|
- custodian/web/1021/suikerzak.nl/annotations_v1.7.0.yaml
|
|
persons_count: 1
|
|
persons_rejected_count: 0
|
|
persons:
|
|
- name: Harry Dietz
|
|
entity_type: AGT.PER
|
|
entity_id: entity-4
|
|
class_uri: crm:E21_Person
|
|
recognition_confidence: 0.95
|
|
provenance:
|
|
xpath: /html/body/div[@id='wrapper']/div[@id='rechterkolom']/div[@class='moduletablecontact']/div[@class='customcontact']/p[2]
|
|
timestamp: '2025-12-06T08:44:40.482134+00:00'
|
|
agent: zai/glm-4.6
|
|
confidence: 1.0
|
|
context_convention: GLAM-NER v1.7.0-unified
|
|
source_url: https://www.suikerzak.nl/
|
|
relationships: []
|
|
crawl4ai_enrichment:
|
|
retrieval_timestamp: '2025-12-14T18:20:11.002083+00:00'
|
|
retrieval_agent: crawl4ai
|
|
source_url: http://www.suikerzak.nl/
|
|
status_code: null
|
|
error: Crawl failed with status None
|
|
digital_platform_v2:
|
|
transformation_metadata:
|
|
transformed_from: httpx_beautifulsoup
|
|
transformation_date: '2025-12-14T23:23:18.554141+00:00'
|
|
transformation_version: '2.1'
|
|
source_status_code: 200
|
|
primary_platform:
|
|
platform_id: primary_website_suikerzak_nl
|
|
platform_name: Club van Suikerzakjesverzamelaars in Nederland Website
|
|
platform_url: https://www.suikerzak.nl/
|
|
platform_type: DISCOVERY_PORTAL
|
|
description: Website van de Club van Suikerzakjesverzamelaars in Nederland
|
|
language: nl
|
|
og_image: null
|
|
favicon: http://www.suikerzak.nl/images/template/engage.png
|
|
collection_urls:
|
|
- http://www.suikerzak.nl/catalogi
|
|
- http://www.suikerzak.nl/zoeken
|
|
navigation_links:
|
|
- http://www.suikerzak.nl/
|
|
- http://www.suikerzak.nl/de-club
|
|
- http://www.suikerzak.nl/suiker-zakjes
|
|
- http://www.suikerzak.nl/catalogi
|
|
- http://www.suikerzak.nl/zoeken
|
|
- http://www.suikerzak.nl/vraag-aanbod
|
|
- http://www.suikerzak.nl/adressen-links
|
|
- http://www.suikerzak.nl/clubwinkel
|
|
- http://www.suikerzak.nl/8-nieuwsberichten/71-naamstempels-op-de-achterkant-van-suikerzakjes
|
|
- http://www.suikerzak.nl/2-ongecategoriseerd/190-tentoonstelling-marcel-bekema
|
|
- http://www.suikerzak.nl/uit-de-oude-doos
|
|
- http://www.suikerzak.nl/images/suikerzakjespraat_19601214a.jpg
|
|
- http://www.suikerzak.nl/images/suikerzakjespraat_19601214b.jpg
|
|
- http://www.suikerzak.nl/8-nieuwsberichten/151-onze-24e-internationale-ruildag-25-oktober-2025
|
|
- http://www.suikerzak.nl/8-nieuwsberichten/180-artikel-over-van-de-valk-serie-9
|
|
- http://www.suikerzak.nl/8-nieuwsberichten/176-nieuwe-catalogus-suikerzakjes-van-sogeler
|
|
- http://www.suikerzak.nl/8-nieuwsberichten/177-nieuwe-catalogus-suikerzakjes-van-comby
|
|
- http://www.suikerzak.nl/?start=11
|
|
- http://www.suikerzak.nl/?start=22
|
|
- http://www.suikerzak.nl/?start=33
|
|
timespan:
|
|
begin_of_the_begin: '2000-12-27T00:00:00Z'
|
|
end_of_the_begin: '2000-12-27T00:00:00Z'
|
|
begin_of_the_end: null
|
|
end_of_the_end: null
|
|
sources:
|
|
- 'Linkup web search: https://www.suikerzak.nl/de-club'
|
|
notes: 'Found via pattern: full_date_nl'
|
|
web-enrichments:
|
|
extraction_timestamp: '2025-12-13T19:47:02.652261+00:00'
|
|
extraction_method: hybrid_llm_pattern_layout_v1
|
|
confidence_threshold: 0.6
|
|
entities_count: 6
|
|
claims:
|
|
- entity: Club van Suikerzakjesverzamelaars in Nederland
|
|
entity_type: GRP.ASS
|
|
xpath: /html/head/title
|
|
base_confidence: 1.0
|
|
layout_score: 0.15
|
|
pattern_score: 0.0
|
|
final_confidence: 1.0
|
|
layout_match: high_conf:head/title
|
|
- entity: De Fuik 29
|
|
entity_type: TOP.ADR
|
|
xpath: /html/body/div[@id='wrapper']/div[@id='rechterkolom']/div[@class='moduletablecontact']/div[@class='customcontact']/p[3]
|
|
base_confidence: 0.95
|
|
layout_score: 0.2
|
|
pattern_score: 0.0
|
|
final_confidence: 1.0
|
|
layout_match: high_conf:body/footer/*
|
|
- entity: Harry Dietz
|
|
entity_type: AGT.PER
|
|
xpath: /html/body/div[@id='wrapper']/div[@id='rechterkolom']/div[@class='moduletablecontact']/div[@class='customcontact']/p[2]
|
|
base_confidence: 0.95
|
|
layout_score: 0.1
|
|
pattern_score: 0.15
|
|
final_confidence: 1.0
|
|
layout_match: high_conf:body/*/p
|
|
pattern_match: ^[A-Z][a-z]+\s+[A-Z][a-z]+
|
|
- entity: https://www.suikerzak.nl
|
|
entity_type: APP.URL
|
|
xpath: /html/body/div[@id='wrapper']/div[@id='rechterkolom']/div[@class='moduletablecontact']/div[@class='customcontact']/p[7]/a
|
|
base_confidence: 1.0
|
|
layout_score: 0.0
|
|
pattern_score: 0.0
|
|
final_confidence: 1.0
|
|
- entity: 3995 BJ Houten
|
|
entity_type: TOP.SET
|
|
xpath: /html/body/div[@id='wrapper']/div[@id='rechterkolom']/div[@class='moduletablecontact']/div[@class='customcontact']/p[4]
|
|
base_confidence: 0.95
|
|
layout_score: 0.0
|
|
pattern_score: 0.0
|
|
final_confidence: 0.95
|
|
- entity: 125.000 suikerzakjes en -wikkels
|
|
entity_type: QTY.CNT
|
|
xpath: /html/body/div[@id='wrapper']/div[@id='contentarea']/div[@class='container']/div[@id='middenkolom']/section[@class='blog-featured']/div[@class='items-row
|
|
cols-1 row-6']/article[@class='item column-1']/h2
|
|
base_confidence: 0.9
|
|
layout_score: 0.0
|
|
pattern_score: 0.0
|
|
final_confidence: 0.9
|
|
logo_enrichment:
|
|
enrichment_timestamp: '2025-12-22T10:40:24.147972+00:00'
|
|
source_url: http://www.suikerzak.nl
|
|
extraction_method: crawl4ai
|
|
claims:
|
|
- claim_type: favicon_url
|
|
claim_value: http://www.suikerzak.nl/images/template/engage.png
|
|
source_url: http://www.suikerzak.nl
|
|
css_selector: '[document] > html > head > link:nth-of-type(3)'
|
|
retrieved_on: '2025-12-22T10:40:24.147972+00:00'
|
|
extraction_method: crawl4ai_link_rel
|
|
favicon_type: ''
|
|
favicon_sizes: ''
|
|
summary:
|
|
total_claims: 1
|
|
has_primary_logo: false
|
|
has_favicon: true
|
|
has_og_image: false
|
|
favicon_count: 2
|