glam/data/custodian/NL-UT-HOU-S-CSN.yaml
2025-12-31 00:00:25 +01:00

445 lines
15 KiB
YAML

original_entry:
organisatie: Club van Suikerzakjesverzamelaars in Nederland
collectie_nederland: ja
reference:
- label: https://www.suikerzak.nl/
type:
- S
entry_index: 1021
processing_timestamp: '2025-11-27T16:43:59.817431+00:00'
enrichment_status: skipped
provenance:
schema_version: 1.0.0
generated_at: '2025-11-28T23:47:30.547276+00:00'
sources:
original_entry:
- source_type: nde_csv_registry
data_tier: TIER_1_AUTHORITATIVE
claims_extracted:
- organisatie
- collectie_nederland
- reference
- type
google_maps:
- source_type: google_maps_api
fetch_timestamp: '2025-11-28T09:58:58.954001+00:00'
api_endpoint: https://maps.googleapis.com/maps/api/place/textsearch
place_id: ChIJWzo_OP9mxkcRP9DdY57bTtM
claims_extracted:
- coordinates
- formatted_address
- address_components
- business_status
- website
- phone
- rating
- reviews
- opening_hours
genealogiewerkbalk:
- source_type: genealogiewerkbalk_registry
fetch_timestamp: '2025-12-03T13:51:42.061162+00:00'
data_url: https://docs.google.com/spreadsheets/d/1rS_Z5L6L2vvfGLS6eHI8wfyiwB-KUfHEr7W1VNY3rpg/export?format=csv
match_method: google_maps_admin2
claims_extracted:
- municipality_name
- municipality_code
- municipal_archive_name
- municipal_archive_website
- municipal_archive_isil
- province_name
- province_code
- provincial_archive_name
- provincial_archive_website
linkup_timespan:
- source_type: linkup_web_search
fetch_timestamp: '2025-12-15T21:33:47.899348+00:00'
search_query: '"Club van Suikerzakjesverzamelaars in Nederland" Houten opgericht
OR gesticht OR sinds'
source_urls:
- https://www.harrydietz.nl/index.php/suikerzakjes
- https://nl.wikipedia.org/wiki/Suikerzakje
- http://nl.wikisage.org/wiki/Club_van_Suikerzakjesverzamelaars_in_Nederland
- https://nl.wikipedia.org/wiki/Suikerfabriek
- https://www.tabakshistorie.nl/nl/algemeen/bestaande-fabriekspanden-eo/31/
claims_extracted:
- timespan_begin
data_tier: TIER_4_INFERRED
archive_path: data/custodian/web/1021/linkup/linkup_founding_20251215T213347Z.json
data_tier_summary:
TIER_1_AUTHORITATIVE:
- original_entry (NDE CSV)
TIER_2_VERIFIED:
- wikidata_api
- google_maps_api
- genealogiewerkbalk_registry
TIER_3_CROWD_SOURCED: []
TIER_4_INFERRED:
- website_scrape
- exa_web_search
notes:
- Provenance tracking added retroactively
- claim_level_provenance available in sources section
- Canonical location added via normalize_custodian_files.py on 2025-12-08T23:19:24Z
- Canonical location added via normalize_custodian_files.py on 2025-12-09T07:09:49Z
skip_reason: no_wikidata_id
google_maps_enrichment:
place_id: ChIJWzo_OP9mxkcRP9DdY57bTtM
name: Club van Suikerzakjesverzamelaars in Nederland
fetch_timestamp: '2025-11-28T09:58:58.954001+00:00'
api_status: OK
coordinates:
latitude: 52.0339982
longitude: 5.1726966999999995
formatted_address: De Fuik 29, 3995 BJ Houten
short_address: De Fuik 29, Houten
address_components:
- long_name: '29'
short_name: '29'
types:
- street_number
- long_name: De Fuik
short_name: De Fuik
types:
- route
- long_name: Houten
short_name: Houten
types:
- locality
- political
- long_name: Houten
short_name: Houten
types:
- administrative_area_level_2
- political
- long_name: Utrecht
short_name: UT
types:
- administrative_area_level_1
- political
- long_name: Nederland
short_name: NL
types:
- country
- political
- long_name: 3995 BJ
short_name: 3995 BJ
types:
- postal_code
phone_local: 06 22111485
phone_international: +31 6 22111485
website: http://www.suikerzak.nl/
google_place_types:
- point_of_interest
- store
- establishment
primary_type: store
business_status: OPERATIONAL
opening_hours:
open_now: true
periods:
- open:
day: 0
hour: 0
minute: 0
weekday_text:
- 'maandag: 24 uur geopend'
- 'dinsdag: 24 uur geopend'
- 'woensdag: 24 uur geopend'
- 'donderdag: 24 uur geopend'
- 'vrijdag: 24 uur geopend'
- 'zaterdag: 24 uur geopend'
- 'zondag: 24 uur geopend'
rating: 5
total_ratings: 1
reviews:
- author_name: P.J V Krieken
author_uri: https://www.google.com/maps/contrib/103613345661508403588/reviews
rating: 5
relative_time_description: 7 maanden geleden
text: Leuke site en dat helemaal opgezet vj hobby en met een catalogus van alle
bekend zijnde suikerzakjes. (Ned.erfgoed )
publish_time: '2025-04-16T14:08:41.123310Z'
google_maps_url: https://maps.google.com/?cid=15226348863513022527&g_mp=Cidnb29nbGUubWFwcy5wbGFjZXMudjEuUGxhY2VzLlNlYXJjaFRleHQQAhgEIAA
street_view_url: https://maps.googleapis.com/maps/api/streetview?size=600x400&location=52.0339982,5.1726966999999995&key=AIzaSyAHuazNth9ZvfRFYk5-v49CwXwhABH8Ri0
icon_mask_uri: https://maps.gstatic.com/mapfiles/place_api/icons/v2/shopping_pinlet
icon_background_color: '#4B96F3'
utc_offset_minutes: 60
google_maps_status: SUCCESS
google_maps_search_query: Club van Suikerzakjesverzamelaars in Nederland, Netherlands
web_enrichment:
web_archives:
- url: https://www.suikerzak.nl/
directory: web/1021/suikerzak.nl
pages_archived: 13
archive_method: wget_warc_deep
warc_file: archive.warc.gz
warc_size_bytes: 109629
warc_format: ISO 28500
full_site_archive_timestamp: '2025-11-29T17:46:40.278592+00:00'
ghcid:
ghcid_current: NL-UT-HOU-S-CSN
ghcid_original: NL-UT-HOU-S-CSN
ghcid_uuid: 1754a45a-8bf0-569e-9c6b-8c1b65e63c70
ghcid_uuid_sha256: 5cc2a09d-0cfb-8ad5-a5c6-5a3e8d4a69e3
ghcid_numeric: 6684081393359330005
record_id: 019adf94-bae2-7a75-a550-1a466f6b1663
generation_timestamp: '2025-12-02T14:56:45.615377+00:00'
ghcid_history:
- ghcid: NL-UT-HOU-S-CSN
ghcid_numeric: 6684081393359330005
valid_from: '2025-12-02T14:56:45.615377+00:00'
valid_to: null
reason: GHCID regenerated with Google Maps locality fix (Dec 2025)
location_resolution:
method: GOOGLE_MAPS_LOCALITY
google_maps_locality: Houten
geonames_id: 2753557
geonames_name: Houten
feature_code: PPL
population: 1335
admin1_code: 09
region_code: UT
country_code: NL
source_coordinates:
latitude: 52.0339982
longitude: 5.1726966999999995
source: google_maps
geonames_id: 2753557
identifiers:
- identifier_scheme: GHCID
identifier_value: NL-UT-HOU-S-CSN
- identifier_scheme: GHCID_UUID
identifier_value: 1754a45a-8bf0-569e-9c6b-8c1b65e63c70
identifier_url: urn:uuid:1754a45a-8bf0-569e-9c6b-8c1b65e63c70
- identifier_scheme: GHCID_UUID_SHA256
identifier_value: 5cc2a09d-0cfb-8ad5-a5c6-5a3e8d4a69e3
identifier_url: urn:uuid:5cc2a09d-0cfb-8ad5-a5c6-5a3e8d4a69e3
- identifier_scheme: GHCID_NUMERIC
identifier_value: '6684081393359330005'
- identifier_scheme: RECORD_ID
identifier_value: 019adf94-bae2-7a75-a550-1a466f6b1663
identifier_url: urn:uuid:019adf94-bae2-7a75-a550-1a466f6b1663
custodian_name:
claim_type: custodian_name
claim_value: Club van Suikerzakjesverzamelaars in Nederland
source: web_title_tag
confidence: 0.8
consensus_method: true
sources_checked: 3
sources_matched: 3
extraction_timestamp: '2025-12-02T13:08:44.258272+00:00'
matching_sources:
- source: google_maps
name: Club van Suikerzakjesverzamelaars in Nederland
score: 1.0
- source: original_entry
name: Club van Suikerzakjesverzamelaars in Nederland
score: 1.0
genealogiewerkbalk_enrichment:
source: Genealogiewerkbalk.nl Municipality Archives Registry
source_url: https://www.genealogiewerkbalk.nl/archieven.html
data_url: https://docs.google.com/spreadsheets/d/1rS_Z5L6L2vvfGLS6eHI8wfyiwB-KUfHEr7W1VNY3rpg/export?format=csv
data_tier: TIER_2_VERIFIED
enrichment_timestamp: '2025-12-03T13:51:42.061149+00:00'
match_method: google_maps_admin2
match_confidence: 0.95
municipality:
name: Houten
code: '321'
municipal_archive:
name: Regionaal Archief Zuid-Utrecht
website: https://www.razu.nl/
isil: null
isil_note: geenRAZU
province:
name: Utrecht
code: '26'
provincial_archive:
name: Het Utrechts Archief
website: https://hetutrechtsarchief.nl/
extra_info: Kijk voor meer info over archieven in Utrecht op https://www.landschaperfgoedutrecht.nl/erfgoed/historische-verenigingen-en-archieven/archieven/
digital_platforms:
- platform_name: Club van Suikerzakjesverzamelaars in Nederland Website
platform_url: http://www.suikerzak.nl/
platform_type: WEBSITE
platform_category:
- Organizational website
digital_collections: Organizational website
technology_stack: Standard web technology
data_standards:
- HTML5
user_services: General information, Contact
sustainability_model: Institutional funding
enrichment_timestamp: '2025-12-05T11:21:46.303886+00:00'
source_method: automated_extraction
wikidata_enrichment:
status: NOT_FOUND
fetch_timestamp: '2025-12-06T19:41:06.655662+00:00'
search_query: Club van Suikerzakjesverzamelaars in Nederland
location:
latitude: 52.0339982
longitude: 5.1726966999999995
coordinate_provenance:
source_type: GOOGLE_MAPS
source_path: google_maps_enrichment.coordinates
original_timestamp: '2025-11-28T09:58:58.954001+00:00'
entity_id: ChIJWzo_OP9mxkcRP9DdY57bTtM
api_endpoint: https://maps.googleapis.com/maps/api/place/textsearch
city: Houten
region: Utrecht
region_code: UT
country: NL
postal_code: 3995 BJ
street_address: De Fuik 29, Houten
formatted_address: De Fuik 29, 3995 BJ Houten
geonames_id: 2753557
geonames_name: Houten
feature_code: PPL
normalization_timestamp: '2025-12-09T07:09:49.067750+00:00'
web_person_claims:
extraction_timestamp: '2025-12-12T21:55:11.464101+00:00'
extraction_method: ch_annotator_entity_extraction_v2
filtering_applied: true
source_annotations:
- custodian/web/1021/suikerzak.nl/annotations_v1.7.0.yaml
persons_count: 1
persons_rejected_count: 0
persons:
- name: Harry Dietz
entity_type: AGT.PER
entity_id: entity-4
class_uri: crm:E21_Person
recognition_confidence: 0.95
provenance:
xpath: /html/body/div[@id='wrapper']/div[@id='rechterkolom']/div[@class='moduletablecontact']/div[@class='customcontact']/p[2]
timestamp: '2025-12-06T08:44:40.482134+00:00'
agent: zai/glm-4.6
confidence: 1.0
context_convention: GLAM-NER v1.7.0-unified
source_url: https://www.suikerzak.nl/
relationships: []
crawl4ai_enrichment:
retrieval_timestamp: '2025-12-14T18:20:11.002083+00:00'
retrieval_agent: crawl4ai
source_url: http://www.suikerzak.nl/
status_code: null
error: Crawl failed with status None
digital_platform_v2:
transformation_metadata:
transformed_from: httpx_beautifulsoup
transformation_date: '2025-12-14T23:23:18.554141+00:00'
transformation_version: '2.1'
source_status_code: 200
primary_platform:
platform_id: primary_website_suikerzak_nl
platform_name: Club van Suikerzakjesverzamelaars in Nederland Website
platform_url: https://www.suikerzak.nl/
platform_type: DISCOVERY_PORTAL
description: Website van de Club van Suikerzakjesverzamelaars in Nederland
language: nl
og_image: null
favicon: http://www.suikerzak.nl/images/template/engage.png
collection_urls:
- http://www.suikerzak.nl/catalogi
- http://www.suikerzak.nl/zoeken
navigation_links:
- http://www.suikerzak.nl/
- http://www.suikerzak.nl/de-club
- http://www.suikerzak.nl/suiker-zakjes
- http://www.suikerzak.nl/catalogi
- http://www.suikerzak.nl/zoeken
- http://www.suikerzak.nl/vraag-aanbod
- http://www.suikerzak.nl/adressen-links
- http://www.suikerzak.nl/clubwinkel
- http://www.suikerzak.nl/8-nieuwsberichten/71-naamstempels-op-de-achterkant-van-suikerzakjes
- http://www.suikerzak.nl/2-ongecategoriseerd/190-tentoonstelling-marcel-bekema
- http://www.suikerzak.nl/uit-de-oude-doos
- http://www.suikerzak.nl/images/suikerzakjespraat_19601214a.jpg
- http://www.suikerzak.nl/images/suikerzakjespraat_19601214b.jpg
- http://www.suikerzak.nl/8-nieuwsberichten/151-onze-24e-internationale-ruildag-25-oktober-2025
- http://www.suikerzak.nl/8-nieuwsberichten/180-artikel-over-van-de-valk-serie-9
- http://www.suikerzak.nl/8-nieuwsberichten/176-nieuwe-catalogus-suikerzakjes-van-sogeler
- http://www.suikerzak.nl/8-nieuwsberichten/177-nieuwe-catalogus-suikerzakjes-van-comby
- http://www.suikerzak.nl/?start=11
- http://www.suikerzak.nl/?start=22
- http://www.suikerzak.nl/?start=33
timespan:
begin_of_the_begin: '2000-12-27T00:00:00Z'
end_of_the_begin: '2000-12-27T00:00:00Z'
begin_of_the_end: null
end_of_the_end: null
sources:
- 'Linkup web search: https://www.suikerzak.nl/de-club'
notes: 'Found via pattern: full_date_nl'
web-enrichments:
extraction_timestamp: '2025-12-13T19:47:02.652261+00:00'
extraction_method: hybrid_llm_pattern_layout_v1
confidence_threshold: 0.6
entities_count: 6
claims:
- entity: Club van Suikerzakjesverzamelaars in Nederland
entity_type: GRP.ASS
xpath: /html/head/title
base_confidence: 1.0
layout_score: 0.15
pattern_score: 0.0
final_confidence: 1.0
layout_match: high_conf:head/title
- entity: De Fuik 29
entity_type: TOP.ADR
xpath: /html/body/div[@id='wrapper']/div[@id='rechterkolom']/div[@class='moduletablecontact']/div[@class='customcontact']/p[3]
base_confidence: 0.95
layout_score: 0.2
pattern_score: 0.0
final_confidence: 1.0
layout_match: high_conf:body/footer/*
- entity: Harry Dietz
entity_type: AGT.PER
xpath: /html/body/div[@id='wrapper']/div[@id='rechterkolom']/div[@class='moduletablecontact']/div[@class='customcontact']/p[2]
base_confidence: 0.95
layout_score: 0.1
pattern_score: 0.15
final_confidence: 1.0
layout_match: high_conf:body/*/p
pattern_match: ^[A-Z][a-z]+\s+[A-Z][a-z]+
- entity: https://www.suikerzak.nl
entity_type: APP.URL
xpath: /html/body/div[@id='wrapper']/div[@id='rechterkolom']/div[@class='moduletablecontact']/div[@class='customcontact']/p[7]/a
base_confidence: 1.0
layout_score: 0.0
pattern_score: 0.0
final_confidence: 1.0
- entity: 3995 BJ Houten
entity_type: TOP.SET
xpath: /html/body/div[@id='wrapper']/div[@id='rechterkolom']/div[@class='moduletablecontact']/div[@class='customcontact']/p[4]
base_confidence: 0.95
layout_score: 0.0
pattern_score: 0.0
final_confidence: 0.95
- entity: 125.000 suikerzakjes en -wikkels
entity_type: QTY.CNT
xpath: /html/body/div[@id='wrapper']/div[@id='contentarea']/div[@class='container']/div[@id='middenkolom']/section[@class='blog-featured']/div[@class='items-row
cols-1 row-6']/article[@class='item column-1']/h2
base_confidence: 0.9
layout_score: 0.0
pattern_score: 0.0
final_confidence: 0.9
logo_enrichment:
enrichment_timestamp: '2025-12-22T10:40:24.147972+00:00'
source_url: http://www.suikerzak.nl
extraction_method: crawl4ai
claims:
- claim_type: favicon_url
claim_value: http://www.suikerzak.nl/images/template/engage.png
source_url: http://www.suikerzak.nl
css_selector: '[document] > html > head > link:nth-of-type(3)'
retrieved_on: '2025-12-22T10:40:24.147972+00:00'
extraction_method: crawl4ai_link_rel
favicon_type: ''
favicon_sizes: ''
summary:
total_claims: 1
has_primary_logo: false
has_favicon: true
has_og_image: false
favicon_count: 2