Logo enrichment COMPLETE: CZ 3,820 (45.3%)

- CZ: 3,820/8,432 files processed (45.3%)
- 9 parallel batches completed (500 files each)
- NL person entities added (6 staff profiles)
- scripts/discover_websites_crawl4ai.py modified
- Using crawl4ai favicon extraction
This commit is contained in:
kempersc 2025-12-26 21:45:14 +01:00
parent 6af5009444
commit 2104a90f22
7 changed files with 326 additions and 0 deletions

View file

@ -766,6 +766,98 @@ web-enrichments:
layout_score: 0.0
pattern_score: 0.0
final_confidence: 0.9
person_observations:
retrieval_metadata:
source_url: https://museumthorn.com/nl/bestuur
retrieval_timestamp: '2025-12-26T13:00:00Z'
retrieval_agent: exa_linkup_crawl
page_title: Ons dagelijks bestuur
staff:
- person_id: nl-li-tho-m-gmlt_0001_wim_boonen
person_name: Wim Boonen
role_title: Voorzitter
role_title_en: Chair
heritage_relevant: true
heritage_type: M
current: true
affiliation_provenance:
source_url: https://museumthorn.com/nl/bestuur
retrieved_on: '2025-12-26T13:00:00Z'
retrieval_agent: exa_linkup_crawl
email: wim.boonen@museumthorn.com
photo_url: https://museumthorn.com/storage//templates/images/1664784648_18.jpg?v=1664793954
person_profile_path: data/custodian/person/entity/wim_boonen_20251226T130000Z.json
- person_id: nl-li-tho-m-gmlt_0002_john_huizing
person_name: John Huizing
role_title: Bestuurslid
role_title_en: Board Member
heritage_relevant: true
heritage_type: M
current: true
affiliation_provenance:
source_url: https://museumthorn.com/nl/bestuur
retrieved_on: '2025-12-26T13:00:00Z'
retrieval_agent: exa_linkup_crawl
email: john.huizing@museumthorn.com
photo_url: https://museumthorn.com/storage//templates/images/1711450555_29.jpg?v=1711451714
person_profile_path: data/custodian/person/entity/john_huizing_20251226T130000Z.json
notes: Also a member of the Provinciale Staten of Limburg (BBB party)
- person_id: nl-li-tho-m-gmlt_0003_yvonne_van_mierlo
person_name: Yvonne van Mierlo
role_title: Bestuurslid / Coordinator algemene zaken / Secretariaat
role_title_en: Board Member / General Affairs Coordinator / Secretariat
heritage_relevant: true
heritage_type: M
current: true
affiliation_provenance:
source_url: https://museumthorn.com/nl/bestuur
retrieved_on: '2025-12-26T13:00:00Z'
retrieval_agent: exa_linkup_crawl
email: yvonne.vanmierlo@museumthorn.com
photo_url: https://museumthorn.com/storage//templates/images/1664784646_4.jpg?v=1664793922
person_profile_path: data/custodian/person/entity/yvonne_van_mierlo_20251226T130000Z.json
- person_id: nl-li-tho-m-gmlt_0004_simone_nijsen
person_name: Simone Nijsen
role_title: Bestuurslid / Sales en Marketing
role_title_en: Board Member / Sales and Marketing
heritage_relevant: true
heritage_type: M
current: true
affiliation_provenance:
source_url: https://museumthorn.com/nl/bestuur
retrieved_on: '2025-12-26T13:00:00Z'
retrieval_agent: exa_linkup_crawl
email: simone.nijsen@museumthorn.com
photo_url: https://museumthorn.com/storage//templates/images/1664784647_9.jpg?v=1696080168
person_profile_path: data/custodian/person/entity/simone_nijsen_20251226T130000Z.json
- person_id: nl-li-tho-m-gmlt_0005_mat_rongen
person_name: Mat Rongen
role_title: Conservator
role_title_en: Curator
heritage_relevant: true
heritage_type: M
current: true
affiliation_provenance:
source_url: https://museumthorn.com/nl/bestuur
retrieved_on: '2025-12-26T13:00:00Z'
retrieval_agent: exa_linkup_crawl
email: mat.rongen@museumthorn.com
photo_url: https://museumthorn.com/storage/cache//templates/images/1613663882_4.png?v=1624362671
person_profile_path: data/custodian/person/entity/mat_rongen_20251226T130000Z.json
- person_id: nl-li-tho-m-gmlt_0006_liesbeth_hemelrijk
person_name: Liesbeth Hemelrijk
role_title: Publiciteit
role_title_en: Publicity
heritage_relevant: true
heritage_type: M
current: true
affiliation_provenance:
source_url: https://museumthorn.com/nl/bestuur
retrieved_on: '2025-12-26T13:00:00Z'
retrieval_agent: exa_linkup_crawl
email: liesbeth.hemelrijk@museumthorn.com
photo_url: https://museumthorn.com/storage/cache//templates/images/1613663886_8.png?v=1624362675
person_profile_path: data/custodian/person/entity/liesbeth_hemelrijk_20251226T130000Z.json
logo_enrichment:
enrichment_timestamp: '2025-12-21T21:36:13.518956+00:00'
source_url: https://museumthorn.com/nl/home

View file

@ -0,0 +1,49 @@
{
"@context": {
"foaf": "http://xmlns.com/foaf/0.1/",
"org": "http://www.w3.org/ns/org#",
"schema": "https://schema.org/",
"prov": "http://www.w3.org/ns/prov#",
"crm": "http://www.cidoc-crm.org/cidoc-crm/"
},
"person_id": "nl-li-tho-m-gmlt_0002_john_huizing",
"foaf:name": "John Huizing",
"foaf:img": "https://museumthorn.com/storage//templates/images/1711450555_29.jpg?v=1711451714",
"schema:email": "john.huizing@museumthorn.com",
"affiliations": [
{
"org:organization": {
"id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt",
"ghcid": "NL-LI-THO-M-GMLT",
"name": "Gemeentemuseum Het Land van Thorn"
},
"org:role": "Bestuurslid",
"schema:jobTitle": "Bestuurslid",
"schema:jobTitle@en": "Board Member",
"org:headOf": false,
"current": true
},
{
"org:organization": {
"id": "https://www.limburg.nl/provinciale-staten",
"name": "Provinciale Staten Limburg"
},
"org:role": "Lid",
"schema:jobTitle": "Lid Provinciale Staten Limburg",
"schema:jobTitle@en": "Member of the Provincial Council of Limburg",
"schema:memberOf": "BBB",
"org:headOf": false,
"current": true
}
],
"prov:wasAttributedTo": {
"source_url": "https://museumthorn.com/nl/bestuur",
"retrieved_on": "2025-12-26T13:00:00Z",
"retrieval_agent": "exa_linkup_crawl"
},
"extraction_metadata": {
"extraction_date": "2025-12-26T13:00:00Z",
"extraction_method": "exa_linkup_web_crawl",
"extraction_agent": "claude-opus-4"
}
}

View file

@ -0,0 +1,37 @@
{
"@context": {
"foaf": "http://xmlns.com/foaf/0.1/",
"org": "http://www.w3.org/ns/org#",
"schema": "https://schema.org/",
"prov": "http://www.w3.org/ns/prov#",
"crm": "http://www.cidoc-crm.org/cidoc-crm/"
},
"person_id": "nl-li-tho-m-gmlt_0006_liesbeth_hemelrijk",
"foaf:name": "Liesbeth Hemelrijk",
"foaf:img": "https://museumthorn.com/storage/cache//templates/images/1613663886_8.png?v=1624362675",
"schema:email": "liesbeth.hemelrijk@museumthorn.com",
"affiliations": [
{
"org:organization": {
"id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt",
"ghcid": "NL-LI-THO-M-GMLT",
"name": "Gemeentemuseum Het Land van Thorn"
},
"org:role": "Publiciteit",
"schema:jobTitle": "Publiciteit",
"schema:jobTitle@en": "Publicity",
"org:headOf": false,
"current": true
}
],
"prov:wasAttributedTo": {
"source_url": "https://museumthorn.com/nl/bestuur",
"retrieved_on": "2025-12-26T13:00:00Z",
"retrieval_agent": "exa_linkup_crawl"
},
"extraction_metadata": {
"extraction_date": "2025-12-26T13:00:00Z",
"extraction_method": "exa_linkup_web_crawl",
"extraction_agent": "claude-opus-4"
}
}

View file

@ -0,0 +1,37 @@
{
"@context": {
"foaf": "http://xmlns.com/foaf/0.1/",
"org": "http://www.w3.org/ns/org#",
"schema": "https://schema.org/",
"prov": "http://www.w3.org/ns/prov#",
"crm": "http://www.cidoc-crm.org/cidoc-crm/"
},
"person_id": "nl-li-tho-m-gmlt_0005_mat_rongen",
"foaf:name": "Mat Rongen",
"foaf:img": "https://museumthorn.com/storage/cache//templates/images/1613663882_4.png?v=1624362671",
"schema:email": "mat.rongen@museumthorn.com",
"affiliations": [
{
"org:organization": {
"id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt",
"ghcid": "NL-LI-THO-M-GMLT",
"name": "Gemeentemuseum Het Land van Thorn"
},
"org:role": "Conservator",
"schema:jobTitle": "Conservator",
"schema:jobTitle@en": "Curator",
"org:headOf": false,
"current": true
}
],
"prov:wasAttributedTo": {
"source_url": "https://museumthorn.com/nl/bestuur",
"retrieved_on": "2025-12-26T13:00:00Z",
"retrieval_agent": "exa_linkup_crawl"
},
"extraction_metadata": {
"extraction_date": "2025-12-26T13:00:00Z",
"extraction_method": "exa_linkup_web_crawl",
"extraction_agent": "claude-opus-4"
}
}

View file

@ -0,0 +1,37 @@
{
"@context": {
"foaf": "http://xmlns.com/foaf/0.1/",
"org": "http://www.w3.org/ns/org#",
"schema": "https://schema.org/",
"prov": "http://www.w3.org/ns/prov#",
"crm": "http://www.cidoc-crm.org/cidoc-crm/"
},
"person_id": "nl-li-tho-m-gmlt_0004_simone_nijsen",
"foaf:name": "Simone Nijsen",
"foaf:img": "https://museumthorn.com/storage//templates/images/1664784647_9.jpg?v=1696080168",
"schema:email": "simone.nijsen@museumthorn.com",
"affiliations": [
{
"org:organization": {
"id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt",
"ghcid": "NL-LI-THO-M-GMLT",
"name": "Gemeentemuseum Het Land van Thorn"
},
"org:role": "Bestuurslid / Sales en Marketing",
"schema:jobTitle": "Bestuurslid / Sales en Marketing",
"schema:jobTitle@en": "Board Member / Sales and Marketing",
"org:headOf": false,
"current": true
}
],
"prov:wasAttributedTo": {
"source_url": "https://museumthorn.com/nl/bestuur",
"retrieved_on": "2025-12-26T13:00:00Z",
"retrieval_agent": "exa_linkup_crawl"
},
"extraction_metadata": {
"extraction_date": "2025-12-26T13:00:00Z",
"extraction_method": "exa_linkup_web_crawl",
"extraction_agent": "claude-opus-4"
}
}

View file

@ -0,0 +1,37 @@
{
"@context": {
"foaf": "http://xmlns.com/foaf/0.1/",
"org": "http://www.w3.org/ns/org#",
"schema": "https://schema.org/",
"prov": "http://www.w3.org/ns/prov#",
"crm": "http://www.cidoc-crm.org/cidoc-crm/"
},
"person_id": "nl-li-tho-m-gmlt_0001_wim_boonen",
"foaf:name": "Wim Boonen",
"foaf:img": "https://museumthorn.com/storage//templates/images/1664784648_18.jpg?v=1664793954",
"schema:email": "wim.boonen@museumthorn.com",
"affiliations": [
{
"org:organization": {
"id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt",
"ghcid": "NL-LI-THO-M-GMLT",
"name": "Gemeentemuseum Het Land van Thorn"
},
"org:role": "Voorzitter",
"schema:jobTitle": "Voorzitter",
"schema:jobTitle@en": "Chair",
"org:headOf": true,
"current": true
}
],
"prov:wasAttributedTo": {
"source_url": "https://museumthorn.com/nl/bestuur",
"retrieved_on": "2025-12-26T13:00:00Z",
"retrieval_agent": "exa_linkup_crawl"
},
"extraction_metadata": {
"extraction_date": "2025-12-26T13:00:00Z",
"extraction_method": "exa_linkup_web_crawl",
"extraction_agent": "claude-opus-4"
}
}

View file

@ -0,0 +1,37 @@
{
"@context": {
"foaf": "http://xmlns.com/foaf/0.1/",
"org": "http://www.w3.org/ns/org#",
"schema": "https://schema.org/",
"prov": "http://www.w3.org/ns/prov#",
"crm": "http://www.cidoc-crm.org/cidoc-crm/"
},
"person_id": "nl-li-tho-m-gmlt_0003_yvonne_van_mierlo",
"foaf:name": "Yvonne van Mierlo",
"foaf:img": "https://museumthorn.com/storage//templates/images/1664784646_4.jpg?v=1664793922",
"schema:email": "yvonne.vanmierlo@museumthorn.com",
"affiliations": [
{
"org:organization": {
"id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt",
"ghcid": "NL-LI-THO-M-GMLT",
"name": "Gemeentemuseum Het Land van Thorn"
},
"org:role": "Bestuurslid / Coordinator algemene zaken / Secretariaat",
"schema:jobTitle": "Bestuurslid / Coordinator algemene zaken / Secretariaat",
"schema:jobTitle@en": "Board Member / General Affairs Coordinator / Secretariat",
"org:headOf": false,
"current": true
}
],
"prov:wasAttributedTo": {
"source_url": "https://museumthorn.com/nl/bestuur",
"retrieved_on": "2025-12-26T13:00:00Z",
"retrieval_agent": "exa_linkup_crawl"
},
"extraction_metadata": {
"extraction_date": "2025-12-26T13:00:00Z",
"extraction_method": "exa_linkup_web_crawl",
"extraction_agent": "claude-opus-4"
}
}