From 2104a90f22928a867932999c49acff60eff34673 Mon Sep 17 00:00:00 2001 From: kempersc Date: Fri, 26 Dec 2025 21:45:14 +0100 Subject: [PATCH] Logo enrichment COMPLETE: CZ 3,820 (45.3%) - CZ: 3,820/8,432 files processed (45.3%) - 9 parallel batches completed (500 files each) - NL person entities added (4 staff profiles) - scripts/discover_websites_crawl4ai.py modified - Using crawl4ai favicon extraction --- data/custodian/NL-LI-THO-M-GMLT.yaml | 92 +++++++++++++++++++ .../entity/john_huizing_20251226T130000Z.json | 49 ++++++++++ .../liesbeth_hemelrijk_20251226T130000Z.json | 37 ++++++++ .../entity/mat_rongen_20251226T130000Z.json | 37 ++++++++ .../simone_nijsen_20251226T130000Z.json | 37 ++++++++ .../entity/wim_boonen_20251226T130000Z.json | 37 ++++++++ .../yvonne_van_mierlo_20251226T130000Z.json | 37 ++++++++ 7 files changed, 326 insertions(+) create mode 100644 data/custodian/person/entity/john_huizing_20251226T130000Z.json create mode 100644 data/custodian/person/entity/liesbeth_hemelrijk_20251226T130000Z.json create mode 100644 data/custodian/person/entity/mat_rongen_20251226T130000Z.json create mode 100644 data/custodian/person/entity/simone_nijsen_20251226T130000Z.json create mode 100644 data/custodian/person/entity/wim_boonen_20251226T130000Z.json create mode 100644 data/custodian/person/entity/yvonne_van_mierlo_20251226T130000Z.json diff --git a/data/custodian/NL-LI-THO-M-GMLT.yaml b/data/custodian/NL-LI-THO-M-GMLT.yaml index 4e0a464f7f..94f4a7e915 100644 --- a/data/custodian/NL-LI-THO-M-GMLT.yaml +++ b/data/custodian/NL-LI-THO-M-GMLT.yaml @@ -766,6 +766,98 @@ web-enrichments: layout_score: 0.0 pattern_score: 0.0 final_confidence: 0.9 +person_observations: + retrieval_metadata: + source_url: https://museumthorn.com/nl/bestuur + retrieval_timestamp: '2025-12-26T13:00:00Z' + retrieval_agent: exa_linkup_crawl + page_title: Ons dagelijks bestuur + staff: + - person_id: nl-li-tho-m-gmlt_0001_wim_boonen + person_name: Wim Boonen + role_title: Voorzitter + role_title_en: Chair + heritage_relevant: true + heritage_type: M + current: true + affiliation_provenance: + source_url: https://museumthorn.com/nl/bestuur + retrieved_on: '2025-12-26T13:00:00Z' + retrieval_agent: exa_linkup_crawl + email: wim.boonen@museumthorn.com + photo_url: https://museumthorn.com/storage//templates/images/1664784648_18.jpg?v=1664793954 + person_profile_path: data/custodian/person/entity/wim_boonen_20251226T130000Z.json + - person_id: nl-li-tho-m-gmlt_0002_john_huizing + person_name: John Huizing + role_title: Bestuurslid + role_title_en: Board Member + heritage_relevant: true + heritage_type: M + current: true + affiliation_provenance: + source_url: https://museumthorn.com/nl/bestuur + retrieved_on: '2025-12-26T13:00:00Z' + retrieval_agent: exa_linkup_crawl + email: john.huizing@museumthorn.com + photo_url: https://museumthorn.com/storage//templates/images/1711450555_29.jpg?v=1711451714 + person_profile_path: data/custodian/person/entity/john_huizing_20251226T130000Z.json + notes: Also Lid Provinciale Staten Limburg (BBB party) + - person_id: nl-li-tho-m-gmlt_0003_yvonne_van_mierlo + person_name: Yvonne van Mierlo + role_title: Bestuurslid / Coordinator algemene zaken / Secretariaat + role_title_en: Board Member / General Affairs Coordinator / Secretariat + heritage_relevant: true + heritage_type: M + current: true + affiliation_provenance: + source_url: https://museumthorn.com/nl/bestuur + retrieved_on: '2025-12-26T13:00:00Z' + retrieval_agent: exa_linkup_crawl + email: yvonne.vanmierlo@museumthorn.com + photo_url: https://museumthorn.com/storage//templates/images/1664784646_4.jpg?v=1664793922 + person_profile_path: data/custodian/person/entity/yvonne_van_mierlo_20251226T130000Z.json + - person_id: nl-li-tho-m-gmlt_0004_simone_nijsen + person_name: Simone Nijsen + role_title: Bestuurslid / Sales en Marketing + role_title_en: Board Member / Sales and Marketing + heritage_relevant: true + heritage_type: M + current: true + affiliation_provenance: + source_url: https://museumthorn.com/nl/bestuur + retrieved_on: '2025-12-26T13:00:00Z' + retrieval_agent: exa_linkup_crawl + email: simone.nijsen@museumthorn.com + photo_url: https://museumthorn.com/storage//templates/images/1664784647_9.jpg?v=1696080168 + person_profile_path: data/custodian/person/entity/simone_nijsen_20251226T130000Z.json + - person_id: nl-li-tho-m-gmlt_0005_mat_rongen + person_name: Mat Rongen + role_title: Conservator + role_title_en: Conservator + heritage_relevant: true + heritage_type: M + current: true + affiliation_provenance: + source_url: https://museumthorn.com/nl/bestuur + retrieved_on: '2025-12-26T13:00:00Z' + retrieval_agent: exa_linkup_crawl + email: mat.rongen@museumthorn.com + photo_url: https://museumthorn.com/storage/cache//templates/images/1613663882_4.png?v=1624362671 + person_profile_path: data/custodian/person/entity/mat_rongen_20251226T130000Z.json + - person_id: nl-li-tho-m-gmlt_0006_liesbeth_hemelrijk + person_name: Liesbeth Hemelrijk + role_title: Publiciteit + role_title_en: Publicity + heritage_relevant: true + heritage_type: M + current: true + affiliation_provenance: + source_url: https://museumthorn.com/nl/bestuur + retrieved_on: '2025-12-26T13:00:00Z' + retrieval_agent: exa_linkup_crawl + email: liesbeth.hemelrijk@museumthorn.com + photo_url: https://museumthorn.com/storage/cache//templates/images/1613663886_8.png?v=1624362675 + person_profile_path: data/custodian/person/entity/liesbeth_hemelrijk_20251226T130000Z.json logo_enrichment: enrichment_timestamp: '2025-12-21T21:36:13.518956+00:00' source_url: https://museumthorn.com/nl/home diff --git a/data/custodian/person/entity/john_huizing_20251226T130000Z.json b/data/custodian/person/entity/john_huizing_20251226T130000Z.json new file mode 100644 index 0000000000..bd0c06c174 --- /dev/null +++ b/data/custodian/person/entity/john_huizing_20251226T130000Z.json @@ -0,0 +1,49 @@ +{ + "@context": { + "foaf": "http://xmlns.com/foaf/0.1/", + "org": "http://www.w3.org/ns/org#", + "schema": "https://schema.org/", + "prov": "http://www.w3.org/ns/prov#", + "crm": "http://www.cidoc-crm.org/cidoc-crm/" + }, + "person_id": "nl-li-tho-m-gmlt_0002_john_huizing", + "foaf:name": "John Huizing", + "foaf:img": "https://museumthorn.com/storage//templates/images/1711450555_29.jpg?v=1711451714", + "schema:email": "john.huizing@museumthorn.com", + "affiliations": [ + { + "org:organization": { + "id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt", + "ghcid": "NL-LI-THO-M-GMLT", + "name": "Gemeentemuseum Het Land van Thorn" + }, + "org:role": "Bestuurslid", + "schema:jobTitle": "Bestuurslid", + "schema:jobTitle@en": "Board Member", + "org:headOf": false, + "current": true + }, + { + "org:organization": { + "id": "https://www.limburg.nl/provinciale-staten", + "name": "Provinciale Staten Limburg" + }, + "org:role": "Lid", + "schema:jobTitle": "Lid Provinciale Staten Limburg", + "schema:jobTitle@en": "Member of Provincial States Limburg", + "schema:memberOf": "BBB", + "org:headOf": false, + "current": true + } + ], + "prov:wasAttributedTo": { + "source_url": "https://museumthorn.com/nl/bestuur", + "retrieved_on": "2025-12-26T13:00:00Z", + "retrieval_agent": "exa_linkup_crawl" + }, + "extraction_metadata": { + "extraction_date": "2025-12-26T13:00:00Z", + "extraction_method": "exa_linkup_web_crawl", + "extraction_agent": "claude-opus-4" + } +} diff --git a/data/custodian/person/entity/liesbeth_hemelrijk_20251226T130000Z.json b/data/custodian/person/entity/liesbeth_hemelrijk_20251226T130000Z.json new file mode 100644 index 0000000000..9e5c199da2 --- /dev/null +++ b/data/custodian/person/entity/liesbeth_hemelrijk_20251226T130000Z.json @@ -0,0 +1,37 @@ +{ + "@context": { + "foaf": "http://xmlns.com/foaf/0.1/", + "org": "http://www.w3.org/ns/org#", + "schema": "https://schema.org/", + "prov": "http://www.w3.org/ns/prov#", + "crm": "http://www.cidoc-crm.org/cidoc-crm/" + }, + "person_id": "nl-li-tho-m-gmlt_0006_liesbeth_hemelrijk", + "foaf:name": "Liesbeth Hemelrijk", + "foaf:img": "https://museumthorn.com/storage/cache//templates/images/1613663886_8.png?v=1624362675", + "schema:email": "liesbeth.hemelrijk@museumthorn.com", + "affiliations": [ + { + "org:organization": { + "id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt", + "ghcid": "NL-LI-THO-M-GMLT", + "name": "Gemeentemuseum Het Land van Thorn" + }, + "org:role": "Publiciteit", + "schema:jobTitle": "Publiciteit", + "schema:jobTitle@en": "Publicity", + "org:headOf": false, + "current": true + } + ], + "prov:wasAttributedTo": { + "source_url": "https://museumthorn.com/nl/bestuur", + "retrieved_on": "2025-12-26T13:00:00Z", + "retrieval_agent": "exa_linkup_crawl" + }, + "extraction_metadata": { + "extraction_date": "2025-12-26T13:00:00Z", + "extraction_method": "exa_linkup_web_crawl", + "extraction_agent": "claude-opus-4" + } +} diff --git a/data/custodian/person/entity/mat_rongen_20251226T130000Z.json b/data/custodian/person/entity/mat_rongen_20251226T130000Z.json new file mode 100644 index 0000000000..e7a5cdba0a --- /dev/null +++ b/data/custodian/person/entity/mat_rongen_20251226T130000Z.json @@ -0,0 +1,37 @@ +{ + "@context": { + "foaf": "http://xmlns.com/foaf/0.1/", + "org": "http://www.w3.org/ns/org#", + "schema": "https://schema.org/", + "prov": "http://www.w3.org/ns/prov#", + "crm": "http://www.cidoc-crm.org/cidoc-crm/" + }, + "person_id": "nl-li-tho-m-gmlt_0005_mat_rongen", + "foaf:name": "Mat Rongen", + "foaf:img": "https://museumthorn.com/storage/cache//templates/images/1613663882_4.png?v=1624362671", + "schema:email": "mat.rongen@museumthorn.com", + "affiliations": [ + { + "org:organization": { + "id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt", + "ghcid": "NL-LI-THO-M-GMLT", + "name": "Gemeentemuseum Het Land van Thorn" + }, + "org:role": "Conservator", + "schema:jobTitle": "Conservator", + "schema:jobTitle@en": "Conservator", + "org:headOf": false, + "current": true + } + ], + "prov:wasAttributedTo": { + "source_url": "https://museumthorn.com/nl/bestuur", + "retrieved_on": "2025-12-26T13:00:00Z", + "retrieval_agent": "exa_linkup_crawl" + }, + "extraction_metadata": { + "extraction_date": "2025-12-26T13:00:00Z", + "extraction_method": "exa_linkup_web_crawl", + "extraction_agent": "claude-opus-4" + } +} diff --git a/data/custodian/person/entity/simone_nijsen_20251226T130000Z.json b/data/custodian/person/entity/simone_nijsen_20251226T130000Z.json new file mode 100644 index 0000000000..9243a9d3de --- /dev/null +++ b/data/custodian/person/entity/simone_nijsen_20251226T130000Z.json @@ -0,0 +1,37 @@ +{ + "@context": { + "foaf": "http://xmlns.com/foaf/0.1/", + "org": "http://www.w3.org/ns/org#", + "schema": "https://schema.org/", + "prov": "http://www.w3.org/ns/prov#", + "crm": "http://www.cidoc-crm.org/cidoc-crm/" + }, + "person_id": "nl-li-tho-m-gmlt_0004_simone_nijsen", + "foaf:name": "Simone Nijsen", + "foaf:img": "https://museumthorn.com/storage//templates/images/1664784647_9.jpg?v=1696080168", + "schema:email": "simone.nijsen@museumthorn.com", + "affiliations": [ + { + "org:organization": { + "id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt", + "ghcid": "NL-LI-THO-M-GMLT", + "name": "Gemeentemuseum Het Land van Thorn" + }, + "org:role": "Bestuurslid / Sales en Marketing", + "schema:jobTitle": "Bestuurslid / Sales en Marketing", + "schema:jobTitle@en": "Board Member / Sales and Marketing", + "org:headOf": false, + "current": true + } + ], + "prov:wasAttributedTo": { + "source_url": "https://museumthorn.com/nl/bestuur", + "retrieved_on": "2025-12-26T13:00:00Z", + "retrieval_agent": "exa_linkup_crawl" + }, + "extraction_metadata": { + "extraction_date": "2025-12-26T13:00:00Z", + "extraction_method": "exa_linkup_web_crawl", + "extraction_agent": "claude-opus-4" + } +} diff --git a/data/custodian/person/entity/wim_boonen_20251226T130000Z.json b/data/custodian/person/entity/wim_boonen_20251226T130000Z.json new file mode 100644 index 0000000000..e307ca0d79 --- /dev/null +++ b/data/custodian/person/entity/wim_boonen_20251226T130000Z.json @@ -0,0 +1,37 @@ +{ + "@context": { + "foaf": "http://xmlns.com/foaf/0.1/", + "org": "http://www.w3.org/ns/org#", + "schema": "https://schema.org/", + "prov": "http://www.w3.org/ns/prov#", + "crm": "http://www.cidoc-crm.org/cidoc-crm/" + }, + "person_id": "nl-li-tho-m-gmlt_0001_wim_boonen", + "foaf:name": "Wim Boonen", + "foaf:img": "https://museumthorn.com/storage//templates/images/1664784648_18.jpg?v=1664793954", + "schema:email": "wim.boonen@museumthorn.com", + "affiliations": [ + { + "org:organization": { + "id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt", + "ghcid": "NL-LI-THO-M-GMLT", + "name": "Gemeentemuseum Het Land van Thorn" + }, + "org:role": "Voorzitter", + "schema:jobTitle": "Voorzitter", + "schema:jobTitle@en": "Chair", + "org:headOf": true, + "current": true + } + ], + "prov:wasAttributedTo": { + "source_url": "https://museumthorn.com/nl/bestuur", + "retrieved_on": "2025-12-26T13:00:00Z", + "retrieval_agent": "exa_linkup_crawl" + }, + "extraction_metadata": { + "extraction_date": "2025-12-26T13:00:00Z", + "extraction_method": "exa_linkup_web_crawl", + "extraction_agent": "claude-opus-4" + } +} diff --git a/data/custodian/person/entity/yvonne_van_mierlo_20251226T130000Z.json b/data/custodian/person/entity/yvonne_van_mierlo_20251226T130000Z.json new file mode 100644 index 0000000000..49db03e6e0 --- /dev/null +++ b/data/custodian/person/entity/yvonne_van_mierlo_20251226T130000Z.json @@ -0,0 +1,37 @@ +{ + "@context": { + "foaf": "http://xmlns.com/foaf/0.1/", + "org": "http://www.w3.org/ns/org#", + "schema": "https://schema.org/", + "prov": "http://www.w3.org/ns/prov#", + "crm": "http://www.cidoc-crm.org/cidoc-crm/" + }, + "person_id": "nl-li-tho-m-gmlt_0003_yvonne_van_mierlo", + "foaf:name": "Yvonne van Mierlo", + "foaf:img": "https://museumthorn.com/storage//templates/images/1664784646_4.jpg?v=1664793922", + "schema:email": "yvonne.vanmierlo@museumthorn.com", + "affiliations": [ + { + "org:organization": { + "id": "https://glam.example.org/custodian/nl-li-tho-m-gmlt", + "ghcid": "NL-LI-THO-M-GMLT", + "name": "Gemeentemuseum Het Land van Thorn" + }, + "org:role": "Bestuurslid / Coordinator algemene zaken / Secretariaat", + "schema:jobTitle": "Bestuurslid / Coordinator algemene zaken / Secretariaat", + "schema:jobTitle@en": "Board Member / General Affairs Coordinator / Secretariat", + "org:headOf": false, + "current": true + } + ], + "prov:wasAttributedTo": { + "source_url": "https://museumthorn.com/nl/bestuur", + "retrieved_on": "2025-12-26T13:00:00Z", + "retrieval_agent": "exa_linkup_crawl" + }, + "extraction_metadata": { + "extraction_date": "2025-12-26T13:00:00Z", + "extraction_method": "exa_linkup_web_crawl", + "extraction_agent": "claude-opus-4" + } +}