diff --git a/data/nde/enriched/entries/0005_Q81181263.yaml b/data/nde/enriched/entries/0005_Q81181263.yaml index 8ad5eaa155..cbfed71d1a 100644 --- a/data/nde/enriched/entries/0005_Q81181263.yaml +++ b/data/nde/enriched/entries/0005_Q81181263.yaml @@ -536,18 +536,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:43:58.861472+00:00' source_archive: web/0005/aaenhunze.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Gemeente Aa en Hunze - source_url: https://www.aaenhunze.nl/ - retrieved_on: '2025-11-29T13:28:29.517181+00:00' - xpath: /html/head/title - html_file: web/0005/aaenhunze.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:43:58.860729+00:00' - claim_type: address claim_value: Spiekersteeg 1 raw_value: Spiekersteeg 1 @@ -628,6 +618,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:43:58.861346+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Aa en Hunze diff --git a/data/nde/enriched/entries/0006_Q81181245.yaml b/data/nde/enriched/entries/0006_Q81181245.yaml index c4ddd9876e..950dbed69d 100644 --- a/data/nde/enriched/entries/0006_Q81181245.yaml +++ b/data/nde/enriched/entries/0006_Q81181245.yaml @@ -507,18 +507,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:43:59.122650+00:00' source_archive: web/0006/borger-odoorn.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Borger-Odoorn - source_url: https://www.borger-odoorn.nl/ - retrieved_on: '2025-11-29T13:30:23.176866+00:00' - xpath: /html/head/title - html_file: web/0006/borger-odoorn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:43:59.122129+00:00' - claim_type: org_name claim_value: default icon raw_value: default icon @@ -581,6 +571,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:43:59.122541+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Borger-Odoorn diff --git a/data/nde/enriched/entries/0007_Q81181227.yaml b/data/nde/enriched/entries/0007_Q81181227.yaml index 5b7d2d3e9d..09a3c6ca56 100644 --- a/data/nde/enriched/entries/0007_Q81181227.yaml +++ b/data/nde/enriched/entries/0007_Q81181227.yaml @@ -518,18 +518,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:43:59.362549+00:00' source_archive: web/0007/coevorden.nl - claims_count: 17 + claims_count: 16 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Coevorden - source_url: https://www.coevorden.nl/ - retrieved_on: '2025-11-29T13:32:08.818759+00:00' - xpath: /html/head/title - html_file: web/0007/coevorden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:43:59.361804+00:00' - claim_type: org_name claim_value: paspoort icon raw_value: paspoort icon @@ -692,6 +682,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:43:59.362340+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Coevorden diff --git a/data/nde/enriched/entries/0011_Q81181243.yaml b/data/nde/enriched/entries/0011_Q81181243.yaml index f1c0fcc6b1..db97d6665d 100644 --- a/data/nde/enriched/entries/0011_Q81181243.yaml +++ b/data/nde/enriched/entries/0011_Q81181243.yaml @@ -549,18 +549,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:00.017321+00:00' source_archive: web/0011/gemeente.emmen.nl - claims_count: 32 + claims_count: 31 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Emmen - source_url: http://www.gemeente.emmen.nl - retrieved_on: '2025-11-29T14:24:46.985786+00:00' - xpath: /html/head/title - html_file: web/0011/gemeente.emmen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.016608+00:00' - claim_type: org_name claim_value: afspraak icon raw_value: afspraak icon @@ -871,6 +861,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:00.017183+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Emmen diff --git a/data/nde/enriched/entries/0012_Q81181321.yaml b/data/nde/enriched/entries/0012_Q81181321.yaml index 758bae6e80..178394ae10 100644 --- a/data/nde/enriched/entries/0012_Q81181321.yaml +++ b/data/nde/enriched/entries/0012_Q81181321.yaml @@ -492,18 +492,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:00.530508+00:00' source_archive: web/0012/meppel.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Meppel - source_url: http://www.meppel.nl - retrieved_on: '2025-11-29T14:27:04.292512+00:00' - xpath: /html/head/title - html_file: web/0012/meppel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.529951+00:00' - claim_type: description_short claim_value: De officiële website van Gemeente Meppel – Informatie over wonen, werken en leven in Meppel. @@ -586,6 +576,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:00.530413+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Meppel diff --git a/data/nde/enriched/entries/0014_Q81181341.yaml b/data/nde/enriched/entries/0014_Q81181341.yaml index f978135173..093c92f006 100644 --- a/data/nde/enriched/entries/0014_Q81181341.yaml +++ b/data/nde/enriched/entries/0014_Q81181341.yaml @@ -521,7 +521,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:00.780426+00:00' source_archive: web/0014/noordenveld.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: Home-NL @@ -553,36 +553,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:00.779541+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.noordenveld.nl/ - retrieved_on: '2025-11-29T14:24:46.856894+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0014/noordenveld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.779551+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.noordenveld.nl/ - retrieved_on: '2025-11-29T14:24:46.856894+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0014/noordenveld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.779555+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.noordenveld.nl/ - retrieved_on: '2025-11-29T14:24:46.856894+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0014/noordenveld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.779559+00:00' - claim_type: description_short claim_value: Homepagina Nederlands raw_value: Homepagina Nederlands @@ -643,6 +613,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:00.780230+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Noordenveld diff --git a/data/nde/enriched/entries/0015_Q81181229.yaml b/data/nde/enriched/entries/0015_Q81181229.yaml index d277e2fbb7..2714c629ca 100644 --- a/data/nde/enriched/entries/0015_Q81181229.yaml +++ b/data/nde/enriched/entries/0015_Q81181229.yaml @@ -367,18 +367,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:00.887651+00:00' source_archive: web/0015/gemeentewesterveld.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Gemeente Westerveld - source_url: https://www.gemeentewesterveld.nl/ - retrieved_on: '2025-11-29T14:24:47.974615+00:00' - xpath: /html/head/title - html_file: web/0015/gemeentewesterveld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.886185+00:00' - claim_type: email claim_value: info@gemeentewesterveld.nl raw_value: info@gemeentewesterveld.nl @@ -439,6 +429,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:00.887341+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Westerveld diff --git a/data/nde/enriched/entries/0016_Q81181377.yaml b/data/nde/enriched/entries/0016_Q81181377.yaml index 933253cbed..210afb8844 100644 --- a/data/nde/enriched/entries/0016_Q81181377.yaml +++ b/data/nde/enriched/entries/0016_Q81181377.yaml @@ -557,7 +557,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:01.020682+00:00' source_archive: web/0016/tynaarlo.nl - claims_count: 9 + claims_count: 8 claims: - claim_type: org_name claim_value: Gemeentelijk archief @@ -597,16 +597,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:44:01.020346+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.tynaarlo.nl%2Fbestuur-en-organisatie%2Fgemeentelijk-archief&t=Gemeentelijk+archief - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.tynaarlo.nl%2Fbestuur-en-organisatie%2Fgemeentelijk-archief&t=Gemeentelijk+archief - source_url: https://www.tynaarlo.nl/bestuur-en-organisatie/gemeentelijk-archief - retrieved_on: '2025-11-29T14:24:48.197833+00:00' - xpath: /html/body/div/main/section[2]/div[2]/div/div/div[1]/a - html_file: web/0016/tynaarlo.nl/pages/bestuur-en-organisatie_gemeentelijk-archief.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:01.020515+00:00' - claim_type: social_twitter claim_value: https://x.com/share?url=https%3A%2F%2Fwww.tynaarlo.nl%2Fbestuur-en-organisatie%2Fgemeentelijk-archief&text=Gemeentelijk+archief raw_value: https://x.com/share?url=https%3A%2F%2Fwww.tynaarlo.nl%2Fbestuur-en-organisatie%2Fgemeentelijk-archief&text=Gemeentelijk+archief @@ -657,6 +647,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:01.020542+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Tynaarlo diff --git a/data/nde/enriched/entries/0017_harmonium_museum_nederland.yaml b/data/nde/enriched/entries/0017_harmonium_museum_nederland.yaml index 619a84de81..ee4ce018c7 100644 --- a/data/nde/enriched/entries/0017_harmonium_museum_nederland.yaml +++ b/data/nde/enriched/entries/0017_harmonium_museum_nederland.yaml @@ -378,18 +378,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:01.302803+00:00' source_archive: web/0017/harmoniummuseumnederland.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Harmonium Museum Nederland - source_url: https://harmoniummuseumnederland.nl/ - retrieved_on: '2025-11-29T14:24:50.775403+00:00' - xpath: /html/head/title - html_file: web/0017/harmoniummuseumnederland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:01.302081+00:00' - claim_type: description_short claim_value: Lees alles over de achtergrond en de kerncollectie, die behouden bleef, van het Harmonium Museum Nederland, dat helaas is gesloten sinds eind @@ -443,6 +433,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:44:01.302638+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Harmonium Museum Nederland diff --git a/data/nde/enriched/entries/0020_roel_van_der_sleen.yaml b/data/nde/enriched/entries/0020_roel_van_der_sleen.yaml index 94b09fd799..b93452838a 100644 --- a/data/nde/enriched/entries/0020_roel_van_der_sleen.yaml +++ b/data/nde/enriched/entries/0020_roel_van_der_sleen.yaml @@ -375,18 +375,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:01.943174+00:00' source_archive: web/0020/historischekringhoogeveen.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historische Kring Hoogeveen - source_url: https://www.historischekringhoogeveen.nl/ - retrieved_on: '2025-11-29T14:24:51.613006+00:00' - xpath: /html/head/title - html_file: web/0020/historischekringhoogeveen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:01.942281+00:00' - claim_type: description_short claim_value: Description raw_value: Description @@ -397,6 +387,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:44:01.942394+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Kring Hoogeveen diff --git a/data/nde/enriched/entries/0022_Q1911968.yaml b/data/nde/enriched/entries/0022_Q1911968.yaml index 14f0a72c7f..e79fd0efab 100644 --- a/data/nde/enriched/entries/0022_Q1911968.yaml +++ b/data/nde/enriched/entries/0022_Q1911968.yaml @@ -565,18 +565,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:02.429976+00:00' source_archive: web/0022/smalspoorcentrum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Industrieel Smalspoor Museum Industrieel Smalspoor Museum - source_url: http://www.smalspoorcentrum.nl/ - retrieved_on: '2025-11-29T14:26:08.048987+00:00' - xpath: /html/head/title - html_file: web/0022/smalspoorcentrum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:02.428857+00:00' - claim_type: org_name claim_value: Industrieel Smalspoor Museum raw_value: Industrieel Smalspoor Museum @@ -637,6 +627,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:02.429890+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Industrieel Smalspoor Museum diff --git a/data/nde/enriched/entries/0023_Q22006174.yaml b/data/nde/enriched/entries/0023_Q22006174.yaml index 3aee500087..34585863f5 100644 --- a/data/nde/enriched/entries/0023_Q22006174.yaml +++ b/data/nde/enriched/entries/0023_Q22006174.yaml @@ -602,18 +602,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:02.855034+00:00' source_archive: web/0023/miramar-zeemuseum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Miramar Zeemuseum - source_url: https://miramar-zeemuseum.nl/ - retrieved_on: '2025-11-29T14:26:04.763022+00:00' - xpath: /html/head/title - html_file: web/0023/miramar-zeemuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:02.853829+00:00' - claim_type: description_short claim_value: Wat ooit begon met de fascinatie voor een schelp, gevonden op het strand van Mallorca, is uitgegroeid tot een waar natuurhistorisch museum in @@ -677,6 +667,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:02.854875+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Miramar Zeemuseum diff --git a/data/nde/enriched/entries/0025_Q56461228.yaml b/data/nde/enriched/entries/0025_Q56461228.yaml index 0c589c4a30..a1b201b93c 100644 --- a/data/nde/enriched/entries/0025_Q56461228.yaml +++ b/data/nde/enriched/entries/0025_Q56461228.yaml @@ -530,18 +530,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:03.370593+00:00' source_archive: web/0025/dewemme.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum de Wemme Zuidwolde - source_url: https://www.dewemme.nl/ - retrieved_on: '2025-11-29T14:26:07.259076+00:00' - xpath: /html/head/title - html_file: web/0025/dewemme.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:03.369684+00:00' - claim_type: org_name claim_value: BTC Art raw_value: BTC Art - Shine @@ -642,6 +632,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:03.370465+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum De Wemme diff --git a/data/nde/enriched/entries/0027_Q19832258.yaml b/data/nde/enriched/entries/0027_Q19832258.yaml index db65d08df4..b24a8ca483 100644 --- a/data/nde/enriched/entries/0027_Q19832258.yaml +++ b/data/nde/enriched/entries/0027_Q19832258.yaml @@ -706,18 +706,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:24:53.363590+00:00' source_archive: web/0027/papierknipmuseum.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME - source_url: http://www.papierknipmuseum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0027/papierknipmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:24:53.356977+00:00' - claim_type: email claim_value: knipkunst@gmail.com raw_value: knipkunst@gmail.com @@ -728,3 +718,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:24:53.362796+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0028_aold_daoln.yaml b/data/nde/enriched/entries/0028_aold_daoln.yaml index 00c3fdd47b..a60f50f7d1 100644 --- a/data/nde/enriched/entries/0028_aold_daoln.yaml +++ b/data/nde/enriched/entries/0028_aold_daoln.yaml @@ -372,18 +372,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:04.212422+00:00' source_archive: web/0028/aolddaoln.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Aold Daol'n - source_url: https://www.aolddaoln.nl/ - retrieved_on: '2025-11-29T14:26:14.597135+00:00' - xpath: /html/head/title - html_file: web/0028/aolddaoln.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:04.210876+00:00' - claim_type: description_short claim_value: Behoud van de geschiedenis en cultuur van Dalen en de naburige dorpen door middel van tentoonstellingen, onderzoek en evenementen. Vier samen met @@ -438,6 +428,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:04.212155+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Aold Daol'n diff --git a/data/nde/enriched/entries/0029_Q110995917.yaml b/data/nde/enriched/entries/0029_Q110995917.yaml index 599c77a5b9..d58e54b5fc 100644 --- a/data/nde/enriched/entries/0029_Q110995917.yaml +++ b/data/nde/enriched/entries/0029_Q110995917.yaml @@ -549,18 +549,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:04.621248+00:00' source_archive: web/0029/aolddaoln.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Aold Daol'n - source_url: https://www.aolddaoln.nl/ - retrieved_on: '2025-11-29T14:26:16.952349+00:00' - xpath: /html/head/title - html_file: web/0029/aolddaoln.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:04.620027+00:00' - claim_type: description_short claim_value: Behoud van de geschiedenis en cultuur van Dalen en de naburige dorpen door middel van tentoonstellingen, onderzoek en evenementen. Vier samen met @@ -615,6 +605,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:04.621081+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museummolen Jan Pol diff --git a/data/nde/enriched/entries/0037_Q1345886.yaml b/data/nde/enriched/entries/0037_Q1345886.yaml index 44fb507297..36e6536c06 100644 --- a/data/nde/enriched/entries/0037_Q1345886.yaml +++ b/data/nde/enriched/entries/0037_Q1345886.yaml @@ -717,18 +717,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:05.753578+00:00' source_archive: web/0037/klompenmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Internationaal Klompenmuseum Eelde - source_url: http://www.klompenmuseum.nl/ - retrieved_on: '2025-11-29T14:29:05.952480+00:00' - xpath: /html/head/title - html_file: web/0037/klompenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:05.752826+00:00' - claim_type: description_short claim_value: 'Internationaal Klompenmuseum Tentoonstelling 2025: Houten Schoeisel Festival Bekijk onze virtuele tour Geopend: 28 maart 2026 t/m 1 november 2026, @@ -817,6 +807,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:05.753486+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Internationaal Klompenmuseum diff --git a/data/nde/enriched/entries/0039_Q108369683.yaml b/data/nde/enriched/entries/0039_Q108369683.yaml index afa4fa9825..9ec9af2a82 100644 --- a/data/nde/enriched/entries/0039_Q108369683.yaml +++ b/data/nde/enriched/entries/0039_Q108369683.yaml @@ -485,18 +485,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:06.586496+00:00' source_archive: web/0039/oudmeppel.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Oud Meppel - source_url: https://www.oudmeppel.nl/ - retrieved_on: '2025-11-29T14:32:06.738895+00:00' - xpath: /html/head/title - html_file: web/0039/oudmeppel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:06.585102+00:00' - claim_type: description_short claim_value: "Welkom bij Oud Meppel Overzicht \nActiviteiten \nDiavoorstellingen\ \ \nTentoonstellingen \nStadswandelingen \nWerkgroepen \nVerkoop \nDiversen\ @@ -563,6 +553,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:06.586287+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oud Meppel diff --git a/data/nde/enriched/entries/0041_asser_historische_vereniging.yaml b/data/nde/enriched/entries/0041_asser_historische_vereniging.yaml index 366acb084c..3868b37a1a 100644 --- a/data/nde/enriched/entries/0041_asser_historische_vereniging.yaml +++ b/data/nde/enriched/entries/0041_asser_historische_vereniging.yaml @@ -350,18 +350,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:07.100160+00:00' source_archive: web/0041/ahvassen.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://ahvassen.nl/ - retrieved_on: '2025-11-29T14:29:15.708064+00:00' - xpath: /html/head/title - html_file: web/0041/ahvassen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:07.099272+00:00' - claim_type: description_short claim_value: Asser Historische Vereniging raw_value: Asser Historische Vereniging @@ -392,6 +382,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:07.099994+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Asser Historische Vereniging diff --git a/data/nde/enriched/entries/0042_asser_historische_vereniging_2.yaml b/data/nde/enriched/entries/0042_asser_historische_vereniging_2.yaml index 2d2214ebf5..0805ea52c2 100644 --- a/data/nde/enriched/entries/0042_asser_historische_vereniging_2.yaml +++ b/data/nde/enriched/entries/0042_asser_historische_vereniging_2.yaml @@ -347,18 +347,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:07.171284+00:00' source_archive: web/0042/drentsehistorischevereniging.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://drentsehistorischevereniging.nl/ - retrieved_on: '2025-11-29T14:29:17.235710+00:00' - xpath: /html/head/title - html_file: web/0042/drentsehistorischevereniging.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:07.170786+00:00' - claim_type: org_name claim_value: Drentse Historische Vereniging raw_value: Drentse Historische Vereniging @@ -389,6 +379,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:07.171165+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Drentse Historische Vereniging diff --git a/data/nde/enriched/entries/0043_historische_vereniging_gasselte.yaml b/data/nde/enriched/entries/0043_historische_vereniging_gasselte.yaml index 97a2a6f9ae..d8490f4883 100644 --- a/data/nde/enriched/entries/0043_historische_vereniging_gasselte.yaml +++ b/data/nde/enriched/entries/0043_historische_vereniging_gasselte.yaml @@ -355,7 +355,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:07.584213+00:00' source_archive: web/0043/archief-optspoor.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Historische vereniging der gemeente Gasselte @@ -367,16 +367,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:07.583755+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://archief-optspoor.nl/ - retrieved_on: '2025-11-29T14:29:26.053674+00:00' - xpath: /html/body/div[1]/div/div/div/div/main/article/header/h1 - html_file: web/0043/archief-optspoor.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:07.584126+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische vereniging der gemeente Gasselte diff --git a/data/nde/enriched/entries/0047_Q30277559.yaml b/data/nde/enriched/entries/0047_Q30277559.yaml index 77b45a3d01..202371ee52 100644 --- a/data/nde/enriched/entries/0047_Q30277559.yaml +++ b/data/nde/enriched/entries/0047_Q30277559.yaml @@ -566,7 +566,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:07.769543+00:00' source_archive: web/0047/hetflevolandsarchief.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Het Flevolands Archief @@ -598,16 +598,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:07.769111+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.hetflevolandsarchief.nl/ - retrieved_on: '2025-11-29T14:29:20.248807+00:00' - xpath: /html/body/div[2]/main/div[6]/div/div/div/h1 - html_file: web/0047/hetflevolandsarchief.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:07.769239+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het Flevolands Archief diff --git a/data/nde/enriched/entries/0069_Q13137174.yaml b/data/nde/enriched/entries/0069_Q13137174.yaml index ac548fae0d..a6d056d33d 100644 --- a/data/nde/enriched/entries/0069_Q13137174.yaml +++ b/data/nde/enriched/entries/0069_Q13137174.yaml @@ -727,18 +727,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:13.342313+00:00' source_archive: web/0069/museumfederatiefryslan.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museumfederatie Fryslân - source_url: https://www.museumfederatiefryslan.nl/ - retrieved_on: '2025-11-29T14:34:37.779528+00:00' - xpath: /html/head/title - html_file: web/0069/museumfederatiefryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:13.341754+00:00' - claim_type: description_short claim_value: Museumfederatie Fryslân staat voor het toegankelijk maken en behouden van het erfgoed in Friesland op een zo hoog mogelijk niveau. Samenwerken op @@ -793,6 +783,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:13.342260+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museumfederatie Fryslân diff --git a/data/nde/enriched/entries/0070_Q17517652.yaml b/data/nde/enriched/entries/0070_Q17517652.yaml index 032e19e149..7da12381c2 100644 --- a/data/nde/enriched/entries/0070_Q17517652.yaml +++ b/data/nde/enriched/entries/0070_Q17517652.yaml @@ -461,7 +461,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:13.420511+00:00' source_archive: web/0070/kiekhuus.nl - claims_count: 8 + claims_count: 6 claims: - claim_type: org_name claim_value: Terug in de tijd @@ -495,26 +495,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:44:13.420097+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?t=Terug in de tijd&u=https://www.kiekhuus.nl/ - raw_value: https://www.facebook.com/sharer.php?t=Terug in de tijd&u=https://www.kiekhuus.nl/ - source_url: http://www.kiekhuus.nl - retrieved_on: '2025-11-29T14:32:53.893434+00:00' - xpath: /html/body/div[2]/div[2]/div/section/div/div/div[1]/div/div/div/a[1] - html_file: web/0070/kiekhuus.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:13.420372+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Terug in de tijd&url=https://www.kiekhuus.nl/&via= - raw_value: https://twitter.com/intent/tweet?text=Terug in de tijd&url=https://www.kiekhuus.nl/&via= - source_url: http://www.kiekhuus.nl - retrieved_on: '2025-11-29T14:32:53.893434+00:00' - xpath: /html/body/div[2]/div[2]/div/section/div/div/div[1]/div/div/div/a[2] - html_file: web/0070/kiekhuus.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:13.420378+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/kiekhuus/ raw_value: https://www.facebook.com/kiekhuus/ @@ -545,6 +525,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:13.420399+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: '''t Kiekhuus' diff --git a/data/nde/enriched/entries/0072_Q3457274.yaml b/data/nde/enriched/entries/0072_Q3457274.yaml index b1d1e6a462..40470c442c 100644 --- a/data/nde/enriched/entries/0072_Q3457274.yaml +++ b/data/nde/enriched/entries/0072_Q3457274.yaml @@ -791,18 +791,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:13.941510+00:00' source_archive: web/0072/museumdrachten.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum Dr8888 - source_url: http://www.museumdrachten.nl - retrieved_on: '2025-11-29T14:33:08.640521+00:00' - xpath: /html/head/title - html_file: web/0072/museumdrachten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:13.940327+00:00' - claim_type: org_name claim_value: Museum Dr8888 raw_value: Museum Dr8888 @@ -853,6 +843,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:13.941354+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Drachten diff --git a/data/nde/enriched/entries/0080_Q1942351.yaml b/data/nde/enriched/entries/0080_Q1942351.yaml index a8f6a86a8b..7bbfaa6462 100644 --- a/data/nde/enriched/entries/0080_Q1942351.yaml +++ b/data/nde/enriched/entries/0080_Q1942351.yaml @@ -702,7 +702,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:16.206454+00:00' source_archive: web/0080/damshus.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: It Damshûs @@ -756,16 +756,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:16.206249+00:00' - - claim_type: org_name - claim_value: Oproep - raw_value: Oproep - source_url: https://www.damshus.nl/ - retrieved_on: '2025-11-29T14:36:55.509152+00:00' - xpath: /html/body/div[1]/div/div/div/article/div/div/div/div[2]/div/div/div[1]/div/h1 - html_file: web/0080/damshus.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:16.206329+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting "It Damshûs" diff --git a/data/nde/enriched/entries/0082_Q2201887.yaml b/data/nde/enriched/entries/0082_Q2201887.yaml index 5f8c221ae5..ef85df0709 100644 --- a/data/nde/enriched/entries/0082_Q2201887.yaml +++ b/data/nde/enriched/entries/0082_Q2201887.yaml @@ -946,7 +946,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:16.964027+00:00' source_archive: web/0082/observeum.nl - claims_count: 7 + claims_count: 5 claims: - claim_type: org_name claim_value: Museum & Sterrenwacht Burgum @@ -980,16 +980,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:16.963804+00:00' - - claim_type: social_facebook - claim_value: http://www.facebook.com/sharer.php?u=https://www.observeum.nl/&t=Home - raw_value: http://www.facebook.com/sharer.php?u=https://www.observeum.nl/&t=Home - source_url: https://www.observeum.nl/ - retrieved_on: '2025-11-29T14:36:01.804617+00:00' - xpath: /html/body/div/div[2]/div[2]/div[2]/div/div[2]/div[41]/div/a[1] - html_file: web/0082/observeum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:16.963824+00:00' - claim_type: social_twitter claim_value: https://x.com/share?text=Home&url=https%3A%2F%2Fwww.observeum.nl%2F raw_value: https://x.com/share?text=Home&url=https%3A%2F%2Fwww.observeum.nl%2F @@ -1000,16 +990,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:16.963828+00:00' - - claim_type: social_linkedin - claim_value: http://www.linkedin.com/shareArticle?mini=true&url=https://www.observeum.nl/&title=Home - raw_value: http://www.linkedin.com/shareArticle?mini=true&url=https://www.observeum.nl/&title=Home - source_url: https://www.observeum.nl/ - retrieved_on: '2025-11-29T14:36:01.804617+00:00' - xpath: /html/body/div/div[2]/div[2]/div[2]/div/div[2]/div[41]/div/a[4] - html_file: web/0082/observeum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:16.963834+00:00' - claim_type: org_name claim_value: Museum en Sterrenwacht Burgum raw_value: Museum en Sterrenwacht Burgum @@ -1020,6 +1000,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:16.963894+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Observeum diff --git a/data/nde/enriched/entries/0086_Q13137164.yaml b/data/nde/enriched/entries/0086_Q13137164.yaml index 2d621378d5..687f2e5b61 100644 --- a/data/nde/enriched/entries/0086_Q13137164.yaml +++ b/data/nde/enriched/entries/0086_Q13137164.yaml @@ -651,18 +651,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:18.289120+00:00' source_archive: web/0086/dekemastate.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Dekema State - source_url: https://dekemastate.nl/ - retrieved_on: '2025-11-29T14:38:06.707912+00:00' - xpath: /html/head/title - html_file: web/0086/dekemastate.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:18.288285+00:00' - claim_type: org_name claim_value: Klik om het zoekinvoerveld te openen raw_value: Klik om het zoekinvoerveld te openen @@ -735,6 +725,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:18.288910+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Landgoed Dekema State / Dekema State diff --git a/data/nde/enriched/entries/0093_Q81181219.yaml b/data/nde/enriched/entries/0093_Q81181219.yaml index 98b1bf01d0..8de89b9a10 100644 --- a/data/nde/enriched/entries/0093_Q81181219.yaml +++ b/data/nde/enriched/entries/0093_Q81181219.yaml @@ -525,18 +525,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:19.908909+00:00' source_archive: web/0093/achtkarspelen.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Achtkarspelen - source_url: http://www.achtkarspelen.nl - retrieved_on: '2025-11-29T14:36:05.489613+00:00' - xpath: /html/head/title - html_file: web/0093/achtkarspelen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:19.908104+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -599,6 +589,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:19.908727+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Achtkarspelen diff --git a/data/nde/enriched/entries/0095_Q81181251.yaml b/data/nde/enriched/entries/0095_Q81181251.yaml index 2f7337dbcc..db05b54c52 100644 --- a/data/nde/enriched/entries/0095_Q81181251.yaml +++ b/data/nde/enriched/entries/0095_Q81181251.yaml @@ -360,7 +360,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:12.247324+00:00' source_archive: web/0095/noardeast-fryslan.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: Homepage Noardeast-Fryslân @@ -392,36 +392,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:25:12.245246+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0095/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.245258+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0095/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.245268+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0095/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.245278+00:00' - claim_type: description_short claim_value: Homepage Noardeast-Fryslân raw_value: Homepage Noardeast-Fryslân @@ -482,6 +452,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:25:12.246132+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content temporal_extent: begin_of_the_begin: null end_of_the_begin: null diff --git a/data/nde/enriched/entries/0099_Q81181286.yaml b/data/nde/enriched/entries/0099_Q81181286.yaml index b0cfc16030..e7b8eb9212 100644 --- a/data/nde/enriched/entries/0099_Q81181286.yaml +++ b/data/nde/enriched/entries/0099_Q81181286.yaml @@ -468,7 +468,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:20.689774+00:00' source_archive: web/0099/heerenveen.nl - claims_count: 29 + claims_count: 25 claims: - claim_type: org_name claim_value: Alles over gemeente Heerenveen @@ -480,16 +480,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:20.688647+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.heerenveen.nl - retrieved_on: '2025-11-29T14:37:26.166072+00:00' - xpath: /html/body/header/nav/ul/li[1]/a/svg/title - html_file: web/0099/heerenveen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.688660+00:00' - claim_type: org_name claim_value: A-Z raw_value: A-Z @@ -530,16 +520,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:20.688677+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://www.heerenveen.nl - retrieved_on: '2025-11-29T14:37:26.166072+00:00' - xpath: /html/body/header/nav/div[2]/form/div/svg/title - html_file: web/0099/heerenveen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.688700+00:00' - claim_type: org_name claim_value: icon_verkiezingen raw_value: icon_verkiezingen @@ -590,16 +570,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:20.688718+00:00' - - claim_type: org_name - claim_value: Contact - raw_value: Contact - source_url: https://www.heerenveen.nl - retrieved_on: '2025-11-29T14:37:26.166072+00:00' - xpath: /html/body/header/nav/div[3]/nav/a[6]/div/svg/title - html_file: web/0099/heerenveen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.688722+00:00' - claim_type: org_name claim_value: Klok raw_value: Klok @@ -650,16 +620,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:20.688745+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.heerenveen.nl - retrieved_on: '2025-11-29T14:37:26.166072+00:00' - xpath: /html/body/main/div[3]/div[1]/div[1]/svg/title - html_file: web/0099/heerenveen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.688759+00:00' - claim_type: org_name claim_value: LinkedIn profiel van Gemeente Heerenveen raw_value: LinkedIn profiel van Gemeente Heerenveen @@ -762,6 +722,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:20.689444+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Heerenveen diff --git a/data/nde/enriched/entries/0101_Q81181296.yaml b/data/nde/enriched/entries/0101_Q81181296.yaml index ea9112cb86..29793dd1f7 100644 --- a/data/nde/enriched/entries/0101_Q81181296.yaml +++ b/data/nde/enriched/entries/0101_Q81181296.yaml @@ -362,7 +362,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:12.500058+00:00' source_archive: web/0101/noardeast-fryslan.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: Homepage Noardeast-Fryslân @@ -394,36 +394,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:25:12.498580+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0101/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.498591+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0101/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.498601+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0101/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.498611+00:00' - claim_type: description_short claim_value: Homepage Noardeast-Fryslân raw_value: Homepage Noardeast-Fryslân @@ -484,6 +454,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:25:12.499739+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content temporal_extent: begin_of_the_begin: null end_of_the_begin: null diff --git a/data/nde/enriched/entries/0102_Q121225319.yaml b/data/nde/enriched/entries/0102_Q121225319.yaml index 99d94a95f1..0070d0a80c 100644 --- a/data/nde/enriched/entries/0102_Q121225319.yaml +++ b/data/nde/enriched/entries/0102_Q121225319.yaml @@ -572,7 +572,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:20.875194+00:00' source_archive: web/0102/noardeast-fryslan.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: Homepage Noardeast-Fryslân @@ -604,36 +604,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:20.874548+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.noardeast-fryslan.nl/ - retrieved_on: '2025-11-29T14:37:55.770920+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0102/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.874553+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.noardeast-fryslan.nl/ - retrieved_on: '2025-11-29T14:37:55.770920+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0102/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.874558+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.noardeast-fryslan.nl/ - retrieved_on: '2025-11-29T14:37:55.770920+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0102/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.874562+00:00' - claim_type: description_short claim_value: Homepage Noardeast-Fryslân raw_value: Homepage Noardeast-Fryslân @@ -694,6 +664,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:20.875058+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchief Noardeast-Fryslân diff --git a/data/nde/enriched/entries/0103_Q81181358.yaml b/data/nde/enriched/entries/0103_Q81181358.yaml index 751964d267..675fb41588 100644 --- a/data/nde/enriched/entries/0103_Q81181358.yaml +++ b/data/nde/enriched/entries/0103_Q81181358.yaml @@ -299,18 +299,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:12.598702+00:00' source_archive: web/0103/documentatiestichting.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Contact - raw_value: Contact – Documentatiestichting Leeuwarderadeel - source_url: http://www.documentatiestichting.nl/contact/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0103/documentatiestichting.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.597880+00:00' - claim_type: email claim_value: documentatiestichting@gmail.com raw_value: documentatiestichting@gmail.com @@ -331,3 +321,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:25:12.598321+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0104_Q2754844.yaml b/data/nde/enriched/entries/0104_Q2754844.yaml index 3e0457b6d1..b33349b307 100644 --- a/data/nde/enriched/entries/0104_Q2754844.yaml +++ b/data/nde/enriched/entries/0104_Q2754844.yaml @@ -904,18 +904,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:20.991233+00:00' source_archive: web/0104/historischcentrumleeuwarden.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historisch Centrum Leeuwarden - source_url: http://historischcentrumleeuwarden.nl - retrieved_on: '2025-11-29T14:38:07.710368+00:00' - xpath: /html/head/title - html_file: web/0104/historischcentrumleeuwarden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.990803+00:00' - claim_type: description_short claim_value: Het Historisch Centrum Leeuwarden (HCL) is het informatie- en activiteitencentrum voor de geschiedenis van Leeuwarden en omgeving. @@ -988,6 +978,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:20.991168+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Centrum Leeuwarden diff --git a/data/nde/enriched/entries/0107_Q81181235.yaml b/data/nde/enriched/entries/0107_Q81181235.yaml index b715999575..8773f4746a 100644 --- a/data/nde/enriched/entries/0107_Q81181235.yaml +++ b/data/nde/enriched/entries/0107_Q81181235.yaml @@ -358,18 +358,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:21.184233+00:00' source_archive: web/0107/smallingerland.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Smallingerland - source_url: https://www.smallingerland.nl/ - retrieved_on: '2025-11-29T14:38:08.667336+00:00' - xpath: /html/head/title - html_file: web/0107/smallingerland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:21.183016+00:00' - claim_type: email claim_value: gemeente@smallingerland.nl raw_value: gemeente@smallingerland.nl @@ -430,6 +420,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:21.184035+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeente Smallingerland diff --git a/data/nde/enriched/entries/0109_Q81181393.yaml b/data/nde/enriched/entries/0109_Q81181393.yaml index a2972d860f..ef33e22453 100644 --- a/data/nde/enriched/entries/0109_Q81181393.yaml +++ b/data/nde/enriched/entries/0109_Q81181393.yaml @@ -388,18 +388,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:21.633706+00:00' source_archive: web/0109/terschelling.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Terschelling - source_url: https://www.terschelling.nl/ - retrieved_on: '2025-11-29T14:38:09.179167+00:00' - xpath: /html/head/title - html_file: web/0109/terschelling.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:21.632596+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -470,6 +460,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:21.633521+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Terschelling diff --git a/data/nde/enriched/entries/0110_Q81181206.yaml b/data/nde/enriched/entries/0110_Q81181206.yaml index 1b79a68781..d6426c758e 100644 --- a/data/nde/enriched/entries/0110_Q81181206.yaml +++ b/data/nde/enriched/entries/0110_Q81181206.yaml @@ -518,18 +518,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:21.752348+00:00' source_archive: web/0110/t-diel.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Tytsjerksteradiel - source_url: http://www.t-diel.nl - retrieved_on: '2025-11-29T14:38:09.847466+00:00' - xpath: /html/head/title - html_file: web/0110/t-diel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:21.751276+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -592,6 +582,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:21.751956+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Tytsjerksteradiel diff --git a/data/nde/enriched/entries/0111_Q81181373.yaml b/data/nde/enriched/entries/0111_Q81181373.yaml index 15315dba8d..55a7cc4a37 100644 --- a/data/nde/enriched/entries/0111_Q81181373.yaml +++ b/data/nde/enriched/entries/0111_Q81181373.yaml @@ -538,18 +538,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:21.884305+00:00' source_archive: web/0111/vlieland.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Vlieland - source_url: http://www.vlieland.nl - retrieved_on: '2025-11-29T14:38:10.812158+00:00' - xpath: /html/head/title - html_file: web/0111/vlieland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:21.883565+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -620,6 +610,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:21.884109+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Vlieland diff --git a/data/nde/enriched/entries/0115_Q13136222.yaml b/data/nde/enriched/entries/0115_Q13136222.yaml index 3f6698a56c..6d1e7839dd 100644 --- a/data/nde/enriched/entries/0115_Q13136222.yaml +++ b/data/nde/enriched/entries/0115_Q13136222.yaml @@ -506,18 +506,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:23.119742+00:00' source_archive: web/0115/ijstijdenmuseum.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - ijstijdenmuseum - source_url: https://www.ijstijdenmuseum.nl/ - retrieved_on: '2025-11-29T14:38:51.284331+00:00' - xpath: /html/head/title - html_file: web/0115/ijstijdenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:23.119152+00:00' - claim_type: description_short claim_value: De IJstijden in deze streken In het IJstijdenmuseum in Buitenpost wordt het boeiende verhaal verteld van de twee laatste IJstijden. Deze hebben @@ -572,6 +562,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:23.119650+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: IJstijdenmuseum diff --git a/data/nde/enriched/entries/0122_Q12013196.yaml b/data/nde/enriched/entries/0122_Q12013196.yaml index 0b6af8370d..44ceaa74ce 100644 --- a/data/nde/enriched/entries/0122_Q12013196.yaml +++ b/data/nde/enriched/entries/0122_Q12013196.yaml @@ -1209,18 +1209,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:24.410901+00:00' source_archive: web/0122/museumbelvedere.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum Belvédère - source_url: https://www.museumbelvedere.nl/ - retrieved_on: '2025-11-29T15:18:22.314806+00:00' - xpath: /html/head/title - html_file: web/0122/museumbelvedere.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:24.410285+00:00' - claim_type: email claim_value: info@museumbelvedere.nl raw_value: info@museumbelvedere.nl @@ -1271,6 +1261,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:24.410817+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Museum Belvédère diff --git a/data/nde/enriched/entries/0124_museum_oold_ark_korenmolen_de_weyert.yaml b/data/nde/enriched/entries/0124_museum_oold_ark_korenmolen_de_weyert.yaml index 418a85e1cf..cf5696cfae 100644 --- a/data/nde/enriched/entries/0124_museum_oold_ark_korenmolen_de_weyert.yaml +++ b/data/nde/enriched/entries/0124_museum_oold_ark_korenmolen_de_weyert.yaml @@ -184,18 +184,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:24.984364+00:00' source_archive: web/0124/museumenmolenmakkinga.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.museumenmolenmakkinga.nl/ - retrieved_on: '2025-11-29T14:39:27.748370+00:00' - xpath: /html/head/title - html_file: web/0124/museumenmolenmakkinga.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:24.984075+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Museum-en-Molen-Makkinga-686046891528133 raw_value: https://www.facebook.com/Museum-en-Molen-Makkinga-686046891528133 @@ -206,6 +196,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:24.984314+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Korenmolen "De Weyert" diff --git a/data/nde/enriched/entries/0125_Q13137340.yaml b/data/nde/enriched/entries/0125_Q13137340.yaml index 0647e520ae..69baacd6b2 100644 --- a/data/nde/enriched/entries/0125_Q13137340.yaml +++ b/data/nde/enriched/entries/0125_Q13137340.yaml @@ -550,18 +550,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:25.116518+00:00' source_archive: web/0125/museumenmolenmakkinga.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.museumenmolenmakkinga.nl/ - retrieved_on: '2025-11-29T14:54:51.730430+00:00' - xpath: /html/head/title - html_file: web/0125/museumenmolenmakkinga.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:25.116241+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Museum-en-Molen-Makkinga-686046891528133 raw_value: https://www.facebook.com/Museum-en-Molen-Makkinga-686046891528133 @@ -572,6 +562,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:25.116469+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oold Ark diff --git a/data/nde/enriched/entries/0126_Q13137168.yaml b/data/nde/enriched/entries/0126_Q13137168.yaml index c9e9423a0e..a98126819f 100644 --- a/data/nde/enriched/entries/0126_Q13137168.yaml +++ b/data/nde/enriched/entries/0126_Q13137168.yaml @@ -641,18 +641,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:25.214070+00:00' source_archive: web/0126/museumsloten.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://museumsloten.nl/ - retrieved_on: '2025-11-29T14:54:52.407703+00:00' - xpath: /html/head/title - html_file: web/0126/museumsloten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:25.213029+00:00' - claim_type: description_short claim_value: Ontdek Sloten, de kleinste van de Friese 11 steden, in Museum Sloten, waar toverlantaarns de geschiedenis vertellen, en boek meteen je tickets. @@ -705,6 +695,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:25.213909+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Sloten diff --git a/data/nde/enriched/entries/0131_Q81181312.yaml b/data/nde/enriched/entries/0131_Q81181312.yaml index 3e20c158f0..bdff97c230 100644 --- a/data/nde/enriched/entries/0131_Q81181312.yaml +++ b/data/nde/enriched/entries/0131_Q81181312.yaml @@ -508,18 +508,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:26.659287+00:00' source_archive: web/0131/fryslan.frl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Fryslan - source_url: https://www.fryslan.frl/ - retrieved_on: '2025-11-29T15:14:45.171069+00:00' - xpath: /html/head/title - html_file: web/0131/fryslan.frl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:26.657906+00:00' - claim_type: org_name claim_value: Fryslan raw_value: Fryslan @@ -580,6 +570,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:26.658955+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Provincie Fryslân diff --git a/data/nde/enriched/entries/0139_de_hollandse_cirkel.yaml b/data/nde/enriched/entries/0139_de_hollandse_cirkel.yaml index 96bf4ed18c..528d98e89d 100644 --- a/data/nde/enriched/entries/0139_de_hollandse_cirkel.yaml +++ b/data/nde/enriched/entries/0139_de_hollandse_cirkel.yaml @@ -182,7 +182,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:27.725393+00:00' source_archive: web/0139/hollandsecirkel.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Stichting de Hollandse Cirkel @@ -214,16 +214,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:44:27.725070+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.kadaster.nl/ - retrieved_on: '' - xpath: /html/body/div/div/div/div/article/div/div/div/div[5]/div[1]/div/div/div/h1 - html_file: web/0139/hollandsecirkel.nl/mirror/hollandsecirkel.nl/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:27.725271+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting De Hollandse Cirkel diff --git a/data/nde/enriched/entries/0140_Q17595943.yaml b/data/nde/enriched/entries/0140_Q17595943.yaml index d41b1060ed..216dc5ebbc 100644 --- a/data/nde/enriched/entries/0140_Q17595943.yaml +++ b/data/nde/enriched/entries/0140_Q17595943.yaml @@ -620,18 +620,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:27.849066+00:00' source_archive: web/0140/archief.gazelle.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - Gazelle - source_url: https://archief.gazelle.nl/ - retrieved_on: '2025-11-29T15:21:15.145560+00:00' - xpath: /html/head/title - html_file: web/0140/archief.gazelle.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:27.847294+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/KoninklijkeGazelle raw_value: https://www.facebook.com/KoninklijkeGazelle @@ -672,6 +662,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:27.848728+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Koninklijke Gazelle diff --git a/data/nde/enriched/entries/0143_Q1127079.yaml b/data/nde/enriched/entries/0143_Q1127079.yaml index 6f5f248c96..2dd1f2b353 100644 --- a/data/nde/enriched/entries/0143_Q1127079.yaml +++ b/data/nde/enriched/entries/0143_Q1127079.yaml @@ -1287,18 +1287,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:28.142443+00:00' source_archive: web/0143/museumhetvalkhof.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: home - raw_value: home - Valkhof Museum - source_url: http://www.museumhetvalkhof.nl - retrieved_on: '2025-11-29T15:21:15.209526+00:00' - xpath: /html/head/title - html_file: web/0143/museumhetvalkhof.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.142145+00:00' - claim_type: org_name claim_value: Valkhof Museum raw_value: Valkhof Museum @@ -1309,6 +1299,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:44:28.142360+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Het Valkhof diff --git a/data/nde/enriched/entries/0144_Q2710899.yaml b/data/nde/enriched/entries/0144_Q2710899.yaml index 30484dc3ed..533b15c588 100644 --- a/data/nde/enriched/entries/0144_Q2710899.yaml +++ b/data/nde/enriched/entries/0144_Q2710899.yaml @@ -873,18 +873,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:28.228638+00:00' source_archive: web/0144/nationaalonderduikmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal onderduikmuseum - source_url: https://nationaalonderduikmuseum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0144/nationaalonderduikmuseum.nl/mirror/nationaalonderduikmuseum.nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.227592+00:00' - claim_type: org_name claim_value: Nationaal onderduikmuseum - raw_value: Nationaal onderduikmuseum - @@ -965,6 +955,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:28.228541+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Onderduikmuseum diff --git a/data/nde/enriched/entries/0145_Q2654815.yaml b/data/nde/enriched/entries/0145_Q2654815.yaml index 419d694ac2..6ad2b38825 100644 --- a/data/nde/enriched/entries/0145_Q2654815.yaml +++ b/data/nde/enriched/entries/0145_Q2654815.yaml @@ -711,7 +711,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:28.300061+00:00' source_archive: web/0145/antonpieckmuseum.nl - claims_count: 9 + claims_count: 7 claims: - claim_type: org_name claim_value: Anton Pieck Museum @@ -733,26 +733,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:28.298463+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: https://www.antonpieckmuseum.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[1]/svg/title - html_file: web/0145/antonpieckmuseum.nl/mirror/www.antonpieckmuseum.nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.298469+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: https://www.antonpieckmuseum.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[2]/svg/title - html_file: web/0145/antonpieckmuseum.nl/mirror/www.antonpieckmuseum.nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.298473+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -803,6 +783,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:28.299741+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Anton Pieck Museum diff --git a/data/nde/enriched/entries/0146_Q1663974.yaml b/data/nde/enriched/entries/0146_Q1663974.yaml index 354cf580bc..a76d7967b3 100644 --- a/data/nde/enriched/entries/0146_Q1663974.yaml +++ b/data/nde/enriched/entries/0146_Q1663974.yaml @@ -646,18 +646,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:28.483481+00:00' source_archive: web/0146/tua.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Theologische Universiteit Apeldoorn - source_url: http://www.tua.nl - retrieved_on: '2025-11-29T15:21:15.976954+00:00' - xpath: /html/head/title - html_file: web/0146/tua.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.482634+00:00' - claim_type: org_name claim_value: Theologische Universiteit Apeldoorn raw_value: Theologische Universiteit Apeldoorn @@ -738,6 +728,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:28.483392+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Theologische Universiteit Apeldoorn diff --git a/data/nde/enriched/entries/0149_Q98894593.yaml b/data/nde/enriched/entries/0149_Q98894593.yaml index d6504f9b0d..5df416f406 100644 --- a/data/nde/enriched/entries/0149_Q98894593.yaml +++ b/data/nde/enriched/entries/0149_Q98894593.yaml @@ -568,18 +568,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:28.738219+00:00' source_archive: web/0149/cvz7aar.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Cultuurhistorische Vereniging Zevenaar (CVZ) (powered by e-captain.nl) - source_url: https://www.cvz7aar.nl - retrieved_on: '2025-11-29T15:21:18.588342+00:00' - xpath: /html/head/title - html_file: web/0149/cvz7aar.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.737638+00:00' - claim_type: social_youtube claim_value: https://www.youtube.com/channel/UCa0uF7kyU0vLxKO5qx-ZHNg/videos?reload=9&app=desktop&view=0&sort=da&flow=grid&cbrd=1 raw_value: https://www.youtube.com/channel/UCa0uF7kyU0vLxKO5qx-ZHNg/videos?reload=9&app=desktop&view=0&sort=da&flow=grid&cbrd=1 @@ -600,6 +590,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:28.738156+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Cultuurhistorische Vereniging Zevenaar diff --git a/data/nde/enriched/entries/0155_Q13636575.yaml b/data/nde/enriched/entries/0155_Q13636575.yaml index 07cfe5e2df..c5a272918e 100644 --- a/data/nde/enriched/entries/0155_Q13636575.yaml +++ b/data/nde/enriched/entries/0155_Q13636575.yaml @@ -963,18 +963,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:29.855125+00:00' source_archive: web/0155/streekmuseumtiel.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Flipje en Streekmuseum Tiel - source_url: https://streekmuseumtiel.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0155/streekmuseumtiel.nl/mirror/streekmuseumtiel.nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:29.854444+00:00' - claim_type: description_short claim_value: "DUIK IN DE\nHISTORIE \n\nIn het Flipje en Streekmuseum wordt de\ \ historie van Tiel en de Betuwe getoond. Ook haar beroemdste inwoner - Flipje\ @@ -1041,6 +1031,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:29.855011+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Flipje & Streekmuseum Tiel diff --git a/data/nde/enriched/entries/0157_Q104033126.yaml b/data/nde/enriched/entries/0157_Q104033126.yaml index ac097c4339..9055d12ef0 100644 --- a/data/nde/enriched/entries/0157_Q104033126.yaml +++ b/data/nde/enriched/entries/0157_Q104033126.yaml @@ -510,18 +510,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:29.935972+00:00' source_archive: web/0157/gelderlandinbeeld.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Gelderland in beeld - source_url: https://www.gelderlandinbeeld.nl/ - retrieved_on: '2025-11-29T15:22:43.989143+00:00' - xpath: /html/head/title - html_file: web/0157/gelderlandinbeeld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:29.935777+00:00' - claim_type: email claim_value: erfgoedcentrum@rozet.nl raw_value: erfgoedcentrum@rozet.nl @@ -542,6 +532,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:29.935958+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gelderland in Beeld diff --git a/data/nde/enriched/entries/0158_Q2330735.yaml b/data/nde/enriched/entries/0158_Q2330735.yaml index a29dd36154..206d805de2 100644 --- a/data/nde/enriched/entries/0158_Q2330735.yaml +++ b/data/nde/enriched/entries/0158_Q2330735.yaml @@ -793,18 +793,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:30.034033+00:00' source_archive: web/0158/geldersarchief.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.geldersarchief.nl - retrieved_on: '2025-11-29T15:22:43.748847+00:00' - xpath: /html/head/title - html_file: web/0158/geldersarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:30.032762+00:00' - claim_type: org_name claim_value: chat raw_value: chat @@ -881,6 +871,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:30.033839+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gelders Archief diff --git a/data/nde/enriched/entries/0159_Q4688292.yaml b/data/nde/enriched/entries/0159_Q4688292.yaml index 976aa3558a..837254d9a9 100644 --- a/data/nde/enriched/entries/0159_Q4688292.yaml +++ b/data/nde/enriched/entries/0159_Q4688292.yaml @@ -709,18 +709,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:30.419385+00:00' source_archive: web/0159/geologischmuseum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Gelders Geologisch Museum - source_url: https://geologischmuseum.nl/ - retrieved_on: '2025-11-29T15:22:49.377757+00:00' - xpath: /html/head/title - html_file: web/0159/geologischmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:30.418790+00:00' - claim_type: org_name claim_value: Gelders Geologisch Museum raw_value: Gelders Geologisch Museum @@ -781,6 +771,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:30.419326+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gelders Geologisch Museum diff --git a/data/nde/enriched/entries/0165_Q81181326.yaml b/data/nde/enriched/entries/0165_Q81181326.yaml index 0b5c83cc98..7a2e203213 100644 --- a/data/nde/enriched/entries/0165_Q81181326.yaml +++ b/data/nde/enriched/entries/0165_Q81181326.yaml @@ -496,18 +496,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:31.788139+00:00' source_archive: web/0165/nijkerk.eu - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Nijkerk - source_url: https://www.nijkerk.eu/gemeentearchief - retrieved_on: '2025-11-29T15:22:51.188307+00:00' - xpath: /html/head/title - html_file: web/0165/nijkerk.eu/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:31.787315+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -548,6 +538,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:31.787782+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Nijkerk diff --git a/data/nde/enriched/entries/0169_Q2332962.yaml b/data/nde/enriched/entries/0169_Q2332962.yaml index a7f43778d1..11ec59c73d 100644 --- a/data/nde/enriched/entries/0169_Q2332962.yaml +++ b/data/nde/enriched/entries/0169_Q2332962.yaml @@ -673,18 +673,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:32.713519+00:00' source_archive: web/0169/belmontearboretum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Belmonte Arboretum - source_url: https://belmontearboretum.nl/ - retrieved_on: '2025-11-29T15:26:05.199368+00:00' - xpath: /html/head/title - html_file: web/0169/belmontearboretum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:32.713034+00:00' - claim_type: org_name claim_value: Belmonte Arboretum raw_value: Belmonte Arboretum @@ -715,6 +705,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:32.713421+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Belmonte Arboretum diff --git a/data/nde/enriched/entries/0170_Q2365901.yaml b/data/nde/enriched/entries/0170_Q2365901.yaml index 2c42dc0405..277b1c1536 100644 --- a/data/nde/enriched/entries/0170_Q2365901.yaml +++ b/data/nde/enriched/entries/0170_Q2365901.yaml @@ -688,7 +688,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:33.192176+00:00' source_archive: web/0170/grenslandmuseum.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Stichting Grenslandmuseum Dinxperlo @@ -700,26 +700,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:33.191065+00:00' - - claim_type: org_name - claim_value: Previous - raw_value: Previous - source_url: https://www.grenslandmuseum.nl/ - retrieved_on: '2025-11-29T15:26:21.418250+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[1]/svg/title - html_file: web/0170/grenslandmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:33.191079+00:00' - - claim_type: org_name - claim_value: Next - raw_value: Next - source_url: https://www.grenslandmuseum.nl/ - retrieved_on: '2025-11-29T15:26:21.418250+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[2]/svg/title - html_file: web/0170/grenslandmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:33.191084+00:00' - claim_type: org_name claim_value: Right-open raw_value: Right-open @@ -790,6 +770,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:33.191917+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Grenslandmuseum diff --git a/data/nde/enriched/entries/0172_Q98907725.yaml b/data/nde/enriched/entries/0172_Q98907725.yaml index 4fdac88d7f..48839ecc05 100644 --- a/data/nde/enriched/entries/0172_Q98907725.yaml +++ b/data/nde/enriched/entries/0172_Q98907725.yaml @@ -546,18 +546,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:33.713426+00:00' source_archive: web/0172/heemkundekringbergh.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Heemkundekring Bergh - source_url: https://www.heemkundekringbergh.nl - retrieved_on: '2025-11-29T15:27:37.429075+00:00' - xpath: /html/head/title - html_file: web/0172/heemkundekringbergh.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:33.712712+00:00' - claim_type: description_short claim_value: Homepagina raw_value: Homepagina @@ -600,6 +590,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:33.713325+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring Bergh diff --git a/data/nde/enriched/entries/0174_heiligenbeeldenmuseum_kranenburg.yaml b/data/nde/enriched/entries/0174_heiligenbeeldenmuseum_kranenburg.yaml index 4902dcc2df..8e635b12b1 100644 --- a/data/nde/enriched/entries/0174_heiligenbeeldenmuseum_kranenburg.yaml +++ b/data/nde/enriched/entries/0174_heiligenbeeldenmuseum_kranenburg.yaml @@ -225,18 +225,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:34.113320+00:00' source_archive: web/0174/heiligenbeeldenmuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Heiligenbeeldenmuseum Kranenburg - source_url: https://www.heiligenbeeldenmuseum.nl/ - retrieved_on: '2025-11-29T15:26:10.471256+00:00' - xpath: /html/head/title - html_file: web/0174/heiligenbeeldenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:34.112922+00:00' - claim_type: description_short claim_value: Heiligenbeeldenmuseum Kranenburg raw_value: Heiligenbeeldenmuseum Kranenburg @@ -277,6 +267,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:34.113278+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het Heiligenbeeldenmuseum diff --git a/data/nde/enriched/entries/0177_Q7476442.yaml b/data/nde/enriched/entries/0177_Q7476442.yaml index c765664be2..5da5e6b377 100644 --- a/data/nde/enriched/entries/0177_Q7476442.yaml +++ b/data/nde/enriched/entries/0177_Q7476442.yaml @@ -826,7 +826,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:34.449363+00:00' source_archive: web/0177/historischmuseumede.nl - claims_count: 13 + claims_count: 10 claims: - claim_type: org_name claim_value: Historisch Museum Ede brengt verhalen van Ede tot leven @@ -848,16 +848,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:34.448156+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://www.historischmuseumede.nl - retrieved_on: '2025-11-29T15:26:16.779276+00:00' - xpath: /html/body/div[1]/header/div[1]/div/div/div/div[2]/div/div/div/div[2]/div[2]/div/button/span[2]/span/svg/title - html_file: web/0177/historischmuseumede.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:34.448181+00:00' - claim_type: org_name claim_value: Uren raw_value: Uren @@ -940,26 +930,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:44:34.448927+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fhistorischmuseumede.nl%2Fevents%2Fverwacht-verkerke-edese-wereldspeler-in-posters%2F - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fhistorischmuseumede.nl%2Fevents%2Fverwacht-verkerke-edese-wereldspeler-in-posters%2F - source_url: https://www.historischmuseumede.nl - retrieved_on: '2025-11-29T15:26:16.779276+00:00' - xpath: /html/body/div[1]/main/div/div/div/div/article/div/div/div[6]/div/div/div/div/div[1]/div/div/article/div[2]/ul/li[2]/ul/li[1]/a - html_file: web/0177/historischmuseumede.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:34.449077+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?url=https%3A%2F%2Fhistorischmuseumede.nl%2Fevents%2Fverwacht-verkerke-edese-wereldspeler-in-posters%2F - raw_value: https://twitter.com/share?url=https%3A%2F%2Fhistorischmuseumede.nl%2Fevents%2Fverwacht-verkerke-edese-wereldspeler-in-posters%2F - source_url: https://www.historischmuseumede.nl - retrieved_on: '2025-11-29T15:26:16.779276+00:00' - xpath: /html/body/div[1]/main/div/div/div/div/article/div/div/div[6]/div/div/div/div/div[1]/div/div/article/div[2]/ul/li[2]/ul/li[2]/a - html_file: web/0177/historischmuseumede.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:34.449084+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Museum Ede diff --git a/data/nde/enriched/entries/0178_Q98904482.yaml b/data/nde/enriched/entries/0178_Q98904482.yaml index 343d7fd833..a2beb3eaba 100644 --- a/data/nde/enriched/entries/0178_Q98904482.yaml +++ b/data/nde/enriched/entries/0178_Q98904482.yaml @@ -295,18 +295,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:34.584225+00:00' source_archive: web/0178/historischmuseumhedel.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.historischmuseumhedel.nl/ - retrieved_on: '2025-11-29T15:26:19.658845+00:00' - xpath: /html/head/title - html_file: web/0178/historischmuseumhedel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:34.583694+00:00' - claim_type: org_name claim_value: Historisch Museum Hedel raw_value: Historisch Museum Hedel @@ -337,6 +327,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:34.584143+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Hedel's Historie diff --git a/data/nde/enriched/entries/0184_Q38677497.yaml b/data/nde/enriched/entries/0184_Q38677497.yaml index 1ec81826d9..3eaf87f21a 100644 --- a/data/nde/enriched/entries/0184_Q38677497.yaml +++ b/data/nde/enriched/entries/0184_Q38677497.yaml @@ -939,7 +939,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:36.137017+00:00' source_archive: web/0184/gemeentearchief.ede.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Gemeentearchief Ede @@ -951,16 +951,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:36.136505+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://gemeentearchief.ede.nl/ - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://gemeentearchief.ede.nl/ - source_url: https://gemeentearchief.ede.nl - retrieved_on: '2025-11-29T15:26:47.211861+00:00' - xpath: /html/body/div/div/main/section/div[6]/div/div/ul/li[1]/a - html_file: web/0184/gemeentearchief.ede.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:36.136893+00:00' - claim_type: social_linkedin claim_value: https://www.linkedin.com/sharing/share-offsite/?url=https://gemeentearchief.ede.nl/ raw_value: https://www.linkedin.com/sharing/share-offsite/?url=https://gemeentearchief.ede.nl/ @@ -1011,6 +1001,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:36.136923+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeente Ede diff --git a/data/nde/enriched/entries/0186_Q98894809.yaml b/data/nde/enriched/entries/0186_Q98894809.yaml index a816705617..22d69f6a34 100644 --- a/data/nde/enriched/entries/0186_Q98894809.yaml +++ b/data/nde/enriched/entries/0186_Q98894809.yaml @@ -503,18 +503,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:36.794780+00:00' source_archive: web/0186/hkwestervoort.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME - Historische Kring Westervoort - source_url: https://www.hkwestervoort.nl - retrieved_on: '2025-11-29T15:27:14.285337+00:00' - xpath: /html/head/title - html_file: web/0186/hkwestervoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:36.792802+00:00' - claim_type: description_short claim_value: Historische Kring Westervoort raw_value: Historische Kring Westervoort @@ -555,6 +545,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:36.794479+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Kring Westervoort diff --git a/data/nde/enriched/entries/0201_Q98895215.yaml b/data/nde/enriched/entries/0201_Q98895215.yaml index 757f27c968..fa9caba628 100644 --- a/data/nde/enriched/entries/0201_Q98895215.yaml +++ b/data/nde/enriched/entries/0201_Q98895215.yaml @@ -632,18 +632,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:40.581965+00:00' source_archive: web/0201/historischeverenigingvoorst.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.historischeverenigingvoorst.nl - retrieved_on: '2025-11-29T15:29:57.119726+00:00' - xpath: /html/head/title - html_file: web/0201/historischeverenigingvoorst.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:40.580692+00:00' - claim_type: org_name claim_value: Datum raw_value: Datum @@ -714,6 +704,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:40.581750+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Vereniging Voorst diff --git a/data/nde/enriched/entries/0202_Q61930724.yaml b/data/nde/enriched/entries/0202_Q61930724.yaml index 5b15b9c7f0..0c08af968c 100644 --- a/data/nde/enriched/entries/0202_Q61930724.yaml +++ b/data/nde/enriched/entries/0202_Q61930724.yaml @@ -928,18 +928,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:40.670247+00:00' source_archive: web/0202/bronbeek.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Bronbeek - source_url: https://www.bronbeek.nl/ - retrieved_on: '2025-11-29T15:29:53.013757+00:00' - xpath: /html/head/title - html_file: web/0202/bronbeek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:40.669788+00:00' - claim_type: description_short claim_value: Bronbeek is een museum en hét kenniscentrum van het koloniaal-militair verleden van het Koninkrijk der Nederlanden. Het bevindt zich op een cultuurhistorisch @@ -996,6 +986,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:40.670137+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Koninklijk Tehuis voor Oud-Militairen en Museum Bronbeek diff --git a/data/nde/enriched/entries/0203_stichting_korpora.yaml b/data/nde/enriched/entries/0203_stichting_korpora.yaml index 5c920fec29..6c234a2881 100644 --- a/data/nde/enriched/entries/0203_stichting_korpora.yaml +++ b/data/nde/enriched/entries/0203_stichting_korpora.yaml @@ -399,18 +399,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:41.114748+00:00' source_archive: web/0203/korpora.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Korpora - source_url: https://www.korpora.nl/ - retrieved_on: '2025-11-29T15:32:28.709223+00:00' - xpath: /html/head/title - html_file: web/0203/korpora.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:41.113200+00:00' - claim_type: org_name claim_value: Korpora raw_value: Korpora @@ -461,6 +451,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:41.114337+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Korpora diff --git a/data/nde/enriched/entries/0213_Q54957003.yaml b/data/nde/enriched/entries/0213_Q54957003.yaml index 1da0354608..cf7caaed2c 100644 --- a/data/nde/enriched/entries/0213_Q54957003.yaml +++ b/data/nde/enriched/entries/0213_Q54957003.yaml @@ -894,7 +894,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:43.665080+00:00' source_archive: web/0213/vantlindenhoutmuseum.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Museum Kinderdorp Neerbosch Nijmegen @@ -920,16 +920,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:44:43.664098+00:00' - - claim_type: org_name - claim_value: museumkinderdorpneerbosch.nl - raw_value: museumkinderdorpneerbosch.nl - source_url: http://www.vantlindenhoutmuseum.nl - retrieved_on: '2025-11-29T15:31:25.473939+00:00' - xpath: /html/head/meta[10] - html_file: web/0213/vantlindenhoutmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:44:43.664302+00:00' - claim_type: email claim_value: info@museumkinderdorpneerbosch.nl raw_value: info@museumkinderdorpneerbosch.nl @@ -990,16 +980,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:43.664714+00:00' - - claim_type: org_name - claim_value: NIEUWS - raw_value: NIEUWS - source_url: http://www.vantlindenhoutmuseum.nl - retrieved_on: '2025-11-29T15:31:25.473939+00:00' - xpath: /html/body/div/div/div/div/div[2]/div/div/section[1]/div/div/div/div/div/div[1]/div/h1 - html_file: web/0213/vantlindenhoutmuseum.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:43.664765+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Kinderdorp Neerbosch diff --git a/data/nde/enriched/entries/0219_Q109382770.yaml b/data/nde/enriched/entries/0219_Q109382770.yaml index e2978f9728..a46c11a398 100644 --- a/data/nde/enriched/entries/0219_Q109382770.yaml +++ b/data/nde/enriched/entries/0219_Q109382770.yaml @@ -532,18 +532,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:45.516054+00:00' source_archive: web/0219/museumoene.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Cultuur Historisch Museum Oene - source_url: https://museumoene.nl - retrieved_on: '2025-11-29T15:33:01.789691+00:00' - xpath: /html/head/title - html_file: web/0219/museumoene.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:45.515544+00:00' - claim_type: description_short claim_value: Cultuur Historisch Museum Oene raw_value: Cultuur Historisch Museum Oene @@ -564,6 +554,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:45.515976+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Cultuur Historisch Museum Oene diff --git a/data/nde/enriched/entries/0220_Q98961921.yaml b/data/nde/enriched/entries/0220_Q98961921.yaml index 8e1109f425..59c355a4f3 100644 --- a/data/nde/enriched/entries/0220_Q98961921.yaml +++ b/data/nde/enriched/entries/0220_Q98961921.yaml @@ -740,7 +740,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:45.976892+00:00' source_archive: web/0220/sjoelelburg.nl - claims_count: 5 + claims_count: 2 claims: - claim_type: org_name claim_value: Museum Sjoel Elburg @@ -780,124 +780,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_description extraction_timestamp: '2025-12-01T10:44:45.976067+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://sjoelelburg.nl/&t=Het - Museum - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://sjoelelburg.nl/&t=Het - Museum - source_url: https://sjoelelburg.nl/ - retrieved_on: '2025-11-29T15:36:05.747990+00:00' - xpath: /html/body/div/div[5]/div/div/div/div/div[3]/ul/li[2]/a - html_file: web/0220/sjoelelburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:45.976626+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?source=https://sjoelelburg.nl/&text=Het - Museum:https://sjoelelburg.nl/ - raw_value: https://twitter.com/intent/tweet?source=https://sjoelelburg.nl/&text=Het - Museum:https://sjoelelburg.nl/ - source_url: https://sjoelelburg.nl/ - retrieved_on: '2025-11-29T15:36:05.747990+00:00' - xpath: /html/body/div/div[5]/div/div/div/div/div[3]/ul/li[3]/a - html_file: web/0220/sjoelelburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:45.976635+00:00' - - claim_type: social_linkedin - claim_value: 'http://www.linkedin.com/shareArticle?mini=true&url=https://sjoelelburg.nl/&title=Het - Museum&summary=Steun het museum Openingstijden en prijzen Museum Sjoel ElburgMuseum - Sjoel Elburg is... een verhalenmuseum over het (on)gewone dagelijks leven van - joodse families die vanaf 1700 in Elburg hebben gewoond. Museum Sjoel Elburg - biedt... een historisch perspectief voor een actueel thema: integratie en verdraagzaamheid. - Museum Sjoel Elburg presenteert... de joodse geschiedenis op een dynamische, - onorthodoxe, boeiende en eigenzinnige wijze en laat daarmee een eigen museaal - geluid horen. De inrichting De inrichting van Museum Sjoel Elburg is tot stand - gekomen met medewerking van DJO Ontwerpers Den Haag en Cultuurproject Apeldoorn. - Inrichting museum De tentoonstellingVan 11 juli tot en met 29 november 2025 - de serie ‘Chassidische legenden’ te zien van de Groninger kunstenaar H.N. Werkman - De tentoonstelling Educatie Een bezoek aan Museum Sjoel Elburg is een unieke - ervaring. Nergens in Nederland is de geschiedenis van de joden in de mediene - – de provincie – zo mooi uitgewerkt als in dit museum. EDUCATIEJoden in Elburg - Namenlijst Ter herinnering: alle namen en gegevens van in Elburg geboren slachtoffers. - JODEN IN ELBURG 30 nov Sophie Northeimer (1899-1973) 16 mei Josef Steinhauer - 16 mei Gerda Steinhauer 14 mei Jozeph de Lange 11 mei Jozeph Beem 08 mrt Barend - de Hond 08 mrt Klaartje de Hond – de laatste brief HET LAATSTE NIEUWS 30 okt - Zoektocht naar onderduikgevers van Joods echtpaar Gans-Koopman Het Joodse echtpaar - Gesina Gans-Koopman en Eli Gans uit Amsterdam duikt in 1944 in Doornspijk onder. - Gesina heeft een vals persoonsbewijs op naam van Gesina Lees meer 26 okt Sjoellezing - 20 november 2025 Op uitnodiging van de Vrienden van Museum Sjoel Elburg verzorgt - drs. Cor Hoogerwerf op donderdag 20 november 2025 in de Ichthuskerk te Elburg - de Sjoellezing Lees meer 10 okt Sjoel ontvangt ruim 1000 euro via Rabo ClubSupport - Museum Sjoel Elburg deed dit jaar weer mee met de actie Rabo ClubSupport en - dat was niet zonder resultaat. Penningmeester en bestuurslid van Museum Sjoel - Lees meer 08 okt Najaarscursus 2025: ‘Joodse wijsheid en filosofie’ Wat is wijsheid? - Hoe leef je een goed en betekenisvol leven? Eeuwenlang zochten Joodse denkers - naar antwoorden op deze vragen. In de najaarscursus van 2025 Lees meer 28 aug - Vreugde in duistere tijden Van een vriend kreeg de Groninger kunstenaar en drukker - Hendrik Nicolaas Werkman het boekje ‘Die Legende des Baalsjems’ van de Joodse - godsdienstfilosoof Martin Buber te Lees meer 26 jul Oude Joodse legenden als - vorm van verzet Wat moet je in het heden met vertellingen van hemelwandelingen, - vliegende paarden en de wederopstanding van een Joodse bruid? De Groninger kunstenaar - H.N. Werkman zag Lees meer 17 jul In memoriam Theo van Ledden (1945-2025) Met - droefenis delen wij het bericht van het overlijden van Theo van Ledden. Theo - was oud-voorzitter van het bestuur van Museum Sjoel Elburg. Hij blijft Lees - meer 11 jul Chassidische legenden in de sjoel Tot en met 29 november biedt Museum - Sjoel Elburg de tentoonstelling ‘H.N. Werkman. Chassidische legenden en de kunst - van het verzet’. De opening werd 10 Lees meer Tentoonstellingen Verwacht 06 - dec, 2025 Mag Saar er zijn? Openingstijden en prijzen MUSEUM SJOEL ELBURG IS - WEER OPEN!Welkom! Museum Sjoel Elburg is weer open, zij het met nieuwe coronaregels - van doen en laten en onder de […]&source=https://sjoelelburg.nl/' - raw_value: 'http://www.linkedin.com/shareArticle?mini=true&url=https://sjoelelburg.nl/&title=Het - Museum&summary=Steun het museum Openingstijden en prijzen Museum Sjoel ElburgMuseum - Sjoel Elburg is... een verhalenmuseum over het (on)gewone dagelijks leven van - joodse families die vanaf 1700 in Elburg hebben gewoond. Museum Sjoel Elburg - biedt... een historisch perspectief voor een actueel thema: integratie en verdraagzaamheid. - Museum Sjoel Elburg presenteert... de joodse geschiedenis op een dynamische, - onorthodoxe, boeiende en eigenzinnige wijze en laat daarmee een eigen museaal - geluid horen. De inrichting De inrichting van Museum Sjoel Elburg is tot stand - gekomen met medewerking van DJO Ontwerpers Den Haag en Cultuurproject Apeldoorn. - Inrichting museum De tentoonstellingVan 11 juli tot en met 29 november 2025 - de serie ‘Chassidische legenden’ te zien van de Groninger kunstenaar H.N. Werkman - De tentoonstelling Educatie Een bezoek aan Museum Sjoel Elburg is een unieke - ervaring. Nergens in Nederland is de geschiedenis van de joden in de mediene - – de provincie – zo mooi uitgewerkt als in dit museum. EDUCATIEJoden in Elburg - Namenlijst Ter herinnering: alle namen en gegevens van in Elburg geboren slachtoffers. - JODEN IN ELBURG 30 nov Sophie Northeimer (1899-1973) 16 mei Josef Steinhauer - 16 mei Gerda Steinhauer 14 mei Jozeph de Lange 11 mei Jozeph Beem 08 mrt Barend - de Hond 08 mrt Klaartje de Hond – de laatste brief HET LAATSTE NIEUWS 30 okt - Zoektocht naar onderduikgevers van Joods echtpaar Gans-Koopman Het Joodse echtpaar - Gesina Gans-Koopman en Eli Gans uit Amsterdam duikt in 1944 in Doornspijk onder. - Gesina heeft een vals persoonsbewijs op naam van Gesina Lees meer 26 okt Sjoellezing - 20 november 2025 Op uitnodiging van de Vrienden van Museum Sjoel Elburg verzorgt - drs. Cor Hoogerwerf op donderdag 20 november 2025 in de Ichthuskerk te Elburg - de Sjoellezing Lees meer 10 okt Sjoel ontvangt ruim 1000 euro via Rabo ClubSupport - Museum Sjoel Elburg deed dit jaar weer mee met de actie Rabo ClubSupport en - dat was niet zonder resultaat. Penningmeester en bestuurslid van Museum Sjoel - Lees meer 08 okt Najaarscursus 2025: ‘Joodse wijsheid en filosofie’ Wat is wijsheid? - Hoe leef je een goed en betekenisvol leven? Eeuwenlang zochten Joodse denkers - naar antwoorden op deze vragen. In de najaarscursus van 2025 Lees meer 28 aug - Vreugde in duistere tijden Van een vriend kreeg de Groninger kunstenaar en drukker - Hendrik Nicolaas Werkman het boekje ‘Die Legende des Baalsjems’ van de Joodse - godsdienstfilosoof Martin Buber te Lees meer 26 jul Oude Joodse legenden als - vorm van verzet Wat moet je in het heden met vertellingen van hemelwandelingen, - vliegende paarden en de wederopstanding van een Joodse bruid? De Groninger kunstenaar - H.N. Werkman zag Lees meer 17 jul In memoriam Theo van Ledden (1945-2025) Met - droefenis delen wij het bericht van het overlijden van Theo van Ledden. Theo - was oud-voorzitter van het bestuur van Museum Sjoel Elburg. Hij blijft Lees - meer 11 jul Chassidische legenden in de sjoel Tot en met 29 november biedt Museum - Sjoel Elburg de tentoonstelling ‘H.N. Werkman. Chassidische legenden en de kunst - van het verzet’. De opening werd 10 Lees meer Tentoonstellingen Verwacht 06 - dec, 2025 Mag Saar er zijn? Openingstijden en prijzen MUSEUM SJOEL ELBURG IS - WEER OPEN!Welkom! Museum Sjoel Elburg is weer open, zij het met nieuwe coronaregels - van doen en laten en onder de […]&source=https://sjoelelburg.nl/' - source_url: https://sjoelelburg.nl/ - retrieved_on: '2025-11-29T15:36:05.747990+00:00' - xpath: /html/body/div/div[5]/div/div/div/div/div[3]/ul/li[5]/a - html_file: web/0220/sjoelelburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:45.976653+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Sjoel Elburg diff --git a/data/nde/enriched/entries/0225_Q98908556.yaml b/data/nde/enriched/entries/0225_Q98908556.yaml index adce77c7fb..1aa88ee207 100644 --- a/data/nde/enriched/entries/0225_Q98908556.yaml +++ b/data/nde/enriched/entries/0225_Q98908556.yaml @@ -1000,18 +1000,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:47.390334+00:00' source_archive: web/0225/slotloevestein.nl - claims_count: 7 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Slot Loevestein - raw_value: Slot Loevestein - Slot Loevestein - Rijksmuseum - source_url: http://www.slotloevestein.nl/ - retrieved_on: '2025-11-29T15:36:51.543964+00:00' - xpath: /html/head/title - html_file: web/0225/slotloevestein.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:47.388969+00:00' - claim_type: description_short claim_value: 'Rijksmuseum Slot Loevestein - UNESCO Werelderfgoed Nieuwe Hollandse Waterlinie - Beroemdste gevangene: Hugo de Groot. Ontsnapt in boekenkist.' @@ -1024,16 +1014,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:44:47.389211+00:00' - - claim_type: org_name - claim_value: Slot Loevestein - Rijksmuseum - raw_value: Slot Loevestein - Rijksmuseum - source_url: http://www.slotloevestein.nl/ - retrieved_on: '2025-11-29T15:36:51.543964+00:00' - xpath: /html/head/meta[10] - html_file: web/0225/slotloevestein.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:44:47.389563+00:00' - claim_type: email claim_value: info@slotloevestein.nl raw_value: info@slotloevestein.nl @@ -1074,6 +1054,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:47.390140+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijksmuseum Slot Loevestein diff --git a/data/nde/enriched/entries/0226_Q98893885.yaml b/data/nde/enriched/entries/0226_Q98893885.yaml index 9bc7c75e03..94da321c2e 100644 --- a/data/nde/enriched/entries/0226_Q98893885.yaml +++ b/data/nde/enriched/entries/0226_Q98893885.yaml @@ -387,18 +387,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:47.493414+00:00' source_archive: web/0226/museumveluwezoom.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Veluwezoom - source_url: https://www.museumveluwezoom.nl/ - retrieved_on: '2025-11-29T15:34:48.112339+00:00' - xpath: /html/head/title - html_file: web/0226/museumveluwezoom.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:47.492523+00:00' - claim_type: description_short claim_value: Museum Veluwezoom, gevestigd in Kasteel Doorwerth, biedt tentoonstellingen over de kunstenaarskolonie in Oosterbeek en de Veluwezoom. @@ -441,6 +431,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:47.493260+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Veluwezoom diff --git a/data/nde/enriched/entries/0230_Q18774274.yaml b/data/nde/enriched/entries/0230_Q18774274.yaml index 50110a4db3..81b1a0827c 100644 --- a/data/nde/enriched/entries/0230_Q18774274.yaml +++ b/data/nde/enriched/entries/0230_Q18774274.yaml @@ -609,7 +609,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:48.857637+00:00' source_archive: web/0230/mariahoeveputten.nl - claims_count: 14 + claims_count: 11 claims: - claim_type: org_name claim_value: Mariahoeve Putten @@ -621,36 +621,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:48.857013+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://mariahoeveputten.nl/ - retrieved_on: '2025-11-29T15:37:22.983423+00:00' - xpath: /html/body/footer/div[1]/div/div[1]/div[5]/ul/li[1]/a/svg/title - html_file: web/0230/mariahoeveputten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:48.857027+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://mariahoeveputten.nl/ - retrieved_on: '2025-11-29T15:37:22.983423+00:00' - xpath: /html/body/footer/div[1]/div/div[1]/div[5]/ul/li[2]/a/svg/title - html_file: web/0230/mariahoeveputten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:48.857033+00:00' - - claim_type: org_name - claim_value: YouTube - raw_value: YouTube - source_url: https://mariahoeveputten.nl/ - retrieved_on: '2025-11-29T15:37:22.983423+00:00' - xpath: /html/body/footer/div[1]/div/div[1]/div[5]/ul/li[3]/a/svg/title - html_file: web/0230/mariahoeveputten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:48.857037+00:00' - claim_type: org_name claim_value: Museumboerderij De Mariahoeve raw_value: Museumboerderij De Mariahoeve @@ -751,6 +721,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:48.857555+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museumboerderij Mariahoeve diff --git a/data/nde/enriched/entries/0234_Q2710899.yaml b/data/nde/enriched/entries/0234_Q2710899.yaml index 7e08d58d7c..3c27df9134 100644 --- a/data/nde/enriched/entries/0234_Q2710899.yaml +++ b/data/nde/enriched/entries/0234_Q2710899.yaml @@ -851,18 +851,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:50.618049+00:00' source_archive: web/0234/nationaalonderduikmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal onderduikmuseum - source_url: https://nationaalonderduikmuseum.nl - retrieved_on: '2025-11-29T15:40:23.434916+00:00' - xpath: /html/head/title - html_file: web/0234/nationaalonderduikmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:50.616994+00:00' - claim_type: org_name claim_value: Nationaal onderduikmuseum - raw_value: Nationaal onderduikmuseum - @@ -943,6 +933,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:50.617944+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Onderduikmuseum diff --git a/data/nde/enriched/entries/0245_Q98907726.yaml b/data/nde/enriched/entries/0245_Q98907726.yaml index 1933bc9856..db776d2e52 100644 --- a/data/nde/enriched/entries/0245_Q98907726.yaml +++ b/data/nde/enriched/entries/0245_Q98907726.yaml @@ -477,7 +477,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:53.689231+00:00' source_archive: web/0245/oudheidkundigeverenigingwehl.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Oudheidkundige vereniging Wehl @@ -489,16 +489,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:53.688202+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.oudheidkundigeverenigingwehl.nl - retrieved_on: '2025-11-29T15:39:17.440024+00:00' - xpath: /html/body/div/div/div/div/main/article/header/h1 - html_file: web/0245/oudheidkundigeverenigingwehl.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:53.689023+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oudheidkundige Vereniging Wehl diff --git a/data/nde/enriched/entries/0246_Q98895220.yaml b/data/nde/enriched/entries/0246_Q98895220.yaml index 73ebb9fd39..111e838531 100644 --- a/data/nde/enriched/entries/0246_Q98895220.yaml +++ b/data/nde/enriched/entries/0246_Q98895220.yaml @@ -321,7 +321,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:54.060526+00:00' source_archive: web/0246/zuwent.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Oudheidkundige Vereniging Zuwent @@ -333,16 +333,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:54.059903+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://zuwent.nl - retrieved_on: '2025-11-29T15:39:48.774087+00:00' - xpath: /html/body/div/div/div/div/div/main/div[1]/h1 - html_file: web/0246/zuwent.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:54.060459+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oudheidkundige Vereniging Zuwent diff --git a/data/nde/enriched/entries/0251_Q26203717.yaml b/data/nde/enriched/entries/0251_Q26203717.yaml index 19d863071f..405d8557c5 100644 --- a/data/nde/enriched/entries/0251_Q26203717.yaml +++ b/data/nde/enriched/entries/0251_Q26203717.yaml @@ -848,22 +848,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:55.005764+00:00' source_archive: web/0251/regionaalarchiefrivierenland.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken - en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, - Neder-Betuwe, Tiel en Zaltbommel en over polders en waterschappen in Rivierenland. - raw_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken - en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, - Neder-Betuwe, Tiel en Zaltbommel en over polders en waterschappen in Rivierenland. - source_url: https://regionaalarchiefrivierenland.nl/home - retrieved_on: '2025-11-29T15:39:50.244225+00:00' - xpath: /html/head/title - html_file: web/0251/regionaalarchiefrivierenland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:55.004643+00:00' - claim_type: description_short claim_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, @@ -918,6 +904,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:55.005600+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regionaal Archief Rivierenland diff --git a/data/nde/enriched/entries/0252_Q81181406.yaml b/data/nde/enriched/entries/0252_Q81181406.yaml index 18b15680ce..a0e6b18888 100644 --- a/data/nde/enriched/entries/0252_Q81181406.yaml +++ b/data/nde/enriched/entries/0252_Q81181406.yaml @@ -553,18 +553,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:55.136545+00:00' source_archive: web/0252/erfgoedcentrumzutphen.nl - claims_count: 12 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Erfgoedcentrum Zutphen - source_url: https://erfgoedcentrumzutphen.nl/ - retrieved_on: '2025-11-29T15:39:50.527456+00:00' - xpath: /html/head/title - html_file: web/0252/erfgoedcentrumzutphen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:55.135283+00:00' - claim_type: description_short claim_value: 'Het Erfgoedcentrum Zutphen: historisch hart van de regio! Eén plek voor onze vier erfgoedpartners: Archeologie, Monumentenzorg, Musea Zutphen en @@ -679,6 +669,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:55.136478+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regionaal Archief Zutphen | Erfgoedcentrum Zutphen diff --git a/data/nde/enriched/entries/0258_Q56459713.yaml b/data/nde/enriched/entries/0258_Q56459713.yaml index 915b5260c0..2173768cd2 100644 --- a/data/nde/enriched/entries/0258_Q56459713.yaml +++ b/data/nde/enriched/entries/0258_Q56459713.yaml @@ -872,18 +872,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:56.930660+00:00' source_archive: web/0258/stadsmuseum-harderwijk.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stadsmuseum Harderwijk - source_url: https://www.stadsmuseum-harderwijk.nl/ - retrieved_on: '2025-11-29T15:41:05.031608+00:00' - xpath: /html/head/title - html_file: web/0258/stadsmuseum-harderwijk.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:56.930073+00:00' - claim_type: org_name claim_value: Stadsmuseum Harderwijk raw_value: Stadsmuseum Harderwijk @@ -954,6 +944,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:56.930575+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stadsmuseum Harderwijk diff --git a/data/nde/enriched/entries/0262_Q110995923.yaml b/data/nde/enriched/entries/0262_Q110995923.yaml index 50d340ad4a..334c393c84 100644 --- a/data/nde/enriched/entries/0262_Q110995923.yaml +++ b/data/nde/enriched/entries/0262_Q110995923.yaml @@ -500,7 +500,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:57.915376+00:00' source_archive: web/0262/buurderijdelagehof.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Buurderij De Lage Hof | @@ -562,16 +562,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:57.915141+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fbuurderijdelagehof.nl%2F - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fbuurderijdelagehof.nl%2F - source_url: https://buurderijdelagehof.nl/ - retrieved_on: '2025-11-29T15:40:44.887084+00:00' - xpath: /html/body/div[1]/div/footer/div[1]/div/div[2]/div/div/div/div/div[1]/div[1]/div/div/div/div[3]/a - html_file: web/0262/buurderijdelagehof.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:57.915168+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Buurderij De Lage Hof diff --git a/data/nde/enriched/entries/0263_Q98904476.yaml b/data/nde/enriched/entries/0263_Q98904476.yaml index 83a92a5f7c..8cc4547f92 100644 --- a/data/nde/enriched/entries/0263_Q98904476.yaml +++ b/data/nde/enriched/entries/0263_Q98904476.yaml @@ -444,18 +444,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:58.361382+00:00' source_archive: web/0263/erfgoedlov.org - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Erfgoed Meubelfabriek L.O.V. - source_url: https://erfgoedlov.org/ - retrieved_on: '2025-11-29T15:42:06.973774+00:00' - xpath: /html/head/title - html_file: web/0263/erfgoedlov.org/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:58.360020+00:00' - claim_type: description_short claim_value: Labor Omnia Vincit (Arbeid overwint alles) was de idealistische strijdkreet van de in 1910 te Oosterbeek door Gerrit Pelt opgerichte meubelfabriek. @@ -504,6 +494,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:58.361105+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Erfgoed Meubelfabriek L.O.V diff --git a/data/nde/enriched/entries/0273_streekarchief_bommelerwaard.yaml b/data/nde/enriched/entries/0273_streekarchief_bommelerwaard.yaml index ae4541e0c2..6637293cad 100644 --- a/data/nde/enriched/entries/0273_streekarchief_bommelerwaard.yaml +++ b/data/nde/enriched/entries/0273_streekarchief_bommelerwaard.yaml @@ -268,22 +268,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:01.077118+00:00' source_archive: web/0273/regionaalarchiefrivierenland.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken - en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, - Neder-Betuwe, Tiel en Zaltbommel en over polders en waterschappen in Rivierenland. - raw_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken - en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, - Neder-Betuwe, Tiel en Zaltbommel en over polders en waterschappen in Rivierenland. - source_url: https://regionaalarchiefrivierenland.nl/ - retrieved_on: '2025-11-29T15:44:06.761754+00:00' - xpath: /html/head/title - html_file: web/0273/regionaalarchiefrivierenland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:01.076512+00:00' - claim_type: description_short claim_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, @@ -338,6 +324,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:01.077015+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchief Bommelwaard diff --git a/data/nde/enriched/entries/0276_Q111190981.yaml b/data/nde/enriched/entries/0276_Q111190981.yaml index 1ffafa33b0..db0e06e8c5 100644 --- a/data/nde/enriched/entries/0276_Q111190981.yaml +++ b/data/nde/enriched/entries/0276_Q111190981.yaml @@ -415,18 +415,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:02.280716+00:00' source_archive: web/0276/noordveluwsarchief.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - NoVA - source_url: https://noordveluwsarchief.nl/ - retrieved_on: '2025-11-29T15:47:48.936473+00:00' - xpath: /html/head/title - html_file: web/0276/noordveluwsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:02.278739+00:00' - claim_type: description_short claim_value: Het noordveluws archief geeft toegang tot de geschiedenis van acht gemeenten op de Veluwe. @@ -499,6 +489,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:02.280407+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchivariaat Noordwest-Veluwe diff --git a/data/nde/enriched/entries/0280_Q56459657.yaml b/data/nde/enriched/entries/0280_Q56459657.yaml index 1dd2905e38..93ae625772 100644 --- a/data/nde/enriched/entries/0280_Q56459657.yaml +++ b/data/nde/enriched/entries/0280_Q56459657.yaml @@ -638,18 +638,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:03.236578+00:00' source_archive: web/0280/hagedoornsplaatse.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Veluws Museum Hagedoorns Plaatse - source_url: http://www.hagedoornsplaatse.nl - retrieved_on: '2025-11-29T15:46:05.283440+00:00' - xpath: /html/head/title - html_file: web/0280/hagedoornsplaatse.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:03.234944+00:00' - claim_type: description_short claim_value: 'Water, historie en de Veluwe: samen komen zij tot leven bij Veluws Museum Hagedoorns Plaatse Nieuw dit seizoen! De gloednieuwe tentoonstelling @@ -740,6 +730,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:03.236230+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Veluws Museum Hagedoorns Plaatse diff --git a/data/nde/enriched/entries/0283_vereniging_oudheidkunde_lichtenvoorde.yaml b/data/nde/enriched/entries/0283_vereniging_oudheidkunde_lichtenvoorde.yaml index afbcdf3327..93439046fd 100644 --- a/data/nde/enriched/entries/0283_vereniging_oudheidkunde_lichtenvoorde.yaml +++ b/data/nde/enriched/entries/0283_vereniging_oudheidkunde_lichtenvoorde.yaml @@ -255,7 +255,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:22.374849+00:00' source_archive: web/0283/zuwent.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Oudheidkundige Vereniging Zuwent @@ -267,13 +267,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:25:22.374268+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.zuwent.nl/ - retrieved_on: '' - xpath: /html/body/div/div/div/div/div/main/div[1]/h1 - html_file: web/0283/zuwent.nl/rendered.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T12:25:22.374780+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0285_Q2470853.yaml b/data/nde/enriched/entries/0285_Q2470853.yaml index 17ef4c6f0c..3558f4fc75 100644 --- a/data/nde/enriched/entries/0285_Q2470853.yaml +++ b/data/nde/enriched/entries/0285_Q2470853.yaml @@ -1120,7 +1120,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:04.150070+00:00' source_archive: web/0285/bergendal.wereldmuseum.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: 'Wereldmuseum Berg en Dal | Het Archief: 2014' @@ -1204,36 +1204,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:04.149232+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&title=Het%20Archief%3A%202014%20-%202023&source=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&summary=Het%20Archief%3A%202014%20-%202023 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&title=Het%20Archief%3A%202014%20-%202023&source=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&summary=Het%20Archief%3A%202014%20-%202023 - source_url: https://bergendal.wereldmuseum.nl/nl - retrieved_on: '2025-11-29T15:47:20.084264+00:00' - xpath: /html/body/div[2]/div[2]/main/div[3]/div/section/div/div/div/div/div[2]/a[1] - html_file: web/0285/bergendal.wereldmuseum.nl/pages/nl.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:04.149438+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?url=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&text=Het%20Archief%3A%202014%20-%202023&via=afrikamuseum&hashtags=afrikamuseum - raw_value: https://twitter.com/intent/tweet?url=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&text=Het%20Archief%3A%202014%20-%202023&via=afrikamuseum&hashtags=afrikamuseum - source_url: https://bergendal.wereldmuseum.nl/nl - retrieved_on: '2025-11-29T15:47:20.084264+00:00' - xpath: /html/body/div[2]/div[2]/main/div[3]/div/section/div/div/div/div/div[2]/a[2] - html_file: web/0285/bergendal.wereldmuseum.nl/pages/nl.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:04.149462+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A//bergendal.wereldmuseum.nl/nl/node/4177 - raw_value: https://www.facebook.com/sharer.php?u=https%3A//bergendal.wereldmuseum.nl/nl/node/4177 - source_url: https://bergendal.wereldmuseum.nl/nl - retrieved_on: '2025-11-29T15:47:20.084264+00:00' - xpath: /html/body/div[2]/div[2]/main/div[3]/div/section/div/div/div/div/div[2]/a[3] - html_file: web/0285/bergendal.wereldmuseum.nl/pages/nl.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:04.149468+00:00' - claim_type: org_name claim_value: 'Het Archief: 2014 - 2023' raw_value: 'Het Archief: 2014 - 2023' @@ -1244,6 +1214,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:04.149600+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Wereldmuseum Berg en Dal diff --git a/data/nde/enriched/entries/0286_Q484899.yaml b/data/nde/enriched/entries/0286_Q484899.yaml index f2c79646d6..d504f9d3f5 100644 --- a/data/nde/enriched/entries/0286_Q484899.yaml +++ b/data/nde/enriched/entries/0286_Q484899.yaml @@ -847,18 +847,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:04.559349+00:00' source_archive: web/0286/velorama.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal Fietsmuseum Velorama - source_url: https://velorama.nl/ - retrieved_on: '2025-11-29T15:47:25.907146+00:00' - xpath: /html/head/title - html_file: web/0286/velorama.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:04.558471+00:00' - claim_type: org_name claim_value: Nationaal Fietsmuseum Velorama raw_value: Nationaal Fietsmuseum Velorama @@ -919,6 +909,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:04.559245+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Fietsmuseum Velorama diff --git a/data/nde/enriched/entries/0295_Q121900753.yaml b/data/nde/enriched/entries/0295_Q121900753.yaml index 0fee756760..dc5baf8208 100644 --- a/data/nde/enriched/entries/0295_Q121900753.yaml +++ b/data/nde/enriched/entries/0295_Q121900753.yaml @@ -533,18 +533,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:06.876921+00:00' source_archive: web/0295/museumaandea.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum aan de A - source_url: https://museumaandea.nl/ - retrieved_on: '2025-11-29T15:49:11.715226+00:00' - xpath: /html/head/title - html_file: web/0295/museumaandea.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:06.876192+00:00' - claim_type: description_short claim_value: Museum van, door en over Groningers. Kijk voor je bezoek altijd op onze website voor de actuele tentoonstellingen. @@ -627,6 +617,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:06.876793+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum aan de A diff --git a/data/nde/enriched/entries/0309_Q81181191.yaml b/data/nde/enriched/entries/0309_Q81181191.yaml index 2508753dfb..c1a5c47799 100644 --- a/data/nde/enriched/entries/0309_Q81181191.yaml +++ b/data/nde/enriched/entries/0309_Q81181191.yaml @@ -441,18 +441,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:10.510345+00:00' source_archive: web/0309/eemsdelta.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Eemsdelta - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:22.610004+00:00' - xpath: /html/head/title - html_file: web/0309/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.509614+00:00' - claim_type: org_name claim_value: Gemeente Eemsdelta raw_value: Gemeente Eemsdelta @@ -473,46 +463,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:10.509633+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:22.610004+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0309/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.509642+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:22.610004+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0309/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.509647+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:22.610004+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0309/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.509651+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:22.610004+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0309/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.509655+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -603,6 +553,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:10.510187+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Appingedam diff --git a/data/nde/enriched/entries/0311_Q81181383.yaml b/data/nde/enriched/entries/0311_Q81181383.yaml index 2852d3c733..c902d76ff2 100644 --- a/data/nde/enriched/entries/0311_Q81181383.yaml +++ b/data/nde/enriched/entries/0311_Q81181383.yaml @@ -438,18 +438,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:10.889962+00:00' source_archive: web/0311/westerwolde.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerwolde - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:25.286603+00:00' - xpath: /html/head/title - html_file: web/0311/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.888532+00:00' - claim_type: org_name claim_value: Gemeente Westerwolde raw_value: Gemeente Westerwolde @@ -480,46 +470,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:10.888559+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:25.286603+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0311/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.888572+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:25.286603+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0311/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.888577+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:25.286603+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0311/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.888581+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:25.286603+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0311/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.888585+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -600,6 +550,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:10.889714+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Bellingwedde diff --git a/data/nde/enriched/entries/0313_Q81181241.yaml b/data/nde/enriched/entries/0313_Q81181241.yaml index 4ba6c92732..72b5834281 100644 --- a/data/nde/enriched/entries/0313_Q81181241.yaml +++ b/data/nde/enriched/entries/0313_Q81181241.yaml @@ -356,18 +356,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.083942+00:00' source_archive: web/0313/eemsdelta.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Eemsdelta - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:26.290002+00:00' - xpath: /html/head/title - html_file: web/0313/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.082770+00:00' - claim_type: org_name claim_value: Gemeente Eemsdelta raw_value: Gemeente Eemsdelta @@ -388,46 +378,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:11.082789+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:26.290002+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0313/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.082798+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:26.290002+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0313/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.082803+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:26.290002+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0313/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.082808+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:26.290002+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0313/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.082812+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -518,6 +468,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.083501+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Delfzijl diff --git a/data/nde/enriched/entries/0315_Q81181260.yaml b/data/nde/enriched/entries/0315_Q81181260.yaml index 8c5c50d6b8..7212802ca2 100644 --- a/data/nde/enriched/entries/0315_Q81181260.yaml +++ b/data/nde/enriched/entries/0315_Q81181260.yaml @@ -348,18 +348,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.269475+00:00' source_archive: web/0315/westerkwartier.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerkwartier - source_url: https://www.westerkwartier.nl/ - retrieved_on: '2025-11-29T15:51:27.421565+00:00' - xpath: /html/head/title - html_file: web/0315/westerkwartier.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.268968+00:00' - claim_type: description_short claim_value: Op deze website vindt u alle informatie, nieuwsberichten en dienstverlening van gemeente Westerkwartier. @@ -412,6 +402,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.269372+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Grootegast diff --git a/data/nde/enriched/entries/0316_Q81181282.yaml b/data/nde/enriched/entries/0316_Q81181282.yaml index 6b181c6d9f..11a13e1940 100644 --- a/data/nde/enriched/entries/0316_Q81181282.yaml +++ b/data/nde/enriched/entries/0316_Q81181282.yaml @@ -353,18 +353,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.373161+00:00' source_archive: web/0316/gemeente.groningen.nl - claims_count: 31 + claims_count: 30 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Groningen - source_url: https://gemeente.groningen.nl/ - retrieved_on: '2025-11-29T15:51:35.978903+00:00' - xpath: /html/head/title - html_file: web/0316/gemeente.groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.372331+00:00' - claim_type: org_name claim_value: mobiliteit icon raw_value: mobiliteit icon @@ -667,6 +657,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.372954+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Haren diff --git a/data/nde/enriched/entries/0317_Q81181273.yaml b/data/nde/enriched/entries/0317_Q81181273.yaml index 169661d5bf..dbefbe6c60 100644 --- a/data/nde/enriched/entries/0317_Q81181273.yaml +++ b/data/nde/enriched/entries/0317_Q81181273.yaml @@ -375,18 +375,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.477554+00:00' source_archive: web/0317/midden-groningen.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Midden-Groningen - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:37.597843+00:00' - xpath: /html/head/title - html_file: web/0317/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.476796+00:00' - claim_type: org_name claim_value: Gemeente Midden-Groningen raw_value: Gemeente Midden-Groningen @@ -407,36 +397,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:11.476814+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:37.597843+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0317/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.476823+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:37.597843+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0317/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.476827+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:37.597843+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0317/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.476831+00:00' - claim_type: org_name claim_value: Youtube kanaal raw_value: Youtube kanaal @@ -497,6 +457,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.477409+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Hoogezand-Sappemeer diff --git a/data/nde/enriched/entries/0318_Q81181307.yaml b/data/nde/enriched/entries/0318_Q81181307.yaml index a951df6afe..e0eab76d5c 100644 --- a/data/nde/enriched/entries/0318_Q81181307.yaml +++ b/data/nde/enriched/entries/0318_Q81181307.yaml @@ -390,18 +390,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.572721+00:00' source_archive: web/0318/westerkwartier.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerkwartier - source_url: https://www.westerkwartier.nl/ - retrieved_on: '2025-11-29T15:51:38.210879+00:00' - xpath: /html/head/title - html_file: web/0318/westerkwartier.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.571982+00:00' - claim_type: description_short claim_value: Op deze website vindt u alle informatie, nieuwsberichten en dienstverlening van gemeente Westerkwartier. @@ -454,6 +444,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.572608+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Leek diff --git a/data/nde/enriched/entries/0319_Q81181310.yaml b/data/nde/enriched/entries/0319_Q81181310.yaml index 1193c80847..237128011c 100644 --- a/data/nde/enriched/entries/0319_Q81181310.yaml +++ b/data/nde/enriched/entries/0319_Q81181310.yaml @@ -422,18 +422,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.677180+00:00' source_archive: web/0319/eemsdelta.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Eemsdelta - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:41.290233+00:00' - xpath: /html/head/title - html_file: web/0319/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.676253+00:00' - claim_type: org_name claim_value: Gemeente Eemsdelta raw_value: Gemeente Eemsdelta @@ -454,46 +444,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:11.676274+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:41.290233+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0319/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.676283+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:41.290233+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0319/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.676288+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:41.290233+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0319/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.676292+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:41.290233+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0319/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.676296+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -584,6 +534,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.676898+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Loppersum diff --git a/data/nde/enriched/entries/0320_Q81181318.yaml b/data/nde/enriched/entries/0320_Q81181318.yaml index 7d8f5061d6..64386132aa 100644 --- a/data/nde/enriched/entries/0320_Q81181318.yaml +++ b/data/nde/enriched/entries/0320_Q81181318.yaml @@ -318,18 +318,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.780971+00:00' source_archive: web/0320/westerkwartier.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerkwartier - source_url: https://www.westerkwartier.nl/ - retrieved_on: '2025-11-29T15:51:41.935757+00:00' - xpath: /html/head/title - html_file: web/0320/westerkwartier.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.780253+00:00' - claim_type: description_short claim_value: Op deze website vindt u alle informatie, nieuwsberichten en dienstverlening van gemeente Westerkwartier. @@ -382,6 +372,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.780763+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Marum diff --git a/data/nde/enriched/entries/0321_Q81181324.yaml b/data/nde/enriched/entries/0321_Q81181324.yaml index dca250aa55..be98d73262 100644 --- a/data/nde/enriched/entries/0321_Q81181324.yaml +++ b/data/nde/enriched/entries/0321_Q81181324.yaml @@ -370,18 +370,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.877032+00:00' source_archive: web/0321/midden-groningen.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Midden-Groningen - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:42.179010+00:00' - xpath: /html/head/title - html_file: web/0321/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.876362+00:00' - claim_type: org_name claim_value: Gemeente Midden-Groningen raw_value: Gemeente Midden-Groningen @@ -402,36 +392,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:11.876380+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:42.179010+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0321/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.876388+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:42.179010+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0321/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.876392+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:42.179010+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0321/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.876396+00:00' - claim_type: org_name claim_value: Youtube kanaal raw_value: Youtube kanaal @@ -492,6 +452,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.876893+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Menterwolde diff --git a/data/nde/enriched/entries/0322_Q81181387.yaml b/data/nde/enriched/entries/0322_Q81181387.yaml index 6d5d7b379a..20ac2d4a13 100644 --- a/data/nde/enriched/entries/0322_Q81181387.yaml +++ b/data/nde/enriched/entries/0322_Q81181387.yaml @@ -531,18 +531,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.991194+00:00' source_archive: web/0322/gemeente-oldambt.nl - claims_count: 35 + claims_count: 34 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Oldambt - source_url: http://www.gemeente-oldambt.nl - retrieved_on: '2025-11-29T15:51:42.548124+00:00' - xpath: /html/head/title - html_file: web/0322/gemeente-oldambt.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.990301+00:00' - claim_type: org_name claim_value: verkiezingen icon raw_value: verkiezingen icon @@ -883,6 +873,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.990987+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Oldambt diff --git a/data/nde/enriched/entries/0323_Q81181329.yaml b/data/nde/enriched/entries/0323_Q81181329.yaml index e837a75ec6..d79f9b9599 100644 --- a/data/nde/enriched/entries/0323_Q81181329.yaml +++ b/data/nde/enriched/entries/0323_Q81181329.yaml @@ -517,7 +517,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:12.129080+00:00' source_archive: web/0323/pekela.nl - claims_count: 9 + claims_count: 7 claims: - claim_type: org_name claim_value: Homepage @@ -529,26 +529,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:12.128265+00:00' - - claim_type: address - claim_value: '' - raw_value: ' ' - source_url: https://www.pekela.nl/ - retrieved_on: '2025-11-29T15:51:43.085163+00:00' - xpath: /html/head/script[24] - html_file: web/0323/pekela.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_streetAddress - extraction_timestamp: '2025-12-01T10:45:12.128639+00:00' - - claim_type: phone - claim_value: '' - raw_value: '' - source_url: https://www.pekela.nl/ - retrieved_on: '2025-11-29T15:51:43.085163+00:00' - xpath: /html/head/script[24] - html_file: web/0323/pekela.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_telephone - extraction_timestamp: '2025-12-01T10:45:12.128645+00:00' - claim_type: email claim_value: info@pekela.nl raw_value: info@pekela.nl @@ -609,6 +589,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:12.128949+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Pekela diff --git a/data/nde/enriched/entries/0324_Q81181354.yaml b/data/nde/enriched/entries/0324_Q81181354.yaml index eece160435..dcd0a837d7 100644 --- a/data/nde/enriched/entries/0324_Q81181354.yaml +++ b/data/nde/enriched/entries/0324_Q81181354.yaml @@ -329,18 +329,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:12.232037+00:00' source_archive: web/0324/midden-groningen.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Midden-Groningen - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:43.274530+00:00' - xpath: /html/head/title - html_file: web/0324/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.231147+00:00' - claim_type: org_name claim_value: Gemeente Midden-Groningen raw_value: Gemeente Midden-Groningen @@ -361,36 +351,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:12.231165+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:43.274530+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0324/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.231174+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:43.274530+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0324/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.231178+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:43.274530+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0324/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.231181+00:00' - claim_type: org_name claim_value: Youtube kanaal raw_value: Youtube kanaal @@ -451,6 +411,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:12.231898+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Slochteren diff --git a/data/nde/enriched/entries/0326_Q81181363.yaml b/data/nde/enriched/entries/0326_Q81181363.yaml index 7f6a36c85e..137f1551df 100644 --- a/data/nde/enriched/entries/0326_Q81181363.yaml +++ b/data/nde/enriched/entries/0326_Q81181363.yaml @@ -315,18 +315,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:12.417417+00:00' source_archive: web/0326/gemeente.groningen.nl - claims_count: 31 + claims_count: 30 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Groningen - source_url: https://gemeente.groningen.nl/ - retrieved_on: '2025-11-29T15:51:43.937089+00:00' - xpath: /html/head/title - html_file: web/0326/gemeente.groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.416409+00:00' - claim_type: org_name claim_value: mobiliteit icon raw_value: mobiliteit icon @@ -629,6 +619,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:12.417176+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Ten Boer diff --git a/data/nde/enriched/entries/0327_Q81181368.yaml b/data/nde/enriched/entries/0327_Q81181368.yaml index eda5653f1f..97b7660f6f 100644 --- a/data/nde/enriched/entries/0327_Q81181368.yaml +++ b/data/nde/enriched/entries/0327_Q81181368.yaml @@ -513,7 +513,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:12.523986+00:00' source_archive: web/0327/veendam.nl - claims_count: 9 + claims_count: 7 claims: - claim_type: org_name claim_value: Onderwerpen @@ -545,26 +545,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:12.522797+00:00' - - claim_type: address - claim_value: '' - raw_value: ' ' - source_url: https://www.veendam.nl/ - retrieved_on: '2025-11-29T15:51:45.448208+00:00' - xpath: /html/head/script[23] - html_file: web/0327/veendam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_streetAddress - extraction_timestamp: '2025-12-01T10:45:12.523239+00:00' - - claim_type: phone - claim_value: '' - raw_value: '' - source_url: https://www.veendam.nl/ - retrieved_on: '2025-11-29T15:51:45.448208+00:00' - xpath: /html/head/script[23] - html_file: web/0327/veendam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_telephone - extraction_timestamp: '2025-12-01T10:45:12.523248+00:00' - claim_type: email claim_value: info@veendam.nl raw_value: info@veendam.nl @@ -615,6 +595,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:12.523838+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Veendam diff --git a/data/nde/enriched/entries/0328_Q81181347.yaml b/data/nde/enriched/entries/0328_Q81181347.yaml index 6abcd46c31..4183541deb 100644 --- a/data/nde/enriched/entries/0328_Q81181347.yaml +++ b/data/nde/enriched/entries/0328_Q81181347.yaml @@ -462,18 +462,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:12.881895+00:00' source_archive: web/0328/westerwolde.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerwolde - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:46.979358+00:00' - xpath: /html/head/title - html_file: web/0328/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.880682+00:00' - claim_type: org_name claim_value: Gemeente Westerwolde raw_value: Gemeente Westerwolde @@ -504,46 +494,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:12.880707+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:46.979358+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0328/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.880746+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:46.979358+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0328/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.880751+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:46.979358+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0328/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.880755+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:46.979358+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0328/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.880759+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -624,6 +574,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:12.881659+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Vlagtwedde diff --git a/data/nde/enriched/entries/0329_Q121292045.yaml b/data/nde/enriched/entries/0329_Q121292045.yaml index a489584c43..464461af31 100644 --- a/data/nde/enriched/entries/0329_Q121292045.yaml +++ b/data/nde/enriched/entries/0329_Q121292045.yaml @@ -501,18 +501,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:13.260776+00:00' source_archive: web/0329/westerwolde.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerwolde - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:48.729627+00:00' - xpath: /html/head/title - html_file: web/0329/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.259662+00:00' - claim_type: org_name claim_value: Gemeente Westerwolde raw_value: Gemeente Westerwolde @@ -543,46 +533,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:13.259686+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:48.729627+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0329/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.259698+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:48.729627+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0329/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.259702+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:48.729627+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0329/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.259706+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:48.729627+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0329/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.259710+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -663,6 +613,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:13.260539+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Westerwolde diff --git a/data/nde/enriched/entries/0331_Q81181399.yaml b/data/nde/enriched/entries/0331_Q81181399.yaml index 38b60ac686..6d75baf713 100644 --- a/data/nde/enriched/entries/0331_Q81181399.yaml +++ b/data/nde/enriched/entries/0331_Q81181399.yaml @@ -466,18 +466,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:13.469757+00:00' source_archive: web/0331/westerkwartier.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerkwartier - source_url: https://www.westerkwartier.nl/ - retrieved_on: '2025-11-29T15:51:49.784707+00:00' - xpath: /html/head/title - html_file: web/0331/westerkwartier.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.468482+00:00' - claim_type: description_short claim_value: Op deze website vindt u alle informatie, nieuwsberichten en dienstverlening van gemeente Westerkwartier. @@ -530,6 +520,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:13.469358+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Zuidhorn diff --git a/data/nde/enriched/entries/0332_Q23987486.yaml b/data/nde/enriched/entries/0332_Q23987486.yaml index 794dcdf805..8d94f0a0fc 100644 --- a/data/nde/enriched/entries/0332_Q23987486.yaml +++ b/data/nde/enriched/entries/0332_Q23987486.yaml @@ -1691,18 +1691,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:13.624102+00:00' source_archive: web/0332/midden-groningen.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Midden-Groningen - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:50.057500+00:00' - xpath: /html/head/title - html_file: web/0332/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.622976+00:00' - claim_type: org_name claim_value: Gemeente Midden-Groningen raw_value: Gemeente Midden-Groningen @@ -1723,36 +1713,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:13.623040+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:50.057500+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0332/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.623052+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:50.057500+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0332/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.623056+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:50.057500+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0332/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.623061+00:00' - claim_type: org_name claim_value: Youtube kanaal raw_value: Youtube kanaal @@ -1813,6 +1773,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:13.623956+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Midden-Groningen diff --git a/data/nde/enriched/entries/0333_Q114079325.yaml b/data/nde/enriched/entries/0333_Q114079325.yaml index bfd82ba9cc..1b3b02b654 100644 --- a/data/nde/enriched/entries/0333_Q114079325.yaml +++ b/data/nde/enriched/entries/0333_Q114079325.yaml @@ -494,18 +494,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:13.762568+00:00' source_archive: web/0333/historischarchief.midden-groningen.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historisch Archief Midden-Groningen - source_url: https://historischarchief.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:50.698335+00:00' - xpath: /html/head/title - html_file: web/0333/historischarchief.midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.761501+00:00' - claim_type: description_short claim_value: Het historisch documentatiecentrum voor de gemeente Midden-Groningen, ontstaan uit een fusie van de gemeenten Hoogezand, Slochteren en Muntendam. @@ -596,6 +586,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:13.762485+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Archief Midden-Groningen diff --git a/data/nde/enriched/entries/0334_Q2190733.yaml b/data/nde/enriched/entries/0334_Q2190733.yaml index edec2e4c73..b1581dc619 100644 --- a/data/nde/enriched/entries/0334_Q2190733.yaml +++ b/data/nde/enriched/entries/0334_Q2190733.yaml @@ -693,7 +693,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:13.846249+00:00' source_archive: web/0334/groningerarchieven.nl - claims_count: 10 + claims_count: 9 claims: - claim_type: org_name claim_value: Groninger Archieven @@ -717,16 +717,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:13.845534+00:00' - - claim_type: org_name - claim_value: groningerarchieven.nl - raw_value: groningerarchieven.nl - source_url: http://www.groningerarchieven.nl - retrieved_on: '2025-11-29T15:51:51.080844+00:00' - xpath: /html/head/meta[11] - html_file: web/0334/groningerarchieven.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:45:13.845871+00:00' - claim_type: email claim_value: info@groningerarchieven.nl raw_value: info@groningerarchieven.nl @@ -797,6 +787,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:13.846185+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Groninger Archieven diff --git a/data/nde/enriched/entries/0337_Q56460901.yaml b/data/nde/enriched/entries/0337_Q56460901.yaml index f3d54fa586..0d86bc9c09 100644 --- a/data/nde/enriched/entries/0337_Q56460901.yaml +++ b/data/nde/enriched/entries/0337_Q56460901.yaml @@ -669,18 +669,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:14.960213+00:00' source_archive: web/0337/speelgoedmuseumroden.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Speelgoedmuseum Roden - source_url: https://speelgoedmuseumroden.nl/ - retrieved_on: '2025-11-29T15:53:26.720773+00:00' - xpath: /html/head/title - html_file: web/0337/speelgoedmuseumroden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:14.959525+00:00' - claim_type: description_short claim_value: Speelgoedmuseum Roden Een wereld vol speelgoed. Ontdek de grootste speelgoedcollectie van Nederland. @@ -753,6 +743,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:14.960115+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Speelgoedmuseum Kinderwereld Roden diff --git a/data/nde/enriched/entries/0345_Q110981303.yaml b/data/nde/enriched/entries/0345_Q110981303.yaml index 2f2bdf1a03..091654e03b 100644 --- a/data/nde/enriched/entries/0345_Q110981303.yaml +++ b/data/nde/enriched/entries/0345_Q110981303.yaml @@ -510,18 +510,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:17.172563+00:00' source_archive: web/0345/domiestoen.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Domies Toen - source_url: https://www.domiestoen.nl/ - retrieved_on: '2025-11-29T15:53:28.219736+00:00' - xpath: /html/head/title - html_file: web/0345/domiestoen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:17.172184+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/domiestoen/ raw_value: https://www.facebook.com/domiestoen/ @@ -552,6 +542,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:17.172519+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Domies Toen diff --git a/data/nde/enriched/entries/0346_stichting_historie_stedum.yaml b/data/nde/enriched/entries/0346_stichting_historie_stedum.yaml index 7921f36ad7..a6f181fbc5 100644 --- a/data/nde/enriched/entries/0346_stichting_historie_stedum.yaml +++ b/data/nde/enriched/entries/0346_stichting_historie_stedum.yaml @@ -296,18 +296,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:17.568039+00:00' source_archive: web/0346/historiestedum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Historie Stedum - source_url: https://www.historiestedum.nl/ - retrieved_on: '2025-11-29T15:54:01.697816+00:00' - xpath: /html/head/title - html_file: web/0346/historiestedum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:17.567477+00:00' - claim_type: description_short claim_value: Van harte welkom op de website van Stichting Historie Stedum. Deze stichting heeft tot doelstelling om het verleden van Stedum levend te houden @@ -356,6 +346,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:17.567887+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Historie Stedum diff --git a/data/nde/enriched/entries/0348_Q15879552.yaml b/data/nde/enriched/entries/0348_Q15879552.yaml index d8bd063c0d..23ef43f8ec 100644 --- a/data/nde/enriched/entries/0348_Q15879552.yaml +++ b/data/nde/enriched/entries/0348_Q15879552.yaml @@ -835,7 +835,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:17.798230+00:00' source_archive: web/0348/muzeeaquarium.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: MuzeeAquarium Delfzijl (Groningen) @@ -893,16 +893,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:17.797859+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.muzeeaquarium.nl/ - retrieved_on: '2025-11-29T15:54:02.600828+00:00' - xpath: /html/body/div/div/div[3]/div/main/div/div/div[2]/div/div/div/section[2]/div[2]/div/section/div[2]/div/div[2]/div/div[1]/h1 - html_file: web/0348/muzeeaquarium.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:45:17.797903+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Muzeeaquarium Delfzijl diff --git a/data/nde/enriched/entries/0350_Q81181257.yaml b/data/nde/enriched/entries/0350_Q81181257.yaml index 29bb006fd4..64cb4b7ef4 100644 --- a/data/nde/enriched/entries/0350_Q81181257.yaml +++ b/data/nde/enriched/entries/0350_Q81181257.yaml @@ -431,18 +431,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:18.047922+00:00' source_archive: web/0350/noorderzijlvest.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Waterschap Noorderzijlvest - source_url: https://www.noorderzijlvest.nl/ - retrieved_on: '2025-11-29T15:54:03.327411+00:00' - xpath: /html/head/title - html_file: web/0350/noorderzijlvest.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:18.047276+00:00' - claim_type: description_short claim_value: Waterschap Noorderzijlvest zorgt voor veilige dijken en kades, schoon water, gezuiverd afvalwater en voldoende water in elk seizoen. Dit doen we in @@ -507,6 +497,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:18.047772+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Waterschap Noorderzijlvest diff --git a/data/nde/enriched/entries/0351_Q81181371.yaml b/data/nde/enriched/entries/0351_Q81181371.yaml index 18fc0391fc..ec1b695ad2 100644 --- a/data/nde/enriched/entries/0351_Q81181371.yaml +++ b/data/nde/enriched/entries/0351_Q81181371.yaml @@ -416,18 +416,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:18.502391+00:00' source_archive: web/0351/hunzeenaas.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Waterschap Hunze en Aa's - source_url: https://www.hunzeenaas.nl/ - retrieved_on: '2025-11-29T15:55:53.549676+00:00' - xpath: /html/head/title - html_file: web/0351/hunzeenaas.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:18.501635+00:00' - claim_type: description_short claim_value: Het waterschap is er voor u Elke ochtend wordt u wakker zonder zorgen over wateroverlast. U ziet boeren hun gewassen telen. En op een warme zomerdag @@ -494,6 +484,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:18.502323+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Waterschap Hunze en Aa's diff --git a/data/nde/enriched/entries/0356_Q9777.yaml b/data/nde/enriched/entries/0356_Q9777.yaml index 3d54753495..33b46f5b4a 100644 --- a/data/nde/enriched/entries/0356_Q9777.yaml +++ b/data/nde/enriched/entries/0356_Q9777.yaml @@ -2782,18 +2782,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:19.725401+00:00' source_archive: web/0356/venlo.nl - claims_count: 15 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Venlo - source_url: https://www.venlo.nl/ - retrieved_on: '2025-11-29T15:55:32.860589+00:00' - xpath: /html/head/title - html_file: web/0356/venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:19.724104+00:00' - claim_type: org_name claim_value: Gemeente Venlo raw_value: Gemeente Venlo @@ -2814,36 +2804,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:19.724213+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.venlo.nl/ - retrieved_on: '2025-11-29T15:55:32.860589+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0356/venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:19.724218+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.venlo.nl/ - retrieved_on: '2025-11-29T15:55:32.860589+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0356/venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:19.724223+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.venlo.nl/ - retrieved_on: '2025-11-29T15:55:32.860589+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0356/venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:19.724227+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -2854,16 +2814,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:19.724231+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.venlo.nl/ - retrieved_on: '2025-11-29T15:55:32.860589+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[5]/a/svg/title - html_file: web/0356/venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:19.724235+00:00' - claim_type: description_short claim_value: Website van de gemeente Venlo. Op deze website vindt u onze producten en diensten, ons bestuur en andere informatie over de gemeente Venlo. @@ -2936,6 +2886,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:19.725223+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Venlo diff --git a/data/nde/enriched/entries/0364_Q59962272.yaml b/data/nde/enriched/entries/0364_Q59962272.yaml index e78a282265..7fc093d94d 100644 --- a/data/nde/enriched/entries/0364_Q59962272.yaml +++ b/data/nde/enriched/entries/0364_Q59962272.yaml @@ -850,18 +850,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:22.582467+00:00' source_archive: web/0364/regioarchiefsittard-geleen.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - RegioArchief Sittard-Geleen - source_url: https://regioarchiefsittard-geleen.nl/ - retrieved_on: '2025-11-29T16:00:50.143099+00:00' - xpath: /html/head/title - html_file: web/0364/regioarchiefsittard-geleen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:22.581461+00:00' - claim_type: description_short claim_value: Het RegioArchief Sittard-Geleen geeft toegang tot de geschiedenis van Sittard-Geleen en de omliggende regio. @@ -934,6 +924,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:22.582333+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regioarchief Sittard-Geleen diff --git a/data/nde/enriched/entries/0366_Q121224886.yaml b/data/nde/enriched/entries/0366_Q121224886.yaml index b84b5ee3a3..48857226ac 100644 --- a/data/nde/enriched/entries/0366_Q121224886.yaml +++ b/data/nde/enriched/entries/0366_Q121224886.yaml @@ -330,18 +330,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:23.337087+00:00' source_archive: web/0366/regioarchiefsittard-geleen.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - RegioArchief Sittard-Geleen - source_url: https://regioarchiefsittard-geleen.nl/ - retrieved_on: '2025-11-29T16:03:28.878801+00:00' - xpath: /html/head/title - html_file: web/0366/regioarchiefsittard-geleen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:23.336060+00:00' - claim_type: description_short claim_value: Het RegioArchief Sittard-Geleen geeft toegang tot de geschiedenis van Sittard-Geleen en de omliggende regio. @@ -414,6 +404,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:23.336951+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regioarchief Sittard-Geleen diff --git a/data/nde/enriched/entries/0369_Q135412874.yaml b/data/nde/enriched/entries/0369_Q135412874.yaml index 702e1f1c7f..cb533325bc 100644 --- a/data/nde/enriched/entries/0369_Q135412874.yaml +++ b/data/nde/enriched/entries/0369_Q135412874.yaml @@ -471,18 +471,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:23.716463+00:00' source_archive: web/0369/boerderijenstichtinglimburg.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Boerderijenstichting Limburg - source_url: https://www.boerderijenstichtinglimburg.nl/ - retrieved_on: '2025-11-29T16:00:24.689795+00:00' - xpath: /html/head/title - html_file: web/0369/boerderijenstichtinglimburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:23.715809+00:00' - claim_type: description_short claim_value: Boerderijenstichting Limburg. Hart voor de Limburgse boerderij! raw_value: Boerderijenstichting Limburg. Hart voor de Limburgse boerderij! @@ -513,6 +503,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: schema_org_description extraction_timestamp: '2025-12-01T10:45:23.716207+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Boerderijenstichting Limburg diff --git a/data/nde/enriched/entries/0370_Q13442809.yaml b/data/nde/enriched/entries/0370_Q13442809.yaml index 5bd70ff518..c037a27376 100644 --- a/data/nde/enriched/entries/0370_Q13442809.yaml +++ b/data/nde/enriched/entries/0370_Q13442809.yaml @@ -780,7 +780,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:24.177531+00:00' source_archive: web/0370/botatuin.nl - claims_count: 10 + claims_count: 4 claims: - claim_type: org_name claim_value: Botanische Tuin Kerkrade @@ -822,66 +822,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:24.176941+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fadventsmarkt-3%2F - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fadventsmarkt-3%2F - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[1]/div[2]/div[4]/div/div/div[1]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177064+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fadventsmarkt-3%2F&title=Adventsmarkt&summary=ADVENTSMARKT29+EN+30+NOVEMBER%26nbsp%3BOp+29+en+30+november+houdt+de+Botanische+tuin - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fadventsmarkt-3%2F&title=Adventsmarkt&summary=ADVENTSMARKT29+EN+30+NOVEMBER%26nbsp%3BOp+29+en+30+november+houdt+de+Botanische+tuin - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[1]/div[2]/div[4]/div/div/div[2]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177096+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-de-helende-klanken-9%2F - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-de-helende-klanken-9%2F - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[2]/div[2]/div[4]/div/div/div[1]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177128+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-de-helende-klanken-9%2F&title=Beleef%20de%20helende%20klanken&summary=VRJDAG+5+DECEMBERBELEEF+DE+HELENDE+KLANKEN+IN+DE+BOTANISCHE+TUIN - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-de-helende-klanken-9%2F&title=Beleef%20de%20helende%20klanken&summary=VRJDAG+5+DECEMBERBELEEF+DE+HELENDE+KLANKEN+IN+DE+BOTANISCHE+TUIN - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[2]/div[2]/div[4]/div/div/div[2]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177146+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-dickens-2%2F - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-dickens-2%2F - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[3]/div[2]/div[4]/div/div/div[1]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177176+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-dickens-2%2F&title=Beleef%20dickens&summary=BELEEF+DICKENS+IN+DE+BOTANISCHE+TUIN+KERKRADEZATERDAG+13+%26amp%3B+ZONDAG+14+DECEMBER - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-dickens-2%2F&title=Beleef%20dickens&summary=BELEEF+DICKENS+IN+DE+BOTANISCHE+TUIN+KERKRADEZATERDAG+13+%26amp%3B+ZONDAG+14+DECEMBER - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[3]/div[2]/div[4]/div/div/div[2]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177186+00:00' + removed_invalid_claims: + - removed_count: 6 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Botanische Tuin Kerkrade diff --git a/data/nde/enriched/entries/0371_historisch_centrum_limburg.yaml b/data/nde/enriched/entries/0371_historisch_centrum_limburg.yaml index c268ef5474..71ec34f624 100644 --- a/data/nde/enriched/entries/0371_historisch_centrum_limburg.yaml +++ b/data/nde/enriched/entries/0371_historisch_centrum_limburg.yaml @@ -377,18 +377,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:24.294037+00:00' source_archive: web/0371/lokaleregelgeving.overheid.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken | Lokale wet- en regelgeving - source_url: https://lokaleregelgeving.overheid.nl/CVDR642447 - retrieved_on: '2025-11-29T16:00:57.077018+00:00' - xpath: /html/head/title - html_file: web/0371/lokaleregelgeving.overheid.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:24.293533+00:00' - claim_type: org_name claim_value: Doorzoek 99540 regelingen van lokale overheden raw_value: Doorzoek 99540 regelingen van lokale overheden @@ -399,6 +389,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:24.293991+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Centrum voor Regionale Geschiedenis Rijckheyt diff --git a/data/nde/enriched/entries/0372_Q111080967.yaml b/data/nde/enriched/entries/0372_Q111080967.yaml index 5abb062204..4e74ce1322 100644 --- a/data/nde/enriched/entries/0372_Q111080967.yaml +++ b/data/nde/enriched/entries/0372_Q111080967.yaml @@ -734,18 +734,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:24.690844+00:00' source_archive: web/0372/limburgserfgoed.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Coöperatie Erfgoed Limburg U.A. - source_url: https://www.limburgserfgoed.nl/ - retrieved_on: '2025-11-29T16:03:42.046377+00:00' - xpath: /html/body/title - html_file: web/0372/limburgserfgoed.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:24.690325+00:00' - claim_type: email claim_value: info@limburgserfgoed.nl raw_value: info@limburgserfgoed.nl @@ -786,6 +776,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:24.690790+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Coöperatie Erfgoed Limburg diff --git a/data/nde/enriched/entries/0375_Q81181279.yaml b/data/nde/enriched/entries/0375_Q81181279.yaml index a03f77c3d1..55cd4212c3 100644 --- a/data/nde/enriched/entries/0375_Q81181279.yaml +++ b/data/nde/enriched/entries/0375_Q81181279.yaml @@ -326,18 +326,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:25.030406+00:00' source_archive: web/0375/rijckheyt.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.rijckheyt.nl - retrieved_on: '2025-11-29T16:02:04.195590+00:00' - xpath: /html/head/title - html_file: web/0375/rijckheyt.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:25.029600+00:00' - claim_type: description_short claim_value: Beschrijving raw_value: Beschrijving @@ -418,6 +408,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:25.030346+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heerlen diff --git a/data/nde/enriched/entries/0378_Q81181301.yaml b/data/nde/enriched/entries/0378_Q81181301.yaml index 6fdbedd212..b2dd3a5c0f 100644 --- a/data/nde/enriched/entries/0378_Q81181301.yaml +++ b/data/nde/enriched/entries/0378_Q81181301.yaml @@ -522,18 +522,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:25.540655+00:00' source_archive: web/0378/kerkrade.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Kerkrade - source_url: https://www.kerkrade.nl/gemeentearchief - retrieved_on: '2025-11-29T16:02:07.296256+00:00' - xpath: /html/head/title - html_file: web/0378/kerkrade.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:25.539853+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -604,6 +594,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:25.540440+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Kerkrade diff --git a/data/nde/enriched/entries/0379_Q111190984.yaml b/data/nde/enriched/entries/0379_Q111190984.yaml index 6510763d5b..4902d3ea91 100644 --- a/data/nde/enriched/entries/0379_Q111190984.yaml +++ b/data/nde/enriched/entries/0379_Q111190984.yaml @@ -413,18 +413,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:25.779865+00:00' source_archive: web/0379/landgraaf.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Landgraaf - source_url: https://www.landgraaf.nl/gemeentearchief - retrieved_on: '2025-11-29T16:02:08.739776+00:00' - xpath: /html/head/title - html_file: web/0379/landgraaf.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:25.779036+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -507,6 +497,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:25.779686+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeente Landgraaf diff --git a/data/nde/enriched/entries/0381_Q107341629.yaml b/data/nde/enriched/entries/0381_Q107341629.yaml index 71dfae7d36..6191a571b8 100644 --- a/data/nde/enriched/entries/0381_Q107341629.yaml +++ b/data/nde/enriched/entries/0381_Q107341629.yaml @@ -519,18 +519,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:25.991888+00:00' source_archive: web/0381/archief.venlo.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeentearchief Venlo - source_url: https://archief.venlo.nl/ - retrieved_on: '2025-11-29T16:02:10.014909+00:00' - xpath: /html/head/title - html_file: web/0381/archief.venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:25.991080+00:00' - claim_type: description_short claim_value: Duik in de collecties van het Gemeentearchief Venlo. raw_value: Duik in de collecties van het Gemeentearchief Venlo. @@ -581,6 +571,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:25.991827+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Venlo diff --git a/data/nde/enriched/entries/0384_Q127703473.yaml b/data/nde/enriched/entries/0384_Q127703473.yaml index e5a715b833..9a36dcc620 100644 --- a/data/nde/enriched/entries/0384_Q127703473.yaml +++ b/data/nde/enriched/entries/0384_Q127703473.yaml @@ -316,18 +316,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:26.630558+00:00' source_archive: web/0384/limburgserfgoednet.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://limburgserfgoednet.nl/gratheminbeeld - retrieved_on: '2025-11-29T16:03:31.846272+00:00' - xpath: /html/head/title - html_file: web/0384/limburgserfgoednet.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:26.630223+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -388,6 +378,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:26.630497+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Grathem in Beeld diff --git a/data/nde/enriched/entries/0389_heemkunde_heibloem.yaml b/data/nde/enriched/entries/0389_heemkunde_heibloem.yaml index e9ac3e86af..4de8e26cf9 100644 --- a/data/nde/enriched/entries/0389_heemkunde_heibloem.yaml +++ b/data/nde/enriched/entries/0389_heemkunde_heibloem.yaml @@ -292,7 +292,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:28.366971+00:00' source_archive: web/0389/heibloem.nu - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Heemkundevereniging Heibloem @@ -316,16 +316,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_description extraction_timestamp: '2025-12-01T10:45:28.366468+00:00' - - claim_type: org_name - claim_value: Heibloem.nu - raw_value: Heibloem.nu - source_url: https://heibloem.nu/vereniging/heemkundevereniging-heibloem - retrieved_on: '2025-11-29T16:04:07.432877+00:00' - xpath: /html[1]/head/meta[13] - html_file: web/0389/heibloem.nu/pages/vereniging_heemkundevereniging-heibloem.tmp.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:45:28.366518+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Heiboem raw_value: https://www.facebook.com/Heiboem @@ -336,6 +326,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:28.366783+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundevereniging Heibloem diff --git a/data/nde/enriched/entries/0390_heemkunde_margraten.yaml b/data/nde/enriched/entries/0390_heemkunde_margraten.yaml index 9ec9e7325a..4c98c4e9e4 100644 --- a/data/nde/enriched/entries/0390_heemkunde_margraten.yaml +++ b/data/nde/enriched/entries/0390_heemkunde_margraten.yaml @@ -370,28 +370,16 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:28.733218+00:00' source_archive: web/0390/heemkunde-margraten.nl - claims_count: 2 - claims: - - claim_type: org_name - claim_value: heemkunde-margraten.nl - raw_value: heemkunde-margraten.nl - source_url: https://heemkunde-margraten.nl/ - retrieved_on: '2025-11-29T16:04:34.004258+00:00' - xpath: /html/head/title - html_file: web/0390/heemkunde-margraten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:28.732602+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://heemkunde-margraten.nl/ - retrieved_on: '2025-11-29T16:04:34.004258+00:00' - xpath: /html/body/main/article/header/div/h1 - html_file: web/0390/heemkunde-margraten.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:45:28.733133+00:00' + claims_count: 0 + claims: [] + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundevereniging Margraten diff --git a/data/nde/enriched/entries/0391_Q111081387.yaml b/data/nde/enriched/entries/0391_Q111081387.yaml index 512ad7129b..b99fcfc323 100644 --- a/data/nde/enriched/entries/0391_Q111081387.yaml +++ b/data/nde/enriched/entries/0391_Q111081387.yaml @@ -438,18 +438,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:28.797408+00:00' source_archive: web/0391/limburgserfgoednet.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://limburgserfgoednet.nl/heemkundeverenigingnieuwstadt - retrieved_on: '2025-11-29T16:04:34.657100+00:00' - xpath: /html/head/title - html_file: web/0391/limburgserfgoednet.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:28.796852+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -510,6 +500,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:28.797219+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkunde Vereniging Nieuwstadt diff --git a/data/nde/enriched/entries/0393_heemkundekring_sankt_tolbert.yaml b/data/nde/enriched/entries/0393_heemkundekring_sankt_tolbert.yaml index 78e062b0b6..c02f57b81e 100644 --- a/data/nde/enriched/entries/0393_heemkundekring_sankt_tolbert.yaml +++ b/data/nde/enriched/entries/0393_heemkundekring_sankt_tolbert.yaml @@ -460,18 +460,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:29.651604+00:00' source_archive: web/0393/sankttolbertvaals.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME - Heemkundekring Sankt-Tolbert Vaals - source_url: https://sankttolbertvaals.nl/ - retrieved_on: '2025-11-29T16:06:30.561780+00:00' - xpath: /html/head/title - html_file: web/0393/sankttolbertvaals.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:29.650375+00:00' - claim_type: description_short claim_value: In dienst van verleden en heden Heemkundekring Sankt-Tolbert Vaals In dienst van verleden en heden Heemkundekring Sankt-Tolbert Vaals In dienst @@ -522,6 +512,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:29.651223+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring Sankt Tolbert Vaals diff --git a/data/nde/enriched/entries/0399_heemkundevereniging_roggel.yaml b/data/nde/enriched/entries/0399_heemkundevereniging_roggel.yaml index 076768be40..5e405dc40e 100644 --- a/data/nde/enriched/entries/0399_heemkundevereniging_roggel.yaml +++ b/data/nde/enriched/entries/0399_heemkundevereniging_roggel.yaml @@ -478,18 +478,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:31.337738+00:00' source_archive: web/0399/heemkunderoggel.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Heemkunde Roggel - source_url: https://heemkunderoggel.nl/ - retrieved_on: '2025-11-29T16:10:34.722234+00:00' - xpath: /html/head/title - html_file: web/0399/heemkunderoggel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:31.336486+00:00' - claim_type: description_short claim_value: 'Geschied- en heemkundige info over Roggel en de directe omgeving. Welkom op de website Welkom op de website van Heemkundevereniging Roggel. Naast @@ -546,6 +536,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:31.337569+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundevereniging Roggel diff --git a/data/nde/enriched/entries/0402_Q117843367.yaml b/data/nde/enriched/entries/0402_Q117843367.yaml index a34d418230..986710c80d 100644 --- a/data/nde/enriched/entries/0402_Q117843367.yaml +++ b/data/nde/enriched/entries/0402_Q117843367.yaml @@ -441,18 +441,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:31.975874+00:00' source_archive: web/0402/historischcentrumlimburg.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://historischcentrumlimburg.nl/ - retrieved_on: '2025-11-29T16:08:04.760725+00:00' - xpath: /html/head/title - html_file: web/0402/historischcentrumlimburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:31.974920+00:00' - claim_type: description_short claim_value: Beschrijving raw_value: Beschrijving @@ -533,6 +523,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:31.975798+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Centrum Limburg locatie Heerlen diff --git a/data/nde/enriched/entries/0403_Q21004453.yaml b/data/nde/enriched/entries/0403_Q21004453.yaml index 49f817bb1c..0ab38f0056 100644 --- a/data/nde/enriched/entries/0403_Q21004453.yaml +++ b/data/nde/enriched/entries/0403_Q21004453.yaml @@ -623,18 +623,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:32.071713+00:00' source_archive: web/0403/historischcentrumlimburg.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://historischcentrumlimburg.nl/ - retrieved_on: '2025-11-29T16:08:05.192621+00:00' - xpath: /html/head/title - html_file: web/0403/historischcentrumlimburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:32.071117+00:00' - claim_type: description_short claim_value: Beschrijving raw_value: Beschrijving @@ -715,6 +705,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:32.071656+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Centrum Limburg diff --git a/data/nde/enriched/entries/0414_Q111081369.yaml b/data/nde/enriched/entries/0414_Q111081369.yaml index 35d5316012..36350e7361 100644 --- a/data/nde/enriched/entries/0414_Q111081369.yaml +++ b/data/nde/enriched/entries/0414_Q111081369.yaml @@ -563,7 +563,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:34.006569+00:00' source_archive: web/0414/limburgsemolens.nl - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Limburgse Molens @@ -575,16 +575,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:34.005697+00:00' - - claim_type: address - claim_value: '' - raw_value: '' - source_url: https://www.limburgsemolens.nl/ - retrieved_on: '2025-11-29T16:10:31.990275+00:00' - xpath: /html/head/script[21] - html_file: web/0414/limburgsemolens.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_address - extraction_timestamp: '2025-12-01T10:45:34.006123+00:00' - claim_type: org_name claim_value: Het laatste nieuws raw_value: Het laatste nieuws @@ -595,6 +585,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:34.006438+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Molenstichting Limburg diff --git a/data/nde/enriched/entries/0417_Q2126143.yaml b/data/nde/enriched/entries/0417_Q2126143.yaml index 49882dc554..f0625dcbd6 100644 --- a/data/nde/enriched/entries/0417_Q2126143.yaml +++ b/data/nde/enriched/entries/0417_Q2126143.yaml @@ -704,7 +704,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:34.952937+00:00' source_archive: web/0417/delocht.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: De Locht @@ -716,16 +716,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:34.951641+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://delocht.nl/ - retrieved_on: '2025-11-29T16:13:45.900342+00:00' - xpath: /html/body/div[1]/div/div/div[1]/header/div[1]/div[1]/div/div[3]/div[2]/div[1]/div[1]/button/span[2]/svg/title - html_file: web/0417/delocht.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:34.951671+00:00' - claim_type: email claim_value: '%20info@delocht.nl' raw_value: '%20info@delocht.nl' @@ -776,6 +766,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:34.952383+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De Locht diff --git a/data/nde/enriched/entries/0420_Q110053532.yaml b/data/nde/enriched/entries/0420_Q110053532.yaml index 49e3761bde..2b60435541 100644 --- a/data/nde/enriched/entries/0420_Q110053532.yaml +++ b/data/nde/enriched/entries/0420_Q110053532.yaml @@ -524,18 +524,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:36.379205+00:00' source_archive: web/0420/museumvandevrouw.nl - claims_count: 11 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum van de Vrouw - source_url: https://www.museumvandevrouw.nl/ - retrieved_on: '2025-11-29T16:12:16.581474+00:00' - xpath: /html/head/title - html_file: web/0420/museumvandevrouw.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:36.378145+00:00' - claim_type: description_short claim_value: Het museum vertelt verhalen die het vrouwenleven kenmerken en ons op bijzondere wijze verbinden. Hedendaagse thema’s, verbonden met oude tradities, @@ -648,6 +638,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:36.379044+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum van de Vrouw diff --git a/data/nde/enriched/entries/0427_Q2202460.yaml b/data/nde/enriched/entries/0427_Q2202460.yaml index b12f18b389..2d411ed975 100644 --- a/data/nde/enriched/entries/0427_Q2202460.yaml +++ b/data/nde/enriched/entries/0427_Q2202460.yaml @@ -1031,18 +1031,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:38.846544+00:00' source_archive: web/0427/romeinsekatakomben.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Romeinse Katakomben - source_url: https://www.romeinsekatakomben.nl/ - retrieved_on: '2025-11-29T16:14:11.466829+00:00' - xpath: /html/head/title - html_file: web/0427/romeinsekatakomben.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:38.845819+00:00' - claim_type: description_short claim_value: Museum Romeinse Katakomben is een onvergetelijke belevenis. Dwalend door de onderaardse gangen neemt de gids je mee naar het Rome uit het vroege @@ -1107,6 +1097,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:38.846375+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Romeinse Katakomben diff --git a/data/nde/enriched/entries/0430_unknown.yaml b/data/nde/enriched/entries/0430_unknown.yaml index be34b20fc5..2b798c5ebb 100644 --- a/data/nde/enriched/entries/0430_unknown.yaml +++ b/data/nde/enriched/entries/0430_unknown.yaml @@ -175,18 +175,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:39.109284+00:00' source_archive: web/0430/limburgserfgoednet.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://limburgserfgoednet.nl/stichtingbeheerkunstschattensintlambertus - retrieved_on: '2025-11-29T16:14:08.345397+00:00' - xpath: /html/head/title - html_file: web/0430/limburgserfgoednet.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:39.108699+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -247,6 +237,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:39.109049+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Beheer Kunstschatten St. Lambertus diff --git a/data/nde/enriched/entries/0432_Q56460988.yaml b/data/nde/enriched/entries/0432_Q56460988.yaml index 457700205b..e68f479fc8 100644 --- a/data/nde/enriched/entries/0432_Q56460988.yaml +++ b/data/nde/enriched/entries/0432_Q56460988.yaml @@ -548,7 +548,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:27.463179+00:00' source_archive: web/0432/filmhuiszicht.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: Welkom bij Filmhuis ZICHT! @@ -640,36 +640,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:25:27.462863+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - source_url: https://www.filmhuiszicht.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div/div[2]/main/div/div/article/div[3]/div/div/a[1] - html_file: web/0432/filmhuiszicht.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:27.462894+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - source_url: https://www.filmhuiszicht.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div/div[2]/main/div/div/article/div[3]/div/div/a[2] - html_file: web/0432/filmhuiszicht.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:27.462901+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?original_referer=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht&url=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - raw_value: https://twitter.com/intent/tweet?original_referer=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht&url=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - source_url: https://www.filmhuiszicht.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div/div[2]/main/div/div/article/div[3]/div/div/a[3] - html_file: web/0432/filmhuiszicht.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:27.462906+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content youtube_enrichment: source_url: https://www.filmhuiszicht.nl/media/oembed?url=https%3A//www.youtube.com/watch%3Fv%3DY9ve22tKDzY&max_width=0&max_height=0&hash=9q__k-xwLv0ehz2VJW5bSohNdA-S0XcYVtMAHf9SMB8 fetch_timestamp: '2025-12-01T17:34:22.710861+00:00' diff --git a/data/nde/enriched/entries/0441_Q892727.yaml b/data/nde/enriched/entries/0441_Q892727.yaml index 634303f788..6cdfda97a9 100644 --- a/data/nde/enriched/entries/0441_Q892727.yaml +++ b/data/nde/enriched/entries/0441_Q892727.yaml @@ -1559,7 +1559,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:42.790289+00:00' source_archive: web/0441/bonnefanten.nl - claims_count: 6 + claims_count: 4 claims: - claim_type: org_name claim_value: Bonnefanten — The art museum of Limburg @@ -1593,26 +1593,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:42.789882+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=undefined - raw_value: https://www.facebook.com/sharer/sharer.php?u=undefined - source_url: http://www.bonnefanten.nl/en/ - retrieved_on: '2025-11-29T16:17:05.490417+00:00' - xpath: /html/body/div[2]/div[3]/main/div/div[2]/div[2]/ul/li[1]/a - html_file: web/0441/bonnefanten.nl/mirror/www.bonnefanten.nl/en/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:42.790184+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Bonnefanten&url=undefined - raw_value: https://twitter.com/intent/tweet?text=Bonnefanten&url=undefined - source_url: http://www.bonnefanten.nl/en/ - retrieved_on: '2025-11-29T16:17:05.490417+00:00' - xpath: /html/body/div[2]/div[3]/main/div/div[2]/div[2]/ul/li[2]/a - html_file: web/0441/bonnefanten.nl/mirror/www.bonnefanten.nl/en/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:42.790192+00:00' - claim_type: org_name claim_value: Bonnefanten raw_value: Bonnefanten @@ -1623,6 +1603,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:42.790231+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bonnefanten museum diff --git a/data/nde/enriched/entries/0442_unknown.yaml b/data/nde/enriched/entries/0442_unknown.yaml index 6253754fcd..d0c1dbcef0 100644 --- a/data/nde/enriched/entries/0442_unknown.yaml +++ b/data/nde/enriched/entries/0442_unknown.yaml @@ -206,18 +206,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:43.244622+00:00' source_archive: web/0442/oudamerica.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Werkgroep Oud-America - source_url: https://oudamerica.nl/ - retrieved_on: '2025-11-29T16:18:01.886475+00:00' - xpath: /html/head/title - html_file: web/0442/oudamerica.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:43.244132+00:00' - claim_type: description_short claim_value: 'Stichting werkgroep Oud-America Opgericht in 1979 Tijdens een vergadering van de dorpsraad America in december 1979 werd door enkele enthousiaste personen @@ -278,6 +268,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:45:43.244487+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Werkgroep Oud-America diff --git a/data/nde/enriched/entries/0453_Q59962312.yaml b/data/nde/enriched/entries/0453_Q59962312.yaml index 80d2386d4b..200a1af3e1 100644 --- a/data/nde/enriched/entries/0453_Q59962312.yaml +++ b/data/nde/enriched/entries/0453_Q59962312.yaml @@ -976,18 +976,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:45.396322+00:00' source_archive: web/0453/bibliotheekmb.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.bibliotheekmb.nl - retrieved_on: '2025-11-29T16:21:21.690535+00:00' - xpath: /html/head/title - html_file: web/0453/bibliotheekmb.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:45.395721+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/bibliotheeklochal raw_value: https://www.facebook.com/bibliotheeklochal @@ -1028,6 +1018,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:45.396180+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Midden-Brabant diff --git a/data/nde/enriched/entries/0457_Q110907483.yaml b/data/nde/enriched/entries/0457_Q110907483.yaml index a8d3c23208..41314cfbe5 100644 --- a/data/nde/enriched/entries/0457_Q110907483.yaml +++ b/data/nde/enriched/entries/0457_Q110907483.yaml @@ -342,18 +342,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:45.709494+00:00' source_archive: web/0457/nyenaenwasvannassau.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.nyenaenwasvannassau.nl/site/ - retrieved_on: '2025-11-29T16:20:20.575005+00:00' - xpath: /html/head/title - html_file: web/0457/nyenaenwasvannassau.nl/mirror/www.nyenaenwasvannassau.nl/site/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:45.709304+00:00' - claim_type: description_short claim_value: cultuur-historische vereniging Nyen aenwas van Nassau Dinteloord raw_value: cultuur-historische vereniging Nyen aenwas van Nassau Dinteloord @@ -364,6 +354,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:45.709350+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Cultuur Historische Vereniging Nyen Aenwas van Nassau diff --git a/data/nde/enriched/entries/0461_Q1278103.yaml b/data/nde/enriched/entries/0461_Q1278103.yaml index bce6a50973..b972f189b7 100644 --- a/data/nde/enriched/entries/0461_Q1278103.yaml +++ b/data/nde/enriched/entries/0461_Q1278103.yaml @@ -709,18 +709,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:46.535506+00:00' source_archive: web/0461/eindhovenmuseum.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Eindhoven Museum - source_url: https://eindhovenmuseum.nl/ - retrieved_on: '2025-11-29T16:20:34.350093+00:00' - xpath: /html/head/title - html_file: web/0461/eindhovenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:46.534621+00:00' - claim_type: description_short claim_value: Eindhoven Museum beheert 23.000 objecten uit de geschiedenis van Eindhoven en omgeving. Een archief dat een unieke kijk geeft in de cultuurhistorische @@ -795,6 +785,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:46.535255+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Eindhoven Museum diff --git a/data/nde/enriched/entries/0468_unknown.yaml b/data/nde/enriched/entries/0468_unknown.yaml index 696a4778b9..21afa74985 100644 --- a/data/nde/enriched/entries/0468_unknown.yaml +++ b/data/nde/enriched/entries/0468_unknown.yaml @@ -292,19 +292,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:47.655571+00:00' source_archive: web/0468/proxy.archieven.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Archieven.nl - raw_value: Archieven.nl - Gemeentearchief Bergen op Zoom (per 1 juli 2016 samengevoegd - bij h... - source_url: https://proxy.archieven.nl/0/4A9E2B87DB3F1949E053CA00A8C054B4 - retrieved_on: '2025-11-29T16:20:37.709505+00:00' - xpath: /html/head/title - html_file: web/0468/proxy.archieven.nl/pages/0_4A9E2B87DB3F1949E053CA00A8C054B4.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:47.654661+00:00' - claim_type: org_name claim_value: organisatie_link-svg raw_value: organisatie_link-svg @@ -349,6 +338,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:45:47.655299+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Bergen op Zoom diff --git a/data/nde/enriched/entries/0469_Q2173323.yaml b/data/nde/enriched/entries/0469_Q2173323.yaml index 8eb52f269f..174da4ca95 100644 --- a/data/nde/enriched/entries/0469_Q2173323.yaml +++ b/data/nde/enriched/entries/0469_Q2173323.yaml @@ -461,18 +461,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:47.730847+00:00' source_archive: web/0469/gemeentearchiefgemert-bakel.nl - claims_count: 14 + claims_count: 13 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeentearchief Gemert-Bakel - source_url: http://www.gemeentearchiefgemert-bakel.nl/ - retrieved_on: '2025-11-29T23:28:18.855592+00:00' - xpath: /html/head/title - html_file: web/0469/gemeentearchiefgemert-bakel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:47.730297+00:00' - claim_type: org_name claim_value: ander icon raw_value: ander icon @@ -603,6 +593,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:47.730728+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Gemert-Bakel diff --git a/data/nde/enriched/entries/0471_unknown.yaml b/data/nde/enriched/entries/0471_unknown.yaml index f630bb516c..1069436fc2 100644 --- a/data/nde/enriched/entries/0471_unknown.yaml +++ b/data/nde/enriched/entries/0471_unknown.yaml @@ -296,18 +296,16 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:47.822412+00:00' source_archive: web/0471/gemeentearchiefroosendaal.nl - claims_count: 1 - claims: - - claim_type: org_name - claim_value: gemeentearchiefroosendaal.nl - raw_value: gemeentearchiefroosendaal.nl - source_url: https://gemeentearchiefroosendaal.nl/ - retrieved_on: '2025-11-29T16:20:40.196712+00:00' - xpath: /html/head/title - html_file: web/0471/gemeentearchiefroosendaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:47.822235+00:00' + claims_count: 0 + claims: [] + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: gemeentearchiefroosendaal.nl diff --git a/data/nde/enriched/entries/0472_Q1969635.yaml b/data/nde/enriched/entries/0472_Q1969635.yaml index 575c678c8c..1087950ae7 100644 --- a/data/nde/enriched/entries/0472_Q1969635.yaml +++ b/data/nde/enriched/entries/0472_Q1969635.yaml @@ -684,18 +684,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:47.916622+00:00' source_archive: web/0472/geniemuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Geniemuseum - source_url: http://www.geniemuseum.nl - retrieved_on: '2025-11-29T16:20:38.449546+00:00' - xpath: /html/head/title - html_file: web/0472/geniemuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:47.916070+00:00' - claim_type: description_short claim_value: Geniemuseum te Vught. Stichting Historische Genieverzameling raw_value: Geniemuseum te Vught. Stichting Historische Genieverzameling @@ -736,6 +726,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:47.916532+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Geniemuseum diff --git a/data/nde/enriched/entries/0474_Q110907346.yaml b/data/nde/enriched/entries/0474_Q110907346.yaml index b5c2148577..c20ac642c2 100644 --- a/data/nde/enriched/entries/0474_Q110907346.yaml +++ b/data/nde/enriched/entries/0474_Q110907346.yaml @@ -483,7 +483,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:48.400575+00:00' source_archive: web/0474/schoorudenhout.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: '''t Schoor » Erfgoedcentrum ’t Schoor Udenhout' @@ -495,16 +495,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:48.400196+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: http://www.schoorudenhout.nl/ - retrieved_on: '2025-11-29T16:21:20.488870+00:00' - xpath: /html/body/main/a/h1 - html_file: web/0474/schoorudenhout.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:45:48.400527+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: '''t Schoor » Erfgoedcentrum ’t Schoor Udenhout' diff --git a/data/nde/enriched/entries/0476_Q110907502.yaml b/data/nde/enriched/entries/0476_Q110907502.yaml index a423b96115..7a89f02bb2 100644 --- a/data/nde/enriched/entries/0476_Q110907502.yaml +++ b/data/nde/enriched/entries/0476_Q110907502.yaml @@ -347,18 +347,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:48.575884+00:00' source_archive: web/0476/heemkundevereniging.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.heemkundevereniging.nl/ - retrieved_on: '2025-11-29T16:20:42.059405+00:00' - xpath: /html/head/title - html_file: web/0476/heemkundevereniging.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:48.575295+00:00' - claim_type: description_short claim_value: Heemkundevereniging De Hooge Dorpen raw_value: Heemkundevereniging De Hooge Dorpen @@ -379,6 +369,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:48.575819+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkunde vereniging De Hooge Dorpen diff --git a/data/nde/enriched/entries/0481_Q110907480.yaml b/data/nde/enriched/entries/0481_Q110907480.yaml index 78d4b6f7c5..fcf50ff53c 100644 --- a/data/nde/enriched/entries/0481_Q110907480.yaml +++ b/data/nde/enriched/entries/0481_Q110907480.yaml @@ -344,18 +344,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:49.186768+00:00' source_archive: web/0481/heemkunde-megen-haren-macharen.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: home - raw_value: home - Heemkunde Megen Haren Macharen - source_url: http://www.heemkunde-megen-haren-macharen.nl/ - retrieved_on: '2025-11-29T16:21:23.623722+00:00' - xpath: /html/head/title - html_file: web/0481/heemkunde-megen-haren-macharen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:49.185928+00:00' - claim_type: description_short claim_value: Welkom bij Heemkundekring Megen, Haren en Macharen Onze heemkundekring is toegewijd aan het verzamelen, bewaren en delen van de rijke geschiedenis @@ -394,6 +384,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:45:49.186482+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkunde Megen Haren en Macharen diff --git a/data/nde/enriched/entries/0484_Q110907488.yaml b/data/nde/enriched/entries/0484_Q110907488.yaml index 5e16232f2d..8eb6371a7a 100644 --- a/data/nde/enriched/entries/0484_Q110907488.yaml +++ b/data/nde/enriched/entries/0484_Q110907488.yaml @@ -273,18 +273,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:50.052478+00:00' source_archive: web/0484/dekleinemeijerij.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Heemkundekring - source_url: http://www.dekleinemeijerij.nl - retrieved_on: '2025-11-29T16:21:41.747522+00:00' - xpath: /html/head/title - html_file: web/0484/dekleinemeijerij.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:50.051646+00:00' - claim_type: description_short claim_value: 'Heemkundekring ''De Kleine Meijerij'' heeft als werkgebied Berkel-Enschot, Biezenmortel, Esch, Haaren, Helvoirt, Heukelom, Moergestel, Oisterwijk en Udenhout @@ -309,6 +299,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:50.051772+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring "De Kleine Meijerij" diff --git a/data/nde/enriched/entries/0486_unknown.yaml b/data/nde/enriched/entries/0486_unknown.yaml index dd649206f9..762a927b75 100644 --- a/data/nde/enriched/entries/0486_unknown.yaml +++ b/data/nde/enriched/entries/0486_unknown.yaml @@ -160,18 +160,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:50.586758+00:00' source_archive: web/0486/heemkundelangenboom.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Heemkundelangenboom.nl - raw_value: Heemkundelangenboom.nl - source_url: https://www.heemkundelangenboom.nl - retrieved_on: '2025-11-29T16:21:43.012971+00:00' - xpath: /html/head/title - html_file: web/0486/heemkundelangenboom.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:50.585074+00:00' - claim_type: phone claim_value: '+31610705225' raw_value: '+31610705225' @@ -202,6 +192,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:50.586366+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring "Felix Walter" diff --git a/data/nde/enriched/entries/0496_Q110907501.yaml b/data/nde/enriched/entries/0496_Q110907501.yaml index eb8219184b..64cf0de08e 100644 --- a/data/nde/enriched/entries/0496_Q110907501.yaml +++ b/data/nde/enriched/entries/0496_Q110907501.yaml @@ -387,18 +387,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:54.205572+00:00' source_archive: web/0496/deheerlijkheidoirschot.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - de Heerlijkheid - source_url: http://www.deheerlijkheidoirschot.nl/ - retrieved_on: '2025-11-29T16:29:34.743382+00:00' - xpath: /html/head/title - html_file: web/0496/deheerlijkheidoirschot.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:54.204866+00:00' - claim_type: org_name claim_value: de Heerlijkheid raw_value: de Heerlijkheid @@ -419,6 +409,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:54.205397+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring De Heerlijkheid Oirschot diff --git a/data/nde/enriched/entries/0498_Q110907495.yaml b/data/nde/enriched/entries/0498_Q110907495.yaml index 3dc2fcb80c..df4da18347 100644 --- a/data/nde/enriched/entries/0498_Q110907495.yaml +++ b/data/nde/enriched/entries/0498_Q110907495.yaml @@ -392,18 +392,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:54.783363+00:00' source_archive: web/0498/heemkundekringgemert.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Heemkundekring Gemert - source_url: http://www.heemkundekringgemert.nl/ - retrieved_on: '2025-11-29T16:28:18.189967+00:00' - xpath: /html/head/title - html_file: web/0498/heemkundekringgemert.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:54.782762+00:00' - claim_type: org_name claim_value: Heemkundekring Gemert raw_value: Heemkundekring Gemert @@ -454,6 +444,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:54.783302+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring De Kommanderij diff --git a/data/nde/enriched/entries/0502_Q110907460.yaml b/data/nde/enriched/entries/0502_Q110907460.yaml index 1a53aa7508..f432685a2c 100644 --- a/data/nde/enriched/entries/0502_Q110907460.yaml +++ b/data/nde/enriched/entries/0502_Q110907460.yaml @@ -260,18 +260,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:55.686199+00:00' source_archive: web/0502/denbeerschenaard.nl - claims_count: 6 + claims_count: 1 claims: - - claim_type: org_name - claim_value: denbeerschenaard.nl - raw_value: denbeerschenaard.nl - source_url: http://www.denbeerschenaard.nl - retrieved_on: '2025-11-29T16:28:23.449747+00:00' - xpath: /html/head/title - html_file: web/0502/denbeerschenaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:55.684590+00:00' - claim_type: email claim_value: s.en.a.vanhelvoort@gmail.com raw_value: s.en.a.vanhelvoort@gmail.com @@ -282,46 +272,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:45:55.685305+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u= - raw_value: https://www.facebook.com/sharer/sharer.php?u= - source_url: http://www.denbeerschenaard.nl - retrieved_on: '2025-11-29T16:28:23.449747+00:00' - xpath: /html/body/div[2]/div[2]/div/div/div/div/div[1]/div[1]/div[1]/div/div/div/div/ul/li[1]/a - html_file: web/0502/denbeerschenaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:55.685755+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url= - raw_value: https://www.linkedin.com/shareArticle?mini=true&url= - source_url: http://www.denbeerschenaard.nl - retrieved_on: '2025-11-29T16:28:23.449747+00:00' - xpath: /html/body/div[2]/div[2]/div/div/div/div/div[1]/div[1]/div[1]/div/div/div/div/ul/li[2]/a - html_file: web/0502/denbeerschenaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:55.685775+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?url= - raw_value: https://twitter.com/share?url= - source_url: http://www.denbeerschenaard.nl - retrieved_on: '2025-11-29T16:28:23.449747+00:00' - xpath: /html/body/div[2]/div[2]/div/div/div/div/div[1]/div[1]/div[1]/div/div/div/div/ul/li[3]/a - html_file: web/0502/denbeerschenaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:55.685787+00:00' - - claim_type: org_name - claim_value: 'Nieuw:' - raw_value: 'Nieuw:' - source_url: http://www.denbeerschenaard.nl - retrieved_on: '2025-11-29T16:28:23.449747+00:00' - xpath: /html/body/div[2]/div[3]/div/div/div/div/div[2]/div/div/div/div/div[1]/div/div[2]/div/div/div/h1[1] - html_file: web/0502/denbeerschenaard.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:45:55.685977+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring Den Beerschen Aard diff --git a/data/nde/enriched/entries/0515_Q110907441.yaml b/data/nde/enriched/entries/0515_Q110907441.yaml index db684bc0ac..3a5928635d 100644 --- a/data/nde/enriched/entries/0515_Q110907441.yaml +++ b/data/nde/enriched/entries/0515_Q110907441.yaml @@ -442,18 +442,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:59.386765+00:00' source_archive: web/0515/heemkundebladel.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: heemkundebladel.nl - raw_value: heemkundebladel.nl - source_url: http://www.heemkundebladel.nl - retrieved_on: '2025-11-29T16:34:45.284626+00:00' - xpath: /html/head/title - html_file: web/0515/heemkundebladel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:59.386417+00:00' - claim_type: description_short claim_value: Dit domein kan te koop zijn! raw_value: Dit domein kan te koop zijn! @@ -464,6 +454,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:59.386619+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Pladella Villa Heemkunde Bladel diff --git a/data/nde/enriched/entries/0521_Q110907442.yaml b/data/nde/enriched/entries/0521_Q110907442.yaml index 17ebb4cb7d..344ca4b3ad 100644 --- a/data/nde/enriched/entries/0521_Q110907442.yaml +++ b/data/nde/enriched/entries/0521_Q110907442.yaml @@ -439,18 +439,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:00.593017+00:00' source_archive: web/0521/heemkundekringzeeland.nl - claims_count: 10 + claims_count: 7 claims: - - claim_type: org_name - claim_value: heemkundekringzeeland.nl - raw_value: heemkundekringzeeland.nl – Erfgoed en heemkunde uit dorp Zeeland (NBr) - source_url: http://www.heemkundekringzeeland.nl/ - retrieved_on: '2025-11-29T16:36:02.304955+00:00' - xpath: /html/head/title - html_file: web/0521/heemkundekringzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:00.591772+00:00' - claim_type: org_name claim_value: Klik om het zoekinvoerveld te openen raw_value: Klik om het zoekinvoerveld te openen @@ -501,26 +491,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:00.592664+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https://heemkundekringzeeland.nl/&t=Homepage - raw_value: https://www.facebook.com/sharer.php?u=https://heemkundekringzeeland.nl/&t=Homepage - source_url: http://www.heemkundekringzeeland.nl/ - retrieved_on: '2025-11-29T16:36:02.304955+00:00' - xpath: /html/body/div[1]/div/div[8]/div/div/div/div/div[2]/div/div/ul/li[1]/a - html_file: web/0521/heemkundekringzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:00.592687+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?text=Homepage&url=https://heemkundekringzeeland.nl/ - raw_value: https://twitter.com/share?text=Homepage&url=https://heemkundekringzeeland.nl/ - source_url: http://www.heemkundekringzeeland.nl/ - retrieved_on: '2025-11-29T16:36:02.304955+00:00' - xpath: /html/body/div[1]/div/div[8]/div/div/div/div/div[2]/div/div/ul/li[2]/a - html_file: web/0521/heemkundekringzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:00.592692+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Heemkundekring-Zeeland-1757812647846979 raw_value: https://www.facebook.com/Heemkundekring-Zeeland-1757812647846979 @@ -541,6 +511,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:00.592799+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring Zeeland diff --git a/data/nde/enriched/entries/0522_Q110907510.yaml b/data/nde/enriched/entries/0522_Q110907510.yaml index d514964d13..4523c97fbe 100644 --- a/data/nde/enriched/entries/0522_Q110907510.yaml +++ b/data/nde/enriched/entries/0522_Q110907510.yaml @@ -556,7 +556,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:01.083646+00:00' source_archive: web/0522/heemkundedendungen.nl - claims_count: 11 + claims_count: 8 claims: - claim_type: org_name claim_value: Welkom bij Heemkundevereniging "Op die Dunghen" @@ -569,36 +569,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:01.082289+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: http://www.heemkundedendungen.nl/ - retrieved_on: '2025-11-29T16:38:07.166640+00:00' - xpath: /html/body/div[1]/div/div[2]/div/div[1]/a[1]/svg/title - html_file: web/0522/heemkundedendungen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:01.082302+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: http://www.heemkundedendungen.nl/ - retrieved_on: '2025-11-29T16:38:07.166640+00:00' - xpath: /html/body/div[1]/div/div[2]/div/div[1]/a[2]/svg/title - html_file: web/0522/heemkundedendungen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:01.082307+00:00' - - claim_type: org_name - claim_value: Search - raw_value: Search - source_url: http://www.heemkundedendungen.nl/ - retrieved_on: '2025-11-29T16:38:07.166640+00:00' - xpath: /html/body/div[1]/div/div[4]/div/div[1]/section[1]/search/form/div/span/svg/title - html_file: web/0522/heemkundedendungen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:01.082313+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -669,6 +639,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:01.083511+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundevereniging Op die Dunghen diff --git a/data/nde/enriched/entries/0526_Q110907473.yaml b/data/nde/enriched/entries/0526_Q110907473.yaml index 0c2ae6113e..ec67412051 100644 --- a/data/nde/enriched/entries/0526_Q110907473.yaml +++ b/data/nde/enriched/entries/0526_Q110907473.yaml @@ -300,18 +300,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:31.309953+00:00' source_archive: web/0526/heemkundegeffen.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: heemkundegeffen.nl - raw_value: heemkundegeffen.nl – Heemkunde Werkgroep Geffen - source_url: http://www.heemkundegeffen.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0526/heemkundegeffen.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:31.309435+00:00' - claim_type: email claim_value: heemkundegroep@gmail.com raw_value: heemkundegroep@gmail.com @@ -322,3 +312,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:25:31.309767+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0527_Q110907499.yaml b/data/nde/enriched/entries/0527_Q110907499.yaml index 11efd6a917..49b1b67d5f 100644 --- a/data/nde/enriched/entries/0527_Q110907499.yaml +++ b/data/nde/enriched/entries/0527_Q110907499.yaml @@ -303,18 +303,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:02.013149+00:00' source_archive: web/0527/nuwelant.nl - claims_count: 7 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nuwelant - source_url: https://www.nuwelant.nl/ - retrieved_on: '2025-11-29T23:29:13.422667+00:00' - xpath: /html/head/title - html_file: web/0527/nuwelant.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:02.012369+00:00' - claim_type: description_short claim_value: Nuwelant is de heemkundewerkgroep van de parochie Nuland inclusief Heeseind. @@ -337,36 +327,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:46:02.012631+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.nuwelant.nl&t=Nuwelant - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.nuwelant.nl&t=Nuwelant - source_url: https://www.nuwelant.nl/ - retrieved_on: '2025-11-29T23:29:13.422667+00:00' - xpath: /html/body/div[3]/ul/li[1]/a - html_file: web/0527/nuwelant.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:02.012942+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?text=Nuwelant&url=https%3A%2F%2Fwww.nuwelant.nl - raw_value: https://twitter.com/share?text=Nuwelant&url=https%3A%2F%2Fwww.nuwelant.nl - source_url: https://www.nuwelant.nl/ - retrieved_on: '2025-11-29T23:29:13.422667+00:00' - xpath: /html/body/div[3]/ul/li[2]/a - html_file: web/0527/nuwelant.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:02.012946+00:00' - - claim_type: social_linkedin - claim_value: http://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.nuwelant.nl&title=Nuwelant - raw_value: http://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.nuwelant.nl&title=Nuwelant - source_url: https://www.nuwelant.nl/ - retrieved_on: '2025-11-29T23:29:13.422667+00:00' - xpath: /html/body/div[3]/ul/li[5]/a - html_file: web/0527/nuwelant.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:02.012952+00:00' - claim_type: org_name claim_value: Bezoek ons vernieuwd archief raw_value: Bezoek ons vernieuwd archief @@ -377,6 +337,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:02.013004+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundewerkgroep Nuwelant diff --git a/data/nde/enriched/entries/0530_Q110907466.yaml b/data/nde/enriched/entries/0530_Q110907466.yaml index dd3b58815f..480f53a427 100644 --- a/data/nde/enriched/entries/0530_Q110907466.yaml +++ b/data/nde/enriched/entries/0530_Q110907466.yaml @@ -431,7 +431,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:02.913053+00:00' source_archive: web/0530/oudevrijheid.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Home » Heemkundekring De Oude Vrijheid Sint-Oedenrode @@ -491,16 +491,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:02.912910+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.oudevrijheid.nl - retrieved_on: '2025-11-29T16:38:36.818019+00:00' - xpath: /html/body/div/div[3]/div/div/div/div/h1 - html_file: web/0530/oudevrijheid.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:02.912978+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundige Kring De Oude Vrijheid diff --git a/data/nde/enriched/entries/0534_Q110908866.yaml b/data/nde/enriched/entries/0534_Q110908866.yaml index c6ed60eb84..551f1f90db 100644 --- a/data/nde/enriched/entries/0534_Q110908866.yaml +++ b/data/nde/enriched/entries/0534_Q110908866.yaml @@ -727,7 +727,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:04.374832+00:00' source_archive: web/0534/kasteelheeswijk.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Kasteel Heeswijk @@ -791,16 +791,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:04.374449+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.kasteelheeswijk.nl/ - retrieved_on: '2025-11-29T16:39:00.497134+00:00' - xpath: /html/body/main/div[1]/h1 - html_file: web/0534/kasteelheeswijk.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:04.374511+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Kasteel Heeswijk diff --git a/data/nde/enriched/entries/0536_Q115131080.yaml b/data/nde/enriched/entries/0536_Q115131080.yaml index f8227730b6..28ba72b09b 100644 --- a/data/nde/enriched/entries/0536_Q115131080.yaml +++ b/data/nde/enriched/entries/0536_Q115131080.yaml @@ -684,18 +684,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:04.684203+00:00' source_archive: web/0536/maczekmemorialbreda.nl - claims_count: 14 + claims_count: 13 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Maczek Memorial - source_url: https://www.maczekmemorialbreda.nl/ - retrieved_on: '2025-11-29T16:39:03.407285+00:00' - xpath: /html/head/title - html_file: web/0536/maczekmemorialbreda.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:04.683005+00:00' - claim_type: description_short claim_value: Beleef het verhaal van Generaal Maczek en zijn soldaten van de 1e Poolse Pantserdivisie tijdens de Tweede Wereldoorlog. @@ -830,6 +820,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:04.684011+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Maczek Memorial Breda diff --git a/data/nde/enriched/entries/0537_Q2423105.yaml b/data/nde/enriched/entries/0537_Q2423105.yaml index c4bd84e206..82325e3e99 100644 --- a/data/nde/enriched/entries/0537_Q2423105.yaml +++ b/data/nde/enriched/entries/0537_Q2423105.yaml @@ -637,7 +637,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:05.137067+00:00' source_archive: web/0537/museumboerderij.nl - claims_count: 5 + claims_count: 4 claims: - claim_type: org_name claim_value: De Meierijsche Museumboerderij @@ -679,16 +679,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:05.136696+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.museumboerderij.nl/ - retrieved_on: '2025-11-29T16:40:26.326802+00:00' - xpath: /html/body/div[1]/div/div/div/div[1]/div/div[2]/div/div/div/div/h1 - html_file: web/0537/museumboerderij.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:05.136786+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Meierijsche Museumboerderij diff --git a/data/nde/enriched/entries/0539_Q1842735.yaml b/data/nde/enriched/entries/0539_Q1842735.yaml index 2c55fab1ed..a30fb4699a 100644 --- a/data/nde/enriched/entries/0539_Q1842735.yaml +++ b/data/nde/enriched/entries/0539_Q1842735.yaml @@ -647,18 +647,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:05.281442+00:00' source_archive: web/0539/museumderoos.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum De Roos - source_url: http://www.museumderoos.nl - retrieved_on: '2025-11-29T16:40:33.290943+00:00' - xpath: /html/head/title - html_file: web/0539/museumderoos.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:05.281033+00:00' - claim_type: org_name claim_value: Museum De Roos raw_value: Museum De Roos @@ -679,6 +669,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:05.281366+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum De Roos diff --git a/data/nde/enriched/entries/0540_Q110906682.yaml b/data/nde/enriched/entries/0540_Q110906682.yaml index bf5afdd847..ba72a93cf5 100644 --- a/data/nde/enriched/entries/0540_Q110906682.yaml +++ b/data/nde/enriched/entries/0540_Q110906682.yaml @@ -774,7 +774,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:05.454960+00:00' source_archive: web/0540/museumoudeslot.nl - claims_count: 16 + claims_count: 13 claims: - claim_type: org_name claim_value: Homepage @@ -798,26 +798,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:05.453715+00:00' - - claim_type: org_name - claim_value: Museum 't Oude Slot - raw_value: Museum 't Oude Slot - source_url: http://www.museumoudeslot.nl - retrieved_on: '2025-11-29T16:40:36.177230+00:00' - xpath: /html/head/meta[10] - html_file: web/0540/museumoudeslot.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:46:05.453945+00:00' - - claim_type: org_name - claim_value: Museum 't Oude Slot - raw_value: Museum 't Oude Slot - source_url: http://www.museumoudeslot.nl - retrieved_on: '2025-11-29T16:40:36.177230+00:00' - xpath: /html/head/script[2] - html_file: web/0540/museumoudeslot.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_name - extraction_timestamp: '2025-12-01T10:46:05.454039+00:00' - claim_type: description claim_value: Museum 't Oude Slot raw_value: Museum 't Oude Slot @@ -928,18 +908,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:05.454653+00:00' - - claim_type: org_name - claim_value: 'Erfgoed, kunst en educatie: het is allemaal te vinden in Museum - ’t Oude Slot.' - raw_value: 'Erfgoed, kunst en educatie: het is allemaal te vinden in Museum ’t - Oude Slot.' - source_url: http://www.museumoudeslot.nl - retrieved_on: '2025-11-29T16:40:36.177230+00:00' - xpath: /html/body/div[3]/main/section[1]/div[3]/div/div[1]/div/div/div[1]/div/h1 - html_file: web/0540/museumoudeslot.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:05.454766+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum 't Oude Slot diff --git a/data/nde/enriched/entries/0547_Q3483633.yaml b/data/nde/enriched/entries/0547_Q3483633.yaml index 9fd5c2511c..abe4fb102f 100644 --- a/data/nde/enriched/entries/0547_Q3483633.yaml +++ b/data/nde/enriched/entries/0547_Q3483633.yaml @@ -895,7 +895,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:06.185571+00:00' source_archive: web/0547/dewieger.nl - claims_count: 18 + claims_count: 14 claims: - claim_type: org_name claim_value: De Wieger Deurne @@ -907,16 +907,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:06.183776+00:00' - - claim_type: org_name - claim_value: close - raw_value: close - source_url: http://www.dewieger.nl - retrieved_on: '2025-11-29T16:40:44.355304+00:00' - xpath: /html/body/svg/defs/symbol[1]/title - html_file: web/0547/dewieger.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:06.183790+00:00' - claim_type: org_name claim_value: arrow-circle-o-down raw_value: arrow-circle-o-down @@ -927,26 +917,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:06.183794+00:00' - - claim_type: org_name - claim_value: facebook - raw_value: facebook - source_url: http://www.dewieger.nl - retrieved_on: '2025-11-29T16:40:44.355304+00:00' - xpath: /html/body/svg/defs/symbol[3]/title - html_file: web/0547/dewieger.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:06.183798+00:00' - - claim_type: org_name - claim_value: linkedin - raw_value: linkedin - source_url: http://www.dewieger.nl - retrieved_on: '2025-11-29T16:40:44.355304+00:00' - xpath: /html/body/svg/defs/symbol[4]/title - html_file: web/0547/dewieger.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:06.183801+00:00' - claim_type: org_name claim_value: ellipsis-v raw_value: ellipsis-v @@ -957,16 +927,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:06.183804+00:00' - - claim_type: org_name - claim_value: instagram - raw_value: instagram - source_url: http://www.dewieger.nl - retrieved_on: '2025-11-29T16:40:44.355304+00:00' - xpath: /html/body/svg/defs/symbol[6]/title - html_file: web/0547/dewieger.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:06.183807+00:00' - claim_type: org_name claim_value: long-arrow-left raw_value: long-arrow-left @@ -1077,6 +1037,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:06.184998+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De Wieger diff --git a/data/nde/enriched/entries/0552_Q110907382.yaml b/data/nde/enriched/entries/0552_Q110907382.yaml index 020b122409..13bc5e1414 100644 --- a/data/nde/enriched/entries/0552_Q110907382.yaml +++ b/data/nde/enriched/entries/0552_Q110907382.yaml @@ -402,18 +402,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:07.741528+00:00' source_archive: web/0552/kruysenhuis.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Kruysenhuis Oirschot - source_url: http://www.kruysenhuis.nl - retrieved_on: '2025-11-29T16:41:53.905471+00:00' - xpath: /html/head/title - html_file: web/0552/kruysenhuis.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:07.741186+00:00' - claim_type: email claim_value: info@kruysenhuis.nl raw_value: info@kruysenhuis.nl @@ -474,6 +464,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:07.741488+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Kruysenhuis diff --git a/data/nde/enriched/entries/0553_Q27949674.yaml b/data/nde/enriched/entries/0553_Q27949674.yaml index de9cdb24ae..ad35c677a8 100644 --- a/data/nde/enriched/entries/0553_Q27949674.yaml +++ b/data/nde/enriched/entries/0553_Q27949674.yaml @@ -916,18 +916,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:07.879236+00:00' source_archive: web/0553/museumklokenpeel.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Klok & Peel - source_url: http://www.museumklokenpeel.nl/ - retrieved_on: '2025-11-29T16:41:35.507077+00:00' - xpath: /html/head/title - html_file: web/0553/museumklokenpeel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:07.878316+00:00' - claim_type: description_short claim_value: "Een compleet dagje uit voor jong én oud, en eigenlijk gewoon twee\ \ musea voor de prijs van één. Bij ons leer je namelijk niet alleen alles over\ @@ -1038,6 +1028,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:07.879090+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Klok & Peel diff --git a/data/nde/enriched/entries/0554_Q2112422.yaml b/data/nde/enriched/entries/0554_Q2112422.yaml index d08f807b12..f61aeda3f7 100644 --- a/data/nde/enriched/entries/0554_Q2112422.yaml +++ b/data/nde/enriched/entries/0554_Q2112422.yaml @@ -919,18 +919,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:08.271284+00:00' source_archive: web/0554/museumkrona.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum Krona - source_url: https://www.museumkrona.nl/nl/ - retrieved_on: '2025-11-29T16:41:40.522595+00:00' - xpath: /html/head/title - html_file: web/0554/museumkrona.nl/mirror/www.museumkrona.nl/nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:08.270784+00:00' - claim_type: phone claim_value: +31413 26 34 31 raw_value: +31413 26 34 31 @@ -991,6 +981,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:08.271226+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Krona diff --git a/data/nde/enriched/entries/0559_Q2119394.yaml b/data/nde/enriched/entries/0559_Q2119394.yaml index 6725c39d9b..8ee4eb8335 100644 --- a/data/nde/enriched/entries/0559_Q2119394.yaml +++ b/data/nde/enriched/entries/0559_Q2119394.yaml @@ -701,18 +701,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:08.842074+00:00' source_archive: web/0559/museumvekemans.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Vekemans - source_url: http://www.museumvekemans.nl - retrieved_on: '2025-11-29T16:41:52.213034+00:00' - xpath: /html/head/title - html_file: web/0559/museumvekemans.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:08.841345+00:00' - claim_type: description_short claim_value: Museum Vekemans in Boxtel toont de ontwikkeling van wassen en strijken en biedt een overzicht van een eeuw Brabantse boerendracht. Beide collecties @@ -749,6 +739,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:46:08.841790+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Vekemans diff --git a/data/nde/enriched/entries/0566_Q4295172.yaml b/data/nde/enriched/entries/0566_Q4295172.yaml index 4fc454c9a7..04fc4f58fc 100644 --- a/data/nde/enriched/entries/0566_Q4295172.yaml +++ b/data/nde/enriched/entries/0566_Q4295172.yaml @@ -676,18 +676,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:10.305805+00:00' source_archive: web/0566/zouavenmuseum.nl - claims_count: 8 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Nederlands Zouavenmuseum - source_url: http://www.zouavenmuseum.nl - retrieved_on: '2025-11-29T16:42:02.160819+00:00' - xpath: /html/head/title - html_file: web/0566/zouavenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:10.305059+00:00' - claim_type: org_name claim_value: Stichting Nederlands Zouavenmuseum raw_value: Stichting Nederlands Zouavenmuseum @@ -718,26 +708,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:46:10.305589+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://zouavenmuseum.nl/nieuwsbericht/ - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://zouavenmuseum.nl/nieuwsbericht/ - source_url: http://www.zouavenmuseum.nl - retrieved_on: '2025-11-29T16:42:02.160819+00:00' - xpath: /html/body/div/div/ul/li/div/li/div/li/div[2]/div/section/article/div[2]/div/div/div/a[1] - html_file: web/0566/zouavenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:10.305711+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Welkom - raw_value: https://twitter.com/intent/tweet?text=Welkom - source_url: http://www.zouavenmuseum.nl - retrieved_on: '2025-11-29T16:42:02.160819+00:00' - xpath: /html/body/div/div/ul/li/div/li/div/li/div[2]/div/section/article/div[2]/div/div/div/a[2] - html_file: web/0566/zouavenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:10.305719+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/www.zouavenmuseum.nl/ raw_value: https://www.facebook.com/www.zouavenmuseum.nl/ @@ -758,6 +728,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:10.305756+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zouavenmuseum diff --git a/data/nde/enriched/entries/0567_Q1823028.yaml b/data/nde/enriched/entries/0567_Q1823028.yaml index 79db349cb2..6877f325ea 100644 --- a/data/nde/enriched/entries/0567_Q1823028.yaml +++ b/data/nde/enriched/entries/0567_Q1823028.yaml @@ -917,7 +917,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:10.395773+00:00' source_archive: web/0567/libertypark.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Domeinnaam Libertypark.nl overnemen? Koop nu en start met je plan. @@ -973,16 +973,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:46:10.395661+00:00' - - claim_type: org_name - claim_value: Libertypark.nl - raw_value: Libertypark.nl - source_url: http://www.libertypark.nl - retrieved_on: '2025-11-29T16:42:03.432042+00:00' - xpath: /html/body/section[1]/div[1]/div[1]/h1 - html_file: web/0567/libertypark.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:10.395730+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oorlogsmuseum Overloon diff --git a/data/nde/enriched/entries/0569_Q56459509.yaml b/data/nde/enriched/entries/0569_Q56459509.yaml index 7c82b5f851..a30eb515c8 100644 --- a/data/nde/enriched/entries/0569_Q56459509.yaml +++ b/data/nde/enriched/entries/0569_Q56459509.yaml @@ -537,18 +537,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:10.982436+00:00' source_archive: web/0569/pietervermeulenmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Pieter Vermeulen Museum - source_url: http://www.pietervermeulenmuseum.nl/ - retrieved_on: '2025-11-29T16:42:36.860974+00:00' - xpath: /html/head/title - html_file: web/0569/pietervermeulenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:10.981599+00:00' - claim_type: description_short claim_value: Het Pieter Vermeulen Museum is een kindvriendelijk natuurmuseum en het Centrum voor Natuur- en Milieueducatie in de Gemeente Velsen. @@ -631,6 +621,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:10.982337+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Pieter Vermeulen Museum diff --git a/data/nde/enriched/entries/0575_Q110907548.yaml b/data/nde/enriched/entries/0575_Q110907548.yaml index 0dd72afbe8..e4819928d3 100644 --- a/data/nde/enriched/entries/0575_Q110907548.yaml +++ b/data/nde/enriched/entries/0575_Q110907548.yaml @@ -544,28 +544,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:12.876455+00:00' source_archive: web/0575/westbrabantsarchief.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - West-Brabants Archief - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:43:10.305756+00:00' - xpath: /html/head/title - html_file: web/0575/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:12.875919+00:00' - - claim_type: org_name - claim_value: search - raw_value: search - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:43:10.305756+00:00' - xpath: /html/body/header/div/div/div/form/button/svg/title - html_file: web/0575/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:12.875932+00:00' - claim_type: org_name claim_value: info raw_value: info @@ -596,26 +576,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:12.875946+00:00' - - claim_type: org_name - claim_value: facebook - raw_value: facebook - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:43:10.305756+00:00' - xpath: /html/body/footer/div/div[3]/nav[1]/ul/li[1]/a/svg/title - html_file: web/0575/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:12.875950+00:00' - - claim_type: org_name - claim_value: instagram - raw_value: instagram - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:43:10.305756+00:00' - xpath: /html/body/footer/div/div[3]/nav[1]/ul/li[3]/a/svg/title - html_file: web/0575/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:12.875954+00:00' - claim_type: description_short claim_value: West-Brabants Archief raw_value: West-Brabants Archief @@ -666,6 +626,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:12.876373+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: West-Brabants Archief diff --git a/data/nde/enriched/entries/0580_Q2176121.yaml b/data/nde/enriched/entries/0580_Q2176121.yaml index 0ee2e36624..ffe34bc601 100644 --- a/data/nde/enriched/entries/0580_Q2176121.yaml +++ b/data/nde/enriched/entries/0580_Q2176121.yaml @@ -681,18 +681,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:13.859300+00:00' source_archive: web/0580/stadsarchief.breda.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stadsarchief Breda - source_url: https://stadsarchief.breda.nl/ - retrieved_on: '2025-11-29T16:43:18.587458+00:00' - xpath: /html/head/title - html_file: web/0580/stadsarchief.breda.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:13.858765+00:00' - claim_type: description_short claim_value: Stadsarchief Breda verzamelt en beheert de geschiedenis van de stad en zijn inwoners. Die rijke historie stelt Stadsarchief Breda gratis ter beschikking @@ -747,6 +737,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:13.859236+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stadsarchief Breda diff --git a/data/nde/enriched/entries/0581_Q2783790.yaml b/data/nde/enriched/entries/0581_Q2783790.yaml index a3ee85807f..c1a9219eb4 100644 --- a/data/nde/enriched/entries/0581_Q2783790.yaml +++ b/data/nde/enriched/entries/0581_Q2783790.yaml @@ -617,7 +617,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:13.913914+00:00' source_archive: web/0581/graafsmuseum.nl - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: graafsmuseum.nl - This website is for sale! - graafsmuseum Resources @@ -647,16 +647,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:13.913795+00:00' - - claim_type: org_name - claim_value: graafsmuseum.nl - raw_value: graafsmuseum.nl - source_url: http://www.graafsmuseum.nl - retrieved_on: '2025-11-29T16:43:19.145358+00:00' - xpath: /html/body/div[1]/div/div/h1 - html_file: web/0581/graafsmuseum.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:13.913902+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stadsmuseum Grave diff --git a/data/nde/enriched/entries/0592_Q2375610.yaml b/data/nde/enriched/entries/0592_Q2375610.yaml index 4adb16a83b..6f8efd56ea 100644 --- a/data/nde/enriched/entries/0592_Q2375610.yaml +++ b/data/nde/enriched/entries/0592_Q2375610.yaml @@ -672,7 +672,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:15.976694+00:00' source_archive: web/0592/speelgoedmuseum.nl - claims_count: 8 + claims_count: 7 claims: - claim_type: org_name claim_value: Speelgoedmuseum Oosterhout @@ -696,16 +696,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:15.975181+00:00' - - claim_type: org_name - claim_value: speelgoedmuseum.nl - raw_value: speelgoedmuseum.nl - source_url: http://www.speelgoedmuseum.nl/ - retrieved_on: '2025-11-29T16:43:30.313429+00:00' - xpath: /html/head/meta[9] - html_file: web/0592/speelgoedmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:46:15.975800+00:00' - claim_type: email claim_value: info@speelgoedmuseum.nl raw_value: info@speelgoedmuseum.nl @@ -756,6 +746,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:15.976429+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Speelgoedmuseum Oosterhout diff --git a/data/nde/enriched/entries/0594_unknown.yaml b/data/nde/enriched/entries/0594_unknown.yaml index 3f97b5f545..e81c1e9613 100644 --- a/data/nde/enriched/entries/0594_unknown.yaml +++ b/data/nde/enriched/entries/0594_unknown.yaml @@ -166,18 +166,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:16.772424+00:00' source_archive: web/0594/stichtingzhc.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Zuidelijk Historisch Contact - source_url: https://stichtingzhc.nl/ - retrieved_on: '2025-11-29T16:46:09.150368+00:00' - xpath: /html/head/title - html_file: web/0594/stichtingzhc.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:16.772025+00:00' - claim_type: org_name claim_value: Stichting Zuidelijk Historisch Contact raw_value: Stichting Zuidelijk Historisch Contact @@ -188,6 +178,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:46:16.772170+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Zuidelijk Historisch Contact diff --git a/data/nde/enriched/entries/0595_Q2297235.yaml b/data/nde/enriched/entries/0595_Q2297235.yaml index 0a8c5c5fe0..6fcbf25ab4 100644 --- a/data/nde/enriched/entries/0595_Q2297235.yaml +++ b/data/nde/enriched/entries/0595_Q2297235.yaml @@ -745,18 +745,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:16.858110+00:00' source_archive: web/0595/salha.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Streekarchief Langstraat Heusden Altena - source_url: https://salha.nl/ - retrieved_on: '2025-11-29T16:43:50.195687+00:00' - xpath: /html/head/title - html_file: web/0595/salha.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:16.857499+00:00' - claim_type: description_short claim_value: 'Streekarchief Langstraat Heusden Altena laat als professionele collectiebeheerder een zo groot mogelijk publiek kennis maken met haar fysieke en digitale collecties. @@ -843,6 +833,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:16.858025+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchief Langstraat Heusden Altena diff --git a/data/nde/enriched/entries/0598_Q3983824.yaml b/data/nde/enriched/entries/0598_Q3983824.yaml index b45603b912..16b7ec6fcb 100644 --- a/data/nde/enriched/entries/0598_Q3983824.yaml +++ b/data/nde/enriched/entries/0598_Q3983824.yaml @@ -1449,18 +1449,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:17.177066+00:00' source_archive: web/0598/textielmuseum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - textielmuseum.nl - TextielMuseum - source_url: http://www.textielmuseum.nl - retrieved_on: '2025-11-29T16:43:53.233054+00:00' - xpath: /html/head/title - html_file: web/0598/textielmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:17.176154+00:00' - claim_type: description_short claim_value: Kom naar het TextielMuseum in Tilburg voor inspirerende tentoonstellingen op het gebied van kunst, design, mode en erfgoed. Ontdek ook het TextielLab, @@ -1525,6 +1515,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:17.176754+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: TextielMuseum diff --git a/data/nde/enriched/entries/0603_Q56461052.yaml b/data/nde/enriched/entries/0603_Q56461052.yaml index bc67f13ace..117c3a3500 100644 --- a/data/nde/enriched/entries/0603_Q56461052.yaml +++ b/data/nde/enriched/entries/0603_Q56461052.yaml @@ -651,18 +651,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:17.940832+00:00' source_archive: web/0603/vsmm.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Valkerij en Sigarenmakerij Museum - source_url: http://www.vsmm.nl - retrieved_on: '2025-11-29T16:44:02.613539+00:00' - xpath: /html/head/title - html_file: web/0603/vsmm.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:17.940360+00:00' - claim_type: description_short claim_value: Beleef een uniek museum met twee bijzondere collecties! Onze gepassioneerde vrijwilligers nemen u mee op een boeiende reis door de wereld van de valkerij @@ -697,6 +687,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:46:17.940646+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Valkerij en Sigarenmakerij Museum diff --git a/data/nde/enriched/entries/0606_Q10896041.yaml b/data/nde/enriched/entries/0606_Q10896041.yaml index 1613024d7c..b6f91de8da 100644 --- a/data/nde/enriched/entries/0606_Q10896041.yaml +++ b/data/nde/enriched/entries/0606_Q10896041.yaml @@ -394,7 +394,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:18.568788+00:00' source_archive: web/0606/vangoghhuis.com - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Vincent van GoghHuis Zundert @@ -449,16 +449,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:18.568513+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.vangoghhuis.com - retrieved_on: '2025-11-29T16:47:06.622674+00:00' - xpath: /html/body/div[3]/div[3]/div[3]/div/div/h1 - html_file: web/0606/vangoghhuis.com/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:18.568646+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Van Goghhuis diff --git a/data/nde/enriched/entries/0610_Q110907548.yaml b/data/nde/enriched/entries/0610_Q110907548.yaml index 026ae9a2dd..4b96a824bd 100644 --- a/data/nde/enriched/entries/0610_Q110907548.yaml +++ b/data/nde/enriched/entries/0610_Q110907548.yaml @@ -548,28 +548,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:19.970036+00:00' source_archive: web/0610/westbrabantsarchief.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - West-Brabants Archief - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:45:32.262610+00:00' - xpath: /html/head/title - html_file: web/0610/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:19.969184+00:00' - - claim_type: org_name - claim_value: search - raw_value: search - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:45:32.262610+00:00' - xpath: /html/body/header/div/div/div/form/button/svg/title - html_file: web/0610/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:19.969196+00:00' - claim_type: org_name claim_value: info raw_value: info @@ -600,26 +580,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:19.969230+00:00' - - claim_type: org_name - claim_value: facebook - raw_value: facebook - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:45:32.262610+00:00' - xpath: /html/body/footer/div/div[3]/nav[1]/ul/li[1]/a/svg/title - html_file: web/0610/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:19.969235+00:00' - - claim_type: org_name - claim_value: instagram - raw_value: instagram - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:45:32.262610+00:00' - xpath: /html/body/footer/div/div[3]/nav[1]/ul/li[3]/a/svg/title - html_file: web/0610/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:19.969239+00:00' - claim_type: description_short claim_value: West-Brabants Archief raw_value: West-Brabants Archief @@ -670,6 +630,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:19.969804+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: West-Brabants Archief diff --git a/data/nde/enriched/entries/0612_Q110907546.yaml b/data/nde/enriched/entries/0612_Q110907546.yaml index 72ed54dd76..61c229074b 100644 --- a/data/nde/enriched/entries/0612_Q110907546.yaml +++ b/data/nde/enriched/entries/0612_Q110907546.yaml @@ -309,18 +309,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:20.588999+00:00' source_archive: web/0612/heemkundeoploo.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - De Heerlyckheit Plo - source_url: http://www.heemkundeoploo.nl - retrieved_on: '2025-11-29T16:45:44.909939+00:00' - xpath: /html/head/title - html_file: web/0612/heemkundeoploo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:20.587363+00:00' - claim_type: description_short claim_value: 01. 02. 03. Over ons Onze vereniging Heemkundevereniging ‘De Heerlyckheit Plo’ is een vereniging met zo’n 150 enthousiaste leden. We zijn geïnteresseerd @@ -365,6 +355,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:20.588307+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundevereniging De Heerlyckheit Plo diff --git a/data/nde/enriched/entries/0613_Q18286289.yaml b/data/nde/enriched/entries/0613_Q18286289.yaml index 6f60051054..34722d28ad 100644 --- a/data/nde/enriched/entries/0613_Q18286289.yaml +++ b/data/nde/enriched/entries/0613_Q18286289.yaml @@ -615,18 +615,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:20.667768+00:00' source_archive: web/0613/metropoolregioeindhoven.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Metropool Regio Eindhoven - source_url: https://metropoolregioeindhoven.nl/ - retrieved_on: '2025-11-29T16:45:45.437125+00:00' - xpath: /html/head/title - html_file: web/0613/metropoolregioeindhoven.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:20.667121+00:00' - claim_type: description_short claim_value: Samenwerking is de sleutel tot het succes van de regio. De 21 regiogemeenten hebben daarom hun krachten gebundeld in de Metropoolregio Eindhoven. @@ -649,6 +639,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:46:20.667338+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Metropoolregio Eindhoven diff --git a/data/nde/enriched/entries/0614_Q2395096.yaml b/data/nde/enriched/entries/0614_Q2395096.yaml index 3ebd607174..857c7e489c 100644 --- a/data/nde/enriched/entries/0614_Q2395096.yaml +++ b/data/nde/enriched/entries/0614_Q2395096.yaml @@ -662,18 +662,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:21.129546+00:00' source_archive: web/0614/oertijdmuseum.nl - claims_count: 12 + claims_count: 11 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME - Oertijdmuseum - source_url: http://www.oertijdmuseum.nl/ - retrieved_on: '2025-11-29T23:30:02.349318+00:00' - xpath: /html/head/title - html_file: web/0614/oertijdmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:21.128658+00:00' - claim_type: description_short claim_value: Het Oertijdmuseum is het grootste geologische museum van Nederland. Wandel tussen de dino's of bekijk live hoe fossielen worden uitgeprepareerd. @@ -800,6 +790,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:21.129474+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oertijdmuseum diff --git a/data/nde/enriched/entries/0615_Q20970639.yaml b/data/nde/enriched/entries/0615_Q20970639.yaml index f9ff6bc9b8..e1fcb163ee 100644 --- a/data/nde/enriched/entries/0615_Q20970639.yaml +++ b/data/nde/enriched/entries/0615_Q20970639.yaml @@ -766,18 +766,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:21.210106+00:00' source_archive: web/0615/museumweesp.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Weesp - source_url: https://museumweesp.nl/ - retrieved_on: '2025-11-29T16:45:46.255656+00:00' - xpath: /html/head/title - html_file: web/0615/museumweesp.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:21.209446+00:00' - claim_type: description_short claim_value: Gemeentemuseum Weesp wordt mogelijk gemaakt door de gemeente Weesp; de collectie behoort aan de gemeente en haar inwoners. @@ -860,6 +850,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:21.209999+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Weesp diff --git a/data/nde/enriched/entries/0618_Q4452658.yaml b/data/nde/enriched/entries/0618_Q4452658.yaml index 2b11c8a628..f1f4926b36 100644 --- a/data/nde/enriched/entries/0618_Q4452658.yaml +++ b/data/nde/enriched/entries/0618_Q4452658.yaml @@ -1323,18 +1323,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:21.901387+00:00' source_archive: web/0618/annefrank.org - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Anne Frank House - source_url: https://www.annefrank.org/en/ - retrieved_on: '2025-11-29T16:47:00.573583+00:00' - xpath: /html/head/title - html_file: web/0618/annefrank.org/mirror/www.annefrank.org/en/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:21.900843+00:00' - claim_type: description_short claim_value: The official website of the Anne Frank House, with the most complete and up-to-date information about Anne Frank, her diary, and the Secret Annex. @@ -1399,6 +1389,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:21.901278+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Anne Frank Stichting diff --git a/data/nde/enriched/entries/0633_Q315883.yaml b/data/nde/enriched/entries/0633_Q315883.yaml index 2a9afdb3be..84b95d23f5 100644 --- a/data/nde/enriched/entries/0633_Q315883.yaml +++ b/data/nde/enriched/entries/0633_Q315883.yaml @@ -884,18 +884,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:37.835255+00:00' source_archive: web/0633/embassyofthefreemind.com - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - EMBASSY OF THE FREE MIND - source_url: http://www.embassyofthefreemind.com/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0633/embassyofthefreemind.com/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:37.833607+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -994,3 +984,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:25:37.835025+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0635_Q2919762.yaml b/data/nde/enriched/entries/0635_Q2919762.yaml index 57a251b1aa..b74e99f500 100644 --- a/data/nde/enriched/entries/0635_Q2919762.yaml +++ b/data/nde/enriched/entries/0635_Q2919762.yaml @@ -1012,18 +1012,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:26.143219+00:00' source_archive: web/0635/bijbelsmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Bijbels Museum - source_url: http://www.bijbelsmuseum.nl/ - retrieved_on: '2025-11-29T16:51:01.973096+00:00' - xpath: /html/head/title - html_file: web/0635/bijbelsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:26.141484+00:00' - claim_type: description_short claim_value: Bijbels Museum maakt door NL reizende kunsttentoonstellingen rond bijbelse thema's en verhalen, vaak verbreed naar andere religies. @@ -1106,6 +1096,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:26.142905+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bijbels Museum diff --git a/data/nde/enriched/entries/0638_unknown.yaml b/data/nde/enriched/entries/0638_unknown.yaml index de711ed07c..eb0e94701c 100644 --- a/data/nde/enriched/entries/0638_unknown.yaml +++ b/data/nde/enriched/entries/0638_unknown.yaml @@ -251,7 +251,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:27.228314+00:00' source_archive: web/0638/grotekerknaarden.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Grote Kerk Naarden @@ -283,16 +283,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:27.228175+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://grotekerknaarden.nl/ - retrieved_on: '2025-11-29T16:50:48.909438+00:00' - xpath: /html/body/div/div/div/div/main/h1 - html_file: web/0638/grotekerknaarden.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:27.228216+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Grote Kerk Naarden diff --git a/data/nde/enriched/entries/0643_Q56459403.yaml b/data/nde/enriched/entries/0643_Q56459403.yaml index 30c08ece3f..7fd3bb5c40 100644 --- a/data/nde/enriched/entries/0643_Q56459403.yaml +++ b/data/nde/enriched/entries/0643_Q56459403.yaml @@ -468,18 +468,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:27.906295+00:00' source_archive: web/0643/cultuurmuseumtexel.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Texels erfgoedmuseum - Waelstee - source_url: https://www.cultuurmuseumtexel.nl/ - retrieved_on: '2025-11-29T16:51:00.339708+00:00' - xpath: /html/head/title - html_file: web/0643/cultuurmuseumtexel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:27.905076+00:00' - claim_type: org_name claim_value: Texels erfgoedmuseum - Waelstee raw_value: Texels erfgoedmuseum - Waelstee @@ -520,6 +510,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:27.906056+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Waelstee diff --git a/data/nde/enriched/entries/0644_Q56461228.yaml b/data/nde/enriched/entries/0644_Q56461228.yaml index 4d1683537c..4814340434 100644 --- a/data/nde/enriched/entries/0644_Q56461228.yaml +++ b/data/nde/enriched/entries/0644_Q56461228.yaml @@ -512,18 +512,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:27.987580+00:00' source_archive: web/0644/dewemme.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum de Wemme Zuidwolde - source_url: https://www.dewemme.nl/ - retrieved_on: '2025-11-29T16:50:52.674646+00:00' - xpath: /html/head/title - html_file: web/0644/dewemme.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:27.986679+00:00' - claim_type: org_name claim_value: BTC Art raw_value: BTC Art - Shine @@ -624,6 +614,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:27.987451+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum De Wemme diff --git a/data/nde/enriched/entries/0647_unknown.yaml b/data/nde/enriched/entries/0647_unknown.yaml index 93f3eba9ed..a450ee339d 100644 --- a/data/nde/enriched/entries/0647_unknown.yaml +++ b/data/nde/enriched/entries/0647_unknown.yaml @@ -391,18 +391,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:28.923092+00:00' source_archive: web/0647/dnb.nl - claims_count: 12 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Archief - raw_value: Archief | De Nederlandsche Bank - source_url: https://www.dnb.nl/archief/ - retrieved_on: '2025-11-29T16:51:03.166899+00:00' - xpath: /html/head/title - html_file: web/0647/dnb.nl/mirror/www.dnb.nl/archief/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:28.919724+00:00' - claim_type: description_short claim_value: De Nederlandsche Bank wil dat haar informatie toegankelijk is voor iedereen. Informatie van vandaag en informatie van vroeger. @@ -485,26 +475,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:28.922191+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?url=https://www.dnb.nl/archief/ - raw_value: https://twitter.com/intent/tweet?url=https://www.dnb.nl/archief/ - source_url: https://www.dnb.nl/archief/ - retrieved_on: '2025-11-29T16:51:03.166899+00:00' - xpath: /html/body/main/div[1]/div/div[1]/div[3]/div/a[2] - html_file: web/0647/dnb.nl/mirror/www.dnb.nl/archief/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:28.922195+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https://www.dnb.nl/archief/ - raw_value: https://www.facebook.com/sharer.php?u=https://www.dnb.nl/archief/ - source_url: https://www.dnb.nl/archief/ - retrieved_on: '2025-11-29T16:51:03.166899+00:00' - xpath: /html/body/main/div[1]/div/div[1]/div[3]/div/a[3] - html_file: web/0647/dnb.nl/mirror/www.dnb.nl/archief/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:28.922199+00:00' - claim_type: social_linkedin claim_value: https://www.linkedin.com/company/de-nederlandsche-bank raw_value: https://www.linkedin.com/company/de-nederlandsche-bank @@ -515,6 +485,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:28.922214+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De Nederlandse Bank Archief diff --git a/data/nde/enriched/entries/0649_Q1282056.yaml b/data/nde/enriched/entries/0649_Q1282056.yaml index 39fc012887..5681ec045c 100644 --- a/data/nde/enriched/entries/0649_Q1282056.yaml +++ b/data/nde/enriched/entries/0649_Q1282056.yaml @@ -850,7 +850,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:29.811202+00:00' source_archive: web/0649/ecomare.nl - claims_count: 11 + claims_count: 9 claims: - claim_type: org_name claim_value: Duik in de zee! Een spetterend dagje uit @@ -934,26 +934,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:29.810673+00:00' - - claim_type: social_facebook - claim_value: https://facebook.com/sharer.php?u= - raw_value: https://facebook.com/sharer.php?u= - source_url: http://www.ecomare.nl/ - retrieved_on: '2025-11-29T16:53:57.242395+00:00' - xpath: /html/body/div[3]/section[4]/footer/div[3]/div[3]/div/div/div/ul/li[1]/a - html_file: web/0649/ecomare.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:29.810715+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?url= - raw_value: https://twitter.com/share?url= - source_url: http://www.ecomare.nl/ - retrieved_on: '2025-11-29T16:53:57.242395+00:00' - xpath: /html/body/div[3]/section[4]/footer/div[3]/div[3]/div/div/div/ul/li[2]/a - html_file: web/0649/ecomare.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:29.810726+00:00' - claim_type: org_name claim_value: Ontvang onze nieuwsbrief raw_value: Ontvang onze nieuwsbrief @@ -964,6 +944,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:29.810834+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Ecomare diff --git a/data/nde/enriched/entries/0651_Q110995897.yaml b/data/nde/enriched/entries/0651_Q110995897.yaml index 8fb2b468ab..a42067a383 100644 --- a/data/nde/enriched/entries/0651_Q110995897.yaml +++ b/data/nde/enriched/entries/0651_Q110995897.yaml @@ -584,18 +584,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:30.047693+00:00' source_archive: web/0651/fashionforgood.com - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Fashion for Good - source_url: https://fashionforgood.com/ - retrieved_on: '2025-11-29T16:52:02.534527+00:00' - xpath: /html/head/title - html_file: web/0651/fashionforgood.com/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:30.045611+00:00' - claim_type: description_short claim_value: Fashion for Good unites the fashion ecosystem to transform and build a regenerative industry. By fostering collaboration and facilitating connections, @@ -660,6 +650,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:30.047307+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Fashion for Good diff --git a/data/nde/enriched/entries/0653_Q133734238.yaml b/data/nde/enriched/entries/0653_Q133734238.yaml index e63b0e2dbe..e94dbb1734 100644 --- a/data/nde/enriched/entries/0653_Q133734238.yaml +++ b/data/nde/enriched/entries/0653_Q133734238.yaml @@ -726,7 +726,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:30.646503+00:00' source_archive: web/0653/flessenscheepjesmuseum.nl - claims_count: 5 + claims_count: 4 claims: - claim_type: org_name claim_value: Flessenscheepjesmuseum Enkhuizen @@ -750,16 +750,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:30.645854+00:00' - - claim_type: org_name - claim_value: Flessenscheepjesmuseum.nl - raw_value: Flessenscheepjesmuseum.nl - source_url: http://www.flessenscheepjesmuseum.nl - retrieved_on: '2025-11-29T16:52:11.108066+00:00' - xpath: /html/head/meta[10] - html_file: web/0653/flessenscheepjesmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:46:30.646047+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Flessenscheepjes-Museum-604211196329145/ raw_value: https://www.facebook.com/Flessenscheepjes-Museum-604211196329145/ @@ -780,6 +770,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:30.646376+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Flessenscheepjesmuseum diff --git a/data/nde/enriched/entries/0656_Q574961.yaml b/data/nde/enriched/entries/0656_Q574961.yaml index c3735dec3c..cf41c2db17 100644 --- a/data/nde/enriched/entries/0656_Q574961.yaml +++ b/data/nde/enriched/entries/0656_Q574961.yaml @@ -2064,18 +2064,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:31.211544+00:00' source_archive: web/0656/franshalsmuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.franshalsmuseum.nl/en/ - retrieved_on: '2025-11-29T16:52:13.762843+00:00' - xpath: /html/head/title - html_file: web/0656/franshalsmuseum.nl/mirror/www.franshalsmuseum.nl/en/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.210566+00:00' - claim_type: social_instagram claim_value: https://www.instagram.com/accounts/login/?next=https%3A%2F%2Fwww.instagram.com%2Ffranshalsmuseum%2F&is_from_rle raw_value: https://www.instagram.com/accounts/login/?next=https%3A%2F%2Fwww.instagram.com%2Ffranshalsmuseum%2F&is_from_rle @@ -2116,6 +2106,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:31.211256+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Frans Hals Museum diff --git a/data/nde/enriched/entries/0658_unknown.yaml b/data/nde/enriched/entries/0658_unknown.yaml index 7ecbf2bd98..71bec6bae2 100644 --- a/data/nde/enriched/entries/0658_unknown.yaml +++ b/data/nde/enriched/entries/0658_unknown.yaml @@ -354,18 +354,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:31.520515+00:00' source_archive: web/0658/velsen.nl - claims_count: 13 + claims_count: 12 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Velsen - source_url: https://www.velsen.nl/gemeentearchief-inzage - retrieved_on: '2025-11-29T16:52:15.455376+00:00' - xpath: /html/head/title - html_file: web/0658/velsen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.519653+00:00' - claim_type: org_name claim_value: externe-link-icoon raw_value: externe-link-icoon @@ -486,6 +476,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:31.520342+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Velsen diff --git a/data/nde/enriched/entries/0659_Q9971.yaml b/data/nde/enriched/entries/0659_Q9971.yaml index 833194bd0a..d84bcb8c33 100644 --- a/data/nde/enriched/entries/0659_Q9971.yaml +++ b/data/nde/enriched/entries/0659_Q9971.yaml @@ -1977,18 +1977,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:31.701735+00:00' source_archive: web/0659/waterland.nl - claims_count: 27 + claims_count: 26 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Waterland - source_url: https://www.waterland.nl/ - retrieved_on: '2025-11-29T16:52:14.576977+00:00' - xpath: /html/head/title - html_file: web/0659/waterland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.700922+00:00' - claim_type: org_name claim_value: externe-link-icoon raw_value: externe-link-icoon @@ -2249,6 +2239,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:31.701599+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Waterland diff --git a/data/nde/enriched/entries/0660_Q9980.yaml b/data/nde/enriched/entries/0660_Q9980.yaml index 665462cb99..d099432135 100644 --- a/data/nde/enriched/entries/0660_Q9980.yaml +++ b/data/nde/enriched/entries/0660_Q9980.yaml @@ -1990,18 +1990,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:31.923151+00:00' source_archive: web/0660/zandvoort.nl - claims_count: 11 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Zandvoort - source_url: https://zandvoort.nl/ - retrieved_on: '2025-11-29T16:52:15.206837+00:00' - xpath: /html/head/title - html_file: web/0660/zandvoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.922434+00:00' - claim_type: org_name claim_value: Gemeente Zandvoort raw_value: Gemeente Zandvoort @@ -2022,36 +2012,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:31.922454+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://zandvoort.nl/ - retrieved_on: '2025-11-29T16:52:15.206837+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0660/zandvoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.922470+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://zandvoort.nl/ - retrieved_on: '2025-11-29T16:52:15.206837+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0660/zandvoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.922475+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://zandvoort.nl/ - retrieved_on: '2025-11-29T16:52:15.206837+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0660/zandvoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.922479+00:00' - claim_type: description_short claim_value: Dit is de homepage van gemeente Zandvoort raw_value: Dit is de homepage van gemeente Zandvoort @@ -2102,6 +2062,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:31.923004+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zandvoort diff --git a/data/nde/enriched/entries/0661_Q124843656.yaml b/data/nde/enriched/entries/0661_Q124843656.yaml index 63fb0faa4f..4d848ef590 100644 --- a/data/nde/enriched/entries/0661_Q124843656.yaml +++ b/data/nde/enriched/entries/0661_Q124843656.yaml @@ -507,7 +507,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:39.373741+00:00' source_archive: web/0661/weesp.nl - claims_count: 10 + claims_count: 9 claims: - claim_type: org_name claim_value: Stadsgebied Weesp @@ -531,16 +531,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T12:25:39.372895+00:00' - - claim_type: org_name - claim_value: Amsterdam.nl - raw_value: Amsterdam.nl - source_url: http://www.weesp.nl/ - retrieved_on: '' - xpath: /html/head/meta[8] - html_file: web/0661/weesp.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T12:25:39.373129+00:00' - claim_type: phone claim_value: '14020' raw_value: '14020' @@ -611,3 +601,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:25:39.373541+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0664_Q2574390.yaml b/data/nde/enriched/entries/0664_Q2574390.yaml index 6db6d5b4ca..8b7d941e48 100644 --- a/data/nde/enriched/entries/0664_Q2574390.yaml +++ b/data/nde/enriched/entries/0664_Q2574390.yaml @@ -801,7 +801,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:32.278840+00:00' source_archive: web/0664/hetgrachtenhuis.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Hét museum in Amsterdam over de grachten @@ -825,16 +825,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:32.278136+00:00' - - claim_type: org_name - claim_value: https://grachten.museum/ - raw_value: https://grachten.museum/ - source_url: http://www.hetgrachtenhuis.nl - retrieved_on: '2025-11-29T16:52:16.578623+00:00' - xpath: /html/head/meta[10] - html_file: web/0664/hetgrachtenhuis.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:46:32.278401+00:00' - claim_type: email claim_value: mail@grachten.museum raw_value: mail@grachten.museum @@ -875,6 +865,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:32.278754+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Grachtenmuseum diff --git a/data/nde/enriched/entries/0670_Q1616123.yaml b/data/nde/enriched/entries/0670_Q1616123.yaml index 0091941e83..1e77f27290 100644 --- a/data/nde/enriched/entries/0670_Q1616123.yaml +++ b/data/nde/enriched/entries/0670_Q1616123.yaml @@ -1609,18 +1609,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:33.840918+00:00' source_archive: web/0670/hetscheepvaartmuseum.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Het Scheepvaartmuseum Amsterdam - source_url: https://www.hetscheepvaartmuseum.nl/ - retrieved_on: '2025-11-29T16:54:11.742862+00:00' - xpath: /html/head/title - html_file: web/0670/hetscheepvaartmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:33.840063+00:00' - claim_type: description_short claim_value: In Het Scheepvaartmuseum komt alles boven water. Verken 500 jaar maritieme geschiedenis en hoe deze in verbinding staat met de samenleving van @@ -1716,6 +1706,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:33.840819+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het Scheepvaartmuseum diff --git a/data/nde/enriched/entries/0671_Q3049198.yaml b/data/nde/enriched/entries/0671_Q3049198.yaml index ce07087e00..3fcadeb947 100644 --- a/data/nde/enriched/entries/0671_Q3049198.yaml +++ b/data/nde/enriched/entries/0671_Q3049198.yaml @@ -608,18 +608,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:34.089325+00:00' source_archive: web/0671/hetwaalresmuseum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Het Waalres Museum - source_url: http://www.hetwaalresmuseum.nl/ - retrieved_on: '2025-11-29T16:54:14.625679+00:00' - xpath: /html/head/title - html_file: web/0671/hetwaalresmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:34.089099+00:00' - claim_type: email claim_value: hetwaalresmuseum@gmail.com raw_value: hetwaalresmuseum@gmail.com @@ -652,6 +642,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:34.089304+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Waalres Museum diff --git a/data/nde/enriched/entries/0672_Q2335767.yaml b/data/nde/enriched/entries/0672_Q2335767.yaml index 3aa0e6f3c9..c54cb9cb31 100644 --- a/data/nde/enriched/entries/0672_Q2335767.yaml +++ b/data/nde/enriched/entries/0672_Q2335767.yaml @@ -544,18 +544,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:34.564670+00:00' source_archive: web/0672/zijpermuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Zijper Museum | Museum en Informatiepunt - source_url: https://www.zijpermuseum.nl/ - retrieved_on: '2025-11-29T16:58:14.787453+00:00' - xpath: /html/head/title - html_file: web/0672/zijpermuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:34.563713+00:00' - claim_type: description_short claim_value: 'Nieuws 1: Impressie lezing Frans Rikhof “Bescherming Rijksmuseumkunst”; Nieuws 2: Gastexpositie van Inge en Lianne. Mijn bezoek Te doen Het museum Archieven @@ -600,6 +590,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:34.564498+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zijper Museum diff --git a/data/nde/enriched/entries/0680_unknown.yaml b/data/nde/enriched/entries/0680_unknown.yaml index eac8ef98fc..8b23d00368 100644 --- a/data/nde/enriched/entries/0680_unknown.yaml +++ b/data/nde/enriched/entries/0680_unknown.yaml @@ -165,18 +165,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:37.928548+00:00' source_archive: web/0680/historischekringdiemen.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Vereniging Historische Kring Diemen - source_url: https://historischekringdiemen.nl/ - retrieved_on: '2025-11-29T16:58:34.595297+00:00' - xpath: /html/head/title - html_file: web/0680/historischekringdiemen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:37.927879+00:00' - claim_type: description_short claim_value: De Historische Kring Diemen, of ook wel HKD, houd zicht bezig met de geschiedenis van Diemen in kaart te brengen. Artikelen, foto's en meer @@ -249,6 +239,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:37.928459+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Kring Diemen diff --git a/data/nde/enriched/entries/0686_unknown.yaml b/data/nde/enriched/entries/0686_unknown.yaml index 43a9157981..3b215c9b95 100644 --- a/data/nde/enriched/entries/0686_unknown.yaml +++ b/data/nde/enriched/entries/0686_unknown.yaml @@ -200,18 +200,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:40.046588+00:00' source_archive: web/0686/oudakersloot.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Historische Vereniging Oud-Akersloot - source_url: https://www.oudakersloot.nl/ - retrieved_on: '2025-11-29T16:59:25.000536+00:00' - xpath: /html/head/title - html_file: web/0686/oudakersloot.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:40.045401+00:00' - claim_type: description_short claim_value: Historische Vereniging Oud-Akersloot - Historie, schoolfoto's, video's, beeldbank, documenten @@ -298,6 +288,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:40.046258+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Vereniging "Oud-Akersloot" diff --git a/data/nde/enriched/entries/0690_unknown.yaml b/data/nde/enriched/entries/0690_unknown.yaml index 615a2e578c..d4c3d6f48f 100644 --- a/data/nde/enriched/entries/0690_unknown.yaml +++ b/data/nde/enriched/entries/0690_unknown.yaml @@ -239,7 +239,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:41.190283+00:00' source_archive: web/0690/oudstedebroec.eu - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Historische vereniging Oud Stede Broec @@ -291,16 +291,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:41.189964+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.oudstedebroec.eu/ - retrieved_on: '2025-11-29T17:01:49.574066+00:00' - xpath: /html/body/div[2]/div/main/div[3]/div/div[1]/div[1]/div/h1 - html_file: web/0690/oudstedebroec.eu/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:41.190066+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische vereniging Oud Stede Broec diff --git a/data/nde/enriched/entries/0691_unknown.yaml b/data/nde/enriched/entries/0691_unknown.yaml index 0fe18c318b..3eca8a05e1 100644 --- a/data/nde/enriched/entries/0691_unknown.yaml +++ b/data/nde/enriched/entries/0691_unknown.yaml @@ -170,7 +170,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:41.603800+00:00' source_archive: web/0691/lijnendoordetijd.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Lijnen door de Tijd @@ -182,16 +182,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:41.603515+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.lijnendoordetijd.nl/ - retrieved_on: '2025-11-29T17:03:51.422260+00:00' - xpath: /html/body/div[5]/div[2]/div/main/article/header/h1 - html_file: web/0691/lijnendoordetijd.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:41.603768+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Lijnen door de Tijd diff --git a/data/nde/enriched/entries/0695_Q4469762.yaml b/data/nde/enriched/entries/0695_Q4469762.yaml index 8b72a99748..dfa72b68b2 100644 --- a/data/nde/enriched/entries/0695_Q4469762.yaml +++ b/data/nde/enriched/entries/0695_Q4469762.yaml @@ -1040,18 +1040,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:40.392951+00:00' source_archive: web/0695/hhnk.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Hoogheemraadschap Hollands Noorderkwartier - source_url: https://www.hhnk.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0695/hhnk.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:40.391340+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -1122,3 +1112,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:25:40.392625+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0702_Q110907392.yaml b/data/nde/enriched/entries/0702_Q110907392.yaml index 1bb7a07636..a72584b8ef 100644 --- a/data/nde/enriched/entries/0702_Q110907392.yaml +++ b/data/nde/enriched/entries/0702_Q110907392.yaml @@ -531,18 +531,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:40.865601+00:00' source_archive: web/0702/janvisser-museum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Jan Visser Museum - source_url: http://www.janvisser-museum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0702/janvisser-museum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:40.863918+00:00' - claim_type: description_short claim_value: Museum Informatie Welkom bij het Jan Visser Museum. Het Jan Visser Museum is oorspronkelijk gericht op het landbouwverleden tot voor de mechanisatie. @@ -611,3 +601,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:25:40.865396+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0704_Q702726.yaml b/data/nde/enriched/entries/0704_Q702726.yaml index d715372e2c..474cefb636 100644 --- a/data/nde/enriched/entries/0704_Q702726.yaml +++ b/data/nde/enriched/entries/0704_Q702726.yaml @@ -1397,7 +1397,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:44.063535+00:00' source_archive: web/0704/jck.nl - claims_count: 7 + claims_count: 4 claims: - claim_type: org_name claim_value: Joods Museum + junior @@ -1421,36 +1421,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:44.062835+00:00' - - claim_type: address - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:03:20.017426+00:00' - xpath: /html/head/script[2] - html_file: web/0704/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_streetAddress - extraction_timestamp: '2025-12-01T10:46:44.063111+00:00' - - claim_type: postal_code - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:03:20.017426+00:00' - xpath: /html/head/script[2] - html_file: web/0704/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_postalCode - extraction_timestamp: '2025-12-01T10:46:44.063114+00:00' - - claim_type: city - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:03:20.017426+00:00' - xpath: /html/head/script[2] - html_file: web/0704/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_addressLocality - extraction_timestamp: '2025-12-01T10:46:44.063115+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/joodscultureelkwartier raw_value: https://www.facebook.com/joodscultureelkwartier @@ -1471,6 +1441,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:44.063362+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Joods Museum diff --git a/data/nde/enriched/entries/0707_Q18654836.yaml b/data/nde/enriched/entries/0707_Q18654836.yaml index 06581d8074..600e5a5baf 100644 --- a/data/nde/enriched/entries/0707_Q18654836.yaml +++ b/data/nde/enriched/entries/0707_Q18654836.yaml @@ -475,7 +475,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:44.526216+00:00' source_archive: web/0707/huisvanhilde.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Huis van Hilde, ontdek de archeologie van Noord-Holland @@ -487,26 +487,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:44.525288+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: https://huisvanhilde.nl/ - retrieved_on: '2025-11-29T17:03:30.394129+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[1]/svg/title - html_file: web/0707/huisvanhilde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:44.525300+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: https://huisvanhilde.nl/ - retrieved_on: '2025-11-29T17:03:30.394129+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[2]/svg/title - html_file: web/0707/huisvanhilde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:44.525304+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -579,6 +559,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:44.526036+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Huis van Hilde diff --git a/data/nde/enriched/entries/0709_Q17402020.yaml b/data/nde/enriched/entries/0709_Q17402020.yaml index aa7912ac36..64f483ad91 100644 --- a/data/nde/enriched/entries/0709_Q17402020.yaml +++ b/data/nde/enriched/entries/0709_Q17402020.yaml @@ -798,7 +798,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:44.839785+00:00' source_archive: web/0709/defensie.nl - claims_count: 6 + claims_count: 3 claims: - claim_type: org_name claim_value: Defensiemusea @@ -838,36 +838,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_description extraction_timestamp: '2025-12-01T10:46:44.839373+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?url=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea&text=Defensiemusea%20%23defensie - raw_value: https://twitter.com/share?url=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea&text=Defensiemusea%20%23defensie - source_url: http://www.defensie.nl/marinemuseum - retrieved_on: '2025-11-29T17:03:37.241226+00:00' - xpath: /html/body/div/main/div/div[3]/div/div/ul/li[1]/a - html_file: web/0709/defensie.nl/pages/marinemuseum.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:44.839688+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea - source_url: http://www.defensie.nl/marinemuseum - retrieved_on: '2025-11-29T17:03:37.241226+00:00' - xpath: /html/body/div/main/div/div[3]/div/div/ul/li[2]/a - html_file: web/0709/defensie.nl/pages/marinemuseum.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:44.839697+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea&title=Defensiemusea&source=Defensie.nl&summary=De+Stichting+Defensiemusea+is+de+overkoepelende+organisatie+van+3+militaire+musea%3A+het+Nationaal+Militair+Museum%2C+het+Marinemuseum%C2%A0en+het+Mariniersmuseum.+Op+1+januari+2015+is+door+koning+Willem+Alexander+het+predicaat+%27Koninklijk%27+toegekend.+De+stichting+heet+sindsdien+Koninklijke+Stichting+Defensiemusea+%28KSD%29. - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea&title=Defensiemusea&source=Defensie.nl&summary=De+Stichting+Defensiemusea+is+de+overkoepelende+organisatie+van+3+militaire+musea%3A+het+Nationaal+Militair+Museum%2C+het+Marinemuseum%C2%A0en+het+Mariniersmuseum.+Op+1+januari+2015+is+door+koning+Willem+Alexander+het+predicaat+%27Koninklijk%27+toegekend.+De+stichting+heet+sindsdien+Koninklijke+Stichting+Defensiemusea+%28KSD%29. - source_url: http://www.defensie.nl/marinemuseum - retrieved_on: '2025-11-29T17:03:37.241226+00:00' - xpath: /html/body/div/main/div/div[3]/div/div/ul/li[3]/a - html_file: web/0709/defensie.nl/pages/marinemuseum.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:44.839704+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Marinemuseum diff --git a/data/nde/enriched/entries/0711_Q110671441.yaml b/data/nde/enriched/entries/0711_Q110671441.yaml index d0ffef980d..16c388c1de 100644 --- a/data/nde/enriched/entries/0711_Q110671441.yaml +++ b/data/nde/enriched/entries/0711_Q110671441.yaml @@ -549,7 +549,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:45.106955+00:00' source_archive: web/0711/modemuze.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Welkom bij Modemuze @@ -561,16 +561,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:45.105676+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://modemuze.nl/ - retrieved_on: '2025-11-29T17:03:42.665364+00:00' - xpath: /html/body/div/header/div/div/div/div/div[2]/nav/ul/li[6]/div/div/div[2]/div[1]/div/div/div/h1 - html_file: web/0711/modemuze.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:45.106618+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: netwerk Modemuze diff --git a/data/nde/enriched/entries/0718_Q18285904.yaml b/data/nde/enriched/entries/0718_Q18285904.yaml index 6e73994c72..a1dffed55d 100644 --- a/data/nde/enriched/entries/0718_Q18285904.yaml +++ b/data/nde/enriched/entries/0718_Q18285904.yaml @@ -530,7 +530,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:46.780337+00:00' source_archive: web/0718/etersheimerbraak.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Etersheimerbraak @@ -572,16 +572,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:46:46.780184+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/share/r/12LUGM1W382/?mibextid=wwXIfr - raw_value: https://www.facebook.com/share/r/12LUGM1W382/?mibextid=wwXIfr - source_url: https://www.etersheimerbraak.nl/#DikTrom - retrieved_on: '2025-11-29T23:38:32.060792+00:00' - xpath: /html/body/div[6]/div[1]/div/div[3]/div/div/div/div[2]/div[2]/a - html_file: web/0718/etersheimerbraak.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:46.780266+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Etersheimerbraak/ raw_value: https://www.facebook.com/Etersheimerbraak/ @@ -592,6 +582,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:46.780273+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het Schooltje van Dik Trom diff --git a/data/nde/enriched/entries/0721_Q277316.yaml b/data/nde/enriched/entries/0721_Q277316.yaml index 593050b433..f3d442480f 100644 --- a/data/nde/enriched/entries/0721_Q277316.yaml +++ b/data/nde/enriched/entries/0721_Q277316.yaml @@ -1550,18 +1550,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:47.854376+00:00' source_archive: web/0721/rembrandthuis.nl - claims_count: 18 + claims_count: 17 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Rembrandthuis - source_url: https://www.rembrandthuis.nl/nl/ - retrieved_on: '2025-11-29T17:05:29.222154+00:00' - xpath: /html/head/title - html_file: web/0721/rembrandthuis.nl/mirror/www.rembrandthuis.nl/nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:47.851521+00:00' - claim_type: org_name claim_value: Toegankelijkheid gereedschappen raw_value: Toegankelijkheid gereedschappen @@ -1734,6 +1724,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:47.854128+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rembrandthuis diff --git a/data/nde/enriched/entries/0726_Q19827882.yaml b/data/nde/enriched/entries/0726_Q19827882.yaml index 64e0e7f879..4bfa615891 100644 --- a/data/nde/enriched/entries/0726_Q19827882.yaml +++ b/data/nde/enriched/entries/0726_Q19827882.yaml @@ -680,18 +680,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:48.363033+00:00' source_archive: web/0726/kaapskil.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum Kaap Skil - source_url: http://www.kaapskil.nl - retrieved_on: '2025-11-29T17:05:39.220270+00:00' - xpath: /html/head/title - html_file: web/0726/kaapskil.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:48.362019+00:00' - claim_type: description_short claim_value: Een uniek museum vol verhalen en avonturen. Bekijk de opgedoken schatten uit scheepswrakken, ontdek de Reede van Texel en stap terug in de tijd in het @@ -756,6 +746,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:48.362867+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Kaap Skil diff --git a/data/nde/enriched/entries/0728_Q493160.yaml b/data/nde/enriched/entries/0728_Q493160.yaml index 731862ac50..b78fba3f5f 100644 --- a/data/nde/enriched/entries/0728_Q493160.yaml +++ b/data/nde/enriched/entries/0728_Q493160.yaml @@ -1244,18 +1244,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:48.661113+00:00' source_archive: web/0728/opsolder.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - keuze pagina - Museum Ons' Lieve Heer op Solder - source_url: https://www.opsolder.nl/ - retrieved_on: '2025-11-29T17:05:42.677403+00:00' - xpath: /html/head/title - html_file: web/0728/opsolder.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:48.660288+00:00' - claim_type: org_name claim_value: Museum Ons' Lieve Heer op Solder raw_value: Museum Ons' Lieve Heer op Solder @@ -1316,6 +1306,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:48.660952+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Ons' Lieve Heer op Solder diff --git a/data/nde/enriched/entries/0733_unknown.yaml b/data/nde/enriched/entries/0733_unknown.yaml index 0e1c7722a6..5a816a8b4d 100644 --- a/data/nde/enriched/entries/0733_unknown.yaml +++ b/data/nde/enriched/entries/0733_unknown.yaml @@ -315,18 +315,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:49.966345+00:00' source_archive: web/0733/tantejaantje.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Tante Jaantje - source_url: https://tantejaantje.nl/ - retrieved_on: '2025-11-29T17:06:26.989513+00:00' - xpath: /html/head/title - html_file: web/0733/tantejaantje.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:49.965571+00:00' - claim_type: org_name claim_value: Tante Jaantje raw_value: Tante Jaantje @@ -357,6 +347,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:49.966159+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museumboerderij Tante Jaantje diff --git a/data/nde/enriched/entries/0746_Q474823.yaml b/data/nde/enriched/entries/0746_Q474823.yaml index 00e94e9b41..9774f064b2 100644 --- a/data/nde/enriched/entries/0746_Q474823.yaml +++ b/data/nde/enriched/entries/0746_Q474823.yaml @@ -1754,18 +1754,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:53.170772+00:00' source_archive: web/0746/niod.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - NIOD - source_url: https://www.niod.nl/en - retrieved_on: '2025-11-29T17:08:14.011446+00:00' - xpath: /html/head/title - html_file: web/0746/niod.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:53.169232+00:00' - claim_type: description_short claim_value: Het NIOD Instituut voor Oorlogs-, Holocaust- en Genocidestudies is een nationaal en internationaal expertisecentrum voor interdisciplinair onderzoek @@ -1784,6 +1774,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:53.169424+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: NIOD Instituut voor Oorlogs-, Holocaust- en Genocidestudies diff --git a/data/nde/enriched/entries/0749_Q126174339.yaml b/data/nde/enriched/entries/0749_Q126174339.yaml index 2cb4ee735e..d85894757f 100644 --- a/data/nde/enriched/entries/0749_Q126174339.yaml +++ b/data/nde/enriched/entries/0749_Q126174339.yaml @@ -407,18 +407,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:53.673343+00:00' source_archive: web/0749/odnzkg.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Omgevingsdienst Noordzeekanaalgebied - source_url: https://www.odnzkg.nl - retrieved_on: '2025-11-29T17:08:23.562527+00:00' - xpath: /html/head/title - html_file: web/0749/odnzkg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:53.671967+00:00' - claim_type: description_short claim_value: Omgevingsdienst Noordzeekanaalgebied Voor 3 provincies en 8 gemeenten voeren we vergunning- en toezichttaken uit bij bedrijven op het gebied van milieu, @@ -517,6 +507,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:53.673151+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Omgevingsdienst Noordzeekanaalgebied diff --git a/data/nde/enriched/entries/0750_unknown.yaml b/data/nde/enriched/entries/0750_unknown.yaml index a1b28c59c5..972097406a 100644 --- a/data/nde/enriched/entries/0750_unknown.yaml +++ b/data/nde/enriched/entries/0750_unknown.yaml @@ -366,18 +366,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:54.132842+00:00' source_archive: web/0750/oorlogsmuseummedemblik.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Oorlogsmuseum Medemblik - source_url: https://www.oorlogsmuseummedemblik.nl/ - retrieved_on: '2025-11-29T17:15:56.834443+00:00' - xpath: /html/head/title - html_file: web/0750/oorlogsmuseummedemblik.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:54.131507+00:00' - claim_type: description_short claim_value: Tickets Prijzen & Openingstijden Arrangementen Onderwijs De combinatie escaperoom achtige opdrachten met praktisch zelf in een invasieboot/truck is @@ -436,6 +426,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:54.132481+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oorlogsmuseum Medemblik diff --git a/data/nde/enriched/entries/0755_Q701.yaml b/data/nde/enriched/entries/0755_Q701.yaml index 554be6d551..4ef40e511b 100644 --- a/data/nde/enriched/entries/0755_Q701.yaml +++ b/data/nde/enriched/entries/0755_Q701.yaml @@ -3790,18 +3790,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:56.018199+00:00' source_archive: web/0755/noord-holland.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.noord-holland.nl/ - retrieved_on: '2025-11-29T17:16:16.631233+00:00' - xpath: /html/head/title - html_file: web/0755/noord-holland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:56.017409+00:00' - claim_type: description_short claim_value: De provincie Noord-Holland werkt aan een duurzame, bereikbare, leefbare en innovatieve provincie. @@ -3874,6 +3864,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:56.018062+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Provincie Noord-Holland diff --git a/data/nde/enriched/entries/0757_Q2189005.yaml b/data/nde/enriched/entries/0757_Q2189005.yaml index b7ce07061b..e71f556361 100644 --- a/data/nde/enriched/entries/0757_Q2189005.yaml +++ b/data/nde/enriched/entries/0757_Q2189005.yaml @@ -633,18 +633,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:56.318625+00:00' source_archive: web/0757/regionaalarchiefalkmaar.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Regionaal Archief Alkmaar - source_url: http://www.regionaalarchiefalkmaar.nl/ - retrieved_on: '2025-11-29T17:16:20.097108+00:00' - xpath: /html/head/title - html_file: web/0757/regionaalarchiefalkmaar.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:56.318097+00:00' - claim_type: description_short claim_value: Regionaal Archief te Alkmaar, intergemeentelijk samenwerkingsverband in Noord-Kennemerland, westelijk West-Friesland en de Kop van Noord-Holland. @@ -713,6 +703,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:56.318497+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regionaal Archief Alkmaar diff --git a/data/nde/enriched/entries/0759_Q190804.yaml b/data/nde/enriched/entries/0759_Q190804.yaml index 94226b4a8b..de3438d02d 100644 --- a/data/nde/enriched/entries/0759_Q190804.yaml +++ b/data/nde/enriched/entries/0759_Q190804.yaml @@ -4233,7 +4233,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:56.750160+00:00' source_archive: web/0759/rijksmuseum.nl - claims_count: 8 + claims_count: 7 claims: - claim_type: org_name claim_value: Rijksmuseum, hét museum van Nederland @@ -4259,16 +4259,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:56.749524+00:00' - - claim_type: org_name - claim_value: Rijksmuseum.nl - raw_value: Rijksmuseum.nl - source_url: https://www.rijksmuseum.nl/ - retrieved_on: '2025-11-29T17:16:25.944763+00:00' - xpath: /html/head/meta[8] - html_file: web/0759/rijksmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:46:56.749699+00:00' - claim_type: social_instagram claim_value: https://www.instagram.com/rijksmuseum/ raw_value: https://www.instagram.com/rijksmuseum/ @@ -4319,6 +4309,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:56.750051+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijksmuseum Amsterdam diff --git a/data/nde/enriched/entries/0760_Q113006081.yaml b/data/nde/enriched/entries/0760_Q113006081.yaml index 3e49053f8e..b4b037865d 100644 --- a/data/nde/enriched/entries/0760_Q113006081.yaml +++ b/data/nde/enriched/entries/0760_Q113006081.yaml @@ -852,18 +852,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:56.993052+00:00' source_archive: web/0760/muiderslot.nl - claims_count: 9 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Muiderslot - raw_value: Muiderslot - Muiderslot - source_url: http://www.muiderslot.nl/ - retrieved_on: '2025-11-29T17:16:31.806339+00:00' - xpath: /html/head/title - html_file: web/0760/muiderslot.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:56.991165+00:00' - claim_type: email claim_value: info@muiderslot.nl raw_value: info@muiderslot.nl @@ -934,16 +924,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:56.992545+00:00' - - claim_type: org_name - claim_value: sinterklaas op het Muiderslot - raw_value: sinterklaas op het Muiderslot - source_url: http://www.muiderslot.nl/ - retrieved_on: '2025-11-29T17:16:31.806339+00:00' - xpath: /html/body/div[1]/main/div/div/div[1]/div[1]/div/div[2]/div[2]/div/div/div/div/div/div/div/h1 - html_file: web/0760/muiderslot.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:56.992802+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijksmuseum Muiderslot diff --git a/data/nde/enriched/entries/0764_Q2170763.yaml b/data/nde/enriched/entries/0764_Q2170763.yaml index 77974ca3f9..3d83162de2 100644 --- a/data/nde/enriched/entries/0764_Q2170763.yaml +++ b/data/nde/enriched/entries/0764_Q2170763.yaml @@ -1576,7 +1576,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:44.028921+00:00' source_archive: web/0764/amsterdam.nl - claims_count: 13 + claims_count: 10 claims: - claim_type: org_name claim_value: Welkom bij Stadsarchief Amsterdam @@ -1620,36 +1620,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:25:44.027878+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/share.php?u=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - raw_value: https://www.facebook.com/share.php?u=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - source_url: https://www.amsterdam.nl/stadsarchief - retrieved_on: '' - xpath: /html/body/div[2]/div[5]/div/div/div[2]/div/div[2]/a - html_file: web/0764/amsterdam.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:44.028571+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Welkom%20bij%20Stadsarchief%20Amsterdam&url=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - raw_value: https://twitter.com/intent/tweet?text=Welkom%20bij%20Stadsarchief%20Amsterdam&url=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - source_url: https://www.amsterdam.nl/stadsarchief - retrieved_on: '' - xpath: /html/body/div[2]/div[5]/div/div/div[2]/div/div[3]/a - html_file: web/0764/amsterdam.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:44.028580+00:00' - - claim_type: social_linkedin - claim_value: http://www.linkedin.com/shareArticle?mini=true&title=Welkom%20bij%20Stadsarchief%20Amsterdam&url=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - raw_value: http://www.linkedin.com/shareArticle?mini=true&title=Welkom%20bij%20Stadsarchief%20Amsterdam&url=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - source_url: https://www.amsterdam.nl/stadsarchief - retrieved_on: '' - xpath: /html/body/div[2]/div[5]/div/div/div[2]/div/div[4]/a - html_file: web/0764/amsterdam.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:44.028586+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Stadsarchief raw_value: https://www.facebook.com/Stadsarchief @@ -1710,3 +1680,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:25:44.028707+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0771_Q50038175.yaml b/data/nde/enriched/entries/0771_Q50038175.yaml index bd040e6c8d..63fef458cb 100644 --- a/data/nde/enriched/entries/0771_Q50038175.yaml +++ b/data/nde/enriched/entries/0771_Q50038175.yaml @@ -334,18 +334,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:58.620007+00:00' source_archive: web/0771/gbwhaarlem.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Burgerweeshuis Haarlem - source_url: https://www.gbwhaarlem.nl/ - retrieved_on: '2025-11-29T17:17:32.906372+00:00' - xpath: /html/head/title - html_file: web/0771/gbwhaarlem.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:58.619555+00:00' - claim_type: org_name claim_value: Stichting Burgerweeshuis Haarlem raw_value: Stichting Burgerweeshuis Haarlem @@ -356,6 +346,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:46:58.619689+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Gereformeerd of Burgerweeshuis te Haarlem diff --git a/data/nde/enriched/entries/0778_Q702726.yaml b/data/nde/enriched/entries/0778_Q702726.yaml index 6398bc9ea3..b0c411b420 100644 --- a/data/nde/enriched/entries/0778_Q702726.yaml +++ b/data/nde/enriched/entries/0778_Q702726.yaml @@ -1372,7 +1372,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:59.437548+00:00' source_archive: web/0778/jck.nl - claims_count: 7 + claims_count: 4 claims: - claim_type: org_name claim_value: Joods Museum + junior @@ -1396,36 +1396,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:59.436963+00:00' - - claim_type: address - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:17:47.784542+00:00' - xpath: /html/head/script[2] - html_file: web/0778/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_streetAddress - extraction_timestamp: '2025-12-01T10:46:59.437191+00:00' - - claim_type: postal_code - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:17:47.784542+00:00' - xpath: /html/head/script[2] - html_file: web/0778/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_postalCode - extraction_timestamp: '2025-12-01T10:46:59.437193+00:00' - - claim_type: city - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:17:47.784542+00:00' - xpath: /html/head/script[2] - html_file: web/0778/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_addressLocality - extraction_timestamp: '2025-12-01T10:46:59.437195+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/joodscultureelkwartier raw_value: https://www.facebook.com/joodscultureelkwartier @@ -1446,6 +1416,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:59.437379+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Joods Museum diff --git a/data/nde/enriched/entries/0790_Q122922125.yaml b/data/nde/enriched/entries/0790_Q122922125.yaml index 8390b5248e..d5be144e37 100644 --- a/data/nde/enriched/entries/0790_Q122922125.yaml +++ b/data/nde/enriched/entries/0790_Q122922125.yaml @@ -493,18 +493,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:03.289456+00:00' source_archive: web/0790/notarielestichting.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting tot Bevordering der Notariële Wetenschap - source_url: https://notarielestichting.nl/ - retrieved_on: '2025-11-29T17:22:30.410880+00:00' - xpath: /html/head/title - html_file: web/0790/notarielestichting.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:03.288434+00:00' - claim_type: org_name claim_value: Stichting tot Bevordering der Notariële Wetenschap raw_value: Stichting tot Bevordering der Notariële Wetenschap @@ -525,6 +515,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:03.289137+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting tot Bevordering der Notariële Wetenschap diff --git a/data/nde/enriched/entries/0792_unknown.yaml b/data/nde/enriched/entries/0792_unknown.yaml index c8eff99089..aaedc77e4a 100644 --- a/data/nde/enriched/entries/0792_unknown.yaml +++ b/data/nde/enriched/entries/0792_unknown.yaml @@ -254,18 +254,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:04.065655+00:00' source_archive: web/0792/traditiekamermld.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Traditiekamer Marineluchtvaartdienst - source_url: https://www.traditiekamermld.nl/ - retrieved_on: '2025-11-29T17:38:03.422459+00:00' - xpath: /html/head/title - html_file: web/0792/traditiekamermld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:04.064327+00:00' - claim_type: description_short claim_value: Stichting vrienden van de Traditiekamer Marineluchtvaartdienst De Stichting De Stichting Vrienden van de Traditiekamer Marineluchtvaartdienst @@ -306,6 +296,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:47:04.064890+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Traditiekamer Marineluchtvaartdienst diff --git a/data/nde/enriched/entries/0807_Q14856958.yaml b/data/nde/enriched/entries/0807_Q14856958.yaml index d6295220d5..004f91708f 100644 --- a/data/nde/enriched/entries/0807_Q14856958.yaml +++ b/data/nde/enriched/entries/0807_Q14856958.yaml @@ -656,18 +656,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:06.985580+00:00' source_archive: web/0807/weegschaalmuseum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Weegschaalmuseum - source_url: http://www.weegschaalmuseum.nl - retrieved_on: '2025-11-29T17:22:38.912590+00:00' - xpath: /html/head/title - html_file: web/0807/weegschaalmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:06.984745+00:00' - claim_type: description_short claim_value: Welkom In het dagelijks leven staan wij niet stil bij de herkomst van de kilo of de meter. Wat is ijken precies en wat heeft Napoleon hier mee @@ -704,6 +694,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:06.985462+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Weegschaalmuseum diff --git a/data/nde/enriched/entries/0816_Q2335767.yaml b/data/nde/enriched/entries/0816_Q2335767.yaml index be8b1df873..c6bd514678 100644 --- a/data/nde/enriched/entries/0816_Q2335767.yaml +++ b/data/nde/enriched/entries/0816_Q2335767.yaml @@ -540,18 +540,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:09.309364+00:00' source_archive: web/0816/zijpermuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Zijper Museum | Museum en Informatiepunt - source_url: https://www.zijpermuseum.nl/ - retrieved_on: '2025-11-29T17:30:12.465133+00:00' - xpath: /html/head/title - html_file: web/0816/zijpermuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:09.308057+00:00' - claim_type: description_short claim_value: 'Nieuws 1: Impressie lezing Frans Rikhof “Bescherming Rijksmuseumkunst”; Nieuws 2: Gastexpositie van Inge en Lianne. Mijn bezoek Te doen Het museum Archieven @@ -596,6 +586,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:09.309183+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zijper Museum diff --git a/data/nde/enriched/entries/0821_unknown.yaml b/data/nde/enriched/entries/0821_unknown.yaml index f7b0143252..a1dc867699 100644 --- a/data/nde/enriched/entries/0821_unknown.yaml +++ b/data/nde/enriched/entries/0821_unknown.yaml @@ -315,7 +315,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:10.829272+00:00' source_archive: web/0821/theneedforlegacy.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: THE NEED FOR LEGACY @@ -348,16 +348,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:10.829205+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.theneedforlegacy.nl/ - retrieved_on: '2025-11-29T17:28:39.531190+00:00' - xpath: /html/body/div[2]/div/div/article/div[1]/div[1]/h1 - html_file: web/0821/theneedforlegacy.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:47:10.829234+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: THE NEED FOR LEGACY diff --git a/data/nde/enriched/entries/0840_Q110995895.yaml b/data/nde/enriched/entries/0840_Q110995895.yaml index 09c55f2558..203c5a3c35 100644 --- a/data/nde/enriched/entries/0840_Q110995895.yaml +++ b/data/nde/enriched/entries/0840_Q110995895.yaml @@ -551,18 +551,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:14.969497+00:00' source_archive: web/0840/museum-ommen.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historisch Museum Ommen - source_url: https://www.museum-ommen.nl/ - retrieved_on: '2025-11-29T17:33:31.648353+00:00' - xpath: /html/head/title - html_file: web/0840/museum-ommen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:14.967166+00:00' - claim_type: org_name claim_value: Historisch Museum Ommen raw_value: Historisch Museum Ommen @@ -633,6 +623,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:14.968745+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Cultuurhistorisch Centrum Ommen diff --git a/data/nde/enriched/entries/0842_Q110891772.yaml b/data/nde/enriched/entries/0842_Q110891772.yaml index 3e2455eda3..1cb107c691 100644 --- a/data/nde/enriched/entries/0842_Q110891772.yaml +++ b/data/nde/enriched/entries/0842_Q110891772.yaml @@ -439,18 +439,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:15.768313+00:00' source_archive: web/0842/ommen.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Ommen - source_url: http://www.ommen.nl/ - retrieved_on: '2025-11-29T23:43:36.620918+00:00' - xpath: /html/head/title - html_file: web/0842/ommen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.767681+00:00' - claim_type: description_short claim_value: Officiële website van gemeente Ommen. Hier vindt u alle informatie, nieuwsberichten, bestuur en dienstverlening van gemeente Ommen. @@ -543,6 +533,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:15.768187+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De twintigste eeuw en de gemeente Ommen diff --git a/data/nde/enriched/entries/0843_Q85311353.yaml b/data/nde/enriched/entries/0843_Q85311353.yaml index 6a31cc6773..c40d5edf43 100644 --- a/data/nde/enriched/entries/0843_Q85311353.yaml +++ b/data/nde/enriched/entries/0843_Q85311353.yaml @@ -520,58 +520,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:15.919838+00:00' source_archive: web/0843/deventerverhaal.nl - claims_count: 13 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Deventer Verhaal - source_url: https://deventerverhaal.nl/ - retrieved_on: '2025-11-29T17:33:34.455085+00:00' - xpath: /html/head/title - html_file: web/0843/deventerverhaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.918854+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://deventerverhaal.nl/ - retrieved_on: '2025-11-29T17:33:34.455085+00:00' - xpath: /html/body/footer/div[2]/div/div/a[1]/svg/title - html_file: web/0843/deventerverhaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.918867+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://deventerverhaal.nl/ - retrieved_on: '2025-11-29T17:33:34.455085+00:00' - xpath: /html/body/footer/div[2]/div/div/a[2]/svg/title - html_file: web/0843/deventerverhaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.918872+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://deventerverhaal.nl/ - retrieved_on: '2025-11-29T17:33:34.455085+00:00' - xpath: /html/body/footer/div[2]/div/div/a[3]/svg/title - html_file: web/0843/deventerverhaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.918876+00:00' - - claim_type: org_name - claim_value: YouTube - raw_value: YouTube - source_url: https://deventerverhaal.nl/ - retrieved_on: '2025-11-29T17:33:34.455085+00:00' - xpath: /html/body/footer/div[2]/div/div/a[4]/svg/title - html_file: web/0843/deventerverhaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.918880+00:00' - claim_type: email claim_value: annevangeuns@deventerhaal.nl raw_value: annevangeuns@deventerhaal.nl @@ -652,6 +602,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:15.919737+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Deventer Verhaal diff --git a/data/nde/enriched/entries/0847_Q110891825.yaml b/data/nde/enriched/entries/0847_Q110891825.yaml index a1e0fe9b29..bfef9e4a3c 100644 --- a/data/nde/enriched/entries/0847_Q110891825.yaml +++ b/data/nde/enriched/entries/0847_Q110891825.yaml @@ -301,18 +301,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:16.397973+00:00' source_archive: web/0847/enschede.nl - claims_count: 14 + claims_count: 13 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Enschede - source_url: http://www.enschede.nl/ - retrieved_on: '2025-11-29T23:42:18.266943+00:00' - xpath: /html/head/title - html_file: web/0847/enschede.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:16.396722+00:00' - claim_type: org_name claim_value: parkeervergunning icon raw_value: parkeervergunning icon @@ -443,6 +433,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:47:16.397548+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Enschede in WO2 diff --git a/data/nde/enriched/entries/0849_unknown.yaml b/data/nde/enriched/entries/0849_unknown.yaml index 904c89c3cc..792f359987 100644 --- a/data/nde/enriched/entries/0849_unknown.yaml +++ b/data/nde/enriched/entries/0849_unknown.yaml @@ -311,18 +311,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:16.865258+00:00' source_archive: web/0849/erfgoedrijssenholten.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://erfgoedrijssenholten.nl/ - retrieved_on: '2025-11-29T17:33:45.143071+00:00' - xpath: /html/head/title - html_file: web/0849/erfgoedrijssenholten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:16.864853+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -375,6 +365,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:16.865185+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Erfgoed Rijssen-Holten diff --git a/data/nde/enriched/entries/0850_Q76885143.yaml b/data/nde/enriched/entries/0850_Q76885143.yaml index 2bb8ce32de..b61fb48719 100644 --- a/data/nde/enriched/entries/0850_Q76885143.yaml +++ b/data/nde/enriched/entries/0850_Q76885143.yaml @@ -484,18 +484,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:16.961961+00:00' source_archive: web/0850/erfgoedcentrumzutphen.nl - claims_count: 12 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Erfgoedcentrum Zutphen - source_url: https://erfgoedcentrumzutphen.nl/ - retrieved_on: '2025-11-29T17:33:46.006659+00:00' - xpath: /html/head/title - html_file: web/0850/erfgoedcentrumzutphen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:16.961256+00:00' - claim_type: description_short claim_value: 'Het Erfgoedcentrum Zutphen: historisch hart van de regio! Eén plek voor onze vier erfgoedpartners: Archeologie, Monumentenzorg, Musea Zutphen en @@ -610,6 +600,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:16.961902+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Erfgoedcentrum Zutphen diff --git a/data/nde/enriched/entries/0853_Q110891801.yaml b/data/nde/enriched/entries/0853_Q110891801.yaml index dd0954f978..978686ff20 100644 --- a/data/nde/enriched/entries/0853_Q110891801.yaml +++ b/data/nde/enriched/entries/0853_Q110891801.yaml @@ -415,7 +415,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:17.846834+00:00' source_archive: web/0853/musicsupport.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Music Support @@ -495,16 +495,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:17.846473+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.musicsupport.nl/ - retrieved_on: '2025-11-29T23:43:55.558526+00:00' - xpath: /html/body/div[1]/div/section/div/div/div/div[1]/h1 - html_file: web/0853/musicsupport.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:47:17.846556+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Flip's' Music Heino diff --git a/data/nde/enriched/entries/0856_Q110891796.yaml b/data/nde/enriched/entries/0856_Q110891796.yaml index bbbc73632e..f1969adc25 100644 --- a/data/nde/enriched/entries/0856_Q110891796.yaml +++ b/data/nde/enriched/entries/0856_Q110891796.yaml @@ -441,18 +441,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:18.487733+00:00' source_archive: web/0856/borne.nl - claims_count: 23 + claims_count: 22 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Borne - source_url: https://www.borne.nl/gemeentearchief-borne - retrieved_on: '2025-11-29T17:34:14.799159+00:00' - xpath: /html/head/title - html_file: web/0856/borne.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:18.487122+00:00' - claim_type: org_name claim_value: externe-link-icoon raw_value: externe-link-icoon @@ -673,6 +663,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:18.487614+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Borne diff --git a/data/nde/enriched/entries/0857_Q81181239.yaml b/data/nde/enriched/entries/0857_Q81181239.yaml index 5af3f2b99c..fde83f3631 100644 --- a/data/nde/enriched/entries/0857_Q81181239.yaml +++ b/data/nde/enriched/entries/0857_Q81181239.yaml @@ -449,18 +449,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:18.616848+00:00' source_archive: web/0857/sabinfo.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Sabinfo.nl - raw_value: Sabinfo.nl - Een overzicht van de mooiste links voor jou - source_url: http://www.sabinfo.nl - retrieved_on: '2025-11-29T17:34:16.512228+00:00' - xpath: /html/head/title - html_file: web/0857/sabinfo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:18.615691+00:00' - claim_type: description_short claim_value: Op zoek naar een bron voor informatie? Hier vind je de beste websites. Van technologie tot lifestyle, hier vind je de links die je nodig hebt voor @@ -475,6 +465,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:18.615812+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stadsarchief Deventer diff --git a/data/nde/enriched/entries/0860_unknown.yaml b/data/nde/enriched/entries/0860_unknown.yaml index 40f4269bc0..f3f0c7d1c4 100644 --- a/data/nde/enriched/entries/0860_unknown.yaml +++ b/data/nde/enriched/entries/0860_unknown.yaml @@ -324,7 +324,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:19.242813+00:00' source_archive: web/0860/olst-wijhe.nl - claims_count: 11 + claims_count: 10 claims: - claim_type: org_name claim_value: Inwoners @@ -428,16 +428,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:19.242685+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=http%3A%2F%2Fwww.olst-wijhe.nl%2Finwoners&t=Inwoners - raw_value: https://www.facebook.com/sharer.php?u=http%3A%2F%2Fwww.olst-wijhe.nl%2Finwoners&t=Inwoners - source_url: https://www.olst-wijhe.nl/gemeentelijkarchief - retrieved_on: '2025-11-29T17:34:22.099033+00:00' - xpath: /html/body/div[5]/div/div/a[2] - html_file: web/0860/olst-wijhe.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:19.242689+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Olst-Wijhe diff --git a/data/nde/enriched/entries/0862_Q121224964.yaml b/data/nde/enriched/entries/0862_Q121224964.yaml index f2069f3d59..0b3e9f61ed 100644 --- a/data/nde/enriched/entries/0862_Q121224964.yaml +++ b/data/nde/enriched/entries/0862_Q121224964.yaml @@ -423,18 +423,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:19.576760+00:00' source_archive: web/0862/staphorst.nl - claims_count: 11 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Staphorst - source_url: https://www.staphorst.nl/gemeentearchief - retrieved_on: '2025-11-29T17:34:24.308444+00:00' - xpath: /html/head/title - html_file: web/0862/staphorst.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:19.575532+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -537,6 +527,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:19.576323+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Staphorst diff --git a/data/nde/enriched/entries/0866_Q121225125.yaml b/data/nde/enriched/entries/0866_Q121225125.yaml index ce46644ddc..4b225d31f8 100644 --- a/data/nde/enriched/entries/0866_Q121225125.yaml +++ b/data/nde/enriched/entries/0866_Q121225125.yaml @@ -429,7 +429,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:19.940502+00:00' source_archive: web/0866/hengelo.nl - claims_count: 11 + claims_count: 8 claims: - claim_type: org_name claim_value: Inwoners @@ -461,38 +461,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:47:19.940085+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://www.hengelo.nl/Inwoners - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://www.hengelo.nl/Inwoners - source_url: https://www.hengelo.nl/ - retrieved_on: '2025-11-29T23:43:56.464319+00:00' - xpath: /html/body/div[2]/div[4]/div/div[1]/a - html_file: web/0866/hengelo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:19.940208+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=https://www.hengelo.nl/Inwoners - Inwoners - raw_value: https://twitter.com/intent/tweet?text=https://www.hengelo.nl/Inwoners - Inwoners - source_url: https://www.hengelo.nl/ - retrieved_on: '2025-11-29T23:43:56.464319+00:00' - xpath: /html/body/div[2]/div[4]/div/div[3]/a - html_file: web/0866/hengelo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:19.940214+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https://www.hengelo.nl/Inwoners&title=Inwoners - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https://www.hengelo.nl/Inwoners&title=Inwoners - source_url: https://www.hengelo.nl/ - retrieved_on: '2025-11-29T23:43:56.464319+00:00' - xpath: /html/body/div[2]/div[4]/div/div[5]/a - html_file: web/0866/hengelo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:19.940219+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/gemeentehengelo raw_value: https://www.facebook.com/gemeentehengelo @@ -543,6 +511,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:19.940332+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Hengelo diff --git a/data/nde/enriched/entries/0870_Q121224972.yaml b/data/nde/enriched/entries/0870_Q121224972.yaml index 3a2b27b42e..102cfea4f8 100644 --- a/data/nde/enriched/entries/0870_Q121224972.yaml +++ b/data/nde/enriched/entries/0870_Q121224972.yaml @@ -414,18 +414,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:20.558263+00:00' source_archive: web/0870/wierden.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Wierden - source_url: https://www.wierden.nl/gemeentearchief - retrieved_on: '2025-11-29T17:34:38.054608+00:00' - xpath: /html/head/title - html_file: web/0870/wierden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:20.557408+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -498,6 +488,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:20.558049+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Wierden diff --git a/data/nde/enriched/entries/0873_Q110891812.yaml b/data/nde/enriched/entries/0873_Q110891812.yaml index 62563f0bf0..b2268727cb 100644 --- a/data/nde/enriched/entries/0873_Q110891812.yaml +++ b/data/nde/enriched/entries/0873_Q110891812.yaml @@ -343,18 +343,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:21.070573+00:00' source_archive: web/0873/dorpshuislutten.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Dorpshuis Lutten - source_url: https://dorpshuislutten.nl/ - retrieved_on: '2025-11-29T23:44:14.717591+00:00' - xpath: /html/head/title - html_file: web/0873/dorpshuislutten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:21.069863+00:00' - claim_type: description_short claim_value: Welkom op de website van dorpshuis Lutten. Een multifuncioneel centrum in Lutten. Er zijn zaaltjes te huur en diverse werkgroepen hebben hier hun onderkomen. @@ -407,6 +397,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:21.070520+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gouwe Buurt Lutten diff --git a/data/nde/enriched/entries/0883_Q13726962.yaml b/data/nde/enriched/entries/0883_Q13726962.yaml index b5f864ec23..1bc51ed667 100644 --- a/data/nde/enriched/entries/0883_Q13726962.yaml +++ b/data/nde/enriched/entries/0883_Q13726962.yaml @@ -591,18 +591,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:22.693037+00:00' source_archive: web/0883/museumhengelo.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Hengelo - source_url: http://www.museumhengelo.nl - retrieved_on: '2025-11-29T17:34:45.670564+00:00' - xpath: /html/head/title - html_file: web/0883/museumhengelo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:22.692434+00:00' - claim_type: org_name claim_value: Museum Hengelo raw_value: Museum Hengelo @@ -643,6 +633,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:22.692970+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Museum Hengelo diff --git a/data/nde/enriched/entries/0890_Q110891816.yaml b/data/nde/enriched/entries/0890_Q110891816.yaml index 2ad039157e..552cabe40f 100644 --- a/data/nde/enriched/entries/0890_Q110891816.yaml +++ b/data/nde/enriched/entries/0890_Q110891816.yaml @@ -420,18 +420,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:24.323168+00:00' source_archive: web/0890/historischekringhaaksbergen.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historische Kring Haaksbergen - source_url: http://www.historischekringhaaksbergen.nl/ - retrieved_on: '2025-11-29T23:50:41.734829+00:00' - xpath: /html/head/title - html_file: web/0890/historischekringhaaksbergen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:24.321138+00:00' - claim_type: description_short claim_value: Welkom bij de Historische Kring Haaksbergen. Wij organiseren regelmatig leuke en leerzame activiteiten. Benieuwd wat er binnenkort te doen is? @@ -484,6 +474,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:24.322782+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Kring Haaksbergen diff --git a/data/nde/enriched/entries/0893_unknown.yaml b/data/nde/enriched/entries/0893_unknown.yaml index 07dfbd3286..e09d775274 100644 --- a/data/nde/enriched/entries/0893_unknown.yaml +++ b/data/nde/enriched/entries/0893_unknown.yaml @@ -313,19 +313,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:25.523933+00:00' source_archive: web/0893/weblog.oudommen.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: OudOmmen.nl - raw_value: OudOmmen.nl | Webarchief voor de gemeente Ommen, de plek waar de geschiedenis - van de regio Ommen samen komt. - source_url: https://weblog.oudommen.nl - retrieved_on: '2025-11-29T17:38:16.781043+00:00' - xpath: /html/head/title - html_file: web/0893/weblog.oudommen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:25.522914+00:00' - claim_type: description_short claim_value: Webarchief voor de gemeente Ommen, de plek waar de geschiedenis van de regio Ommen samen komt. @@ -368,6 +357,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:25.523735+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum-Ommen diff --git a/data/nde/enriched/entries/0900_Q110891747.yaml b/data/nde/enriched/entries/0900_Q110891747.yaml index b4dea68b64..4c9e95e642 100644 --- a/data/nde/enriched/entries/0900_Q110891747.yaml +++ b/data/nde/enriched/entries/0900_Q110891747.yaml @@ -551,18 +551,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:27.573292+00:00' source_archive: web/0900/olstererfgoed.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME | Olstererfgoed - source_url: http://www.olstererfgoed.nl/ - retrieved_on: '2025-11-29T23:51:12.240979+00:00' - xpath: /html/head/title - html_file: web/0900/olstererfgoed.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:27.572558+00:00' - claim_type: org_name claim_value: Olstererfgoed raw_value: Olstererfgoed @@ -587,6 +577,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:27.573025+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Vereniging 't Olster Erfgoed diff --git a/data/nde/enriched/entries/0912_Q110891756.yaml b/data/nde/enriched/entries/0912_Q110891756.yaml index 33538f2176..e6977d5587 100644 --- a/data/nde/enriched/entries/0912_Q110891756.yaml +++ b/data/nde/enriched/entries/0912_Q110891756.yaml @@ -374,7 +374,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:30.479723+00:00' source_archive: web/0912/historiezwartsluis.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Historische Vereniging Zwartsluis @@ -398,16 +398,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:30.479366+00:00' - - claim_type: org_name - claim_value: HistorischeVerenigingZwartsluis.nl - raw_value: HistorischeVerenigingZwartsluis.nl - source_url: https://historiezwartsluis.nl/ - retrieved_on: '2025-11-29T23:51:24.954993+00:00' - xpath: /html/head/meta[10] - html_file: web/0912/historiezwartsluis.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:47:30.479482+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/historiezwartsluis/ raw_value: https://www.facebook.com/historiezwartsluis/ @@ -418,6 +408,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:30.479634+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Vereniging Zwartsluis diff --git a/data/nde/enriched/entries/0914_unknown.yaml b/data/nde/enriched/entries/0914_unknown.yaml index 49e814e1ea..59701c583e 100644 --- a/data/nde/enriched/entries/0914_unknown.yaml +++ b/data/nde/enriched/entries/0914_unknown.yaml @@ -331,18 +331,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:31.029924+00:00' source_archive: web/0914/proxy.archieven.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Archieven.nl - raw_value: Archieven.nl - 0964 Huisarchief Weldam, 1438-1920 (Westfries Archief) - source_url: https://proxy.archieven.nl/0/F8BA43C796AD4BEC97A456978826D3AD - retrieved_on: '2025-11-29T17:38:07.432449+00:00' - xpath: /html/head/title - html_file: web/0914/proxy.archieven.nl/pages/0_F8BA43C796AD4BEC97A456978826D3AD.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:31.028828+00:00' - claim_type: org_name claim_value: organisatie_link-svg raw_value: organisatie_link-svg @@ -385,6 +375,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:31.029781+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Weldam diff --git a/data/nde/enriched/entries/0916_Q110891804.yaml b/data/nde/enriched/entries/0916_Q110891804.yaml index c634ad6245..6953c5cf13 100644 --- a/data/nde/enriched/entries/0916_Q110891804.yaml +++ b/data/nde/enriched/entries/0916_Q110891804.yaml @@ -390,18 +390,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:31.455795+00:00' source_archive: web/0916/kempermeubelproductie.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Kemper meubelproductie - source_url: http://www.kempermeubelproductie.nl/ - retrieved_on: '2025-11-29T23:51:57.623489+00:00' - xpath: /html/head/title - html_file: web/0916/kempermeubelproductie.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:31.454407+00:00' - claim_type: org_name claim_value: Kemper meubelproductie raw_value: Kemper meubelproductie @@ -412,6 +402,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:47:31.454925+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Kemper Alferink Collectie diff --git a/data/nde/enriched/entries/0919_unknown.yaml b/data/nde/enriched/entries/0919_unknown.yaml index f29c605615..50f8f6103a 100644 --- a/data/nde/enriched/entries/0919_unknown.yaml +++ b/data/nde/enriched/entries/0919_unknown.yaml @@ -305,7 +305,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:32.562526+00:00' source_archive: web/0919/landgoedereninoverijssel.nl - claims_count: 5 + claims_count: 3 claims: - claim_type: org_name claim_value: Voorpagina @@ -329,26 +329,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:32.562139+00:00' - - claim_type: social_facebook - claim_value: http://www.facebook.com/sharer.php?u=http://www.landgoedereninoverijssel.nl - raw_value: http://www.facebook.com/sharer.php?u=http://www.landgoedereninoverijssel.nl - source_url: https://www.landgoedereninoverijssel.nl - retrieved_on: '2025-11-29T17:39:03.019799+00:00' - xpath: /html/body/div/header/div/span[2]/a[1] - html_file: web/0919/landgoedereninoverijssel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:32.562398+00:00' - - claim_type: social_twitter - claim_value: http://twitter.com/share?text=Landgoederen%20in%20Overijssel&url=http://www.landgoedereninoverijssel.nl - raw_value: http://twitter.com/share?text=Landgoederen%20in%20Overijssel&url=http://www.landgoedereninoverijssel.nl - source_url: https://www.landgoedereninoverijssel.nl - retrieved_on: '2025-11-29T17:39:03.019799+00:00' - xpath: /html/body/div/header/div/span[2]/a[2] - html_file: web/0919/landgoedereninoverijssel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:32.562402+00:00' - claim_type: org_name claim_value: De kroonjuwelen van de provincie Overijssel in het zonnetje raw_value: De kroonjuwelen van de provincie Overijssel in het zonnetje @@ -359,6 +339,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:32.562476+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Landgoederen in Overijssel diff --git a/data/nde/enriched/entries/0921_Q134993513.yaml b/data/nde/enriched/entries/0921_Q134993513.yaml index e7dd9a3757..e18fc154b7 100644 --- a/data/nde/enriched/entries/0921_Q134993513.yaml +++ b/data/nde/enriched/entries/0921_Q134993513.yaml @@ -391,18 +391,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:33.344821+00:00' source_archive: web/0921/veerman.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Machinefabriek Veerman - source_url: https://www.veerman.nl/ - retrieved_on: '2025-11-29T23:52:04.759447+00:00' - xpath: /html/head/title[1] - html_file: web/0921/veerman.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:33.344205+00:00' - claim_type: org_name claim_value: Home | Machinefabriek Veerman raw_value: "Home | Machinefabriek Veerman\n \n \n \ @@ -474,6 +464,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:33.344741+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Machinefabriek G.R. Veerman B.V diff --git a/data/nde/enriched/entries/0925_Q125421055.yaml b/data/nde/enriched/entries/0925_Q125421055.yaml index 26e95e6be9..011cc1192e 100644 --- a/data/nde/enriched/entries/0925_Q125421055.yaml +++ b/data/nde/enriched/entries/0925_Q125421055.yaml @@ -543,18 +543,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:34.585478+00:00' source_archive: web/0925/anno.nl - claims_count: 9 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - ANNO Stadsmuseum Zwolle - source_url: https://anno.nl/ - retrieved_on: '2025-11-29T17:39:22.323146+00:00' - xpath: /html/head/title - html_file: web/0925/anno.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:34.584948+00:00' - claim_type: description_short claim_value: ANNO vertelt de verhalen van Zwolle. Het is een unieke plek in Nederland waar museum, archeologie, bouwhistorie, monumenten en archief samen onder één @@ -625,26 +615,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:34.585355+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fanno.nl%2F - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fanno.nl%2F - source_url: https://anno.nl/ - retrieved_on: '2025-11-29T17:39:22.323146+00:00' - xpath: /html/body/section[2]/div/section[2]/div/div/div[3]/div/div/div[2]/a[1] - html_file: web/0925/anno.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:34.585364+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fanno.nl%2F&title=ANNO+Stadsmuseum - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fanno.nl%2F&title=ANNO+Stadsmuseum - source_url: https://anno.nl/ - retrieved_on: '2025-11-29T17:39:22.323146+00:00' - xpath: /html/body/section[2]/div/section[2]/div/div/div[3]/div/div/div[2]/a[2] - html_file: web/0925/anno.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:34.585370+00:00' - claim_type: org_name claim_value: ANNO Stadsmuseum raw_value: ANNO Stadsmuseum @@ -655,6 +625,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:34.585418+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: ANNO Stadsmuseum Zwolle diff --git a/data/nde/enriched/entries/0934_Q110891759.yaml b/data/nde/enriched/entries/0934_Q110891759.yaml index ae645719ed..10c85a27b3 100644 --- a/data/nde/enriched/entries/0934_Q110891759.yaml +++ b/data/nde/enriched/entries/0934_Q110891759.yaml @@ -294,18 +294,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:37.224668+00:00' source_archive: web/0934/excelsior-westenholte.nl - claims_count: 12 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Muziekvereniging Excelsior Westenholte - source_url: http://excelsior-westenholte.nl/ - retrieved_on: '2025-11-29T23:59:44.246963+00:00' - xpath: /html/head/title - html_file: web/0934/excelsior-westenholte.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:37.224109+00:00' - claim_type: org_name claim_value: Muziekvereniging Excelsior Westenholte raw_value: Muziekvereniging Excelsior Westenholte @@ -416,6 +406,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:37.224590+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Muziekvereniging Excelsior Westenholte diff --git a/data/nde/enriched/entries/0935_Q2755329.yaml b/data/nde/enriched/entries/0935_Q2755329.yaml index 8ba7203c86..bb1062f3d6 100644 --- a/data/nde/enriched/entries/0935_Q2755329.yaml +++ b/data/nde/enriched/entries/0935_Q2755329.yaml @@ -570,18 +570,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:37.611822+00:00' source_archive: web/0935/tinnenfigurenmuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal Tinnen Figuren Museum - source_url: http://www.tinnenfigurenmuseum.nl - retrieved_on: '2025-11-29T17:39:44.688497+00:00' - xpath: /html/head/title - html_file: web/0935/tinnenfigurenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:37.610690+00:00' - claim_type: org_name claim_value: Nationaal Tinnen Figuren Museum raw_value: Nationaal Tinnen Figuren Museum @@ -622,6 +612,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:37.611610+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Tinnen Figuren Museum diff --git a/data/nde/enriched/entries/0939_Q56460838.yaml b/data/nde/enriched/entries/0939_Q56460838.yaml index 6c69998bb1..edc3fd3f59 100644 --- a/data/nde/enriched/entries/0939_Q56460838.yaml +++ b/data/nde/enriched/entries/0939_Q56460838.yaml @@ -607,18 +607,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:38.245281+00:00' source_archive: web/0939/openluchtmuseumootmarsum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME - Openlucht Museum Ootmarsum - source_url: http://www.openluchtmuseumootmarsum.nl - retrieved_on: '2025-11-29T17:39:22.567766+00:00' - xpath: /html/head/title - html_file: web/0939/openluchtmuseumootmarsum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:38.244897+00:00' - claim_type: email claim_value: info@openluchtmuseumootmarsum.nl raw_value: info@openluchtmuseumootmarsum.nl @@ -649,6 +639,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:38.245204+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Openluchtmuseum Ootmarsum diff --git a/data/nde/enriched/entries/0943_unknown.yaml b/data/nde/enriched/entries/0943_unknown.yaml index 3ee63d1a1f..155cf5ca55 100644 --- a/data/nde/enriched/entries/0943_unknown.yaml +++ b/data/nde/enriched/entries/0943_unknown.yaml @@ -254,7 +254,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:39.132035+00:00' source_archive: web/0943/politie.nl - claims_count: 14 + claims_count: 13 claims: - claim_type: org_name claim_value: Zwolle-Koggelaan @@ -336,16 +336,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:39.131914+00:00' - - claim_type: social_facebook - claim_value: http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.politie.nl%2Fmijn-buurt%2Fpolitiebureaus%2F02%2Fzwolle-koggelaan.html - raw_value: http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.politie.nl%2Fmijn-buurt%2Fpolitiebureaus%2F02%2Fzwolle-koggelaan.html - source_url: https://www.politie.nl/mijn-buurt/politiebureaus/02/zwolle-koggelaan.html - retrieved_on: '2025-11-29T17:39:46.372511+00:00' - xpath: /html/body/div[3]/main/section/div/ul/li[2]/a - html_file: web/0943/politie.nl/pages/mijn-buurt_politiebureaus_02_zwolle-koggelaan.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:39.131919+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/politie/ raw_value: https://www.facebook.com/politie/ @@ -396,6 +386,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:39.131948+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Politie IJsselland diff --git a/data/nde/enriched/entries/0948_Q56460883.yaml b/data/nde/enriched/entries/0948_Q56460883.yaml index c8006a8319..89e0bcd222 100644 --- a/data/nde/enriched/entries/0948_Q56460883.yaml +++ b/data/nde/enriched/entries/0948_Q56460883.yaml @@ -594,18 +594,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:40.749367+00:00' source_archive: web/0948/rijssensmuseum.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Rijssens Museum - source_url: http://www.rijssensmuseum.nl - retrieved_on: '2025-11-29T17:41:51.197216+00:00' - xpath: /html/head/title - html_file: web/0948/rijssensmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:40.748254+00:00' - claim_type: description_short claim_value: Wij heten u van harte welkom op de website van het Rijssens Museum. Het Rijssens Museum is een stadsmuseum. Het toont de opmerkelijke en rijke geschiedenis @@ -662,6 +652,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:40.749237+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijssens Museum diff --git a/data/nde/enriched/entries/0949_Q56461263.yaml b/data/nde/enriched/entries/0949_Q56461263.yaml index d5ba8b5055..1db4b9fb3c 100644 --- a/data/nde/enriched/entries/0949_Q56461263.yaml +++ b/data/nde/enriched/entries/0949_Q56461263.yaml @@ -572,7 +572,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:41.149643+00:00' source_archive: web/0949/schoonewelle.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Museum voor Natuur, Ambacht en Exposities @@ -598,16 +598,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:41.148802+00:00' - - claim_type: org_name - claim_value: schoonewelle.nl - raw_value: schoonewelle.nl - source_url: http://www.schoonewelle.nl - retrieved_on: '2025-11-29T17:40:19.726937+00:00' - xpath: /html/head/meta[13] - html_file: web/0949/schoonewelle.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:47:41.148929+00:00' - claim_type: email claim_value: schoonewelle@schoonewelle.nl raw_value: schoonewelle@schoonewelle.nl @@ -638,6 +628,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:41.149570+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Schoonewelle Museum voor Natuur en Ambacht diff --git a/data/nde/enriched/entries/0955_Q17611858.yaml b/data/nde/enriched/entries/0955_Q17611858.yaml index 176636fba4..aeaf4bbe8e 100644 --- a/data/nde/enriched/entries/0955_Q17611858.yaml +++ b/data/nde/enriched/entries/0955_Q17611858.yaml @@ -573,7 +573,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:42.673956+00:00' source_archive: web/0955/stadsmuseumalmelo.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Stedelijk Museum Almelo @@ -637,16 +637,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:42.673543+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.stadsmuseumalmelo.nl - retrieved_on: '2025-11-29T17:40:23.068502+00:00' - xpath: /html/body/main/div[1]/h1 - html_file: web/0955/stadsmuseumalmelo.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:47:42.673610+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stedelijk Museum Almelo diff --git a/data/nde/enriched/entries/0960_unknown.yaml b/data/nde/enriched/entries/0960_unknown.yaml index 7fcf9a797f..b0481a4d36 100644 --- a/data/nde/enriched/entries/0960_unknown.yaml +++ b/data/nde/enriched/entries/0960_unknown.yaml @@ -264,18 +264,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:43.962831+00:00' source_archive: web/0960/ijssellinie.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.ijssellinie.nl/ - retrieved_on: '2025-11-29T17:40:49.079494+00:00' - xpath: /html/head/title - html_file: web/0960/ijssellinie.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:43.961788+00:00' - claim_type: description_short claim_value: