From 8ebca2f84597c55f828a1b3970f9c656ae9b3b04 Mon Sep 17 00:00:00 2001 From: kempersc Date: Tue, 2 Dec 2025 00:00:45 +0100 Subject: [PATCH] add pid --- data/nde/enriched/entries/0005_Q81181263.yaml | 20 +- data/nde/enriched/entries/0006_Q81181245.yaml | 20 +- data/nde/enriched/entries/0007_Q81181227.yaml | 20 +- data/nde/enriched/entries/0011_Q81181243.yaml | 20 +- data/nde/enriched/entries/0012_Q81181321.yaml | 20 +- data/nde/enriched/entries/0014_Q81181341.yaml | 40 +-- data/nde/enriched/entries/0015_Q81181229.yaml | 20 +- data/nde/enriched/entries/0016_Q81181377.yaml | 20 +- .../0017_harmonium_museum_nederland.yaml | 20 +- .../entries/0020_roel_van_der_sleen.yaml | 20 +- data/nde/enriched/entries/0022_Q1911968.yaml | 20 +- data/nde/enriched/entries/0023_Q22006174.yaml | 20 +- data/nde/enriched/entries/0025_Q56461228.yaml | 20 +- data/nde/enriched/entries/0027_Q19832258.yaml | 20 +- .../nde/enriched/entries/0028_aold_daoln.yaml | 20 +- .../nde/enriched/entries/0029_Q110995917.yaml | 20 +- data/nde/enriched/entries/0037_Q1345886.yaml | 20 +- .../nde/enriched/entries/0039_Q108369683.yaml | 20 +- .../0041_asser_historische_vereniging.yaml | 20 +- .../0042_asser_historische_vereniging_2.yaml | 20 +- .../0043_historische_vereniging_gasselte.yaml | 20 +- data/nde/enriched/entries/0047_Q30277559.yaml | 20 +- data/nde/enriched/entries/0069_Q13137174.yaml | 20 +- data/nde/enriched/entries/0070_Q17517652.yaml | 30 +- data/nde/enriched/entries/0072_Q3457274.yaml | 20 +- data/nde/enriched/entries/0080_Q1942351.yaml | 20 +- data/nde/enriched/entries/0082_Q2201887.yaml | 30 +- data/nde/enriched/entries/0086_Q13137164.yaml | 20 +- data/nde/enriched/entries/0093_Q81181219.yaml | 20 +- data/nde/enriched/entries/0095_Q81181251.yaml | 40 +-- data/nde/enriched/entries/0099_Q81181286.yaml | 50 +-- data/nde/enriched/entries/0101_Q81181296.yaml | 40 +-- .../nde/enriched/entries/0102_Q121225319.yaml | 40 +-- data/nde/enriched/entries/0103_Q81181358.yaml | 20 +- data/nde/enriched/entries/0104_Q2754844.yaml | 20 +- data/nde/enriched/entries/0107_Q81181235.yaml | 20 +- data/nde/enriched/entries/0109_Q81181393.yaml | 20 +- data/nde/enriched/entries/0110_Q81181206.yaml | 20 +- data/nde/enriched/entries/0111_Q81181373.yaml | 20 +- data/nde/enriched/entries/0115_Q13136222.yaml | 20 +- data/nde/enriched/entries/0122_Q12013196.yaml | 20 +- ..._museum_oold_ark_korenmolen_de_weyert.yaml | 20 +- data/nde/enriched/entries/0125_Q13137340.yaml | 20 +- data/nde/enriched/entries/0126_Q13137168.yaml | 20 +- data/nde/enriched/entries/0131_Q81181312.yaml | 20 +- .../entries/0139_de_hollandse_cirkel.yaml | 20 +- data/nde/enriched/entries/0140_Q17595943.yaml | 20 +- data/nde/enriched/entries/0143_Q1127079.yaml | 20 +- data/nde/enriched/entries/0144_Q2710899.yaml | 20 +- data/nde/enriched/entries/0145_Q2654815.yaml | 30 +- data/nde/enriched/entries/0146_Q1663974.yaml | 20 +- data/nde/enriched/entries/0149_Q98894593.yaml | 20 +- data/nde/enriched/entries/0155_Q13636575.yaml | 20 +- .../nde/enriched/entries/0157_Q104033126.yaml | 20 +- data/nde/enriched/entries/0158_Q2330735.yaml | 20 +- data/nde/enriched/entries/0159_Q4688292.yaml | 20 +- data/nde/enriched/entries/0165_Q81181326.yaml | 20 +- data/nde/enriched/entries/0169_Q2332962.yaml | 20 +- data/nde/enriched/entries/0170_Q2365901.yaml | 30 +- data/nde/enriched/entries/0172_Q98907725.yaml | 20 +- ...0174_heiligenbeeldenmuseum_kranenburg.yaml | 20 +- data/nde/enriched/entries/0177_Q7476442.yaml | 40 +-- data/nde/enriched/entries/0178_Q98904482.yaml | 20 +- data/nde/enriched/entries/0184_Q38677497.yaml | 20 +- data/nde/enriched/entries/0186_Q98894809.yaml | 20 +- data/nde/enriched/entries/0201_Q98895215.yaml | 20 +- data/nde/enriched/entries/0202_Q61930724.yaml | 20 +- .../entries/0203_stichting_korpora.yaml | 20 +- data/nde/enriched/entries/0213_Q54957003.yaml | 30 +- .../nde/enriched/entries/0219_Q109382770.yaml | 20 +- data/nde/enriched/entries/0220_Q98961921.yaml | 128 +------- data/nde/enriched/entries/0225_Q98908556.yaml | 30 +- data/nde/enriched/entries/0226_Q98893885.yaml | 20 +- data/nde/enriched/entries/0230_Q18774274.yaml | 40 +-- data/nde/enriched/entries/0234_Q2710899.yaml | 20 +- data/nde/enriched/entries/0245_Q98907726.yaml | 20 +- data/nde/enriched/entries/0246_Q98895220.yaml | 20 +- data/nde/enriched/entries/0251_Q26203717.yaml | 24 +- data/nde/enriched/entries/0252_Q81181406.yaml | 20 +- data/nde/enriched/entries/0258_Q56459713.yaml | 20 +- .../nde/enriched/entries/0262_Q110995923.yaml | 20 +- data/nde/enriched/entries/0263_Q98904476.yaml | 20 +- .../0273_streekarchief_bommelerwaard.yaml | 24 +- .../nde/enriched/entries/0276_Q111190981.yaml | 20 +- data/nde/enriched/entries/0280_Q56459657.yaml | 20 +- ...vereniging_oudheidkunde_lichtenvoorde.yaml | 20 +- data/nde/enriched/entries/0285_Q2470853.yaml | 40 +-- data/nde/enriched/entries/0286_Q484899.yaml | 20 +- .../nde/enriched/entries/0295_Q121900753.yaml | 20 +- data/nde/enriched/entries/0309_Q81181191.yaml | 60 +--- data/nde/enriched/entries/0311_Q81181383.yaml | 60 +--- data/nde/enriched/entries/0313_Q81181241.yaml | 60 +--- data/nde/enriched/entries/0315_Q81181260.yaml | 20 +- data/nde/enriched/entries/0316_Q81181282.yaml | 20 +- data/nde/enriched/entries/0317_Q81181273.yaml | 50 +-- data/nde/enriched/entries/0318_Q81181307.yaml | 20 +- data/nde/enriched/entries/0319_Q81181310.yaml | 60 +--- data/nde/enriched/entries/0320_Q81181318.yaml | 20 +- data/nde/enriched/entries/0321_Q81181324.yaml | 50 +-- data/nde/enriched/entries/0322_Q81181387.yaml | 20 +- data/nde/enriched/entries/0323_Q81181329.yaml | 30 +- data/nde/enriched/entries/0324_Q81181354.yaml | 50 +-- data/nde/enriched/entries/0326_Q81181363.yaml | 20 +- data/nde/enriched/entries/0327_Q81181368.yaml | 30 +- data/nde/enriched/entries/0328_Q81181347.yaml | 60 +--- .../nde/enriched/entries/0329_Q121292045.yaml | 60 +--- data/nde/enriched/entries/0331_Q81181399.yaml | 20 +- data/nde/enriched/entries/0332_Q23987486.yaml | 50 +-- .../nde/enriched/entries/0333_Q114079325.yaml | 20 +- data/nde/enriched/entries/0334_Q2190733.yaml | 20 +- data/nde/enriched/entries/0337_Q56460901.yaml | 20 +- .../nde/enriched/entries/0345_Q110981303.yaml | 20 +- .../0346_stichting_historie_stedum.yaml | 20 +- data/nde/enriched/entries/0348_Q15879552.yaml | 20 +- data/nde/enriched/entries/0350_Q81181257.yaml | 20 +- data/nde/enriched/entries/0351_Q81181371.yaml | 20 +- data/nde/enriched/entries/0356_Q9777.yaml | 60 +--- data/nde/enriched/entries/0364_Q59962272.yaml | 20 +- .../nde/enriched/entries/0366_Q121224886.yaml | 20 +- .../nde/enriched/entries/0369_Q135412874.yaml | 20 +- data/nde/enriched/entries/0370_Q13442809.yaml | 70 +--- .../0371_historisch_centrum_limburg.yaml | 20 +- .../nde/enriched/entries/0372_Q111080967.yaml | 20 +- data/nde/enriched/entries/0375_Q81181279.yaml | 20 +- data/nde/enriched/entries/0378_Q81181301.yaml | 20 +- .../nde/enriched/entries/0379_Q111190984.yaml | 20 +- .../nde/enriched/entries/0381_Q107341629.yaml | 20 +- .../nde/enriched/entries/0384_Q127703473.yaml | 20 +- .../entries/0389_heemkunde_heibloem.yaml | 20 +- .../entries/0390_heemkunde_margraten.yaml | 32 +- .../nde/enriched/entries/0391_Q111081387.yaml | 20 +- .../0393_heemkundekring_sankt_tolbert.yaml | 20 +- .../0399_heemkundevereniging_roggel.yaml | 20 +- .../nde/enriched/entries/0402_Q117843367.yaml | 20 +- data/nde/enriched/entries/0403_Q21004453.yaml | 20 +- .../nde/enriched/entries/0414_Q111081369.yaml | 20 +- data/nde/enriched/entries/0417_Q2126143.yaml | 20 +- .../nde/enriched/entries/0420_Q110053532.yaml | 20 +- data/nde/enriched/entries/0427_Q2202460.yaml | 20 +- data/nde/enriched/entries/0430_unknown.yaml | 20 +- data/nde/enriched/entries/0432_Q56460988.yaml | 40 +-- data/nde/enriched/entries/0441_Q892727.yaml | 30 +- data/nde/enriched/entries/0442_unknown.yaml | 20 +- data/nde/enriched/entries/0453_Q59962312.yaml | 20 +- .../nde/enriched/entries/0457_Q110907483.yaml | 20 +- data/nde/enriched/entries/0461_Q1278103.yaml | 20 +- data/nde/enriched/entries/0468_unknown.yaml | 21 +- data/nde/enriched/entries/0469_Q2173323.yaml | 20 +- data/nde/enriched/entries/0471_unknown.yaml | 22 +- data/nde/enriched/entries/0472_Q1969635.yaml | 20 +- .../nde/enriched/entries/0474_Q110907346.yaml | 20 +- .../nde/enriched/entries/0476_Q110907502.yaml | 20 +- .../nde/enriched/entries/0481_Q110907480.yaml | 20 +- .../nde/enriched/entries/0484_Q110907488.yaml | 20 +- data/nde/enriched/entries/0486_unknown.yaml | 20 +- .../nde/enriched/entries/0496_Q110907501.yaml | 20 +- .../nde/enriched/entries/0498_Q110907495.yaml | 20 +- .../nde/enriched/entries/0502_Q110907460.yaml | 60 +--- .../nde/enriched/entries/0515_Q110907441.yaml | 20 +- .../nde/enriched/entries/0521_Q110907442.yaml | 40 +-- .../nde/enriched/entries/0522_Q110907510.yaml | 40 +-- .../nde/enriched/entries/0526_Q110907473.yaml | 20 +- .../nde/enriched/entries/0527_Q110907499.yaml | 50 +-- .../nde/enriched/entries/0530_Q110907466.yaml | 20 +- .../nde/enriched/entries/0534_Q110908866.yaml | 20 +- .../nde/enriched/entries/0536_Q115131080.yaml | 20 +- data/nde/enriched/entries/0537_Q2423105.yaml | 20 +- data/nde/enriched/entries/0539_Q1842735.yaml | 20 +- .../nde/enriched/entries/0540_Q110906682.yaml | 42 +-- data/nde/enriched/entries/0547_Q3483633.yaml | 50 +-- .../nde/enriched/entries/0552_Q110907382.yaml | 20 +- data/nde/enriched/entries/0553_Q27949674.yaml | 20 +- data/nde/enriched/entries/0554_Q2112422.yaml | 20 +- data/nde/enriched/entries/0559_Q2119394.yaml | 20 +- data/nde/enriched/entries/0566_Q4295172.yaml | 40 +-- data/nde/enriched/entries/0567_Q1823028.yaml | 20 +- data/nde/enriched/entries/0569_Q56459509.yaml | 20 +- .../nde/enriched/entries/0575_Q110907548.yaml | 50 +-- data/nde/enriched/entries/0580_Q2176121.yaml | 20 +- data/nde/enriched/entries/0581_Q2783790.yaml | 20 +- data/nde/enriched/entries/0592_Q2375610.yaml | 20 +- data/nde/enriched/entries/0594_unknown.yaml | 20 +- data/nde/enriched/entries/0595_Q2297235.yaml | 20 +- data/nde/enriched/entries/0598_Q3983824.yaml | 20 +- data/nde/enriched/entries/0603_Q56461052.yaml | 20 +- data/nde/enriched/entries/0606_Q10896041.yaml | 20 +- .../nde/enriched/entries/0610_Q110907548.yaml | 50 +-- .../nde/enriched/entries/0612_Q110907546.yaml | 20 +- data/nde/enriched/entries/0613_Q18286289.yaml | 20 +- data/nde/enriched/entries/0614_Q2395096.yaml | 20 +- data/nde/enriched/entries/0615_Q20970639.yaml | 20 +- data/nde/enriched/entries/0618_Q4452658.yaml | 20 +- data/nde/enriched/entries/0633_Q315883.yaml | 20 +- data/nde/enriched/entries/0635_Q2919762.yaml | 20 +- data/nde/enriched/entries/0638_unknown.yaml | 20 +- data/nde/enriched/entries/0643_Q56459403.yaml | 20 +- data/nde/enriched/entries/0644_Q56461228.yaml | 20 +- data/nde/enriched/entries/0647_unknown.yaml | 40 +-- data/nde/enriched/entries/0649_Q1282056.yaml | 30 +- .../nde/enriched/entries/0651_Q110995897.yaml | 20 +- .../nde/enriched/entries/0653_Q133734238.yaml | 20 +- data/nde/enriched/entries/0656_Q574961.yaml | 20 +- data/nde/enriched/entries/0658_unknown.yaml | 20 +- data/nde/enriched/entries/0659_Q9971.yaml | 20 +- data/nde/enriched/entries/0660_Q9980.yaml | 50 +-- .../nde/enriched/entries/0661_Q124843656.yaml | 20 +- data/nde/enriched/entries/0664_Q2574390.yaml | 20 +- data/nde/enriched/entries/0670_Q1616123.yaml | 20 +- data/nde/enriched/entries/0671_Q3049198.yaml | 20 +- data/nde/enriched/entries/0672_Q2335767.yaml | 20 +- data/nde/enriched/entries/0680_unknown.yaml | 20 +- data/nde/enriched/entries/0686_unknown.yaml | 20 +- data/nde/enriched/entries/0690_unknown.yaml | 20 +- data/nde/enriched/entries/0691_unknown.yaml | 20 +- data/nde/enriched/entries/0695_Q4469762.yaml | 20 +- .../nde/enriched/entries/0702_Q110907392.yaml | 20 +- data/nde/enriched/entries/0704_Q702726.yaml | 40 +-- data/nde/enriched/entries/0707_Q18654836.yaml | 30 +- data/nde/enriched/entries/0709_Q17402020.yaml | 40 +-- .../nde/enriched/entries/0711_Q110671441.yaml | 20 +- data/nde/enriched/entries/0718_Q18285904.yaml | 20 +- data/nde/enriched/entries/0721_Q277316.yaml | 20 +- data/nde/enriched/entries/0726_Q19827882.yaml | 20 +- data/nde/enriched/entries/0728_Q493160.yaml | 20 +- data/nde/enriched/entries/0733_unknown.yaml | 20 +- data/nde/enriched/entries/0746_Q474823.yaml | 20 +- .../nde/enriched/entries/0749_Q126174339.yaml | 20 +- data/nde/enriched/entries/0750_unknown.yaml | 20 +- data/nde/enriched/entries/0755_Q701.yaml | 20 +- data/nde/enriched/entries/0757_Q2189005.yaml | 20 +- data/nde/enriched/entries/0759_Q190804.yaml | 20 +- .../nde/enriched/entries/0760_Q113006081.yaml | 30 +- data/nde/enriched/entries/0764_Q2170763.yaml | 40 +-- data/nde/enriched/entries/0771_Q50038175.yaml | 20 +- data/nde/enriched/entries/0778_Q702726.yaml | 40 +-- .../nde/enriched/entries/0790_Q122922125.yaml | 20 +- data/nde/enriched/entries/0792_unknown.yaml | 20 +- data/nde/enriched/entries/0807_Q14856958.yaml | 20 +- data/nde/enriched/entries/0816_Q2335767.yaml | 20 +- data/nde/enriched/entries/0821_unknown.yaml | 20 +- .../nde/enriched/entries/0840_Q110995895.yaml | 20 +- .../nde/enriched/entries/0842_Q110891772.yaml | 20 +- data/nde/enriched/entries/0843_Q85311353.yaml | 60 +--- .../nde/enriched/entries/0847_Q110891825.yaml | 20 +- data/nde/enriched/entries/0849_unknown.yaml | 20 +- data/nde/enriched/entries/0850_Q76885143.yaml | 20 +- .../nde/enriched/entries/0853_Q110891801.yaml | 20 +- .../nde/enriched/entries/0856_Q110891796.yaml | 20 +- data/nde/enriched/entries/0857_Q81181239.yaml | 20 +- data/nde/enriched/entries/0860_unknown.yaml | 20 +- .../nde/enriched/entries/0862_Q121224964.yaml | 20 +- .../nde/enriched/entries/0866_Q121225125.yaml | 42 +-- .../nde/enriched/entries/0870_Q121224972.yaml | 20 +- .../nde/enriched/entries/0873_Q110891812.yaml | 20 +- data/nde/enriched/entries/0883_Q13726962.yaml | 20 +- .../nde/enriched/entries/0890_Q110891816.yaml | 20 +- data/nde/enriched/entries/0893_unknown.yaml | 21 +- .../nde/enriched/entries/0900_Q110891747.yaml | 20 +- .../nde/enriched/entries/0912_Q110891756.yaml | 20 +- data/nde/enriched/entries/0914_unknown.yaml | 20 +- .../nde/enriched/entries/0916_Q110891804.yaml | 20 +- data/nde/enriched/entries/0919_unknown.yaml | 30 +- .../nde/enriched/entries/0921_Q134993513.yaml | 20 +- .../nde/enriched/entries/0925_Q125421055.yaml | 40 +-- .../nde/enriched/entries/0934_Q110891759.yaml | 20 +- data/nde/enriched/entries/0935_Q2755329.yaml | 20 +- data/nde/enriched/entries/0939_Q56460838.yaml | 20 +- data/nde/enriched/entries/0943_unknown.yaml | 20 +- data/nde/enriched/entries/0948_Q56460883.yaml | 20 +- data/nde/enriched/entries/0949_Q56461263.yaml | 20 +- data/nde/enriched/entries/0955_Q17611858.yaml | 20 +- data/nde/enriched/entries/0960_unknown.yaml | 20 +- .../nde/enriched/entries/0961_Q110891767.yaml | 20 +- .../nde/enriched/entries/0964_Q110891795.yaml | 20 +- .../nde/enriched/entries/0969_Q110891746.yaml | 20 +- .../nde/enriched/entries/0971_Q110891750.yaml | 20 +- data/nde/enriched/entries/0973_Q56461027.yaml | 20 +- data/nde/enriched/entries/0976_unknown.yaml | 20 +- .../nde/enriched/entries/0978_Q110891779.yaml | 21 +- data/nde/enriched/entries/0983_Q98961290.yaml | 20 +- .../nde/enriched/entries/0990_Q110891821.yaml | 20 +- .../nde/enriched/entries/0995_Q110891786.yaml | 20 +- data/nde/enriched/entries/0997_unknown.yaml | 21 +- data/nde/enriched/entries/0998_unknown.yaml | 72 +--- data/nde/enriched/entries/0999_unknown.yaml | 40 +-- .../nde/enriched/entries/1002_Q110891779.yaml | 21 +- .../nde/enriched/entries/1007_Q110891796.yaml | 20 +- data/nde/enriched/entries/1009_Q81181187.yaml | 40 +-- data/nde/enriched/entries/1011_unknown.yaml | 20 +- data/nde/enriched/entries/1015_Q572269.yaml | 20 +- data/nde/enriched/entries/1021_unknown.yaml | 20 +- data/nde/enriched/entries/1023_Q2346824.yaml | 20 +- data/nde/enriched/entries/1033_unknown.yaml | 20 +- data/nde/enriched/entries/1034_unknown.yaml | 20 +- data/nde/enriched/entries/1045_Q3389883.yaml | 20 +- .../nde/enriched/entries/1047_Q110995912.yaml | 20 +- data/nde/enriched/entries/1052_Q2493502.yaml | 30 +- data/nde/enriched/entries/1059_Q963825.yaml | 20 +- data/nde/enriched/entries/1060_Q2144884.yaml | 40 +-- data/nde/enriched/entries/1062_Q19974153.yaml | 20 +- data/nde/enriched/entries/1064_Q2418919.yaml | 20 +- .../nde/enriched/entries/1065_Q110292105.yaml | 20 +- data/nde/enriched/entries/1071_Q2545824.yaml | 310 +----------------- data/nde/enriched/entries/1074_Q1766396.yaml | 20 +- data/nde/enriched/entries/1075_Q2131198.yaml | 30 +- data/nde/enriched/entries/1081_unknown.yaml | 20 +- data/nde/enriched/entries/1087_Q29944161.yaml | 20 +- data/nde/enriched/entries/1089_Q2519854.yaml | 20 +- data/nde/enriched/entries/1093_unknown.yaml | 20 +- data/nde/enriched/entries/1095_Q55076484.yaml | 20 +- data/nde/enriched/entries/1096_Q18775346.yaml | 20 +- data/nde/enriched/entries/1097_Q56460926.yaml | 20 +- data/nde/enriched/entries/1102_unknown.yaml | 20 +- data/nde/enriched/entries/1108_Q2643296.yaml | 50 +-- data/nde/enriched/entries/1109_Q55076504.yaml | 20 +- data/nde/enriched/entries/1113_Q1852477.yaml | 20 +- data/nde/enriched/entries/1117_unknown.yaml | 60 +--- data/nde/enriched/entries/1118_Q70354640.yaml | 30 +- data/nde/enriched/entries/1119_Q81181278.yaml | 20 +- .../nde/enriched/entries/1122_Q111363451.yaml | 20 +- data/nde/enriched/entries/1125_Q56459326.yaml | 20 +- data/nde/enriched/entries/1131_Q4288330.yaml | 20 +- data/nde/enriched/entries/1132_Q56459493.yaml | 20 +- .../nde/enriched/entries/1133_Q110282067.yaml | 50 +-- data/nde/enriched/entries/1134_Q23900557.yaml | 20 +- data/nde/enriched/entries/1136_unknown.yaml | 20 +- data/nde/enriched/entries/1137_Q56459493.yaml | 20 +- data/nde/enriched/entries/1141_Q2153365.yaml | 32 +- data/nde/enriched/entries/1144_unknown.yaml | 20 +- data/nde/enriched/entries/1146_Q26432.yaml | 20 +- data/nde/enriched/entries/1158_Q2654416.yaml | 24 +- data/nde/enriched/entries/1160_unknown.yaml | 20 +- data/nde/enriched/entries/1162_Q2632714.yaml | 20 +- data/nde/enriched/entries/1165_Q1821169.yaml | 20 +- data/nde/enriched/entries/1166_Q2041110.yaml | 20 +- data/nde/enriched/entries/1176_Q2036123.yaml | 20 +- data/nde/enriched/entries/1180_Q2041110.yaml | 20 +- data/nde/enriched/entries/1181_Q59486.yaml | 20 +- data/nde/enriched/entries/1182_Q3141841.yaml | 20 +- data/nde/enriched/entries/1184_Q1155243.yaml | 20 +- data/nde/enriched/entries/1188_Q2874177.yaml | 80 +---- .../nde/enriched/entries/1191_Q135734962.yaml | 20 +- .../nde/enriched/entries/1192_Q111190988.yaml | 20 +- data/nde/enriched/entries/1193_unknown.yaml | 20 +- data/nde/enriched/entries/1197_Q3229492.yaml | 20 +- data/nde/enriched/entries/1199_Q1857094.yaml | 20 +- .../nde/enriched/entries/1202_Q124386169.yaml | 20 +- data/nde/enriched/entries/1203_Q15224245.yaml | 20 +- .../nde/enriched/entries/1210_invalid_id.yaml | 20 +- data/nde/enriched/entries/1212_Q2584045.yaml | 20 +- data/nde/enriched/entries/1213_Q3046110.yaml | 40 +-- data/nde/enriched/entries/1214_Q2619632.yaml | 20 +- data/nde/enriched/entries/1215_Q2304570.yaml | 21 +- data/nde/enriched/entries/1217_Q2425770.yaml | 80 +---- data/nde/enriched/entries/1221_Q1930398.yaml | 20 +- data/nde/enriched/entries/1233_Q2653692.yaml | 44 +-- data/nde/enriched/entries/1249_Q679527.yaml | 90 +---- data/nde/enriched/entries/1250_Q2216754.yaml | 21 +- data/nde/enriched/entries/1256_Q4360916.yaml | 20 +- data/nde/enriched/entries/1259_Q2754878.yaml | 20 +- data/nde/enriched/entries/1265_Q4011822.yaml | 30 +- data/nde/enriched/entries/1272_Q13736930.yaml | 20 +- .../nde/enriched/entries/1274_invalid_id.yaml | 20 +- data/nde/enriched/entries/1275_Q21113770.yaml | 20 +- data/nde/enriched/entries/1276_Q3311591.yaml | 20 +- data/nde/enriched/entries/1277_Q1893708.yaml | 20 +- data/nde/enriched/entries/1279_Q1919182.yaml | 20 +- data/nde/enriched/entries/1285_Q2797811.yaml | 20 +- data/nde/enriched/entries/1289_Q2367258.yaml | 20 +- data/nde/enriched/entries/1291_Q694.yaml | 40 +-- data/nde/enriched/entries/1292_Q2103699.yaml | 30 +- data/nde/enriched/entries/1293_Q758610.yaml | 20 +- data/nde/enriched/entries/1294_Q15881312.yaml | 20 +- data/nde/enriched/entries/1297_Q758610.yaml | 20 +- data/nde/enriched/entries/1298_Q2632714.yaml | 20 +- data/nde/enriched/entries/1302_Q3912134.yaml | 80 +---- .../nde/enriched/entries/1311_Q136473440.yaml | 40 +-- data/nde/enriched/entries/1314_Q1456512.yaml | 30 +- .../nde/enriched/entries/1321_invalid_id.yaml | 30 +- data/nde/enriched/entries/1326_Q81181215.yaml | 20 +- data/nde/enriched/entries/1328_Q81181253.yaml | 20 +- data/nde/enriched/entries/1329_Q81181182.yaml | 20 +- data/nde/enriched/entries/1330_Q81181215.yaml | 20 +- data/nde/enriched/entries/1336_Q1872824.yaml | 40 +-- .../nde/enriched/entries/1340_invalid_id.yaml | 21 +- data/nde/enriched/entries/1342_Q2803129.yaml | 50 +-- data/nde/enriched/entries/1344_Q2036123.yaml | 20 +- data/nde/enriched/entries/1348_Q933459.yaml | 20 +- .../nde/enriched/entries/1351_Q110282063.yaml | 20 +- data/nde/enriched/entries/1357_kb_isil.yaml | 20 +- data/nde/enriched/entries/1359_kb_isil.yaml | 30 +- data/nde/enriched/entries/1362_kb_isil.yaml | 20 +- data/nde/enriched/entries/1367_kb_isil.yaml | 20 +- data/nde/enriched/entries/1371_kb_isil.yaml | 20 +- data/nde/enriched/entries/1373_kb_isil.yaml | 20 +- data/nde/enriched/entries/1374_kb_isil.yaml | 20 +- data/nde/enriched/entries/1388_kb_isil.yaml | 20 +- data/nde/enriched/entries/1389_kb_isil.yaml | 20 +- data/nde/enriched/entries/1393_kb_isil.yaml | 20 +- data/nde/enriched/entries/1397_kb_isil.yaml | 20 +- data/nde/enriched/entries/1406_kb_isil.yaml | 30 +- data/nde/enriched/entries/1407_kb_isil.yaml | 20 +- data/nde/enriched/entries/1408_kb_isil.yaml | 20 +- data/nde/enriched/entries/1416_kb_isil.yaml | 20 +- data/nde/enriched/entries/1421_kb_isil.yaml | 40 +-- data/nde/enriched/entries/1422_kb_isil.yaml | 20 +- data/nde/enriched/entries/1424_kb_isil.yaml | 20 +- data/nde/enriched/entries/1428_kb_isil.yaml | 30 +- data/nde/enriched/entries/1429_kb_isil.yaml | 20 +- data/nde/enriched/entries/1432_kb_isil.yaml | 20 +- data/nde/enriched/entries/1434_kb_isil.yaml | 20 +- data/nde/enriched/entries/1446_kb_isil.yaml | 20 +- data/nde/enriched/entries/1449_kb_isil.yaml | 30 +- data/nde/enriched/entries/1450_kb_isil.yaml | 20 +- data/nde/enriched/entries/1451_kb_isil.yaml | 20 +- data/nde/enriched/entries/1452_kb_isil.yaml | 20 +- data/nde/enriched/entries/1454_kb_isil.yaml | 20 +- data/nde/enriched/entries/1459_kb_isil.yaml | 40 +-- data/nde/enriched/entries/1466_kb_isil.yaml | 20 +- data/nde/enriched/entries/1470_kb_isil.yaml | 20 +- data/nde/enriched/entries/1474_kb_isil.yaml | 20 +- data/nde/enriched/entries/1481_kb_isil.yaml | 20 +- data/nde/enriched/entries/1486_kb_isil.yaml | 20 +- data/nde/enriched/entries/1487_kb_isil.yaml | 20 +- data/nde/enriched/entries/1489_kb_isil.yaml | 40 +-- data/nde/enriched/entries/1490_kb_isil.yaml | 20 +- data/nde/enriched/entries/1492_kb_isil.yaml | 20 +- data/nde/enriched/entries/1493_kb_isil.yaml | 40 +-- data/nde/enriched/entries/1496_kb_isil.yaml | 20 +- data/nde/enriched/entries/1500_kb_isil.yaml | 20 +- data/nde/enriched/entries/1501_kb_isil.yaml | 20 +- .../entries/1502_huygens_instituut_hi.yaml | 20 +- ...6_sociaal_en_cultureel_planbureau_scp.yaml | 20 +- ...historische_kring_albertus_perk_hhkap.yaml | 20 +- data/nde/enriched/entries/1510_kitlv.yaml | 20 +- .../1513_stadsarchief_zoetermeer_saz.yaml | 20 +- .../1516_het_scheepvaartmuseum_amsterdam.yaml | 20 +- .../entries/1517_rijksmuseum_amsterdam.yaml | 40 +-- .../1518_museum_huis_van_het_boek.yaml | 20 +- .../entries/1520_museum_kaap_skil.yaml | 20 +- .../enriched/entries/1522_valkhof_museum.yaml | 20 +- .../enriched/entries/1524_leudalmuseum.yaml | 20 +- ...urhistorisch_museum_texel_de_waelstee.yaml | 20 +- .../entries/1529_museum_belv_d_re.yaml | 20 +- .../1531_museum_veere_incl_stadhuis.yaml | 20 +- .../entries/1533_frans_hals_museum.yaml | 20 +- ...1534_museum_stoomtrein_katwijk_leiden.yaml | 30 +- .../entries/1539_museon_omniversum.yaml | 20 +- .../entries/1543_textielmuseum_tilburg.yaml | 20 +- data/nde/enriched/entries/1544_ecomare.yaml | 30 +- .../entries/1545_het_nieuwe_domein.yaml | 50 +-- .../entries/1547_anne_frank_huis.yaml | 20 +- .../entries/1552_hildo_krop_museum.yaml | 20 +- .../entries/1553_museum_bronbeek.yaml | 20 +- .../1559_brandweermuseum_hellevoetsluis.yaml | 20 +- .../1560_museum_urk_het_oude_raadhuis.yaml | 20 +- .../1566_speelgoedmuseum_oosterhout.yaml | 20 +- .../1568_embassy_of_the_free_mind.yaml | 20 +- .../1570_grachtenmuseum_amsterdam.yaml | 20 +- .../1576_nationaal_bomenmuseum_gimborn.yaml | 20 +- .../enriched/entries/1581_joods_museum.yaml | 40 +-- ...e_archeologiemuseum_provincie_noord_h.yaml | 30 +- ...eum_boijmans_van_beuningen_incl_depot.yaml | 80 +---- ...1_literatuurmuseum_kinderboekenmuseum.yaml | 20 +- .../nde/enriched/entries/1612_muiderslot.yaml | 30 +- .../1614_nationaal_glasmuseum_leerdam.yaml | 40 +-- .../enriched/entries/1617_museum_anno.yaml | 40 +-- .../1620_walburgiskerk_incl_librije.yaml | 20 +- .../entries/1623_hendrick_hamel_museum.yaml | 40 +-- .../entries/1627_bonnefanten_museum.yaml | 30 +- .../entries/1628_museum_gevangenpoort.yaml | 20 +- .../entries/1638_de_mesdag_collectie.yaml | 40 +-- .../entries/1640_hof_van_nederland.yaml | 70 +--- .../enriched/entries/1644_huis_van_gijn.yaml | 80 +---- .../1647_marius_van_dokkum_museum.yaml | 30 +- .../enriched/entries/1654_sudergemaal.yaml | 20 +- .../entries/1661_stichting_oer_ij.yaml | 20 +- .../1664_museumboerderij_tante_jaantje.yaml | 20 +- .../1669_heemkundevereniging_helden.yaml | 30 +- ...1670_historisch_genootschap_crommenie.yaml | 20 +- .../linkml/modules/classes/WebPortal.yaml | 55 +++- 481 files changed, 4376 insertions(+), 8123 deletions(-) diff --git a/data/nde/enriched/entries/0005_Q81181263.yaml b/data/nde/enriched/entries/0005_Q81181263.yaml index 8ad5eaa155..cbfed71d1a 100644 --- a/data/nde/enriched/entries/0005_Q81181263.yaml +++ b/data/nde/enriched/entries/0005_Q81181263.yaml @@ -536,18 +536,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:43:58.861472+00:00' source_archive: web/0005/aaenhunze.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Gemeente Aa en Hunze - source_url: https://www.aaenhunze.nl/ - retrieved_on: '2025-11-29T13:28:29.517181+00:00' - xpath: /html/head/title - html_file: web/0005/aaenhunze.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:43:58.860729+00:00' - claim_type: address claim_value: Spiekersteeg 1 raw_value: Spiekersteeg 1 @@ -628,6 +618,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:43:58.861346+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Aa en Hunze diff --git a/data/nde/enriched/entries/0006_Q81181245.yaml b/data/nde/enriched/entries/0006_Q81181245.yaml index c4ddd9876e..950dbed69d 100644 --- a/data/nde/enriched/entries/0006_Q81181245.yaml +++ b/data/nde/enriched/entries/0006_Q81181245.yaml @@ -507,18 +507,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:43:59.122650+00:00' source_archive: web/0006/borger-odoorn.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Borger-Odoorn - source_url: https://www.borger-odoorn.nl/ - retrieved_on: '2025-11-29T13:30:23.176866+00:00' - xpath: /html/head/title - html_file: web/0006/borger-odoorn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:43:59.122129+00:00' - claim_type: org_name claim_value: default icon raw_value: default icon @@ -581,6 +571,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:43:59.122541+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Borger-Odoorn diff --git a/data/nde/enriched/entries/0007_Q81181227.yaml b/data/nde/enriched/entries/0007_Q81181227.yaml index 5b7d2d3e9d..09a3c6ca56 100644 --- a/data/nde/enriched/entries/0007_Q81181227.yaml +++ b/data/nde/enriched/entries/0007_Q81181227.yaml @@ -518,18 +518,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:43:59.362549+00:00' source_archive: web/0007/coevorden.nl - claims_count: 17 + claims_count: 16 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Coevorden - source_url: https://www.coevorden.nl/ - retrieved_on: '2025-11-29T13:32:08.818759+00:00' - xpath: /html/head/title - html_file: web/0007/coevorden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:43:59.361804+00:00' - claim_type: org_name claim_value: paspoort icon raw_value: paspoort icon @@ -692,6 +682,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:43:59.362340+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Coevorden diff --git a/data/nde/enriched/entries/0011_Q81181243.yaml b/data/nde/enriched/entries/0011_Q81181243.yaml index f1c0fcc6b1..db97d6665d 100644 --- a/data/nde/enriched/entries/0011_Q81181243.yaml +++ b/data/nde/enriched/entries/0011_Q81181243.yaml @@ -549,18 +549,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:00.017321+00:00' source_archive: web/0011/gemeente.emmen.nl - claims_count: 32 + claims_count: 31 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Emmen - source_url: http://www.gemeente.emmen.nl - retrieved_on: '2025-11-29T14:24:46.985786+00:00' - xpath: /html/head/title - html_file: web/0011/gemeente.emmen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.016608+00:00' - claim_type: org_name claim_value: afspraak icon raw_value: afspraak icon @@ -871,6 +861,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:00.017183+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Emmen diff --git a/data/nde/enriched/entries/0012_Q81181321.yaml b/data/nde/enriched/entries/0012_Q81181321.yaml index 758bae6e80..178394ae10 100644 --- a/data/nde/enriched/entries/0012_Q81181321.yaml +++ b/data/nde/enriched/entries/0012_Q81181321.yaml @@ -492,18 +492,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:00.530508+00:00' source_archive: web/0012/meppel.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Meppel - source_url: http://www.meppel.nl - retrieved_on: '2025-11-29T14:27:04.292512+00:00' - xpath: /html/head/title - html_file: web/0012/meppel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.529951+00:00' - claim_type: description_short claim_value: De officiële website van Gemeente Meppel – Informatie over wonen, werken en leven in Meppel. @@ -586,6 +576,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:00.530413+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Meppel diff --git a/data/nde/enriched/entries/0014_Q81181341.yaml b/data/nde/enriched/entries/0014_Q81181341.yaml index f978135173..093c92f006 100644 --- a/data/nde/enriched/entries/0014_Q81181341.yaml +++ b/data/nde/enriched/entries/0014_Q81181341.yaml @@ -521,7 +521,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:00.780426+00:00' source_archive: web/0014/noordenveld.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: Home-NL @@ -553,36 +553,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:00.779541+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.noordenveld.nl/ - retrieved_on: '2025-11-29T14:24:46.856894+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0014/noordenveld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.779551+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.noordenveld.nl/ - retrieved_on: '2025-11-29T14:24:46.856894+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0014/noordenveld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.779555+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.noordenveld.nl/ - retrieved_on: '2025-11-29T14:24:46.856894+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0014/noordenveld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.779559+00:00' - claim_type: description_short claim_value: Homepagina Nederlands raw_value: Homepagina Nederlands @@ -643,6 +613,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:00.780230+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Noordenveld diff --git a/data/nde/enriched/entries/0015_Q81181229.yaml b/data/nde/enriched/entries/0015_Q81181229.yaml index d277e2fbb7..2714c629ca 100644 --- a/data/nde/enriched/entries/0015_Q81181229.yaml +++ b/data/nde/enriched/entries/0015_Q81181229.yaml @@ -367,18 +367,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:00.887651+00:00' source_archive: web/0015/gemeentewesterveld.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Gemeente Westerveld - source_url: https://www.gemeentewesterveld.nl/ - retrieved_on: '2025-11-29T14:24:47.974615+00:00' - xpath: /html/head/title - html_file: web/0015/gemeentewesterveld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:00.886185+00:00' - claim_type: email claim_value: info@gemeentewesterveld.nl raw_value: info@gemeentewesterveld.nl @@ -439,6 +429,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:00.887341+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Westerveld diff --git a/data/nde/enriched/entries/0016_Q81181377.yaml b/data/nde/enriched/entries/0016_Q81181377.yaml index 933253cbed..210afb8844 100644 --- a/data/nde/enriched/entries/0016_Q81181377.yaml +++ b/data/nde/enriched/entries/0016_Q81181377.yaml @@ -557,7 +557,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:01.020682+00:00' source_archive: web/0016/tynaarlo.nl - claims_count: 9 + claims_count: 8 claims: - claim_type: org_name claim_value: Gemeentelijk archief @@ -597,16 +597,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:44:01.020346+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.tynaarlo.nl%2Fbestuur-en-organisatie%2Fgemeentelijk-archief&t=Gemeentelijk+archief - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.tynaarlo.nl%2Fbestuur-en-organisatie%2Fgemeentelijk-archief&t=Gemeentelijk+archief - source_url: https://www.tynaarlo.nl/bestuur-en-organisatie/gemeentelijk-archief - retrieved_on: '2025-11-29T14:24:48.197833+00:00' - xpath: /html/body/div/main/section[2]/div[2]/div/div/div[1]/a - html_file: web/0016/tynaarlo.nl/pages/bestuur-en-organisatie_gemeentelijk-archief.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:01.020515+00:00' - claim_type: social_twitter claim_value: https://x.com/share?url=https%3A%2F%2Fwww.tynaarlo.nl%2Fbestuur-en-organisatie%2Fgemeentelijk-archief&text=Gemeentelijk+archief raw_value: https://x.com/share?url=https%3A%2F%2Fwww.tynaarlo.nl%2Fbestuur-en-organisatie%2Fgemeentelijk-archief&text=Gemeentelijk+archief @@ -657,6 +647,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:01.020542+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Tynaarlo diff --git a/data/nde/enriched/entries/0017_harmonium_museum_nederland.yaml b/data/nde/enriched/entries/0017_harmonium_museum_nederland.yaml index 619a84de81..ee4ce018c7 100644 --- a/data/nde/enriched/entries/0017_harmonium_museum_nederland.yaml +++ b/data/nde/enriched/entries/0017_harmonium_museum_nederland.yaml @@ -378,18 +378,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:01.302803+00:00' source_archive: web/0017/harmoniummuseumnederland.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Harmonium Museum Nederland - source_url: https://harmoniummuseumnederland.nl/ - retrieved_on: '2025-11-29T14:24:50.775403+00:00' - xpath: /html/head/title - html_file: web/0017/harmoniummuseumnederland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:01.302081+00:00' - claim_type: description_short claim_value: Lees alles over de achtergrond en de kerncollectie, die behouden bleef, van het Harmonium Museum Nederland, dat helaas is gesloten sinds eind @@ -443,6 +433,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:44:01.302638+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Harmonium Museum Nederland diff --git a/data/nde/enriched/entries/0020_roel_van_der_sleen.yaml b/data/nde/enriched/entries/0020_roel_van_der_sleen.yaml index 94b09fd799..b93452838a 100644 --- a/data/nde/enriched/entries/0020_roel_van_der_sleen.yaml +++ b/data/nde/enriched/entries/0020_roel_van_der_sleen.yaml @@ -375,18 +375,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:01.943174+00:00' source_archive: web/0020/historischekringhoogeveen.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historische Kring Hoogeveen - source_url: https://www.historischekringhoogeveen.nl/ - retrieved_on: '2025-11-29T14:24:51.613006+00:00' - xpath: /html/head/title - html_file: web/0020/historischekringhoogeveen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:01.942281+00:00' - claim_type: description_short claim_value: Description raw_value: Description @@ -397,6 +387,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:44:01.942394+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Kring Hoogeveen diff --git a/data/nde/enriched/entries/0022_Q1911968.yaml b/data/nde/enriched/entries/0022_Q1911968.yaml index 14f0a72c7f..e79fd0efab 100644 --- a/data/nde/enriched/entries/0022_Q1911968.yaml +++ b/data/nde/enriched/entries/0022_Q1911968.yaml @@ -565,18 +565,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:02.429976+00:00' source_archive: web/0022/smalspoorcentrum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Industrieel Smalspoor Museum Industrieel Smalspoor Museum - source_url: http://www.smalspoorcentrum.nl/ - retrieved_on: '2025-11-29T14:26:08.048987+00:00' - xpath: /html/head/title - html_file: web/0022/smalspoorcentrum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:02.428857+00:00' - claim_type: org_name claim_value: Industrieel Smalspoor Museum raw_value: Industrieel Smalspoor Museum @@ -637,6 +627,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:02.429890+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Industrieel Smalspoor Museum diff --git a/data/nde/enriched/entries/0023_Q22006174.yaml b/data/nde/enriched/entries/0023_Q22006174.yaml index 3aee500087..34585863f5 100644 --- a/data/nde/enriched/entries/0023_Q22006174.yaml +++ b/data/nde/enriched/entries/0023_Q22006174.yaml @@ -602,18 +602,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:02.855034+00:00' source_archive: web/0023/miramar-zeemuseum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Miramar Zeemuseum - source_url: https://miramar-zeemuseum.nl/ - retrieved_on: '2025-11-29T14:26:04.763022+00:00' - xpath: /html/head/title - html_file: web/0023/miramar-zeemuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:02.853829+00:00' - claim_type: description_short claim_value: Wat ooit begon met de fascinatie voor een schelp, gevonden op het strand van Mallorca, is uitgegroeid tot een waar natuurhistorisch museum in @@ -677,6 +667,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:02.854875+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Miramar Zeemuseum diff --git a/data/nde/enriched/entries/0025_Q56461228.yaml b/data/nde/enriched/entries/0025_Q56461228.yaml index 0c589c4a30..a1b201b93c 100644 --- a/data/nde/enriched/entries/0025_Q56461228.yaml +++ b/data/nde/enriched/entries/0025_Q56461228.yaml @@ -530,18 +530,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:03.370593+00:00' source_archive: web/0025/dewemme.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum de Wemme Zuidwolde - source_url: https://www.dewemme.nl/ - retrieved_on: '2025-11-29T14:26:07.259076+00:00' - xpath: /html/head/title - html_file: web/0025/dewemme.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:03.369684+00:00' - claim_type: org_name claim_value: BTC Art raw_value: BTC Art - Shine @@ -642,6 +632,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:03.370465+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum De Wemme diff --git a/data/nde/enriched/entries/0027_Q19832258.yaml b/data/nde/enriched/entries/0027_Q19832258.yaml index db65d08df4..b24a8ca483 100644 --- a/data/nde/enriched/entries/0027_Q19832258.yaml +++ b/data/nde/enriched/entries/0027_Q19832258.yaml @@ -706,18 +706,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:24:53.363590+00:00' source_archive: web/0027/papierknipmuseum.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME - source_url: http://www.papierknipmuseum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0027/papierknipmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:24:53.356977+00:00' - claim_type: email claim_value: knipkunst@gmail.com raw_value: knipkunst@gmail.com @@ -728,3 +718,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:24:53.362796+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0028_aold_daoln.yaml b/data/nde/enriched/entries/0028_aold_daoln.yaml index 00c3fdd47b..a60f50f7d1 100644 --- a/data/nde/enriched/entries/0028_aold_daoln.yaml +++ b/data/nde/enriched/entries/0028_aold_daoln.yaml @@ -372,18 +372,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:04.212422+00:00' source_archive: web/0028/aolddaoln.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Aold Daol'n - source_url: https://www.aolddaoln.nl/ - retrieved_on: '2025-11-29T14:26:14.597135+00:00' - xpath: /html/head/title - html_file: web/0028/aolddaoln.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:04.210876+00:00' - claim_type: description_short claim_value: Behoud van de geschiedenis en cultuur van Dalen en de naburige dorpen door middel van tentoonstellingen, onderzoek en evenementen. Vier samen met @@ -438,6 +428,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:04.212155+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Aold Daol'n diff --git a/data/nde/enriched/entries/0029_Q110995917.yaml b/data/nde/enriched/entries/0029_Q110995917.yaml index 599c77a5b9..d58e54b5fc 100644 --- a/data/nde/enriched/entries/0029_Q110995917.yaml +++ b/data/nde/enriched/entries/0029_Q110995917.yaml @@ -549,18 +549,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:04.621248+00:00' source_archive: web/0029/aolddaoln.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Aold Daol'n - source_url: https://www.aolddaoln.nl/ - retrieved_on: '2025-11-29T14:26:16.952349+00:00' - xpath: /html/head/title - html_file: web/0029/aolddaoln.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:04.620027+00:00' - claim_type: description_short claim_value: Behoud van de geschiedenis en cultuur van Dalen en de naburige dorpen door middel van tentoonstellingen, onderzoek en evenementen. Vier samen met @@ -615,6 +605,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:04.621081+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museummolen Jan Pol diff --git a/data/nde/enriched/entries/0037_Q1345886.yaml b/data/nde/enriched/entries/0037_Q1345886.yaml index 44fb507297..36e6536c06 100644 --- a/data/nde/enriched/entries/0037_Q1345886.yaml +++ b/data/nde/enriched/entries/0037_Q1345886.yaml @@ -717,18 +717,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:05.753578+00:00' source_archive: web/0037/klompenmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Internationaal Klompenmuseum Eelde - source_url: http://www.klompenmuseum.nl/ - retrieved_on: '2025-11-29T14:29:05.952480+00:00' - xpath: /html/head/title - html_file: web/0037/klompenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:05.752826+00:00' - claim_type: description_short claim_value: 'Internationaal Klompenmuseum Tentoonstelling 2025: Houten Schoeisel Festival Bekijk onze virtuele tour Geopend: 28 maart 2026 t/m 1 november 2026, @@ -817,6 +807,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:05.753486+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Internationaal Klompenmuseum diff --git a/data/nde/enriched/entries/0039_Q108369683.yaml b/data/nde/enriched/entries/0039_Q108369683.yaml index afa4fa9825..9ec9af2a82 100644 --- a/data/nde/enriched/entries/0039_Q108369683.yaml +++ b/data/nde/enriched/entries/0039_Q108369683.yaml @@ -485,18 +485,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:06.586496+00:00' source_archive: web/0039/oudmeppel.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Oud Meppel - source_url: https://www.oudmeppel.nl/ - retrieved_on: '2025-11-29T14:32:06.738895+00:00' - xpath: /html/head/title - html_file: web/0039/oudmeppel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:06.585102+00:00' - claim_type: description_short claim_value: "Welkom bij Oud Meppel Overzicht \nActiviteiten \nDiavoorstellingen\ \ \nTentoonstellingen \nStadswandelingen \nWerkgroepen \nVerkoop \nDiversen\ @@ -563,6 +553,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:06.586287+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oud Meppel diff --git a/data/nde/enriched/entries/0041_asser_historische_vereniging.yaml b/data/nde/enriched/entries/0041_asser_historische_vereniging.yaml index 366acb084c..3868b37a1a 100644 --- a/data/nde/enriched/entries/0041_asser_historische_vereniging.yaml +++ b/data/nde/enriched/entries/0041_asser_historische_vereniging.yaml @@ -350,18 +350,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:07.100160+00:00' source_archive: web/0041/ahvassen.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://ahvassen.nl/ - retrieved_on: '2025-11-29T14:29:15.708064+00:00' - xpath: /html/head/title - html_file: web/0041/ahvassen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:07.099272+00:00' - claim_type: description_short claim_value: Asser Historische Vereniging raw_value: Asser Historische Vereniging @@ -392,6 +382,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:07.099994+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Asser Historische Vereniging diff --git a/data/nde/enriched/entries/0042_asser_historische_vereniging_2.yaml b/data/nde/enriched/entries/0042_asser_historische_vereniging_2.yaml index 2d2214ebf5..0805ea52c2 100644 --- a/data/nde/enriched/entries/0042_asser_historische_vereniging_2.yaml +++ b/data/nde/enriched/entries/0042_asser_historische_vereniging_2.yaml @@ -347,18 +347,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:07.171284+00:00' source_archive: web/0042/drentsehistorischevereniging.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://drentsehistorischevereniging.nl/ - retrieved_on: '2025-11-29T14:29:17.235710+00:00' - xpath: /html/head/title - html_file: web/0042/drentsehistorischevereniging.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:07.170786+00:00' - claim_type: org_name claim_value: Drentse Historische Vereniging raw_value: Drentse Historische Vereniging @@ -389,6 +379,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:07.171165+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Drentse Historische Vereniging diff --git a/data/nde/enriched/entries/0043_historische_vereniging_gasselte.yaml b/data/nde/enriched/entries/0043_historische_vereniging_gasselte.yaml index 97a2a6f9ae..d8490f4883 100644 --- a/data/nde/enriched/entries/0043_historische_vereniging_gasselte.yaml +++ b/data/nde/enriched/entries/0043_historische_vereniging_gasselte.yaml @@ -355,7 +355,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:07.584213+00:00' source_archive: web/0043/archief-optspoor.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Historische vereniging der gemeente Gasselte @@ -367,16 +367,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:07.583755+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://archief-optspoor.nl/ - retrieved_on: '2025-11-29T14:29:26.053674+00:00' - xpath: /html/body/div[1]/div/div/div/div/main/article/header/h1 - html_file: web/0043/archief-optspoor.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:07.584126+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische vereniging der gemeente Gasselte diff --git a/data/nde/enriched/entries/0047_Q30277559.yaml b/data/nde/enriched/entries/0047_Q30277559.yaml index 77b45a3d01..202371ee52 100644 --- a/data/nde/enriched/entries/0047_Q30277559.yaml +++ b/data/nde/enriched/entries/0047_Q30277559.yaml @@ -566,7 +566,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:07.769543+00:00' source_archive: web/0047/hetflevolandsarchief.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Het Flevolands Archief @@ -598,16 +598,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:07.769111+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.hetflevolandsarchief.nl/ - retrieved_on: '2025-11-29T14:29:20.248807+00:00' - xpath: /html/body/div[2]/main/div[6]/div/div/div/h1 - html_file: web/0047/hetflevolandsarchief.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:07.769239+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het Flevolands Archief diff --git a/data/nde/enriched/entries/0069_Q13137174.yaml b/data/nde/enriched/entries/0069_Q13137174.yaml index ac548fae0d..a6d056d33d 100644 --- a/data/nde/enriched/entries/0069_Q13137174.yaml +++ b/data/nde/enriched/entries/0069_Q13137174.yaml @@ -727,18 +727,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:13.342313+00:00' source_archive: web/0069/museumfederatiefryslan.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museumfederatie Fryslân - source_url: https://www.museumfederatiefryslan.nl/ - retrieved_on: '2025-11-29T14:34:37.779528+00:00' - xpath: /html/head/title - html_file: web/0069/museumfederatiefryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:13.341754+00:00' - claim_type: description_short claim_value: Museumfederatie Fryslân staat voor het toegankelijk maken en behouden van het erfgoed in Friesland op een zo hoog mogelijk niveau. Samenwerken op @@ -793,6 +783,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:13.342260+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museumfederatie Fryslân diff --git a/data/nde/enriched/entries/0070_Q17517652.yaml b/data/nde/enriched/entries/0070_Q17517652.yaml index 032e19e149..7da12381c2 100644 --- a/data/nde/enriched/entries/0070_Q17517652.yaml +++ b/data/nde/enriched/entries/0070_Q17517652.yaml @@ -461,7 +461,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:13.420511+00:00' source_archive: web/0070/kiekhuus.nl - claims_count: 8 + claims_count: 6 claims: - claim_type: org_name claim_value: Terug in de tijd @@ -495,26 +495,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:44:13.420097+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?t=Terug in de tijd&u=https://www.kiekhuus.nl/ - raw_value: https://www.facebook.com/sharer.php?t=Terug in de tijd&u=https://www.kiekhuus.nl/ - source_url: http://www.kiekhuus.nl - retrieved_on: '2025-11-29T14:32:53.893434+00:00' - xpath: /html/body/div[2]/div[2]/div/section/div/div/div[1]/div/div/div/a[1] - html_file: web/0070/kiekhuus.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:13.420372+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Terug in de tijd&url=https://www.kiekhuus.nl/&via= - raw_value: https://twitter.com/intent/tweet?text=Terug in de tijd&url=https://www.kiekhuus.nl/&via= - source_url: http://www.kiekhuus.nl - retrieved_on: '2025-11-29T14:32:53.893434+00:00' - xpath: /html/body/div[2]/div[2]/div/section/div/div/div[1]/div/div/div/a[2] - html_file: web/0070/kiekhuus.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:13.420378+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/kiekhuus/ raw_value: https://www.facebook.com/kiekhuus/ @@ -545,6 +525,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:13.420399+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: '''t Kiekhuus' diff --git a/data/nde/enriched/entries/0072_Q3457274.yaml b/data/nde/enriched/entries/0072_Q3457274.yaml index b1d1e6a462..40470c442c 100644 --- a/data/nde/enriched/entries/0072_Q3457274.yaml +++ b/data/nde/enriched/entries/0072_Q3457274.yaml @@ -791,18 +791,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:13.941510+00:00' source_archive: web/0072/museumdrachten.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum Dr8888 - source_url: http://www.museumdrachten.nl - retrieved_on: '2025-11-29T14:33:08.640521+00:00' - xpath: /html/head/title - html_file: web/0072/museumdrachten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:13.940327+00:00' - claim_type: org_name claim_value: Museum Dr8888 raw_value: Museum Dr8888 @@ -853,6 +843,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:13.941354+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Drachten diff --git a/data/nde/enriched/entries/0080_Q1942351.yaml b/data/nde/enriched/entries/0080_Q1942351.yaml index a8f6a86a8b..7bbfaa6462 100644 --- a/data/nde/enriched/entries/0080_Q1942351.yaml +++ b/data/nde/enriched/entries/0080_Q1942351.yaml @@ -702,7 +702,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:16.206454+00:00' source_archive: web/0080/damshus.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: It Damshûs @@ -756,16 +756,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:16.206249+00:00' - - claim_type: org_name - claim_value: Oproep - raw_value: Oproep - source_url: https://www.damshus.nl/ - retrieved_on: '2025-11-29T14:36:55.509152+00:00' - xpath: /html/body/div[1]/div/div/div/article/div/div/div/div[2]/div/div/div[1]/div/h1 - html_file: web/0080/damshus.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:16.206329+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting "It Damshûs" diff --git a/data/nde/enriched/entries/0082_Q2201887.yaml b/data/nde/enriched/entries/0082_Q2201887.yaml index 5f8c221ae5..ef85df0709 100644 --- a/data/nde/enriched/entries/0082_Q2201887.yaml +++ b/data/nde/enriched/entries/0082_Q2201887.yaml @@ -946,7 +946,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:16.964027+00:00' source_archive: web/0082/observeum.nl - claims_count: 7 + claims_count: 5 claims: - claim_type: org_name claim_value: Museum & Sterrenwacht Burgum @@ -980,16 +980,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:16.963804+00:00' - - claim_type: social_facebook - claim_value: http://www.facebook.com/sharer.php?u=https://www.observeum.nl/&t=Home - raw_value: http://www.facebook.com/sharer.php?u=https://www.observeum.nl/&t=Home - source_url: https://www.observeum.nl/ - retrieved_on: '2025-11-29T14:36:01.804617+00:00' - xpath: /html/body/div/div[2]/div[2]/div[2]/div/div[2]/div[41]/div/a[1] - html_file: web/0082/observeum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:16.963824+00:00' - claim_type: social_twitter claim_value: https://x.com/share?text=Home&url=https%3A%2F%2Fwww.observeum.nl%2F raw_value: https://x.com/share?text=Home&url=https%3A%2F%2Fwww.observeum.nl%2F @@ -1000,16 +990,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:16.963828+00:00' - - claim_type: social_linkedin - claim_value: http://www.linkedin.com/shareArticle?mini=true&url=https://www.observeum.nl/&title=Home - raw_value: http://www.linkedin.com/shareArticle?mini=true&url=https://www.observeum.nl/&title=Home - source_url: https://www.observeum.nl/ - retrieved_on: '2025-11-29T14:36:01.804617+00:00' - xpath: /html/body/div/div[2]/div[2]/div[2]/div/div[2]/div[41]/div/a[4] - html_file: web/0082/observeum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:16.963834+00:00' - claim_type: org_name claim_value: Museum en Sterrenwacht Burgum raw_value: Museum en Sterrenwacht Burgum @@ -1020,6 +1000,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:16.963894+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Observeum diff --git a/data/nde/enriched/entries/0086_Q13137164.yaml b/data/nde/enriched/entries/0086_Q13137164.yaml index 2d621378d5..687f2e5b61 100644 --- a/data/nde/enriched/entries/0086_Q13137164.yaml +++ b/data/nde/enriched/entries/0086_Q13137164.yaml @@ -651,18 +651,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:18.289120+00:00' source_archive: web/0086/dekemastate.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Dekema State - source_url: https://dekemastate.nl/ - retrieved_on: '2025-11-29T14:38:06.707912+00:00' - xpath: /html/head/title - html_file: web/0086/dekemastate.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:18.288285+00:00' - claim_type: org_name claim_value: Klik om het zoekinvoerveld te openen raw_value: Klik om het zoekinvoerveld te openen @@ -735,6 +725,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:18.288910+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Landgoed Dekema State / Dekema State diff --git a/data/nde/enriched/entries/0093_Q81181219.yaml b/data/nde/enriched/entries/0093_Q81181219.yaml index 98b1bf01d0..8de89b9a10 100644 --- a/data/nde/enriched/entries/0093_Q81181219.yaml +++ b/data/nde/enriched/entries/0093_Q81181219.yaml @@ -525,18 +525,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:19.908909+00:00' source_archive: web/0093/achtkarspelen.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Achtkarspelen - source_url: http://www.achtkarspelen.nl - retrieved_on: '2025-11-29T14:36:05.489613+00:00' - xpath: /html/head/title - html_file: web/0093/achtkarspelen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:19.908104+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -599,6 +589,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:19.908727+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Achtkarspelen diff --git a/data/nde/enriched/entries/0095_Q81181251.yaml b/data/nde/enriched/entries/0095_Q81181251.yaml index 2f7337dbcc..db05b54c52 100644 --- a/data/nde/enriched/entries/0095_Q81181251.yaml +++ b/data/nde/enriched/entries/0095_Q81181251.yaml @@ -360,7 +360,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:12.247324+00:00' source_archive: web/0095/noardeast-fryslan.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: Homepage Noardeast-Fryslân @@ -392,36 +392,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:25:12.245246+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0095/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.245258+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0095/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.245268+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0095/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.245278+00:00' - claim_type: description_short claim_value: Homepage Noardeast-Fryslân raw_value: Homepage Noardeast-Fryslân @@ -482,6 +452,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:25:12.246132+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content temporal_extent: begin_of_the_begin: null end_of_the_begin: null diff --git a/data/nde/enriched/entries/0099_Q81181286.yaml b/data/nde/enriched/entries/0099_Q81181286.yaml index b0cfc16030..e7b8eb9212 100644 --- a/data/nde/enriched/entries/0099_Q81181286.yaml +++ b/data/nde/enriched/entries/0099_Q81181286.yaml @@ -468,7 +468,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:20.689774+00:00' source_archive: web/0099/heerenveen.nl - claims_count: 29 + claims_count: 25 claims: - claim_type: org_name claim_value: Alles over gemeente Heerenveen @@ -480,16 +480,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:20.688647+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.heerenveen.nl - retrieved_on: '2025-11-29T14:37:26.166072+00:00' - xpath: /html/body/header/nav/ul/li[1]/a/svg/title - html_file: web/0099/heerenveen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.688660+00:00' - claim_type: org_name claim_value: A-Z raw_value: A-Z @@ -530,16 +520,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:20.688677+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://www.heerenveen.nl - retrieved_on: '2025-11-29T14:37:26.166072+00:00' - xpath: /html/body/header/nav/div[2]/form/div/svg/title - html_file: web/0099/heerenveen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.688700+00:00' - claim_type: org_name claim_value: icon_verkiezingen raw_value: icon_verkiezingen @@ -590,16 +570,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:20.688718+00:00' - - claim_type: org_name - claim_value: Contact - raw_value: Contact - source_url: https://www.heerenveen.nl - retrieved_on: '2025-11-29T14:37:26.166072+00:00' - xpath: /html/body/header/nav/div[3]/nav/a[6]/div/svg/title - html_file: web/0099/heerenveen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.688722+00:00' - claim_type: org_name claim_value: Klok raw_value: Klok @@ -650,16 +620,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:20.688745+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.heerenveen.nl - retrieved_on: '2025-11-29T14:37:26.166072+00:00' - xpath: /html/body/main/div[3]/div[1]/div[1]/svg/title - html_file: web/0099/heerenveen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.688759+00:00' - claim_type: org_name claim_value: LinkedIn profiel van Gemeente Heerenveen raw_value: LinkedIn profiel van Gemeente Heerenveen @@ -762,6 +722,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:20.689444+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Heerenveen diff --git a/data/nde/enriched/entries/0101_Q81181296.yaml b/data/nde/enriched/entries/0101_Q81181296.yaml index ea9112cb86..29793dd1f7 100644 --- a/data/nde/enriched/entries/0101_Q81181296.yaml +++ b/data/nde/enriched/entries/0101_Q81181296.yaml @@ -362,7 +362,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:12.500058+00:00' source_archive: web/0101/noardeast-fryslan.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: Homepage Noardeast-Fryslân @@ -394,36 +394,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:25:12.498580+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0101/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.498591+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0101/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.498601+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.noardeast-fryslan.nl/streekarchief - retrieved_on: '' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0101/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.498611+00:00' - claim_type: description_short claim_value: Homepage Noardeast-Fryslân raw_value: Homepage Noardeast-Fryslân @@ -484,6 +454,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:25:12.499739+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content temporal_extent: begin_of_the_begin: null end_of_the_begin: null diff --git a/data/nde/enriched/entries/0102_Q121225319.yaml b/data/nde/enriched/entries/0102_Q121225319.yaml index 99d94a95f1..0070d0a80c 100644 --- a/data/nde/enriched/entries/0102_Q121225319.yaml +++ b/data/nde/enriched/entries/0102_Q121225319.yaml @@ -572,7 +572,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:20.875194+00:00' source_archive: web/0102/noardeast-fryslan.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: Homepage Noardeast-Fryslân @@ -604,36 +604,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:20.874548+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.noardeast-fryslan.nl/ - retrieved_on: '2025-11-29T14:37:55.770920+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0102/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.874553+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.noardeast-fryslan.nl/ - retrieved_on: '2025-11-29T14:37:55.770920+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0102/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.874558+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.noardeast-fryslan.nl/ - retrieved_on: '2025-11-29T14:37:55.770920+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0102/noardeast-fryslan.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.874562+00:00' - claim_type: description_short claim_value: Homepage Noardeast-Fryslân raw_value: Homepage Noardeast-Fryslân @@ -694,6 +664,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:20.875058+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchief Noardeast-Fryslân diff --git a/data/nde/enriched/entries/0103_Q81181358.yaml b/data/nde/enriched/entries/0103_Q81181358.yaml index 751964d267..675fb41588 100644 --- a/data/nde/enriched/entries/0103_Q81181358.yaml +++ b/data/nde/enriched/entries/0103_Q81181358.yaml @@ -299,18 +299,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:12.598702+00:00' source_archive: web/0103/documentatiestichting.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Contact - raw_value: Contact – Documentatiestichting Leeuwarderadeel - source_url: http://www.documentatiestichting.nl/contact/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0103/documentatiestichting.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:12.597880+00:00' - claim_type: email claim_value: documentatiestichting@gmail.com raw_value: documentatiestichting@gmail.com @@ -331,3 +321,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:25:12.598321+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0104_Q2754844.yaml b/data/nde/enriched/entries/0104_Q2754844.yaml index 3e0457b6d1..b33349b307 100644 --- a/data/nde/enriched/entries/0104_Q2754844.yaml +++ b/data/nde/enriched/entries/0104_Q2754844.yaml @@ -904,18 +904,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:20.991233+00:00' source_archive: web/0104/historischcentrumleeuwarden.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historisch Centrum Leeuwarden - source_url: http://historischcentrumleeuwarden.nl - retrieved_on: '2025-11-29T14:38:07.710368+00:00' - xpath: /html/head/title - html_file: web/0104/historischcentrumleeuwarden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:20.990803+00:00' - claim_type: description_short claim_value: Het Historisch Centrum Leeuwarden (HCL) is het informatie- en activiteitencentrum voor de geschiedenis van Leeuwarden en omgeving. @@ -988,6 +978,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:20.991168+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Centrum Leeuwarden diff --git a/data/nde/enriched/entries/0107_Q81181235.yaml b/data/nde/enriched/entries/0107_Q81181235.yaml index b715999575..8773f4746a 100644 --- a/data/nde/enriched/entries/0107_Q81181235.yaml +++ b/data/nde/enriched/entries/0107_Q81181235.yaml @@ -358,18 +358,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:21.184233+00:00' source_archive: web/0107/smallingerland.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Smallingerland - source_url: https://www.smallingerland.nl/ - retrieved_on: '2025-11-29T14:38:08.667336+00:00' - xpath: /html/head/title - html_file: web/0107/smallingerland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:21.183016+00:00' - claim_type: email claim_value: gemeente@smallingerland.nl raw_value: gemeente@smallingerland.nl @@ -430,6 +420,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:21.184035+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeente Smallingerland diff --git a/data/nde/enriched/entries/0109_Q81181393.yaml b/data/nde/enriched/entries/0109_Q81181393.yaml index a2972d860f..ef33e22453 100644 --- a/data/nde/enriched/entries/0109_Q81181393.yaml +++ b/data/nde/enriched/entries/0109_Q81181393.yaml @@ -388,18 +388,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:21.633706+00:00' source_archive: web/0109/terschelling.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Terschelling - source_url: https://www.terschelling.nl/ - retrieved_on: '2025-11-29T14:38:09.179167+00:00' - xpath: /html/head/title - html_file: web/0109/terschelling.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:21.632596+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -470,6 +460,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:21.633521+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Terschelling diff --git a/data/nde/enriched/entries/0110_Q81181206.yaml b/data/nde/enriched/entries/0110_Q81181206.yaml index 1b79a68781..d6426c758e 100644 --- a/data/nde/enriched/entries/0110_Q81181206.yaml +++ b/data/nde/enriched/entries/0110_Q81181206.yaml @@ -518,18 +518,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:21.752348+00:00' source_archive: web/0110/t-diel.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Tytsjerksteradiel - source_url: http://www.t-diel.nl - retrieved_on: '2025-11-29T14:38:09.847466+00:00' - xpath: /html/head/title - html_file: web/0110/t-diel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:21.751276+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -592,6 +582,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:21.751956+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Tytsjerksteradiel diff --git a/data/nde/enriched/entries/0111_Q81181373.yaml b/data/nde/enriched/entries/0111_Q81181373.yaml index 15315dba8d..55a7cc4a37 100644 --- a/data/nde/enriched/entries/0111_Q81181373.yaml +++ b/data/nde/enriched/entries/0111_Q81181373.yaml @@ -538,18 +538,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:21.884305+00:00' source_archive: web/0111/vlieland.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Vlieland - source_url: http://www.vlieland.nl - retrieved_on: '2025-11-29T14:38:10.812158+00:00' - xpath: /html/head/title - html_file: web/0111/vlieland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:21.883565+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -620,6 +610,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:21.884109+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Vlieland diff --git a/data/nde/enriched/entries/0115_Q13136222.yaml b/data/nde/enriched/entries/0115_Q13136222.yaml index 3f6698a56c..6d1e7839dd 100644 --- a/data/nde/enriched/entries/0115_Q13136222.yaml +++ b/data/nde/enriched/entries/0115_Q13136222.yaml @@ -506,18 +506,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:23.119742+00:00' source_archive: web/0115/ijstijdenmuseum.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - ijstijdenmuseum - source_url: https://www.ijstijdenmuseum.nl/ - retrieved_on: '2025-11-29T14:38:51.284331+00:00' - xpath: /html/head/title - html_file: web/0115/ijstijdenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:23.119152+00:00' - claim_type: description_short claim_value: De IJstijden in deze streken In het IJstijdenmuseum in Buitenpost wordt het boeiende verhaal verteld van de twee laatste IJstijden. Deze hebben @@ -572,6 +562,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:23.119650+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: IJstijdenmuseum diff --git a/data/nde/enriched/entries/0122_Q12013196.yaml b/data/nde/enriched/entries/0122_Q12013196.yaml index 0b6af8370d..44ceaa74ce 100644 --- a/data/nde/enriched/entries/0122_Q12013196.yaml +++ b/data/nde/enriched/entries/0122_Q12013196.yaml @@ -1209,18 +1209,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:24.410901+00:00' source_archive: web/0122/museumbelvedere.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum Belvédère - source_url: https://www.museumbelvedere.nl/ - retrieved_on: '2025-11-29T15:18:22.314806+00:00' - xpath: /html/head/title - html_file: web/0122/museumbelvedere.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:24.410285+00:00' - claim_type: email claim_value: info@museumbelvedere.nl raw_value: info@museumbelvedere.nl @@ -1271,6 +1261,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:24.410817+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Museum Belvédère diff --git a/data/nde/enriched/entries/0124_museum_oold_ark_korenmolen_de_weyert.yaml b/data/nde/enriched/entries/0124_museum_oold_ark_korenmolen_de_weyert.yaml index 418a85e1cf..cf5696cfae 100644 --- a/data/nde/enriched/entries/0124_museum_oold_ark_korenmolen_de_weyert.yaml +++ b/data/nde/enriched/entries/0124_museum_oold_ark_korenmolen_de_weyert.yaml @@ -184,18 +184,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:24.984364+00:00' source_archive: web/0124/museumenmolenmakkinga.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.museumenmolenmakkinga.nl/ - retrieved_on: '2025-11-29T14:39:27.748370+00:00' - xpath: /html/head/title - html_file: web/0124/museumenmolenmakkinga.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:24.984075+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Museum-en-Molen-Makkinga-686046891528133 raw_value: https://www.facebook.com/Museum-en-Molen-Makkinga-686046891528133 @@ -206,6 +196,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:24.984314+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Korenmolen "De Weyert" diff --git a/data/nde/enriched/entries/0125_Q13137340.yaml b/data/nde/enriched/entries/0125_Q13137340.yaml index 0647e520ae..69baacd6b2 100644 --- a/data/nde/enriched/entries/0125_Q13137340.yaml +++ b/data/nde/enriched/entries/0125_Q13137340.yaml @@ -550,18 +550,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:25.116518+00:00' source_archive: web/0125/museumenmolenmakkinga.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.museumenmolenmakkinga.nl/ - retrieved_on: '2025-11-29T14:54:51.730430+00:00' - xpath: /html/head/title - html_file: web/0125/museumenmolenmakkinga.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:25.116241+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Museum-en-Molen-Makkinga-686046891528133 raw_value: https://www.facebook.com/Museum-en-Molen-Makkinga-686046891528133 @@ -572,6 +562,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:25.116469+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oold Ark diff --git a/data/nde/enriched/entries/0126_Q13137168.yaml b/data/nde/enriched/entries/0126_Q13137168.yaml index c9e9423a0e..a98126819f 100644 --- a/data/nde/enriched/entries/0126_Q13137168.yaml +++ b/data/nde/enriched/entries/0126_Q13137168.yaml @@ -641,18 +641,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:25.214070+00:00' source_archive: web/0126/museumsloten.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://museumsloten.nl/ - retrieved_on: '2025-11-29T14:54:52.407703+00:00' - xpath: /html/head/title - html_file: web/0126/museumsloten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:25.213029+00:00' - claim_type: description_short claim_value: Ontdek Sloten, de kleinste van de Friese 11 steden, in Museum Sloten, waar toverlantaarns de geschiedenis vertellen, en boek meteen je tickets. @@ -705,6 +695,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:25.213909+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Sloten diff --git a/data/nde/enriched/entries/0131_Q81181312.yaml b/data/nde/enriched/entries/0131_Q81181312.yaml index 3e20c158f0..bdff97c230 100644 --- a/data/nde/enriched/entries/0131_Q81181312.yaml +++ b/data/nde/enriched/entries/0131_Q81181312.yaml @@ -508,18 +508,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:26.659287+00:00' source_archive: web/0131/fryslan.frl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Fryslan - source_url: https://www.fryslan.frl/ - retrieved_on: '2025-11-29T15:14:45.171069+00:00' - xpath: /html/head/title - html_file: web/0131/fryslan.frl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:26.657906+00:00' - claim_type: org_name claim_value: Fryslan raw_value: Fryslan @@ -580,6 +570,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:26.658955+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Provincie Fryslân diff --git a/data/nde/enriched/entries/0139_de_hollandse_cirkel.yaml b/data/nde/enriched/entries/0139_de_hollandse_cirkel.yaml index 96bf4ed18c..528d98e89d 100644 --- a/data/nde/enriched/entries/0139_de_hollandse_cirkel.yaml +++ b/data/nde/enriched/entries/0139_de_hollandse_cirkel.yaml @@ -182,7 +182,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:27.725393+00:00' source_archive: web/0139/hollandsecirkel.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Stichting de Hollandse Cirkel @@ -214,16 +214,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:44:27.725070+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.kadaster.nl/ - retrieved_on: '' - xpath: /html/body/div/div/div/div/article/div/div/div/div[5]/div[1]/div/div/div/h1 - html_file: web/0139/hollandsecirkel.nl/mirror/hollandsecirkel.nl/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:27.725271+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting De Hollandse Cirkel diff --git a/data/nde/enriched/entries/0140_Q17595943.yaml b/data/nde/enriched/entries/0140_Q17595943.yaml index d41b1060ed..216dc5ebbc 100644 --- a/data/nde/enriched/entries/0140_Q17595943.yaml +++ b/data/nde/enriched/entries/0140_Q17595943.yaml @@ -620,18 +620,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:27.849066+00:00' source_archive: web/0140/archief.gazelle.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - Gazelle - source_url: https://archief.gazelle.nl/ - retrieved_on: '2025-11-29T15:21:15.145560+00:00' - xpath: /html/head/title - html_file: web/0140/archief.gazelle.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:27.847294+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/KoninklijkeGazelle raw_value: https://www.facebook.com/KoninklijkeGazelle @@ -672,6 +662,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:27.848728+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Koninklijke Gazelle diff --git a/data/nde/enriched/entries/0143_Q1127079.yaml b/data/nde/enriched/entries/0143_Q1127079.yaml index 6f5f248c96..2dd1f2b353 100644 --- a/data/nde/enriched/entries/0143_Q1127079.yaml +++ b/data/nde/enriched/entries/0143_Q1127079.yaml @@ -1287,18 +1287,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:28.142443+00:00' source_archive: web/0143/museumhetvalkhof.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: home - raw_value: home - Valkhof Museum - source_url: http://www.museumhetvalkhof.nl - retrieved_on: '2025-11-29T15:21:15.209526+00:00' - xpath: /html/head/title - html_file: web/0143/museumhetvalkhof.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.142145+00:00' - claim_type: org_name claim_value: Valkhof Museum raw_value: Valkhof Museum @@ -1309,6 +1299,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:44:28.142360+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Het Valkhof diff --git a/data/nde/enriched/entries/0144_Q2710899.yaml b/data/nde/enriched/entries/0144_Q2710899.yaml index 30484dc3ed..533b15c588 100644 --- a/data/nde/enriched/entries/0144_Q2710899.yaml +++ b/data/nde/enriched/entries/0144_Q2710899.yaml @@ -873,18 +873,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:28.228638+00:00' source_archive: web/0144/nationaalonderduikmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal onderduikmuseum - source_url: https://nationaalonderduikmuseum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0144/nationaalonderduikmuseum.nl/mirror/nationaalonderduikmuseum.nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.227592+00:00' - claim_type: org_name claim_value: Nationaal onderduikmuseum - raw_value: Nationaal onderduikmuseum - @@ -965,6 +955,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:28.228541+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Onderduikmuseum diff --git a/data/nde/enriched/entries/0145_Q2654815.yaml b/data/nde/enriched/entries/0145_Q2654815.yaml index 419d694ac2..6ad2b38825 100644 --- a/data/nde/enriched/entries/0145_Q2654815.yaml +++ b/data/nde/enriched/entries/0145_Q2654815.yaml @@ -711,7 +711,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:28.300061+00:00' source_archive: web/0145/antonpieckmuseum.nl - claims_count: 9 + claims_count: 7 claims: - claim_type: org_name claim_value: Anton Pieck Museum @@ -733,26 +733,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:28.298463+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: https://www.antonpieckmuseum.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[1]/svg/title - html_file: web/0145/antonpieckmuseum.nl/mirror/www.antonpieckmuseum.nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.298469+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: https://www.antonpieckmuseum.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[2]/svg/title - html_file: web/0145/antonpieckmuseum.nl/mirror/www.antonpieckmuseum.nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.298473+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -803,6 +783,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:28.299741+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Anton Pieck Museum diff --git a/data/nde/enriched/entries/0146_Q1663974.yaml b/data/nde/enriched/entries/0146_Q1663974.yaml index 354cf580bc..a76d7967b3 100644 --- a/data/nde/enriched/entries/0146_Q1663974.yaml +++ b/data/nde/enriched/entries/0146_Q1663974.yaml @@ -646,18 +646,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:28.483481+00:00' source_archive: web/0146/tua.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Theologische Universiteit Apeldoorn - source_url: http://www.tua.nl - retrieved_on: '2025-11-29T15:21:15.976954+00:00' - xpath: /html/head/title - html_file: web/0146/tua.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.482634+00:00' - claim_type: org_name claim_value: Theologische Universiteit Apeldoorn raw_value: Theologische Universiteit Apeldoorn @@ -738,6 +728,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:28.483392+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Theologische Universiteit Apeldoorn diff --git a/data/nde/enriched/entries/0149_Q98894593.yaml b/data/nde/enriched/entries/0149_Q98894593.yaml index d6504f9b0d..5df416f406 100644 --- a/data/nde/enriched/entries/0149_Q98894593.yaml +++ b/data/nde/enriched/entries/0149_Q98894593.yaml @@ -568,18 +568,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:28.738219+00:00' source_archive: web/0149/cvz7aar.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Cultuurhistorische Vereniging Zevenaar (CVZ) (powered by e-captain.nl) - source_url: https://www.cvz7aar.nl - retrieved_on: '2025-11-29T15:21:18.588342+00:00' - xpath: /html/head/title - html_file: web/0149/cvz7aar.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:28.737638+00:00' - claim_type: social_youtube claim_value: https://www.youtube.com/channel/UCa0uF7kyU0vLxKO5qx-ZHNg/videos?reload=9&app=desktop&view=0&sort=da&flow=grid&cbrd=1 raw_value: https://www.youtube.com/channel/UCa0uF7kyU0vLxKO5qx-ZHNg/videos?reload=9&app=desktop&view=0&sort=da&flow=grid&cbrd=1 @@ -600,6 +590,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:28.738156+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Cultuurhistorische Vereniging Zevenaar diff --git a/data/nde/enriched/entries/0155_Q13636575.yaml b/data/nde/enriched/entries/0155_Q13636575.yaml index 07cfe5e2df..c5a272918e 100644 --- a/data/nde/enriched/entries/0155_Q13636575.yaml +++ b/data/nde/enriched/entries/0155_Q13636575.yaml @@ -963,18 +963,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:29.855125+00:00' source_archive: web/0155/streekmuseumtiel.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Flipje en Streekmuseum Tiel - source_url: https://streekmuseumtiel.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0155/streekmuseumtiel.nl/mirror/streekmuseumtiel.nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:29.854444+00:00' - claim_type: description_short claim_value: "DUIK IN DE\nHISTORIE \n\nIn het Flipje en Streekmuseum wordt de\ \ historie van Tiel en de Betuwe getoond. Ook haar beroemdste inwoner - Flipje\ @@ -1041,6 +1031,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:29.855011+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Flipje & Streekmuseum Tiel diff --git a/data/nde/enriched/entries/0157_Q104033126.yaml b/data/nde/enriched/entries/0157_Q104033126.yaml index ac097c4339..9055d12ef0 100644 --- a/data/nde/enriched/entries/0157_Q104033126.yaml +++ b/data/nde/enriched/entries/0157_Q104033126.yaml @@ -510,18 +510,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:29.935972+00:00' source_archive: web/0157/gelderlandinbeeld.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Gelderland in beeld - source_url: https://www.gelderlandinbeeld.nl/ - retrieved_on: '2025-11-29T15:22:43.989143+00:00' - xpath: /html/head/title - html_file: web/0157/gelderlandinbeeld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:29.935777+00:00' - claim_type: email claim_value: erfgoedcentrum@rozet.nl raw_value: erfgoedcentrum@rozet.nl @@ -542,6 +532,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:29.935958+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gelderland in Beeld diff --git a/data/nde/enriched/entries/0158_Q2330735.yaml b/data/nde/enriched/entries/0158_Q2330735.yaml index a29dd36154..206d805de2 100644 --- a/data/nde/enriched/entries/0158_Q2330735.yaml +++ b/data/nde/enriched/entries/0158_Q2330735.yaml @@ -793,18 +793,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:30.034033+00:00' source_archive: web/0158/geldersarchief.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.geldersarchief.nl - retrieved_on: '2025-11-29T15:22:43.748847+00:00' - xpath: /html/head/title - html_file: web/0158/geldersarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:30.032762+00:00' - claim_type: org_name claim_value: chat raw_value: chat @@ -881,6 +871,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:30.033839+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gelders Archief diff --git a/data/nde/enriched/entries/0159_Q4688292.yaml b/data/nde/enriched/entries/0159_Q4688292.yaml index 976aa3558a..837254d9a9 100644 --- a/data/nde/enriched/entries/0159_Q4688292.yaml +++ b/data/nde/enriched/entries/0159_Q4688292.yaml @@ -709,18 +709,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:30.419385+00:00' source_archive: web/0159/geologischmuseum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Gelders Geologisch Museum - source_url: https://geologischmuseum.nl/ - retrieved_on: '2025-11-29T15:22:49.377757+00:00' - xpath: /html/head/title - html_file: web/0159/geologischmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:30.418790+00:00' - claim_type: org_name claim_value: Gelders Geologisch Museum raw_value: Gelders Geologisch Museum @@ -781,6 +771,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:30.419326+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gelders Geologisch Museum diff --git a/data/nde/enriched/entries/0165_Q81181326.yaml b/data/nde/enriched/entries/0165_Q81181326.yaml index 0b5c83cc98..7a2e203213 100644 --- a/data/nde/enriched/entries/0165_Q81181326.yaml +++ b/data/nde/enriched/entries/0165_Q81181326.yaml @@ -496,18 +496,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:31.788139+00:00' source_archive: web/0165/nijkerk.eu - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Nijkerk - source_url: https://www.nijkerk.eu/gemeentearchief - retrieved_on: '2025-11-29T15:22:51.188307+00:00' - xpath: /html/head/title - html_file: web/0165/nijkerk.eu/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:31.787315+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -548,6 +538,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:31.787782+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Nijkerk diff --git a/data/nde/enriched/entries/0169_Q2332962.yaml b/data/nde/enriched/entries/0169_Q2332962.yaml index a7f43778d1..11ec59c73d 100644 --- a/data/nde/enriched/entries/0169_Q2332962.yaml +++ b/data/nde/enriched/entries/0169_Q2332962.yaml @@ -673,18 +673,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:32.713519+00:00' source_archive: web/0169/belmontearboretum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Belmonte Arboretum - source_url: https://belmontearboretum.nl/ - retrieved_on: '2025-11-29T15:26:05.199368+00:00' - xpath: /html/head/title - html_file: web/0169/belmontearboretum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:32.713034+00:00' - claim_type: org_name claim_value: Belmonte Arboretum raw_value: Belmonte Arboretum @@ -715,6 +705,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:32.713421+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Belmonte Arboretum diff --git a/data/nde/enriched/entries/0170_Q2365901.yaml b/data/nde/enriched/entries/0170_Q2365901.yaml index 2c42dc0405..277b1c1536 100644 --- a/data/nde/enriched/entries/0170_Q2365901.yaml +++ b/data/nde/enriched/entries/0170_Q2365901.yaml @@ -688,7 +688,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:33.192176+00:00' source_archive: web/0170/grenslandmuseum.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Stichting Grenslandmuseum Dinxperlo @@ -700,26 +700,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:33.191065+00:00' - - claim_type: org_name - claim_value: Previous - raw_value: Previous - source_url: https://www.grenslandmuseum.nl/ - retrieved_on: '2025-11-29T15:26:21.418250+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[1]/svg/title - html_file: web/0170/grenslandmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:33.191079+00:00' - - claim_type: org_name - claim_value: Next - raw_value: Next - source_url: https://www.grenslandmuseum.nl/ - retrieved_on: '2025-11-29T15:26:21.418250+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[2]/svg/title - html_file: web/0170/grenslandmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:33.191084+00:00' - claim_type: org_name claim_value: Right-open raw_value: Right-open @@ -790,6 +770,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:33.191917+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Grenslandmuseum diff --git a/data/nde/enriched/entries/0172_Q98907725.yaml b/data/nde/enriched/entries/0172_Q98907725.yaml index 4fdac88d7f..48839ecc05 100644 --- a/data/nde/enriched/entries/0172_Q98907725.yaml +++ b/data/nde/enriched/entries/0172_Q98907725.yaml @@ -546,18 +546,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:33.713426+00:00' source_archive: web/0172/heemkundekringbergh.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Heemkundekring Bergh - source_url: https://www.heemkundekringbergh.nl - retrieved_on: '2025-11-29T15:27:37.429075+00:00' - xpath: /html/head/title - html_file: web/0172/heemkundekringbergh.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:33.712712+00:00' - claim_type: description_short claim_value: Homepagina raw_value: Homepagina @@ -600,6 +590,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:33.713325+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring Bergh diff --git a/data/nde/enriched/entries/0174_heiligenbeeldenmuseum_kranenburg.yaml b/data/nde/enriched/entries/0174_heiligenbeeldenmuseum_kranenburg.yaml index 4902dcc2df..8e635b12b1 100644 --- a/data/nde/enriched/entries/0174_heiligenbeeldenmuseum_kranenburg.yaml +++ b/data/nde/enriched/entries/0174_heiligenbeeldenmuseum_kranenburg.yaml @@ -225,18 +225,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:34.113320+00:00' source_archive: web/0174/heiligenbeeldenmuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Heiligenbeeldenmuseum Kranenburg - source_url: https://www.heiligenbeeldenmuseum.nl/ - retrieved_on: '2025-11-29T15:26:10.471256+00:00' - xpath: /html/head/title - html_file: web/0174/heiligenbeeldenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:34.112922+00:00' - claim_type: description_short claim_value: Heiligenbeeldenmuseum Kranenburg raw_value: Heiligenbeeldenmuseum Kranenburg @@ -277,6 +267,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:34.113278+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het Heiligenbeeldenmuseum diff --git a/data/nde/enriched/entries/0177_Q7476442.yaml b/data/nde/enriched/entries/0177_Q7476442.yaml index c765664be2..5da5e6b377 100644 --- a/data/nde/enriched/entries/0177_Q7476442.yaml +++ b/data/nde/enriched/entries/0177_Q7476442.yaml @@ -826,7 +826,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:34.449363+00:00' source_archive: web/0177/historischmuseumede.nl - claims_count: 13 + claims_count: 10 claims: - claim_type: org_name claim_value: Historisch Museum Ede brengt verhalen van Ede tot leven @@ -848,16 +848,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:34.448156+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://www.historischmuseumede.nl - retrieved_on: '2025-11-29T15:26:16.779276+00:00' - xpath: /html/body/div[1]/header/div[1]/div/div/div/div[2]/div/div/div/div[2]/div[2]/div/button/span[2]/span/svg/title - html_file: web/0177/historischmuseumede.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:34.448181+00:00' - claim_type: org_name claim_value: Uren raw_value: Uren @@ -940,26 +930,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:44:34.448927+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fhistorischmuseumede.nl%2Fevents%2Fverwacht-verkerke-edese-wereldspeler-in-posters%2F - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fhistorischmuseumede.nl%2Fevents%2Fverwacht-verkerke-edese-wereldspeler-in-posters%2F - source_url: https://www.historischmuseumede.nl - retrieved_on: '2025-11-29T15:26:16.779276+00:00' - xpath: /html/body/div[1]/main/div/div/div/div/article/div/div/div[6]/div/div/div/div/div[1]/div/div/article/div[2]/ul/li[2]/ul/li[1]/a - html_file: web/0177/historischmuseumede.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:34.449077+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?url=https%3A%2F%2Fhistorischmuseumede.nl%2Fevents%2Fverwacht-verkerke-edese-wereldspeler-in-posters%2F - raw_value: https://twitter.com/share?url=https%3A%2F%2Fhistorischmuseumede.nl%2Fevents%2Fverwacht-verkerke-edese-wereldspeler-in-posters%2F - source_url: https://www.historischmuseumede.nl - retrieved_on: '2025-11-29T15:26:16.779276+00:00' - xpath: /html/body/div[1]/main/div/div/div/div/article/div/div/div[6]/div/div/div/div/div[1]/div/div/article/div[2]/ul/li[2]/ul/li[2]/a - html_file: web/0177/historischmuseumede.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:34.449084+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Museum Ede diff --git a/data/nde/enriched/entries/0178_Q98904482.yaml b/data/nde/enriched/entries/0178_Q98904482.yaml index 343d7fd833..a2beb3eaba 100644 --- a/data/nde/enriched/entries/0178_Q98904482.yaml +++ b/data/nde/enriched/entries/0178_Q98904482.yaml @@ -295,18 +295,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:34.584225+00:00' source_archive: web/0178/historischmuseumhedel.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.historischmuseumhedel.nl/ - retrieved_on: '2025-11-29T15:26:19.658845+00:00' - xpath: /html/head/title - html_file: web/0178/historischmuseumhedel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:34.583694+00:00' - claim_type: org_name claim_value: Historisch Museum Hedel raw_value: Historisch Museum Hedel @@ -337,6 +327,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:34.584143+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Hedel's Historie diff --git a/data/nde/enriched/entries/0184_Q38677497.yaml b/data/nde/enriched/entries/0184_Q38677497.yaml index 1ec81826d9..3eaf87f21a 100644 --- a/data/nde/enriched/entries/0184_Q38677497.yaml +++ b/data/nde/enriched/entries/0184_Q38677497.yaml @@ -939,7 +939,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:36.137017+00:00' source_archive: web/0184/gemeentearchief.ede.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Gemeentearchief Ede @@ -951,16 +951,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:36.136505+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://gemeentearchief.ede.nl/ - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://gemeentearchief.ede.nl/ - source_url: https://gemeentearchief.ede.nl - retrieved_on: '2025-11-29T15:26:47.211861+00:00' - xpath: /html/body/div/div/main/section/div[6]/div/div/ul/li[1]/a - html_file: web/0184/gemeentearchief.ede.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:36.136893+00:00' - claim_type: social_linkedin claim_value: https://www.linkedin.com/sharing/share-offsite/?url=https://gemeentearchief.ede.nl/ raw_value: https://www.linkedin.com/sharing/share-offsite/?url=https://gemeentearchief.ede.nl/ @@ -1011,6 +1001,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:36.136923+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeente Ede diff --git a/data/nde/enriched/entries/0186_Q98894809.yaml b/data/nde/enriched/entries/0186_Q98894809.yaml index a816705617..22d69f6a34 100644 --- a/data/nde/enriched/entries/0186_Q98894809.yaml +++ b/data/nde/enriched/entries/0186_Q98894809.yaml @@ -503,18 +503,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:36.794780+00:00' source_archive: web/0186/hkwestervoort.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME - Historische Kring Westervoort - source_url: https://www.hkwestervoort.nl - retrieved_on: '2025-11-29T15:27:14.285337+00:00' - xpath: /html/head/title - html_file: web/0186/hkwestervoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:36.792802+00:00' - claim_type: description_short claim_value: Historische Kring Westervoort raw_value: Historische Kring Westervoort @@ -555,6 +545,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:36.794479+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Kring Westervoort diff --git a/data/nde/enriched/entries/0201_Q98895215.yaml b/data/nde/enriched/entries/0201_Q98895215.yaml index 757f27c968..fa9caba628 100644 --- a/data/nde/enriched/entries/0201_Q98895215.yaml +++ b/data/nde/enriched/entries/0201_Q98895215.yaml @@ -632,18 +632,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:40.581965+00:00' source_archive: web/0201/historischeverenigingvoorst.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.historischeverenigingvoorst.nl - retrieved_on: '2025-11-29T15:29:57.119726+00:00' - xpath: /html/head/title - html_file: web/0201/historischeverenigingvoorst.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:40.580692+00:00' - claim_type: org_name claim_value: Datum raw_value: Datum @@ -714,6 +704,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:40.581750+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Vereniging Voorst diff --git a/data/nde/enriched/entries/0202_Q61930724.yaml b/data/nde/enriched/entries/0202_Q61930724.yaml index 5b15b9c7f0..0c08af968c 100644 --- a/data/nde/enriched/entries/0202_Q61930724.yaml +++ b/data/nde/enriched/entries/0202_Q61930724.yaml @@ -928,18 +928,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:40.670247+00:00' source_archive: web/0202/bronbeek.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Bronbeek - source_url: https://www.bronbeek.nl/ - retrieved_on: '2025-11-29T15:29:53.013757+00:00' - xpath: /html/head/title - html_file: web/0202/bronbeek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:40.669788+00:00' - claim_type: description_short claim_value: Bronbeek is een museum en hét kenniscentrum van het koloniaal-militair verleden van het Koninkrijk der Nederlanden. Het bevindt zich op een cultuurhistorisch @@ -996,6 +986,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:40.670137+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Koninklijk Tehuis voor Oud-Militairen en Museum Bronbeek diff --git a/data/nde/enriched/entries/0203_stichting_korpora.yaml b/data/nde/enriched/entries/0203_stichting_korpora.yaml index 5c920fec29..6c234a2881 100644 --- a/data/nde/enriched/entries/0203_stichting_korpora.yaml +++ b/data/nde/enriched/entries/0203_stichting_korpora.yaml @@ -399,18 +399,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:41.114748+00:00' source_archive: web/0203/korpora.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Korpora - source_url: https://www.korpora.nl/ - retrieved_on: '2025-11-29T15:32:28.709223+00:00' - xpath: /html/head/title - html_file: web/0203/korpora.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:41.113200+00:00' - claim_type: org_name claim_value: Korpora raw_value: Korpora @@ -461,6 +451,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:41.114337+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Korpora diff --git a/data/nde/enriched/entries/0213_Q54957003.yaml b/data/nde/enriched/entries/0213_Q54957003.yaml index 1da0354608..cf7caaed2c 100644 --- a/data/nde/enriched/entries/0213_Q54957003.yaml +++ b/data/nde/enriched/entries/0213_Q54957003.yaml @@ -894,7 +894,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:43.665080+00:00' source_archive: web/0213/vantlindenhoutmuseum.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Museum Kinderdorp Neerbosch Nijmegen @@ -920,16 +920,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:44:43.664098+00:00' - - claim_type: org_name - claim_value: museumkinderdorpneerbosch.nl - raw_value: museumkinderdorpneerbosch.nl - source_url: http://www.vantlindenhoutmuseum.nl - retrieved_on: '2025-11-29T15:31:25.473939+00:00' - xpath: /html/head/meta[10] - html_file: web/0213/vantlindenhoutmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:44:43.664302+00:00' - claim_type: email claim_value: info@museumkinderdorpneerbosch.nl raw_value: info@museumkinderdorpneerbosch.nl @@ -990,16 +980,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:43.664714+00:00' - - claim_type: org_name - claim_value: NIEUWS - raw_value: NIEUWS - source_url: http://www.vantlindenhoutmuseum.nl - retrieved_on: '2025-11-29T15:31:25.473939+00:00' - xpath: /html/body/div/div/div/div/div[2]/div/div/section[1]/div/div/div/div/div/div[1]/div/h1 - html_file: web/0213/vantlindenhoutmuseum.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:43.664765+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Kinderdorp Neerbosch diff --git a/data/nde/enriched/entries/0219_Q109382770.yaml b/data/nde/enriched/entries/0219_Q109382770.yaml index e2978f9728..a46c11a398 100644 --- a/data/nde/enriched/entries/0219_Q109382770.yaml +++ b/data/nde/enriched/entries/0219_Q109382770.yaml @@ -532,18 +532,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:45.516054+00:00' source_archive: web/0219/museumoene.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Cultuur Historisch Museum Oene - source_url: https://museumoene.nl - retrieved_on: '2025-11-29T15:33:01.789691+00:00' - xpath: /html/head/title - html_file: web/0219/museumoene.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:45.515544+00:00' - claim_type: description_short claim_value: Cultuur Historisch Museum Oene raw_value: Cultuur Historisch Museum Oene @@ -564,6 +554,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:45.515976+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Cultuur Historisch Museum Oene diff --git a/data/nde/enriched/entries/0220_Q98961921.yaml b/data/nde/enriched/entries/0220_Q98961921.yaml index 8e1109f425..59c355a4f3 100644 --- a/data/nde/enriched/entries/0220_Q98961921.yaml +++ b/data/nde/enriched/entries/0220_Q98961921.yaml @@ -740,7 +740,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:45.976892+00:00' source_archive: web/0220/sjoelelburg.nl - claims_count: 5 + claims_count: 2 claims: - claim_type: org_name claim_value: Museum Sjoel Elburg @@ -780,124 +780,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_description extraction_timestamp: '2025-12-01T10:44:45.976067+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://sjoelelburg.nl/&t=Het - Museum - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://sjoelelburg.nl/&t=Het - Museum - source_url: https://sjoelelburg.nl/ - retrieved_on: '2025-11-29T15:36:05.747990+00:00' - xpath: /html/body/div/div[5]/div/div/div/div/div[3]/ul/li[2]/a - html_file: web/0220/sjoelelburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:45.976626+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?source=https://sjoelelburg.nl/&text=Het - Museum:https://sjoelelburg.nl/ - raw_value: https://twitter.com/intent/tweet?source=https://sjoelelburg.nl/&text=Het - Museum:https://sjoelelburg.nl/ - source_url: https://sjoelelburg.nl/ - retrieved_on: '2025-11-29T15:36:05.747990+00:00' - xpath: /html/body/div/div[5]/div/div/div/div/div[3]/ul/li[3]/a - html_file: web/0220/sjoelelburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:45.976635+00:00' - - claim_type: social_linkedin - claim_value: 'http://www.linkedin.com/shareArticle?mini=true&url=https://sjoelelburg.nl/&title=Het - Museum&summary=Steun het museum Openingstijden en prijzen Museum Sjoel ElburgMuseum - Sjoel Elburg is... een verhalenmuseum over het (on)gewone dagelijks leven van - joodse families die vanaf 1700 in Elburg hebben gewoond. Museum Sjoel Elburg - biedt... een historisch perspectief voor een actueel thema: integratie en verdraagzaamheid. - Museum Sjoel Elburg presenteert... de joodse geschiedenis op een dynamische, - onorthodoxe, boeiende en eigenzinnige wijze en laat daarmee een eigen museaal - geluid horen. De inrichting De inrichting van Museum Sjoel Elburg is tot stand - gekomen met medewerking van DJO Ontwerpers Den Haag en Cultuurproject Apeldoorn. - Inrichting museum De tentoonstellingVan 11 juli tot en met 29 november 2025 - de serie ‘Chassidische legenden’ te zien van de Groninger kunstenaar H.N. Werkman - De tentoonstelling Educatie Een bezoek aan Museum Sjoel Elburg is een unieke - ervaring. Nergens in Nederland is de geschiedenis van de joden in de mediene - – de provincie – zo mooi uitgewerkt als in dit museum. EDUCATIEJoden in Elburg - Namenlijst Ter herinnering: alle namen en gegevens van in Elburg geboren slachtoffers. - JODEN IN ELBURG 30 nov Sophie Northeimer (1899-1973) 16 mei Josef Steinhauer - 16 mei Gerda Steinhauer 14 mei Jozeph de Lange 11 mei Jozeph Beem 08 mrt Barend - de Hond 08 mrt Klaartje de Hond – de laatste brief HET LAATSTE NIEUWS 30 okt - Zoektocht naar onderduikgevers van Joods echtpaar Gans-Koopman Het Joodse echtpaar - Gesina Gans-Koopman en Eli Gans uit Amsterdam duikt in 1944 in Doornspijk onder. - Gesina heeft een vals persoonsbewijs op naam van Gesina Lees meer 26 okt Sjoellezing - 20 november 2025 Op uitnodiging van de Vrienden van Museum Sjoel Elburg verzorgt - drs. Cor Hoogerwerf op donderdag 20 november 2025 in de Ichthuskerk te Elburg - de Sjoellezing Lees meer 10 okt Sjoel ontvangt ruim 1000 euro via Rabo ClubSupport - Museum Sjoel Elburg deed dit jaar weer mee met de actie Rabo ClubSupport en - dat was niet zonder resultaat. Penningmeester en bestuurslid van Museum Sjoel - Lees meer 08 okt Najaarscursus 2025: ‘Joodse wijsheid en filosofie’ Wat is wijsheid? - Hoe leef je een goed en betekenisvol leven? Eeuwenlang zochten Joodse denkers - naar antwoorden op deze vragen. In de najaarscursus van 2025 Lees meer 28 aug - Vreugde in duistere tijden Van een vriend kreeg de Groninger kunstenaar en drukker - Hendrik Nicolaas Werkman het boekje ‘Die Legende des Baalsjems’ van de Joodse - godsdienstfilosoof Martin Buber te Lees meer 26 jul Oude Joodse legenden als - vorm van verzet Wat moet je in het heden met vertellingen van hemelwandelingen, - vliegende paarden en de wederopstanding van een Joodse bruid? De Groninger kunstenaar - H.N. Werkman zag Lees meer 17 jul In memoriam Theo van Ledden (1945-2025) Met - droefenis delen wij het bericht van het overlijden van Theo van Ledden. Theo - was oud-voorzitter van het bestuur van Museum Sjoel Elburg. Hij blijft Lees - meer 11 jul Chassidische legenden in de sjoel Tot en met 29 november biedt Museum - Sjoel Elburg de tentoonstelling ‘H.N. Werkman. Chassidische legenden en de kunst - van het verzet’. De opening werd 10 Lees meer Tentoonstellingen Verwacht 06 - dec, 2025 Mag Saar er zijn? Openingstijden en prijzen MUSEUM SJOEL ELBURG IS - WEER OPEN!Welkom! Museum Sjoel Elburg is weer open, zij het met nieuwe coronaregels - van doen en laten en onder de […]&source=https://sjoelelburg.nl/' - raw_value: 'http://www.linkedin.com/shareArticle?mini=true&url=https://sjoelelburg.nl/&title=Het - Museum&summary=Steun het museum Openingstijden en prijzen Museum Sjoel ElburgMuseum - Sjoel Elburg is... een verhalenmuseum over het (on)gewone dagelijks leven van - joodse families die vanaf 1700 in Elburg hebben gewoond. Museum Sjoel Elburg - biedt... een historisch perspectief voor een actueel thema: integratie en verdraagzaamheid. - Museum Sjoel Elburg presenteert... de joodse geschiedenis op een dynamische, - onorthodoxe, boeiende en eigenzinnige wijze en laat daarmee een eigen museaal - geluid horen. De inrichting De inrichting van Museum Sjoel Elburg is tot stand - gekomen met medewerking van DJO Ontwerpers Den Haag en Cultuurproject Apeldoorn. - Inrichting museum De tentoonstellingVan 11 juli tot en met 29 november 2025 - de serie ‘Chassidische legenden’ te zien van de Groninger kunstenaar H.N. Werkman - De tentoonstelling Educatie Een bezoek aan Museum Sjoel Elburg is een unieke - ervaring. Nergens in Nederland is de geschiedenis van de joden in de mediene - – de provincie – zo mooi uitgewerkt als in dit museum. EDUCATIEJoden in Elburg - Namenlijst Ter herinnering: alle namen en gegevens van in Elburg geboren slachtoffers. - JODEN IN ELBURG 30 nov Sophie Northeimer (1899-1973) 16 mei Josef Steinhauer - 16 mei Gerda Steinhauer 14 mei Jozeph de Lange 11 mei Jozeph Beem 08 mrt Barend - de Hond 08 mrt Klaartje de Hond – de laatste brief HET LAATSTE NIEUWS 30 okt - Zoektocht naar onderduikgevers van Joods echtpaar Gans-Koopman Het Joodse echtpaar - Gesina Gans-Koopman en Eli Gans uit Amsterdam duikt in 1944 in Doornspijk onder. - Gesina heeft een vals persoonsbewijs op naam van Gesina Lees meer 26 okt Sjoellezing - 20 november 2025 Op uitnodiging van de Vrienden van Museum Sjoel Elburg verzorgt - drs. Cor Hoogerwerf op donderdag 20 november 2025 in de Ichthuskerk te Elburg - de Sjoellezing Lees meer 10 okt Sjoel ontvangt ruim 1000 euro via Rabo ClubSupport - Museum Sjoel Elburg deed dit jaar weer mee met de actie Rabo ClubSupport en - dat was niet zonder resultaat. Penningmeester en bestuurslid van Museum Sjoel - Lees meer 08 okt Najaarscursus 2025: ‘Joodse wijsheid en filosofie’ Wat is wijsheid? - Hoe leef je een goed en betekenisvol leven? Eeuwenlang zochten Joodse denkers - naar antwoorden op deze vragen. In de najaarscursus van 2025 Lees meer 28 aug - Vreugde in duistere tijden Van een vriend kreeg de Groninger kunstenaar en drukker - Hendrik Nicolaas Werkman het boekje ‘Die Legende des Baalsjems’ van de Joodse - godsdienstfilosoof Martin Buber te Lees meer 26 jul Oude Joodse legenden als - vorm van verzet Wat moet je in het heden met vertellingen van hemelwandelingen, - vliegende paarden en de wederopstanding van een Joodse bruid? De Groninger kunstenaar - H.N. Werkman zag Lees meer 17 jul In memoriam Theo van Ledden (1945-2025) Met - droefenis delen wij het bericht van het overlijden van Theo van Ledden. Theo - was oud-voorzitter van het bestuur van Museum Sjoel Elburg. Hij blijft Lees - meer 11 jul Chassidische legenden in de sjoel Tot en met 29 november biedt Museum - Sjoel Elburg de tentoonstelling ‘H.N. Werkman. Chassidische legenden en de kunst - van het verzet’. De opening werd 10 Lees meer Tentoonstellingen Verwacht 06 - dec, 2025 Mag Saar er zijn? Openingstijden en prijzen MUSEUM SJOEL ELBURG IS - WEER OPEN!Welkom! Museum Sjoel Elburg is weer open, zij het met nieuwe coronaregels - van doen en laten en onder de […]&source=https://sjoelelburg.nl/' - source_url: https://sjoelelburg.nl/ - retrieved_on: '2025-11-29T15:36:05.747990+00:00' - xpath: /html/body/div/div[5]/div/div/div/div/div[3]/ul/li[5]/a - html_file: web/0220/sjoelelburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:45.976653+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Sjoel Elburg diff --git a/data/nde/enriched/entries/0225_Q98908556.yaml b/data/nde/enriched/entries/0225_Q98908556.yaml index adce77c7fb..1aa88ee207 100644 --- a/data/nde/enriched/entries/0225_Q98908556.yaml +++ b/data/nde/enriched/entries/0225_Q98908556.yaml @@ -1000,18 +1000,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:47.390334+00:00' source_archive: web/0225/slotloevestein.nl - claims_count: 7 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Slot Loevestein - raw_value: Slot Loevestein - Slot Loevestein - Rijksmuseum - source_url: http://www.slotloevestein.nl/ - retrieved_on: '2025-11-29T15:36:51.543964+00:00' - xpath: /html/head/title - html_file: web/0225/slotloevestein.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:47.388969+00:00' - claim_type: description_short claim_value: 'Rijksmuseum Slot Loevestein - UNESCO Werelderfgoed Nieuwe Hollandse Waterlinie - Beroemdste gevangene: Hugo de Groot. Ontsnapt in boekenkist.' @@ -1024,16 +1014,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:44:47.389211+00:00' - - claim_type: org_name - claim_value: Slot Loevestein - Rijksmuseum - raw_value: Slot Loevestein - Rijksmuseum - source_url: http://www.slotloevestein.nl/ - retrieved_on: '2025-11-29T15:36:51.543964+00:00' - xpath: /html/head/meta[10] - html_file: web/0225/slotloevestein.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:44:47.389563+00:00' - claim_type: email claim_value: info@slotloevestein.nl raw_value: info@slotloevestein.nl @@ -1074,6 +1054,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:47.390140+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijksmuseum Slot Loevestein diff --git a/data/nde/enriched/entries/0226_Q98893885.yaml b/data/nde/enriched/entries/0226_Q98893885.yaml index 9bc7c75e03..94da321c2e 100644 --- a/data/nde/enriched/entries/0226_Q98893885.yaml +++ b/data/nde/enriched/entries/0226_Q98893885.yaml @@ -387,18 +387,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:47.493414+00:00' source_archive: web/0226/museumveluwezoom.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Veluwezoom - source_url: https://www.museumveluwezoom.nl/ - retrieved_on: '2025-11-29T15:34:48.112339+00:00' - xpath: /html/head/title - html_file: web/0226/museumveluwezoom.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:47.492523+00:00' - claim_type: description_short claim_value: Museum Veluwezoom, gevestigd in Kasteel Doorwerth, biedt tentoonstellingen over de kunstenaarskolonie in Oosterbeek en de Veluwezoom. @@ -441,6 +431,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:47.493260+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Veluwezoom diff --git a/data/nde/enriched/entries/0230_Q18774274.yaml b/data/nde/enriched/entries/0230_Q18774274.yaml index 50110a4db3..81b1a0827c 100644 --- a/data/nde/enriched/entries/0230_Q18774274.yaml +++ b/data/nde/enriched/entries/0230_Q18774274.yaml @@ -609,7 +609,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:48.857637+00:00' source_archive: web/0230/mariahoeveputten.nl - claims_count: 14 + claims_count: 11 claims: - claim_type: org_name claim_value: Mariahoeve Putten @@ -621,36 +621,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:48.857013+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://mariahoeveputten.nl/ - retrieved_on: '2025-11-29T15:37:22.983423+00:00' - xpath: /html/body/footer/div[1]/div/div[1]/div[5]/ul/li[1]/a/svg/title - html_file: web/0230/mariahoeveputten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:48.857027+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://mariahoeveputten.nl/ - retrieved_on: '2025-11-29T15:37:22.983423+00:00' - xpath: /html/body/footer/div[1]/div/div[1]/div[5]/ul/li[2]/a/svg/title - html_file: web/0230/mariahoeveputten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:48.857033+00:00' - - claim_type: org_name - claim_value: YouTube - raw_value: YouTube - source_url: https://mariahoeveputten.nl/ - retrieved_on: '2025-11-29T15:37:22.983423+00:00' - xpath: /html/body/footer/div[1]/div/div[1]/div[5]/ul/li[3]/a/svg/title - html_file: web/0230/mariahoeveputten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:48.857037+00:00' - claim_type: org_name claim_value: Museumboerderij De Mariahoeve raw_value: Museumboerderij De Mariahoeve @@ -751,6 +721,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:48.857555+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museumboerderij Mariahoeve diff --git a/data/nde/enriched/entries/0234_Q2710899.yaml b/data/nde/enriched/entries/0234_Q2710899.yaml index 7e08d58d7c..3c27df9134 100644 --- a/data/nde/enriched/entries/0234_Q2710899.yaml +++ b/data/nde/enriched/entries/0234_Q2710899.yaml @@ -851,18 +851,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:50.618049+00:00' source_archive: web/0234/nationaalonderduikmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal onderduikmuseum - source_url: https://nationaalonderduikmuseum.nl - retrieved_on: '2025-11-29T15:40:23.434916+00:00' - xpath: /html/head/title - html_file: web/0234/nationaalonderduikmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:50.616994+00:00' - claim_type: org_name claim_value: Nationaal onderduikmuseum - raw_value: Nationaal onderduikmuseum - @@ -943,6 +933,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:50.617944+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Onderduikmuseum diff --git a/data/nde/enriched/entries/0245_Q98907726.yaml b/data/nde/enriched/entries/0245_Q98907726.yaml index 1933bc9856..db776d2e52 100644 --- a/data/nde/enriched/entries/0245_Q98907726.yaml +++ b/data/nde/enriched/entries/0245_Q98907726.yaml @@ -477,7 +477,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:53.689231+00:00' source_archive: web/0245/oudheidkundigeverenigingwehl.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Oudheidkundige vereniging Wehl @@ -489,16 +489,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:53.688202+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.oudheidkundigeverenigingwehl.nl - retrieved_on: '2025-11-29T15:39:17.440024+00:00' - xpath: /html/body/div/div/div/div/main/article/header/h1 - html_file: web/0245/oudheidkundigeverenigingwehl.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:53.689023+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oudheidkundige Vereniging Wehl diff --git a/data/nde/enriched/entries/0246_Q98895220.yaml b/data/nde/enriched/entries/0246_Q98895220.yaml index 73ebb9fd39..111e838531 100644 --- a/data/nde/enriched/entries/0246_Q98895220.yaml +++ b/data/nde/enriched/entries/0246_Q98895220.yaml @@ -321,7 +321,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:54.060526+00:00' source_archive: web/0246/zuwent.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Oudheidkundige Vereniging Zuwent @@ -333,16 +333,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:44:54.059903+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://zuwent.nl - retrieved_on: '2025-11-29T15:39:48.774087+00:00' - xpath: /html/body/div/div/div/div/div/main/div[1]/h1 - html_file: web/0246/zuwent.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:44:54.060459+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oudheidkundige Vereniging Zuwent diff --git a/data/nde/enriched/entries/0251_Q26203717.yaml b/data/nde/enriched/entries/0251_Q26203717.yaml index 19d863071f..405d8557c5 100644 --- a/data/nde/enriched/entries/0251_Q26203717.yaml +++ b/data/nde/enriched/entries/0251_Q26203717.yaml @@ -848,22 +848,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:44:55.005764+00:00' source_archive: web/0251/regionaalarchiefrivierenland.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken - en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, - Neder-Betuwe, Tiel en Zaltbommel en over polders en waterschappen in Rivierenland. - raw_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken - en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, - Neder-Betuwe, Tiel en Zaltbommel en over polders en waterschappen in Rivierenland. - source_url: https://regionaalarchiefrivierenland.nl/home - retrieved_on: '2025-11-29T15:39:50.244225+00:00' - xpath: /html/head/title - html_file: web/0251/regionaalarchiefrivierenland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:55.004643+00:00' - claim_type: description_short claim_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, @@ -918,6 +904,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:55.005600+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regionaal Archief Rivierenland diff --git a/data/nde/enriched/entries/0252_Q81181406.yaml b/data/nde/enriched/entries/0252_Q81181406.yaml index 18b15680ce..a0e6b18888 100644 --- a/data/nde/enriched/entries/0252_Q81181406.yaml +++ b/data/nde/enriched/entries/0252_Q81181406.yaml @@ -553,18 +553,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:55.136545+00:00' source_archive: web/0252/erfgoedcentrumzutphen.nl - claims_count: 12 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Erfgoedcentrum Zutphen - source_url: https://erfgoedcentrumzutphen.nl/ - retrieved_on: '2025-11-29T15:39:50.527456+00:00' - xpath: /html/head/title - html_file: web/0252/erfgoedcentrumzutphen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:55.135283+00:00' - claim_type: description_short claim_value: 'Het Erfgoedcentrum Zutphen: historisch hart van de regio! Eén plek voor onze vier erfgoedpartners: Archeologie, Monumentenzorg, Musea Zutphen en @@ -679,6 +669,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:55.136478+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regionaal Archief Zutphen | Erfgoedcentrum Zutphen diff --git a/data/nde/enriched/entries/0258_Q56459713.yaml b/data/nde/enriched/entries/0258_Q56459713.yaml index 915b5260c0..2173768cd2 100644 --- a/data/nde/enriched/entries/0258_Q56459713.yaml +++ b/data/nde/enriched/entries/0258_Q56459713.yaml @@ -872,18 +872,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:56.930660+00:00' source_archive: web/0258/stadsmuseum-harderwijk.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stadsmuseum Harderwijk - source_url: https://www.stadsmuseum-harderwijk.nl/ - retrieved_on: '2025-11-29T15:41:05.031608+00:00' - xpath: /html/head/title - html_file: web/0258/stadsmuseum-harderwijk.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:56.930073+00:00' - claim_type: org_name claim_value: Stadsmuseum Harderwijk raw_value: Stadsmuseum Harderwijk @@ -954,6 +944,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:44:56.930575+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stadsmuseum Harderwijk diff --git a/data/nde/enriched/entries/0262_Q110995923.yaml b/data/nde/enriched/entries/0262_Q110995923.yaml index 50d340ad4a..334c393c84 100644 --- a/data/nde/enriched/entries/0262_Q110995923.yaml +++ b/data/nde/enriched/entries/0262_Q110995923.yaml @@ -500,7 +500,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:57.915376+00:00' source_archive: web/0262/buurderijdelagehof.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Buurderij De Lage Hof | @@ -562,16 +562,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:57.915141+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fbuurderijdelagehof.nl%2F - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fbuurderijdelagehof.nl%2F - source_url: https://buurderijdelagehof.nl/ - retrieved_on: '2025-11-29T15:40:44.887084+00:00' - xpath: /html/body/div[1]/div/footer/div[1]/div/div[2]/div/div/div/div/div[1]/div[1]/div/div/div/div[3]/a - html_file: web/0262/buurderijdelagehof.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:44:57.915168+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Buurderij De Lage Hof diff --git a/data/nde/enriched/entries/0263_Q98904476.yaml b/data/nde/enriched/entries/0263_Q98904476.yaml index 83a92a5f7c..8cc4547f92 100644 --- a/data/nde/enriched/entries/0263_Q98904476.yaml +++ b/data/nde/enriched/entries/0263_Q98904476.yaml @@ -444,18 +444,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:44:58.361382+00:00' source_archive: web/0263/erfgoedlov.org - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Erfgoed Meubelfabriek L.O.V. - source_url: https://erfgoedlov.org/ - retrieved_on: '2025-11-29T15:42:06.973774+00:00' - xpath: /html/head/title - html_file: web/0263/erfgoedlov.org/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:44:58.360020+00:00' - claim_type: description_short claim_value: Labor Omnia Vincit (Arbeid overwint alles)  was de idealistische strijdkreet van de in 1910 te Oosterbeek door Gerrit Pelt opgerichte meubelfabriek. @@ -504,6 +494,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:44:58.361105+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Erfgoed Meubelfabriek L.O.V diff --git a/data/nde/enriched/entries/0273_streekarchief_bommelerwaard.yaml b/data/nde/enriched/entries/0273_streekarchief_bommelerwaard.yaml index ae4541e0c2..6637293cad 100644 --- a/data/nde/enriched/entries/0273_streekarchief_bommelerwaard.yaml +++ b/data/nde/enriched/entries/0273_streekarchief_bommelerwaard.yaml @@ -268,22 +268,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:01.077118+00:00' source_archive: web/0273/regionaalarchiefrivierenland.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken - en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, - Neder-Betuwe, Tiel en Zaltbommel en over polders en waterschappen in Rivierenland. - raw_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken - en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, - Neder-Betuwe, Tiel en Zaltbommel en over polders en waterschappen in Rivierenland. - source_url: https://regionaalarchiefrivierenland.nl/ - retrieved_on: '2025-11-29T15:44:06.761754+00:00' - xpath: /html/head/title - html_file: web/0273/regionaalarchiefrivierenland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:01.076512+00:00' - claim_type: description_short claim_value: Het RAR verzamelt en beheert archieven, afbeeldingen, kranten, boeken en documentatie over de gemeenten Buren, Culemborg, West Betuwe, Maasdriel, @@ -338,6 +324,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:01.077015+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchief Bommelwaard diff --git a/data/nde/enriched/entries/0276_Q111190981.yaml b/data/nde/enriched/entries/0276_Q111190981.yaml index 1ffafa33b0..db0e06e8c5 100644 --- a/data/nde/enriched/entries/0276_Q111190981.yaml +++ b/data/nde/enriched/entries/0276_Q111190981.yaml @@ -415,18 +415,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:02.280716+00:00' source_archive: web/0276/noordveluwsarchief.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - NoVA - source_url: https://noordveluwsarchief.nl/ - retrieved_on: '2025-11-29T15:47:48.936473+00:00' - xpath: /html/head/title - html_file: web/0276/noordveluwsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:02.278739+00:00' - claim_type: description_short claim_value: Het noordveluws archief geeft toegang tot de geschiedenis van acht gemeenten op de Veluwe. @@ -499,6 +489,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:02.280407+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchivariaat Noordwest-Veluwe diff --git a/data/nde/enriched/entries/0280_Q56459657.yaml b/data/nde/enriched/entries/0280_Q56459657.yaml index 1dd2905e38..93ae625772 100644 --- a/data/nde/enriched/entries/0280_Q56459657.yaml +++ b/data/nde/enriched/entries/0280_Q56459657.yaml @@ -638,18 +638,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:03.236578+00:00' source_archive: web/0280/hagedoornsplaatse.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Veluws Museum Hagedoorns Plaatse - source_url: http://www.hagedoornsplaatse.nl - retrieved_on: '2025-11-29T15:46:05.283440+00:00' - xpath: /html/head/title - html_file: web/0280/hagedoornsplaatse.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:03.234944+00:00' - claim_type: description_short claim_value: 'Water, historie en de Veluwe: samen komen zij tot leven bij Veluws Museum Hagedoorns Plaatse Nieuw dit seizoen! De gloednieuwe tentoonstelling @@ -740,6 +730,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:03.236230+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Veluws Museum Hagedoorns Plaatse diff --git a/data/nde/enriched/entries/0283_vereniging_oudheidkunde_lichtenvoorde.yaml b/data/nde/enriched/entries/0283_vereniging_oudheidkunde_lichtenvoorde.yaml index afbcdf3327..93439046fd 100644 --- a/data/nde/enriched/entries/0283_vereniging_oudheidkunde_lichtenvoorde.yaml +++ b/data/nde/enriched/entries/0283_vereniging_oudheidkunde_lichtenvoorde.yaml @@ -255,7 +255,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:22.374849+00:00' source_archive: web/0283/zuwent.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Oudheidkundige Vereniging Zuwent @@ -267,13 +267,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:25:22.374268+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.zuwent.nl/ - retrieved_on: '' - xpath: /html/body/div/div/div/div/div/main/div[1]/h1 - html_file: web/0283/zuwent.nl/rendered.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T12:25:22.374780+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0285_Q2470853.yaml b/data/nde/enriched/entries/0285_Q2470853.yaml index 17ef4c6f0c..3558f4fc75 100644 --- a/data/nde/enriched/entries/0285_Q2470853.yaml +++ b/data/nde/enriched/entries/0285_Q2470853.yaml @@ -1120,7 +1120,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:04.150070+00:00' source_archive: web/0285/bergendal.wereldmuseum.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: 'Wereldmuseum Berg en Dal | Het Archief: 2014' @@ -1204,36 +1204,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:04.149232+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&title=Het%20Archief%3A%202014%20-%202023&source=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&summary=Het%20Archief%3A%202014%20-%202023 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&title=Het%20Archief%3A%202014%20-%202023&source=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&summary=Het%20Archief%3A%202014%20-%202023 - source_url: https://bergendal.wereldmuseum.nl/nl - retrieved_on: '2025-11-29T15:47:20.084264+00:00' - xpath: /html/body/div[2]/div[2]/main/div[3]/div/section/div/div/div/div/div[2]/a[1] - html_file: web/0285/bergendal.wereldmuseum.nl/pages/nl.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:04.149438+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?url=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&text=Het%20Archief%3A%202014%20-%202023&via=afrikamuseum&hashtags=afrikamuseum - raw_value: https://twitter.com/intent/tweet?url=https%3A//bergendal.wereldmuseum.nl/nl/node/4177&text=Het%20Archief%3A%202014%20-%202023&via=afrikamuseum&hashtags=afrikamuseum - source_url: https://bergendal.wereldmuseum.nl/nl - retrieved_on: '2025-11-29T15:47:20.084264+00:00' - xpath: /html/body/div[2]/div[2]/main/div[3]/div/section/div/div/div/div/div[2]/a[2] - html_file: web/0285/bergendal.wereldmuseum.nl/pages/nl.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:04.149462+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A//bergendal.wereldmuseum.nl/nl/node/4177 - raw_value: https://www.facebook.com/sharer.php?u=https%3A//bergendal.wereldmuseum.nl/nl/node/4177 - source_url: https://bergendal.wereldmuseum.nl/nl - retrieved_on: '2025-11-29T15:47:20.084264+00:00' - xpath: /html/body/div[2]/div[2]/main/div[3]/div/section/div/div/div/div/div[2]/a[3] - html_file: web/0285/bergendal.wereldmuseum.nl/pages/nl.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:04.149468+00:00' - claim_type: org_name claim_value: 'Het Archief: 2014 - 2023' raw_value: 'Het Archief: 2014 - 2023' @@ -1244,6 +1214,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:04.149600+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Wereldmuseum Berg en Dal diff --git a/data/nde/enriched/entries/0286_Q484899.yaml b/data/nde/enriched/entries/0286_Q484899.yaml index f2c79646d6..d504f9d3f5 100644 --- a/data/nde/enriched/entries/0286_Q484899.yaml +++ b/data/nde/enriched/entries/0286_Q484899.yaml @@ -847,18 +847,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:04.559349+00:00' source_archive: web/0286/velorama.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal Fietsmuseum Velorama - source_url: https://velorama.nl/ - retrieved_on: '2025-11-29T15:47:25.907146+00:00' - xpath: /html/head/title - html_file: web/0286/velorama.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:04.558471+00:00' - claim_type: org_name claim_value: Nationaal Fietsmuseum Velorama raw_value: Nationaal Fietsmuseum Velorama @@ -919,6 +909,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:04.559245+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Fietsmuseum Velorama diff --git a/data/nde/enriched/entries/0295_Q121900753.yaml b/data/nde/enriched/entries/0295_Q121900753.yaml index 0fee756760..dc5baf8208 100644 --- a/data/nde/enriched/entries/0295_Q121900753.yaml +++ b/data/nde/enriched/entries/0295_Q121900753.yaml @@ -533,18 +533,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:06.876921+00:00' source_archive: web/0295/museumaandea.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum aan de A - source_url: https://museumaandea.nl/ - retrieved_on: '2025-11-29T15:49:11.715226+00:00' - xpath: /html/head/title - html_file: web/0295/museumaandea.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:06.876192+00:00' - claim_type: description_short claim_value: Museum van, door en over Groningers. Kijk voor je bezoek altijd op onze website voor de actuele tentoonstellingen. @@ -627,6 +617,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:06.876793+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum aan de A diff --git a/data/nde/enriched/entries/0309_Q81181191.yaml b/data/nde/enriched/entries/0309_Q81181191.yaml index 2508753dfb..c1a5c47799 100644 --- a/data/nde/enriched/entries/0309_Q81181191.yaml +++ b/data/nde/enriched/entries/0309_Q81181191.yaml @@ -441,18 +441,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:10.510345+00:00' source_archive: web/0309/eemsdelta.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Eemsdelta - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:22.610004+00:00' - xpath: /html/head/title - html_file: web/0309/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.509614+00:00' - claim_type: org_name claim_value: Gemeente Eemsdelta raw_value: Gemeente Eemsdelta @@ -473,46 +463,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:10.509633+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:22.610004+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0309/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.509642+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:22.610004+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0309/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.509647+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:22.610004+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0309/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.509651+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:22.610004+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0309/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.509655+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -603,6 +553,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:10.510187+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Appingedam diff --git a/data/nde/enriched/entries/0311_Q81181383.yaml b/data/nde/enriched/entries/0311_Q81181383.yaml index 2852d3c733..c902d76ff2 100644 --- a/data/nde/enriched/entries/0311_Q81181383.yaml +++ b/data/nde/enriched/entries/0311_Q81181383.yaml @@ -438,18 +438,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:10.889962+00:00' source_archive: web/0311/westerwolde.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerwolde - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:25.286603+00:00' - xpath: /html/head/title - html_file: web/0311/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.888532+00:00' - claim_type: org_name claim_value: Gemeente Westerwolde raw_value: Gemeente Westerwolde @@ -480,46 +470,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:10.888559+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:25.286603+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0311/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.888572+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:25.286603+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0311/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.888577+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:25.286603+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0311/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.888581+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:25.286603+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0311/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:10.888585+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -600,6 +550,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:10.889714+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Bellingwedde diff --git a/data/nde/enriched/entries/0313_Q81181241.yaml b/data/nde/enriched/entries/0313_Q81181241.yaml index 4ba6c92732..72b5834281 100644 --- a/data/nde/enriched/entries/0313_Q81181241.yaml +++ b/data/nde/enriched/entries/0313_Q81181241.yaml @@ -356,18 +356,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.083942+00:00' source_archive: web/0313/eemsdelta.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Eemsdelta - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:26.290002+00:00' - xpath: /html/head/title - html_file: web/0313/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.082770+00:00' - claim_type: org_name claim_value: Gemeente Eemsdelta raw_value: Gemeente Eemsdelta @@ -388,46 +378,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:11.082789+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:26.290002+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0313/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.082798+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:26.290002+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0313/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.082803+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:26.290002+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0313/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.082808+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:26.290002+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0313/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.082812+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -518,6 +468,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.083501+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Delfzijl diff --git a/data/nde/enriched/entries/0315_Q81181260.yaml b/data/nde/enriched/entries/0315_Q81181260.yaml index 8c5c50d6b8..7212802ca2 100644 --- a/data/nde/enriched/entries/0315_Q81181260.yaml +++ b/data/nde/enriched/entries/0315_Q81181260.yaml @@ -348,18 +348,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.269475+00:00' source_archive: web/0315/westerkwartier.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerkwartier - source_url: https://www.westerkwartier.nl/ - retrieved_on: '2025-11-29T15:51:27.421565+00:00' - xpath: /html/head/title - html_file: web/0315/westerkwartier.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.268968+00:00' - claim_type: description_short claim_value: Op deze website vindt u alle informatie, nieuwsberichten en dienstverlening van gemeente Westerkwartier. @@ -412,6 +402,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.269372+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Grootegast diff --git a/data/nde/enriched/entries/0316_Q81181282.yaml b/data/nde/enriched/entries/0316_Q81181282.yaml index 6b181c6d9f..11a13e1940 100644 --- a/data/nde/enriched/entries/0316_Q81181282.yaml +++ b/data/nde/enriched/entries/0316_Q81181282.yaml @@ -353,18 +353,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.373161+00:00' source_archive: web/0316/gemeente.groningen.nl - claims_count: 31 + claims_count: 30 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Groningen - source_url: https://gemeente.groningen.nl/ - retrieved_on: '2025-11-29T15:51:35.978903+00:00' - xpath: /html/head/title - html_file: web/0316/gemeente.groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.372331+00:00' - claim_type: org_name claim_value: mobiliteit icon raw_value: mobiliteit icon @@ -667,6 +657,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.372954+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Haren diff --git a/data/nde/enriched/entries/0317_Q81181273.yaml b/data/nde/enriched/entries/0317_Q81181273.yaml index 169661d5bf..dbefbe6c60 100644 --- a/data/nde/enriched/entries/0317_Q81181273.yaml +++ b/data/nde/enriched/entries/0317_Q81181273.yaml @@ -375,18 +375,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.477554+00:00' source_archive: web/0317/midden-groningen.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Midden-Groningen - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:37.597843+00:00' - xpath: /html/head/title - html_file: web/0317/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.476796+00:00' - claim_type: org_name claim_value: Gemeente Midden-Groningen raw_value: Gemeente Midden-Groningen @@ -407,36 +397,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:11.476814+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:37.597843+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0317/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.476823+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:37.597843+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0317/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.476827+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:37.597843+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0317/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.476831+00:00' - claim_type: org_name claim_value: Youtube kanaal raw_value: Youtube kanaal @@ -497,6 +457,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.477409+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Hoogezand-Sappemeer diff --git a/data/nde/enriched/entries/0318_Q81181307.yaml b/data/nde/enriched/entries/0318_Q81181307.yaml index a951df6afe..e0eab76d5c 100644 --- a/data/nde/enriched/entries/0318_Q81181307.yaml +++ b/data/nde/enriched/entries/0318_Q81181307.yaml @@ -390,18 +390,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.572721+00:00' source_archive: web/0318/westerkwartier.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerkwartier - source_url: https://www.westerkwartier.nl/ - retrieved_on: '2025-11-29T15:51:38.210879+00:00' - xpath: /html/head/title - html_file: web/0318/westerkwartier.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.571982+00:00' - claim_type: description_short claim_value: Op deze website vindt u alle informatie, nieuwsberichten en dienstverlening van gemeente Westerkwartier. @@ -454,6 +444,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.572608+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Leek diff --git a/data/nde/enriched/entries/0319_Q81181310.yaml b/data/nde/enriched/entries/0319_Q81181310.yaml index 1193c80847..237128011c 100644 --- a/data/nde/enriched/entries/0319_Q81181310.yaml +++ b/data/nde/enriched/entries/0319_Q81181310.yaml @@ -422,18 +422,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.677180+00:00' source_archive: web/0319/eemsdelta.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Eemsdelta - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:41.290233+00:00' - xpath: /html/head/title - html_file: web/0319/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.676253+00:00' - claim_type: org_name claim_value: Gemeente Eemsdelta raw_value: Gemeente Eemsdelta @@ -454,46 +444,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:11.676274+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:41.290233+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0319/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.676283+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:41.290233+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0319/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.676288+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:41.290233+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0319/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.676292+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.eemsdelta.nl/home - retrieved_on: '2025-11-29T15:51:41.290233+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0319/eemsdelta.nl/pages/home.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.676296+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -584,6 +534,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.676898+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Loppersum diff --git a/data/nde/enriched/entries/0320_Q81181318.yaml b/data/nde/enriched/entries/0320_Q81181318.yaml index 7d8f5061d6..64386132aa 100644 --- a/data/nde/enriched/entries/0320_Q81181318.yaml +++ b/data/nde/enriched/entries/0320_Q81181318.yaml @@ -318,18 +318,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.780971+00:00' source_archive: web/0320/westerkwartier.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerkwartier - source_url: https://www.westerkwartier.nl/ - retrieved_on: '2025-11-29T15:51:41.935757+00:00' - xpath: /html/head/title - html_file: web/0320/westerkwartier.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.780253+00:00' - claim_type: description_short claim_value: Op deze website vindt u alle informatie, nieuwsberichten en dienstverlening van gemeente Westerkwartier. @@ -382,6 +372,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.780763+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Marum diff --git a/data/nde/enriched/entries/0321_Q81181324.yaml b/data/nde/enriched/entries/0321_Q81181324.yaml index dca250aa55..be98d73262 100644 --- a/data/nde/enriched/entries/0321_Q81181324.yaml +++ b/data/nde/enriched/entries/0321_Q81181324.yaml @@ -370,18 +370,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.877032+00:00' source_archive: web/0321/midden-groningen.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Midden-Groningen - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:42.179010+00:00' - xpath: /html/head/title - html_file: web/0321/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.876362+00:00' - claim_type: org_name claim_value: Gemeente Midden-Groningen raw_value: Gemeente Midden-Groningen @@ -402,36 +392,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:11.876380+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:42.179010+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0321/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.876388+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:42.179010+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0321/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.876392+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:42.179010+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0321/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.876396+00:00' - claim_type: org_name claim_value: Youtube kanaal raw_value: Youtube kanaal @@ -492,6 +452,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.876893+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Menterwolde diff --git a/data/nde/enriched/entries/0322_Q81181387.yaml b/data/nde/enriched/entries/0322_Q81181387.yaml index 6d5d7b379a..20ac2d4a13 100644 --- a/data/nde/enriched/entries/0322_Q81181387.yaml +++ b/data/nde/enriched/entries/0322_Q81181387.yaml @@ -531,18 +531,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:11.991194+00:00' source_archive: web/0322/gemeente-oldambt.nl - claims_count: 35 + claims_count: 34 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Oldambt - source_url: http://www.gemeente-oldambt.nl - retrieved_on: '2025-11-29T15:51:42.548124+00:00' - xpath: /html/head/title - html_file: web/0322/gemeente-oldambt.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:11.990301+00:00' - claim_type: org_name claim_value: verkiezingen icon raw_value: verkiezingen icon @@ -883,6 +873,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:11.990987+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Oldambt diff --git a/data/nde/enriched/entries/0323_Q81181329.yaml b/data/nde/enriched/entries/0323_Q81181329.yaml index e837a75ec6..d79f9b9599 100644 --- a/data/nde/enriched/entries/0323_Q81181329.yaml +++ b/data/nde/enriched/entries/0323_Q81181329.yaml @@ -517,7 +517,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:12.129080+00:00' source_archive: web/0323/pekela.nl - claims_count: 9 + claims_count: 7 claims: - claim_type: org_name claim_value: Homepage @@ -529,26 +529,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:12.128265+00:00' - - claim_type: address - claim_value: '' - raw_value: ' ' - source_url: https://www.pekela.nl/ - retrieved_on: '2025-11-29T15:51:43.085163+00:00' - xpath: /html/head/script[24] - html_file: web/0323/pekela.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_streetAddress - extraction_timestamp: '2025-12-01T10:45:12.128639+00:00' - - claim_type: phone - claim_value: '' - raw_value: '' - source_url: https://www.pekela.nl/ - retrieved_on: '2025-11-29T15:51:43.085163+00:00' - xpath: /html/head/script[24] - html_file: web/0323/pekela.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_telephone - extraction_timestamp: '2025-12-01T10:45:12.128645+00:00' - claim_type: email claim_value: info@pekela.nl raw_value: info@pekela.nl @@ -609,6 +589,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:12.128949+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Pekela diff --git a/data/nde/enriched/entries/0324_Q81181354.yaml b/data/nde/enriched/entries/0324_Q81181354.yaml index eece160435..dcd0a837d7 100644 --- a/data/nde/enriched/entries/0324_Q81181354.yaml +++ b/data/nde/enriched/entries/0324_Q81181354.yaml @@ -329,18 +329,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:12.232037+00:00' source_archive: web/0324/midden-groningen.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Midden-Groningen - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:43.274530+00:00' - xpath: /html/head/title - html_file: web/0324/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.231147+00:00' - claim_type: org_name claim_value: Gemeente Midden-Groningen raw_value: Gemeente Midden-Groningen @@ -361,36 +351,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:12.231165+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:43.274530+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0324/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.231174+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:43.274530+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0324/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.231178+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:43.274530+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0324/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.231181+00:00' - claim_type: org_name claim_value: Youtube kanaal raw_value: Youtube kanaal @@ -451,6 +411,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:12.231898+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Slochteren diff --git a/data/nde/enriched/entries/0326_Q81181363.yaml b/data/nde/enriched/entries/0326_Q81181363.yaml index 7f6a36c85e..137f1551df 100644 --- a/data/nde/enriched/entries/0326_Q81181363.yaml +++ b/data/nde/enriched/entries/0326_Q81181363.yaml @@ -315,18 +315,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:12.417417+00:00' source_archive: web/0326/gemeente.groningen.nl - claims_count: 31 + claims_count: 30 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Groningen - source_url: https://gemeente.groningen.nl/ - retrieved_on: '2025-11-29T15:51:43.937089+00:00' - xpath: /html/head/title - html_file: web/0326/gemeente.groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.416409+00:00' - claim_type: org_name claim_value: mobiliteit icon raw_value: mobiliteit icon @@ -629,6 +619,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:12.417176+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Ten Boer diff --git a/data/nde/enriched/entries/0327_Q81181368.yaml b/data/nde/enriched/entries/0327_Q81181368.yaml index eda5653f1f..97b7660f6f 100644 --- a/data/nde/enriched/entries/0327_Q81181368.yaml +++ b/data/nde/enriched/entries/0327_Q81181368.yaml @@ -513,7 +513,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:12.523986+00:00' source_archive: web/0327/veendam.nl - claims_count: 9 + claims_count: 7 claims: - claim_type: org_name claim_value: Onderwerpen @@ -545,26 +545,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:12.522797+00:00' - - claim_type: address - claim_value: '' - raw_value: ' ' - source_url: https://www.veendam.nl/ - retrieved_on: '2025-11-29T15:51:45.448208+00:00' - xpath: /html/head/script[23] - html_file: web/0327/veendam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_streetAddress - extraction_timestamp: '2025-12-01T10:45:12.523239+00:00' - - claim_type: phone - claim_value: '' - raw_value: '' - source_url: https://www.veendam.nl/ - retrieved_on: '2025-11-29T15:51:45.448208+00:00' - xpath: /html/head/script[23] - html_file: web/0327/veendam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_telephone - extraction_timestamp: '2025-12-01T10:45:12.523248+00:00' - claim_type: email claim_value: info@veendam.nl raw_value: info@veendam.nl @@ -615,6 +595,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:12.523838+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Veendam diff --git a/data/nde/enriched/entries/0328_Q81181347.yaml b/data/nde/enriched/entries/0328_Q81181347.yaml index 6abcd46c31..4183541deb 100644 --- a/data/nde/enriched/entries/0328_Q81181347.yaml +++ b/data/nde/enriched/entries/0328_Q81181347.yaml @@ -462,18 +462,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:12.881895+00:00' source_archive: web/0328/westerwolde.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerwolde - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:46.979358+00:00' - xpath: /html/head/title - html_file: web/0328/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.880682+00:00' - claim_type: org_name claim_value: Gemeente Westerwolde raw_value: Gemeente Westerwolde @@ -504,46 +494,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:12.880707+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:46.979358+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0328/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.880746+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:46.979358+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0328/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.880751+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:46.979358+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0328/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.880755+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:46.979358+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0328/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:12.880759+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -624,6 +574,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:12.881659+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Vlagtwedde diff --git a/data/nde/enriched/entries/0329_Q121292045.yaml b/data/nde/enriched/entries/0329_Q121292045.yaml index a489584c43..464461af31 100644 --- a/data/nde/enriched/entries/0329_Q121292045.yaml +++ b/data/nde/enriched/entries/0329_Q121292045.yaml @@ -501,18 +501,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:13.260776+00:00' source_archive: web/0329/westerwolde.nl - claims_count: 16 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerwolde - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:48.729627+00:00' - xpath: /html/head/title - html_file: web/0329/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.259662+00:00' - claim_type: org_name claim_value: Gemeente Westerwolde raw_value: Gemeente Westerwolde @@ -543,46 +533,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:13.259686+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:48.729627+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0329/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.259698+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:48.729627+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0329/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.259702+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:48.729627+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0329/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.259706+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.westerwolde.nl/ - retrieved_on: '2025-11-29T15:51:48.729627+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/0329/westerwolde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.259710+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -663,6 +613,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:13.260539+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Westerwolde diff --git a/data/nde/enriched/entries/0331_Q81181399.yaml b/data/nde/enriched/entries/0331_Q81181399.yaml index 38b60ac686..6d75baf713 100644 --- a/data/nde/enriched/entries/0331_Q81181399.yaml +++ b/data/nde/enriched/entries/0331_Q81181399.yaml @@ -466,18 +466,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:13.469757+00:00' source_archive: web/0331/westerkwartier.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Westerkwartier - source_url: https://www.westerkwartier.nl/ - retrieved_on: '2025-11-29T15:51:49.784707+00:00' - xpath: /html/head/title - html_file: web/0331/westerkwartier.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.468482+00:00' - claim_type: description_short claim_value: Op deze website vindt u alle informatie, nieuwsberichten en dienstverlening van gemeente Westerkwartier. @@ -530,6 +520,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:13.469358+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Zuidhorn diff --git a/data/nde/enriched/entries/0332_Q23987486.yaml b/data/nde/enriched/entries/0332_Q23987486.yaml index 794dcdf805..8d94f0a0fc 100644 --- a/data/nde/enriched/entries/0332_Q23987486.yaml +++ b/data/nde/enriched/entries/0332_Q23987486.yaml @@ -1691,18 +1691,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:13.624102+00:00' source_archive: web/0332/midden-groningen.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Midden-Groningen - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:50.057500+00:00' - xpath: /html/head/title - html_file: web/0332/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.622976+00:00' - claim_type: org_name claim_value: Gemeente Midden-Groningen raw_value: Gemeente Midden-Groningen @@ -1723,36 +1713,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:13.623040+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:50.057500+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0332/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.623052+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:50.057500+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0332/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.623056+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:50.057500+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0332/midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.623061+00:00' - claim_type: org_name claim_value: Youtube kanaal raw_value: Youtube kanaal @@ -1813,6 +1773,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:13.623956+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Midden-Groningen diff --git a/data/nde/enriched/entries/0333_Q114079325.yaml b/data/nde/enriched/entries/0333_Q114079325.yaml index bfd82ba9cc..1b3b02b654 100644 --- a/data/nde/enriched/entries/0333_Q114079325.yaml +++ b/data/nde/enriched/entries/0333_Q114079325.yaml @@ -494,18 +494,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:13.762568+00:00' source_archive: web/0333/historischarchief.midden-groningen.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historisch Archief Midden-Groningen - source_url: https://historischarchief.midden-groningen.nl/ - retrieved_on: '2025-11-29T15:51:50.698335+00:00' - xpath: /html/head/title - html_file: web/0333/historischarchief.midden-groningen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:13.761501+00:00' - claim_type: description_short claim_value: Het historisch documentatiecentrum voor de gemeente Midden-Groningen, ontstaan uit een fusie van de gemeenten Hoogezand, Slochteren en Muntendam. @@ -596,6 +586,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:13.762485+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Archief Midden-Groningen diff --git a/data/nde/enriched/entries/0334_Q2190733.yaml b/data/nde/enriched/entries/0334_Q2190733.yaml index edec2e4c73..b1581dc619 100644 --- a/data/nde/enriched/entries/0334_Q2190733.yaml +++ b/data/nde/enriched/entries/0334_Q2190733.yaml @@ -693,7 +693,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:13.846249+00:00' source_archive: web/0334/groningerarchieven.nl - claims_count: 10 + claims_count: 9 claims: - claim_type: org_name claim_value: Groninger Archieven @@ -717,16 +717,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:13.845534+00:00' - - claim_type: org_name - claim_value: groningerarchieven.nl - raw_value: groningerarchieven.nl - source_url: http://www.groningerarchieven.nl - retrieved_on: '2025-11-29T15:51:51.080844+00:00' - xpath: /html/head/meta[11] - html_file: web/0334/groningerarchieven.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:45:13.845871+00:00' - claim_type: email claim_value: info@groningerarchieven.nl raw_value: info@groningerarchieven.nl @@ -797,6 +787,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:13.846185+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Groninger Archieven diff --git a/data/nde/enriched/entries/0337_Q56460901.yaml b/data/nde/enriched/entries/0337_Q56460901.yaml index f3d54fa586..0d86bc9c09 100644 --- a/data/nde/enriched/entries/0337_Q56460901.yaml +++ b/data/nde/enriched/entries/0337_Q56460901.yaml @@ -669,18 +669,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:14.960213+00:00' source_archive: web/0337/speelgoedmuseumroden.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Speelgoedmuseum Roden - source_url: https://speelgoedmuseumroden.nl/ - retrieved_on: '2025-11-29T15:53:26.720773+00:00' - xpath: /html/head/title - html_file: web/0337/speelgoedmuseumroden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:14.959525+00:00' - claim_type: description_short claim_value: Speelgoedmuseum Roden Een wereld vol speelgoed. Ontdek de grootste speelgoedcollectie van Nederland. @@ -753,6 +743,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:14.960115+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Speelgoedmuseum Kinderwereld Roden diff --git a/data/nde/enriched/entries/0345_Q110981303.yaml b/data/nde/enriched/entries/0345_Q110981303.yaml index 2f2bdf1a03..091654e03b 100644 --- a/data/nde/enriched/entries/0345_Q110981303.yaml +++ b/data/nde/enriched/entries/0345_Q110981303.yaml @@ -510,18 +510,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:17.172563+00:00' source_archive: web/0345/domiestoen.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Domies Toen - source_url: https://www.domiestoen.nl/ - retrieved_on: '2025-11-29T15:53:28.219736+00:00' - xpath: /html/head/title - html_file: web/0345/domiestoen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:17.172184+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/domiestoen/ raw_value: https://www.facebook.com/domiestoen/ @@ -552,6 +542,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:17.172519+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Domies Toen diff --git a/data/nde/enriched/entries/0346_stichting_historie_stedum.yaml b/data/nde/enriched/entries/0346_stichting_historie_stedum.yaml index 7921f36ad7..a6f181fbc5 100644 --- a/data/nde/enriched/entries/0346_stichting_historie_stedum.yaml +++ b/data/nde/enriched/entries/0346_stichting_historie_stedum.yaml @@ -296,18 +296,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:17.568039+00:00' source_archive: web/0346/historiestedum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Historie Stedum - source_url: https://www.historiestedum.nl/ - retrieved_on: '2025-11-29T15:54:01.697816+00:00' - xpath: /html/head/title - html_file: web/0346/historiestedum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:17.567477+00:00' - claim_type: description_short claim_value: Van harte welkom op de website van Stichting Historie Stedum. Deze stichting heeft tot doelstelling om het verleden van Stedum levend te houden @@ -356,6 +346,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:17.567887+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Historie Stedum diff --git a/data/nde/enriched/entries/0348_Q15879552.yaml b/data/nde/enriched/entries/0348_Q15879552.yaml index d8bd063c0d..23ef43f8ec 100644 --- a/data/nde/enriched/entries/0348_Q15879552.yaml +++ b/data/nde/enriched/entries/0348_Q15879552.yaml @@ -835,7 +835,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:17.798230+00:00' source_archive: web/0348/muzeeaquarium.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: MuzeeAquarium Delfzijl (Groningen) @@ -893,16 +893,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:17.797859+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.muzeeaquarium.nl/ - retrieved_on: '2025-11-29T15:54:02.600828+00:00' - xpath: /html/body/div/div/div[3]/div/main/div/div/div[2]/div/div/div/section[2]/div[2]/div/section/div[2]/div/div[2]/div/div[1]/h1 - html_file: web/0348/muzeeaquarium.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:45:17.797903+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Muzeeaquarium Delfzijl diff --git a/data/nde/enriched/entries/0350_Q81181257.yaml b/data/nde/enriched/entries/0350_Q81181257.yaml index 29bb006fd4..64cb4b7ef4 100644 --- a/data/nde/enriched/entries/0350_Q81181257.yaml +++ b/data/nde/enriched/entries/0350_Q81181257.yaml @@ -431,18 +431,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:18.047922+00:00' source_archive: web/0350/noorderzijlvest.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Waterschap Noorderzijlvest - source_url: https://www.noorderzijlvest.nl/ - retrieved_on: '2025-11-29T15:54:03.327411+00:00' - xpath: /html/head/title - html_file: web/0350/noorderzijlvest.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:18.047276+00:00' - claim_type: description_short claim_value: Waterschap Noorderzijlvest zorgt voor veilige dijken en kades, schoon water, gezuiverd afvalwater en voldoende water in elk seizoen. Dit doen we in @@ -507,6 +497,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:18.047772+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Waterschap Noorderzijlvest diff --git a/data/nde/enriched/entries/0351_Q81181371.yaml b/data/nde/enriched/entries/0351_Q81181371.yaml index 18fc0391fc..ec1b695ad2 100644 --- a/data/nde/enriched/entries/0351_Q81181371.yaml +++ b/data/nde/enriched/entries/0351_Q81181371.yaml @@ -416,18 +416,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:18.502391+00:00' source_archive: web/0351/hunzeenaas.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Waterschap Hunze en Aa's - source_url: https://www.hunzeenaas.nl/ - retrieved_on: '2025-11-29T15:55:53.549676+00:00' - xpath: /html/head/title - html_file: web/0351/hunzeenaas.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:18.501635+00:00' - claim_type: description_short claim_value: Het waterschap is er voor u Elke ochtend wordt u wakker zonder zorgen over wateroverlast. U ziet boeren hun gewassen telen. En op een warme zomerdag @@ -494,6 +484,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:18.502323+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Waterschap Hunze en Aa's diff --git a/data/nde/enriched/entries/0356_Q9777.yaml b/data/nde/enriched/entries/0356_Q9777.yaml index 3d54753495..33b46f5b4a 100644 --- a/data/nde/enriched/entries/0356_Q9777.yaml +++ b/data/nde/enriched/entries/0356_Q9777.yaml @@ -2782,18 +2782,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:19.725401+00:00' source_archive: web/0356/venlo.nl - claims_count: 15 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Venlo - source_url: https://www.venlo.nl/ - retrieved_on: '2025-11-29T15:55:32.860589+00:00' - xpath: /html/head/title - html_file: web/0356/venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:19.724104+00:00' - claim_type: org_name claim_value: Gemeente Venlo raw_value: Gemeente Venlo @@ -2814,36 +2804,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:19.724213+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.venlo.nl/ - retrieved_on: '2025-11-29T15:55:32.860589+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/0356/venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:19.724218+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.venlo.nl/ - retrieved_on: '2025-11-29T15:55:32.860589+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/0356/venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:19.724223+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.venlo.nl/ - retrieved_on: '2025-11-29T15:55:32.860589+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[3]/a/svg/title - html_file: web/0356/venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:19.724227+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -2854,16 +2814,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:19.724231+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.venlo.nl/ - retrieved_on: '2025-11-29T15:55:32.860589+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[5]/a/svg/title - html_file: web/0356/venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:19.724235+00:00' - claim_type: description_short claim_value: Website van de gemeente Venlo. Op deze website vindt u onze producten en diensten, ons bestuur en andere informatie over de gemeente Venlo. @@ -2936,6 +2886,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:19.725223+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Venlo diff --git a/data/nde/enriched/entries/0364_Q59962272.yaml b/data/nde/enriched/entries/0364_Q59962272.yaml index e78a282265..7fc093d94d 100644 --- a/data/nde/enriched/entries/0364_Q59962272.yaml +++ b/data/nde/enriched/entries/0364_Q59962272.yaml @@ -850,18 +850,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:22.582467+00:00' source_archive: web/0364/regioarchiefsittard-geleen.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - RegioArchief Sittard-Geleen - source_url: https://regioarchiefsittard-geleen.nl/ - retrieved_on: '2025-11-29T16:00:50.143099+00:00' - xpath: /html/head/title - html_file: web/0364/regioarchiefsittard-geleen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:22.581461+00:00' - claim_type: description_short claim_value: Het RegioArchief Sittard-Geleen geeft toegang tot de geschiedenis van Sittard-Geleen en de omliggende regio. @@ -934,6 +924,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:22.582333+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regioarchief Sittard-Geleen diff --git a/data/nde/enriched/entries/0366_Q121224886.yaml b/data/nde/enriched/entries/0366_Q121224886.yaml index b84b5ee3a3..48857226ac 100644 --- a/data/nde/enriched/entries/0366_Q121224886.yaml +++ b/data/nde/enriched/entries/0366_Q121224886.yaml @@ -330,18 +330,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:23.337087+00:00' source_archive: web/0366/regioarchiefsittard-geleen.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - RegioArchief Sittard-Geleen - source_url: https://regioarchiefsittard-geleen.nl/ - retrieved_on: '2025-11-29T16:03:28.878801+00:00' - xpath: /html/head/title - html_file: web/0366/regioarchiefsittard-geleen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:23.336060+00:00' - claim_type: description_short claim_value: Het RegioArchief Sittard-Geleen geeft toegang tot de geschiedenis van Sittard-Geleen en de omliggende regio. @@ -414,6 +404,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:23.336951+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regioarchief Sittard-Geleen diff --git a/data/nde/enriched/entries/0369_Q135412874.yaml b/data/nde/enriched/entries/0369_Q135412874.yaml index 702e1f1c7f..cb533325bc 100644 --- a/data/nde/enriched/entries/0369_Q135412874.yaml +++ b/data/nde/enriched/entries/0369_Q135412874.yaml @@ -471,18 +471,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:23.716463+00:00' source_archive: web/0369/boerderijenstichtinglimburg.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Boerderijenstichting Limburg - source_url: https://www.boerderijenstichtinglimburg.nl/ - retrieved_on: '2025-11-29T16:00:24.689795+00:00' - xpath: /html/head/title - html_file: web/0369/boerderijenstichtinglimburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:23.715809+00:00' - claim_type: description_short claim_value: Boerderijenstichting Limburg. Hart voor de Limburgse boerderij! raw_value: Boerderijenstichting Limburg. Hart voor de Limburgse boerderij! @@ -513,6 +503,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: schema_org_description extraction_timestamp: '2025-12-01T10:45:23.716207+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Boerderijenstichting Limburg diff --git a/data/nde/enriched/entries/0370_Q13442809.yaml b/data/nde/enriched/entries/0370_Q13442809.yaml index 5bd70ff518..c037a27376 100644 --- a/data/nde/enriched/entries/0370_Q13442809.yaml +++ b/data/nde/enriched/entries/0370_Q13442809.yaml @@ -780,7 +780,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:24.177531+00:00' source_archive: web/0370/botatuin.nl - claims_count: 10 + claims_count: 4 claims: - claim_type: org_name claim_value: Botanische Tuin Kerkrade @@ -822,66 +822,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:24.176941+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fadventsmarkt-3%2F - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fadventsmarkt-3%2F - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[1]/div[2]/div[4]/div/div/div[1]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177064+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fadventsmarkt-3%2F&title=Adventsmarkt&summary=ADVENTSMARKT29+EN+30+NOVEMBER%26nbsp%3BOp+29+en+30+november+houdt+de+Botanische+tuin - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fadventsmarkt-3%2F&title=Adventsmarkt&summary=ADVENTSMARKT29+EN+30+NOVEMBER%26nbsp%3BOp+29+en+30+november+houdt+de+Botanische+tuin - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[1]/div[2]/div[4]/div/div/div[2]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177096+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-de-helende-klanken-9%2F - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-de-helende-klanken-9%2F - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[2]/div[2]/div[4]/div/div/div[1]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177128+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-de-helende-klanken-9%2F&title=Beleef%20de%20helende%20klanken&summary=VRJDAG+5+DECEMBERBELEEF+DE+HELENDE+KLANKEN+IN+DE+BOTANISCHE+TUIN - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-de-helende-klanken-9%2F&title=Beleef%20de%20helende%20klanken&summary=VRJDAG+5+DECEMBERBELEEF+DE+HELENDE+KLANKEN+IN+DE+BOTANISCHE+TUIN - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[2]/div[2]/div[4]/div/div/div[2]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177146+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-dickens-2%2F - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-dickens-2%2F - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[3]/div[2]/div[4]/div/div/div[1]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177176+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-dickens-2%2F&title=Beleef%20dickens&summary=BELEEF+DICKENS+IN+DE+BOTANISCHE+TUIN+KERKRADEZATERDAG+13+%26amp%3B+ZONDAG+14+DECEMBER - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fbotatuin.nl%2Fevents%2Fbeleef-dickens-2%2F&title=Beleef%20dickens&summary=BELEEF+DICKENS+IN+DE+BOTANISCHE+TUIN+KERKRADEZATERDAG+13+%26amp%3B+ZONDAG+14+DECEMBER - source_url: https://botatuin.nl/ - retrieved_on: '2025-11-29T16:02:00.015725+00:00' - xpath: /html/body/div[1]/div/div/div/main/article/div/div/section[4]/div/div/div/section[2]/div/div/div/div[1]/div/div/div/div[4]/div/div[3]/div[2]/div[4]/div/div/div[2]/a - html_file: web/0370/botatuin.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:24.177186+00:00' + removed_invalid_claims: + - removed_count: 6 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Botanische Tuin Kerkrade diff --git a/data/nde/enriched/entries/0371_historisch_centrum_limburg.yaml b/data/nde/enriched/entries/0371_historisch_centrum_limburg.yaml index c268ef5474..71ec34f624 100644 --- a/data/nde/enriched/entries/0371_historisch_centrum_limburg.yaml +++ b/data/nde/enriched/entries/0371_historisch_centrum_limburg.yaml @@ -377,18 +377,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:24.294037+00:00' source_archive: web/0371/lokaleregelgeving.overheid.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken | Lokale wet- en regelgeving - source_url: https://lokaleregelgeving.overheid.nl/CVDR642447 - retrieved_on: '2025-11-29T16:00:57.077018+00:00' - xpath: /html/head/title - html_file: web/0371/lokaleregelgeving.overheid.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:24.293533+00:00' - claim_type: org_name claim_value: Doorzoek 99540 regelingen van lokale overheden raw_value: Doorzoek 99540 regelingen van lokale overheden @@ -399,6 +389,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:24.293991+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Centrum voor Regionale Geschiedenis Rijckheyt diff --git a/data/nde/enriched/entries/0372_Q111080967.yaml b/data/nde/enriched/entries/0372_Q111080967.yaml index 5abb062204..4e74ce1322 100644 --- a/data/nde/enriched/entries/0372_Q111080967.yaml +++ b/data/nde/enriched/entries/0372_Q111080967.yaml @@ -734,18 +734,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:24.690844+00:00' source_archive: web/0372/limburgserfgoed.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Coöperatie Erfgoed Limburg U.A. - source_url: https://www.limburgserfgoed.nl/ - retrieved_on: '2025-11-29T16:03:42.046377+00:00' - xpath: /html/body/title - html_file: web/0372/limburgserfgoed.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:24.690325+00:00' - claim_type: email claim_value: info@limburgserfgoed.nl raw_value: info@limburgserfgoed.nl @@ -786,6 +776,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:24.690790+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Coöperatie Erfgoed Limburg diff --git a/data/nde/enriched/entries/0375_Q81181279.yaml b/data/nde/enriched/entries/0375_Q81181279.yaml index a03f77c3d1..55cd4212c3 100644 --- a/data/nde/enriched/entries/0375_Q81181279.yaml +++ b/data/nde/enriched/entries/0375_Q81181279.yaml @@ -326,18 +326,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:25.030406+00:00' source_archive: web/0375/rijckheyt.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.rijckheyt.nl - retrieved_on: '2025-11-29T16:02:04.195590+00:00' - xpath: /html/head/title - html_file: web/0375/rijckheyt.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:25.029600+00:00' - claim_type: description_short claim_value: Beschrijving raw_value: Beschrijving @@ -418,6 +408,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:25.030346+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heerlen diff --git a/data/nde/enriched/entries/0378_Q81181301.yaml b/data/nde/enriched/entries/0378_Q81181301.yaml index 6fdbedd212..b2dd3a5c0f 100644 --- a/data/nde/enriched/entries/0378_Q81181301.yaml +++ b/data/nde/enriched/entries/0378_Q81181301.yaml @@ -522,18 +522,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:25.540655+00:00' source_archive: web/0378/kerkrade.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Kerkrade - source_url: https://www.kerkrade.nl/gemeentearchief - retrieved_on: '2025-11-29T16:02:07.296256+00:00' - xpath: /html/head/title - html_file: web/0378/kerkrade.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:25.539853+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -604,6 +594,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:25.540440+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Kerkrade diff --git a/data/nde/enriched/entries/0379_Q111190984.yaml b/data/nde/enriched/entries/0379_Q111190984.yaml index 6510763d5b..4902d3ea91 100644 --- a/data/nde/enriched/entries/0379_Q111190984.yaml +++ b/data/nde/enriched/entries/0379_Q111190984.yaml @@ -413,18 +413,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:25.779865+00:00' source_archive: web/0379/landgraaf.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Landgraaf - source_url: https://www.landgraaf.nl/gemeentearchief - retrieved_on: '2025-11-29T16:02:08.739776+00:00' - xpath: /html/head/title - html_file: web/0379/landgraaf.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:25.779036+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -507,6 +497,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:25.779686+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeente Landgraaf diff --git a/data/nde/enriched/entries/0381_Q107341629.yaml b/data/nde/enriched/entries/0381_Q107341629.yaml index 71dfae7d36..6191a571b8 100644 --- a/data/nde/enriched/entries/0381_Q107341629.yaml +++ b/data/nde/enriched/entries/0381_Q107341629.yaml @@ -519,18 +519,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:25.991888+00:00' source_archive: web/0381/archief.venlo.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeentearchief Venlo - source_url: https://archief.venlo.nl/ - retrieved_on: '2025-11-29T16:02:10.014909+00:00' - xpath: /html/head/title - html_file: web/0381/archief.venlo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:25.991080+00:00' - claim_type: description_short claim_value: Duik in de collecties van het Gemeentearchief Venlo. raw_value: Duik in de collecties van het Gemeentearchief Venlo. @@ -581,6 +571,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:25.991827+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Venlo diff --git a/data/nde/enriched/entries/0384_Q127703473.yaml b/data/nde/enriched/entries/0384_Q127703473.yaml index e5a715b833..9a36dcc620 100644 --- a/data/nde/enriched/entries/0384_Q127703473.yaml +++ b/data/nde/enriched/entries/0384_Q127703473.yaml @@ -316,18 +316,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:26.630558+00:00' source_archive: web/0384/limburgserfgoednet.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://limburgserfgoednet.nl/gratheminbeeld - retrieved_on: '2025-11-29T16:03:31.846272+00:00' - xpath: /html/head/title - html_file: web/0384/limburgserfgoednet.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:26.630223+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -388,6 +378,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:26.630497+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Grathem in Beeld diff --git a/data/nde/enriched/entries/0389_heemkunde_heibloem.yaml b/data/nde/enriched/entries/0389_heemkunde_heibloem.yaml index e9ac3e86af..4de8e26cf9 100644 --- a/data/nde/enriched/entries/0389_heemkunde_heibloem.yaml +++ b/data/nde/enriched/entries/0389_heemkunde_heibloem.yaml @@ -292,7 +292,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:28.366971+00:00' source_archive: web/0389/heibloem.nu - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Heemkundevereniging Heibloem @@ -316,16 +316,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_description extraction_timestamp: '2025-12-01T10:45:28.366468+00:00' - - claim_type: org_name - claim_value: Heibloem.nu - raw_value: Heibloem.nu - source_url: https://heibloem.nu/vereniging/heemkundevereniging-heibloem - retrieved_on: '2025-11-29T16:04:07.432877+00:00' - xpath: /html[1]/head/meta[13] - html_file: web/0389/heibloem.nu/pages/vereniging_heemkundevereniging-heibloem.tmp.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:45:28.366518+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Heiboem raw_value: https://www.facebook.com/Heiboem @@ -336,6 +326,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:28.366783+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundevereniging Heibloem diff --git a/data/nde/enriched/entries/0390_heemkunde_margraten.yaml b/data/nde/enriched/entries/0390_heemkunde_margraten.yaml index 9ec9e7325a..4c98c4e9e4 100644 --- a/data/nde/enriched/entries/0390_heemkunde_margraten.yaml +++ b/data/nde/enriched/entries/0390_heemkunde_margraten.yaml @@ -370,28 +370,16 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:28.733218+00:00' source_archive: web/0390/heemkunde-margraten.nl - claims_count: 2 - claims: - - claim_type: org_name - claim_value: heemkunde-margraten.nl - raw_value: heemkunde-margraten.nl - source_url: https://heemkunde-margraten.nl/ - retrieved_on: '2025-11-29T16:04:34.004258+00:00' - xpath: /html/head/title - html_file: web/0390/heemkunde-margraten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:28.732602+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://heemkunde-margraten.nl/ - retrieved_on: '2025-11-29T16:04:34.004258+00:00' - xpath: /html/body/main/article/header/div/h1 - html_file: web/0390/heemkunde-margraten.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:45:28.733133+00:00' + claims_count: 0 + claims: [] + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundevereniging Margraten diff --git a/data/nde/enriched/entries/0391_Q111081387.yaml b/data/nde/enriched/entries/0391_Q111081387.yaml index 512ad7129b..b99fcfc323 100644 --- a/data/nde/enriched/entries/0391_Q111081387.yaml +++ b/data/nde/enriched/entries/0391_Q111081387.yaml @@ -438,18 +438,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:28.797408+00:00' source_archive: web/0391/limburgserfgoednet.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://limburgserfgoednet.nl/heemkundeverenigingnieuwstadt - retrieved_on: '2025-11-29T16:04:34.657100+00:00' - xpath: /html/head/title - html_file: web/0391/limburgserfgoednet.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:28.796852+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -510,6 +500,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:28.797219+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkunde Vereniging Nieuwstadt diff --git a/data/nde/enriched/entries/0393_heemkundekring_sankt_tolbert.yaml b/data/nde/enriched/entries/0393_heemkundekring_sankt_tolbert.yaml index 78e062b0b6..c02f57b81e 100644 --- a/data/nde/enriched/entries/0393_heemkundekring_sankt_tolbert.yaml +++ b/data/nde/enriched/entries/0393_heemkundekring_sankt_tolbert.yaml @@ -460,18 +460,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:29.651604+00:00' source_archive: web/0393/sankttolbertvaals.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME - Heemkundekring Sankt-Tolbert Vaals - source_url: https://sankttolbertvaals.nl/ - retrieved_on: '2025-11-29T16:06:30.561780+00:00' - xpath: /html/head/title - html_file: web/0393/sankttolbertvaals.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:29.650375+00:00' - claim_type: description_short claim_value: In dienst van verleden en heden Heemkundekring Sankt-Tolbert Vaals In dienst van verleden en heden Heemkundekring Sankt-Tolbert Vaals In dienst @@ -522,6 +512,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:29.651223+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring Sankt Tolbert Vaals diff --git a/data/nde/enriched/entries/0399_heemkundevereniging_roggel.yaml b/data/nde/enriched/entries/0399_heemkundevereniging_roggel.yaml index 076768be40..5e405dc40e 100644 --- a/data/nde/enriched/entries/0399_heemkundevereniging_roggel.yaml +++ b/data/nde/enriched/entries/0399_heemkundevereniging_roggel.yaml @@ -478,18 +478,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:31.337738+00:00' source_archive: web/0399/heemkunderoggel.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Heemkunde Roggel - source_url: https://heemkunderoggel.nl/ - retrieved_on: '2025-11-29T16:10:34.722234+00:00' - xpath: /html/head/title - html_file: web/0399/heemkunderoggel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:31.336486+00:00' - claim_type: description_short claim_value: 'Geschied- en heemkundige info over Roggel en de directe omgeving. Welkom op de website Welkom op de website van Heemkundevereniging Roggel. Naast @@ -546,6 +536,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:31.337569+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundevereniging Roggel diff --git a/data/nde/enriched/entries/0402_Q117843367.yaml b/data/nde/enriched/entries/0402_Q117843367.yaml index a34d418230..986710c80d 100644 --- a/data/nde/enriched/entries/0402_Q117843367.yaml +++ b/data/nde/enriched/entries/0402_Q117843367.yaml @@ -441,18 +441,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:31.975874+00:00' source_archive: web/0402/historischcentrumlimburg.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://historischcentrumlimburg.nl/ - retrieved_on: '2025-11-29T16:08:04.760725+00:00' - xpath: /html/head/title - html_file: web/0402/historischcentrumlimburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:31.974920+00:00' - claim_type: description_short claim_value: Beschrijving raw_value: Beschrijving @@ -533,6 +523,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:31.975798+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Centrum Limburg locatie Heerlen diff --git a/data/nde/enriched/entries/0403_Q21004453.yaml b/data/nde/enriched/entries/0403_Q21004453.yaml index 49f817bb1c..0ab38f0056 100644 --- a/data/nde/enriched/entries/0403_Q21004453.yaml +++ b/data/nde/enriched/entries/0403_Q21004453.yaml @@ -623,18 +623,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:32.071713+00:00' source_archive: web/0403/historischcentrumlimburg.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://historischcentrumlimburg.nl/ - retrieved_on: '2025-11-29T16:08:05.192621+00:00' - xpath: /html/head/title - html_file: web/0403/historischcentrumlimburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:32.071117+00:00' - claim_type: description_short claim_value: Beschrijving raw_value: Beschrijving @@ -715,6 +705,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:32.071656+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Centrum Limburg diff --git a/data/nde/enriched/entries/0414_Q111081369.yaml b/data/nde/enriched/entries/0414_Q111081369.yaml index 35d5316012..36350e7361 100644 --- a/data/nde/enriched/entries/0414_Q111081369.yaml +++ b/data/nde/enriched/entries/0414_Q111081369.yaml @@ -563,7 +563,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:34.006569+00:00' source_archive: web/0414/limburgsemolens.nl - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Limburgse Molens @@ -575,16 +575,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:34.005697+00:00' - - claim_type: address - claim_value: '' - raw_value: '' - source_url: https://www.limburgsemolens.nl/ - retrieved_on: '2025-11-29T16:10:31.990275+00:00' - xpath: /html/head/script[21] - html_file: web/0414/limburgsemolens.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_address - extraction_timestamp: '2025-12-01T10:45:34.006123+00:00' - claim_type: org_name claim_value: Het laatste nieuws raw_value: Het laatste nieuws @@ -595,6 +585,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:34.006438+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Molenstichting Limburg diff --git a/data/nde/enriched/entries/0417_Q2126143.yaml b/data/nde/enriched/entries/0417_Q2126143.yaml index 49882dc554..f0625dcbd6 100644 --- a/data/nde/enriched/entries/0417_Q2126143.yaml +++ b/data/nde/enriched/entries/0417_Q2126143.yaml @@ -704,7 +704,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:34.952937+00:00' source_archive: web/0417/delocht.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: De Locht @@ -716,16 +716,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:34.951641+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://delocht.nl/ - retrieved_on: '2025-11-29T16:13:45.900342+00:00' - xpath: /html/body/div[1]/div/div/div[1]/header/div[1]/div[1]/div/div[3]/div[2]/div[1]/div[1]/button/span[2]/svg/title - html_file: web/0417/delocht.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:34.951671+00:00' - claim_type: email claim_value: '%20info@delocht.nl' raw_value: '%20info@delocht.nl' @@ -776,6 +766,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:34.952383+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De Locht diff --git a/data/nde/enriched/entries/0420_Q110053532.yaml b/data/nde/enriched/entries/0420_Q110053532.yaml index 49e3761bde..2b60435541 100644 --- a/data/nde/enriched/entries/0420_Q110053532.yaml +++ b/data/nde/enriched/entries/0420_Q110053532.yaml @@ -524,18 +524,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:36.379205+00:00' source_archive: web/0420/museumvandevrouw.nl - claims_count: 11 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum van de Vrouw - source_url: https://www.museumvandevrouw.nl/ - retrieved_on: '2025-11-29T16:12:16.581474+00:00' - xpath: /html/head/title - html_file: web/0420/museumvandevrouw.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:36.378145+00:00' - claim_type: description_short claim_value: Het museum vertelt verhalen die het vrouwenleven kenmerken en ons op bijzondere wijze verbinden. Hedendaagse thema’s, verbonden met oude tradities, @@ -648,6 +638,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:36.379044+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum van de Vrouw diff --git a/data/nde/enriched/entries/0427_Q2202460.yaml b/data/nde/enriched/entries/0427_Q2202460.yaml index b12f18b389..2d411ed975 100644 --- a/data/nde/enriched/entries/0427_Q2202460.yaml +++ b/data/nde/enriched/entries/0427_Q2202460.yaml @@ -1031,18 +1031,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:38.846544+00:00' source_archive: web/0427/romeinsekatakomben.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Romeinse Katakomben - source_url: https://www.romeinsekatakomben.nl/ - retrieved_on: '2025-11-29T16:14:11.466829+00:00' - xpath: /html/head/title - html_file: web/0427/romeinsekatakomben.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:38.845819+00:00' - claim_type: description_short claim_value: Museum Romeinse Katakomben is een onvergetelijke belevenis. Dwalend door de onderaardse gangen neemt de gids je mee naar het Rome uit het vroege @@ -1107,6 +1097,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:38.846375+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Romeinse Katakomben diff --git a/data/nde/enriched/entries/0430_unknown.yaml b/data/nde/enriched/entries/0430_unknown.yaml index be34b20fc5..2b798c5ebb 100644 --- a/data/nde/enriched/entries/0430_unknown.yaml +++ b/data/nde/enriched/entries/0430_unknown.yaml @@ -175,18 +175,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:39.109284+00:00' source_archive: web/0430/limburgserfgoednet.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://limburgserfgoednet.nl/stichtingbeheerkunstschattensintlambertus - retrieved_on: '2025-11-29T16:14:08.345397+00:00' - xpath: /html/head/title - html_file: web/0430/limburgserfgoednet.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:39.108699+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -247,6 +237,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:39.109049+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Beheer Kunstschatten St. Lambertus diff --git a/data/nde/enriched/entries/0432_Q56460988.yaml b/data/nde/enriched/entries/0432_Q56460988.yaml index 457700205b..e68f479fc8 100644 --- a/data/nde/enriched/entries/0432_Q56460988.yaml +++ b/data/nde/enriched/entries/0432_Q56460988.yaml @@ -548,7 +548,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:27.463179+00:00' source_archive: web/0432/filmhuiszicht.nl - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: Welkom bij Filmhuis ZICHT! @@ -640,36 +640,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:25:27.462863+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - source_url: https://www.filmhuiszicht.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div/div[2]/main/div/div/article/div[3]/div/div/a[1] - html_file: web/0432/filmhuiszicht.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:27.462894+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - source_url: https://www.filmhuiszicht.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div/div[2]/main/div/div/article/div[3]/div/div/a[2] - html_file: web/0432/filmhuiszicht.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:27.462901+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?original_referer=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht&url=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - raw_value: https://twitter.com/intent/tweet?original_referer=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht&url=https%3A//www.filmhuiszicht.nl/welkom-bij-filmhuis-zicht - source_url: https://www.filmhuiszicht.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div/div[2]/main/div/div/article/div[3]/div/div/a[3] - html_file: web/0432/filmhuiszicht.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:27.462906+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content youtube_enrichment: source_url: https://www.filmhuiszicht.nl/media/oembed?url=https%3A//www.youtube.com/watch%3Fv%3DY9ve22tKDzY&max_width=0&max_height=0&hash=9q__k-xwLv0ehz2VJW5bSohNdA-S0XcYVtMAHf9SMB8 fetch_timestamp: '2025-12-01T17:34:22.710861+00:00' diff --git a/data/nde/enriched/entries/0441_Q892727.yaml b/data/nde/enriched/entries/0441_Q892727.yaml index 634303f788..6cdfda97a9 100644 --- a/data/nde/enriched/entries/0441_Q892727.yaml +++ b/data/nde/enriched/entries/0441_Q892727.yaml @@ -1559,7 +1559,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:42.790289+00:00' source_archive: web/0441/bonnefanten.nl - claims_count: 6 + claims_count: 4 claims: - claim_type: org_name claim_value: Bonnefanten — The art museum of Limburg @@ -1593,26 +1593,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:42.789882+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=undefined - raw_value: https://www.facebook.com/sharer/sharer.php?u=undefined - source_url: http://www.bonnefanten.nl/en/ - retrieved_on: '2025-11-29T16:17:05.490417+00:00' - xpath: /html/body/div[2]/div[3]/main/div/div[2]/div[2]/ul/li[1]/a - html_file: web/0441/bonnefanten.nl/mirror/www.bonnefanten.nl/en/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:42.790184+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Bonnefanten&url=undefined - raw_value: https://twitter.com/intent/tweet?text=Bonnefanten&url=undefined - source_url: http://www.bonnefanten.nl/en/ - retrieved_on: '2025-11-29T16:17:05.490417+00:00' - xpath: /html/body/div[2]/div[3]/main/div/div[2]/div[2]/ul/li[2]/a - html_file: web/0441/bonnefanten.nl/mirror/www.bonnefanten.nl/en/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:42.790192+00:00' - claim_type: org_name claim_value: Bonnefanten raw_value: Bonnefanten @@ -1623,6 +1603,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:42.790231+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bonnefanten museum diff --git a/data/nde/enriched/entries/0442_unknown.yaml b/data/nde/enriched/entries/0442_unknown.yaml index 6253754fcd..d0c1dbcef0 100644 --- a/data/nde/enriched/entries/0442_unknown.yaml +++ b/data/nde/enriched/entries/0442_unknown.yaml @@ -206,18 +206,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:43.244622+00:00' source_archive: web/0442/oudamerica.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Werkgroep Oud-America - source_url: https://oudamerica.nl/ - retrieved_on: '2025-11-29T16:18:01.886475+00:00' - xpath: /html/head/title - html_file: web/0442/oudamerica.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:43.244132+00:00' - claim_type: description_short claim_value: 'Stichting werkgroep Oud-America Opgericht in 1979 Tijdens een vergadering van de dorpsraad America in december 1979 werd door enkele enthousiaste personen @@ -278,6 +268,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:45:43.244487+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Werkgroep Oud-America diff --git a/data/nde/enriched/entries/0453_Q59962312.yaml b/data/nde/enriched/entries/0453_Q59962312.yaml index 80d2386d4b..200a1af3e1 100644 --- a/data/nde/enriched/entries/0453_Q59962312.yaml +++ b/data/nde/enriched/entries/0453_Q59962312.yaml @@ -976,18 +976,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:45.396322+00:00' source_archive: web/0453/bibliotheekmb.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.bibliotheekmb.nl - retrieved_on: '2025-11-29T16:21:21.690535+00:00' - xpath: /html/head/title - html_file: web/0453/bibliotheekmb.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:45.395721+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/bibliotheeklochal raw_value: https://www.facebook.com/bibliotheeklochal @@ -1028,6 +1018,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:45.396180+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Midden-Brabant diff --git a/data/nde/enriched/entries/0457_Q110907483.yaml b/data/nde/enriched/entries/0457_Q110907483.yaml index a8d3c23208..41314cfbe5 100644 --- a/data/nde/enriched/entries/0457_Q110907483.yaml +++ b/data/nde/enriched/entries/0457_Q110907483.yaml @@ -342,18 +342,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:45.709494+00:00' source_archive: web/0457/nyenaenwasvannassau.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.nyenaenwasvannassau.nl/site/ - retrieved_on: '2025-11-29T16:20:20.575005+00:00' - xpath: /html/head/title - html_file: web/0457/nyenaenwasvannassau.nl/mirror/www.nyenaenwasvannassau.nl/site/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:45.709304+00:00' - claim_type: description_short claim_value: cultuur-historische vereniging Nyen aenwas van Nassau Dinteloord raw_value: cultuur-historische vereniging Nyen aenwas van Nassau Dinteloord @@ -364,6 +354,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:45.709350+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Cultuur Historische Vereniging Nyen Aenwas van Nassau diff --git a/data/nde/enriched/entries/0461_Q1278103.yaml b/data/nde/enriched/entries/0461_Q1278103.yaml index bce6a50973..b972f189b7 100644 --- a/data/nde/enriched/entries/0461_Q1278103.yaml +++ b/data/nde/enriched/entries/0461_Q1278103.yaml @@ -709,18 +709,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:46.535506+00:00' source_archive: web/0461/eindhovenmuseum.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Eindhoven Museum - source_url: https://eindhovenmuseum.nl/ - retrieved_on: '2025-11-29T16:20:34.350093+00:00' - xpath: /html/head/title - html_file: web/0461/eindhovenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:46.534621+00:00' - claim_type: description_short claim_value: Eindhoven Museum beheert 23.000 objecten uit de geschiedenis van Eindhoven en omgeving. Een archief dat een unieke kijk geeft in de cultuurhistorische @@ -795,6 +785,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:46.535255+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Eindhoven Museum diff --git a/data/nde/enriched/entries/0468_unknown.yaml b/data/nde/enriched/entries/0468_unknown.yaml index 696a4778b9..21afa74985 100644 --- a/data/nde/enriched/entries/0468_unknown.yaml +++ b/data/nde/enriched/entries/0468_unknown.yaml @@ -292,19 +292,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:47.655571+00:00' source_archive: web/0468/proxy.archieven.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Archieven.nl - raw_value: Archieven.nl - Gemeentearchief Bergen op Zoom (per 1 juli 2016 samengevoegd - bij h... - source_url: https://proxy.archieven.nl/0/4A9E2B87DB3F1949E053CA00A8C054B4 - retrieved_on: '2025-11-29T16:20:37.709505+00:00' - xpath: /html/head/title - html_file: web/0468/proxy.archieven.nl/pages/0_4A9E2B87DB3F1949E053CA00A8C054B4.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:47.654661+00:00' - claim_type: org_name claim_value: organisatie_link-svg raw_value: organisatie_link-svg @@ -349,6 +338,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:45:47.655299+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Bergen op Zoom diff --git a/data/nde/enriched/entries/0469_Q2173323.yaml b/data/nde/enriched/entries/0469_Q2173323.yaml index 8eb52f269f..174da4ca95 100644 --- a/data/nde/enriched/entries/0469_Q2173323.yaml +++ b/data/nde/enriched/entries/0469_Q2173323.yaml @@ -461,18 +461,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:47.730847+00:00' source_archive: web/0469/gemeentearchiefgemert-bakel.nl - claims_count: 14 + claims_count: 13 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeentearchief Gemert-Bakel - source_url: http://www.gemeentearchiefgemert-bakel.nl/ - retrieved_on: '2025-11-29T23:28:18.855592+00:00' - xpath: /html/head/title - html_file: web/0469/gemeentearchiefgemert-bakel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:47.730297+00:00' - claim_type: org_name claim_value: ander icon raw_value: ander icon @@ -603,6 +593,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:47.730728+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Gemert-Bakel diff --git a/data/nde/enriched/entries/0471_unknown.yaml b/data/nde/enriched/entries/0471_unknown.yaml index f630bb516c..1069436fc2 100644 --- a/data/nde/enriched/entries/0471_unknown.yaml +++ b/data/nde/enriched/entries/0471_unknown.yaml @@ -296,18 +296,16 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:45:47.822412+00:00' source_archive: web/0471/gemeentearchiefroosendaal.nl - claims_count: 1 - claims: - - claim_type: org_name - claim_value: gemeentearchiefroosendaal.nl - raw_value: gemeentearchiefroosendaal.nl - source_url: https://gemeentearchiefroosendaal.nl/ - retrieved_on: '2025-11-29T16:20:40.196712+00:00' - xpath: /html/head/title - html_file: web/0471/gemeentearchiefroosendaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:47.822235+00:00' + claims_count: 0 + claims: [] + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: gemeentearchiefroosendaal.nl diff --git a/data/nde/enriched/entries/0472_Q1969635.yaml b/data/nde/enriched/entries/0472_Q1969635.yaml index 575c678c8c..1087950ae7 100644 --- a/data/nde/enriched/entries/0472_Q1969635.yaml +++ b/data/nde/enriched/entries/0472_Q1969635.yaml @@ -684,18 +684,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:47.916622+00:00' source_archive: web/0472/geniemuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Geniemuseum - source_url: http://www.geniemuseum.nl - retrieved_on: '2025-11-29T16:20:38.449546+00:00' - xpath: /html/head/title - html_file: web/0472/geniemuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:47.916070+00:00' - claim_type: description_short claim_value: Geniemuseum te Vught. Stichting Historische Genieverzameling raw_value: Geniemuseum te Vught. Stichting Historische Genieverzameling @@ -736,6 +726,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:47.916532+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Geniemuseum diff --git a/data/nde/enriched/entries/0474_Q110907346.yaml b/data/nde/enriched/entries/0474_Q110907346.yaml index b5c2148577..c20ac642c2 100644 --- a/data/nde/enriched/entries/0474_Q110907346.yaml +++ b/data/nde/enriched/entries/0474_Q110907346.yaml @@ -483,7 +483,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:48.400575+00:00' source_archive: web/0474/schoorudenhout.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: '''t Schoor » Erfgoedcentrum ’t Schoor Udenhout' @@ -495,16 +495,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:45:48.400196+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: http://www.schoorudenhout.nl/ - retrieved_on: '2025-11-29T16:21:20.488870+00:00' - xpath: /html/body/main/a/h1 - html_file: web/0474/schoorudenhout.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:45:48.400527+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: '''t Schoor » Erfgoedcentrum ’t Schoor Udenhout' diff --git a/data/nde/enriched/entries/0476_Q110907502.yaml b/data/nde/enriched/entries/0476_Q110907502.yaml index a423b96115..7a89f02bb2 100644 --- a/data/nde/enriched/entries/0476_Q110907502.yaml +++ b/data/nde/enriched/entries/0476_Q110907502.yaml @@ -347,18 +347,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:48.575884+00:00' source_archive: web/0476/heemkundevereniging.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.heemkundevereniging.nl/ - retrieved_on: '2025-11-29T16:20:42.059405+00:00' - xpath: /html/head/title - html_file: web/0476/heemkundevereniging.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:48.575295+00:00' - claim_type: description_short claim_value: Heemkundevereniging De Hooge Dorpen raw_value: Heemkundevereniging De Hooge Dorpen @@ -379,6 +369,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:48.575819+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkunde vereniging De Hooge Dorpen diff --git a/data/nde/enriched/entries/0481_Q110907480.yaml b/data/nde/enriched/entries/0481_Q110907480.yaml index 78d4b6f7c5..fcf50ff53c 100644 --- a/data/nde/enriched/entries/0481_Q110907480.yaml +++ b/data/nde/enriched/entries/0481_Q110907480.yaml @@ -344,18 +344,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:49.186768+00:00' source_archive: web/0481/heemkunde-megen-haren-macharen.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: home - raw_value: home - Heemkunde Megen Haren Macharen - source_url: http://www.heemkunde-megen-haren-macharen.nl/ - retrieved_on: '2025-11-29T16:21:23.623722+00:00' - xpath: /html/head/title - html_file: web/0481/heemkunde-megen-haren-macharen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:49.185928+00:00' - claim_type: description_short claim_value: Welkom bij Heemkundekring Megen, Haren en Macharen Onze heemkundekring is toegewijd aan het verzamelen, bewaren en delen van de rijke geschiedenis @@ -394,6 +384,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:45:49.186482+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkunde Megen Haren en Macharen diff --git a/data/nde/enriched/entries/0484_Q110907488.yaml b/data/nde/enriched/entries/0484_Q110907488.yaml index 5e16232f2d..8eb6371a7a 100644 --- a/data/nde/enriched/entries/0484_Q110907488.yaml +++ b/data/nde/enriched/entries/0484_Q110907488.yaml @@ -273,18 +273,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:50.052478+00:00' source_archive: web/0484/dekleinemeijerij.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Heemkundekring - source_url: http://www.dekleinemeijerij.nl - retrieved_on: '2025-11-29T16:21:41.747522+00:00' - xpath: /html/head/title - html_file: web/0484/dekleinemeijerij.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:50.051646+00:00' - claim_type: description_short claim_value: 'Heemkundekring ''De Kleine Meijerij'' heeft als werkgebied Berkel-Enschot, Biezenmortel, Esch, Haaren, Helvoirt, Heukelom, Moergestel, Oisterwijk en Udenhout @@ -309,6 +299,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:50.051772+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring "De Kleine Meijerij" diff --git a/data/nde/enriched/entries/0486_unknown.yaml b/data/nde/enriched/entries/0486_unknown.yaml index dd649206f9..762a927b75 100644 --- a/data/nde/enriched/entries/0486_unknown.yaml +++ b/data/nde/enriched/entries/0486_unknown.yaml @@ -160,18 +160,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:50.586758+00:00' source_archive: web/0486/heemkundelangenboom.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Heemkundelangenboom.nl - raw_value: Heemkundelangenboom.nl - source_url: https://www.heemkundelangenboom.nl - retrieved_on: '2025-11-29T16:21:43.012971+00:00' - xpath: /html/head/title - html_file: web/0486/heemkundelangenboom.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:50.585074+00:00' - claim_type: phone claim_value: '+31610705225' raw_value: '+31610705225' @@ -202,6 +192,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:50.586366+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring "Felix Walter" diff --git a/data/nde/enriched/entries/0496_Q110907501.yaml b/data/nde/enriched/entries/0496_Q110907501.yaml index eb8219184b..64cf0de08e 100644 --- a/data/nde/enriched/entries/0496_Q110907501.yaml +++ b/data/nde/enriched/entries/0496_Q110907501.yaml @@ -387,18 +387,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:54.205572+00:00' source_archive: web/0496/deheerlijkheidoirschot.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - de Heerlijkheid - source_url: http://www.deheerlijkheidoirschot.nl/ - retrieved_on: '2025-11-29T16:29:34.743382+00:00' - xpath: /html/head/title - html_file: web/0496/deheerlijkheidoirschot.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:54.204866+00:00' - claim_type: org_name claim_value: de Heerlijkheid raw_value: de Heerlijkheid @@ -419,6 +409,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:45:54.205397+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring De Heerlijkheid Oirschot diff --git a/data/nde/enriched/entries/0498_Q110907495.yaml b/data/nde/enriched/entries/0498_Q110907495.yaml index 3dc2fcb80c..df4da18347 100644 --- a/data/nde/enriched/entries/0498_Q110907495.yaml +++ b/data/nde/enriched/entries/0498_Q110907495.yaml @@ -392,18 +392,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:54.783363+00:00' source_archive: web/0498/heemkundekringgemert.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Heemkundekring Gemert - source_url: http://www.heemkundekringgemert.nl/ - retrieved_on: '2025-11-29T16:28:18.189967+00:00' - xpath: /html/head/title - html_file: web/0498/heemkundekringgemert.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:54.782762+00:00' - claim_type: org_name claim_value: Heemkundekring Gemert raw_value: Heemkundekring Gemert @@ -454,6 +444,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:45:54.783302+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring De Kommanderij diff --git a/data/nde/enriched/entries/0502_Q110907460.yaml b/data/nde/enriched/entries/0502_Q110907460.yaml index 1a53aa7508..f432685a2c 100644 --- a/data/nde/enriched/entries/0502_Q110907460.yaml +++ b/data/nde/enriched/entries/0502_Q110907460.yaml @@ -260,18 +260,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:55.686199+00:00' source_archive: web/0502/denbeerschenaard.nl - claims_count: 6 + claims_count: 1 claims: - - claim_type: org_name - claim_value: denbeerschenaard.nl - raw_value: denbeerschenaard.nl - source_url: http://www.denbeerschenaard.nl - retrieved_on: '2025-11-29T16:28:23.449747+00:00' - xpath: /html/head/title - html_file: web/0502/denbeerschenaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:55.684590+00:00' - claim_type: email claim_value: s.en.a.vanhelvoort@gmail.com raw_value: s.en.a.vanhelvoort@gmail.com @@ -282,46 +272,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:45:55.685305+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u= - raw_value: https://www.facebook.com/sharer/sharer.php?u= - source_url: http://www.denbeerschenaard.nl - retrieved_on: '2025-11-29T16:28:23.449747+00:00' - xpath: /html/body/div[2]/div[2]/div/div/div/div/div[1]/div[1]/div[1]/div/div/div/div/ul/li[1]/a - html_file: web/0502/denbeerschenaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:55.685755+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url= - raw_value: https://www.linkedin.com/shareArticle?mini=true&url= - source_url: http://www.denbeerschenaard.nl - retrieved_on: '2025-11-29T16:28:23.449747+00:00' - xpath: /html/body/div[2]/div[2]/div/div/div/div/div[1]/div[1]/div[1]/div/div/div/div/ul/li[2]/a - html_file: web/0502/denbeerschenaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:55.685775+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?url= - raw_value: https://twitter.com/share?url= - source_url: http://www.denbeerschenaard.nl - retrieved_on: '2025-11-29T16:28:23.449747+00:00' - xpath: /html/body/div[2]/div[2]/div/div/div/div/div[1]/div[1]/div[1]/div/div/div/div/ul/li[3]/a - html_file: web/0502/denbeerschenaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:45:55.685787+00:00' - - claim_type: org_name - claim_value: 'Nieuw:' - raw_value: 'Nieuw:' - source_url: http://www.denbeerschenaard.nl - retrieved_on: '2025-11-29T16:28:23.449747+00:00' - xpath: /html/body/div[2]/div[3]/div/div/div/div/div[2]/div/div/div/div/div[1]/div/div[2]/div/div/div/h1[1] - html_file: web/0502/denbeerschenaard.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:45:55.685977+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring Den Beerschen Aard diff --git a/data/nde/enriched/entries/0515_Q110907441.yaml b/data/nde/enriched/entries/0515_Q110907441.yaml index db684bc0ac..3a5928635d 100644 --- a/data/nde/enriched/entries/0515_Q110907441.yaml +++ b/data/nde/enriched/entries/0515_Q110907441.yaml @@ -442,18 +442,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:45:59.386765+00:00' source_archive: web/0515/heemkundebladel.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: heemkundebladel.nl - raw_value: heemkundebladel.nl - source_url: http://www.heemkundebladel.nl - retrieved_on: '2025-11-29T16:34:45.284626+00:00' - xpath: /html/head/title - html_file: web/0515/heemkundebladel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:45:59.386417+00:00' - claim_type: description_short claim_value: Dit domein kan te koop zijn! raw_value: Dit domein kan te koop zijn! @@ -464,6 +454,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:45:59.386619+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Pladella Villa Heemkunde Bladel diff --git a/data/nde/enriched/entries/0521_Q110907442.yaml b/data/nde/enriched/entries/0521_Q110907442.yaml index 17ebb4cb7d..344ca4b3ad 100644 --- a/data/nde/enriched/entries/0521_Q110907442.yaml +++ b/data/nde/enriched/entries/0521_Q110907442.yaml @@ -439,18 +439,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:00.593017+00:00' source_archive: web/0521/heemkundekringzeeland.nl - claims_count: 10 + claims_count: 7 claims: - - claim_type: org_name - claim_value: heemkundekringzeeland.nl - raw_value: heemkundekringzeeland.nl – Erfgoed en heemkunde uit dorp Zeeland (NBr) - source_url: http://www.heemkundekringzeeland.nl/ - retrieved_on: '2025-11-29T16:36:02.304955+00:00' - xpath: /html/head/title - html_file: web/0521/heemkundekringzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:00.591772+00:00' - claim_type: org_name claim_value: Klik om het zoekinvoerveld te openen raw_value: Klik om het zoekinvoerveld te openen @@ -501,26 +491,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:00.592664+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https://heemkundekringzeeland.nl/&t=Homepage - raw_value: https://www.facebook.com/sharer.php?u=https://heemkundekringzeeland.nl/&t=Homepage - source_url: http://www.heemkundekringzeeland.nl/ - retrieved_on: '2025-11-29T16:36:02.304955+00:00' - xpath: /html/body/div[1]/div/div[8]/div/div/div/div/div[2]/div/div/ul/li[1]/a - html_file: web/0521/heemkundekringzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:00.592687+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?text=Homepage&url=https://heemkundekringzeeland.nl/ - raw_value: https://twitter.com/share?text=Homepage&url=https://heemkundekringzeeland.nl/ - source_url: http://www.heemkundekringzeeland.nl/ - retrieved_on: '2025-11-29T16:36:02.304955+00:00' - xpath: /html/body/div[1]/div/div[8]/div/div/div/div/div[2]/div/div/ul/li[2]/a - html_file: web/0521/heemkundekringzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:00.592692+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Heemkundekring-Zeeland-1757812647846979 raw_value: https://www.facebook.com/Heemkundekring-Zeeland-1757812647846979 @@ -541,6 +511,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:00.592799+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundekring Zeeland diff --git a/data/nde/enriched/entries/0522_Q110907510.yaml b/data/nde/enriched/entries/0522_Q110907510.yaml index d514964d13..4523c97fbe 100644 --- a/data/nde/enriched/entries/0522_Q110907510.yaml +++ b/data/nde/enriched/entries/0522_Q110907510.yaml @@ -556,7 +556,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:01.083646+00:00' source_archive: web/0522/heemkundedendungen.nl - claims_count: 11 + claims_count: 8 claims: - claim_type: org_name claim_value: Welkom bij Heemkundevereniging "Op die Dunghen" @@ -569,36 +569,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:01.082289+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: http://www.heemkundedendungen.nl/ - retrieved_on: '2025-11-29T16:38:07.166640+00:00' - xpath: /html/body/div[1]/div/div[2]/div/div[1]/a[1]/svg/title - html_file: web/0522/heemkundedendungen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:01.082302+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: http://www.heemkundedendungen.nl/ - retrieved_on: '2025-11-29T16:38:07.166640+00:00' - xpath: /html/body/div[1]/div/div[2]/div/div[1]/a[2]/svg/title - html_file: web/0522/heemkundedendungen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:01.082307+00:00' - - claim_type: org_name - claim_value: Search - raw_value: Search - source_url: http://www.heemkundedendungen.nl/ - retrieved_on: '2025-11-29T16:38:07.166640+00:00' - xpath: /html/body/div[1]/div/div[4]/div/div[1]/section[1]/search/form/div/span/svg/title - html_file: web/0522/heemkundedendungen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:01.082313+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -669,6 +639,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:01.083511+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundevereniging Op die Dunghen diff --git a/data/nde/enriched/entries/0526_Q110907473.yaml b/data/nde/enriched/entries/0526_Q110907473.yaml index 0c2ae6113e..ec67412051 100644 --- a/data/nde/enriched/entries/0526_Q110907473.yaml +++ b/data/nde/enriched/entries/0526_Q110907473.yaml @@ -300,18 +300,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:31.309953+00:00' source_archive: web/0526/heemkundegeffen.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: heemkundegeffen.nl - raw_value: heemkundegeffen.nl – Heemkunde Werkgroep Geffen - source_url: http://www.heemkundegeffen.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0526/heemkundegeffen.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:31.309435+00:00' - claim_type: email claim_value: heemkundegroep@gmail.com raw_value: heemkundegroep@gmail.com @@ -322,3 +312,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:25:31.309767+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0527_Q110907499.yaml b/data/nde/enriched/entries/0527_Q110907499.yaml index 11efd6a917..49b1b67d5f 100644 --- a/data/nde/enriched/entries/0527_Q110907499.yaml +++ b/data/nde/enriched/entries/0527_Q110907499.yaml @@ -303,18 +303,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:02.013149+00:00' source_archive: web/0527/nuwelant.nl - claims_count: 7 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nuwelant - source_url: https://www.nuwelant.nl/ - retrieved_on: '2025-11-29T23:29:13.422667+00:00' - xpath: /html/head/title - html_file: web/0527/nuwelant.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:02.012369+00:00' - claim_type: description_short claim_value: Nuwelant is de heemkundewerkgroep van de parochie Nuland inclusief Heeseind. @@ -337,36 +327,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:46:02.012631+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.nuwelant.nl&t=Nuwelant - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.nuwelant.nl&t=Nuwelant - source_url: https://www.nuwelant.nl/ - retrieved_on: '2025-11-29T23:29:13.422667+00:00' - xpath: /html/body/div[3]/ul/li[1]/a - html_file: web/0527/nuwelant.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:02.012942+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?text=Nuwelant&url=https%3A%2F%2Fwww.nuwelant.nl - raw_value: https://twitter.com/share?text=Nuwelant&url=https%3A%2F%2Fwww.nuwelant.nl - source_url: https://www.nuwelant.nl/ - retrieved_on: '2025-11-29T23:29:13.422667+00:00' - xpath: /html/body/div[3]/ul/li[2]/a - html_file: web/0527/nuwelant.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:02.012946+00:00' - - claim_type: social_linkedin - claim_value: http://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.nuwelant.nl&title=Nuwelant - raw_value: http://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.nuwelant.nl&title=Nuwelant - source_url: https://www.nuwelant.nl/ - retrieved_on: '2025-11-29T23:29:13.422667+00:00' - xpath: /html/body/div[3]/ul/li[5]/a - html_file: web/0527/nuwelant.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:02.012952+00:00' - claim_type: org_name claim_value: Bezoek ons vernieuwd archief raw_value: Bezoek ons vernieuwd archief @@ -377,6 +337,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:02.013004+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundewerkgroep Nuwelant diff --git a/data/nde/enriched/entries/0530_Q110907466.yaml b/data/nde/enriched/entries/0530_Q110907466.yaml index dd3b58815f..480f53a427 100644 --- a/data/nde/enriched/entries/0530_Q110907466.yaml +++ b/data/nde/enriched/entries/0530_Q110907466.yaml @@ -431,7 +431,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:02.913053+00:00' source_archive: web/0530/oudevrijheid.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Home » Heemkundekring De Oude Vrijheid Sint-Oedenrode @@ -491,16 +491,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:02.912910+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.oudevrijheid.nl - retrieved_on: '2025-11-29T16:38:36.818019+00:00' - xpath: /html/body/div/div[3]/div/div/div/div/h1 - html_file: web/0530/oudevrijheid.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:02.912978+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundige Kring De Oude Vrijheid diff --git a/data/nde/enriched/entries/0534_Q110908866.yaml b/data/nde/enriched/entries/0534_Q110908866.yaml index c6ed60eb84..551f1f90db 100644 --- a/data/nde/enriched/entries/0534_Q110908866.yaml +++ b/data/nde/enriched/entries/0534_Q110908866.yaml @@ -727,7 +727,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:04.374832+00:00' source_archive: web/0534/kasteelheeswijk.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Kasteel Heeswijk @@ -791,16 +791,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:04.374449+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.kasteelheeswijk.nl/ - retrieved_on: '2025-11-29T16:39:00.497134+00:00' - xpath: /html/body/main/div[1]/h1 - html_file: web/0534/kasteelheeswijk.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:04.374511+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Kasteel Heeswijk diff --git a/data/nde/enriched/entries/0536_Q115131080.yaml b/data/nde/enriched/entries/0536_Q115131080.yaml index f8227730b6..28ba72b09b 100644 --- a/data/nde/enriched/entries/0536_Q115131080.yaml +++ b/data/nde/enriched/entries/0536_Q115131080.yaml @@ -684,18 +684,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:04.684203+00:00' source_archive: web/0536/maczekmemorialbreda.nl - claims_count: 14 + claims_count: 13 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Maczek Memorial - source_url: https://www.maczekmemorialbreda.nl/ - retrieved_on: '2025-11-29T16:39:03.407285+00:00' - xpath: /html/head/title - html_file: web/0536/maczekmemorialbreda.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:04.683005+00:00' - claim_type: description_short claim_value: Beleef het verhaal van Generaal Maczek en zijn soldaten van de 1e Poolse Pantserdivisie tijdens de Tweede Wereldoorlog. @@ -830,6 +820,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:04.684011+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Maczek Memorial Breda diff --git a/data/nde/enriched/entries/0537_Q2423105.yaml b/data/nde/enriched/entries/0537_Q2423105.yaml index c4bd84e206..82325e3e99 100644 --- a/data/nde/enriched/entries/0537_Q2423105.yaml +++ b/data/nde/enriched/entries/0537_Q2423105.yaml @@ -637,7 +637,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:05.137067+00:00' source_archive: web/0537/museumboerderij.nl - claims_count: 5 + claims_count: 4 claims: - claim_type: org_name claim_value: De Meierijsche Museumboerderij @@ -679,16 +679,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:05.136696+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.museumboerderij.nl/ - retrieved_on: '2025-11-29T16:40:26.326802+00:00' - xpath: /html/body/div[1]/div/div/div/div[1]/div/div[2]/div/div/div/div/h1 - html_file: web/0537/museumboerderij.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:05.136786+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Meierijsche Museumboerderij diff --git a/data/nde/enriched/entries/0539_Q1842735.yaml b/data/nde/enriched/entries/0539_Q1842735.yaml index 2c55fab1ed..a30fb4699a 100644 --- a/data/nde/enriched/entries/0539_Q1842735.yaml +++ b/data/nde/enriched/entries/0539_Q1842735.yaml @@ -647,18 +647,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:05.281442+00:00' source_archive: web/0539/museumderoos.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum De Roos - source_url: http://www.museumderoos.nl - retrieved_on: '2025-11-29T16:40:33.290943+00:00' - xpath: /html/head/title - html_file: web/0539/museumderoos.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:05.281033+00:00' - claim_type: org_name claim_value: Museum De Roos raw_value: Museum De Roos @@ -679,6 +669,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:05.281366+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum De Roos diff --git a/data/nde/enriched/entries/0540_Q110906682.yaml b/data/nde/enriched/entries/0540_Q110906682.yaml index bf5afdd847..ba72a93cf5 100644 --- a/data/nde/enriched/entries/0540_Q110906682.yaml +++ b/data/nde/enriched/entries/0540_Q110906682.yaml @@ -774,7 +774,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:05.454960+00:00' source_archive: web/0540/museumoudeslot.nl - claims_count: 16 + claims_count: 13 claims: - claim_type: org_name claim_value: Homepage @@ -798,26 +798,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:05.453715+00:00' - - claim_type: org_name - claim_value: Museum 't Oude Slot - raw_value: Museum 't Oude Slot - source_url: http://www.museumoudeslot.nl - retrieved_on: '2025-11-29T16:40:36.177230+00:00' - xpath: /html/head/meta[10] - html_file: web/0540/museumoudeslot.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:46:05.453945+00:00' - - claim_type: org_name - claim_value: Museum 't Oude Slot - raw_value: Museum 't Oude Slot - source_url: http://www.museumoudeslot.nl - retrieved_on: '2025-11-29T16:40:36.177230+00:00' - xpath: /html/head/script[2] - html_file: web/0540/museumoudeslot.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_name - extraction_timestamp: '2025-12-01T10:46:05.454039+00:00' - claim_type: description claim_value: Museum 't Oude Slot raw_value: Museum 't Oude Slot @@ -928,18 +908,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:05.454653+00:00' - - claim_type: org_name - claim_value: 'Erfgoed, kunst en educatie: het is allemaal te vinden in Museum - ’t Oude Slot.' - raw_value: 'Erfgoed, kunst en educatie: het is allemaal te vinden in Museum ’t - Oude Slot.' - source_url: http://www.museumoudeslot.nl - retrieved_on: '2025-11-29T16:40:36.177230+00:00' - xpath: /html/body/div[3]/main/section[1]/div[3]/div/div[1]/div/div/div[1]/div/h1 - html_file: web/0540/museumoudeslot.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:05.454766+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum 't Oude Slot diff --git a/data/nde/enriched/entries/0547_Q3483633.yaml b/data/nde/enriched/entries/0547_Q3483633.yaml index 9fd5c2511c..abe4fb102f 100644 --- a/data/nde/enriched/entries/0547_Q3483633.yaml +++ b/data/nde/enriched/entries/0547_Q3483633.yaml @@ -895,7 +895,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:06.185571+00:00' source_archive: web/0547/dewieger.nl - claims_count: 18 + claims_count: 14 claims: - claim_type: org_name claim_value: De Wieger Deurne @@ -907,16 +907,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:06.183776+00:00' - - claim_type: org_name - claim_value: close - raw_value: close - source_url: http://www.dewieger.nl - retrieved_on: '2025-11-29T16:40:44.355304+00:00' - xpath: /html/body/svg/defs/symbol[1]/title - html_file: web/0547/dewieger.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:06.183790+00:00' - claim_type: org_name claim_value: arrow-circle-o-down raw_value: arrow-circle-o-down @@ -927,26 +917,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:06.183794+00:00' - - claim_type: org_name - claim_value: facebook - raw_value: facebook - source_url: http://www.dewieger.nl - retrieved_on: '2025-11-29T16:40:44.355304+00:00' - xpath: /html/body/svg/defs/symbol[3]/title - html_file: web/0547/dewieger.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:06.183798+00:00' - - claim_type: org_name - claim_value: linkedin - raw_value: linkedin - source_url: http://www.dewieger.nl - retrieved_on: '2025-11-29T16:40:44.355304+00:00' - xpath: /html/body/svg/defs/symbol[4]/title - html_file: web/0547/dewieger.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:06.183801+00:00' - claim_type: org_name claim_value: ellipsis-v raw_value: ellipsis-v @@ -957,16 +927,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:06.183804+00:00' - - claim_type: org_name - claim_value: instagram - raw_value: instagram - source_url: http://www.dewieger.nl - retrieved_on: '2025-11-29T16:40:44.355304+00:00' - xpath: /html/body/svg/defs/symbol[6]/title - html_file: web/0547/dewieger.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:06.183807+00:00' - claim_type: org_name claim_value: long-arrow-left raw_value: long-arrow-left @@ -1077,6 +1037,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:06.184998+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De Wieger diff --git a/data/nde/enriched/entries/0552_Q110907382.yaml b/data/nde/enriched/entries/0552_Q110907382.yaml index 020b122409..13bc5e1414 100644 --- a/data/nde/enriched/entries/0552_Q110907382.yaml +++ b/data/nde/enriched/entries/0552_Q110907382.yaml @@ -402,18 +402,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:07.741528+00:00' source_archive: web/0552/kruysenhuis.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Kruysenhuis Oirschot - source_url: http://www.kruysenhuis.nl - retrieved_on: '2025-11-29T16:41:53.905471+00:00' - xpath: /html/head/title - html_file: web/0552/kruysenhuis.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:07.741186+00:00' - claim_type: email claim_value: info@kruysenhuis.nl raw_value: info@kruysenhuis.nl @@ -474,6 +464,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:07.741488+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Kruysenhuis diff --git a/data/nde/enriched/entries/0553_Q27949674.yaml b/data/nde/enriched/entries/0553_Q27949674.yaml index de9cdb24ae..ad35c677a8 100644 --- a/data/nde/enriched/entries/0553_Q27949674.yaml +++ b/data/nde/enriched/entries/0553_Q27949674.yaml @@ -916,18 +916,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:07.879236+00:00' source_archive: web/0553/museumklokenpeel.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Klok & Peel - source_url: http://www.museumklokenpeel.nl/ - retrieved_on: '2025-11-29T16:41:35.507077+00:00' - xpath: /html/head/title - html_file: web/0553/museumklokenpeel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:07.878316+00:00' - claim_type: description_short claim_value: "Een compleet dagje uit voor jong én oud, en eigenlijk gewoon twee\ \ musea voor de prijs van één. Bij ons leer je namelijk niet alleen alles over\ @@ -1038,6 +1028,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:07.879090+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Klok & Peel diff --git a/data/nde/enriched/entries/0554_Q2112422.yaml b/data/nde/enriched/entries/0554_Q2112422.yaml index d08f807b12..f61aeda3f7 100644 --- a/data/nde/enriched/entries/0554_Q2112422.yaml +++ b/data/nde/enriched/entries/0554_Q2112422.yaml @@ -919,18 +919,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:08.271284+00:00' source_archive: web/0554/museumkrona.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum Krona - source_url: https://www.museumkrona.nl/nl/ - retrieved_on: '2025-11-29T16:41:40.522595+00:00' - xpath: /html/head/title - html_file: web/0554/museumkrona.nl/mirror/www.museumkrona.nl/nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:08.270784+00:00' - claim_type: phone claim_value: +31413 26 34 31 raw_value: +31413 26 34 31 @@ -991,6 +981,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:08.271226+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Krona diff --git a/data/nde/enriched/entries/0559_Q2119394.yaml b/data/nde/enriched/entries/0559_Q2119394.yaml index 6725c39d9b..8ee4eb8335 100644 --- a/data/nde/enriched/entries/0559_Q2119394.yaml +++ b/data/nde/enriched/entries/0559_Q2119394.yaml @@ -701,18 +701,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:08.842074+00:00' source_archive: web/0559/museumvekemans.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Vekemans - source_url: http://www.museumvekemans.nl - retrieved_on: '2025-11-29T16:41:52.213034+00:00' - xpath: /html/head/title - html_file: web/0559/museumvekemans.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:08.841345+00:00' - claim_type: description_short claim_value: Museum Vekemans in Boxtel toont de ontwikkeling van wassen en strijken en biedt een overzicht van een eeuw Brabantse boerendracht. Beide collecties @@ -749,6 +739,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:46:08.841790+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Vekemans diff --git a/data/nde/enriched/entries/0566_Q4295172.yaml b/data/nde/enriched/entries/0566_Q4295172.yaml index 4fc454c9a7..04fc4f58fc 100644 --- a/data/nde/enriched/entries/0566_Q4295172.yaml +++ b/data/nde/enriched/entries/0566_Q4295172.yaml @@ -676,18 +676,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:10.305805+00:00' source_archive: web/0566/zouavenmuseum.nl - claims_count: 8 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Nederlands Zouavenmuseum - source_url: http://www.zouavenmuseum.nl - retrieved_on: '2025-11-29T16:42:02.160819+00:00' - xpath: /html/head/title - html_file: web/0566/zouavenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:10.305059+00:00' - claim_type: org_name claim_value: Stichting Nederlands Zouavenmuseum raw_value: Stichting Nederlands Zouavenmuseum @@ -718,26 +708,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:46:10.305589+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://zouavenmuseum.nl/nieuwsbericht/ - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://zouavenmuseum.nl/nieuwsbericht/ - source_url: http://www.zouavenmuseum.nl - retrieved_on: '2025-11-29T16:42:02.160819+00:00' - xpath: /html/body/div/div/ul/li/div/li/div/li/div[2]/div/section/article/div[2]/div/div/div/a[1] - html_file: web/0566/zouavenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:10.305711+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Welkom - raw_value: https://twitter.com/intent/tweet?text=Welkom - source_url: http://www.zouavenmuseum.nl - retrieved_on: '2025-11-29T16:42:02.160819+00:00' - xpath: /html/body/div/div/ul/li/div/li/div/li/div[2]/div/section/article/div[2]/div/div/div/a[2] - html_file: web/0566/zouavenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:10.305719+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/www.zouavenmuseum.nl/ raw_value: https://www.facebook.com/www.zouavenmuseum.nl/ @@ -758,6 +728,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:10.305756+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zouavenmuseum diff --git a/data/nde/enriched/entries/0567_Q1823028.yaml b/data/nde/enriched/entries/0567_Q1823028.yaml index 79db349cb2..6877f325ea 100644 --- a/data/nde/enriched/entries/0567_Q1823028.yaml +++ b/data/nde/enriched/entries/0567_Q1823028.yaml @@ -917,7 +917,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:10.395773+00:00' source_archive: web/0567/libertypark.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Domeinnaam Libertypark.nl overnemen? Koop nu en start met je plan. @@ -973,16 +973,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:46:10.395661+00:00' - - claim_type: org_name - claim_value: Libertypark.nl - raw_value: Libertypark.nl - source_url: http://www.libertypark.nl - retrieved_on: '2025-11-29T16:42:03.432042+00:00' - xpath: /html/body/section[1]/div[1]/div[1]/h1 - html_file: web/0567/libertypark.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:10.395730+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oorlogsmuseum Overloon diff --git a/data/nde/enriched/entries/0569_Q56459509.yaml b/data/nde/enriched/entries/0569_Q56459509.yaml index 7c82b5f851..a30eb515c8 100644 --- a/data/nde/enriched/entries/0569_Q56459509.yaml +++ b/data/nde/enriched/entries/0569_Q56459509.yaml @@ -537,18 +537,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:10.982436+00:00' source_archive: web/0569/pietervermeulenmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Pieter Vermeulen Museum - source_url: http://www.pietervermeulenmuseum.nl/ - retrieved_on: '2025-11-29T16:42:36.860974+00:00' - xpath: /html/head/title - html_file: web/0569/pietervermeulenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:10.981599+00:00' - claim_type: description_short claim_value: Het Pieter Vermeulen Museum is een kindvriendelijk natuurmuseum en het Centrum voor Natuur- en Milieueducatie in de Gemeente Velsen. @@ -631,6 +621,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:10.982337+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Pieter Vermeulen Museum diff --git a/data/nde/enriched/entries/0575_Q110907548.yaml b/data/nde/enriched/entries/0575_Q110907548.yaml index 0dd72afbe8..e4819928d3 100644 --- a/data/nde/enriched/entries/0575_Q110907548.yaml +++ b/data/nde/enriched/entries/0575_Q110907548.yaml @@ -544,28 +544,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:12.876455+00:00' source_archive: web/0575/westbrabantsarchief.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - West-Brabants Archief - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:43:10.305756+00:00' - xpath: /html/head/title - html_file: web/0575/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:12.875919+00:00' - - claim_type: org_name - claim_value: search - raw_value: search - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:43:10.305756+00:00' - xpath: /html/body/header/div/div/div/form/button/svg/title - html_file: web/0575/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:12.875932+00:00' - claim_type: org_name claim_value: info raw_value: info @@ -596,26 +576,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:12.875946+00:00' - - claim_type: org_name - claim_value: facebook - raw_value: facebook - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:43:10.305756+00:00' - xpath: /html/body/footer/div/div[3]/nav[1]/ul/li[1]/a/svg/title - html_file: web/0575/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:12.875950+00:00' - - claim_type: org_name - claim_value: instagram - raw_value: instagram - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:43:10.305756+00:00' - xpath: /html/body/footer/div/div[3]/nav[1]/ul/li[3]/a/svg/title - html_file: web/0575/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:12.875954+00:00' - claim_type: description_short claim_value: West-Brabants Archief raw_value: West-Brabants Archief @@ -666,6 +626,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:12.876373+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: West-Brabants Archief diff --git a/data/nde/enriched/entries/0580_Q2176121.yaml b/data/nde/enriched/entries/0580_Q2176121.yaml index 0ee2e36624..ffe34bc601 100644 --- a/data/nde/enriched/entries/0580_Q2176121.yaml +++ b/data/nde/enriched/entries/0580_Q2176121.yaml @@ -681,18 +681,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:13.859300+00:00' source_archive: web/0580/stadsarchief.breda.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stadsarchief Breda - source_url: https://stadsarchief.breda.nl/ - retrieved_on: '2025-11-29T16:43:18.587458+00:00' - xpath: /html/head/title - html_file: web/0580/stadsarchief.breda.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:13.858765+00:00' - claim_type: description_short claim_value: Stadsarchief Breda verzamelt en beheert de geschiedenis van de stad en zijn inwoners. Die rijke historie stelt Stadsarchief Breda gratis ter beschikking @@ -747,6 +737,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:13.859236+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stadsarchief Breda diff --git a/data/nde/enriched/entries/0581_Q2783790.yaml b/data/nde/enriched/entries/0581_Q2783790.yaml index a3ee85807f..c1a9219eb4 100644 --- a/data/nde/enriched/entries/0581_Q2783790.yaml +++ b/data/nde/enriched/entries/0581_Q2783790.yaml @@ -617,7 +617,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:13.913914+00:00' source_archive: web/0581/graafsmuseum.nl - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: graafsmuseum.nl - This website is for sale! - graafsmuseum Resources @@ -647,16 +647,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:13.913795+00:00' - - claim_type: org_name - claim_value: graafsmuseum.nl - raw_value: graafsmuseum.nl - source_url: http://www.graafsmuseum.nl - retrieved_on: '2025-11-29T16:43:19.145358+00:00' - xpath: /html/body/div[1]/div/div/h1 - html_file: web/0581/graafsmuseum.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:13.913902+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stadsmuseum Grave diff --git a/data/nde/enriched/entries/0592_Q2375610.yaml b/data/nde/enriched/entries/0592_Q2375610.yaml index 4adb16a83b..6f8efd56ea 100644 --- a/data/nde/enriched/entries/0592_Q2375610.yaml +++ b/data/nde/enriched/entries/0592_Q2375610.yaml @@ -672,7 +672,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:15.976694+00:00' source_archive: web/0592/speelgoedmuseum.nl - claims_count: 8 + claims_count: 7 claims: - claim_type: org_name claim_value: Speelgoedmuseum Oosterhout @@ -696,16 +696,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:15.975181+00:00' - - claim_type: org_name - claim_value: speelgoedmuseum.nl - raw_value: speelgoedmuseum.nl - source_url: http://www.speelgoedmuseum.nl/ - retrieved_on: '2025-11-29T16:43:30.313429+00:00' - xpath: /html/head/meta[9] - html_file: web/0592/speelgoedmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:46:15.975800+00:00' - claim_type: email claim_value: info@speelgoedmuseum.nl raw_value: info@speelgoedmuseum.nl @@ -756,6 +746,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:15.976429+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Speelgoedmuseum Oosterhout diff --git a/data/nde/enriched/entries/0594_unknown.yaml b/data/nde/enriched/entries/0594_unknown.yaml index 3f97b5f545..e81c1e9613 100644 --- a/data/nde/enriched/entries/0594_unknown.yaml +++ b/data/nde/enriched/entries/0594_unknown.yaml @@ -166,18 +166,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:16.772424+00:00' source_archive: web/0594/stichtingzhc.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Zuidelijk Historisch Contact - source_url: https://stichtingzhc.nl/ - retrieved_on: '2025-11-29T16:46:09.150368+00:00' - xpath: /html/head/title - html_file: web/0594/stichtingzhc.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:16.772025+00:00' - claim_type: org_name claim_value: Stichting Zuidelijk Historisch Contact raw_value: Stichting Zuidelijk Historisch Contact @@ -188,6 +178,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:46:16.772170+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Zuidelijk Historisch Contact diff --git a/data/nde/enriched/entries/0595_Q2297235.yaml b/data/nde/enriched/entries/0595_Q2297235.yaml index 0a8c5c5fe0..6fcbf25ab4 100644 --- a/data/nde/enriched/entries/0595_Q2297235.yaml +++ b/data/nde/enriched/entries/0595_Q2297235.yaml @@ -745,18 +745,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:16.858110+00:00' source_archive: web/0595/salha.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Streekarchief Langstraat Heusden Altena - source_url: https://salha.nl/ - retrieved_on: '2025-11-29T16:43:50.195687+00:00' - xpath: /html/head/title - html_file: web/0595/salha.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:16.857499+00:00' - claim_type: description_short claim_value: 'Streekarchief Langstraat Heusden Altena laat als professionele collectiebeheerder een zo groot mogelijk publiek kennis maken met haar fysieke en digitale collecties. @@ -843,6 +833,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:16.858025+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchief Langstraat Heusden Altena diff --git a/data/nde/enriched/entries/0598_Q3983824.yaml b/data/nde/enriched/entries/0598_Q3983824.yaml index b45603b912..16b7ec6fcb 100644 --- a/data/nde/enriched/entries/0598_Q3983824.yaml +++ b/data/nde/enriched/entries/0598_Q3983824.yaml @@ -1449,18 +1449,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:17.177066+00:00' source_archive: web/0598/textielmuseum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - textielmuseum.nl - TextielMuseum - source_url: http://www.textielmuseum.nl - retrieved_on: '2025-11-29T16:43:53.233054+00:00' - xpath: /html/head/title - html_file: web/0598/textielmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:17.176154+00:00' - claim_type: description_short claim_value: Kom naar het TextielMuseum in Tilburg voor inspirerende tentoonstellingen op het gebied van kunst, design, mode en erfgoed. Ontdek ook het TextielLab, @@ -1525,6 +1515,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:17.176754+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: TextielMuseum diff --git a/data/nde/enriched/entries/0603_Q56461052.yaml b/data/nde/enriched/entries/0603_Q56461052.yaml index bc67f13ace..117c3a3500 100644 --- a/data/nde/enriched/entries/0603_Q56461052.yaml +++ b/data/nde/enriched/entries/0603_Q56461052.yaml @@ -651,18 +651,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:17.940832+00:00' source_archive: web/0603/vsmm.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Valkerij en Sigarenmakerij Museum - source_url: http://www.vsmm.nl - retrieved_on: '2025-11-29T16:44:02.613539+00:00' - xpath: /html/head/title - html_file: web/0603/vsmm.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:17.940360+00:00' - claim_type: description_short claim_value: Beleef een uniek museum met twee bijzondere collecties! Onze gepassioneerde vrijwilligers nemen u mee op een boeiende reis door de wereld van de valkerij @@ -697,6 +687,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:46:17.940646+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Valkerij en Sigarenmakerij Museum diff --git a/data/nde/enriched/entries/0606_Q10896041.yaml b/data/nde/enriched/entries/0606_Q10896041.yaml index 1613024d7c..b6f91de8da 100644 --- a/data/nde/enriched/entries/0606_Q10896041.yaml +++ b/data/nde/enriched/entries/0606_Q10896041.yaml @@ -394,7 +394,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:18.568788+00:00' source_archive: web/0606/vangoghhuis.com - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Vincent van GoghHuis Zundert @@ -449,16 +449,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:18.568513+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.vangoghhuis.com - retrieved_on: '2025-11-29T16:47:06.622674+00:00' - xpath: /html/body/div[3]/div[3]/div[3]/div/div/h1 - html_file: web/0606/vangoghhuis.com/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:18.568646+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Van Goghhuis diff --git a/data/nde/enriched/entries/0610_Q110907548.yaml b/data/nde/enriched/entries/0610_Q110907548.yaml index 026ae9a2dd..4b96a824bd 100644 --- a/data/nde/enriched/entries/0610_Q110907548.yaml +++ b/data/nde/enriched/entries/0610_Q110907548.yaml @@ -548,28 +548,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:19.970036+00:00' source_archive: web/0610/westbrabantsarchief.nl - claims_count: 12 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - West-Brabants Archief - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:45:32.262610+00:00' - xpath: /html/head/title - html_file: web/0610/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:19.969184+00:00' - - claim_type: org_name - claim_value: search - raw_value: search - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:45:32.262610+00:00' - xpath: /html/body/header/div/div/div/form/button/svg/title - html_file: web/0610/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:19.969196+00:00' - claim_type: org_name claim_value: info raw_value: info @@ -600,26 +580,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:19.969230+00:00' - - claim_type: org_name - claim_value: facebook - raw_value: facebook - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:45:32.262610+00:00' - xpath: /html/body/footer/div/div[3]/nav[1]/ul/li[1]/a/svg/title - html_file: web/0610/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:19.969235+00:00' - - claim_type: org_name - claim_value: instagram - raw_value: instagram - source_url: http://www.westbrabantsarchief.nl - retrieved_on: '2025-11-29T16:45:32.262610+00:00' - xpath: /html/body/footer/div/div[3]/nav[1]/ul/li[3]/a/svg/title - html_file: web/0610/westbrabantsarchief.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:19.969239+00:00' - claim_type: description_short claim_value: West-Brabants Archief raw_value: West-Brabants Archief @@ -670,6 +630,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:19.969804+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: West-Brabants Archief diff --git a/data/nde/enriched/entries/0612_Q110907546.yaml b/data/nde/enriched/entries/0612_Q110907546.yaml index 72ed54dd76..61c229074b 100644 --- a/data/nde/enriched/entries/0612_Q110907546.yaml +++ b/data/nde/enriched/entries/0612_Q110907546.yaml @@ -309,18 +309,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:20.588999+00:00' source_archive: web/0612/heemkundeoploo.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - De Heerlyckheit Plo - source_url: http://www.heemkundeoploo.nl - retrieved_on: '2025-11-29T16:45:44.909939+00:00' - xpath: /html/head/title - html_file: web/0612/heemkundeoploo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:20.587363+00:00' - claim_type: description_short claim_value: 01. 02. 03. Over ons Onze vereniging Heemkundevereniging ‘De Heerlyckheit Plo’ is een vereniging met zo’n 150 enthousiaste leden. We zijn geïnteresseerd @@ -365,6 +355,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:20.588307+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Heemkundevereniging De Heerlyckheit Plo diff --git a/data/nde/enriched/entries/0613_Q18286289.yaml b/data/nde/enriched/entries/0613_Q18286289.yaml index 6f60051054..34722d28ad 100644 --- a/data/nde/enriched/entries/0613_Q18286289.yaml +++ b/data/nde/enriched/entries/0613_Q18286289.yaml @@ -615,18 +615,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:20.667768+00:00' source_archive: web/0613/metropoolregioeindhoven.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Metropool Regio Eindhoven - source_url: https://metropoolregioeindhoven.nl/ - retrieved_on: '2025-11-29T16:45:45.437125+00:00' - xpath: /html/head/title - html_file: web/0613/metropoolregioeindhoven.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:20.667121+00:00' - claim_type: description_short claim_value: Samenwerking is de sleutel tot het succes van de regio. De 21 regiogemeenten hebben daarom hun krachten gebundeld in de Metropoolregio Eindhoven. @@ -649,6 +639,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:46:20.667338+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Metropoolregio Eindhoven diff --git a/data/nde/enriched/entries/0614_Q2395096.yaml b/data/nde/enriched/entries/0614_Q2395096.yaml index 3ebd607174..857c7e489c 100644 --- a/data/nde/enriched/entries/0614_Q2395096.yaml +++ b/data/nde/enriched/entries/0614_Q2395096.yaml @@ -662,18 +662,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:21.129546+00:00' source_archive: web/0614/oertijdmuseum.nl - claims_count: 12 + claims_count: 11 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME - Oertijdmuseum - source_url: http://www.oertijdmuseum.nl/ - retrieved_on: '2025-11-29T23:30:02.349318+00:00' - xpath: /html/head/title - html_file: web/0614/oertijdmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:21.128658+00:00' - claim_type: description_short claim_value: Het Oertijdmuseum is het grootste geologische museum van Nederland. Wandel tussen de dino's of bekijk live hoe fossielen worden uitgeprepareerd. @@ -800,6 +790,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:21.129474+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oertijdmuseum diff --git a/data/nde/enriched/entries/0615_Q20970639.yaml b/data/nde/enriched/entries/0615_Q20970639.yaml index f9ff6bc9b8..e1fcb163ee 100644 --- a/data/nde/enriched/entries/0615_Q20970639.yaml +++ b/data/nde/enriched/entries/0615_Q20970639.yaml @@ -766,18 +766,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:21.210106+00:00' source_archive: web/0615/museumweesp.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Weesp - source_url: https://museumweesp.nl/ - retrieved_on: '2025-11-29T16:45:46.255656+00:00' - xpath: /html/head/title - html_file: web/0615/museumweesp.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:21.209446+00:00' - claim_type: description_short claim_value: Gemeentemuseum Weesp wordt mogelijk gemaakt door de gemeente Weesp; de collectie behoort aan de gemeente en haar inwoners. @@ -860,6 +850,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:21.209999+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Weesp diff --git a/data/nde/enriched/entries/0618_Q4452658.yaml b/data/nde/enriched/entries/0618_Q4452658.yaml index 2b11c8a628..f1f4926b36 100644 --- a/data/nde/enriched/entries/0618_Q4452658.yaml +++ b/data/nde/enriched/entries/0618_Q4452658.yaml @@ -1323,18 +1323,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:21.901387+00:00' source_archive: web/0618/annefrank.org - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Anne Frank House - source_url: https://www.annefrank.org/en/ - retrieved_on: '2025-11-29T16:47:00.573583+00:00' - xpath: /html/head/title - html_file: web/0618/annefrank.org/mirror/www.annefrank.org/en/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:21.900843+00:00' - claim_type: description_short claim_value: The official website of the Anne Frank House, with the most complete and up-to-date information about Anne Frank, her diary, and the Secret Annex. @@ -1399,6 +1389,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:21.901278+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Anne Frank Stichting diff --git a/data/nde/enriched/entries/0633_Q315883.yaml b/data/nde/enriched/entries/0633_Q315883.yaml index 2a9afdb3be..84b95d23f5 100644 --- a/data/nde/enriched/entries/0633_Q315883.yaml +++ b/data/nde/enriched/entries/0633_Q315883.yaml @@ -884,18 +884,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:37.835255+00:00' source_archive: web/0633/embassyofthefreemind.com - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - EMBASSY OF THE FREE MIND - source_url: http://www.embassyofthefreemind.com/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0633/embassyofthefreemind.com/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:37.833607+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -994,3 +984,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:25:37.835025+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0635_Q2919762.yaml b/data/nde/enriched/entries/0635_Q2919762.yaml index 57a251b1aa..b74e99f500 100644 --- a/data/nde/enriched/entries/0635_Q2919762.yaml +++ b/data/nde/enriched/entries/0635_Q2919762.yaml @@ -1012,18 +1012,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:26.143219+00:00' source_archive: web/0635/bijbelsmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Bijbels Museum - source_url: http://www.bijbelsmuseum.nl/ - retrieved_on: '2025-11-29T16:51:01.973096+00:00' - xpath: /html/head/title - html_file: web/0635/bijbelsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:26.141484+00:00' - claim_type: description_short claim_value: Bijbels Museum maakt door NL reizende kunsttentoonstellingen rond bijbelse thema's en verhalen, vaak verbreed naar andere religies. @@ -1106,6 +1096,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:26.142905+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bijbels Museum diff --git a/data/nde/enriched/entries/0638_unknown.yaml b/data/nde/enriched/entries/0638_unknown.yaml index de711ed07c..eb0e94701c 100644 --- a/data/nde/enriched/entries/0638_unknown.yaml +++ b/data/nde/enriched/entries/0638_unknown.yaml @@ -251,7 +251,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:27.228314+00:00' source_archive: web/0638/grotekerknaarden.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Grote Kerk Naarden @@ -283,16 +283,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:27.228175+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://grotekerknaarden.nl/ - retrieved_on: '2025-11-29T16:50:48.909438+00:00' - xpath: /html/body/div/div/div/div/main/h1 - html_file: web/0638/grotekerknaarden.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:27.228216+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Grote Kerk Naarden diff --git a/data/nde/enriched/entries/0643_Q56459403.yaml b/data/nde/enriched/entries/0643_Q56459403.yaml index 30c08ece3f..7fd3bb5c40 100644 --- a/data/nde/enriched/entries/0643_Q56459403.yaml +++ b/data/nde/enriched/entries/0643_Q56459403.yaml @@ -468,18 +468,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:27.906295+00:00' source_archive: web/0643/cultuurmuseumtexel.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Texels erfgoedmuseum - Waelstee - source_url: https://www.cultuurmuseumtexel.nl/ - retrieved_on: '2025-11-29T16:51:00.339708+00:00' - xpath: /html/head/title - html_file: web/0643/cultuurmuseumtexel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:27.905076+00:00' - claim_type: org_name claim_value: Texels erfgoedmuseum - Waelstee raw_value: Texels erfgoedmuseum - Waelstee @@ -520,6 +510,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:27.906056+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Waelstee diff --git a/data/nde/enriched/entries/0644_Q56461228.yaml b/data/nde/enriched/entries/0644_Q56461228.yaml index 4d1683537c..4814340434 100644 --- a/data/nde/enriched/entries/0644_Q56461228.yaml +++ b/data/nde/enriched/entries/0644_Q56461228.yaml @@ -512,18 +512,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:27.987580+00:00' source_archive: web/0644/dewemme.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum de Wemme Zuidwolde - source_url: https://www.dewemme.nl/ - retrieved_on: '2025-11-29T16:50:52.674646+00:00' - xpath: /html/head/title - html_file: web/0644/dewemme.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:27.986679+00:00' - claim_type: org_name claim_value: BTC Art raw_value: BTC Art - Shine @@ -624,6 +614,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:27.987451+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum De Wemme diff --git a/data/nde/enriched/entries/0647_unknown.yaml b/data/nde/enriched/entries/0647_unknown.yaml index 93f3eba9ed..a450ee339d 100644 --- a/data/nde/enriched/entries/0647_unknown.yaml +++ b/data/nde/enriched/entries/0647_unknown.yaml @@ -391,18 +391,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:28.923092+00:00' source_archive: web/0647/dnb.nl - claims_count: 12 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Archief - raw_value: Archief | De Nederlandsche Bank - source_url: https://www.dnb.nl/archief/ - retrieved_on: '2025-11-29T16:51:03.166899+00:00' - xpath: /html/head/title - html_file: web/0647/dnb.nl/mirror/www.dnb.nl/archief/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:28.919724+00:00' - claim_type: description_short claim_value: De Nederlandsche Bank wil dat haar informatie toegankelijk is voor iedereen. Informatie van vandaag en informatie van vroeger. @@ -485,26 +475,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:28.922191+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?url=https://www.dnb.nl/archief/ - raw_value: https://twitter.com/intent/tweet?url=https://www.dnb.nl/archief/ - source_url: https://www.dnb.nl/archief/ - retrieved_on: '2025-11-29T16:51:03.166899+00:00' - xpath: /html/body/main/div[1]/div/div[1]/div[3]/div/a[2] - html_file: web/0647/dnb.nl/mirror/www.dnb.nl/archief/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:28.922195+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https://www.dnb.nl/archief/ - raw_value: https://www.facebook.com/sharer.php?u=https://www.dnb.nl/archief/ - source_url: https://www.dnb.nl/archief/ - retrieved_on: '2025-11-29T16:51:03.166899+00:00' - xpath: /html/body/main/div[1]/div/div[1]/div[3]/div/a[3] - html_file: web/0647/dnb.nl/mirror/www.dnb.nl/archief/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:28.922199+00:00' - claim_type: social_linkedin claim_value: https://www.linkedin.com/company/de-nederlandsche-bank raw_value: https://www.linkedin.com/company/de-nederlandsche-bank @@ -515,6 +485,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:28.922214+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De Nederlandse Bank Archief diff --git a/data/nde/enriched/entries/0649_Q1282056.yaml b/data/nde/enriched/entries/0649_Q1282056.yaml index 39fc012887..5681ec045c 100644 --- a/data/nde/enriched/entries/0649_Q1282056.yaml +++ b/data/nde/enriched/entries/0649_Q1282056.yaml @@ -850,7 +850,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:29.811202+00:00' source_archive: web/0649/ecomare.nl - claims_count: 11 + claims_count: 9 claims: - claim_type: org_name claim_value: Duik in de zee! Een spetterend dagje uit @@ -934,26 +934,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:29.810673+00:00' - - claim_type: social_facebook - claim_value: https://facebook.com/sharer.php?u= - raw_value: https://facebook.com/sharer.php?u= - source_url: http://www.ecomare.nl/ - retrieved_on: '2025-11-29T16:53:57.242395+00:00' - xpath: /html/body/div[3]/section[4]/footer/div[3]/div[3]/div/div/div/ul/li[1]/a - html_file: web/0649/ecomare.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:29.810715+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?url= - raw_value: https://twitter.com/share?url= - source_url: http://www.ecomare.nl/ - retrieved_on: '2025-11-29T16:53:57.242395+00:00' - xpath: /html/body/div[3]/section[4]/footer/div[3]/div[3]/div/div/div/ul/li[2]/a - html_file: web/0649/ecomare.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:29.810726+00:00' - claim_type: org_name claim_value: Ontvang onze nieuwsbrief raw_value: Ontvang onze nieuwsbrief @@ -964,6 +944,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:29.810834+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Ecomare diff --git a/data/nde/enriched/entries/0651_Q110995897.yaml b/data/nde/enriched/entries/0651_Q110995897.yaml index 8fb2b468ab..a42067a383 100644 --- a/data/nde/enriched/entries/0651_Q110995897.yaml +++ b/data/nde/enriched/entries/0651_Q110995897.yaml @@ -584,18 +584,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:30.047693+00:00' source_archive: web/0651/fashionforgood.com - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Fashion for Good - source_url: https://fashionforgood.com/ - retrieved_on: '2025-11-29T16:52:02.534527+00:00' - xpath: /html/head/title - html_file: web/0651/fashionforgood.com/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:30.045611+00:00' - claim_type: description_short claim_value: Fashion for Good unites the fashion ecosystem to transform and build a regenerative industry. By fostering collaboration and facilitating connections, @@ -660,6 +650,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:30.047307+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Fashion for Good diff --git a/data/nde/enriched/entries/0653_Q133734238.yaml b/data/nde/enriched/entries/0653_Q133734238.yaml index e63b0e2dbe..e94dbb1734 100644 --- a/data/nde/enriched/entries/0653_Q133734238.yaml +++ b/data/nde/enriched/entries/0653_Q133734238.yaml @@ -726,7 +726,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:30.646503+00:00' source_archive: web/0653/flessenscheepjesmuseum.nl - claims_count: 5 + claims_count: 4 claims: - claim_type: org_name claim_value: Flessenscheepjesmuseum Enkhuizen @@ -750,16 +750,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:30.645854+00:00' - - claim_type: org_name - claim_value: Flessenscheepjesmuseum.nl - raw_value: Flessenscheepjesmuseum.nl - source_url: http://www.flessenscheepjesmuseum.nl - retrieved_on: '2025-11-29T16:52:11.108066+00:00' - xpath: /html/head/meta[10] - html_file: web/0653/flessenscheepjesmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:46:30.646047+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Flessenscheepjes-Museum-604211196329145/ raw_value: https://www.facebook.com/Flessenscheepjes-Museum-604211196329145/ @@ -780,6 +770,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:30.646376+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Flessenscheepjesmuseum diff --git a/data/nde/enriched/entries/0656_Q574961.yaml b/data/nde/enriched/entries/0656_Q574961.yaml index c3735dec3c..cf41c2db17 100644 --- a/data/nde/enriched/entries/0656_Q574961.yaml +++ b/data/nde/enriched/entries/0656_Q574961.yaml @@ -2064,18 +2064,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:31.211544+00:00' source_archive: web/0656/franshalsmuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.franshalsmuseum.nl/en/ - retrieved_on: '2025-11-29T16:52:13.762843+00:00' - xpath: /html/head/title - html_file: web/0656/franshalsmuseum.nl/mirror/www.franshalsmuseum.nl/en/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.210566+00:00' - claim_type: social_instagram claim_value: https://www.instagram.com/accounts/login/?next=https%3A%2F%2Fwww.instagram.com%2Ffranshalsmuseum%2F&is_from_rle raw_value: https://www.instagram.com/accounts/login/?next=https%3A%2F%2Fwww.instagram.com%2Ffranshalsmuseum%2F&is_from_rle @@ -2116,6 +2106,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:31.211256+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Frans Hals Museum diff --git a/data/nde/enriched/entries/0658_unknown.yaml b/data/nde/enriched/entries/0658_unknown.yaml index 7ecbf2bd98..71bec6bae2 100644 --- a/data/nde/enriched/entries/0658_unknown.yaml +++ b/data/nde/enriched/entries/0658_unknown.yaml @@ -354,18 +354,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:31.520515+00:00' source_archive: web/0658/velsen.nl - claims_count: 13 + claims_count: 12 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Velsen - source_url: https://www.velsen.nl/gemeentearchief-inzage - retrieved_on: '2025-11-29T16:52:15.455376+00:00' - xpath: /html/head/title - html_file: web/0658/velsen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.519653+00:00' - claim_type: org_name claim_value: externe-link-icoon raw_value: externe-link-icoon @@ -486,6 +476,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:31.520342+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Velsen diff --git a/data/nde/enriched/entries/0659_Q9971.yaml b/data/nde/enriched/entries/0659_Q9971.yaml index 833194bd0a..d84bcb8c33 100644 --- a/data/nde/enriched/entries/0659_Q9971.yaml +++ b/data/nde/enriched/entries/0659_Q9971.yaml @@ -1977,18 +1977,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:31.701735+00:00' source_archive: web/0659/waterland.nl - claims_count: 27 + claims_count: 26 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Waterland - source_url: https://www.waterland.nl/ - retrieved_on: '2025-11-29T16:52:14.576977+00:00' - xpath: /html/head/title - html_file: web/0659/waterland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.700922+00:00' - claim_type: org_name claim_value: externe-link-icoon raw_value: externe-link-icoon @@ -2249,6 +2239,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:31.701599+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Waterland diff --git a/data/nde/enriched/entries/0660_Q9980.yaml b/data/nde/enriched/entries/0660_Q9980.yaml index 665462cb99..d099432135 100644 --- a/data/nde/enriched/entries/0660_Q9980.yaml +++ b/data/nde/enriched/entries/0660_Q9980.yaml @@ -1990,18 +1990,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:31.923151+00:00' source_archive: web/0660/zandvoort.nl - claims_count: 11 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Zandvoort - source_url: https://zandvoort.nl/ - retrieved_on: '2025-11-29T16:52:15.206837+00:00' - xpath: /html/head/title - html_file: web/0660/zandvoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.922434+00:00' - claim_type: org_name claim_value: Gemeente Zandvoort raw_value: Gemeente Zandvoort @@ -2022,36 +2012,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:31.922454+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://zandvoort.nl/ - retrieved_on: '2025-11-29T16:52:15.206837+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[1]/a/svg/title - html_file: web/0660/zandvoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.922470+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://zandvoort.nl/ - retrieved_on: '2025-11-29T16:52:15.206837+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[2]/a/svg/title - html_file: web/0660/zandvoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.922475+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://zandvoort.nl/ - retrieved_on: '2025-11-29T16:52:15.206837+00:00' - xpath: /html/body/div[2]/div/footer/div[2]/div/div[3]/ul/li[3]/a/svg/title - html_file: web/0660/zandvoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:31.922479+00:00' - claim_type: description_short claim_value: Dit is de homepage van gemeente Zandvoort raw_value: Dit is de homepage van gemeente Zandvoort @@ -2102,6 +2062,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:31.923004+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zandvoort diff --git a/data/nde/enriched/entries/0661_Q124843656.yaml b/data/nde/enriched/entries/0661_Q124843656.yaml index 63fb0faa4f..4d848ef590 100644 --- a/data/nde/enriched/entries/0661_Q124843656.yaml +++ b/data/nde/enriched/entries/0661_Q124843656.yaml @@ -507,7 +507,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:39.373741+00:00' source_archive: web/0661/weesp.nl - claims_count: 10 + claims_count: 9 claims: - claim_type: org_name claim_value: Stadsgebied Weesp @@ -531,16 +531,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T12:25:39.372895+00:00' - - claim_type: org_name - claim_value: Amsterdam.nl - raw_value: Amsterdam.nl - source_url: http://www.weesp.nl/ - retrieved_on: '' - xpath: /html/head/meta[8] - html_file: web/0661/weesp.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T12:25:39.373129+00:00' - claim_type: phone claim_value: '14020' raw_value: '14020' @@ -611,3 +601,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:25:39.373541+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0664_Q2574390.yaml b/data/nde/enriched/entries/0664_Q2574390.yaml index 6db6d5b4ca..8b7d941e48 100644 --- a/data/nde/enriched/entries/0664_Q2574390.yaml +++ b/data/nde/enriched/entries/0664_Q2574390.yaml @@ -801,7 +801,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:32.278840+00:00' source_archive: web/0664/hetgrachtenhuis.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Hét museum in Amsterdam over de grachten @@ -825,16 +825,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:32.278136+00:00' - - claim_type: org_name - claim_value: https://grachten.museum/ - raw_value: https://grachten.museum/ - source_url: http://www.hetgrachtenhuis.nl - retrieved_on: '2025-11-29T16:52:16.578623+00:00' - xpath: /html/head/meta[10] - html_file: web/0664/hetgrachtenhuis.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:46:32.278401+00:00' - claim_type: email claim_value: mail@grachten.museum raw_value: mail@grachten.museum @@ -875,6 +865,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:32.278754+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Grachtenmuseum diff --git a/data/nde/enriched/entries/0670_Q1616123.yaml b/data/nde/enriched/entries/0670_Q1616123.yaml index 0091941e83..1e77f27290 100644 --- a/data/nde/enriched/entries/0670_Q1616123.yaml +++ b/data/nde/enriched/entries/0670_Q1616123.yaml @@ -1609,18 +1609,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:33.840918+00:00' source_archive: web/0670/hetscheepvaartmuseum.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Het Scheepvaartmuseum Amsterdam - source_url: https://www.hetscheepvaartmuseum.nl/ - retrieved_on: '2025-11-29T16:54:11.742862+00:00' - xpath: /html/head/title - html_file: web/0670/hetscheepvaartmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:33.840063+00:00' - claim_type: description_short claim_value: In Het Scheepvaartmuseum komt alles boven water. Verken 500 jaar maritieme geschiedenis en hoe deze in verbinding staat met de samenleving van @@ -1716,6 +1706,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:33.840819+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het Scheepvaartmuseum diff --git a/data/nde/enriched/entries/0671_Q3049198.yaml b/data/nde/enriched/entries/0671_Q3049198.yaml index ce07087e00..3fcadeb947 100644 --- a/data/nde/enriched/entries/0671_Q3049198.yaml +++ b/data/nde/enriched/entries/0671_Q3049198.yaml @@ -608,18 +608,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:34.089325+00:00' source_archive: web/0671/hetwaalresmuseum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Het Waalres Museum - source_url: http://www.hetwaalresmuseum.nl/ - retrieved_on: '2025-11-29T16:54:14.625679+00:00' - xpath: /html/head/title - html_file: web/0671/hetwaalresmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:34.089099+00:00' - claim_type: email claim_value: hetwaalresmuseum@gmail.com raw_value: hetwaalresmuseum@gmail.com @@ -652,6 +642,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:34.089304+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Waalres Museum diff --git a/data/nde/enriched/entries/0672_Q2335767.yaml b/data/nde/enriched/entries/0672_Q2335767.yaml index 3aa0e6f3c9..c54cb9cb31 100644 --- a/data/nde/enriched/entries/0672_Q2335767.yaml +++ b/data/nde/enriched/entries/0672_Q2335767.yaml @@ -544,18 +544,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:34.564670+00:00' source_archive: web/0672/zijpermuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Zijper Museum | Museum en Informatiepunt - source_url: https://www.zijpermuseum.nl/ - retrieved_on: '2025-11-29T16:58:14.787453+00:00' - xpath: /html/head/title - html_file: web/0672/zijpermuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:34.563713+00:00' - claim_type: description_short claim_value: 'Nieuws 1: Impressie lezing Frans Rikhof “Bescherming Rijksmuseumkunst”; Nieuws 2: Gastexpositie van Inge en Lianne. Mijn bezoek Te doen Het museum Archieven @@ -600,6 +590,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:34.564498+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zijper Museum diff --git a/data/nde/enriched/entries/0680_unknown.yaml b/data/nde/enriched/entries/0680_unknown.yaml index eac8ef98fc..8b23d00368 100644 --- a/data/nde/enriched/entries/0680_unknown.yaml +++ b/data/nde/enriched/entries/0680_unknown.yaml @@ -165,18 +165,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:37.928548+00:00' source_archive: web/0680/historischekringdiemen.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Vereniging Historische Kring Diemen - source_url: https://historischekringdiemen.nl/ - retrieved_on: '2025-11-29T16:58:34.595297+00:00' - xpath: /html/head/title - html_file: web/0680/historischekringdiemen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:37.927879+00:00' - claim_type: description_short claim_value: De Historische Kring Diemen, of ook wel HKD, houd zicht bezig met de geschiedenis van Diemen in kaart te brengen. Artikelen, foto's en meer @@ -249,6 +239,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:37.928459+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Kring Diemen diff --git a/data/nde/enriched/entries/0686_unknown.yaml b/data/nde/enriched/entries/0686_unknown.yaml index 43a9157981..3b215c9b95 100644 --- a/data/nde/enriched/entries/0686_unknown.yaml +++ b/data/nde/enriched/entries/0686_unknown.yaml @@ -200,18 +200,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:40.046588+00:00' source_archive: web/0686/oudakersloot.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Historische Vereniging Oud-Akersloot - source_url: https://www.oudakersloot.nl/ - retrieved_on: '2025-11-29T16:59:25.000536+00:00' - xpath: /html/head/title - html_file: web/0686/oudakersloot.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:40.045401+00:00' - claim_type: description_short claim_value: Historische Vereniging Oud-Akersloot - Historie, schoolfoto's, video's, beeldbank, documenten @@ -298,6 +288,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:40.046258+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Vereniging "Oud-Akersloot" diff --git a/data/nde/enriched/entries/0690_unknown.yaml b/data/nde/enriched/entries/0690_unknown.yaml index 615a2e578c..d4c3d6f48f 100644 --- a/data/nde/enriched/entries/0690_unknown.yaml +++ b/data/nde/enriched/entries/0690_unknown.yaml @@ -239,7 +239,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:41.190283+00:00' source_archive: web/0690/oudstedebroec.eu - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Historische vereniging Oud Stede Broec @@ -291,16 +291,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:41.189964+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.oudstedebroec.eu/ - retrieved_on: '2025-11-29T17:01:49.574066+00:00' - xpath: /html/body/div[2]/div/main/div[3]/div/div[1]/div[1]/div/h1 - html_file: web/0690/oudstedebroec.eu/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:41.190066+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische vereniging Oud Stede Broec diff --git a/data/nde/enriched/entries/0691_unknown.yaml b/data/nde/enriched/entries/0691_unknown.yaml index 0fe18c318b..3eca8a05e1 100644 --- a/data/nde/enriched/entries/0691_unknown.yaml +++ b/data/nde/enriched/entries/0691_unknown.yaml @@ -170,7 +170,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:41.603800+00:00' source_archive: web/0691/lijnendoordetijd.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Lijnen door de Tijd @@ -182,16 +182,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:41.603515+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.lijnendoordetijd.nl/ - retrieved_on: '2025-11-29T17:03:51.422260+00:00' - xpath: /html/body/div[5]/div[2]/div/main/article/header/h1 - html_file: web/0691/lijnendoordetijd.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:41.603768+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Lijnen door de Tijd diff --git a/data/nde/enriched/entries/0695_Q4469762.yaml b/data/nde/enriched/entries/0695_Q4469762.yaml index 8b72a99748..dfa72b68b2 100644 --- a/data/nde/enriched/entries/0695_Q4469762.yaml +++ b/data/nde/enriched/entries/0695_Q4469762.yaml @@ -1040,18 +1040,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:40.392951+00:00' source_archive: web/0695/hhnk.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Hoogheemraadschap Hollands Noorderkwartier - source_url: https://www.hhnk.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0695/hhnk.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:40.391340+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -1122,3 +1112,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:25:40.392625+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0702_Q110907392.yaml b/data/nde/enriched/entries/0702_Q110907392.yaml index 1bb7a07636..a72584b8ef 100644 --- a/data/nde/enriched/entries/0702_Q110907392.yaml +++ b/data/nde/enriched/entries/0702_Q110907392.yaml @@ -531,18 +531,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:40.865601+00:00' source_archive: web/0702/janvisser-museum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Jan Visser Museum - source_url: http://www.janvisser-museum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0702/janvisser-museum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:40.863918+00:00' - claim_type: description_short claim_value: Museum Informatie Welkom bij het Jan Visser Museum. Het Jan Visser Museum is oorspronkelijk gericht op het landbouwverleden tot voor de mechanisatie. @@ -611,3 +601,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:25:40.865396+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0704_Q702726.yaml b/data/nde/enriched/entries/0704_Q702726.yaml index d715372e2c..474cefb636 100644 --- a/data/nde/enriched/entries/0704_Q702726.yaml +++ b/data/nde/enriched/entries/0704_Q702726.yaml @@ -1397,7 +1397,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:44.063535+00:00' source_archive: web/0704/jck.nl - claims_count: 7 + claims_count: 4 claims: - claim_type: org_name claim_value: Joods Museum + junior @@ -1421,36 +1421,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:44.062835+00:00' - - claim_type: address - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:03:20.017426+00:00' - xpath: /html/head/script[2] - html_file: web/0704/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_streetAddress - extraction_timestamp: '2025-12-01T10:46:44.063111+00:00' - - claim_type: postal_code - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:03:20.017426+00:00' - xpath: /html/head/script[2] - html_file: web/0704/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_postalCode - extraction_timestamp: '2025-12-01T10:46:44.063114+00:00' - - claim_type: city - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:03:20.017426+00:00' - xpath: /html/head/script[2] - html_file: web/0704/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_addressLocality - extraction_timestamp: '2025-12-01T10:46:44.063115+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/joodscultureelkwartier raw_value: https://www.facebook.com/joodscultureelkwartier @@ -1471,6 +1441,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:44.063362+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Joods Museum diff --git a/data/nde/enriched/entries/0707_Q18654836.yaml b/data/nde/enriched/entries/0707_Q18654836.yaml index 06581d8074..600e5a5baf 100644 --- a/data/nde/enriched/entries/0707_Q18654836.yaml +++ b/data/nde/enriched/entries/0707_Q18654836.yaml @@ -475,7 +475,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:44.526216+00:00' source_archive: web/0707/huisvanhilde.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Huis van Hilde, ontdek de archeologie van Noord-Holland @@ -487,26 +487,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:44.525288+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: https://huisvanhilde.nl/ - retrieved_on: '2025-11-29T17:03:30.394129+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[1]/svg/title - html_file: web/0707/huisvanhilde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:44.525300+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: https://huisvanhilde.nl/ - retrieved_on: '2025-11-29T17:03:30.394129+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[2]/svg/title - html_file: web/0707/huisvanhilde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:44.525304+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -579,6 +559,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:44.526036+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Huis van Hilde diff --git a/data/nde/enriched/entries/0709_Q17402020.yaml b/data/nde/enriched/entries/0709_Q17402020.yaml index aa7912ac36..64f483ad91 100644 --- a/data/nde/enriched/entries/0709_Q17402020.yaml +++ b/data/nde/enriched/entries/0709_Q17402020.yaml @@ -798,7 +798,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:44.839785+00:00' source_archive: web/0709/defensie.nl - claims_count: 6 + claims_count: 3 claims: - claim_type: org_name claim_value: Defensiemusea @@ -838,36 +838,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_description extraction_timestamp: '2025-12-01T10:46:44.839373+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?url=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea&text=Defensiemusea%20%23defensie - raw_value: https://twitter.com/share?url=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea&text=Defensiemusea%20%23defensie - source_url: http://www.defensie.nl/marinemuseum - retrieved_on: '2025-11-29T17:03:37.241226+00:00' - xpath: /html/body/div/main/div/div[3]/div/div/ul/li[1]/a - html_file: web/0709/defensie.nl/pages/marinemuseum.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:44.839688+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea - source_url: http://www.defensie.nl/marinemuseum - retrieved_on: '2025-11-29T17:03:37.241226+00:00' - xpath: /html/body/div/main/div/div[3]/div/div/ul/li[2]/a - html_file: web/0709/defensie.nl/pages/marinemuseum.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:44.839697+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea&title=Defensiemusea&source=Defensie.nl&summary=De+Stichting+Defensiemusea+is+de+overkoepelende+organisatie+van+3+militaire+musea%3A+het+Nationaal+Militair+Museum%2C+het+Marinemuseum%C2%A0en+het+Mariniersmuseum.+Op+1+januari+2015+is+door+koning+Willem+Alexander+het+predicaat+%27Koninklijk%27+toegekend.+De+stichting+heet+sindsdien+Koninklijke+Stichting+Defensiemusea+%28KSD%29. - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.defensie.nl%2Fonderwerpen%2Fmusea%2Fdefensiemusea&title=Defensiemusea&source=Defensie.nl&summary=De+Stichting+Defensiemusea+is+de+overkoepelende+organisatie+van+3+militaire+musea%3A+het+Nationaal+Militair+Museum%2C+het+Marinemuseum%C2%A0en+het+Mariniersmuseum.+Op+1+januari+2015+is+door+koning+Willem+Alexander+het+predicaat+%27Koninklijk%27+toegekend.+De+stichting+heet+sindsdien+Koninklijke+Stichting+Defensiemusea+%28KSD%29. - source_url: http://www.defensie.nl/marinemuseum - retrieved_on: '2025-11-29T17:03:37.241226+00:00' - xpath: /html/body/div/main/div/div[3]/div/div/ul/li[3]/a - html_file: web/0709/defensie.nl/pages/marinemuseum.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:44.839704+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Marinemuseum diff --git a/data/nde/enriched/entries/0711_Q110671441.yaml b/data/nde/enriched/entries/0711_Q110671441.yaml index d0ffef980d..16c388c1de 100644 --- a/data/nde/enriched/entries/0711_Q110671441.yaml +++ b/data/nde/enriched/entries/0711_Q110671441.yaml @@ -549,7 +549,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:45.106955+00:00' source_archive: web/0711/modemuze.nl - claims_count: 2 + claims_count: 1 claims: - claim_type: org_name claim_value: Welkom bij Modemuze @@ -561,16 +561,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:46:45.105676+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://modemuze.nl/ - retrieved_on: '2025-11-29T17:03:42.665364+00:00' - xpath: /html/body/div/header/div/div/div/div/div[2]/nav/ul/li[6]/div/div/div[2]/div[1]/div/div/div/h1 - html_file: web/0711/modemuze.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:45.106618+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: netwerk Modemuze diff --git a/data/nde/enriched/entries/0718_Q18285904.yaml b/data/nde/enriched/entries/0718_Q18285904.yaml index 6e73994c72..a1dffed55d 100644 --- a/data/nde/enriched/entries/0718_Q18285904.yaml +++ b/data/nde/enriched/entries/0718_Q18285904.yaml @@ -530,7 +530,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:46.780337+00:00' source_archive: web/0718/etersheimerbraak.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Etersheimerbraak @@ -572,16 +572,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:46:46.780184+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/share/r/12LUGM1W382/?mibextid=wwXIfr - raw_value: https://www.facebook.com/share/r/12LUGM1W382/?mibextid=wwXIfr - source_url: https://www.etersheimerbraak.nl/#DikTrom - retrieved_on: '2025-11-29T23:38:32.060792+00:00' - xpath: /html/body/div[6]/div[1]/div/div[3]/div/div/div/div[2]/div[2]/a - html_file: web/0718/etersheimerbraak.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:46:46.780266+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Etersheimerbraak/ raw_value: https://www.facebook.com/Etersheimerbraak/ @@ -592,6 +582,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:46.780273+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het Schooltje van Dik Trom diff --git a/data/nde/enriched/entries/0721_Q277316.yaml b/data/nde/enriched/entries/0721_Q277316.yaml index 593050b433..f3d442480f 100644 --- a/data/nde/enriched/entries/0721_Q277316.yaml +++ b/data/nde/enriched/entries/0721_Q277316.yaml @@ -1550,18 +1550,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:47.854376+00:00' source_archive: web/0721/rembrandthuis.nl - claims_count: 18 + claims_count: 17 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Rembrandthuis - source_url: https://www.rembrandthuis.nl/nl/ - retrieved_on: '2025-11-29T17:05:29.222154+00:00' - xpath: /html/head/title - html_file: web/0721/rembrandthuis.nl/mirror/www.rembrandthuis.nl/nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:47.851521+00:00' - claim_type: org_name claim_value: Toegankelijkheid gereedschappen raw_value: Toegankelijkheid gereedschappen @@ -1734,6 +1724,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:47.854128+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rembrandthuis diff --git a/data/nde/enriched/entries/0726_Q19827882.yaml b/data/nde/enriched/entries/0726_Q19827882.yaml index 64e0e7f879..4bfa615891 100644 --- a/data/nde/enriched/entries/0726_Q19827882.yaml +++ b/data/nde/enriched/entries/0726_Q19827882.yaml @@ -680,18 +680,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:48.363033+00:00' source_archive: web/0726/kaapskil.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum Kaap Skil - source_url: http://www.kaapskil.nl - retrieved_on: '2025-11-29T17:05:39.220270+00:00' - xpath: /html/head/title - html_file: web/0726/kaapskil.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:48.362019+00:00' - claim_type: description_short claim_value: Een uniek museum vol verhalen en avonturen. Bekijk de opgedoken schatten uit scheepswrakken, ontdek de Reede van Texel en stap terug in de tijd in het @@ -756,6 +746,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:48.362867+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Kaap Skil diff --git a/data/nde/enriched/entries/0728_Q493160.yaml b/data/nde/enriched/entries/0728_Q493160.yaml index 731862ac50..b78fba3f5f 100644 --- a/data/nde/enriched/entries/0728_Q493160.yaml +++ b/data/nde/enriched/entries/0728_Q493160.yaml @@ -1244,18 +1244,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:48.661113+00:00' source_archive: web/0728/opsolder.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - keuze pagina - Museum Ons' Lieve Heer op Solder - source_url: https://www.opsolder.nl/ - retrieved_on: '2025-11-29T17:05:42.677403+00:00' - xpath: /html/head/title - html_file: web/0728/opsolder.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:48.660288+00:00' - claim_type: org_name claim_value: Museum Ons' Lieve Heer op Solder raw_value: Museum Ons' Lieve Heer op Solder @@ -1316,6 +1306,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:48.660952+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Ons' Lieve Heer op Solder diff --git a/data/nde/enriched/entries/0733_unknown.yaml b/data/nde/enriched/entries/0733_unknown.yaml index 0e1c7722a6..5a816a8b4d 100644 --- a/data/nde/enriched/entries/0733_unknown.yaml +++ b/data/nde/enriched/entries/0733_unknown.yaml @@ -315,18 +315,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:49.966345+00:00' source_archive: web/0733/tantejaantje.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Tante Jaantje - source_url: https://tantejaantje.nl/ - retrieved_on: '2025-11-29T17:06:26.989513+00:00' - xpath: /html/head/title - html_file: web/0733/tantejaantje.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:49.965571+00:00' - claim_type: org_name claim_value: Tante Jaantje raw_value: Tante Jaantje @@ -357,6 +347,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:49.966159+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museumboerderij Tante Jaantje diff --git a/data/nde/enriched/entries/0746_Q474823.yaml b/data/nde/enriched/entries/0746_Q474823.yaml index 00e94e9b41..9774f064b2 100644 --- a/data/nde/enriched/entries/0746_Q474823.yaml +++ b/data/nde/enriched/entries/0746_Q474823.yaml @@ -1754,18 +1754,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:53.170772+00:00' source_archive: web/0746/niod.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - NIOD - source_url: https://www.niod.nl/en - retrieved_on: '2025-11-29T17:08:14.011446+00:00' - xpath: /html/head/title - html_file: web/0746/niod.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:53.169232+00:00' - claim_type: description_short claim_value: Het NIOD Instituut voor Oorlogs-, Holocaust- en Genocidestudies is een nationaal en internationaal expertisecentrum voor interdisciplinair onderzoek @@ -1784,6 +1774,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:53.169424+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: NIOD Instituut voor Oorlogs-, Holocaust- en Genocidestudies diff --git a/data/nde/enriched/entries/0749_Q126174339.yaml b/data/nde/enriched/entries/0749_Q126174339.yaml index 2cb4ee735e..d85894757f 100644 --- a/data/nde/enriched/entries/0749_Q126174339.yaml +++ b/data/nde/enriched/entries/0749_Q126174339.yaml @@ -407,18 +407,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:53.673343+00:00' source_archive: web/0749/odnzkg.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Omgevingsdienst Noordzeekanaalgebied - source_url: https://www.odnzkg.nl - retrieved_on: '2025-11-29T17:08:23.562527+00:00' - xpath: /html/head/title - html_file: web/0749/odnzkg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:53.671967+00:00' - claim_type: description_short claim_value: Omgevingsdienst Noordzeekanaalgebied Voor 3 provincies en 8 gemeenten voeren we vergunning- en toezichttaken uit bij bedrijven op het gebied van milieu, @@ -517,6 +507,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:53.673151+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Omgevingsdienst Noordzeekanaalgebied diff --git a/data/nde/enriched/entries/0750_unknown.yaml b/data/nde/enriched/entries/0750_unknown.yaml index a1b28c59c5..972097406a 100644 --- a/data/nde/enriched/entries/0750_unknown.yaml +++ b/data/nde/enriched/entries/0750_unknown.yaml @@ -366,18 +366,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:54.132842+00:00' source_archive: web/0750/oorlogsmuseummedemblik.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Oorlogsmuseum Medemblik - source_url: https://www.oorlogsmuseummedemblik.nl/ - retrieved_on: '2025-11-29T17:15:56.834443+00:00' - xpath: /html/head/title - html_file: web/0750/oorlogsmuseummedemblik.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:54.131507+00:00' - claim_type: description_short claim_value: Tickets Prijzen & Openingstijden Arrangementen Onderwijs De combinatie escaperoom achtige opdrachten met praktisch zelf in een invasieboot/truck is @@ -436,6 +426,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:54.132481+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oorlogsmuseum Medemblik diff --git a/data/nde/enriched/entries/0755_Q701.yaml b/data/nde/enriched/entries/0755_Q701.yaml index 554be6d551..4ef40e511b 100644 --- a/data/nde/enriched/entries/0755_Q701.yaml +++ b/data/nde/enriched/entries/0755_Q701.yaml @@ -3790,18 +3790,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:56.018199+00:00' source_archive: web/0755/noord-holland.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.noord-holland.nl/ - retrieved_on: '2025-11-29T17:16:16.631233+00:00' - xpath: /html/head/title - html_file: web/0755/noord-holland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:56.017409+00:00' - claim_type: description_short claim_value: De provincie Noord-Holland werkt aan een duurzame, bereikbare, leefbare en innovatieve provincie. @@ -3874,6 +3864,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:56.018062+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Provincie Noord-Holland diff --git a/data/nde/enriched/entries/0757_Q2189005.yaml b/data/nde/enriched/entries/0757_Q2189005.yaml index b7ce07061b..e71f556361 100644 --- a/data/nde/enriched/entries/0757_Q2189005.yaml +++ b/data/nde/enriched/entries/0757_Q2189005.yaml @@ -633,18 +633,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:56.318625+00:00' source_archive: web/0757/regionaalarchiefalkmaar.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Regionaal Archief Alkmaar - source_url: http://www.regionaalarchiefalkmaar.nl/ - retrieved_on: '2025-11-29T17:16:20.097108+00:00' - xpath: /html/head/title - html_file: web/0757/regionaalarchiefalkmaar.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:56.318097+00:00' - claim_type: description_short claim_value: Regionaal Archief te Alkmaar, intergemeentelijk samenwerkingsverband in Noord-Kennemerland, westelijk West-Friesland en de Kop van Noord-Holland. @@ -713,6 +703,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:56.318497+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regionaal Archief Alkmaar diff --git a/data/nde/enriched/entries/0759_Q190804.yaml b/data/nde/enriched/entries/0759_Q190804.yaml index 94226b4a8b..de3438d02d 100644 --- a/data/nde/enriched/entries/0759_Q190804.yaml +++ b/data/nde/enriched/entries/0759_Q190804.yaml @@ -4233,7 +4233,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:46:56.750160+00:00' source_archive: web/0759/rijksmuseum.nl - claims_count: 8 + claims_count: 7 claims: - claim_type: org_name claim_value: Rijksmuseum, hét museum van Nederland @@ -4259,16 +4259,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:56.749524+00:00' - - claim_type: org_name - claim_value: Rijksmuseum.nl - raw_value: Rijksmuseum.nl - source_url: https://www.rijksmuseum.nl/ - retrieved_on: '2025-11-29T17:16:25.944763+00:00' - xpath: /html/head/meta[8] - html_file: web/0759/rijksmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:46:56.749699+00:00' - claim_type: social_instagram claim_value: https://www.instagram.com/rijksmuseum/ raw_value: https://www.instagram.com/rijksmuseum/ @@ -4319,6 +4309,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:46:56.750051+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijksmuseum Amsterdam diff --git a/data/nde/enriched/entries/0760_Q113006081.yaml b/data/nde/enriched/entries/0760_Q113006081.yaml index 3e49053f8e..b4b037865d 100644 --- a/data/nde/enriched/entries/0760_Q113006081.yaml +++ b/data/nde/enriched/entries/0760_Q113006081.yaml @@ -852,18 +852,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:56.993052+00:00' source_archive: web/0760/muiderslot.nl - claims_count: 9 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Muiderslot - raw_value: Muiderslot - Muiderslot - source_url: http://www.muiderslot.nl/ - retrieved_on: '2025-11-29T17:16:31.806339+00:00' - xpath: /html/head/title - html_file: web/0760/muiderslot.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:56.991165+00:00' - claim_type: email claim_value: info@muiderslot.nl raw_value: info@muiderslot.nl @@ -934,16 +924,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:56.992545+00:00' - - claim_type: org_name - claim_value: sinterklaas op het Muiderslot - raw_value: sinterklaas op het Muiderslot - source_url: http://www.muiderslot.nl/ - retrieved_on: '2025-11-29T17:16:31.806339+00:00' - xpath: /html/body/div[1]/main/div/div/div[1]/div[1]/div/div[2]/div[2]/div/div/div/div/div/div/div/h1 - html_file: web/0760/muiderslot.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:46:56.992802+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijksmuseum Muiderslot diff --git a/data/nde/enriched/entries/0764_Q2170763.yaml b/data/nde/enriched/entries/0764_Q2170763.yaml index 77974ca3f9..3d83162de2 100644 --- a/data/nde/enriched/entries/0764_Q2170763.yaml +++ b/data/nde/enriched/entries/0764_Q2170763.yaml @@ -1576,7 +1576,7 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:44.028921+00:00' source_archive: web/0764/amsterdam.nl - claims_count: 13 + claims_count: 10 claims: - claim_type: org_name claim_value: Welkom bij Stadsarchief Amsterdam @@ -1620,36 +1620,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:25:44.027878+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/share.php?u=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - raw_value: https://www.facebook.com/share.php?u=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - source_url: https://www.amsterdam.nl/stadsarchief - retrieved_on: '' - xpath: /html/body/div[2]/div[5]/div/div/div[2]/div/div[2]/a - html_file: web/0764/amsterdam.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:44.028571+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Welkom%20bij%20Stadsarchief%20Amsterdam&url=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - raw_value: https://twitter.com/intent/tweet?text=Welkom%20bij%20Stadsarchief%20Amsterdam&url=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - source_url: https://www.amsterdam.nl/stadsarchief - retrieved_on: '' - xpath: /html/body/div[2]/div[5]/div/div/div[2]/div/div[3]/a - html_file: web/0764/amsterdam.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:44.028580+00:00' - - claim_type: social_linkedin - claim_value: http://www.linkedin.com/shareArticle?mini=true&title=Welkom%20bij%20Stadsarchief%20Amsterdam&url=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - raw_value: http://www.linkedin.com/shareArticle?mini=true&title=Welkom%20bij%20Stadsarchief%20Amsterdam&url=https%3a%2f%2fwww.amsterdam.nl%2fstadsarchief - source_url: https://www.amsterdam.nl/stadsarchief - retrieved_on: '' - xpath: /html/body/div[2]/div[5]/div/div/div[2]/div/div[4]/a - html_file: web/0764/amsterdam.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:25:44.028586+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/Stadsarchief raw_value: https://www.facebook.com/Stadsarchief @@ -1710,3 +1680,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:25:44.028707+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0771_Q50038175.yaml b/data/nde/enriched/entries/0771_Q50038175.yaml index bd040e6c8d..63fef458cb 100644 --- a/data/nde/enriched/entries/0771_Q50038175.yaml +++ b/data/nde/enriched/entries/0771_Q50038175.yaml @@ -334,18 +334,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:58.620007+00:00' source_archive: web/0771/gbwhaarlem.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Burgerweeshuis Haarlem - source_url: https://www.gbwhaarlem.nl/ - retrieved_on: '2025-11-29T17:17:32.906372+00:00' - xpath: /html/head/title - html_file: web/0771/gbwhaarlem.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:46:58.619555+00:00' - claim_type: org_name claim_value: Stichting Burgerweeshuis Haarlem raw_value: Stichting Burgerweeshuis Haarlem @@ -356,6 +346,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:46:58.619689+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Gereformeerd of Burgerweeshuis te Haarlem diff --git a/data/nde/enriched/entries/0778_Q702726.yaml b/data/nde/enriched/entries/0778_Q702726.yaml index 6398bc9ea3..b0c411b420 100644 --- a/data/nde/enriched/entries/0778_Q702726.yaml +++ b/data/nde/enriched/entries/0778_Q702726.yaml @@ -1372,7 +1372,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:46:59.437548+00:00' source_archive: web/0778/jck.nl - claims_count: 7 + claims_count: 4 claims: - claim_type: org_name claim_value: Joods Museum + junior @@ -1396,36 +1396,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:46:59.436963+00:00' - - claim_type: address - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:17:47.784542+00:00' - xpath: /html/head/script[2] - html_file: web/0778/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_streetAddress - extraction_timestamp: '2025-12-01T10:46:59.437191+00:00' - - claim_type: postal_code - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:17:47.784542+00:00' - xpath: /html/head/script[2] - html_file: web/0778/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_postalCode - extraction_timestamp: '2025-12-01T10:46:59.437193+00:00' - - claim_type: city - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '2025-11-29T17:17:47.784542+00:00' - xpath: /html/head/script[2] - html_file: web/0778/jck.nl/pages/locatie_joods-museum.tmp.html - xpath_match_score: 1.0 - extraction_method: schema_org_addressLocality - extraction_timestamp: '2025-12-01T10:46:59.437195+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/joodscultureelkwartier raw_value: https://www.facebook.com/joodscultureelkwartier @@ -1446,6 +1416,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:46:59.437379+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Joods Museum diff --git a/data/nde/enriched/entries/0790_Q122922125.yaml b/data/nde/enriched/entries/0790_Q122922125.yaml index 8390b5248e..d5be144e37 100644 --- a/data/nde/enriched/entries/0790_Q122922125.yaml +++ b/data/nde/enriched/entries/0790_Q122922125.yaml @@ -493,18 +493,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:03.289456+00:00' source_archive: web/0790/notarielestichting.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting tot Bevordering der Notariële Wetenschap - source_url: https://notarielestichting.nl/ - retrieved_on: '2025-11-29T17:22:30.410880+00:00' - xpath: /html/head/title - html_file: web/0790/notarielestichting.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:03.288434+00:00' - claim_type: org_name claim_value: Stichting tot Bevordering der Notariële Wetenschap raw_value: Stichting tot Bevordering der Notariële Wetenschap @@ -525,6 +515,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:03.289137+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting tot Bevordering der Notariële Wetenschap diff --git a/data/nde/enriched/entries/0792_unknown.yaml b/data/nde/enriched/entries/0792_unknown.yaml index c8eff99089..aaedc77e4a 100644 --- a/data/nde/enriched/entries/0792_unknown.yaml +++ b/data/nde/enriched/entries/0792_unknown.yaml @@ -254,18 +254,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:04.065655+00:00' source_archive: web/0792/traditiekamermld.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Traditiekamer Marineluchtvaartdienst - source_url: https://www.traditiekamermld.nl/ - retrieved_on: '2025-11-29T17:38:03.422459+00:00' - xpath: /html/head/title - html_file: web/0792/traditiekamermld.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:04.064327+00:00' - claim_type: description_short claim_value: Stichting vrienden van de Traditiekamer Marineluchtvaartdienst De Stichting De Stichting Vrienden van de Traditiekamer Marineluchtvaartdienst @@ -306,6 +296,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:47:04.064890+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Traditiekamer Marineluchtvaartdienst diff --git a/data/nde/enriched/entries/0807_Q14856958.yaml b/data/nde/enriched/entries/0807_Q14856958.yaml index d6295220d5..004f91708f 100644 --- a/data/nde/enriched/entries/0807_Q14856958.yaml +++ b/data/nde/enriched/entries/0807_Q14856958.yaml @@ -656,18 +656,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:06.985580+00:00' source_archive: web/0807/weegschaalmuseum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Weegschaalmuseum - source_url: http://www.weegschaalmuseum.nl - retrieved_on: '2025-11-29T17:22:38.912590+00:00' - xpath: /html/head/title - html_file: web/0807/weegschaalmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:06.984745+00:00' - claim_type: description_short claim_value: Welkom In het dagelijks leven staan wij niet stil bij de herkomst van de kilo of de meter. Wat is ijken precies en wat heeft Napoleon hier mee @@ -704,6 +694,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:06.985462+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Weegschaalmuseum diff --git a/data/nde/enriched/entries/0816_Q2335767.yaml b/data/nde/enriched/entries/0816_Q2335767.yaml index be8b1df873..c6bd514678 100644 --- a/data/nde/enriched/entries/0816_Q2335767.yaml +++ b/data/nde/enriched/entries/0816_Q2335767.yaml @@ -540,18 +540,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:09.309364+00:00' source_archive: web/0816/zijpermuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Zijper Museum | Museum en Informatiepunt - source_url: https://www.zijpermuseum.nl/ - retrieved_on: '2025-11-29T17:30:12.465133+00:00' - xpath: /html/head/title - html_file: web/0816/zijpermuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:09.308057+00:00' - claim_type: description_short claim_value: 'Nieuws 1: Impressie lezing Frans Rikhof “Bescherming Rijksmuseumkunst”; Nieuws 2: Gastexpositie van Inge en Lianne. Mijn bezoek Te doen Het museum Archieven @@ -596,6 +586,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:09.309183+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zijper Museum diff --git a/data/nde/enriched/entries/0821_unknown.yaml b/data/nde/enriched/entries/0821_unknown.yaml index f7b0143252..a1dc867699 100644 --- a/data/nde/enriched/entries/0821_unknown.yaml +++ b/data/nde/enriched/entries/0821_unknown.yaml @@ -315,7 +315,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:10.829272+00:00' source_archive: web/0821/theneedforlegacy.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: THE NEED FOR LEGACY @@ -348,16 +348,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:10.829205+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.theneedforlegacy.nl/ - retrieved_on: '2025-11-29T17:28:39.531190+00:00' - xpath: /html/body/div[2]/div/div/article/div[1]/div[1]/h1 - html_file: web/0821/theneedforlegacy.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:47:10.829234+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: THE NEED FOR LEGACY diff --git a/data/nde/enriched/entries/0840_Q110995895.yaml b/data/nde/enriched/entries/0840_Q110995895.yaml index 09c55f2558..203c5a3c35 100644 --- a/data/nde/enriched/entries/0840_Q110995895.yaml +++ b/data/nde/enriched/entries/0840_Q110995895.yaml @@ -551,18 +551,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:14.969497+00:00' source_archive: web/0840/museum-ommen.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historisch Museum Ommen - source_url: https://www.museum-ommen.nl/ - retrieved_on: '2025-11-29T17:33:31.648353+00:00' - xpath: /html/head/title - html_file: web/0840/museum-ommen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:14.967166+00:00' - claim_type: org_name claim_value: Historisch Museum Ommen raw_value: Historisch Museum Ommen @@ -633,6 +623,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:14.968745+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Cultuurhistorisch Centrum Ommen diff --git a/data/nde/enriched/entries/0842_Q110891772.yaml b/data/nde/enriched/entries/0842_Q110891772.yaml index 3e2455eda3..1cb107c691 100644 --- a/data/nde/enriched/entries/0842_Q110891772.yaml +++ b/data/nde/enriched/entries/0842_Q110891772.yaml @@ -439,18 +439,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:15.768313+00:00' source_archive: web/0842/ommen.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Ommen - source_url: http://www.ommen.nl/ - retrieved_on: '2025-11-29T23:43:36.620918+00:00' - xpath: /html/head/title - html_file: web/0842/ommen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.767681+00:00' - claim_type: description_short claim_value: Officiële website van gemeente Ommen. Hier vindt u alle informatie, nieuwsberichten, bestuur en dienstverlening van gemeente Ommen. @@ -543,6 +533,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:15.768187+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De twintigste eeuw en de gemeente Ommen diff --git a/data/nde/enriched/entries/0843_Q85311353.yaml b/data/nde/enriched/entries/0843_Q85311353.yaml index 6a31cc6773..c40d5edf43 100644 --- a/data/nde/enriched/entries/0843_Q85311353.yaml +++ b/data/nde/enriched/entries/0843_Q85311353.yaml @@ -520,58 +520,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:15.919838+00:00' source_archive: web/0843/deventerverhaal.nl - claims_count: 13 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Deventer Verhaal - source_url: https://deventerverhaal.nl/ - retrieved_on: '2025-11-29T17:33:34.455085+00:00' - xpath: /html/head/title - html_file: web/0843/deventerverhaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.918854+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://deventerverhaal.nl/ - retrieved_on: '2025-11-29T17:33:34.455085+00:00' - xpath: /html/body/footer/div[2]/div/div/a[1]/svg/title - html_file: web/0843/deventerverhaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.918867+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://deventerverhaal.nl/ - retrieved_on: '2025-11-29T17:33:34.455085+00:00' - xpath: /html/body/footer/div[2]/div/div/a[2]/svg/title - html_file: web/0843/deventerverhaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.918872+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://deventerverhaal.nl/ - retrieved_on: '2025-11-29T17:33:34.455085+00:00' - xpath: /html/body/footer/div[2]/div/div/a[3]/svg/title - html_file: web/0843/deventerverhaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.918876+00:00' - - claim_type: org_name - claim_value: YouTube - raw_value: YouTube - source_url: https://deventerverhaal.nl/ - retrieved_on: '2025-11-29T17:33:34.455085+00:00' - xpath: /html/body/footer/div[2]/div/div/a[4]/svg/title - html_file: web/0843/deventerverhaal.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:15.918880+00:00' - claim_type: email claim_value: annevangeuns@deventerhaal.nl raw_value: annevangeuns@deventerhaal.nl @@ -652,6 +602,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:15.919737+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Deventer Verhaal diff --git a/data/nde/enriched/entries/0847_Q110891825.yaml b/data/nde/enriched/entries/0847_Q110891825.yaml index a1e0fe9b29..bfef9e4a3c 100644 --- a/data/nde/enriched/entries/0847_Q110891825.yaml +++ b/data/nde/enriched/entries/0847_Q110891825.yaml @@ -301,18 +301,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:16.397973+00:00' source_archive: web/0847/enschede.nl - claims_count: 14 + claims_count: 13 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Enschede - source_url: http://www.enschede.nl/ - retrieved_on: '2025-11-29T23:42:18.266943+00:00' - xpath: /html/head/title - html_file: web/0847/enschede.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:16.396722+00:00' - claim_type: org_name claim_value: parkeervergunning icon raw_value: parkeervergunning icon @@ -443,6 +433,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:47:16.397548+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Enschede in WO2 diff --git a/data/nde/enriched/entries/0849_unknown.yaml b/data/nde/enriched/entries/0849_unknown.yaml index 904c89c3cc..792f359987 100644 --- a/data/nde/enriched/entries/0849_unknown.yaml +++ b/data/nde/enriched/entries/0849_unknown.yaml @@ -311,18 +311,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:16.865258+00:00' source_archive: web/0849/erfgoedrijssenholten.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://erfgoedrijssenholten.nl/ - retrieved_on: '2025-11-29T17:33:45.143071+00:00' - xpath: /html/head/title - html_file: web/0849/erfgoedrijssenholten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:16.864853+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -375,6 +365,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:16.865185+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Erfgoed Rijssen-Holten diff --git a/data/nde/enriched/entries/0850_Q76885143.yaml b/data/nde/enriched/entries/0850_Q76885143.yaml index 2bb8ce32de..b61fb48719 100644 --- a/data/nde/enriched/entries/0850_Q76885143.yaml +++ b/data/nde/enriched/entries/0850_Q76885143.yaml @@ -484,18 +484,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:16.961961+00:00' source_archive: web/0850/erfgoedcentrumzutphen.nl - claims_count: 12 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Erfgoedcentrum Zutphen - source_url: https://erfgoedcentrumzutphen.nl/ - retrieved_on: '2025-11-29T17:33:46.006659+00:00' - xpath: /html/head/title - html_file: web/0850/erfgoedcentrumzutphen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:16.961256+00:00' - claim_type: description_short claim_value: 'Het Erfgoedcentrum Zutphen: historisch hart van de regio! Eén plek voor onze vier erfgoedpartners: Archeologie, Monumentenzorg, Musea Zutphen en @@ -610,6 +600,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:16.961902+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Erfgoedcentrum Zutphen diff --git a/data/nde/enriched/entries/0853_Q110891801.yaml b/data/nde/enriched/entries/0853_Q110891801.yaml index dd0954f978..978686ff20 100644 --- a/data/nde/enriched/entries/0853_Q110891801.yaml +++ b/data/nde/enriched/entries/0853_Q110891801.yaml @@ -415,7 +415,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:17.846834+00:00' source_archive: web/0853/musicsupport.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Music Support @@ -495,16 +495,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:17.846473+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.musicsupport.nl/ - retrieved_on: '2025-11-29T23:43:55.558526+00:00' - xpath: /html/body/div[1]/div/section/div/div/div/div[1]/h1 - html_file: web/0853/musicsupport.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:47:17.846556+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Flip's' Music Heino diff --git a/data/nde/enriched/entries/0856_Q110891796.yaml b/data/nde/enriched/entries/0856_Q110891796.yaml index bbbc73632e..f1969adc25 100644 --- a/data/nde/enriched/entries/0856_Q110891796.yaml +++ b/data/nde/enriched/entries/0856_Q110891796.yaml @@ -441,18 +441,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:18.487733+00:00' source_archive: web/0856/borne.nl - claims_count: 23 + claims_count: 22 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Borne - source_url: https://www.borne.nl/gemeentearchief-borne - retrieved_on: '2025-11-29T17:34:14.799159+00:00' - xpath: /html/head/title - html_file: web/0856/borne.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:18.487122+00:00' - claim_type: org_name claim_value: externe-link-icoon raw_value: externe-link-icoon @@ -673,6 +663,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:18.487614+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Borne diff --git a/data/nde/enriched/entries/0857_Q81181239.yaml b/data/nde/enriched/entries/0857_Q81181239.yaml index 5af3f2b99c..fde83f3631 100644 --- a/data/nde/enriched/entries/0857_Q81181239.yaml +++ b/data/nde/enriched/entries/0857_Q81181239.yaml @@ -449,18 +449,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:18.616848+00:00' source_archive: web/0857/sabinfo.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Sabinfo.nl - raw_value: Sabinfo.nl - Een overzicht van de mooiste links voor jou - source_url: http://www.sabinfo.nl - retrieved_on: '2025-11-29T17:34:16.512228+00:00' - xpath: /html/head/title - html_file: web/0857/sabinfo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:18.615691+00:00' - claim_type: description_short claim_value: Op zoek naar een bron voor informatie? Hier vind je de beste websites. Van technologie tot lifestyle, hier vind je de links die je nodig hebt voor @@ -475,6 +465,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:18.615812+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stadsarchief Deventer diff --git a/data/nde/enriched/entries/0860_unknown.yaml b/data/nde/enriched/entries/0860_unknown.yaml index 40f4269bc0..f3f0c7d1c4 100644 --- a/data/nde/enriched/entries/0860_unknown.yaml +++ b/data/nde/enriched/entries/0860_unknown.yaml @@ -324,7 +324,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:19.242813+00:00' source_archive: web/0860/olst-wijhe.nl - claims_count: 11 + claims_count: 10 claims: - claim_type: org_name claim_value: Inwoners @@ -428,16 +428,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:19.242685+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=http%3A%2F%2Fwww.olst-wijhe.nl%2Finwoners&t=Inwoners - raw_value: https://www.facebook.com/sharer.php?u=http%3A%2F%2Fwww.olst-wijhe.nl%2Finwoners&t=Inwoners - source_url: https://www.olst-wijhe.nl/gemeentelijkarchief - retrieved_on: '2025-11-29T17:34:22.099033+00:00' - xpath: /html/body/div[5]/div/div/a[2] - html_file: web/0860/olst-wijhe.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:19.242689+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Olst-Wijhe diff --git a/data/nde/enriched/entries/0862_Q121224964.yaml b/data/nde/enriched/entries/0862_Q121224964.yaml index f2069f3d59..0b3e9f61ed 100644 --- a/data/nde/enriched/entries/0862_Q121224964.yaml +++ b/data/nde/enriched/entries/0862_Q121224964.yaml @@ -423,18 +423,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:19.576760+00:00' source_archive: web/0862/staphorst.nl - claims_count: 11 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Staphorst - source_url: https://www.staphorst.nl/gemeentearchief - retrieved_on: '2025-11-29T17:34:24.308444+00:00' - xpath: /html/head/title - html_file: web/0862/staphorst.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:19.575532+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -537,6 +527,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:19.576323+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Staphorst diff --git a/data/nde/enriched/entries/0866_Q121225125.yaml b/data/nde/enriched/entries/0866_Q121225125.yaml index ce46644ddc..4b225d31f8 100644 --- a/data/nde/enriched/entries/0866_Q121225125.yaml +++ b/data/nde/enriched/entries/0866_Q121225125.yaml @@ -429,7 +429,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:19.940502+00:00' source_archive: web/0866/hengelo.nl - claims_count: 11 + claims_count: 8 claims: - claim_type: org_name claim_value: Inwoners @@ -461,38 +461,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:47:19.940085+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://www.hengelo.nl/Inwoners - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://www.hengelo.nl/Inwoners - source_url: https://www.hengelo.nl/ - retrieved_on: '2025-11-29T23:43:56.464319+00:00' - xpath: /html/body/div[2]/div[4]/div/div[1]/a - html_file: web/0866/hengelo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:19.940208+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=https://www.hengelo.nl/Inwoners - Inwoners - raw_value: https://twitter.com/intent/tweet?text=https://www.hengelo.nl/Inwoners - Inwoners - source_url: https://www.hengelo.nl/ - retrieved_on: '2025-11-29T23:43:56.464319+00:00' - xpath: /html/body/div[2]/div[4]/div/div[3]/a - html_file: web/0866/hengelo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:19.940214+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https://www.hengelo.nl/Inwoners&title=Inwoners - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https://www.hengelo.nl/Inwoners&title=Inwoners - source_url: https://www.hengelo.nl/ - retrieved_on: '2025-11-29T23:43:56.464319+00:00' - xpath: /html/body/div[2]/div[4]/div/div[5]/a - html_file: web/0866/hengelo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:19.940219+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/gemeentehengelo raw_value: https://www.facebook.com/gemeentehengelo @@ -543,6 +511,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:19.940332+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Hengelo diff --git a/data/nde/enriched/entries/0870_Q121224972.yaml b/data/nde/enriched/entries/0870_Q121224972.yaml index 3a2b27b42e..102cfea4f8 100644 --- a/data/nde/enriched/entries/0870_Q121224972.yaml +++ b/data/nde/enriched/entries/0870_Q121224972.yaml @@ -414,18 +414,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:20.558263+00:00' source_archive: web/0870/wierden.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Wierden - source_url: https://www.wierden.nl/gemeentearchief - retrieved_on: '2025-11-29T17:34:38.054608+00:00' - xpath: /html/head/title - html_file: web/0870/wierden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:20.557408+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -498,6 +488,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:20.558049+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Wierden diff --git a/data/nde/enriched/entries/0873_Q110891812.yaml b/data/nde/enriched/entries/0873_Q110891812.yaml index 62563f0bf0..b2268727cb 100644 --- a/data/nde/enriched/entries/0873_Q110891812.yaml +++ b/data/nde/enriched/entries/0873_Q110891812.yaml @@ -343,18 +343,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:21.070573+00:00' source_archive: web/0873/dorpshuislutten.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Dorpshuis Lutten - source_url: https://dorpshuislutten.nl/ - retrieved_on: '2025-11-29T23:44:14.717591+00:00' - xpath: /html/head/title - html_file: web/0873/dorpshuislutten.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:21.069863+00:00' - claim_type: description_short claim_value: Welkom op de website van dorpshuis Lutten. Een multifuncioneel centrum in Lutten. Er zijn zaaltjes te huur en diverse werkgroepen hebben hier hun onderkomen. @@ -407,6 +397,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:21.070520+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gouwe Buurt Lutten diff --git a/data/nde/enriched/entries/0883_Q13726962.yaml b/data/nde/enriched/entries/0883_Q13726962.yaml index b5f864ec23..1bc51ed667 100644 --- a/data/nde/enriched/entries/0883_Q13726962.yaml +++ b/data/nde/enriched/entries/0883_Q13726962.yaml @@ -591,18 +591,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:22.693037+00:00' source_archive: web/0883/museumhengelo.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Hengelo - source_url: http://www.museumhengelo.nl - retrieved_on: '2025-11-29T17:34:45.670564+00:00' - xpath: /html/head/title - html_file: web/0883/museumhengelo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:22.692434+00:00' - claim_type: org_name claim_value: Museum Hengelo raw_value: Museum Hengelo @@ -643,6 +633,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:22.692970+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Museum Hengelo diff --git a/data/nde/enriched/entries/0890_Q110891816.yaml b/data/nde/enriched/entries/0890_Q110891816.yaml index 2ad039157e..552cabe40f 100644 --- a/data/nde/enriched/entries/0890_Q110891816.yaml +++ b/data/nde/enriched/entries/0890_Q110891816.yaml @@ -420,18 +420,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:24.323168+00:00' source_archive: web/0890/historischekringhaaksbergen.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historische Kring Haaksbergen - source_url: http://www.historischekringhaaksbergen.nl/ - retrieved_on: '2025-11-29T23:50:41.734829+00:00' - xpath: /html/head/title - html_file: web/0890/historischekringhaaksbergen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:24.321138+00:00' - claim_type: description_short claim_value: Welkom bij de Historische Kring Haaksbergen. Wij organiseren regelmatig leuke en leerzame activiteiten. Benieuwd wat er binnenkort te doen is? @@ -484,6 +474,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:24.322782+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Kring Haaksbergen diff --git a/data/nde/enriched/entries/0893_unknown.yaml b/data/nde/enriched/entries/0893_unknown.yaml index 07dfbd3286..e09d775274 100644 --- a/data/nde/enriched/entries/0893_unknown.yaml +++ b/data/nde/enriched/entries/0893_unknown.yaml @@ -313,19 +313,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:25.523933+00:00' source_archive: web/0893/weblog.oudommen.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: OudOmmen.nl - raw_value: OudOmmen.nl | Webarchief voor de gemeente Ommen, de plek waar de geschiedenis - van de regio Ommen samen komt. - source_url: https://weblog.oudommen.nl - retrieved_on: '2025-11-29T17:38:16.781043+00:00' - xpath: /html/head/title - html_file: web/0893/weblog.oudommen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:25.522914+00:00' - claim_type: description_short claim_value: Webarchief voor de gemeente Ommen, de plek waar de geschiedenis van de regio Ommen samen komt. @@ -368,6 +357,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:25.523735+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum-Ommen diff --git a/data/nde/enriched/entries/0900_Q110891747.yaml b/data/nde/enriched/entries/0900_Q110891747.yaml index b4dea68b64..4c9e95e642 100644 --- a/data/nde/enriched/entries/0900_Q110891747.yaml +++ b/data/nde/enriched/entries/0900_Q110891747.yaml @@ -551,18 +551,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:27.573292+00:00' source_archive: web/0900/olstererfgoed.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME | Olstererfgoed - source_url: http://www.olstererfgoed.nl/ - retrieved_on: '2025-11-29T23:51:12.240979+00:00' - xpath: /html/head/title - html_file: web/0900/olstererfgoed.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:27.572558+00:00' - claim_type: org_name claim_value: Olstererfgoed raw_value: Olstererfgoed @@ -587,6 +577,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:27.573025+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Vereniging 't Olster Erfgoed diff --git a/data/nde/enriched/entries/0912_Q110891756.yaml b/data/nde/enriched/entries/0912_Q110891756.yaml index 33538f2176..e6977d5587 100644 --- a/data/nde/enriched/entries/0912_Q110891756.yaml +++ b/data/nde/enriched/entries/0912_Q110891756.yaml @@ -374,7 +374,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:30.479723+00:00' source_archive: web/0912/historiezwartsluis.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Historische Vereniging Zwartsluis @@ -398,16 +398,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:30.479366+00:00' - - claim_type: org_name - claim_value: HistorischeVerenigingZwartsluis.nl - raw_value: HistorischeVerenigingZwartsluis.nl - source_url: https://historiezwartsluis.nl/ - retrieved_on: '2025-11-29T23:51:24.954993+00:00' - xpath: /html/head/meta[10] - html_file: web/0912/historiezwartsluis.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:47:30.479482+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/historiezwartsluis/ raw_value: https://www.facebook.com/historiezwartsluis/ @@ -418,6 +408,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:30.479634+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Vereniging Zwartsluis diff --git a/data/nde/enriched/entries/0914_unknown.yaml b/data/nde/enriched/entries/0914_unknown.yaml index 49e814e1ea..59701c583e 100644 --- a/data/nde/enriched/entries/0914_unknown.yaml +++ b/data/nde/enriched/entries/0914_unknown.yaml @@ -331,18 +331,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:31.029924+00:00' source_archive: web/0914/proxy.archieven.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Archieven.nl - raw_value: Archieven.nl - 0964 Huisarchief Weldam, 1438-1920 (Westfries Archief) - source_url: https://proxy.archieven.nl/0/F8BA43C796AD4BEC97A456978826D3AD - retrieved_on: '2025-11-29T17:38:07.432449+00:00' - xpath: /html/head/title - html_file: web/0914/proxy.archieven.nl/pages/0_F8BA43C796AD4BEC97A456978826D3AD.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:31.028828+00:00' - claim_type: org_name claim_value: organisatie_link-svg raw_value: organisatie_link-svg @@ -385,6 +375,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:31.029781+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Weldam diff --git a/data/nde/enriched/entries/0916_Q110891804.yaml b/data/nde/enriched/entries/0916_Q110891804.yaml index c634ad6245..6953c5cf13 100644 --- a/data/nde/enriched/entries/0916_Q110891804.yaml +++ b/data/nde/enriched/entries/0916_Q110891804.yaml @@ -390,18 +390,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:31.455795+00:00' source_archive: web/0916/kempermeubelproductie.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Kemper meubelproductie - source_url: http://www.kempermeubelproductie.nl/ - retrieved_on: '2025-11-29T23:51:57.623489+00:00' - xpath: /html/head/title - html_file: web/0916/kempermeubelproductie.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:31.454407+00:00' - claim_type: org_name claim_value: Kemper meubelproductie raw_value: Kemper meubelproductie @@ -412,6 +402,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:47:31.454925+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Kemper Alferink Collectie diff --git a/data/nde/enriched/entries/0919_unknown.yaml b/data/nde/enriched/entries/0919_unknown.yaml index f29c605615..50f8f6103a 100644 --- a/data/nde/enriched/entries/0919_unknown.yaml +++ b/data/nde/enriched/entries/0919_unknown.yaml @@ -305,7 +305,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:32.562526+00:00' source_archive: web/0919/landgoedereninoverijssel.nl - claims_count: 5 + claims_count: 3 claims: - claim_type: org_name claim_value: Voorpagina @@ -329,26 +329,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:32.562139+00:00' - - claim_type: social_facebook - claim_value: http://www.facebook.com/sharer.php?u=http://www.landgoedereninoverijssel.nl - raw_value: http://www.facebook.com/sharer.php?u=http://www.landgoedereninoverijssel.nl - source_url: https://www.landgoedereninoverijssel.nl - retrieved_on: '2025-11-29T17:39:03.019799+00:00' - xpath: /html/body/div/header/div/span[2]/a[1] - html_file: web/0919/landgoedereninoverijssel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:32.562398+00:00' - - claim_type: social_twitter - claim_value: http://twitter.com/share?text=Landgoederen%20in%20Overijssel&url=http://www.landgoedereninoverijssel.nl - raw_value: http://twitter.com/share?text=Landgoederen%20in%20Overijssel&url=http://www.landgoedereninoverijssel.nl - source_url: https://www.landgoedereninoverijssel.nl - retrieved_on: '2025-11-29T17:39:03.019799+00:00' - xpath: /html/body/div/header/div/span[2]/a[2] - html_file: web/0919/landgoedereninoverijssel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:32.562402+00:00' - claim_type: org_name claim_value: De kroonjuwelen van de provincie Overijssel in het zonnetje raw_value: De kroonjuwelen van de provincie Overijssel in het zonnetje @@ -359,6 +339,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:32.562476+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Landgoederen in Overijssel diff --git a/data/nde/enriched/entries/0921_Q134993513.yaml b/data/nde/enriched/entries/0921_Q134993513.yaml index e7dd9a3757..e18fc154b7 100644 --- a/data/nde/enriched/entries/0921_Q134993513.yaml +++ b/data/nde/enriched/entries/0921_Q134993513.yaml @@ -391,18 +391,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:33.344821+00:00' source_archive: web/0921/veerman.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Machinefabriek Veerman - source_url: https://www.veerman.nl/ - retrieved_on: '2025-11-29T23:52:04.759447+00:00' - xpath: /html/head/title[1] - html_file: web/0921/veerman.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:33.344205+00:00' - claim_type: org_name claim_value: Home | Machinefabriek Veerman raw_value: "Home | Machinefabriek Veerman\n \n \n \ @@ -474,6 +464,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:33.344741+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Machinefabriek G.R. Veerman B.V diff --git a/data/nde/enriched/entries/0925_Q125421055.yaml b/data/nde/enriched/entries/0925_Q125421055.yaml index 26e95e6be9..011cc1192e 100644 --- a/data/nde/enriched/entries/0925_Q125421055.yaml +++ b/data/nde/enriched/entries/0925_Q125421055.yaml @@ -543,18 +543,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:34.585478+00:00' source_archive: web/0925/anno.nl - claims_count: 9 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - ANNO Stadsmuseum Zwolle - source_url: https://anno.nl/ - retrieved_on: '2025-11-29T17:39:22.323146+00:00' - xpath: /html/head/title - html_file: web/0925/anno.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:34.584948+00:00' - claim_type: description_short claim_value: ANNO vertelt de verhalen van Zwolle. Het is een unieke plek in Nederland waar museum, archeologie, bouwhistorie, monumenten en archief samen onder één @@ -625,26 +615,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:34.585355+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fanno.nl%2F - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fanno.nl%2F - source_url: https://anno.nl/ - retrieved_on: '2025-11-29T17:39:22.323146+00:00' - xpath: /html/body/section[2]/div/section[2]/div/div/div[3]/div/div/div[2]/a[1] - html_file: web/0925/anno.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:34.585364+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fanno.nl%2F&title=ANNO+Stadsmuseum - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fanno.nl%2F&title=ANNO+Stadsmuseum - source_url: https://anno.nl/ - retrieved_on: '2025-11-29T17:39:22.323146+00:00' - xpath: /html/body/section[2]/div/section[2]/div/div/div[3]/div/div/div[2]/a[2] - html_file: web/0925/anno.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:34.585370+00:00' - claim_type: org_name claim_value: ANNO Stadsmuseum raw_value: ANNO Stadsmuseum @@ -655,6 +625,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:34.585418+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: ANNO Stadsmuseum Zwolle diff --git a/data/nde/enriched/entries/0934_Q110891759.yaml b/data/nde/enriched/entries/0934_Q110891759.yaml index ae645719ed..10c85a27b3 100644 --- a/data/nde/enriched/entries/0934_Q110891759.yaml +++ b/data/nde/enriched/entries/0934_Q110891759.yaml @@ -294,18 +294,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:37.224668+00:00' source_archive: web/0934/excelsior-westenholte.nl - claims_count: 12 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Muziekvereniging Excelsior Westenholte - source_url: http://excelsior-westenholte.nl/ - retrieved_on: '2025-11-29T23:59:44.246963+00:00' - xpath: /html/head/title - html_file: web/0934/excelsior-westenholte.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:37.224109+00:00' - claim_type: org_name claim_value: Muziekvereniging Excelsior Westenholte raw_value: Muziekvereniging Excelsior Westenholte @@ -416,6 +406,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:37.224590+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Muziekvereniging Excelsior Westenholte diff --git a/data/nde/enriched/entries/0935_Q2755329.yaml b/data/nde/enriched/entries/0935_Q2755329.yaml index 8ba7203c86..bb1062f3d6 100644 --- a/data/nde/enriched/entries/0935_Q2755329.yaml +++ b/data/nde/enriched/entries/0935_Q2755329.yaml @@ -570,18 +570,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:37.611822+00:00' source_archive: web/0935/tinnenfigurenmuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal Tinnen Figuren Museum - source_url: http://www.tinnenfigurenmuseum.nl - retrieved_on: '2025-11-29T17:39:44.688497+00:00' - xpath: /html/head/title - html_file: web/0935/tinnenfigurenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:37.610690+00:00' - claim_type: org_name claim_value: Nationaal Tinnen Figuren Museum raw_value: Nationaal Tinnen Figuren Museum @@ -622,6 +612,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:37.611610+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Tinnen Figuren Museum diff --git a/data/nde/enriched/entries/0939_Q56460838.yaml b/data/nde/enriched/entries/0939_Q56460838.yaml index 6c69998bb1..edc3fd3f59 100644 --- a/data/nde/enriched/entries/0939_Q56460838.yaml +++ b/data/nde/enriched/entries/0939_Q56460838.yaml @@ -607,18 +607,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:38.245281+00:00' source_archive: web/0939/openluchtmuseumootmarsum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: HOME - raw_value: HOME - Openlucht Museum Ootmarsum - source_url: http://www.openluchtmuseumootmarsum.nl - retrieved_on: '2025-11-29T17:39:22.567766+00:00' - xpath: /html/head/title - html_file: web/0939/openluchtmuseumootmarsum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:38.244897+00:00' - claim_type: email claim_value: info@openluchtmuseumootmarsum.nl raw_value: info@openluchtmuseumootmarsum.nl @@ -649,6 +639,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:38.245204+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Openluchtmuseum Ootmarsum diff --git a/data/nde/enriched/entries/0943_unknown.yaml b/data/nde/enriched/entries/0943_unknown.yaml index 3ee63d1a1f..155cf5ca55 100644 --- a/data/nde/enriched/entries/0943_unknown.yaml +++ b/data/nde/enriched/entries/0943_unknown.yaml @@ -254,7 +254,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:39.132035+00:00' source_archive: web/0943/politie.nl - claims_count: 14 + claims_count: 13 claims: - claim_type: org_name claim_value: Zwolle-Koggelaan @@ -336,16 +336,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:39.131914+00:00' - - claim_type: social_facebook - claim_value: http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.politie.nl%2Fmijn-buurt%2Fpolitiebureaus%2F02%2Fzwolle-koggelaan.html - raw_value: http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.politie.nl%2Fmijn-buurt%2Fpolitiebureaus%2F02%2Fzwolle-koggelaan.html - source_url: https://www.politie.nl/mijn-buurt/politiebureaus/02/zwolle-koggelaan.html - retrieved_on: '2025-11-29T17:39:46.372511+00:00' - xpath: /html/body/div[3]/main/section/div/ul/li[2]/a - html_file: web/0943/politie.nl/pages/mijn-buurt_politiebureaus_02_zwolle-koggelaan.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:39.131919+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/politie/ raw_value: https://www.facebook.com/politie/ @@ -396,6 +386,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:39.131948+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Politie IJsselland diff --git a/data/nde/enriched/entries/0948_Q56460883.yaml b/data/nde/enriched/entries/0948_Q56460883.yaml index c8006a8319..89e0bcd222 100644 --- a/data/nde/enriched/entries/0948_Q56460883.yaml +++ b/data/nde/enriched/entries/0948_Q56460883.yaml @@ -594,18 +594,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:40.749367+00:00' source_archive: web/0948/rijssensmuseum.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Rijssens Museum - source_url: http://www.rijssensmuseum.nl - retrieved_on: '2025-11-29T17:41:51.197216+00:00' - xpath: /html/head/title - html_file: web/0948/rijssensmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:40.748254+00:00' - claim_type: description_short claim_value: Wij heten u van harte welkom op de website van het Rijssens Museum. Het Rijssens Museum is een stadsmuseum. Het toont de opmerkelijke en rijke geschiedenis @@ -662,6 +652,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:40.749237+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijssens Museum diff --git a/data/nde/enriched/entries/0949_Q56461263.yaml b/data/nde/enriched/entries/0949_Q56461263.yaml index d5ba8b5055..1db4b9fb3c 100644 --- a/data/nde/enriched/entries/0949_Q56461263.yaml +++ b/data/nde/enriched/entries/0949_Q56461263.yaml @@ -572,7 +572,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:41.149643+00:00' source_archive: web/0949/schoonewelle.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Museum voor Natuur, Ambacht en Exposities @@ -598,16 +598,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:41.148802+00:00' - - claim_type: org_name - claim_value: schoonewelle.nl - raw_value: schoonewelle.nl - source_url: http://www.schoonewelle.nl - retrieved_on: '2025-11-29T17:40:19.726937+00:00' - xpath: /html/head/meta[13] - html_file: web/0949/schoonewelle.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:47:41.148929+00:00' - claim_type: email claim_value: schoonewelle@schoonewelle.nl raw_value: schoonewelle@schoonewelle.nl @@ -638,6 +628,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:41.149570+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Schoonewelle Museum voor Natuur en Ambacht diff --git a/data/nde/enriched/entries/0955_Q17611858.yaml b/data/nde/enriched/entries/0955_Q17611858.yaml index 176636fba4..aeaf4bbe8e 100644 --- a/data/nde/enriched/entries/0955_Q17611858.yaml +++ b/data/nde/enriched/entries/0955_Q17611858.yaml @@ -573,7 +573,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:42.673956+00:00' source_archive: web/0955/stadsmuseumalmelo.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Stedelijk Museum Almelo @@ -637,16 +637,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:42.673543+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.stadsmuseumalmelo.nl - retrieved_on: '2025-11-29T17:40:23.068502+00:00' - xpath: /html/body/main/div[1]/h1 - html_file: web/0955/stadsmuseumalmelo.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:47:42.673610+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stedelijk Museum Almelo diff --git a/data/nde/enriched/entries/0960_unknown.yaml b/data/nde/enriched/entries/0960_unknown.yaml index 7fcf9a797f..b0481a4d36 100644 --- a/data/nde/enriched/entries/0960_unknown.yaml +++ b/data/nde/enriched/entries/0960_unknown.yaml @@ -264,18 +264,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:43.962831+00:00' source_archive: web/0960/ijssellinie.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.ijssellinie.nl/ - retrieved_on: '2025-11-29T17:40:49.079494+00:00' - xpath: /html/head/title - html_file: web/0960/ijssellinie.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:43.961788+00:00' - claim_type: description_short claim_value:

 

raw_value:

 

@@ -356,6 +346,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:43.962679+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting De IJssellinie diff --git a/data/nde/enriched/entries/0961_Q110891767.yaml b/data/nde/enriched/entries/0961_Q110891767.yaml index c1263bf8d9..c48e3e0582 100644 --- a/data/nde/enriched/entries/0961_Q110891767.yaml +++ b/data/nde/enriched/entries/0961_Q110891767.yaml @@ -413,18 +413,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:44.363630+00:00' source_archive: web/0961/dodenakkers.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Dodenakkers.nl - source_url: http://www.dodenakkers.nl/ - retrieved_on: '2025-11-29T23:59:56.551640+00:00' - xpath: /html/head/title - html_file: web/0961/dodenakkers.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:44.362640+00:00' - claim_type: description_short claim_value: Kenniscentrum voor funerair erfgoed in Nederland en wereldwijd. Begraafplaatsen, kerkhoven en grafmonumenten. @@ -477,6 +467,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:44.363528+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Dodenakkers diff --git a/data/nde/enriched/entries/0964_Q110891795.yaml b/data/nde/enriched/entries/0964_Q110891795.yaml index 929137f41e..11055e1984 100644 --- a/data/nde/enriched/entries/0964_Q110891795.yaml +++ b/data/nde/enriched/entries/0964_Q110891795.yaml @@ -415,18 +415,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:44.937092+00:00' source_archive: web/0964/nlroutes.org - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: NLmapNew.com - raw_value: NLmapNew.com - Find Trusted Local Businesses Worldwide - source_url: https://nlroutes.org/details/stichting-heemkunde-markelo-ChIJxTQ - retrieved_on: '2025-11-29T23:56:51.810973+00:00' - xpath: /html/head/title - html_file: web/0964/nlroutes.org/pages/details_stichting-heemkunde-markelo-ChIJxTQ.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:44.936920+00:00' - claim_type: description_short claim_value: Discover and connect with verified local businesses worldwide. Find trusted service providers, read reviews, and get contact information for businesses @@ -441,6 +431,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:44.936967+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Heemkunde Markelo diff --git a/data/nde/enriched/entries/0969_Q110891746.yaml b/data/nde/enriched/entries/0969_Q110891746.yaml index cd9ba196c5..8254e2bca9 100644 --- a/data/nde/enriched/entries/0969_Q110891746.yaml +++ b/data/nde/enriched/entries/0969_Q110891746.yaml @@ -394,18 +394,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:45.430107+00:00' source_archive: web/0969/nlmappoint.org - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: NLmapNew.com - raw_value: NLmapNew.com - Find Trusted Local Businesses Worldwide - source_url: https://nlmappoint.org/details/stichting-historische-projecten-ChIJ-4E - retrieved_on: '2025-11-29T23:56:55.872283+00:00' - xpath: /html/head/title - html_file: web/0969/nlmappoint.org/pages/details_stichting-historische-projecten-ChIJ-4E.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:45.429926+00:00' - claim_type: description_short claim_value: Discover and connect with verified local businesses worldwide. Find trusted service providers, read reviews, and get contact information for businesses @@ -420,6 +410,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:45.429976+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Historische Projecten diff --git a/data/nde/enriched/entries/0971_Q110891750.yaml b/data/nde/enriched/entries/0971_Q110891750.yaml index 915e9974f9..4d163c0060 100644 --- a/data/nde/enriched/entries/0971_Q110891750.yaml +++ b/data/nde/enriched/entries/0971_Q110891750.yaml @@ -494,18 +494,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:46.277459+00:00' source_archive: web/0971/overijsselacademie.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Overijssel Academie - source_url: https://overijsselacademie.nl/ - retrieved_on: '2025-11-29T17:41:13.780896+00:00' - xpath: /html/head/title - html_file: web/0971/overijsselacademie.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:46.276725+00:00' - claim_type: org_name claim_value: Overijssel Academie raw_value: Overijssel Academie @@ -586,6 +576,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:46.277382+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: IJsselacademie diff --git a/data/nde/enriched/entries/0973_Q56461027.yaml b/data/nde/enriched/entries/0973_Q56461027.yaml index a575fcb0c4..7521ca6f7b 100644 --- a/data/nde/enriched/entries/0973_Q56461027.yaml +++ b/data/nde/enriched/entries/0973_Q56461027.yaml @@ -616,18 +616,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:46.735688+00:00' source_archive: web/0973/hildokrop.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Hildo Krop Museum - source_url: http://hildokrop.nl/ - retrieved_on: '2025-11-29T17:41:15.688374+00:00' - xpath: /html/head/title - html_file: web/0973/hildokrop.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:46.735197+00:00' - claim_type: org_name claim_value: Hildo Krop Museum - Museum uit Steenwijk raw_value: Hildo Krop Museum - Museum uit Steenwijk @@ -662,6 +652,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:46.735625+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Collectie Krop diff --git a/data/nde/enriched/entries/0976_unknown.yaml b/data/nde/enriched/entries/0976_unknown.yaml index d92cfd5913..ac604330ac 100644 --- a/data/nde/enriched/entries/0976_unknown.yaml +++ b/data/nde/enriched/entries/0976_unknown.yaml @@ -226,7 +226,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:47.271870+00:00' source_archive: web/0976/stichtingmarkehaarle.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Stichting Marke Haarle @@ -288,16 +288,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:47.271787+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.stichtingmarkehaarle.nl/ - retrieved_on: '2025-11-29T17:41:55.227050+00:00' - xpath: /html/body/div/div[2]/div/div[2]/div[1]/h1 - html_file: web/0976/stichtingmarkehaarle.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:47:47.271824+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Marke Haarle diff --git a/data/nde/enriched/entries/0978_Q110891779.yaml b/data/nde/enriched/entries/0978_Q110891779.yaml index 483fa6a149..ec049d5855 100644 --- a/data/nde/enriched/entries/0978_Q110891779.yaml +++ b/data/nde/enriched/entries/0978_Q110891779.yaml @@ -378,19 +378,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:47.403162+00:00' source_archive: web/0978/oudommen.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: OudOmmen.nl - raw_value: OudOmmen.nl | Webarchief voor de gemeente Ommen, de plek waar de geschiedenis - van de regio Ommen samen komt. - source_url: https://www.oudommen.nl/ - retrieved_on: '2025-11-29T23:57:58.346637+00:00' - xpath: /html/head/title - html_file: web/0978/oudommen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:47.401801+00:00' - claim_type: description_short claim_value: Webarchief voor de gemeente Ommen, de plek waar de geschiedenis van de regio Ommen samen komt. @@ -433,6 +422,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:47.402961+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting OudOmmen.nl diff --git a/data/nde/enriched/entries/0983_Q98961290.yaml b/data/nde/enriched/entries/0983_Q98961290.yaml index ccae535d28..731240c817 100644 --- a/data/nde/enriched/entries/0983_Q98961290.yaml +++ b/data/nde/enriched/entries/0983_Q98961290.yaml @@ -518,18 +518,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:48.051213+00:00' source_archive: web/0983/walburgiskerk.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Walburgiskerk - source_url: http://www.walburgiskerk.nl/ - retrieved_on: '2025-11-29T17:41:48.074033+00:00' - xpath: /html/head/title - html_file: web/0983/walburgiskerk.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:48.050694+00:00' - claim_type: description_short claim_value: De kapittelkerk stamt uit de 13e eeuw en geldt als één van de 10 grootste en mooiste kerken in Nederland. Zij is gebouwd op restanten van een @@ -564,6 +554,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:47:48.050920+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Walburgiskerk diff --git a/data/nde/enriched/entries/0990_Q110891821.yaml b/data/nde/enriched/entries/0990_Q110891821.yaml index ad276c3f8d..f1aec44546 100644 --- a/data/nde/enriched/entries/0990_Q110891821.yaml +++ b/data/nde/enriched/entries/0990_Q110891821.yaml @@ -305,18 +305,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:50.046328+00:00' source_archive: web/0990/vanhethooiland.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Zorgboerderij Van 't Hooiland - source_url: http://www.vanhethooiland.nl/ - retrieved_on: '2025-11-29T23:59:46.543828+00:00' - xpath: /html/head/title - html_file: web/0990/vanhethooiland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:50.045940+00:00' - claim_type: description_short claim_value: Zorgboerderij Van 't Hooiland. Kleinschalig in grootte, groot in mogelijkheden. Onze begeleiding is persoonlijk, betrokken, liefdevol en deskundig. @@ -359,6 +349,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: schema_org_description extraction_timestamp: '2025-12-01T10:47:50.046140+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Van 't Hooiland diff --git a/data/nde/enriched/entries/0995_Q110891786.yaml b/data/nde/enriched/entries/0995_Q110891786.yaml index e3d3a9e65f..4136686df9 100644 --- a/data/nde/enriched/entries/0995_Q110891786.yaml +++ b/data/nde/enriched/entries/0995_Q110891786.yaml @@ -416,18 +416,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:50.957784+00:00' source_archive: web/0995/heemkundeweerselo.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Vereniging Heemkunde oalde gemeente Weerselo - source_url: http://www.heemkundeweerselo.nl/ - retrieved_on: '2025-11-30T00:00:39.219189+00:00' - xpath: /html/head/title - html_file: web/0995/heemkundeweerselo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:50.956875+00:00' - claim_type: description_short claim_value: 'Boek “Oorlogsverhalen uit de oude gemeente Weerselo”, De bestelde boeken kunnen worden opgehaald op: Zaterdagmiddag 29 november van 13.00 tot @@ -476,6 +466,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:50.957648+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Vereniging Heemkunde voormalige Gemeente Weerselo diff --git a/data/nde/enriched/entries/0997_unknown.yaml b/data/nde/enriched/entries/0997_unknown.yaml index b278665b1d..072d461e72 100644 --- a/data/nde/enriched/entries/0997_unknown.yaml +++ b/data/nde/enriched/entries/0997_unknown.yaml @@ -258,19 +258,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:25:52.887378+00:00' source_archive: web/0997/omheino.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: 'Home - Omheining: Vereniging voor Heemkunde van Heino, Lierderholthuis - en Laag Zuthem' - source_url: http://www.omheino.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/0997/omheino.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:25:52.886584+00:00' - claim_type: description_short claim_value: Omheining De Vereniging voor Heemkunde ‘Omheining’ wil de geschiedenis en het culturele erfgoed van de drie dorpen Heino, Lierderholthuis en Laag Zuthem @@ -321,3 +310,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:25:52.887268+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/0998_unknown.yaml b/data/nde/enriched/entries/0998_unknown.yaml index 812f432454..ae9860c1a7 100644 --- a/data/nde/enriched/entries/0998_unknown.yaml +++ b/data/nde/enriched/entries/0998_unknown.yaml @@ -214,7 +214,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:51.407307+00:00' source_archive: web/0998/recreatieparkentwente.nl - claims_count: 12 + claims_count: 6 claims: - claim_type: org_name claim_value: Projecten Verhalend landschap @@ -226,46 +226,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:47:51.406656+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/ - retrieved_on: '2025-11-29T17:41:55.942155+00:00' - xpath: /html/body/div/div/div[1]/div[1]/nav/div[2]/button/svg/title - html_file: web/0998/recreatieparkentwente.nl/mirror/www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:51.406668+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/ - retrieved_on: '2025-11-29T17:41:55.942155+00:00' - xpath: /html/body/div/div/main/div[1]/a[1]/span/svg/title - html_file: web/0998/recreatieparkentwente.nl/mirror/www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:51.406677+00:00' - - claim_type: org_name - claim_value: terug - raw_value: terug - source_url: https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/ - retrieved_on: '2025-11-29T17:41:55.942155+00:00' - xpath: /html/body/div/div/main/div[1]/a[2]/svg/title - html_file: web/0998/recreatieparkentwente.nl/mirror/www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:51.406680+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/ - retrieved_on: '2025-11-29T17:41:55.942155+00:00' - xpath: /html/body/div/div/main/div[1]/details/ul/li[1]/a/svg/title - html_file: web/0998/recreatieparkentwente.nl/mirror/www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:51.406685+00:00' - claim_type: org_name claim_value: Whatsapp raw_value: Whatsapp @@ -276,16 +236,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:47:51.406689+00:00' - - claim_type: org_name - claim_value: Twitter - raw_value: Twitter - source_url: https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/ - retrieved_on: '2025-11-29T17:41:55.942155+00:00' - xpath: /html/body/div/div/main/div[1]/details/ul/li[3]/a/svg/title - html_file: web/0998/recreatieparkentwente.nl/mirror/www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:51.406693+00:00' - claim_type: description_short claim_value: In de globaliserende en steeds meer digitaliserende wereld is er behoefte aan herkenning en geborgenheid. Een plek waar je je thuis voelt, een @@ -324,18 +274,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:47:51.406934+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/&t=Projecten - Verhalend landschap - raw_value: https://www.facebook.com/sharer.php?u=https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/&t=Projecten - Verhalend landschap - source_url: https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/ - retrieved_on: '2025-11-29T17:41:55.942155+00:00' - xpath: /html/body/div/div/main/div[1]/details/ul/li[1]/a - html_file: web/0998/recreatieparkentwente.nl/mirror/www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:51.407027+00:00' - claim_type: social_twitter claim_value: https://x.com/share?text=Projecten Verhalend landschap&url=https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/ raw_value: https://x.com/share?text=Projecten Verhalend landschap&url=https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/ @@ -346,6 +284,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:51.407034+00:00' + removed_invalid_claims: + - removed_count: 6 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Landschap Erfgoed diff --git a/data/nde/enriched/entries/0999_unknown.yaml b/data/nde/enriched/entries/0999_unknown.yaml index 7c75d06160..cf47670fdf 100644 --- a/data/nde/enriched/entries/0999_unknown.yaml +++ b/data/nde/enriched/entries/0999_unknown.yaml @@ -87,7 +87,7 @@ custodian_status_timestamp: '2025-11-30T18:39:58.087986+00:00' web_claims: extraction_timestamp: '2025-12-01T10:47:51.588588+00:00' source_archive: web/0999/destentor.nl - claims_count: 9 + claims_count: 6 claims: - claim_type: org_name claim_value: 'Hoe Vincent (66) met fototoestel en pen alle gevels in Steenwijkerland @@ -117,16 +117,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:51.586399+00:00' - - claim_type: org_name - claim_value: destentor.nl - raw_value: destentor.nl - source_url: https://www.destentor.nl/steenwijkerland/hoe-vincent-66-met-fototoestel-en-pen-alle-gevels-in-steenwijkerland-langsging-dit-kan-geen-gemeente-nazeggen~a647ddd6/ - retrieved_on: '2025-11-29T17:41:56.802434+00:00' - xpath: /html/head/meta[13] - html_file: web/0999/destentor.nl/mirror/www.destentor.nl/steenwijkerland/hoe-vincent-66-met-fototoestel-en-pen-alle-gevels-in-steenwijkerland-langsging-dit-kan-geen-gemeente-nazeggen~a647ddd6/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:47:51.586730+00:00' - claim_type: email claim_value: online@destentor.nl raw_value: online@destentor.nl @@ -137,26 +127,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:47:51.587142+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.destentor.nl%2Fsteenwijkerland%2Fhoe-vincent-66-met-fototoestel-en-pen-alle-gevels-in-steenwijkerland-langsging-dit-kan-geen-gemeente-nazeggen%7Ea647ddd6%2F - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.destentor.nl%2Fsteenwijkerland%2Fhoe-vincent-66-met-fototoestel-en-pen-alle-gevels-in-steenwijkerland-langsging-dit-kan-geen-gemeente-nazeggen%7Ea647ddd6%2F - source_url: https://www.destentor.nl/steenwijkerland/hoe-vincent-66-met-fototoestel-en-pen-alle-gevels-in-steenwijkerland-langsging-dit-kan-geen-gemeente-nazeggen~a647ddd6/ - retrieved_on: '2025-11-29T17:41:56.802434+00:00' - xpath: /html/body/div[4]/section/section/div[2]/a[2] - html_file: web/0999/destentor.nl/mirror/www.destentor.nl/steenwijkerland/hoe-vincent-66-met-fototoestel-en-pen-alle-gevels-in-steenwijkerland-langsging-dit-kan-geen-gemeente-nazeggen~a647ddd6/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:51.587672+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Hoe+Vincent+%2866%29+met+fototoestel+en+pen+alle+gevels+in+Steenwijkerland+langsging%3A+%E2%80%98Dit+kan+geen+gemeente+nazeggen%E2%80%99&url=https%3A%2F%2Fwww.destentor.nl%2Fsteenwijkerland%2Fhoe-vincent-66-met-fototoestel-en-pen-alle-gevels-in-steenwijkerland-langsging-dit-kan-geen-gemeente-nazeggen%7Ea647ddd6%2F - raw_value: https://twitter.com/intent/tweet?text=Hoe+Vincent+%2866%29+met+fototoestel+en+pen+alle+gevels+in+Steenwijkerland+langsging%3A+%E2%80%98Dit+kan+geen+gemeente+nazeggen%E2%80%99&url=https%3A%2F%2Fwww.destentor.nl%2Fsteenwijkerland%2Fhoe-vincent-66-met-fototoestel-en-pen-alle-gevels-in-steenwijkerland-langsging-dit-kan-geen-gemeente-nazeggen%7Ea647ddd6%2F - source_url: https://www.destentor.nl/steenwijkerland/hoe-vincent-66-met-fototoestel-en-pen-alle-gevels-in-steenwijkerland-langsging-dit-kan-geen-gemeente-nazeggen~a647ddd6/ - retrieved_on: '2025-11-29T17:41:56.802434+00:00' - xpath: /html/body/div[4]/section/section/div[2]/a[3] - html_file: web/0999/destentor.nl/mirror/www.destentor.nl/steenwijkerland/hoe-vincent-66-met-fototoestel-en-pen-alle-gevels-in-steenwijkerland-langsging-dit-kan-geen-gemeente-nazeggen~a647ddd6/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:51.587693+00:00' - claim_type: social_twitter claim_value: https://twitter.com/de_stentor raw_value: https://twitter.com/de_stentor @@ -187,6 +157,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:51.587806+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Vincent Erdin diff --git a/data/nde/enriched/entries/1002_Q110891779.yaml b/data/nde/enriched/entries/1002_Q110891779.yaml index ed6bd41004..55141e31c8 100644 --- a/data/nde/enriched/entries/1002_Q110891779.yaml +++ b/data/nde/enriched/entries/1002_Q110891779.yaml @@ -358,19 +358,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:52.117474+00:00' source_archive: web/1002/oudommen.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: OudOmmen.nl - raw_value: OudOmmen.nl | Webarchief voor de gemeente Ommen, de plek waar de geschiedenis - van de regio Ommen samen komt. - source_url: https://www.oudommen.nl/ - retrieved_on: '2025-11-30T00:00:39.672156+00:00' - xpath: /html/head/title - html_file: web/1002/oudommen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:52.116390+00:00' - claim_type: description_short claim_value: Webarchief voor de gemeente Ommen, de plek waar de geschiedenis van de regio Ommen samen komt. @@ -413,6 +402,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:52.117271+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Webarchief / Stichting OudOmmen.nl diff --git a/data/nde/enriched/entries/1007_Q110891796.yaml b/data/nde/enriched/entries/1007_Q110891796.yaml index ff45d566d2..79551eb321 100644 --- a/data/nde/enriched/entries/1007_Q110891796.yaml +++ b/data/nde/enriched/entries/1007_Q110891796.yaml @@ -434,18 +434,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:53.333245+00:00' source_archive: web/1007/borne.nl - claims_count: 23 + claims_count: 22 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Borne - source_url: https://www.borne.nl/gemeentearchief-borne - retrieved_on: '2025-11-29T17:41:59.191689+00:00' - xpath: /html/head/title - html_file: web/1007/borne.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:53.332629+00:00' - claim_type: org_name claim_value: externe-link-icoon raw_value: externe-link-icoon @@ -666,6 +656,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:53.333107+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Borne diff --git a/data/nde/enriched/entries/1009_Q81181187.yaml b/data/nde/enriched/entries/1009_Q81181187.yaml index c8b10f8e52..ae410c4acc 100644 --- a/data/nde/enriched/entries/1009_Q81181187.yaml +++ b/data/nde/enriched/entries/1009_Q81181187.yaml @@ -385,18 +385,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:53.848335+00:00' source_archive: web/1009/vechtstromen.nl - claims_count: 11 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Vechtstromen - source_url: https://www.vechtstromen.nl - retrieved_on: '2025-11-29T17:45:07.438434+00:00' - xpath: /html/head/title - html_file: web/1009/vechtstromen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:53.847504+00:00' - claim_type: description_short claim_value: Waterschap Vechtstromen werkt aan veilige dijken, schoon en voldoende oppervlaktewater en gezuiverd afvalwater. Ons werkgebied omvat 23 gemeenten @@ -431,26 +421,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:47:53.847946+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3a%2f%2fwww.vechtstromen.nl%2f&t=Waterschap%20Vechtstromen%20werkt%20aan%20veilige%20dijken%2c%20schoon%20en%20voldoende%20oppervlaktewater%20en%20gezuiverd%20afvalwater.%20Ons%20werkgebied%20omvat%2023%20gemeenten%20in%20de%20provincies%20Overijssel%2c%20Drenthe%20en%20Gelderland. - raw_value: https://www.facebook.com/sharer.php?u=https%3a%2f%2fwww.vechtstromen.nl%2f&t=Waterschap%20Vechtstromen%20werkt%20aan%20veilige%20dijken%2c%20schoon%20en%20voldoende%20oppervlaktewater%20en%20gezuiverd%20afvalwater.%20Ons%20werkgebied%20omvat%2023%20gemeenten%20in%20de%20provincies%20Overijssel%2c%20Drenthe%20en%20Gelderland. - source_url: https://www.vechtstromen.nl - retrieved_on: '2025-11-29T17:45:07.438434+00:00' - xpath: /html/body/div/footer/div/div[1]/div[1]/div/div/div/div[2]/ul/li[1]/span/a - html_file: web/1009/vechtstromen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:53.848129+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3a%2f%2fwww.vechtstromen.nl%2f&title=Home&ro=false&summary=Waterschap%20Vechtstromen%20werkt%20aan%20veilige%20dijken%2c%20schoon%20en%20voldoende%20oppervlaktewater%20en%20gezuiverd%20afvalwater.%20Ons%20werkgebied%20omvat%2023%20gemeenten%20in%20de%20provincies%20Overijssel%2c%20Drenthe%20en%20Gelderland. - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3a%2f%2fwww.vechtstromen.nl%2f&title=Home&ro=false&summary=Waterschap%20Vechtstromen%20werkt%20aan%20veilige%20dijken%2c%20schoon%20en%20voldoende%20oppervlaktewater%20en%20gezuiverd%20afvalwater.%20Ons%20werkgebied%20omvat%2023%20gemeenten%20in%20de%20provincies%20Overijssel%2c%20Drenthe%20en%20Gelderland. - source_url: https://www.vechtstromen.nl - retrieved_on: '2025-11-29T17:45:07.438434+00:00' - xpath: /html/body/div/footer/div/div[1]/div[1]/div/div/div/div[2]/ul/li[2]/span/a - html_file: web/1009/vechtstromen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:47:53.848136+00:00' - claim_type: social_twitter claim_value: https://x.com/intent/tweet?text=Home&url=https%3a%2f%2fwww.vechtstromen.nl%2f raw_value: https://x.com/intent/tweet?text=Home&url=https%3a%2f%2fwww.vechtstromen.nl%2f @@ -501,6 +471,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:53.848171+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Waterschap Vechtstromen diff --git a/data/nde/enriched/entries/1011_unknown.yaml b/data/nde/enriched/entries/1011_unknown.yaml index f5c38507a5..8af7ff9365 100644 --- a/data/nde/enriched/entries/1011_unknown.yaml +++ b/data/nde/enriched/entries/1011_unknown.yaml @@ -319,18 +319,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:54.444124+00:00' source_archive: web/1011/wijkmuseumsoesterkwartier.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: home - raw_value: home - Wijkmuseum Soesterkwartier Wijkmuseum Soesterkwartier - source_url: https://wijkmuseumsoesterkwartier.nl/ - retrieved_on: '2025-11-29T17:45:12.684555+00:00' - xpath: /html/head/title - html_file: web/1011/wijkmuseumsoesterkwartier.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:54.442763+00:00' - claim_type: description_short claim_value: Ga terug in de tijd! Bezoek het Wijkmuseum. Elke zaterdag geopend van 11.00 tot 16.00 uur. Andere dagen alleen op afspraak enminimaal 4 personen. @@ -419,6 +409,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:54.443924+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Wijkmuseum Soesterkwartier diff --git a/data/nde/enriched/entries/1015_Q572269.yaml b/data/nde/enriched/entries/1015_Q572269.yaml index ee6d01d1b5..83ad2cd423 100644 --- a/data/nde/enriched/entries/1015_Q572269.yaml +++ b/data/nde/enriched/entries/1015_Q572269.yaml @@ -1081,18 +1081,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:55.858262+00:00' source_archive: web/1015/kasteelamerongen.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Kasteel Amerongen - source_url: https://www.kasteelamerongen.nl/ - retrieved_on: '2025-11-29T17:46:36.723533+00:00' - xpath: /html/head/title - html_file: web/1015/kasteelamerongen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:55.856473+00:00' - claim_type: description_short claim_value: Welkom op Kasteel Amerongen. Welkom op Kasteel Amerongen! Stap binnen in één van de meest authentieke kastelen van Nederland en ervaar de geschiedenis @@ -1181,6 +1171,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:47:55.858018+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Kasteel Amerongen diff --git a/data/nde/enriched/entries/1021_unknown.yaml b/data/nde/enriched/entries/1021_unknown.yaml index 83666cb7c4..89ec52532a 100644 --- a/data/nde/enriched/entries/1021_unknown.yaml +++ b/data/nde/enriched/entries/1021_unknown.yaml @@ -188,7 +188,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:47:57.362745+00:00' source_archive: web/1021/suikerzak.nl - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Club van Suikerzakjesverzamelaars in Nederland @@ -210,16 +210,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:57.362324+00:00' - - claim_type: org_name - claim_value: Nieuws - raw_value: Nieuws - source_url: https://www.suikerzak.nl/ - retrieved_on: '2025-11-29T17:46:40.094028+00:00' - xpath: /html/body/div/div[3]/div/div[2]/section/h1 - html_file: web/1021/suikerzak.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:47:57.362695+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Club van Suikerzakjesverzamelaars in Nederland diff --git a/data/nde/enriched/entries/1023_Q2346824.yaml b/data/nde/enriched/entries/1023_Q2346824.yaml index 7fd6aa5ddf..4f3561a47c 100644 --- a/data/nde/enriched/entries/1023_Q2346824.yaml +++ b/data/nde/enriched/entries/1023_Q2346824.yaml @@ -682,18 +682,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:57.591303+00:00' source_archive: web/1023/archiefeemland.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.archiefeemland.nl - retrieved_on: '2025-11-29T17:46:41.660220+00:00' - xpath: /html/head/title - html_file: web/1023/archiefeemland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:57.590769+00:00' - claim_type: description_short claim_value: Archief Eemland is hét historisch informatiecentrum voor Amersfoort en de regio Eemland. Start uw onderzoek online of kom langs. @@ -778,6 +768,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:57.591254+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Archief Eemland diff --git a/data/nde/enriched/entries/1033_unknown.yaml b/data/nde/enriched/entries/1033_unknown.yaml index 341e574eea..7191a0d812 100644 --- a/data/nde/enriched/entries/1033_unknown.yaml +++ b/data/nde/enriched/entries/1033_unknown.yaml @@ -257,18 +257,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:59.847600+00:00' source_archive: web/1033/hkij.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.hkij.nl/ - retrieved_on: '2025-11-29T17:48:42.168949+00:00' - xpath: /html/head/title - html_file: web/1033/hkij.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:59.847197+00:00' - claim_type: description_short claim_value: '.... is net als de meeste IJsselsteiners van mening dat de cultuur-historische waarden van IJsselstein en omgeving actief moeten worden beschermd, behouden @@ -283,6 +273,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:47:59.847257+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Kring IJsselstein diff --git a/data/nde/enriched/entries/1034_unknown.yaml b/data/nde/enriched/entries/1034_unknown.yaml index 777793d9b9..2ea2332060 100644 --- a/data/nde/enriched/entries/1034_unknown.yaml +++ b/data/nde/enriched/entries/1034_unknown.yaml @@ -179,18 +179,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:47:59.891105+00:00' source_archive: web/1034/hkloenen.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historische Kring Loenen - source_url: '' - retrieved_on: '' - xpath: /html/head/title - html_file: web/1034/hkloenen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:47:59.890192+00:00' - claim_type: description_short claim_value: 'Rondleiding bij Stalhouderij de Zadelhoff Zandpad 34a in Breukelen Welkom bij de Historische Kring Loenen De Historische Kring Loenen (HKGL) is @@ -239,6 +229,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:47:59.890987+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Kring Loenen diff --git a/data/nde/enriched/entries/1045_Q3389883.yaml b/data/nde/enriched/entries/1045_Q3389883.yaml index f98bf4082f..cd8f0e9964 100644 --- a/data/nde/enriched/entries/1045_Q3389883.yaml +++ b/data/nde/enriched/entries/1045_Q3389883.yaml @@ -1065,18 +1065,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:02.074517+00:00' source_archive: web/1045/mondriaanhuis.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Mondriaanhuis - source_url: https://www.mondriaanhuis.nl/nl/ - retrieved_on: '2025-11-29T18:08:16.632063+00:00' - xpath: /html/head/title - html_file: web/1045/mondriaanhuis.nl/mirror/www.mondriaanhuis.nl/nl/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:02.073851+00:00' - claim_type: org_name claim_value: Mondriaanhuis raw_value: Mondriaanhuis @@ -1107,6 +1097,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:02.074441+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Mondriaanhuis diff --git a/data/nde/enriched/entries/1047_Q110995912.yaml b/data/nde/enriched/entries/1047_Q110995912.yaml index cb183da676..98b2bc6677 100644 --- a/data/nde/enriched/entries/1047_Q110995912.yaml +++ b/data/nde/enriched/entries/1047_Q110995912.yaml @@ -504,18 +504,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:02.327912+00:00' source_archive: web/1047/heksenwaag.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum De Heksenwaag Oudewater - source_url: https://www.heksenwaag.nl/ - retrieved_on: '2025-11-29T18:07:07.622194+00:00' - xpath: /html/head/title - html_file: web/1047/heksenwaag.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:02.327131+00:00' - claim_type: description_short claim_value: Museum De Heksenwaag is een plek waar de geschiedenis van de heksenvervolging in de 16e en de 17e eeuw herleeft. Waarom is de Heksenwaag Oudewater wereldberoemd? @@ -578,6 +568,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:02.327731+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum de Heksenwaag diff --git a/data/nde/enriched/entries/1052_Q2493502.yaml b/data/nde/enriched/entries/1052_Q2493502.yaml index 74cd7de1df..84c1bd4dae 100644 --- a/data/nde/enriched/entries/1052_Q2493502.yaml +++ b/data/nde/enriched/entries/1052_Q2493502.yaml @@ -621,18 +621,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:03.417558+00:00' source_archive: web/1052/cavaleriemuseum.nl - claims_count: 4 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - cavaleriemuseum.nl - source_url: https://cavaleriemuseum.nl/ - retrieved_on: '2025-11-29T18:09:53.143864+00:00' - xpath: /html/head/title - html_file: web/1052/cavaleriemuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:03.417016+00:00' - claim_type: description_short claim_value: Welkom bij het Cavaleriemuseum Ontdek de fascinerende geschiedenis van de Nederlandse Cavalerie, waarbij de militaire functies Mobiliteit, Vuur- @@ -653,16 +643,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_description extraction_timestamp: '2025-12-01T10:48:03.417166+00:00' - - claim_type: org_name - claim_value: cavaleriemuseum.nl - raw_value: cavaleriemuseum.nl - source_url: https://cavaleriemuseum.nl/ - retrieved_on: '2025-11-29T18:09:53.143864+00:00' - xpath: /html/head/meta[9] - html_file: web/1052/cavaleriemuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:48:03.417229+00:00' - claim_type: org_name claim_value: Welkom bij het Cavaleriemuseum raw_value: Welkom bij het Cavaleriemuseum @@ -673,6 +653,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:03.417479+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Cavaleriemuseum diff --git a/data/nde/enriched/entries/1059_Q963825.yaml b/data/nde/enriched/entries/1059_Q963825.yaml index 0a7884d032..b5d31c2038 100644 --- a/data/nde/enriched/entries/1059_Q963825.yaml +++ b/data/nde/enriched/entries/1059_Q963825.yaml @@ -618,18 +618,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:04.301846+00:00' source_archive: web/1059/bomenmuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal Bomenmuseum Gimborn | Stichting Von Gimborn Arboretum - source_url: https://www.bomenmuseum.nl/ - retrieved_on: '2025-11-29T18:10:51.484859+00:00' - xpath: /html/head/title - html_file: web/1059/bomenmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:04.300632+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/NationaalBomenmuseum/ raw_value: https://www.facebook.com/NationaalBomenmuseum/ @@ -672,6 +662,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:04.301685+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Von Gimborn Arboretum diff --git a/data/nde/enriched/entries/1060_Q2144884.yaml b/data/nde/enriched/entries/1060_Q2144884.yaml index 4ef2e9bf0e..2641091d07 100644 --- a/data/nde/enriched/entries/1060_Q2144884.yaml +++ b/data/nde/enriched/entries/1060_Q2144884.yaml @@ -958,7 +958,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:04.685643+00:00' source_archive: web/1060/nationaalglasmuseum.nl - claims_count: 10 + claims_count: 7 claims: - claim_type: org_name claim_value: Nationaal Glasmuseum @@ -980,36 +980,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:48:04.685185+00:00' - - claim_type: org_name - claim_value: Twitter - raw_value: Twitter - source_url: http://www.nationaalglasmuseum.nl - retrieved_on: '2025-11-29T18:10:53.376243+00:00' - xpath: /html/body/footer/div/div[2]/ul/li[1]/a/svg/title - html_file: web/1060/nationaalglasmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:04.685193+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: http://www.nationaalglasmuseum.nl - retrieved_on: '2025-11-29T18:10:53.376243+00:00' - xpath: /html/body/footer/div/div[2]/ul/li[2]/a/svg/title - html_file: web/1060/nationaalglasmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:04.685197+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: http://www.nationaalglasmuseum.nl - retrieved_on: '2025-11-29T18:10:53.376243+00:00' - xpath: /html/body/footer/div/div[2]/ul/li[3]/a/svg/title - html_file: web/1060/nationaalglasmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:04.685200+00:00' - claim_type: description_short claim_value: In het Nationaal Glasmuseum worden het hele jaar door afwisselende tentoonstellingen georganiseerd. Hedendaagse en experimentele tentoonstellingen @@ -1064,6 +1034,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:04.685560+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Glasmuseum diff --git a/data/nde/enriched/entries/1062_Q19974153.yaml b/data/nde/enriched/entries/1062_Q19974153.yaml index 885d6bced0..c38b640ab2 100644 --- a/data/nde/enriched/entries/1062_Q19974153.yaml +++ b/data/nde/enriched/entries/1062_Q19974153.yaml @@ -780,18 +780,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:05.303675+00:00' source_archive: web/1062/kampamersfoort.nl - claims_count: 19 + claims_count: 18 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Kamp Amersfoort - source_url: https://www.kampamersfoort.nl/ - retrieved_on: '2025-11-29T18:10:38.504267+00:00' - xpath: /html/head/title - html_file: web/1062/kampamersfoort.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:05.302405+00:00' - claim_type: org_name claim_value: Toegankelijkheid gereedschappen raw_value: Toegankelijkheid gereedschappen @@ -976,6 +966,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:05.303484+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Monument Kamp Amersfoort diff --git a/data/nde/enriched/entries/1064_Q2418919.yaml b/data/nde/enriched/entries/1064_Q2418919.yaml index 275633d611..e2f146ee28 100644 --- a/data/nde/enriched/entries/1064_Q2418919.yaml +++ b/data/nde/enriched/entries/1064_Q2418919.yaml @@ -335,18 +335,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:05.843983+00:00' source_archive: web/1064/ngv.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nederlandse Genealogische Vereniging - source_url: https://www.ngv.nl/ - retrieved_on: '2025-11-30T00:02:46.046735+00:00' - xpath: /html/head/title - html_file: web/1064/ngv.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:05.842880+00:00' - claim_type: description_short claim_value: Ontdek je familiegeschiedenis. Maak een stamboom. Meld je aan voor de nieuwsbrief. Bekijk de regionale en landelijke activiteiten. @@ -399,6 +389,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:05.843756+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nederlandse Genealogische Vereniging diff --git a/data/nde/enriched/entries/1065_Q110292105.yaml b/data/nde/enriched/entries/1065_Q110292105.yaml index 7907dfcd88..63ec66f672 100644 --- a/data/nde/enriched/entries/1065_Q110292105.yaml +++ b/data/nde/enriched/entries/1065_Q110292105.yaml @@ -463,18 +463,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:05.925026+00:00' source_archive: web/1065/samh.nl - claims_count: 11 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - SAMH - source_url: https://www.samh.nl/ - retrieved_on: '2025-11-30T00:01:36.054198+00:00' - xpath: /html/head/title - html_file: web/1065/samh.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:05.924408+00:00' - claim_type: description_short claim_value: Streekarchief Midden-Holland beheert het geheugen van de regio Midden-Holland. Iedereen die iets wil weten over de geschiedenis van zijn familie, huis of buurt @@ -579,6 +569,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:05.924970+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Open Archieven diff --git a/data/nde/enriched/entries/1071_Q2545824.yaml b/data/nde/enriched/entries/1071_Q2545824.yaml index 8312ff8145..6b849debd7 100644 --- a/data/nde/enriched/entries/1071_Q2545824.yaml +++ b/data/nde/enriched/entries/1071_Q2545824.yaml @@ -388,7 +388,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:07.278233+00:00' source_archive: web/1071/rhcrijnstreek.nl - claims_count: 50 + claims_count: 20 claims: - claim_type: org_name claim_value: Regionaal Historisch Centrum Rijnstreek en Lopikerwaard @@ -456,56 +456,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:07.277233+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662 - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[1]/div[3]/div/div/a - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277243+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[1]/div[3]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277254+00:00' - - claim_type: social_facebook - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[1]/div[3]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277259+00:00' - - claim_type: social_facebook - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662&title=%09In%20de%20editie%20van%20november%202025%20van%20Ons%20Voorgeslacht%2C%20het%20maandblad%20van%20de%20Hollandse%20Vereniging%20voor%20Genealogie%20%26%23039%3BOns%20Voorgeslacht%26%23039%3B%2C%20staat%20een%20artikel%20van%20een%20van%20onze%20archivarissen%20Rob%20Alkemade.%20Het%20betreft%20een%20transcriptie%20van%20het%20%26%23039%3BReglement%20en%20lijst%20voor%20het%20wachtlopen%20in%20het%20gerecht%20Lange%20Ruige%20Weide%2C%201609%26%23039%3B%2C%20uit%20het%20archief%20van%20het%20Schoutambacht%20of%20Gerecht%20Lange%20Ruige%20Weide%20%28R027%2C%20inv.nr.%2074%29.Het%20tijdschrift%20is%20op%20de%20studiezaal%20van%20het%20RHC%20Rijnstreek%20en%20Lopikerwaard%20in%20te%20zien.%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662&title=%09In%20de%20editie%20van%20november%202025%20van%20Ons%20Voorgeslacht%2C%20het%20maandblad%20van%20de%20Hollandse%20Vereniging%20voor%20Genealogie%20%26%23039%3BOns%20Voorgeslacht%26%23039%3B%2C%20staat%20een%20artikel%20van%20een%20van%20onze%20archivarissen%20Rob%20Alkemade.%20Het%20betreft%20een%20transcriptie%20van%20het%20%26%23039%3BReglement%20en%20lijst%20voor%20het%20wachtlopen%20in%20het%20gerecht%20Lange%20Ruige%20Weide%2C%201609%26%23039%3B%2C%20uit%20het%20archief%20van%20het%20Schoutambacht%20of%20Gerecht%20Lange%20Ruige%20Weide%20%28R027%2C%20inv.nr.%2074%29.Het%20tijdschrift%20is%20op%20de%20studiezaal%20van%20het%20RHC%20Rijnstreek%20en%20Lopikerwaard%20in%20te%20zien.%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[1]/div[3]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277265+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662&title=%09In%20de%20editie%20van%20november%202025%20van%20Ons%20Voorgeslacht%2C%20het%20maandblad%20van%20de%20Hollandse%20Vereniging%20voor%20Genealogie%20%26%23039%3BOns%20Voorgeslacht%26%23039%3B%2C%20staat%20een%20artikel%20van%20een%20van%20onze%20archivarissen%20Rob%20Alkemade.%20Het%20betreft%20een%20transcriptie%20van%20het%20%26%23039%3BReglement%20en%20lijst%20voor%20het%20wachtlopen%20in%20het%20gerecht%20Lange%20Ruige%20Weide%2C%201609%26%23039%3B%2C%20uit%20het%20archief%20van%20het%20Schoutambacht%20of%20Gerecht%20Lange%20Ruige%20Weide%20%28R027%2C%20inv.nr.%2074%29.Het%20tijdschrift%20is%20op%20de%20studiezaal%20van%20het%20RHC%20Rijnstreek%20en%20Lopikerwaard%20in%20te%20zien.%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662&title=%09In%20de%20editie%20van%20november%202025%20van%20Ons%20Voorgeslacht%2C%20het%20maandblad%20van%20de%20Hollandse%20Vereniging%20voor%20Genealogie%20%26%23039%3BOns%20Voorgeslacht%26%23039%3B%2C%20staat%20een%20artikel%20van%20een%20van%20onze%20archivarissen%20Rob%20Alkemade.%20Het%20betreft%20een%20transcriptie%20van%20het%20%26%23039%3BReglement%20en%20lijst%20voor%20het%20wachtlopen%20in%20het%20gerecht%20Lange%20Ruige%20Weide%2C%201609%26%23039%3B%2C%20uit%20het%20archief%20van%20het%20Schoutambacht%20of%20Gerecht%20Lange%20Ruige%20Weide%20%28R027%2C%20inv.nr.%2074%29.Het%20tijdschrift%20is%20op%20de%20studiezaal%20van%20het%20RHC%20Rijnstreek%20en%20Lopikerwaard%20in%20te%20zien.%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[1]/div[3]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277270+00:00' - claim_type: social_facebook claim_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662%20-%20%0D%0A%09In%20de%20editie%20van%20november%202025%20van%20Ons%20Voorgeslacht%2C%20het%20maandblad%20van%20de%20Hollandse%20Vereniging%20voor%20Genealogie%20%26%23039%3BOns%20Voorgeslacht%26%23039%3B%2C%20staat%20een%20artikel%20van%20een%20van%20onze%20archivarissen%20Rob%20Alkemade.%20Het%20betreft%20een%20transcriptie%20van%20het%20%26%23039%3BReglement%20en%20lijst%20voor%20het%20wachtlopen%20in%20het%20gerecht%20Lange%20Ruige%20Weide%2C%201609%26%23039%3B%2C%20uit%20het%20archief%20van%20het%20Schoutambacht%20of%20Gerecht%20Lange%20Ruige%20Weide%20%28R027%2C%20inv.nr.%2074%29.Het%20tijdschrift%20is%20op%20de%20studiezaal%20van%20het%20RHC%20Rijnstreek%20en%20Lopikerwaard%20in%20te%20zien.%20%09%0D%0A%09 raw_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_774467828971662%20-%20%0D%0A%09In%20de%20editie%20van%20november%202025%20van%20Ons%20Voorgeslacht%2C%20het%20maandblad%20van%20de%20Hollandse%20Vereniging%20voor%20Genealogie%20%26%23039%3BOns%20Voorgeslacht%26%23039%3B%2C%20staat%20een%20artikel%20van%20een%20van%20onze%20archivarissen%20Rob%20Alkemade.%20Het%20betreft%20een%20transcriptie%20van%20het%20%26%23039%3BReglement%20en%20lijst%20voor%20het%20wachtlopen%20in%20het%20gerecht%20Lange%20Ruige%20Weide%2C%201609%26%23039%3B%2C%20uit%20het%20archief%20van%20het%20Schoutambacht%20of%20Gerecht%20Lange%20Ruige%20Weide%20%28R027%2C%20inv.nr.%2074%29.Het%20tijdschrift%20is%20op%20de%20studiezaal%20van%20het%20RHC%20Rijnstreek%20en%20Lopikerwaard%20in%20te%20zien.%20%09%0D%0A%09 @@ -526,56 +476,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:07.277283+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423 - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[2]/div[3]/div/div/a - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277288+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[2]/div[3]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277298+00:00' - - claim_type: social_facebook - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[2]/div[3]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277302+00:00' - - claim_type: social_facebook - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423&title=%09Onlangs%20is%20het%20mooie%20boek%20%26%23039%3BAtlas%20van%20historische%20verdedigingswerken%20in%20Nederland%26%23039%3B%20uitgekomen%2C%20waar%20onder%20andere%20archivaris%20Rob%20Alkemade%20aan%20heeft%20meegewerkt.%20Hij%20schreef%20de%20stukken%20over%20IJsselstein%2C%20Montfoort%2C%20Oudewater%20en%20Woerden.%20U%20kunt%20dit%20boek%20bij%20ons%20op%20de%20studiezaal%20bekijken.%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423&title=%09Onlangs%20is%20het%20mooie%20boek%20%26%23039%3BAtlas%20van%20historische%20verdedigingswerken%20in%20Nederland%26%23039%3B%20uitgekomen%2C%20waar%20onder%20andere%20archivaris%20Rob%20Alkemade%20aan%20heeft%20meegewerkt.%20Hij%20schreef%20de%20stukken%20over%20IJsselstein%2C%20Montfoort%2C%20Oudewater%20en%20Woerden.%20U%20kunt%20dit%20boek%20bij%20ons%20op%20de%20studiezaal%20bekijken.%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[2]/div[3]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277307+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423&title=%09Onlangs%20is%20het%20mooie%20boek%20%26%23039%3BAtlas%20van%20historische%20verdedigingswerken%20in%20Nederland%26%23039%3B%20uitgekomen%2C%20waar%20onder%20andere%20archivaris%20Rob%20Alkemade%20aan%20heeft%20meegewerkt.%20Hij%20schreef%20de%20stukken%20over%20IJsselstein%2C%20Montfoort%2C%20Oudewater%20en%20Woerden.%20U%20kunt%20dit%20boek%20bij%20ons%20op%20de%20studiezaal%20bekijken.%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423&title=%09Onlangs%20is%20het%20mooie%20boek%20%26%23039%3BAtlas%20van%20historische%20verdedigingswerken%20in%20Nederland%26%23039%3B%20uitgekomen%2C%20waar%20onder%20andere%20archivaris%20Rob%20Alkemade%20aan%20heeft%20meegewerkt.%20Hij%20schreef%20de%20stukken%20over%20IJsselstein%2C%20Montfoort%2C%20Oudewater%20en%20Woerden.%20U%20kunt%20dit%20boek%20bij%20ons%20op%20de%20studiezaal%20bekijken.%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[2]/div[3]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277312+00:00' - claim_type: social_facebook claim_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423%20-%20%0D%0A%09Onlangs%20is%20het%20mooie%20boek%20%26%23039%3BAtlas%20van%20historische%20verdedigingswerken%20in%20Nederland%26%23039%3B%20uitgekomen%2C%20waar%20onder%20andere%20archivaris%20Rob%20Alkemade%20aan%20heeft%20meegewerkt.%20Hij%20schreef%20de%20stukken%20over%20IJsselstein%2C%20Montfoort%2C%20Oudewater%20en%20Woerden.%20U%20kunt%20dit%20boek%20bij%20ons%20op%20de%20studiezaal%20bekijken.%20%09%0D%0A%09 raw_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_737203556031423%20-%20%0D%0A%09Onlangs%20is%20het%20mooie%20boek%20%26%23039%3BAtlas%20van%20historische%20verdedigingswerken%20in%20Nederland%26%23039%3B%20uitgekomen%2C%20waar%20onder%20andere%20archivaris%20Rob%20Alkemade%20aan%20heeft%20meegewerkt.%20Hij%20schreef%20de%20stukken%20over%20IJsselstein%2C%20Montfoort%2C%20Oudewater%20en%20Woerden.%20U%20kunt%20dit%20boek%20bij%20ons%20op%20de%20studiezaal%20bekijken.%20%09%0D%0A%09 @@ -596,56 +496,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:07.277325+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306 - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[3]/div[3]/div/div/a - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277330+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[3]/div[3]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277339+00:00' - - claim_type: social_facebook - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[3]/div[3]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277343+00:00' - - claim_type: social_facebook - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306&title=%09Vandaag%20is%20het%20precies%20500%20jaar%20geleden%20dat%20Woerdenaar%20Jan%20de%20Bakker%20in%20Den%20Haag%20werd%20ge%C3%ABxecuteerd%20vanwege%20zijn%20geloof.%20Hij%20was%20daarmee%20de%20eerste%20protestantse%20martelaar%20in%20de%20Noordelijke%20Nederlanden.%20Lees%20meer%20over%20Jan%20de%20Bakker%20op%20onze%20website%3A%20rhcrijnstreek.nl%2Fbronnen%2Flokale-historie%2Fwoerden%2Fwoerden%2Fjan-de-bakker-kerkhervormer-en-martelaar%2Ffoto%3A%20raam%20in%20de%20Petruskerk%20ter%20nagedachtenis%20aan%20Jan%20de%20Bakker%20%28RHC%20Rijnstreek%20en%20Lopikerwaard%2C%20fotonummer%20G1610%29%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306&title=%09Vandaag%20is%20het%20precies%20500%20jaar%20geleden%20dat%20Woerdenaar%20Jan%20de%20Bakker%20in%20Den%20Haag%20werd%20ge%C3%ABxecuteerd%20vanwege%20zijn%20geloof.%20Hij%20was%20daarmee%20de%20eerste%20protestantse%20martelaar%20in%20de%20Noordelijke%20Nederlanden.%20Lees%20meer%20over%20Jan%20de%20Bakker%20op%20onze%20website%3A%20rhcrijnstreek.nl%2Fbronnen%2Flokale-historie%2Fwoerden%2Fwoerden%2Fjan-de-bakker-kerkhervormer-en-martelaar%2Ffoto%3A%20raam%20in%20de%20Petruskerk%20ter%20nagedachtenis%20aan%20Jan%20de%20Bakker%20%28RHC%20Rijnstreek%20en%20Lopikerwaard%2C%20fotonummer%20G1610%29%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[3]/div[3]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277349+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306&title=%09Vandaag%20is%20het%20precies%20500%20jaar%20geleden%20dat%20Woerdenaar%20Jan%20de%20Bakker%20in%20Den%20Haag%20werd%20ge%C3%ABxecuteerd%20vanwege%20zijn%20geloof.%20Hij%20was%20daarmee%20de%20eerste%20protestantse%20martelaar%20in%20de%20Noordelijke%20Nederlanden.%20Lees%20meer%20over%20Jan%20de%20Bakker%20op%20onze%20website%3A%20rhcrijnstreek.nl%2Fbronnen%2Flokale-historie%2Fwoerden%2Fwoerden%2Fjan-de-bakker-kerkhervormer-en-martelaar%2Ffoto%3A%20raam%20in%20de%20Petruskerk%20ter%20nagedachtenis%20aan%20Jan%20de%20Bakker%20%28RHC%20Rijnstreek%20en%20Lopikerwaard%2C%20fotonummer%20G1610%29%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306&title=%09Vandaag%20is%20het%20precies%20500%20jaar%20geleden%20dat%20Woerdenaar%20Jan%20de%20Bakker%20in%20Den%20Haag%20werd%20ge%C3%ABxecuteerd%20vanwege%20zijn%20geloof.%20Hij%20was%20daarmee%20de%20eerste%20protestantse%20martelaar%20in%20de%20Noordelijke%20Nederlanden.%20Lees%20meer%20over%20Jan%20de%20Bakker%20op%20onze%20website%3A%20rhcrijnstreek.nl%2Fbronnen%2Flokale-historie%2Fwoerden%2Fwoerden%2Fjan-de-bakker-kerkhervormer-en-martelaar%2Ffoto%3A%20raam%20in%20de%20Petruskerk%20ter%20nagedachtenis%20aan%20Jan%20de%20Bakker%20%28RHC%20Rijnstreek%20en%20Lopikerwaard%2C%20fotonummer%20G1610%29%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[3]/div[3]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277353+00:00' - claim_type: social_facebook claim_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306%20-%20%0D%0A%09Vandaag%20is%20het%20precies%20500%20jaar%20geleden%20dat%20Woerdenaar%20Jan%20de%20Bakker%20in%20Den%20Haag%20werd%20ge%C3%ABxecuteerd%20vanwege%20zijn%20geloof.%20Hij%20was%20daarmee%20de%20eerste%20protestantse%20martelaar%20in%20de%20Noordelijke%20Nederlanden.%20Lees%20meer%20over%20Jan%20de%20Bakker%20op%20onze%20website%3A%20rhcrijnstreek.nl%2Fbronnen%2Flokale-historie%2Fwoerden%2Fwoerden%2Fjan-de-bakker-kerkhervormer-en-martelaar%2Ffoto%3A%20raam%20in%20de%20Petruskerk%20ter%20nagedachtenis%20aan%20Jan%20de%20Bakker%20%28RHC%20Rijnstreek%20en%20Lopikerwaard%2C%20fotonummer%20G1610%29%20%09%0D%0A%09 raw_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_723941394024306%20-%20%0D%0A%09Vandaag%20is%20het%20precies%20500%20jaar%20geleden%20dat%20Woerdenaar%20Jan%20de%20Bakker%20in%20Den%20Haag%20werd%20ge%C3%ABxecuteerd%20vanwege%20zijn%20geloof.%20Hij%20was%20daarmee%20de%20eerste%20protestantse%20martelaar%20in%20de%20Noordelijke%20Nederlanden.%20Lees%20meer%20over%20Jan%20de%20Bakker%20op%20onze%20website%3A%20rhcrijnstreek.nl%2Fbronnen%2Flokale-historie%2Fwoerden%2Fwoerden%2Fjan-de-bakker-kerkhervormer-en-martelaar%2Ffoto%3A%20raam%20in%20de%20Petruskerk%20ter%20nagedachtenis%20aan%20Jan%20de%20Bakker%20%28RHC%20Rijnstreek%20en%20Lopikerwaard%2C%20fotonummer%20G1610%29%20%09%0D%0A%09 @@ -686,56 +536,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:07.277377+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273 - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[4]/div[3]/div/div/a - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277382+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[4]/div[3]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277391+00:00' - - claim_type: social_facebook - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[4]/div[3]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277395+00:00' - - claim_type: social_facebook - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273&title=%09Dit%20weekend%20is%20het%20weer%20%23openmonumentendag%21%20Dit%20jaar%20hebben%20leerlingen%20van%20een%20school%20in%20%23Kamerik%20vlogs%20gemaakt%20voor%20vijf%20monumentale%20gebouwen%20in%20hun%20dorp%2C%20met%20behulp%20van%20het%20RHC.%20Benieuwd%20naar%20het%20resultaat%3F%20De%20filmpjes%20zijn%20te%20zien%20bij%20de%20plekken%20zelf%20met%20een%20QR-code%20te%20bekijken%21%20Klik%20op%20deze%20link%20voor%20meer%20informatie%3A%20www.openmonumentendag.nl%2Fwaar-wil-je-naartoe%2F%3Fsearch_type%3Dplaats%26amp%3Bsearch_municipality%3DWoerden%26amp%3Bplaa...%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273&title=%09Dit%20weekend%20is%20het%20weer%20%23openmonumentendag%21%20Dit%20jaar%20hebben%20leerlingen%20van%20een%20school%20in%20%23Kamerik%20vlogs%20gemaakt%20voor%20vijf%20monumentale%20gebouwen%20in%20hun%20dorp%2C%20met%20behulp%20van%20het%20RHC.%20Benieuwd%20naar%20het%20resultaat%3F%20De%20filmpjes%20zijn%20te%20zien%20bij%20de%20plekken%20zelf%20met%20een%20QR-code%20te%20bekijken%21%20Klik%20op%20deze%20link%20voor%20meer%20informatie%3A%20www.openmonumentendag.nl%2Fwaar-wil-je-naartoe%2F%3Fsearch_type%3Dplaats%26amp%3Bsearch_municipality%3DWoerden%26amp%3Bplaa...%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[4]/div[3]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277401+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273&title=%09Dit%20weekend%20is%20het%20weer%20%23openmonumentendag%21%20Dit%20jaar%20hebben%20leerlingen%20van%20een%20school%20in%20%23Kamerik%20vlogs%20gemaakt%20voor%20vijf%20monumentale%20gebouwen%20in%20hun%20dorp%2C%20met%20behulp%20van%20het%20RHC.%20Benieuwd%20naar%20het%20resultaat%3F%20De%20filmpjes%20zijn%20te%20zien%20bij%20de%20plekken%20zelf%20met%20een%20QR-code%20te%20bekijken%21%20Klik%20op%20deze%20link%20voor%20meer%20informatie%3A%20www.openmonumentendag.nl%2Fwaar-wil-je-naartoe%2F%3Fsearch_type%3Dplaats%26amp%3Bsearch_municipality%3DWoerden%26amp%3Bplaa...%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273&title=%09Dit%20weekend%20is%20het%20weer%20%23openmonumentendag%21%20Dit%20jaar%20hebben%20leerlingen%20van%20een%20school%20in%20%23Kamerik%20vlogs%20gemaakt%20voor%20vijf%20monumentale%20gebouwen%20in%20hun%20dorp%2C%20met%20behulp%20van%20het%20RHC.%20Benieuwd%20naar%20het%20resultaat%3F%20De%20filmpjes%20zijn%20te%20zien%20bij%20de%20plekken%20zelf%20met%20een%20QR-code%20te%20bekijken%21%20Klik%20op%20deze%20link%20voor%20meer%20informatie%3A%20www.openmonumentendag.nl%2Fwaar-wil-je-naartoe%2F%3Fsearch_type%3Dplaats%26amp%3Bsearch_municipality%3DWoerden%26amp%3Bplaa...%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[4]/div[3]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277405+00:00' - claim_type: social_facebook claim_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273%20-%20%0D%0A%09Dit%20weekend%20is%20het%20weer%20%23openmonumentendag%21%20Dit%20jaar%20hebben%20leerlingen%20van%20een%20school%20in%20%23Kamerik%20vlogs%20gemaakt%20voor%20vijf%20monumentale%20gebouwen%20in%20hun%20dorp%2C%20met%20behulp%20van%20het%20RHC.%20Benieuwd%20naar%20het%20resultaat%3F%20De%20filmpjes%20zijn%20te%20zien%20bij%20de%20plekken%20zelf%20met%20een%20QR-code%20te%20bekijken%21%20Klik%20op%20deze%20link%20voor%20meer%20informatie%3A%20www.openmonumentendag.nl%2Fwaar-wil-je-naartoe%2F%3Fsearch_type%3Dplaats%26amp%3Bsearch_municipality%3DWoerden%26amp%3Bplaa...%20%09%0D%0A%09 raw_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_720538391031273%20-%20%0D%0A%09Dit%20weekend%20is%20het%20weer%20%23openmonumentendag%21%20Dit%20jaar%20hebben%20leerlingen%20van%20een%20school%20in%20%23Kamerik%20vlogs%20gemaakt%20voor%20vijf%20monumentale%20gebouwen%20in%20hun%20dorp%2C%20met%20behulp%20van%20het%20RHC.%20Benieuwd%20naar%20het%20resultaat%3F%20De%20filmpjes%20zijn%20te%20zien%20bij%20de%20plekken%20zelf%20met%20een%20QR-code%20te%20bekijken%21%20Klik%20op%20deze%20link%20voor%20meer%20informatie%3A%20www.openmonumentendag.nl%2Fwaar-wil-je-naartoe%2F%3Fsearch_type%3Dplaats%26amp%3Bsearch_municipality%3DWoerden%26amp%3Bplaa...%20%09%0D%0A%09 @@ -756,56 +556,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:07.277418+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119 - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[5]/div[3]/div/div/a - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277423+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[5]/div[3]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277432+00:00' - - claim_type: social_facebook - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[5]/div[3]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277436+00:00' - - claim_type: social_facebook - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119&title=%09Wij%20zijn%20nog%20altijd%20op%20zoek%20naar%20vrijwilligers%20die%20aan%20onze%20crowdsourceprojecten%20vanuit%20huis%20willen%20meewerken.%20Momenteel%20indiceren%20%28digitaliseren%29%20onze%20vrijwilligers%20de%20bevolkingsregisters.%20Het%20nieuwste%20project%20is%20de%20bevolkingsregister%20van%20Harmelen%20van%201870-1880.%20Heeft%20u%20Harmelense%20voorouders%20uit%20die%20periode%3F%20Wellicht%20komt%20u%20ze%20tegen%21%20Klik%20op%20deze%20link%20rhcrijnstreek.nl%2Fbronnen%2Fcrowdsourcing%2F%20en%20meld%20u%20aan.%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119&title=%09Wij%20zijn%20nog%20altijd%20op%20zoek%20naar%20vrijwilligers%20die%20aan%20onze%20crowdsourceprojecten%20vanuit%20huis%20willen%20meewerken.%20Momenteel%20indiceren%20%28digitaliseren%29%20onze%20vrijwilligers%20de%20bevolkingsregisters.%20Het%20nieuwste%20project%20is%20de%20bevolkingsregister%20van%20Harmelen%20van%201870-1880.%20Heeft%20u%20Harmelense%20voorouders%20uit%20die%20periode%3F%20Wellicht%20komt%20u%20ze%20tegen%21%20Klik%20op%20deze%20link%20rhcrijnstreek.nl%2Fbronnen%2Fcrowdsourcing%2F%20en%20meld%20u%20aan.%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[5]/div[3]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277473+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119&title=%09Wij%20zijn%20nog%20altijd%20op%20zoek%20naar%20vrijwilligers%20die%20aan%20onze%20crowdsourceprojecten%20vanuit%20huis%20willen%20meewerken.%20Momenteel%20indiceren%20%28digitaliseren%29%20onze%20vrijwilligers%20de%20bevolkingsregisters.%20Het%20nieuwste%20project%20is%20de%20bevolkingsregister%20van%20Harmelen%20van%201870-1880.%20Heeft%20u%20Harmelense%20voorouders%20uit%20die%20periode%3F%20Wellicht%20komt%20u%20ze%20tegen%21%20Klik%20op%20deze%20link%20rhcrijnstreek.nl%2Fbronnen%2Fcrowdsourcing%2F%20en%20meld%20u%20aan.%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119&title=%09Wij%20zijn%20nog%20altijd%20op%20zoek%20naar%20vrijwilligers%20die%20aan%20onze%20crowdsourceprojecten%20vanuit%20huis%20willen%20meewerken.%20Momenteel%20indiceren%20%28digitaliseren%29%20onze%20vrijwilligers%20de%20bevolkingsregisters.%20Het%20nieuwste%20project%20is%20de%20bevolkingsregister%20van%20Harmelen%20van%201870-1880.%20Heeft%20u%20Harmelense%20voorouders%20uit%20die%20periode%3F%20Wellicht%20komt%20u%20ze%20tegen%21%20Klik%20op%20deze%20link%20rhcrijnstreek.nl%2Fbronnen%2Fcrowdsourcing%2F%20en%20meld%20u%20aan.%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[5]/div[3]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277498+00:00' - claim_type: social_facebook claim_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119%20-%20%0D%0A%09Wij%20zijn%20nog%20altijd%20op%20zoek%20naar%20vrijwilligers%20die%20aan%20onze%20crowdsourceprojecten%20vanuit%20huis%20willen%20meewerken.%20Momenteel%20indiceren%20%28digitaliseren%29%20onze%20vrijwilligers%20de%20bevolkingsregisters.%20Het%20nieuwste%20project%20is%20de%20bevolkingsregister%20van%20Harmelen%20van%201870-1880.%20Heeft%20u%20Harmelense%20voorouders%20uit%20die%20periode%3F%20Wellicht%20komt%20u%20ze%20tegen%21%20Klik%20op%20deze%20link%20rhcrijnstreek.nl%2Fbronnen%2Fcrowdsourcing%2F%20en%20meld%20u%20aan.%20%09%0D%0A%09 raw_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_670506602701119%20-%20%0D%0A%09Wij%20zijn%20nog%20altijd%20op%20zoek%20naar%20vrijwilligers%20die%20aan%20onze%20crowdsourceprojecten%20vanuit%20huis%20willen%20meewerken.%20Momenteel%20indiceren%20%28digitaliseren%29%20onze%20vrijwilligers%20de%20bevolkingsregisters.%20Het%20nieuwste%20project%20is%20de%20bevolkingsregister%20van%20Harmelen%20van%201870-1880.%20Heeft%20u%20Harmelense%20voorouders%20uit%20die%20periode%3F%20Wellicht%20komt%20u%20ze%20tegen%21%20Klik%20op%20deze%20link%20rhcrijnstreek.nl%2Fbronnen%2Fcrowdsourcing%2F%20en%20meld%20u%20aan.%20%09%0D%0A%09 @@ -826,56 +576,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:07.277535+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970 - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[6]/div[2]/div/div/a - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277546+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[6]/div[2]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277567+00:00' - - claim_type: social_facebook - claim_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970 - raw_value: https://twitter.com/intent/tweet?text=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[6]/div[2]/div/div/div/a[2] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277576+00:00' - - claim_type: social_facebook - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970&title=%09rhcrijnstreek.nl%2Fnieuws%2Fnieuw-archief-ontvangen-r130-hoge-heerlijkheid-vrijenes-en-de-ambachtshee...%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970&title=%09rhcrijnstreek.nl%2Fnieuws%2Fnieuw-archief-ontvangen-r130-hoge-heerlijkheid-vrijenes-en-de-ambachtshee...%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[6]/div[2]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277589+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970&title=%09rhcrijnstreek.nl%2Fnieuws%2Fnieuw-archief-ontvangen-r130-hoge-heerlijkheid-vrijenes-en-de-ambachtshee...%20%09%09 - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970&title=%09rhcrijnstreek.nl%2Fnieuws%2Fnieuw-archief-ontvangen-r130-hoge-heerlijkheid-vrijenes-en-de-ambachtshee...%20%09%09 - source_url: http://www.rhcrijnstreek.nl/ - retrieved_on: '2025-11-29T18:10:54.298517+00:00' - xpath: /html/body/div[3]/div/div[1]/div[1]/div/div/div[6]/div[2]/div/div/div/a[3] - html_file: web/1071/rhcrijnstreek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:07.277598+00:00' - claim_type: social_facebook claim_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970%20-%20%0D%0A%09rhcrijnstreek.nl%2Fnieuws%2Fnieuw-archief-ontvangen-r130-hoge-heerlijkheid-vrijenes-en-de-ambachtshee...%20%09%0D%0A%09 raw_value: mailto:?subject=Facebook&body=https%3A%2F%2Fwww.facebook.com%2F106853325717583_659141427170970%20-%20%0D%0A%09rhcrijnstreek.nl%2Fnieuws%2Fnieuw-archief-ontvangen-r130-hoge-heerlijkheid-vrijenes-en-de-ambachtshee...%20%09%0D%0A%09 @@ -896,6 +596,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:07.277722+00:00' + removed_invalid_claims: + - removed_count: 30 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: RHC Rijnstreek en Lopikerwaard diff --git a/data/nde/enriched/entries/1074_Q1766396.yaml b/data/nde/enriched/entries/1074_Q1766396.yaml index 42d1c8eee7..9ba50040c2 100644 --- a/data/nde/enriched/entries/1074_Q1766396.yaml +++ b/data/nde/enriched/entries/1074_Q1766396.yaml @@ -3769,18 +3769,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:08.284788+00:00' source_archive: web/1074/cultureelerfgoed.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Rijksdienst voor het Cultureel Erfgoed - source_url: https://www.cultureelerfgoed.nl/ - retrieved_on: '2025-11-29T18:11:00.240400+00:00' - xpath: /html/head/title - html_file: web/1074/cultureelerfgoed.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:08.284238+00:00' - claim_type: description_short claim_value: De Rijksdienst voor het Cultureel Erfgoed is een onderdeel van het ministerie van Onderwijs, Cultuur en Wetenschap. We werken onder de rechtstreekse @@ -3837,6 +3827,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:08.284636+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijksdienst voor het Cultureel Erfgoed diff --git a/data/nde/enriched/entries/1075_Q2131198.yaml b/data/nde/enriched/entries/1075_Q2131198.yaml index 28bb530cc7..7902a6393d 100644 --- a/data/nde/enriched/entries/1075_Q2131198.yaml +++ b/data/nde/enriched/entries/1075_Q2131198.yaml @@ -1089,7 +1089,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:08.467038+00:00' source_archive: web/1075/slotzuylen.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Kasteel @@ -1115,16 +1115,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:48:08.465736+00:00' - - claim_type: org_name - claim_value: Kasteel - Slot Zuylen - raw_value: Kasteel - Slot Zuylen - source_url: http://www.slotzuylen.nl/ - retrieved_on: '2025-11-29T18:11:01.685354+00:00' - xpath: /html/head/meta[10] - html_file: web/1075/slotzuylen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:48:08.465963+00:00' - claim_type: email claim_value: horeca@slotzuylen.nl raw_value: horeca@slotzuylen.nl @@ -1185,16 +1175,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:08.466452+00:00' - - claim_type: org_name - claim_value: Een reis door 500 jaar geschiedenis in Slot zuylen - raw_value: Een reis door 500 jaar geschiedenis in Slot zuylen - source_url: http://www.slotzuylen.nl/ - retrieved_on: '2025-11-29T18:11:01.685354+00:00' - xpath: /html/body/div[2]/section[1]/div[3]/div/div/div[1]/div/h1 - html_file: web/1075/slotzuylen.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:48:08.466613+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Slot Zuylen diff --git a/data/nde/enriched/entries/1081_unknown.yaml b/data/nde/enriched/entries/1081_unknown.yaml index 2697a18ac0..54db93e09a 100644 --- a/data/nde/enriched/entries/1081_unknown.yaml +++ b/data/nde/enriched/entries/1081_unknown.yaml @@ -189,18 +189,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:09.589854+00:00' source_archive: web/1081/lokaalausterlitz.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Lokaal Austerlitz - source_url: https://www.lokaalausterlitz.nl/ - retrieved_on: '2025-11-29T18:12:02.778186+00:00' - xpath: /html/head/title - html_file: web/1081/lokaalausterlitz.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:09.589004+00:00' - claim_type: org_name claim_value: nieuws & agenda raw_value: nieuws & agenda @@ -211,6 +201,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:09.589590+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Lokaal Austerlitz (SLA) diff --git a/data/nde/enriched/entries/1087_Q29944161.yaml b/data/nde/enriched/entries/1087_Q29944161.yaml index d11044afe5..dd2d09ff11 100644 --- a/data/nde/enriched/entries/1087_Q29944161.yaml +++ b/data/nde/enriched/entries/1087_Q29944161.yaml @@ -649,18 +649,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:11.147503+00:00' source_archive: web/1087/touwmuseum.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Touwmuseum Oudewater - source_url: http://www.touwmuseum.nl - retrieved_on: '2025-11-29T18:14:38.012466+00:00' - xpath: /html/head/title - html_file: web/1087/touwmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:11.146616+00:00' - claim_type: description_short claim_value: Waarom is Oudewater zo'n leuk stadje? Dat komt door de touwindustrie. Hoe dat werkt. Kom naar het museum en sla zelf een touwtje! @@ -723,6 +713,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:11.147369+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Touwmuseum diff --git a/data/nde/enriched/entries/1089_Q2519854.yaml b/data/nde/enriched/entries/1089_Q2519854.yaml index 025cb427cb..dfa055759f 100644 --- a/data/nde/enriched/entries/1089_Q2519854.yaml +++ b/data/nde/enriched/entries/1089_Q2519854.yaml @@ -603,18 +603,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:11.665953+00:00' source_archive: web/1089/vechtstreekmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Vechtstreek Museum - source_url: http://www.vechtstreekmuseum.nl - retrieved_on: '2025-11-29T18:15:22.350804+00:00' - xpath: /html/head/title - html_file: web/1089/vechtstreekmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:11.665438+00:00' - claim_type: description_short claim_value: Ontdek de cultuur, geschiedenis en kunst van de hele Vechtstreek. In het museum zijn steeds meerdere tentoonstellingen te zien, worden regelmatig @@ -701,6 +691,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:11.665902+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Vechtstreekmuseum diff --git a/data/nde/enriched/entries/1093_unknown.yaml b/data/nde/enriched/entries/1093_unknown.yaml index 9361beee40..53133e7a46 100644 --- a/data/nde/enriched/entries/1093_unknown.yaml +++ b/data/nde/enriched/entries/1093_unknown.yaml @@ -310,18 +310,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:12.641411+00:00' source_archive: web/1093/collectiezeeland.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Collectie Zeeland - source_url: https://collectiezeeland.nl/ - retrieved_on: '2025-11-29T18:15:08.705417+00:00' - xpath: /html/head/title - html_file: web/1093/collectiezeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:12.639801+00:00' - claim_type: org_name claim_value: Collectie Zeeland raw_value: Collectie Zeeland @@ -372,6 +362,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:12.640847+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Collectie Zeeland diff --git a/data/nde/enriched/entries/1095_Q55076484.yaml b/data/nde/enriched/entries/1095_Q55076484.yaml index 76108ccd2c..9d37abc401 100644 --- a/data/nde/enriched/entries/1095_Q55076484.yaml +++ b/data/nde/enriched/entries/1095_Q55076484.yaml @@ -521,18 +521,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:13.216573+00:00' source_archive: web/1095/het-vlaemsche-erfgoed.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Het Vlaemsche erfgoed - source_url: http://www.het-vlaemsche-erfgoed.nl/ - retrieved_on: '2025-11-30T00:02:49.180841+00:00' - xpath: /html/head/title - html_file: web/1095/het-vlaemsche-erfgoed.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:13.215921+00:00' - claim_type: description_short claim_value: Het Vlaemsche Erfgoed biedt een kijkje in een Zeeuws-Vlaams landbouwdorp in de eerste helft van de 20e eeuw. @@ -565,6 +555,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:13.216464+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het Vlaemsche Erfgoed diff --git a/data/nde/enriched/entries/1096_Q18775346.yaml b/data/nde/enriched/entries/1096_Q18775346.yaml index 74f80e4989..cc03596972 100644 --- a/data/nde/enriched/entries/1096_Q18775346.yaml +++ b/data/nde/enriched/entries/1096_Q18775346.yaml @@ -578,18 +578,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:13.666981+00:00' source_archive: web/1096/werfarnemuiden.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: home - raw_value: home - Werf Arnemuiden - source_url: https://werfarnemuiden.nl/ - retrieved_on: '2025-11-29T18:14:54.828197+00:00' - xpath: /html/head/title - html_file: web/1096/werfarnemuiden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:13.666069+00:00' - claim_type: description_short claim_value: Werf Arnemuiden is een werkende museumwerf. Bezoek de werf of zet je boot op de helling voor onderhoud. @@ -652,6 +642,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:13.666743+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Scheepswerf Meerman diff --git a/data/nde/enriched/entries/1097_Q56460926.yaml b/data/nde/enriched/entries/1097_Q56460926.yaml index 186f9396f0..e26ac41e20 100644 --- a/data/nde/enriched/entries/1097_Q56460926.yaml +++ b/data/nde/enriched/entries/1097_Q56460926.yaml @@ -742,18 +742,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:13.749487+00:00' source_archive: web/1097/industrieelmuseumzeeland.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.industrieelmuseumzeeland.nl - retrieved_on: '2025-11-29T18:14:55.580275+00:00' - xpath: /html/head/title - html_file: web/1097/industrieelmuseumzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:13.748983+00:00' - claim_type: email claim_value: info@industrieelmuseumzeeland.nl raw_value: info@industrieelmuseumzeeland.nl @@ -824,6 +814,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:13.749433+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Industrieel Museum Zeeland diff --git a/data/nde/enriched/entries/1102_unknown.yaml b/data/nde/enriched/entries/1102_unknown.yaml index 23d84699d0..8575f63f00 100644 --- a/data/nde/enriched/entries/1102_unknown.yaml +++ b/data/nde/enriched/entries/1102_unknown.yaml @@ -326,18 +326,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:15.031539+00:00' source_archive: web/1102/hetwarenhuis.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.hetwarenhuis.nl/ - retrieved_on: '2025-11-30T00:04:03.280435+00:00' - xpath: /html/head/title - html_file: web/1102/hetwarenhuis.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:15.029596+00:00' - claim_type: phone claim_value: '+31115562885' raw_value: '+31115562885' @@ -418,6 +408,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:15.031205+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Het Warenhuis diff --git a/data/nde/enriched/entries/1108_Q2643296.yaml b/data/nde/enriched/entries/1108_Q2643296.yaml index ff3e93498f..717c41309d 100644 --- a/data/nde/enriched/entries/1108_Q2643296.yaml +++ b/data/nde/enriched/entries/1108_Q2643296.yaml @@ -693,18 +693,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:16.728758+00:00' source_archive: web/1108/terramaris.nl - claims_count: 10 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Terra Maris - source_url: http://www.terramaris.nl - retrieved_on: '2025-11-29T18:15:56.005047+00:00' - xpath: /html/head/title - html_file: web/1108/terramaris.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:16.727716+00:00' - claim_type: org_name claim_value: Terra Maris raw_value: Terra Maris @@ -755,36 +745,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:16.728572+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https://www.terramaris.nl/&title=&summary=&source= - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https://www.terramaris.nl/&title=&summary=&source= - source_url: http://www.terramaris.nl - retrieved_on: '2025-11-29T18:15:56.005047+00:00' - xpath: /html/body/div[2]/div/div/div/ul/li[1]/a - html_file: web/1108/terramaris.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:16.728581+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://www.terramaris.nl/ - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://www.terramaris.nl/ - source_url: http://www.terramaris.nl - retrieved_on: '2025-11-29T18:15:56.005047+00:00' - xpath: /html/body/div[2]/div/div/div/ul/li[2]/a - html_file: web/1108/terramaris.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:16.728585+00:00' - - claim_type: social_twitter - claim_value: http://twitter.com/share?text=Home&url=https://www.terramaris.nl/ - raw_value: http://twitter.com/share?text=Home&url=https://www.terramaris.nl/ - source_url: http://www.terramaris.nl - retrieved_on: '2025-11-29T18:15:56.005047+00:00' - xpath: /html/body/div[2]/div/div/div/ul/li[3]/a - html_file: web/1108/terramaris.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:16.728588+00:00' - claim_type: org_name claim_value: Ontdek de unieke natuur van zeeland raw_value: Ontdek de unieke natuur van zeeland @@ -795,6 +755,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:16.728628+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Terra Maris diff --git a/data/nde/enriched/entries/1109_Q55076504.yaml b/data/nde/enriched/entries/1109_Q55076504.yaml index fc0d37535c..3951d4f8ba 100644 --- a/data/nde/enriched/entries/1109_Q55076504.yaml +++ b/data/nde/enriched/entries/1109_Q55076504.yaml @@ -673,18 +673,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:17.198387+00:00' source_archive: web/1109/museumbreskens.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Visserijmuseum Breskens - source_url: http://www.museumbreskens.nl - retrieved_on: '2025-11-29T18:15:26.498950+00:00' - xpath: /html/head/title - html_file: web/1109/museumbreskens.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:17.197925+00:00' - claim_type: description_short claim_value: Duik in de wereld van de zee bij het Visserijmuseum Breskens! Fascinerende verhalen over vissers, unieke fossielen en een prachtig aquarium. @@ -717,6 +707,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:48:17.198230+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Visserijmuseum diff --git a/data/nde/enriched/entries/1113_Q1852477.yaml b/data/nde/enriched/entries/1113_Q1852477.yaml index 516d38b9e4..fdc3926701 100644 --- a/data/nde/enriched/entries/1113_Q1852477.yaml +++ b/data/nde/enriched/entries/1113_Q1852477.yaml @@ -850,18 +850,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:17.912679+00:00' source_archive: web/1113/bevrijdingsmuseumzeeland.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Bevrijdingsmuseum Zeeland - source_url: https://bevrijdingsmuseumzeeland.nl/ - retrieved_on: '2025-11-29T18:18:37.349560+00:00' - xpath: /html[1]/head/title - html_file: web/1113/bevrijdingsmuseumzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:17.910796+00:00' - claim_type: description_short claim_value: Ontdek het hele verhaal van de Tweede Wereldoorlog en de Slag om de Schelde in het Bevrijdingsmuseum Zeeland, met persoonlijke verhalen en authentieke @@ -906,6 +896,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:48:17.912172+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bevrijdingsmuseum Zeeland diff --git a/data/nde/enriched/entries/1117_unknown.yaml b/data/nde/enriched/entries/1117_unknown.yaml index 352c24fb8c..583e28568c 100644 --- a/data/nde/enriched/entries/1117_unknown.yaml +++ b/data/nde/enriched/entries/1117_unknown.yaml @@ -244,18 +244,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:18.937451+00:00' source_archive: web/1117/borsele.nl - claims_count: 14 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Borsele - source_url: https://www.borsele.nl/gemeentearchief - retrieved_on: '2025-11-29T18:16:12.394169+00:00' - xpath: /html/head/title - html_file: web/1117/borsele.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:18.936510+00:00' - claim_type: org_name claim_value: Gemeente Borsele raw_value: Gemeente Borsele @@ -276,46 +266,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:48:18.936528+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.borsele.nl/gemeentearchief - retrieved_on: '2025-11-29T18:16:12.394169+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[1]/ul/li[1]/a/svg/title - html_file: web/1117/borsele.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:18.936537+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: https://www.borsele.nl/gemeentearchief - retrieved_on: '2025-11-29T18:16:12.394169+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[1]/ul/li[2]/a/svg/title - html_file: web/1117/borsele.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:18.936541+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.borsele.nl/gemeentearchief - retrieved_on: '2025-11-29T18:16:12.394169+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[1]/ul/li[3]/a/svg/title - html_file: web/1117/borsele.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:18.936546+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.borsele.nl/gemeentearchief - retrieved_on: '2025-11-29T18:16:12.394169+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[1]/ul/li[4]/a/svg/title - html_file: web/1117/borsele.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:18.936550+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -386,6 +336,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:18.937189+00:00' + removed_invalid_claims: + - removed_count: 5 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeente Borsele diff --git a/data/nde/enriched/entries/1118_Q70354640.yaml b/data/nde/enriched/entries/1118_Q70354640.yaml index 88edc8598e..d364872b3e 100644 --- a/data/nde/enriched/entries/1118_Q70354640.yaml +++ b/data/nde/enriched/entries/1118_Q70354640.yaml @@ -498,18 +498,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:19.000262+00:00' source_archive: web/1118/gemeentearchiefgoes.nl - claims_count: 8 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeentearchief Goes - source_url: https://www.gemeentearchiefgoes.nl/ - retrieved_on: '2025-11-29T18:16:13.019247+00:00' - xpath: /html/head/title - html_file: web/1118/gemeentearchiefgoes.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:18.999558+00:00' - claim_type: org_name claim_value: Gemeentearchief Goes raw_value: Gemeentearchief Goes @@ -530,16 +520,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:48:18.999595+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.gemeentearchiefgoes.nl/ - retrieved_on: '2025-11-29T18:16:13.019247+00:00' - xpath: /html/body/div[2]/div/footer/div[1]/div/div[1]/ul/li[1]/a/svg/title - html_file: web/1118/gemeentearchiefgoes.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:18.999613+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -580,6 +560,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:19.000132+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Goes diff --git a/data/nde/enriched/entries/1119_Q81181278.yaml b/data/nde/enriched/entries/1119_Q81181278.yaml index 8384013bdf..2f795002f7 100644 --- a/data/nde/enriched/entries/1119_Q81181278.yaml +++ b/data/nde/enriched/entries/1119_Q81181278.yaml @@ -537,18 +537,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:19.137289+00:00' source_archive: web/1119/gemeentehulst.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Archief - raw_value: Archief - gemeente Hulst - source_url: https://www.gemeentehulst.nl/de_Gemeente/Archief/ - retrieved_on: '2025-11-29T18:16:13.799455+00:00' - xpath: /html/head/title - html_file: web/1119/gemeentehulst.nl/mirror/www.gemeentehulst.nl/de_Gemeente/Archief/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:19.135755+00:00' - claim_type: social_twitter claim_value: http://www.twitter.com/gemeentehulst raw_value: http://www.twitter.com/gemeentehulst @@ -559,6 +549,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:19.137055+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Hulst diff --git a/data/nde/enriched/entries/1122_Q111363451.yaml b/data/nde/enriched/entries/1122_Q111363451.yaml index 738032e339..8dc1e70650 100644 --- a/data/nde/enriched/entries/1122_Q111363451.yaml +++ b/data/nde/enriched/entries/1122_Q111363451.yaml @@ -465,18 +465,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:19.651172+00:00' source_archive: web/1122/archieftholen.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Archief Tholen - source_url: https://archieftholen.nl/ - retrieved_on: '2025-11-29T18:16:14.825364+00:00' - xpath: /html/head/title - html_file: web/1122/archieftholen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:19.650709+00:00' - claim_type: description_short claim_value: Gemeentearchief Tholen, met toegang tot de beeldbank, voorouders (genealogie) en archieven. @@ -539,6 +529,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:19.651106+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Tholen diff --git a/data/nde/enriched/entries/1125_Q56459326.yaml b/data/nde/enriched/entries/1125_Q56459326.yaml index 80c85dbe1c..675e5d3550 100644 --- a/data/nde/enriched/entries/1125_Q56459326.yaml +++ b/data/nde/enriched/entries/1125_Q56459326.yaml @@ -626,18 +626,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:20.240274+00:00' source_archive: web/1125/hetwarenhuis.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.hetwarenhuis.nl - retrieved_on: '2025-11-29T18:16:18.283712+00:00' - xpath: /html/head/title - html_file: web/1125/hetwarenhuis.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:20.238327+00:00' - claim_type: phone claim_value: '+31115562885' raw_value: '+31115562885' @@ -718,6 +708,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:20.239883+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het Warenhuis - Museum Het Land van Axel diff --git a/data/nde/enriched/entries/1131_Q4288330.yaml b/data/nde/enriched/entries/1131_Q4288330.yaml index 3731c5f26b..83895916b6 100644 --- a/data/nde/enriched/entries/1131_Q4288330.yaml +++ b/data/nde/enriched/entries/1131_Q4288330.yaml @@ -702,18 +702,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:21.325109+00:00' source_archive: web/1131/museumveere.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Veere - source_url: http://www.museumveere.nl - retrieved_on: '2025-11-29T18:16:40.165784+00:00' - xpath: /html/head/title - html_file: web/1131/museumveere.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:21.323384+00:00' - claim_type: description_short claim_value: 'Welkom bij Museum Veere Ontdek 1.000 jaar geschiedenis Plan je bezoek Beleef 1.000 jaar geschiedenis van Veere Za en zo: 11.00 -17.00 uurMa t/m vrij: @@ -788,6 +778,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:21.324929+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Veere diff --git a/data/nde/enriched/entries/1132_Q56459493.yaml b/data/nde/enriched/entries/1132_Q56459493.yaml index 58891c2edb..9dd0fc371e 100644 --- a/data/nde/enriched/entries/1132_Q56459493.yaml +++ b/data/nde/enriched/entries/1132_Q56459493.yaml @@ -544,18 +544,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:21.488321+00:00' source_archive: web/1132/goemanszorg.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Goemanszorg - source_url: http://www.goemanszorg.nl - retrieved_on: '2025-11-29T18:16:43.505113+00:00' - xpath: /html/head/title - html_file: web/1132/goemanszorg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:21.485392+00:00' - claim_type: description_short claim_value: 'Dit zijn de vijf belangrijkste redenen om Museumboerderij Goemanszorg dit jaar te bezoeken:' @@ -628,6 +618,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:21.487986+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Goemanszorg diff --git a/data/nde/enriched/entries/1133_Q110282067.yaml b/data/nde/enriched/entries/1133_Q110282067.yaml index a29981a692..85a06914c8 100644 --- a/data/nde/enriched/entries/1133_Q110282067.yaml +++ b/data/nde/enriched/entries/1133_Q110282067.yaml @@ -514,18 +514,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:21.607101+00:00' source_archive: web/1133/museumhavenzeeland.nl - claims_count: 10 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museumhaven Zeeland - source_url: https://www.museumhavenzeeland.nl/ - retrieved_on: '2025-11-29T18:16:45.819978+00:00' - xpath: /html/head/title - html_file: web/1133/museumhavenzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:21.606226+00:00' - claim_type: org_name claim_value: Museumhaven Zeeland raw_value: Museumhaven Zeeland @@ -576,36 +566,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:21.606858+00:00' - - claim_type: social_facebook - claim_value: http://www.facebook.com/sharer.php?u=https%3A%2F%2Fmuseumhavenzeeland.nl&t=Museumhaven%20Zeeland - raw_value: http://www.facebook.com/sharer.php?u=https%3A%2F%2Fmuseumhavenzeeland.nl&t=Museumhaven%20Zeeland - source_url: https://www.museumhavenzeeland.nl/ - retrieved_on: '2025-11-29T18:16:45.819978+00:00' - xpath: /html/body/div[4]/ul/li[1]/a - html_file: web/1133/museumhavenzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:21.606911+00:00' - - claim_type: social_twitter - claim_value: http://twitter.com/share?text=Museumhaven%20Zeeland&url=https%3A%2F%2Fmuseumhavenzeeland.nl - raw_value: http://twitter.com/share?text=Museumhaven%20Zeeland&url=https%3A%2F%2Fmuseumhavenzeeland.nl - source_url: https://www.museumhavenzeeland.nl/ - retrieved_on: '2025-11-29T18:16:45.819978+00:00' - xpath: /html/body/div[4]/ul/li[2]/a - html_file: web/1133/museumhavenzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:21.606918+00:00' - - claim_type: social_linkedin - claim_value: http://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fmuseumhavenzeeland.nl&title=Museumhaven%20Zeeland - raw_value: http://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fmuseumhavenzeeland.nl&title=Museumhaven%20Zeeland - source_url: https://www.museumhavenzeeland.nl/ - retrieved_on: '2025-11-29T18:16:45.819978+00:00' - xpath: /html/body/div[4]/ul/li[4]/a - html_file: web/1133/museumhavenzeeland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:21.606923+00:00' - claim_type: org_name claim_value: Alles wat je altijd al wilde weten over scheepsbouw raw_value: Alles wat je altijd al wilde weten over scheepsbouw @@ -616,6 +576,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:21.606984+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museumhaven Zeeland diff --git a/data/nde/enriched/entries/1134_Q23900557.yaml b/data/nde/enriched/entries/1134_Q23900557.yaml index b36c4aa8e1..37c14b3261 100644 --- a/data/nde/enriched/entries/1134_Q23900557.yaml +++ b/data/nde/enriched/entries/1134_Q23900557.yaml @@ -672,18 +672,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:22.054351+00:00' source_archive: web/1134/stadhuismuseum.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.stadhuismuseum.nl/ - retrieved_on: '2025-11-29T18:19:35.090803+00:00' - xpath: /html/head/title - html_file: web/1134/stadhuismuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:22.053304+00:00' - claim_type: description_short claim_value: Het Stadhuismuseum vertelt het verhaal van monumentenstad Zierikzee en het eiland Schouwen-Duiveland en dankt zijn naam aan het gebouw waarin het @@ -792,6 +782,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:22.054239+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stadhuismuseum Zierikzee diff --git a/data/nde/enriched/entries/1136_unknown.yaml b/data/nde/enriched/entries/1136_unknown.yaml index 61fab202fd..17bfd09f1b 100644 --- a/data/nde/enriched/entries/1136_unknown.yaml +++ b/data/nde/enriched/entries/1136_unknown.yaml @@ -168,18 +168,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:22.251942+00:00' source_archive: web/1136/westkapellecultuurbehoud.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Stichting Cultuurbehoud Westkapelle - source_url: https://www.westkapellecultuurbehoud.nl/ - retrieved_on: '2025-11-29T18:16:59.890825+00:00' - xpath: /html/head/title - html_file: web/1136/westkapellecultuurbehoud.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:22.251198+00:00' - claim_type: email claim_value: info@westkapellecultuurbehoud.nl raw_value: info@westkapellecultuurbehoud.nl @@ -220,6 +210,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:22.251858+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Cultuurbehoud Westkapelle diff --git a/data/nde/enriched/entries/1137_Q56459493.yaml b/data/nde/enriched/entries/1137_Q56459493.yaml index 4ad001b8fb..25755d2b83 100644 --- a/data/nde/enriched/entries/1137_Q56459493.yaml +++ b/data/nde/enriched/entries/1137_Q56459493.yaml @@ -532,18 +532,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:22.403389+00:00' source_archive: web/1137/goemanszorg.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Goemanszorg - source_url: http://www.goemanszorg.nl - retrieved_on: '2025-11-29T18:17:03.809403+00:00' - xpath: /html/head/title - html_file: web/1137/goemanszorg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:22.401414+00:00' - claim_type: description_short claim_value: 'Dit zijn de vijf belangrijkste redenen om Museumboerderij Goemanszorg dit jaar te bezoeken:' @@ -616,6 +606,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:22.403064+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Goemanszorg diff --git a/data/nde/enriched/entries/1141_Q2153365.yaml b/data/nde/enriched/entries/1141_Q2153365.yaml index f92e15d2cd..5ae91eb1d2 100644 --- a/data/nde/enriched/entries/1141_Q2153365.yaml +++ b/data/nde/enriched/entries/1141_Q2153365.yaml @@ -1192,7 +1192,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:23.527492+00:00' source_archive: web/1141/zeeuwsmuseum.nl - claims_count: 9 + claims_count: 7 claims: - claim_type: org_name claim_value: Zeeuws Museum @@ -1236,28 +1236,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:48:23.527065+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://www.zeeuwsmuseum.nl/nl/zeeuws-museum-v4 - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://www.zeeuwsmuseum.nl/nl/zeeuws-museum-v4 - source_url: http://www.zeeuwsmuseum.nl/ - retrieved_on: '2025-11-29T18:19:19.826583+00:00' - xpath: /html/body/div/div[2]/div[1]/div/div/div/div/div/div/ul/li[1]/a - html_file: web/1141/zeeuwsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:23.527251+00:00' - - claim_type: social_twitter - claim_value: 'https://twitter.com/intent/tweet?text=Zeeuws Museum - in Middelburg - - Ontdek Zeeland: https://www.zeeuwsmuseum.nl/nl/zeeuws-museum-v4' - raw_value: 'https://twitter.com/intent/tweet?text=Zeeuws Museum - in Middelburg - - Ontdek Zeeland: https://www.zeeuwsmuseum.nl/nl/zeeuws-museum-v4' - source_url: http://www.zeeuwsmuseum.nl/ - retrieved_on: '2025-11-29T18:19:19.826583+00:00' - xpath: /html/body/div/div[2]/div[1]/div/div/div/div/div/div/ul/li[2]/a - html_file: web/1141/zeeuwsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:23.527257+00:00' - claim_type: social_twitter claim_value: https://twitter.com/Zeeuwsmuseum raw_value: https://twitter.com/Zeeuwsmuseum @@ -1288,6 +1266,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:23.527326+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zeeuws Museum diff --git a/data/nde/enriched/entries/1144_unknown.yaml b/data/nde/enriched/entries/1144_unknown.yaml index 4bdd1d0b5d..fa53c610c9 100644 --- a/data/nde/enriched/entries/1144_unknown.yaml +++ b/data/nde/enriched/entries/1144_unknown.yaml @@ -165,18 +165,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:24.110861+00:00' source_archive: web/1144/erfgoeddelft.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Erfgoed Delft en omstreken - source_url: https://erfgoeddelft.nl/ - retrieved_on: '2025-11-29T18:19:37.407019+00:00' - xpath: /html/head/title - html_file: web/1144/erfgoeddelft.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:24.110509+00:00' - claim_type: description_short claim_value: 'Erfgoed Delft: Stadsarchief Delft, Monumenten en Archeologie Delft' raw_value: 'Erfgoed Delft: Stadsarchief Delft, Monumenten en Archeologie Delft' @@ -267,6 +257,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:24.110829+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Erfgoed Delft diff --git a/data/nde/enriched/entries/1146_Q26432.yaml b/data/nde/enriched/entries/1146_Q26432.yaml index 0fc1181f50..1cfd761d2c 100644 --- a/data/nde/enriched/entries/1146_Q26432.yaml +++ b/data/nde/enriched/entries/1146_Q26432.yaml @@ -2288,18 +2288,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:24.378674+00:00' source_archive: web/1146/zoetermeer.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Zoetermeer - source_url: https://www.zoetermeer.nl/ - retrieved_on: '2025-11-29T18:19:38.051586+00:00' - xpath: /html/head/title - html_file: web/1146/zoetermeer.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:24.377822+00:00' - claim_type: org_name claim_value: Loading... raw_value: Loading... @@ -2362,6 +2352,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:24.378445+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zoetermeer diff --git a/data/nde/enriched/entries/1158_Q2654416.yaml b/data/nde/enriched/entries/1158_Q2654416.yaml index c2bb914984..0cdc01d087 100644 --- a/data/nde/enriched/entries/1158_Q2654416.yaml +++ b/data/nde/enriched/entries/1158_Q2654416.yaml @@ -860,22 +860,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:27.273163+00:00' source_archive: web/1158/onderwijsmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Bezoek het Nationaal Onderwijsmuseum in Dordrecht en ontdek onze - uitgebreide collectie. Het museum toont de vele aspecten van het onderwijs en - de invloed ervan op de jeugdcultuur. - raw_value: Bezoek het Nationaal Onderwijsmuseum in Dordrecht en ontdek onze uitgebreide - collectie. Het museum toont de vele aspecten van het onderwijs en de invloed - ervan op de jeugdcultuur. | Onderwijsmuseum - source_url: http://www.onderwijsmuseum.nl - retrieved_on: '2025-11-29T18:21:57.764145+00:00' - xpath: /html/head/title - html_file: web/1158/onderwijsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:27.272474+00:00' - claim_type: email claim_value: info@onderwijsmuseum.nl raw_value: info@onderwijsmuseum.nl @@ -956,6 +942,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:27.273094+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Onderwijsmuseum diff --git a/data/nde/enriched/entries/1160_unknown.yaml b/data/nde/enriched/entries/1160_unknown.yaml index c1db91fb8a..5beae06aa0 100644 --- a/data/nde/enriched/entries/1160_unknown.yaml +++ b/data/nde/enriched/entries/1160_unknown.yaml @@ -162,7 +162,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:27.786344+00:00' source_archive: web/1160/kurtcarlsen.nl - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Stichting Kurt Carlsen @@ -184,16 +184,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:27.786244+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.kurtcarlsen.nl/cms/ - retrieved_on: '2025-11-29T18:25:00.786534+00:00' - xpath: /html/body/div/main/header/h1 - html_file: web/1160/kurtcarlsen.nl/mirror/www.kurtcarlsen.nl/cms/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:48:27.786276+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Kurt Carlsen diff --git a/data/nde/enriched/entries/1162_Q2632714.yaml b/data/nde/enriched/entries/1162_Q2632714.yaml index bbe67aaadd..16d650f0b1 100644 --- a/data/nde/enriched/entries/1162_Q2632714.yaml +++ b/data/nde/enriched/entries/1162_Q2632714.yaml @@ -467,18 +467,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:28.251946+00:00' source_archive: web/1162/rovm.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - ROVM - source_url: https://www.rovm.nl/ - retrieved_on: '2025-11-30T00:04:25.135223+00:00' - xpath: /html/head/title - html_file: web/1162/rovm.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:28.250957+00:00' - claim_type: email claim_value: info@stichtingromeo.nl raw_value: info@stichtingromeo.nl @@ -549,6 +539,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:28.251864+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rotterdams Openbaar Vervoer Museum diff --git a/data/nde/enriched/entries/1165_Q1821169.yaml b/data/nde/enriched/entries/1165_Q1821169.yaml index 137abb5f8c..eed674d33e 100644 --- a/data/nde/enriched/entries/1165_Q1821169.yaml +++ b/data/nde/enriched/entries/1165_Q1821169.yaml @@ -906,7 +906,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:28.887864+00:00' source_archive: web/1165/literatuurmuseum.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Literatuurmuseum / Kinderboekenmuseum @@ -940,16 +940,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:48:28.887283+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.literatuurmuseum.nl - retrieved_on: '2025-11-29T18:22:30.613269+00:00' - xpath: /html/body/div[1]/h1 - html_file: web/1165/literatuurmuseum.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:48:28.887745+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Literatuurmuseum diff --git a/data/nde/enriched/entries/1166_Q2041110.yaml b/data/nde/enriched/entries/1166_Q2041110.yaml index 491f212dbe..84ee8549d0 100644 --- a/data/nde/enriched/entries/1166_Q2041110.yaml +++ b/data/nde/enriched/entries/1166_Q2041110.yaml @@ -718,18 +718,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:28.994322+00:00' source_archive: web/1166/rekenkamer.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Algemene Rekenkamer - source_url: https://www.rekenkamer.nl/ - retrieved_on: '2025-11-29T18:22:31.555565+00:00' - xpath: /html/head/title - html_file: web/1166/rekenkamer.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:28.993743+00:00' - claim_type: description_short claim_value: De Algemene Rekenkamer onderzoekt of de rijksoverheid goed omgaat met publiek geld. Wij onderzoeken en controleren of het geld zinnig en zuinig @@ -784,6 +774,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:28.994194+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Algemene Rekenkamer (AR) diff --git a/data/nde/enriched/entries/1176_Q2036123.yaml b/data/nde/enriched/entries/1176_Q2036123.yaml index 6d5401488b..9ff2195ae8 100644 --- a/data/nde/enriched/entries/1176_Q2036123.yaml +++ b/data/nde/enriched/entries/1176_Q2036123.yaml @@ -854,18 +854,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:31.401061+00:00' source_archive: web/1176/vrijmetselarijmuseum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.vrijmetselarijmuseum.nl/ - retrieved_on: '2025-11-29T18:24:24.772760+00:00' - xpath: /html/head/title - html_file: web/1176/vrijmetselarijmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:31.400377+00:00' - claim_type: description_short claim_value: 'De verzamelingen van de Orde van Vrijmetselaren: een bibliotheek, een archief en een museum' @@ -898,6 +888,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:48:31.400892+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Vrijmetselarij Museum diff --git a/data/nde/enriched/entries/1180_Q2041110.yaml b/data/nde/enriched/entries/1180_Q2041110.yaml index 4714f09fe9..8fa040f05f 100644 --- a/data/nde/enriched/entries/1180_Q2041110.yaml +++ b/data/nde/enriched/entries/1180_Q2041110.yaml @@ -699,18 +699,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:32.441975+00:00' source_archive: web/1180/rekenkamer.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Algemene Rekenkamer - source_url: https://www.rekenkamer.nl/ - retrieved_on: '2025-11-29T18:25:01.493188+00:00' - xpath: /html/head/title - html_file: web/1180/rekenkamer.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:32.441451+00:00' - claim_type: description_short claim_value: De Algemene Rekenkamer onderzoekt of de rijksoverheid goed omgaat met publiek geld. Wij onderzoeken en controleren of het geld zinnig en zuinig @@ -765,6 +755,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:32.441847+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De Algemene Rekenkamer diff --git a/data/nde/enriched/entries/1181_Q59486.yaml b/data/nde/enriched/entries/1181_Q59486.yaml index 89ac07375d..57d59a22b8 100644 --- a/data/nde/enriched/entries/1181_Q59486.yaml +++ b/data/nde/enriched/entries/1181_Q59486.yaml @@ -1140,18 +1140,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:32.587444+00:00' source_archive: web/1181/eerstekamer.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Eerste Kamer der Staten-Generaal - source_url: https://www.eerstekamer.nl/ - retrieved_on: '2025-11-29T18:25:01.897266+00:00' - xpath: /html/head/title - html_file: web/1181/eerstekamer.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:32.586648+00:00' - claim_type: org_name claim_value: Logo Eerste Kamer der Staten-Generaal, woordmerk raw_value: Logo Eerste Kamer der Staten-Generaal, woordmerk @@ -1212,6 +1202,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:32.587344+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Eerste Kamer der Staten-Generaal diff --git a/data/nde/enriched/entries/1182_Q3141841.yaml b/data/nde/enriched/entries/1182_Q3141841.yaml index d1e1c28551..b0a2c6eeef 100644 --- a/data/nde/enriched/entries/1182_Q3141841.yaml +++ b/data/nde/enriched/entries/1182_Q3141841.yaml @@ -447,18 +447,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:32.664022+00:00' source_archive: web/1182/lintjes.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Koninklijke onderscheidingen - source_url: http://www.lintjes.nl/ - retrieved_on: '2025-11-30T00:04:25.686286+00:00' - xpath: /html/head/title - html_file: web/1182/lintjes.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:32.663361+00:00' - claim_type: social_instagram claim_value: https://www.instagram.com/kanselarijdernederlandseorden/ raw_value: https://www.instagram.com/kanselarijdernederlandseorden/ @@ -489,6 +479,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:32.663909+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De Kanselarij der Nederlandse Orden diff --git a/data/nde/enriched/entries/1184_Q1155243.yaml b/data/nde/enriched/entries/1184_Q1155243.yaml index 65cd5668cc..3bf8f8ab16 100644 --- a/data/nde/enriched/entries/1184_Q1155243.yaml +++ b/data/nde/enriched/entries/1184_Q1155243.yaml @@ -911,18 +911,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:33.281463+00:00' source_archive: web/1184/raadvanstate.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Raad van State - source_url: https://www.raadvanstate.nl/ - retrieved_on: '2025-11-29T18:28:07.868826+00:00' - xpath: /html/head/title - html_file: web/1184/raadvanstate.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:33.280763+00:00' - claim_type: description_short claim_value: De Raad van State is onafhankelijk adviseur van regering en parlement over wetgeving en bestuur en hoogste algemene bestuursrechter van het land. @@ -967,6 +957,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:33.281300+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Raad van State diff --git a/data/nde/enriched/entries/1188_Q2874177.yaml b/data/nde/enriched/entries/1188_Q2874177.yaml index d436706dce..2d5f375613 100644 --- a/data/nde/enriched/entries/1188_Q2874177.yaml +++ b/data/nde/enriched/entries/1188_Q2874177.yaml @@ -1222,7 +1222,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:34.477717+00:00' source_archive: web/1188/dordrechtsmuseum.nl - claims_count: 13 + claims_count: 6 claims: - claim_type: org_name claim_value: Dordrechts Museum @@ -1234,76 +1234,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:48:34.476771+00:00' - - claim_type: org_name - claim_value: Eye - raw_value: Eye - source_url: http://www.dordrechtsmuseum.nl/ - retrieved_on: '2025-11-29T18:27:45.657738+00:00' - xpath: /html/body/nav/div/div[1]/div[2]/a[2]/svg/title - html_file: web/1188/dordrechtsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:34.476788+00:00' - - claim_type: org_name - claim_value: Chevron left - raw_value: Chevron left - source_url: http://www.dordrechtsmuseum.nl/ - retrieved_on: '2025-11-29T18:27:45.657738+00:00' - xpath: /html/body/nav/div/div[2]/div[2]/div/button/svg/title - html_file: web/1188/dordrechtsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:34.476792+00:00' - - claim_type: org_name - claim_value: Close - raw_value: Close - source_url: http://www.dordrechtsmuseum.nl/ - retrieved_on: '2025-11-29T18:27:45.657738+00:00' - xpath: /html/body/nav/div/div[2]/div[2]/div/div[2]/button/svg/title - html_file: web/1188/dordrechtsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:34.476797+00:00' - - claim_type: org_name - claim_value: Opent in externe pagina - raw_value: Opent in externe pagina - source_url: http://www.dordrechtsmuseum.nl/ - retrieved_on: '2025-11-29T18:27:45.657738+00:00' - xpath: /html/body/nav/div/div[2]/div[3]/div/div[2]/ul[2]/li/a/svg/title - html_file: web/1188/dordrechtsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:34.476805+00:00' - - claim_type: org_name - claim_value: Arrow left - raw_value: Arrow left - source_url: http://www.dordrechtsmuseum.nl/ - retrieved_on: '2025-11-29T18:27:45.657738+00:00' - xpath: /html/body/main/div[2]/div[1]/div/div/div/button[1]/svg/title - html_file: web/1188/dordrechtsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:34.476834+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: http://www.dordrechtsmuseum.nl/ - retrieved_on: '2025-11-29T18:27:45.657738+00:00' - xpath: /html/body/footer/div/div[4]/ul[1]/li[1]/a/svg/title - html_file: web/1188/dordrechtsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:34.476838+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: http://www.dordrechtsmuseum.nl/ - retrieved_on: '2025-11-29T18:27:45.657738+00:00' - xpath: /html/body/footer/div/div[4]/ul[1]/li[2]/a/svg/title - html_file: web/1188/dordrechtsmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:34.476842+00:00' - claim_type: org_name claim_value: Tiktok raw_value: Tiktok @@ -1356,6 +1286,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:34.477522+00:00' + removed_invalid_claims: + - removed_count: 7 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Dordrechts Museum diff --git a/data/nde/enriched/entries/1191_Q135734962.yaml b/data/nde/enriched/entries/1191_Q135734962.yaml index ea3681b75b..3e8ba218a6 100644 --- a/data/nde/enriched/entries/1191_Q135734962.yaml +++ b/data/nde/enriched/entries/1191_Q135734962.yaml @@ -284,18 +284,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:34.921916+00:00' source_archive: web/1191/fmhaaglanden.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | FMHaaglanden - source_url: https://www.fmhaaglanden.nl/ - retrieved_on: '2025-11-29T18:26:39.294546+00:00' - xpath: /html/head/title - html_file: web/1191/fmhaaglanden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:34.921288+00:00' - claim_type: description_short claim_value: FMHaaglanden is onderdeel van het ministerie van Binnenlandse Zaken en Koninkrijksrelaties. Wij leveren facilitaire producten en diensten binnen @@ -312,6 +302,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:48:34.921353+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: FMHaaglanden diff --git a/data/nde/enriched/entries/1192_Q111190988.yaml b/data/nde/enriched/entries/1192_Q111190988.yaml index 2944c89fd8..15f3331a56 100644 --- a/data/nde/enriched/entries/1192_Q111190988.yaml +++ b/data/nde/enriched/entries/1192_Q111190988.yaml @@ -407,18 +407,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:34.961645+00:00' source_archive: web/1192/gemeentearchief.alphenaandenrijn.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://gemeentearchief.alphenaandenrijn.nl/ - retrieved_on: '2025-11-29T18:26:39.531336+00:00' - xpath: /html/head/title - html_file: web/1192/gemeentearchief.alphenaandenrijn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:34.961328+00:00' - claim_type: email claim_value: gemeentearchief@alphenaandenrijn.nl raw_value: gemeentearchief@alphenaandenrijn.nl @@ -449,6 +439,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:34.961598+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Alphen aan den Rijn diff --git a/data/nde/enriched/entries/1193_unknown.yaml b/data/nde/enriched/entries/1193_unknown.yaml index f88401c16e..0017b4ca26 100644 --- a/data/nde/enriched/entries/1193_unknown.yaml +++ b/data/nde/enriched/entries/1193_unknown.yaml @@ -318,18 +318,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:35.188939+00:00' source_archive: web/1193/lv.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Leidschendam-Voorburg - source_url: https://www.lv.nl/gemeentearchief - retrieved_on: '2025-11-29T18:26:40.328395+00:00' - xpath: /html/head/title - html_file: web/1193/lv.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:35.187839+00:00' - claim_type: org_name claim_value: default icon raw_value: default icon @@ -422,6 +412,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:35.188670+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Gemeentearchief Leidschendam-Voorburg diff --git a/data/nde/enriched/entries/1197_Q3229492.yaml b/data/nde/enriched/entries/1197_Q3229492.yaml index e0e0ee4613..1f985b680a 100644 --- a/data/nde/enriched/entries/1197_Q3229492.yaml +++ b/data/nde/enriched/entries/1197_Q3229492.yaml @@ -634,18 +634,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:33:36.409557+00:00' source_archive: web/1197/haagsgemeentearchief.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Haags Gemeentearchief - source_url: https://haagsgemeentearchief.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1197/haagsgemeentearchief.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:33:36.406984+00:00' - claim_type: description_short claim_value: Het Haags Gemeentearchief is de schatkist van de Haagse geschiedenis. Uw onderzoek begint hier. @@ -714,3 +704,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:33:36.407642+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1199_Q1857094.yaml b/data/nde/enriched/entries/1199_Q1857094.yaml index 8e180a0650..f58852ba8a 100644 --- a/data/nde/enriched/entries/1199_Q1857094.yaml +++ b/data/nde/enriched/entries/1199_Q1857094.yaml @@ -498,18 +498,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:36.280559+00:00' source_archive: web/1199/kabinetvandekoning.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Kabinet van de Koning - source_url: https://www.kabinetvandekoning.nl/ - retrieved_on: '2025-11-29T18:27:49.575347+00:00' - xpath: /html/head/title - html_file: web/1199/kabinetvandekoning.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:36.279969+00:00' - claim_type: description_short claim_value: Het Kabinet van de Koning is een kleine rijksorganisatie die de Koning ondersteunt in de uitoefening van zijn constitutionele werkzaamheden. @@ -522,6 +512,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:48:36.280027+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Het kabinet van de Koning diff --git a/data/nde/enriched/entries/1202_Q124386169.yaml b/data/nde/enriched/entries/1202_Q124386169.yaml index a4533d3356..14ed3145e3 100644 --- a/data/nde/enriched/entries/1202_Q124386169.yaml +++ b/data/nde/enriched/entries/1202_Q124386169.yaml @@ -293,7 +293,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:37.224065+00:00' source_archive: web/1202/hgwaddinxveen.nl - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Historisch Genootschap Waddinxveen @@ -315,16 +315,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:48:37.223634+00:00' - - claim_type: org_name - claim_value: menu - raw_value: menu - source_url: https://hgwaddinxveen.nl/ - retrieved_on: '2025-11-30T00:06:25.617586+00:00' - xpath: /html/body/div/div/div/section/section[1]/nav/div/h1 - html_file: web/1202/hgwaddinxveen.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:48:37.223920+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Genootschap Waddinxveen diff --git a/data/nde/enriched/entries/1203_Q15224245.yaml b/data/nde/enriched/entries/1203_Q15224245.yaml index 2f7aef235e..ea31e06056 100644 --- a/data/nde/enriched/entries/1203_Q15224245.yaml +++ b/data/nde/enriched/entries/1203_Q15224245.yaml @@ -689,18 +689,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:37.360306+00:00' source_archive: web/1203/historischmuseumdenbriel.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Historisch Museum Den Briel - source_url: http://www.historischmuseumdenbriel.nl - retrieved_on: '2025-11-29T18:27:52.942444+00:00' - xpath: /html/head/title - html_file: web/1203/historischmuseumdenbriel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:37.358861+00:00' - claim_type: description_short claim_value: Plan je bezoek Beleef 80 jaar oorlog Vorige slide Volgende slide Wat is er te zien en te doen? Abonneren op de nieuwsbrief? Bezoek onze locaties @@ -783,6 +773,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:37.360114+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historisch Museum Den Briel diff --git a/data/nde/enriched/entries/1210_invalid_id.yaml b/data/nde/enriched/entries/1210_invalid_id.yaml index 2226c6bfc2..d31d610e54 100644 --- a/data/nde/enriched/entries/1210_invalid_id.yaml +++ b/data/nde/enriched/entries/1210_invalid_id.yaml @@ -188,18 +188,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:39.957850+00:00' source_archive: web/1210/proosdijlanden.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://proosdijlanden.nl/ - retrieved_on: '2025-11-29T18:29:00.059381+00:00' - xpath: /html/head/title - html_file: web/1210/proosdijlanden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:39.957204+00:00' - claim_type: description_short claim_value: website van de Historische Vereniging De Proosdijlanden, werkgebied Mijdrecht, Wilnis, Vinkeveen, Abcoude @@ -252,6 +242,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:39.957683+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Historische Vereniging De Proosdijlanden diff --git a/data/nde/enriched/entries/1212_Q2584045.yaml b/data/nde/enriched/entries/1212_Q2584045.yaml index 8ecc9e1988..d2d6158b34 100644 --- a/data/nde/enriched/entries/1212_Q2584045.yaml +++ b/data/nde/enriched/entries/1212_Q2584045.yaml @@ -511,18 +511,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:40.024965+00:00' source_archive: web/1212/hogeraadvanadel.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Hoge Raad van Adel - source_url: https://www.hogeraadvanadel.nl/ - retrieved_on: '2025-11-29T18:28:51.397816+00:00' - xpath: /html/head/title - html_file: web/1212/hogeraadvanadel.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:40.024594+00:00' - claim_type: description_short claim_value: De Hoge Raad van Adel is ingesteld in 1814 en is het vaste adviescollege van de regering op het gebied van adeldom en heraldiek. @@ -535,6 +525,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:48:40.024656+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Hoge Raad van Adel (HRvA) diff --git a/data/nde/enriched/entries/1213_Q3046110.yaml b/data/nde/enriched/entries/1213_Q3046110.yaml index f8e9b0ce62..70c2416241 100644 --- a/data/nde/enriched/entries/1213_Q3046110.yaml +++ b/data/nde/enriched/entries/1213_Q3046110.yaml @@ -1598,18 +1598,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:40.545374+00:00' source_archive: web/1213/hhdelfland.nl - claims_count: 9 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Delfland - source_url: http://www.hhdelfland.nl - retrieved_on: '2025-11-29T19:22:30.457277+00:00' - xpath: /html/head/title - html_file: web/1213/hhdelfland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:40.544086+00:00' - claim_type: description_short claim_value: Bij het Hoogheemraadschap van Delfland nemen we de zorg voor het water op ons. Zodat jij samen met alle inwoners van ons gebied fijn kunt wonen, @@ -1634,26 +1624,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:48:40.544661+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3a%2f%2fwww.hhdelfland.nl%2f&t=Bij%20het%20Hoogheemraadschap%20van%20Delfland%20nemen%20we%20de%20zorg%20voor%20het%20water%20op%20ons.%20Zodat%20jij%20samen%20met%20alle%20inwoners%20van%20ons%20gebied%20fijn%20kunt%c2%a0wonen%2c%20werken%20%c3%a9n%20ontspannen%20in%20deze%20prachtige%20waterrijke%20omgeving. - raw_value: https://www.facebook.com/sharer.php?u=https%3a%2f%2fwww.hhdelfland.nl%2f&t=Bij%20het%20Hoogheemraadschap%20van%20Delfland%20nemen%20we%20de%20zorg%20voor%20het%20water%20op%20ons.%20Zodat%20jij%20samen%20met%20alle%20inwoners%20van%20ons%20gebied%20fijn%20kunt%c2%a0wonen%2c%20werken%20%c3%a9n%20ontspannen%20in%20deze%20prachtige%20waterrijke%20omgeving. - source_url: http://www.hhdelfland.nl - retrieved_on: '2025-11-29T19:22:30.457277+00:00' - xpath: /html/body/div/footer/div/div[1]/div[1]/div/div/div/div[2]/ul/li[1]/span/a - html_file: web/1213/hhdelfland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:40.545044+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3a%2f%2fwww.hhdelfland.nl%2f&title=Home&ro=false&summary=Bij%20het%20Hoogheemraadschap%20van%20Delfland%20nemen%20we%20de%20zorg%20voor%20het%20water%20op%20ons.%20Zodat%20jij%20samen%20met%20alle%20inwoners%20van%20ons%20gebied%20fijn%20kunt%c2%a0wonen%2c%20werken%20%c3%a9n%20ontspannen%20in%20deze%20prachtige%20waterrijke%20omgeving. - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3a%2f%2fwww.hhdelfland.nl%2f&title=Home&ro=false&summary=Bij%20het%20Hoogheemraadschap%20van%20Delfland%20nemen%20we%20de%20zorg%20voor%20het%20water%20op%20ons.%20Zodat%20jij%20samen%20met%20alle%20inwoners%20van%20ons%20gebied%20fijn%20kunt%c2%a0wonen%2c%20werken%20%c3%a9n%20ontspannen%20in%20deze%20prachtige%20waterrijke%20omgeving. - source_url: http://www.hhdelfland.nl - retrieved_on: '2025-11-29T19:22:30.457277+00:00' - xpath: /html/body/div/footer/div/div[1]/div[1]/div/div/div/div[2]/ul/li[3]/span/a - html_file: web/1213/hhdelfland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:40.545054+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/hhdelfland raw_value: https://www.facebook.com/hhdelfland @@ -1694,6 +1664,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:40.545088+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Hoogheemraadschap van Delfland diff --git a/data/nde/enriched/entries/1214_Q2619632.yaml b/data/nde/enriched/entries/1214_Q2619632.yaml index 29d9c2a82e..a3aa5363b1 100644 --- a/data/nde/enriched/entries/1214_Q2619632.yaml +++ b/data/nde/enriched/entries/1214_Q2619632.yaml @@ -1703,7 +1703,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:41.022691+00:00' source_archive: web/1214/rijnland.net - claims_count: 11 + claims_count: 10 claims: - claim_type: org_name claim_value: Hoogheemraadschap van Rijnland @@ -1808,16 +1808,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:41.022180+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.rijnland.net - retrieved_on: '2025-11-29T19:04:16.234343+00:00' - xpath: /html/body/main/div/div[2]/h1 - html_file: web/1214/rijnland.net/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:48:41.022271+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Hoogheemraadschap van Rijnland diff --git a/data/nde/enriched/entries/1215_Q2304570.yaml b/data/nde/enriched/entries/1215_Q2304570.yaml index fc6eb1b07f..392a73dff5 100644 --- a/data/nde/enriched/entries/1215_Q2304570.yaml +++ b/data/nde/enriched/entries/1215_Q2304570.yaml @@ -1104,19 +1104,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:41.487259+00:00' source_archive: web/1215/schielandendekrimpenerwaard.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: "Home\n \n \n\n \n \n\n | Hoogheemraadschap\ - \ van Schieland en de Krimpenerwaard" - source_url: https://www.schielandendekrimpenerwaard.nl - retrieved_on: '2025-11-29T19:22:58.080764+00:00' - xpath: /html/head/title - html_file: web/1215/schielandendekrimpenerwaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:41.484151+00:00' - claim_type: email claim_value: info@hhsk.nl raw_value: info@hhsk.nl @@ -1177,6 +1166,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:41.486778+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Hoogheemraadschap van Schieland en de Krimpenerwaard diff --git a/data/nde/enriched/entries/1217_Q2425770.yaml b/data/nde/enriched/entries/1217_Q2425770.yaml index 07d5750690..e37bfbb07d 100644 --- a/data/nde/enriched/entries/1217_Q2425770.yaml +++ b/data/nde/enriched/entries/1217_Q2425770.yaml @@ -875,7 +875,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:42.045781+00:00' source_archive: web/1217/huisvangijn.nl - claims_count: 13 + claims_count: 6 claims: - claim_type: org_name claim_value: Huis Van Gijn @@ -897,76 +897,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:48:42.044909+00:00' - - claim_type: org_name - claim_value: Eye - raw_value: Eye - source_url: https://www.huisvangijn.nl/ - retrieved_on: '2025-11-29T19:04:49.108789+00:00' - xpath: /html/body/nav/div/div[1]/div[2]/a[2]/svg/title - html_file: web/1217/huisvangijn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:42.044914+00:00' - - claim_type: org_name - claim_value: Chevron left - raw_value: Chevron left - source_url: https://www.huisvangijn.nl/ - retrieved_on: '2025-11-29T19:04:49.108789+00:00' - xpath: /html/body/nav/div/div[2]/div[2]/div/button/svg/title - html_file: web/1217/huisvangijn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:42.044918+00:00' - - claim_type: org_name - claim_value: Close - raw_value: Close - source_url: https://www.huisvangijn.nl/ - retrieved_on: '2025-11-29T19:04:49.108789+00:00' - xpath: /html/body/nav/div/div[2]/div[2]/div/div[2]/button/svg/title - html_file: web/1217/huisvangijn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:42.044923+00:00' - - claim_type: org_name - claim_value: Opent in externe pagina - raw_value: Opent in externe pagina - source_url: https://www.huisvangijn.nl/ - retrieved_on: '2025-11-29T19:04:49.108789+00:00' - xpath: /html/body/nav/div/div[2]/div[3]/div/div[2]/ul[3]/li/a/svg/title - html_file: web/1217/huisvangijn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:42.044932+00:00' - - claim_type: org_name - claim_value: Arrow left - raw_value: Arrow left - source_url: https://www.huisvangijn.nl/ - retrieved_on: '2025-11-29T19:04:49.108789+00:00' - xpath: /html/body/main/div[3]/div[1]/div/div/div/button[1]/svg/title - html_file: web/1217/huisvangijn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:42.044960+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.huisvangijn.nl/ - retrieved_on: '2025-11-29T19:04:49.108789+00:00' - xpath: /html/body/footer/div/div[4]/ul[1]/li[1]/a/svg/title - html_file: web/1217/huisvangijn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:42.044964+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.huisvangijn.nl/ - retrieved_on: '2025-11-29T19:04:49.108789+00:00' - xpath: /html/body/footer/div/div[4]/ul[1]/li[2]/a/svg/title - html_file: web/1217/huisvangijn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:42.044967+00:00' - claim_type: description_short claim_value: Ontdek in Huis Van Gijn hoe een welgesteld echtpaar en hun personeel rond 1900 woonden en werkten. @@ -1009,6 +939,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:42.045662+00:00' + removed_invalid_claims: + - removed_count: 7 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Huis Van Gijn diff --git a/data/nde/enriched/entries/1221_Q1930398.yaml b/data/nde/enriched/entries/1221_Q1930398.yaml index 711e2c8865..4974328204 100644 --- a/data/nde/enriched/entries/1221_Q1930398.yaml +++ b/data/nde/enriched/entries/1221_Q1930398.yaml @@ -449,18 +449,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:42.457232+00:00' source_archive: web/1221/inspectie-oe.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Inspectie Overheidsinformatie en Erfgoed - source_url: https://www.inspectie-oe.nl/ - retrieved_on: '2025-11-29T19:04:20.128050+00:00' - xpath: /html/head/title - html_file: web/1221/inspectie-oe.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:42.456796+00:00' - claim_type: description_short claim_value: De Inspectie Overheidsinformatie en Erfgoed houdt toezicht op Nederlands erfgoed en de overheidsinformatie bij de centrale overheid. Zoals op de schilderijen @@ -479,6 +469,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:48:42.456875+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Inspectie Overheidsinformatie en Erfgoed diff --git a/data/nde/enriched/entries/1233_Q2653692.yaml b/data/nde/enriched/entries/1233_Q2653692.yaml index 7996de7132..0786b4bea1 100644 --- a/data/nde/enriched/entries/1233_Q2653692.yaml +++ b/data/nde/enriched/entries/1233_Q2653692.yaml @@ -641,22 +641,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:44.391727+00:00' source_archive: web/1233/couperusmuseum.org - claims_count: 12 + claims_count: 9 claims: - - claim_type: org_name - claim_value: k9win wallet เว็บเดิมพัน:k9win wallet เว็บเดิมพัน แหล่งรวมเกมคาสิโนออนไลน์ยอดนิยมจากค่ายดังทั่วโลก - ไม่ว่าจะเป็น บาคาร่า สล็อต รูเล็ต และเกมอื่นๆ อีกมากมาย เล่นง่ายได้เงินจริง - พร้อมระบบที่เสถียรแล - raw_value: k9win wallet เว็บเดิมพัน:k9win wallet เว็บเดิมพัน แหล่งรวมเกมคาสิโนออนไลน์ยอดนิยมจากค่ายดังทั่วโลก - ไม่ว่าจะเป็น บาคาร่า สล็อต รูเล็ต และเกมอื่นๆ อีกมากมาย เล่นง่ายได้เงินจริง - พร้อมระบบที่เสถียรแล - source_url: https://www.couperusmuseum.org/ - retrieved_on: '2025-11-29T19:22:43.257116+00:00' - xpath: /html/head/title - html_file: web/1233/couperusmuseum.org/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:44.391161+00:00' - claim_type: description_short claim_value: k9win wallet เว็บเดิมพัน แหล่งรวมเกมคาสิโนออนไลน์ยอดนิยมจากค่ายดังทั่วโลก ไม่ว่าจะเป็น บาคาร่า สล็อต รูเล็ต และเกมอื่นๆ อีกมากมาย เล่นง่ายได้เงินจริง @@ -689,16 +675,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_description extraction_timestamp: '2025-12-01T10:48:44.391333+00:00' - - claim_type: org_name - claim_value: k9win wallet เว็บเดิมพัน - raw_value: k9win wallet เว็บเดิมพัน - source_url: https://www.couperusmuseum.org/ - retrieved_on: '2025-11-29T19:22:43.257116+00:00' - xpath: /html/head/meta[22] - html_file: web/1233/couperusmuseum.org/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:48:44.391389+00:00' - claim_type: org_name claim_value: PP9 raw_value: PP9 @@ -773,16 +749,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: schema_org_sameAs extraction_timestamp: '2025-12-01T10:48:44.391508+00:00' - - claim_type: org_name - claim_value: หมูเด้ง888 เว็บคาสิโนออนไลน์ ครบทุกค่าย แตกง่ายทุกเกม - raw_value: หมูเด้ง888 เว็บคาสิโนออนไลน์ ครบทุกค่าย แตกง่ายทุกเกม - source_url: https://www.couperusmuseum.org/ - retrieved_on: '2025-11-29T19:22:43.257116+00:00' - xpath: /html/body/main/div/article/strong/h1 - html_file: web/1233/couperusmuseum.org/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:48:44.391678+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Louis Couperus Museum diff --git a/data/nde/enriched/entries/1249_Q679527.yaml b/data/nde/enriched/entries/1249_Q679527.yaml index 458d44e019..9d370586c9 100644 --- a/data/nde/enriched/entries/1249_Q679527.yaml +++ b/data/nde/enriched/entries/1249_Q679527.yaml @@ -2926,78 +2926,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:46.612793+00:00' source_archive: web/1249/boijmans.nl - claims_count: 17 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Boijmans Van Beuningen - source_url: https://www.boijmans.nl/ - retrieved_on: '2025-11-29T19:23:00.777609+00:00' - xpath: /html/head/title - html_file: web/1249/boijmans.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:46.611161+00:00' - - claim_type: org_name - claim_value: Previous - raw_value: Previous - source_url: https://www.boijmans.nl/ - retrieved_on: '2025-11-29T19:23:00.777609+00:00' - xpath: /html/body/svg/defs/symbol[20]/title - html_file: web/1249/boijmans.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:46.611173+00:00' - - claim_type: org_name - claim_value: Next - raw_value: Next - source_url: https://www.boijmans.nl/ - retrieved_on: '2025-11-29T19:23:00.777609+00:00' - xpath: /html/body/svg/defs/symbol[21]/title - html_file: web/1249/boijmans.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:46.611177+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.boijmans.nl/ - retrieved_on: '2025-11-29T19:23:00.777609+00:00' - xpath: /html/body/svg/defs/symbol[24]/title - html_file: web/1249/boijmans.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:46.611181+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.boijmans.nl/ - retrieved_on: '2025-11-29T19:23:00.777609+00:00' - xpath: /html/body/svg/defs/symbol[25]/title - html_file: web/1249/boijmans.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:46.611184+00:00' - - claim_type: org_name - claim_value: Twitter - raw_value: Twitter - source_url: https://www.boijmans.nl/ - retrieved_on: '2025-11-29T19:23:00.777609+00:00' - xpath: /html/body/svg/defs/symbol[26]/title - html_file: web/1249/boijmans.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:46.611187+00:00' - - claim_type: org_name - claim_value: Pinterest - raw_value: Pinterest - source_url: https://www.boijmans.nl/ - retrieved_on: '2025-11-29T19:23:00.777609+00:00' - xpath: /html/body/svg/defs/symbol[27]/title - html_file: web/1249/boijmans.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:46.611190+00:00' - claim_type: org_name claim_value: Tiktok raw_value: Tiktok @@ -3028,16 +2958,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:48:46.611200+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://www.boijmans.nl/ - retrieved_on: '2025-11-29T19:23:00.777609+00:00' - xpath: /html/body/div[3]/div[1]/div[2]/div[2]/div/form/button/svg/title - html_file: web/1249/boijmans.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:46.611204+00:00' - claim_type: description_short claim_value: Museum Boijmans Van Beuningen gaat vernieuwen. De collectie blijft zichtbaar bij (inter)nationale collega-instellingen en vanaf 2021 in Depot Boijmans @@ -3102,6 +3022,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:46.612309+00:00' + removed_invalid_claims: + - removed_count: 8 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Boijmans Van Beuningen diff --git a/data/nde/enriched/entries/1250_Q2216754.yaml b/data/nde/enriched/entries/1250_Q2216754.yaml index 6e84ec324a..babe0e5e83 100644 --- a/data/nde/enriched/entries/1250_Q2216754.yaml +++ b/data/nde/enriched/entries/1250_Q2216754.yaml @@ -960,19 +960,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:47.091804+00:00' source_archive: web/1250/museumbredius.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Bredius | Beleef o.m. Rembrandt en Jan Steen bij Museum - Bredius in Den Haag - source_url: https://museumbredius.nl/ - retrieved_on: '2025-11-29T19:24:06.107463+00:00' - xpath: /html/head/title - html_file: web/1250/museumbredius.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:47.091322+00:00' - claim_type: org_name claim_value: Museum Bredius raw_value: Museum Bredius @@ -1013,6 +1002,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:47.091708+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Bredius diff --git a/data/nde/enriched/entries/1256_Q4360916.yaml b/data/nde/enriched/entries/1256_Q4360916.yaml index 1a5cbc09b3..10ad2cfce5 100644 --- a/data/nde/enriched/entries/1256_Q4360916.yaml +++ b/data/nde/enriched/entries/1256_Q4360916.yaml @@ -884,18 +884,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:48.875867+00:00' source_archive: web/1256/museumgouda.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Gouda - source_url: http://www.museumgouda.nl/ - retrieved_on: '2025-11-29T19:24:11.675609+00:00' - xpath: /html/head/title - html_file: web/1256/museumgouda.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:48.875164+00:00' - claim_type: org_name claim_value: Museum Gouda raw_value: Museum Gouda @@ -976,6 +966,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:48.875765+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Museum Gouda diff --git a/data/nde/enriched/entries/1259_Q2754878.yaml b/data/nde/enriched/entries/1259_Q2754878.yaml index 7eddfa6e33..34ef9dab4a 100644 --- a/data/nde/enriched/entries/1259_Q2754878.yaml +++ b/data/nde/enriched/entries/1259_Q2754878.yaml @@ -1728,7 +1728,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:49.270305+00:00' source_archive: web/1259/meermanno.nl - claims_count: 12 + claims_count: 11 claims: - claim_type: org_name claim_value: Huis van het boek @@ -1842,16 +1842,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:49.270156+00:00' - - claim_type: org_name - claim_value: '''Mythes en mogelijkheden''' - raw_value: '''Mythes en mogelijkheden''' - source_url: http://www.meermanno.nl/ - retrieved_on: '2025-11-29T19:24:07.165483+00:00' - xpath: /html/body/div/main/section[3]/div/div/div[2]/div/h1 - html_file: web/1259/meermanno.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:48:49.270225+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Huis van het Boek diff --git a/data/nde/enriched/entries/1265_Q4011822.yaml b/data/nde/enriched/entries/1265_Q4011822.yaml index e6e7aaa72f..2c0d437b26 100644 --- a/data/nde/enriched/entries/1265_Q4011822.yaml +++ b/data/nde/enriched/entries/1265_Q4011822.yaml @@ -522,7 +522,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:50.430114+00:00' source_archive: web/1265/stoomtreinkatwijkleiden.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Stoomtrein Katwijk Leiden @@ -544,26 +544,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:48:50.428545+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: https://stoomtreinkatwijkleiden.nl/ - retrieved_on: '2025-11-30T00:10:26.582099+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[1]/svg/title - html_file: web/1265/stoomtreinkatwijkleiden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:50.428550+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: https://stoomtreinkatwijkleiden.nl/ - retrieved_on: '2025-11-30T00:10:26.582099+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[2]/svg/title - html_file: web/1265/stoomtreinkatwijkleiden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:50.428554+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -626,6 +606,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:50.429677+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stoomtrein Katwijk Leiden diff --git a/data/nde/enriched/entries/1272_Q13736930.yaml b/data/nde/enriched/entries/1272_Q13736930.yaml index 01023f1e6b..462d105e40 100644 --- a/data/nde/enriched/entries/1272_Q13736930.yaml +++ b/data/nde/enriched/entries/1272_Q13736930.yaml @@ -622,18 +622,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:51.564272+00:00' source_archive: web/1272/jenevermuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal Jenevermuseum Schiedam - source_url: http://www.jenevermuseum.nl - retrieved_on: '2025-11-29T19:24:15.336251+00:00' - xpath: /html/head/title - html_file: web/1272/jenevermuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:51.562630+00:00' - claim_type: description_short claim_value: Ontdek de rijke geschiedenis van jenever in het Jenevermuseum in Schiedam. Bezoek de branderij, proef authentieke jenevers en ervaar een uniek @@ -718,6 +708,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:51.564020+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Jenevermuseum diff --git a/data/nde/enriched/entries/1274_invalid_id.yaml b/data/nde/enriched/entries/1274_invalid_id.yaml index d9f4bdf750..32fe43f166 100644 --- a/data/nde/enriched/entries/1274_invalid_id.yaml +++ b/data/nde/enriched/entries/1274_invalid_id.yaml @@ -337,18 +337,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:52.382595+00:00' source_archive: web/1274/nationaalsleepvaartmuseum.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal Sleepvaart Museum Maassluis - source_url: https://nationaalsleepvaartmuseum.nl/ - retrieved_on: '2025-11-30T00:07:41.684143+00:00' - xpath: /html/head/title - html_file: web/1274/nationaalsleepvaartmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:52.381801+00:00' - claim_type: description_short claim_value: Wanneer je nieuwsgierig bent naar de rijke maritieme historie van Nederland moet je beslist in Maassluis zijn. In het fraaie en schilderachtige @@ -408,6 +398,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:52.382366+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Sleepvaart Museum diff --git a/data/nde/enriched/entries/1275_Q21113770.yaml b/data/nde/enriched/entries/1275_Q21113770.yaml index 09cda3e759..d76f776035 100644 --- a/data/nde/enriched/entries/1275_Q21113770.yaml +++ b/data/nde/enriched/entries/1275_Q21113770.yaml @@ -662,18 +662,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:52.574226+00:00' source_archive: web/1275/nationaalsleepvaartmuseum.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal Sleepvaart Museum Maassluis - source_url: http://www.nationaalsleepvaartmuseum.nl - retrieved_on: '2025-11-29T19:24:20.491026+00:00' - xpath: /html/head/title - html_file: web/1275/nationaalsleepvaartmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:52.573197+00:00' - claim_type: description_short claim_value: Wanneer je nieuwsgierig bent naar de rijke maritieme historie van Nederland moet je beslist in Maassluis zijn. In het fraaie en schilderachtige @@ -733,6 +723,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:52.574106+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nationaal Sleepvaart Museum Maassluis diff --git a/data/nde/enriched/entries/1276_Q3311591.yaml b/data/nde/enriched/entries/1276_Q3311591.yaml index 827be76771..f83e59443f 100644 --- a/data/nde/enriched/entries/1276_Q3311591.yaml +++ b/data/nde/enriched/entries/1276_Q3311591.yaml @@ -1041,18 +1041,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:53.073221+00:00' source_archive: web/1276/hetnatuurhistorisch.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.hetnatuurhistorisch.nl - retrieved_on: '2025-11-29T19:37:02.122207+00:00' - xpath: /html/head/title - html_file: web/1276/hetnatuurhistorisch.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:53.072370+00:00' - claim_type: social_facebook claim_value: https://nl-nl.facebook.com/hetnatuurhistorisch/ raw_value: https://nl-nl.facebook.com/hetnatuurhistorisch/ @@ -1073,6 +1063,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:53.073079+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Natuurhistorisch Museum Rotterdam diff --git a/data/nde/enriched/entries/1277_Q1893708.yaml b/data/nde/enriched/entries/1277_Q1893708.yaml index 111c3381e9..959a1e3ad2 100644 --- a/data/nde/enriched/entries/1277_Q1893708.yaml +++ b/data/nde/enriched/entries/1277_Q1893708.yaml @@ -869,18 +869,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:33:48.559145+00:00' source_archive: web/1277/forensischinstituut.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Forensischinstituut.nl - source_url: https://www.forensischinstituut.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1277/forensischinstituut.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:33:48.558611+00:00' - claim_type: description_short claim_value: Het NFI is een internationaal kennis- en expertisecentrum voor forensisch onderzoek. Het instituut heeft bijna veertig verschillende forensische deskundigheidsgebieden @@ -925,3 +915,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:33:48.559030+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1279_Q1919182.yaml b/data/nde/enriched/entries/1279_Q1919182.yaml index ae6e09ed87..8cd1fb11f4 100644 --- a/data/nde/enriched/entries/1279_Q1919182.yaml +++ b/data/nde/enriched/entries/1279_Q1919182.yaml @@ -425,18 +425,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:53.272645+00:00' source_archive: web/1279/nederlandsmuziekinstituut.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - NMI - source_url: http://www.nederlandsmuziekinstituut.nl/ - retrieved_on: '2025-11-29T19:24:43.576183+00:00' - xpath: /html/head/title - html_file: web/1279/nederlandsmuziekinstituut.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:53.272147+00:00' - claim_type: description_short claim_value: De bibliotheek en archieven van het Nederlands Muziek Instituut vormen de muziekcollecties van het Haags Gemeentearchief @@ -499,6 +489,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:53.272597+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Nederlands Muziek Instituut diff --git a/data/nde/enriched/entries/1285_Q2797811.yaml b/data/nde/enriched/entries/1285_Q2797811.yaml index 3a05c498b7..411f87395f 100644 --- a/data/nde/enriched/entries/1285_Q2797811.yaml +++ b/data/nde/enriched/entries/1285_Q2797811.yaml @@ -631,18 +631,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:33:49.421876+00:00' source_archive: web/1285/oorlogsgravenstichting.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Oorlogsgravenstichting - source_url: https://oorlogsgravenstichting.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1285/oorlogsgravenstichting.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:33:49.419650+00:00' - claim_type: description_short claim_value: In Nederland zijn 180.000 mensen door de Oorlogsgravenstichting geregistreerd als oorlogsslachtoffer. Mannen, vrouwen en kinderen die hun leven verloren… @@ -715,3 +705,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:33:49.421604+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1289_Q2367258.yaml b/data/nde/enriched/entries/1289_Q2367258.yaml index 97ab753bca..6ee45c3821 100644 --- a/data/nde/enriched/entries/1289_Q2367258.yaml +++ b/data/nde/enriched/entries/1289_Q2367258.yaml @@ -653,18 +653,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:55.901896+00:00' source_archive: web/1289/pbl.nl - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Planbureau voor de Leefomgeving - source_url: https://www.pbl.nl - retrieved_on: '2025-11-29T19:35:35.884822+00:00' - xpath: /html/head/title - html_file: web/1289/pbl.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:55.900484+00:00' - claim_type: org_name claim_value: Planbureau voor de Leefomgeving raw_value: Planbureau voor de Leefomgeving @@ -755,6 +745,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:55.901604+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Planbureau voor de Leefomgeving diff --git a/data/nde/enriched/entries/1291_Q694.yaml b/data/nde/enriched/entries/1291_Q694.yaml index df30a3e084..5e11360157 100644 --- a/data/nde/enriched/entries/1291_Q694.yaml +++ b/data/nde/enriched/entries/1291_Q694.yaml @@ -4359,18 +4359,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:56.932134+00:00' source_archive: web/1291/zuid-holland.nl - claims_count: 10 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Provincie Zuid-Holland - source_url: https://www.zuid-holland.nl/ - retrieved_on: '2025-11-29T19:38:53.172475+00:00' - xpath: /html/head/title - html_file: web/1291/zuid-holland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:56.931116+00:00' - claim_type: description_short claim_value: De provincie Zuid-Holland is een bedrijvige, veelzijdige provincie. Het is de dichtstbevolkte provincie en heeft universiteiten in Delft, Leiden @@ -4395,16 +4385,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:48:56.931587+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3a%2f%2fwww.zuid-holland.nl%2f&t=De%20provincie%20Zuid-Holland%20is%20een%20bedrijvige%2c%20veelzijdige%20provincie.%20Het%20is%20de%20dichtstbevolkte%20provincie%20en%20heeft%20universiteiten%20in%20Delft%2c%20Leiden%20en%20Rotterdam. - raw_value: https://www.facebook.com/sharer.php?u=https%3a%2f%2fwww.zuid-holland.nl%2f&t=De%20provincie%20Zuid-Holland%20is%20een%20bedrijvige%2c%20veelzijdige%20provincie.%20Het%20is%20de%20dichtstbevolkte%20provincie%20en%20heeft%20universiteiten%20in%20Delft%2c%20Leiden%20en%20Rotterdam. - source_url: https://www.zuid-holland.nl/ - retrieved_on: '2025-11-29T19:38:53.172475+00:00' - xpath: /html/body/div/main/div[2]/div[1]/div[3]/div/div/div/div[2]/ul/li[1]/span/a - html_file: web/1291/zuid-holland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:56.931900+00:00' - claim_type: social_twitter claim_value: https://x.com/intent/tweet?text=Home&url=https%3a%2f%2fwww.zuid-holland.nl%2f raw_value: https://x.com/intent/tweet?text=Home&url=https%3a%2f%2fwww.zuid-holland.nl%2f @@ -4415,16 +4395,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:56.931907+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3a%2f%2fwww.zuid-holland.nl%2f&title=Home&ro=false&summary=De%20provincie%20Zuid-Holland%20is%20een%20bedrijvige%2c%20veelzijdige%20provincie.%20Het%20is%20de%20dichtstbevolkte%20provincie%20en%20heeft%20universiteiten%20in%20Delft%2c%20Leiden%20en%20Rotterdam. - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3a%2f%2fwww.zuid-holland.nl%2f&title=Home&ro=false&summary=De%20provincie%20Zuid-Holland%20is%20een%20bedrijvige%2c%20veelzijdige%20provincie.%20Het%20is%20de%20dichtstbevolkte%20provincie%20en%20heeft%20universiteiten%20in%20Delft%2c%20Leiden%20en%20Rotterdam. - source_url: https://www.zuid-holland.nl/ - retrieved_on: '2025-11-29T19:38:53.172475+00:00' - xpath: /html/body/div/main/div[2]/div[1]/div[3]/div/div/div/div[2]/ul/li[3]/span/a - html_file: web/1291/zuid-holland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:56.931914+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/zuidholland raw_value: https://www.facebook.com/zuidholland @@ -4465,6 +4435,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:56.931960+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Zuid-Holland diff --git a/data/nde/enriched/entries/1292_Q2103699.yaml b/data/nde/enriched/entries/1292_Q2103699.yaml index b1a39bb812..4f0861f299 100644 --- a/data/nde/enriched/entries/1292_Q2103699.yaml +++ b/data/nde/enriched/entries/1292_Q2103699.yaml @@ -573,7 +573,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:57.463414+00:00' source_archive: web/1292/regionaalarchiefdordrecht.nl - claims_count: 5 + claims_count: 3 claims: - claim_type: org_name claim_value: Regionaal Archief Dordrecht @@ -585,26 +585,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:48:57.462232+00:00' - - claim_type: org_name - claim_value: Chevron left - raw_value: Chevron left - source_url: http://www.regionaalarchiefdordrecht.nl - retrieved_on: '2025-11-29T19:39:46.898354+00:00' - xpath: /html/body/nav/div/div[2]/div[2]/div/button/svg/title - html_file: web/1292/regionaalarchiefdordrecht.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:57.462249+00:00' - - claim_type: org_name - claim_value: Close - raw_value: Close - source_url: http://www.regionaalarchiefdordrecht.nl - retrieved_on: '2025-11-29T19:39:46.898354+00:00' - xpath: /html/body/nav/div/div[2]/div[2]/div/div[2]/button/svg/title - html_file: web/1292/regionaalarchiefdordrecht.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:57.462254+00:00' - claim_type: social_instagram claim_value: http://www.instagram.com/regionaalarchiefdordrecht raw_value: http://www.instagram.com/regionaalarchiefdordrecht @@ -625,6 +605,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:57.463131+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Regionaal Archief Dordrecht diff --git a/data/nde/enriched/entries/1293_Q758610.yaml b/data/nde/enriched/entries/1293_Q758610.yaml index ccc990bba9..a381b91619 100644 --- a/data/nde/enriched/entries/1293_Q758610.yaml +++ b/data/nde/enriched/entries/1293_Q758610.yaml @@ -1828,18 +1828,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:57.627865+00:00' source_archive: web/1293/rkd.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | RKD – Nederlands Instituut voor Kunstgeschiedenis - source_url: https://rkd.nl/ - retrieved_on: '2025-11-29T19:37:02.709419+00:00' - xpath: /html/head/title - html_file: web/1293/rkd.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:57.627268+00:00' - claim_type: org_name claim_value: RKD – Nederlands Instituut voor Kunstgeschiedenis raw_value: RKD – Nederlands Instituut voor Kunstgeschiedenis @@ -1890,6 +1880,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:57.627767+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: RKD – Nederlands Instituut voor Kunstgeschiedenis diff --git a/data/nde/enriched/entries/1294_Q15881312.yaml b/data/nde/enriched/entries/1294_Q15881312.yaml index 29ea006446..e1ca6e676f 100644 --- a/data/nde/enriched/entries/1294_Q15881312.yaml +++ b/data/nde/enriched/entries/1294_Q15881312.yaml @@ -677,7 +677,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:57.785725+00:00' source_archive: web/1294/rvo.nl - claims_count: 8 + claims_count: 7 claims: - claim_type: org_name claim_value: Rijksdienst voor Ondernemend Nederland @@ -717,16 +717,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_description extraction_timestamp: '2025-12-01T10:48:57.785195+00:00' - - claim_type: org_name - claim_value: RVO.nl - raw_value: RVO.nl - source_url: https://www.rvo.nl/ - retrieved_on: '2025-11-29T19:37:02.994511+00:00' - xpath: /html/head/meta[5] - html_file: web/1294/rvo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T10:48:57.785261+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/rijksdienstvoorondernemendnederland raw_value: https://www.facebook.com/rijksdienstvoorondernemendnederland @@ -767,6 +757,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:57.785542+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijksdienst voor Ondernemend Nederland diff --git a/data/nde/enriched/entries/1297_Q758610.yaml b/data/nde/enriched/entries/1297_Q758610.yaml index 3b1f79b650..14f9860b12 100644 --- a/data/nde/enriched/entries/1297_Q758610.yaml +++ b/data/nde/enriched/entries/1297_Q758610.yaml @@ -1817,18 +1817,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:58.254979+00:00' source_archive: web/1297/rkd.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | RKD – Nederlands Instituut voor Kunstgeschiedenis - source_url: https://rkd.nl/ - retrieved_on: '2025-11-29T19:37:03.989654+00:00' - xpath: /html/head/title - html_file: web/1297/rkd.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:58.254204+00:00' - claim_type: org_name claim_value: RKD – Nederlands Instituut voor Kunstgeschiedenis raw_value: RKD – Nederlands Instituut voor Kunstgeschiedenis @@ -1879,6 +1869,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:58.254851+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: RKD – Nederlands Instituut voor Kunstgeschiedenis diff --git a/data/nde/enriched/entries/1298_Q2632714.yaml b/data/nde/enriched/entries/1298_Q2632714.yaml index c28cffd5c6..2b8336be09 100644 --- a/data/nde/enriched/entries/1298_Q2632714.yaml +++ b/data/nde/enriched/entries/1298_Q2632714.yaml @@ -455,18 +455,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:48:58.690458+00:00' source_archive: web/1298/rovm.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - ROVM - source_url: https://www.rovm.nl/ - retrieved_on: '2025-11-30T00:09:01.807324+00:00' - xpath: /html/head/title - html_file: web/1298/rovm.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:58.689610+00:00' - claim_type: email claim_value: info@stichtingromeo.nl raw_value: info@stichtingromeo.nl @@ -537,6 +527,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:48:58.690284+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rotterdams Openbaar Vervoer Museum diff --git a/data/nde/enriched/entries/1302_Q3912134.yaml b/data/nde/enriched/entries/1302_Q3912134.yaml index f6845a1750..37494782f2 100644 --- a/data/nde/enriched/entries/1302_Q3912134.yaml +++ b/data/nde/enriched/entries/1302_Q3912134.yaml @@ -963,18 +963,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:48:59.349702+00:00' source_archive: web/1302/stadsarchief.rotterdam.nl - claims_count: 18 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Stadsarchief Rotterdam - source_url: http://www.stadsarchief.rotterdam.nl - retrieved_on: '2025-11-29T19:37:11.590029+00:00' - xpath: /html/head/title - html_file: web/1302/stadsarchief.rotterdam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:59.349007+00:00' - claim_type: org_name claim_value: Verplicht raw_value: Verplicht @@ -985,26 +975,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:48:59.349022+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: http://www.stadsarchief.rotterdam.nl - retrieved_on: '2025-11-29T19:37:11.590029+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/1302/stadsarchief.rotterdam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:59.349032+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: http://www.stadsarchief.rotterdam.nl - retrieved_on: '2025-11-29T19:37:11.590029+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/1302/stadsarchief.rotterdam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:59.349037+00:00' - claim_type: org_name claim_value: Youtube raw_value: Youtube @@ -1015,36 +985,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:48:59.349041+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: http://www.stadsarchief.rotterdam.nl - retrieved_on: '2025-11-29T19:37:11.590029+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/1302/stadsarchief.rotterdam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:59.349045+00:00' - - claim_type: org_name - claim_value: X - raw_value: X - source_url: http://www.stadsarchief.rotterdam.nl - retrieved_on: '2025-11-29T19:37:11.590029+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[5]/a/svg/title - html_file: web/1302/stadsarchief.rotterdam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:59.349050+00:00' - - claim_type: org_name - claim_value: Rotterdam.nl - raw_value: Rotterdam.nl - source_url: http://www.stadsarchief.rotterdam.nl - retrieved_on: '2025-11-29T19:37:11.590029+00:00' - xpath: /html/body/div[2]/div/footer/div/div/a[2]/svg/title - html_file: web/1302/stadsarchief.rotterdam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:48:59.349055+00:00' - claim_type: description_short claim_value: Het geheugen van de stad Rotterdam. Website Stadsarchief Rotterdam. raw_value: Het geheugen van de stad Rotterdam. Website Stadsarchief Rotterdam. @@ -1065,16 +1005,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:48:59.349413+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A//www.stadsarchief.rotterdam.nl/home - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A//www.stadsarchief.rotterdam.nl/home - source_url: http://www.stadsarchief.rotterdam.nl - retrieved_on: '2025-11-29T19:37:11.590029+00:00' - xpath: /html/body/div[2]/div/main/div/section/div/ul/li[1]/a - html_file: web/1302/stadsarchief.rotterdam.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:48:59.349520+00:00' - claim_type: social_linkedin claim_value: https://www.linkedin.com/sharing/share-offsite/?url=https%3A//www.stadsarchief.rotterdam.nl/home raw_value: https://www.linkedin.com/sharing/share-offsite/?url=https%3A//www.stadsarchief.rotterdam.nl/home @@ -1145,6 +1075,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:48:59.349568+00:00' + removed_invalid_claims: + - removed_count: 7 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stadsarchief Rotterdam diff --git a/data/nde/enriched/entries/1311_Q136473440.yaml b/data/nde/enriched/entries/1311_Q136473440.yaml index aa8bfff3ac..b027dc9eeb 100644 --- a/data/nde/enriched/entries/1311_Q136473440.yaml +++ b/data/nde/enriched/entries/1311_Q136473440.yaml @@ -425,7 +425,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:00.764712+00:00' source_archive: web/1311/hendrickhamelmuseum.nl - claims_count: 10 + claims_count: 7 claims: - claim_type: org_name claim_value: Hendrick Hamel Museum @@ -437,36 +437,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:49:00.763014+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: https://hendrickhamelmuseum.nl - retrieved_on: '2025-11-29T19:39:09.972750+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div/a[1]/svg/title - html_file: web/1311/hendrickhamelmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:00.763027+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: https://hendrickhamelmuseum.nl - retrieved_on: '2025-11-29T19:39:09.972750+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div/a[2]/svg/title - html_file: web/1311/hendrickhamelmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:00.763031+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://hendrickhamelmuseum.nl - retrieved_on: '2025-11-29T19:39:09.972750+00:00' - xpath: /html/body/div[1]/div/div[9]/div/div[4]/section[2]/ul/li/a/svg/title - html_file: web/1311/hendrickhamelmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:00.763036+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -529,6 +499,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:00.764423+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Hendrick Hamel Museum diff --git a/data/nde/enriched/entries/1314_Q1456512.yaml b/data/nde/enriched/entries/1314_Q1456512.yaml index 25e7577aa6..9e7e747a99 100644 --- a/data/nde/enriched/entries/1314_Q1456512.yaml +++ b/data/nde/enriched/entries/1314_Q1456512.yaml @@ -905,18 +905,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:00.999388+00:00' source_archive: web/1314/louwmanmuseum.nl - claims_count: 13 + claims_count: 11 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Louwman Museum - source_url: https://www.louwmanmuseum.nl/en/ - retrieved_on: '2025-11-29T19:39:10.590236+00:00' - xpath: /html/head/title - html_file: web/1314/louwmanmuseum.nl/mirror/www.louwmanmuseum.nl/en/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:00.998134+00:00' - claim_type: description_short claim_value: The Louwman Museum in Den Haag is the place with one of the most beautiful automotive collections in the world. Find more than 250 models. @@ -939,16 +929,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:49:00.998684+00:00' - - claim_type: address - claim_value: '' - raw_value: null - source_url: https://www.louwmanmuseum.nl/en/ - retrieved_on: '2025-11-29T19:39:10.590236+00:00' - xpath: /html/head/script[17] - html_file: web/1314/louwmanmuseum.nl/mirror/www.louwmanmuseum.nl/en/index.html - xpath_match_score: 1.0 - extraction_method: schema_org_streetAddress - extraction_timestamp: '2025-12-01T10:49:00.998785+00:00' - claim_type: postal_code claim_value: 2594 BB raw_value: 2594 BB @@ -1039,6 +1019,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:00.999253+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Louwman Museum diff --git a/data/nde/enriched/entries/1321_invalid_id.yaml b/data/nde/enriched/entries/1321_invalid_id.yaml index 6854f88f88..354389497c 100644 --- a/data/nde/enriched/entries/1321_invalid_id.yaml +++ b/data/nde/enriched/entries/1321_invalid_id.yaml @@ -243,7 +243,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:02.289223+00:00' source_archive: web/1321/oudzoeterwoude.nl - claims_count: 7 + claims_count: 5 claims: - claim_type: org_name claim_value: Nieuws over Oud Zoeterwoude @@ -295,26 +295,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:49:02.288930+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.oudzoeterwoude.nl%2F&t=Nieuws%20over%20Oud%20Zoeterwoude&s=100&p[url]=https%3A%2F%2Fwww.oudzoeterwoude.nl%2F&p[images][0]=https%3A%2F%2Fwww.oudzoeterwoude.nl%2Fxtp%2Fwp-content%2Fuploads%2F2025%2F11%2FAffiche-Sinterklaas-2055-11-25-714x1024.jpg&p[title]=Nieuws%20over%20Oud%20Zoeterwoude - raw_value: https://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.oudzoeterwoude.nl%2F&t=Nieuws%20over%20Oud%20Zoeterwoude&s=100&p[url]=https%3A%2F%2Fwww.oudzoeterwoude.nl%2F&p[images][0]=https%3A%2F%2Fwww.oudzoeterwoude.nl%2Fxtp%2Fwp-content%2Fuploads%2F2025%2F11%2FAffiche-Sinterklaas-2055-11-25-714x1024.jpg&p[title]=Nieuws%20over%20Oud%20Zoeterwoude - source_url: https://www.oudzoeterwoude.nl/ - retrieved_on: '2025-11-29T19:42:50.805986+00:00' - xpath: /html/body/div[1]/div[2]/div[1]/div[3]/div/article/div/a[1] - html_file: web/1321/oudzoeterwoude.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:02.289088+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?url=https%3A%2F%2Fwww.oudzoeterwoude.nl%2F&text=Bekijk%20deze%20pagina%20eens... - raw_value: https://twitter.com/intent/tweet?url=https%3A%2F%2Fwww.oudzoeterwoude.nl%2F&text=Bekijk%20deze%20pagina%20eens... - source_url: https://www.oudzoeterwoude.nl/ - retrieved_on: '2025-11-29T19:42:50.805986+00:00' - xpath: /html/body/div[1]/div[2]/div[1]/div[3]/div/article/div/a[2] - html_file: web/1321/oudzoeterwoude.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:02.289093+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Oud Zoeterwoude diff --git a/data/nde/enriched/entries/1326_Q81181215.yaml b/data/nde/enriched/entries/1326_Q81181215.yaml index 27502938ec..f527480fa4 100644 --- a/data/nde/enriched/entries/1326_Q81181215.yaml +++ b/data/nde/enriched/entries/1326_Q81181215.yaml @@ -525,18 +525,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:49:03.754374+00:00' source_archive: web/1326/streekarchiefvpr.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Streekarchief Voorne Putten - source_url: http://www.streekarchiefvpr.nl - retrieved_on: '2025-11-29T19:40:03.780102+00:00' - xpath: /html/head/title - html_file: web/1326/streekarchiefvpr.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:03.753239+00:00' - claim_type: description_short claim_value: 'Archieven Personen Beeldbank Bibliotheek Kranten Audiovisueel Bouwdossiers Notariële akten Openingstijden: dinsdag – vrijdag: 9:00 – 16:00 uur Bezoekadres: @@ -597,6 +587,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:03.754146+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchief Voorne-Putten diff --git a/data/nde/enriched/entries/1328_Q81181253.yaml b/data/nde/enriched/entries/1328_Q81181253.yaml index c54bb83b01..bf2d75c4f5 100644 --- a/data/nde/enriched/entries/1328_Q81181253.yaml +++ b/data/nde/enriched/entries/1328_Q81181253.yaml @@ -481,18 +481,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:49:03.906610+00:00' source_archive: web/1328/samh.nl - claims_count: 11 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - SAMH - source_url: https://samh.nl/ - retrieved_on: '2025-11-29T19:40:04.312875+00:00' - xpath: /html/head/title - html_file: web/1328/samh.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:03.906037+00:00' - claim_type: description_short claim_value: Streekarchief Midden-Holland beheert het geheugen van de regio Midden-Holland. Iedereen die iets wil weten over de geschiedenis van zijn familie, huis of buurt @@ -597,6 +587,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:03.906555+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchief Midden-Holland diff --git a/data/nde/enriched/entries/1329_Q81181182.yaml b/data/nde/enriched/entries/1329_Q81181182.yaml index 3b29a31eac..8f588e6147 100644 --- a/data/nde/enriched/entries/1329_Q81181182.yaml +++ b/data/nde/enriched/entries/1329_Q81181182.yaml @@ -355,18 +355,8 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:49:03.991697+00:00' source_archive: web/1329/groenehartarchieven.nl - claims_count: 11 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - SAMH - source_url: http://www.groenehartarchieven.nl - retrieved_on: '2025-11-29T19:40:04.822570+00:00' - xpath: /html/head/title - html_file: web/1329/groenehartarchieven.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:03.991030+00:00' - claim_type: description_short claim_value: Streekarchief Midden-Holland beheert het geheugen van de regio Midden-Holland. Iedereen die iets wil weten over de geschiedenis van zijn familie, huis of buurt @@ -471,6 +461,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:03.991618+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchief Rijnlands Midden diff --git a/data/nde/enriched/entries/1330_Q81181215.yaml b/data/nde/enriched/entries/1330_Q81181215.yaml index 3ddccfecb1..94e2495ce0 100644 --- a/data/nde/enriched/entries/1330_Q81181215.yaml +++ b/data/nde/enriched/entries/1330_Q81181215.yaml @@ -510,18 +510,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:04.114905+00:00' source_archive: web/1330/streekarchiefvpr.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Streekarchief Voorne Putten - source_url: http://www.streekarchiefvpr.nl - retrieved_on: '2025-11-29T19:40:05.443516+00:00' - xpath: /html/head/title - html_file: web/1330/streekarchiefvpr.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:04.113191+00:00' - claim_type: description_short claim_value: 'Archieven Personen Beeldbank Bibliotheek Kranten Audiovisueel Bouwdossiers Notariële akten Openingstijden: dinsdag – vrijdag: 9:00 – 16:00 uur Bezoekadres: @@ -582,6 +572,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:04.114212+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Streekarchief Voorne-Putten diff --git a/data/nde/enriched/entries/1336_Q1872824.yaml b/data/nde/enriched/entries/1336_Q1872824.yaml index a5440ad802..3fe1de51b3 100644 --- a/data/nde/enriched/entries/1336_Q1872824.yaml +++ b/data/nde/enriched/entries/1336_Q1872824.yaml @@ -830,18 +830,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:05.346424+00:00' source_archive: web/1336/trompenburg.nl - claims_count: 13 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Trompenburg - source_url: http://www.trompenburg.nl - retrieved_on: '2025-11-29T19:41:57.027721+00:00' - xpath: /html/head/title - html_file: web/1336/trompenburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:05.345117+00:00' - claim_type: org_name claim_value: Winkelwagen raw_value: Winkelwagen @@ -862,26 +852,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:49:05.345136+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: http://www.trompenburg.nl - retrieved_on: '2025-11-29T19:41:57.027721+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[1]/svg/title - html_file: web/1336/trompenburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:05.345141+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: http://www.trompenburg.nl - retrieved_on: '2025-11-29T19:41:57.027721+00:00' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[2]/svg/title - html_file: web/1336/trompenburg.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:05.345145+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -972,6 +942,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:05.346224+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Arboretum Trompenburg diff --git a/data/nde/enriched/entries/1340_invalid_id.yaml b/data/nde/enriched/entries/1340_invalid_id.yaml index 2a6fd1ad22..83fdb7e733 100644 --- a/data/nde/enriched/entries/1340_invalid_id.yaml +++ b/data/nde/enriched/entries/1340_invalid_id.yaml @@ -265,19 +265,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:06.407267+00:00' source_archive: web/1340/verenigingdelijn.nl - claims_count: 2 + claims_count: 1 claims: - - claim_type: org_name - claim_value: verenigingdelijn.nl - raw_value: verenigingdelijn.nl – Voor en door (oud)medewerkers en belangstellenden - Holland-Amerika Lijn / Holland America Line - source_url: https://verenigingdelijn.nl/ - retrieved_on: '2025-11-29T19:42:29.269636+00:00' - xpath: /html/head/title - html_file: web/1340/verenigingdelijn.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:06.405926+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/groups/250815384229292 raw_value: https://www.facebook.com/groups/250815384229292 @@ -288,6 +277,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:06.406928+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De Lijn diff --git a/data/nde/enriched/entries/1342_Q2803129.yaml b/data/nde/enriched/entries/1342_Q2803129.yaml index 51f62ea5be..1d2f3e11ce 100644 --- a/data/nde/enriched/entries/1342_Q2803129.yaml +++ b/data/nde/enriched/entries/1342_Q2803129.yaml @@ -575,7 +575,7 @@ ghcid: web_claims: extraction_timestamp: '2025-12-01T10:49:06.590620+00:00' source_archive: web/1342/vng.nl - claims_count: 8 + claims_count: 4 claims: - claim_type: org_name claim_value: VNG @@ -587,36 +587,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:49:06.589120+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A//vng.nl/home - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A//vng.nl/home - source_url: http://vng.nl - retrieved_on: '2025-11-29T19:42:37.305786+00:00' - xpath: /html/body/div/div/main/div[2]/ul/li[1]/a - html_file: web/1342/vng.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:06.590147+00:00' - - claim_type: social_twitter - claim_value: http://twitter.com/share?text=Home&url=https%3A//vng.nl/home - raw_value: http://twitter.com/share?text=Home&url=https%3A//vng.nl/home - source_url: http://vng.nl - retrieved_on: '2025-11-29T19:42:37.305786+00:00' - xpath: /html/body/div/div/main/div[2]/ul/li[2]/a - html_file: web/1342/vng.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:06.590161+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//vng.nl/home&title=Home - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//vng.nl/home&title=Home - source_url: http://vng.nl - retrieved_on: '2025-11-29T19:42:37.305786+00:00' - xpath: /html/body/div/div/main/div[2]/ul/li[3]/a - html_file: web/1342/vng.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:06.590171+00:00' - claim_type: social_linkedin claim_value: https://www.linkedin.com/company/vng raw_value: https://www.linkedin.com/company/vng @@ -647,16 +617,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:06.590214+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://vng.nl - retrieved_on: '2025-11-29T19:42:37.305786+00:00' - xpath: /html/body/div/div/h1 - html_file: web/1342/vng.nl/pages/index.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:49:06.590291+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Vereniging van Nederlandse Gemeenten diff --git a/data/nde/enriched/entries/1344_Q2036123.yaml b/data/nde/enriched/entries/1344_Q2036123.yaml index ecf60b7e63..4823aee0cf 100644 --- a/data/nde/enriched/entries/1344_Q2036123.yaml +++ b/data/nde/enriched/entries/1344_Q2036123.yaml @@ -830,18 +830,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:07.170593+00:00' source_archive: web/1344/vrijmetselarijmuseum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.vrijmetselarijmuseum.nl/ - retrieved_on: '2025-11-29T19:42:56.551948+00:00' - xpath: /html/head/title - html_file: web/1344/vrijmetselarijmuseum.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:07.169999+00:00' - claim_type: description_short claim_value: 'De verzamelingen van de Orde van Vrijmetselaren: een bibliotheek, een archief en een museum' @@ -874,6 +864,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:49:07.170439+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Vrijmetselarij Museum / Cultureel Maçonniek Centrum diff --git a/data/nde/enriched/entries/1348_Q933459.yaml b/data/nde/enriched/entries/1348_Q933459.yaml index 13a2c83ae5..96a20988db 100644 --- a/data/nde/enriched/entries/1348_Q933459.yaml +++ b/data/nde/enriched/entries/1348_Q933459.yaml @@ -2095,18 +2095,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:07.735553+00:00' source_archive: web/1348/goeree-overflakkee.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Gemeente Goeree-Overflakkee - source_url: https://www.goeree-overflakkee.nl/ - retrieved_on: '2025-11-29T19:43:07.533098+00:00' - xpath: /html/head/title - html_file: web/1348/goeree-overflakkee.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:07.734833+00:00' - claim_type: description_short claim_value: Officiële website van gemeente Goeree-Overflakkee. Hier vindt u informatie, nieuwsberichten en online dienstverlening. @@ -2179,6 +2169,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:07.735362+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Goeree-Overflakkee diff --git a/data/nde/enriched/entries/1351_Q110282063.yaml b/data/nde/enriched/entries/1351_Q110282063.yaml index 5e3ac0b006..95c934c92b 100644 --- a/data/nde/enriched/entries/1351_Q110282063.yaml +++ b/data/nde/enriched/entries/1351_Q110282063.yaml @@ -321,18 +321,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:08.022183+00:00' source_archive: web/1351/elkander-getrouw.com - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Elkander Getrouw - source_url: http://www.elkander-getrouw.com/ - retrieved_on: '2025-11-30T00:09:22.423702+00:00' - xpath: /html/head/title - html_file: web/1351/elkander-getrouw.com/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:08.021034+00:00' - claim_type: description_short claim_value: Welkom Muziekvereniging Elkander Getrouw Muziek maken is het leukste als je dat samen doet Over Ons ELKANDER GETROUW, Springlevend Christelijke muziekvereniging @@ -411,6 +401,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:08.022064+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting Egress Foundation CSROT diff --git a/data/nde/enriched/entries/1357_kb_isil.yaml b/data/nde/enriched/entries/1357_kb_isil.yaml index a2319576ed..67b2d085d8 100644 --- a/data/nde/enriched/entries/1357_kb_isil.yaml +++ b/data/nde/enriched/entries/1357_kb_isil.yaml @@ -361,18 +361,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:09.837460+00:00' source_archive: web/1357/rijnbrink.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home – Rijnbrink - source_url: https://www.rijnbrink.nl/ - retrieved_on: '2025-11-29T19:45:59.052457+00:00' - xpath: /html/head/title - html_file: web/1357/rijnbrink.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:09.836728+00:00' - claim_type: description_short claim_value: × Rijnbrink in een nieuw jasje We willen je inspireren met nieuws dat écht bij jou past. Laat weten waar je meer over wilt lezen. Ja, ik wil me @@ -459,6 +449,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:09.837378+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijnbrink diff --git a/data/nde/enriched/entries/1359_kb_isil.yaml b/data/nde/enriched/entries/1359_kb_isil.yaml index 0abc514b23..0e649cb614 100644 --- a/data/nde/enriched/entries/1359_kb_isil.yaml +++ b/data/nde/enriched/entries/1359_kb_isil.yaml @@ -377,7 +377,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:10.025179+00:00' source_archive: web/1359/limburg.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Cubiss Limburg @@ -415,26 +415,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T10:49:10.024570+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https%3a%2f%2fwww.limburg.nl%2fonderwerpen%2fgezondheid%2fbetrokken-organisaties%2fcubiss-limburg%2f&t=Iedereen%20heeft%20recht%20op%20toegang%20tot%20informatie%20en%20cultuur.%20In%20Nederland%20zorgen%20de%20openbare%20bibliotheken%20hier%20voor.Cubiss%20Limburg%20ondersteunt%20(vanuit%20de%20Wet%20stelsel%20openbare%20bibliotheekvoorzieningen%20(Wsob))%20de%20Limburgse%20bibliotheeksector%20bij%20hun%20werkzaamheden. - raw_value: https://www.facebook.com/sharer.php?u=https%3a%2f%2fwww.limburg.nl%2fonderwerpen%2fgezondheid%2fbetrokken-organisaties%2fcubiss-limburg%2f&t=Iedereen%20heeft%20recht%20op%20toegang%20tot%20informatie%20en%20cultuur.%20In%20Nederland%20zorgen%20de%20openbare%20bibliotheken%20hier%20voor.Cubiss%20Limburg%20ondersteunt%20(vanuit%20de%20Wet%20stelsel%20openbare%20bibliotheekvoorzieningen%20(Wsob))%20de%20Limburgse%20bibliotheeksector%20bij%20hun%20werkzaamheden. - source_url: https://www.limburg.nl/onderwerpen/gezondheid/betrokken-organisaties/cubiss-limburg/ - retrieved_on: '2025-11-29T19:44:37.572397+00:00' - xpath: /html/body/div/main/div[2]/div[1]/div[9]/div/div/div/div[2]/ul/li[1]/span/a - html_file: web/1359/limburg.nl/mirror/www.limburg.nl/onderwerpen/gezondheid/betrokken-organisaties/cubiss-limburg/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:10.024985+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3a%2f%2fwww.limburg.nl%2fonderwerpen%2fgezondheid%2fbetrokken-organisaties%2fcubiss-limburg%2f&title=Cubiss%20Limburg&ro=false&summary=Iedereen%20heeft%20recht%20op%20toegang%20tot%20informatie%20en%20cultuur.%20In%20Nederland%20zorgen%20de%20openbare%20bibliotheken%20hier%20voor.Cubiss%20Limburg%20ondersteunt%20(vanuit%20de%20Wet%20stelsel%20openbare%20bibliotheekvoorzieningen%20(Wsob))%20de%20Limburgse%20bibliotheeksector%20bij%20hun%20werkzaamheden. - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3a%2f%2fwww.limburg.nl%2fonderwerpen%2fgezondheid%2fbetrokken-organisaties%2fcubiss-limburg%2f&title=Cubiss%20Limburg&ro=false&summary=Iedereen%20heeft%20recht%20op%20toegang%20tot%20informatie%20en%20cultuur.%20In%20Nederland%20zorgen%20de%20openbare%20bibliotheken%20hier%20voor.Cubiss%20Limburg%20ondersteunt%20(vanuit%20de%20Wet%20stelsel%20openbare%20bibliotheekvoorzieningen%20(Wsob))%20de%20Limburgse%20bibliotheeksector%20bij%20hun%20werkzaamheden. - source_url: https://www.limburg.nl/onderwerpen/gezondheid/betrokken-organisaties/cubiss-limburg/ - retrieved_on: '2025-11-29T19:44:37.572397+00:00' - xpath: /html/body/div/main/div[2]/div[1]/div[9]/div/div/div/div[2]/ul/li[2]/span/a - html_file: web/1359/limburg.nl/mirror/www.limburg.nl/onderwerpen/gezondheid/betrokken-organisaties/cubiss-limburg/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:10.024994+00:00' - claim_type: social_twitter claim_value: https://x.com/intent/tweet?text=Cubiss%20Limburg&url=https%3a%2f%2fwww.limburg.nl%2fonderwerpen%2fgezondheid%2fbetrokken-organisaties%2fcubiss-limburg%2f raw_value: https://x.com/intent/tweet?text=Cubiss%20Limburg&url=https%3a%2f%2fwww.limburg.nl%2fonderwerpen%2fgezondheid%2fbetrokken-organisaties%2fcubiss-limburg%2f @@ -485,6 +465,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:10.025023+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Cubiss Limburg diff --git a/data/nde/enriched/entries/1362_kb_isil.yaml b/data/nde/enriched/entries/1362_kb_isil.yaml index 1c8192177a..919d603ce9 100644 --- a/data/nde/enriched/entries/1362_kb_isil.yaml +++ b/data/nde/enriched/entries/1362_kb_isil.yaml @@ -337,18 +337,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:11.121193+00:00' source_archive: web/1362/rijnbrink.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home – Rijnbrink - source_url: https://www.rijnbrink.nl/ - retrieved_on: '2025-11-29T19:48:01.496640+00:00' - xpath: /html/head/title - html_file: web/1362/rijnbrink.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:11.120259+00:00' - claim_type: description_short claim_value: × Rijnbrink in een nieuw jasje We willen je inspireren met nieuws dat écht bij jou past. Laat weten waar je meer over wilt lezen. Ja, ik wil me @@ -435,6 +425,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:11.121111+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijnbrink diff --git a/data/nde/enriched/entries/1367_kb_isil.yaml b/data/nde/enriched/entries/1367_kb_isil.yaml index cf79b2342f..3321e08d7b 100644 --- a/data/nde/enriched/entries/1367_kb_isil.yaml +++ b/data/nde/enriched/entries/1367_kb_isil.yaml @@ -306,18 +306,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:12.211287+00:00' source_archive: web/1367/bibliotheeknetwerkflevoland.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - BibliotheekNetwerkFlevoland - source_url: https://www.bibliotheeknetwerkflevoland.nl/ - retrieved_on: '2025-11-29T19:50:15.395982+00:00' - xpath: /html/head/title - html_file: web/1367/bibliotheeknetwerkflevoland.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:12.210774+00:00' - claim_type: org_name claim_value: BibliotheekNetwerkFlevoland raw_value: BibliotheekNetwerkFlevoland @@ -378,6 +368,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T10:49:12.211104+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: BibliotheekNetwerkFlevoland diff --git a/data/nde/enriched/entries/1371_kb_isil.yaml b/data/nde/enriched/entries/1371_kb_isil.yaml index d5e2c0d8c6..171dd36eb4 100644 --- a/data/nde/enriched/entries/1371_kb_isil.yaml +++ b/data/nde/enriched/entries/1371_kb_isil.yaml @@ -414,18 +414,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:13.240360+00:00' source_archive: web/1371/nieuw.passendlezen.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Passend Lezen - source_url: https://nieuw.passendlezen.nl/ - retrieved_on: '2025-11-29T19:49:10.289933+00:00' - xpath: /html/head/title - html_file: web/1371/nieuw.passendlezen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:13.239461+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/passendlezen raw_value: https://www.facebook.com/passendlezen @@ -476,6 +466,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:13.240193+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Passend Lezen diff --git a/data/nde/enriched/entries/1373_kb_isil.yaml b/data/nde/enriched/entries/1373_kb_isil.yaml index 1b92c20ed8..ea87072f71 100644 --- a/data/nde/enriched/entries/1373_kb_isil.yaml +++ b/data/nde/enriched/entries/1373_kb_isil.yaml @@ -410,18 +410,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:13.592991+00:00' source_archive: web/1373/musidesk.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Musidesk - source_url: https://www.musidesk.nl/ - retrieved_on: '2025-11-29T19:49:12.538650+00:00' - xpath: /html/head/title - html_file: web/1373/musidesk.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:13.592029+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/musidesk/ raw_value: https://www.facebook.com/musidesk/ @@ -452,6 +442,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:13.592836+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijnbrink Musidesk Gelderland diff --git a/data/nde/enriched/entries/1374_kb_isil.yaml b/data/nde/enriched/entries/1374_kb_isil.yaml index c9665256a0..0632d1edf2 100644 --- a/data/nde/enriched/entries/1374_kb_isil.yaml +++ b/data/nde/enriched/entries/1374_kb_isil.yaml @@ -395,18 +395,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:13.826495+00:00' source_archive: web/1374/musidesk.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Musidesk - source_url: https://www.musidesk.nl/ - retrieved_on: '2025-11-29T19:49:14.107814+00:00' - xpath: /html/head/title - html_file: web/1374/musidesk.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:13.825556+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/musidesk/ raw_value: https://www.facebook.com/musidesk/ @@ -437,6 +427,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:13.826339+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Rijnbrink Musidesk Overijssel diff --git a/data/nde/enriched/entries/1388_kb_isil.yaml b/data/nde/enriched/entries/1388_kb_isil.yaml index 35e116840c..df01ad7ce6 100644 --- a/data/nde/enriched/entries/1388_kb_isil.yaml +++ b/data/nde/enriched/entries/1388_kb_isil.yaml @@ -290,18 +290,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:17.705554+00:00' source_archive: web/1388/verhalenwerf.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.verhalenwerf.nl/ - retrieved_on: '2025-11-29T19:55:44.832013+00:00' - xpath: /html/head/title - html_file: web/1388/verhalenwerf.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:17.705089+00:00' - claim_type: org_name claim_value: Verhalenwerf raw_value: Verhalenwerf @@ -364,6 +354,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:17.705501+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Hoogeveen/Verhalenwerf diff --git a/data/nde/enriched/entries/1389_kb_isil.yaml b/data/nde/enriched/entries/1389_kb_isil.yaml index 7277ff7645..554c0c8c54 100644 --- a/data/nde/enriched/entries/1389_kb_isil.yaml +++ b/data/nde/enriched/entries/1389_kb_isil.yaml @@ -287,18 +287,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:18.069804+00:00' source_archive: web/1389/zininbibliotheek.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.zininbibliotheek.nl/ - retrieved_on: '2025-11-29T19:59:02.223421+00:00' - xpath: /html/head/title - html_file: web/1389/zininbibliotheek.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:18.069096+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/zininbibliotheek/ raw_value: https://www.facebook.com/zininbibliotheek/ @@ -319,6 +309,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:18.069659+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: ZinIn Bibliotheek diff --git a/data/nde/enriched/entries/1393_kb_isil.yaml b/data/nde/enriched/entries/1393_kb_isil.yaml index 3a4bfc9fd5..8057c03fa4 100644 --- a/data/nde/enriched/entries/1393_kb_isil.yaml +++ b/data/nde/enriched/entries/1393_kb_isil.yaml @@ -287,18 +287,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:19.411321+00:00' source_archive: web/1393/bibliotheekdeventer.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.bibliotheekdeventer.nl/ - retrieved_on: '2025-11-29T20:01:24.314463+00:00' - xpath: /html/head/title - html_file: web/1393/bibliotheekdeventer.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:19.410713+00:00' - claim_type: description_short claim_value: 'Bibliotheek Deventer is dé plek voor: boeken, cursussen, taalondersteuning, cultuur, een kop koffie en veel meer! Laat je verrassen en inspireren.' @@ -341,6 +331,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:19.411201+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Deventer diff --git a/data/nde/enriched/entries/1397_kb_isil.yaml b/data/nde/enriched/entries/1397_kb_isil.yaml index 22667f622f..1215fc6f7d 100644 --- a/data/nde/enriched/entries/1397_kb_isil.yaml +++ b/data/nde/enriched/entries/1397_kb_isil.yaml @@ -304,18 +304,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:20.379176+00:00' source_archive: web/1397/bibliorura.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - de Bibliotheek Bibliorura - source_url: https://bibliorura.nl/ - retrieved_on: '2025-11-29T20:00:11.367981+00:00' - xpath: /html/head/title - html_file: web/1397/bibliorura.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:20.377937+00:00' - claim_type: description_short claim_value: de Bibliotheek Bibliorura raw_value: de Bibliotheek Bibliorura @@ -346,6 +336,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:20.378960+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliorura diff --git a/data/nde/enriched/entries/1406_kb_isil.yaml b/data/nde/enriched/entries/1406_kb_isil.yaml index 99e1f683b8..fadbfabd1c 100644 --- a/data/nde/enriched/entries/1406_kb_isil.yaml +++ b/data/nde/enriched/entries/1406_kb_isil.yaml @@ -468,7 +468,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:22.706669+00:00' source_archive: web/1406/visitkopvanholland.nl - claims_count: 12 + claims_count: 10 claims: - claim_type: org_name claim_value: Bibliotheek Den Helder, School 7 @@ -480,26 +480,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:49:22.705656+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://visitkopvanholland.nl/en/locations/library-den-helder-school-7/ - retrieved_on: '2025-11-29T20:03:17.828253+00:00' - xpath: /html/body/footer/div/div[1]/div[1]/ul/li[1]/a/svg/title - html_file: web/1406/visitkopvanholland.nl/mirror/visitkopvanholland.nl/en/locations/library-den-helder-school-7/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:22.705688+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://visitkopvanholland.nl/en/locations/library-den-helder-school-7/ - retrieved_on: '2025-11-29T20:03:17.828253+00:00' - xpath: /html/body/footer/div/div[1]/div[1]/ul/li[2]/a/svg/title - html_file: web/1406/visitkopvanholland.nl/mirror/visitkopvanholland.nl/en/locations/library-den-helder-school-7/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:22.705695+00:00' - claim_type: org_name claim_value: E-mail raw_value: E-mail @@ -590,6 +570,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:22.706504+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Stichting KopGroep Bibliotheken Den Helder diff --git a/data/nde/enriched/entries/1407_kb_isil.yaml b/data/nde/enriched/entries/1407_kb_isil.yaml index 7b759b6e90..ad56b6ab08 100644 --- a/data/nde/enriched/entries/1407_kb_isil.yaml +++ b/data/nde/enriched/entries/1407_kb_isil.yaml @@ -544,7 +544,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:22.828317+00:00' source_archive: web/1407/leideninternationalcentre.nl - claims_count: 18 + claims_count: 17 claims: - claim_type: org_name claim_value: BplusC @@ -684,16 +684,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:22.828143+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https://www.leideninternationalcentre.nl/locations/2061722299/bplusc-1&title=BplusC - raw_value: https://www.facebook.com/sharer.php?u=https://www.leideninternationalcentre.nl/locations/2061722299/bplusc-1&title=BplusC - source_url: https://www.leideninternationalcentre.nl/locations/2061722299/bplusc-1 - retrieved_on: '2025-11-29T20:03:19.315835+00:00' - xpath: /html/body/main/div/div/div[2]/div[3]/div/a[1] - html_file: web/1407/leideninternationalcentre.nl/pages/locations_2061722299_bplusc-1.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:22.828151+00:00' - claim_type: social_instagram claim_value: https://www.instagram.com/leideninternationalcentre/ raw_value: https://www.instagram.com/leideninternationalcentre/ @@ -734,6 +724,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:22.828176+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: BplusC diff --git a/data/nde/enriched/entries/1408_kb_isil.yaml b/data/nde/enriched/entries/1408_kb_isil.yaml index c9a6a0d806..94e9e81a2f 100644 --- a/data/nde/enriched/entries/1408_kb_isil.yaml +++ b/data/nde/enriched/entries/1408_kb_isil.yaml @@ -415,18 +415,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:23.233590+00:00' source_archive: web/1408/bibliotheekalmelo.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.bibliotheekalmelo.nl/ - retrieved_on: '2025-11-29T20:06:46.706364+00:00' - xpath: /html/head/title - html_file: web/1408/bibliotheekalmelo.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:23.233092+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/bibliotheekalmelo raw_value: https://www.facebook.com/bibliotheekalmelo @@ -457,6 +447,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:23.233499+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Almelo diff --git a/data/nde/enriched/entries/1416_kb_isil.yaml b/data/nde/enriched/entries/1416_kb_isil.yaml index 12f9aaccfe..f7f9339dfc 100644 --- a/data/nde/enriched/entries/1416_kb_isil.yaml +++ b/data/nde/enriched/entries/1416_kb_isil.yaml @@ -429,7 +429,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:24.852107+00:00' source_archive: web/1416/kliknieuwsoss.nl - claims_count: 8 + claims_count: 7 claims: - claim_type: org_name claim_value: Bibliotheek wordt hoofdhuurder van het nieuwe Warenhuis @@ -490,16 +490,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:24.851621+00:00' - - claim_type: social_linkedin - claim_value: http://www.linkedin.com/shareArticle?mini=false&url=https://www.kliknieuwsoss.nl/nieuws/human-interest/244061/bibliotheek-wordt-hoofdhuurder-van-het-nieuwe-warenhuis&title=Bibliotheek+wordt+hoofdhuurder+van+het+nieuwe+Warenhuis&source=kliknieuwsoss.nl - raw_value: http://www.linkedin.com/shareArticle?mini=false&url=https://www.kliknieuwsoss.nl/nieuws/human-interest/244061/bibliotheek-wordt-hoofdhuurder-van-het-nieuwe-warenhuis&title=Bibliotheek+wordt+hoofdhuurder+van+het+nieuwe+Warenhuis&source=kliknieuwsoss.nl - source_url: https://www.kliknieuwsoss.nl/nieuws/human-interest/244061/bibliotheek-wordt-hoofdhuurder-van-het-nieuwe-warenhuis - retrieved_on: '2025-11-29T20:06:37.533818+00:00' - xpath: /html/body/main/div[2]/div/div[1]/div[4]/div/ul/li[5]/div/a - html_file: web/1416/kliknieuwsoss.nl/pages/nieuws_human-interest_244061_bibliotheek-wordt-hoofdhuurder-van-het-nieuwe-warenhuis.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:24.851634+00:00' - claim_type: social_facebook claim_value: https://facebook.com/regiooss raw_value: https://facebook.com/regiooss @@ -520,6 +510,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:24.851679+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: NOBB diff --git a/data/nde/enriched/entries/1421_kb_isil.yaml b/data/nde/enriched/entries/1421_kb_isil.yaml index 5762c966d4..2a9a4eae86 100644 --- a/data/nde/enriched/entries/1421_kb_isil.yaml +++ b/data/nde/enriched/entries/1421_kb_isil.yaml @@ -439,7 +439,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:25.196649+00:00' source_archive: web/1421/linqmedia.nl - claims_count: 7 + claims_count: 4 claims: - claim_type: org_name claim_value: Bibliotheek Zuid-Hollandse Delta en de Boekenberg samen verder! @@ -451,36 +451,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:49:25.195913+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?url=https://www.linqmedia.nl/nieuws/bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder - raw_value: https://www.linkedin.com/shareArticle?url=https://www.linqmedia.nl/nieuws/bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder - source_url: https://www.linqmedia.nl/nieuws/bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder - retrieved_on: '2025-11-29T20:06:49.824093+00:00' - xpath: /html/body/section[2]/section/section[1]/div[3]/div/div[1]/a[1] - html_file: web/1421/linqmedia.nl/pages/nieuws_bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:25.196405+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https://www.linqmedia.nl/nieuws/bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder - raw_value: https://www.facebook.com/sharer.php?u=https://www.linqmedia.nl/nieuws/bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder - source_url: https://www.linqmedia.nl/nieuws/bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder - retrieved_on: '2025-11-29T20:06:49.824093+00:00' - xpath: /html/body/section[2]/section/section[1]/div[3]/div/div[1]/a[2] - html_file: web/1421/linqmedia.nl/pages/nieuws_bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:25.196412+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?url=https://www.linqmedia.nl/nieuws/bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder - raw_value: https://twitter.com/intent/tweet?url=https://www.linqmedia.nl/nieuws/bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder - source_url: https://www.linqmedia.nl/nieuws/bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder - retrieved_on: '2025-11-29T20:06:49.824093+00:00' - xpath: /html/body/section[2]/section/section[1]/div[3]/div/div[1]/a[3] - html_file: web/1421/linqmedia.nl/pages/nieuws_bibliotheek-zuid-hollandse-delta-en-de-boekenberg-samen-verder.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:25.196442+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/LINQonline raw_value: https://www.facebook.com/LINQonline @@ -511,6 +481,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:25.196479+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Zuid-Hollandse Delta en de Boekenberg samen verder! diff --git a/data/nde/enriched/entries/1422_kb_isil.yaml b/data/nde/enriched/entries/1422_kb_isil.yaml index 1afdf22b3b..3c715a7e0a 100644 --- a/data/nde/enriched/entries/1422_kb_isil.yaml +++ b/data/nde/enriched/entries/1422_kb_isil.yaml @@ -393,18 +393,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:25.546307+00:00' source_archive: web/1422/bibliotheektubbergen.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.bibliotheektubbergen.nl/ - retrieved_on: '2025-11-29T20:07:00.466719+00:00' - xpath: /html/head/title - html_file: web/1422/bibliotheektubbergen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:25.545381+00:00' - claim_type: email claim_value: '%20info@bibliotheektubbergen.nl' raw_value: '%20info@bibliotheektubbergen.nl' @@ -425,6 +415,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:25.546008+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Tubbergen diff --git a/data/nde/enriched/entries/1424_kb_isil.yaml b/data/nde/enriched/entries/1424_kb_isil.yaml index 3bdb14c9b5..264acf3874 100644 --- a/data/nde/enriched/entries/1424_kb_isil.yaml +++ b/data/nde/enriched/entries/1424_kb_isil.yaml @@ -348,18 +348,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:25.634339+00:00' source_archive: web/1424/bibliotheek-zoetermeer.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: '' - retrieved_on: '' - xpath: /html/head/title - html_file: web/1424/bibliotheek-zoetermeer.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:25.633633+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/BibliotheekZoetermeer raw_value: https://www.facebook.com/BibliotheekZoetermeer @@ -390,6 +380,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:25.634213+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Forum Bibliotheek Zoetermeer diff --git a/data/nde/enriched/entries/1428_kb_isil.yaml b/data/nde/enriched/entries/1428_kb_isil.yaml index 07c5279664..2dc819dd44 100644 --- a/data/nde/enriched/entries/1428_kb_isil.yaml +++ b/data/nde/enriched/entries/1428_kb_isil.yaml @@ -382,7 +382,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:25.955587+00:00' source_archive: web/1428/ontdekgooisemeren.nl - claims_count: 12 + claims_count: 10 claims: - claim_type: org_name claim_value: Bibliotheek Gooi+ @@ -464,26 +464,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:49:25.955320+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer.php?u=https://www.ontdekgooisemeren.nl/nl/locaties/469999842/bibliotheek-gooi-bussum&title=Bibliotheek+Gooi%2B+%7C+Bussum - raw_value: https://www.facebook.com/sharer.php?u=https://www.ontdekgooisemeren.nl/nl/locaties/469999842/bibliotheek-gooi-bussum&title=Bibliotheek+Gooi%2B+%7C+Bussum - source_url: https://www.ontdekgooisemeren.nl/nl/locaties/469999842/bibliotheek-gooi-bussum - retrieved_on: '2025-11-29T20:35:09.483636+00:00' - xpath: /html/body/main/div/div/div[2]/div[1]/div[4]/div/a[1] - html_file: web/1428/ontdekgooisemeren.nl/pages/nl_locaties_469999842_bibliotheek-gooi-bussum.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:25.955422+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?text=Bibliotheek+Gooi%2B+%7C+Bussum&url=https://www.ontdekgooisemeren.nl/nl/locaties/469999842/bibliotheek-gooi-bussum - raw_value: https://twitter.com/share?text=Bibliotheek+Gooi%2B+%7C+Bussum&url=https://www.ontdekgooisemeren.nl/nl/locaties/469999842/bibliotheek-gooi-bussum - source_url: https://www.ontdekgooisemeren.nl/nl/locaties/469999842/bibliotheek-gooi-bussum - retrieved_on: '2025-11-29T20:35:09.483636+00:00' - xpath: /html/body/main/div/div/div[2]/div[1]/div[4]/div/a[2] - html_file: web/1428/ontdekgooisemeren.nl/pages/nl_locaties_469999842_bibliotheek-gooi-bussum.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:25.955427+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/ontdekgooisemeren raw_value: https://www.facebook.com/ontdekgooisemeren @@ -504,6 +484,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:25.955447+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Gooi+ Bussum diff --git a/data/nde/enriched/entries/1429_kb_isil.yaml b/data/nde/enriched/entries/1429_kb_isil.yaml index 6d2419d535..0813a116fb 100644 --- a/data/nde/enriched/entries/1429_kb_isil.yaml +++ b/data/nde/enriched/entries/1429_kb_isil.yaml @@ -409,7 +409,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:26.082002+00:00' source_archive: web/1429/socialekaart.ede.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Cultura Ede / Centrale Bibliotheek @@ -461,16 +461,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:26.081625+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=http%3A%2F%2Fsocialekaart.ede.nl%2Fis%2Forganisatie%2F11183752%2Fwijzer%2Fcultura-ede-centrale-bibliotheek - raw_value: https://www.facebook.com/sharer/sharer.php?u=http%3A%2F%2Fsocialekaart.ede.nl%2Fis%2Forganisatie%2F11183752%2Fwijzer%2Fcultura-ede-centrale-bibliotheek - source_url: https://socialekaart.ede.nl/is/organisatie/11183752/wijzer/cultura-ede-centrale-bibliotheek - retrieved_on: '2025-11-29T20:35:12.165155+00:00' - xpath: /html/body/div[2]/div/div[6]/div/div/div/div[1]/a - html_file: web/1429/socialekaart.ede.nl/pages/is_organisatie_11183752_wijzer_cultura-ede-centrale-bibliotheek.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:26.081649+00:00' - claim_type: social_linkedin claim_value: https://www.linkedin.com/sharing/share-offsite/?url=http%3A%2F%2Fsocialekaart.ede.nl%2Fis%2Forganisatie%2F11183752%2Fwijzer%2Fcultura-ede-centrale-bibliotheek raw_value: https://www.linkedin.com/sharing/share-offsite/?url=http%3A%2F%2Fsocialekaart.ede.nl%2Fis%2Forganisatie%2F11183752%2Fwijzer%2Fcultura-ede-centrale-bibliotheek @@ -481,6 +471,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:26.081660+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Cultura Ede diff --git a/data/nde/enriched/entries/1432_kb_isil.yaml b/data/nde/enriched/entries/1432_kb_isil.yaml index 9131c090cf..00a8060eb5 100644 --- a/data/nde/enriched/entries/1432_kb_isil.yaml +++ b/data/nde/enriched/entries/1432_kb_isil.yaml @@ -339,18 +339,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:26.885626+00:00' source_archive: web/1432/bibliotheekkampen.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.bibliotheekkampen.nl/ - retrieved_on: '2025-11-29T20:43:43.264044+00:00' - xpath: /html/head/title - html_file: web/1432/bibliotheekkampen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:26.885061+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/BibliotheekKampen/ raw_value: https://www.facebook.com/BibliotheekKampen/ @@ -381,6 +371,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:26.885527+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Kampen diff --git a/data/nde/enriched/entries/1434_kb_isil.yaml b/data/nde/enriched/entries/1434_kb_isil.yaml index b7dae6bf92..faed383d98 100644 --- a/data/nde/enriched/entries/1434_kb_isil.yaml +++ b/data/nde/enriched/entries/1434_kb_isil.yaml @@ -378,18 +378,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:27.342026+00:00' source_archive: web/1434/biblionu.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.biblionu.nl/ - retrieved_on: '2025-11-29T20:45:48.182504+00:00' - xpath: /html/head/title - html_file: web/1434/biblionu.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:27.341369+00:00' - claim_type: description_short claim_value: Welkom bij de Bibliotheek BiblioNu! Wij zijn er voor iedereen die zichzelf wil ontwikkelen. Bij de Bibliotheek geloven we dat je een leven lang @@ -433,6 +423,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:27.341852+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Biblionu Venray diff --git a/data/nde/enriched/entries/1446_kb_isil.yaml b/data/nde/enriched/entries/1446_kb_isil.yaml index 2219e2b86a..ff90804120 100644 --- a/data/nde/enriched/entries/1446_kb_isil.yaml +++ b/data/nde/enriched/entries/1446_kb_isil.yaml @@ -425,18 +425,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:30.894667+00:00' source_archive: web/1446/bibliotheekoosterschelde.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Bibliotheek Oosterschelde - source_url: https://www.bibliotheekoosterschelde.nl/zeeuwse-bibliotheek.html - retrieved_on: '2025-11-29T22:04:50.057179+00:00' - xpath: /html/head/title - html_file: web/1446/bibliotheekoosterschelde.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:30.894068+00:00' - claim_type: description_short claim_value: Welkom bij de Bibliotheek Oosterschelde. De Bibliotheek Oosterschelde (BO) verzorgt het openbare Bibliotheekwerk voor de gemeenten Borsele, Goes, @@ -495,6 +485,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:30.894513+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Oosterschelde diff --git a/data/nde/enriched/entries/1449_kb_isil.yaml b/data/nde/enriched/entries/1449_kb_isil.yaml index c3da511fc8..4fcae59895 100644 --- a/data/nde/enriched/entries/1449_kb_isil.yaml +++ b/data/nde/enriched/entries/1449_kb_isil.yaml @@ -434,18 +434,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:31.463930+00:00' source_archive: web/1449/archiweb.cz - claims_count: 6 + claims_count: 4 claims: - - claim_type: org_name - claim_value: archiweb.cz - raw_value: archiweb.cz - New Library Almere - source_url: https://www.archiweb.cz/en/b/nova-mestska-knihovna-de-nieuwe-bibliotheek - retrieved_on: '2025-11-29T22:02:08.442893+00:00' - xpath: /html/head/title - html_file: web/1449/archiweb.cz/pages/en_b_nova-mestska-knihovna-de-nieuwe-bibliotheek.tmp.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:31.462831+00:00' - claim_type: description_short claim_value: Right next door to the town hall in one of Almere’s most prominent locations – Stadhuisplein – stands a highly successful building; Almere’s new @@ -462,16 +452,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:49:31.462938+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://www.archiweb.cz/en/b/nova-mestska-knihovna-de-nieuwe-bibliotheek - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://www.archiweb.cz/en/b/nova-mestska-knihovna-de-nieuwe-bibliotheek - source_url: https://www.archiweb.cz/en/b/nova-mestska-knihovna-de-nieuwe-bibliotheek - retrieved_on: '2025-11-29T22:02:08.442893+00:00' - xpath: /html/body/div/div[1]/div[5]/main/div[1]/div/div[2]/div/ul/li/a - html_file: web/1449/archiweb.cz/pages/en_b_nova-mestska-knihovna-de-nieuwe-bibliotheek.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:31.463729+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/archiweb.cz raw_value: https://www.facebook.com/archiweb.cz @@ -502,6 +482,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:31.463799+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: de nieuwe bibliotheek diff --git a/data/nde/enriched/entries/1450_kb_isil.yaml b/data/nde/enriched/entries/1450_kb_isil.yaml index 5c01fc3ef7..6a63d66358 100644 --- a/data/nde/enriched/entries/1450_kb_isil.yaml +++ b/data/nde/enriched/entries/1450_kb_isil.yaml @@ -363,7 +363,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:31.619302+00:00' source_archive: web/1450/hunterdouglasarchitectural.eu - claims_count: 10 + claims_count: 9 claims: - claim_type: org_name claim_value: Cultural Centre De Nobelaer, Etten-Leur @@ -415,16 +415,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:31.618883+00:00' - - claim_type: social_twitter - claim_value: https://www.twitter.com/sharer.php?u=https%3A%2F%2Fwww.hunterdouglasarchitectural.eu%2Fen-NO%2Fproject%2Fcultural-centre-de-nobelaer%2F - raw_value: https://www.twitter.com/sharer.php?u=https%3A%2F%2Fwww.hunterdouglasarchitectural.eu%2Fen-NO%2Fproject%2Fcultural-centre-de-nobelaer%2F - source_url: https://www.hunterdouglasarchitectural.eu/en-NO/project/cultural-centre-de-nobelaer/ - retrieved_on: '2025-11-29T22:02:11.699432+00:00' - xpath: /html[2]/body/main/dialog/div/div[2]/div/div[2]/a - html_file: web/1450/hunterdouglasarchitectural.eu/mirror/www.hunterdouglasarchitectural.eu/en-NO/project/cultural-centre-de-nobelaer/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:31.618889+00:00' - claim_type: social_instagram claim_value: https://www.instagram.com/hunterdouglasarchitectural/ raw_value: https://www.instagram.com/hunterdouglasarchitectural/ @@ -465,6 +455,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:31.619001+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: De Nobelaer diff --git a/data/nde/enriched/entries/1451_kb_isil.yaml b/data/nde/enriched/entries/1451_kb_isil.yaml index 34907c66f6..50b47edbac 100644 --- a/data/nde/enriched/entries/1451_kb_isil.yaml +++ b/data/nde/enriched/entries/1451_kb_isil.yaml @@ -387,7 +387,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:31.703807+00:00' source_archive: web/1451/librarytechnology.org - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Bibliotheek Geldrop -- Bibliotheek Dommeldal @@ -411,16 +411,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:49:31.703123+00:00' - - claim_type: org_name - claim_value: libraries.org - raw_value: libraries.org - source_url: https://librarytechnology.org/library/192769 - retrieved_on: '2025-11-29T22:02:13.573454+00:00' - xpath: /html/body/div/header/a/h1 - html_file: web/1451/librarytechnology.org/pages/library_192769.tmp.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:49:31.703538+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Geldrop -- Bibliotheek Dommeldal diff --git a/data/nde/enriched/entries/1452_kb_isil.yaml b/data/nde/enriched/entries/1452_kb_isil.yaml index 3a99f86f10..94b5acac5a 100644 --- a/data/nde/enriched/entries/1452_kb_isil.yaml +++ b/data/nde/enriched/entries/1452_kb_isil.yaml @@ -606,18 +606,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:34:05.290092+00:00' source_archive: web/1452/bibliotheekkerkrade.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.bibliotheekkerkrade.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1452/bibliotheekkerkrade.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:05.289429+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/mijnstreekbibliotheek raw_value: https://www.facebook.com/mijnstreekbibliotheek @@ -648,3 +638,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:05.289955+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1454_kb_isil.yaml b/data/nde/enriched/entries/1454_kb_isil.yaml index 8f07676703..c7e4e2865f 100644 --- a/data/nde/enriched/entries/1454_kb_isil.yaml +++ b/data/nde/enriched/entries/1454_kb_isil.yaml @@ -409,7 +409,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:31.851788+00:00' source_archive: web/1454/librarytechnology.org - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Bibliotheek Velserbroek -- Bibliotheek Velsen @@ -433,16 +433,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:49:31.850860+00:00' - - claim_type: org_name - claim_value: libraries.org - raw_value: libraries.org - source_url: https://librarytechnology.org/library/196547 - retrieved_on: '2025-11-29T22:02:19.618559+00:00' - xpath: /html/body/div/header/a/h1 - html_file: web/1454/librarytechnology.org/pages/library_196547.tmp.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:49:31.851632+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Velsen diff --git a/data/nde/enriched/entries/1459_kb_isil.yaml b/data/nde/enriched/entries/1459_kb_isil.yaml index 7961af5b04..d33d9287fb 100644 --- a/data/nde/enriched/entries/1459_kb_isil.yaml +++ b/data/nde/enriched/entries/1459_kb_isil.yaml @@ -490,7 +490,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:32.818345+00:00' source_archive: web/1459/library.sx - claims_count: 12 + claims_count: 9 claims: - claim_type: org_name claim_value: 'Strengthening Regional Library Services:' @@ -544,36 +544,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:49:32.818010+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.library.sx%2Farticles%2Fstrengthening-regional-library-services--525 - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.library.sx%2Farticles%2Fstrengthening-regional-library-services--525 - source_url: https://www.library.sx/articles/strengthening-regional-library-services--525 - retrieved_on: '2025-11-29T22:04:52.136646+00:00' - xpath: /html/body/main/section/section/div[1]/a[1] - html_file: web/1459/library.sx/pages/articles_strengthening-regional-library-services--525.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:32.818160+00:00' - - claim_type: social_linkedin - claim_value: http://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.library.sx%2Farticles%2Fstrengthening-regional-library-services--525&title=Strengthening+Regional+Library+Services%3A+&summary=Saba+and+St.+Eustatius+Libraries+Conduct+Working+Visit+to+Sint+Maarten+Library&source=https%3A%2F%2Fwww.library.sx%2Farticles%2Fstrengthening-regional-library-services--525 - raw_value: http://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.library.sx%2Farticles%2Fstrengthening-regional-library-services--525&title=Strengthening+Regional+Library+Services%3A+&summary=Saba+and+St.+Eustatius+Libraries+Conduct+Working+Visit+to+Sint+Maarten+Library&source=https%3A%2F%2Fwww.library.sx%2Farticles%2Fstrengthening-regional-library-services--525 - source_url: https://www.library.sx/articles/strengthening-regional-library-services--525 - retrieved_on: '2025-11-29T22:04:52.136646+00:00' - xpath: /html/body/main/section/section/div[1]/a[2] - html_file: web/1459/library.sx/pages/articles_strengthening-regional-library-services--525.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:32.818166+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Strengthening+Regional+Library+Services%3A+&url=https%3A%2F%2Fwww.library.sx%2Farticles%2Fstrengthening-regional-library-services--525 - raw_value: https://twitter.com/intent/tweet?text=Strengthening+Regional+Library+Services%3A+&url=https%3A%2F%2Fwww.library.sx%2Farticles%2Fstrengthening-regional-library-services--525 - source_url: https://www.library.sx/articles/strengthening-regional-library-services--525 - retrieved_on: '2025-11-29T22:04:52.136646+00:00' - xpath: /html/body/main/section/section/div[1]/a[3] - html_file: web/1459/library.sx/pages/articles_strengthening-regional-library-services--525.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:32.818170+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/sintmaartenlibrary raw_value: https://www.facebook.com/sintmaartenlibrary @@ -614,6 +584,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:32.818188+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Saba diff --git a/data/nde/enriched/entries/1466_kb_isil.yaml b/data/nde/enriched/entries/1466_kb_isil.yaml index 53a9b8fd80..536bb39022 100644 --- a/data/nde/enriched/entries/1466_kb_isil.yaml +++ b/data/nde/enriched/entries/1466_kb_isil.yaml @@ -524,18 +524,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:34.762245+00:00' source_archive: web/1466/bibliotheeknoord-veluwe.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.bibliotheeknoord-veluwe.nl/adressen-en-openingstijden.html - retrieved_on: '2025-11-29T22:12:04.997675+00:00' - xpath: /html/head/title - html_file: web/1466/bibliotheeknoord-veluwe.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:34.761759+00:00' - claim_type: description_short claim_value: Homepagina van de website van de Bibliotheek Noord-Veluwe. raw_value: Homepagina van de website van de Bibliotheek Noord-Veluwe. @@ -586,6 +576,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:34.762161+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Noord-Veluwe diff --git a/data/nde/enriched/entries/1470_kb_isil.yaml b/data/nde/enriched/entries/1470_kb_isil.yaml index 6a24b9a573..9d925b46be 100644 --- a/data/nde/enriched/entries/1470_kb_isil.yaml +++ b/data/nde/enriched/entries/1470_kb_isil.yaml @@ -566,18 +566,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:35.964367+00:00' source_archive: web/1470/biblioplus.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.biblioplus.nl/ - retrieved_on: '2025-11-29T22:15:04.055938+00:00' - xpath: /html/head/title - html_file: web/1470/biblioplus.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:35.963788+00:00' - claim_type: description_short claim_value: BiblioPlus is er voor alle inwoners van Bergen, Boxmeer, Cuijk, Gennep, Grave, Mill & Sint Hubert en Sint Anthonis. De Bibliotheek biedt een uitgebreide @@ -628,6 +618,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:35.964269+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Biblioplus diff --git a/data/nde/enriched/entries/1474_kb_isil.yaml b/data/nde/enriched/entries/1474_kb_isil.yaml index c2eb96c1ae..8f58b39ee1 100644 --- a/data/nde/enriched/entries/1474_kb_isil.yaml +++ b/data/nde/enriched/entries/1474_kb_isil.yaml @@ -427,18 +427,8 @@ custodian_name: web_claims: extraction_timestamp: '2025-12-01T12:34:06.331274+00:00' source_archive: web/1474/bibliotheekdekempen.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.bibliotheekdekempen.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1474/bibliotheekdekempen.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:06.330423+00:00' - claim_type: description_short claim_value: Bibliotheek De Kempen is er voor alle inwoners van Bergeijk, Bladel, Budel, Cranendonck, Eersel, Maarheeze, Oirschot, Reusel en Valkenswaard. De @@ -469,3 +459,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:06.331105+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1481_kb_isil.yaml b/data/nde/enriched/entries/1481_kb_isil.yaml index afc92a0bc4..950d75184e 100644 --- a/data/nde/enriched/entries/1481_kb_isil.yaml +++ b/data/nde/enriched/entries/1481_kb_isil.yaml @@ -369,18 +369,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:38.629382+00:00' source_archive: web/1481/bibliotheekwierden.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.bibliotheekwierden.nl/ - retrieved_on: '2025-11-29T22:22:36.258227+00:00' - xpath: /html/head/title - html_file: web/1481/bibliotheekwierden.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:38.628600+00:00' - claim_type: email claim_value: '%20info@bibliotheekwierden.nl' raw_value: '%20info@bibliotheekwierden.nl' @@ -421,6 +411,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:38.629256+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Wierden diff --git a/data/nde/enriched/entries/1486_kb_isil.yaml b/data/nde/enriched/entries/1486_kb_isil.yaml index 3cd0ded2b0..c46047fe16 100644 --- a/data/nde/enriched/entries/1486_kb_isil.yaml +++ b/data/nde/enriched/entries/1486_kb_isil.yaml @@ -311,7 +311,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:39.082547+00:00' source_archive: web/1486/librarytechnology.org - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Bibliotheek Mijdrecht -- Bibliotheek Angstel, Vecht en Venen @@ -335,16 +335,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:49:39.081985+00:00' - - claim_type: org_name - claim_value: libraries.org - raw_value: libraries.org - source_url: https://librarytechnology.org/library/192294 - retrieved_on: '2025-11-29T22:20:17.432916+00:00' - xpath: /html/body/div/header/a/h1 - html_file: web/1486/librarytechnology.org/pages/library_192294.tmp.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:49:39.082419+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Mijdrecht -- Bibliotheek Angstel, Vecht en Venen diff --git a/data/nde/enriched/entries/1487_kb_isil.yaml b/data/nde/enriched/entries/1487_kb_isil.yaml index 69d8d7a442..d40efaa1d2 100644 --- a/data/nde/enriched/entries/1487_kb_isil.yaml +++ b/data/nde/enriched/entries/1487_kb_isil.yaml @@ -307,7 +307,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:39.165430+00:00' source_archive: web/1487/librarytechnology.org - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Bibliotheek Vianen -- Bibliotheek Lek @@ -331,16 +331,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:49:39.164923+00:00' - - claim_type: org_name - claim_value: libraries.org - raw_value: libraries.org - source_url: https://librarytechnology.org/library/192570 - retrieved_on: '2025-11-29T22:20:19.063379+00:00' - xpath: /html/body/div/header/a/h1 - html_file: web/1487/librarytechnology.org/pages/library_192570.tmp.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:49:39.165378+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Lek & Ijssel diff --git a/data/nde/enriched/entries/1489_kb_isil.yaml b/data/nde/enriched/entries/1489_kb_isil.yaml index fea2f9fb30..06cd8b8e85 100644 --- a/data/nde/enriched/entries/1489_kb_isil.yaml +++ b/data/nde/enriched/entries/1489_kb_isil.yaml @@ -356,7 +356,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:39.755150+00:00' source_archive: web/1489/krimpenerwaard.nl - claims_count: 13 + claims_count: 10 claims: - claim_type: org_name claim_value: Homepage @@ -388,26 +388,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:49:39.753828+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.krimpenerwaard.nl/doelenplein-12-bibliotheek-schoonhoven - retrieved_on: '2025-11-29T22:22:48.833473+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[1]/a/svg/title - html_file: web/1489/krimpenerwaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:39.753833+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.krimpenerwaard.nl/doelenplein-12-bibliotheek-schoonhoven - retrieved_on: '2025-11-29T22:22:48.833473+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[2]/a/svg/title - html_file: web/1489/krimpenerwaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:39.753838+00:00' - claim_type: org_name claim_value: Youtube kanaal raw_value: Youtube kanaal @@ -418,16 +398,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T10:49:39.753842+00:00' - - claim_type: org_name - claim_value: LinkedIn - raw_value: LinkedIn - source_url: https://www.krimpenerwaard.nl/doelenplein-12-bibliotheek-schoonhoven - retrieved_on: '2025-11-29T22:22:48.833473+00:00' - xpath: /html/body/div[2]/div/footer/div/div/div[4]/ul/li[4]/a/svg/title - html_file: web/1489/krimpenerwaard.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:39.753846+00:00' - claim_type: description_short claim_value: Officiële website van de gemeente Krimpenerwaard. Hier vindt u online producten, diensten en informatie voor inwoners en bedrijven in de gemeente. @@ -490,6 +460,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:39.754946+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Krimpenerwaard diff --git a/data/nde/enriched/entries/1490_kb_isil.yaml b/data/nde/enriched/entries/1490_kb_isil.yaml index 6cf3c6fb04..1157c575d0 100644 --- a/data/nde/enriched/entries/1490_kb_isil.yaml +++ b/data/nde/enriched/entries/1490_kb_isil.yaml @@ -295,7 +295,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:39.837861+00:00' source_archive: web/1490/librarytechnology.org - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Bibliotheek Wateringen -- Bibliotheek Westland @@ -319,16 +319,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:49:39.837430+00:00' - - claim_type: org_name - claim_value: libraries.org - raw_value: libraries.org - source_url: https://librarytechnology.org/library/192729 - retrieved_on: '2025-11-29T22:24:25.036069+00:00' - xpath: /html/body/div/header/a/h1 - html_file: web/1490/librarytechnology.org/pages/library_192729.tmp.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:49:39.837813+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Westland diff --git a/data/nde/enriched/entries/1492_kb_isil.yaml b/data/nde/enriched/entries/1492_kb_isil.yaml index 1c5bd2ced4..c7b8cd597a 100644 --- a/data/nde/enriched/entries/1492_kb_isil.yaml +++ b/data/nde/enriched/entries/1492_kb_isil.yaml @@ -317,18 +317,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:40.280484+00:00' source_archive: web/1492/bibliotheekdegroenevenen.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.bibliotheekdegroenevenen.nl/ - retrieved_on: '2025-11-29T22:27:32.526103+00:00' - xpath: /html/head/title - html_file: web/1492/bibliotheekdegroenevenen.nl/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:40.279996+00:00' - claim_type: description_short claim_value: 'Welkom bij de Bibliotheek De Groene Venen. De bibliotheek is dé plek voor persoonlijke ontwikkeling. Voor leden én niet-leden. Diverse activiteiten. @@ -375,6 +365,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:40.280362+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek De Groene Venen diff --git a/data/nde/enriched/entries/1493_kb_isil.yaml b/data/nde/enriched/entries/1493_kb_isil.yaml index ce79e09852..97f137bea6 100644 --- a/data/nde/enriched/entries/1493_kb_isil.yaml +++ b/data/nde/enriched/entries/1493_kb_isil.yaml @@ -337,7 +337,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:40.364688+00:00' source_archive: web/1493/gemeentehw.nl - claims_count: 13 + claims_count: 10 claims: - claim_type: org_name claim_value: Hoeksche Waard – Evenementen van de bibliotheek @@ -392,36 +392,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:49:40.364434+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet/?text=&url=https://www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/ - raw_value: https://twitter.com/intent/tweet/?text=&url=https://www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/ - source_url: https://www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/ - retrieved_on: '2025-11-29T22:25:43.709005+00:00' - xpath: /html/body/div[1]/div[4]/main/div[2]/div/div[2]/div/div[3]/div/a[1] - html_file: web/1493/gemeentehw.nl/mirror/www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:40.364508+00:00' - - claim_type: social_facebook - claim_value: https://facebook.com/sharer/sharer.php?u=https://www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/ - raw_value: https://facebook.com/sharer/sharer.php?u=https://www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/ - source_url: https://www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/ - retrieved_on: '2025-11-29T22:25:43.709005+00:00' - xpath: /html/body/div[1]/div[4]/main/div[2]/div/div[2]/div/div[3]/div/a[2] - html_file: web/1493/gemeentehw.nl/mirror/www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:40.364514+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&title=&url=https://www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/ - raw_value: https://www.linkedin.com/shareArticle?mini=true&title=&url=https://www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/ - source_url: https://www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/ - retrieved_on: '2025-11-29T22:25:43.709005+00:00' - xpath: /html/body/div[1]/div[4]/main/div[2]/div/div[2]/div/div[3]/div/a[3] - html_file: web/1493/gemeentehw.nl/mirror/www.gemeentehw.nl/evenement/hoeksche-waard-bibliotheek-hoeksche-waard/index.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:40.364519+00:00' - claim_type: social_twitter claim_value: https://x.com/gemeentehw raw_value: https://x.com/gemeentehw @@ -472,6 +442,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T10:49:40.364548+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Hoeksche Waard diff --git a/data/nde/enriched/entries/1496_kb_isil.yaml b/data/nde/enriched/entries/1496_kb_isil.yaml index 927f05d835..44b2c97c19 100644 --- a/data/nde/enriched/entries/1496_kb_isil.yaml +++ b/data/nde/enriched/entries/1496_kb_isil.yaml @@ -314,18 +314,8 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:40.833833+00:00' source_archive: web/1496/bibliotheekbonaire.com - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://bibliotheekbonaire.com/ - retrieved_on: '2025-11-29T22:26:06.753942+00:00' - xpath: /html/head/title - html_file: web/1496/bibliotheekbonaire.com/pages/index.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T10:49:40.832766+00:00' - claim_type: description_short claim_value: 'Biblioteka Publiko Boneiru, Bibliotheek Bonaire: Kaya Gramel 1, Bonaire. Een leven lang leren: natuurlijk ook op Bonaire.' @@ -368,6 +358,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:40.833676+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Biblioteka Públiko Boneiru diff --git a/data/nde/enriched/entries/1500_kb_isil.yaml b/data/nde/enriched/entries/1500_kb_isil.yaml index bdbf45bf58..55e8ed2084 100644 --- a/data/nde/enriched/entries/1500_kb_isil.yaml +++ b/data/nde/enriched/entries/1500_kb_isil.yaml @@ -671,7 +671,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:41.445674+00:00' source_archive: web/1500/hetinformatiepunt.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: De Zoomerij Rheden @@ -713,16 +713,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: tel_link extraction_timestamp: '2025-12-01T10:49:41.445495+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=http%3A%2F%2Fwww.hetinformatiepunt.nl%2Fis%2Forganisatie%2F11689430%2Fde-zoomerij-rheden - raw_value: https://www.facebook.com/sharer/sharer.php?u=http%3A%2F%2Fwww.hetinformatiepunt.nl%2Fis%2Forganisatie%2F11689430%2Fde-zoomerij-rheden - source_url: https://www.hetinformatiepunt.nl/is/organisatie/11689430/de-zoomerij-rheden - retrieved_on: '2025-11-29T22:28:49.361050+00:00' - xpath: /html/body/div[1]/div/div[5]/div/div/div/div[1]/a - html_file: web/1500/hetinformatiepunt.nl/pages/is_organisatie_11689430_de-zoomerij-rheden.tmp.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T10:49:41.445556+00:00' - claim_type: social_linkedin claim_value: https://www.linkedin.com/sharing/share-offsite/?url=http%3A%2F%2Fwww.hetinformatiepunt.nl%2Fis%2Forganisatie%2F11689430%2Fde-zoomerij-rheden raw_value: https://www.linkedin.com/sharing/share-offsite/?url=http%3A%2F%2Fwww.hetinformatiepunt.nl%2Fis%2Forganisatie%2F11689430%2Fde-zoomerij-rheden @@ -743,6 +733,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T10:49:41.445599+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: de Zoomerij Dieren bibliotheek x cultuur diff --git a/data/nde/enriched/entries/1501_kb_isil.yaml b/data/nde/enriched/entries/1501_kb_isil.yaml index 6af4b80d8e..a4ec5bfa68 100644 --- a/data/nde/enriched/entries/1501_kb_isil.yaml +++ b/data/nde/enriched/entries/1501_kb_isil.yaml @@ -420,7 +420,7 @@ identifiers: web_claims: extraction_timestamp: '2025-12-01T10:49:41.533455+00:00' source_archive: web/1501/librarytechnology.org - claims_count: 3 + claims_count: 2 claims: - claim_type: org_name claim_value: Bibliotheek Heiloo -- Bibliotheek Heiloo @@ -444,16 +444,14 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T10:49:41.532948+00:00' - - claim_type: org_name - claim_value: libraries.org - raw_value: libraries.org - source_url: https://librarytechnology.org/library/196560 - retrieved_on: '2025-11-29T22:28:50.602350+00:00' - xpath: /html/body/div/header/a/h1 - html_file: web/1501/librarytechnology.org/pages/library_196560.tmp.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T10:49:41.533398+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content custodian_name: claim_type: custodian_name claim_value: Bibliotheek Heiloo -- Bibliotheek Heiloo diff --git a/data/nde/enriched/entries/1502_huygens_instituut_hi.yaml b/data/nde/enriched/entries/1502_huygens_instituut_hi.yaml index e465bf0d12..27f0721ea7 100644 --- a/data/nde/enriched/entries/1502_huygens_instituut_hi.yaml +++ b/data/nde/enriched/entries/1502_huygens_instituut_hi.yaml @@ -223,18 +223,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:07.295663+00:00' source_archive: web/1502/huygens.knaw.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Huygens Instituut - source_url: http://www.huygens.knaw.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1502/huygens.knaw.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:07.294693+00:00' - claim_type: description_short claim_value: Het Huygens Instituut is het toonaangevende onderzoeksinstituut op het gebied van geschiedenis en cultuur van Nederland. @@ -267,4 +257,12 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:34:07.295167+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content url: http://www.huygens.knaw.nl/ diff --git a/data/nde/enriched/entries/1506_sociaal_en_cultureel_planbureau_scp.yaml b/data/nde/enriched/entries/1506_sociaal_en_cultureel_planbureau_scp.yaml index cb36fb265c..95358ea5ca 100644 --- a/data/nde/enriched/entries/1506_sociaal_en_cultureel_planbureau_scp.yaml +++ b/data/nde/enriched/entries/1506_sociaal_en_cultureel_planbureau_scp.yaml @@ -173,18 +173,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:07.453619+00:00' source_archive: web/1506/scp.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Sociaal en Cultureel Planbureau - source_url: http://www.scp.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1506/scp.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:07.452451+00:00' - claim_type: description_short claim_value: Het Sociaal en Cultureel Planbureau (SCP) verricht sociaal-wetenschappelijk onderzoek en rapporteert hierover aan regering en parlement. Op deze website @@ -219,4 +209,12 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:07.453311+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content url: http://www.scp.nl/ diff --git a/data/nde/enriched/entries/1507_hilversumse_historische_kring_albertus_perk_hhkap.yaml b/data/nde/enriched/entries/1507_hilversumse_historische_kring_albertus_perk_hhkap.yaml index 0dc1c177c0..a8e65a5e17 100644 --- a/data/nde/enriched/entries/1507_hilversumse_historische_kring_albertus_perk_hhkap.yaml +++ b/data/nde/enriched/entries/1507_hilversumse_historische_kring_albertus_perk_hhkap.yaml @@ -167,7 +167,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:07.492020+00:00' source_archive: web/1507/albertusperk.nl - claims_count: 5 + claims_count: 4 claims: - claim_type: org_name claim_value: Albertus Perk @@ -179,16 +179,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:07.491379+00:00' - - claim_type: org_name - claim_value: zoeken - raw_value: zoeken - source_url: https://albertusperk.nl/ - retrieved_on: '' - xpath: /html/body/div/div/div/section/aside[1]/div/div/div/div/form/table/tbody/tr/td[2]/button/svg/title - html_file: web/1507/albertusperk.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:07.491396+00:00' - claim_type: email claim_value: ledenadministratie@albertusperk.nl raw_value: ledenadministratie@albertusperk.nl @@ -219,4 +209,12 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:34:07.491709+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content url: https://albertusperk.nl/ diff --git a/data/nde/enriched/entries/1510_kitlv.yaml b/data/nde/enriched/entries/1510_kitlv.yaml index 7b948c2e88..8b4c074166 100644 --- a/data/nde/enriched/entries/1510_kitlv.yaml +++ b/data/nde/enriched/entries/1510_kitlv.yaml @@ -219,18 +219,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:07.599382+00:00' source_archive: web/1510/kitlv.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - KITLV - source_url: http://www.kitlv.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1510/kitlv.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:07.596882+00:00' - claim_type: description_short claim_value: KITLV, Royal Netherlands Institute of Southeast Asian and Caribbean Studies @@ -295,4 +285,12 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:07.598934+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content url: http://www.kitlv.nl/ diff --git a/data/nde/enriched/entries/1513_stadsarchief_zoetermeer_saz.yaml b/data/nde/enriched/entries/1513_stadsarchief_zoetermeer_saz.yaml index e1bb2d91f1..f219d5a11f 100644 --- a/data/nde/enriched/entries/1513_stadsarchief_zoetermeer_saz.yaml +++ b/data/nde/enriched/entries/1513_stadsarchief_zoetermeer_saz.yaml @@ -170,18 +170,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:07.695487+00:00' source_archive: web/1513/oudsoetermeer.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://www.oudsoetermeer.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1513/oudsoetermeer.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:07.694960+00:00' - claim_type: email claim_value: info@oudsoetermeer.nl raw_value: info@oudsoetermeer.nl @@ -212,4 +202,12 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:07.695433+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content url: http://www.oudsoetermeer.nl/ diff --git a/data/nde/enriched/entries/1516_het_scheepvaartmuseum_amsterdam.yaml b/data/nde/enriched/entries/1516_het_scheepvaartmuseum_amsterdam.yaml index f880e4e11e..eabbe81a30 100644 --- a/data/nde/enriched/entries/1516_het_scheepvaartmuseum_amsterdam.yaml +++ b/data/nde/enriched/entries/1516_het_scheepvaartmuseum_amsterdam.yaml @@ -236,18 +236,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:07.853801+00:00' source_archive: web/1516/hetscheepvaartmuseum.nl - claims_count: 11 + claims_count: 10 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Het Scheepvaartmuseum Amsterdam - source_url: https://www.hetscheepvaartmuseum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1516/hetscheepvaartmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:07.851351+00:00' - claim_type: description_short claim_value: In Het Scheepvaartmuseum komt alles boven water. Verken 500 jaar maritieme geschiedenis en hoe deze in verbinding staat met de samenleving van @@ -353,3 +343,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:07.853327+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1517_rijksmuseum_amsterdam.yaml b/data/nde/enriched/entries/1517_rijksmuseum_amsterdam.yaml index cf87236f4e..7ed42482ff 100644 --- a/data/nde/enriched/entries/1517_rijksmuseum_amsterdam.yaml +++ b/data/nde/enriched/entries/1517_rijksmuseum_amsterdam.yaml @@ -246,7 +246,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:07.914636+00:00' source_archive: web/1517/rijksmuseum.nl - claims_count: 40 + claims_count: 37 claims: - claim_type: org_name claim_value: Rijksmuseum, hét museum van Nederland @@ -318,16 +318,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:07.912791+00:00' - - claim_type: org_name - claim_value: close - raw_value: close - source_url: https://www.rijksmuseum.nl/nl - retrieved_on: '' - xpath: /html/body/div[1]/svg/symbol[7]/title - html_file: web/1517/rijksmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:07.912798+00:00' - claim_type: org_name claim_value: compare-button raw_value: compare-button @@ -498,16 +488,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:07.912908+00:00' - - claim_type: org_name - claim_value: search - raw_value: search - source_url: https://www.rijksmuseum.nl/nl - retrieved_on: '' - xpath: /html/body/div[1]/svg/symbol[25]/title - html_file: web/1517/rijksmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:07.912914+00:00' - claim_type: org_name claim_value: subtitles-off raw_value: subtitles-off @@ -592,16 +572,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T12:34:07.913116+00:00' - - claim_type: org_name - claim_value: Rijksmuseum.nl - raw_value: Rijksmuseum.nl - source_url: https://www.rijksmuseum.nl/nl - retrieved_on: '' - xpath: /html/head/meta[8] - html_file: web/1517/rijksmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T12:34:07.913359+00:00' - claim_type: social_instagram claim_value: https://www.instagram.com/rijksmuseum/ raw_value: https://www.instagram.com/rijksmuseum/ @@ -652,6 +622,14 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:07.914392+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content alternative_names: - name: Rijksmuseum Amsterdam source: original_entry diff --git a/data/nde/enriched/entries/1518_museum_huis_van_het_boek.yaml b/data/nde/enriched/entries/1518_museum_huis_van_het_boek.yaml index 7f74bc289a..1a522c7e07 100644 --- a/data/nde/enriched/entries/1518_museum_huis_van_het_boek.yaml +++ b/data/nde/enriched/entries/1518_museum_huis_van_het_boek.yaml @@ -229,7 +229,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:07.975480+00:00' source_archive: web/1518/huisvanhetboek.nl - claims_count: 12 + claims_count: 11 claims: - claim_type: org_name claim_value: Huis van het boek @@ -343,13 +343,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:07.975028+00:00' - - claim_type: org_name - claim_value: '''Mythes en mogelijkheden''' - raw_value: '''Mythes en mogelijkheden''' - source_url: https://www.huisvanhetboek.nl/ - retrieved_on: '' - xpath: /html/body/div/main/section[3]/div/div/div[2]/div/h1 - html_file: web/1518/huisvanhetboek.nl/rendered.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T12:34:07.975295+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1520_museum_kaap_skil.yaml b/data/nde/enriched/entries/1520_museum_kaap_skil.yaml index 8bc5f1349d..aa7824deed 100644 --- a/data/nde/enriched/entries/1520_museum_kaap_skil.yaml +++ b/data/nde/enriched/entries/1520_museum_kaap_skil.yaml @@ -218,18 +218,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:08.082077+00:00' source_archive: web/1520/kaapskil.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum Kaap Skil - source_url: https://www.kaapskil.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1520/kaapskil.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:08.080539+00:00' - claim_type: description_short claim_value: Een uniek museum vol verhalen en avonturen. Bekijk de opgedoken schatten uit scheepswrakken, ontdek de Reede van Texel en stap terug in de tijd in het @@ -304,3 +294,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:08.081739+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1522_valkhof_museum.yaml b/data/nde/enriched/entries/1522_valkhof_museum.yaml index be0c7481b8..1d4c73a6d5 100644 --- a/data/nde/enriched/entries/1522_valkhof_museum.yaml +++ b/data/nde/enriched/entries/1522_valkhof_museum.yaml @@ -170,18 +170,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:08.132661+00:00' source_archive: web/1522/valkhofmuseum.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: home - raw_value: home | Valkhof Museum - source_url: http://www.valkhofmuseum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1522/valkhofmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:08.131740+00:00' - claim_type: org_name claim_value: Valkhof Museum raw_value: Valkhof Museum @@ -232,3 +222,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:08.132400+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1524_leudalmuseum.yaml b/data/nde/enriched/entries/1524_leudalmuseum.yaml index 0f6da4b6b8..296f290228 100644 --- a/data/nde/enriched/entries/1524_leudalmuseum.yaml +++ b/data/nde/enriched/entries/1524_leudalmuseum.yaml @@ -216,18 +216,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:08.201449+00:00' source_archive: web/1524/bezoekerscentrumleudal.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Bezoekerscentrum Leudal - source_url: https://www.bezoekerscentrumleudal.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1524/bezoekerscentrumleudal.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:08.199604+00:00' - claim_type: description_short claim_value: Welkom bij… Bezoekerscentrum Leudal Welkom in het kloppend hart van Leudal. Wandelen, fietsen, genieten, natuur beleven of de rijke historie ervaren? @@ -292,3 +282,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:08.201197+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1527_cultuurhistorisch_museum_texel_de_waelstee.yaml b/data/nde/enriched/entries/1527_cultuurhistorisch_museum_texel_de_waelstee.yaml index 4bba4cd08c..02b6b126a2 100644 --- a/data/nde/enriched/entries/1527_cultuurhistorisch_museum_texel_de_waelstee.yaml +++ b/data/nde/enriched/entries/1527_cultuurhistorisch_museum_texel_de_waelstee.yaml @@ -164,18 +164,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:08.337313+00:00' source_archive: web/1527/museumtexel.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Texels erfgoedmuseum - Waelstee - source_url: https://www.museumtexel.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1527/museumtexel.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:08.336414+00:00' - claim_type: org_name claim_value: Texels erfgoedmuseum - Waelstee raw_value: Texels erfgoedmuseum - Waelstee @@ -216,3 +206,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:08.337112+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1529_museum_belv_d_re.yaml b/data/nde/enriched/entries/1529_museum_belv_d_re.yaml index d7d63f6187..c168a903ca 100644 --- a/data/nde/enriched/entries/1529_museum_belv_d_re.yaml +++ b/data/nde/enriched/entries/1529_museum_belv_d_re.yaml @@ -217,18 +217,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:08.417447+00:00' source_archive: web/1529/museumbelvedere.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum Belvédère - source_url: https://www.museumbelvedere.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1529/museumbelvedere.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:08.416224+00:00' - claim_type: email claim_value: info@museumbelvedere.nl raw_value: info@museumbelvedere.nl @@ -279,3 +269,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:08.417171+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1531_museum_veere_incl_stadhuis.yaml b/data/nde/enriched/entries/1531_museum_veere_incl_stadhuis.yaml index f4232d70b7..118d1f3547 100644 --- a/data/nde/enriched/entries/1531_museum_veere_incl_stadhuis.yaml +++ b/data/nde/enriched/entries/1531_museum_veere_incl_stadhuis.yaml @@ -189,18 +189,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:08.515168+00:00' source_archive: web/1531/museumveere.nl - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Museum Veere - source_url: http://www.museumveere.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1531/museumveere.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:08.513754+00:00' - claim_type: description_short claim_value: 'Welkom bij Museum Veere Ontdek 1.000 jaar geschiedenis Plan je bezoek Beleef 1.000 jaar geschiedenis van Veere Za en zo: 11.00 -17.00 uurMa t/m vrij: @@ -275,3 +265,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:08.514904+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1533_frans_hals_museum.yaml b/data/nde/enriched/entries/1533_frans_hals_museum.yaml index 989780c4be..7fea62fe3a 100644 --- a/data/nde/enriched/entries/1533_frans_hals_museum.yaml +++ b/data/nde/enriched/entries/1533_frans_hals_museum.yaml @@ -226,7 +226,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:08.618355+00:00' source_archive: web/1533/franshalsmuseum.nl - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Hét kunstmuseum van Haarlem @@ -290,13 +290,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:08.614134+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://franshalsmuseum.nl/nl - retrieved_on: '' - xpath: /html/body/div[2]/main/div/h1 - html_file: web/1533/franshalsmuseum.nl/rendered.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T12:34:08.614201+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1534_museum_stoomtrein_katwijk_leiden.yaml b/data/nde/enriched/entries/1534_museum_stoomtrein_katwijk_leiden.yaml index cb75e88f2e..24f6ce0cad 100644 --- a/data/nde/enriched/entries/1534_museum_stoomtrein_katwijk_leiden.yaml +++ b/data/nde/enriched/entries/1534_museum_stoomtrein_katwijk_leiden.yaml @@ -176,7 +176,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:08.657598+00:00' source_archive: web/1534/stoomtreinkatwijkleiden.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Stoomtrein Katwijk Leiden @@ -198,26 +198,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:08.655471+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: https://stoomtreinkatwijkleiden.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[1]/svg/title - html_file: web/1534/stoomtreinkatwijkleiden.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:08.655476+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: https://stoomtreinkatwijkleiden.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[2]/svg/title - html_file: web/1534/stoomtreinkatwijkleiden.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:08.655481+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -280,3 +260,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:08.656714+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1539_museon_omniversum.yaml b/data/nde/enriched/entries/1539_museon_omniversum.yaml index 7113521121..16316471ec 100644 --- a/data/nde/enriched/entries/1539_museon_omniversum.yaml +++ b/data/nde/enriched/entries/1539_museon_omniversum.yaml @@ -232,18 +232,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:08.881503+00:00' source_archive: web/1539/museon-omniversum.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museon-Omniversum - source_url: http://www.museon-omniversum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1539/museon-omniversum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:08.879877+00:00' - claim_type: phone claim_value: 0031703381338 raw_value: 0031703381338 @@ -294,3 +284,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:08.880816+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1543_textielmuseum_tilburg.yaml b/data/nde/enriched/entries/1543_textielmuseum_tilburg.yaml index 83c10f0858..6d5dcee5eb 100644 --- a/data/nde/enriched/entries/1543_textielmuseum_tilburg.yaml +++ b/data/nde/enriched/entries/1543_textielmuseum_tilburg.yaml @@ -227,18 +227,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:09.111544+00:00' source_archive: web/1543/textielmuseum.nl - claims_count: 9 + claims_count: 8 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - textielmuseum.nl - TextielMuseum - source_url: http://www.textielmuseum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1543/textielmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:09.107948+00:00' - claim_type: description_short claim_value: Kom naar het TextielMuseum in Tilburg voor inspirerende tentoonstellingen op het gebied van kunst, design, mode en erfgoed. Ontdek ook het TextielLab, @@ -323,3 +313,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:09.110959+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1544_ecomare.yaml b/data/nde/enriched/entries/1544_ecomare.yaml index f978d6b0e0..b2a53aa5f6 100644 --- a/data/nde/enriched/entries/1544_ecomare.yaml +++ b/data/nde/enriched/entries/1544_ecomare.yaml @@ -231,7 +231,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:09.183241+00:00' source_archive: web/1544/ecomare.nl - claims_count: 12 + claims_count: 10 claims: - claim_type: org_name claim_value: Duik in de zee! Een spetterend dagje uit @@ -325,26 +325,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:09.182271+00:00' - - claim_type: social_facebook - claim_value: https://facebook.com/sharer.php?u= - raw_value: https://facebook.com/sharer.php?u= - source_url: http://www.ecomare.nl/ - retrieved_on: '' - xpath: /html/body/footer/div[3]/div[3]/div/div/div/ul/li[1]/a - html_file: web/1544/ecomare.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:09.182366+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/share?url= - raw_value: https://twitter.com/share?url= - source_url: http://www.ecomare.nl/ - retrieved_on: '' - xpath: /html/body/footer/div[3]/div[3]/div/div/div/ul/li[2]/a - html_file: web/1544/ecomare.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:09.182380+00:00' - claim_type: org_name claim_value: Ontvang onze nieuwsbrief raw_value: Ontvang onze nieuwsbrief @@ -355,3 +335,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:09.182545+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1545_het_nieuwe_domein.yaml b/data/nde/enriched/entries/1545_het_nieuwe_domein.yaml index e6824417d1..60c6a4097b 100644 --- a/data/nde/enriched/entries/1545_het_nieuwe_domein.yaml +++ b/data/nde/enriched/entries/1545_het_nieuwe_domein.yaml @@ -218,18 +218,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:09.221687+00:00' source_archive: web/1545/hetnieuwedomein.nl - claims_count: 10 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Het Nieuwe Domein - source_url: https://www.hetnieuwedomein.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1545/hetnieuwedomein.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:09.220681+00:00' - claim_type: org_name claim_value: (externe link) raw_value: (externe link) @@ -290,33 +280,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:09.221471+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A//www.hetnieuwedomein.nl/home - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A//www.hetnieuwedomein.nl/home - source_url: https://www.hetnieuwedomein.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div/div[2]/main/div/div/article/div[3]/div/div/a[1] - html_file: web/1545/hetnieuwedomein.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:09.221494+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//www.hetnieuwedomein.nl/home - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A//www.hetnieuwedomein.nl/home - source_url: https://www.hetnieuwedomein.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div/div[2]/main/div/div/article/div[3]/div/div/a[2] - html_file: web/1545/hetnieuwedomein.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:09.221500+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?original_referer=https%3A//www.hetnieuwedomein.nl/home&url=https%3A//www.hetnieuwedomein.nl/home - raw_value: https://twitter.com/intent/tweet?original_referer=https%3A//www.hetnieuwedomein.nl/home&url=https%3A//www.hetnieuwedomein.nl/home - source_url: https://www.hetnieuwedomein.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div/div[2]/main/div/div/article/div[3]/div/div/a[3] - html_file: web/1545/hetnieuwedomein.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:09.221505+00:00' + removed_invalid_claims: + - removed_count: 4 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1547_anne_frank_huis.yaml b/data/nde/enriched/entries/1547_anne_frank_huis.yaml index 2fe639e612..ec062a9df1 100644 --- a/data/nde/enriched/entries/1547_anne_frank_huis.yaml +++ b/data/nde/enriched/entries/1547_anne_frank_huis.yaml @@ -242,18 +242,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:09.303765+00:00' source_archive: web/1547/annefrank.org - claims_count: 8 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Anne Frank Stichting - source_url: https://www.annefrank.org/nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1547/annefrank.org/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:09.302888+00:00' - claim_type: org_name claim_value: Anne Frank Stichting raw_value: Anne Frank Stichting @@ -328,3 +318,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:09.303687+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1552_hildo_krop_museum.yaml b/data/nde/enriched/entries/1552_hildo_krop_museum.yaml index 6d4363e8d4..f1bcd0b4e3 100644 --- a/data/nde/enriched/entries/1552_hildo_krop_museum.yaml +++ b/data/nde/enriched/entries/1552_hildo_krop_museum.yaml @@ -208,18 +208,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:09.467003+00:00' source_archive: web/1552/hildokropmuseum.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Hildo Krop Museum - source_url: https://hildokropmuseum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1552/hildokropmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:09.465880+00:00' - claim_type: org_name claim_value: Hildo Krop Museum - Museum uit Steenwijk raw_value: Hildo Krop Museum - Museum uit Steenwijk @@ -254,3 +244,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:09.466676+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1553_museum_bronbeek.yaml b/data/nde/enriched/entries/1553_museum_bronbeek.yaml index 4ef720830b..daa2d6065e 100644 --- a/data/nde/enriched/entries/1553_museum_bronbeek.yaml +++ b/data/nde/enriched/entries/1553_museum_bronbeek.yaml @@ -222,18 +222,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:09.503552+00:00' source_archive: web/1553/bronbeek.nl - claims_count: 6 + claims_count: 5 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Bronbeek - source_url: http://www.bronbeek.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1553/bronbeek.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:09.502436+00:00' - claim_type: description_short claim_value: Bronbeek is een museum en hét kenniscentrum van het koloniaal-militair verleden van het Koninkrijk der Nederlanden. Het bevindt zich op een cultuurhistorisch @@ -290,3 +280,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:09.503126+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1559_brandweermuseum_hellevoetsluis.yaml b/data/nde/enriched/entries/1559_brandweermuseum_hellevoetsluis.yaml index 8157da3351..71d82919b7 100644 --- a/data/nde/enriched/entries/1559_brandweermuseum_hellevoetsluis.yaml +++ b/data/nde/enriched/entries/1559_brandweermuseum_hellevoetsluis.yaml @@ -157,18 +157,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:09.737926+00:00' source_archive: web/1559/brwmh.nl - claims_count: 7 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Brandweermuseum - source_url: http://www.brwmh.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1559/brwmh.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:09.736059+00:00' - claim_type: description_short claim_value: Nationaal Brandweermuseum Hellevoetsluis raw_value: Nationaal Brandweermuseum Hellevoetsluis @@ -229,3 +219,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:09.737245+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1560_museum_urk_het_oude_raadhuis.yaml b/data/nde/enriched/entries/1560_museum_urk_het_oude_raadhuis.yaml index c150c01399..6d2a5b8c07 100644 --- a/data/nde/enriched/entries/1560_museum_urk_het_oude_raadhuis.yaml +++ b/data/nde/enriched/entries/1560_museum_urk_het_oude_raadhuis.yaml @@ -222,18 +222,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:09.772363+00:00' source_archive: web/1560/museumopurk.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home | Museum op Urk - source_url: https://www.museumopurk.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1560/museumopurk.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:09.771849+00:00' - claim_type: org_name claim_value: Toegankelijkheid raw_value: Toegankelijkheid @@ -274,3 +264,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:09.772243+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1566_speelgoedmuseum_oosterhout.yaml b/data/nde/enriched/entries/1566_speelgoedmuseum_oosterhout.yaml index 8a310a68fb..e1dd440627 100644 --- a/data/nde/enriched/entries/1566_speelgoedmuseum_oosterhout.yaml +++ b/data/nde/enriched/entries/1566_speelgoedmuseum_oosterhout.yaml @@ -226,7 +226,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:09.979918+00:00' source_archive: web/1566/speelgoedmuseum.nl - claims_count: 8 + claims_count: 7 claims: - claim_type: org_name claim_value: Speelgoedmuseum Oosterhout @@ -250,16 +250,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T12:34:09.978805+00:00' - - claim_type: org_name - claim_value: speelgoedmuseum.nl - raw_value: speelgoedmuseum.nl - source_url: http://www.speelgoedmuseum.nl/ - retrieved_on: '' - xpath: /html/head/meta[9] - html_file: web/1566/speelgoedmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T12:34:09.979113+00:00' - claim_type: email claim_value: info@speelgoedmuseum.nl raw_value: info@speelgoedmuseum.nl @@ -310,3 +300,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:09.979666+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1568_embassy_of_the_free_mind.yaml b/data/nde/enriched/entries/1568_embassy_of_the_free_mind.yaml index 006d6f23ad..b07f8b5320 100644 --- a/data/nde/enriched/entries/1568_embassy_of_the_free_mind.yaml +++ b/data/nde/enriched/entries/1568_embassy_of_the_free_mind.yaml @@ -223,18 +223,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:10.072966+00:00' source_archive: web/1568/embassyofthefreemind.com - claims_count: 10 + claims_count: 9 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - EMBASSY OF THE FREE MIND - source_url: http://www.embassyofthefreemind.com/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1568/embassyofthefreemind.com/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:10.072135+00:00' - claim_type: org_name claim_value: scroll raw_value: scroll @@ -333,3 +323,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:10.072871+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1570_grachtenmuseum_amsterdam.yaml b/data/nde/enriched/entries/1570_grachtenmuseum_amsterdam.yaml index 23b4a6d041..12969e34d7 100644 --- a/data/nde/enriched/entries/1570_grachtenmuseum_amsterdam.yaml +++ b/data/nde/enriched/entries/1570_grachtenmuseum_amsterdam.yaml @@ -239,7 +239,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:10.173198+00:00' source_archive: web/1570/grachten.museum - claims_count: 7 + claims_count: 6 claims: - claim_type: org_name claim_value: Hét museum in Amsterdam over de grachten @@ -263,16 +263,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T12:34:10.171672+00:00' - - claim_type: org_name - claim_value: https://grachten.museum/ - raw_value: https://grachten.museum/ - source_url: https://grachten.museum/ - retrieved_on: '' - xpath: /html/head/meta[10] - html_file: web/1570/grachten.museum/rendered.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T12:34:10.171982+00:00' - claim_type: email claim_value: mail@grachten.museum raw_value: mail@grachten.museum @@ -313,3 +303,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:10.172304+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1576_nationaal_bomenmuseum_gimborn.yaml b/data/nde/enriched/entries/1576_nationaal_bomenmuseum_gimborn.yaml index a1a2644223..158c6a3a41 100644 --- a/data/nde/enriched/entries/1576_nationaal_bomenmuseum_gimborn.yaml +++ b/data/nde/enriched/entries/1576_nationaal_bomenmuseum_gimborn.yaml @@ -219,18 +219,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:10.461391+00:00' source_archive: web/1576/bomenmuseum.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Nationaal Bomenmuseum Gimborn | Stichting Von Gimborn Arboretum - source_url: https://www.bomenmuseum.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1576/bomenmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:10.459728+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/NationaalBomenmuseum/ raw_value: https://www.facebook.com/NationaalBomenmuseum/ @@ -273,3 +263,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:10.461264+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1581_joods_museum.yaml b/data/nde/enriched/entries/1581_joods_museum.yaml index 75d499e6e1..8b912e1efe 100644 --- a/data/nde/enriched/entries/1581_joods_museum.yaml +++ b/data/nde/enriched/entries/1581_joods_museum.yaml @@ -241,7 +241,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:10.683704+00:00' source_archive: web/1581/jck.nl - claims_count: 7 + claims_count: 4 claims: - claim_type: org_name claim_value: Joods Museum + junior @@ -265,36 +265,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: meta_description extraction_timestamp: '2025-12-01T12:34:10.682890+00:00' - - claim_type: address - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '' - xpath: /html/head/script[5] - html_file: web/1581/jck.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: schema_org_streetAddress - extraction_timestamp: '2025-12-01T12:34:10.683170+00:00' - - claim_type: postal_code - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '' - xpath: /html/head/script[5] - html_file: web/1581/jck.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: schema_org_postalCode - extraction_timestamp: '2025-12-01T12:34:10.683175+00:00' - - claim_type: city - claim_value: '' - raw_value: '' - source_url: https://jck.nl/locatie/joods-museum - retrieved_on: '' - xpath: /html/head/script[5] - html_file: web/1581/jck.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: schema_org_addressLocality - extraction_timestamp: '2025-12-01T12:34:10.683177+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/joodscultureelkwartier raw_value: https://www.facebook.com/joodscultureelkwartier @@ -315,3 +285,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:10.683452+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1602_huis_van_hilde_archeologiemuseum_provincie_noord_h.yaml b/data/nde/enriched/entries/1602_huis_van_hilde_archeologiemuseum_provincie_noord_h.yaml index cbad2c6d2f..e657a7dbcc 100644 --- a/data/nde/enriched/entries/1602_huis_van_hilde_archeologiemuseum_provincie_noord_h.yaml +++ b/data/nde/enriched/entries/1602_huis_van_hilde_archeologiemuseum_provincie_noord_h.yaml @@ -214,7 +214,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:11.425647+00:00' source_archive: web/1602/huisvanhilde.nl - claims_count: 10 + claims_count: 8 claims: - claim_type: org_name claim_value: Huis van Hilde, ontdek de archeologie van Noord-Holland @@ -226,26 +226,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:11.424153+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: http://www.huisvanhilde.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[1]/svg/title - html_file: web/1602/huisvanhilde.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.424167+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: http://www.huisvanhilde.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[1]/div/div[1]/a[2]/svg/title - html_file: web/1602/huisvanhilde.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.424172+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -318,3 +298,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:11.424848+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1606_museum_boijmans_van_beuningen_incl_depot.yaml b/data/nde/enriched/entries/1606_museum_boijmans_van_beuningen_incl_depot.yaml index 2d3322694e..8a7cea1f3c 100644 --- a/data/nde/enriched/entries/1606_museum_boijmans_van_beuningen_incl_depot.yaml +++ b/data/nde/enriched/entries/1606_museum_boijmans_van_beuningen_incl_depot.yaml @@ -230,7 +230,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:11.564842+00:00' source_archive: web/1606/boijmans.nl - claims_count: 16 + claims_count: 9 claims: - claim_type: org_name claim_value: Het Depot van Museum Boijmans Van Beuningen @@ -242,66 +242,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:11.562994+00:00' - - claim_type: org_name - claim_value: Previous - raw_value: Previous - source_url: https://www.boijmans.nl/depot - retrieved_on: '' - xpath: /html/body/svg/defs/symbol[20]/title - html_file: web/1606/boijmans.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.563008+00:00' - - claim_type: org_name - claim_value: Next - raw_value: Next - source_url: https://www.boijmans.nl/depot - retrieved_on: '' - xpath: /html/body/svg/defs/symbol[21]/title - html_file: web/1606/boijmans.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.563013+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.boijmans.nl/depot - retrieved_on: '' - xpath: /html/body/svg/defs/symbol[24]/title - html_file: web/1606/boijmans.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.563016+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.boijmans.nl/depot - retrieved_on: '' - xpath: /html/body/svg/defs/symbol[25]/title - html_file: web/1606/boijmans.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.563020+00:00' - - claim_type: org_name - claim_value: Twitter - raw_value: Twitter - source_url: https://www.boijmans.nl/depot - retrieved_on: '' - xpath: /html/body/svg/defs/symbol[26]/title - html_file: web/1606/boijmans.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.563023+00:00' - - claim_type: org_name - claim_value: Pinterest - raw_value: Pinterest - source_url: https://www.boijmans.nl/depot - retrieved_on: '' - xpath: /html/body/svg/defs/symbol[27]/title - html_file: web/1606/boijmans.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.563026+00:00' - claim_type: org_name claim_value: Tiktok raw_value: Tiktok @@ -332,16 +272,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:11.563035+00:00' - - claim_type: org_name - claim_value: Zoeken - raw_value: Zoeken - source_url: https://www.boijmans.nl/depot - retrieved_on: '' - xpath: /html/body/div[3]/div[1]/div[2]/div[2]/div/form[1]/button/svg/title - html_file: web/1606/boijmans.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.563041+00:00' - claim_type: org_name claim_value: Museum Boijmans Van Beuningen raw_value: Museum Boijmans Van Beuningen @@ -392,3 +322,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:11.564421+00:00' + removed_invalid_claims: + - removed_count: 7 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1611_literatuurmuseum_kinderboekenmuseum.yaml b/data/nde/enriched/entries/1611_literatuurmuseum_kinderboekenmuseum.yaml index 415369e7b8..18915cdd1d 100644 --- a/data/nde/enriched/entries/1611_literatuurmuseum_kinderboekenmuseum.yaml +++ b/data/nde/enriched/entries/1611_literatuurmuseum_kinderboekenmuseum.yaml @@ -233,7 +233,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:11.768759+00:00' source_archive: web/1611/literatuurmuseum.nl - claims_count: 4 + claims_count: 3 claims: - claim_type: org_name claim_value: Literatuurmuseum / Kinderboekenmuseum @@ -267,13 +267,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T12:34:11.767881+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: http://literatuurmuseum.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/h1 - html_file: web/1611/literatuurmuseum.nl/rendered.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T12:34:11.768423+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1612_muiderslot.yaml b/data/nde/enriched/entries/1612_muiderslot.yaml index 6c934c96bd..92d7e04a1d 100644 --- a/data/nde/enriched/entries/1612_muiderslot.yaml +++ b/data/nde/enriched/entries/1612_muiderslot.yaml @@ -230,18 +230,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:11.797239+00:00' source_archive: web/1612/muiderslot.nl - claims_count: 9 + claims_count: 7 claims: - - claim_type: org_name - claim_value: Muiderslot - raw_value: Muiderslot - Muiderslot - source_url: https://www.muiderslot.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1612/muiderslot.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.795539+00:00' - claim_type: email claim_value: info@muiderslot.nl raw_value: info@muiderslot.nl @@ -312,13 +302,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:11.796753+00:00' - - claim_type: org_name - claim_value: sinterklaas op het Muiderslot - raw_value: sinterklaas op het Muiderslot - source_url: https://www.muiderslot.nl/ - retrieved_on: '' - xpath: /html/body/div[2]/main/div/div/div[1]/div/div/div[1]/div/div[2]/div[2]/div/div/div/div/div/div/div/h1 - html_file: web/1612/muiderslot.nl/rendered.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T12:34:11.796985+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1614_nationaal_glasmuseum_leerdam.yaml b/data/nde/enriched/entries/1614_nationaal_glasmuseum_leerdam.yaml index 938101baf1..4298583e98 100644 --- a/data/nde/enriched/entries/1614_nationaal_glasmuseum_leerdam.yaml +++ b/data/nde/enriched/entries/1614_nationaal_glasmuseum_leerdam.yaml @@ -223,7 +223,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:11.880343+00:00' source_archive: web/1614/nationaalglasmuseum.nl - claims_count: 10 + claims_count: 7 claims: - claim_type: org_name claim_value: Nationaal Glasmuseum @@ -245,36 +245,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:11.879350+00:00' - - claim_type: org_name - claim_value: Twitter - raw_value: Twitter - source_url: http://www.nationaalglasmuseum.nl/ - retrieved_on: '' - xpath: /html/body/footer/div/div[2]/ul/li[1]/a/svg/title - html_file: web/1614/nationaalglasmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.879387+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: http://www.nationaalglasmuseum.nl/ - retrieved_on: '' - xpath: /html/body/footer/div/div[2]/ul/li[2]/a/svg/title - html_file: web/1614/nationaalglasmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.879397+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: http://www.nationaalglasmuseum.nl/ - retrieved_on: '' - xpath: /html/body/footer/div/div[2]/ul/li[3]/a/svg/title - html_file: web/1614/nationaalglasmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:11.879406+00:00' - claim_type: description_short claim_value: In het Nationaal Glasmuseum worden het hele jaar door afwisselende tentoonstellingen georganiseerd. Hedendaagse en experimentele tentoonstellingen @@ -329,3 +299,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:11.880053+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1617_museum_anno.yaml b/data/nde/enriched/entries/1617_museum_anno.yaml index 43baa48a9b..8411a5ac25 100644 --- a/data/nde/enriched/entries/1617_museum_anno.yaml +++ b/data/nde/enriched/entries/1617_museum_anno.yaml @@ -221,18 +221,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:12.016686+00:00' source_archive: web/1617/anno.nl - claims_count: 9 + claims_count: 6 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - ANNO Stadsmuseum Zwolle - source_url: https://anno.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1617/anno.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.015256+00:00' - claim_type: description_short claim_value: ANNO vertelt de verhalen van Zwolle. Het is een unieke plek in Nederland waar museum, archeologie, bouwhistorie, monumenten en archief samen onder één @@ -303,26 +293,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:12.016530+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fanno.nl%2F - raw_value: https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fanno.nl%2F - source_url: https://anno.nl/ - retrieved_on: '' - xpath: /html/body/section[4]/div/div/div[3]/div/div/div[2]/a[1] - html_file: web/1617/anno.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:12.016539+00:00' - - claim_type: social_linkedin - claim_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fanno.nl%2F&title=ANNO+Stadsmuseum - raw_value: https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fanno.nl%2F&title=ANNO+Stadsmuseum - source_url: https://anno.nl/ - retrieved_on: '' - xpath: /html/body/section[4]/div/div/div[3]/div/div/div[2]/a[2] - html_file: web/1617/anno.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:12.016544+00:00' - claim_type: org_name claim_value: ANNO Stadsmuseum raw_value: ANNO Stadsmuseum @@ -333,3 +303,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:12.016600+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1620_walburgiskerk_incl_librije.yaml b/data/nde/enriched/entries/1620_walburgiskerk_incl_librije.yaml index 2aed530d1b..e63a1f87e4 100644 --- a/data/nde/enriched/entries/1620_walburgiskerk_incl_librije.yaml +++ b/data/nde/enriched/entries/1620_walburgiskerk_incl_librije.yaml @@ -200,18 +200,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:12.132880+00:00' source_archive: web/1620/walburgiskerk.nl - claims_count: 3 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Walburgiskerk - source_url: http://www.walburgiskerk.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1620/walburgiskerk.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.132335+00:00' - claim_type: description_short claim_value: De kapittelkerk stamt uit de 13e eeuw en geldt als één van de 10 grootste en mooiste kerken in Nederland. Zij is gebouwd op restanten van een @@ -246,3 +236,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_site_name extraction_timestamp: '2025-12-01T12:34:12.132546+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1623_hendrick_hamel_museum.yaml b/data/nde/enriched/entries/1623_hendrick_hamel_museum.yaml index 049c25f838..d3f86ea1d4 100644 --- a/data/nde/enriched/entries/1623_hendrick_hamel_museum.yaml +++ b/data/nde/enriched/entries/1623_hendrick_hamel_museum.yaml @@ -210,7 +210,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:12.230585+00:00' source_archive: web/1623/hendrickhamelmuseum.nl - claims_count: 10 + claims_count: 7 claims: - claim_type: org_name claim_value: Hendrick Hamel Museum @@ -222,36 +222,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:12.228409+00:00' - - claim_type: org_name - claim_value: Vorige - raw_value: Vorige - source_url: http://www.hendrickhamelmuseum.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[1]/div/div/a[1]/svg/title - html_file: web/1623/hendrickhamelmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.228436+00:00' - - claim_type: org_name - claim_value: Volgende - raw_value: Volgende - source_url: http://www.hendrickhamelmuseum.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[1]/div/div/a[2]/svg/title - html_file: web/1623/hendrickhamelmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.228446+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: http://www.hendrickhamelmuseum.nl/ - retrieved_on: '' - xpath: /html/body/div[1]/div/div[9]/div/div[4]/section[2]/ul/li/a/svg/title - html_file: web/1623/hendrickhamelmuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.228457+00:00' - claim_type: org_name claim_value: Scroll naar bovenzijde raw_value: Scroll naar bovenzijde @@ -314,3 +284,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:12.230256+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1627_bonnefanten_museum.yaml b/data/nde/enriched/entries/1627_bonnefanten_museum.yaml index 5257a72a3b..a4469692d4 100644 --- a/data/nde/enriched/entries/1627_bonnefanten_museum.yaml +++ b/data/nde/enriched/entries/1627_bonnefanten_museum.yaml @@ -236,7 +236,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:12.382092+00:00' source_archive: web/1627/bonnefanten.nl - claims_count: 8 + claims_count: 6 claims: - claim_type: org_name claim_value: Bonnefanten — The art museum of Limburg @@ -290,26 +290,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:12.381774+00:00' - - claim_type: social_facebook - claim_value: https://www.facebook.com/sharer/sharer.php?u=https://www.bonnefanten.nl/en - raw_value: https://www.facebook.com/sharer/sharer.php?u=https://www.bonnefanten.nl/en - source_url: http://www.bonnefanten.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div[3]/main/div/div[2]/div[2]/ul/li[1]/a - html_file: web/1627/bonnefanten.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:12.381833+00:00' - - claim_type: social_twitter - claim_value: https://twitter.com/intent/tweet?text=Bonnefanten&url=https://www.bonnefanten.nl/en - raw_value: https://twitter.com/intent/tweet?text=Bonnefanten&url=https://www.bonnefanten.nl/en - source_url: http://www.bonnefanten.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/div[3]/main/div/div[2]/div[2]/ul/li[2]/a - html_file: web/1627/bonnefanten.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:12.381838+00:00' - claim_type: org_name claim_value: Bonnefanten raw_value: Bonnefanten @@ -320,3 +300,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:12.381908+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1628_museum_gevangenpoort.yaml b/data/nde/enriched/entries/1628_museum_gevangenpoort.yaml index af12fb3022..0662695076 100644 --- a/data/nde/enriched/entries/1628_museum_gevangenpoort.yaml +++ b/data/nde/enriched/entries/1628_museum_gevangenpoort.yaml @@ -234,18 +234,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:12.413882+00:00' source_archive: web/1628/gevangenpoort.nl - claims_count: 5 + claims_count: 4 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Gevangenpoort - source_url: http://www.gevangenpoort.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1628/gevangenpoort.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.412903+00:00' - claim_type: org_name claim_value: Gevangenpoort raw_value: Gevangenpoort @@ -286,3 +276,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:12.413570+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1638_de_mesdag_collectie.yaml b/data/nde/enriched/entries/1638_de_mesdag_collectie.yaml index 8be92940e1..1542b587b3 100644 --- a/data/nde/enriched/entries/1638_de_mesdag_collectie.yaml +++ b/data/nde/enriched/entries/1638_de_mesdag_collectie.yaml @@ -212,7 +212,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:12.808488+00:00' source_archive: web/1638/demesdagcollectie.nl - claims_count: 48 + claims_count: 45 claims: - claim_type: org_name claim_value: Museum De Mesdag Collectie in Den Haag @@ -384,16 +384,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:12.807595+00:00' - - claim_type: org_name - claim_value: close - raw_value: close - source_url: https://demesdagcollectie.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/svg/symbol[17]/title - html_file: web/1638/demesdagcollectie.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.807598+00:00' - claim_type: org_name claim_value: collection raw_value: collection @@ -464,16 +454,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:12.807619+00:00' - - claim_type: org_name - claim_value: menu - raw_value: menu - source_url: https://demesdagcollectie.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/svg/symbol[25]/title - html_file: web/1638/demesdagcollectie.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.807622+00:00' - claim_type: org_name claim_value: minus-big raw_value: minus-big @@ -584,16 +564,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:12.807654+00:00' - - claim_type: org_name - claim_value: search - raw_value: search - source_url: https://demesdagcollectie.nl/ - retrieved_on: '' - xpath: /html/body/div[3]/svg/symbol[37]/title - html_file: web/1638/demesdagcollectie.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.807657+00:00' - claim_type: org_name claim_value: set raw_value: set @@ -698,3 +668,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:12.808316+00:00' + removed_invalid_claims: + - removed_count: 3 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1640_hof_van_nederland.yaml b/data/nde/enriched/entries/1640_hof_van_nederland.yaml index 2f54dae07b..01c50cacd5 100644 --- a/data/nde/enriched/entries/1640_hof_van_nederland.yaml +++ b/data/nde/enriched/entries/1640_hof_van_nederland.yaml @@ -220,7 +220,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:12.898375+00:00' source_archive: web/1640/hofvannederland.nl - claims_count: 12 + claims_count: 6 claims: - claim_type: org_name claim_value: Hof van Nederland @@ -232,66 +232,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:12.897466+00:00' - - claim_type: org_name - claim_value: Chevron left - raw_value: Chevron left - source_url: https://www.hofvannederland.nl/ - retrieved_on: '' - xpath: /html/body/nav/div/div[2]/div[2]/div/button/svg/title - html_file: web/1640/hofvannederland.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.897495+00:00' - - claim_type: org_name - claim_value: Close - raw_value: Close - source_url: https://www.hofvannederland.nl/ - retrieved_on: '' - xpath: /html/body/nav/div/div[2]/div[2]/div/div[2]/button/svg/title - html_file: web/1640/hofvannederland.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.897500+00:00' - - claim_type: org_name - claim_value: Opent in externe pagina - raw_value: Opent in externe pagina - source_url: https://www.hofvannederland.nl/ - retrieved_on: '' - xpath: /html/body/nav/div/div[2]/div[3]/div/div[2]/ul[2]/li/a/svg/title - html_file: web/1640/hofvannederland.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.897509+00:00' - - claim_type: org_name - claim_value: Arrow left - raw_value: Arrow left - source_url: https://www.hofvannederland.nl/ - retrieved_on: '' - xpath: /html/body/main/div[2]/div[1]/div/div/div/button[1]/svg/title - html_file: web/1640/hofvannederland.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.897526+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: https://www.hofvannederland.nl/ - retrieved_on: '' - xpath: /html/body/footer/div/div[4]/ul[1]/li[1]/a/svg/title - html_file: web/1640/hofvannederland.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.897530+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: https://www.hofvannederland.nl/ - retrieved_on: '' - xpath: /html/body/footer/div/div[4]/ul[1]/li[2]/a/svg/title - html_file: web/1640/hofvannederland.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:12.897534+00:00' - claim_type: org_name claim_value: Tiktok raw_value: Tiktok @@ -348,3 +288,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:12.898176+00:00' + removed_invalid_claims: + - removed_count: 6 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1644_huis_van_gijn.yaml b/data/nde/enriched/entries/1644_huis_van_gijn.yaml index 3922b47fac..410b171238 100644 --- a/data/nde/enriched/entries/1644_huis_van_gijn.yaml +++ b/data/nde/enriched/entries/1644_huis_van_gijn.yaml @@ -220,7 +220,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:13.156654+00:00' source_archive: web/1644/huisvangijn.nl - claims_count: 13 + claims_count: 6 claims: - claim_type: org_name claim_value: Huis Van Gijn @@ -242,76 +242,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:13.155081+00:00' - - claim_type: org_name - claim_value: Eye - raw_value: Eye - source_url: http://huisvangijn.nl/ - retrieved_on: '' - xpath: /html/body/nav/div/div[1]/div[2]/a[2]/svg/title - html_file: web/1644/huisvangijn.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:13.155091+00:00' - - claim_type: org_name - claim_value: Chevron left - raw_value: Chevron left - source_url: http://huisvangijn.nl/ - retrieved_on: '' - xpath: /html/body/nav/div/div[2]/div[2]/div/button/svg/title - html_file: web/1644/huisvangijn.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:13.155101+00:00' - - claim_type: org_name - claim_value: Close - raw_value: Close - source_url: http://huisvangijn.nl/ - retrieved_on: '' - xpath: /html/body/nav/div/div[2]/div[2]/div/div[2]/button/svg/title - html_file: web/1644/huisvangijn.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:13.155110+00:00' - - claim_type: org_name - claim_value: Opent in externe pagina - raw_value: Opent in externe pagina - source_url: http://huisvangijn.nl/ - retrieved_on: '' - xpath: /html/body/nav/div/div[2]/div[3]/div/div[2]/ul[3]/li/a/svg/title - html_file: web/1644/huisvangijn.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:13.155130+00:00' - - claim_type: org_name - claim_value: Arrow left - raw_value: Arrow left - source_url: http://huisvangijn.nl/ - retrieved_on: '' - xpath: /html/body/main/div[3]/div[1]/div/div/div/button[1]/svg/title - html_file: web/1644/huisvangijn.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:13.155195+00:00' - - claim_type: org_name - claim_value: Facebook - raw_value: Facebook - source_url: http://huisvangijn.nl/ - retrieved_on: '' - xpath: /html/body/footer/div/div[4]/ul[1]/li[1]/a/svg/title - html_file: web/1644/huisvangijn.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:13.155204+00:00' - - claim_type: org_name - claim_value: Instagram - raw_value: Instagram - source_url: http://huisvangijn.nl/ - retrieved_on: '' - xpath: /html/body/footer/div/div[4]/ul[1]/li[2]/a/svg/title - html_file: web/1644/huisvangijn.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:13.155212+00:00' - claim_type: description_short claim_value: Ontdek in Huis Van Gijn hoe een welgesteld echtpaar en hun personeel rond 1900 woonden en werkten. @@ -354,3 +284,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:13.156379+00:00' + removed_invalid_claims: + - removed_count: 7 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1647_marius_van_dokkum_museum.yaml b/data/nde/enriched/entries/1647_marius_van_dokkum_museum.yaml index 28a5872270..f00190d811 100644 --- a/data/nde/enriched/entries/1647_marius_van_dokkum_museum.yaml +++ b/data/nde/enriched/entries/1647_marius_van_dokkum_museum.yaml @@ -225,7 +225,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:13.248689+00:00' source_archive: web/1647/mariusvandokkummuseum.nl - claims_count: 7 + claims_count: 5 claims: - claim_type: org_name claim_value: Marius van Dokkum Museum @@ -277,23 +277,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:13.248355+00:00' - - claim_type: social_facebook - claim_value: http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.mariusvandokkummuseum.nl&t=Marius%20van%20Dokkum%20Museum%20Harderwijk - raw_value: http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.mariusvandokkummuseum.nl&t=Marius%20van%20Dokkum%20Museum%20Harderwijk - source_url: http://mariusvandokkummuseum.nl/ - retrieved_on: '' - xpath: /html/body/div[2]/ul/li[1]/a - html_file: web/1647/mariusvandokkummuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:13.248476+00:00' - - claim_type: social_twitter - claim_value: http://twitter.com/share?text=Marius%20van%20Dokkum%20Museum%20Harderwijk&url=https%3A%2F%2Fwww.mariusvandokkummuseum.nl&via=Stadsmuseum - raw_value: http://twitter.com/share?text=Marius%20van%20Dokkum%20Museum%20Harderwijk&url=https%3A%2F%2Fwww.mariusvandokkummuseum.nl&via=Stadsmuseum - source_url: http://mariusvandokkummuseum.nl/ - retrieved_on: '' - xpath: /html/body/div[2]/ul/li[2]/a - html_file: web/1647/mariusvandokkummuseum.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: social_link - extraction_timestamp: '2025-12-01T12:34:13.248489+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1654_sudergemaal.yaml b/data/nde/enriched/entries/1654_sudergemaal.yaml index 5118e7d60d..5a8fba237f 100644 --- a/data/nde/enriched/entries/1654_sudergemaal.yaml +++ b/data/nde/enriched/entries/1654_sudergemaal.yaml @@ -147,7 +147,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:13.500719+00:00' source_archive: web/1654/damshus.nl - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: It Damshûs @@ -201,13 +201,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:13.500266+00:00' - - claim_type: org_name - claim_value: Oproep - raw_value: Oproep - source_url: http://www.damshus.nl/ - retrieved_on: '' - xpath: /html/body/div[2]/div/div/div/article/div/div/div/div[2]/div/div/div[1]/div/h1 - html_file: web/1654/damshus.nl/rendered.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T12:34:13.500441+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1661_stichting_oer_ij.yaml b/data/nde/enriched/entries/1661_stichting_oer_ij.yaml index b13f934917..a08611dae4 100644 --- a/data/nde/enriched/entries/1661_stichting_oer_ij.yaml +++ b/data/nde/enriched/entries/1661_stichting_oer_ij.yaml @@ -148,7 +148,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:13.792280+00:00' source_archive: web/1661/oerij.eu - claims_count: 6 + claims_count: 5 claims: - claim_type: org_name claim_value: Oer-IJ @@ -160,16 +160,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: title_tag extraction_timestamp: '2025-12-01T12:34:13.790705+00:00' - - claim_type: org_name - claim_value: zoeken - raw_value: zoeken - source_url: https://www.oerij.eu/ - retrieved_on: '' - xpath: /html/body/div/div/div/div/div/div/form/table/tbody/tr/td[2]/button/svg/title - html_file: web/1661/oerij.eu/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:13.790721+00:00' - claim_type: description_short claim_value: De thuispagina van Stichting Oer-IJ raw_value: De thuispagina van Stichting Oer-IJ @@ -210,3 +200,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:13.792140+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1664_museumboerderij_tante_jaantje.yaml b/data/nde/enriched/entries/1664_museumboerderij_tante_jaantje.yaml index dacd2b853c..b8297a646b 100644 --- a/data/nde/enriched/entries/1664_museumboerderij_tante_jaantje.yaml +++ b/data/nde/enriched/entries/1664_museumboerderij_tante_jaantje.yaml @@ -204,18 +204,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:13.894391+00:00' source_archive: web/1664/tantejaantje.nl - claims_count: 4 + claims_count: 3 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Tante Jaantje - source_url: http://www.tantejaantje.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1664/tantejaantje.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:13.893743+00:00' - claim_type: org_name claim_value: Tante Jaantje raw_value: Tante Jaantje @@ -246,3 +236,11 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:13.894314+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1669_heemkundevereniging_helden.yaml b/data/nde/enriched/entries/1669_heemkundevereniging_helden.yaml index 53bda3f791..dde6cb3c61 100644 --- a/data/nde/enriched/entries/1669_heemkundevereniging_helden.yaml +++ b/data/nde/enriched/entries/1669_heemkundevereniging_helden.yaml @@ -189,18 +189,8 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:14.038383+00:00' source_archive: web/1669/moennik.nl - claims_count: 4 + claims_count: 2 claims: - - claim_type: org_name - claim_value: Home - raw_value: Home - Moennik - source_url: http://www.moennik.nl/ - retrieved_on: '' - xpath: /html/head/title - html_file: web/1669/moennik.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: title_tag - extraction_timestamp: '2025-12-01T12:34:14.036847+00:00' - claim_type: description_short claim_value: De Heemkundevereniging Helden heeft ten doel de studie en geschiedenis van historische en culturele aspecten van de voormalige gemeente Helden (Beringe, @@ -219,16 +209,6 @@ web_claims: xpath_match_score: 1.0 extraction_method: og_description extraction_timestamp: '2025-12-01T12:34:14.037093+00:00' - - claim_type: org_name - claim_value: Moennik - raw_value: Moennik - source_url: http://www.moennik.nl/ - retrieved_on: '' - xpath: /html/head/meta[10] - html_file: web/1669/moennik.nl/rendered.html - xpath_match_score: 1.0 - extraction_method: og_site_name - extraction_timestamp: '2025-12-01T12:34:14.037176+00:00' - claim_type: social_facebook claim_value: https://www.facebook.com/heemkundevereniginghelden/?fref=ts raw_value: https://www.facebook.com/heemkundevereniginghelden/?fref=ts @@ -239,3 +219,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:14.038111+00:00' + removed_invalid_claims: + - removed_count: 2 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/data/nde/enriched/entries/1670_historisch_genootschap_crommenie.yaml b/data/nde/enriched/entries/1670_historisch_genootschap_crommenie.yaml index b7717eda0b..003d447dfc 100644 --- a/data/nde/enriched/entries/1670_historisch_genootschap_crommenie.yaml +++ b/data/nde/enriched/entries/1670_historisch_genootschap_crommenie.yaml @@ -164,7 +164,7 @@ web_enrichment: web_claims: extraction_timestamp: '2025-12-01T12:34:14.077705+00:00' source_archive: web/1670/hgc-krommenie.nl - claims_count: 5 + claims_count: 4 claims: - claim_type: org_name claim_value: Historie Krommenie @@ -217,13 +217,11 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:14.077491+00:00' - - claim_type: org_name - claim_value: Home - raw_value: Home - source_url: https://www.hgc-krommenie.nl/ - retrieved_on: '' - xpath: /html/body/div/div[2]/div/div/main/article/header/h1 - html_file: web/1670/hgc-krommenie.nl/rendered.html - xpath_match_score: 0.9 - extraction_method: h1_tag - extraction_timestamp: '2025-12-01T12:34:14.077598+00:00' + removed_invalid_claims: + - removed_count: 1 + audit_timestamp: '2025-12-01' + reasons: + - share_buttons_as_social + - invalid_org_names + - empty_claims + - spam_content diff --git a/schemas/20251121/linkml/modules/classes/WebPortal.yaml b/schemas/20251121/linkml/modules/classes/WebPortal.yaml index 3654cee8b6..16689b6471 100644 --- a/schemas/20251121/linkml/modules/classes/WebPortal.yaml +++ b/schemas/20251121/linkml/modules/classes/WebPortal.yaml @@ -873,7 +873,60 @@ classes: participating_institutions: 4000 identifiers: - "http://www.wikidata.org/entity/Q209441" - description: "Europeana - pan-European cultural heritage portal" + # NEW RELATIONSHIP SLOTS (added 2025-12-01) + data_license_policy: + - policy_id: "https://nde.nl/ontology/hc/policy/europeana-data-exchange" + policy_name: "Europeana Data Exchange Agreement" + data_licenses: + - license_id: "https://creativecommons.org/publicdomain/zero/1.0/" + license_name: "CC0 1.0 Universal (Public Domain)" + license_scope: "Metadata provided to Europeana" + temporal_extent: + begin_of_the_begin: "2011-09-01" + end_of_the_end: null # Ongoing + created_by_project: + project_id: "https://nde.nl/ontology/hc/project/europeana-dsi" + project_name: "Europeana DSI (Digital Service Infrastructure)" + project_description: >- + EU-funded project under CEF Telecom to develop and maintain the + Europeana digital platform as core European cultural heritage infrastructure. + funding_sources: + - "https://nde.nl/ontology/hc/funding-org/eu/cef-telecom" + project_period: + begin_of_the_begin: "2015-01-01" + end_of_the_end: "2025-12-31" + associated_encompassing_bodies: + # Funding governance (indirect via FundingOrganisation → Project → WebPortal) + - "https://nde.nl/ontology/hc/encompassing-body/government/european-commission" + # Operating foundation + - "https://nde.nl/ontology/hc/encompassing-body/network/europeana-foundation" + # Member state cultural ministries + - "https://nde.nl/ontology/hc/encompassing-body/consortium/europeana-network-association" + implements_digital_platform: + - platform_id: "https://nde.nl/ontology/hc/platform/europeana-collections" + platform_name: "Europeana Collections" + platform_url: "https://www.europeana.eu/collections" + temporal_extent: + begin_of_the_begin: "2008-11-20" + - platform_id: "https://nde.nl/ontology/hc/platform/europeana-pro" + platform_name: "Europeana Pro" + platform_url: "https://pro.europeana.eu" + temporal_extent: + begin_of_the_begin: "2014-01-01" + implements_auxiliary_platform: + - auxiliary_id: "https://nde.nl/ontology/hc/auxiliary/europeana-iiif" + auxiliary_name: "Europeana IIIF APIs" + auxiliary_url: "https://iiif.europeana.eu" + auxiliary_type: "IIIF_SERVER" + temporal_extent: + begin_of_the_begin: "2018-01-01" + - auxiliary_id: "https://nde.nl/ontology/hc/auxiliary/europeana-entity-api" + auxiliary_name: "Europeana Entity API" + auxiliary_url: "https://api.europeana.eu/entity" + auxiliary_type: "REST_API" + temporal_extent: + begin_of_the_begin: "2017-01-01" + description: "Europeana - pan-European cultural heritage portal (comprehensive example with all relationship slots)" - value: portal_id: "https://nde.nl/ontology/hc/portal/de/archivportal-d"