chore: Add data reports, infra scripts, and API updates
- Data quality reports for Dutch custodians - Name mismatch detection reports - Failed crawl URL tracking - Caddy configuration updates - Monitor script for chunk 404 errors - API endpoint improvements
This commit is contained in:
parent
0c36429257
commit
3820f2fc92
10 changed files with 28385 additions and 217 deletions
2204
data/custodian/person/name_mismatch_report.csv
Normal file
2204
data/custodian/person/name_mismatch_report.csv
Normal file
File diff suppressed because it is too large
Load diff
18014
data/custodian/person/name_mismatch_report.json
Normal file
18014
data/custodian/person/name_mismatch_report.json
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,196 +0,0 @@
|
|||
original_entry:
|
||||
name: Amsterdam-Centrum
|
||||
source: UNESCO Memory of the World (via Wikidata)
|
||||
wikidata_id: Q478282
|
||||
mow_inscriptions:
|
||||
- wikidata_id: Q856452
|
||||
name: Ets Haim library
|
||||
country: Netherlands
|
||||
processing_timestamp: '2025-12-06T20:58:00.502825+00:00'
|
||||
wikidata_enrichment:
|
||||
wikidata_entity_id: Q478282
|
||||
api_metadata:
|
||||
api_endpoint: https://www.wikidata.org/w/rest.php/wikibase/v1
|
||||
fetch_timestamp: '2025-12-09T12:13:54.498647+00:00'
|
||||
user_agent: GLAMDataExtractor/1.1 (glam-data@example.com) Python/httpx
|
||||
enrichment_version: 2.0_full
|
||||
properties_found:
|
||||
- P625
|
||||
- P17
|
||||
- P31
|
||||
- P131
|
||||
- P373
|
||||
- P18
|
||||
- P856
|
||||
- P154
|
||||
wikidata_labels:
|
||||
fr: Amsterdam-Centrum
|
||||
en: Amsterdam-Centrum
|
||||
nl: Amsterdam-Centrum
|
||||
la: Medium Amstelodamum
|
||||
de: Amsterdam-Centrum
|
||||
it: Amsterdam-Centrum
|
||||
sv: Amsterdam-Centrum
|
||||
pl: Amsterdam-Centrum
|
||||
fa: امستردام-سنترام
|
||||
sq: Amsterdam-Centrum
|
||||
he: אמסטרדם המרכזית
|
||||
ar: أمستردام- سينتروم
|
||||
ka: ამსტერდამ-სენტრუმი
|
||||
zh: 阿姆斯特丹中央
|
||||
fi: Amsterdam-Centrum
|
||||
hy: Ամստերդամ-Սենտրում
|
||||
eo: Amsterdam-Centro
|
||||
ja: アムステルダムセントラム
|
||||
el: Άμστερνταμ-Κέντρο
|
||||
ru: Центр Амстердама
|
||||
sl: Amsterdam Center
|
||||
ca: Amsterdam-Centrum
|
||||
ban: Amsterdam-Centrum
|
||||
id: Amsterdam-Centrum
|
||||
wikidata_label_en: Amsterdam-Centrum
|
||||
wikidata_label_nl: Amsterdam-Centrum
|
||||
wikidata_label_ja: アムステルダムセントラム
|
||||
wikidata_label_de: Amsterdam-Centrum
|
||||
wikidata_label_fr: Amsterdam-Centrum
|
||||
wikidata_descriptions:
|
||||
de: einer von 7 Stadtbezirken in Amsterdam
|
||||
nl: stadsdeel van Amsterdam, Nederland
|
||||
en: borough of Amsterdam, Netherlands
|
||||
ar: ال هراش و
|
||||
fr: arrondissement d'Amsterdam
|
||||
fi: Amsterdamin historiallinen keskusta
|
||||
he: רובע באמסטרדם
|
||||
ru: район в историческом центре Амстердама, Нидерланды
|
||||
sl: mestni okraj Amsterdama na Nizozemskem
|
||||
wikidata_description_en: borough of Amsterdam, Netherlands
|
||||
wikidata_aliases:
|
||||
nl:
|
||||
- Amsterdam Centrum
|
||||
- Centrum
|
||||
- Amsterdamse binnenstad
|
||||
- stadsdeel Centrum
|
||||
wikidata_sitelinks:
|
||||
enwiki: Amsterdam-Centrum
|
||||
nlwiki: Amsterdam-Centrum
|
||||
itwiki: Amsterdam-Centrum
|
||||
lawiki: Medium Amstelodamum
|
||||
dewiki: Amsterdam-Centrum
|
||||
svwiki: Amsterdam-Centrum
|
||||
sqwiki: Amsterdam-Centrum
|
||||
hewiki: אמסטרדם המרכזית
|
||||
arwiki: أمستردام- سينتروم
|
||||
kawiki: ამსტერდამ-სენტრუმი
|
||||
zhwiki: 阿姆斯特丹中央
|
||||
dewikivoyage: Amsterdam/Centrum
|
||||
fawiki: آمستردام-سنترام
|
||||
frwiki: Amsterdam-Centre
|
||||
banwiki: Amsterdam-Centrum
|
||||
idwiki: Amsterdam-Centrum
|
||||
wikidata_classification:
|
||||
instance_of: &id001
|
||||
- id: Q15079751
|
||||
label: borough of Amsterdam
|
||||
description: Administrative divisions of Amsterdam
|
||||
wikidata_instance_of: *id001
|
||||
wikidata_location:
|
||||
coordinates: &id004
|
||||
latitude: 52.369985
|
||||
longitude: 4.898014
|
||||
precision: 1.0e-06
|
||||
country: &id002
|
||||
id: Q55
|
||||
label: Netherlands
|
||||
description: country in Northwestern Europe with territories in the Caribbean
|
||||
located_in_admin_entity: &id003
|
||||
id: Q9899
|
||||
label: Amsterdam
|
||||
description: municipality in the Netherlands, containing the cities of Amsterdam and Weesp
|
||||
wikidata_country: *id002
|
||||
wikidata_located_in: *id003
|
||||
wikidata_coordinates: *id004
|
||||
wikidata_web:
|
||||
official_website: http://www.centrum.amsterdam.nl/
|
||||
wikidata_official_website: http://www.centrum.amsterdam.nl/
|
||||
wikidata_media:
|
||||
commons_category: Amsterdam-Centrum
|
||||
image: Canal in Jordaan, Amsterdam (9258952020).jpg
|
||||
logo: Logo of Gemeente Amsterdam Centrum.png
|
||||
wikidata_image: Canal in Jordaan, Amsterdam (9258952020).jpg
|
||||
wikidata_logo: Logo of Gemeente Amsterdam Centrum.png
|
||||
ghcid:
|
||||
ghcid_current: NL-NH-AMS-A-AC
|
||||
ghcid_original: NL-NH-AMS-A-AC
|
||||
ghcid_uuid: 1605331a-0ffc-507f-abcb-db808af8df0b
|
||||
ghcid_uuid_sha256: be1da48c-4ab7-8896-b209-be645ac5eb9e
|
||||
ghcid_numeric: 13699286563986864278
|
||||
record_id: 975c3324-51d2-470e-8673-53a917c43f42
|
||||
generation_timestamp: '2025-12-06T20:58:00.502825+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-NH-AMS-A-AC
|
||||
ghcid_numeric: 13699286563986864278
|
||||
valid_from: '2025-12-06T20:58:00.502825+00:00'
|
||||
reason: Initial GHCID from UNESCO MoW Wikidata data (Dec 2025)
|
||||
location_resolution:
|
||||
method: WIKIDATA_LOCATION
|
||||
country_code: NL
|
||||
country_label: Netherlands
|
||||
region_code: NH
|
||||
city_code: AMS
|
||||
city_label: Amsterdam
|
||||
geonames_id: 2759794
|
||||
geonames_name: Amsterdam
|
||||
feature_code: PPLC
|
||||
population: 741636
|
||||
admin1_code: '07'
|
||||
source_coordinates:
|
||||
latitude: 52.369985
|
||||
longitude: 4.898014
|
||||
source: wikidata
|
||||
geonames_id: 2759794
|
||||
custodian_name:
|
||||
claim_type: custodian_name
|
||||
claim_value: Amsterdam-Centrum
|
||||
source_type: wikidata
|
||||
emic_name: Amsterdam-Centrum
|
||||
name_language: nl
|
||||
standardized_name: Amsterdam-Centrum
|
||||
unesco_mow_enrichment:
|
||||
is_mow_custodian: true
|
||||
inscription_count: 1
|
||||
inscriptions:
|
||||
- wikidata_id: Q856452
|
||||
name: Ets Haim library
|
||||
inscription_country: Netherlands
|
||||
enrichment_timestamp: '2025-12-06T20:58:00.502825+00:00'
|
||||
data_source: Wikidata SPARQL (UNESCO has no MoW API)
|
||||
google_maps_status: NO_MATCH
|
||||
google_maps_rejected:
|
||||
candidate_name: Amsterdam-Centrum
|
||||
rejection_reason: The Google Maps candidate 'Amsterdam-Centrum' has the Google Place types 'sublocality_level_1', 'sublocality',
|
||||
'political'. This indicates it is a geographical/administrative area, not an institution. It fails the TYPE MATCH and
|
||||
ENTITY TYPE criteria. The source is a borough, not an archive.
|
||||
timestamp: '2025-12-08T21:00:29.116084+00:00'
|
||||
youtube_status: NOT_FOUND
|
||||
youtube_search_query: Amsterdam-Centrum official
|
||||
youtube_search_timestamp: '2025-12-08T21:00:29.454841+00:00'
|
||||
provenance:
|
||||
notes:
|
||||
- 'YouTube/Google Maps enrichment 2025-12-08T21:00:29Z: Maps: rejected by LLM; YouTube: not found'
|
||||
- Canonical location normalized on 2025-12-09T12:53:12Z
|
||||
location:
|
||||
latitude: 52.369985
|
||||
longitude: 4.898014
|
||||
coordinate_provenance:
|
||||
source_type: WIKIDATA
|
||||
source_path: wikidata_enrichment.wikidata_coordinates
|
||||
original_timestamp: '2025-12-09T12:13:54.498647+00:00'
|
||||
api_endpoint: https://www.wikidata.org/w/rest.php/wikibase/v1
|
||||
entity_id: Q478282
|
||||
city: Amsterdam
|
||||
region_code: NH
|
||||
country: NL
|
||||
geonames_id: 2759794
|
||||
geonames_name: Amsterdam
|
||||
feature_code: PPLC
|
||||
normalization_timestamp: '2025-12-09T12:53:12.132221+00:00'
|
||||
203
data/failed_crawl_urls.txt
Normal file
203
data/failed_crawl_urls.txt
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
NL-OV-ENS-I-C.yaml http://www.cvah.nl
|
||||
NL-NB-SAM-S-SH.yaml http://www.sambeeksheem.nl/
|
||||
NL-NH-CAS-S-C-castricum.yaml https://www.oerij.eu/
|
||||
NL-NH-AMS-A-NBA.yaml https://www.dnb.nl/archief/
|
||||
NL-ZH-HIL-I-HHO.yaml http://harddraverijvereniginghillegom.nl
|
||||
NL-ZH-STR-S-OVLS.yaml https://www.hetlandvanstrijen.com/
|
||||
NL-OV-DEV-M-SMD.yaml https://hetspeelgoedmuseum.nl/
|
||||
NL-GE-LEN-S-L.yaml https://www.dewarmoes.nl/
|
||||
NL-FR-GRO-I-SPG.yaml http://sintpiter.nl/
|
||||
NL-GE-KRA-M-K.yaml https://www.heiligenbeeldenmuseum.nl/
|
||||
NL-OV-MAR-D-W.yaml https://www.landgoedereninoverijssel.nl
|
||||
NL-NB-BOX-I-BO.yaml http://www.brabantorgel.nl
|
||||
NL-OV-ZWO-S-ZHV.yaml https://www.zwolsehistorischevereniging.nl/
|
||||
NL-UT-BRE-S-HKB-historische_kring_loenen.yaml https://www.hkloenen.nl/
|
||||
NL-ZH-SAS-S-OS.yaml https://www.stichtingoudsassenheim.nl/
|
||||
NL-GE-GRO-A-HPE.yaml https://vu.nl/nl/over-de-vu/diensten/universiteitsbibliotheek/meer-over/collectie-hdc-protestants-erfgoed
|
||||
NL-FR-SNE-M-FSM.yaml https://www.friesscheepvaartmuseum.nl/
|
||||
NL-LI-BAN-I-JB.yaml http://www.jonkheid-banholt.nl
|
||||
NL-LI-SMA-I-VZN.yaml http://www.vlechtheggen.nl
|
||||
NL-GE-ZIE-S-OVZ-vereniging_voor_oudheidkunde_te_lichtenvoorde.yaml https://lichtenvoorde.erfgoedcms.nl/
|
||||
NL-LI-STE-I-PH.yaml http://www.niekhoogland.nl
|
||||
NL-FR-LEE-M-L.yaml https://natuurmuseumfryslan.nl/
|
||||
NL-GE-APE-S-A.yaml https://hollandsecirkel.nl/
|
||||
NL-GE-BAR-A-GAB.yaml https://gemeentearchief.barneveld.nl/
|
||||
NL-NH-ALK-T-AKG.yaml http://www.kaasdragersgildealkmaar.nl
|
||||
NL-ZH-DHA-A-NA-nationaal_archief_CORRUPTED_20251210.yaml https://moslimarchief.nl/
|
||||
NL-ZH-LEI-U-WML.yaml https://leiden.wereldmuseum.nl/
|
||||
NL-LI-VAL-U-KV.yaml https://www.kasteelvalkenburg.nl/
|
||||
NL-NH-AMS-U-WMA.yaml https://amsterdam.wereldmuseum.nl/
|
||||
NL-ZH-MAA-U-MM.yaml https://museummaassluis.nl/
|
||||
NL-NB-WAA-U-S.yaml https://schoenenkwartier.nl/
|
||||
NL-LI-VEN-U-MBD.yaml https://www.vanbommelvandam.nl/
|
||||
NL-UT-AME-U-M.yaml https://musiom.art/
|
||||
NL-NH-MON-U-WMS.yaml https://www.despeeltoren.nl/
|
||||
NL-NH-DKO-U-E.yaml https://www.ecomare.nl/
|
||||
NL-ZE-VEE-U-MV.yaml https://museumveere.nl/
|
||||
NL-ZH-DEL-U-MPD.yaml https://www.museumprinsenhofdelft.nl/
|
||||
NL-OV-DEL-U-ZMD.yaml https://zoutmuseum.nl/
|
||||
NL-ZH-DHA-U-MH.yaml https://www.mauritshuis.nl/
|
||||
NL-ZH-DHA-U-MC.yaml https://www.demesdagcollectie.nl/nl
|
||||
NL-NH-DBU-U-OT.yaml https://oudheidkamertexel.nl/en/
|
||||
NL-FR-ORA-U-MB.yaml https://www.museumbelvedere.nl/nl/
|
||||
NL-NH-AMS-U-EFM-.yaml https://www.eyefilm.nl/
|
||||
NL-ZH-DHA-U-MO.yaml https://www.museon-omniversum.nl/
|
||||
NL-UT-UTR-U-VM.yaml https://volksbuurtmuseum.nl/
|
||||
NL-UT-WOE-U-SMW.yaml https://www.stadsmuseumwoerden.nl/
|
||||
NL-NH-SCH-U-MV.yaml https://museumvreeburg.nl/
|
||||
NL-NH-AMS-U-SM.yaml https://www.hetscheepvaartmuseum.nl/
|
||||
NL-NH-AMS-U-MJ.yaml https://museumjan.nl/
|
||||
NL-NH-IJM-U-IZHM.yaml https://www.zeehavenmuseum.nl/
|
||||
NL-NH-ZAA-U-MZT.yaml https://zaansetijd.nl/
|
||||
NL-NH-HOO-U-MTE.yaml https://www.museumhoorn.nl/
|
||||
NL-NB-KLU-U-VSM.yaml https://vlasserij-suikermuseum.nl/
|
||||
NL-NB-SCH-U-MJHH.yaml https://www.kunstcollectiemeierijstad.nl/kunstcollectie/Museum_Jan_Heestershuis
|
||||
NL-NB-HEL-U-MH.yaml https://museumhelmond.nl/
|
||||
NL-ZH-GOR-U-HHM.yaml https://hendrickhamelmuseum.nl/
|
||||
NL-ZH-DHA-U-VM.yaml https://www.vrijmetselarijmuseum.nl/
|
||||
NL-NH-HAA-U-FHM.yaml https://franshalsmuseum.nl/nl
|
||||
NL-NH-ZAA-U-ZM.yaml https://zaansmuseum.nl/
|
||||
NL-NH-AMS-U-LMA.yaml https://luthermuseum.nl/nl
|
||||
NL-ZE-HUL-U-MH.yaml https://museumhulst.nl/
|
||||
NL-NB-OOS-U-SMO.yaml https://speelgoedmuseum.nl/
|
||||
NL-NH-OUD-U-MKS.yaml https://kaapskil.nl/
|
||||
NL-OV-OLD-U-MO.yaml https://museumoldenzaal.nl/
|
||||
NL-NB-EER-U-KM.yaml https://www.kempenmuseum.nl/
|
||||
NL-ZH-ROT-U-MR.yaml https://museumrotterdam.nl/
|
||||
NL-ZE-AAR-U-CA.yaml https://cultuurforumaardenburg.nl/
|
||||
NL-OV-ENS-U-M.yaml https://www.demuseumfabriek.nl/
|
||||
NL-ZH-ROT-U-WMR.yaml https://rotterdam.wereldmuseum.nl/
|
||||
NL-NH-AMS-U-DMA.yaml https://www.diamondmuseum.com/
|
||||
NL-GE-ARN-U-MB.yaml https://www.bronbeek.nl/museum
|
||||
NL-ZH-DHA-U-LM.yaml https://louwmanmuseum.nl/
|
||||
NL-NB-GRA-U-SMG.yaml https://www.stadsmuseumgrave.nl/
|
||||
NL-LI-MAA-U-BM.yaml https://www.bonnefanten.nl/nl
|
||||
NL-ZH-SCH-U-SMS.yaml https://stedelijkmuseumschiedam.nl/
|
||||
NL-ZH-DOR-U-HG.yaml https://www.huisvangijn.nl/
|
||||
NL-NH-AMS-U-C.yaml https://www.circuspunt.nl/
|
||||
NL-OV-WIE-U-HKW.yaml https://www.historischekringwierden.nl/
|
||||
NL-ZH-DEL-U-RDM.yaml https://museum.royaldelft.com/
|
||||
NL-NH-AMS-U-AM.yaml https://www.artis.nl/nl/artis-micropia
|
||||
NL-GR-GRO-M-OVCG.yaml https://www.ovcg.nl/
|
||||
NL-LI-BET-S-V.yaml https://www.indertied.nl/
|
||||
NL-OV-DEL-A-SHCHTTS.yaml https://www.heemkundedelden.nl/welkom/streekarchief/
|
||||
NL-GE-ZAL-A-SAB.yaml https://regionaalarchiefrivierenland.nl/
|
||||
NL-UT-DDO-S-HVD.yaml https://www.historischeverenigingdendolder.nl/
|
||||
NL-FR-SAX-A-GAB.yaml https://www.waadhoeke.nl/
|
||||
NL-DR-DIE-A-GAW.yaml https://www.gemeentewesterveld.nl/
|
||||
NL-NH-AMS-S-GGGRO.yaml https://historischeprojecten.nl/geheugenvanhardenberg/werkgroep-van-riemsdijk/
|
||||
NL-ZH-VLA-I-IE.yaml http://www.istimewa-events.nl
|
||||
NL-DR-GAS-I-NFP.yaml http://www.pijprokers.nl
|
||||
NL-GR-APP-S-WK.yaml https://stichtingvanderwyck-dekempenaer.nl/
|
||||
NL-GR-GRO-R-DCNPP.yaml https://www.rug.nl/research/dnpp/
|
||||
NL-DR-DWI-S-W.yaml https://historiedewijkkoekange.nl/
|
||||
NL-FR-MEN-A-GAM.yaml https://www.waadhoeke.nl/
|
||||
NL-ZH-SCH-I-VSN.yaml http://www.vreugdevuur-scheveningen.nl
|
||||
NL-UT-AME-M-CM.yaml https://cavaleriemuseum.nl/
|
||||
NL-FR-WES-A-GAT.yaml https://www.terschelling.nl/
|
||||
NL-LI-MEC-S-HVM.yaml https://beleefmechelen.nl/
|
||||
NL-NH-ZAA-M-K.yaml https://zaansmuseum.nl/zien-doen/kuiperij/
|
||||
NL-ZH-LEI-M-MG.yaml http://www.hartebrug.nl/museum-greccio
|
||||
NL-LI-MAA-I-BOLVSZ.yaml https://olv-bidweg.nl/
|
||||
NL-ZH-ROT-I-AR.yaml http://www.aflegverenigingrachel%40gmail.com
|
||||
NL-NH-MID-M-BM.yaml https://www.historischgenootschapbeemster.nl/afdelingen/agrarisch-museum-westerhem/
|
||||
NL-NH-AMS-S-WH.yaml https://www.weeshuisjes.nl/
|
||||
NL-ZH-THX-I-BRJH.yaml http://www.berryrutjes.com
|
||||
NL-LI-MAA-I-SFPLS.yaml http://www.slowfood.nl
|
||||
NL-GR-TBX-A-GATB.yaml https://gemeente.groningen.nl/
|
||||
NL-GE-NED-D-EGH.yaml https://www.erfgoedheumen.nl/
|
||||
NL-FL-DRO-S-HD.yaml https://www.historischdronten.nl/
|
||||
NL-NH-AMS-N-IB.yaml https://inmijnbuurt.org/
|
||||
NL-ZH-OUD-M-RO.yaml https://rtm-ouddorp.nl/
|
||||
NL-FR-APP-S-A.yaml https://www.hvappelscha.nl/
|
||||
NL-LI-KER-I-SSSK.yaml http://Schuttersbroederschap%20St.%20Sebastianus
|
||||
NL-ZH-ROT-I-PH.yaml http://www.papiermakerijdehoop.nl
|
||||
NL-UT-UTR-M-MB.yaml https://www.umcutrecht.nl/nl/anatomisch-museum-bleulandinum
|
||||
NL-ZH-THX-I-HTOB.yaml http://www.tilduivenbondonsbelang.nl
|
||||
NL-NH-UIT-S-VOU.yaml https://www.ouduitgeest.nl/
|
||||
NL-DR-FOR-S-F.yaml https://www.oudheidkamerzuidwolde.nl/
|
||||
NL-ZH-ROT-M-K.yaml https://www.dekuip.com/nl/tour-en-museum/arrangementen/feyenoord-museum
|
||||
NL-ZH-DEL-A-HD.yaml https://www.hhdelfland.nl/loket/producten-diensten/bezoek-archief
|
||||
NL-LI-SIT-A-RASG-gemeente_sittard_geleen.yaml https://regioarchiefsittard-geleen.nl/
|
||||
NL-FR-HIN-M-HNSH.yaml https://www.museumhindeloopen.nl/
|
||||
NL-NB-HEL-I-HUH.yaml http://hawuthellemonds.nl
|
||||
NL-LI-SIT-A-RASG-regioarchief_sittard_geleen.yaml https://regioarchiefsittard-geleen.nl/
|
||||
NL-NH-HOO-I-KH.yaml http://www.kortebaanhoofddorp.nl
|
||||
NL-FR-LEE-M-FM-fries_museum.yaml https://www.friesverzetsmuseum.nl/frl
|
||||
NL-NB-HDX-M-KH.yaml https://www.kasteelheeswijk.nl/
|
||||
NL-OV-ALM-A-FA.yaml https://www.fotoarchieftwente.nl/
|
||||
NL-ZH-LEI-I-BN.yaml http://www.bonaken.nl
|
||||
NL-GR-MAR-A-GM.yaml https://www.westerkwartier.nl/
|
||||
NL-NH-HAU-S-HGH.yaml https://historisch-hauwert.nl/
|
||||
NL-GE-ARN-O-G.yaml https://www.gelderland.nl/themas/organisatie/over-de-provincie/huis-der-provincie/provinciale-kunstcollectie
|
||||
NL-GE-ARN-I-A.yaml http://www.1aprilvereniging.nl
|
||||
NL-OV-WIE-I-SM.yaml http://www.suntemartn.nl
|
||||
NL-GE-ARN-I-CIAO.yaml https://orthen.nl/commissie/
|
||||
NL-GE-ARN-I-BM.yaml http://www.bmsootmarsum.nl
|
||||
NL-ZH-VLA-A-SAV.yaml https://www.vlaardingen.nl/stadsarchief/
|
||||
NL-GE-HAR-M-SMH.yaml https://www.stadsmuseum-harderwijk.nl/
|
||||
NL-OV-DEL-S-KVTS.yaml https://kreenk.nl/
|
||||
NL-LI-HEY-S-HVH.yaml https://heibloem.nu/vereniging/heemkundevereniging-heibloem
|
||||
NL-DR-WES-M-MP.yaml https://www.papierknipmuseum.nl/
|
||||
NL-GE-GEN-S-HKG.yaml https://www.historischekringgente.nl/
|
||||
NL-GE-ZUT-M-MZ-stichting_musea_zutphen_stedelijk_museum_zutphen_en_museum_henriette_polak.yaml https://museazutphen.nl/stedelijk-museum-zutphen/
|
||||
NL-NH-GRO-S-OSB.yaml https://www.oudstedebroec.eu/
|
||||
NL-ZH-BER-M-MOTC.yaml https://www.molenaars-otc.nl/
|
||||
NL-ZH-ALB-I-SJPA.yaml http://www.paardenmarkt-alblasserdam.nl
|
||||
NL-GE-ELD-S-E.yaml https://www.historischekringelden.nl/
|
||||
NL-NH-AMS-M-LPMA.yaml https://dehollandschemanege.nl/levend-paardenmuseum/
|
||||
NL-NH-AMS-I-N.yaml http://www.ninsee.nl
|
||||
NL-OV-MAR-A-HAWH.yaml https://proxy.archieven.nl/0/F8BA43C796AD4BEC97A456978826D3AD
|
||||
NL-LI-AME-M-PM.yaml https://www.peelmuseum.nl/
|
||||
NL-FR-LEE-M-KMP.yaml https://www.princessehof.nl/
|
||||
NL-GR-HAR-A-GAH.yaml https://gemeente.groningen.nl/
|
||||
NL-GE-HED-S-HH.yaml https://www.historischmuseumhedel.nl/
|
||||
NL-NH-CAS-S-C-stichting_oer_ij.yaml http://www.alkmaardermeeromgeving.nl
|
||||
NL-OV-HEN-M-BMTH.yaml https://bedrijfsmuseumthaleshengelo.nl/
|
||||
NL-NB-SXH-I-NFS.yaml http://www.nbfs.nl
|
||||
NL-GE-DOO-M-MV.yaml https://museumveluwezoom.nl/
|
||||
NL-GE-OTT-I-BEV.yaml http://buurt.ede-en-veldhuizen.nl
|
||||
NL-FL-DRO-I-NPSF.yaml http://www.npsf.nl
|
||||
NL-OV-KAM-M-SMKIVS.yaml https://stedelijkmuseumkampen.nl/
|
||||
NL-GR-GRO-A-PAG.yaml https://www.provinciegroningen.nl/
|
||||
NL-LI-VIJ-S-NVV.yaml https://www.naobereviele.nl/
|
||||
NL-GE-APE-S-NVI.yaml https://www.korpora.nl/
|
||||
NL-OV-SLA-M-OMS.yaml https://museumslagharen.nl/
|
||||
NL-NH-AMS-U-EFM-eye_filmmuseum_linkedin_ultimate.yaml https://www.eyefilm.nl/
|
||||
NL-NH-AMS-U-EFM-eye_filmmuseum_linkedin_ultimate_enriched.yaml https://www.eyefilm.nl/
|
||||
NL-NH-AMS-U-EFM-eye_filmmuseum_linkedin_bulk_extracted.yaml https://www.eyefilm.nl/
|
||||
NL-NH-AMS-U-EFM-eye_filmmuseum_linkedin_comprehensive.yaml https://www.eyefilm.nl/
|
||||
NL-NH-AMS-U-EFM-eye_filmmuseum_linkedin_enriched.yaml https://www.eyefilm.nl/
|
||||
NL-NH-EAD-S-E.yaml https://www.historischegmond.nl/
|
||||
NL-LI-ROE-A-GAR.yaml https://www.archiefroermond.nl/nl
|
||||
NL-NB-TIL-I-AC.yaml http://demoscene-the-art-of-coding.net/
|
||||
NL-NH-AMS-I-KBP.yaml http://www.koprobekiprodo.wordpress.com
|
||||
NL-FR-LAN-S-L.yaml http://www.beeldbankdeknipe.nl
|
||||
NL-FR-WOM-A-L.yaml https://www.noardeast-fryslan.nl/, https://www.leeuwarden.nl/, https://www.waadhoeke.nl/
|
||||
NL-NB-BOZ-A-WBA-gemeentearchief_bergen_op_zoom.yaml https://proxy.archieven.nl/0/4A9E2B87DB3F1949E053CA00A8C054B4
|
||||
NL-ZH-BER-M-MOT.yaml https://molenaarsoudetractoren.nl/
|
||||
NL-OV-ZWO-I-LE.yaml https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/
|
||||
NL-FR-MAK-M-M.yaml https://www.museumenmolenmakkinga.nl/
|
||||
NL-FR-TIJ-S-PBMIK.yaml https://deknipe.frl/
|
||||
NL-GR-SLO-A-GAS.yaml https://www.midden-groningen.nl/
|
||||
NL-OV-GIE-M-MGOMU-museumboerderij_t_olde_maat_uus.yaml https://www.museumgiethoorn.nl/
|
||||
NL-NH-AMS-D-A.yaml https://modemuze.nl/
|
||||
NL-NH-AMS-N-LAS.yaml https://louisandriessen.com/
|
||||
NL-UT-VLE-I-KOH.yaml https://www.kovh.nl/
|
||||
NL-GE-ARN-I-S-splika.yaml http://www.splika.nl
|
||||
NL-OV-VOL-I-VV.yaml https://vvvv-vollenhove.nl/
|
||||
NL-OV-ENS-I-LF.yaml http://www.levendefolklore.nl
|
||||
NL-OV-ALM-A-GAA.yaml https://www.almelo.nl/over-almelo/gemeentearchief
|
||||
NL-FR-DRA-A-GS.yaml https://www.smallingerland.nl/
|
||||
NL-DR-DAL-S-AD.yaml https://www.aolddaoln.nl/
|
||||
NL-NB-OVE-I-SCKM.yaml http://www.karolus-magnus.nl
|
||||
NL-NH-OBD-I-NP.yaml http://www.papierknippen.nl
|
||||
NL-ZH-ROT-I-ZN.yaml http://www.zomercarnaval.com
|
||||
NL-OV-KAL-I-HBM.yaml http://www.oudhengelo.nl/index.php/marken-om-hengelo/25-mark-dunsborg/89-muldersfluit-actueel
|
||||
NL-OV-BRO-S-HB.yaml https://shb.collectiebank.nl/beeldbank/start/shb
|
||||
NL-GE-ERM-S-EML.yaml https://erfgoedlov.org/
|
||||
NL-GE-ARN-I-HH.yaml http://harddraverijheemskerk.nl
|
||||
NL-OV-STA-S-HVGS.yaml https://www.historischeverenigingstaphorst.nl/
|
||||
NL-GR-WES-M-MH.yaml https://www.helmantel.nl/museum
|
||||
NL-UT-LEE-S-HVL.yaml https://historischeverenigingleerdam.nl/
|
||||
NL-FR-LEE-M-FM-fries_verzetsmuseum.yaml https://www.friesverzetsmuseum.nl/frl
|
||||
1503
data/failed_crawl_urls_round1_backup.txt
Normal file
1503
data/failed_crawl_urls_round1_backup.txt
Normal file
File diff suppressed because it is too large
Load diff
5995
data/reports/dutch_data_quality_fast.yaml
Normal file
5995
data/reports/dutch_data_quality_fast.yaml
Normal file
File diff suppressed because it is too large
Load diff
203
data/unenriched_urls_round2.txt
Normal file
203
data/unenriched_urls_round2.txt
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
NL-OV-ENS-I-C.yaml http://www.cvah.nl
|
||||
NL-NB-SAM-S-SH.yaml http://www.sambeeksheem.nl/
|
||||
NL-NH-CAS-S-C-castricum.yaml https://www.oerij.eu/
|
||||
NL-NH-AMS-A-NBA.yaml https://www.dnb.nl/archief/
|
||||
NL-ZH-HIL-I-HHO.yaml http://harddraverijvereniginghillegom.nl
|
||||
NL-ZH-STR-S-OVLS.yaml https://www.hetlandvanstrijen.com/
|
||||
NL-OV-DEV-M-SMD.yaml https://hetspeelgoedmuseum.nl/
|
||||
NL-GE-LEN-S-L.yaml https://www.dewarmoes.nl/
|
||||
NL-FR-GRO-I-SPG.yaml http://sintpiter.nl/
|
||||
NL-GE-KRA-M-K.yaml https://www.heiligenbeeldenmuseum.nl/
|
||||
NL-OV-MAR-D-W.yaml https://www.landgoedereninoverijssel.nl
|
||||
NL-NB-BOX-I-BO.yaml http://www.brabantorgel.nl
|
||||
NL-OV-ZWO-S-ZHV.yaml https://www.zwolsehistorischevereniging.nl/
|
||||
NL-UT-BRE-S-HKB-historische_kring_loenen.yaml https://www.hkloenen.nl/
|
||||
NL-ZH-SAS-S-OS.yaml https://www.stichtingoudsassenheim.nl/
|
||||
NL-GE-GRO-A-HPE.yaml https://vu.nl/nl/over-de-vu/diensten/universiteitsbibliotheek/meer-over/collectie-hdc-protestants-erfgoed
|
||||
NL-FR-SNE-M-FSM.yaml https://www.friesscheepvaartmuseum.nl/
|
||||
NL-LI-BAN-I-JB.yaml http://www.jonkheid-banholt.nl
|
||||
NL-LI-SMA-I-VZN.yaml http://www.vlechtheggen.nl
|
||||
NL-GE-ZIE-S-OVZ-vereniging_voor_oudheidkunde_te_lichtenvoorde.yaml https://lichtenvoorde.erfgoedcms.nl/
|
||||
NL-LI-STE-I-PH.yaml http://www.niekhoogland.nl
|
||||
NL-FR-LEE-M-L.yaml https://natuurmuseumfryslan.nl/
|
||||
NL-GE-APE-S-A.yaml https://hollandsecirkel.nl/
|
||||
NL-GE-BAR-A-GAB.yaml https://gemeentearchief.barneveld.nl/
|
||||
NL-NH-ALK-T-AKG.yaml http://www.kaasdragersgildealkmaar.nl
|
||||
NL-ZH-DHA-A-NA-nationaal_archief_CORRUPTED_20251210.yaml https://moslimarchief.nl/
|
||||
NL-ZH-LEI-U-WML.yaml https://leiden.wereldmuseum.nl/
|
||||
NL-LI-VAL-U-KV.yaml https://www.kasteelvalkenburg.nl/
|
||||
NL-NH-AMS-U-WMA.yaml https://amsterdam.wereldmuseum.nl/
|
||||
NL-ZH-MAA-U-MM.yaml https://museummaassluis.nl/
|
||||
NL-NB-WAA-U-S.yaml https://schoenenkwartier.nl/
|
||||
NL-LI-VEN-U-MBD.yaml https://www.vanbommelvandam.nl/
|
||||
NL-UT-AME-U-M.yaml https://musiom.art/
|
||||
NL-NH-MON-U-WMS.yaml https://www.despeeltoren.nl/
|
||||
NL-NH-DKO-U-E.yaml https://www.ecomare.nl/
|
||||
NL-ZE-VEE-U-MV.yaml https://museumveere.nl/
|
||||
NL-ZH-DEL-U-MPD.yaml https://www.museumprinsenhofdelft.nl/
|
||||
NL-OV-DEL-U-ZMD.yaml https://zoutmuseum.nl/
|
||||
NL-ZH-DHA-U-MH.yaml https://www.mauritshuis.nl/
|
||||
NL-ZH-DHA-U-MC.yaml https://www.demesdagcollectie.nl/nl
|
||||
NL-NH-DBU-U-OT.yaml https://oudheidkamertexel.nl/en/
|
||||
NL-FR-ORA-U-MB.yaml https://www.museumbelvedere.nl/nl/
|
||||
NL-NH-AMS-U-EFM-.yaml https://www.eyefilm.nl/
|
||||
NL-ZH-DHA-U-MO.yaml https://www.museon-omniversum.nl/
|
||||
NL-UT-UTR-U-VM.yaml https://volksbuurtmuseum.nl/
|
||||
NL-UT-WOE-U-SMW.yaml https://www.stadsmuseumwoerden.nl/
|
||||
NL-NH-SCH-U-MV.yaml https://museumvreeburg.nl/
|
||||
NL-NH-AMS-U-SM.yaml https://www.hetscheepvaartmuseum.nl/
|
||||
NL-NH-AMS-U-MJ.yaml https://museumjan.nl/
|
||||
NL-NH-IJM-U-IZHM.yaml https://www.zeehavenmuseum.nl/
|
||||
NL-NH-ZAA-U-MZT.yaml https://zaansetijd.nl/
|
||||
NL-NH-HOO-U-MTE.yaml https://www.museumhoorn.nl/
|
||||
NL-NB-KLU-U-VSM.yaml https://vlasserij-suikermuseum.nl/
|
||||
NL-NB-SCH-U-MJHH.yaml https://www.kunstcollectiemeierijstad.nl/kunstcollectie/Museum_Jan_Heestershuis
|
||||
NL-NB-HEL-U-MH.yaml https://museumhelmond.nl/
|
||||
NL-ZH-GOR-U-HHM.yaml https://hendrickhamelmuseum.nl/
|
||||
NL-ZH-DHA-U-VM.yaml https://www.vrijmetselarijmuseum.nl/
|
||||
NL-NH-HAA-U-FHM.yaml https://franshalsmuseum.nl/nl
|
||||
NL-NH-ZAA-U-ZM.yaml https://zaansmuseum.nl/
|
||||
NL-NH-AMS-U-LMA.yaml https://luthermuseum.nl/nl
|
||||
NL-ZE-HUL-U-MH.yaml https://museumhulst.nl/
|
||||
NL-NB-OOS-U-SMO.yaml https://speelgoedmuseum.nl/
|
||||
NL-NH-OUD-U-MKS.yaml https://kaapskil.nl/
|
||||
NL-OV-OLD-U-MO.yaml https://museumoldenzaal.nl/
|
||||
NL-NB-EER-U-KM.yaml https://www.kempenmuseum.nl/
|
||||
NL-ZH-ROT-U-MR.yaml https://museumrotterdam.nl/
|
||||
NL-ZE-AAR-U-CA.yaml https://cultuurforumaardenburg.nl/
|
||||
NL-OV-ENS-U-M.yaml https://www.demuseumfabriek.nl/
|
||||
NL-ZH-ROT-U-WMR.yaml https://rotterdam.wereldmuseum.nl/
|
||||
NL-NH-AMS-U-DMA.yaml https://www.diamondmuseum.com/
|
||||
NL-GE-ARN-U-MB.yaml https://www.bronbeek.nl/museum
|
||||
NL-ZH-DHA-U-LM.yaml https://louwmanmuseum.nl/
|
||||
NL-NB-GRA-U-SMG.yaml https://www.stadsmuseumgrave.nl/
|
||||
NL-LI-MAA-U-BM.yaml https://www.bonnefanten.nl/nl
|
||||
NL-ZH-SCH-U-SMS.yaml https://stedelijkmuseumschiedam.nl/
|
||||
NL-ZH-DOR-U-HG.yaml https://www.huisvangijn.nl/
|
||||
NL-NH-AMS-U-C.yaml https://www.circuspunt.nl/
|
||||
NL-OV-WIE-U-HKW.yaml https://www.historischekringwierden.nl/
|
||||
NL-ZH-DEL-U-RDM.yaml https://museum.royaldelft.com/
|
||||
NL-NH-AMS-U-AM.yaml https://www.artis.nl/nl/artis-micropia
|
||||
NL-GR-GRO-M-OVCG.yaml https://www.ovcg.nl/
|
||||
NL-LI-BET-S-V.yaml https://www.indertied.nl/
|
||||
NL-OV-DEL-A-SHCHTTS.yaml https://www.heemkundedelden.nl/welkom/streekarchief/
|
||||
NL-GE-ZAL-A-SAB.yaml https://regionaalarchiefrivierenland.nl/
|
||||
NL-UT-DDO-S-HVD.yaml https://www.historischeverenigingdendolder.nl/
|
||||
NL-FR-SAX-A-GAB.yaml https://www.waadhoeke.nl/
|
||||
NL-DR-DIE-A-GAW.yaml https://www.gemeentewesterveld.nl/
|
||||
NL-NH-AMS-S-GGGRO.yaml https://historischeprojecten.nl/geheugenvanhardenberg/werkgroep-van-riemsdijk/
|
||||
NL-ZH-VLA-I-IE.yaml http://www.istimewa-events.nl
|
||||
NL-DR-GAS-I-NFP.yaml http://www.pijprokers.nl
|
||||
NL-GR-APP-S-WK.yaml https://stichtingvanderwyck-dekempenaer.nl/
|
||||
NL-GR-GRO-R-DCNPP.yaml https://www.rug.nl/research/dnpp/
|
||||
NL-DR-DWI-S-W.yaml https://historiedewijkkoekange.nl/
|
||||
NL-FR-MEN-A-GAM.yaml https://www.waadhoeke.nl/
|
||||
NL-ZH-SCH-I-VSN.yaml http://www.vreugdevuur-scheveningen.nl
|
||||
NL-UT-AME-M-CM.yaml https://cavaleriemuseum.nl/
|
||||
NL-FR-WES-A-GAT.yaml https://www.terschelling.nl/
|
||||
NL-LI-MEC-S-HVM.yaml https://beleefmechelen.nl/
|
||||
NL-NH-ZAA-M-K.yaml https://zaansmuseum.nl/zien-doen/kuiperij/
|
||||
NL-ZH-LEI-M-MG.yaml http://www.hartebrug.nl/museum-greccio
|
||||
NL-LI-MAA-I-BOLVSZ.yaml https://olv-bidweg.nl/
|
||||
NL-ZH-ROT-I-AR.yaml http://www.aflegverenigingrachel%40gmail.com
|
||||
NL-NH-MID-M-BM.yaml https://www.historischgenootschapbeemster.nl/afdelingen/agrarisch-museum-westerhem/
|
||||
NL-NH-AMS-S-WH.yaml https://www.weeshuisjes.nl/
|
||||
NL-ZH-THX-I-BRJH.yaml http://www.berryrutjes.com
|
||||
NL-LI-MAA-I-SFPLS.yaml http://www.slowfood.nl
|
||||
NL-GR-TBX-A-GATB.yaml https://gemeente.groningen.nl/
|
||||
NL-GE-NED-D-EGH.yaml https://www.erfgoedheumen.nl/
|
||||
NL-FL-DRO-S-HD.yaml https://www.historischdronten.nl/
|
||||
NL-NH-AMS-N-IB.yaml https://inmijnbuurt.org/
|
||||
NL-ZH-OUD-M-RO.yaml https://rtm-ouddorp.nl/
|
||||
NL-FR-APP-S-A.yaml https://www.hvappelscha.nl/
|
||||
NL-LI-KER-I-SSSK.yaml http://Schuttersbroederschap%20St.%20Sebastianus
|
||||
NL-ZH-ROT-I-PH.yaml http://www.papiermakerijdehoop.nl
|
||||
NL-UT-UTR-M-MB.yaml https://www.umcutrecht.nl/nl/anatomisch-museum-bleulandinum
|
||||
NL-ZH-THX-I-HTOB.yaml http://www.tilduivenbondonsbelang.nl
|
||||
NL-NH-UIT-S-VOU.yaml https://www.ouduitgeest.nl/
|
||||
NL-DR-FOR-S-F.yaml https://www.oudheidkamerzuidwolde.nl/
|
||||
NL-ZH-ROT-M-K.yaml https://www.dekuip.com/nl/tour-en-museum/arrangementen/feyenoord-museum
|
||||
NL-ZH-DEL-A-HD.yaml https://www.hhdelfland.nl/loket/producten-diensten/bezoek-archief
|
||||
NL-LI-SIT-A-RASG-gemeente_sittard_geleen.yaml https://regioarchiefsittard-geleen.nl/
|
||||
NL-FR-HIN-M-HNSH.yaml https://www.museumhindeloopen.nl/
|
||||
NL-NB-HEL-I-HUH.yaml http://hawuthellemonds.nl
|
||||
NL-LI-SIT-A-RASG-regioarchief_sittard_geleen.yaml https://regioarchiefsittard-geleen.nl/
|
||||
NL-NH-HOO-I-KH.yaml http://www.kortebaanhoofddorp.nl
|
||||
NL-FR-LEE-M-FM-fries_museum.yaml https://www.friesverzetsmuseum.nl/frl
|
||||
NL-NB-HDX-M-KH.yaml https://www.kasteelheeswijk.nl/
|
||||
NL-OV-ALM-A-FA.yaml https://www.fotoarchieftwente.nl/
|
||||
NL-ZH-LEI-I-BN.yaml http://www.bonaken.nl
|
||||
NL-GR-MAR-A-GM.yaml https://www.westerkwartier.nl/
|
||||
NL-NH-HAU-S-HGH.yaml https://historisch-hauwert.nl/
|
||||
NL-GE-ARN-O-G.yaml https://www.gelderland.nl/themas/organisatie/over-de-provincie/huis-der-provincie/provinciale-kunstcollectie
|
||||
NL-GE-ARN-I-A.yaml http://www.1aprilvereniging.nl
|
||||
NL-OV-WIE-I-SM.yaml http://www.suntemartn.nl
|
||||
NL-GE-ARN-I-CIAO.yaml https://orthen.nl/commissie/
|
||||
NL-GE-ARN-I-BM.yaml http://www.bmsootmarsum.nl
|
||||
NL-ZH-VLA-A-SAV.yaml https://www.vlaardingen.nl/stadsarchief/
|
||||
NL-GE-HAR-M-SMH.yaml https://www.stadsmuseum-harderwijk.nl/
|
||||
NL-OV-DEL-S-KVTS.yaml https://kreenk.nl/
|
||||
NL-LI-HEY-S-HVH.yaml https://heibloem.nu/vereniging/heemkundevereniging-heibloem
|
||||
NL-DR-WES-M-MP.yaml https://www.papierknipmuseum.nl/
|
||||
NL-GE-GEN-S-HKG.yaml https://www.historischekringgente.nl/
|
||||
NL-GE-ZUT-M-MZ-stichting_musea_zutphen_stedelijk_museum_zutphen_en_museum_henriette_polak.yaml https://museazutphen.nl/stedelijk-museum-zutphen/
|
||||
NL-NH-GRO-S-OSB.yaml https://www.oudstedebroec.eu/
|
||||
NL-ZH-BER-M-MOTC.yaml https://www.molenaars-otc.nl/
|
||||
NL-ZH-ALB-I-SJPA.yaml http://www.paardenmarkt-alblasserdam.nl
|
||||
NL-GE-ELD-S-E.yaml https://www.historischekringelden.nl/
|
||||
NL-NH-AMS-M-LPMA.yaml https://dehollandschemanege.nl/levend-paardenmuseum/
|
||||
NL-NH-AMS-I-N.yaml http://www.ninsee.nl
|
||||
NL-OV-MAR-A-HAWH.yaml https://proxy.archieven.nl/0/F8BA43C796AD4BEC97A456978826D3AD
|
||||
NL-LI-AME-M-PM.yaml https://www.peelmuseum.nl/
|
||||
NL-FR-LEE-M-KMP.yaml https://www.princessehof.nl/
|
||||
NL-GR-HAR-A-GAH.yaml https://gemeente.groningen.nl/
|
||||
NL-GE-HED-S-HH.yaml https://www.historischmuseumhedel.nl/
|
||||
NL-NH-CAS-S-C-stichting_oer_ij.yaml http://www.alkmaardermeeromgeving.nl
|
||||
NL-OV-HEN-M-BMTH.yaml https://bedrijfsmuseumthaleshengelo.nl/
|
||||
NL-NB-SXH-I-NFS.yaml http://www.nbfs.nl
|
||||
NL-GE-DOO-M-MV.yaml https://museumveluwezoom.nl/
|
||||
NL-GE-OTT-I-BEV.yaml http://buurt.ede-en-veldhuizen.nl
|
||||
NL-FL-DRO-I-NPSF.yaml http://www.npsf.nl
|
||||
NL-OV-KAM-M-SMKIVS.yaml https://stedelijkmuseumkampen.nl/
|
||||
NL-GR-GRO-A-PAG.yaml https://www.provinciegroningen.nl/
|
||||
NL-LI-VIJ-S-NVV.yaml https://www.naobereviele.nl/
|
||||
NL-GE-APE-S-NVI.yaml https://www.korpora.nl/
|
||||
NL-OV-SLA-M-OMS.yaml https://museumslagharen.nl/
|
||||
NL-NH-AMS-U-EFM-eye_filmmuseum_linkedin_ultimate.yaml https://www.eyefilm.nl/
|
||||
NL-NH-AMS-U-EFM-eye_filmmuseum_linkedin_ultimate_enriched.yaml https://www.eyefilm.nl/
|
||||
NL-NH-AMS-U-EFM-eye_filmmuseum_linkedin_bulk_extracted.yaml https://www.eyefilm.nl/
|
||||
NL-NH-AMS-U-EFM-eye_filmmuseum_linkedin_comprehensive.yaml https://www.eyefilm.nl/
|
||||
NL-NH-AMS-U-EFM-eye_filmmuseum_linkedin_enriched.yaml https://www.eyefilm.nl/
|
||||
NL-NH-EAD-S-E.yaml https://www.historischegmond.nl/
|
||||
NL-LI-ROE-A-GAR.yaml https://www.archiefroermond.nl/nl
|
||||
NL-NB-TIL-I-AC.yaml http://demoscene-the-art-of-coding.net/
|
||||
NL-NH-AMS-I-KBP.yaml http://www.koprobekiprodo.wordpress.com
|
||||
NL-FR-LAN-S-L.yaml http://www.beeldbankdeknipe.nl
|
||||
NL-FR-WOM-A-L.yaml https://www.noardeast-fryslan.nl/, https://www.leeuwarden.nl/, https://www.waadhoeke.nl/
|
||||
NL-NB-BOZ-A-WBA-gemeentearchief_bergen_op_zoom.yaml https://proxy.archieven.nl/0/4A9E2B87DB3F1949E053CA00A8C054B4
|
||||
NL-ZH-BER-M-MOT.yaml https://molenaarsoudetractoren.nl/
|
||||
NL-OV-ZWO-I-LE.yaml https://www.recreatieparkentwente.nl/routenetwerkentwente/projecten-verhalend-landschap/
|
||||
NL-FR-MAK-M-M.yaml https://www.museumenmolenmakkinga.nl/
|
||||
NL-FR-TIJ-S-PBMIK.yaml https://deknipe.frl/
|
||||
NL-GR-SLO-A-GAS.yaml https://www.midden-groningen.nl/
|
||||
NL-OV-GIE-M-MGOMU-museumboerderij_t_olde_maat_uus.yaml https://www.museumgiethoorn.nl/
|
||||
NL-NH-AMS-D-A.yaml https://modemuze.nl/
|
||||
NL-NH-AMS-N-LAS.yaml https://louisandriessen.com/
|
||||
NL-UT-VLE-I-KOH.yaml https://www.kovh.nl/
|
||||
NL-GE-ARN-I-S-splika.yaml http://www.splika.nl
|
||||
NL-OV-VOL-I-VV.yaml https://vvvv-vollenhove.nl/
|
||||
NL-OV-ENS-I-LF.yaml http://www.levendefolklore.nl
|
||||
NL-OV-ALM-A-GAA.yaml https://www.almelo.nl/over-almelo/gemeentearchief
|
||||
NL-FR-DRA-A-GS.yaml https://www.smallingerland.nl/
|
||||
NL-DR-DAL-S-AD.yaml https://www.aolddaoln.nl/
|
||||
NL-NB-OVE-I-SCKM.yaml http://www.karolus-magnus.nl
|
||||
NL-NH-OBD-I-NP.yaml http://www.papierknippen.nl
|
||||
NL-ZH-ROT-I-ZN.yaml http://www.zomercarnaval.com
|
||||
NL-OV-KAL-I-HBM.yaml http://www.oudhengelo.nl/index.php/marken-om-hengelo/25-mark-dunsborg/89-muldersfluit-actueel
|
||||
NL-OV-BRO-S-HB.yaml https://shb.collectiebank.nl/beeldbank/start/shb
|
||||
NL-GE-ERM-S-EML.yaml https://erfgoedlov.org/
|
||||
NL-GE-ARN-I-HH.yaml http://harddraverijheemskerk.nl
|
||||
NL-OV-STA-S-HVGS.yaml https://www.historischeverenigingstaphorst.nl/
|
||||
NL-GR-WES-M-MH.yaml https://www.helmantel.nl/museum
|
||||
NL-UT-LEE-S-HVL.yaml https://historischeverenigingleerdam.nl/
|
||||
NL-FR-LEE-M-FM-fries_verzetsmuseum.yaml https://www.friesverzetsmuseum.nl/frl
|
||||
|
|
@ -11,6 +11,17 @@
|
|||
}
|
||||
max_header_size 16KB
|
||||
}
|
||||
|
||||
# Structured JSON logging for all requests
|
||||
log {
|
||||
output file /var/log/caddy/access.log {
|
||||
roll_size 100mb
|
||||
roll_keep 5
|
||||
roll_keep_for 168h
|
||||
}
|
||||
format json
|
||||
level INFO
|
||||
}
|
||||
}
|
||||
|
||||
(sparql_protection) {
|
||||
|
|
@ -200,6 +211,17 @@
|
|||
}
|
||||
|
||||
bronhouder.nl, www.bronhouder.nl {
|
||||
# Site-specific logging for chunk load monitoring
|
||||
log {
|
||||
output file /var/log/caddy/bronhouder.log {
|
||||
roll_size 50mb
|
||||
roll_keep 3
|
||||
roll_keep_for 72h
|
||||
}
|
||||
format json
|
||||
level INFO
|
||||
}
|
||||
|
||||
handle /health {
|
||||
respond "OK" 200
|
||||
}
|
||||
|
|
|
|||
109
infrastructure/scripts/monitor-chunk-404s.sh
Executable file
109
infrastructure/scripts/monitor-chunk-404s.sh
Executable file
|
|
@ -0,0 +1,109 @@
|
|||
#!/bin/bash
# Monitor for 404 errors on JavaScript chunks (stale cache detection).
#
# Usage: ./monitor-chunk-404s.sh [--summary|--tail|--count|--last-hour|--all-404s|-h|--help]
#        With no argument the script behaves like --summary.
#
# Environment overrides (defaults are used when unset or empty):
#   SERVER    SSH target holding the Caddy log   (default: root@91.98.224.44)
#   LOG_FILE  Caddy JSON access log on that host (default: /var/log/caddy/bronhouder.log)

# Abort on the first failing command.
set -e

SERVER="${SERVER:-root@91.98.224.44}"
LOG_FILE="${LOG_FILE:-/var/log/caddy/bronhouder.log}"

# ANSI colour escapes; they are interpreted by `echo -e` at the call sites.
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color
|
||||
|
||||
# Print the usage text: supported options followed by example invocations.
show_help() {
    # Unquoted heredoc so that $0 still expands to the invoked script name.
    cat <<EOF
Monitor chunk load 404 errors from Caddy logs

Usage: $0 [options]

Options:
 --tail Live tail of 404 errors on /assets/*.js
 --count Count 404s by URI (grouped)
 --last-hour Show 404s from the last hour only
 --all-404s Show all 404 errors (not just assets)
 --summary Quick summary of chunk 404s
 -h, --help Show this help

Examples:
 $0 --summary # Quick overview
 $0 --tail # Watch live 404s
 $0 --count # See which chunks are missing most
EOF
}
|
||||
|
||||
# Quick summary: total and unique JS chunk 404s, time of the most recent one,
# and the five most frequently missing chunk URIs.
summary() {
    echo -e "${YELLOW}=== Chunk 404 Summary ===${NC}"

    # The filter is single-quoted so the local shell expands nothing in it.
    # jq has to receive the regex as "/assets/.*\\.js": a lone "\." is not a
    # valid jq string escape and makes the whole filter fail to compile.
    # Output is one "<ts><TAB><uri>" line per matching request, so counting
    # lines counts requests.
    local filter='select(.status == 404 and (.request.uri | test("/assets/.*\\.js"))) | "\(.ts)\t\(.request.uri)"'

    # Read the log once over a single SSH connection and derive every figure
    # from that snapshot. Errors stay suppressed (best effort): a missing log
    # or an unreachable host is reported as zero hits.
    local hits
    hits=$(ssh "$SERVER" "jq -r '$filter' '$LOG_FILE' 2>/dev/null" || true)

    local total=0 unique=0 last=""
    if [ -n "$hits" ]; then
        # tr strips the padding some wc implementations put before the number.
        total=$(printf '%s\n' "$hits" | wc -l | tr -d ' ')
        unique=$(printf '%s\n' "$hits" | cut -f2 | sort -u | wc -l | tr -d ' ')
        last=$(printf '%s\n' "$hits" | tail -1 | cut -f1)
    fi

    echo -e "Total JS chunk 404s: ${RED}$total${NC}"
    echo -e "Unique missing chunks: ${YELLOW}$unique${NC}"

    # Last 404 timestamp
    if [ -n "$last" ]; then
        # Drop the fractional part of the epoch timestamp before formatting.
        local epoch="${last%.*}"
        local last_date
        # BSD/macOS date takes the epoch via -r; GNU date needs -d @<epoch>.
        last_date=$(date -r "$epoch" 2>/dev/null || date -d "@$epoch" 2>/dev/null || echo "unknown")
        echo -e "Last chunk 404: $last_date"
    fi

    echo ""
    echo -e "${GREEN}Top 5 missing chunks:${NC}"
    if [ -n "$hits" ]; then
        printf '%s\n' "$hits" | cut -f2 | sort | uniq -c | sort -rn | head -5
    else
        echo " (none)"
    fi
}
|
||||
|
||||
# Live tail: stream 404s under /assets/ as they are logged (Ctrl+C to stop).
# The log is followed on the server; jq runs locally on the streamed lines.
tail_logs() {
    echo -e "${YELLOW}=== Watching for chunk 404s (Ctrl+C to stop) ===${NC}"
    # -F (follow by name, retry) keeps the stream alive when the log file is
    # rolled and recreated; plain -f would stay attached to the old file.
    ssh "$SERVER" "tail -F '$LOG_FILE'" | jq -r --unbuffered 'select(.status == 404 and (.request.uri | test("/assets/"))) | "\(.ts | todate) | \(.status) | \(.request.uri) | \(.request.remote_ip)"'
}
|
||||
|
||||
# Count by URI: the 20 most frequent /assets/ 404 URIs, highest count first.
count_by_uri() {
    echo -e "${YELLOW}=== 404 Counts by URI ===${NC}"
    # jq runs on the server; grouping and ranking happen locally.
    local jq_filter='select(.status == 404 and (.request.uri | test("/assets/"))) | .request.uri'
    ssh $SERVER "cat $LOG_FILE 2>/dev/null | jq -r '$jq_filter'" \
        | sort \
        | uniq -c \
        | sort -rn \
        | head -20
}
|
||||
|
||||
# Last hour only: up to 50 /assets/ 404s whose timestamp is newer than one
# hour ago, printed as "time | uri | remote ip".
last_hour() {
    # Epoch seconds one hour back: BSD/macOS syntax first, GNU date as fallback.
    HOUR_AGO=$(date -v-1H +%s 2>/dev/null || date -d '1 hour ago' +%s)
    echo -e "${YELLOW}=== Chunk 404s in the last hour ===${NC}"
    # The cutoff is computed locally and baked into the filter text that the
    # remote jq evaluates.
    local jq_filter="select(.status == 404 and (.request.uri | test(\"/assets/\")) and .ts > $HOUR_AGO) | \"\(.ts | todate) | \(.request.uri) | \(.request.remote_ip)\""
    ssh $SERVER "cat $LOG_FILE 2>/dev/null | jq -r '$jq_filter'" | head -50
}
|
||||
|
||||
# All 404s: the 50 most recent 404 responses for any URI, as "time | uri".
all_404s() {
    echo -e "${YELLOW}=== All 404 Errors (last 50) ===${NC}"
    local jq_filter='select(.status == 404) | "\(.ts | todate) | \(.request.uri)"'
    ssh $SERVER "cat $LOG_FILE 2>/dev/null | jq -r '$jq_filter'" | tail -50
}
|
||||
|
||||
# Parse arguments: only the first argument is inspected; a missing or empty
# argument selects the summary view.
mode="${1:---summary}"

case "$mode" in
    -h|--help)   show_help ;;
    --summary)   summary ;;
    --tail)      tail_logs ;;
    --count)     count_by_uri ;;
    --last-hour) last_hour ;;
    --all-404s)  all_404s ;;
    *)
        echo "Unknown option: $1"
        show_help
        exit 1
        ;;
esac
|
||||
|
|
@ -15,6 +15,7 @@ import asyncio
|
|||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
|
|
@ -234,6 +235,94 @@ def ensure_dspy_configured(model: str | None = None) -> None:
|
|||
logger.info(f"DSPy configured with {settings.llm_provider}/{target_model}")
|
||||
|
||||
|
||||
def get_dspy_context():
    """Return a context manager that scopes the shared LM for DSPy calls.

    DSPy 3.x doesn't allow calling dspy.configure() from different async
    tasks, so callers wrap their DSPy invocations in this context instead.

    Usage:
        with get_dspy_context():
            result = some_dspy_module(...)

    Returns:
        dspy.context(lm=_dspy_lm) when the module-level LM has been stored,
        otherwise a no-op context manager.
    """
    import dspy
    from contextlib import nullcontext

    # No LM stored yet: hand back a do-nothing context so `with` still works.
    if _dspy_lm is None:
        return nullcontext()

    return dspy.context(lm=_dspy_lm)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Retry Logic for Transient API Errors
|
||||
# =============================================================================
|
||||
|
||||
def is_retryable_error(error: Exception) -> bool:
    """Check if an error is retryable (transient API errors like 'overloaded').

    The decision is made purely on the lower-cased text of ``str(error)``:
    either it contains one of the known transient-failure phrases, or it
    mentions one of the HTTP status codes 502, 503, 504 or 529 as a
    standalone number.

    Args:
        error: The exception to check

    Returns:
        True if the error is likely transient and worth retrying
    """
    import re

    error_str = str(error).lower()

    retryable_phrases = (
        "overloaded", "rate_limit", "rate limit", "too many requests",
        "temporarily unavailable", "service unavailable",
        "connection reset", "connection refused", "timeout",
    )
    if any(phrase in error_str for phrase in retryable_phrases):
        return True

    # Status codes must not be part of a longer digit run; a plain substring
    # test would treat e.g. "15030 tokens" as an HTTP 503.
    return re.search(r"(?<!\d)(?:502|503|504|529)(?!\d)", error_str) is not None
|
||||
|
||||
|
||||
def call_with_retry(func, *args, max_retries: int = 3, **kwargs) -> Any:
    """Call a function with retry logic for transient API errors.

    ``func`` is always called at least once, even if ``max_retries`` is zero
    or negative. After a failure it is called again only while attempts
    remain and ``is_retryable_error`` classifies the exception as transient.

    NOTE(review): the backoff uses the blocking ``time.sleep``; when this is
    invoked from an ``async def`` handler the event loop is stalled for the
    duration of the wait — confirm that is acceptable for the callers.

    Args:
        func: The function to call
        *args: Positional arguments for the function
        max_retries: Maximum number of attempts in total (default 3)
        **kwargs: Keyword arguments for the function

    Returns:
        The result of the function call

    Raises:
        The exception of the final attempt, or immediately the first
        exception that is not considered retryable.
    """
    # Guard against max_retries <= 0, which would otherwise skip the call
    # entirely and surface as a misleading "Unexpected state" error.
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            out_of_attempts = attempt == attempts - 1
            if out_of_attempts or not is_retryable_error(e):
                # Bare raise keeps the original traceback intact.
                raise

            wait_time = 2 ** attempt  # Exponential backoff: 1s, 2s, 4s, ...
            logger.warning(
                f"Transient API error (attempt {attempt + 1}/{attempts}): {e}. "
                f"Retrying in {wait_time}s..."
            )
            time.sleep(wait_time)

    # Every iteration either returns or raises, so this is unreachable; it is
    # kept so that a future logic slip fails loudly instead of returning None.
    raise RuntimeError("Unexpected state in call_with_retry")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# DSPy Router Interface Mapping Functions
|
||||
# =============================================================================
|
||||
|
|
@ -306,6 +395,7 @@ _dspy_query_router: Any = None
|
|||
_dspy_optimized: bool = False
|
||||
_dspy_bootstrap_optimized: bool = False
|
||||
_dspy_optimized_model_path: str | None = None
|
||||
_dspy_lm: Any = None # Shared LM instance for async-safe DSPy calls
|
||||
|
||||
# Semantic Cache global instance
|
||||
_semantic_cache: Any = None
|
||||
|
|
@ -319,7 +409,7 @@ async def lifespan(app: FastAPI):
|
|||
"""Application lifespan handler."""
|
||||
global _hybrid_retriever, _typedb_retriever, _viz_selector, _http_client
|
||||
global _dspy_pipeline, _dspy_query_router, _dspy_bootstrap_optimized, _dspy_optimized_model_path
|
||||
global _semantic_cache, _embedding_model
|
||||
global _semantic_cache, _embedding_model, _dspy_lm
|
||||
|
||||
# Startup
|
||||
logger.info("GLAM API starting up...")
|
||||
|
|
@ -424,6 +514,7 @@ async def lifespan(app: FastAPI):
|
|||
|
||||
if lm:
|
||||
dspy.configure(lm=lm)
|
||||
_dspy_lm = lm # Store for async-safe context usage
|
||||
|
||||
# Create DSPy pipeline with hybrid retriever for actual data retrieval
|
||||
# The optimized model was trained without ReAct agent
|
||||
|
|
@ -576,8 +667,9 @@ async def generate_sparql(request: GenerateSPARQLRequest) -> GenerateSPARQLRespo
|
|||
about heritage institutions into valid SPARQL queries.
|
||||
"""
|
||||
try:
|
||||
# Ensure DSPy is configured with the requested model (or default)
|
||||
ensure_dspy_configured(model=request.model)
|
||||
# Use async-safe DSPy context (model override not supported in async mode)
|
||||
if request.model:
|
||||
logger.warning(f"Model override '{request.model}' ignored in async mode, using default LM")
|
||||
|
||||
# Import here to avoid loading DSPy until needed
|
||||
from .dspy_sparql import generate_sparql as dspy_generate
|
||||
|
|
@ -588,12 +680,13 @@ async def generate_sparql(request: GenerateSPARQLRequest) -> GenerateSPARQLRespo
|
|||
for msg in request.context
|
||||
]
|
||||
|
||||
# Generate SPARQL
|
||||
result = dspy_generate(
|
||||
question=request.question,
|
||||
language=request.language,
|
||||
context=context,
|
||||
)
|
||||
# Generate SPARQL with async-safe context
|
||||
with get_dspy_context():
|
||||
result = dspy_generate(
|
||||
question=request.question,
|
||||
language=request.language,
|
||||
context=context,
|
||||
)
|
||||
|
||||
return GenerateSPARQLResponse(
|
||||
sparql=result["sparql"],
|
||||
|
|
@ -630,11 +723,11 @@ async def _retrieve_from_sparql(question: str, k: int = 10) -> list[dict[str, An
|
|||
sparql_endpoint = getattr(settings, 'sparql_endpoint', 'http://localhost:7878/query')
|
||||
|
||||
try:
|
||||
# Generate SPARQL from question
|
||||
ensure_dspy_configured()
|
||||
# Generate SPARQL from question using async-safe DSPy context
|
||||
from .dspy_sparql import generate_sparql as dspy_generate
|
||||
|
||||
result = dspy_generate(question=question, language="nl", context=[])
|
||||
with get_dspy_context():
|
||||
result = dspy_generate(question=question, language="nl", context=[])
|
||||
sparql_query = result.get("sparql", "")
|
||||
|
||||
if sparql_query and _http_client:
|
||||
|
|
@ -746,8 +839,14 @@ async def rag_query(request: RAGQueryRequest) -> RAGQueryResponse:
|
|||
logger.warning(f"Cache lookup failed: {e}")
|
||||
|
||||
# Route query using DSPy HeritageQueryRouter (LLM-powered, NOT heuristics)
|
||||
# Uses retry logic for transient API errors (e.g., Anthropic "Overloaded")
|
||||
try:
|
||||
routing = _dspy_query_router(question=request.question, language=request.language)
|
||||
with get_dspy_context():
|
||||
routing = call_with_retry(
|
||||
_dspy_query_router,
|
||||
question=request.question,
|
||||
language=request.language,
|
||||
)
|
||||
intent = _map_dspy_intent_to_query_intent(routing.intent)
|
||||
# Use request.sources if provided, otherwise use DSPy-selected sources
|
||||
if request.sources:
|
||||
|
|
@ -903,8 +1002,14 @@ async def _stream_rag_response(request: RAGQueryRequest) -> AsyncIterator[str]:
|
|||
return
|
||||
|
||||
# Route query using DSPy HeritageQueryRouter (LLM-powered, NOT heuristics)
|
||||
# Uses retry logic for transient API errors (e.g., Anthropic "Overloaded")
|
||||
try:
|
||||
routing = _dspy_query_router(question=request.question, language=request.language)
|
||||
with get_dspy_context():
|
||||
routing = call_with_retry(
|
||||
_dspy_query_router,
|
||||
question=request.question,
|
||||
language=request.language,
|
||||
)
|
||||
intent = _map_dspy_intent_to_query_intent(routing.intent)
|
||||
# Use request.sources if provided, otherwise use DSPy-selected sources
|
||||
if request.sources:
|
||||
|
|
@ -1039,13 +1144,17 @@ async def dspy_rag_query(request: DSPyRAGRequest) -> DSPyRAGResponse:
|
|||
start_time = asyncio.get_event_loop().time()
|
||||
|
||||
try:
|
||||
# Execute DSPy pipeline
|
||||
result = _dspy_pipeline(
|
||||
question=request.question,
|
||||
language=request.language,
|
||||
include_viz=request.include_visualization,
|
||||
use_agent=request.use_agent,
|
||||
)
|
||||
# Execute DSPy pipeline with retry logic for transient API errors
|
||||
# (e.g., Anthropic "Overloaded" errors)
|
||||
# Uses get_dspy_context() for async-safe LM access (DSPy 3.x requirement)
|
||||
with get_dspy_context():
|
||||
result = call_with_retry(
|
||||
_dspy_pipeline,
|
||||
question=request.question,
|
||||
language=request.language,
|
||||
include_viz=request.include_visualization,
|
||||
use_agent=request.use_agent,
|
||||
)
|
||||
|
||||
elapsed_ms = (asyncio.get_event_loop().time() - start_time) * 1000
|
||||
|
||||
|
|
@ -1076,10 +1185,12 @@ async def _stream_dspy_rag_response(request: DSPyRAGRequest) -> AsyncIterator[st
|
|||
|
||||
try:
|
||||
# Use the streaming function from dspy_heritage_rag
|
||||
# Pass the LM instance for async-safe DSPy context (DSPy 3.x requirement)
|
||||
async for chunk in stream_heritage_rag(
|
||||
question=request.question,
|
||||
language=request.language,
|
||||
router=_dspy_query_router,
|
||||
lm=_dspy_lm,
|
||||
):
|
||||
yield chunk
|
||||
except Exception as e:
|
||||
|
|
|
|||
Loading…
Reference in a new issue