feat: Add script to generate Mermaid ER diagrams with instance data from LinkML schemas
- Implemented `generate_mermaid_with_instances.py` to create ER diagrams that include all classes, relationships, enum values, and instance data. - Loaded instance data from YAML files and enriched enum definitions with meaningful annotations. - Configured output paths for generated diagrams in both frontend and schema directories. - Added support for excluding technical classes and limiting the number of displayed enum and instance values for readability.
This commit is contained in:
parent
097d116b72
commit
48a2b26f59
46 changed files with 8469 additions and 193 deletions
|
|
@ -763,6 +763,19 @@ provenance:
|
|||
- rating
|
||||
- reviews
|
||||
- opening_hours
|
||||
youtube:
|
||||
- source_type: youtube_data_api
|
||||
fetch_timestamp: '2025-12-01T15:49:04.188036+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
channel_id: null
|
||||
claims_extracted:
|
||||
- channel_info
|
||||
- subscriber_count
|
||||
- video_count
|
||||
- view_count
|
||||
- recent_videos
|
||||
- video_comments
|
||||
- video_transcripts
|
||||
data_tier_summary:
|
||||
TIER_1_AUTHORITATIVE:
|
||||
- original_entry (NDE CSV)
|
||||
|
|
@ -1204,3 +1217,15 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 70
|
||||
extraction_timestamp: '2025-12-01T12:35:22.923802+00:00'
|
||||
youtube_enrichment:
|
||||
source_url: https://www.youtube.com/channel/UCaONHfdTkBYYpJsl0eqJ4zw
|
||||
fetch_timestamp: '2025-12-01T15:49:04.188036+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
api_version: v3
|
||||
identifier_type: channel_id
|
||||
identifier_value: UCaONHfdTkBYYpJsl0eqJ4z
|
||||
channel:
|
||||
error: 'Channel not found: UCaONHfdTkBYYpJsl0eqJ4z'
|
||||
videos: []
|
||||
videos_count: 0
|
||||
status: SUCCESS
|
||||
|
|
|
|||
|
|
@ -1001,3 +1001,20 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 60
|
||||
extraction_timestamp: '2025-12-01T12:35:23.014695+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Hunebedcentrum
|
||||
platform_url: http://www.hunebedcentrum.eu/
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q2679819
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:19.269100+00:00'
|
||||
- platform_name: Hunebedcentrum Twitter/X
|
||||
platform_url: https://twitter.com/hunebedcentrum
|
||||
platform_type: SOCIAL_MEDIA_TWITTER
|
||||
provenance:
|
||||
source_type: wikidata_p2002
|
||||
wikidata_id: Q2679819
|
||||
data_tier: TIER_2_VERIFIED
|
||||
|
|
|
|||
|
|
@ -446,6 +446,19 @@ provenance:
|
|||
- rating
|
||||
- reviews
|
||||
- opening_hours
|
||||
youtube:
|
||||
- source_type: youtube_data_api
|
||||
fetch_timestamp: '2025-12-01T15:50:48.466418+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
channel_id: UCBUSzZPNjS28NcLdXROqlFA
|
||||
claims_extracted:
|
||||
- channel_info
|
||||
- subscriber_count
|
||||
- video_count
|
||||
- view_count
|
||||
- recent_videos
|
||||
- video_comments
|
||||
- video_transcripts
|
||||
data_tier_summary:
|
||||
TIER_1_AUTHORITATIVE:
|
||||
- original_entry (NDE CSV)
|
||||
|
|
@ -827,3 +840,584 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 60
|
||||
extraction_timestamp: '2025-12-01T12:35:23.086858+00:00'
|
||||
youtube_enrichment:
|
||||
source_url: https://www.youtube.com/drentsarchief
|
||||
fetch_timestamp: '2025-12-01T15:50:48.466418+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
api_version: v3
|
||||
identifier_type: custom_url
|
||||
identifier_value: drentsarchief
|
||||
channel:
|
||||
channel_id: UCBUSzZPNjS28NcLdXROqlFA
|
||||
channel_url: https://www.youtube.com/channel/UCBUSzZPNjS28NcLdXROqlFA
|
||||
title: Drents Archief
|
||||
description: 'Het Drents Archief verzamelt en conserveert historische beeld- en
|
||||
geluidsdragers en maakt de inhoud voor een breed publiek toegankelijk.
|
||||
|
||||
|
||||
Filmmateriaal is kwetsbaar en vernietigt op den duur zichzelf. Oude films vervagen,
|
||||
verkleuren en verbrokkelen. Het materiaal is gevoelig voor schommelingen in
|
||||
temperatuur en luchtvochtigheid. Daarom is het belangrijk dat film wordt bewaard
|
||||
in een depot onder goede klimaatbeheersing. Conservering is dus echt noodzakelijk
|
||||
en dat doen wij als Drents Archief.
|
||||
|
||||
|
||||
Het Drents Archief zet de oude films en geluidsbanden over op moderne digitale
|
||||
dragers. Het originele materiaal kan op die manier onder de beste omstandigheden
|
||||
in het depot bewaard blijven.'
|
||||
custom_url: '@drentsarchief'
|
||||
published_at: '2009-01-23T15:59:56Z'
|
||||
country: NL
|
||||
default_language: null
|
||||
thumbnail_url: https://yt3.ggpht.com/hK5VKnDKf-39fO6pFvHQq-1QxA8Cn683j_lZtcSxOFjCcEGvBJGOSQb7iRBxWvg53Unyip3i=s800-c-k-c0x00ffffff-no-rj
|
||||
banner_url: https://yt3.googleusercontent.com/jRWO4i0_0T_kOmjb8n-UPW-76kdT_9iKzxVMAxjKBS6j8GlbsmSaSCGhSLALVIT-UatfqpgyIA
|
||||
subscriber_count: 2980
|
||||
video_count: 835
|
||||
view_count: 2608160
|
||||
subscriber_count_hidden: false
|
||||
uploads_playlist_id: UUBUSzZPNjS28NcLdXROqlFA
|
||||
videos:
|
||||
- video_id: GptwvdUuLzA
|
||||
video_url: https://www.youtube.com/watch?v=GptwvdUuLzA
|
||||
title: 4 - Het vrouwencafe
|
||||
description: '"Weet uw moeder wel dat u zo''n duur huis heeft gekocht?" Ook Drentse
|
||||
vrouwen werden steeds zelfstandiger, ze kregen fulltimebanen – soms zelfs als
|
||||
kostwinner, en er ontstonden speciale vrouwencafés, zoals op maandagavonden
|
||||
in Het Keldertje in Hoogeveen. Niet iedereen was blij met deze ontwikkelingen.
|
||||
Er werd vreemd gekeken naar vrouwen die zelfstandig een huis kochten. En ook
|
||||
samenkomsten waarin vrouwen spraken over alles wat hen bezighield, waren voor
|
||||
veel mannen onbegrijpelijk. '
|
||||
published_at: '2025-11-13T14:52:15Z'
|
||||
duration: PT47M48S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 3
|
||||
like_count: 0
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/GptwvdUuLzA/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: Iat_KWNamiE
|
||||
video_url: https://www.youtube.com/watch?v=Iat_KWNamiE
|
||||
title: 1 - Maandag is wasdag
|
||||
description: ‘Maandag is wasdag in Vries, op donderdag gaan de vrouwen naar de
|
||||
markt en vrijdag wassen ze de ramen. Een buurvrouw die 's ochtends de deur uit
|
||||
gaat, wordt daarop aangekeken. En ook wie voor de middag in de tuin werkt, pleegt
|
||||
inbreuk op een ongeschreven regel.’ We gaan terug naar het Drenthe van de jaren
|
||||
70. Onder aanvoering van Dolle Mina kwam in die tijd de Randstad de tweede feministische
|
||||
golf op gang. Wat bracht die teweeg in leven van de Drentse (plattelands)vrouw?
|
||||
|
||||
published_at: '2025-11-13T14:51:59Z'
|
||||
duration: PT38M27S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 2
|
||||
like_count: 0
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/Iat_KWNamiE/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: BCz9XnZjQUw
|
||||
video_url: https://www.youtube.com/watch?v=BCz9XnZjQUw
|
||||
title: 3 - De VOS-cursus
|
||||
description: '"Die vrouwen hebben zich ook wat in het hoofd gehaald," zeiden de
|
||||
boeren in Zeijen schamper toen vrouwen zich inschreven voor de cursus Engels
|
||||
van de Bond voor Plattelandsvrouwen. In de jaren 70 kwam de VOS-cursus op. VOS
|
||||
stond voor ‘Vrouwen Oriënteren zich op de Samenleving, maar mannen zeiden vaak
|
||||
gekscherend ‘vrouwen oriënteren zich op de scheiding. Boerin Dini Iepema uit
|
||||
Roden deed mee aan de VOS-cursus ''spreken in het openbaar’. Vijftig jaar later
|
||||
heeft ze haar 5-minutenpraatje dat ze hierv'
|
||||
published_at: '2025-11-13T14:52:09Z'
|
||||
duration: PT49M51S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 1
|
||||
like_count: 0
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/BCz9XnZjQUw/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: MAEAxTDryT4
|
||||
video_url: https://www.youtube.com/watch?v=MAEAxTDryT4
|
||||
title: 2 - Plattelandsvrouwen
|
||||
description: '“We zijn overal geweest, ik vind dus: je kunt overal over meepraten.”
|
||||
Drenthe kende van oudsher veel boerinnen. Uit onderzoek uit 1974 bleek dat 96%
|
||||
van de Drentse boerinnen meehielpen op de boerderij: hooien, kalveren voeren
|
||||
en melken. Steeds meer van hen sloten zich ook aan bij de plattelandsvrouwen
|
||||
en kregen daar bestuurlijke functie, Zoals de nu 83-jarige Ans Stevens: zij
|
||||
kreeg zo de kans zich ook op andere vlakken te ontwikkelen. '
|
||||
published_at: '2025-11-13T14:52:05Z'
|
||||
duration: PT52M31S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 3
|
||||
like_count: 0
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/MAEAxTDryT4/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: O0AU7Wkk16w
|
||||
video_url: https://www.youtube.com/watch?v=O0AU7Wkk16w
|
||||
title: De W van... Wilhelmina Ziekenhuis
|
||||
description: "In de jaren 30 maakte directeur-geneesheer Mook deze film over het\
|
||||
\ reilen en zeilen in het Wilhelmina Ziekenhuis in Assen. We zien artsen en\
|
||||
\ zusters aan het werk, patiënten op het terras en alle verpleegsters worden\
|
||||
\ aan de kijker voorgesteld. Het ziekenhuis zat in die tijd aan de Oosterhoutstraat\
|
||||
\ 11. \n\nIn 2025 schotelen we jullie het ABC van Drenthe op film voor. Uit\
|
||||
\ onze rijke filmcollectie kiezen we elke twee weken een letter. Van Adolfs\
|
||||
\ tot Zuidlaren. Laat je op zaterdagen verrassen door bewe"
|
||||
published_at: '2025-11-08T16:00:36Z'
|
||||
duration: PT2M13S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 221
|
||||
like_count: 8
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/O0AU7Wkk16w/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: C6sYdB9jNeA
|
||||
video_url: https://www.youtube.com/watch?v=C6sYdB9jNeA
|
||||
title: De V van... vlindertuin
|
||||
description: "Wie heeft de vlindertuin in de ‘oude’ dierentuin niet bezocht? Met\
|
||||
\ een schoolreisje of een familiebezoek? Met deze beelden uit de jaren 90 waan\
|
||||
\ je je weer even tussen de gekleurde vlinders in de Emmer kas. Deel jouw herinneringen\
|
||||
\ aan de vlindertuin van het Noorderdierenpark in de comments! \n\nIn 2025\
|
||||
\ schotelen we jullie het ABC van Drenthe op film voor. Uit onze rijke filmcollectie\
|
||||
\ kiezen we elke twee weken een letter. Van Adolfs tot Zuidlaren. Laat je op\
|
||||
\ zaterdagen verrassen door bewegende Dren"
|
||||
published_at: '2025-10-25T15:00:18Z'
|
||||
duration: PT1M51S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 52
|
||||
like_count: 3
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/C6sYdB9jNeA/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: 0KAS61XTNx4
|
||||
video_url: https://www.youtube.com/watch?v=0KAS61XTNx4
|
||||
title: De U van... Uffelte
|
||||
description: "Deze week reizen we af naar Uffelte! Dat deed filmmaker Johan Adolfs\
|
||||
\ in 1950 namelijk ook. Hij schoot er toen een dorpsfilm en legde de dorpsbewoners\
|
||||
\ en het dorp zelf vast op beeld. Adolfs schoof onder meer aan bij de dames\
|
||||
\ van de Plattelandsvrouwen. Herken jij nog iemand op deze beelden? \n\nIn 2025\
|
||||
\ schotelen we jullie het ABC van Drenthe op film voor. Uit onze rijke filmcollectie\
|
||||
\ kiezen we elke twee weken een letter. Van Adolfs tot Zuidlaren. Laat je op\
|
||||
\ zaterdagen verrassen door bewegende Drents"
|
||||
published_at: '2025-10-11T15:00:02Z'
|
||||
duration: PT1M48S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 351
|
||||
like_count: 5
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/0KAS61XTNx4/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: lgOJbYBkx28
|
||||
video_url: https://www.youtube.com/watch?v=lgOJbYBkx28
|
||||
title: De T van... turf
|
||||
description: "Lange tijd werd turf gestoken met de hand, maar ook deze sector\
|
||||
\ werd gemechaniseerd. In deze film zie je onder meer een machine die over de\
|
||||
\ gedroogde turven rijdt. Via smalspoor en vrachtwagen komen de turven bij de\
|
||||
\ fabriek van Purit in Klazienaveen terecht. \n\nIn 2025 schotelen we jullie\
|
||||
\ het ABC van Drenthe op film voor. Uit onze rijke filmcollectie kiezen we elke\
|
||||
\ twee weken een letter. Van Adolfs tot Zuidlaren. Laat je op zaterdagen verrassen\
|
||||
\ door bewegende Drentse beelden door de jaren heen. V"
|
||||
published_at: '2025-09-27T15:01:19Z'
|
||||
duration: PT1M41S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 549
|
||||
like_count: 5
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/lgOJbYBkx28/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: VrMg8fow2H4
|
||||
video_url: https://www.youtube.com/watch?v=VrMg8fow2H4
|
||||
title: De R van... rally
|
||||
description: "In de omgeving van Emmen wordt op 21 juli 1951 de ‘Solex boortoren\
|
||||
\ rally’ gehouden. De organisatie is in handen van ‘de eerste Drentse Solex\
|
||||
\ club Emmen’. 690 deelnemers verschijnen aan de start en zij doen onder meer\
|
||||
\ Coevorden en Schoonebeek aan. De finish is op de Boslaan in Emmen. Hier krijgen\
|
||||
\ de deelnemers een plaquette uitgereikt. \n\nIn 2025 schotelen we jullie het\
|
||||
\ ABC van Drenthe op film voor. Uit onze rijke filmcollectie kiezen we elke\
|
||||
\ twee weken een letter. Van Adolfs tot Zuidlaren. Laat j"
|
||||
published_at: '2025-08-30T15:00:56Z'
|
||||
duration: PT1M40S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 79
|
||||
like_count: 3
|
||||
comment_count: 1
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/VrMg8fow2H4/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: en
|
||||
- video_id: hxng1OQp8wo
|
||||
video_url: https://www.youtube.com/watch?v=hxng1OQp8wo
|
||||
title: De Q van... quiz
|
||||
description: "Wie wint de scholencompetitie 1995? In de finale staan OBS Tynaarlo\
|
||||
\ en OBS De Vlindertuin uit Veenhuizen tegenover elkaar. In het ICO in Assen\
|
||||
\ (nu Podium Zuidhaege) vindt de beslissende quiz plaats. Omroep Assen legt\
|
||||
\ de scholenstrijd vast. Herken je iemand op deze beelden? Tag hem of haar!\
|
||||
\ \n\nIn 2025 schotelen we jullie het ABC van Drenthe op film voor. Uit onze\
|
||||
\ rijke filmcollectie kiezen we elke twee weken een letter. Van Adolfs tot Zuidlaren.\
|
||||
\ Laat je op zaterdagen verrassen door bewegende Dren"
|
||||
published_at: '2025-08-16T15:01:09Z'
|
||||
duration: PT2M28S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 42
|
||||
like_count: 2
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/hxng1OQp8wo/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: en
|
||||
- video_id: fcX4Nw20BjI
|
||||
video_url: https://www.youtube.com/watch?v=fcX4Nw20BjI
|
||||
title: Café 't Keerpunt in Spijkerboor - deel 2
|
||||
description: 'Café ''t Keerpunt in Spijkerboor is waarschijnlijk het oudste nog
|
||||
bestaande café van Drenthe. Dit jaar bestaat het 275 jaar. Kastelein Ivo Dijkema
|
||||
dook daarom in de geschiedenis van zijn kroeg. "We zijn op zoek naar verhalen
|
||||
uit het verleden en ik wil meer kunnen vertellen over de eerste honderd jaar
|
||||
dat het café hier stond."
|
||||
|
||||
|
||||
In deze video neemt Dijkema ons mee naar zijn café in Spijkerboor. Hij laat
|
||||
zie wat er nog te zien is van de rijke geschiedenis van zijn dorpskroeg. Er
|
||||
zit onder een ijzer'
|
||||
published_at: '2025-07-21T10:00:56Z'
|
||||
duration: PT3M33S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 172
|
||||
like_count: 4
|
||||
comment_count: 3
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/fcX4Nw20BjI/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: 5ldr6whsj9s
|
||||
video_url: https://www.youtube.com/watch?v=5ldr6whsj9s
|
||||
title: Café 't Keerpunt in Spijkerboor - deel 1
|
||||
description: "Café 't Keerpunt in Spijkerboor is waarschijnlijk het oudste nog\
|
||||
\ bestaande café van Drenthe. Dit jaar bestaat het 275 jaar. Kastelein Ivo Dijkema\
|
||||
\ dook daarom in de geschiedenis van zijn kroeg: \"Het is echt een authentiek\
|
||||
\ dorpscafé en de geschiedenis is voor ons heel belangrijk, waarom bestaat het\
|
||||
\ als 275 jaar als café?\" \nWat is er over 't Keerpunt te vinden in het Drents\
|
||||
\ Archief? Dijkema bekeek onder meer oude kaarten en ontdekte welke schat aan\
|
||||
\ informatie je uit notariële aktes kunt halen: \"Als"
|
||||
published_at: '2025-07-21T10:00:09Z'
|
||||
duration: PT1M43S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 196
|
||||
like_count: 4
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/5ldr6whsj9s/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: So5xFyHdMEY
|
||||
video_url: https://www.youtube.com/watch?v=So5xFyHdMEY
|
||||
title: Albert, een Drentse loteling naar Moskou
|
||||
description: Albert, een Drentse loteling naar Moskou vertelt het verhaal van
|
||||
de 18-jarige Albert uit het Drentse Krakeel, in 1812 ingeloot om te dienen in
|
||||
het leger van keizer Napoleon. Samen met duizenden andere Nederlandse jongens
|
||||
marcheert Albert als voetsoldaat naar Moskou, waar de winter het Franse leger
|
||||
overvalt. Verzwakt door honger, kou en ziekte weten slechts enkele jongens huiswaarts
|
||||
te keren. Zal Albert Krakeel ooit terugzien?
|
||||
published_at: '2025-06-25T07:40:30Z'
|
||||
duration: PT31M56S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 222
|
||||
like_count: 8
|
||||
comment_count: 1
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/So5xFyHdMEY/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: TVV6fuK9px8
|
||||
video_url: https://www.youtube.com/watch?v=TVV6fuK9px8
|
||||
title: De M van... marathon
|
||||
description: "De eerste TT-marathon was op 26 juni 1953. In dat jaar liepen de\
|
||||
\ deelnemers alleen geen 42, maar 25 kilometer. De route voerde onder meer langs\
|
||||
\ Zeijen en Peest. Winnaar werd Janus van der Zanden in een tijd van 1.29.72.\
|
||||
\ \n\nIn 2025 schotelen we jullie het ABC van Drenthe op film voor. Uit onze\
|
||||
\ rijke filmcollectie kiezen we elke twee weken een letter. Van Adolfs tot Zuidlaren.\
|
||||
\ Laat je op zaterdagen verrassen door bewegende Drentse beelden door de jaren\
|
||||
\ heen. Vandaag is het de beurt aan de…M!\n\nBeel"
|
||||
published_at: '2025-06-21T15:00:44Z'
|
||||
duration: PT2M8S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 107
|
||||
like_count: 2
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/TVV6fuK9px8/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: G6TQ-D7k-f8
|
||||
video_url: https://www.youtube.com/watch?v=G6TQ-D7k-f8
|
||||
title: De L van... landschap
|
||||
description: "Dit is toch echt een typisch Drents landschap? Op de heide grazen\
|
||||
\ schapen en lammetjes springen vrolijk in het rond. De beelden zijn geschoten\
|
||||
\ door Klaas Nijmeijer in 1984. Waar hij dit precies deed, weten we helaas niet.\
|
||||
\ Misschien herkent iemand de plek? \n\nIn 2025 schotelen we jullie het ABC\
|
||||
\ van Drenthe op film voor. Uit onze rijke filmcollectie kiezen we elke twee\
|
||||
\ weken een letter. Van Adolfs tot Zuidlaren. Laat je op zaterdagen verrassen\
|
||||
\ door bewegende Drentse beelden door de jaren heen. Van"
|
||||
published_at: '2025-06-07T15:00:20Z'
|
||||
duration: PT2M18S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 111
|
||||
like_count: 3
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/G6TQ-D7k-f8/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: YDj7JtN2YpY
|
||||
video_url: https://www.youtube.com/watch?v=YDj7JtN2YpY
|
||||
title: De D van... Dansen
|
||||
description: "Dansen op de boerenbruiloft! Dat is het thema van de Opa- en Omadagen\
|
||||
\ deze voorjaarsvakantie! Op deze filmbeelden, gemaakt in de jaren 30, wordt\
|
||||
\ een traditionele boerenbruiloft nagespeeld. Gasten in traditionele kleding\
|
||||
\ voeren elkaar boerenjongens. De mannen roken Goudse stenen pijpen en er wordt\
|
||||
\ gedanst! \n\nIn 2025 schotelen we jullie het ABC van Drenthe op film voor.\
|
||||
\ Uit onze rijke filmcollectie kiezen we elke twee weken een letter. Van Adolfs\
|
||||
\ tot Zuidlaren. Laat je op zaterdagen verrassen door"
|
||||
published_at: '2025-02-15T16:00:36Z'
|
||||
duration: PT1M18S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 96
|
||||
like_count: 2
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/YDj7JtN2YpY/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: rLyA6zJpZ6Q
|
||||
video_url: https://www.youtube.com/watch?v=rLyA6zJpZ6Q
|
||||
title: De C van... Chauffeur
|
||||
description: "Chauffeur Job Berends uit Roden gaat met de VUT! Dat betekent dat\
|
||||
\ hij vervroegd met pensioen mag. Een bus van de FRAM draagt een spandoek met\
|
||||
\ de tekst ‘1948-1981 afscheid na 33 jaar trouwe dienst’. Job Berends krijgt\
|
||||
\ felicitaties en cadeaus van reizigers die bij de bushalte op hem staan te\
|
||||
\ wachten. De laatste rit van deze buschauffeur is op 20 juli 1981. \n\nIn 2025\
|
||||
\ schotelen we jullie het ABC van Drenthe op film voor. Uit onze rijke filmcollectie\
|
||||
\ kiezen we elke twee weken een letter. Van Adolfs t"
|
||||
published_at: '2025-02-01T16:00:57Z'
|
||||
duration: PT1M9S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 208
|
||||
like_count: 8
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/rLyA6zJpZ6Q/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: it
|
||||
- video_id: En5YTDfyog0
|
||||
video_url: https://www.youtube.com/watch?v=En5YTDfyog0
|
||||
title: De B van... Brand Bellevue
|
||||
description: "In februari 1986 brak brand uit bij het Asser zalencentrum Bellevue.\
|
||||
\ De brandweer klom op het dak om daar de brand te blussen. Burgemeester Masman\
|
||||
\ kwam ook langs om de situatie te bekijken. \n\nGing jij in die tijd ook wel\
|
||||
\ eens naar Bellevue? Het werd als restaurant geopend in 1882. In het 100-jarig\
|
||||
\ bestaan was er van alles te beleven: een bruiloft voor honderden gasten, het\
|
||||
\ Kerstvolleybaltoernooi, een grote modeshow of het afscheidsconcert van Cuby\
|
||||
\ & The Blizzards. De brand in 1986 bekende niet h"
|
||||
published_at: '2025-01-18T16:00:10Z'
|
||||
duration: PT1M35S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 161
|
||||
like_count: 3
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/En5YTDfyog0/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: en
|
||||
- video_id: X76O1WNd0qM
|
||||
video_url: https://www.youtube.com/watch?v=X76O1WNd0qM
|
||||
title: De A van... Adolfs
|
||||
description: 'Filmmaker Johan Adolfs is vooral bekend vanwege de dorpsfilms die
|
||||
hij in heel Nederland maakte. In 1966 bezocht hij Schoonoord en de omliggende
|
||||
plaatsen. Hij bracht natuurlijk ook een bezoekje aan Ellert en Brammert! Herken
|
||||
jij iemand op deze film?
|
||||
|
||||
|
||||
In 2025 schotelen we jullie het ABC van Drenthe op film voor. Uit onze rijke
|
||||
filmcollectie kiezen we elke twee weken een letter. Van Adolfs tot Zuidlaren.
|
||||
Laat je op zaterdagen verrassen door bewegende Drentse beelden door de jaren
|
||||
heen. Vandaag is h'
|
||||
published_at: '2025-01-04T16:00:26Z'
|
||||
duration: PT3M22S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 483
|
||||
like_count: 10
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/X76O1WNd0qM/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: 2eyDfn4xH_4
|
||||
video_url: https://www.youtube.com/watch?v=2eyDfn4xH_4
|
||||
title: 'Van Echten: Klederdracht'
|
||||
description: 'Grada Eding Askes maakt met veel rust en precisie haar klederdracht
|
||||
gereed. Ze demonstreert een kanten muts zoals deze in de jaren 20 in Drenthe
|
||||
werd gedragen. Het is een hele klus om alles netjes te krijgen. Zo moet er worden
|
||||
gestreken en geregen, maar uiteindelijk is mevrouw Eding Askes klaar om erop
|
||||
uit te gaan!
|
||||
|
||||
|
||||
De in Assen geboren Frits van Echten (1932-2016) bracht zijn militaire dienst
|
||||
door bij de filmdienst. Daarna werkte hij als fotograaf en bij bioscopen in
|
||||
Den Haag. In 1959 keerde hij'
|
||||
published_at: '2024-12-28T16:00:03Z'
|
||||
duration: PT3M56S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 116
|
||||
like_count: 6
|
||||
comment_count: 1
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/2eyDfn4xH_4/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: n6rXy2_DF3Q
|
||||
video_url: https://www.youtube.com/watch?v=n6rXy2_DF3Q
|
||||
title: 'Van Echten: Vlechten'
|
||||
description: "In de jaren 70 maakte Frits van Echten voor het Drents Museum een\
|
||||
\ serie over Drentse gebruiken, ambachten en beroepen. Deze aflevering gaat\
|
||||
\ het over vlechten. Meneer Pronk uit Grolloo maakt van takjes een gevlochten\
|
||||
\ bijenkorf. Het is overduidelijk dat hij dit vaker heeft gedaan! \n\nDe in\
|
||||
\ Assen geboren Frits van Echten (1932-2016) bracht zijn militaire dienst door\
|
||||
\ bij de filmdienst. Daarna werkte hij als fotograaf en bij bioscopen in Den\
|
||||
\ Haag. In 1959 keerde hij terug naar Assen, waar hij een foto"
|
||||
published_at: '2024-12-21T16:00:41Z'
|
||||
duration: PT1M48S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 118
|
||||
like_count: 5
|
||||
comment_count: 2
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/n6rXy2_DF3Q/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: 8QQT991huUg
|
||||
video_url: https://www.youtube.com/watch?v=8QQT991huUg
|
||||
title: 'Van Echten: Borckerhof'
|
||||
description: "In de jaren 60 verkeerde boerderij de Borckerhof in Orvelte in een\
|
||||
\ vervallen staat. De Saksische boerderij uit halverwege de 19e eeuw moest nodig\
|
||||
\ worden gerenoveerd. Filmmaker Frits van Echten legde dit proces vast. Het\
|
||||
\ was een grote klus waarbij de hele boerderij werd ontmanteld. Nu zit er een\
|
||||
\ groepsaccommodatie in de Borckerhof: je kunt er dus overnachten! \n\nDe in\
|
||||
\ Assen geboren Frits van Echten (1932-2016) bracht zijn militaire dienst door\
|
||||
\ bij de filmdienst. Daarna werkte hij als fotograaf en "
|
||||
published_at: '2024-12-14T16:00:08Z'
|
||||
duration: PT2M34S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 281
|
||||
like_count: 2
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/8QQT991huUg/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: en
|
||||
- video_id: qRaecnHWCco
|
||||
video_url: https://www.youtube.com/watch?v=qRaecnHWCco
|
||||
title: 'Van Echten: Wecken'
|
||||
description: "Weet jij nog hoe de aloude techniek van wecken gaat? Nee? Dan hebben\
|
||||
\ we hier voor jou een cursus wecken uit 1978. Op dit filmpje zie je (waarschijnlijk)\
|
||||
\ mevrouw J. Schoonvelde – Kuipers de boontjes die ze heeft geplukt klaarmaken.\
|
||||
\ Daarna worden de potten met geweckte bonen in de kelder gezet, bij al het\
|
||||
\ andere ingemaakte groente en fruit. De wintervoorraad is aangelegd! \n\n\
|
||||
De in Assen geboren Frits van Echten (1932-2016) bracht zijn militaire dienst\
|
||||
\ door bij de filmdienst. Daarna werkte hij als "
|
||||
published_at: '2024-12-07T16:00:43Z'
|
||||
duration: PT2M19S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 209
|
||||
like_count: 6
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/qRaecnHWCco/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: ___RXae9ElY
|
||||
video_url: https://www.youtube.com/watch?v=___RXae9ElY
|
||||
title: Notariële akten - Archiefonderzoek voor Dummies (aflevering 6)
|
||||
description: 'Notariële akten zijn een verborgen goudmijn vol verhalen en details
|
||||
over je voorouders. Ze bieden een blik op het dagelijks leven, op eigendommen,
|
||||
overeenkomsten en relaties van mensen. Er zijn koopakten, testamenten, huwelijkscontracten.
|
||||
In Drenthe zijn lang niet alle notariële akten geïndiceerd. Indiceren betekent:
|
||||
het doorzoekbaar en daarmee beter vindbaar maken van archiefstukken. Maar deze
|
||||
akten zijn wél gedigitaliseerd. Hoe je ze alsnog kunt vinden op onze website,
|
||||
legt publieksadviseur Jo'
|
||||
published_at: '2024-11-27T13:38:59Z'
|
||||
duration: PT7M7S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 160
|
||||
like_count: 2
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/___RXae9ElY/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: 2FMLXFVr5SQ
|
||||
video_url: https://www.youtube.com/watch?v=2FMLXFVr5SQ
|
||||
title: 'Hiddingh: Demonstratie'
|
||||
description: "Op 2 juni 1979 wordt het dorp Gasselte overspoeld door 25.000 demonstranten.\
|
||||
\ Er dreigt dumping van kernafval in de grote zoutkoepel van Gasselte. Het protest\
|
||||
\ in Gasselte was een van de grootste ooit in Drenthe. Wim Hiddingh legde de\
|
||||
\ grote demonstratie in zijn woonplaats vast. \n\nWim Hiddingh (1938-2016) was\
|
||||
\ in het dagelijks leven onderwijzer, maar zijn grootste hobby was film en fotografie.\
|
||||
\ Zo was hij de vaste fotograaf van het Drents maandblad Oeze Volk. Hiddingh\
|
||||
\ legde alles vast wat er dagelijk"
|
||||
published_at: '2024-11-23T16:00:03Z'
|
||||
duration: PT3M58S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 155
|
||||
like_count: 3
|
||||
comment_count: 2
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/2FMLXFVr5SQ/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
videos_count: 25
|
||||
status: SUCCESS
|
||||
|
|
|
|||
|
|
@ -1303,6 +1303,19 @@ provenance:
|
|||
- rating
|
||||
- reviews
|
||||
- opening_hours
|
||||
youtube:
|
||||
- source_type: youtube_data_api
|
||||
fetch_timestamp: '2025-12-01T15:49:06.614842+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
channel_id: UCIOV1are9TDnwNrgr6Yr3tQ
|
||||
claims_extracted:
|
||||
- channel_info
|
||||
- subscriber_count
|
||||
- video_count
|
||||
- view_count
|
||||
- recent_videos
|
||||
- video_comments
|
||||
- video_transcripts
|
||||
data_tier_summary:
|
||||
TIER_1_AUTHORITATIVE:
|
||||
- original_entry (NDE CSV)
|
||||
|
|
@ -1759,3 +1772,259 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 100
|
||||
extraction_timestamp: '2025-12-01T12:35:23.211895+00:00'
|
||||
youtube_enrichment:
|
||||
source_url: https://www.youtube.com/user/drentsmuseum
|
||||
fetch_timestamp: '2025-12-01T15:49:06.614842+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
api_version: v3
|
||||
identifier_type: username
|
||||
identifier_value: drentsmuseum
|
||||
channel:
|
||||
channel_id: UCIOV1are9TDnwNrgr6Yr3tQ
|
||||
channel_url: https://www.youtube.com/channel/UCIOV1are9TDnwNrgr6Yr3tQ
|
||||
title: drentsmuseum
|
||||
description: Het Drents Museum is bekend om zijn internationale tentoonstellingen
|
||||
over archeologie, kunst 1885-1935, hedendaags realisme en zijn zeer enthousiaste
|
||||
medewerkers.
|
||||
custom_url: '@drentsmuseum'
|
||||
published_at: '2008-09-16T10:01:21Z'
|
||||
country: null
|
||||
default_language: null
|
||||
thumbnail_url: https://yt3.ggpht.com/lxnClZMy4vPi3iccoR9S9SaX4vk_JyQng64QfStqUR0YHPDFOK-xuVniSxfjTEzSPbn2GJt_=s800-c-k-c0x00ffffff-no-rj
|
||||
banner_url: https://yt3.googleusercontent.com/66yI4J2SYIgA_6P3-BQrKeTkx1yjyxeZzeigMWID_ee-jTKhqdfW8CBPy3rIUZwxpWj9NI8KGg
|
||||
subscriber_count: 771
|
||||
video_count: 230
|
||||
view_count: 884235
|
||||
subscriber_count_hidden: false
|
||||
uploads_playlist_id: UUIOV1are9TDnwNrgr6Yr3tQ
|
||||
videos:
|
||||
- video_id: d_1zWCrs-K8
|
||||
video_url: https://www.youtube.com/watch?v=d_1zWCrs-K8
|
||||
title: Microkosmos & Henk Schiffmacher
|
||||
description: 'Henk Schiffmacher verzamelt objecten van over de hele wereld die
|
||||
te maken hebben met tatoeages. Speciaal voor ‘Microkosmos’ koos hij een aantal
|
||||
voorwerpen uit om in het museum te laten zien.
|
||||
|
||||
|
||||
Je bekijkt Schiffmachers collectie en die van de andere verzamelaars nu in ‘Microkosmos''.
|
||||
|
||||
|
||||
Meer weten over Henk Schiffmacher? Op 14 december komt hij naar de Verwonderdag.
|
||||
Tickets zijn verkrijgbaar op onze website.
|
||||
|
||||
|
||||
#Microkosmos #HenkSchiffmacher #tatoeage #tatoeages #museum #tentoonstelling
|
||||
#cultuur #dagje'
|
||||
published_at: '2025-11-21T11:09:50Z'
|
||||
duration: PT44S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 198
|
||||
like_count: 2
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/d_1zWCrs-K8/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl-NL
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: wOrAxNXKFO8
|
||||
video_url: https://www.youtube.com/watch?v=wOrAxNXKFO8
|
||||
title: Stem op Drents Museum voor de VriendenLoterij Museumprijs 2025!
|
||||
description: "Het Drents Museum is genomineerd voor de VriendenLoterij Museumprijs!\
|
||||
\ Wie deze prijs wint, wordt bepaald door middel van stemmen. En breng je je\
|
||||
\ stem uit, dan maak je bovendien kans op hele mooie prijzen! \nBreng je stem\
|
||||
\ uit op: www.drentsmuseum.nl/stem \U0001F64F\U0001F3C6\U0001FA77\n\nIn deze\
|
||||
\ video leggen we uit wat we gaan doen wanneer wij de hoofdprijs winnen. Help\
|
||||
\ jij ons om de hoofdprijs van € 100.000 te winnen en onze droom in vervulling\
|
||||
\ te laten gaan? Alvast bedankt!"
|
||||
published_at: '2025-10-09T07:11:51Z'
|
||||
duration: PT1M
|
||||
definition: hd
|
||||
caption_available: true
|
||||
view_count: 1706
|
||||
like_count: 9
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/wOrAxNXKFO8/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl-NL
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: tnJMlRqEGto
|
||||
video_url: https://www.youtube.com/watch?v=tnJMlRqEGto
|
||||
title: Stem op Drents Museum voor de VriendenLoterij Museumprijs 2025!
|
||||
description: 'Het Drents Museum is genomineerd voor de VriendenLoterij Museumprijs!
|
||||
Help jij ons om de hoofdprijs van € 100.000 te winnen en onze droom in vervulling
|
||||
te laten gaan?
|
||||
|
||||
Breng je stem uit op: www.drentsmuseum.nl/stem 🙏🏆🩷'
|
||||
published_at: '2025-10-09T07:11:56Z'
|
||||
duration: PT11S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 836
|
||||
like_count: 1
|
||||
comment_count: 1
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/tnJMlRqEGto/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl-NL
|
||||
comments:
|
||||
- comment_id: Ugy283RyalMNdLc69DR4AaABAg
|
||||
author_display_name: '@TheSMcBrown'
|
||||
author_channel_url: http://www.youtube.com/@TheSMcBrown
|
||||
text: 🥰
|
||||
like_count: 0
|
||||
published_at: '2025-10-09T18:58:23Z'
|
||||
updated_at: '2025-10-09T18:58:23Z'
|
||||
reply_count: 0
|
||||
comments_fetched: 1
|
||||
- video_id: bdHBoEHnJJQ
|
||||
video_url: https://www.youtube.com/watch?v=bdHBoEHnJJQ
|
||||
title: Stem op Drents Museum voor de VriendenLoterij Museumprijs 2025!
|
||||
description: "Het Drents Museum is genomineerd voor de VriendenLoterij Museumprijs!\
|
||||
\ Wie deze prijs wint, wordt bepaald door middel van stemmen. En breng je je\
|
||||
\ stem uit, dan maak je bovendien kans op hele mooie prijzen! \n\nIn deze video\
|
||||
\ leggen we uit wat we gaan doen wanneer wij de hoofdprijs winnen. Help jij\
|
||||
\ ons om de hoofdprijs van € 100.000 te winnen en onze droom in vervulling te\
|
||||
\ laten gaan? \n\nStem via: https://winnaar.vriendenloterij.nl/museumprijs2025?museumid=57519"
|
||||
published_at: '2025-10-09T07:11:08Z'
|
||||
duration: PT59S
|
||||
definition: hd
|
||||
caption_available: true
|
||||
view_count: 8203
|
||||
like_count: 2
|
||||
comment_count: 1
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/bdHBoEHnJJQ/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl-NL
|
||||
comments:
|
||||
- comment_id: Ugw9_G4VbcgQjNR4i7F4AaABAg
|
||||
author_display_name: '@Kep39584'
|
||||
author_channel_url: http://www.youtube.com/@Kep39584
|
||||
text: Misschien eerst de Helm van Coțofenești terughalen, jullie verdienen zeker
|
||||
geen prijs uitreiking dit jaar, schande.
|
||||
like_count: 1
|
||||
published_at: '2025-10-19T12:36:16Z'
|
||||
updated_at: '2025-10-19T12:36:16Z'
|
||||
reply_count: 0
|
||||
comments_fetched: 1
|
||||
- video_id: zy5dFu42f_w
|
||||
video_url: https://www.youtube.com/watch?v=zy5dFu42f_w
|
||||
title: Het Drents Museum en de Leipziger Schule
|
||||
description: In deze video leggen we uit wat de Leipziger Schule is en wat de
|
||||
relatie is tussen het Drents Museum en de Leipziger Schule.
|
||||
published_at: '2025-09-19T12:46:02Z'
|
||||
duration: PT10M37S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 846
|
||||
like_count: 5
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/zy5dFu42f_w/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl-NL
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: TK8XDe9SIW0
|
||||
video_url: https://www.youtube.com/watch?v=TK8XDe9SIW0
|
||||
title: Prikkelarme rondleiding 'Gen F - 75 jaar figuratieve kunst'
|
||||
description: ''
|
||||
published_at: '2025-09-03T13:33:52Z'
|
||||
duration: PT31M23S
|
||||
definition: hd
|
||||
caption_available: true
|
||||
view_count: 31
|
||||
like_count: 1
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/TK8XDe9SIW0/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl-NL
|
||||
- video_id: viRdUD1V-dg
|
||||
video_url: https://www.youtube.com/watch?v=viRdUD1V-dg
|
||||
title: Microkosmos - De wereld in een Wunderkammer - Drents Museum
|
||||
description: 'In de tentoonstelling Microkosmos – De wereld in een Wunderkammer
|
||||
komen klassieke Wunderkammer-objecten, hedendaagse rariteiten en beeldende kunst
|
||||
samen. Een visueel spektakel waarin de magie van verzamelen tot leven komt.
|
||||
Microkosmos is ook de afscheidstentoonstelling van algemeen directeur Harry
|
||||
Tupan, die alles uit de kast heeft gehaald voor dit grote Wunderkammer-overzicht.
|
||||
|
||||
|
||||
Onder meer tattoo-artiest Henk Schiffmacher, ontdekkingsreiziger Redmond O’Hanlon,
|
||||
schrijver en acteur Ramsey Nasr en '
|
||||
published_at: '2025-08-19T08:08:01Z'
|
||||
duration: PT23S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 6751
|
||||
like_count: 5
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/viRdUD1V-dg/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl-NL
|
||||
- video_id: wYnAjhxv54I
|
||||
video_url: https://www.youtube.com/watch?v=wYnAjhxv54I
|
||||
title: Ontmoet de wereld in Drenthe - Jouw event in het Drents Museum
|
||||
description: Jouw volgende evenement, congres, workshop of teamdag in het Drents
|
||||
Museum? We heten jullie van harte welkom op onze unieke locatie, waar oud en
|
||||
nieuw samengaan in een verrassend gebouw.
|
||||
published_at: '2025-07-30T11:52:42Z'
|
||||
duration: PT54S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 36
|
||||
like_count: 0
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/wYnAjhxv54I/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl-NL
|
||||
- video_id: jj_eTFOyuWY
|
||||
video_url: https://www.youtube.com/watch?v=jj_eTFOyuWY
|
||||
title: Prikkelarme rondleiding door 'Menyala'
|
||||
description: "In deze rondleiding van iets meer dan 1 uur en 8 minuten neemt onze\
|
||||
\ collega je mee door de tentoonstelling 'Menyala - De buitengewone geschiedenis\
|
||||
\ van de Molukkers in Drenthe\". De tentoonstelling was in 2023 te zien in het\
|
||||
\ Drents Museum. \n\nPrikkels zijn in deze rondleiding zo veel mogelijk beperkt.\
|
||||
\ Er zijn dus geen speciale effecten toegepast. De tour is voorzien van voice-over\
|
||||
\ en ondertiteling, maar deze aspecten kunnen indien gewenst uit worden gezet.\n\
|
||||
\nHeb je tips voor ons na het bekijken van "
|
||||
published_at: '2025-07-14T08:15:43Z'
|
||||
duration: PT1H8M40S
|
||||
definition: hd
|
||||
caption_available: true
|
||||
view_count: 52
|
||||
like_count: 0
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/jj_eTFOyuWY/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl-NL
|
||||
- video_id: dVBTIdaxfOc
|
||||
video_url: https://www.youtube.com/watch?v=dVBTIdaxfOc
|
||||
title: Ontdek Drenthe - de app
|
||||
description: "De app 'Ontdek Drenthe' verbindt de collectie van het Drents Museum\
|
||||
\ met bijzondere locaties en andere musea in Drenthe. Met Augmented Reality\
|
||||
\ (AR) beleef je de verhalen over archeologie, kunst en geschiedenis op verschillende\
|
||||
\ locaties.\n \nStap in de voetsporen van de schilders van Drenthe, bezoek het\
|
||||
\ grootste hunebed of ga op zoek naar de vindplaats van het meisje van Yde.\n\
|
||||
\nVindt de app & meer informatie hierover op: https://drentsmuseum.nl/ontdek-drenthe\n\
|
||||
Deze video is gemaakt door DEN Kennisinst"
|
||||
published_at: '2025-07-08T08:22:37Z'
|
||||
duration: PT2M40S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 82
|
||||
like_count: 3
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/dVBTIdaxfOc/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl-NL
|
||||
videos_count: 10
|
||||
status: SUCCESS
|
||||
|
|
|
|||
|
|
@ -222,6 +222,19 @@ provenance:
|
|||
- rating
|
||||
- reviews
|
||||
- opening_hours
|
||||
youtube:
|
||||
- source_type: youtube_data_api
|
||||
fetch_timestamp: '2025-12-01T15:49:09.601504+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
channel_id: null
|
||||
claims_extracted:
|
||||
- channel_info
|
||||
- subscriber_count
|
||||
- video_count
|
||||
- view_count
|
||||
- recent_videos
|
||||
- video_comments
|
||||
- video_transcripts
|
||||
data_tier_summary:
|
||||
TIER_1_AUTHORITATIVE:
|
||||
- original_entry (NDE CSV)
|
||||
|
|
@ -601,3 +614,15 @@ custodian_name:
|
|||
wikidata_id: ''
|
||||
provenance_note: Derived from wikidata_label_nl (web_claims had no valid org_name)
|
||||
extraction_timestamp: '2025-12-01T12:35:23.432573+00:00'
|
||||
youtube_enrichment:
|
||||
source_url: https://www.youtube.com/channel/UCdj5Tn3btqad_ukTOa7YEIQ
|
||||
fetch_timestamp: '2025-12-01T15:49:09.601504+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
api_version: v3
|
||||
identifier_type: channel_id
|
||||
identifier_value: UCdj5Tn3btqad_ukTOa7YEI
|
||||
channel:
|
||||
error: 'Channel not found: UCdj5Tn3btqad_ukTOa7YEI'
|
||||
videos: []
|
||||
videos_count: 0
|
||||
status: SUCCESS
|
||||
|
|
|
|||
|
|
@ -504,3 +504,13 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 100
|
||||
extraction_timestamp: '2025-12-01T12:35:26.134796+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Kazemattenmuseum (Stichting Kornwerderzand)
|
||||
platform_url: http://www.kazemattenmuseum.nl
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q2530771
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:19.103014+00:00'
|
||||
|
|
|
|||
|
|
@ -954,3 +954,13 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 100
|
||||
extraction_timestamp: '2025-12-01T12:35:34.024620+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Nationaal Onderduikmuseum
|
||||
platform_url: https://nationaalonderduikmuseum.nl
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q2710899
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:19.364790+00:00'
|
||||
|
|
|
|||
|
|
@ -816,3 +816,13 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 70
|
||||
extraction_timestamp: '2025-12-01T12:35:34.111039+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Anton Pieck Museum
|
||||
platform_url: http://www.antonpieckmuseum.nl
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q2654815
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:19.166677+00:00'
|
||||
|
|
|
|||
|
|
@ -375,6 +375,19 @@ provenance:
|
|||
- rating
|
||||
- reviews
|
||||
- opening_hours
|
||||
youtube:
|
||||
- source_type: youtube_data_api
|
||||
fetch_timestamp: '2025-12-01T15:47:38.187552+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
channel_id: UChFRnKtFCgrGix8qCTSqtlA
|
||||
claims_extracted:
|
||||
- channel_info
|
||||
- subscriber_count
|
||||
- video_count
|
||||
- view_count
|
||||
- recent_videos
|
||||
- video_comments
|
||||
- video_transcripts
|
||||
data_tier_summary:
|
||||
TIER_1_AUTHORITATIVE:
|
||||
- original_entry (NDE CSV)
|
||||
|
|
@ -727,3 +740,385 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 100
|
||||
extraction_timestamp: '2025-12-01T12:35:34.164064+00:00'
|
||||
youtube_enrichment:
|
||||
source_url: https://www.youtube.com/user/TUApeldoorn
|
||||
fetch_timestamp: '2025-12-01T15:47:38.187552+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
api_version: v3
|
||||
identifier_type: username
|
||||
identifier_value: TUApeldoorn
|
||||
channel:
|
||||
channel_id: UChFRnKtFCgrGix8qCTSqtlA
|
||||
channel_url: https://www.youtube.com/channel/UChFRnKtFCgrGix8qCTSqtlA
|
||||
title: TUApeldoorn
|
||||
description: Aan de Theologische Universiteit Apeldoorn wordt een zesjarige studie
|
||||
theologie aangeboden. In de opleiding staat de wetenschappelijke kwaliteit verbonden
|
||||
aan een gedegen beroepsmatige toerusting centraal. Daarbij is er veel aandacht
|
||||
voor persoonlijkheidsvorming van de toekomstige predikanten en andere werkers
|
||||
in en buiten de kerk.
|
||||
custom_url: '@tuapeldoorn'
|
||||
published_at: '2013-10-10T13:14:51Z'
|
||||
country: null
|
||||
default_language: null
|
||||
thumbnail_url: https://yt3.ggpht.com/WT2foUsKADfDmi82g7r_hORC4uZHG6LSLziv8bZfRLpHpCqPdMpMgbjIVTwrcwN-35qnfHPq=s800-c-k-c0x00ffffff-no-rj
|
||||
banner_url: https://yt3.googleusercontent.com/AwxQqNWwgr49lUETkGVrAsMUhT8JsoMgW02fDtnDo3_SlHndVHq8wh57IqOq5CTMqVFrLwLIfQ
|
||||
subscriber_count: 557
|
||||
video_count: 57
|
||||
view_count: 69955
|
||||
subscriber_count_hidden: false
|
||||
uploads_playlist_id: UUhFRnKtFCgrGix8qCTSqtlA
|
||||
videos:
|
||||
- video_id: 0BhkYRSYJfw
|
||||
video_url: https://www.youtube.com/watch?v=0BhkYRSYJfw
|
||||
title: Ulf Grapenthin | 26 november 2025
|
||||
description: 'Volg ons!
|
||||
|
||||
https://www.tua.nl
|
||||
|
||||
https://www.instagram.com/tuapeldoorn/
|
||||
|
||||
https://www.facebook.com/TUApeldoorn/
|
||||
|
||||
https://twitter.com/TUApeldoorn'
|
||||
published_at: '2025-11-27T01:04:13Z'
|
||||
duration: PT1H38M49S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 172
|
||||
like_count: null
|
||||
comment_count: null
|
||||
tags:
|
||||
- TUA
|
||||
- Master Theology
|
||||
- Theology Netherlands
|
||||
- Theology
|
||||
- Theologie studeren
|
||||
- Studying Theolgy
|
||||
- Theology Apeldoorn
|
||||
- Theological University
|
||||
- Theologische Universiteit
|
||||
- Apeldoorn Universiteit
|
||||
thumbnail_url: https://i.ytimg.com/vi/0BhkYRSYJfw/hqdefault.jpg
|
||||
default_language: af
|
||||
default_audio_language: nl
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: O3OqanQbkL8
|
||||
video_url: https://www.youtube.com/watch?v=O3OqanQbkL8
|
||||
title: Wat is de HGT? | TUA
|
||||
description: 'Heb je altijd al op academisch niveau iets willen doen met theologie?
|
||||
Of wil je je vakgebied of werkveld verrijken met theologische verdieping? Kies
|
||||
voor de master Herbronning Gereformeerde Theologie! Deze master van 60 EC is
|
||||
laagdrempelig, hedendaags, legt de verbinding met jouw (huidig of toekomstig)
|
||||
beroep en kan in deeltijd gevolgd worden. Colleges zijn alleen op woensdag.
|
||||
|
||||
|
||||
Duik diep in de rijkdom van de gereformeerde theologie!
|
||||
|
||||
|
||||
Meer informatie? Kijk op https://www.tua.nl/nl/onderwijs/o/60e'
|
||||
published_at: '2025-10-01T12:23:55Z'
|
||||
duration: PT2M6S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 127
|
||||
like_count: 2
|
||||
comment_count: 0
|
||||
tags:
|
||||
- TUA
|
||||
- Master Theology
|
||||
- Theology Netherlands
|
||||
- Theology
|
||||
- Theologie studeren
|
||||
- Studying Theolgy
|
||||
- Theology Apeldoorn
|
||||
- Theological University
|
||||
- Theologische Universiteit
|
||||
- Apeldoorn Universiteit
|
||||
thumbnail_url: https://i.ytimg.com/vi/O3OqanQbkL8/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: XyWb2cyJvAs
|
||||
video_url: https://www.youtube.com/watch?v=XyWb2cyJvAs
|
||||
title: Promotion S. Joo
|
||||
description: 'Volg ons!
|
||||
|
||||
https://www.tua.nl
|
||||
|
||||
https://www.instagram.com/tuapeldoorn/
|
||||
|
||||
https://www.facebook.com/TUApeldoorn/
|
||||
|
||||
https://twitter.com/TUApeldoorn'
|
||||
published_at: '2025-09-09T04:16:35Z'
|
||||
duration: PT1H26M10S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 217
|
||||
like_count: 6
|
||||
comment_count: 0
|
||||
tags:
|
||||
- TUA
|
||||
- Master Theology
|
||||
- Theology Netherlands
|
||||
- Theology
|
||||
- Theologie studeren
|
||||
- Studying Theolgy
|
||||
- Theology Apeldoorn
|
||||
- Theological University
|
||||
- Theologische Universiteit
|
||||
- Apeldoorn Universiteit
|
||||
thumbnail_url: https://i.ytimg.com/vi/XyWb2cyJvAs/hqdefault.jpg
|
||||
default_language: af
|
||||
default_audio_language: en
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: kgJgJZXJPV8
|
||||
video_url: https://www.youtube.com/watch?v=kgJgJZXJPV8
|
||||
title: Promotion prof. R.S. Goldreich
|
||||
description: 'Volg ons!
|
||||
|
||||
https://www.tua.nl
|
||||
|
||||
https://www.instagram.com/tuapeldoorn/
|
||||
|
||||
https://www.facebook.com/TUApeldoorn/
|
||||
|
||||
https://twitter.com/TUApeldoorn'
|
||||
published_at: '2025-07-03T02:57:45Z'
|
||||
duration: PT1H29M51S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 189
|
||||
like_count: 1
|
||||
comment_count: 0
|
||||
tags:
|
||||
- TUA
|
||||
- Master Theology
|
||||
- Theology Netherlands
|
||||
- Theology
|
||||
- Theologie studeren
|
||||
- Studying Theolgy
|
||||
- Theology Apeldoorn
|
||||
- Theological University
|
||||
- Theologische Universiteit
|
||||
- Apeldoorn Universiteit
|
||||
thumbnail_url: https://i.ytimg.com/vi/kgJgJZXJPV8/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: en
|
||||
comments: []
|
||||
comments_fetched: 0
|
||||
- video_id: sCJYoEF1MaM
|
||||
video_url: https://www.youtube.com/watch?v=sCJYoEF1MaM
|
||||
title: Promotie drs. J.J. Steensma | 17 maart 2025
|
||||
description: 'Volg ons!
|
||||
|
||||
https://www.tua.nl
|
||||
|
||||
https://www.instagram.com/tuapeldoorn/
|
||||
|
||||
https://www.facebook.com/TUApeldoorn/
|
||||
|
||||
https://twitter.com/TUApeldoorn'
|
||||
published_at: '2025-03-17T22:03:59Z'
|
||||
duration: PT1H27M13S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 3019
|
||||
like_count: 30
|
||||
comment_count: 1
|
||||
tags:
|
||||
- TUA
|
||||
- Master Theology
|
||||
- Theology Netherlands
|
||||
- Theology
|
||||
- Theologie studeren
|
||||
- Studying Theolgy
|
||||
- Theology Apeldoorn
|
||||
- Theological University
|
||||
- Theologische Universiteit
|
||||
- Apeldoorn Universiteit
|
||||
thumbnail_url: https://i.ytimg.com/vi/sCJYoEF1MaM/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
comments:
|
||||
- comment_id: UgylfNuWq8ezGFBOK3l4AaABAg
|
||||
author_display_name: '@jackwestra'
|
||||
author_channel_url: http://www.youtube.com/@jackwestra
|
||||
text: Jaap Jan, van harte gefeliciteerd met dit fantastische resultaat. Het
|
||||
onderzoek is prachtig vormgegeven.. Jammer dat ik er niet bij kon zijn.. de
|
||||
uitslag op het eind heb ik met ontroering gevolgd.. tot binnenkort..
|
||||
like_count: 0
|
||||
published_at: '2025-03-17T23:37:55Z'
|
||||
updated_at: '2025-03-17T23:37:55Z'
|
||||
reply_count: 0
|
||||
comments_fetched: 1
|
||||
- video_id: _fbUYD2VRgU
|
||||
video_url: https://www.youtube.com/watch?v=_fbUYD2VRgU
|
||||
title: Inauguratie van prof. dr. A. Versluis | 7 maart 2025
|
||||
description: 'Op 7 maart 2025 vond de inauguratieplechtigheid van prof. dr. A.
|
||||
Versluis als hoogleraar Oude Testament aan de Theologische Universiteit Apeldoorn
|
||||
plaats.
|
||||
|
||||
|
||||
https://www.tua.nl
|
||||
|
||||
https://www.instagram.com/tuapeldoorn/
|
||||
|
||||
https://www.facebook.com/TUApeldoorn/
|
||||
|
||||
https://twitter.com/TUApeldoorn'
|
||||
published_at: '2025-03-08T03:30:28Z'
|
||||
duration: PT1H35M36S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 1806
|
||||
like_count: 10
|
||||
comment_count: 1
|
||||
tags:
|
||||
- TUA
|
||||
- Master Theology
|
||||
- Theology Netherlands
|
||||
- Theology
|
||||
- Theologie studeren
|
||||
- Studying Theolgy
|
||||
- Theology Apeldoorn
|
||||
- Theological University
|
||||
- Theologische Universiteit
|
||||
- Apeldoorn Universiteit
|
||||
thumbnail_url: https://i.ytimg.com/vi/_fbUYD2VRgU/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: AP_1pdV7ryQ
|
||||
video_url: https://www.youtube.com/watch?v=AP_1pdV7ryQ
|
||||
title: Mijn studie theologie bevalt me goed omdat...
|
||||
description: 'Volg ons!
|
||||
|
||||
https://www.tua.nl
|
||||
|
||||
https://www.instagram.com/tuapeldoorn/
|
||||
|
||||
https://www.facebook.com/TUApeldoorn/
|
||||
|
||||
https://twitter.com/TUApeldoorn
|
||||
|
||||
|
||||
#theologie #TUA #TUApeldoorn #TheologischeUniversiteitApeldoorn #theologiestuderen
|
||||
#studiekeuze #welkestudie'
|
||||
published_at: '2024-12-18T09:47:24Z'
|
||||
duration: PT37S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 285
|
||||
like_count: 1
|
||||
comment_count: 0
|
||||
tags:
|
||||
- TUA
|
||||
- Master Theology
|
||||
- Theology Netherlands
|
||||
- Theology
|
||||
- Theologie studeren
|
||||
- Studying Theolgy
|
||||
- Theology Apeldoorn
|
||||
- Theological University
|
||||
- Theologische Universiteit
|
||||
- Apeldoorn Universiteit
|
||||
thumbnail_url: https://i.ytimg.com/vi/AP_1pdV7ryQ/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: ICNOmvkLRmU
|
||||
video_url: https://www.youtube.com/watch?v=ICNOmvkLRmU
|
||||
title: Een leuke module tijdens mijn studie theologie vond ik...
|
||||
description: 'Volg ons!
|
||||
|
||||
https://www.tua.nl
|
||||
|
||||
https://www.instagram.com/tuapeldoorn/
|
||||
|
||||
https://www.facebook.com/TUApeldoorn/
|
||||
|
||||
https://twitter.com/TUApeldoorn
|
||||
|
||||
|
||||
#theologie #TUA #TUApeldoorn #TheologischeUniversiteitApeldoorn #theologiestuderen
|
||||
#studiekeuze #welkestudie'
|
||||
published_at: '2024-12-03T10:42:51Z'
|
||||
duration: PT41S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 404
|
||||
like_count: 5
|
||||
comment_count: 0
|
||||
tags:
|
||||
- TUA
|
||||
- Master Theology
|
||||
- Theology Netherlands
|
||||
- Theology
|
||||
- Theologie studeren
|
||||
- Studying Theolgy
|
||||
- Theology Apeldoorn
|
||||
- Theological University
|
||||
- Theologische Universiteit
|
||||
- Apeldoorn Universiteit
|
||||
thumbnail_url: https://i.ytimg.com/vi/ICNOmvkLRmU/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: BtrAWku9Yek
|
||||
video_url: https://www.youtube.com/watch?v=BtrAWku9Yek
|
||||
title: 'Denken om te dienen | # 8 Persoonlijk geloof en de studie theologie (met
|
||||
Jantine en Jan Willem)'
|
||||
description: In deze aflevering gaan Charlotte en Niels in gesprek met Jan Willem
|
||||
van Panhuis en Jantine Donker, twee studenten van de Theologische Universiteit
|
||||
Apeldoorn. Ze vertellen in deze aflevering over wat de studie theologie hen
|
||||
gebracht heeft. Er komen mooie ontdekkingen langs die de studie hen bracht,
|
||||
maar theologie studeren ook een confronterende kant. Theologie studeren gaat
|
||||
namelijk ook over jezelf als aankomend theoloog. Het is een open en eerlijk
|
||||
gesprek waarin twijfels en hoop ter sprake kome
|
||||
published_at: '2024-11-28T06:00:13Z'
|
||||
duration: PT36M7S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 492
|
||||
like_count: 2
|
||||
comment_count: 0
|
||||
tags: []
|
||||
thumbnail_url: https://i.ytimg.com/vi/BtrAWku9Yek/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
- video_id: n6l4O92qgN0
|
||||
video_url: https://www.youtube.com/watch?v=n6l4O92qgN0
|
||||
title: Promotie G.M. Bosker
|
||||
description: 'Volg ons!
|
||||
|
||||
https://www.tua.nl
|
||||
|
||||
https://www.instagram.com/tuapeldoorn/
|
||||
|
||||
https://www.facebook.com/TUApeldoorn/
|
||||
|
||||
https://twitter.com/TUApeldoorn'
|
||||
published_at: '2024-11-23T03:30:13Z'
|
||||
duration: PT1H27M11S
|
||||
definition: hd
|
||||
caption_available: false
|
||||
view_count: 1540
|
||||
like_count: 5
|
||||
comment_count: 1
|
||||
tags:
|
||||
- TUA
|
||||
- Master Theology
|
||||
- Theology Netherlands
|
||||
- Theology
|
||||
- Theologie studeren
|
||||
- Studying Theolgy
|
||||
- Theology Apeldoorn
|
||||
- Theological University
|
||||
- Theologische Universiteit
|
||||
- Apeldoorn Universiteit
|
||||
thumbnail_url: https://i.ytimg.com/vi/n6l4O92qgN0/hqdefault.jpg
|
||||
default_language: nl
|
||||
default_audio_language: nl
|
||||
videos_count: 10
|
||||
status: SUCCESS
|
||||
|
|
|
|||
|
|
@ -1413,3 +1413,13 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 60
|
||||
extraction_timestamp: '2025-12-01T12:35:34.321297+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Erfgoed Gelderland
|
||||
platform_url: https://erfgoedgelderland.nl/
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q69725772
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:19.925471+00:00'
|
||||
|
|
|
|||
|
|
@ -942,3 +942,13 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 70
|
||||
extraction_timestamp: '2025-12-01T12:35:34.731670+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Erfgoedcentrum Achterhoek en Liemers
|
||||
platform_url: https://www.ecal.nu
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q3448774
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:19.454236+00:00'
|
||||
|
|
|
|||
|
|
@ -1054,3 +1054,20 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 100
|
||||
extraction_timestamp: '2025-12-01T12:35:34.854539+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Flipje & Streekmuseum
|
||||
platform_url: http://www.streekmuseumtiel.nl
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q13636575
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:19.571571+00:00'
|
||||
- platform_name: Flipje & Streekmuseum Twitter/X
|
||||
platform_url: https://twitter.com/Flipjemuseum
|
||||
platform_type: SOCIAL_MEDIA_TWITTER
|
||||
provenance:
|
||||
source_type: wikidata_p2002
|
||||
wikidata_id: Q13636575
|
||||
data_tier: TIER_2_VERIFIED
|
||||
|
|
|
|||
|
|
@ -1194,3 +1194,13 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 100
|
||||
extraction_timestamp: '2025-12-01T12:36:45.132014+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Kasteel Amerongen
|
||||
platform_url: https://www.kasteelamerongen.nl/
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q572269
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:18.875296+00:00'
|
||||
|
|
|
|||
|
|
@ -713,3 +713,13 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 70
|
||||
extraction_timestamp: '2025-12-01T12:36:47.207621+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Museum IJsselstein
|
||||
platform_url: http://www.museumijsselstein.nl
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q28058453
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:19.670308+00:00'
|
||||
|
|
|
|||
|
|
@ -740,3 +740,13 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 70
|
||||
extraction_timestamp: '2025-12-01T12:36:47.509287+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Museum Oud Amelisweerd
|
||||
platform_url: http://www.moa.nl/
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q28956940
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:19.790759+00:00'
|
||||
|
|
|
|||
|
|
@ -1102,3 +1102,41 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 100
|
||||
extraction_timestamp: '2025-12-01T12:36:47.676254+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Museum Speelklok
|
||||
platform_url: https://www.museumspeelklok.nl/
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q1624224
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:18.984317+00:00'
|
||||
- platform_name: Museum Speelklok Facebook
|
||||
platform_url: https://www.facebook.com/MuseumSpeelklok
|
||||
platform_type: SOCIAL_MEDIA_FACEBOOK
|
||||
provenance:
|
||||
source_type: wikidata_p2013
|
||||
wikidata_id: Q1624224
|
||||
data_tier: TIER_2_VERIFIED
|
||||
- platform_name: Museum Speelklok Twitter/X
|
||||
platform_url: https://twitter.com/museumspeelklok
|
||||
platform_type: SOCIAL_MEDIA_TWITTER
|
||||
provenance:
|
||||
source_type: wikidata_p2002
|
||||
wikidata_id: Q1624224
|
||||
data_tier: TIER_2_VERIFIED
|
||||
- platform_name: Museum Speelklok Instagram
|
||||
platform_url: https://www.instagram.com/museumspeelklok
|
||||
platform_type: SOCIAL_MEDIA_INSTAGRAM
|
||||
provenance:
|
||||
source_type: wikidata_p2003
|
||||
wikidata_id: Q1624224
|
||||
data_tier: TIER_2_VERIFIED
|
||||
- platform_name: Museum Speelklok YouTube
|
||||
platform_url: https://www.youtube.com/channel/UCXv_fLzzL7UBnZtRG3A2QLw
|
||||
platform_type: SOCIAL_MEDIA_YOUTUBE
|
||||
provenance:
|
||||
source_type: wikidata_p2397
|
||||
wikidata_id: Q1624224
|
||||
data_tier: TIER_2_VERIFIED
|
||||
|
|
|
|||
|
|
@ -553,3 +553,13 @@ custodian_name:
|
|||
selection_method: priority_ranking
|
||||
selection_priority: 70
|
||||
extraction_timestamp: '2025-12-01T12:36:47.877408+00:00'
|
||||
digital_platforms:
|
||||
- platform_name: Museum Warsenhoeck
|
||||
platform_url: https://www.museumwarsenhoeck.nl
|
||||
platform_type: OFFICIAL_WEBSITE
|
||||
provenance:
|
||||
source_type: wikidata_p856
|
||||
wikidata_id: Q2361897
|
||||
wikidata_property: P856
|
||||
data_tier: TIER_2_VERIFIED
|
||||
discovery_timestamp: '2025-12-01T15:11:19.061600+00:00'
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ identifiers:
|
|||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-7f45-96a5-c14ef7c70861
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-7f45-96a5-c14ef7c70861
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.134408+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJO6dE4bgJxkcRBil_dsYIKsY
|
||||
name: Huygens Institute
|
||||
|
|
@ -263,3 +264,4 @@ web_claims:
|
|||
xpath_match_score: 1.0
|
||||
extraction_method: mailto_link
|
||||
extraction_timestamp: '2025-12-01T12:34:07.295167+00:00'
|
||||
url: http://www.huygens.knaw.nl/
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ identifiers:
|
|||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-7a4e-85df-c78e1e224ec7
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-7a4e-85df-c78e1e224ec7
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.153024+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJO4j9Qb8JxkcRBebeeuFWxDc
|
||||
name: Meertens Instituut
|
||||
|
|
@ -157,3 +158,4 @@ custodian_name:
|
|||
provenance_note: Derived from original_entry.organisatie (no valid web_claims or
|
||||
wikidata)
|
||||
extraction_timestamp: '2025-12-01T12:37:14.913003+00:00'
|
||||
url: http://www.meertens.knaw.nl/
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ identifiers:
|
|||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-7fee-97e6-2dd6367d0c98
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-7fee-97e6-2dd6367d0c98
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.168401+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJMYyNRHekx0cR3u8FZYrhex4
|
||||
name: Kenniscentrum Papier en Karton
|
||||
|
|
@ -149,3 +150,9 @@ custodian_name:
|
|||
provenance_note: Derived from original_entry.organisatie (no valid web_claims or
|
||||
wikidata)
|
||||
extraction_timestamp: '2025-12-01T12:37:14.922732+00:00'
|
||||
url: https://papierknippen.nl/
|
||||
url_discovery:
|
||||
method: web_search
|
||||
search_tool: exa
|
||||
discovery_date: '2025-12-01T15:30:00+00:00'
|
||||
notes: Website discovered via Exa web search - organization dedicated to paper cutting art (papierknipkunst)
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ identifiers:
|
|||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-75a0-a861-debd8f866f11
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-75a0-a861-debd8f866f11
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.194067+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJxQAWkyxyxkcRPllQjy0uHy0
|
||||
name: Regionaal Historisch Centrum Vecht en Venen
|
||||
|
|
@ -317,3 +318,4 @@ web_claims:
|
|||
xpath_match_score: 0.9
|
||||
extraction_method: h1_tag
|
||||
extraction_timestamp: '2025-12-01T12:34:07.372483+00:00'
|
||||
url: http://www.rhcvechtenvenen.nl/
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ identifiers:
|
|||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-7d56-9377-69ff0170e5d4
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-7d56-9377-69ff0170e5d4
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.216400+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJ6XJ4URG3xUcRpa4hALvApIs
|
||||
name: Sociaal en Cultureel Planbureau
|
||||
|
|
@ -212,3 +213,4 @@ web_claims:
|
|||
xpath_match_score: 1.0
|
||||
extraction_method: social_link
|
||||
extraction_timestamp: '2025-12-01T12:34:07.453311+00:00'
|
||||
url: http://www.scp.nl/
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ identifiers:
|
|||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-7cf7-894a-20bd54d3721e
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-7cf7-894a-20bd54d3721e
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.233585+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJO5TyCABrxkcRK4V-8gF-KQc
|
||||
name: Hilversumse Historische Kring Albertus Perk
|
||||
|
|
@ -215,3 +216,4 @@ web_claims:
|
|||
xpath_match_score: 1.0
|
||||
extraction_method: mailto_link
|
||||
extraction_timestamp: '2025-12-01T12:34:07.491709+00:00'
|
||||
url: https://albertusperk.nl/
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ original_entry:
|
|||
type_organisatie: unknown
|
||||
source: nan_isil_2025-11-06
|
||||
type:
|
||||
- U
|
||||
- A
|
||||
entry_index: 1508
|
||||
processing_timestamp: '2025-11-30T09:58:13.945408+00:00'
|
||||
nan_isil_enrichment:
|
||||
|
|
@ -28,19 +28,19 @@ identifiers:
|
|||
assigned_date: '2025-07-24'
|
||||
source: Nationaal Archief ISIL Registry 2025-11-06
|
||||
- identifier_scheme: GHCID
|
||||
identifier_value: NL-OV-KAM-U-C
|
||||
identifier_value: NL-OV-KAM-A-PK
|
||||
- identifier_scheme: GHCID_UUID
|
||||
identifier_value: f7cfb354-1aa0-5526-89ae-2133533c3a5c
|
||||
identifier_url: urn:uuid:f7cfb354-1aa0-5526-89ae-2133533c3a5c
|
||||
identifier_value: 385940ba-7a26-5e47-b457-575d05f0e9ba
|
||||
identifier_url: urn:uuid:385940ba-7a26-5e47-b457-575d05f0e9ba
|
||||
- identifier_scheme: GHCID_UUID_SHA256
|
||||
identifier_value: b49586e3-6da4-8827-b0c5-ec46e9c08eae
|
||||
identifier_url: urn:uuid:b49586e3-6da4-8827-b0c5-ec46e9c08eae
|
||||
identifier_value: 0a2c1b48-6811-8096-bc8a-ad9f14065c47
|
||||
identifier_url: urn:uuid:0a2c1b48-6811-8096-bc8a-ad9f14065c47
|
||||
- identifier_scheme: GHCID_NUMERIC
|
||||
identifier_value: '13012455009712130087'
|
||||
identifier_value: '732990837152104598'
|
||||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-7957-9232-b3b47d8ac543
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-7957-9232-b3b47d8ac543
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.252844+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJEXoq5vJ5yEcRh0suJoCk3v4
|
||||
name: Stadsarchief Kampen
|
||||
|
|
@ -159,19 +160,24 @@ google_maps_enrichment:
|
|||
google_maps_status: SUCCESS
|
||||
google_maps_search_query: Parochiearchief Kampen (PAK), Kampen, Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-OV-KAM-U-C
|
||||
ghcid_current: NL-OV-KAM-A-PK
|
||||
ghcid_original: NL-OV-KAM-U-C
|
||||
ghcid_uuid: f7cfb354-1aa0-5526-89ae-2133533c3a5c
|
||||
ghcid_uuid_sha256: b49586e3-6da4-8827-b0c5-ec46e9c08eae
|
||||
ghcid_numeric: 13012455009712130087
|
||||
ghcid_uuid: 385940ba-7a26-5e47-b457-575d05f0e9ba
|
||||
ghcid_uuid_sha256: 0a2c1b48-6811-8096-bc8a-ad9f14065c47
|
||||
ghcid_numeric: 732990837152104598
|
||||
record_id: 019ad9ec-7cc0-7957-9232-b3b47d8ac543
|
||||
generation_timestamp: '2025-12-01T12:38:04.703815+00:00'
|
||||
generation_timestamp: '2025-12-01T16:00:00+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-OV-KAM-A-PK
|
||||
ghcid_numeric: 732990837152104598
|
||||
valid_from: '2025-12-01T16:00:00+00:00'
|
||||
valid_to: null
|
||||
reason: Corrected GHCID - fixed abbreviation (PK from Parochiearchief Kampen) and institution type (A=Archive, not U=Unknown)
|
||||
- ghcid: NL-OV-KAM-U-C
|
||||
ghcid_numeric: 13012455009712130087
|
||||
valid_from: '2025-12-01T12:38:04.703815+00:00'
|
||||
valid_to: null
|
||||
reason: Initial GHCID assignment (NDE batch import December 2025)
|
||||
valid_to: '2025-12-01T16:00:00+00:00'
|
||||
reason: Initial GHCID assignment (NDE batch import December 2025) - incorrect abbreviation from corrupted custodian_name extraction
|
||||
location_resolution:
|
||||
method: REVERSE_GEOCODE
|
||||
geonames_id: 2753106
|
||||
|
|
@ -188,17 +194,11 @@ ghcid:
|
|||
geonames_id: 2753106
|
||||
custodian_name:
|
||||
claim_type: custodian_name
|
||||
claim_value: Cookiesbeleid
|
||||
raw_value: Cookiesbeleid
|
||||
source_url: https://www.stadsarchiefkampen.nl/
|
||||
retrieved_on: ''
|
||||
xpath: /html/body/div[4]/div/div/div[1]/div[1]/h1
|
||||
html_file: web/1508/stadsarchiefkampen.nl/rendered.html
|
||||
xpath_match_score: 0.9
|
||||
extraction_method: h1_tag
|
||||
selection_method: priority_ranking
|
||||
selection_priority: 70
|
||||
claim_value: Parochiearchief Kampen
|
||||
source: original_entry
|
||||
provenance_note: Derived from NAN ISIL Registry (authoritative). Note - Google Maps found Stadsarchief Kampen which is the city archive housing the parish archive collection. The original H1 extraction captured "Cookiesbeleid" (cookie policy popup).
|
||||
extraction_timestamp: '2025-12-01T12:37:14.987916+00:00'
|
||||
correction_note: Manual correction on 2025-12-01. Original web scraping captured cookie popup H1 tag. Reverted to NAN ISIL authoritative name. Parochiearchief Kampen (PAK) is a distinct collection within Stadsarchief Kampen.
|
||||
web_enrichment:
|
||||
web_archives:
|
||||
- url: https://www.stadsarchiefkampen.nl/
|
||||
|
|
@ -249,3 +249,4 @@ web_claims:
|
|||
xpath_match_score: 0.9
|
||||
extraction_method: h1_tag
|
||||
extraction_timestamp: '2025-12-01T12:34:07.524187+00:00'
|
||||
url: https://www.stadsarchiefkampen.nl/
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ identifiers:
|
|||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-7b68-86b5-0b9e194ec0f9
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-7b68-86b5-0b9e194ec0f9
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.281697+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJZTGcI7Ebg44Rk4p-Ea5vkVI
|
||||
name: Post En Archief
|
||||
|
|
@ -251,3 +252,4 @@ web_claims:
|
|||
xpath_match_score: 1.0
|
||||
extraction_method: social_link
|
||||
extraction_timestamp: '2025-12-01T12:34:07.560668+00:00'
|
||||
url: http://www.bonairegov.com/
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ identifiers:
|
|||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-727e-a3cc-d8e60a6af3ca
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-727e-a3cc-d8e60a6af3ca
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.307521+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJo2Gq7_HGxUcRkYE-4iAJA1I
|
||||
name: Royal Netherlands Institute of Southeast Asian and Caribbean Studies (KITLV-KNAW)
|
||||
|
|
@ -297,3 +298,4 @@ web_claims:
|
|||
xpath_match_score: 0.9
|
||||
extraction_method: h1_tag
|
||||
extraction_timestamp: '2025-12-01T12:34:07.598934+00:00'
|
||||
url: http://www.kitlv.nl/
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ original_entry:
|
|||
type_organisatie: unknown
|
||||
source: nan_isil_2025-11-06
|
||||
type:
|
||||
- U
|
||||
- M
|
||||
entry_index: 1511
|
||||
processing_timestamp: '2025-11-30T09:58:13.945408+00:00'
|
||||
nan_isil_enrichment:
|
||||
|
|
@ -28,19 +28,19 @@ identifiers:
|
|||
assigned_date: '2022-02-18'
|
||||
source: Nationaal Archief ISIL Registry 2025-11-06
|
||||
- identifier_scheme: GHCID
|
||||
identifier_value: NL-ZH-LEI-U-PST-k_pop_a_snapshot_tentoonstelling
|
||||
identifier_value: NL-ZH-LEI-M-WL
|
||||
- identifier_scheme: GHCID_UUID
|
||||
identifier_value: 8b45d9cc-18cc-5d47-99df-27937ed88f95
|
||||
identifier_url: urn:uuid:8b45d9cc-18cc-5d47-99df-27937ed88f95
|
||||
identifier_value: 0b9fef4d-bd59-5cc0-9fe8-1434459dd2a9
|
||||
identifier_url: urn:uuid:0b9fef4d-bd59-5cc0-9fe8-1434459dd2a9
|
||||
- identifier_scheme: GHCID_UUID_SHA256
|
||||
identifier_value: 7eb2d4ab-da89-8ce7-a6b1-5102913526d4
|
||||
identifier_url: urn:uuid:7eb2d4ab-da89-8ce7-a6b1-5102913526d4
|
||||
identifier_value: 79a7aa1f-ecff-86b6-9b68-335c176b4cc7
|
||||
identifier_url: urn:uuid:79a7aa1f-ecff-86b6-9b68-335c176b4cc7
|
||||
- identifier_scheme: GHCID_NUMERIC
|
||||
identifier_value: '9129593229204327655'
|
||||
identifier_value: '8766162253796816566'
|
||||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-71b1-b104-3fb4d93374a5
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-71b1-b104-3fb4d93374a5
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.332093+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJA3e1oO7GxUcR0inKzc4IoYM
|
||||
name: Wereldmuseum Leiden
|
||||
|
|
@ -180,20 +181,24 @@ google_maps_status: SUCCESS
|
|||
google_maps_search_query: Stichting Nationaal Museum voor Wereldculturen (NMvW), Leiden,
|
||||
Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-ZH-LEI-U-PST-k_pop_a_snapshot_tentoonstelling
|
||||
ghcid_current: NL-ZH-LEI-M-WL
|
||||
ghcid_original: NL-ZH-LEI-U-PST-k_pop_a_snapshot_tentoonstelling
|
||||
ghcid_uuid: 8b45d9cc-18cc-5d47-99df-27937ed88f95
|
||||
ghcid_uuid_sha256: 7eb2d4ab-da89-8ce7-a6b1-5102913526d4
|
||||
ghcid_numeric: 9129593229204327655
|
||||
ghcid_uuid: 0b9fef4d-bd59-5cc0-9fe8-1434459dd2a9
|
||||
ghcid_uuid_sha256: 79a7aa1f-ecff-86b6-9b68-335c176b4cc7
|
||||
ghcid_numeric: 8766162253796816566
|
||||
record_id: 019ad9ec-7cc0-71b1-b104-3fb4d93374a5
|
||||
generation_timestamp: '2025-12-01T12:38:04.703815+00:00'
|
||||
generation_timestamp: '2025-12-01T16:00:00+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-ZH-LEI-M-WL
|
||||
ghcid_numeric: 8766162253796816566
|
||||
valid_from: '2025-12-01T16:00:00+00:00'
|
||||
valid_to: null
|
||||
reason: Corrected GHCID - fixed abbreviation (WL from Wereldmuseum Leiden) and institution type (M=Museum). Original was incorrectly based on temporary exhibition title.
|
||||
- ghcid: NL-ZH-LEI-U-PST-k_pop_a_snapshot_tentoonstelling
|
||||
ghcid_numeric: 9129593229204327655
|
||||
valid_from: '2025-12-01T12:38:04.703815+00:00'
|
||||
valid_to: null
|
||||
reason: Initial GHCID assignment (NDE batch import December 2025) - name suffix
|
||||
added to resolve collision
|
||||
valid_to: '2025-12-01T16:00:00+00:00'
|
||||
reason: Initial GHCID assignment (NDE batch import December 2025) - incorrectly based on temporary exhibition H1 tag instead of institution name
|
||||
location_resolution:
|
||||
method: REVERSE_GEOCODE
|
||||
geonames_id: 2751773
|
||||
|
|
@ -208,21 +213,20 @@ ghcid:
|
|||
source: google_maps
|
||||
distance_km: 1.2931170039476427
|
||||
geonames_id: 2751773
|
||||
collision_resolved: true
|
||||
base_ghcid_before_collision: NL-ZH-LEI-U-PST
|
||||
custodian_name:
|
||||
claim_type: custodian_name
|
||||
claim_value: 'K-pop: A Snapshot tentoonstelling'
|
||||
raw_value: 'K-pop: A Snapshot tentoonstelling'
|
||||
claim_value: Wereldmuseum Leiden
|
||||
raw_value: Wereldmuseum Leiden | Eén museum, drie locaties. Welkom bij Wereldmuseum Leiden
|
||||
source_url: https://leiden.wereldmuseum.nl/
|
||||
retrieved_on: ''
|
||||
xpath: /html/body/div[6]/div[2]/main/section/div/div/section/div/div[1]/article/div/div[1]/div/div/article/div/div/div/h1
|
||||
xpath: /html/head/title
|
||||
html_file: web/1511/leiden.wereldmuseum.nl/rendered.html
|
||||
xpath_match_score: 0.9
|
||||
extraction_method: h1_tag
|
||||
selection_method: priority_ranking
|
||||
selection_priority: 70
|
||||
xpath_match_score: 1.0
|
||||
extraction_method: title_tag
|
||||
selection_method: manual_correction
|
||||
selection_priority: 100
|
||||
extraction_timestamp: '2025-12-01T12:37:15.049203+00:00'
|
||||
correction_note: Manual correction - original H1 extraction captured temporary exhibition title "K-pop A Snapshot" instead of institution name. Corrected to use title tag which contains proper institution name.
|
||||
web_enrichment:
|
||||
web_archives:
|
||||
- url: https://leiden.wereldmuseum.nl/
|
||||
|
|
@ -306,3 +310,4 @@ web_claims:
|
|||
xpath_match_score: 0.9
|
||||
extraction_method: h1_tag
|
||||
extraction_timestamp: '2025-12-01T12:34:07.636557+00:00'
|
||||
url: https://leiden.wereldmuseum.nl/
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ original_entry:
|
|||
type_organisatie: unknown
|
||||
source: nan_isil_2025-11-06
|
||||
type:
|
||||
- U
|
||||
- H
|
||||
entry_index: 1512
|
||||
processing_timestamp: '2025-11-30T09:58:13.945408+00:00'
|
||||
nan_isil_enrichment:
|
||||
|
|
@ -28,19 +28,19 @@ identifiers:
|
|||
assigned_date: '2025-09-18'
|
||||
source: Nationaal Archief ISIL Registry 2025-11-06
|
||||
- identifier_scheme: GHCID
|
||||
identifier_value: NL-LI-ROE-U-DCKK
|
||||
identifier_value: NL-LI-ROE-H-DCKK
|
||||
- identifier_scheme: GHCID_UUID
|
||||
identifier_value: d6da362f-6439-5668-8312-bf17c0463b9a
|
||||
identifier_url: urn:uuid:d6da362f-6439-5668-8312-bf17c0463b9a
|
||||
identifier_value: 302e6b73-a57c-51d1-8083-b5c67113174e
|
||||
identifier_url: urn:uuid:302e6b73-a57c-51d1-8083-b5c67113174e
|
||||
- identifier_scheme: GHCID_UUID_SHA256
|
||||
identifier_value: 39693277-3d92-8be7-8fc4-518e0672a7c8
|
||||
identifier_url: urn:uuid:39693277-3d92-8be7-8fc4-518e0672a7c8
|
||||
identifier_value: 1ce71168-1bfb-8a6f-952c-055f1d76c3d6
|
||||
identifier_url: urn:uuid:1ce71168-1bfb-8a6f-952c-055f1d76c3d6
|
||||
- identifier_scheme: GHCID_NUMERIC
|
||||
identifier_value: '4136893220431993831'
|
||||
identifier_value: '2082652491525859951'
|
||||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-7bac-92ca-665cb4af328b
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-7bac-92ca-665cb4af328b
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.350060+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJV4kHaExLx0cRWd7LsD09qBA
|
||||
name: Bisdom Roermond
|
||||
|
|
@ -115,18 +116,23 @@ google_maps_enrichment:
|
|||
google_maps_status: SUCCESS
|
||||
google_maps_search_query: Diocesane Commissie Kerkelijk Kunstbezit, Roermond, Netherlands
|
||||
ghcid:
|
||||
ghcid_current: NL-LI-ROE-U-DCKK
|
||||
ghcid_current: NL-LI-ROE-H-DCKK
|
||||
ghcid_original: NL-LI-ROE-U-DCKK
|
||||
ghcid_uuid: d6da362f-6439-5668-8312-bf17c0463b9a
|
||||
ghcid_uuid_sha256: 39693277-3d92-8be7-8fc4-518e0672a7c8
|
||||
ghcid_numeric: 4136893220431993831
|
||||
ghcid_uuid: 302e6b73-a57c-51d1-8083-b5c67113174e
|
||||
ghcid_uuid_sha256: 1ce71168-1bfb-8a6f-952c-055f1d76c3d6
|
||||
ghcid_numeric: 2082652491525859951
|
||||
record_id: 019ad9ec-7cc0-7bac-92ca-665cb4af328b
|
||||
generation_timestamp: '2025-12-01T12:38:04.703815+00:00'
|
||||
generation_timestamp: '2025-12-01T16:00:00+00:00'
|
||||
ghcid_history:
|
||||
- ghcid: NL-LI-ROE-H-DCKK
|
||||
ghcid_numeric: 2082652491525859951
|
||||
valid_from: '2025-12-01T16:00:00+00:00'
|
||||
valid_to: null
|
||||
reason: Corrected institution type to H=Holy Sites (diocesan commission managing church art collections)
|
||||
- ghcid: NL-LI-ROE-U-DCKK
|
||||
ghcid_numeric: 4136893220431993831
|
||||
valid_from: '2025-12-01T12:38:04.703815+00:00'
|
||||
valid_to: null
|
||||
valid_to: '2025-12-01T16:00:00+00:00'
|
||||
reason: Initial GHCID assignment (NDE batch import December 2025)
|
||||
location_resolution:
|
||||
method: REVERSE_GEOCODE
|
||||
|
|
@ -149,3 +155,9 @@ custodian_name:
|
|||
provenance_note: Derived from original_entry.organisatie (no valid web_claims or
|
||||
wikidata)
|
||||
extraction_timestamp: '2025-12-01T12:37:15.067001+00:00'
|
||||
parent_organization:
|
||||
name: Bisdom Roermond
|
||||
url: https://bisdom-roermond.org/
|
||||
google_maps_place_id: ChIJV4kHaExLx0cRWd7LsD09qBA
|
||||
notes: The Diocesane Commissie Kerkelijk Kunstbezit operates under the Diocese of Roermond. It manages and preserves ecclesiastical art collections for parishes in the diocese. No dedicated website - operates through the diocese's organizational structure.
|
||||
discovery_date: '2025-12-01T16:00:00+00:00'
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ identifiers:
|
|||
- identifier_scheme: RECORD_ID
|
||||
identifier_value: 019ad9ec-7cc0-700e-b28b-eca8c0eed1e6
|
||||
identifier_url: urn:uuid:019ad9ec-7cc0-700e-b28b-eca8c0eed1e6
|
||||
enrichment_status: needs_enrichment
|
||||
enrichment_status: enriched
|
||||
provenance:
|
||||
schema_version: 1.0.0
|
||||
generated_at: '2025-11-30T09:58:13.945408+00:00'
|
||||
|
|
@ -60,6 +60,7 @@ provenance:
|
|||
TIER_4_INFERRED: []
|
||||
notes:
|
||||
- Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment
|
||||
- Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.366271+00:00
|
||||
google_maps_enrichment:
|
||||
place_id: ChIJa55zGyzJxUcRz1Jv8H6AzmQ
|
||||
name: Hist.Genootsch. Oud Soetermeer
|
||||
|
|
@ -209,3 +210,4 @@ web_claims:
|
|||
xpath_match_score: 0.9
|
||||
extraction_method: h1_tag
|
||||
extraction_timestamp: '2025-12-01T12:34:07.695433+00:00'
|
||||
url: http://www.oudsoetermeer.nl/
|
||||
|
|
|
|||
238
docs/YOUTUBE_ENRICHMENT.md
Normal file
238
docs/YOUTUBE_ENRICHMENT.md
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
# YouTube Enrichment for Heritage Custodians
|
||||
|
||||
This document explains how to enrich heritage custodian entries with YouTube channel and video data.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### 1. Get a YouTube API Key
|
||||
|
||||
1. **Go to Google Cloud Console**
|
||||
- Visit: https://console.cloud.google.com/
|
||||
|
||||
2. **Create or Select a Project**
|
||||
- Click on the project dropdown at the top
|
||||
- Click "New Project" or select an existing one
|
||||
- Name it something like "GLAM YouTube Enrichment"
|
||||
|
||||
3. **Enable YouTube Data API v3**
|
||||
- Navigate to "APIs & Services" → "Library"
|
||||
- Search for "YouTube Data API v3"
|
||||
- Click on it and press **Enable**
|
||||
|
||||
4. **Create API Credentials**
|
||||
- Go to "APIs & Services" → "Credentials"
|
||||
- Click "Create Credentials" → "API Key"
|
||||
- Copy the generated API key
|
||||
|
||||
5. **Restrict the API Key (Recommended)**
|
||||
- Click on your new API key to edit it
|
||||
- Under "API restrictions", select "Restrict key"
|
||||
- Select only "YouTube Data API v3"
|
||||
- Click Save
|
||||
|
||||
### 2. Set Environment Variable
|
||||
|
||||
```bash
|
||||
export YOUTUBE_API_KEY='your-api-key-here'
|
||||
```
|
||||
|
||||
Or add to your `.env` file:
|
||||
```
|
||||
YOUTUBE_API_KEY=your-api-key-here
|
||||
```
|
||||
|
||||
### 3. Install Dependencies
|
||||
|
||||
```bash
|
||||
pip install httpx pyyaml
|
||||
|
||||
# For transcript extraction (optional but recommended)
|
||||
brew install yt-dlp # macOS
|
||||
# or
|
||||
pip install yt-dlp
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# Process all entries with YouTube URLs
|
||||
python scripts/enrich_youtube.py
|
||||
|
||||
# Dry run (show what would be done)
|
||||
python scripts/enrich_youtube.py --dry-run
|
||||
|
||||
# Process only first 10 entries
|
||||
python scripts/enrich_youtube.py --limit 10
|
||||
|
||||
# Process a specific entry
|
||||
python scripts/enrich_youtube.py --entry 0146_Q1663974.yaml
|
||||
```
|
||||
|
||||
### Example Output
|
||||
|
||||
```
|
||||
Processing: 0146_Q1663974.yaml
|
||||
Found YouTube URL: https://www.youtube.com/user/TUApeldoorn
|
||||
Fetching channel info for UCxxxxx...
|
||||
Fetching 10 recent videos...
|
||||
Fetching comments for top videos...
|
||||
Fetching transcripts for videos with captions...
|
||||
Status: SUCCESS
|
||||
Channel: Theologische Universiteit Apeldoorn
|
||||
Subscribers: 1,234
|
||||
Videos fetched: 10
|
||||
```
|
||||
|
||||
## Data Collected
|
||||
|
||||
### Channel Information
|
||||
- Channel ID and URL
|
||||
- Channel title and description
|
||||
- Custom URL (e.g., @channelname)
|
||||
- Subscriber count
|
||||
- Total video count
|
||||
- Total view count
|
||||
- Channel creation date
|
||||
- Country
|
||||
- Thumbnail and banner images
|
||||
|
||||
### Video Information (per video)
|
||||
- Video ID and URL
|
||||
- Title and description
|
||||
- Published date
|
||||
- Duration
|
||||
- View count
|
||||
- Like count
|
||||
- Comment count
|
||||
- Tags
|
||||
- Thumbnail
|
||||
- Caption availability
|
||||
- Default language
|
||||
|
||||
### Comments (per video)
|
||||
- Comment ID
|
||||
- Author name and channel URL
|
||||
- Comment text
|
||||
- Like count
|
||||
- Reply count
|
||||
- Published date
|
||||
|
||||
### Transcripts (when available)
|
||||
- Full transcript text
|
||||
- Language
|
||||
- Transcript type (manual or auto-generated)
|
||||
|
||||
## Provenance Tracking
|
||||
|
||||
All extracted data includes full provenance:
|
||||
|
||||
```yaml
|
||||
youtube_enrichment:
|
||||
source_url: https://www.youtube.com/user/TUApeldoorn
|
||||
fetch_timestamp: '2025-12-01T15:30:00+00:00'
|
||||
api_endpoint: https://www.googleapis.com/youtube/v3
|
||||
api_version: v3
|
||||
status: SUCCESS
|
||||
channel:
|
||||
channel_id: UCxxxxxxxxxxxxx
|
||||
channel_url: https://www.youtube.com/channel/UCxxxxxxxxxxxxx
|
||||
title: Theologische Universiteit Apeldoorn
|
||||
subscriber_count: 1234
|
||||
# ... more fields
|
||||
videos:
|
||||
- video_id: abc123xyz
|
||||
video_url: https://www.youtube.com/watch?v=abc123xyz
|
||||
title: Video Title
|
||||
view_count: 5678
|
||||
comments:
|
||||
- comment_id: xyz789
|
||||
text: Great video!
|
||||
like_count: 5
|
||||
transcript:
|
||||
transcript_text: "Full video transcript..."
|
||||
language: nl
|
||||
transcript_type: auto
|
||||
```
|
||||
|
||||
## API Quota
|
||||
|
||||
The YouTube Data API has a default daily quota of **10,000 units** per project:
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Channel info | 1 unit |
|
||||
| Video list | 1 unit |
|
||||
| Video details | 1 unit per video |
|
||||
| Comments | 1 unit per 100 comments |
|
||||
| Search | 100 units |
|
||||
|
||||
**Estimated usage per custodian**: 15-50 units (depending on videos/comments)
|
||||
|
||||
For 100 custodians: ~1,500-5,000 units (well within the daily quota)
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "API key not valid"
|
||||
- Check that the API key is correct
|
||||
- Verify YouTube Data API v3 is enabled
|
||||
- Check that the key isn't restricted to the wrong APIs
|
||||
|
||||
### "Quota exceeded"
|
||||
- Wait until the next day (quota resets at midnight Pacific Time)
|
||||
- Or request a quota increase in Google Cloud Console
|
||||
|
||||
### "Channel not found"
|
||||
- The channel may have been deleted
|
||||
- The URL format may not be recognized
|
||||
- Try using the channel ID directly
|
||||
|
||||
### "Comments disabled"
|
||||
- Some videos have comments disabled
|
||||
- The script handles this gracefully
|
||||
|
||||
### "No transcript available"
|
||||
- Not all videos have captions
|
||||
- Auto-generated captions may not be available for all languages
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Entry YAML file
|
||||
↓
|
||||
Find YouTube URL from:
|
||||
- web_claims.social_youtube
|
||||
- wikidata_enrichment.P2397
|
||||
↓
|
||||
Resolve channel ID (handle → channel ID)
|
||||
↓
|
||||
Fetch via YouTube Data API v3:
|
||||
- Channel info
|
||||
- Recent videos
|
||||
- Video details
|
||||
- Comments
|
||||
↓
|
||||
Fetch via yt-dlp:
|
||||
- Transcripts/captions
|
||||
↓
|
||||
Add youtube_enrichment section
|
||||
Update provenance
|
||||
↓
|
||||
Save YAML file
|
||||
```
|
||||
|
||||
## Related Scripts
|
||||
|
||||
- `scripts/enrich_wikidata.py` - Wikidata enrichment
|
||||
- `scripts/enrich_google_maps.py` - Google Maps enrichment
|
||||
- `scripts/fetch_website_playwright.py` - Website archiving
|
||||
- `mcp_servers/social_media/server.py` - MCP server for social media
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
- [ ] Track channel subscriber growth over time
|
||||
- [ ] Extract video chapters/timestamps
|
||||
- [ ] Analyze video categories and topics
|
||||
- [ ] Cross-reference with other social media
|
||||
- [ ] Detect playlists relevant to heritage
|
||||
|
|
@ -38,7 +38,7 @@ Found official websites for **16 entries** by querying Wikidata property P856:
|
|||
|
||||
## URLs Discovered via Web Search (Exa)
|
||||
|
||||
Found URLs for **9 additional entries**:
|
||||
Found URLs for **10 additional entries**:
|
||||
|
||||
| Entry ID | Institution | URL | Notes |
|
||||
|----------|-------------|-----|-------|
|
||||
|
|
@ -50,7 +50,14 @@ Found URLs for **9 additional entries**:
|
|||
| 0715 | HDC Protestants Erfgoed | https://www.hdcvu.nl/ | Within VU Library |
|
||||
| 0729 | Historische Vereniging Staphorst | https://www.historischeverenigingstaphorst.nl/ | |
|
||||
| 0851 | Historische Vereniging Den Dolder | https://www.historischeverenigingdendolder.nl/ | |
|
||||
| 1170 | Nederlandse Vereniging voor Papierknipkunst | https://papierknippen.nl/ | |
|
||||
| 1170 | Nederlandse Vereniging voor Papierknipkunst | https://papierknippen.nl/ | Original NDE entry |
|
||||
| 1504 | Nederlandse Vereniging voor Papierknipkunst | https://papierknippen.nl/ | Same org, from NAN ISIL 2025-11-06 |
|
||||
|
||||
## Entries Without Dedicated Websites (Parent Organization Only)
|
||||
|
||||
| Entry ID | Institution | Parent Organization | Notes |
|
||||
|----------|-------------|---------------------|-------|
|
||||
| 1512 | Diocesane Commissie Kerkelijk Kunstbezit | Bisdom Roermond (https://bisdom-roermond.org/) | Commission managing diocesan church art - operates under diocese, no dedicated website |
|
||||
|
||||
## Problematic Entries
|
||||
|
||||
|
|
@ -142,4 +149,23 @@ These entries have no discoverable website. Many are:
|
|||
|
||||
---
|
||||
*Generated by NDE URL Discovery workflow*
|
||||
*Last updated: 2025-12-01*
|
||||
*Last updated: 2025-12-01T16:30:00+00:00*
|
||||
|
||||
## Session Updates - December 2025
|
||||
|
||||
### 2025-12-01: NAN ISIL Batch Corrections
|
||||
|
||||
Fixed several issues with entries 1502-1513 (NAN ISIL 2025-11-06 batch):
|
||||
|
||||
| Entry ID | Issue | Resolution |
|
||||
|----------|-------|------------|
|
||||
| 1504 | Missing URL | Added https://papierknippen.nl/ (discovered via Exa) |
|
||||
| 1508 | Wrong custodian_name ("Cookiesbeleid") | Corrected to "Parochiearchief Kampen" from NAN ISIL registry |
|
||||
| 1508 | Wrong institution type (U) | Changed to A (Archive) |
|
||||
| 1508 | Incorrect GHCID | Regenerated: NL-OV-KAM-A-PK |
|
||||
| 1511 | Wrong custodian_name (exhibition title) | Already corrected to "Wereldmuseum Leiden" |
|
||||
| 1511 | Wrong institution type (U) | Changed to M (Museum) |
|
||||
| 1511 | GHCID based on exhibition title | Regenerated: NL-ZH-LEI-M-WL |
|
||||
| 1512 | Wrong institution type (U) | Changed to H (Holy Sites - diocesan heritage commission) |
|
||||
| 1512 | No website info | Added parent organization note (Bisdom Roermond) |
|
||||
| 1512 | Incorrect GHCID | Regenerated: NL-LI-ROE-H-DCKK |
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"generated": "2025-12-01T15:00:54.169Z",
|
||||
"generated": "2025-12-01T15:56:31.862Z",
|
||||
"version": "1.0.0",
|
||||
"categories": [
|
||||
{
|
||||
|
|
|
|||
|
|
@ -787,6 +787,28 @@
|
|||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
|
||||
}
|
||||
|
||||
/* Desktop: Hide default bottom tip when popup is positioned horizontally */
|
||||
@media (min-width: 769px) {
|
||||
.institution-popup .leaflet-popup-tip-container {
|
||||
display: none;
|
||||
}
|
||||
|
||||
/* Add subtle left/right border indicator instead of tip */
|
||||
.institution-popup .leaflet-popup-content-wrapper {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.institution-popup .leaflet-popup-content-wrapper::before {
|
||||
content: '';
|
||||
position: absolute;
|
||||
top: 50%;
|
||||
transform: translateY(-50%);
|
||||
width: 0;
|
||||
height: 0;
|
||||
border: 8px solid transparent;
|
||||
}
|
||||
}
|
||||
|
||||
/* URL Filter Bar */
|
||||
.url-filter-bar {
|
||||
display: flex;
|
||||
|
|
|
|||
|
|
@ -74,6 +74,7 @@ interface Photo {
|
|||
interface GHCID {
|
||||
current: string;
|
||||
uuid: string;
|
||||
numeric?: string;
|
||||
}
|
||||
|
||||
interface Identifier {
|
||||
|
|
@ -121,7 +122,13 @@ interface Institution {
|
|||
social_media?: SocialMedia;
|
||||
verified_name?: string;
|
||||
name_source?: string;
|
||||
isil?: string;
|
||||
isil?: {
|
||||
code: string;
|
||||
name?: string;
|
||||
city?: string;
|
||||
assigned_date?: string;
|
||||
source?: string;
|
||||
};
|
||||
museum_register?: {
|
||||
name?: string;
|
||||
province?: string;
|
||||
|
|
@ -481,16 +488,37 @@ export default function NDEMapPage() {
|
|||
${inst.ghcid ? `
|
||||
<div class="popup-identifiers">
|
||||
<span class="identifier-badge ghcid clickable"
|
||||
title="${language === 'nl' ? 'Klik om UUID te kopiëren' : 'Click to copy UUID'}: ${inst.ghcid.uuid}"
|
||||
title="${language === 'nl' ? 'Klik om Numerieke ID te zien' : 'Click to see Numeric ID'}"
|
||||
data-uuid="${inst.ghcid.uuid}"
|
||||
onclick="navigator.clipboard.writeText('${inst.ghcid.uuid}').then(() => { this.classList.add('copied'); setTimeout(() => this.classList.remove('copied'), 1500); })">
|
||||
data-numeric="${inst.ghcid.numeric || ''}"
|
||||
data-current="${inst.ghcid.current}"
|
||||
data-showing="current"
|
||||
onclick="
|
||||
const showing = this.dataset.showing;
|
||||
if (showing === 'current') {
|
||||
this.innerHTML = '🔢 ' + (this.dataset.numeric || 'N/A');
|
||||
this.dataset.showing = 'numeric';
|
||||
this.title = '${language === 'nl' ? 'Klik voor UUID' : 'Click for UUID'}';
|
||||
} else if (showing === 'numeric') {
|
||||
this.innerHTML = '🆔 ' + this.dataset.uuid;
|
||||
this.dataset.showing = 'uuid';
|
||||
this.title = '${language === 'nl' ? 'Klik voor GHCID' : 'Click for GHCID'}';
|
||||
navigator.clipboard.writeText(this.dataset.uuid);
|
||||
this.classList.add('copied');
|
||||
setTimeout(() => this.classList.remove('copied'), 1500);
|
||||
} else {
|
||||
this.innerHTML = '🏛️ ' + this.dataset.current;
|
||||
this.dataset.showing = 'current';
|
||||
this.title = '${language === 'nl' ? 'Klik om Numerieke ID te zien' : 'Click to see Numeric ID'}';
|
||||
}
|
||||
">
|
||||
🏛️ ${inst.ghcid.current}
|
||||
</span>
|
||||
${inst.isil ? `<span class="identifier-badge isil" title="ISIL Code">📋 ${inst.isil}</span>` : ''}
|
||||
${inst.isil?.code ? `<span class="identifier-badge isil" title="ISIL Code: ${inst.isil.code}${inst.isil.assigned_date ? ' (assigned ' + inst.isil.assigned_date + ')' : ''}">📋 ${inst.isil.code}</span>` : ''}
|
||||
</div>
|
||||
` : inst.isil ? `
|
||||
` : inst.isil?.code ? `
|
||||
<div class="popup-identifiers">
|
||||
<span class="identifier-badge isil" title="ISIL Code">📋 ${inst.isil}</span>
|
||||
<span class="identifier-badge isil" title="ISIL Code: ${inst.isil.code}${inst.isil.assigned_date ? ' (assigned ' + inst.isil.assigned_date + ')' : ''}">📋 ${inst.isil.code}</span>
|
||||
</div>
|
||||
` : ''}
|
||||
${inst.museum_register ? `
|
||||
|
|
@ -530,14 +558,54 @@ export default function NDEMapPage() {
|
|||
</div>
|
||||
`;
|
||||
|
||||
// Bind popup with autoPan disabled to prevent camera movement
|
||||
// The popup will appear in available space without moving the map
|
||||
// Responsive popup positioning:
|
||||
// - Desktop (>768px): Position left or right of marker based on available space
|
||||
// - Mobile (≤768px): Position above marker (default Leaflet behavior)
|
||||
const isMobile = window.innerWidth <= 768;
|
||||
|
||||
// For desktop, we'll dynamically position when popup opens
|
||||
marker.bindPopup(popupContent, {
|
||||
autoPan: false,
|
||||
maxWidth: 400,
|
||||
minWidth: 280,
|
||||
className: 'institution-popup',
|
||||
// Default offset for mobile (above marker)
|
||||
offset: isMobile ? [0, -10] : [0, 0],
|
||||
});
|
||||
|
||||
// For desktop: position popup left or right based on marker position
|
||||
if (!isMobile) {
|
||||
marker.on('click', function(e) {
|
||||
const markerPoint = map.latLngToContainerPoint(e.latlng);
|
||||
const mapWidth = map.getSize().x;
|
||||
|
||||
// Determine if marker is on left or right half of map
|
||||
const isOnRightHalf = markerPoint.x > mapWidth / 2;
|
||||
|
||||
// Get popup and reposition it
|
||||
setTimeout(() => {
|
||||
const popup = marker.getPopup();
|
||||
if (popup && popup.isOpen()) {
|
||||
const popupElement = popup.getElement();
|
||||
if (popupElement) {
|
||||
// Get popup width
|
||||
const popupWidth = popupElement.offsetWidth || 300;
|
||||
|
||||
// Calculate horizontal offset: popup appears on opposite side of marker
|
||||
// Add some padding (30px) from the marker
|
||||
const horizontalOffset = isOnRightHalf
|
||||
? -(popupWidth / 2 + 30) // Move left
|
||||
: (popupWidth / 2 + 30); // Move right
|
||||
|
||||
// Update popup offset and position
|
||||
popup.options.offset = [horizontalOffset, 0];
|
||||
popup.update();
|
||||
}
|
||||
}
|
||||
}, 10);
|
||||
});
|
||||
}
|
||||
|
||||
marker.addTo(map);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -111,6 +111,46 @@
|
|||
color: #3498db;
|
||||
}
|
||||
|
||||
.summary-card.accent-red .card-value {
|
||||
color: #e74c3c;
|
||||
}
|
||||
|
||||
.summary-card.accent-slate .card-value {
|
||||
color: #64748b;
|
||||
}
|
||||
|
||||
.summary-card.accent-amber .card-value {
|
||||
color: #d97706;
|
||||
}
|
||||
|
||||
.summary-card.accent-indigo .card-value {
|
||||
color: #6366f1;
|
||||
}
|
||||
|
||||
.summary-card.highlight {
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.summary-card.highlight .card-value {
|
||||
color: white;
|
||||
}
|
||||
|
||||
.summary-card.highlight .card-label {
|
||||
color: rgba(255, 255, 255, 0.9);
|
||||
}
|
||||
|
||||
/* Overview stats content styling */
|
||||
.overview-stats-content {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.overview-stats-content .summary-cards {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
/* Charts Section */
|
||||
.charts-section {
|
||||
max-width: 1200px;
|
||||
|
|
|
|||
|
|
@ -2540,77 +2540,117 @@ export default function NDEStatsPage() {
|
|||
</div>
|
||||
)}
|
||||
|
||||
{/* Summary Cards */}
|
||||
<section className="summary-section">
|
||||
<div className="summary-cards">
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{stats.summary.total_institutions.toLocaleString()}</div>
|
||||
<div className="card-label">{t('totalInstitutions')}</div>
|
||||
</div>
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{(stats.summary.enriched || stats.summary.with_wikidata).toLocaleString()}</div>
|
||||
<div className="card-label">{t('enrichedRecords')}</div>
|
||||
<div className="card-subtext">{((stats.summary.enriched || stats.summary.with_wikidata) / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{stats.summary.with_coordinates.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withCoordinates')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_coordinates / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{stats.summary.unique_cities.toLocaleString()}</div>
|
||||
<div className="card-label">{t('uniqueCities')}</div>
|
||||
</div>
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{stats.summary.institution_types}</div>
|
||||
<div className="card-label">{t('institutionTypes')}</div>
|
||||
</div>
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{(stats.summary.with_google_maps || 0).toLocaleString()}</div>
|
||||
<div className="card-label">{t('googleMapsData')}</div>
|
||||
<div className="card-subtext">{((stats.summary.with_google_maps || 0) / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Data Sources Cards - Second Row */}
|
||||
<div className="summary-cards data-sources-row">
|
||||
{stats.summary.with_ghcid !== undefined && (
|
||||
<div className="summary-card accent-green">
|
||||
<div className="card-value">{stats.summary.with_ghcid.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withGHCID')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_ghcid / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
{/* Overview Statistics - Collapsible */}
|
||||
<CollapsibleSection title={t('overviewStatistics')} defaultOpen={false}>
|
||||
<div className="overview-stats-content">
|
||||
{/* Row 1: Core Stats */}
|
||||
<div className="summary-cards">
|
||||
<div className="summary-card highlight">
|
||||
<div className="card-value">{stats.summary.total_institutions.toLocaleString()}</div>
|
||||
<div className="card-label">{t('totalInstitutions')}</div>
|
||||
</div>
|
||||
)}
|
||||
{stats.summary.with_web_claims !== undefined && (
|
||||
<div className="summary-card accent-teal">
|
||||
<div className="card-value">{stats.summary.with_web_claims.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withWebClaims')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_web_claims / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{(stats.summary.enriched || stats.summary.with_wikidata).toLocaleString()}</div>
|
||||
<div className="card-label">{t('enrichedRecords')}</div>
|
||||
<div className="card-subtext">{((stats.summary.enriched || stats.summary.with_wikidata) / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
)}
|
||||
{stats.summary.with_social_media !== undefined && (
|
||||
<div className="summary-card accent-purple">
|
||||
<div className="card-value">{stats.summary.with_social_media.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withSocialMedia')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_social_media / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{stats.summary.with_coordinates.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withCoordinates')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_coordinates / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
)}
|
||||
{stats.summary.with_museum_register !== undefined && (
|
||||
<div className="summary-card accent-orange">
|
||||
<div className="card-value">{stats.summary.with_museum_register.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withMuseumRegister')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_museum_register / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{stats.summary.unique_cities.toLocaleString()}</div>
|
||||
<div className="card-label">{t('uniqueCities')}</div>
|
||||
</div>
|
||||
)}
|
||||
{stats.summary.with_nan_isil !== undefined && (
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{stats.summary.unique_provinces || 12}</div>
|
||||
<div className="card-label">{t('uniqueProvinces')}</div>
|
||||
</div>
|
||||
<div className="summary-card">
|
||||
<div className="card-value">{stats.summary.institution_types}</div>
|
||||
<div className="card-label">{t('institutionTypes')}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Row 2: Data Sources */}
|
||||
<div className="summary-cards data-sources-row">
|
||||
{stats.summary.with_ghcid !== undefined && (
|
||||
<div className="summary-card accent-green">
|
||||
<div className="card-value">{stats.summary.with_ghcid.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withGHCID')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_ghcid / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
)}
|
||||
<div className="summary-card accent-blue">
|
||||
<div className="card-value">{stats.summary.with_nan_isil.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withISIL')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_nan_isil / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
<div className="card-value">{stats.summary.with_wikidata.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withWikidata')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_wikidata / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
<div className="summary-card accent-teal">
|
||||
<div className="card-value">{(stats.summary.with_google_maps || 0).toLocaleString()}</div>
|
||||
<div className="card-label">{t('googleMapsData')}</div>
|
||||
<div className="card-subtext">{((stats.summary.with_google_maps || 0) / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
{stats.summary.with_web_claims !== undefined && (
|
||||
<div className="summary-card accent-purple">
|
||||
<div className="card-value">{stats.summary.with_web_claims.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withWebClaims')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_web_claims / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
)}
|
||||
{stats.summary.with_social_media !== undefined && (
|
||||
<div className="summary-card accent-orange">
|
||||
<div className="card-value">{stats.summary.with_social_media.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withSocialMedia')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_social_media / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Row 3: Identifier Coverage */}
|
||||
<div className="summary-cards identifier-row">
|
||||
{stats.charts.identifier_coverage?.map((item) => (
|
||||
<div key={item.identifier} className="summary-card accent-slate">
|
||||
<div className="card-value">{item.count.toLocaleString()}</div>
|
||||
<div className="card-label">{item.identifier}</div>
|
||||
<div className="card-subtext">{item.percentage.toFixed(1)}%</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Row 4: Google Maps Features */}
|
||||
{stats.charts.google_maps_coverage && stats.charts.google_maps_coverage.length > 0 && (
|
||||
<div className="summary-cards google-maps-row">
|
||||
{stats.charts.google_maps_coverage.map((item) => (
|
||||
<div key={item.feature} className="summary-card accent-red">
|
||||
<div className="card-value">{item.count.toLocaleString()}</div>
|
||||
<div className="card-label">{item.feature}</div>
|
||||
<div className="card-subtext">{item.percentage.toFixed(1)}%</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Row 5: Registry Sources */}
|
||||
<div className="summary-cards registry-row">
|
||||
{stats.summary.with_museum_register !== undefined && (
|
||||
<div className="summary-card accent-amber">
|
||||
<div className="card-value">{stats.summary.with_museum_register.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withMuseumRegister')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_museum_register / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
)}
|
||||
{stats.summary.with_nan_isil !== undefined && (
|
||||
<div className="summary-card accent-indigo">
|
||||
<div className="card-value">{stats.summary.with_nan_isil.toLocaleString()}</div>
|
||||
<div className="card-label">{t('withISIL')}</div>
|
||||
<div className="card-subtext">{(stats.summary.with_nan_isil / stats.summary.total_institutions * 100).toFixed(1)}%</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</CollapsibleSection>
|
||||
|
||||
{/* Charts Grid */}
|
||||
<section className="charts-section">
|
||||
|
|
|
|||
245
mcp_servers/social_media/README.md
Normal file
245
mcp_servers/social_media/README.md
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
# Social Media MCP Server
|
||||
|
||||
A Model Context Protocol (MCP) server for obtaining media content from YouTube, LinkedIn, Facebook, and Instagram.
|
||||
|
||||
## Features
|
||||
|
||||
### YouTube (5 tools)
|
||||
- `youtube_get_video_info` - Get video metadata (title, description, views, likes)
|
||||
- `youtube_get_transcript` - Extract subtitles/transcripts using yt-dlp
|
||||
- `youtube_search_videos` - Search for videos
|
||||
- `youtube_get_channel_info` - Get channel information
|
||||
- `youtube_get_channel_videos` - Get recent videos from a channel
|
||||
|
||||
### LinkedIn (4 tools)
|
||||
- `linkedin_get_profile` - Scrape LinkedIn profile (unofficial API)
|
||||
- `linkedin_get_company` - Get company page info
|
||||
- `linkedin_search_jobs` - Search job listings
|
||||
- `linkedin_get_feed_posts` - Get posts from your feed
|
||||
|
||||
> **Warning**: LinkedIn tools use unofficial methods and may violate LinkedIn's Terms of Service.
|
||||
|
||||
### Facebook (4 tools)
|
||||
- `facebook_get_page_posts` - Get posts from a Facebook Page
|
||||
- `facebook_get_post_comments` - Get comments on a post
|
||||
- `facebook_post_to_page` - Publish a post to a Page
|
||||
- `facebook_reply_to_comment` - Reply to comments
|
||||
|
||||
### Instagram (5 tools)
|
||||
- `instagram_get_profile_info` - Get Business profile info
|
||||
- `instagram_get_media_posts` - Get recent media posts
|
||||
- `instagram_get_media_insights` - Get post analytics
|
||||
- `instagram_publish_media` - Publish images
|
||||
- `instagram_get_comments` - Get comments on posts
|
||||
|
||||
> **Note**: Instagram tools require a Business Account connected to Facebook.
|
||||
|
||||
## Installation
|
||||
|
||||
### Quick Setup
|
||||
|
||||
```bash
|
||||
# Navigate to the server directory
|
||||
cd mcp_servers/social_media
|
||||
|
||||
# Run the setup script
|
||||
./setup.sh
|
||||
```
|
||||
|
||||
### Manual Installation
|
||||
|
||||
```bash
|
||||
# Create virtual environment
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
|
||||
# Install dependencies
|
||||
pip install -e .
|
||||
|
||||
# Install yt-dlp (for YouTube transcripts)
|
||||
brew install yt-dlp # macOS
|
||||
# or
|
||||
pip install yt-dlp
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Create a `.env` file or set environment variables:
|
||||
|
||||
```bash
|
||||
# YouTube (optional - falls back to yt-dlp)
|
||||
YOUTUBE_API_KEY=your_youtube_api_key
|
||||
|
||||
# LinkedIn (choose one method)
|
||||
LINKEDIN_COOKIE=your_li_at_cookie_value
|
||||
# or
|
||||
LINKEDIN_EMAIL=your_email
|
||||
LINKEDIN_PASSWORD=your_password
|
||||
|
||||
# Facebook
|
||||
FACEBOOK_ACCESS_TOKEN=your_page_access_token
|
||||
FACEBOOK_PAGE_ID=your_page_id
|
||||
|
||||
# Instagram
|
||||
INSTAGRAM_ACCESS_TOKEN=your_instagram_access_token
|
||||
INSTAGRAM_BUSINESS_ACCOUNT_ID=your_business_account_id
|
||||
```
|
||||
|
||||
## Getting API Credentials
|
||||
|
||||
### YouTube API Key
|
||||
|
||||
1. Go to [Google Cloud Console](https://console.cloud.google.com/)
|
||||
2. Create a new project (or select existing)
|
||||
3. Enable "YouTube Data API v3"
|
||||
4. Create credentials → API Key
|
||||
5. Copy the API key
|
||||
|
||||
### LinkedIn Cookie (li_at)
|
||||
|
||||
1. Log into LinkedIn in your browser
|
||||
2. Open Developer Tools (F12)
|
||||
3. Go to Application → Cookies → linkedin.com
|
||||
4. Find the `li_at` cookie and copy its value
|
||||
|
||||
### Facebook Page Access Token
|
||||
|
||||
1. Go to [Facebook Developer Console](https://developers.facebook.com/)
|
||||
2. Create an App (Business type)
|
||||
3. Add "Facebook Login" and "Pages API" products
|
||||
4. Generate a Page Access Token with permissions:
|
||||
- `pages_show_list`
|
||||
- `pages_read_engagement`
|
||||
- `pages_manage_posts` (for posting)
|
||||
- `pages_manage_comments` (for replying)
|
||||
|
||||
### Instagram Business Account
|
||||
|
||||
1. Convert your Instagram account to a Business Account
|
||||
2. Connect it to a Facebook Page
|
||||
3. Use the Facebook Graph API to get your Instagram Business Account ID
|
||||
4. Generate an access token with permissions:
|
||||
- `instagram_basic`
|
||||
- `instagram_content_publish` (for posting)
|
||||
- `instagram_manage_comments`
|
||||
- `instagram_manage_insights`
|
||||
|
||||
## Usage with OpenCode
|
||||
|
||||
Add to your OpenCode configuration (`.opencode/config.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"social-media": {
|
||||
"command": "uv",
|
||||
"args": [
|
||||
"--directory",
|
||||
"/path/to/glam/mcp_servers/social_media",
|
||||
"run",
|
||||
"python",
|
||||
"server.py"
|
||||
],
|
||||
"env": {
|
||||
"YOUTUBE_API_KEY": "your_key",
|
||||
"LINKEDIN_COOKIE": "your_cookie",
|
||||
"FACEBOOK_ACCESS_TOKEN": "your_token",
|
||||
"FACEBOOK_PAGE_ID": "your_page_id",
|
||||
"INSTAGRAM_ACCESS_TOKEN": "your_token",
|
||||
"INSTAGRAM_BUSINESS_ACCOUNT_ID": "your_account_id"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Usage with Claude Desktop
|
||||
|
||||
Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"social-media": {
|
||||
"command": "/path/to/glam/mcp_servers/social_media/.venv/bin/python",
|
||||
"args": ["/path/to/glam/mcp_servers/social_media/server.py"],
|
||||
"env": {
|
||||
"YOUTUBE_API_KEY": "your_key"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
### Get YouTube Video Info
|
||||
|
||||
```
|
||||
Get info about this YouTube video: https://www.youtube.com/watch?v=dQw4w9WgXcQ
|
||||
```
|
||||
|
||||
### Get Video Transcript
|
||||
|
||||
```
|
||||
Get the transcript of this video: dQw4w9WgXcQ
|
||||
```
|
||||
|
||||
### Search LinkedIn Jobs
|
||||
|
||||
```
|
||||
Search for software engineer jobs in San Francisco
|
||||
```
|
||||
|
||||
### Get Instagram Posts
|
||||
|
||||
```
|
||||
Get my recent Instagram posts
|
||||
```
|
||||
|
||||
## API Methods Used
|
||||
|
||||
| Platform | Method | Authentication |
|
||||
|-----------|---------------------------|----------------------|
|
||||
| YouTube | YouTube Data API v3 | API Key |
|
||||
| YouTube | yt-dlp (fallback) | None |
|
||||
| LinkedIn | Voyager API (unofficial) | Session Cookie |
|
||||
| Facebook | Graph API | Page Access Token |
|
||||
| Instagram | Graph API (via Facebook) | Access Token |
|
||||
|
||||
## Rate Limits
|
||||
|
||||
- **YouTube**: 10,000 quota units/day (search costs 100, video info costs 1)
|
||||
- **LinkedIn**: Unofficial API - use sparingly to avoid account restrictions
|
||||
- **Facebook/Instagram**: 200 calls/user/hour for most endpoints
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### YouTube: "API key not configured"
|
||||
- Set `YOUTUBE_API_KEY` or install `yt-dlp` for fallback
|
||||
|
||||
### LinkedIn: "Session expired"
|
||||
- Get a fresh `li_at` cookie from your browser
|
||||
|
||||
### Facebook/Instagram: "Invalid access token"
|
||||
- Tokens expire after ~60 days; generate a new one
|
||||
|
||||
### yt-dlp: "Command not found"
|
||||
```bash
|
||||
brew install yt-dlp # macOS
|
||||
pip install yt-dlp # All platforms
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT License - See project root for details.
|
||||
|
||||
## References
|
||||
|
||||
This server was built by studying these open-source MCP implementations:
|
||||
- [anaisbetts/mcp-youtube](https://github.com/anaisbetts/mcp-youtube)
|
||||
- [ZubeidHendricks/youtube-mcp-server](https://github.com/ZubeidHendricks/youtube-mcp-server)
|
||||
- [adhikasp/mcp-linkedin](https://github.com/adhikasp/mcp-linkedin)
|
||||
- [stickerdaniel/linkedin-mcp-server](https://github.com/stickerdaniel/linkedin-mcp-server)
|
||||
- [jlbadano/ig-mcp](https://github.com/jlbadano/ig-mcp)
|
||||
- [tiroshanm/facebook-mcp-server](https://github.com/tiroshanm/facebook-mcp-server)
|
||||
20
mcp_servers/social_media/pyproject.toml
Normal file
20
mcp_servers/social_media/pyproject.toml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
[project]
|
||||
name = "social-media-mcp"
|
||||
version = "1.0.0"
|
||||
description = "MCP Server for obtaining media content from YouTube, LinkedIn, Facebook, and Instagram"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"httpx>=0.27.0",
|
||||
"mcp>=1.0.0",
|
||||
"yt-dlp>=2024.0.0",
|
||||
"beautifulsoup4>=4.12.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
linkedin = [
|
||||
"linkedin-api>=2.0.0",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
1292
mcp_servers/social_media/server.py
Normal file
1292
mcp_servers/social_media/server.py
Normal file
File diff suppressed because it is too large
Load diff
154
mcp_servers/social_media/setup.sh
Executable file
154
mcp_servers/social_media/setup.sh
Executable file
|
|
@ -0,0 +1,154 @@
|
|||
#!/bin/bash
|
||||
# Social Media MCP Server - Setup Script
|
||||
#
|
||||
# This script sets up the development environment for the Social Media MCP server.
|
||||
|
||||
set -e # Exit on error
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "$SCRIPT_DIR"
|
||||
|
||||
echo "=============================================="
|
||||
echo " Social Media MCP Server - Setup"
|
||||
echo "=============================================="
|
||||
echo ""
|
||||
|
||||
# Check for Python 3.10+
|
||||
echo "Checking Python version..."
|
||||
if command -v python3 &> /dev/null; then
|
||||
PYTHON_VERSION=$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
|
||||
MAJOR=$(echo $PYTHON_VERSION | cut -d. -f1)
|
||||
MINOR=$(echo $PYTHON_VERSION | cut -d. -f2)
|
||||
|
||||
if [ "$MAJOR" -ge 3 ] && [ "$MINOR" -ge 10 ]; then
|
||||
echo " Python $PYTHON_VERSION found"
|
||||
else
|
||||
echo " ERROR: Python 3.10+ required, found $PYTHON_VERSION"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo " ERROR: Python 3 not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Create virtual environment
|
||||
echo ""
|
||||
echo "Creating virtual environment..."
|
||||
if [ -d ".venv" ]; then
|
||||
echo " .venv already exists, skipping..."
|
||||
else
|
||||
python3 -m venv .venv
|
||||
echo " Created .venv"
|
||||
fi
|
||||
|
||||
# Activate virtual environment
|
||||
echo ""
|
||||
echo "Activating virtual environment..."
|
||||
source .venv/bin/activate
|
||||
echo " Activated"
|
||||
|
||||
# Upgrade pip
|
||||
echo ""
|
||||
echo "Upgrading pip..."
|
||||
pip install --upgrade pip --quiet
|
||||
|
||||
# Install dependencies
|
||||
echo ""
|
||||
echo "Installing dependencies..."
|
||||
pip install -e . --quiet
|
||||
echo " Installed: httpx, mcp, yt-dlp, beautifulsoup4"
|
||||
|
||||
# Check for yt-dlp system installation (preferred for better codec support)
|
||||
echo ""
|
||||
echo "Checking yt-dlp installation..."
|
||||
if command -v yt-dlp &> /dev/null; then
|
||||
YTDLP_VERSION=$(yt-dlp --version)
|
||||
echo " yt-dlp $YTDLP_VERSION found (system)"
|
||||
else
|
||||
echo " yt-dlp not found system-wide"
|
||||
echo " Using pip-installed version (may have limited codec support)"
|
||||
echo " For best results, install with: brew install yt-dlp"
|
||||
fi
|
||||
|
||||
# Create .env template if it doesn't exist
|
||||
echo ""
|
||||
echo "Checking environment configuration..."
|
||||
if [ -f ".env" ]; then
|
||||
echo " .env file already exists"
|
||||
else
|
||||
cat > .env.example << 'EOF'
|
||||
# YouTube API (optional - falls back to yt-dlp if not set)
|
||||
YOUTUBE_API_KEY=
|
||||
|
||||
# LinkedIn (choose one method)
|
||||
# Option 1: Session cookie (recommended)
|
||||
LINKEDIN_COOKIE=
|
||||
# Option 2: Email/Password (less reliable)
|
||||
LINKEDIN_EMAIL=
|
||||
LINKEDIN_PASSWORD=
|
||||
|
||||
# Facebook Graph API
|
||||
FACEBOOK_ACCESS_TOKEN=
|
||||
FACEBOOK_PAGE_ID=
|
||||
FACEBOOK_API_VERSION=v19.0
|
||||
|
||||
# Instagram Graph API (via Facebook)
|
||||
INSTAGRAM_ACCESS_TOKEN=
|
||||
INSTAGRAM_BUSINESS_ACCOUNT_ID=
|
||||
INSTAGRAM_API_VERSION=v19.0
|
||||
EOF
|
||||
echo " Created .env.example template"
|
||||
echo " Copy to .env and fill in your credentials:"
|
||||
echo " cp .env.example .env"
|
||||
fi
|
||||
|
||||
# Test the server can start
|
||||
echo ""
|
||||
echo "Testing server import..."
|
||||
if python -c "import server" 2>/dev/null; then
|
||||
echo " Server module imports successfully"
|
||||
else
|
||||
echo " WARNING: Server import failed. Check for missing dependencies."
|
||||
fi
|
||||
|
||||
# Print summary
|
||||
echo ""
|
||||
echo "=============================================="
|
||||
echo " Setup Complete!"
|
||||
echo "=============================================="
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo ""
|
||||
echo "1. Configure credentials:"
|
||||
echo " cp .env.example .env"
|
||||
echo " # Edit .env with your API keys/tokens"
|
||||
echo ""
|
||||
echo "2. Test the server:"
|
||||
echo " source .venv/bin/activate"
|
||||
echo " python server.py"
|
||||
echo ""
|
||||
echo "3. Add to your MCP client config (OpenCode/Claude Desktop)"
|
||||
echo " See README.md for configuration examples"
|
||||
echo ""
|
||||
echo "Configured platforms:"
|
||||
if [ -n "$YOUTUBE_API_KEY" ]; then
|
||||
echo " - YouTube: API Key set"
|
||||
else
|
||||
echo " - YouTube: Not configured (will use yt-dlp fallback)"
|
||||
fi
|
||||
if [ -n "$LINKEDIN_COOKIE" ]; then
|
||||
echo " - LinkedIn: Cookie set"
|
||||
else
|
||||
echo " - LinkedIn: Not configured"
|
||||
fi
|
||||
if [ -n "$FACEBOOK_ACCESS_TOKEN" ]; then
|
||||
echo " - Facebook: Access token set"
|
||||
else
|
||||
echo " - Facebook: Not configured"
|
||||
fi
|
||||
if [ -n "$INSTAGRAM_ACCESS_TOKEN" ]; then
|
||||
echo " - Instagram: Access token set"
|
||||
else
|
||||
echo " - Instagram: Not configured"
|
||||
fi
|
||||
echo ""
|
||||
File diff suppressed because it is too large
Load diff
678
scripts/enrich_youtube.py
Executable file
678
scripts/enrich_youtube.py
Executable file
|
|
@ -0,0 +1,678 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
YouTube Enrichment Script for Heritage Custodian Entries
|
||||
|
||||
This script enriches heritage custodian YAML entries with YouTube channel/video data.
|
||||
It finds YouTube channels from existing web_claims (social_youtube) and fetches:
|
||||
- Channel info (subscribers, video count, description, etc.)
|
||||
- Recent videos (title, description, views, likes, comments)
|
||||
- Video transcripts (when available)
|
||||
- Comments on videos
|
||||
|
||||
All data includes full provenance with URLs and timestamps.
|
||||
|
||||
Usage:
|
||||
python scripts/enrich_youtube.py [--dry-run] [--limit N] [--entry ENTRY_FILE]
|
||||
|
||||
Environment Variables:
|
||||
YOUTUBE_API_KEY: Required. Get from https://console.cloud.google.com/
|
||||
|
||||
Author: GLAM Data Extraction Project
|
||||
Date: December 2025
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import httpx
|
||||
import yaml
|
||||
|
||||
# Load environment variables from .env file
|
||||
try:
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
except ImportError:
|
||||
pass # dotenv not installed, rely on shell environment
|
||||
|
||||
# ============================================================================
|
||||
# Configuration
|
||||
# ============================================================================
|
||||
|
||||
# Support both YOUTUBE_API_KEY and GOOGLE_YOUTUBE_TOKEN
|
||||
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY") or os.getenv("GOOGLE_YOUTUBE_TOKEN", "")
|
||||
YOUTUBE_API_BASE = "https://www.googleapis.com/youtube/v3"
|
||||
USER_AGENT = "GLAMDataExtractor/1.0 (heritage-data@example.com) Python/httpx"
|
||||
|
||||
ENTRIES_DIR = Path("data/nde/enriched/entries")
|
||||
|
||||
# Rate limiting
|
||||
REQUEST_DELAY = 0.5 # seconds between API calls
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Helper Functions
|
||||
# ============================================================================
|
||||
|
||||
def extract_channel_id_or_username(youtube_url: str) -> Tuple[Optional[str], str]:
    """
    Extract a YouTube channel identifier from the common channel URL formats.

    Args:
        youtube_url: A channel URL such as ``youtube.com/channel/UC...``,
            ``youtube.com/@handle``, ``youtube.com/user/name``,
            ``youtube.com/c/name`` or ``youtube.com/name``.

    Returns:
        Tuple of (identifier, identifier_type) where identifier_type is one of
        'channel_id', 'handle', 'username' or 'custom_url'. Returns
        (None, "") when the URL is empty or matches none of the formats.
    """
    if not youtube_url:
        return None, ""

    # Prefixed formats, tried in order.
    # A channel ID is the literal prefix "UC" followed by 22 URL-safe
    # characters (24 in total). The prefix must NOT be written as the
    # character class "[UC]": that matches a single letter and therefore
    # captures only 23 characters, silently dropping the last one.
    prefixed_patterns = (
        (r'youtube\.com/channel/(UC[0-9A-Za-z_-]{22})', "channel_id"),
        (r'youtube\.com/@([^/?&]+)', "handle"),
        (r'youtube\.com/user/([^/?&]+)', "username"),
        (r'youtube\.com/c/([^/?&]+)', "custom_url"),
    )
    for pattern, id_type in prefixed_patterns:
        match = re.search(pattern, youtube_url)
        if match:
            return match.group(1), id_type

    # Direct custom URL format: youtube.com/customname (no prefix).
    # Must come after all other patterns to avoid false matches.
    match = re.search(r'youtube\.com/([a-zA-Z][a-zA-Z0-9_-]{2,})(?:[/?]|$)', youtube_url)
    if match:
        name = match.group(1)
        # Known site paths that are not custom channel names
        excluded = {'watch', 'playlist', 'channel', 'user', 'c', 'results', 'feed', 'gaming', 'shorts', 'live'}
        if name.lower() not in excluded:
            return name, "custom_url"

    return None, ""
|
||||
|
||||
|
||||
def resolve_channel_id(identifier: str, id_type: str, api_key: str) -> Optional[str]:
    """
    Resolve a username, handle, or custom URL to a channel ID.

    Handles and legacy usernames are first looked up exactly through the
    ``channels`` endpoint (``forHandle`` / ``forUsername``). Only when that
    yields nothing, or for custom URLs, does the function fall back to a
    channel search and take the first hit, which is a fuzzy match.

    Args:
        identifier: Value returned by extract_channel_id_or_username().
        id_type: 'channel_id', 'handle', 'username' or 'custom_url'.
        api_key: YouTube Data API key.

    Returns:
        The channel ID, or None when it could not be resolved. Request and
        parsing errors are reported as warnings and never raised.
    """
    if id_type == "channel_id":
        return identifier

    headers = {"User-Agent": USER_AGENT}

    # Exact lookup where the API offers one; avoids attaching a look-alike
    # channel returned by a free-text search.
    direct_param = {"handle": "forHandle", "username": "forUsername"}.get(id_type)
    if direct_param:
        try:
            response = httpx.get(
                f"{YOUTUBE_API_BASE}/channels",
                params={"part": "id", direct_param: identifier, "key": api_key},
                headers=headers,
                timeout=30.0
            )
            response.raise_for_status()
            items = response.json().get("items") or []
            if items:
                return items[0]["id"]
        except Exception as e:
            # Best effort: fall through to the search-based lookup below
            print(f" Warning: Direct lookup failed for {id_type} '{identifier}': {e}")

    # Fallback: use search to find the channel
    search_params = {
        "part": "snippet",
        "type": "channel",
        "maxResults": 1,
        "key": api_key
    }
    search_params["q"] = f"@{identifier}" if id_type == "handle" else identifier

    try:
        response = httpx.get(
            f"{YOUTUBE_API_BASE}/search",
            params=search_params,
            headers=headers,
            timeout=30.0
        )
        response.raise_for_status()
        data = response.json()

        if data.get("items"):
            return data["items"][0]["id"]["channelId"]
    except Exception as e:
        print(f" Warning: Could not resolve {id_type} '{identifier}': {e}")

    return None
|
||||
|
||||
|
||||
def get_channel_info(channel_id: str, api_key: str) -> Dict[str, Any]:
    """
    Fetch snippet, statistics, branding and content details for one channel.

    Returns a flat dict of channel fields, or ``{"error": ...}`` when the API
    returns no item for ``channel_id``. HTTP errors raised by
    ``raise_for_status()`` propagate to the caller.
    """
    response = httpx.get(
        f"{YOUTUBE_API_BASE}/channels",
        params={
            "part": "snippet,statistics,brandingSettings,contentDetails",
            "id": channel_id,
            "key": api_key
        },
        headers={"User-Agent": USER_AGENT},
        timeout=30.0
    )
    response.raise_for_status()

    items = response.json().get("items")
    if not items:
        return {"error": f"Channel not found: {channel_id}"}

    record = items[0]
    snippet = record.get("snippet", {})
    stats = record.get("statistics", {})
    branding = record.get("brandingSettings", {})
    playlists = record.get("contentDetails", {}).get("relatedPlaylists", {})

    def as_count(key: str) -> Optional[int]:
        # Statistics arrive as strings; a missing/empty value becomes None
        raw = stats.get(key)
        return int(raw) if raw else None

    info = {
        "channel_id": channel_id,
        "channel_url": f"https://www.youtube.com/channel/{channel_id}",
        "title": snippet.get("title"),
        "description": snippet.get("description"),
        "custom_url": snippet.get("customUrl"),
        "published_at": snippet.get("publishedAt"),
        "country": snippet.get("country"),
        "default_language": snippet.get("defaultLanguage"),
        "thumbnail_url": snippet.get("thumbnails", {}).get("high", {}).get("url"),
        "banner_url": branding.get("image", {}).get("bannerExternalUrl"),
        "subscriber_count": as_count("subscriberCount"),
        "video_count": as_count("videoCount"),
        "view_count": as_count("viewCount"),
        "subscriber_count_hidden": stats.get("hiddenSubscriberCount", False),
        "uploads_playlist_id": playlists.get("uploads"),
    }
    return info
|
||||
|
||||
|
||||
def get_channel_videos(channel_id: str, api_key: str, max_results: int = 20) -> List[Dict[str, Any]]:
    """
    Return details for the most recent videos of a channel.

    Two requests are made: a date-ordered search restricted to the channel
    (at most 50 results) to collect video IDs, then a ``videos`` call for the
    snippet, content details and statistics of those IDs. Returns an empty
    list when the search finds nothing; HTTP errors propagate.
    """
    headers = {"User-Agent": USER_AGENT}

    # Step 1: newest video IDs for this channel
    search_response = httpx.get(
        f"{YOUTUBE_API_BASE}/search",
        params={
            "part": "snippet",
            "channelId": channel_id,
            "type": "video",
            "order": "date",
            "maxResults": min(max_results, 50),
            "key": api_key
        },
        headers=headers,
        timeout=30.0
    )
    search_response.raise_for_status()
    video_ids = [hit["id"]["videoId"] for hit in search_response.json().get("items", [])]
    if not video_ids:
        return []

    # Step 2: full details for those IDs in a single call
    details_response = httpx.get(
        f"{YOUTUBE_API_BASE}/videos",
        params={
            "part": "snippet,contentDetails,statistics",
            "id": ",".join(video_ids),
            "key": api_key
        },
        headers=headers,
        timeout=30.0
    )
    details_response.raise_for_status()

    def describe(item: Dict[str, Any]) -> Dict[str, Any]:
        snippet = item.get("snippet", {})
        stats = item.get("statistics", {})
        content = item.get("contentDetails", {})

        def as_count(key: str) -> Optional[int]:
            raw = stats.get(key)
            return int(raw) if raw else None

        return {
            "video_id": item["id"],
            "video_url": f"https://www.youtube.com/watch?v={item['id']}",
            "title": snippet.get("title"),
            "description": snippet.get("description", "")[:500],  # Truncate long descriptions
            "published_at": snippet.get("publishedAt"),
            "duration": content.get("duration"),
            "definition": content.get("definition"),
            "caption_available": content.get("caption") == "true",
            "view_count": as_count("viewCount"),
            "like_count": as_count("likeCount"),
            "comment_count": as_count("commentCount"),
            "tags": snippet.get("tags", [])[:10],  # Limit tags
            "thumbnail_url": snippet.get("thumbnails", {}).get("high", {}).get("url"),
            "default_language": snippet.get("defaultLanguage"),
            "default_audio_language": snippet.get("defaultAudioLanguage"),
        }

    return [describe(item) for item in details_response.json().get("items", [])]
|
||||
|
||||
|
||||
def get_video_comments(video_id: str, api_key: str, max_results: int = 50) -> List[Dict[str, Any]]:
    """
    Return top-level comments of a video, ordered by relevance.

    At most 100 comment threads are requested. An HTTP 403 response is
    treated as "comments disabled" and yields an empty list; any other HTTP
    error is re-raised.
    """
    try:
        response = httpx.get(
            f"{YOUTUBE_API_BASE}/commentThreads",
            params={
                "part": "snippet",
                "videoId": video_id,
                "order": "relevance",
                "maxResults": min(max_results, 100),
                "textFormat": "plainText",
                "key": api_key
            },
            headers={"User-Agent": USER_AGENT},
            timeout=30.0
        )
        response.raise_for_status()
    except httpx.HTTPStatusError as e:
        if e.response.status_code != 403:
            raise
        # Comments disabled for this video
        return []

    def flatten(thread: Dict[str, Any]) -> Dict[str, Any]:
        thread_snippet = thread.get("snippet", {})
        comment = thread_snippet.get("topLevelComment", {}).get("snippet", {})
        return {
            "comment_id": thread["id"],
            "author_display_name": comment.get("authorDisplayName"),
            "author_channel_url": comment.get("authorChannelUrl"),
            "text": comment.get("textDisplay", "")[:1000],  # Truncate
            "like_count": comment.get("likeCount", 0),
            "published_at": comment.get("publishedAt"),
            "updated_at": comment.get("updatedAt"),
            "reply_count": thread_snippet.get("totalReplyCount", 0),
        }

    return [flatten(thread) for thread in response.json().get("items", [])]
|
||||
|
||||
|
||||
def get_video_transcript(video_id: str, language: str = "en") -> Optional[Dict[str, Any]]:
    """
    Get video transcript using yt-dlp.

    Subtitles (manual and automatic) are downloaded as VTT into a temporary
    directory, then flattened to plain text.

    Args:
        video_id: YouTube video ID.
        language: Preferred subtitle language; 'nl' and 'en' are always
            requested as additional candidates.

    Returns:
        A dict with the transcript text (truncated to 10000 characters), its
        full length, language and type; ``{"error": ...}`` when yt-dlp is
        missing, times out or fails unexpectedly; None when no subtitle file
        was produced.
    """
    video_url = f"https://www.youtube.com/watch?v={video_id}"

    # Preferred language first, without repeating it when it is nl/en
    sub_langs = list(dict.fromkeys([language, "nl", "en"]))

    def subtitle_language(path: Path) -> str:
        # yt-dlp names subtitle files "<id>.<lang>.vtt"
        parts = path.name.split(".")
        return parts[-2] if len(parts) >= 3 else "unknown"

    def preference(path: Path) -> int:
        lang = subtitle_language(path)
        return sub_langs.index(lang) if lang in sub_langs else len(sub_langs)

    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            # The exit status is deliberately ignored: whether a subtitle
            # file was written is the only signal used below.
            subprocess.run(
                [
                    "yt-dlp",
                    "--write-subs",
                    "--write-auto-subs",
                    "--sub-langs", ",".join(sub_langs),
                    "--sub-format", "vtt",
                    "--skip-download",
                    "--output", f"{tmpdir}/%(id)s",
                    video_url
                ],
                capture_output=True,
                text=True,
                timeout=60
            )

            vtt_paths = sorted(Path(tmpdir).glob("*.vtt"))
            if not vtt_paths:
                return None

            # Deterministic choice: best-ranked language, ties broken by name
            chosen = min(vtt_paths, key=preference)
            vtt_content = chosen.read_text(encoding='utf-8')

            # Parse VTT to extract text: drop headers, cue timings and
            # numeric cue identifiers, then strip inline tags.
            lines = []
            for line in vtt_content.split('\n'):
                line = line.strip()
                if not line or line.startswith(('WEBVTT', 'Kind:', 'Language:')):
                    continue
                if '-->' in line or re.match(r'^\d+$', line):
                    continue
                clean_line = re.sub(r'<[^>]+>', '', line)
                if clean_line:
                    lines.append(clean_line)

            # Remove duplicate consecutive lines
            deduped = []
            for line in lines:
                if not deduped or line != deduped[-1]:
                    deduped.append(line)

            transcript = ' '.join(deduped)

            return {
                "video_id": video_id,
                "language": subtitle_language(chosen),
                # NOTE(review): relies on ".auto." appearing in the file name;
                # confirm yt-dlp actually marks automatic captions this way.
                "transcript_type": "auto" if ".auto." in str(chosen) else "manual",
                "transcript_text": transcript[:10000],  # Truncate very long transcripts
                "transcript_length_chars": len(transcript),
                "extraction_method": "yt-dlp",
            }

    except FileNotFoundError:
        return {"error": "yt-dlp not installed"}
    except subprocess.TimeoutExpired:
        return {"error": "Transcript extraction timed out"}
    except Exception as e:
        return {"error": str(e)}
|
||||
|
||||
|
||||
def find_youtube_url_in_entry(entry: Dict[str, Any]) -> Optional[str]:
    """
    Find YouTube URL from web_claims or wikidata in an entry.

    The first ``social_youtube`` web claim with a non-empty value wins.
    Otherwise the Wikidata claim ``P2397_youtube_channel_id`` is turned into
    a channel URL. Sections that are missing or null in the YAML are treated
    as empty.

    Returns:
        The YouTube URL, or None when the entry carries no usable reference.
    """
    # Check web_claims for social_youtube ("or" guards against YAML nulls)
    web_claims = (entry.get("web_claims") or {}).get("claims") or []
    for claim in web_claims:
        if not isinstance(claim, dict):
            continue
        if claim.get("claim_type") == "social_youtube" and claim.get("claim_value"):
            return claim["claim_value"]

    # Check wikidata for YouTube channel ID (P2397)
    wikidata = entry.get("wikidata_enrichment") or {}
    claims = wikidata.get("wikidata_claims") or {}

    youtube_claim = claims.get("P2397_youtube_channel_id")
    if isinstance(youtube_claim, dict):
        channel_id = youtube_claim.get("value")
        if channel_id:
            return f"https://www.youtube.com/channel/{channel_id}"

    return None
|
||||
|
||||
|
||||
def create_youtube_enrichment(
    youtube_url: str,
    api_key: str,
    fetch_videos: int = 10,
    fetch_comments_per_video: int = 20,
    fetch_transcripts: bool = True
) -> Dict[str, Any]:
    """
    Create full YouTube enrichment data with provenance.

    Args:
        youtube_url: Channel URL found in the entry.
        api_key: YouTube Data API key.
        fetch_videos: Number of recent videos to fetch (0 disables videos,
            and with them comments and transcripts).
        fetch_comments_per_video: Comments to fetch for each of the first
            5 videos (0 disables comments).
        fetch_transcripts: Whether to fetch transcripts for the first
            3 videos that report captions.

    Returns:
        The enrichment dict. ``status`` is "SUCCESS" only when the channel
        was actually found and all requests succeeded; otherwise it is
        "FAILED" and ``error`` describes the reason. Never raises.
    """
    timestamp = datetime.now(timezone.utc).isoformat()

    enrichment: Dict[str, Any] = {
        "source_url": youtube_url,
        "fetch_timestamp": timestamp,
        "api_endpoint": YOUTUBE_API_BASE,
        "api_version": "v3",
    }

    # Extract channel identifier
    identifier, id_type = extract_channel_id_or_username(youtube_url)

    if not identifier:
        enrichment["error"] = f"Could not parse YouTube URL: {youtube_url}"
        enrichment["status"] = "FAILED"
        return enrichment

    enrichment["identifier_type"] = id_type
    enrichment["identifier_value"] = identifier

    # Resolve to channel ID
    channel_id = resolve_channel_id(identifier, id_type, api_key)

    if not channel_id:
        enrichment["error"] = f"Could not resolve channel ID for: {identifier}"
        enrichment["status"] = "FAILED"
        return enrichment

    # Bound up front so the comment/transcript steps are safe when
    # fetch_videos == 0.
    videos: List[Dict[str, Any]] = []

    try:
        # Get channel info
        print(f" Fetching channel info for {channel_id}...")
        channel_info = get_channel_info(channel_id, api_key)
        enrichment["channel"] = channel_info

        # A lookup that found no channel is a failure, not a success:
        # entries marked SUCCESS are skipped on later runs and would never
        # be retried.
        if channel_info.get("error"):
            enrichment["error"] = channel_info["error"]
            enrichment["status"] = "FAILED"
            return enrichment

        # Get recent videos
        if fetch_videos > 0:
            print(f" Fetching {fetch_videos} recent videos...")
            videos = get_channel_videos(channel_id, api_key, fetch_videos)
            enrichment["videos"] = videos
            enrichment["videos_count"] = len(videos)

        # Get comments for top videos
        if fetch_comments_per_video > 0 and videos:
            print(f" Fetching comments for top videos...")
            for video in videos[:5]:  # Only first 5 videos
                comments = get_video_comments(video["video_id"], api_key, fetch_comments_per_video)
                video["comments"] = comments
                video["comments_fetched"] = len(comments)

        # Get transcripts for videos with captions
        if fetch_transcripts and videos:
            print(f" Fetching transcripts for videos with captions...")
            for video in videos[:3]:  # Only first 3 videos
                if video.get("caption_available"):
                    transcript = get_video_transcript(video["video_id"])
                    if transcript and not transcript.get("error"):
                        video["transcript"] = transcript

        enrichment["status"] = "SUCCESS"

    except httpx.HTTPStatusError as e:
        enrichment["error"] = f"YouTube API error: {e.response.status_code}"
        enrichment["status"] = "FAILED"
    except Exception as e:
        enrichment["error"] = str(e)
        enrichment["status"] = "FAILED"

    return enrichment
|
||||
|
||||
|
||||
def update_provenance(entry: Dict[str, Any], enrichment: Dict[str, Any]) -> None:
    """
    Append a YouTube source record to ``entry["provenance"]["sources"]["youtube"]``.

    Missing ``provenance``, ``sources`` and ``youtube`` containers are created
    in place. The record copies the fetch timestamp, API endpoint and channel
    ID from ``enrichment`` and lists a fixed set of extracted claim names.
    """
    youtube_sources = (
        entry.setdefault("provenance", {})
        .setdefault("sources", {})
        .setdefault("youtube", [])
    )

    channel_id = enrichment.get("channel", {}).get("channel_id")
    claim_names = [
        "channel_info",
        "subscriber_count",
        "video_count",
        "view_count",
        "recent_videos",
        "video_comments",
        "video_transcripts",
    ]

    youtube_sources.append({
        "source_type": "youtube_data_api",
        "fetch_timestamp": enrichment.get("fetch_timestamp"),
        "api_endpoint": enrichment.get("api_endpoint"),
        "channel_id": channel_id,
        "claims_extracted": claim_names,
    })
|
||||
|
||||
|
||||
def process_entry(entry_path: Path, api_key: str, dry_run: bool = False) -> bool:
    """
    Process a single entry file and add YouTube enrichment.

    The enrichment result is written back to the file whether it succeeded
    or failed, so a failure reason is recorded; provenance is only updated
    on success.

    Args:
        entry_path: Path to the entry YAML file.
        api_key: YouTube Data API key.
        dry_run: When True, only report what would be done; nothing is
            fetched or written.

    Returns:
        True when the entry was enriched successfully (or would be, in a dry
        run); False when it was skipped or the enrichment failed.
    """
    print(f"\nProcessing: {entry_path.name}")

    # Load entry
    with open(entry_path, 'r', encoding='utf-8') as f:
        entry = yaml.safe_load(f)

    # An empty file loads as None and other malformed files as a list or
    # scalar; none of them can hold an enrichment section.
    if not isinstance(entry, dict):
        print(f" Entry is not a YAML mapping, skipping...")
        return False

    # Check if already enriched (the section may be present but null)
    existing = entry.get("youtube_enrichment")
    if isinstance(existing, dict) and existing.get("status") == "SUCCESS":
        print(f" Already enriched, skipping...")
        return False

    # Find YouTube URL
    youtube_url = find_youtube_url_in_entry(entry)

    if not youtube_url:
        print(f" No YouTube URL found, skipping...")
        return False

    print(f" Found YouTube URL: {youtube_url}")

    if dry_run:
        print(f" [DRY RUN] Would enrich with YouTube data")
        return True

    # Create enrichment
    enrichment = create_youtube_enrichment(
        youtube_url=youtube_url,
        api_key=api_key,
        fetch_videos=10,
        fetch_comments_per_video=20,
        fetch_transcripts=True
    )
    status = enrichment.get("status", "UNKNOWN")

    # Add to entry
    entry["youtube_enrichment"] = enrichment

    # Update provenance
    if status == "SUCCESS":
        update_provenance(entry, enrichment)

    # Save entry
    with open(entry_path, 'w', encoding='utf-8') as f:
        yaml.dump(entry, f, allow_unicode=True, default_flow_style=False, sort_keys=False)

    print(f" Status: {status}")

    if status == "SUCCESS":
        channel = enrichment.get("channel", {})
        videos = enrichment.get("videos", [])
        subscribers = channel.get('subscriber_count')
        print(f" Channel: {channel.get('title')}")
        # A missing or zero count is reported as hidden
        print(f" Subscribers: {subscribers:,}" if subscribers else " Subscribers: Hidden")
        print(f" Videos fetched: {len(videos)}")

    return status == "SUCCESS"
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point: enrich one entry or every entry in ENTRIES_DIR.

    Exits with status 1 when no API key is configured or the requested entry
    file does not exist. Errors raised while processing an individual entry
    are reported and counted, and processing continues with the next entry.
    """
    import time  # local: only needed for pacing the processing loop

    parser = argparse.ArgumentParser(
        description="Enrich heritage custodian entries with YouTube channel data"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without making changes"
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        help="Limit number of entries to process"
    )
    parser.add_argument(
        "--entry",
        type=str,
        default=None,
        help="Process a specific entry file (filename or full path)"
    )
    # NOTE: kept for command-line compatibility only. With store_true and
    # default=True the value is always True, and process_entry() always
    # skips entries whose enrichment status is SUCCESS.
    parser.add_argument(
        "--skip-existing",
        action="store_true",
        default=True,
        help="Skip entries that already have YouTube enrichment (default: True)"
    )

    args = parser.parse_args()

    # A negative limit would silently drop entries from the end of the list
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must not be negative")

    # Check API key
    if not YOUTUBE_API_KEY:
        print("ERROR: YOUTUBE_API_KEY environment variable not set")
        print("\nTo get an API key:")
        print("1. Go to https://console.cloud.google.com/")
        print("2. Create a project and enable YouTube Data API v3")
        print("3. Create an API key under Credentials")
        print("4. Set: export YOUTUBE_API_KEY='your-key-here'")
        sys.exit(1)

    print("=" * 60)
    print("YouTube Enrichment Script for Heritage Custodians")
    print("=" * 60)
    # Only the tail of the key is shown so logs cannot be used to rebuild it
    print(f"API Key: ...{YOUTUBE_API_KEY[-4:]}")
    print(f"Entries directory: {ENTRIES_DIR}")
    print(f"Dry run: {args.dry_run}")

    # Collect entries to process
    if args.entry:
        entry_path = Path(args.entry)
        if not entry_path.exists():
            # Fall back to a bare filename inside the entries directory
            entry_path = ENTRIES_DIR / args.entry
        if not entry_path.exists():
            print(f"ERROR: Entry not found: {args.entry}")
            sys.exit(1)
        entries = [entry_path]
    else:
        entries = sorted(ENTRIES_DIR.glob("*.yaml"))

    # A limit of 0 (or no limit) processes every entry
    if args.limit:
        entries = entries[:args.limit]

    print(f"Entries to process: {len(entries)}")
    print("=" * 60)

    # Process entries
    success_count = 0
    skip_count = 0  # also counts failed enrichments (process_entry returns False)
    error_count = 0

    for entry_path in entries:
        try:
            result = process_entry(entry_path, YOUTUBE_API_KEY, args.dry_run)
            if result:
                success_count += 1
            else:
                skip_count += 1
        except Exception as e:
            print(f" ERROR: {e}")
            error_count += 1

        # Rate limiting (a dry run makes no API calls, so no pause is needed)
        if not args.dry_run:
            time.sleep(REQUEST_DELAY)

    # Summary
    print("\n" + "=" * 60)
    print("Summary")
    print("=" * 60)
    print(f"Entries processed: {len(entries)}")
    print(f"Successfully enriched: {success_count}")
    print(f"Skipped (no YouTube / already done): {skip_count}")
    print(f"Errors: {error_count}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -288,6 +288,7 @@ def extract_institution_data(entry_data: dict) -> dict | None:
|
|||
result['ghcid'] = {
|
||||
'current': ghcid_data.get('ghcid_current', ''),
|
||||
'uuid': ghcid_data.get('ghcid_uuid', ''),
|
||||
'numeric': ghcid_data.get('ghcid_numeric', ''),
|
||||
}
|
||||
|
||||
# Add standardized identifiers
|
||||
|
|
|
|||
295
scripts/generate_custodian_type_enums.py
Normal file
295
scripts/generate_custodian_type_enums.py
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate LinkML enum files for CustodianType hyponyms from Wikidata data.
|
||||
|
||||
This script reads the curated hyponyms data and generates LinkML enum YAML files
|
||||
for each GLAMORCUBESFIXPHDNT category (Museum, Archive, Library, Gallery, etc.)
|
||||
|
||||
Usage:
|
||||
python3 scripts/generate_custodian_type_enums.py
|
||||
|
||||
Output:
|
||||
Creates/updates files in schemas/20251121/linkml/modules/enums/
|
||||
"""
|
||||
|
||||
import yaml
|
||||
import re
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
from collections import defaultdict
|
||||
|
||||
# Type code to enum name mapping
|
||||
# One row per custodian type code:
# (code, enum name, title, base Wikidata QID or None, description)
_TYPE_ROWS = (
    ('M', 'MuseumTypeEnum', 'Museum Type Classification', 'Q33506',
     'Types of museums extracted from Wikidata hyponyms of Q33506 (museum).'),
    ('A', 'ArchiveTypeEnum', 'Archive Type Classification', 'Q166118',
     'Types of archives extracted from Wikidata hyponyms of Q166118 (archive).'),
    ('L', 'LibraryTypeEnum', 'Library Type Classification', 'Q7075',
     'Types of libraries extracted from Wikidata hyponyms of Q7075 (library).'),
    ('G', 'GalleryTypeEnum', 'Gallery Type Classification', 'Q1007870',
     'Types of galleries extracted from Wikidata hyponyms of Q1007870 (art gallery).'),
    ('B', 'BioCustodianTypeEnum', 'Bio Custodian Type Classification', 'Q473972',
     'Types of botanical gardens, zoos, and living collections from Wikidata.'),
    ('O', 'OfficialInstitutionTypeEnum', 'Official Institution Type Classification', 'Q895526',
     'Types of official/government heritage institutions from Wikidata.'),
    ('R', 'ResearchCenterTypeEnum', 'Research Center Type Classification', 'Q136410232',
     'Types of research organizations and documentation centers from Wikidata.'),
    ('C', 'CommercialCustodianTypeEnum', 'Commercial Custodian Type Classification', 'Q21980538',
     'Types of commercial/corporate heritage custodians from Wikidata.'),
    ('E', 'EducationProviderTypeEnum', 'Education Provider Type Classification', 'Q5341295',
     'Types of educational institutions with heritage collections from Wikidata.'),
    ('S', 'HeritageSocietyTypeEnum', 'Heritage Society Type Classification', 'Q5774403',
     'Types of heritage societies and collecting organizations from Wikidata.'),
    ('H', 'HolySiteTypeEnum', 'Holy/Sacred Site Type Classification', 'Q4588528',
     'Types of religious sites with heritage collections from Wikidata.'),
    ('I', 'IntangibleHeritageTypeEnum', 'Intangible Heritage Group Type Classification', 'Q105815710',
     'Types of organizations preserving intangible cultural heritage from Wikidata.'),
    ('N', 'NonProfitCustodianTypeEnum', 'Non-Profit Custodian Type Classification', 'Q163740',
     'Types of non-profit heritage organizations from Wikidata.'),
    ('D', 'DigitalPlatformTypeEnum', 'Digital Platform Type Classification', 'Q28017710',
     'Types of digital heritage platforms from Wikidata.'),
    ('P', 'PersonalCollectionTypeEnum', 'Personal Collection Type Classification', 'Q134886297',
     'Types of personal/private heritage collections from Wikidata.'),
    ('T', 'TasteScentHeritageTypeEnum', 'Taste/Scent Heritage Type Classification', None,
     'Types of culinary and olfactory heritage custodians.'),
)

# Maps each type code to the settings of the enum file generated for it
TYPE_CONFIG = {
    code: {
        'enum_name': enum_name,
        'title': title,
        'base_wikidata': base_wikidata,
        'description': description,
    }
    for code, enum_name, title, base_wikidata, description in _TYPE_ROWS
}
|
||||
|
||||
def sanitize_enum_value(label: str) -> str:
    """Turn a human-readable label into an UPPER_SNAKE_CASE enum value name.

    Characters other than word characters, whitespace and hyphens are
    dropped, runs of whitespace/hyphens become a single underscore, and a
    name that does not start with a letter is prefixed with ``TYPE_``.
    Returns ``'UNKNOWN'`` when nothing is left.
    """
    without_punctuation = re.sub(r'[^\w\s-]', '', label)
    name = re.sub(r'[\s-]+', '_', without_punctuation).upper().strip('_')

    if not name:
        return 'UNKNOWN'
    if name[0].isalpha():
        return name
    return 'TYPE_' + name
|
||||
|
||||
def load_hyponyms(filepath: Path) -> list:
    """Load the list stored under the 'hypernym' key of a YAML file.

    The whole file is parsed in one go with ``yaml.safe_load``. Returns an
    empty list when the file is empty, is not a mapping, or has no (or a
    null) 'hypernym' key.
    """
    print(f"Loading hyponyms from {filepath}...")

    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    # safe_load returns None for an empty document
    if not isinstance(data, dict):
        return []
    return data.get('hypernym') or []
|
||||
|
||||
def group_by_type(hyponyms: list) -> dict:
    """Group hyponyms by their type codes.

    Each item contributes one record per code in its ``curated.type`` list.
    A record holds the identifier (``curated.label``, falling back to
    ``wikidata.id``), a display label (English, else Dutch, else the first
    available, else the identifier), a description (English, else Dutch)
    and all labels. Sections that are missing or null are treated as empty.

    Returns:
        A defaultdict mapping type code -> list of records.
    """
    by_type = defaultdict(list)

    for item in hyponyms:
        # "or {}" guards against sections present in the YAML but null
        curated = item.get('curated') or {}
        wikidata = item.get('wikidata') or {}
        types = curated.get('type') or []

        # Identifier and label maps
        qid = curated.get('label', wikidata.get('id', ''))
        labels = wikidata.get('labels') or {}
        descriptions = wikidata.get('descriptions') or {}

        # English label, then Dutch, then first available, then identifier
        en_label = (
            labels.get('en')
            or labels.get('nl')
            or next(iter(labels.values()), None)
            or qid
        )
        en_desc = descriptions.get('en') or descriptions.get('nl') or ''

        for t in types:
            by_type[t].append({
                'qid': qid,
                'label': en_label,
                'description': en_desc,
                'labels': labels,
            })

    return by_type
|
||||
|
||||
def generate_enum_yaml(type_code: str, items: list, config: dict, output_dir: Path) -> Path:
    """Generate a LinkML enum YAML file for a custodian type.

    Args:
        type_code: Type code of the category (not used for the output; kept
            for the caller's signature).
        items: Records with 'qid', 'label', 'description' and optional
            'labels', as produced by group_by_type().
        config: Entry of TYPE_CONFIG with 'enum_name', 'title' and
            'description'.
        output_dir: Directory for ``<enum_name>.yaml``; created if missing.

    Returns:
        Path of the written file.
    """
    enum_name = config['enum_name']
    output_file = output_dir / f"{enum_name}.yaml"

    # Languages whose labels are kept as comments on a value
    comment_languages = ('nl', 'de', 'fr', 'es', 'it')

    # Build permissible values
    permissible_values = {}
    seen_names = set()

    for item in sorted(items, key=lambda x: x['label'].lower()):
        qid = item['qid']
        label = item['label']
        description = item['description']

        # Generate enum value name
        value_name = sanitize_enum_value(label)

        # Handle duplicates by appending a running counter (_1, _2, ...)
        original_name = value_name
        counter = 1
        while value_name in seen_names:
            value_name = f"{original_name}_{counter}"
            counter += 1
        seen_names.add(value_name)

        # Build value definition
        value_def = {
            'description': description if description else f"{label} ({qid})",
            'meaning': f"wikidata:{qid}",
        }

        # Add multilingual labels as comments if available
        if item.get('labels'):
            other_labels = [
                f"{lbl} ({lang})"
                for lang, lbl in sorted(item['labels'].items())
                if lang in comment_languages
            ]
            if other_labels:
                value_def['comments'] = other_labels[:3]  # Limit to 3 translations

        permissible_values[value_name] = value_def

    # Build the full YAML structure
    generated_at = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    yaml_content = {
        'id': f"https://nde.nl/ontology/hc/enum/{enum_name}",
        'name': enum_name,
        'title': config['title'],
        'description': f"{config['description']}\n\nGenerated: {generated_at}\nTotal values: {len(permissible_values)}",
        'enums': {
            enum_name: {
                'permissible_values': permissible_values
            }
        }
    }

    # Make sure the target directory exists on a fresh checkout
    output_dir.mkdir(parents=True, exist_ok=True)

    # Write YAML file
    with open(output_file, 'w', encoding='utf-8') as f:
        yaml.dump(yaml_content, f,
                  default_flow_style=False,
                  allow_unicode=True,
                  sort_keys=False,
                  width=100)

    return output_file
|
||||
|
||||
def main():
    """Generate one enum YAML file per configured type code and print a summary.

    Loads the curated hyponyms file, groups the entries by type code, and
    calls ``generate_enum_yaml`` for every entry of ``TYPE_CONFIG`` that has
    items. Type code ``'F'`` is never generated because FeatureTypeEnum
    already exists.

    Returns:
        0 on success, 1 when the hyponyms file does not exist.
    """
    # Locations are derived from this script's position in the repository.
    root = Path(__file__).parent.parent
    source_path = root / 'data/wikidata/GLAMORCUBEPSXHFN/hyponyms_curated_full.yaml'
    enum_dir = root / 'schemas/20251121/linkml/modules/enums'

    if not source_path.exists():
        print(f"❌ Hyponyms file not found: {source_path}")
        return 1

    # Load the curated entries and bucket them by type code.
    entries = load_hyponyms(source_path)
    print(f"✅ Loaded {len(entries)} hyponyms")

    buckets = group_by_type(entries)
    print(f"✅ Grouped into {len(buckets)} type categories")

    # One human-readable note per type, reported in the summary below.
    written_notes = []
    skipped_notes = []

    for type_code, config in TYPE_CONFIG.items():
        members = buckets.get(type_code, [])

        if not members:
            skipped_notes.append(f"{type_code} ({config['enum_name']}): no items")
        elif type_code == 'F':
            # Feature type is maintained separately as FeatureTypeEnum.
            skipped_notes.append(f"F (FeatureTypeEnum): already exists with {len(members)} items")
        else:
            target = generate_enum_yaml(type_code, members, config, enum_dir)
            written_notes.append(
                f"{type_code} ({config['enum_name']}): {len(members)} values → {target.name}"
            )

    # Summary
    rule = "=" * 60
    print("\n" + rule)
    print("GENERATION SUMMARY")
    print(rule)

    print(f"\n✅ Generated {len(written_notes)} enum files:")
    for note in written_notes:
        print(f" {note}")

    if skipped_notes:
        print(f"\n⏭️ Skipped {len(skipped_notes)} types:")
        for note in skipped_notes:
            print(f" {note}")

    print("\n✅ Done!")
    return 0
|
||||
|
||||
if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status.
    # SystemExit is raised directly instead of calling exit(): that helper is
    # installed by the ``site`` module for interactive sessions and is not
    # guaranteed to exist (e.g. ``python -S`` or frozen executables).
    raise SystemExit(main())
|
||||
291
scripts/generate_mermaid_with_instances.py
Normal file
291
scripts/generate_mermaid_with_instances.py
Normal file
|
|
@ -0,0 +1,291 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate Mermaid ER diagrams with instance data from LinkML schemas.
|
||||
|
||||
This script extends the standard Mermaid generation to include:
|
||||
1. All classes and their relationships
|
||||
2. Enum values (from LinkML schema)
|
||||
3. Instance data (from instances/enums/*.yaml) as annotations
|
||||
|
||||
The instance data provides semantically meaningful "allowed values" for
|
||||
CustodianType classes like MuseumType, LibraryType, HeritageSocietyType, etc.
|
||||
|
||||
Usage:
|
||||
python3 scripts/generate_mermaid_with_instances.py
|
||||
|
||||
Output:
|
||||
frontend/public/data/heritage_custodian_ontology.mmd
|
||||
schemas/20251121/uml/mermaid/complete_schema_with_instances_YYYYMMDD_HHMMSS.mmd
|
||||
"""
|
||||
import sys
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from linkml_runtime.utils.schemaview import SchemaView
|
||||
|
||||
# --- Input and output locations -------------------------------------------
SCHEMA_PATH = "schemas/20251121/linkml/01_custodian_name_modular.yaml"
INSTANCES_DIR = "schemas/20251121/linkml/instances/enums"
OUTPUT_DIR = "schemas/20251121/uml/mermaid"
FRONTEND_OUTPUT = "frontend/public/data/heritage_custodian_ontology.mmd"

# --- Diagram filtering ------------------------------------------------------
# Technical classes that carry no semantic meaning and are left out of diagrams.
EXCLUDED_CLASSES = {
    # LinkML tree_root used for validation only; not part of the ontology.
    "Container",
}

# Readability caps: at most this many values are listed per enum entity.
MAX_ENUM_VALUES_IN_DIAGRAM = 10  # values taken from the schema's permissible_values
MAX_INSTANCE_VALUES = 15  # values taken from the instance files

# --- Instance data lookup ---------------------------------------------------
# Enum name -> file name (looked up inside the instances directory) that holds
# the instance records for that enum.
ENUM_INSTANCE_FILES = {
    "CustodianPrimaryTypeEnum": "custodian_primary_type.yaml",
    "AppellationTypeEnum": "appellation_type.yaml",
    "OrganizationalChangeEventTypeEnum": "organizational_change_event_type.yaml",
    "StaffRoleTypeEnum": "staff_role_type.yaml",
    "OrganizationalUnitTypeEnum": "organizational_unit_type.yaml",
    "LegalStatusEnum": "legal_status_type.yaml",
    "PlaceSpecificityEnum": "place_specificity.yaml",
    "EncompassingBodyTypeEnum": "encompassing_body_type.yaml",
    "AuxiliaryDigitalPlatformTypeEnum": "auxiliary_digital_platform_type.yaml",
    "AgentTypeEnum": "agent_type.yaml",
    "EntityTypeEnum": "entity_type.yaml",
    "SourceDocumentTypeEnum": "source_document_type.yaml",
    "ReconstructionActivityTypeEnum": "reconstruction_activity_type.yaml",
    "WebPortalTypeEnum": "web_portal_type.yaml",
    "SocialMediaPlatformTypeEnum": "social_media_platform_type.yaml",
    "RecordsLifecycleStageEnum": "records_lifecycle_stage.yaml",
    "ArchiveProcessingStatusEnum": "archive_processing_status.yaml",
    "StorageTypeEnum": "storage_type.yaml",
    "DigitalPresenceTypeEnum": "digital_presence_type.yaml",
    "FeatureTypeEnum": "feature_type.yaml",
    "ProjectStatusEnum": "project_status.yaml",
    "FinancialStatementTypeEnum": "financial_statement_type.yaml",
    "StorageConditionStatusEnum": "storage_condition_status.yaml",
    "AuxiliaryPlaceTypeEnum": "auxiliary_place_type.yaml",
    "GiftShopTypeEnum": "gift_shop_type.yaml",
    "FundingRequirementTypeEnum": "funding_requirement_type.yaml",
    "OrganizationBranchTypeEnum": "organization_branch_type.yaml",
}
|
||||
|
||||
|
||||
def _summarise_instance(record):
    """Reduce one raw instance record to the four fields used by the diagram."""
    value = record.get('value', '')
    code = record.get('code', '')

    # English preferred label, only when the label block is a mapping.
    pref_label = record.get('skos:prefLabel', {})
    if isinstance(pref_label, dict):
        en_label = pref_label.get('en', '')
    else:
        en_label = ''

    # Wikidata entity id, only when the wikidata block is a mapping.
    wikidata = record.get('wikidata', {})
    if isinstance(wikidata, dict):
        qid = wikidata.get('entity', '')
    else:
        qid = ''

    return {
        'value': value,
        'code': code,
        'label': en_label,
        'wikidata': qid
    }


def load_instance_data(instances_dir: Path) -> dict:
    """Read the instance YAML file of every enum listed in ENUM_INSTANCE_FILES.

    Files that do not exist, and files without a truthy document containing
    an ``instances`` key, are skipped without a message. Any error raised
    while reading or interpreting a file is reported on stderr as a warning
    and that enum is left out of the result.

    Args:
        instances_dir: Directory containing the instance YAML files.

    Returns:
        Mapping of enum name to a dict with ``name``, ``description`` and
        ``values``; ``values`` is a list of dicts with the keys ``value``,
        ``code``, ``label`` and ``wikidata``.
    """
    loaded = {}

    for enum_name, filename in ENUM_INSTANCE_FILES.items():
        source = instances_dir / filename
        if not source.exists():
            continue

        try:
            with open(source, 'r', encoding='utf-8') as handle:
                document = yaml.safe_load(handle)

            if not document or 'instances' not in document:
                continue

            # Keep only the value name, code, English label and Wikidata id.
            entries = []
            for record in document['instances']:
                entries.append(_summarise_instance(record))

            loaded[enum_name] = {
                'name': document.get('name', enum_name),
                'description': document.get('description', ''),
                'values': entries
            }
            print(f" ✓ Loaded {len(entries)} instances from {filename}", file=sys.stderr)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            print(f" ⚠ Warning: Could not load {filename}: {e}", file=sys.stderr)

    return loaded
|
||||
|
||||
|
||||
def _class_entity_lines(sv, class_name):
    """Return the entity block (header, one line per slot, closing brace) for a class."""
    block = [f"{class_name} {{"]

    for slot_name in sv.class_slots(class_name):
        slot = sv.induced_slot(slot_name, class_name)
        if not slot:
            continue

        # Slots without an explicit range are shown as plain strings.
        slot_range = slot.range if slot.range else "string"

        # Attributes pointing at excluded (technical) classes are hidden.
        if slot_range in EXCLUDED_CLASSES:
            continue

        # Format: type attribute_name
        multivalued_marker = "List" if slot.multivalued else ""
        required_marker = " PK" if slot.required else ""
        block.append(f" {slot_range}{multivalued_marker} {slot_name}{required_marker}")

    block.append("}")
    return block


def _enum_entity_lines(enum_name, enum_def, instance_data):
    """Return the entity block for an enum, preferring instance data over schema values."""
    block = [f"{enum_name} {{", " string enum_type PK"]

    if enum_name in instance_data:
        # Records without a value would yield a nameless attribute line,
        # so they are left out of the diagram.
        records = [r for r in instance_data[enum_name]['values'] if r.get('value')]
        limit = MAX_INSTANCE_VALUES
        total = len(records)

        for record in records[:limit]:
            # Mermaid ER entities cannot carry comments, so the code is folded
            # into the attribute name to keep it visible.
            code = record.get('code', '')
            if code:
                block.append(f" string {record['value']}_{code}")
            else:
                block.append(f" string {record['value']}")
    else:
        # Fall back to the permissible values declared in the schema.
        names = list(enum_def.permissible_values.keys())
        limit = MAX_ENUM_VALUES_IN_DIAGRAM
        total = len(names)

        for value_name in names[:limit]:
            block.append(f" string {value_name}")

    # Placeholder attribute telling the reader how many values were cut off.
    if total > limit:
        block.append(f" string _and_{total - limit}_more")

    block.append("}")
    return block


def _relationship_lines(sv, class_name, class_names, enum_names):
    """Return the inheritance and association relationship lines of one class.

    ``class_names`` and ``enum_names`` are the sets of entities present in the
    diagram; only slots whose range is in one of them produce a relationship.
    """
    rels = []
    cls = sv.get_class(class_name)

    # Inheritance relationships
    if cls.is_a and cls.is_a not in EXCLUDED_CLASSES:
        rels.append(f'{class_name} ||--|| {cls.is_a} : "inherits"')

    # Association relationships
    for slot_name in sv.class_slots(class_name):
        slot = sv.induced_slot(slot_name, class_name)
        if not slot or not slot.range:
            continue

        if slot.range in class_names:
            if slot.multivalued:
                cardinality = "||--}|" if slot.required else "||--}o"
            else:
                cardinality = "||--||" if slot.required else "||--|o"
        elif slot.range in enum_names:
            cardinality = "||--}o" if slot.multivalued else "||--|o"
        else:
            # Range is neither a diagrammed class nor a diagrammed enum.
            continue

        rels.append(f'{class_name} {cardinality} {slot.range} : "{slot_name}"')

    return rels


def generate_mermaid_with_instances(sv: SchemaView, instance_data: dict, include_enums: bool = True) -> str:
    """
    Generate Mermaid ER diagram with instance data annotations.

    The result is a fenced ``mermaid`` block containing, in order: one entity
    per class (classes in EXCLUDED_CLASSES are omitted), one entity per enum
    that has permissible values, and the inheritance/association
    relationships between them.

    Args:
        sv: Schema view supplying the classes, slots and enums.
        instance_data: Mapping of enum name to a dict whose ``values`` entry
            is a list of dicts with a ``value`` and an optional ``code``.
            Enums found here list these values (up to MAX_INSTANCE_VALUES)
            instead of the schema's permissible values (up to
            MAX_ENUM_VALUES_IN_DIAGRAM).
        include_enums: When False, no enum entities and no class-to-enum
            relationships are emitted.

    Returns:
        The diagram text, newline-joined, ending with a newline.
    """
    lines = [
        "```mermaid",
        "erDiagram",
        "",
        " %% Heritage Custodian Ontology - Complete Schema with Instance Data",
        f" %% Generated: {datetime.now().isoformat()}",
        f" %% Schema: {sv.schema.name}",
        "",
    ]

    # Entities that appear in the diagram, in schema order.
    class_names = [c for c in sv.all_classes() if c not in EXCLUDED_CLASSES]
    enum_names = list(sv.all_enums()) if include_enums else []

    # Class entities
    for class_name in class_names:
        lines.extend(_class_entity_lines(sv, class_name))

    # Enum entities, enriched with instance data where available
    if enum_names:
        lines.append("")
        lines.append(" %% Enumerations with Instance Data")
        for enum_name in enum_names:
            enum_def = sv.get_enum(enum_name)
            if enum_def and enum_def.permissible_values:
                lines.extend(_enum_entity_lines(enum_name, enum_def, instance_data))

    lines.append("")

    # Relationships; sets give constant-time range lookups for every slot.
    class_lookup = set(class_names)
    enum_lookup = set(enum_names)
    for class_name in class_names:
        lines.extend(_relationship_lines(sv, class_name, class_lookup, enum_lookup))

    lines.append("")
    lines.append("```")
    lines.append("")

    return '\n'.join(lines)
|
||||
|
||||
|
||||
def main():
    """Main entry point.

    Loads the schema at SCHEMA_PATH and the instance data under
    INSTANCES_DIR, renders the Mermaid ER diagram, and writes it twice: to a
    timestamped file inside OUTPUT_DIR and to FRONTEND_OUTPUT (overwritten on
    every run). Progress messages go to stderr.
    """
    banner = "=" * 60
    print(banner, file=sys.stderr)
    print("Mermaid ER Diagram Generator with Instance Data", file=sys.stderr)
    print(banner, file=sys.stderr)

    # Load schema
    print(f"\nLoading schema: {SCHEMA_PATH}", file=sys.stderr)
    sv = SchemaView(SCHEMA_PATH)
    print(f"✓ Loaded schema: {sv.schema.name}", file=sys.stderr)
    print(f" Classes: {len(list(sv.all_classes()))}", file=sys.stderr)
    print(f" Enums: {len(list(sv.all_enums()))}", file=sys.stderr)

    # Load instance data
    instances_dir = Path(INSTANCES_DIR)
    print(f"\nLoading instance data from: {instances_dir}", file=sys.stderr)
    instance_data = load_instance_data(instances_dir)
    print(f"✓ Loaded {len(instance_data)} enum instance files", file=sys.stderr)

    # Generate Mermaid
    print("\nGenerating Mermaid ER diagram...", file=sys.stderr)
    mermaid = generate_mermaid_with_instances(sv, instance_data)

    # Timestamp used in the archived file name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Ensure output directories exist
    output_dir = Path(OUTPUT_DIR)
    output_dir.mkdir(parents=True, exist_ok=True)
    frontend_path = Path(FRONTEND_OUTPUT)
    frontend_path.parent.mkdir(parents=True, exist_ok=True)

    # Write to schemas directory (timestamped). The encoding is explicit so the
    # output matches the UTF-8 used to read the instance files, regardless of
    # the platform's default encoding.
    schema_output = output_dir / f"complete_schema_with_instances_{timestamp}.mmd"
    schema_output.write_text(mermaid, encoding='utf-8')
    print(f"\n✓ Generated: {schema_output}", file=sys.stderr)
    # Report the UTF-8 encoded length: len(mermaid) would count characters.
    print(f" Size: {len(mermaid.encode('utf-8'))} bytes", file=sys.stderr)

    # Write to frontend directory (overwrite)
    frontend_path.write_text(mermaid, encoding='utf-8')
    print(f"✓ Updated frontend: {frontend_path}", file=sys.stderr)

    print("\n" + banner, file=sys.stderr)
    print("Done! The UML diagram now includes instance data.", file=sys.stderr)
    print(banner, file=sys.stderr)
|
||||
|
||||
|
||||
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in a new issue