diff --git a/data/nde/enriched/entries/0000_Q22246632.yaml b/data/nde/enriched/entries/0000_Q22246632.yaml index a4d6938ed0..aa6d9f5976 100644 --- a/data/nde/enriched/entries/0000_Q22246632.yaml +++ b/data/nde/enriched/entries/0000_Q22246632.yaml @@ -763,6 +763,19 @@ provenance: - rating - reviews - opening_hours + youtube: + - source_type: youtube_data_api + fetch_timestamp: '2025-12-01T15:49:04.188036+00:00' + api_endpoint: https://www.googleapis.com/youtube/v3 + channel_id: null + claims_extracted: + - channel_info + - subscriber_count + - video_count + - view_count + - recent_videos + - video_comments + - video_transcripts data_tier_summary: TIER_1_AUTHORITATIVE: - original_entry (NDE CSV) @@ -1204,3 +1217,15 @@ custodian_name: selection_method: priority_ranking selection_priority: 70 extraction_timestamp: '2025-12-01T12:35:22.923802+00:00' +youtube_enrichment: + source_url: https://www.youtube.com/channel/UCaONHfdTkBYYpJsl0eqJ4zw + fetch_timestamp: '2025-12-01T15:49:04.188036+00:00' + api_endpoint: https://www.googleapis.com/youtube/v3 + api_version: v3 + identifier_type: channel_id + identifier_value: UCaONHfdTkBYYpJsl0eqJ4z + channel: + error: 'Channel not found: UCaONHfdTkBYYpJsl0eqJ4z' + videos: [] + videos_count: 0 + status: SUCCESS diff --git a/data/nde/enriched/entries/0001_Q2679819.yaml b/data/nde/enriched/entries/0001_Q2679819.yaml index fd6083c649..5e17a8dc52 100644 --- a/data/nde/enriched/entries/0001_Q2679819.yaml +++ b/data/nde/enriched/entries/0001_Q2679819.yaml @@ -1001,3 +1001,20 @@ custodian_name: selection_method: priority_ranking selection_priority: 60 extraction_timestamp: '2025-12-01T12:35:23.014695+00:00' +digital_platforms: +- platform_name: Hunebedcentrum + platform_url: http://www.hunebedcentrum.eu/ + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q2679819 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:19.269100+00:00' +- platform_name: 
Hunebedcentrum Twitter/X + platform_url: https://twitter.com/hunebedcentrum + platform_type: SOCIAL_MEDIA_TWITTER + provenance: + source_type: wikidata_p2002 + wikidata_id: Q2679819 + data_tier: TIER_2_VERIFIED diff --git a/data/nde/enriched/entries/0002_Q1978308.yaml b/data/nde/enriched/entries/0002_Q1978308.yaml index d68850ec6e..499a30aaa5 100644 --- a/data/nde/enriched/entries/0002_Q1978308.yaml +++ b/data/nde/enriched/entries/0002_Q1978308.yaml @@ -446,6 +446,19 @@ provenance: - rating - reviews - opening_hours + youtube: + - source_type: youtube_data_api + fetch_timestamp: '2025-12-01T15:50:48.466418+00:00' + api_endpoint: https://www.googleapis.com/youtube/v3 + channel_id: UCBUSzZPNjS28NcLdXROqlFA + claims_extracted: + - channel_info + - subscriber_count + - video_count + - view_count + - recent_videos + - video_comments + - video_transcripts data_tier_summary: TIER_1_AUTHORITATIVE: - original_entry (NDE CSV) @@ -827,3 +840,584 @@ custodian_name: selection_method: priority_ranking selection_priority: 60 extraction_timestamp: '2025-12-01T12:35:23.086858+00:00' +youtube_enrichment: + source_url: https://www.youtube.com/drentsarchief + fetch_timestamp: '2025-12-01T15:50:48.466418+00:00' + api_endpoint: https://www.googleapis.com/youtube/v3 + api_version: v3 + identifier_type: custom_url + identifier_value: drentsarchief + channel: + channel_id: UCBUSzZPNjS28NcLdXROqlFA + channel_url: https://www.youtube.com/channel/UCBUSzZPNjS28NcLdXROqlFA + title: Drents Archief + description: 'Het Drents Archief verzamelt en conserveert historische beeld- en + geluidsdragers en maakt de inhoud voor een breed publiek toegankelijk. + + + Filmmateriaal is kwetsbaar en vernietigt op den duur zichzelf. Oude films vervagen, + verkleuren en verbrokkelen. Het materiaal is gevoelig voor schommelingen in + temperatuur en luchtvochtigheid. Daarom is het belangrijk dat film wordt bewaard + in een depot onder goede klimaatbeheersing. 
Conservering is dus echt noodzakelijk + en dat doen wij als Drents Archief. + + + Het Drents Archief zet de oude films en geluidsbanden over op moderne digitale + dragers. Het originele materiaal kan op die manier onder de beste omstandigheden + in het depot bewaard blijven.' + custom_url: '@drentsarchief' + published_at: '2009-01-23T15:59:56Z' + country: NL + default_language: null + thumbnail_url: https://yt3.ggpht.com/hK5VKnDKf-39fO6pFvHQq-1QxA8Cn683j_lZtcSxOFjCcEGvBJGOSQb7iRBxWvg53Unyip3i=s800-c-k-c0x00ffffff-no-rj + banner_url: https://yt3.googleusercontent.com/jRWO4i0_0T_kOmjb8n-UPW-76kdT_9iKzxVMAxjKBS6j8GlbsmSaSCGhSLALVIT-UatfqpgyIA + subscriber_count: 2980 + video_count: 835 + view_count: 2608160 + subscriber_count_hidden: false + uploads_playlist_id: UUBUSzZPNjS28NcLdXROqlFA + videos: + - video_id: GptwvdUuLzA + video_url: https://www.youtube.com/watch?v=GptwvdUuLzA + title: 4 - Het vrouwencafe + description: '"Weet uw moeder wel dat u zo''n duur huis heeft gekocht?" Ook Drentse + vrouwen werden steeds zelfstandiger, ze kregen fulltimebanen – soms zelfs als + kostwinner, en er ontstonden speciale vrouwencafés, zoals op maandagavonden + in Het Keldertje in Hoogeveen. Niet iedereen was blij met deze ontwikkelingen. + Er werd vreemd gekeken naar vrouwen die zelfstandig een huis kochten. En ook + samenkomsten waarin vrouwen spraken over alles wat hen bezighield, waren voor + veel mannen onbegrijpelijk.  ' + published_at: '2025-11-13T14:52:15Z' + duration: PT47M48S + definition: hd + caption_available: false + view_count: 3 + like_count: 0 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/GptwvdUuLzA/hqdefault.jpg + default_language: nl + default_audio_language: nl + comments: [] + comments_fetched: 0 + - video_id: Iat_KWNamiE + video_url: https://www.youtube.com/watch?v=Iat_KWNamiE + title: 1 - Maandag is wasdag + description: ‘Maandag is wasdag in Vries, op donderdag gaan de vrouwen naar de + markt en vrijdag wassen ze de ramen. 
Een buurvrouw die 's ochtends de deur uit + gaat, wordt daarop aangekeken. En ook wie voor de middag in de tuin werkt, pleegt + inbreuk op een ongeschreven regel.’ We gaan terug naar het Drenthe van de jaren + 70. Onder aanvoering van Dolle Mina kwam in die tijd de Randstad de tweede feministische + golf op gang. Wat bracht die teweeg in leven van de Drentse (plattelands)vrouw?  +   + published_at: '2025-11-13T14:51:59Z' + duration: PT38M27S + definition: hd + caption_available: false + view_count: 2 + like_count: 0 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/Iat_KWNamiE/hqdefault.jpg + default_language: nl + default_audio_language: nl + comments: [] + comments_fetched: 0 + - video_id: BCz9XnZjQUw + video_url: https://www.youtube.com/watch?v=BCz9XnZjQUw + title: 3 - De VOS-cursus + description: '"Die vrouwen hebben zich ook wat in het hoofd gehaald," zeiden de + boeren in Zeijen schamper toen vrouwen zich inschreven voor de cursus Engels + van de Bond voor Plattelandsvrouwen. In de jaren 70 kwam de VOS-cursus op. VOS + stond voor ‘Vrouwen Oriënteren zich op de Samenleving, maar mannen zeiden vaak + gekscherend ‘vrouwen oriënteren zich op de scheiding. Boerin Dini Iepema uit + Roden deed mee aan de VOS-cursus ''spreken in het openbaar’. Vijftig jaar later + heeft ze haar 5-minutenpraatje dat ze hierv' + published_at: '2025-11-13T14:52:09Z' + duration: PT49M51S + definition: hd + caption_available: false + view_count: 1 + like_count: 0 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/BCz9XnZjQUw/hqdefault.jpg + default_language: nl + default_audio_language: nl + comments: [] + comments_fetched: 0 + - video_id: MAEAxTDryT4 + video_url: https://www.youtube.com/watch?v=MAEAxTDryT4 + title: 2 - Plattelandsvrouwen + description: '“We zijn overal geweest, ik vind dus: je kunt overal over meepraten.” + Drenthe kende van oudsher veel boerinnen. 
Uit onderzoek uit 1974 bleek dat 96% + van de Drentse boerinnen meehielpen op de boerderij: hooien, kalveren voeren + en melken. Steeds meer van hen sloten zich ook aan bij de plattelandsvrouwen + en kregen daar bestuurlijke functie, Zoals de nu 83-jarige Ans Stevens: zij + kreeg zo de kans zich ook op andere vlakken te ontwikkelen. ' + published_at: '2025-11-13T14:52:05Z' + duration: PT52M31S + definition: hd + caption_available: false + view_count: 3 + like_count: 0 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/MAEAxTDryT4/hqdefault.jpg + default_language: nl + default_audio_language: nl + comments: [] + comments_fetched: 0 + - video_id: O0AU7Wkk16w + video_url: https://www.youtube.com/watch?v=O0AU7Wkk16w + title: De W van... Wilhelmina Ziekenhuis + description: "In de jaren 30 maakte directeur-geneesheer Mook deze film over het\ + \ reilen en zeilen in het Wilhelmina Ziekenhuis in Assen. We zien artsen en\ + \ zusters aan het werk, patiënten op het terras en alle verpleegsters worden\ + \ aan de kijker voorgesteld. Het ziekenhuis zat in die tijd aan de Oosterhoutstraat\ + \ 11. \n\nIn 2025 schotelen we jullie het ABC van Drenthe op film voor. Uit\ + \ onze rijke filmcollectie kiezen we elke twee weken een letter. Van Adolfs\ + \ tot Zuidlaren. Laat je op zaterdagen verrassen door bewe" + published_at: '2025-11-08T16:00:36Z' + duration: PT2M13S + definition: hd + caption_available: false + view_count: 221 + like_count: 8 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/O0AU7Wkk16w/hqdefault.jpg + default_language: nl + default_audio_language: nl + comments: [] + comments_fetched: 0 + - video_id: C6sYdB9jNeA + video_url: https://www.youtube.com/watch?v=C6sYdB9jNeA + title: De V van... vlindertuin + description: "Wie heeft de vlindertuin in de ‘oude’ dierentuin niet bezocht? Met\ + \ een schoolreisje of een familiebezoek? 
Met deze beelden uit de jaren 90 waan\ + \ je je weer even tussen de gekleurde vlinders in de Emmer kas. Deel jouw herinneringen\ + \ aan de vlindertuin van het Noorderdierenpark in de comments! \n\nIn 2025\ + \ schotelen we jullie het ABC van Drenthe op film voor. Uit onze rijke filmcollectie\ + \ kiezen we elke twee weken een letter. Van Adolfs tot Zuidlaren. Laat je op\ + \ zaterdagen verrassen door bewegende Dren" + published_at: '2025-10-25T15:00:18Z' + duration: PT1M51S + definition: hd + caption_available: false + view_count: 52 + like_count: 3 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/C6sYdB9jNeA/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: 0KAS61XTNx4 + video_url: https://www.youtube.com/watch?v=0KAS61XTNx4 + title: De U van... Uffelte + description: "Deze week reizen we af naar Uffelte! Dat deed filmmaker Johan Adolfs\ + \ in 1950 namelijk ook. Hij schoot er toen een dorpsfilm en legde de dorpsbewoners\ + \ en het dorp zelf vast op beeld. Adolfs schoof onder meer aan bij de dames\ + \ van de Plattelandsvrouwen. Herken jij nog iemand op deze beelden? \n\nIn 2025\ + \ schotelen we jullie het ABC van Drenthe op film voor. Uit onze rijke filmcollectie\ + \ kiezen we elke twee weken een letter. Van Adolfs tot Zuidlaren. Laat je op\ + \ zaterdagen verrassen door bewegende Drents" + published_at: '2025-10-11T15:00:02Z' + duration: PT1M48S + definition: hd + caption_available: false + view_count: 351 + like_count: 5 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/0KAS61XTNx4/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: lgOJbYBkx28 + video_url: https://www.youtube.com/watch?v=lgOJbYBkx28 + title: De T van... turf + description: "Lange tijd werd turf gestoken met de hand, maar ook deze sector\ + \ werd gemechaniseerd. In deze film zie je onder meer een machine die over de\ + \ gedroogde turven rijdt. 
Via smalspoor en vrachtwagen komen de turven bij de\ + \ fabriek van Purit in Klazienaveen terecht. \n\nIn 2025 schotelen we jullie\ + \ het ABC van Drenthe op film voor. Uit onze rijke filmcollectie kiezen we elke\ + \ twee weken een letter. Van Adolfs tot Zuidlaren. Laat je op zaterdagen verrassen\ + \ door bewegende Drentse beelden door de jaren heen. V" + published_at: '2025-09-27T15:01:19Z' + duration: PT1M41S + definition: hd + caption_available: false + view_count: 549 + like_count: 5 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/lgOJbYBkx28/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: VrMg8fow2H4 + video_url: https://www.youtube.com/watch?v=VrMg8fow2H4 + title: De R van... rally + description: "In de omgeving van Emmen wordt op 21 juli 1951 de ‘Solex boortoren\ + \ rally’ gehouden. De organisatie is in handen van ‘de eerste Drentse Solex\ + \ club Emmen’. 690 deelnemers verschijnen aan de start en zij doen onder meer\ + \ Coevorden en Schoonebeek aan. De finish is op de Boslaan in Emmen. Hier krijgen\ + \ de deelnemers een plaquette uitgereikt. \n\nIn 2025 schotelen we jullie het\ + \ ABC van Drenthe op film voor. Uit onze rijke filmcollectie kiezen we elke\ + \ twee weken een letter. Van Adolfs tot Zuidlaren. Laat j" + published_at: '2025-08-30T15:00:56Z' + duration: PT1M40S + definition: hd + caption_available: false + view_count: 79 + like_count: 3 + comment_count: 1 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/VrMg8fow2H4/hqdefault.jpg + default_language: nl + default_audio_language: en + - video_id: hxng1OQp8wo + video_url: https://www.youtube.com/watch?v=hxng1OQp8wo + title: De Q van... quiz + description: "Wie wint de scholencompetitie 1995? In de finale staan OBS Tynaarlo\ + \ en OBS De Vlindertuin uit Veenhuizen tegenover elkaar. In het ICO in Assen\ + \ (nu Podium Zuidhaege) vindt de beslissende quiz plaats. Omroep Assen legt\ + \ de scholenstrijd vast. 
Herken je iemand op deze beelden? Tag hem of haar!\ + \ \n\nIn 2025 schotelen we jullie het ABC van Drenthe op film voor. Uit onze\ + \ rijke filmcollectie kiezen we elke twee weken een letter. Van Adolfs tot Zuidlaren.\ + \ Laat je op zaterdagen verrassen door bewegende Dren" + published_at: '2025-08-16T15:01:09Z' + duration: PT2M28S + definition: hd + caption_available: false + view_count: 42 + like_count: 2 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/hxng1OQp8wo/hqdefault.jpg + default_language: nl + default_audio_language: en + - video_id: fcX4Nw20BjI + video_url: https://www.youtube.com/watch?v=fcX4Nw20BjI + title: Café 't Keerpunt in Spijkerboor - deel 2 + description: 'Café ''t Keerpunt in Spijkerboor is waarschijnlijk het oudste nog + bestaande café van Drenthe. Dit jaar bestaat het 275 jaar. Kastelein Ivo Dijkema + dook daarom in de geschiedenis van zijn kroeg. "We zijn op zoek naar verhalen + uit het verleden en ik wil meer kunnen vertellen over de eerste honderd jaar + dat het café hier stond." + + + In deze video neemt Dijkema ons mee naar zijn café in Spijkerboor. Hij laat + zie wat er nog te zien is van de rijke geschiedenis van zijn dorpskroeg. Er + zit onder een ijzer' + published_at: '2025-07-21T10:00:56Z' + duration: PT3M33S + definition: hd + caption_available: false + view_count: 172 + like_count: 4 + comment_count: 3 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/fcX4Nw20BjI/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: 5ldr6whsj9s + video_url: https://www.youtube.com/watch?v=5ldr6whsj9s + title: Café 't Keerpunt in Spijkerboor - deel 1 + description: "Café 't Keerpunt in Spijkerboor is waarschijnlijk het oudste nog\ + \ bestaande café van Drenthe. Dit jaar bestaat het 275 jaar. 
Kastelein Ivo Dijkema\ + \ dook daarom in de geschiedenis van zijn kroeg: \"Het is echt een authentiek\ + \ dorpscafé en de geschiedenis is voor ons heel belangrijk, waarom bestaat het\ + \ als 275 jaar als café?\" \nWat is er over 't Keerpunt te vinden in het Drents\ + \ Archief? Dijkema bekeek onder meer oude kaarten en ontdekte welke schat aan\ + \ informatie je uit notariële aktes kunt halen: \"Als" + published_at: '2025-07-21T10:00:09Z' + duration: PT1M43S + definition: hd + caption_available: false + view_count: 196 + like_count: 4 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/5ldr6whsj9s/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: So5xFyHdMEY + video_url: https://www.youtube.com/watch?v=So5xFyHdMEY + title: Albert, een Drentse loteling naar Moskou + description: Albert, een Drentse loteling naar Moskou vertelt het verhaal van + de 18-jarige Albert uit het Drentse Krakeel, in 1812 ingeloot om te dienen in + het leger van keizer Napoleon. Samen met duizenden andere Nederlandse jongens + marcheert Albert als voetsoldaat naar Moskou, waar de winter het Franse leger + overvalt. Verzwakt door honger, kou en ziekte weten slechts enkele jongens huiswaarts + te keren. Zal Albert Krakeel ooit terugzien? + published_at: '2025-06-25T07:40:30Z' + duration: PT31M56S + definition: hd + caption_available: false + view_count: 222 + like_count: 8 + comment_count: 1 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/So5xFyHdMEY/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: TVV6fuK9px8 + video_url: https://www.youtube.com/watch?v=TVV6fuK9px8 + title: De M van... marathon + description: "De eerste TT-marathon was op 26 juni 1953. In dat jaar liepen de\ + \ deelnemers alleen geen 42, maar 25 kilometer. De route voerde onder meer langs\ + \ Zeijen en Peest. Winnaar werd Janus van der Zanden in een tijd van 1.29.72.\ + \ \n\nIn 2025 schotelen we jullie het ABC van Drenthe op film voor. 
Uit onze\ + \ rijke filmcollectie kiezen we elke twee weken een letter. Van Adolfs tot Zuidlaren.\ + \ Laat je op zaterdagen verrassen door bewegende Drentse beelden door de jaren\ + \ heen. Vandaag is het de beurt aan de…M!\n\nBeel" + published_at: '2025-06-21T15:00:44Z' + duration: PT2M8S + definition: hd + caption_available: false + view_count: 107 + like_count: 2 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/TVV6fuK9px8/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: G6TQ-D7k-f8 + video_url: https://www.youtube.com/watch?v=G6TQ-D7k-f8 + title: De L van... landschap + description: "Dit is toch echt een typisch Drents landschap? Op de heide grazen\ + \ schapen en lammetjes springen vrolijk in het rond. De beelden zijn geschoten\ + \ door Klaas Nijmeijer in 1984. Waar hij dit precies deed, weten we helaas niet.\ + \ Misschien herkent iemand de plek? \n\nIn 2025 schotelen we jullie het ABC\ + \ van Drenthe op film voor. Uit onze rijke filmcollectie kiezen we elke twee\ + \ weken een letter. Van Adolfs tot Zuidlaren. Laat je op zaterdagen verrassen\ + \ door bewegende Drentse beelden door de jaren heen. Van" + published_at: '2025-06-07T15:00:20Z' + duration: PT2M18S + definition: hd + caption_available: false + view_count: 111 + like_count: 3 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/G6TQ-D7k-f8/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: YDj7JtN2YpY + video_url: https://www.youtube.com/watch?v=YDj7JtN2YpY + title: De D van... Dansen + description: "Dansen op de boerenbruiloft! Dat is het thema van de Opa- en Omadagen\ + \ deze voorjaarsvakantie! Op deze filmbeelden, gemaakt in de jaren 30, wordt\ + \ een traditionele boerenbruiloft nagespeeld. Gasten in traditionele kleding\ + \ voeren elkaar boerenjongens. De mannen roken Goudse stenen pijpen en er wordt\ + \ gedanst! 
\n\nIn 2025 schotelen we jullie het ABC van Drenthe op film voor.\ + \ Uit onze rijke filmcollectie kiezen we elke twee weken een letter. Van Adolfs\ + \ tot Zuidlaren. Laat je op zaterdagen verrassen door" + published_at: '2025-02-15T16:00:36Z' + duration: PT1M18S + definition: hd + caption_available: false + view_count: 96 + like_count: 2 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/YDj7JtN2YpY/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: rLyA6zJpZ6Q + video_url: https://www.youtube.com/watch?v=rLyA6zJpZ6Q + title: De C van... Chauffeur + description: "Chauffeur Job Berends uit Roden gaat met de VUT! Dat betekent dat\ + \ hij vervroegd met pensioen mag. Een bus van de FRAM draagt een spandoek met\ + \ de tekst ‘1948-1981 afscheid na 33 jaar trouwe dienst’. Job Berends krijgt\ + \ felicitaties en cadeaus van reizigers die bij de bushalte op hem staan te\ + \ wachten. De laatste rit van deze buschauffeur is op 20 juli 1981. \n\nIn 2025\ + \ schotelen we jullie het ABC van Drenthe op film voor. Uit onze rijke filmcollectie\ + \ kiezen we elke twee weken een letter. Van Adolfs t" + published_at: '2025-02-01T16:00:57Z' + duration: PT1M9S + definition: hd + caption_available: false + view_count: 208 + like_count: 8 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/rLyA6zJpZ6Q/hqdefault.jpg + default_language: nl + default_audio_language: it + - video_id: En5YTDfyog0 + video_url: https://www.youtube.com/watch?v=En5YTDfyog0 + title: De B van... Brand Bellevue + description: "In februari 1986 brak brand uit bij het Asser zalencentrum Bellevue.\ + \ De brandweer klom op het dak om daar de brand te blussen. Burgemeester Masman\ + \ kwam ook langs om de situatie te bekijken. \n\nGing jij in die tijd ook wel\ + \ eens naar Bellevue? Het werd als restaurant geopend in 1882. 
In het 100-jarig\ + \ bestaan was er van alles te beleven: een bruiloft voor honderden gasten, het\ + \ Kerstvolleybaltoernooi, een grote modeshow of het afscheidsconcert van Cuby\ + \ & The Blizzards. De brand in 1986 bekende niet h" + published_at: '2025-01-18T16:00:10Z' + duration: PT1M35S + definition: hd + caption_available: false + view_count: 161 + like_count: 3 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/En5YTDfyog0/hqdefault.jpg + default_language: nl + default_audio_language: en + - video_id: X76O1WNd0qM + video_url: https://www.youtube.com/watch?v=X76O1WNd0qM + title: De A van... Adolfs + description: 'Filmmaker Johan Adolfs is vooral bekend vanwege de dorpsfilms die + hij in heel Nederland maakte. In 1966 bezocht hij Schoonoord en de omliggende + plaatsen. Hij bracht natuurlijk ook een bezoekje aan Ellert en Brammert! Herken + jij iemand op deze film? + + + In 2025 schotelen we jullie het ABC van Drenthe op film voor. Uit onze rijke + filmcollectie kiezen we elke twee weken een letter. Van Adolfs tot Zuidlaren. + Laat je op zaterdagen verrassen door bewegende Drentse beelden door de jaren + heen. Vandaag is h' + published_at: '2025-01-04T16:00:26Z' + duration: PT3M22S + definition: hd + caption_available: false + view_count: 483 + like_count: 10 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/X76O1WNd0qM/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: 2eyDfn4xH_4 + video_url: https://www.youtube.com/watch?v=2eyDfn4xH_4 + title: 'Van Echten: Klederdracht' + description: 'Grada Eding Askes maakt met veel rust en precisie haar klederdracht + gereed. Ze demonstreert een kanten muts zoals deze in de jaren 20 in Drenthe + werd gedragen. Het is een hele klus om alles netjes te krijgen. Zo moet er worden + gestreken en geregen, maar uiteindelijk is mevrouw Eding Askes klaar om erop + uit te gaan! 
+ + + De in Assen geboren Frits van Echten (1932-2016) bracht zijn militaire dienst + door bij de filmdienst. Daarna werkte hij als fotograaf en bij bioscopen in + Den Haag. In 1959 keerde hij' + published_at: '2024-12-28T16:00:03Z' + duration: PT3M56S + definition: hd + caption_available: false + view_count: 116 + like_count: 6 + comment_count: 1 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/2eyDfn4xH_4/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: n6rXy2_DF3Q + video_url: https://www.youtube.com/watch?v=n6rXy2_DF3Q + title: 'Van Echten: Vlechten' + description: "In de jaren 70 maakte Frits van Echten voor het Drents Museum een\ + \ serie over Drentse gebruiken, ambachten en beroepen. Deze aflevering gaat\ + \ het over vlechten. Meneer Pronk uit Grolloo maakt van takjes een gevlochten\ + \ bijenkorf. Het is overduidelijk dat hij dit vaker heeft gedaan! \n\nDe in\ + \ Assen geboren Frits van Echten (1932-2016) bracht zijn militaire dienst door\ + \ bij de filmdienst. Daarna werkte hij als fotograaf en bij bioscopen in Den\ + \ Haag. In 1959 keerde hij terug naar Assen, waar hij een foto" + published_at: '2024-12-21T16:00:41Z' + duration: PT1M48S + definition: hd + caption_available: false + view_count: 118 + like_count: 5 + comment_count: 2 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/n6rXy2_DF3Q/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: 8QQT991huUg + video_url: https://www.youtube.com/watch?v=8QQT991huUg + title: 'Van Echten: Borckerhof' + description: "In de jaren 60 verkeerde boerderij de Borckerhof in Orvelte in een\ + \ vervallen staat. De Saksische boerderij uit halverwege de 19e eeuw moest nodig\ + \ worden gerenoveerd. Filmmaker Frits van Echten legde dit proces vast. Het\ + \ was een grote klus waarbij de hele boerderij werd ontmanteld. Nu zit er een\ + \ groepsaccommodatie in de Borckerhof: je kunt er dus overnachten! 
\n\nDe in\ + \ Assen geboren Frits van Echten (1932-2016) bracht zijn militaire dienst door\ + \ bij de filmdienst. Daarna werkte hij als fotograaf en " + published_at: '2024-12-14T16:00:08Z' + duration: PT2M34S + definition: hd + caption_available: false + view_count: 281 + like_count: 2 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/8QQT991huUg/hqdefault.jpg + default_language: nl + default_audio_language: en + - video_id: qRaecnHWCco + video_url: https://www.youtube.com/watch?v=qRaecnHWCco + title: 'Van Echten: Wecken' + description: "Weet jij nog hoe de aloude techniek van wecken gaat? Nee? Dan hebben\ + \ we hier voor jou een cursus wecken uit 1978. Op dit filmpje zie je (waarschijnlijk)\ + \ mevrouw J. Schoonvelde – Kuipers de boontjes die ze heeft geplukt klaarmaken.\ + \ Daarna worden de potten met geweckte bonen in de kelder gezet, bij al het\ + \ andere ingemaakte groente en fruit. De wintervoorraad is aangelegd! \n\n\ + De in Assen geboren Frits van Echten (1932-2016) bracht zijn militaire dienst\ + \ door bij de filmdienst. Daarna werkte hij als " + published_at: '2024-12-07T16:00:43Z' + duration: PT2M19S + definition: hd + caption_available: false + view_count: 209 + like_count: 6 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/qRaecnHWCco/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: ___RXae9ElY + video_url: https://www.youtube.com/watch?v=___RXae9ElY + title: Notariële akten - Archiefonderzoek voor Dummies (aflevering 6) + description: 'Notariële akten zijn een verborgen goudmijn vol verhalen en details + over je voorouders. Ze bieden een blik op het dagelijks leven, op eigendommen, + overeenkomsten en relaties van mensen. Er zijn koopakten, testamenten, huwelijkscontracten. + In Drenthe zijn lang niet alle notariële akten geïndiceerd. Indiceren betekent: + het doorzoekbaar en daarmee beter vindbaar maken van archiefstukken. Maar deze + akten zijn wél gedigitaliseerd. 
Hoe je ze alsnog kunt vinden op onze website, + legt publieksadviseur Jo' + published_at: '2024-11-27T13:38:59Z' + duration: PT7M7S + definition: hd + caption_available: false + view_count: 160 + like_count: 2 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/___RXae9ElY/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: 2FMLXFVr5SQ + video_url: https://www.youtube.com/watch?v=2FMLXFVr5SQ + title: 'Hiddingh: Demonstratie' + description: "Op 2 juni 1979 wordt het dorp Gasselte overspoeld door 25.000 demonstranten.\ + \ Er dreigt dumping van kernafval in de grote zoutkoepel van Gasselte. Het protest\ + \ in Gasselte was een van de grootste ooit in Drenthe. Wim Hiddingh legde de\ + \ grote demonstratie in zijn woonplaats vast. \n\nWim Hiddingh (1938-2016) was\ + \ in het dagelijks leven onderwijzer, maar zijn grootste hobby was film en fotografie.\ + \ Zo was hij de vaste fotograaf van het Drents maandblad Oeze Volk. Hiddingh\ + \ legde alles vast wat er dagelijk" + published_at: '2024-11-23T16:00:03Z' + duration: PT3M58S + definition: hd + caption_available: false + view_count: 155 + like_count: 3 + comment_count: 2 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/2FMLXFVr5SQ/hqdefault.jpg + default_language: nl + default_audio_language: nl + videos_count: 25 + status: SUCCESS diff --git a/data/nde/enriched/entries/0003_Q1258370.yaml b/data/nde/enriched/entries/0003_Q1258370.yaml index 1c220c0876..721d536816 100644 --- a/data/nde/enriched/entries/0003_Q1258370.yaml +++ b/data/nde/enriched/entries/0003_Q1258370.yaml @@ -1303,6 +1303,19 @@ provenance: - rating - reviews - opening_hours + youtube: + - source_type: youtube_data_api + fetch_timestamp: '2025-12-01T15:49:06.614842+00:00' + api_endpoint: https://www.googleapis.com/youtube/v3 + channel_id: UCIOV1are9TDnwNrgr6Yr3tQ + claims_extracted: + - channel_info + - subscriber_count + - video_count + - view_count + - recent_videos + - video_comments + - video_transcripts 
data_tier_summary: TIER_1_AUTHORITATIVE: - original_entry (NDE CSV) @@ -1759,3 +1772,259 @@ custodian_name: selection_method: priority_ranking selection_priority: 100 extraction_timestamp: '2025-12-01T12:35:23.211895+00:00' +youtube_enrichment: + source_url: https://www.youtube.com/user/drentsmuseum + fetch_timestamp: '2025-12-01T15:49:06.614842+00:00' + api_endpoint: https://www.googleapis.com/youtube/v3 + api_version: v3 + identifier_type: username + identifier_value: drentsmuseum + channel: + channel_id: UCIOV1are9TDnwNrgr6Yr3tQ + channel_url: https://www.youtube.com/channel/UCIOV1are9TDnwNrgr6Yr3tQ + title: drentsmuseum + description: Het Drents Museum is bekend om zijn internationale tentoonstellingen + over archeologie, kunst 1885-1935, hedendaags realisme en zijn zeer enthousiaste + medewerkers. + custom_url: '@drentsmuseum' + published_at: '2008-09-16T10:01:21Z' + country: null + default_language: null + thumbnail_url: https://yt3.ggpht.com/lxnClZMy4vPi3iccoR9S9SaX4vk_JyQng64QfStqUR0YHPDFOK-xuVniSxfjTEzSPbn2GJt_=s800-c-k-c0x00ffffff-no-rj + banner_url: https://yt3.googleusercontent.com/66yI4J2SYIgA_6P3-BQrKeTkx1yjyxeZzeigMWID_ee-jTKhqdfW8CBPy3rIUZwxpWj9NI8KGg + subscriber_count: 771 + video_count: 230 + view_count: 884235 + subscriber_count_hidden: false + uploads_playlist_id: UUIOV1are9TDnwNrgr6Yr3tQ + videos: + - video_id: d_1zWCrs-K8 + video_url: https://www.youtube.com/watch?v=d_1zWCrs-K8 + title: Microkosmos & Henk Schiffmacher + description: 'Henk Schiffmacher verzamelt objecten van over de hele wereld die + te maken hebben met tatoeages. Speciaal voor ‘Microkosmos’ koos hij een aantal + voorwerpen uit om in het museum te laten zien. + + + Je bekijkt Schiffmachers collectie en die van de andere verzamelaars nu in ‘Microkosmos''. + + + Meer weten over Henk Schiffmacher? Op 14 december komt hij naar de Verwonderdag. + Tickets zijn verkrijgbaar op onze website. 
+ + + #Microkosmos #HenkSchiffmacher #tatoeage #tatoeages #museum #tentoonstelling + #cultuur #dagje' + published_at: '2025-11-21T11:09:50Z' + duration: PT44S + definition: hd + caption_available: false + view_count: 198 + like_count: 2 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/d_1zWCrs-K8/hqdefault.jpg + default_language: nl + default_audio_language: nl-NL + comments: [] + comments_fetched: 0 + - video_id: wOrAxNXKFO8 + video_url: https://www.youtube.com/watch?v=wOrAxNXKFO8 + title: Stem op Drents Museum voor de VriendenLoterij Museumprijs 2025! + description: "Het Drents Museum is genomineerd voor de VriendenLoterij Museumprijs!\ + \ Wie deze prijs wint, wordt bepaald door middel van stemmen. En breng je je\ + \ stem uit, dan maak je bovendien kans op hele mooie prijzen! \nBreng je stem\ + \ uit op: www.drentsmuseum.nl/stem \U0001F64F\U0001F3C6\U0001FA77\n\nIn deze\ + \ video leggen we uit wat we gaan doen wanneer wij de hoofdprijs winnen. Help\ + \ jij ons om de hoofdprijs van € 100.000 te winnen en onze droom in vervulling\ + \ te laten gaan? Alvast bedankt!" + published_at: '2025-10-09T07:11:51Z' + duration: PT1M + definition: hd + caption_available: true + view_count: 1706 + like_count: 9 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/wOrAxNXKFO8/hqdefault.jpg + default_language: nl + default_audio_language: nl-NL + comments: [] + comments_fetched: 0 + - video_id: tnJMlRqEGto + video_url: https://www.youtube.com/watch?v=tnJMlRqEGto + title: Stem op Drents Museum voor de VriendenLoterij Museumprijs 2025! + description: 'Het Drents Museum is genomineerd voor de VriendenLoterij Museumprijs! + Help jij ons om de hoofdprijs van € 100.000 te winnen en onze droom in vervulling + te laten gaan? 
+ + Breng je stem uit op: www.drentsmuseum.nl/stem 🙏🏆🩷' + published_at: '2025-10-09T07:11:56Z' + duration: PT11S + definition: hd + caption_available: false + view_count: 836 + like_count: 1 + comment_count: 1 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/tnJMlRqEGto/hqdefault.jpg + default_language: nl + default_audio_language: nl-NL + comments: + - comment_id: Ugy283RyalMNdLc69DR4AaABAg + author_display_name: '@TheSMcBrown' + author_channel_url: http://www.youtube.com/@TheSMcBrown + text: 🥰 + like_count: 0 + published_at: '2025-10-09T18:58:23Z' + updated_at: '2025-10-09T18:58:23Z' + reply_count: 0 + comments_fetched: 1 + - video_id: bdHBoEHnJJQ + video_url: https://www.youtube.com/watch?v=bdHBoEHnJJQ + title: Stem op Drents Museum voor de VriendenLoterij Museumprijs 2025! + description: "Het Drents Museum is genomineerd voor de VriendenLoterij Museumprijs!\ + \ Wie deze prijs wint, wordt bepaald door middel van stemmen. En breng je je\ + \ stem uit, dan maak je bovendien kans op hele mooie prijzen! \n\nIn deze video\ + \ leggen we uit wat we gaan doen wanneer wij de hoofdprijs winnen. Help jij\ + \ ons om de hoofdprijs van € 100.000 te winnen en onze droom in vervulling te\ + \ laten gaan? \n\nStem via: https://winnaar.vriendenloterij.nl/museumprijs2025?museumid=57519" + published_at: '2025-10-09T07:11:08Z' + duration: PT59S + definition: hd + caption_available: true + view_count: 8203 + like_count: 2 + comment_count: 1 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/bdHBoEHnJJQ/hqdefault.jpg + default_language: nl + default_audio_language: nl-NL + comments: + - comment_id: Ugw9_G4VbcgQjNR4i7F4AaABAg + author_display_name: '@Kep39584' + author_channel_url: http://www.youtube.com/@Kep39584 + text: Misschien eerst de Helm van Coțofenești terughalen, jullie verdienen zeker + geen prijs uitreiking dit jaar, schande. 
+ like_count: 1 + published_at: '2025-10-19T12:36:16Z' + updated_at: '2025-10-19T12:36:16Z' + reply_count: 0 + comments_fetched: 1 + - video_id: zy5dFu42f_w + video_url: https://www.youtube.com/watch?v=zy5dFu42f_w + title: Het Drents Museum en de Leipziger Schule + description: In deze video leggen we uit wat de Leipziger Schule is en wat de + relatie is tussen het Drents Museum en de Leipziger Schule. + published_at: '2025-09-19T12:46:02Z' + duration: PT10M37S + definition: hd + caption_available: false + view_count: 846 + like_count: 5 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/zy5dFu42f_w/hqdefault.jpg + default_language: nl + default_audio_language: nl-NL + comments: [] + comments_fetched: 0 + - video_id: TK8XDe9SIW0 + video_url: https://www.youtube.com/watch?v=TK8XDe9SIW0 + title: Prikkelarme rondleiding 'Gen F - 75 jaar figuratieve kunst' + description: '' + published_at: '2025-09-03T13:33:52Z' + duration: PT31M23S + definition: hd + caption_available: true + view_count: 31 + like_count: 1 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/TK8XDe9SIW0/hqdefault.jpg + default_language: nl + default_audio_language: nl-NL + - video_id: viRdUD1V-dg + video_url: https://www.youtube.com/watch?v=viRdUD1V-dg + title: Microkosmos - De wereld in een Wunderkammer - Drents Museum + description: 'In de tentoonstelling Microkosmos – De wereld in een Wunderkammer + komen klassieke Wunderkammer-objecten, hedendaagse rariteiten en beeldende kunst + samen. Een visueel spektakel waarin de magie van verzamelen tot leven komt. + Microkosmos is ook de afscheidstentoonstelling van algemeen directeur Harry + Tupan, die alles uit de kast heeft gehaald voor dit grote Wunderkammer-overzicht. 
+ + + Onder meer tattoo-artiest Henk Schiffmacher, ontdekkingsreiziger Redmond O’Hanlon, + schrijver en acteur Ramsey Nasr en ' + published_at: '2025-08-19T08:08:01Z' + duration: PT23S + definition: hd + caption_available: false + view_count: 6751 + like_count: 5 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/viRdUD1V-dg/hqdefault.jpg + default_language: nl + default_audio_language: nl-NL + - video_id: wYnAjhxv54I + video_url: https://www.youtube.com/watch?v=wYnAjhxv54I + title: Ontmoet de wereld in Drenthe - Jouw event in het Drents Museum + description: Jouw volgende evenement, congres, workshop of teamdag in het Drents + Museum? We heten jullie van harte welkom op onze unieke locatie, waar oud en + nieuw samengaan in een verrassend gebouw. + published_at: '2025-07-30T11:52:42Z' + duration: PT54S + definition: hd + caption_available: false + view_count: 36 + like_count: 0 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/wYnAjhxv54I/hqdefault.jpg + default_language: nl + default_audio_language: nl-NL + - video_id: jj_eTFOyuWY + video_url: https://www.youtube.com/watch?v=jj_eTFOyuWY + title: Prikkelarme rondleiding door 'Menyala' + description: "In deze rondleiding van iets meer dan 1 uur en 8 minuten neemt onze\ + \ collega je mee door de tentoonstelling 'Menyala - De buitengewone geschiedenis\ + \ van de Molukkers in Drenthe\". De tentoonstelling was in 2023 te zien in het\ + \ Drents Museum. \n\nPrikkels zijn in deze rondleiding zo veel mogelijk beperkt.\ + \ Er zijn dus geen speciale effecten toegepast. 
De tour is voorzien van voice-over\ + \ en ondertiteling, maar deze aspecten kunnen indien gewenst uit worden gezet.\n\ + \nHeb je tips voor ons na het bekijken van " + published_at: '2025-07-14T08:15:43Z' + duration: PT1H8M40S + definition: hd + caption_available: true + view_count: 52 + like_count: 0 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/jj_eTFOyuWY/hqdefault.jpg + default_language: nl + default_audio_language: nl-NL + - video_id: dVBTIdaxfOc + video_url: https://www.youtube.com/watch?v=dVBTIdaxfOc + title: Ontdek Drenthe - de app + description: "De app 'Ontdek Drenthe' verbindt de collectie van het Drents Museum\ + \ met bijzondere locaties en andere musea in Drenthe. Met Augmented Reality\ + \ (AR) beleef je de verhalen over archeologie, kunst en geschiedenis op verschillende\ + \ locaties.\n \nStap in de voetsporen van de schilders van Drenthe, bezoek het\ + \ grootste hunebed of ga op zoek naar de vindplaats van het meisje van Yde.\n\ + \nVindt de app & meer informatie hierover op: https://drentsmuseum.nl/ontdek-drenthe\n\ + Deze video is gemaakt door DEN Kennisinst" + published_at: '2025-07-08T08:22:37Z' + duration: PT2M40S + definition: hd + caption_available: false + view_count: 82 + like_count: 3 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/dVBTIdaxfOc/hqdefault.jpg + default_language: nl + default_audio_language: nl-NL + videos_count: 10 + status: SUCCESS diff --git a/data/nde/enriched/entries/0005_Q81181263.yaml b/data/nde/enriched/entries/0005_Q81181263.yaml index 38c6d8c951..2b5d4902e4 100644 --- a/data/nde/enriched/entries/0005_Q81181263.yaml +++ b/data/nde/enriched/entries/0005_Q81181263.yaml @@ -222,6 +222,19 @@ provenance: - rating - reviews - opening_hours + youtube: + - source_type: youtube_data_api + fetch_timestamp: '2025-12-01T15:49:09.601504+00:00' + api_endpoint: https://www.googleapis.com/youtube/v3 + channel_id: null + claims_extracted: + - channel_info + - subscriber_count + - 
video_count + - view_count + - recent_videos + - video_comments + - video_transcripts data_tier_summary: TIER_1_AUTHORITATIVE: - original_entry (NDE CSV) @@ -601,3 +614,15 @@ custodian_name: wikidata_id: '' provenance_note: Derived from wikidata_label_nl (web_claims had no valid org_name) extraction_timestamp: '2025-12-01T12:35:23.432573+00:00' +youtube_enrichment: + source_url: https://www.youtube.com/channel/UCdj5Tn3btqad_ukTOa7YEIQ + fetch_timestamp: '2025-12-01T15:49:09.601504+00:00' + api_endpoint: https://www.googleapis.com/youtube/v3 + api_version: v3 + identifier_type: channel_id + identifier_value: UCdj5Tn3btqad_ukTOa7YEI + channel: + error: 'Channel not found: UCdj5Tn3btqad_ukTOa7YEI' + videos: [] + videos_count: 0 + status: SUCCESS diff --git a/data/nde/enriched/entries/0063_Q2530771.yaml b/data/nde/enriched/entries/0063_Q2530771.yaml index bacf60e111..d0a5857e00 100644 --- a/data/nde/enriched/entries/0063_Q2530771.yaml +++ b/data/nde/enriched/entries/0063_Q2530771.yaml @@ -504,3 +504,13 @@ custodian_name: selection_method: priority_ranking selection_priority: 100 extraction_timestamp: '2025-12-01T12:35:26.134796+00:00' +digital_platforms: +- platform_name: Kazemattenmuseum (Stichting Kornwerderzand) + platform_url: http://www.kazemattenmuseum.nl + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q2530771 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:19.103014+00:00' diff --git a/data/nde/enriched/entries/0144_Q2710899.yaml b/data/nde/enriched/entries/0144_Q2710899.yaml index 9a9bfe580c..1d04dc698e 100644 --- a/data/nde/enriched/entries/0144_Q2710899.yaml +++ b/data/nde/enriched/entries/0144_Q2710899.yaml @@ -954,3 +954,13 @@ custodian_name: selection_method: priority_ranking selection_priority: 100 extraction_timestamp: '2025-12-01T12:35:34.024620+00:00' +digital_platforms: +- platform_name: Nationaal Onderduikmuseum + platform_url: 
https://nationaalonderduikmuseum.nl + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q2710899 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:19.364790+00:00' diff --git a/data/nde/enriched/entries/0145_Q2654815.yaml b/data/nde/enriched/entries/0145_Q2654815.yaml index dddbe6622d..7af3bea402 100644 --- a/data/nde/enriched/entries/0145_Q2654815.yaml +++ b/data/nde/enriched/entries/0145_Q2654815.yaml @@ -816,3 +816,13 @@ custodian_name: selection_method: priority_ranking selection_priority: 70 extraction_timestamp: '2025-12-01T12:35:34.111039+00:00' +digital_platforms: +- platform_name: Anton Pieck Museum + platform_url: http://www.antonpieckmuseum.nl + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q2654815 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:19.166677+00:00' diff --git a/data/nde/enriched/entries/0146_Q1663974.yaml b/data/nde/enriched/entries/0146_Q1663974.yaml index bafcc309e3..615bdc8289 100644 --- a/data/nde/enriched/entries/0146_Q1663974.yaml +++ b/data/nde/enriched/entries/0146_Q1663974.yaml @@ -375,6 +375,19 @@ provenance: - rating - reviews - opening_hours + youtube: + - source_type: youtube_data_api + fetch_timestamp: '2025-12-01T15:47:38.187552+00:00' + api_endpoint: https://www.googleapis.com/youtube/v3 + channel_id: UChFRnKtFCgrGix8qCTSqtlA + claims_extracted: + - channel_info + - subscriber_count + - video_count + - view_count + - recent_videos + - video_comments + - video_transcripts data_tier_summary: TIER_1_AUTHORITATIVE: - original_entry (NDE CSV) @@ -727,3 +740,385 @@ custodian_name: selection_method: priority_ranking selection_priority: 100 extraction_timestamp: '2025-12-01T12:35:34.164064+00:00' +youtube_enrichment: + source_url: https://www.youtube.com/user/TUApeldoorn + fetch_timestamp: '2025-12-01T15:47:38.187552+00:00' + api_endpoint: 
https://www.googleapis.com/youtube/v3 + api_version: v3 + identifier_type: username + identifier_value: TUApeldoorn + channel: + channel_id: UChFRnKtFCgrGix8qCTSqtlA + channel_url: https://www.youtube.com/channel/UChFRnKtFCgrGix8qCTSqtlA + title: TUApeldoorn + description: Aan de Theologische Universiteit Apeldoorn wordt een zesjarige studie + theologie aangeboden. In de opleiding staat de wetenschappelijke kwaliteit verbonden + aan een gedegen beroepsmatige toerusting centraal. Daarbij is er veel aandacht + voor persoonlijkheidsvorming van de toekomstige predikanten en andere werkers + in en buiten de kerk. + custom_url: '@tuapeldoorn' + published_at: '2013-10-10T13:14:51Z' + country: null + default_language: null + thumbnail_url: https://yt3.ggpht.com/WT2foUsKADfDmi82g7r_hORC4uZHG6LSLziv8bZfRLpHpCqPdMpMgbjIVTwrcwN-35qnfHPq=s800-c-k-c0x00ffffff-no-rj + banner_url: https://yt3.googleusercontent.com/AwxQqNWwgr49lUETkGVrAsMUhT8JsoMgW02fDtnDo3_SlHndVHq8wh57IqOq5CTMqVFrLwLIfQ + subscriber_count: 557 + video_count: 57 + view_count: 69955 + subscriber_count_hidden: false + uploads_playlist_id: UUhFRnKtFCgrGix8qCTSqtlA + videos: + - video_id: 0BhkYRSYJfw + video_url: https://www.youtube.com/watch?v=0BhkYRSYJfw + title: Ulf Grapenthin | 26 november 2025 + description: 'Volg ons! 
+ + https://www.tua.nl + + https://www.instagram.com/tuapeldoorn/ + + https://www.facebook.com/TUApeldoorn/ + + https://twitter.com/TUApeldoorn' + published_at: '2025-11-27T01:04:13Z' + duration: PT1H38M49S + definition: hd + caption_available: false + view_count: 172 + like_count: null + comment_count: null + tags: + - TUA + - Master Theology + - Theology Netherlands + - Theology + - Theologie studeren + - Studying Theolgy + - Theology Apeldoorn + - Theological University + - Theologische Universiteit + - Apeldoorn Universiteit + thumbnail_url: https://i.ytimg.com/vi/0BhkYRSYJfw/hqdefault.jpg + default_language: af + default_audio_language: nl + comments: [] + comments_fetched: 0 + - video_id: O3OqanQbkL8 + video_url: https://www.youtube.com/watch?v=O3OqanQbkL8 + title: Wat is de HGT? | TUA + description: 'Heb je altijd al op academisch niveau iets willen doen met theologie? + Of wil je je vakgebied of werkveld verrijken met theologische verdieping? Kies + voor de master Herbronning Gereformeerde Theologie! Deze master van 60 EC is + laagdrempelig, hedendaags, legt de verbinding met jouw (huidig of toekomstig) + beroep en kan in deeltijd gevolgd worden. Colleges zijn alleen op woensdag. + + + Duik diep in de rijkdom van de gereformeerde theologie! + + + Meer informatie? Kijk op https://www.tua.nl/nl/onderwijs/o/60e' + published_at: '2025-10-01T12:23:55Z' + duration: PT2M6S + definition: hd + caption_available: false + view_count: 127 + like_count: 2 + comment_count: 0 + tags: + - TUA + - Master Theology + - Theology Netherlands + - Theology + - Theologie studeren + - Studying Theolgy + - Theology Apeldoorn + - Theological University + - Theologische Universiteit + - Apeldoorn Universiteit + thumbnail_url: https://i.ytimg.com/vi/O3OqanQbkL8/hqdefault.jpg + default_language: nl + default_audio_language: nl + comments: [] + comments_fetched: 0 + - video_id: XyWb2cyJvAs + video_url: https://www.youtube.com/watch?v=XyWb2cyJvAs + title: Promotion S. 
Joo + description: 'Volg ons! + + https://www.tua.nl + + https://www.instagram.com/tuapeldoorn/ + + https://www.facebook.com/TUApeldoorn/ + + https://twitter.com/TUApeldoorn' + published_at: '2025-09-09T04:16:35Z' + duration: PT1H26M10S + definition: hd + caption_available: false + view_count: 217 + like_count: 6 + comment_count: 0 + tags: + - TUA + - Master Theology + - Theology Netherlands + - Theology + - Theologie studeren + - Studying Theolgy + - Theology Apeldoorn + - Theological University + - Theologische Universiteit + - Apeldoorn Universiteit + thumbnail_url: https://i.ytimg.com/vi/XyWb2cyJvAs/hqdefault.jpg + default_language: af + default_audio_language: en + comments: [] + comments_fetched: 0 + - video_id: kgJgJZXJPV8 + video_url: https://www.youtube.com/watch?v=kgJgJZXJPV8 + title: Promotion prof. R.S. Goldreich + description: 'Volg ons! + + https://www.tua.nl + + https://www.instagram.com/tuapeldoorn/ + + https://www.facebook.com/TUApeldoorn/ + + https://twitter.com/TUApeldoorn' + published_at: '2025-07-03T02:57:45Z' + duration: PT1H29M51S + definition: hd + caption_available: false + view_count: 189 + like_count: 1 + comment_count: 0 + tags: + - TUA + - Master Theology + - Theology Netherlands + - Theology + - Theologie studeren + - Studying Theolgy + - Theology Apeldoorn + - Theological University + - Theologische Universiteit + - Apeldoorn Universiteit + thumbnail_url: https://i.ytimg.com/vi/kgJgJZXJPV8/hqdefault.jpg + default_language: nl + default_audio_language: en + comments: [] + comments_fetched: 0 + - video_id: sCJYoEF1MaM + video_url: https://www.youtube.com/watch?v=sCJYoEF1MaM + title: Promotie drs. J.J. Steensma | 17 maart 2025 + description: 'Volg ons! 
+ + https://www.tua.nl + + https://www.instagram.com/tuapeldoorn/ + + https://www.facebook.com/TUApeldoorn/ + + https://twitter.com/TUApeldoorn' + published_at: '2025-03-17T22:03:59Z' + duration: PT1H27M13S + definition: hd + caption_available: false + view_count: 3019 + like_count: 30 + comment_count: 1 + tags: + - TUA + - Master Theology + - Theology Netherlands + - Theology + - Theologie studeren + - Studying Theolgy + - Theology Apeldoorn + - Theological University + - Theologische Universiteit + - Apeldoorn Universiteit + thumbnail_url: https://i.ytimg.com/vi/sCJYoEF1MaM/hqdefault.jpg + default_language: nl + default_audio_language: nl + comments: + - comment_id: UgylfNuWq8ezGFBOK3l4AaABAg + author_display_name: '@jackwestra' + author_channel_url: http://www.youtube.com/@jackwestra + text: Jaap Jan, van harte gefeliciteerd met dit fantastische resultaat. Het + onderzoek is prachtig vormgegeven.. Jammer dat ik er niet bij kon zijn.. de + uitslag op het eind heb ik met ontroering gevolgd.. tot binnenkort.. + like_count: 0 + published_at: '2025-03-17T23:37:55Z' + updated_at: '2025-03-17T23:37:55Z' + reply_count: 0 + comments_fetched: 1 + - video_id: _fbUYD2VRgU + video_url: https://www.youtube.com/watch?v=_fbUYD2VRgU + title: Inauguratie van prof. dr. A. Versluis | 7 maart 2025 + description: 'Op 7 maart 2025 vond de inauguratieplechtigheid van prof. dr. A. + Versluis als hoogleraar Oude Testament aan de Theologische Universiteit Apeldoorn + plaats. 
+ + + https://www.tua.nl + + https://www.instagram.com/tuapeldoorn/ + + https://www.facebook.com/TUApeldoorn/ + + https://twitter.com/TUApeldoorn' + published_at: '2025-03-08T03:30:28Z' + duration: PT1H35M36S + definition: hd + caption_available: false + view_count: 1806 + like_count: 10 + comment_count: 1 + tags: + - TUA + - Master Theology + - Theology Netherlands + - Theology + - Theologie studeren + - Studying Theolgy + - Theology Apeldoorn + - Theological University + - Theologische Universiteit + - Apeldoorn Universiteit + thumbnail_url: https://i.ytimg.com/vi/_fbUYD2VRgU/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: AP_1pdV7ryQ + video_url: https://www.youtube.com/watch?v=AP_1pdV7ryQ + title: Mijn studie theologie bevalt me goed omdat... + description: 'Volg ons! + + https://www.tua.nl + + https://www.instagram.com/tuapeldoorn/ + + https://www.facebook.com/TUApeldoorn/ + + https://twitter.com/TUApeldoorn + + + #theologie #TUA #TUApeldoorn #TheologischeUniversiteitApeldoorn #theologiestuderen + #studiekeuze #welkestudie' + published_at: '2024-12-18T09:47:24Z' + duration: PT37S + definition: hd + caption_available: false + view_count: 285 + like_count: 1 + comment_count: 0 + tags: + - TUA + - Master Theology + - Theology Netherlands + - Theology + - Theologie studeren + - Studying Theolgy + - Theology Apeldoorn + - Theological University + - Theologische Universiteit + - Apeldoorn Universiteit + thumbnail_url: https://i.ytimg.com/vi/AP_1pdV7ryQ/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: ICNOmvkLRmU + video_url: https://www.youtube.com/watch?v=ICNOmvkLRmU + title: Een leuke module tijdens mijn studie theologie vond ik... + description: 'Volg ons! 
+ + https://www.tua.nl + + https://www.instagram.com/tuapeldoorn/ + + https://www.facebook.com/TUApeldoorn/ + + https://twitter.com/TUApeldoorn + + + #theologie #TUA #TUApeldoorn #TheologischeUniversiteitApeldoorn #theologiestuderen + #studiekeuze #welkestudie' + published_at: '2024-12-03T10:42:51Z' + duration: PT41S + definition: hd + caption_available: false + view_count: 404 + like_count: 5 + comment_count: 0 + tags: + - TUA + - Master Theology + - Theology Netherlands + - Theology + - Theologie studeren + - Studying Theolgy + - Theology Apeldoorn + - Theological University + - Theologische Universiteit + - Apeldoorn Universiteit + thumbnail_url: https://i.ytimg.com/vi/ICNOmvkLRmU/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: BtrAWku9Yek + video_url: https://www.youtube.com/watch?v=BtrAWku9Yek + title: 'Denken om te dienen | # 8 Persoonlijk geloof en de studie theologie (met + Jantine en Jan Willem)' + description: In deze aflevering gaan Charlotte en Niels in gesprek met Jan Willem + van Panhuis en Jantine Donker, twee studenten van de Theologische Universiteit + Apeldoorn. Ze vertellen in deze aflevering over wat de studie theologie hen + gebracht heeft. Er komen mooie ontdekkingen langs die de studie hen bracht, + maar theologie studeren ook een confronterende kant. Theologie studeren gaat + namelijk ook over jezelf als aankomend theoloog. Het is een open en eerlijk + gesprek waarin twijfels en hoop ter sprake kome + published_at: '2024-11-28T06:00:13Z' + duration: PT36M7S + definition: hd + caption_available: false + view_count: 492 + like_count: 2 + comment_count: 0 + tags: [] + thumbnail_url: https://i.ytimg.com/vi/BtrAWku9Yek/hqdefault.jpg + default_language: nl + default_audio_language: nl + - video_id: n6l4O92qgN0 + video_url: https://www.youtube.com/watch?v=n6l4O92qgN0 + title: Promotie G.M. Bosker + description: 'Volg ons! 
+ + https://www.tua.nl + + https://www.instagram.com/tuapeldoorn/ + + https://www.facebook.com/TUApeldoorn/ + + https://twitter.com/TUApeldoorn' + published_at: '2024-11-23T03:30:13Z' + duration: PT1H27M11S + definition: hd + caption_available: false + view_count: 1540 + like_count: 5 + comment_count: 1 + tags: + - TUA + - Master Theology + - Theology Netherlands + - Theology + - Theologie studeren + - Studying Theolgy + - Theology Apeldoorn + - Theological University + - Theologische Universiteit + - Apeldoorn Universiteit + thumbnail_url: https://i.ytimg.com/vi/n6l4O92qgN0/hqdefault.jpg + default_language: nl + default_audio_language: nl + videos_count: 10 + status: SUCCESS diff --git a/data/nde/enriched/entries/0148_Q69725772.yaml b/data/nde/enriched/entries/0148_Q69725772.yaml index 006b3ee884..81346c31fb 100644 --- a/data/nde/enriched/entries/0148_Q69725772.yaml +++ b/data/nde/enriched/entries/0148_Q69725772.yaml @@ -1413,3 +1413,13 @@ custodian_name: selection_method: priority_ranking selection_priority: 60 extraction_timestamp: '2025-12-01T12:35:34.321297+00:00' +digital_platforms: +- platform_name: Erfgoed Gelderland + platform_url: https://erfgoedgelderland.nl/ + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q69725772 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:19.925471+00:00' diff --git a/data/nde/enriched/entries/0153_Q3448774.yaml b/data/nde/enriched/entries/0153_Q3448774.yaml index 1a0eda1c21..d35e833ab8 100644 --- a/data/nde/enriched/entries/0153_Q3448774.yaml +++ b/data/nde/enriched/entries/0153_Q3448774.yaml @@ -942,3 +942,13 @@ custodian_name: selection_method: priority_ranking selection_priority: 70 extraction_timestamp: '2025-12-01T12:35:34.731670+00:00' +digital_platforms: +- platform_name: Erfgoedcentrum Achterhoek en Liemers + platform_url: https://www.ecal.nu + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + 
wikidata_id: Q3448774 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:19.454236+00:00' diff --git a/data/nde/enriched/entries/0155_Q13636575.yaml b/data/nde/enriched/entries/0155_Q13636575.yaml index 79fd97e522..e93e7f2ccc 100644 --- a/data/nde/enriched/entries/0155_Q13636575.yaml +++ b/data/nde/enriched/entries/0155_Q13636575.yaml @@ -1054,3 +1054,20 @@ custodian_name: selection_method: priority_ranking selection_priority: 100 extraction_timestamp: '2025-12-01T12:35:34.854539+00:00' +digital_platforms: +- platform_name: Flipje & Streekmuseum + platform_url: http://www.streekmuseumtiel.nl + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q13636575 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:19.571571+00:00' +- platform_name: Flipje & Streekmuseum Twitter/X + platform_url: https://twitter.com/Flipjemuseum + platform_type: SOCIAL_MEDIA_TWITTER + provenance: + source_type: wikidata_p2002 + wikidata_id: Q13636575 + data_tier: TIER_2_VERIFIED diff --git a/data/nde/enriched/entries/1015_Q572269.yaml b/data/nde/enriched/entries/1015_Q572269.yaml index 9e0397b3e1..20a4665e2f 100644 --- a/data/nde/enriched/entries/1015_Q572269.yaml +++ b/data/nde/enriched/entries/1015_Q572269.yaml @@ -1194,3 +1194,13 @@ custodian_name: selection_method: priority_ranking selection_priority: 100 extraction_timestamp: '2025-12-01T12:36:45.132014+00:00' +digital_platforms: +- platform_name: Kasteel Amerongen + platform_url: https://www.kasteelamerongen.nl/ + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q572269 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:18.875296+00:00' diff --git a/data/nde/enriched/entries/1050_Q28058453.yaml b/data/nde/enriched/entries/1050_Q28058453.yaml index e2aaa016aa..9cef0f0292 100644 --- 
a/data/nde/enriched/entries/1050_Q28058453.yaml +++ b/data/nde/enriched/entries/1050_Q28058453.yaml @@ -713,3 +713,13 @@ custodian_name: selection_method: priority_ranking selection_priority: 70 extraction_timestamp: '2025-12-01T12:36:47.207621+00:00' +digital_platforms: +- platform_name: Museum IJsselstein + platform_url: http://www.museumijsselstein.nl + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q28058453 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:19.670308+00:00' diff --git a/data/nde/enriched/entries/1053_Q28956940.yaml b/data/nde/enriched/entries/1053_Q28956940.yaml index 0ec9cc64f1..19db6fcbae 100644 --- a/data/nde/enriched/entries/1053_Q28956940.yaml +++ b/data/nde/enriched/entries/1053_Q28956940.yaml @@ -740,3 +740,13 @@ custodian_name: selection_method: priority_ranking selection_priority: 70 extraction_timestamp: '2025-12-01T12:36:47.509287+00:00' +digital_platforms: +- platform_name: Museum Oud Amelisweerd + platform_url: http://www.moa.nl/ + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q28956940 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:19.790759+00:00' diff --git a/data/nde/enriched/entries/1055_Q1624224.yaml b/data/nde/enriched/entries/1055_Q1624224.yaml index 5918d21934..bf73cbf8cb 100644 --- a/data/nde/enriched/entries/1055_Q1624224.yaml +++ b/data/nde/enriched/entries/1055_Q1624224.yaml @@ -1102,3 +1102,41 @@ custodian_name: selection_method: priority_ranking selection_priority: 100 extraction_timestamp: '2025-12-01T12:36:47.676254+00:00' +digital_platforms: +- platform_name: Museum Speelklok + platform_url: https://www.museumspeelklok.nl/ + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q1624224 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:18.984317+00:00' +- 
platform_name: Museum Speelklok Facebook + platform_url: https://www.facebook.com/MuseumSpeelklok + platform_type: SOCIAL_MEDIA_FACEBOOK + provenance: + source_type: wikidata_p2013 + wikidata_id: Q1624224 + data_tier: TIER_2_VERIFIED +- platform_name: Museum Speelklok Twitter/X + platform_url: https://twitter.com/museumspeelklok + platform_type: SOCIAL_MEDIA_TWITTER + provenance: + source_type: wikidata_p2002 + wikidata_id: Q1624224 + data_tier: TIER_2_VERIFIED +- platform_name: Museum Speelklok Instagram + platform_url: https://www.instagram.com/museumspeelklok + platform_type: SOCIAL_MEDIA_INSTAGRAM + provenance: + source_type: wikidata_p2003 + wikidata_id: Q1624224 + data_tier: TIER_2_VERIFIED +- platform_name: Museum Speelklok YouTube + platform_url: https://www.youtube.com/channel/UCXv_fLzzL7UBnZtRG3A2QLw + platform_type: SOCIAL_MEDIA_YOUTUBE + provenance: + source_type: wikidata_p2397 + wikidata_id: Q1624224 + data_tier: TIER_2_VERIFIED diff --git a/data/nde/enriched/entries/1058_Q2361897.yaml b/data/nde/enriched/entries/1058_Q2361897.yaml index ebaf32b5e7..28126882e1 100644 --- a/data/nde/enriched/entries/1058_Q2361897.yaml +++ b/data/nde/enriched/entries/1058_Q2361897.yaml @@ -553,3 +553,13 @@ custodian_name: selection_method: priority_ranking selection_priority: 70 extraction_timestamp: '2025-12-01T12:36:47.877408+00:00' +digital_platforms: +- platform_name: Museum Warsenhoeck + platform_url: https://www.museumwarsenhoeck.nl + platform_type: OFFICIAL_WEBSITE + provenance: + source_type: wikidata_p856 + wikidata_id: Q2361897 + wikidata_property: P856 + data_tier: TIER_2_VERIFIED + discovery_timestamp: '2025-12-01T15:11:19.061600+00:00' diff --git a/data/nde/enriched/entries/1502_huygens_instituut_hi.yaml b/data/nde/enriched/entries/1502_huygens_instituut_hi.yaml index 2404f84d7e..f88960c77c 100644 --- a/data/nde/enriched/entries/1502_huygens_instituut_hi.yaml +++ b/data/nde/enriched/entries/1502_huygens_instituut_hi.yaml @@ -40,7 +40,7 @@ identifiers: - 
identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-7f45-96a5-c14ef7c70861 identifier_url: urn:uuid:019ad9ec-7cc0-7f45-96a5-c14ef7c70861 -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.134408+00:00 google_maps_enrichment: place_id: ChIJO6dE4bgJxkcRBil_dsYIKsY name: Huygens Institute @@ -263,3 +264,4 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:34:07.295167+00:00' +url: http://www.huygens.knaw.nl/ diff --git a/data/nde/enriched/entries/1503_meertens_instituut_mi.yaml b/data/nde/enriched/entries/1503_meertens_instituut_mi.yaml index 521839b262..07ed9019dd 100644 --- a/data/nde/enriched/entries/1503_meertens_instituut_mi.yaml +++ b/data/nde/enriched/entries/1503_meertens_instituut_mi.yaml @@ -40,7 +40,7 @@ identifiers: - identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-7a4e-85df-c78e1e224ec7 identifier_url: urn:uuid:019ad9ec-7cc0-7a4e-85df-c78e1e224ec7 -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.153024+00:00 google_maps_enrichment: place_id: ChIJO4j9Qb8JxkcRBebeeuFWxDc name: Meertens Instituut @@ -157,3 +158,4 @@ custodian_name: provenance_note: Derived from original_entry.organisatie (no valid web_claims or wikidata) extraction_timestamp: '2025-12-01T12:37:14.913003+00:00' +url: http://www.meertens.knaw.nl/ diff --git 
a/data/nde/enriched/entries/1504_nederlandse_vereniging_voor_papierknipkunst.yaml b/data/nde/enriched/entries/1504_nederlandse_vereniging_voor_papierknipkunst.yaml index 8f99846ce1..bae84c732e 100644 --- a/data/nde/enriched/entries/1504_nederlandse_vereniging_voor_papierknipkunst.yaml +++ b/data/nde/enriched/entries/1504_nederlandse_vereniging_voor_papierknipkunst.yaml @@ -40,7 +40,7 @@ identifiers: - identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-7fee-97e6-2dd6367d0c98 identifier_url: urn:uuid:019ad9ec-7cc0-7fee-97e6-2dd6367d0c98 -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.168401+00:00 google_maps_enrichment: place_id: ChIJMYyNRHekx0cR3u8FZYrhex4 name: Kenniscentrum Papier en Karton @@ -149,3 +150,9 @@ custodian_name: provenance_note: Derived from original_entry.organisatie (no valid web_claims or wikidata) extraction_timestamp: '2025-12-01T12:37:14.922732+00:00' +url: https://papierknippen.nl/ +url_discovery: + method: web_search + search_tool: exa + discovery_date: '2025-12-01T15:30:00+00:00' + notes: Website discovered via Exa web search - organization dedicated to paper cutting art (papierknipkunst) diff --git a/data/nde/enriched/entries/1505_rhc_vecht_en_venen.yaml b/data/nde/enriched/entries/1505_rhc_vecht_en_venen.yaml index 338be7cde7..fbbf3a8e0e 100644 --- a/data/nde/enriched/entries/1505_rhc_vecht_en_venen.yaml +++ b/data/nde/enriched/entries/1505_rhc_vecht_en_venen.yaml @@ -40,7 +40,7 @@ identifiers: - identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-75a0-a861-debd8f866f11 identifier_url: urn:uuid:019ad9ec-7cc0-75a0-a861-debd8f866f11 -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 
1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.194067+00:00 google_maps_enrichment: place_id: ChIJxQAWkyxyxkcRPllQjy0uHy0 name: Regionaal Historisch Centrum Vecht en Venen @@ -317,3 +318,4 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:07.372483+00:00' +url: http://www.rhcvechtenvenen.nl/ diff --git a/data/nde/enriched/entries/1506_sociaal_en_cultureel_planbureau_scp.yaml b/data/nde/enriched/entries/1506_sociaal_en_cultureel_planbureau_scp.yaml index e4c967198c..6c9a9b3122 100644 --- a/data/nde/enriched/entries/1506_sociaal_en_cultureel_planbureau_scp.yaml +++ b/data/nde/enriched/entries/1506_sociaal_en_cultureel_planbureau_scp.yaml @@ -40,7 +40,7 @@ identifiers: - identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-7d56-9377-69ff0170e5d4 identifier_url: urn:uuid:019ad9ec-7cc0-7d56-9377-69ff0170e5d4 -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.216400+00:00 google_maps_enrichment: place_id: ChIJ6XJ4URG3xUcRpa4hALvApIs name: Sociaal en Cultureel Planbureau @@ -212,3 +213,4 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:07.453311+00:00' +url: http://www.scp.nl/ diff --git a/data/nde/enriched/entries/1507_hilversumse_historische_kring_albertus_perk_hhkap.yaml b/data/nde/enriched/entries/1507_hilversumse_historische_kring_albertus_perk_hhkap.yaml index b56009161d..7c598f5422 100644 --- 
a/data/nde/enriched/entries/1507_hilversumse_historische_kring_albertus_perk_hhkap.yaml +++ b/data/nde/enriched/entries/1507_hilversumse_historische_kring_albertus_perk_hhkap.yaml @@ -40,7 +40,7 @@ identifiers: - identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-7cf7-894a-20bd54d3721e identifier_url: urn:uuid:019ad9ec-7cc0-7cf7-894a-20bd54d3721e -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.233585+00:00 google_maps_enrichment: place_id: ChIJO5TyCABrxkcRK4V-8gF-KQc name: Hilversumse Historische Kring Albertus Perk @@ -215,3 +216,4 @@ web_claims: xpath_match_score: 1.0 extraction_method: mailto_link extraction_timestamp: '2025-12-01T12:34:07.491709+00:00' +url: https://albertusperk.nl/ diff --git a/data/nde/enriched/entries/1508_parochiearchief_kampen_pak.yaml b/data/nde/enriched/entries/1508_parochiearchief_kampen_pak.yaml index 2aa1061994..04dd9a92a9 100644 --- a/data/nde/enriched/entries/1508_parochiearchief_kampen_pak.yaml +++ b/data/nde/enriched/entries/1508_parochiearchief_kampen_pak.yaml @@ -4,7 +4,7 @@ original_entry: type_organisatie: unknown source: nan_isil_2025-11-06 type: - - U + - A entry_index: 1508 processing_timestamp: '2025-11-30T09:58:13.945408+00:00' nan_isil_enrichment: @@ -28,19 +28,19 @@ identifiers: assigned_date: '2025-07-24' source: Nationaal Archief ISIL Registry 2025-11-06 - identifier_scheme: GHCID - identifier_value: NL-OV-KAM-U-C + identifier_value: NL-OV-KAM-A-PK - identifier_scheme: GHCID_UUID - identifier_value: f7cfb354-1aa0-5526-89ae-2133533c3a5c - identifier_url: urn:uuid:f7cfb354-1aa0-5526-89ae-2133533c3a5c + identifier_value: 385940ba-7a26-5e47-b457-575d05f0e9ba + identifier_url: 
urn:uuid:385940ba-7a26-5e47-b457-575d05f0e9ba - identifier_scheme: GHCID_UUID_SHA256 - identifier_value: b49586e3-6da4-8827-b0c5-ec46e9c08eae - identifier_url: urn:uuid:b49586e3-6da4-8827-b0c5-ec46e9c08eae + identifier_value: 0a2c1b48-6811-8096-bc8a-ad9f14065c47 + identifier_url: urn:uuid:0a2c1b48-6811-8096-bc8a-ad9f14065c47 - identifier_scheme: GHCID_NUMERIC - identifier_value: '13012455009712130087' + identifier_value: '732990837152104598' - identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-7957-9232-b3b47d8ac543 identifier_url: urn:uuid:019ad9ec-7cc0-7957-9232-b3b47d8ac543 -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.252844+00:00 google_maps_enrichment: place_id: ChIJEXoq5vJ5yEcRh0suJoCk3v4 name: Stadsarchief Kampen @@ -159,19 +160,24 @@ google_maps_enrichment: google_maps_status: SUCCESS google_maps_search_query: Parochiearchief Kampen (PAK), Kampen, Netherlands ghcid: - ghcid_current: NL-OV-KAM-U-C + ghcid_current: NL-OV-KAM-A-PK ghcid_original: NL-OV-KAM-U-C - ghcid_uuid: f7cfb354-1aa0-5526-89ae-2133533c3a5c - ghcid_uuid_sha256: b49586e3-6da4-8827-b0c5-ec46e9c08eae - ghcid_numeric: 13012455009712130087 + ghcid_uuid: 385940ba-7a26-5e47-b457-575d05f0e9ba + ghcid_uuid_sha256: 0a2c1b48-6811-8096-bc8a-ad9f14065c47 + ghcid_numeric: 732990837152104598 record_id: 019ad9ec-7cc0-7957-9232-b3b47d8ac543 - generation_timestamp: '2025-12-01T12:38:04.703815+00:00' + generation_timestamp: '2025-12-01T16:00:00+00:00' ghcid_history: + - ghcid: NL-OV-KAM-A-PK + ghcid_numeric: 732990837152104598 + valid_from: '2025-12-01T16:00:00+00:00' + valid_to: null + reason: Corrected GHCID - fixed abbreviation (PK from Parochiearchief Kampen) and institution type (A=Archive, not 
U=Unknown) - ghcid: NL-OV-KAM-U-C ghcid_numeric: 13012455009712130087 valid_from: '2025-12-01T12:38:04.703815+00:00' - valid_to: null - reason: Initial GHCID assignment (NDE batch import December 2025) + valid_to: '2025-12-01T16:00:00+00:00' + reason: Initial GHCID assignment (NDE batch import December 2025) - incorrect abbreviation from corrupted custodian_name extraction location_resolution: method: REVERSE_GEOCODE geonames_id: 2753106 @@ -188,17 +194,11 @@ ghcid: geonames_id: 2753106 custodian_name: claim_type: custodian_name - claim_value: Cookiesbeleid - raw_value: Cookiesbeleid - source_url: https://www.stadsarchiefkampen.nl/ - retrieved_on: '' - xpath: /html/body/div[4]/div/div/div[1]/div[1]/h1 - html_file: web/1508/stadsarchiefkampen.nl/rendered.html - xpath_match_score: 0.9 - extraction_method: h1_tag - selection_method: priority_ranking - selection_priority: 70 + claim_value: Parochiearchief Kampen + source: original_entry + provenance_note: Derived from NAN ISIL Registry (authoritative). Note - Google Maps found Stadsarchief Kampen which is the city archive housing the parish archive collection. The original H1 extraction captured "Cookiesbeleid" (cookie policy popup). extraction_timestamp: '2025-12-01T12:37:14.987916+00:00' + correction_note: Manual correction on 2025-12-01. Original web scraping captured cookie popup H1 tag. Reverted to NAN ISIL authoritative name. Parochiearchief Kampen (PAK) is a distinct collection within Stadsarchief Kampen. 
web_enrichment: web_archives: - url: https://www.stadsarchiefkampen.nl/ @@ -249,3 +249,4 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:07.524187+00:00' +url: https://www.stadsarchiefkampen.nl/ diff --git a/data/nde/enriched/entries/1509_archief_eiland_bonaire_aeb.yaml b/data/nde/enriched/entries/1509_archief_eiland_bonaire_aeb.yaml index b3f3b88919..5cb8267d2f 100644 --- a/data/nde/enriched/entries/1509_archief_eiland_bonaire_aeb.yaml +++ b/data/nde/enriched/entries/1509_archief_eiland_bonaire_aeb.yaml @@ -40,7 +40,7 @@ identifiers: - identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-7b68-86b5-0b9e194ec0f9 identifier_url: urn:uuid:019ad9ec-7cc0-7b68-86b5-0b9e194ec0f9 -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.281697+00:00 google_maps_enrichment: place_id: ChIJZTGcI7Ebg44Rk4p-Ea5vkVI name: Post En Archief @@ -251,3 +252,4 @@ web_claims: xpath_match_score: 1.0 extraction_method: social_link extraction_timestamp: '2025-12-01T12:34:07.560668+00:00' +url: http://www.bonairegov.com/ diff --git a/data/nde/enriched/entries/1510_kitlv.yaml b/data/nde/enriched/entries/1510_kitlv.yaml index 72c2d9deda..2d4c55be34 100644 --- a/data/nde/enriched/entries/1510_kitlv.yaml +++ b/data/nde/enriched/entries/1510_kitlv.yaml @@ -40,7 +40,7 @@ identifiers: - identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-727e-a3cc-d8e60a6af3ca identifier_url: urn:uuid:019ad9ec-7cc0-727e-a3cc-d8e60a6af3ca -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created 
from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.307521+00:00 google_maps_enrichment: place_id: ChIJo2Gq7_HGxUcRkYE-4iAJA1I name: Royal Netherlands Institute of Southeast Asian and Caribbean Studies (KITLV-KNAW) @@ -297,3 +298,4 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:07.598934+00:00' +url: http://www.kitlv.nl/ diff --git a/data/nde/enriched/entries/1511_stichting_nationaal_museum_voor_wereldculturen_nmv.yaml b/data/nde/enriched/entries/1511_stichting_nationaal_museum_voor_wereldculturen_nmv.yaml index 537a3c3bd5..8069b93fe9 100644 --- a/data/nde/enriched/entries/1511_stichting_nationaal_museum_voor_wereldculturen_nmv.yaml +++ b/data/nde/enriched/entries/1511_stichting_nationaal_museum_voor_wereldculturen_nmv.yaml @@ -4,7 +4,7 @@ original_entry: type_organisatie: unknown source: nan_isil_2025-11-06 type: - - U + - M entry_index: 1511 processing_timestamp: '2025-11-30T09:58:13.945408+00:00' nan_isil_enrichment: @@ -28,19 +28,19 @@ identifiers: assigned_date: '2022-02-18' source: Nationaal Archief ISIL Registry 2025-11-06 - identifier_scheme: GHCID - identifier_value: NL-ZH-LEI-U-PST-k_pop_a_snapshot_tentoonstelling + identifier_value: NL-ZH-LEI-M-WL - identifier_scheme: GHCID_UUID - identifier_value: 8b45d9cc-18cc-5d47-99df-27937ed88f95 - identifier_url: urn:uuid:8b45d9cc-18cc-5d47-99df-27937ed88f95 + identifier_value: 0b9fef4d-bd59-5cc0-9fe8-1434459dd2a9 + identifier_url: urn:uuid:0b9fef4d-bd59-5cc0-9fe8-1434459dd2a9 - identifier_scheme: GHCID_UUID_SHA256 - identifier_value: 7eb2d4ab-da89-8ce7-a6b1-5102913526d4 - identifier_url: urn:uuid:7eb2d4ab-da89-8ce7-a6b1-5102913526d4 + identifier_value: 79a7aa1f-ecff-86b6-9b68-335c176b4cc7 + identifier_url: urn:uuid:79a7aa1f-ecff-86b6-9b68-335c176b4cc7 - identifier_scheme: GHCID_NUMERIC - identifier_value: '9129593229204327655' + identifier_value: '8766162253796816566' - 
identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-71b1-b104-3fb4d93374a5 identifier_url: urn:uuid:019ad9ec-7cc0-71b1-b104-3fb4d93374a5 -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.332093+00:00 google_maps_enrichment: place_id: ChIJA3e1oO7GxUcR0inKzc4IoYM name: Wereldmuseum Leiden @@ -180,20 +181,24 @@ google_maps_status: SUCCESS google_maps_search_query: Stichting Nationaal Museum voor Wereldculturen (NMvW), Leiden, Netherlands ghcid: - ghcid_current: NL-ZH-LEI-U-PST-k_pop_a_snapshot_tentoonstelling + ghcid_current: NL-ZH-LEI-M-WL ghcid_original: NL-ZH-LEI-U-PST-k_pop_a_snapshot_tentoonstelling - ghcid_uuid: 8b45d9cc-18cc-5d47-99df-27937ed88f95 - ghcid_uuid_sha256: 7eb2d4ab-da89-8ce7-a6b1-5102913526d4 - ghcid_numeric: 9129593229204327655 + ghcid_uuid: 0b9fef4d-bd59-5cc0-9fe8-1434459dd2a9 + ghcid_uuid_sha256: 79a7aa1f-ecff-86b6-9b68-335c176b4cc7 + ghcid_numeric: 8766162253796816566 record_id: 019ad9ec-7cc0-71b1-b104-3fb4d93374a5 - generation_timestamp: '2025-12-01T12:38:04.703815+00:00' + generation_timestamp: '2025-12-01T16:00:00+00:00' ghcid_history: + - ghcid: NL-ZH-LEI-M-WL + ghcid_numeric: 8766162253796816566 + valid_from: '2025-12-01T16:00:00+00:00' + valid_to: null + reason: Corrected GHCID - fixed abbreviation (WL from Wereldmuseum Leiden) and institution type (M=Museum). Original was incorrectly based on temporary exhibition title. 
- ghcid: NL-ZH-LEI-U-PST-k_pop_a_snapshot_tentoonstelling ghcid_numeric: 9129593229204327655 valid_from: '2025-12-01T12:38:04.703815+00:00' - valid_to: null - reason: Initial GHCID assignment (NDE batch import December 2025) - name suffix - added to resolve collision + valid_to: '2025-12-01T16:00:00+00:00' + reason: Initial GHCID assignment (NDE batch import December 2025) - incorrectly based on temporary exhibition H1 tag instead of institution name location_resolution: method: REVERSE_GEOCODE geonames_id: 2751773 @@ -208,21 +213,20 @@ ghcid: source: google_maps distance_km: 1.2931170039476427 geonames_id: 2751773 - collision_resolved: true - base_ghcid_before_collision: NL-ZH-LEI-U-PST custodian_name: claim_type: custodian_name - claim_value: 'K-pop: A Snapshot tentoonstelling' - raw_value: 'K-pop: A Snapshot tentoonstelling' + claim_value: Wereldmuseum Leiden + raw_value: Wereldmuseum Leiden | Eén museum, drie locaties. Welkom bij Wereldmuseum Leiden source_url: https://leiden.wereldmuseum.nl/ retrieved_on: '' - xpath: /html/body/div[6]/div[2]/main/section/div/div/section/div/div[1]/article/div/div[1]/div/div/article/div/div/div/h1 + xpath: /html/head/title html_file: web/1511/leiden.wereldmuseum.nl/rendered.html - xpath_match_score: 0.9 - extraction_method: h1_tag - selection_method: priority_ranking - selection_priority: 70 + xpath_match_score: 1.0 + extraction_method: title_tag + selection_method: manual_correction + selection_priority: 100 extraction_timestamp: '2025-12-01T12:37:15.049203+00:00' + correction_note: Manual correction - original H1 extraction captured temporary exhibition title "K-pop A Snapshot" instead of institution name. Corrected to use title tag which contains proper institution name. 
web_enrichment: web_archives: - url: https://leiden.wereldmuseum.nl/ @@ -306,3 +310,4 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:07.636557+00:00' +url: https://leiden.wereldmuseum.nl/ diff --git a/data/nde/enriched/entries/1512_diocesane_commissie_kerkelijk_kunstbezit.yaml b/data/nde/enriched/entries/1512_diocesane_commissie_kerkelijk_kunstbezit.yaml index 88e51f10ae..fdb7e12975 100644 --- a/data/nde/enriched/entries/1512_diocesane_commissie_kerkelijk_kunstbezit.yaml +++ b/data/nde/enriched/entries/1512_diocesane_commissie_kerkelijk_kunstbezit.yaml @@ -4,7 +4,7 @@ original_entry: type_organisatie: unknown source: nan_isil_2025-11-06 type: - - U + - H entry_index: 1512 processing_timestamp: '2025-11-30T09:58:13.945408+00:00' nan_isil_enrichment: @@ -28,19 +28,19 @@ identifiers: assigned_date: '2025-09-18' source: Nationaal Archief ISIL Registry 2025-11-06 - identifier_scheme: GHCID - identifier_value: NL-LI-ROE-U-DCKK + identifier_value: NL-LI-ROE-H-DCKK - identifier_scheme: GHCID_UUID - identifier_value: d6da362f-6439-5668-8312-bf17c0463b9a - identifier_url: urn:uuid:d6da362f-6439-5668-8312-bf17c0463b9a + identifier_value: 302e6b73-a57c-51d1-8083-b5c67113174e + identifier_url: urn:uuid:302e6b73-a57c-51d1-8083-b5c67113174e - identifier_scheme: GHCID_UUID_SHA256 - identifier_value: 39693277-3d92-8be7-8fc4-518e0672a7c8 - identifier_url: urn:uuid:39693277-3d92-8be7-8fc4-518e0672a7c8 + identifier_value: 1ce71168-1bfb-8a6f-952c-055f1d76c3d6 + identifier_url: urn:uuid:1ce71168-1bfb-8a6f-952c-055f1d76c3d6 - identifier_scheme: GHCID_NUMERIC - identifier_value: '4136893220431993831' + identifier_value: '2082652491525859951' - identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-7bac-92ca-665cb4af328b identifier_url: urn:uuid:019ad9ec-7cc0-7bac-92ca-665cb4af328b -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' 
@@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.350060+00:00 google_maps_enrichment: place_id: ChIJV4kHaExLx0cRWd7LsD09qBA name: Bisdom Roermond @@ -115,18 +116,23 @@ google_maps_enrichment: google_maps_status: SUCCESS google_maps_search_query: Diocesane Commissie Kerkelijk Kunstbezit, Roermond, Netherlands ghcid: - ghcid_current: NL-LI-ROE-U-DCKK + ghcid_current: NL-LI-ROE-H-DCKK ghcid_original: NL-LI-ROE-U-DCKK - ghcid_uuid: d6da362f-6439-5668-8312-bf17c0463b9a - ghcid_uuid_sha256: 39693277-3d92-8be7-8fc4-518e0672a7c8 - ghcid_numeric: 4136893220431993831 + ghcid_uuid: 302e6b73-a57c-51d1-8083-b5c67113174e + ghcid_uuid_sha256: 1ce71168-1bfb-8a6f-952c-055f1d76c3d6 + ghcid_numeric: 2082652491525859951 record_id: 019ad9ec-7cc0-7bac-92ca-665cb4af328b - generation_timestamp: '2025-12-01T12:38:04.703815+00:00' + generation_timestamp: '2025-12-01T16:00:00+00:00' ghcid_history: + - ghcid: NL-LI-ROE-H-DCKK + ghcid_numeric: 2082652491525859951 + valid_from: '2025-12-01T16:00:00+00:00' + valid_to: null + reason: Corrected institution type to H=Holy Sites (diocesan commission managing church art collections) - ghcid: NL-LI-ROE-U-DCKK ghcid_numeric: 4136893220431993831 valid_from: '2025-12-01T12:38:04.703815+00:00' - valid_to: null + valid_to: '2025-12-01T16:00:00+00:00' reason: Initial GHCID assignment (NDE batch import December 2025) location_resolution: method: REVERSE_GEOCODE @@ -149,3 +155,9 @@ custodian_name: provenance_note: Derived from original_entry.organisatie (no valid web_claims or wikidata) extraction_timestamp: '2025-12-01T12:37:15.067001+00:00' +parent_organization: + name: Bisdom Roermond + url: https://bisdom-roermond.org/ + google_maps_place_id: ChIJV4kHaExLx0cRWd7LsD09qBA + notes: The Diocesane Commissie Kerkelijk Kunstbezit operates under the Diocese of Roermond. 
It manages and preserves ecclesiastical art collections for parishes in the diocese. No dedicated website - operates through the diocese's organizational structure. + discovery_date: '2025-12-01T16:00:00+00:00' diff --git a/data/nde/enriched/entries/1513_stadsarchief_zoetermeer_saz.yaml b/data/nde/enriched/entries/1513_stadsarchief_zoetermeer_saz.yaml index 7d606a3c8c..0e1d2862f0 100644 --- a/data/nde/enriched/entries/1513_stadsarchief_zoetermeer_saz.yaml +++ b/data/nde/enriched/entries/1513_stadsarchief_zoetermeer_saz.yaml @@ -40,7 +40,7 @@ identifiers: - identifier_scheme: RECORD_ID identifier_value: 019ad9ec-7cc0-700e-b28b-eca8c0eed1e6 identifier_url: urn:uuid:019ad9ec-7cc0-700e-b28b-eca8c0eed1e6 -enrichment_status: needs_enrichment +enrichment_status: enriched provenance: schema_version: 1.0.0 generated_at: '2025-11-30T09:58:13.945408+00:00' @@ -60,6 +60,7 @@ provenance: TIER_4_INFERRED: [] notes: - Entry created from NAN ISIL Registry 2025-11-06 - needs further enrichment + - Enrichment status updated to 'enriched' on 2025-12-01T15:19:28.366271+00:00 google_maps_enrichment: place_id: ChIJa55zGyzJxUcRz1Jv8H6AzmQ name: Hist.Genootsch. Oud Soetermeer @@ -209,3 +210,4 @@ web_claims: xpath_match_score: 0.9 extraction_method: h1_tag extraction_timestamp: '2025-12-01T12:34:07.695433+00:00' +url: http://www.oudsoetermeer.nl/ diff --git a/docs/YOUTUBE_ENRICHMENT.md b/docs/YOUTUBE_ENRICHMENT.md new file mode 100644 index 0000000000..c3b1625cd5 --- /dev/null +++ b/docs/YOUTUBE_ENRICHMENT.md @@ -0,0 +1,238 @@ +# YouTube Enrichment for Heritage Custodians + +This document explains how to enrich heritage custodian entries with YouTube channel and video data. + +## Prerequisites + +### 1. Get a YouTube API Key + +1. **Go to Google Cloud Console** + - Visit: https://console.cloud.google.com/ + +2. 
**Create or Select a Project** + - Click on the project dropdown at the top + - Click "New Project" or select an existing one + - Name it something like "GLAM YouTube Enrichment" + +3. **Enable YouTube Data API v3** + - Navigate to "APIs & Services" → "Library" + - Search for "YouTube Data API v3" + - Click on it and press **Enable** + +4. **Create API Credentials** + - Go to "APIs & Services" → "Credentials" + - Click "Create Credentials" → "API Key" + - Copy the generated API key + +5. **Restrict the API Key (Recommended)** + - Click on your new API key to edit it + - Under "API restrictions", select "Restrict key" + - Select only "YouTube Data API v3" + - Click Save + +### 2. Set Environment Variable + +```bash +export YOUTUBE_API_KEY='your-api-key-here' +``` + +Or add to your `.env` file: +``` +YOUTUBE_API_KEY=your-api-key-here +``` + +### 3. Install Dependencies + +```bash +pip install httpx pyyaml + +# For transcript extraction (optional but recommended) +brew install yt-dlp # macOS +# or +pip install yt-dlp +``` + +## Usage + +### Basic Usage + +```bash +# Process all entries with YouTube URLs +python scripts/enrich_youtube.py + +# Dry run (show what would be done) +python scripts/enrich_youtube.py --dry-run + +# Process only first 10 entries +python scripts/enrich_youtube.py --limit 10 + +# Process a specific entry +python scripts/enrich_youtube.py --entry 0146_Q1663974.yaml +``` + +### Example Output + +``` +Processing: 0146_Q1663974.yaml + Found YouTube URL: https://www.youtube.com/user/TUApeldoorn + Fetching channel info for UCxxxxx... + Fetching 10 recent videos... + Fetching comments for top videos... + Fetching transcripts for videos with captions... 
+ Status: SUCCESS + Channel: Theologische Universiteit Apeldoorn + Subscribers: 1,234 + Videos fetched: 10 +``` + +## Data Collected + +### Channel Information +- Channel ID and URL +- Channel title and description +- Custom URL (e.g., @channelname) +- Subscriber count +- Total video count +- Total view count +- Channel creation date +- Country +- Thumbnail and banner images + +### Video Information (per video) +- Video ID and URL +- Title and description +- Published date +- Duration +- View count +- Like count +- Comment count +- Tags +- Thumbnail +- Caption availability +- Default language + +### Comments (per video) +- Comment ID +- Author name and channel URL +- Comment text +- Like count +- Reply count +- Published date + +### Transcripts (when available) +- Full transcript text +- Language +- Transcript type (manual or auto-generated) + +## Provenance Tracking + +All extracted data includes full provenance: + +```yaml +youtube_enrichment: + source_url: https://www.youtube.com/user/TUApeldoorn + fetch_timestamp: '2025-12-01T15:30:00+00:00' + api_endpoint: https://www.googleapis.com/youtube/v3 + api_version: v3 + status: SUCCESS + channel: + channel_id: UCxxxxxxxxxxxxx + channel_url: https://www.youtube.com/channel/UCxxxxxxxxxxxxx + title: Theologische Universiteit Apeldoorn + subscriber_count: 1234 + # ... more fields + videos: + - video_id: abc123xyz + video_url: https://www.youtube.com/watch?v=abc123xyz + title: Video Title + view_count: 5678 + comments: + - comment_id: xyz789 + text: Great video! + like_count: 5 + transcript: + transcript_text: "Full video transcript..." 
+ language: nl + transcript_type: auto +``` + +## API Quota + +YouTube Data API has a daily quota of **10,000 units**: + +| Operation | Cost | +|-----------|------| +| Channel info | 1 unit | +| Video list | 1 unit | +| Video details | 1 unit per video | +| Comments | 1 unit per 100 comments | +| Search | 100 units | + +**Estimated usage per custodian**: 15-50 units (depending on videos/comments) + +For 100 custodians: ~1,500-5,000 units (well within daily quota) + +## Troubleshooting + +### "API key not valid" +- Check that the API key is correct +- Verify YouTube Data API v3 is enabled +- Check that the key isn't restricted to the wrong APIs + +### "Quota exceeded" +- Wait until the next day (quota resets at midnight Pacific Time) +- Or request a quota increase in Google Cloud Console + +### "Channel not found" +- The channel may have been deleted +- The URL format may not be recognized +- Try using the channel ID directly + +### "Comments disabled" +- Some videos have comments disabled +- The script handles this gracefully + +### "No transcript available" +- Not all videos have captions +- Auto-generated captions may not be available for all languages + +## Architecture + +``` +Entry YAML file + ↓ +Find YouTube URL from: + - web_claims.social_youtube + - wikidata_enrichment.P2397 + ↓ +Resolve channel ID (handle → channel ID) + ↓ +Fetch via YouTube Data API v3: + - Channel info + - Recent videos + - Video details + - Comments + ↓ +Fetch via yt-dlp: + - Transcripts/captions + ↓ +Add youtube_enrichment section +Update provenance + ↓ +Save YAML file +``` + +## Related Scripts + +- `scripts/enrich_wikidata.py` - Wikidata enrichment +- `scripts/enrich_google_maps.py` - Google Maps enrichment +- `scripts/fetch_website_playwright.py` - Website archiving +- `mcp_servers/social_media/server.py` - MCP server for social media + +## Future Enhancements + +- [ ] Track channel subscriber growth over time +- [ ] Extract video chapters/timestamps +- [ ] Analyze video categories and 
topics +- [ ] Cross-reference with other social media +- [ ] Detect playlists relevant to heritage diff --git a/docs/nde/URL_DISCOVERY_REPORT.md b/docs/nde/URL_DISCOVERY_REPORT.md index 3640a6e5c3..553ce823ce 100644 --- a/docs/nde/URL_DISCOVERY_REPORT.md +++ b/docs/nde/URL_DISCOVERY_REPORT.md @@ -38,7 +38,7 @@ Found official websites for **16 entries** by querying Wikidata property P856: ## URLs Discovered via Web Search (Exa) -Found URLs for **9 additional entries**: +Found URLs for **10 additional entries**: | Entry ID | Institution | URL | Notes | |----------|-------------|-----|-------| @@ -50,7 +50,14 @@ Found URLs for **9 additional entries**: | 0715 | HDC Protestants Erfgoed | https://www.hdcvu.nl/ | Within VU Library | | 0729 | Historische Vereniging Staphorst | https://www.historischeverenigingstaphorst.nl/ | | | 0851 | Historische Vereniging Den Dolder | https://www.historischeverenigingdendolder.nl/ | | -| 1170 | Nederlandse Vereniging voor Papierknipkunst | https://papierknippen.nl/ | | +| 1170 | Nederlandse Vereniging voor Papierknipkunst | https://papierknippen.nl/ | Original NDE entry | +| 1504 | Nederlandse Vereniging voor Papierknipkunst | https://papierknippen.nl/ | Same org, from NAN ISIL 2025-11-06 | + +## Entries Without Dedicated Websites (Parent Organization Only) + +| Entry ID | Institution | Parent Organization | Notes | +|----------|-------------|---------------------|-------| +| 1512 | Diocesane Commissie Kerkelijk Kunstbezit | Bisdom Roermond (https://bisdom-roermond.org/) | Commission managing diocesan church art - operates under diocese, no dedicated website | ## Problematic Entries @@ -142,4 +149,23 @@ These entries have no discoverable website. 
Many are: --- *Generated by NDE URL Discovery workflow* -*Last updated: 2025-12-01* +*Last updated: 2025-12-01T16:30:00+00:00* + +## Session Updates - December 2025 + +### 2025-12-01: NAN ISIL Batch Corrections + +Fixed several issues with entries 1502-1513 (NAN ISIL 2025-11-06 batch): + +| Entry ID | Issue | Resolution | +|----------|-------|------------| +| 1504 | Missing URL | Added https://papierknippen.nl/ (discovered via Exa) | +| 1508 | Wrong custodian_name ("Cookiesbeleid") | Corrected to "Parochiearchief Kampen" from NAN ISIL registry | +| 1508 | Wrong institution type (U) | Changed to A (Archive) | +| 1508 | Incorrect GHCID | Regenerated: NL-OV-KAM-A-PK | +| 1511 | Wrong custodian_name (exhibition title) | Already corrected to "Wereldmuseum Leiden" | +| 1511 | Wrong institution type (U) | Changed to M (Museum) | +| 1511 | GHCID based on exhibition title | Regenerated: NL-ZH-LEI-M-WL | +| 1512 | Wrong institution type (U) | Changed to H (Holy Sites - diocesan heritage commission) | +| 1512 | No website info | Added parent organization note (Bisdom Roermond) | +| 1512 | Incorrect GHCID | Regenerated: NL-LI-ROE-H-DCKK | diff --git a/frontend/public/data/heritage_custodian_ontology.mmd b/frontend/public/data/heritage_custodian_ontology.mmd index df926c423f..ca2bd71ece 100644 --- a/frontend/public/data/heritage_custodian_ontology.mmd +++ b/frontend/public/data/heritage_custodian_ontology.mmd @@ -68,6 +68,9 @@ CustodianLegalStatus { CustodianIdentifierList identifiers LegalResponsibilityCollectionList collections_under_responsibility } +ReconstructedEntity { + ReconstructionActivity was_generated_by +} ConfidenceMeasure { float confidence_value PK string confidence_method @@ -601,6 +604,7 @@ CustodianCollection { CustodianObservationList was_derived_from PK date valid_from date valid_to + ReconstructionActivity was_generated_by } LegalResponsibilityCollection { CustodianLegalStatus responsible_legal_entity PK @@ -625,6 +629,7 @@ LegalResponsibilityCollection { 
CustodianObservationList was_derived_from PK date valid_from date valid_to + ReconstructionActivity was_generated_by } GeoSpatialPlace { uriorcurie geospatial_id PK @@ -750,6 +755,25 @@ LegalStatus { TimeSpan temporal_validity PK Jurisdiction jurisdiction } +RegistrationAuthority { + uriorcurie id PK + string name PK + string name_local + string abbreviation + Country country PK + uri registry_url PK + uri api_url + uri sparql_endpoint + uri data_license + RegistrationAuthorityGovernanceEnum governance_type PK + integer established_year + RegistrationAuthority predecessor + StandardList standards_maintained + AllocationAgencyList allocation_agencies + uri website + string description + string wikidata_id +} Country { string alpha_2 PK string alpha_3 PK @@ -766,6 +790,7 @@ Settlement { Subregion subregion float latitude float longitude + uriorcurie settlement_id PK } DataLicensePolicy { uriorcurie id PK @@ -1121,24 +1146,27 @@ AllocationAgency { uri allocation_policy_url string description } -RegistrationAuthority { +ContributingAgency { uriorcurie id PK + string contributor_code PK string name PK string name_local string abbreviation Country country PK - uri registry_url PK - uri api_url - uri sparql_endpoint - uri data_license - RegistrationAuthorityGovernanceEnum governance_type PK - integer established_year - RegistrationAuthority predecessor - StandardList standards_maintained - AllocationAgencyList allocation_agencies + string authority_file_name + string authority_file_abbreviation + uri authority_file_url + AuthorityRecordFormatEnum record_format PK + AuthorityEntityTypeEnumList entity_types_covered PK + StandardList contributes_to PK + date contribution_start_date + boolean is_active PK + boolean governance_representative uri website string description - string wikidata_id + AllocationAgency also_allocation_agency + StandardsOrganizationList member_of + ConsortiumGovernanceRoleEnum governance_role } CustodianArchive { uriorcurie id PK @@ -1203,28 +1231,6 @@ 
ArticlesOfAssociation { date valid_from date valid_to } -ContributingAgency { - uriorcurie id PK - string contributor_code PK - string name PK - string name_local - string abbreviation - Country country PK - string authority_file_name - string authority_file_abbreviation - uri authority_file_url - AuthorityRecordFormatEnum record_format PK - AuthorityEntityTypeEnumList entity_types_covered PK - StandardList contributes_to PK - date contribution_start_date - boolean is_active PK - boolean governance_representative - uri website - string description - AllocationAgency also_allocation_agency - StandardsOrganizationList member_of - ConsortiumGovernanceRoleEnum governance_role -} SocialMediaProfile { uriorcurie social_media_profile_id PK SocialMediaPlatformTypeEnum platform_type PK @@ -1331,7 +1337,7 @@ CallForApplication { WebObservation { uriorcurie observation_id uri source_url - datetime retrieved_on PK + datetime retrieved_on string retrieved_by string retrieval_method string content_hash @@ -1400,7 +1406,7 @@ WebPortal { uriorcurieList identifiers TimeSpan temporal_extent CustodianObservationList was_derived_from PK - ReconstructionActivity was_generated_by PK + ReconstructionActivity was_generated_by } PrimaryDigitalPresenceAssertion { uriorcurie assertion_id PK @@ -1578,7 +1584,7 @@ WebClaim { uriorcurie claim_id ClaimTypeEnum claim_type PK string claim_value PK - uri source_url + uri source_url PK datetime retrieved_on PK string xpath PK string html_file PK @@ -1589,7 +1595,586 @@ WebClaim { string claim_notes } + %% Enumerations +ReconstructionActivityTypeEnum { + string enum_type PK + string MANUAL_CURATION + string ALGORITHMIC_MATCHING + string HYBRID + string EXPERT_REVIEW +} +AgentTypeEnum { + string enum_type PK + string PERSON + string GROUP + string ORGANIZATION + string FORMAL_ORGANIZATION + string PUBLIC_ORGANIZATION + string ORGANIZATIONAL_UNIT + string ORGANIZATIONAL_COLLABORATION + string SOFTWARE +} +AppellationTypeEnum { + string enum_type PK + 
string OFFICIAL + string VERNACULAR + string HISTORICAL + string TRANSLATION + string ABBREVIATION + string ALTERNATIVE +} +SourceDocumentTypeEnum { + string enum_type PK + string ARCHIVAL_DOCUMENT + string WEBSITE + string LETTERHEAD + string STATUTE + string PUBLICATION + string DATABASE + string SIGNAGE +} +CustodianPrimaryTypeEnum { + string enum_type PK + string GALLERY + string LIBRARY + string ARCHIVE + string MUSEUM + string OFFICIAL_INSTITUTION + string RESEARCH_CENTER + string COMMERCIAL + string UNSPECIFIED + string BIO_CUSTODIAN + string EDUCATION_PROVIDER + string _and_9_more +} +EncompassingBodyTypeEnum { + string enum_type PK + string UMBRELLA + string NETWORK + string CONSORTIUM + string COOPERATIVE + string SOCIAL_MOVEMENT + string FUNDING_BODY +} +EntityTypeEnum { + string enum_type PK + string INDIVIDUAL + string GROUP + string ORGANIZATION + string GOVERNMENT + string CORPORATION +} +LegalStatusEnum { + string enum_type PK + string ACTIVE + string DISSOLVED + string MERGED + string SUSPENDED + string BANKRUPTCY + string LIQUIDATION + string UNKNOWN +} +OrganizationalUnitTypeEnum { + string enum_type PK + string DEPARTMENT + string TEAM + string DIVISION + string GROUP + string PROGRAM + string SERVICE + string LAB + string OFFICE + string UNIT +} +OrganizationalChangeEventTypeEnum { + string enum_type PK + string FOUNDING + string DISSOLUTION + string MERGER + string SPLIT + string SPIN_OFF + string EXPANSION + string REORGANIZATION + string RENAMING + string TRANSFER + string REDUCTION +} +OrganizationalChangeEventCategoryEnum { + string enum_type PK + string EXISTENTIAL + string STATE +} +PlaceSpecificityEnum { + string enum_type PK + string BUILDING + string STREET + string NEIGHBORHOOD + string CITY + string REGION + string VAGUE +} +AuxiliaryPlaceTypeEnum { + string enum_type PK + string BRANCH_OFFICE + string STORAGE_FACILITY + string RESEARCH_CENTER + string EXHIBITION_SPACE + string HISTORIC_BUILDING + string TEMPORARY_LOCATION + string 
ADMINISTRATIVE_OFFICE + string EDUCATION_CENTER + string CONSERVATION_LAB + string READING_ROOM_ANNEX + string _and_4_more +} +OrganizationBranchTypeEnum { + string enum_type PK + string REGIONAL_OFFICE + string BRANCH_LIBRARY + string SATELLITE_GALLERY + string CONSERVATION_LAB + string DIGITIZATION_CENTER + string RESEARCH_CENTER + string EDUCATION_CENTER + string ADMINISTRATIVE_OFFICE + string STORAGE_MANAGEMENT + string EXHIBITION_SPACE + string _and_2_more +} +AuxiliaryDigitalPlatformTypeEnum { + string enum_type PK + string PROJECT_WEBSITE + string EXHIBITION_MICROSITE + string API_ENDPOINT + string MOBILE_APP + string COLLECTION_BROWSER + string CROWDSOURCING_PLATFORM + string EDUCATIONAL_PORTAL + string DATA_PORTAL + string LEGACY_PLATFORM + string VIRTUAL_TOUR + string _and_6_more +} +FeatureTypeEnum { + string enum_type PK + string MANSION + string VACATION_PROPERTY + string BUITENPLAATS + string URBAN_SETTLEMENT + string TOWN + string PARISH_CHURCH + string SEWERAGE_PUMPING_STATION + string ARTIFICIAL_OBJECT + string PHYSICAL_OBJECT + string ARTIFICIAL_PHYSICAL_OBJECT + string _and_284_more +} +StaffRoleTypeEnum { + string enum_type PK + string CURATOR + string COLLECTIONS_MANAGER + string CONSERVATOR + string ARCHIVIST + string RECORDS_MANAGER + string LIBRARIAN + string DIGITAL_PRESERVATION_SPECIALIST + string DIGITIZATION_SPECIALIST + string DATA_MANAGER + string EDUCATOR + string _and_8_more +} +CallForApplicationStatusEnum { + string enum_type PK + string ANNOUNCED + string OPEN + string CLOSING_SOON + string CLOSED + string UNDER_REVIEW + string RESULTS_PUBLISHED + string CANCELLED + string REOPENED +} +FundingRequirementTypeEnum { + string enum_type PK + string ELIGIBILITY_GEOGRAPHIC + string ELIGIBILITY_ORGANIZATIONAL + string ELIGIBILITY_HERITAGE_TYPE + string ELIGIBILITY_EXPERIENCE + string ELIGIBILITY_REGISTRATION + string FINANCIAL_COFUNDING + string FINANCIAL_BUDGET_MINIMUM + string FINANCIAL_BUDGET_MAXIMUM + string FINANCIAL_RATE + string 
FINANCIAL_ELIGIBLE_COSTS + string _and_17_more +} +WebPortalTypeEnum { + string enum_type PK + string NATIONAL_AGGREGATOR + string REGIONAL_AGGREGATOR + string ARCHIVAL_PORTAL + string LIBRARY_UNION_CATALOG + string MUSEUM_COLLECTION_PORTAL + string CROSS_DOMAIN_AGGREGATOR + string COLONIAL_HERITAGE_PORTAL + string MONASTIC_HERITAGE_PORTAL + string GENEALOGICAL_PORTAL + string NEWSPAPER_DIGITIZATION_PORTAL + string _and_9_more +} +SocialMediaPlatformTypeEnum { + string enum_type PK + string FACEBOOK + string X_TWITTER + string INSTAGRAM + string YOUTUBE + string LINKEDIN + string TIKTOK + string PINTEREST + string FLICKR + string VIMEO + string THREADS + string _and_15_more +} +DigitalPresenceTypeEnum { + string enum_type PK + string WEBSITE + string WEB_APPLICATION + string DISCOVERY_PORTAL + string DIGITAL_REPOSITORY + string PROJECT_WEBSITE + string EXHIBITION_MICROSITE + string SOCIAL_MEDIA + string MESSAGING_SERVICE + string API_SERVICE + string MOBILE_APP + string _and_7_more +} +GeometryTypeEnum { + string enum_type PK + string POINT + string LINESTRING + string POLYGON + string MULTIPOINT + string MULTILINESTRING + string MULTIPOLYGON + string GEOMETRYCOLLECTION +} +RegistrationAuthorityGovernanceEnum { + string enum_type PK + string GOVERNMENT + string INTERGOVERNMENTAL + string NONPROFIT + string CONSORTIUM + string COMMERCIAL +} +DataLicenseTypeEnum { + string enum_type PK + string CREATIVE_COMMONS + string OPEN_DATA_COMMONS + string PUBLIC_DOMAIN + string OPEN_SOURCE + string GOVERNMENT_OPEN + string PROPRIETARY + string TERMS_OF_SERVICE +} +DataOpennessLevelEnum { + string enum_type PK + string FULLY_OPEN + string OPEN_WITH_ATTRIBUTION + string OPEN_SHAREALIKE + string RESTRICTED_NONCOMMERCIAL + string RESTRICTED_NO_DERIVATIVES + string CLOSED_SUBSCRIPTION + string CLOSED_PROPRIETARY +} +OpennessStanceEnum { + string enum_type PK + string STRONG_OPEN_ADVOCATE + string OPEN_BY_DEFAULT + string MIXED_POLICY + string CLOSED_BY_DEFAULT + string 
FULLY_PROPRIETARY +} +JurisdictionTypeEnum { + string enum_type PK + string NATIONAL + string SUBNATIONAL + string MUNICIPAL + string SUPRANATIONAL +} +LegalSystemTypeEnum { + string enum_type PK + string CIVIL_LAW + string COMMON_LAW + string MIXED + string RELIGIOUS + string CUSTOMARY +} +RegisterTypeEnum { + string enum_type PK + string COMMERCIAL + string FOUNDATION + string ASSOCIATION + string CHARITY + string CULTURAL + string MIXED +} +StandardsOrganizationTypeEnum { + string enum_type PK + string INTERGOVERNMENTAL + string NATIONAL + string INDUSTRY_CONSORTIUM + string LIBRARY_COOPERATIVE + string PROFESSIONAL_ASSOCIATION + string NATIONAL_MUSEUM_ASSOCIATION + string CERTIFICATION_BODY + string GOVERNANCE_COUNCIL +} +StandardTypeEnum { + string enum_type PK + string ISO_STANDARD + string CONSORTIUM_SERVICE + string PROPRIETARY_SYSTEM + string NATIONAL_STANDARD + string COMMUNITY_STANDARD + string QUALITY_STANDARD + string COMMERCIAL_SERVICE + string GOVERNMENT_REGISTRY + string INTERNATIONAL_TREATY + string CROWDSOURCED + string _and_6_more +} +GovernanceModelEnum { + string enum_type PK + string ISO_TC + string COUNCIL + string SINGLE_AUTHORITY + string COMMUNITY_CONSENSUS + string MEMBERSHIP_BOARD + string PROPRIETARY + string GOVERNMENT + string ACADEMIC + string INTERGOVERNMENTAL + string COMMUNITY + string _and_1_more +} +StandardScopeTypeEnum { + string enum_type PK + string GLOBAL + string NATIONAL + string REGIONAL + string DOMAIN_SPECIFIC + string INSTITUTIONAL +} +IdentifierDomainEnum { + string enum_type PK + string ORGANIZATION + string HERITAGE_INSTITUTION + string PERSON + string WORK + string NAME_AUTHORITY + string RESEARCH_ORG + string LEGAL_ENTITY + string COLLECTION + string PLACE + string BUILDING + string _and_8_more +} +AllocationDomainEnum { + string enum_type PK + string LIBRARY_PUBLIC + string LIBRARY_ACADEMIC + string LIBRARY_RESEARCH + string LIBRARY_NATIONAL + string ARCHIVE + string MUSEUM + string GALLERY + string 
HERITAGE_SOCIETY + string RESEARCH_ORGANIZATION + string EDUCATION_PROVIDER + string _and_3_more +} +AuthorityRecordFormatEnum { + string enum_type PK + string MARC21_AUTHORITY + string UNIMARC_AUTHORITY + string RDF + string PROPRIETARY +} +AuthorityEntityTypeEnum { + string enum_type PK + string PERSON + string CORPORATE_BODY + string GEOGRAPHIC + string WORK + string SUBJECT + string EVENT + string FAMILY +} +ConsortiumGovernanceRoleEnum { + string enum_type PK + string VOTING_MEMBER + string OBSERVER + string FOUNDING_MEMBER + string ASSOCIATE + string REGIONAL_REPRESENTATIVE +} +RecordsLifecycleStageEnum { + string enum_type PK + string ACTIVE + string INACTIVE + string HERITAGE + string PRE_EXISTENCE +} +IdentifierStandardEnum { + string enum_type PK + string ISIL + string ISNI + string VIAF + string GND + string LCNAF + string BNF + string NTA + string NDL + string NLA + string BNE + string _and_121_more +} +ProjectStatusEnum { + string enum_type PK + string PROPOSED + string APPROVED + string IN_PROGRESS + string ON_HOLD + string COMPLETED + string DISCONTINUED + string EXTENDED +} +GiftShopTypeEnum { + string enum_type PK + string MUSEUM_SHOP + string BOOKSHOP + string DESIGN_STORE + string SPECIALTY_SHOP + string POP_UP + string KIOSK + string ONLINE_ONLY + string CAFE_SHOP + string CHILDREN_SHOP + string PUBLICATION_CENTER +} +ProductCategoryEnum { + string enum_type PK + string REPRODUCTIONS + string BOOKS + string DESIGN_OBJECTS + string JEWELRY + string TEXTILES + string STATIONERY + string HOME_DECOR + string TOYS + string FOOD + string SOUVENIRS + string _and_19_more +} +StorageConditionStatusEnum { + string enum_type PK + string EXCELLENT + string GOOD + string ACCEPTABLE + string CONCERNING + string POOR + string CRITICAL + string UNKNOWN + string NOT_APPLICABLE +} +StorageConditionCategoryEnum { + string enum_type PK + string TEMPERATURE + string HUMIDITY + string LIGHT + string AIR_QUALITY + string PEST_CONTROL + string FIRE_SAFETY + string 
FLOOD_WATER + string SECURITY + string STRUCTURAL + string SPACE_CAPACITY +} +StorageObserverTypeEnum { + string enum_type PK + string INTERNAL_STAFF + string EXTERNAL_CONSULTANT + string GOVERNMENT_INSPECTOR + string ACCREDITATION_ASSESSOR + string INSURANCE_ASSESSOR + string JOURNALIST + string RESEARCHER + string VISITING_PROFESSIONAL + string PUBLIC_VISITOR + string WHISTLEBLOWER + string _and_2_more +} +StorageStandardEnum { + string enum_type PK + string ISO_82306 + string ISO_TR_19815_2018 + string EN_16893_2018 + string EN_15757_2010 + string ISO_9706_2025 + string ISO_11108 + string ISO_20494 + string PAS_198_2012 + string BS_5454_2000 + string EN_16141_2012 + string _and_5_more +} +StorageTypeEnum { + string enum_type PK + string ARCHIVE_DEPOT + string ART_STORAGE + string GENERAL_DEPOT + string COLD_STORAGE + string HIGH_SECURITY_VAULT + string OPEN_STORAGE + string OFFSITE_STORAGE + string COMPACT_STORAGE + string HAZMAT_STORAGE + string DIGITAL_STORAGE + string _and_5_more +} +ArchiveProcessingStatusEnum { + string enum_type PK + string UNPROCESSED + string IN_APPRAISAL + string IN_ARRANGEMENT + string IN_DESCRIPTION + string IN_PRESERVATION + string PROCESSED_PENDING_TRANSFER + string TRANSFERRED_TO_COLLECTION + string PARTIALLY_PROCESSED + string ON_HOLD + string DEACCESSIONED +} +ClaimTypeEnum { + string enum_type PK + string full_name + string short_name + string description + string email + string phone + string address + string website + string social_media + string facebook + string twitter + string _and_14_more +} + +CustodianAppellation ||--|o AppellationTypeEnum : "appellation_type" CustodianAppellation ||--|o CustodianName : "variant_of_name" +CustodianName ||--|| ReconstructedEntity : "inherits" CustodianName ||--}o CustodianAppellation : "alternative_names" CustodianName ||--|o TimeSpan : "name_validity_period" CustodianName ||--|o CustodianName : "supersedes" @@ -1597,12 +2182,14 @@ CustodianName ||--|o CustodianName : "superseded_by" 
CustodianName ||--}| CustodianObservation : "was_derived_from" CustodianName ||--|o ReconstructionActivity : "was_generated_by" CustodianName ||--|| Custodian : "refers_to_custodian" +ReconstructionAgent ||--|o AgentTypeEnum : "agent_type" CustodianObservation ||--|| CustodianAppellation : "observed_name" CustodianObservation ||--}o CustodianAppellation : "alternative_observed_names" CustodianObservation ||--|| SourceDocument : "source" CustodianObservation ||--|o LanguageCode : "language" CustodianObservation ||--|o CustodianLegalStatus : "derived_from_entity" CustodianObservation ||--|o ConfidenceMeasure : "confidence_score" +CustodianLegalStatus ||--|| ReconstructedEntity : "inherits" CustodianLegalStatus ||--|| Custodian : "refers_to_custodian" CustodianLegalStatus ||--|| LegalEntityType : "legal_entity_type" CustodianLegalStatus ||--|| LegalName : "legal_name" @@ -1621,6 +2208,7 @@ CustodianLegalStatus ||--|| ReconstructionActivity : "was_generated_by" CustodianLegalStatus ||--|o CustodianLegalStatus : "was_revision_of" CustodianLegalStatus ||--}o CustodianIdentifier : "identifiers" CustodianLegalStatus ||--}o LegalResponsibilityCollection : "collections_under_responsibility" +ReconstructedEntity ||--|o ReconstructionActivity : "was_generated_by" Custodian ||--|o CustodianType : "custodian_type" Custodian ||--}o CustodianArchive : "has_operational_archive" Custodian ||--}o CustodianAdministration : "has_administration" @@ -1630,85 +2218,107 @@ Custodian ||--|o DataLicensePolicy : "data_license_policy" Custodian ||--}o Project : "participated_in_projects" Custodian ||--}o GiftShop : "gift_shop" Custodian ||--}o Storage : "storage_facilities" +CustodianType ||--|o CustodianPrimaryTypeEnum : "primary_type" CustodianType ||--|o CustodianType : "broader_type" CustodianType ||--}o CustodianType : "narrower_types" CustodianType ||--}o CustodianType : "related_types" ArchiveOrganizationType ||--|| CustodianType : "inherits" +ArchiveOrganizationType ||--|o 
CustodianPrimaryTypeEnum : "primary_type" ArchiveOrganizationType ||--|o ArchiveOrganizationType : "broader_type" ArchiveOrganizationType ||--}o CustodianType : "narrower_types" ArchiveOrganizationType ||--}o CustodianType : "related_types" MuseumType ||--|| CustodianType : "inherits" +MuseumType ||--|o CustodianPrimaryTypeEnum : "primary_type" MuseumType ||--|o MuseumType : "broader_type" MuseumType ||--}o CustodianType : "narrower_types" MuseumType ||--}o CustodianType : "related_types" LibraryType ||--|| CustodianType : "inherits" +LibraryType ||--|o CustodianPrimaryTypeEnum : "primary_type" LibraryType ||--|o LibraryType : "broader_type" LibraryType ||--}o CustodianType : "narrower_types" LibraryType ||--}o CustodianType : "related_types" GalleryType ||--|| CustodianType : "inherits" +GalleryType ||--|o CustodianPrimaryTypeEnum : "primary_type" GalleryType ||--|o GalleryType : "broader_type" GalleryType ||--}o CustodianType : "narrower_types" GalleryType ||--}o CustodianType : "related_types" ResearchOrganizationType ||--|| CustodianType : "inherits" +ResearchOrganizationType ||--|o CustodianPrimaryTypeEnum : "primary_type" ResearchOrganizationType ||--|o ResearchOrganizationType : "broader_type" ResearchOrganizationType ||--}o CustodianType : "narrower_types" ResearchOrganizationType ||--}o CustodianType : "related_types" OfficialInstitutionType ||--|| CustodianType : "inherits" +OfficialInstitutionType ||--|o CustodianPrimaryTypeEnum : "primary_type" OfficialInstitutionType ||--|o CustodianType : "broader_type" OfficialInstitutionType ||--}o CustodianType : "narrower_types" OfficialInstitutionType ||--}o CustodianType : "related_types" BioCustodianType ||--|| CustodianType : "inherits" +BioCustodianType ||--|o CustodianPrimaryTypeEnum : "primary_type" BioCustodianType ||--|o CustodianType : "broader_type" BioCustodianType ||--}o CustodianType : "narrower_types" BioCustodianType ||--}o CustodianType : "related_types" EducationProviderType ||--|| CustodianType 
: "inherits" +EducationProviderType ||--|o CustodianPrimaryTypeEnum : "primary_type" EducationProviderType ||--|o CustodianType : "broader_type" EducationProviderType ||--}o CustodianType : "narrower_types" EducationProviderType ||--}o CustodianType : "related_types" HeritageSocietyType ||--|| CustodianType : "inherits" +HeritageSocietyType ||--|o CustodianPrimaryTypeEnum : "primary_type" HeritageSocietyType ||--|o CustodianType : "broader_type" HeritageSocietyType ||--}o CustodianType : "narrower_types" HeritageSocietyType ||--}o CustodianType : "related_types" FeatureCustodianType ||--|| CustodianType : "inherits" +FeatureCustodianType ||--|o CustodianPrimaryTypeEnum : "primary_type" FeatureCustodianType ||--|o CustodianType : "broader_type" FeatureCustodianType ||--}o CustodianType : "narrower_types" FeatureCustodianType ||--}o CustodianType : "related_types" IntangibleHeritageGroupType ||--|| CustodianType : "inherits" +IntangibleHeritageGroupType ||--|o CustodianPrimaryTypeEnum : "primary_type" IntangibleHeritageGroupType ||--|o CustodianType : "broader_type" IntangibleHeritageGroupType ||--}o CustodianType : "narrower_types" IntangibleHeritageGroupType ||--}o CustodianType : "related_types" PersonalCollectionType ||--|| CustodianType : "inherits" +PersonalCollectionType ||--|o CustodianPrimaryTypeEnum : "primary_type" PersonalCollectionType ||--|o CustodianType : "broader_type" PersonalCollectionType ||--}o CustodianType : "narrower_types" PersonalCollectionType ||--}o CustodianType : "related_types" HolySacredSiteType ||--|| CustodianType : "inherits" +HolySacredSiteType ||--|o CustodianPrimaryTypeEnum : "primary_type" HolySacredSiteType ||--|o CustodianType : "broader_type" HolySacredSiteType ||--}o CustodianType : "narrower_types" HolySacredSiteType ||--}o CustodianType : "related_types" DigitalPlatformType ||--|| CustodianType : "inherits" +DigitalPlatformType ||--|o CustodianPrimaryTypeEnum : "primary_type" DigitalPlatformType ||--|o CustodianType : 
"broader_type" DigitalPlatformType ||--}o CustodianType : "narrower_types" DigitalPlatformType ||--}o CustodianType : "related_types" NonProfitType ||--|| CustodianType : "inherits" +NonProfitType ||--|o CustodianPrimaryTypeEnum : "primary_type" NonProfitType ||--|o CustodianType : "broader_type" NonProfitType ||--}o CustodianType : "narrower_types" NonProfitType ||--}o CustodianType : "related_types" TasteScentHeritageType ||--|| CustodianType : "inherits" +TasteScentHeritageType ||--|o CustodianPrimaryTypeEnum : "primary_type" TasteScentHeritageType ||--|o CustodianType : "broader_type" TasteScentHeritageType ||--}o CustodianType : "narrower_types" TasteScentHeritageType ||--}o CustodianType : "related_types" CommercialOrganizationType ||--|| CustodianType : "inherits" +CommercialOrganizationType ||--|o CustodianPrimaryTypeEnum : "primary_type" CommercialOrganizationType ||--|o CustodianType : "broader_type" CommercialOrganizationType ||--}o CustodianType : "narrower_types" CommercialOrganizationType ||--}o CustodianType : "related_types" MixedCustodianType ||--|| CustodianType : "inherits" +MixedCustodianType ||--|o CustodianPrimaryTypeEnum : "primary_type" MixedCustodianType ||--|o CustodianType : "broader_type" MixedCustodianType ||--}o CustodianType : "narrower_types" MixedCustodianType ||--}o CustodianType : "related_types" UnspecifiedType ||--|| CustodianType : "inherits" +UnspecifiedType ||--|o CustodianPrimaryTypeEnum : "primary_type" UnspecifiedType ||--|o CustodianType : "broader_type" UnspecifiedType ||--}o CustodianType : "narrower_types" UnspecifiedType ||--}o CustodianType : "related_types" +CustodianPlace ||--|| ReconstructedEntity : "inherits" +CustodianPlace ||--|o PlaceSpecificityEnum : "place_specificity" CustodianPlace ||--|o Country : "country" CustodianPlace ||--|o Subregion : "subregion" CustodianPlace ||--|o Settlement : "settlement" @@ -1718,6 +2328,8 @@ CustodianPlace ||--}o AuxiliaryPlace : "auxiliary_places" CustodianPlace ||--}| 
CustodianObservation : "was_derived_from" CustodianPlace ||--|o ReconstructionActivity : "was_generated_by" CustodianPlace ||--|| Custodian : "refers_to_custodian" +AuxiliaryPlace ||--|| ReconstructedEntity : "inherits" +AuxiliaryPlace ||--|o AuxiliaryPlaceTypeEnum : "auxiliary_place_type" AuxiliaryPlace ||--|o Country : "country" AuxiliaryPlace ||--|o Subregion : "subregion" AuxiliaryPlace ||--|o Settlement : "settlement" @@ -1729,15 +2341,19 @@ AuxiliaryPlace ||--|o TimeSpan : "temporal_extent" AuxiliaryPlace ||--}o CustodianObservation : "was_derived_from" AuxiliaryPlace ||--|o ReconstructionActivity : "was_generated_by" AuxiliaryPlace ||--|| Custodian : "refers_to_custodian" +ReconstructionActivity ||--|o ReconstructionActivityTypeEnum : "activity_type" ReconstructionActivity ||--|o ReconstructionAgent : "responsible_agent" ReconstructionActivity ||--|o TimeSpan : "temporal_extent" ReconstructionActivity ||--}| CustodianObservation : "used" ReconstructionActivity ||--|o ConfidenceMeasure : "confidence_score" +OrganizationalStructure ||--|o OrganizationalUnitTypeEnum : "unit_type" OrganizationalStructure ||--|o OrganizationalStructure : "parent_unit" OrganizationalStructure ||--}o PersonObservation : "staff_members" OrganizationalStructure ||--}o CustodianCollection : "managed_collections" OrganizationalStructure ||--}o AuxiliaryPlace : "located_at" OrganizationalStructure ||--|| Custodian : "refers_to_custodian" +OrganizationBranch ||--|| ReconstructedEntity : "inherits" +OrganizationBranch ||--|o OrganizationBranchTypeEnum : "branch_type" OrganizationBranch ||--}o AuxiliaryPlace : "located_at" OrganizationBranch ||--}o OrganizationalStructure : "has_operational_unit" OrganizationBranch ||--}o OrganizationBranch : "has_sub_branch" @@ -1745,17 +2361,21 @@ OrganizationBranch ||--|o TimeSpan : "temporal_extent" OrganizationBranch ||--}o CustodianObservation : "was_derived_from" OrganizationBranch ||--|o ReconstructionActivity : "was_generated_by" 
OrganizationBranch ||--|| Custodian : "refers_to_custodian" +AuxiliaryDigitalPlatform ||--|| ReconstructedEntity : "inherits" +AuxiliaryDigitalPlatform ||--|o AuxiliaryDigitalPlatformTypeEnum : "auxiliary_platform_type" AuxiliaryDigitalPlatform ||--|| DigitalPlatform : "is_auxiliary_of_platform" AuxiliaryDigitalPlatform ||--|o TimeSpan : "temporal_extent" AuxiliaryDigitalPlatform ||--}o CollectionManagementSystem : "powered_by_cms" AuxiliaryDigitalPlatform ||--}o CustodianObservation : "was_derived_from" AuxiliaryDigitalPlatform ||--|o ReconstructionActivity : "was_generated_by" AuxiliaryDigitalPlatform ||--|| Custodian : "refers_to_custodian" +CustodianCollection ||--|| ReconstructedEntity : "inherits" CustodianCollection ||--|o TimeSpan : "temporal_coverage" CustodianCollection ||--}o CollectionManagementSystem : "managed_by_cms" CustodianCollection ||--|o OrganizationalStructure : "managing_unit" CustodianCollection ||--|| Custodian : "refers_to_custodian" CustodianCollection ||--}| CustodianObservation : "was_derived_from" +CustodianCollection ||--|o ReconstructionActivity : "was_generated_by" LegalResponsibilityCollection ||--|| CustodianCollection : "inherits" LegalResponsibilityCollection ||--|| CustodianLegalStatus : "responsible_legal_entity" LegalResponsibilityCollection ||--|o TimeSpan : "temporal_coverage" @@ -1763,6 +2383,9 @@ LegalResponsibilityCollection ||--}o CollectionManagementSystem : "managed_by_cm LegalResponsibilityCollection ||--|o OrganizationalStructure : "managing_unit" LegalResponsibilityCollection ||--|| Custodian : "refers_to_custodian" LegalResponsibilityCollection ||--}| CustodianObservation : "was_derived_from" +LegalResponsibilityCollection ||--|o ReconstructionActivity : "was_generated_by" +GeoSpatialPlace ||--|o GeometryTypeEnum : "geometry_type" +OrganizationalChangeEvent ||--|o OrganizationalChangeEventTypeEnum : "event_type" OrganizationalChangeEvent ||--}o OrganizationalStructure : "affected_units" OrganizationalChangeEvent 
||--}o OrganizationalStructure : "resulting_units" OrganizationalChangeEvent ||--|| Custodian : "parent_custodian" @@ -1770,6 +2393,7 @@ OrganizationalChangeEvent ||--|o CustodianPlace : "event_location" OrganizationalChangeEvent ||--|o CustodianPlace : "from_location" OrganizationalChangeEvent ||--|o CustodianPlace : "to_location" OrganizationalChangeEvent ||--}o GeoSpatialPlace : "affected_territory" +PersonObservation ||--|o StaffRoleTypeEnum : "staff_role" PersonObservation ||--|o OrganizationalStructure : "unit_affiliation" PersonObservation ||--|o SourceDocument : "observation_source" PersonObservation ||--|o OrganizationalChangeEvent : "affected_by_event" @@ -1778,6 +2402,7 @@ CustodianIdentifier ||--|o Standard : "defined_by_standard" CustodianIdentifier ||--|o AllocationAgency : "allocated_by" CustodianIdentifier ||--|o IdentifierFormat : "identifier_format_used" CustodianIdentifier ||--|o CustodianName : "also_identifies_name" +SourceDocument ||--|o SourceDocumentTypeEnum : "source_type" LegalForm ||--|| Country : "country_code" LegalForm ||--|| LegalEntityType : "legal_entity_type" LegalForm ||--|o LegalForm : "parent_form" @@ -1786,46 +2411,67 @@ RegistrationNumber ||--|o TradeRegister : "trade_register" RegistrationNumber ||--|| TimeSpan : "temporal_validity" LegalStatus ||--|| TimeSpan : "temporal_validity" LegalStatus ||--|o Jurisdiction : "jurisdiction" +RegistrationAuthority ||--|| Country : "country" +RegistrationAuthority ||--|o RegistrationAuthorityGovernanceEnum : "governance_type" +RegistrationAuthority ||--|o RegistrationAuthority : "predecessor" +RegistrationAuthority ||--}o Standard : "standards_maintained" +RegistrationAuthority ||--}o AllocationAgency : "allocation_agencies" Subregion ||--|| Country : "country" Settlement ||--|| Country : "country" Settlement ||--|o Subregion : "subregion" DataLicensePolicy ||--|| DataLicense : "default_license" DataLicensePolicy ||--}o ServiceLicense : "service_specific_licenses" +DataLicensePolicy 
||--|o OpennessStanceEnum : "openness_stance" +DataLicense ||--|o DataLicenseTypeEnum : "license_type" +DataLicense ||--|o DataOpennessLevelEnum : "openness_level" ServiceLicense ||--|| DataLicense : "license" +Project ||--|o ProjectStatusEnum : "project_status" +Jurisdiction ||--|o JurisdictionTypeEnum : "jurisdiction_type" Jurisdiction ||--|o Country : "country" Jurisdiction ||--|o Subregion : "subregion" Jurisdiction ||--|o Settlement : "settlement" +Jurisdiction ||--|o LegalSystemTypeEnum : "legal_system_type" +EncompassingBody ||--|o EncompassingBodyTypeEnum : "organization_type" EncompassingBody ||--|o DataLicensePolicy : "data_license_policy" EncompassingBody ||--}o Project : "projects" EncompassingBody ||--|o Jurisdiction : "legal_jurisdiction" UmbrellaOrganisation ||--|| EncompassingBody : "inherits" +UmbrellaOrganisation ||--|o EncompassingBodyTypeEnum : "organization_type" UmbrellaOrganisation ||--|o DataLicensePolicy : "data_license_policy" UmbrellaOrganisation ||--}o Project : "projects" UmbrellaOrganisation ||--|| Jurisdiction : "legal_jurisdiction" NetworkOrganisation ||--|| EncompassingBody : "inherits" +NetworkOrganisation ||--|o EncompassingBodyTypeEnum : "organization_type" NetworkOrganisation ||--|o DataLicensePolicy : "data_license_policy" NetworkOrganisation ||--}o Project : "projects" NetworkOrganisation ||--|o Jurisdiction : "legal_jurisdiction" Consortium ||--|| EncompassingBody : "inherits" +Consortium ||--|o EncompassingBodyTypeEnum : "organization_type" Consortium ||--|o DataLicensePolicy : "data_license_policy" Consortium ||--}o Project : "projects" Consortium ||--|o Jurisdiction : "legal_jurisdiction" Cooperative ||--|| EncompassingBody : "inherits" +Cooperative ||--|o EncompassingBodyTypeEnum : "organization_type" Cooperative ||--|o DataLicensePolicy : "data_license_policy" Cooperative ||--}o Project : "projects" Cooperative ||--|o Jurisdiction : "legal_jurisdiction" SocialMovement ||--|| EncompassingBody : "inherits" +SocialMovement 
||--|o EncompassingBodyTypeEnum : "organization_type" SocialMovement ||--|| DataLicensePolicy : "data_license_policy" SocialMovement ||--}o Project : "projects" SocialMovement ||--|o Jurisdiction : "legal_jurisdiction" FundingOrganisation ||--|| EncompassingBody : "inherits" FundingOrganisation ||--|o TimeSpan : "programme_period" +FundingOrganisation ||--|o EncompassingBodyTypeEnum : "organization_type" FundingOrganisation ||--|o DataLicensePolicy : "data_license_policy" FundingOrganisation ||--}o Project : "projects" FundingOrganisation ||--|o Jurisdiction : "legal_jurisdiction" +FeaturePlace ||--|| ReconstructedEntity : "inherits" +FeaturePlace ||--|o FeatureTypeEnum : "feature_type" FeaturePlace ||--|| CustodianPlace : "classifies_place" FeaturePlace ||--}| CustodianObservation : "was_derived_from" FeaturePlace ||--|o ReconstructionActivity : "was_generated_by" +DigitalPlatform ||--|| ReconstructedEntity : "inherits" DigitalPlatform ||--}| DigitalPlatformType : "platform_type" DigitalPlatform ||--}o CollectionManagementSystem : "powered_by_cms" DigitalPlatform ||--}o AuxiliaryDigitalPlatform : "auxiliary_platforms" @@ -1833,6 +2479,7 @@ DigitalPlatform ||--|o TimeSpan : "temporal_extent" DigitalPlatform ||--}o CustodianObservation : "was_derived_from" DigitalPlatform ||--|o ReconstructionActivity : "was_generated_by" DigitalPlatform ||--|| Custodian : "refers_to_custodian" +CollectionManagementSystem ||--|| ReconstructedEntity : "inherits" CollectionManagementSystem ||--}o DigitalPlatform : "powers_platform" CollectionManagementSystem ||--}o CustodianCollection : "manages_collection" CollectionManagementSystem ||--}o Custodian : "used_by_custodian" @@ -1840,32 +2487,46 @@ CollectionManagementSystem ||--|o TimeSpan : "temporal_extent" CollectionManagementSystem ||--}o CustodianObservation : "was_derived_from" CollectionManagementSystem ||--|o ReconstructionActivity : "was_generated_by" CollectionManagementSystem ||--|| Custodian : "refers_to_custodian" 
+TradeRegister ||--|o RegisterTypeEnum : "register_type" TradeRegister ||--|| Jurisdiction : "jurisdiction" TradeRegister ||--|| RegistrationAuthority : "maintained_by" +StandardsOrganization ||--|o StandardsOrganizationTypeEnum : "organization_type" StandardsOrganization ||--}o Standard : "standards_maintained" Standard ||--|| StandardsOrganization : "defined_by" Standard ||--|o RegistrationAuthority : "registration_authority" Standard ||--}o Country : "country_scope" +Standard ||--|o StandardScopeTypeEnum : "scope_type" +Standard ||--|o IdentifierDomainEnum : "identifier_domain" Standard ||--}o IdentifierFormat : "formats" Standard ||--|o IdentifierFormat : "canonical_format" +Standard ||--|o StandardTypeEnum : "standard_type" +Standard ||--|o GovernanceModelEnum : "governance_model" Standard ||--}o ContributingAgency : "contributing_agencies" Standard ||--|o StandardsOrganization : "governance_council" AllocationAgency ||--}| Country : "country_scope" AllocationAgency ||--}o Subregion : "subregion_scope" +AllocationAgency ||--}o AllocationDomainEnum : "allocation_domain" AllocationAgency ||--}| Standard : "allocates_for" AllocationAgency ||--|o RegistrationAuthority : "parent_registration_authority" -RegistrationAuthority ||--|| Country : "country" -RegistrationAuthority ||--|o RegistrationAuthority : "predecessor" -RegistrationAuthority ||--}o Standard : "standards_maintained" -RegistrationAuthority ||--}o AllocationAgency : "allocation_agencies" +ContributingAgency ||--|| Country : "country" +ContributingAgency ||--|o AuthorityRecordFormatEnum : "record_format" +ContributingAgency ||--}o AuthorityEntityTypeEnum : "entity_types_covered" +ContributingAgency ||--}| Standard : "contributes_to" +ContributingAgency ||--|o AllocationAgency : "also_allocation_agency" +ContributingAgency ||--}o StandardsOrganization : "member_of" +ContributingAgency ||--|o ConsortiumGovernanceRoleEnum : "governance_role" +CustodianArchive ||--|| ReconstructedEntity : "inherits" 
+CustodianArchive ||--|o ArchiveProcessingStatusEnum : "processing_status" CustodianArchive ||--}o Storage : "storage_location" CustodianArchive ||--}o CollectionManagementSystem : "tracked_in_cms" CustodianArchive ||--|o OrganizationalStructure : "managing_unit" CustodianArchive ||--|| Custodian : "refers_to_custodian" CustodianArchive ||--}o CustodianObservation : "was_derived_from" CustodianArchive ||--|o ReconstructionActivity : "was_generated_by" +ArticlesOfAssociation ||--|| ReconstructedEntity : "inherits" ArticlesOfAssociation ||--|o ArticlesOfAssociation : "supersedes" ArticlesOfAssociation ||--|o ArticlesOfAssociation : "superseded_by" +ArticlesOfAssociation ||--|o RecordsLifecycleStageEnum : "current_archival_stage" ArticlesOfAssociation ||--|o CustodianArchive : "archived_in" ArticlesOfAssociation ||--|o CustodianCollection : "collected_in" ArticlesOfAssociation ||--|| CustodianLegalStatus : "refers_to_legal_status" @@ -1874,10 +2535,8 @@ ArticlesOfAssociation ||--|o LegalForm : "legal_form" ArticlesOfAssociation ||--|o Jurisdiction : "jurisdiction" ArticlesOfAssociation ||--}o CustodianObservation : "was_derived_from" ArticlesOfAssociation ||--|o ReconstructionActivity : "was_generated_by" -ContributingAgency ||--|| Country : "country" -ContributingAgency ||--}| Standard : "contributes_to" -ContributingAgency ||--|o AllocationAgency : "also_allocation_agency" -ContributingAgency ||--}o StandardsOrganization : "member_of" +SocialMediaProfile ||--|| ReconstructedEntity : "inherits" +SocialMediaProfile ||--|o SocialMediaPlatformTypeEnum : "platform_type" SocialMediaProfile ||--}o PrimaryDigitalPresenceAssertion : "primary_presence_assertions" SocialMediaProfile ||--|o DigitalPlatform : "associated_digital_platform" SocialMediaProfile ||--|o AuxiliaryDigitalPlatform : "associated_auxiliary_platform" @@ -1885,45 +2544,64 @@ SocialMediaProfile ||--|o TimeSpan : "temporal_extent" SocialMediaProfile ||--}o CustodianObservation : "was_derived_from" 
SocialMediaProfile ||--|o ReconstructionActivity : "was_generated_by" SocialMediaProfile ||--|| Custodian : "refers_to_custodian" +InternetOfThings ||--|| ReconstructedEntity : "inherits" +InternetOfThings ||--|o DigitalPresenceTypeEnum : "device_type" InternetOfThings ||--|o CustodianPlace : "installed_at_place" InternetOfThings ||--|o TimeSpan : "temporal_extent" InternetOfThings ||--}o CustodianObservation : "was_derived_from" InternetOfThings ||--|o ReconstructionActivity : "was_generated_by" InternetOfThings ||--|| Custodian : "refers_to_custodian" +FundingRequirement ||--|o FundingRequirementTypeEnum : "requirement_type" +CallForApplication ||--|o CallForApplicationStatusEnum : "call_status" CallForApplication ||--}o FundingRequirement : "requirements" WebObservation ||--}o WebClaim : "claims" FundingAgenda ||--|o TimeSpan : "validity_period" FundingAgenda ||--}o ThematicRoute : "thematic_routes" +WebPortal ||--|| ReconstructedEntity : "inherits" +WebPortal ||--|o WebPortalTypeEnum : "portal_type" WebPortal ||--|o TimeSpan : "temporal_extent" WebPortal ||--}| CustodianObservation : "was_derived_from" -WebPortal ||--|| ReconstructionActivity : "was_generated_by" +WebPortal ||--|o ReconstructionActivity : "was_generated_by" +PrimaryDigitalPresenceAssertion ||--|o DigitalPresenceTypeEnum : "digital_presence_type" PrimaryDigitalPresenceAssertion ||--|o TimeSpan : "temporal_extent" PrimaryDigitalPresenceAssertion ||--}o WebObservation : "based_on_observations" +GiftShop ||--|| ReconstructedEntity : "inherits" +GiftShop ||--|o GiftShopTypeEnum : "shop_type" GiftShop ||--}o AuxiliaryPlace : "physical_location" GiftShop ||--}o AuxiliaryDigitalPlatform : "online_shop" +GiftShop ||--}o ProductCategoryEnum : "product_categories" GiftShop ||--|o TimeSpan : "temporal_extent" GiftShop ||--}o CustodianObservation : "was_derived_from" GiftShop ||--|o ReconstructionActivity : "was_generated_by" GiftShop ||--|| Custodian : "refers_to_custodian" +Storage ||--|o StorageTypeEnum 
: "storage_type" Storage ||--|o AuxiliaryPlace : "storage_location" Storage ||--}o CustodianCollection : "stores_collections" +Storage ||--}o StorageStandardEnum : "standards_applied" Storage ||--|o StorageConditionPolicy : "condition_policy" Storage ||--}o StorageCondition : "storage_conditions" Storage ||--|o TimeSpan : "temporal_extent" Storage ||--|| Custodian : "refers_to_custodian" StorageCondition ||--|| Storage : "refers_to_storage" StorageCondition ||--|o TimeSpan : "observation_period" +StorageCondition ||--|o StorageObserverTypeEnum : "observer_type" +StorageCondition ||--|o StorageConditionStatusEnum : "overall_status" StorageCondition ||--}o StorageConditionCategoryAssessment : "category_assessments" StorageCondition ||--|o StorageCondition : "supersedes" +StorageConditionCategoryAssessment ||--|o StorageConditionStatusEnum : "category_status" +StorageConditionPolicy ||--}o StorageStandardEnum : "standards_compliance" +CustodianAdministration ||--|| ReconstructedEntity : "inherits" CustodianAdministration ||--|o OrganizationalStructure : "managing_unit" CustodianAdministration ||--|o DigitalPlatform : "primary_system" CustodianAdministration ||--}o DigitalPlatform : "secondary_systems" CustodianAdministration ||--|| Custodian : "refers_to_custodian" CustodianAdministration ||--}o CustodianObservation : "was_derived_from" CustodianAdministration ||--|o ReconstructionActivity : "was_generated_by" +Budget ||--|| ReconstructedEntity : "inherits" Budget ||--|o OrganizationalStructure : "managing_unit" Budget ||--|| Custodian : "refers_to_custodian" Budget ||--}o CustodianObservation : "was_derived_from" Budget ||--|o ReconstructionActivity : "was_generated_by" +WebClaim ||--|o ClaimTypeEnum : "claim_type" ``` diff --git a/frontend/public/schemas/20251121/linkml/manifest.json b/frontend/public/schemas/20251121/linkml/manifest.json index 712bfc81dd..1f699a4312 100644 --- a/frontend/public/schemas/20251121/linkml/manifest.json +++ 
b/frontend/public/schemas/20251121/linkml/manifest.json @@ -1,5 +1,5 @@ { - "generated": "2025-12-01T15:00:54.169Z", + "generated": "2025-12-01T15:56:31.862Z", "version": "1.0.0", "categories": [ { diff --git a/frontend/src/pages/NDEMapPage.css b/frontend/src/pages/NDEMapPage.css index 824f8adc86..20f1685f8b 100644 --- a/frontend/src/pages/NDEMapPage.css +++ b/frontend/src/pages/NDEMapPage.css @@ -787,6 +787,28 @@ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15); } +/* Desktop: Hide default bottom tip when popup is positioned horizontally */ +@media (min-width: 769px) { + .institution-popup .leaflet-popup-tip-container { + display: none; + } + + /* Add subtle left/right border indicator instead of tip */ + .institution-popup .leaflet-popup-content-wrapper { + position: relative; + } + + .institution-popup .leaflet-popup-content-wrapper::before { + content: ''; + position: absolute; + top: 50%; + transform: translateY(-50%); + width: 0; + height: 0; + border: 8px solid transparent; + } +} + /* URL Filter Bar */ .url-filter-bar { display: flex; diff --git a/frontend/src/pages/NDEMapPage.tsx b/frontend/src/pages/NDEMapPage.tsx index 6b2556fb98..f024451b18 100644 --- a/frontend/src/pages/NDEMapPage.tsx +++ b/frontend/src/pages/NDEMapPage.tsx @@ -74,6 +74,7 @@ interface Photo { interface GHCID { current: string; uuid: string; + numeric?: string; } interface Identifier { @@ -121,7 +122,13 @@ interface Institution { social_media?: SocialMedia; verified_name?: string; name_source?: string; - isil?: string; + isil?: { + code: string; + name?: string; + city?: string; + assigned_date?: string; + source?: string; + }; museum_register?: { name?: string; province?: string; @@ -481,16 +488,37 @@ export default function NDEMapPage() { ${inst.ghcid ? ` - ` : inst.isil ? ` + ` : inst.isil?.code ? ` ` : ''} ${inst.museum_register ? 
` @@ -530,14 +558,54 @@ export default function NDEMapPage() { `; - // Bind popup with autoPan disabled to prevent camera movement - // The popup will appear in available space without moving the map + // Responsive popup positioning: + // - Desktop (>768px): Position left or right of marker based on available space + // - Mobile (≤768px): Position above marker (default Leaflet behavior) + const isMobile = window.innerWidth <= 768; + + // For desktop, we'll dynamically position when popup opens marker.bindPopup(popupContent, { autoPan: false, maxWidth: 400, minWidth: 280, className: 'institution-popup', + // Default offset for mobile (above marker) + offset: isMobile ? [0, -10] : [0, 0], }); + + // For desktop: position popup left or right based on marker position + if (!isMobile) { + marker.on('click', function(e) { + const markerPoint = map.latLngToContainerPoint(e.latlng); + const mapWidth = map.getSize().x; + + // Determine if marker is on left or right half of map + const isOnRightHalf = markerPoint.x > mapWidth / 2; + + // Get popup and reposition it + setTimeout(() => { + const popup = marker.getPopup(); + if (popup && popup.isOpen()) { + const popupElement = popup.getElement(); + if (popupElement) { + // Get popup width + const popupWidth = popupElement.offsetWidth || 300; + + // Calculate horizontal offset: popup appears on opposite side of marker + // Add some padding (20px) from the marker + const horizontalOffset = isOnRightHalf + ? 
-(popupWidth / 2 + 30) // Move left + : (popupWidth / 2 + 30); // Move right + + // Update popup offset and position + popup.options.offset = [horizontalOffset, 0]; + popup.update(); + } + } + }, 10); + }); + } + marker.addTo(map); }); diff --git a/frontend/src/pages/NDEStatsPage.css b/frontend/src/pages/NDEStatsPage.css index 035ca2e666..57e51ca92b 100644 --- a/frontend/src/pages/NDEStatsPage.css +++ b/frontend/src/pages/NDEStatsPage.css @@ -111,6 +111,46 @@ color: #3498db; } +.summary-card.accent-red .card-value { + color: #e74c3c; +} + +.summary-card.accent-slate .card-value { + color: #64748b; +} + +.summary-card.accent-amber .card-value { + color: #d97706; +} + +.summary-card.accent-indigo .card-value { + color: #6366f1; +} + +.summary-card.highlight { + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; +} + +.summary-card.highlight .card-value { + color: white; +} + +.summary-card.highlight .card-label { + color: rgba(255, 255, 255, 0.9); +} + +/* Overview stats content styling */ +.overview-stats-content { + display: flex; + flex-direction: column; + gap: 1rem; +} + +.overview-stats-content .summary-cards { + margin-bottom: 0; +} + /* Charts Section */ .charts-section { max-width: 1200px; diff --git a/frontend/src/pages/NDEStatsPage.tsx b/frontend/src/pages/NDEStatsPage.tsx index f581e698cf..38bc53db4b 100644 --- a/frontend/src/pages/NDEStatsPage.tsx +++ b/frontend/src/pages/NDEStatsPage.tsx @@ -2540,77 +2540,117 @@ export default function NDEStatsPage() { )} - {/* Summary Cards */} -
-
-
-
{stats.summary.total_institutions.toLocaleString()}
-
{t('totalInstitutions')}
-
-
-
{(stats.summary.enriched || stats.summary.with_wikidata).toLocaleString()}
-
{t('enrichedRecords')}
-
{((stats.summary.enriched || stats.summary.with_wikidata) / stats.summary.total_institutions * 100).toFixed(1)}%
-
-
-
{stats.summary.with_coordinates.toLocaleString()}
-
{t('withCoordinates')}
-
{(stats.summary.with_coordinates / stats.summary.total_institutions * 100).toFixed(1)}%
-
-
-
{stats.summary.unique_cities.toLocaleString()}
-
{t('uniqueCities')}
-
-
-
{stats.summary.institution_types}
-
{t('institutionTypes')}
-
-
-
{(stats.summary.with_google_maps || 0).toLocaleString()}
-
{t('googleMapsData')}
-
{((stats.summary.with_google_maps || 0) / stats.summary.total_institutions * 100).toFixed(1)}%
-
-
- - {/* Data Sources Cards - Second Row */} -
- {stats.summary.with_ghcid !== undefined && ( -
-
{stats.summary.with_ghcid.toLocaleString()}
-
{t('withGHCID')}
-
{(stats.summary.with_ghcid / stats.summary.total_institutions * 100).toFixed(1)}%
+ {/* Overview Statistics - Collapsible */} + +
+ {/* Row 1: Core Stats */} +
+
+
{stats.summary.total_institutions.toLocaleString()}
+
{t('totalInstitutions')}
- )} - {stats.summary.with_web_claims !== undefined && ( -
-
{stats.summary.with_web_claims.toLocaleString()}
-
{t('withWebClaims')}
-
{(stats.summary.with_web_claims / stats.summary.total_institutions * 100).toFixed(1)}%
+
+
{(stats.summary.enriched || stats.summary.with_wikidata).toLocaleString()}
+
{t('enrichedRecords')}
+
{((stats.summary.enriched || stats.summary.with_wikidata) / stats.summary.total_institutions * 100).toFixed(1)}%
- )} - {stats.summary.with_social_media !== undefined && ( -
-
{stats.summary.with_social_media.toLocaleString()}
-
{t('withSocialMedia')}
-
{(stats.summary.with_social_media / stats.summary.total_institutions * 100).toFixed(1)}%
+
+
{stats.summary.with_coordinates.toLocaleString()}
+
{t('withCoordinates')}
+
{(stats.summary.with_coordinates / stats.summary.total_institutions * 100).toFixed(1)}%
- )} - {stats.summary.with_museum_register !== undefined && ( -
-
{stats.summary.with_museum_register.toLocaleString()}
-
{t('withMuseumRegister')}
-
{(stats.summary.with_museum_register / stats.summary.total_institutions * 100).toFixed(1)}%
+
+
{stats.summary.unique_cities.toLocaleString()}
+
{t('uniqueCities')}
- )} - {stats.summary.with_nan_isil !== undefined && ( +
+
{stats.summary.unique_provinces || 12}
+
{t('uniqueProvinces')}
+
+
+
{stats.summary.institution_types}
+
{t('institutionTypes')}
+
+
+ + {/* Row 2: Data Sources */} +
+ {stats.summary.with_ghcid !== undefined && ( +
+
{stats.summary.with_ghcid.toLocaleString()}
+
{t('withGHCID')}
+
{(stats.summary.with_ghcid / stats.summary.total_institutions * 100).toFixed(1)}%
+
+ )}
-
{stats.summary.with_nan_isil.toLocaleString()}
-
{t('withISIL')}
-
{(stats.summary.with_nan_isil / stats.summary.total_institutions * 100).toFixed(1)}%
+
{stats.summary.with_wikidata.toLocaleString()}
+
{t('withWikidata')}
+
{(stats.summary.with_wikidata / stats.summary.total_institutions * 100).toFixed(1)}%
+
+
+
{(stats.summary.with_google_maps || 0).toLocaleString()}
+
{t('googleMapsData')}
+
{((stats.summary.with_google_maps || 0) / stats.summary.total_institutions * 100).toFixed(1)}%
+
+ {stats.summary.with_web_claims !== undefined && ( +
+
{stats.summary.with_web_claims.toLocaleString()}
+
{t('withWebClaims')}
+
{(stats.summary.with_web_claims / stats.summary.total_institutions * 100).toFixed(1)}%
+
+ )} + {stats.summary.with_social_media !== undefined && ( +
+
{stats.summary.with_social_media.toLocaleString()}
+
{t('withSocialMedia')}
+
{(stats.summary.with_social_media / stats.summary.total_institutions * 100).toFixed(1)}%
+
+ )} +
+ + {/* Row 3: Identifier Coverage */} +
+ {stats.charts.identifier_coverage?.map((item) => ( +
+
{item.count.toLocaleString()}
+
{item.identifier}
+
{item.percentage.toFixed(1)}%
+
+ ))} +
+ + {/* Row 4: Google Maps Features */} + {stats.charts.google_maps_coverage && stats.charts.google_maps_coverage.length > 0 && ( +
+ {stats.charts.google_maps_coverage.map((item) => ( +
+
{item.count.toLocaleString()}
+
{item.feature}
+
{item.percentage.toFixed(1)}%
+
+ ))}
)} + + {/* Row 5: Registry Sources */} +
+ {stats.summary.with_museum_register !== undefined && ( +
+
{stats.summary.with_museum_register.toLocaleString()}
+
{t('withMuseumRegister')}
+
{(stats.summary.with_museum_register / stats.summary.total_institutions * 100).toFixed(1)}%
+
+ )} + {stats.summary.with_nan_isil !== undefined && ( +
+
{stats.summary.with_nan_isil.toLocaleString()}
+
{t('withISIL')}
+
{(stats.summary.with_nan_isil / stats.summary.total_institutions * 100).toFixed(1)}%
+
+ )} +
-
+ {/* Charts Grid */}
diff --git a/mcp_servers/social_media/README.md b/mcp_servers/social_media/README.md new file mode 100644 index 0000000000..a332a1b30b --- /dev/null +++ b/mcp_servers/social_media/README.md @@ -0,0 +1,245 @@ +# Social Media MCP Server + +A Model Context Protocol (MCP) server for obtaining media content from YouTube, LinkedIn, Facebook, and Instagram. + +## Features + +### YouTube (5 tools) +- `youtube_get_video_info` - Get video metadata (title, description, views, likes) +- `youtube_get_transcript` - Extract subtitles/transcripts using yt-dlp +- `youtube_search_videos` - Search for videos +- `youtube_get_channel_info` - Get channel information +- `youtube_get_channel_videos` - Get recent videos from a channel + +### LinkedIn (4 tools) +- `linkedin_get_profile` - Scrape LinkedIn profile (unofficial API) +- `linkedin_get_company` - Get company page info +- `linkedin_search_jobs` - Search job listings +- `linkedin_get_feed_posts` - Get posts from your feed + +> **Warning**: LinkedIn tools use unofficial methods and may violate LinkedIn's Terms of Service. + +### Facebook (4 tools) +- `facebook_get_page_posts` - Get posts from a Facebook Page +- `facebook_get_post_comments` - Get comments on a post +- `facebook_post_to_page` - Publish a post to a Page +- `facebook_reply_to_comment` - Reply to comments + +### Instagram (5 tools) +- `instagram_get_profile_info` - Get Business profile info +- `instagram_get_media_posts` - Get recent media posts +- `instagram_get_media_insights` - Get post analytics +- `instagram_publish_media` - Publish images +- `instagram_get_comments` - Get comments on posts + +> **Note**: Instagram tools require a Business Account connected to Facebook. 
+ +## Installation + +### Quick Setup + +```bash +# Navigate to the server directory +cd mcp_servers/social_media + +# Run the setup script +./setup.sh +``` + +### Manual Installation + +```bash +# Create virtual environment +python3 -m venv .venv +source .venv/bin/activate + +# Install dependencies +pip install -e . + +# Install yt-dlp (for YouTube transcripts) +brew install yt-dlp # macOS +# or +pip install yt-dlp +``` + +## Configuration + +Create a `.env` file or set environment variables: + +```bash +# YouTube (optional - falls back to yt-dlp) +YOUTUBE_API_KEY=your_youtube_api_key + +# LinkedIn (choose one method) +LINKEDIN_COOKIE=your_li_at_cookie_value +# or +LINKEDIN_EMAIL=your_email +LINKEDIN_PASSWORD=your_password + +# Facebook +FACEBOOK_ACCESS_TOKEN=your_page_access_token +FACEBOOK_PAGE_ID=your_page_id + +# Instagram +INSTAGRAM_ACCESS_TOKEN=your_instagram_access_token +INSTAGRAM_BUSINESS_ACCOUNT_ID=your_business_account_id +``` + +## Getting API Credentials + +### YouTube API Key + +1. Go to [Google Cloud Console](https://console.cloud.google.com/) +2. Create a new project (or select existing) +3. Enable "YouTube Data API v3" +4. Create credentials → API Key +5. Copy the API key + +### LinkedIn Cookie (li_at) + +1. Log into LinkedIn in your browser +2. Open Developer Tools (F12) +3. Go to Application → Cookies → linkedin.com +4. Find the `li_at` cookie and copy its value + +### Facebook Page Access Token + +1. Go to [Facebook Developer Console](https://developers.facebook.com/) +2. Create an App (Business type) +3. Add "Facebook Login" and "Pages API" products +4. Generate a Page Access Token with permissions: + - `pages_show_list` + - `pages_read_engagement` + - `pages_manage_posts` (for posting) + - `pages_manage_comments` (for replying) + +### Instagram Business Account + +1. Convert your Instagram account to a Business Account +2. Connect it to a Facebook Page +3. Use the Facebook Graph API to get your Instagram Business Account ID +4. 
Generate an access token with permissions: + - `instagram_basic` + - `instagram_content_publish` (for posting) + - `instagram_manage_comments` + - `instagram_manage_insights` + +## Usage with OpenCode + +Add to your OpenCode configuration (`.opencode/config.json`): + +```json +{ + "mcpServers": { + "social-media": { + "command": "uv", + "args": [ + "--directory", + "/path/to/glam/mcp_servers/social_media", + "run", + "python", + "server.py" + ], + "env": { + "YOUTUBE_API_KEY": "your_key", + "LINKEDIN_COOKIE": "your_cookie", + "FACEBOOK_ACCESS_TOKEN": "your_token", + "FACEBOOK_PAGE_ID": "your_page_id", + "INSTAGRAM_ACCESS_TOKEN": "your_token", + "INSTAGRAM_BUSINESS_ACCOUNT_ID": "your_account_id" + } + } + } +} +``` + +## Usage with Claude Desktop + +Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json`): + +```json +{ + "mcpServers": { + "social-media": { + "command": "/path/to/glam/mcp_servers/social_media/.venv/bin/python", + "args": ["/path/to/glam/mcp_servers/social_media/server.py"], + "env": { + "YOUTUBE_API_KEY": "your_key" + } + } + } +} +``` + +## Examples + +### Get YouTube Video Info + +``` +Get info about this YouTube video: https://www.youtube.com/watch?v=dQw4w9WgXcQ +``` + +### Get Video Transcript + +``` +Get the transcript of this video: dQw4w9WgXcQ +``` + +### Search LinkedIn Jobs + +``` +Search for software engineer jobs in San Francisco +``` + +### Get Instagram Posts + +``` +Get my recent Instagram posts +``` + +## API Methods Used + +| Platform | Method | Authentication | +|-----------|---------------------------|----------------------| +| YouTube | YouTube Data API v3 | API Key | +| YouTube | yt-dlp (fallback) | None | +| LinkedIn | Voyager API (unofficial) | Session Cookie | +| Facebook | Graph API | Page Access Token | +| Instagram | Graph API (via Facebook) | Access Token | + +## Rate Limits + +- **YouTube**: 10,000 quota units/day (search costs 100, video info costs 1) +- **LinkedIn**: 
Unofficial API - use sparingly to avoid account restrictions +- **Facebook/Instagram**: 200 calls/user/hour for most endpoints + +## Troubleshooting + +### YouTube: "API key not configured" +- Set `YOUTUBE_API_KEY` or install `yt-dlp` for fallback + +### LinkedIn: "Session expired" +- Get a fresh `li_at` cookie from your browser + +### Facebook/Instagram: "Invalid access token" +- Tokens expire after ~60 days; generate a new one + +### yt-dlp: "Command not found" +```bash +brew install yt-dlp # macOS +pip install yt-dlp # All platforms +``` + +## License + +MIT License - See project root for details. + +## References + +This server was built by studying these open-source MCP implementations: +- [anaisbetts/mcp-youtube](https://github.com/anaisbetts/mcp-youtube) +- [ZubeidHendricks/youtube-mcp-server](https://github.com/ZubeidHendricks/youtube-mcp-server) +- [adhikasp/mcp-linkedin](https://github.com/adhikasp/mcp-linkedin) +- [stickerdaniel/linkedin-mcp-server](https://github.com/stickerdaniel/linkedin-mcp-server) +- [jlbadano/ig-mcp](https://github.com/jlbadano/ig-mcp) +- [tiroshanm/facebook-mcp-server](https://github.com/tiroshanm/facebook-mcp-server) diff --git a/mcp_servers/social_media/pyproject.toml b/mcp_servers/social_media/pyproject.toml new file mode 100644 index 0000000000..e53b769b08 --- /dev/null +++ b/mcp_servers/social_media/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "social-media-mcp" +version = "1.0.0" +description = "MCP Server for obtaining media content from YouTube, LinkedIn, Facebook, and Instagram" +requires-python = ">=3.10" +dependencies = [ + "httpx>=0.27.0", + "mcp>=1.0.0", + "yt-dlp>=2024.0.0", + "beautifulsoup4>=4.12.0", +] + +[project.optional-dependencies] +linkedin = [ + "linkedin-api>=2.0.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/mcp_servers/social_media/server.py b/mcp_servers/social_media/server.py new file mode 100644 index 0000000000..42def12871 --- /dev/null +++ 
# [patch framing preserved] b/mcp_servers/social_media/server.py @@ -0,0 +1,1292 @@
#!/usr/bin/env python3
"""
Social Media MCP Server - Unified Media Content Extraction

Provides tools to obtain media content from:
1. YouTube - Video info, transcripts, channel data, search
2. LinkedIn - Profile scraping, job search, company info, feed posts
3. Facebook - Page posts, comments, insights, messaging
4. Instagram - Business profile, media posts, insights, DMs

Architecture:
- Uses official APIs where available (YouTube Data API, Facebook Graph API, Instagram Graph API)
- Uses unofficial/scraping methods for LinkedIn (no official API for content reading)
- Supports authentication via environment variables

Environment Variables:
- YOUTUBE_API_KEY: YouTube Data API v3 key
- LINKEDIN_EMAIL: LinkedIn login email
- LINKEDIN_PASSWORD: LinkedIn login password
- LINKEDIN_COOKIE: LinkedIn session cookie (li_at) - alternative to email/password
- FACEBOOK_ACCESS_TOKEN: Facebook Page Access Token
- FACEBOOK_PAGE_ID: Facebook Page ID
- INSTAGRAM_ACCESS_TOKEN: Instagram Business Account Token
- INSTAGRAM_BUSINESS_ACCOUNT_ID: Instagram Business Account ID

Based on patterns from:
- https://github.com/anaisbetts/mcp-youtube
- https://github.com/ZubeidHendricks/youtube-mcp-server
- https://github.com/adhikasp/mcp-linkedin
- https://github.com/stickerdaniel/linkedin-mcp-server
- https://github.com/jlbadano/ig-mcp
- https://github.com/tiroshanm/facebook-mcp-server
"""

import glob  # hoisted from youtube_get_transcript: imports belong at module top
import json
import os
import re
import subprocess
import tempfile
from typing import List, Dict, Optional, Any

# NOTE(review): third-party imports are guarded so the pure helper functions
# (extract_video_id & co.) stay importable in environments without the MCP
# runtime or httpx installed. With the packages present, behavior is unchanged.
try:
    import httpx
except ImportError:  # tools that perform HTTP will fail at call time instead
    httpx = None

try:
    from mcp.server.fastmcp import FastMCP
except ImportError:
    class FastMCP:  # minimal stand-in: records the name, no-op tool decorator
        def __init__(self, name):
            self.name = name

        def tool(self):
            def decorator(fn):
                return fn
            return decorator

server = FastMCP("Social Media MCP Server")

# ============================================================================
# Configuration
# ============================================================================

# User-Agent for API requests
USER_AGENT = "SocialMediaMCP/1.0"

# YouTube configuration
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "")
YOUTUBE_API_BASE = "https://www.googleapis.com/youtube/v3"

# LinkedIn configuration (uses unofficial API - scraping based)
LINKEDIN_EMAIL = os.getenv("LINKEDIN_EMAIL", "")
LINKEDIN_PASSWORD = os.getenv("LINKEDIN_PASSWORD", "")
LINKEDIN_COOKIE = os.getenv("LINKEDIN_COOKIE", "")

# Facebook Graph API configuration
FACEBOOK_ACCESS_TOKEN = os.getenv("FACEBOOK_ACCESS_TOKEN", "")
FACEBOOK_PAGE_ID = os.getenv("FACEBOOK_PAGE_ID", "")
FACEBOOK_API_VERSION = os.getenv("FACEBOOK_API_VERSION", "v19.0")
FACEBOOK_API_BASE = f"https://graph.facebook.com/{FACEBOOK_API_VERSION}"

# Instagram Graph API configuration (uses Facebook Graph API)
INSTAGRAM_ACCESS_TOKEN = os.getenv("INSTAGRAM_ACCESS_TOKEN", "")
INSTAGRAM_BUSINESS_ACCOUNT_ID = os.getenv("INSTAGRAM_BUSINESS_ACCOUNT_ID", "")
INSTAGRAM_API_VERSION = os.getenv("INSTAGRAM_API_VERSION", "v19.0")
INSTAGRAM_API_BASE = f"https://graph.facebook.com/{INSTAGRAM_API_VERSION}"

# Print configuration status
print("=" * 60)
print("Social Media MCP Server - Configuration Status")
print("=" * 60)
print(f"YouTube API Key: {'✓ Configured' if YOUTUBE_API_KEY else '✗ Not configured'}")
print(f"LinkedIn Auth: {'✓ Cookie' if LINKEDIN_COOKIE else ('✓ Email/Password' if LINKEDIN_EMAIL else '✗ Not configured')}")
print(f"Facebook Token: {'✓ Configured' if FACEBOOK_ACCESS_TOKEN else '✗ Not configured'}")
print(f"Instagram Token: {'✓ Configured' if INSTAGRAM_ACCESS_TOKEN else '✗ Not configured'}")
print("=" * 60)


# ============================================================================
# Helper Functions
# ============================================================================

def extract_video_id(url_or_id: str) -> str:
    """Extract YouTube video ID from URL or return as-is if already an ID."""
    patterns = [
        r'(?:v=|/)([0-9A-Za-z_-]{11})(?:[&?]|$)',
        r'(?:youtu\.be/)([0-9A-Za-z_-]{11})',
        r'^([0-9A-Za-z_-]{11})$'
    ]
    for pattern in patterns:
        match = re.search(pattern, url_or_id)
        if match:
            return match.group(1)
    return url_or_id


def extract_channel_id(url_or_id: str) -> str:
    """Extract YouTube channel ID from URL or return as-is.

    BUGFIX: the original patterns used the character class ``[UC]`` (matches a
    single 'U' or 'C') instead of the literal prefix ``UC``, so a canonical
    24-character channel ID was captured as only 23 characters — dropping the
    final character and producing "Channel not found" API errors (see the
    truncated identifier_value in this commit's enrichment YAML).
    """
    patterns = [
        r'(?:channel/)(UC[0-9A-Za-z_-]{22})',
        r'^(UC[0-9A-Za-z_-]{22})$'
    ]
    for pattern in patterns:
        match = re.search(pattern, url_or_id)
        if match:
            return match.group(1)
    return url_or_id


def extract_linkedin_profile_id(url_or_id: str) -> str:
    """Extract LinkedIn profile ID from URL or return as-is."""
    patterns = [
        r'linkedin\.com/in/([^/?]+)',
        r'^([a-zA-Z0-9-]+)$'
    ]
    for pattern in patterns:
        match = re.search(pattern, url_or_id)
        if match:
            return match.group(1)
    return url_or_id


def extract_linkedin_company_id(url_or_id: str) -> str:
    """Extract LinkedIn company ID from URL or return as-is."""
    patterns = [
        r'linkedin\.com/company/([^/?]+)',
        r'^([a-zA-Z0-9-]+)$'
    ]
    for pattern in patterns:
        match = re.search(pattern, url_or_id)
        if match:
            return match.group(1)
    return url_or_id


# ============================================================================
# YOUTUBE TOOLS
# ============================================================================

@server.tool()
async def youtube_get_video_info(video_url_or_id: str) -> Dict[str, Any]:
    """
    Get detailed information about a YouTube video.

    Args:
        video_url_or_id: YouTube video URL or video ID

    Returns:
        dict: Video metadata including title, description, duration, view count,
              like count, channel info, publish date, and more.

    Example:
        youtube_get_video_info("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
        youtube_get_video_info("dQw4w9WgXcQ")
    """
    video_id = extract_video_id(video_url_or_id)

    if YOUTUBE_API_KEY:
        # Use YouTube Data API
        params = {
            "part": "snippet,contentDetails,statistics",
            "id": video_id,
            "key": YOUTUBE_API_KEY
        }
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{YOUTUBE_API_BASE}/videos",
                params=params,
                headers={"User-Agent": USER_AGENT}
            )
            response.raise_for_status()
            data = response.json()

        if not data.get("items"):
            return {"error": f"Video not found: {video_id}"}

        item = data["items"][0]
        return {
            "video_id": video_id,
            "title": item["snippet"]["title"],
            "description": item["snippet"]["description"],
            "channel_id": item["snippet"]["channelId"],
            "channel_title": item["snippet"]["channelTitle"],
            "published_at": item["snippet"]["publishedAt"],
            "duration": item["contentDetails"]["duration"],
            "view_count": item["statistics"].get("viewCount"),
            "like_count": item["statistics"].get("likeCount"),
            "comment_count": item["statistics"].get("commentCount"),
            "tags": item["snippet"].get("tags", []),
            "thumbnails": item["snippet"]["thumbnails"]
        }
    else:
        # Fallback: Use yt-dlp for metadata extraction
        # NOTE(review): the fallback result omits the "thumbnails" key that the
        # API path includes — confirm whether callers rely on it.
        try:
            result = subprocess.run(
                ["yt-dlp", "--dump-json", "--skip-download", f"https://www.youtube.com/watch?v={video_id}"],
                capture_output=True,
                text=True,
                timeout=60
            )
            if result.returncode == 0:
                data = json.loads(result.stdout)
                return {
                    "video_id": data.get("id"),
                    "title": data.get("title"),
                    "description": data.get("description"),
                    "channel_id": data.get("channel_id"),
                    "channel_title": data.get("uploader"),
                    "published_at": data.get("upload_date"),
                    "duration": data.get("duration"),
                    "view_count": data.get("view_count"),
                    "like_count": data.get("like_count"),
                    "comment_count": data.get("comment_count"),
                    "tags": data.get("tags", []),
                }
            return {"error": f"yt-dlp failed: {result.stderr}"}
        except FileNotFoundError:
            return {"error": "yt-dlp not installed. Install with: brew install yt-dlp or pip install yt-dlp"}
        except subprocess.TimeoutExpired:
            return {"error": "Video info extraction timed out"}


@server.tool()
async def youtube_get_transcript(video_url_or_id: str, language: str = "en") -> Dict[str, Any]:
    """
    Get the transcript/subtitles from a YouTube video.

    Uses yt-dlp to extract subtitles in the specified language.
    Falls back to auto-generated captions if manual ones aren't available.

    Args:
        video_url_or_id: YouTube video URL or video ID
        language: Language code for subtitles (default: "en")

    Returns:
        dict: Contains transcript text and metadata

    Example:
        youtube_get_transcript("dQw4w9WgXcQ", "en")
    """
    video_id = extract_video_id(video_url_or_id)
    video_url = f"https://www.youtube.com/watch?v={video_id}"

    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            # Try to get subtitles with yt-dlp
            result = subprocess.run(
                [
                    "yt-dlp",
                    "--write-subs",
                    "--write-auto-subs",
                    "--sub-langs", language,
                    "--sub-format", "vtt",
                    "--skip-download",
                    "--output", f"{tmpdir}/%(id)s",
                    video_url
                ],
                capture_output=True,
                text=True,
                timeout=120
            )

            # Look for the subtitle file
            vtt_files = glob.glob(f"{tmpdir}/*.vtt")

            if vtt_files:
                with open(vtt_files[0], 'r', encoding='utf-8') as f:
                    vtt_content = f.read()

                # Parse VTT to extract text
                lines = []
                for line in vtt_content.split('\n'):
                    line = line.strip()
                    # Skip headers, timestamps, and empty lines
                    if line and not line.startswith('WEBVTT') and not line.startswith('Kind:') \
                            and not line.startswith('Language:') and '-->' not in line \
                            and not re.match(r'^\d+$', line):
                        # Remove HTML tags
                        clean_line = re.sub(r'<[^>]+>', '', line)
                        if clean_line:
                            lines.append(clean_line)

                # Remove duplicate consecutive lines
                deduped = []
                for line in lines:
                    if not deduped or line != deduped[-1]:
                        deduped.append(line)

                transcript = ' '.join(deduped)

                return {
                    "video_id": video_id,
                    "language": language,
                    "transcript": transcript,
                    "source": "auto" if ".auto." in vtt_files[0] else "manual"
                }

            return {
                "video_id": video_id,
                "error": f"No subtitles available in language: {language}",
                "available_info": result.stderr if result.stderr else "Check video for available languages"
            }

    except FileNotFoundError:
        return {"error": "yt-dlp not installed. Install with: brew install yt-dlp or pip install yt-dlp"}
    except subprocess.TimeoutExpired:
        return {"error": "Transcript extraction timed out"}


@server.tool()
async def youtube_search_videos(
    query: str,
    max_results: int = 10,
    order: str = "relevance"
) -> Dict[str, Any]:
    """
    Search for YouTube videos.

    Args:
        query: Search query string
        max_results: Maximum number of results to return (default: 10, max: 50)
        order: Sort order - 'relevance', 'date', 'viewCount', 'rating' (default: relevance)

    Returns:
        dict: List of video results with basic metadata

    Example:
        youtube_search_videos("python tutorials", max_results=5)
    """
    if not YOUTUBE_API_KEY:
        return {"error": "YouTube API key not configured. Set YOUTUBE_API_KEY environment variable."}

    params = {
        "part": "snippet",
        "q": query,
        "type": "video",
        "maxResults": min(max_results, 50),
        "order": order,
        "key": YOUTUBE_API_KEY
    }

    async with httpx.AsyncClient() as client:
        response = await client.get(
            f"{YOUTUBE_API_BASE}/search",
            params=params,
            headers={"User-Agent": USER_AGENT}
        )
        response.raise_for_status()
        data = response.json()

    results = []
    for item in data.get("items", []):
        results.append({
            "video_id": item["id"]["videoId"],
            "title": item["snippet"]["title"],
            "description": item["snippet"]["description"],
            "channel_id": item["snippet"]["channelId"],
            "channel_title": item["snippet"]["channelTitle"],
            "published_at": item["snippet"]["publishedAt"],
            "thumbnail": item["snippet"]["thumbnails"]["high"]["url"]
        })

    return {
        "query": query,
        "total_results": data.get("pageInfo", {}).get("totalResults"),
        "results": results
    }


@server.tool()
async def youtube_get_channel_info(channel_url_or_id: str) -> Dict[str, Any]:
    """
    Get information about a YouTube channel.

    Args:
        channel_url_or_id: YouTube channel URL or channel ID

    Returns:
        dict: Channel metadata including name, description, subscriber count, etc.

    Example:
        youtube_get_channel_info("UCsXVk37bltHxD1rDPwtNM8Q")
    """
    if not YOUTUBE_API_KEY:
        return {"error": "YouTube API key not configured. Set YOUTUBE_API_KEY environment variable."}

    channel_id = extract_channel_id(channel_url_or_id)

    params = {
        "part": "snippet,statistics,brandingSettings",
        "id": channel_id,
        "key": YOUTUBE_API_KEY
    }

    async with httpx.AsyncClient() as client:
        response = await client.get(
            f"{YOUTUBE_API_BASE}/channels",
            params=params,
            headers={"User-Agent": USER_AGENT}
        )
        response.raise_for_status()
        data = response.json()

    if not data.get("items"):
        return {"error": f"Channel not found: {channel_id}"}

    item = data["items"][0]
    return {
        "channel_id": channel_id,
        "title": item["snippet"]["title"],
        "description": item["snippet"]["description"],
        "custom_url": item["snippet"].get("customUrl"),
        "published_at": item["snippet"]["publishedAt"],
        "country": item["snippet"].get("country"),
        "subscriber_count": item["statistics"].get("subscriberCount"),
        "video_count": item["statistics"].get("videoCount"),
        "view_count": item["statistics"].get("viewCount"),
        "thumbnail": item["snippet"]["thumbnails"]["high"]["url"],
        "banner": item.get("brandingSettings", {}).get("image", {}).get("bannerExternalUrl")
    }


# NOTE(review): the module continues beyond this chunk (youtube_get_channel_videos
# and the LinkedIn/Facebook/Instagram tools). The truncated head visible here read:
#
# @server.tool()
# async def youtube_get_channel_videos(
#     channel_url_or_id: str,
#     max_results: int = 20
# ) -> Dict[str, Any]:
#     """Get recent videos from a YouTube channel. ..."""
#     if not YOUTUBE_API_KEY:
#         return {"error": "YouTube API key not configured.
Set YOUTUBE_API_KEY environment variable."} + + channel_id = extract_channel_id(channel_url_or_id) + + params = { + "part": "snippet", + "channelId": channel_id, + "type": "video", + "order": "date", + "maxResults": min(max_results, 50), + "key": YOUTUBE_API_KEY + } + + async with httpx.AsyncClient() as client: + response = await client.get( + f"{YOUTUBE_API_BASE}/search", + params=params, + headers={"User-Agent": USER_AGENT} + ) + response.raise_for_status() + data = response.json() + + videos = [] + for item in data.get("items", []): + videos.append({ + "video_id": item["id"]["videoId"], + "title": item["snippet"]["title"], + "description": item["snippet"]["description"], + "published_at": item["snippet"]["publishedAt"], + "thumbnail": item["snippet"]["thumbnails"]["high"]["url"] + }) + + return { + "channel_id": channel_id, + "videos": videos + } + + +# ============================================================================ +# LINKEDIN TOOLS +# ============================================================================ + +# Note: LinkedIn doesn't have an official API for reading content. +# These tools use web scraping / unofficial methods. +# Users should be aware this may violate LinkedIn ToS. + +@server.tool() +async def linkedin_get_profile(profile_url_or_id: str) -> Dict[str, Any]: + """ + Get information from a LinkedIn profile. + + WARNING: Uses unofficial methods. May violate LinkedIn Terms of Service. + Requires LINKEDIN_COOKIE environment variable. + + Args: + profile_url_or_id: LinkedIn profile URL or vanity name + + Returns: + dict: Profile information including name, headline, experience, education + + Example: + linkedin_get_profile("https://www.linkedin.com/in/satyanadella/") + linkedin_get_profile("satyanadella") + """ + if not LINKEDIN_COOKIE: + return { + "error": "LinkedIn cookie not configured. Set LINKEDIN_COOKIE environment variable.", + "help": "Get your li_at cookie from LinkedIn after logging in via browser DevTools." 
+ } + + profile_id = extract_linkedin_profile_id(profile_url_or_id) + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Cookie": f"li_at={LINKEDIN_COOKIE.replace('li_at=', '')}", + "Accept": "application/json", + } + + # LinkedIn Voyager API endpoint (unofficial) + profile_url = f"https://www.linkedin.com/voyager/api/identity/profiles/{profile_id}" + + try: + async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client: + response = await client.get(profile_url, headers=headers) + + if response.status_code == 401: + return {"error": "LinkedIn session expired. Please update LINKEDIN_COOKIE."} + elif response.status_code == 404: + return {"error": f"Profile not found: {profile_id}"} + + response.raise_for_status() + data = response.json() + + return { + "profile_id": profile_id, + "first_name": data.get("firstName"), + "last_name": data.get("lastName"), + "headline": data.get("headline"), + "summary": data.get("summary"), + "location": data.get("locationName"), + "industry": data.get("industryName"), + "profile_url": f"https://www.linkedin.com/in/{profile_id}/", + } + + except httpx.HTTPStatusError as e: + return {"error": f"LinkedIn API error: {e.response.status_code}"} + except Exception as e: + return {"error": f"Failed to fetch profile: {str(e)}"} + + +@server.tool() +async def linkedin_get_company(company_url_or_id: str) -> Dict[str, Any]: + """ + Get information about a LinkedIn company page. + + WARNING: Uses unofficial methods. May violate LinkedIn Terms of Service. + Requires LINKEDIN_COOKIE environment variable. + + Args: + company_url_or_id: LinkedIn company URL or company name/ID + + Returns: + dict: Company information including name, description, industry, size + + Example: + linkedin_get_company("https://www.linkedin.com/company/microsoft/") + linkedin_get_company("microsoft") + """ + if not LINKEDIN_COOKIE: + return { + "error": "LinkedIn cookie not configured. 
Set LINKEDIN_COOKIE environment variable.", + "help": "Get your li_at cookie from LinkedIn after logging in via browser DevTools." + } + + company_id = extract_linkedin_company_id(company_url_or_id) + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Cookie": f"li_at={LINKEDIN_COOKIE.replace('li_at=', '')}", + "Accept": "application/json", + } + + # LinkedIn Voyager API endpoint (unofficial) + company_url = f"https://www.linkedin.com/voyager/api/organization/companies?decorationId=com.linkedin.voyager.deco.organization.web.WebFullCompanyMain-12&q=universalName&universalName={company_id}" + + try: + async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client: + response = await client.get(company_url, headers=headers) + + if response.status_code == 401: + return {"error": "LinkedIn session expired. Please update LINKEDIN_COOKIE."} + + response.raise_for_status() + data = response.json() + + elements = data.get("elements", []) + if not elements: + return {"error": f"Company not found: {company_id}"} + + company = elements[0] + return { + "company_id": company_id, + "name": company.get("name"), + "tagline": company.get("tagline"), + "description": company.get("description"), + "industry": company.get("companyIndustries", [{}])[0].get("localizedName") if company.get("companyIndustries") else None, + "company_size": company.get("staffCountRange", {}).get("start"), + "headquarters": company.get("headquarter", {}).get("city"), + "website": company.get("companyPageUrl"), + "founded_year": company.get("foundedOn", {}).get("year"), + "specialities": company.get("specialities", []), + "company_url": f"https://www.linkedin.com/company/{company_id}/", + } + + except httpx.HTTPStatusError as e: + return {"error": f"LinkedIn API error: {e.response.status_code}"} + except Exception as e: + return {"error": f"Failed to fetch company: {str(e)}"} + + +@server.tool() +async def linkedin_search_jobs( + keywords: str, + 
location: Optional[str] = None, + limit: int = 10 +) -> Dict[str, Any]: + """ + Search for jobs on LinkedIn. + + WARNING: Uses unofficial methods. May violate LinkedIn Terms of Service. + Requires LINKEDIN_COOKIE environment variable. + + Args: + keywords: Job search keywords + location: Location filter (optional) + limit: Maximum number of results (default: 10) + + Returns: + dict: List of job postings matching the search + + Example: + linkedin_search_jobs("software engineer", "San Francisco", limit=5) + """ + if not LINKEDIN_COOKIE: + return { + "error": "LinkedIn cookie not configured. Set LINKEDIN_COOKIE environment variable.", + "help": "Get your li_at cookie from LinkedIn after logging in via browser DevTools." + } + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Cookie": f"li_at={LINKEDIN_COOKIE.replace('li_at=', '')}", + "Accept": "application/json", + } + + # Build search URL + params = { + "keywords": keywords, + "count": min(limit, 25), + "start": 0, + } + if location: + params["location"] = location + + search_url = f"https://www.linkedin.com/voyager/api/voyagerJobsDashJobCards" + + try: + async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client: + response = await client.get(search_url, params=params, headers=headers) + + if response.status_code == 401: + return {"error": "LinkedIn session expired. 
Please update LINKEDIN_COOKIE."} + + response.raise_for_status() + data = response.json() + + jobs = [] + for element in data.get("elements", [])[:limit]: + job = element.get("jobCardUnion", {}).get("jobPostingCard", {}) + jobs.append({ + "title": job.get("title"), + "company": job.get("primaryDescription", {}).get("text"), + "location": job.get("secondaryDescription", {}).get("text"), + "posted_time": job.get("tertiaryDescription", {}).get("text"), + "job_url": f"https://www.linkedin.com/jobs/view/{job.get('jobPostingId')}" if job.get("jobPostingId") else None + }) + + return { + "keywords": keywords, + "location": location, + "jobs": jobs + } + + except httpx.HTTPStatusError as e: + return {"error": f"LinkedIn API error: {e.response.status_code}"} + except Exception as e: + return {"error": f"Failed to search jobs: {str(e)}"} + + +@server.tool() +async def linkedin_get_feed_posts(limit: int = 10) -> Dict[str, Any]: + """ + Get recent posts from your LinkedIn feed. + + WARNING: Uses unofficial methods. May violate LinkedIn Terms of Service. + Requires LINKEDIN_COOKIE environment variable. + + Args: + limit: Maximum number of posts to return (default: 10) + + Returns: + dict: List of recent feed posts + + Example: + linkedin_get_feed_posts(limit=5) + """ + if not LINKEDIN_COOKIE: + return { + "error": "LinkedIn cookie not configured. Set LINKEDIN_COOKIE environment variable.", + "help": "Get your li_at cookie from LinkedIn after logging in via browser DevTools." 
+ } + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Cookie": f"li_at={LINKEDIN_COOKIE.replace('li_at=', '')}", + "Accept": "application/json", + } + + feed_url = f"https://www.linkedin.com/voyager/api/feed/updates?count={min(limit, 25)}" + + try: + async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client: + response = await client.get(feed_url, headers=headers) + + if response.status_code == 401: + return {"error": "LinkedIn session expired. Please update LINKEDIN_COOKIE."} + + response.raise_for_status() + data = response.json() + + posts = [] + for element in data.get("elements", [])[:limit]: + update = element.get("value", {}).get("com.linkedin.voyager.feed.render.UpdateV2", {}) + actor = update.get("actor", {}) + commentary = update.get("commentary", {}).get("text", {}).get("text", "") + + posts.append({ + "author_name": actor.get("name", {}).get("text"), + "author_headline": actor.get("description", {}).get("text"), + "content": commentary[:500] if commentary else None, + "num_likes": update.get("socialDetail", {}).get("totalSocialActivityCounts", {}).get("numLikes"), + "num_comments": update.get("socialDetail", {}).get("totalSocialActivityCounts", {}).get("numComments"), + }) + + return {"posts": posts} + + except httpx.HTTPStatusError as e: + return {"error": f"LinkedIn API error: {e.response.status_code}"} + except Exception as e: + return {"error": f"Failed to get feed: {str(e)}"} + + +# ============================================================================ +# FACEBOOK TOOLS +# ============================================================================ + +@server.tool() +async def facebook_get_page_posts( + page_id: Optional[str] = None, + limit: int = 10 +) -> Dict[str, Any]: + """ + Get recent posts from a Facebook Page. + + Requires FACEBOOK_ACCESS_TOKEN and optionally FACEBOOK_PAGE_ID environment variables. 
+ + Args: + page_id: Facebook Page ID (uses FACEBOOK_PAGE_ID env var if not provided) + limit: Maximum number of posts to return (default: 10) + + Returns: + dict: List of recent posts with engagement metrics + + Example: + facebook_get_page_posts("12345678", limit=5) + """ + if not FACEBOOK_ACCESS_TOKEN: + return { + "error": "Facebook access token not configured. Set FACEBOOK_ACCESS_TOKEN environment variable.", + "help": "Get a Page Access Token from Facebook Developer Console." + } + + target_page_id = page_id or FACEBOOK_PAGE_ID + if not target_page_id: + return {"error": "Page ID required. Provide page_id or set FACEBOOK_PAGE_ID environment variable."} + + params = { + "access_token": FACEBOOK_ACCESS_TOKEN, + "fields": "id,message,created_time,shares,attachments,likes.summary(true),comments.summary(true)", + "limit": min(limit, 100) + } + + async with httpx.AsyncClient() as client: + response = await client.get( + f"{FACEBOOK_API_BASE}/{target_page_id}/posts", + params=params + ) + + if response.status_code == 400: + error_data = response.json().get("error", {}) + return {"error": f"Facebook API error: {error_data.get('message', 'Unknown error')}"} + + response.raise_for_status() + data = response.json() + + posts = [] + for post in data.get("data", []): + posts.append({ + "post_id": post.get("id"), + "message": post.get("message"), + "created_time": post.get("created_time"), + "likes_count": post.get("likes", {}).get("summary", {}).get("total_count", 0), + "comments_count": post.get("comments", {}).get("summary", {}).get("total_count", 0), + "shares_count": post.get("shares", {}).get("count", 0), + "attachments": post.get("attachments", {}).get("data", []) + }) + + return { + "page_id": target_page_id, + "posts": posts + } + + +@server.tool() +async def facebook_get_post_comments( + post_id: str, + limit: int = 25 +) -> Dict[str, Any]: + """ + Get comments on a Facebook post. + + Requires FACEBOOK_ACCESS_TOKEN environment variable. 
+ + Args: + post_id: The Facebook post ID + limit: Maximum number of comments to return (default: 25) + + Returns: + dict: List of comments with user info and engagement + + Example: + facebook_get_post_comments("123456789_987654321", limit=10) + """ + if not FACEBOOK_ACCESS_TOKEN: + return { + "error": "Facebook access token not configured. Set FACEBOOK_ACCESS_TOKEN environment variable." + } + + params = { + "access_token": FACEBOOK_ACCESS_TOKEN, + "fields": "id,message,created_time,from,like_count,comment_count", + "limit": min(limit, 100) + } + + async with httpx.AsyncClient() as client: + response = await client.get( + f"{FACEBOOK_API_BASE}/{post_id}/comments", + params=params + ) + + if response.status_code == 400: + error_data = response.json().get("error", {}) + return {"error": f"Facebook API error: {error_data.get('message', 'Unknown error')}"} + + response.raise_for_status() + data = response.json() + + comments = [] + for comment in data.get("data", []): + comments.append({ + "comment_id": comment.get("id"), + "message": comment.get("message"), + "created_time": comment.get("created_time"), + "from_name": comment.get("from", {}).get("name"), + "from_id": comment.get("from", {}).get("id"), + "like_count": comment.get("like_count", 0), + "reply_count": comment.get("comment_count", 0) + }) + + return { + "post_id": post_id, + "comments": comments + } + + +@server.tool() +async def facebook_post_to_page( + message: str, + page_id: Optional[str] = None +) -> Dict[str, Any]: + """ + Post a message to a Facebook Page. + + Requires FACEBOOK_ACCESS_TOKEN with pages_manage_posts permission. + + Args: + message: The message content to post + page_id: Facebook Page ID (uses FACEBOOK_PAGE_ID env var if not provided) + + Returns: + dict: Post ID of the created post + + Example: + facebook_post_to_page("Hello from MCP!", "12345678") + """ + if not FACEBOOK_ACCESS_TOKEN: + return { + "error": "Facebook access token not configured. 
Set FACEBOOK_ACCESS_TOKEN environment variable." + } + + target_page_id = page_id or FACEBOOK_PAGE_ID + if not target_page_id: + return {"error": "Page ID required. Provide page_id or set FACEBOOK_PAGE_ID environment variable."} + + async with httpx.AsyncClient() as client: + response = await client.post( + f"{FACEBOOK_API_BASE}/{target_page_id}/feed", + data={ + "message": message, + "access_token": FACEBOOK_ACCESS_TOKEN + } + ) + + if response.status_code == 400: + error_data = response.json().get("error", {}) + return {"error": f"Facebook API error: {error_data.get('message', 'Unknown error')}"} + + response.raise_for_status() + data = response.json() + + return { + "success": True, + "post_id": data.get("id"), + "page_id": target_page_id + } + + +@server.tool() +async def facebook_reply_to_comment( + comment_id: str, + message: str +) -> Dict[str, Any]: + """ + Reply to a comment on a Facebook post. + + Requires FACEBOOK_ACCESS_TOKEN with pages_manage_comments permission. + + Args: + comment_id: The Facebook comment ID to reply to + message: The reply message + + Returns: + dict: Reply comment ID + + Example: + facebook_reply_to_comment("123456789_987654321", "Thanks for your comment!") + """ + if not FACEBOOK_ACCESS_TOKEN: + return { + "error": "Facebook access token not configured. Set FACEBOOK_ACCESS_TOKEN environment variable." 
+ } + + async with httpx.AsyncClient() as client: + response = await client.post( + f"{FACEBOOK_API_BASE}/{comment_id}/comments", + data={ + "message": message, + "access_token": FACEBOOK_ACCESS_TOKEN + } + ) + + if response.status_code == 400: + error_data = response.json().get("error", {}) + return {"error": f"Facebook API error: {error_data.get('message', 'Unknown error')}"} + + response.raise_for_status() + data = response.json() + + return { + "success": True, + "reply_id": data.get("id"), + "parent_comment_id": comment_id + } + + +# ============================================================================ +# INSTAGRAM TOOLS +# ============================================================================ + +@server.tool() +async def instagram_get_profile_info() -> Dict[str, Any]: + """ + Get Instagram Business profile information. + + Requires INSTAGRAM_ACCESS_TOKEN and INSTAGRAM_BUSINESS_ACCOUNT_ID environment variables. + + Returns: + dict: Profile information including username, bio, follower count, etc. + + Example: + instagram_get_profile_info() + """ + if not INSTAGRAM_ACCESS_TOKEN or not INSTAGRAM_BUSINESS_ACCOUNT_ID: + return { + "error": "Instagram credentials not configured.", + "help": "Set INSTAGRAM_ACCESS_TOKEN and INSTAGRAM_BUSINESS_ACCOUNT_ID environment variables." 
+ } + + params = { + "access_token": INSTAGRAM_ACCESS_TOKEN, + "fields": "id,username,name,biography,followers_count,follows_count,media_count,profile_picture_url,website" + } + + async with httpx.AsyncClient() as client: + response = await client.get( + f"{INSTAGRAM_API_BASE}/{INSTAGRAM_BUSINESS_ACCOUNT_ID}", + params=params + ) + + if response.status_code == 400: + error_data = response.json().get("error", {}) + return {"error": f"Instagram API error: {error_data.get('message', 'Unknown error')}"} + + response.raise_for_status() + data = response.json() + + return { + "account_id": data.get("id"), + "username": data.get("username"), + "name": data.get("name"), + "biography": data.get("biography"), + "followers_count": data.get("followers_count"), + "following_count": data.get("follows_count"), + "media_count": data.get("media_count"), + "profile_picture_url": data.get("profile_picture_url"), + "website": data.get("website") + } + + +@server.tool() +async def instagram_get_media_posts(limit: int = 10) -> Dict[str, Any]: + """ + Get recent Instagram media posts from the Business account. + + Requires INSTAGRAM_ACCESS_TOKEN and INSTAGRAM_BUSINESS_ACCOUNT_ID environment variables. + + Args: + limit: Maximum number of posts to return (default: 10) + + Returns: + dict: List of recent media posts with engagement metrics + + Example: + instagram_get_media_posts(limit=5) + """ + if not INSTAGRAM_ACCESS_TOKEN or not INSTAGRAM_BUSINESS_ACCOUNT_ID: + return { + "error": "Instagram credentials not configured.", + "help": "Set INSTAGRAM_ACCESS_TOKEN and INSTAGRAM_BUSINESS_ACCOUNT_ID environment variables." 
+ } + + params = { + "access_token": INSTAGRAM_ACCESS_TOKEN, + "fields": "id,caption,media_type,media_url,permalink,timestamp,like_count,comments_count,thumbnail_url", + "limit": min(limit, 50) + } + + async with httpx.AsyncClient() as client: + response = await client.get( + f"{INSTAGRAM_API_BASE}/{INSTAGRAM_BUSINESS_ACCOUNT_ID}/media", + params=params + ) + + if response.status_code == 400: + error_data = response.json().get("error", {}) + return {"error": f"Instagram API error: {error_data.get('message', 'Unknown error')}"} + + response.raise_for_status() + data = response.json() + + posts = [] + for post in data.get("data", []): + posts.append({ + "media_id": post.get("id"), + "caption": post.get("caption"), + "media_type": post.get("media_type"), + "media_url": post.get("media_url"), + "permalink": post.get("permalink"), + "timestamp": post.get("timestamp"), + "like_count": post.get("like_count"), + "comments_count": post.get("comments_count"), + "thumbnail_url": post.get("thumbnail_url") + }) + + return {"posts": posts} + + +@server.tool() +async def instagram_get_media_insights(media_id: str) -> Dict[str, Any]: + """ + Get insights/analytics for a specific Instagram media post. + + Requires INSTAGRAM_ACCESS_TOKEN environment variable. + + Args: + media_id: The Instagram media ID + + Returns: + dict: Post insights including reach, impressions, engagement + + Example: + instagram_get_media_insights("17890012345678901") + """ + if not INSTAGRAM_ACCESS_TOKEN: + return { + "error": "Instagram access token not configured. Set INSTAGRAM_ACCESS_TOKEN environment variable." 
+ } + + # Metrics available depend on media type (IMAGE, VIDEO, CAROUSEL_ALBUM) + params = { + "access_token": INSTAGRAM_ACCESS_TOKEN, + "metric": "engagement,impressions,reach,saved" + } + + async with httpx.AsyncClient() as client: + response = await client.get( + f"{INSTAGRAM_API_BASE}/{media_id}/insights", + params=params + ) + + if response.status_code == 400: + error_data = response.json().get("error", {}) + return {"error": f"Instagram API error: {error_data.get('message', 'Unknown error')}"} + + response.raise_for_status() + data = response.json() + + insights = {} + for metric in data.get("data", []): + insights[metric.get("name")] = metric.get("values", [{}])[0].get("value") + + return { + "media_id": media_id, + "insights": insights + } + + +@server.tool() +async def instagram_publish_media( + image_url: str, + caption: str +) -> Dict[str, Any]: + """ + Publish an image to Instagram Business account. + + Requires INSTAGRAM_ACCESS_TOKEN and INSTAGRAM_BUSINESS_ACCOUNT_ID environment variables. + The image_url must be a publicly accessible URL. + + Args: + image_url: Public URL of the image to publish + caption: Caption for the post + + Returns: + dict: Created media ID + + Example: + instagram_publish_media("https://example.com/image.jpg", "Check out this photo!") + """ + if not INSTAGRAM_ACCESS_TOKEN or not INSTAGRAM_BUSINESS_ACCOUNT_ID: + return { + "error": "Instagram credentials not configured.", + "help": "Set INSTAGRAM_ACCESS_TOKEN and INSTAGRAM_BUSINESS_ACCOUNT_ID environment variables." 
+ } + + async with httpx.AsyncClient() as client: + # Step 1: Create media container + container_response = await client.post( + f"{INSTAGRAM_API_BASE}/{INSTAGRAM_BUSINESS_ACCOUNT_ID}/media", + data={ + "image_url": image_url, + "caption": caption, + "access_token": INSTAGRAM_ACCESS_TOKEN + } + ) + + if container_response.status_code == 400: + error_data = container_response.json().get("error", {}) + return {"error": f"Instagram API error: {error_data.get('message', 'Unknown error')}"} + + container_response.raise_for_status() + container_id = container_response.json().get("id") + + # Step 2: Publish the container + publish_response = await client.post( + f"{INSTAGRAM_API_BASE}/{INSTAGRAM_BUSINESS_ACCOUNT_ID}/media_publish", + data={ + "creation_id": container_id, + "access_token": INSTAGRAM_ACCESS_TOKEN + } + ) + + if publish_response.status_code == 400: + error_data = publish_response.json().get("error", {}) + return {"error": f"Instagram API error: {error_data.get('message', 'Unknown error')}"} + + publish_response.raise_for_status() + media_id = publish_response.json().get("id") + + return { + "success": True, + "media_id": media_id, + "container_id": container_id + } + + +@server.tool() +async def instagram_get_comments(media_id: str, limit: int = 25) -> Dict[str, Any]: + """ + Get comments on an Instagram media post. + + Requires INSTAGRAM_ACCESS_TOKEN environment variable. + + Args: + media_id: The Instagram media ID + limit: Maximum number of comments to return (default: 25) + + Returns: + dict: List of comments + + Example: + instagram_get_comments("17890012345678901", limit=10) + """ + if not INSTAGRAM_ACCESS_TOKEN: + return { + "error": "Instagram access token not configured. Set INSTAGRAM_ACCESS_TOKEN environment variable." 
+ } + + params = { + "access_token": INSTAGRAM_ACCESS_TOKEN, + "fields": "id,text,timestamp,username,like_count", + "limit": min(limit, 50) + } + + async with httpx.AsyncClient() as client: + response = await client.get( + f"{INSTAGRAM_API_BASE}/{media_id}/comments", + params=params + ) + + if response.status_code == 400: + error_data = response.json().get("error", {}) + return {"error": f"Instagram API error: {error_data.get('message', 'Unknown error')}"} + + response.raise_for_status() + data = response.json() + + comments = [] + for comment in data.get("data", []): + comments.append({ + "comment_id": comment.get("id"), + "text": comment.get("text"), + "timestamp": comment.get("timestamp"), + "username": comment.get("username"), + "like_count": comment.get("like_count") + }) + + return { + "media_id": media_id, + "comments": comments + } + + +if __name__ == "__main__": + server.run() diff --git a/mcp_servers/social_media/setup.sh b/mcp_servers/social_media/setup.sh new file mode 100755 index 0000000000..8307a82118 --- /dev/null +++ b/mcp_servers/social_media/setup.sh @@ -0,0 +1,154 @@ +#!/bin/bash +# Social Media MCP Server - Setup Script +# +# This script sets up the development environment for the Social Media MCP server. + +set -e # Exit on error + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "==============================================" +echo " Social Media MCP Server - Setup" +echo "==============================================" +echo "" + +# Check for Python 3.10+ +echo "Checking Python version..." +if command -v python3 &> /dev/null; then + PYTHON_VERSION=$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') + MAJOR=$(echo $PYTHON_VERSION | cut -d. -f1) + MINOR=$(echo $PYTHON_VERSION | cut -d. 
-f2) + + if [ "$MAJOR" -ge 3 ] && [ "$MINOR" -ge 10 ]; then + echo " Python $PYTHON_VERSION found" + else + echo " ERROR: Python 3.10+ required, found $PYTHON_VERSION" + exit 1 + fi +else + echo " ERROR: Python 3 not found" + exit 1 +fi + +# Create virtual environment +echo "" +echo "Creating virtual environment..." +if [ -d ".venv" ]; then + echo " .venv already exists, skipping..." +else + python3 -m venv .venv + echo " Created .venv" +fi + +# Activate virtual environment +echo "" +echo "Activating virtual environment..." +source .venv/bin/activate +echo " Activated" + +# Upgrade pip +echo "" +echo "Upgrading pip..." +pip install --upgrade pip --quiet + +# Install dependencies +echo "" +echo "Installing dependencies..." +pip install -e . --quiet +echo " Installed: httpx, mcp, yt-dlp, beautifulsoup4" + +# Check for yt-dlp system installation (preferred for better codec support) +echo "" +echo "Checking yt-dlp installation..." +if command -v yt-dlp &> /dev/null; then + YTDLP_VERSION=$(yt-dlp --version) + echo " yt-dlp $YTDLP_VERSION found (system)" +else + echo " yt-dlp not found system-wide" + echo " Using pip-installed version (may have limited codec support)" + echo " For best results, install with: brew install yt-dlp" +fi + +# Create .env template if it doesn't exist +echo "" +echo "Checking environment configuration..." 
+if [ -f ".env" ]; then + echo " .env file already exists" +else + cat > .env.example << 'EOF' +# YouTube API (optional - falls back to yt-dlp if not set) +YOUTUBE_API_KEY= + +# LinkedIn (choose one method) +# Option 1: Session cookie (recommended) +LINKEDIN_COOKIE= +# Option 2: Email/Password (less reliable) +LINKEDIN_EMAIL= +LINKEDIN_PASSWORD= + +# Facebook Graph API +FACEBOOK_ACCESS_TOKEN= +FACEBOOK_PAGE_ID= +FACEBOOK_API_VERSION=v19.0 + +# Instagram Graph API (via Facebook) +INSTAGRAM_ACCESS_TOKEN= +INSTAGRAM_BUSINESS_ACCOUNT_ID= +INSTAGRAM_API_VERSION=v19.0 +EOF + echo " Created .env.example template" + echo " Copy to .env and fill in your credentials:" + echo " cp .env.example .env" +fi + +# Test the server can start +echo "" +echo "Testing server import..." +if python -c "import server" 2>/dev/null; then + echo " Server module imports successfully" +else + echo " WARNING: Server import failed. Check for missing dependencies." +fi + +# Print summary +echo "" +echo "==============================================" +echo " Setup Complete!" +echo "==============================================" +echo "" +echo "Next steps:" +echo "" +echo "1. Configure credentials:" +echo " cp .env.example .env" +echo " # Edit .env with your API keys/tokens" +echo "" +echo "2. Test the server:" +echo " source .venv/bin/activate" +echo " python server.py" +echo "" +echo "3. 
Add to your MCP client config (OpenCode/Claude Desktop)" +echo " See README.md for configuration examples" +echo "" +echo "Configured platforms:" +if [ -n "$YOUTUBE_API_KEY" ]; then + echo " - YouTube: API Key set" +else + echo " - YouTube: Not configured (will use yt-dlp fallback)" +fi +if [ -n "$LINKEDIN_COOKIE" ]; then + echo " - LinkedIn: Cookie set" +else + echo " - LinkedIn: Not configured" +fi +if [ -n "$FACEBOOK_ACCESS_TOKEN" ]; then + echo " - Facebook: Access token set" +else + echo " - Facebook: Not configured" +fi +if [ -n "$INSTAGRAM_ACCESS_TOKEN" ]; then + echo " - Instagram: Access token set" +else + echo " - Instagram: Not configured" +fi +echo "" diff --git a/schemas/20251121/uml/mermaid/complete_schema_with_instances_20251201_161833.mmd b/schemas/20251121/uml/mermaid/complete_schema_with_instances_20251201_161833.mmd new file mode 100644 index 0000000000..2413f8823e --- /dev/null +++ b/schemas/20251121/uml/mermaid/complete_schema_with_instances_20251201_161833.mmd @@ -0,0 +1,2677 @@ +```mermaid +erDiagram + + %% Heritage Custodian Ontology - Complete Schema with Instance Data + %% Generated: 2025-12-01T16:18:30.879332 + %% Schema: heritage-custodian-observation-reconstruction + +CustodianAppellation { + string appellation_value PK + string appellation_language + AppellationTypeEnum appellation_type + CustodianName variant_of_name +} +CustodianName { + string emic_name PK + string name_language + string standardized_name PK + CustodianAppellationList alternative_names + uriorcurie endorsement_source PK + string name_authority + date valid_from + date valid_to + TimeSpan name_validity_period + CustodianName supersedes + CustodianName superseded_by + CustodianObservationList was_derived_from PK + ReconstructionActivity was_generated_by + Custodian refers_to_custodian PK +} +TimeSpan { + datetime begin_of_the_begin + datetime end_of_the_begin + datetime begin_of_the_end + datetime end_of_the_end +} +ReconstructionAgent { + uriorcurie id + string 
agent_name PK + AgentTypeEnum agent_type + string affiliation + string contact +} +CustodianObservation { + CustodianAppellation observed_name PK + CustodianAppellationList alternative_observed_names + date observation_date + string observation_source + SourceDocument source PK + LanguageCode language + string observation_context + CustodianLegalStatus derived_from_entity + ConfidenceMeasure confidence_score +} +CustodianLegalStatus { + Custodian refers_to_custodian PK + LegalEntityType legal_entity_type PK + LegalName legal_name PK + LegalForm legal_form + RegistrationNumberList registration_numbers + RegistrationAuthority registration_authority + TradeRegister primary_register + Jurisdiction legal_jurisdiction + date dissolution_date + TimeSpan temporal_extent + CustodianLegalStatus parent_custodian + LegalStatus legal_status PK + GovernanceStructure governance_structure + ArticlesOfAssociationList has_articles_of_association + string reconstruction_method + CustodianObservationList was_derived_from PK + ReconstructionActivity was_generated_by PK + CustodianLegalStatus was_revision_of + CustodianIdentifierList identifiers + LegalResponsibilityCollectionList collections_under_responsibility +} +ReconstructedEntity { + ReconstructionActivity was_generated_by +} +ConfidenceMeasure { + float confidence_value PK + string confidence_method +} +Custodian { + uriorcurie hc_id PK + uriorcurie preferred_label + CustodianType custodian_type + uriorcurie legal_status + uriorcurie place_designation + uriorcurieList digital_platform + uriorcurieList has_collection + CustodianArchiveList has_operational_archive + CustodianAdministrationList has_administration + BudgetList has_budget + SocialMediaProfileList social_media_profiles + uriorcurieList organizational_structure + uriorcurieList organizational_change_events + uriorcurieList encompassing_body + DataLicensePolicy data_license_policy + ProjectList participated_in_projects + uriorcurieList identifiers + GiftShopList 
gift_shop + StorageList storage_facilities + datetime created + datetime modified +} +CustodianType { + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +ArchiveOrganizationType { + string archive_scope + stringList record_types + stringList preservation_standards + string finding_aids_format + string access_policy + uri appraisal_policy + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + ArchiveOrganizationType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +MuseumType { + stringList collection_focus + string exhibition_program + stringList visitor_facilities + string cataloging_standard + boolean conservation_lab + boolean research_department + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + MuseumType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +LibraryType { + string lending_policy + string catalog_system + stringList special_collections + boolean membership_required + boolean interlibrary_loan + string cataloging_standard + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + LibraryType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +GalleryType { + boolean 
commercial_operation + stringList artist_representation + string exhibition_focus + boolean sales_activity + string exhibition_model + string commission_rate + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + GalleryType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +ResearchOrganizationType { + stringList research_focus + boolean publication_output + uri data_repository + stringList research_infrastructure + uri academic_affiliation + stringList research_projects + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + ResearchOrganizationType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +OfficialInstitutionType { + string administrative_level PK + stringList heritage_mandate + boolean regulatory_authority + stringList funding_programs + string oversight_jurisdiction + string policy_authority + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +BioCustodianType { + stringList specimen_types + string collection_size + boolean living_collections PK + stringList research_programs + stringList public_education + string conservation_breeding + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList 
applicable_countries + datetime created + datetime modified +} +EducationProviderType { + stringList education_level + stringList academic_programs + string collection_access + stringList teaching_collections + stringList student_services + string accreditation + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +HeritageSocietyType { + string society_focus + string membership_size + stringList publication_activities + stringList collecting_scope + stringList volunteer_programs + stringList community_engagement + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +FeatureCustodianType { + stringList feature_types PK + string site_portfolio PK + string visitor_services PK + string conservation_activities PK + string access_management PK + string stewardship_model PK + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +IntangibleHeritageGroupType { + stringList ich_domain PK + string transmission_methods PK + string practitioner_community PK + string performance_repertoire PK + string cultural_context PK + string safeguarding_measures PK + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string 
type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +PersonalCollectionType { + stringList collection_focus PK + string collection_size PK + string acquisition_history PK + string access_restrictions PK + string preservation_approach PK + string legacy_planning + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +HolySacredSiteType { + string religious_tradition PK + stringList collection_types PK + string religious_function PK + string access_policy PK + string stewardship_responsibility PK + string secularization_status PK + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +DigitalPlatformType { + stringList platform_category PK + string digital_collections PK + stringList technology_stack PK + stringList data_standards PK + string user_services PK + string sustainability_model PK + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +NonProfitType { + string organizational_mission PK + string program_activities PK + stringList geographic_scope PK + stringList beneficiary_groups PK + string partnership_model PK + string 
impact_measurement + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +TasteScentHeritageType { + string heritage_practice PK + string sensory_heritage_domain PK + stringList preservation_methods PK + stringList traditional_products PK + string knowledge_transmission PK + string community_significance + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +CommercialOrganizationType { + string business_model PK + string collection_purpose PK + string corporate_integration PK + string public_access PK + stringList heritage_holdings PK + stringList commercial_activities + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +MixedCustodianType { + stringList constituent_types PK + string functional_integration PK + string mixed_governance_structure PK + stringList service_portfolio PK + string facility_design + stringList user_communities PK + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +UnspecifiedType { + 
string classification_status PK + stringList evidence_gaps PK + stringList type_hypotheses + stringList research_attempts PK + string review_status PK + stringList data_quality_flags PK + uriorcurie type_id PK + CustodianPrimaryTypeEnum primary_type PK + string wikidata_entity PK + stringList type_label PK + string type_description + CustodianType broader_type + CustodianTypeList narrower_types + CustodianTypeList related_types + stringList applicable_countries + datetime created + datetime modified +} +CustodianPlace { + string place_name PK + string place_language + PlaceSpecificityEnum place_specificity + string place_note + Country country + Subregion subregion + Settlement settlement + FeaturePlace has_feature_type + GeoSpatialPlaceList has_geospatial_location + AuxiliaryPlaceList auxiliary_places + CustodianObservationList was_derived_from PK + ReconstructionActivity was_generated_by + Custodian refers_to_custodian PK + date valid_from + date valid_to +} +AuxiliaryPlace { + uriorcurie auxiliary_place_id PK + string place_name PK + AuxiliaryPlaceTypeEnum auxiliary_place_type PK + string place_description + string street_address + string postal_code + string city + Country country + Subregion subregion + Settlement settlement + float latitude + float longitude + integer geonames_id + GeoSpatialPlaceList has_geospatial_location + FeaturePlace has_feature_type + OrganizationBranchList hosts_branch + CustodianPlace is_auxiliary_of_place PK + date valid_from + date valid_to + TimeSpan temporal_extent + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + Custodian refers_to_custodian PK +} +ReconstructionActivity { + uriorcurie id PK + ReconstructionActivityTypeEnum activity_type + string method + ReconstructionAgent responsible_agent + TimeSpan temporal_extent + CustodianObservationList used PK + ConfidenceMeasure confidence_score + string justification +} +OrganizationalStructure { + uriorcurie id PK + string unit_name PK + 
OrganizationalUnitTypeEnum unit_type + OrganizationalStructure parent_unit + integer staff_count + PersonObservationList staff_members + CustodianCollectionList managed_collections + AuxiliaryPlaceList located_at + string contact_point + date valid_from + date valid_to + Custodian refers_to_custodian PK +} +OrganizationBranch { + uriorcurie branch_id PK + string branch_name PK + OrganizationBranchTypeEnum branch_type PK + string branch_description + AuxiliaryPlaceList located_at + OrganizationalStructureList has_operational_unit + uriorcurie is_branch_of PK + OrganizationBranchList has_sub_branch + string branch_head + integer staff_count + string contact_point + date valid_from + date valid_to + TimeSpan temporal_extent + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + Custodian refers_to_custodian PK +} +AuxiliaryDigitalPlatform { + uriorcurie auxiliary_platform_id PK + string platform_name PK + AuxiliaryDigitalPlatformTypeEnum auxiliary_platform_type PK + uri platform_url PK + string platform_purpose + string platform_description + uri api_documentation + stringList technology_stack + DigitalPlatform is_auxiliary_of_platform PK + uriorcurieList provides_access_to + string related_project + string funding_source + boolean iiif_support + boolean linked_data + date valid_from + date valid_to + TimeSpan temporal_extent + string archival_status + uri archived_at + string preservation_event_type + string fixity_info + boolean cms_detected + CollectionManagementSystemList powered_by_cms + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + Custodian refers_to_custodian PK +} +CustodianCollection { + uriorcurie id PK + string collection_name PK + string collection_description + stringList collection_type + string collection_scope + TimeSpan temporal_coverage + string extent + string access_rights + stringList digital_surrogates + string digitization_status + string preservation_level + 
CollectionManagementSystemList managed_by_cms + OrganizationalStructure managing_unit + stringList custody_history + Custodian refers_to_custodian PK + CustodianObservationList was_derived_from PK + date valid_from + date valid_to + ReconstructionActivity was_generated_by +} +LegalResponsibilityCollection { + CustodianLegalStatus responsible_legal_entity PK + string legal_responsibility_basis PK + date legal_responsibility_start_date + date legal_responsibility_end_date + uriorcurie id PK + string collection_name PK + string collection_description + stringList collection_type + string collection_scope + TimeSpan temporal_coverage + string extent + string access_rights + stringList digital_surrogates + string digitization_status + string preservation_level + CollectionManagementSystemList managed_by_cms + OrganizationalStructure managing_unit + stringList custody_history + Custodian refers_to_custodian PK + CustodianObservationList was_derived_from PK + date valid_from + date valid_to + ReconstructionActivity was_generated_by +} +GeoSpatialPlace { + uriorcurie geospatial_id PK + float latitude PK + float longitude PK + float altitude + string geometry_wkt + GeometryTypeEnum geometry_type + string coordinate_reference_system + integer geonames_id + string osm_id + string cadastral_id + float accuracy_meters + string geospatial_source + string bounding_box + string spatial_resolution + string feature_class + string feature_code + date valid_from_geo + date valid_to_geo +} +OrganizationalChangeEvent { + uriorcurie id PK + OrganizationalChangeEventTypeEnum event_type PK + date event_date PK + string event_description PK + OrganizationalStructureList affected_units + OrganizationalStructureList resulting_units + Custodian parent_custodian PK + string change_rationale + string staff_impact + CustodianPlace event_location + CustodianPlace from_location + CustodianPlace to_location + GeoSpatialPlaceList affected_territory + uri documentation_source + date valid_from + date 
valid_to +} +PersonObservation { + uriorcurie id PK + string person_name PK + StaffRoleTypeEnum staff_role PK + string role_title + OrganizationalStructure unit_affiliation + date role_start_date + date role_end_date + SourceDocument observation_source + OrganizationalChangeEvent affected_by_event + string contact_email + stringList expertise_areas + datetime created + datetime modified +} +CustodianIdentifier { + string identifier_scheme PK + string identifier_value PK + Custodian identifies_custodian + Standard defined_by_standard + AllocationAgency allocated_by + IdentifierFormat identifier_format_used + string canonical_value + CustodianName also_identifies_name + datetime allocation_date +} +LanguageCode { + string language_code PK +} +SourceDocument { + uriorcurie source_uri PK + SourceDocumentTypeEnum source_type + date source_date + string source_creator +} +LegalEntityType { + uriorcurie id PK + string code PK + string label PK + string definition PK + uriorcurieList ontology_mapping +} +LegalForm { + uriorcurie id PK + string elf_code PK + Country country_code PK + string local_name PK + string transliterated_name + string abbreviation + LegalEntityType legal_entity_type PK + LegalForm parent_form + date valid_from + date valid_to +} +LegalName { + uriorcurie id PK + string full_name PK + string name_without_type + string alphabetical_name + string display_name + string language + string script + TimeSpan temporal_validity +} +RegistrationNumber { + uriorcurie id PK + string number PK + string type PK + TradeRegister trade_register + TimeSpan temporal_validity PK +} +GovernanceStructure { + uriorcurie id PK + string structure_type PK + stringList organizational_units + string governance_body + string description +} +LegalStatus { + uriorcurie id PK + string status_code PK + string status_name PK + string description + TimeSpan temporal_validity PK + Jurisdiction jurisdiction +} +RegistrationAuthority { + uriorcurie id PK + string name PK + string 
name_local + string abbreviation + Country country PK + uri registry_url PK + uri api_url + uri sparql_endpoint + uri data_license + RegistrationAuthorityGovernanceEnum governance_type PK + integer established_year + RegistrationAuthority predecessor + StandardList standards_maintained + AllocationAgencyList allocation_agencies + uri website + string description + string wikidata_id +} +Country { + string alpha_2 PK + string alpha_3 PK +} +Subregion { + string iso_3166_2_code PK + Country country PK + string subdivision_name +} +Settlement { + integer geonames_id + string settlement_name PK + Country country PK + Subregion subregion + float latitude + float longitude + uriorcurie settlement_id PK +} +DataLicensePolicy { + uriorcurie id PK + string policy_name PK + DataLicense default_license PK + ServiceLicenseList service_specific_licenses + OpennessStanceEnum openness_stance PK + stringList open_data_principles + uri policy_url + date policy_effective_date + stringList advocacy_activities + string description +} +DataLicense { + uriorcurie id PK + string name PK + string abbreviation + DataLicenseTypeEnum license_type PK + DataOpennessLevelEnum openness_level PK + uri license_url PK + uri deed_url + string version + boolean allows_commercial_use PK + boolean requires_attribution PK + boolean requires_sharealike PK + boolean allows_derivatives PK + string jurisdiction + string steward_organization + string spdx_identifier + string description +} +ServiceLicense { + string service_name PK + uri service_url + DataLicense license PK + string license_notes +} +Project { + uriorcurie project_id PK + string project_name PK + string project_short_name + string project_description PK + ProjectStatusEnum project_status PK + uri project_url + date start_date + date end_date + stringList funding_source + string funding_amount + stringList objectives + stringList deliverables + uriorcurie organizing_body PK + uriorcurieList participating_custodians + uriorcurieList 
related_projects + uri documentation_url + string contact_email + stringList keywords + uriorcurieList project_identifiers +} +Jurisdiction { + string jurisdiction_id PK + JurisdictionTypeEnum jurisdiction_type PK + Country country + Subregion subregion + Settlement settlement + string supranational_code + string gleif_jurisdiction_code + LegalSystemTypeEnum legal_system_type + string description +} +EncompassingBody { + uriorcurie id PK + string organization_name PK + EncompassingBodyTypeEnum organization_type PK + string description + string organization_legal_form + date founding_date + date dissolution_date + uriorcurieList member_custodians + string governance_authority + stringList service_offerings + string membership_criteria + uriorcurieList external_identifiers + uri website + DataLicensePolicy data_license_policy + ProjectList projects + stringList area_served + Jurisdiction legal_jurisdiction +} +UmbrellaOrganisation { + uriorcurie id PK + string organization_name PK + EncompassingBodyTypeEnum organization_type PK + string description + string organization_legal_form PK + date founding_date + date dissolution_date + uriorcurieList member_custodians + string governance_authority PK + stringList service_offerings + string membership_criteria + uriorcurieList external_identifiers + uri website + DataLicensePolicy data_license_policy + ProjectList projects + stringList area_served + Jurisdiction legal_jurisdiction PK +} +NetworkOrganisation { + uriorcurie id PK + string organization_name PK + EncompassingBodyTypeEnum organization_type PK + string description + string organization_legal_form + date founding_date + date dissolution_date + uriorcurieList member_custodians + string governance_authority + stringList service_offerings PK + string membership_criteria + uriorcurieList external_identifiers + uri website + DataLicensePolicy data_license_policy + ProjectList projects + stringList area_served + Jurisdiction legal_jurisdiction +} +Consortium { + 
uriorcurie id PK + string organization_name PK + EncompassingBodyTypeEnum organization_type PK + string description + string organization_legal_form + date founding_date + date dissolution_date + uriorcurieList member_custodians + string governance_authority + stringList service_offerings + string membership_criteria + uriorcurieList external_identifiers + uri website + DataLicensePolicy data_license_policy + ProjectList projects + stringList area_served + Jurisdiction legal_jurisdiction +} +Cooperative { + uriorcurie id PK + string organization_name PK + EncompassingBodyTypeEnum organization_type PK + string description + string organization_legal_form PK + date founding_date + date dissolution_date + uriorcurieList member_custodians + string governance_authority PK + stringList service_offerings + string membership_criteria PK + uriorcurieList external_identifiers + uri website + DataLicensePolicy data_license_policy + ProjectList projects + stringList area_served + Jurisdiction legal_jurisdiction +} +SocialMovement { + uriorcurie id PK + string organization_name PK + EncompassingBodyTypeEnum organization_type PK + string description PK + string organization_legal_form + date founding_date + date dissolution_date + uriorcurieList member_custodians + string governance_authority + stringList service_offerings + string membership_criteria + uriorcurieList external_identifiers + uri website + DataLicensePolicy data_license_policy PK + ProjectList projects + stringList area_served + Jurisdiction legal_jurisdiction +} +FundingOrganisation { + uriorcurieList implements_agenda + uriorcurieList issued_calls + stringList funding_focus + stringList funding_schemes + string total_annual_budget + string funding_source + TimeSpan programme_period + uriorcurie id PK + string organization_name PK + EncompassingBodyTypeEnum organization_type PK + string description PK + string organization_legal_form + date founding_date + date dissolution_date + uriorcurieList member_custodians 
+ string governance_authority + stringList service_offerings + string membership_criteria + uriorcurieList external_identifiers + uri website + DataLicensePolicy data_license_policy + ProjectList projects + stringList area_served + Jurisdiction legal_jurisdiction +} +FeaturePlace { + FeatureTypeEnum feature_type PK + string feature_name + string feature_language + string feature_description + string feature_note + CustodianPlace classifies_place PK + CustodianObservationList was_derived_from PK + ReconstructionActivity was_generated_by + date valid_from + date valid_to +} +DigitalPlatform { + uriorcurie platform_id PK + string platform_name PK + DigitalPlatformTypeList platform_type PK + uri homepage_web_address PK + uriList collection_web_addresses + uriList inventory_web_addresses + uri api_endpoint + uri sparql_endpoint + uri oai_pmh_endpoint + stringList programming_languages + string repository_software + CollectionManagementSystemList powered_by_cms + boolean iiif_support + boolean linked_data + stringList metadata_standards + string access_restrictions + AuxiliaryDigitalPlatformList auxiliary_platforms + TimeSpan temporal_extent + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + Custodian refers_to_custodian PK + string preservation_level + string storage_location + date fixity_check_date +} +CollectionManagementSystem { + uriorcurie cms_id PK + string cms_product_name PK + string cms_product_version + string cms_category + boolean open_source + string license + string vendor_name + uri vendor_url + uri documentation_url + stringList programming_languages + uri repository_url + stringList supported_metadata_standards + boolean iiif_compatible + boolean linked_data_export + boolean api_available + DigitalPlatformList powers_platform + CustodianCollectionList manages_collection + CustodianList used_by_custodian + date deployment_date + TimeSpan temporal_extent + CustodianObservationList was_derived_from + 
ReconstructionActivity was_generated_by + Custodian refers_to_custodian PK +} +TradeRegister { + string register_id PK + string register_name PK + string register_name_local + string register_abbreviation + RegisterTypeEnum register_type PK + Jurisdiction jurisdiction PK + RegistrationAuthority maintained_by PK + string gleif_ra_code + uri website + uri api_endpoint + string identifier_format + string description +} +StandardsOrganization { + uriorcurie id PK + string name PK + string abbreviation PK + StandardsOrganizationTypeEnum organization_type PK + stringList member_countries + integer founded_year + string headquarters_country + uri website + string description + StandardList standards_maintained + string wikidata_id + string country +} +Standard { + uriorcurie id PK + string name PK + string abbreviation PK + string iso_standard_number + StandardsOrganization defined_by PK + RegistrationAuthority registration_authority + CountryList country_scope + StandardScopeTypeEnum scope_type PK + IdentifierDomainEnum identifier_domain PK + IdentifierFormatList formats + IdentifierFormat canonical_format + uri website + string lookup_url_template + integer first_published_year + string current_version + string description + StandardTypeEnum standard_type PK + GovernanceModelEnum governance_model + ContributingAgencyList contributing_agencies + StandardsOrganization governance_council + uri data_license + uriorcurieList applicable_schema_types + uriorcurie wikidata_id + stringList glamorcubesfixphdnt_types + string category +} +IdentifierFormat { + uriorcurie id PK + string format_name PK + string pattern PK + string example PK + boolean is_canonical PK + boolean is_uri_format PK + string transformation_to_canonical +} +AllocationAgency { + uriorcurie id PK + string name PK + string name_local + string abbreviation + CountryList country_scope PK + SubregionList subregion_scope + AllocationDomainEnumList allocation_domain PK + StandardList allocates_for PK + string 
allocation_prefix + RegistrationAuthority parent_registration_authority + date allocation_start_date + date allocation_end_date + boolean is_active PK + uri website + string contact_email + uri allocation_policy_url + string description +} +ContributingAgency { + uriorcurie id PK + string contributor_code PK + string name PK + string name_local + string abbreviation + Country country PK + string authority_file_name + string authority_file_abbreviation + uri authority_file_url + AuthorityRecordFormatEnum record_format PK + AuthorityEntityTypeEnumList entity_types_covered PK + StandardList contributes_to PK + date contribution_start_date + boolean is_active PK + boolean governance_representative + uri website + string description + AllocationAgency also_allocation_agency + StandardsOrganizationList member_of + ConsortiumGovernanceRoleEnum governance_role +} +CustodianArchive { + uriorcurie id PK + string archive_name PK + string archive_description + string accession_number PK + date accession_date PK + date accumulation_date_start + date accumulation_date_end + string creating_agency + ArchiveProcessingStatusEnum processing_status PK + string processing_priority + string estimated_extent + StorageList storage_location + CollectionManagementSystemList tracked_in_cms + string assigned_processor + date processing_started_date + date processing_completed_date + date transfer_to_collection_date + uriorcurie successor_collection + string access_restrictions + string appraisal_notes + string arrangement_notes + OrganizationalStructure managing_unit + Custodian refers_to_custodian PK + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + date valid_from + date valid_to +} +ArticlesOfAssociation { + uriorcurie id PK + string document_title PK + string document_description + string document_type PK + date execution_date PK + date effective_date + string notary_name + string notary_office + string notarial_deed_number + integer version_number + 
boolean is_current_version PK + ArticlesOfAssociation supersedes + ArticlesOfAssociation superseded_by + string purpose_clause + string registered_office_clause + string governance_clauses + stringList amendment_history + string language + uri document_url + string document_format + RecordsLifecycleStageEnum current_archival_stage PK + CustodianArchive archived_in + CustodianCollection collected_in + boolean requires_articles_at_registration + CustodianLegalStatus refers_to_legal_status PK + Custodian refers_to_custodian PK + LegalForm legal_form + Jurisdiction jurisdiction + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + date valid_from + date valid_to +} +SocialMediaProfile { + uriorcurie social_media_profile_id PK + SocialMediaPlatformTypeEnum platform_type PK + string platform_name + string account_name PK + string account_id + uri profile_url PK + string profile_description + boolean is_primary_digital_presence + PrimaryDigitalPresenceAssertionList primary_presence_assertions + boolean verified + integer follower_count + integer following_count + integer post_count + float engagement_rate + datetime metrics_observed_date + uri profile_image_url + uri cover_image_url + DigitalPlatform associated_digital_platform + AuxiliaryDigitalPlatform associated_auxiliary_platform + date created_date + date valid_from + date valid_to + TimeSpan temporal_extent + string account_status + string language + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + Custodian refers_to_custodian PK +} +InternetOfThings { + uriorcurie device_id PK + string device_name PK + DigitalPresenceTypeEnum device_type PK + string device_model + string device_manufacturer + integer device_count + string coverage_area + string purpose PK + string technical_specifications + stringList connectivity_type + string power_source + uri publishes_to + uri api_endpoint + string data_format + string update_frequency + CustodianPlace 
installed_at_place + date installation_date + date decommission_date + TimeSpan temporal_extent + string operational_status + string maintenance_schedule + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + Custodian refers_to_custodian PK +} +FundingRequirement { + uriorcurie requirement_id PK + FundingRequirementTypeEnum requirement_type PK + string requirement_text PK + string requirement_value + string requirement_unit + boolean is_mandatory + uriorcurie applies_to_call + uriorcurie observed_in PK + string source_section + date valid_from + date valid_to + uriorcurie supersedes + float extraction_confidence + string extraction_notes +} +CallForApplication { + uriorcurie call_id PK + string call_title PK + string call_short_name + string call_description + CallForApplicationStatusEnum call_status PK + uri call_url + date application_opening_date + date application_deadline PK + date results_expected_date + string total_budget + string typical_grant_range + stringList eligible_applicants + stringList eligible_countries + stringList thematic_areas + stringList heritage_types + string funding_rate + boolean co_funding_required + boolean partnership_required + integer minimum_partners + uriorcurie issuing_organisation PK + string parent_programme + integer programme_year + uriorcurieList call_identifiers + uriorcurieList related_calls + string contact_email + stringList info_session_dates + stringList keywords + uriorcurieList web_observations + FundingRequirementList requirements +} +WebObservation { + uriorcurie observation_id + uri source_url + datetime retrieved_on + string retrieved_by + string retrieval_method + string content_hash + integer http_status_code + string content_type + string page_title + datetime last_modified + string etag + float extraction_confidence + string extraction_notes + uriorcurieList observed_entities + uriorcurie previous_observation + boolean content_changed + uri archived_at + WebClaimList claims 
+} +FundingAgenda { + uriorcurie agenda_id PK + string agenda_title PK + string agenda_short_name + string agenda_description + uri agenda_url + uri agenda_document_url + uriorcurie governing_body + uriorcurieList implementing_organisations + TimeSpan validity_period + ThematicRouteList thematic_routes + stringList strategic_objectives + string heritage_relevance + string total_investment + stringList geographic_scope + uriorcurieList related_agendas + stringList keywords + string language +} +ThematicRoute { + uriorcurie route_id PK + string route_title PK + string route_description + stringList route_keywords + string route_relevance_to_heritage +} +WebPortal { + uriorcurie portal_id PK + string portal_name PK + WebPortalTypeEnum portal_type PK + uri portal_url PK + string portal_description + stringList geographic_scope + stringList thematic_scope + uriorcurieList portal_data_sources + uriorcurieList exposes_collections + uriorcurie operated_by PK + uriorcurieList aggregates_from + uriorcurieList aggregated_by + stringList metadata_standards + uri api_endpoint + uri sparql_endpoint + uri oai_pmh_endpoint + stringList portal_language + date launch_date + string portal_status + uriorcurie successor_portal + integer record_count + integer participating_institutions + uriorcurieList identifiers + TimeSpan temporal_extent + CustodianObservationList was_derived_from PK + ReconstructionActivity was_generated_by +} +PrimaryDigitalPresenceAssertion { + uriorcurie assertion_id PK + uriorcurie about_digital_presence PK + DigitalPresenceTypeEnum digital_presence_type + boolean assertion_value PK + string assertion_rationale + TimeSpan temporal_extent + WebObservationList based_on_observations + datetime assertion_date + string asserted_by + float confidence_score + uriorcurie superseded_by + uriorcurie supersedes +} +GiftShop { + uriorcurie shop_id PK + string shop_name PK + GiftShopTypeEnum shop_type PK + string shop_description + AuxiliaryPlaceList physical_location + 
AuxiliaryDigitalPlatformList online_shop + ProductCategoryEnumList product_categories PK + string price_currency PK + string price_range + stringList accepts_payment_methods + string opening_hours + string annual_revenue + float visitor_conversion_rate + integer staff_count + float square_meters + string managed_by + stringList supplier_relationships + date valid_from + date valid_to + TimeSpan temporal_extent + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + Custodian refers_to_custodian PK +} +Storage { + uriorcurie storage_id PK + string storage_name PK + StorageTypeEnum storage_type PK + string storage_description + AuxiliaryPlace storage_location + string capacity_description + float capacity_linear_meters + float capacity_cubic_meters + integer capacity_items + float current_utilization_percent + CustodianCollectionList stores_collections + StorageStandardEnumList standards_applied + StorageConditionPolicy condition_policy + StorageConditionList storage_conditions + string managed_by + date valid_from + date valid_to + TimeSpan temporal_extent + Custodian refers_to_custodian PK +} +StorageCondition { + uriorcurie condition_id PK + Storage refers_to_storage PK + date observation_date PK + TimeSpan observation_period + StorageObserverTypeEnum observer_type PK + string observer_name + string observer_affiliation + boolean is_official_assessment PK + StorageConditionStatusEnum overall_status PK + StorageConditionCategoryAssessmentList category_assessments + string observation_notes + uriorcurieList evidence_documentation + string measurement_data + string compliance_status + boolean remediation_required + string remediation_notes + date follow_up_date + float confidence_score + StorageCondition supersedes +} +StorageConditionCategoryAssessment { + string assessment_category PK + StorageConditionStatusEnum category_status PK + string category_measurement + string category_notes +} +StorageConditionPolicy { + uriorcurie 
policy_id PK + string policy_name PK + string policy_description + float temperature_target + float temperature_min + float temperature_max + float temperature_tolerance + float humidity_target + float humidity_min + float humidity_max + float humidity_tolerance + float light_max_lux + boolean uv_filtered_required + float air_changes_per_hour + float particulate_max + boolean pest_management_required + string fire_suppression_type + boolean flood_protection_required + string security_level + string access_restrictions + StorageStandardEnumList standards_compliance + date policy_effective_from PK + date policy_effective_to + string policy_approved_by + date policy_review_date + string notes +} +CustodianAdministration { + uriorcurie id PK + string administration_name PK + string administration_description + stringList record_type + OrganizationalStructure managing_unit + string creating_function + date active_since + string estimated_volume + string growth_rate + DigitalPlatform primary_system + DigitalPlatformList secondary_systems + string retention_schedule + integer retention_period_years + date expected_transfer_date + string data_sensitivity + boolean gdpr_relevant + string business_criticality + string backup_status + string access_control + Custodian refers_to_custodian PK + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + date valid_from + date valid_to +} +Budget { + uriorcurie id PK + string budget_name PK + string budget_description + stringList budget_type PK + date fiscal_year_start PK + date fiscal_year_end PK + decimal total_budget_amount + string budget_currency PK + decimal operating_budget + decimal capital_budget + decimal acquisition_budget + decimal personnel_budget + decimal preservation_budget + decimal digitization_budget + decimal external_funding + decimal internal_funding + decimal endowment_draw + date approval_date + string approved_by + string budget_status PK + integer revision_number + date 
revision_date + OrganizationalStructure managing_unit + uriorcurieList documented_by + Custodian refers_to_custodian PK + CustodianObservationList was_derived_from + ReconstructionActivity was_generated_by + date valid_from + date valid_to +} +WebClaim { + uriorcurie claim_id + ClaimTypeEnum claim_type PK + string claim_value PK + uri source_url PK + datetime retrieved_on PK + string xpath PK + string html_file PK + float xpath_match_score PK + string xpath_matched_text + datetime extraction_timestamp + string extraction_method + string claim_notes +} + + %% Enumerations with Instance Data +ReconstructionActivityTypeEnum { + string enum_type PK + string MANUAL_CURATION + string ALGORITHMIC_MATCHING + string HYBRID + string EXPERT_REVIEW +} +AgentTypeEnum { + string enum_type PK + string PERSON + string GROUP + string ORGANIZATION + string FORMAL_ORGANIZATION + string PUBLIC_ORGANIZATION + string ORGANIZATIONAL_UNIT + string ORGANIZATIONAL_COLLABORATION + string SOFTWARE +} +AppellationTypeEnum { + string enum_type PK + string OFFICIAL + string VERNACULAR + string HISTORICAL + string TRANSLATION + string ABBREVIATION + string ALTERNATIVE +} +SourceDocumentTypeEnum { + string enum_type PK + string ARCHIVAL_DOCUMENT + string WEBSITE + string LETTERHEAD + string STATUTE + string PUBLICATION + string DATABASE + string SIGNAGE +} +CustodianPrimaryTypeEnum { + string enum_type PK + string GALLERY_G + string LIBRARY_L + string ARCHIVE_A + string MUSEUM_M + string OFFICIAL_INSTITUTION_O + string RESEARCH_CENTER_R + string COMMERCIAL_C + string UNSPECIFIED_U + string BIO_CUSTODIAN_B + string EDUCATION_PROVIDER_E + string HERITAGE_SOCIETY_S + string FEATURE_CUSTODIAN_F + string INTANGIBLE_HERITAGE_GROUP_I + string MIXED_X + string PERSONAL_COLLECTION_P + string _and_4_more +} +EncompassingBodyTypeEnum { + string enum_type PK + string UMBRELLA + string NETWORK + string CONSORTIUM + string COOPERATIVE + string SOCIAL_MOVEMENT + string FUNDING_BODY +} +EntityTypeEnum { + string 
enum_type PK + string INDIVIDUAL + string GROUP + string ORGANIZATION + string GOVERNMENT + string CORPORATION +} +LegalStatusEnum { + string enum_type PK + string ACTIVE + string DISSOLVED + string MERGED + string SUSPENDED + string BANKRUPTCY + string LIQUIDATION + string UNKNOWN +} +OrganizationalUnitTypeEnum { + string enum_type PK + string DEPARTMENT + string TEAM + string DIVISION + string GROUP + string PROGRAM + string SERVICE + string LAB + string OFFICE + string UNIT +} +OrganizationalChangeEventTypeEnum { + string enum_type PK + string FOUNDING + string DISSOLUTION + string MERGER + string SPLIT + string SPIN_OFF + string EXPANSION + string REORGANIZATION + string RENAMING + string TRANSFER + string REDUCTION + string RELOCATION +} +OrganizationalChangeEventCategoryEnum { + string enum_type PK + string EXISTENTIAL + string STATE +} +PlaceSpecificityEnum { + string enum_type PK + string BUILDING + string STREET + string NEIGHBORHOOD + string CITY + string REGION + string VAGUE +} +AuxiliaryPlaceTypeEnum { + string enum_type PK + string OFFSITE_STORAGE + string CLIMATE_CONTROLLED_VAULT + string OPEN_STORAGE + string QUARANTINE_AREA + string ADMINISTRATIVE_OFFICE + string STAFF_FACILITY + string LOADING_DOCK + string BRANCH_LOCATION + string HISTORIC_SITE + string EDUCATION_CENTER + string EVENT_VENUE + string CONSERVATION_LAB + string DIGITIZATION_CENTER + string RESEARCH_FACILITY + string PARTNER_FACILITY + string _and_2_more +} +OrganizationBranchTypeEnum { + string enum_type PK + string MAIN_LOCATION + string BRANCH_LIBRARY + string SATELLITE_MUSEUM + string READING_ROOM + string CONSERVATION_CENTER + string STORAGE_FACILITY + string DIGITIZATION_CENTER + string EDUCATION_CENTER + string ADMINISTRATIVE_OFFICE + string REGIONAL_OFFICE + string GOVERNANCE_OFFICE + string CONSORTIUM_MEMBER + string SHARED_FACILITY + string FRANCHISE_LOCATION + string POP_UP_LOCATION + string _and_1_more +} +AuxiliaryDigitalPlatformTypeEnum { + string enum_type PK + string 
PROJECT_WEBSITE + string EXHIBITION_MICROSITE + string API_ENDPOINT + string MOBILE_APP + string COLLECTION_BROWSER + string CROWDSOURCING_PLATFORM + string EDUCATIONAL_PORTAL + string DATA_PORTAL + string LEGACY_PLATFORM + string VIRTUAL_TOUR + string BLOG_NEWS + string SOCIAL_MEDIA + string PODCAST_CHANNEL + string BOOKING_SYSTEM + string WEBSHOP + string _and_1_more +} +FeatureTypeEnum { + string enum_type PK + string MANSION + string VACATION_PROPERTY + string BUITENPLAATS + string URBAN_SETTLEMENT + string TOWN + string PARISH_CHURCH + string SEWERAGE_PUMPING_STATION + string ARTIFICIAL_OBJECT + string PHYSICAL_OBJECT + string ARTIFICIAL_PHYSICAL_OBJECT + string PHYSICAL_STRUCTURE + string ARTIFICIAL_PHYSICAL_STRUCTURE + string INFRASTRUCTURE + string TRANSPORT_INFRASTRUCTURE + string CIVIL_ENGINEERING_CONSTRUCTION + string _and_279_more +} +StaffRoleTypeEnum { + string enum_type PK + string CURATOR + string COLLECTIONS_MANAGER + string CONSERVATOR + string ARCHIVIST + string RECORDS_MANAGER + string LIBRARIAN + string DIGITAL_PRESERVATION_SPECIALIST + string DIGITIZATION_SPECIALIST + string DATA_MANAGER + string EDUCATOR + string PUBLIC_ENGAGEMENT_SPECIALIST + string DIRECTOR + string DEPUTY_DIRECTOR + string DEPARTMENT_HEAD + string RESEARCHER + string _and_3_more +} +CallForApplicationStatusEnum { + string enum_type PK + string ANNOUNCED + string OPEN + string CLOSING_SOON + string CLOSED + string UNDER_REVIEW + string RESULTS_PUBLISHED + string CANCELLED + string REOPENED +} +FundingRequirementTypeEnum { + string enum_type PK + string LEGAL_STATUS + string GEOGRAPHIC_SCOPE + string INSTITUTION_TYPE + string COLLECTION_SIGNIFICANCE + string TRACK_RECORD + string MATCHING_FUNDS + string IN_KIND_CONTRIBUTION + string MULTIPLE_FUNDERS + string EARNED_INCOME_RATIO + string BOARD_COMPOSITION + string CONFLICT_OF_INTEREST + string FAIR_PAY + string DIVERSITY_INCLUSION + string STRATEGIC_PLAN + string ANNUAL_REPORT + string _and_16_more +} +WebPortalTypeEnum { + 
string enum_type PK + string NATIONAL_AGGREGATOR + string REGIONAL_AGGREGATOR + string ARCHIVAL_PORTAL + string LIBRARY_UNION_CATALOG + string MUSEUM_COLLECTION_PORTAL + string CROSS_DOMAIN_AGGREGATOR + string COLONIAL_HERITAGE_PORTAL + string MONASTIC_HERITAGE_PORTAL + string GENEALOGICAL_PORTAL + string NEWSPAPER_DIGITIZATION_PORTAL + string ARCHAEOLOGICAL_PORTAL + string DIGITAL_LIBRARY_PORTAL + string LINKED_DATA_HUB + string IIIF_AGGREGATOR + string OAI_PMH_HARVESTER + string _and_4_more +} +SocialMediaPlatformTypeEnum { + string enum_type PK + string INSTAGRAM + string PINTEREST + string FLICKR + string YOUTUBE + string TIKTOK + string VIMEO + string TWITCH + string FACEBOOK + string X_TWITTER + string LINKEDIN + string THREADS + string BLUESKY + string MASTODON + string WHATSAPP + string TELEGRAM + string _and_10_more +} +DigitalPresenceTypeEnum { + string enum_type PK + string WEBSITE + string WEB_APPLICATION + string DISCOVERY_PORTAL + string DIGITAL_REPOSITORY + string PROJECT_WEBSITE + string EXHIBITION_MICROSITE + string SOCIAL_MEDIA + string MESSAGING_SERVICE + string API_SERVICE + string MOBILE_APP + string IOT_BEACON + string IOT_KIOSK + string IOT_SENSOR + string LEARNING_PLATFORM + string VIRTUAL_TOUR + string _and_2_more +} +GeometryTypeEnum { + string enum_type PK + string POINT + string LINESTRING + string POLYGON + string MULTIPOINT + string MULTILINESTRING + string MULTIPOLYGON + string GEOMETRYCOLLECTION +} +RegistrationAuthorityGovernanceEnum { + string enum_type PK + string GOVERNMENT + string INTERGOVERNMENTAL + string NONPROFIT + string CONSORTIUM + string COMMERCIAL +} +DataLicenseTypeEnum { + string enum_type PK + string CREATIVE_COMMONS + string OPEN_DATA_COMMONS + string PUBLIC_DOMAIN + string OPEN_SOURCE + string GOVERNMENT_OPEN + string PROPRIETARY + string TERMS_OF_SERVICE +} +DataOpennessLevelEnum { + string enum_type PK + string FULLY_OPEN + string OPEN_WITH_ATTRIBUTION + string OPEN_SHAREALIKE + string RESTRICTED_NONCOMMERCIAL + 
string RESTRICTED_NO_DERIVATIVES + string CLOSED_SUBSCRIPTION + string CLOSED_PROPRIETARY +} +OpennessStanceEnum { + string enum_type PK + string STRONG_OPEN_ADVOCATE + string OPEN_BY_DEFAULT + string MIXED_POLICY + string CLOSED_BY_DEFAULT + string FULLY_PROPRIETARY +} +JurisdictionTypeEnum { + string enum_type PK + string NATIONAL + string SUBNATIONAL + string MUNICIPAL + string SUPRANATIONAL +} +LegalSystemTypeEnum { + string enum_type PK + string CIVIL_LAW + string COMMON_LAW + string MIXED + string RELIGIOUS + string CUSTOMARY +} +RegisterTypeEnum { + string enum_type PK + string COMMERCIAL + string FOUNDATION + string ASSOCIATION + string CHARITY + string CULTURAL + string MIXED +} +StandardsOrganizationTypeEnum { + string enum_type PK + string INTERGOVERNMENTAL + string NATIONAL + string INDUSTRY_CONSORTIUM + string LIBRARY_COOPERATIVE + string PROFESSIONAL_ASSOCIATION + string NATIONAL_MUSEUM_ASSOCIATION + string CERTIFICATION_BODY + string GOVERNANCE_COUNCIL +} +StandardTypeEnum { + string enum_type PK + string ISO_STANDARD + string CONSORTIUM_SERVICE + string PROPRIETARY_SYSTEM + string NATIONAL_STANDARD + string COMMUNITY_STANDARD + string QUALITY_STANDARD + string COMMERCIAL_SERVICE + string GOVERNMENT_REGISTRY + string INTERNATIONAL_TREATY + string CROWDSOURCED + string _and_6_more +} +GovernanceModelEnum { + string enum_type PK + string ISO_TC + string COUNCIL + string SINGLE_AUTHORITY + string COMMUNITY_CONSENSUS + string MEMBERSHIP_BOARD + string PROPRIETARY + string GOVERNMENT + string ACADEMIC + string INTERGOVERNMENTAL + string COMMUNITY + string _and_1_more +} +StandardScopeTypeEnum { + string enum_type PK + string GLOBAL + string NATIONAL + string REGIONAL + string DOMAIN_SPECIFIC + string INSTITUTIONAL +} +IdentifierDomainEnum { + string enum_type PK + string ORGANIZATION + string HERITAGE_INSTITUTION + string PERSON + string WORK + string NAME_AUTHORITY + string RESEARCH_ORG + string LEGAL_ENTITY + string COLLECTION + string PLACE + string 
BUILDING + string _and_8_more +} +AllocationDomainEnum { + string enum_type PK + string LIBRARY_PUBLIC + string LIBRARY_ACADEMIC + string LIBRARY_RESEARCH + string LIBRARY_NATIONAL + string ARCHIVE + string MUSEUM + string GALLERY + string HERITAGE_SOCIETY + string RESEARCH_ORGANIZATION + string EDUCATION_PROVIDER + string _and_3_more +} +AuthorityRecordFormatEnum { + string enum_type PK + string MARC21_AUTHORITY + string UNIMARC_AUTHORITY + string RDF + string PROPRIETARY +} +AuthorityEntityTypeEnum { + string enum_type PK + string PERSON + string CORPORATE_BODY + string GEOGRAPHIC + string WORK + string SUBJECT + string EVENT + string FAMILY +} +ConsortiumGovernanceRoleEnum { + string enum_type PK + string VOTING_MEMBER + string OBSERVER + string FOUNDING_MEMBER + string ASSOCIATE + string REGIONAL_REPRESENTATIVE +} +RecordsLifecycleStageEnum { + string enum_type PK + string PRE_EXISTENCE + string ACTIVE + string INACTIVE + string HERITAGE +} +IdentifierStandardEnum { + string enum_type PK + string ISIL + string ISNI + string VIAF + string GND + string LCNAF + string BNF + string NTA + string NDL + string NLA + string BNE + string _and_121_more +} +ProjectStatusEnum { + string enum_type PK + string PROPOSED + string APPROVED + string IN_PROGRESS + string ON_HOLD + string COMPLETED + string DISCONTINUED + string EXTENDED +} +GiftShopTypeEnum { + string enum_type PK + string MAIN_SHOP + string EXHIBITION_SHOP + string SATELLITE_SHOP + string KIOSK + string MEMBERS_SHOP + string ONLINE_STORE + string MARKETPLACE_PRESENCE + string DIGITAL_DOWNLOADS + string BOOKSHOP + string DESIGN_STORE + string CRAFT_SHOP + string CHILDREN_SHOP +} +ProductCategoryEnum { + string enum_type PK + string REPRODUCTIONS + string BOOKS + string DESIGN_OBJECTS + string JEWELRY + string TEXTILES + string STATIONERY + string HOME_DECOR + string TOYS + string FOOD + string SOUVENIRS + string _and_19_more +} +StorageConditionStatusEnum { + string enum_type PK + string OPTIMAL + string STABLE + 
string CONTROLLED + string ACCEPTABLE + string MINOR_FLUCTUATION + string HUMIDITY_HIGH + string HUMIDITY_LOW + string CONCERNING + string MAJOR_FLUCTUATION + string PEST_DETECTED + string LIGHT_EXPOSURE + string CRITICAL + string WATER_DAMAGE + string FIRE_DAMAGE + string MOLD_OUTBREAK + string _and_10_more +} +StorageConditionCategoryEnum { + string enum_type PK + string TEMPERATURE + string HUMIDITY + string LIGHT + string AIR_QUALITY + string PEST_CONTROL + string FIRE_SAFETY + string FLOOD_WATER + string SECURITY + string STRUCTURAL + string SPACE_CAPACITY +} +StorageObserverTypeEnum { + string enum_type PK + string INTERNAL_STAFF + string EXTERNAL_CONSULTANT + string GOVERNMENT_INSPECTOR + string ACCREDITATION_ASSESSOR + string INSURANCE_ASSESSOR + string JOURNALIST + string RESEARCHER + string VISITING_PROFESSIONAL + string PUBLIC_VISITOR + string WHISTLEBLOWER + string _and_2_more +} +StorageStandardEnum { + string enum_type PK + string ISO_82306 + string ISO_TR_19815_2018 + string EN_16893_2018 + string EN_15757_2010 + string ISO_9706_2025 + string ISO_11108 + string ISO_20494 + string PAS_198_2012 + string BS_5454_2000 + string EN_16141_2012 + string _and_5_more +} +StorageTypeEnum { + string enum_type PK + string ARCHIVE_DEPOT + string ART_STORAGE + string GENERAL_DEPOT + string COLD_STORAGE + string HIGH_SECURITY_VAULT + string OPEN_STORAGE + string OFFSITE_STORAGE + string COMPACT_STORAGE + string HAZMAT_STORAGE + string DIGITAL_STORAGE + string TEXTILE_STORAGE + string PHOTOGRAPH_STORAGE + string ARCHAEOLOGICAL_STORAGE + string NATURAL_HISTORY_STORAGE + string TEMPORARY_STORAGE +} +ArchiveProcessingStatusEnum { + string enum_type PK + string UNPROCESSED + string IN_APPRAISAL + string IN_ARRANGEMENT + string IN_DESCRIPTION + string IN_PRESERVATION + string PROCESSED_PENDING_TRANSFER + string TRANSFERRED_TO_COLLECTION + string PARTIALLY_PROCESSED + string ON_HOLD + string DEACCESSIONED +} +ClaimTypeEnum { + string enum_type PK + string full_name + 
string short_name + string description + string email + string phone + string address + string website + string social_media + string facebook + string twitter + string _and_14_more +} + +CustodianAppellation ||--|o AppellationTypeEnum : "appellation_type" +CustodianAppellation ||--|o CustodianName : "variant_of_name" +CustodianName ||--|| ReconstructedEntity : "inherits" +CustodianName ||--}o CustodianAppellation : "alternative_names" +CustodianName ||--|o TimeSpan : "name_validity_period" +CustodianName ||--|o CustodianName : "supersedes" +CustodianName ||--|o CustodianName : "superseded_by" +CustodianName ||--}| CustodianObservation : "was_derived_from" +CustodianName ||--|o ReconstructionActivity : "was_generated_by" +CustodianName ||--|| Custodian : "refers_to_custodian" +ReconstructionAgent ||--|o AgentTypeEnum : "agent_type" +CustodianObservation ||--|| CustodianAppellation : "observed_name" +CustodianObservation ||--}o CustodianAppellation : "alternative_observed_names" +CustodianObservation ||--|| SourceDocument : "source" +CustodianObservation ||--|o LanguageCode : "language" +CustodianObservation ||--|o CustodianLegalStatus : "derived_from_entity" +CustodianObservation ||--|o ConfidenceMeasure : "confidence_score" +CustodianLegalStatus ||--|| ReconstructedEntity : "inherits" +CustodianLegalStatus ||--|| Custodian : "refers_to_custodian" +CustodianLegalStatus ||--|| LegalEntityType : "legal_entity_type" +CustodianLegalStatus ||--|| LegalName : "legal_name" +CustodianLegalStatus ||--|o LegalForm : "legal_form" +CustodianLegalStatus ||--}o RegistrationNumber : "registration_numbers" +CustodianLegalStatus ||--|o RegistrationAuthority : "registration_authority" +CustodianLegalStatus ||--|o TradeRegister : "primary_register" +CustodianLegalStatus ||--|o Jurisdiction : "legal_jurisdiction" +CustodianLegalStatus ||--|o TimeSpan : "temporal_extent" +CustodianLegalStatus ||--|o CustodianLegalStatus : "parent_custodian" +CustodianLegalStatus ||--|| LegalStatus : 
"legal_status" +CustodianLegalStatus ||--|o GovernanceStructure : "governance_structure" +CustodianLegalStatus ||--}o ArticlesOfAssociation : "has_articles_of_association" +CustodianLegalStatus ||--}| CustodianObservation : "was_derived_from" +CustodianLegalStatus ||--|| ReconstructionActivity : "was_generated_by" +CustodianLegalStatus ||--|o CustodianLegalStatus : "was_revision_of" +CustodianLegalStatus ||--}o CustodianIdentifier : "identifiers" +CustodianLegalStatus ||--}o LegalResponsibilityCollection : "collections_under_responsibility" +ReconstructedEntity ||--|o ReconstructionActivity : "was_generated_by" +Custodian ||--|o CustodianType : "custodian_type" +Custodian ||--}o CustodianArchive : "has_operational_archive" +Custodian ||--}o CustodianAdministration : "has_administration" +Custodian ||--}o Budget : "has_budget" +Custodian ||--}o SocialMediaProfile : "social_media_profiles" +Custodian ||--|o DataLicensePolicy : "data_license_policy" +Custodian ||--}o Project : "participated_in_projects" +Custodian ||--}o GiftShop : "gift_shop" +Custodian ||--}o Storage : "storage_facilities" +CustodianType ||--|o CustodianPrimaryTypeEnum : "primary_type" +CustodianType ||--|o CustodianType : "broader_type" +CustodianType ||--}o CustodianType : "narrower_types" +CustodianType ||--}o CustodianType : "related_types" +ArchiveOrganizationType ||--|| CustodianType : "inherits" +ArchiveOrganizationType ||--|o CustodianPrimaryTypeEnum : "primary_type" +ArchiveOrganizationType ||--|o ArchiveOrganizationType : "broader_type" +ArchiveOrganizationType ||--}o CustodianType : "narrower_types" +ArchiveOrganizationType ||--}o CustodianType : "related_types" +MuseumType ||--|| CustodianType : "inherits" +MuseumType ||--|o CustodianPrimaryTypeEnum : "primary_type" +MuseumType ||--|o MuseumType : "broader_type" +MuseumType ||--}o CustodianType : "narrower_types" +MuseumType ||--}o CustodianType : "related_types" +LibraryType ||--|| CustodianType : "inherits" +LibraryType ||--|o 
CustodianPrimaryTypeEnum : "primary_type" +LibraryType ||--|o LibraryType : "broader_type" +LibraryType ||--}o CustodianType : "narrower_types" +LibraryType ||--}o CustodianType : "related_types" +GalleryType ||--|| CustodianType : "inherits" +GalleryType ||--|o CustodianPrimaryTypeEnum : "primary_type" +GalleryType ||--|o GalleryType : "broader_type" +GalleryType ||--}o CustodianType : "narrower_types" +GalleryType ||--}o CustodianType : "related_types" +ResearchOrganizationType ||--|| CustodianType : "inherits" +ResearchOrganizationType ||--|o CustodianPrimaryTypeEnum : "primary_type" +ResearchOrganizationType ||--|o ResearchOrganizationType : "broader_type" +ResearchOrganizationType ||--}o CustodianType : "narrower_types" +ResearchOrganizationType ||--}o CustodianType : "related_types" +OfficialInstitutionType ||--|| CustodianType : "inherits" +OfficialInstitutionType ||--|o CustodianPrimaryTypeEnum : "primary_type" +OfficialInstitutionType ||--|o CustodianType : "broader_type" +OfficialInstitutionType ||--}o CustodianType : "narrower_types" +OfficialInstitutionType ||--}o CustodianType : "related_types" +BioCustodianType ||--|| CustodianType : "inherits" +BioCustodianType ||--|o CustodianPrimaryTypeEnum : "primary_type" +BioCustodianType ||--|o CustodianType : "broader_type" +BioCustodianType ||--}o CustodianType : "narrower_types" +BioCustodianType ||--}o CustodianType : "related_types" +EducationProviderType ||--|| CustodianType : "inherits" +EducationProviderType ||--|o CustodianPrimaryTypeEnum : "primary_type" +EducationProviderType ||--|o CustodianType : "broader_type" +EducationProviderType ||--}o CustodianType : "narrower_types" +EducationProviderType ||--}o CustodianType : "related_types" +HeritageSocietyType ||--|| CustodianType : "inherits" +HeritageSocietyType ||--|o CustodianPrimaryTypeEnum : "primary_type" +HeritageSocietyType ||--|o CustodianType : "broader_type" +HeritageSocietyType ||--}o CustodianType : "narrower_types" +HeritageSocietyType 
||--}o CustodianType : "related_types" +FeatureCustodianType ||--|| CustodianType : "inherits" +FeatureCustodianType ||--|o CustodianPrimaryTypeEnum : "primary_type" +FeatureCustodianType ||--|o CustodianType : "broader_type" +FeatureCustodianType ||--}o CustodianType : "narrower_types" +FeatureCustodianType ||--}o CustodianType : "related_types" +IntangibleHeritageGroupType ||--|| CustodianType : "inherits" +IntangibleHeritageGroupType ||--|o CustodianPrimaryTypeEnum : "primary_type" +IntangibleHeritageGroupType ||--|o CustodianType : "broader_type" +IntangibleHeritageGroupType ||--}o CustodianType : "narrower_types" +IntangibleHeritageGroupType ||--}o CustodianType : "related_types" +PersonalCollectionType ||--|| CustodianType : "inherits" +PersonalCollectionType ||--|o CustodianPrimaryTypeEnum : "primary_type" +PersonalCollectionType ||--|o CustodianType : "broader_type" +PersonalCollectionType ||--}o CustodianType : "narrower_types" +PersonalCollectionType ||--}o CustodianType : "related_types" +HolySacredSiteType ||--|| CustodianType : "inherits" +HolySacredSiteType ||--|o CustodianPrimaryTypeEnum : "primary_type" +HolySacredSiteType ||--|o CustodianType : "broader_type" +HolySacredSiteType ||--}o CustodianType : "narrower_types" +HolySacredSiteType ||--}o CustodianType : "related_types" +DigitalPlatformType ||--|| CustodianType : "inherits" +DigitalPlatformType ||--|o CustodianPrimaryTypeEnum : "primary_type" +DigitalPlatformType ||--|o CustodianType : "broader_type" +DigitalPlatformType ||--}o CustodianType : "narrower_types" +DigitalPlatformType ||--}o CustodianType : "related_types" +NonProfitType ||--|| CustodianType : "inherits" +NonProfitType ||--|o CustodianPrimaryTypeEnum : "primary_type" +NonProfitType ||--|o CustodianType : "broader_type" +NonProfitType ||--}o CustodianType : "narrower_types" +NonProfitType ||--}o CustodianType : "related_types" +TasteScentHeritageType ||--|| CustodianType : "inherits" +TasteScentHeritageType ||--|o 
CustodianPrimaryTypeEnum : "primary_type" +TasteScentHeritageType ||--|o CustodianType : "broader_type" +TasteScentHeritageType ||--}o CustodianType : "narrower_types" +TasteScentHeritageType ||--}o CustodianType : "related_types" +CommercialOrganizationType ||--|| CustodianType : "inherits" +CommercialOrganizationType ||--|o CustodianPrimaryTypeEnum : "primary_type" +CommercialOrganizationType ||--|o CustodianType : "broader_type" +CommercialOrganizationType ||--}o CustodianType : "narrower_types" +CommercialOrganizationType ||--}o CustodianType : "related_types" +MixedCustodianType ||--|| CustodianType : "inherits" +MixedCustodianType ||--|o CustodianPrimaryTypeEnum : "primary_type" +MixedCustodianType ||--|o CustodianType : "broader_type" +MixedCustodianType ||--}o CustodianType : "narrower_types" +MixedCustodianType ||--}o CustodianType : "related_types" +UnspecifiedType ||--|| CustodianType : "inherits" +UnspecifiedType ||--|o CustodianPrimaryTypeEnum : "primary_type" +UnspecifiedType ||--|o CustodianType : "broader_type" +UnspecifiedType ||--}o CustodianType : "narrower_types" +UnspecifiedType ||--}o CustodianType : "related_types" +CustodianPlace ||--|| ReconstructedEntity : "inherits" +CustodianPlace ||--|o PlaceSpecificityEnum : "place_specificity" +CustodianPlace ||--|o Country : "country" +CustodianPlace ||--|o Subregion : "subregion" +CustodianPlace ||--|o Settlement : "settlement" +CustodianPlace ||--|o FeaturePlace : "has_feature_type" +CustodianPlace ||--}o GeoSpatialPlace : "has_geospatial_location" +CustodianPlace ||--}o AuxiliaryPlace : "auxiliary_places" +CustodianPlace ||--}| CustodianObservation : "was_derived_from" +CustodianPlace ||--|o ReconstructionActivity : "was_generated_by" +CustodianPlace ||--|| Custodian : "refers_to_custodian" +AuxiliaryPlace ||--|| ReconstructedEntity : "inherits" +AuxiliaryPlace ||--|o AuxiliaryPlaceTypeEnum : "auxiliary_place_type" +AuxiliaryPlace ||--|o Country : "country" +AuxiliaryPlace ||--|o Subregion : 
"subregion" +AuxiliaryPlace ||--|o Settlement : "settlement" +AuxiliaryPlace ||--}o GeoSpatialPlace : "has_geospatial_location" +AuxiliaryPlace ||--|o FeaturePlace : "has_feature_type" +AuxiliaryPlace ||--}o OrganizationBranch : "hosts_branch" +AuxiliaryPlace ||--|| CustodianPlace : "is_auxiliary_of_place" +AuxiliaryPlace ||--|o TimeSpan : "temporal_extent" +AuxiliaryPlace ||--}o CustodianObservation : "was_derived_from" +AuxiliaryPlace ||--|o ReconstructionActivity : "was_generated_by" +AuxiliaryPlace ||--|| Custodian : "refers_to_custodian" +ReconstructionActivity ||--|o ReconstructionActivityTypeEnum : "activity_type" +ReconstructionActivity ||--|o ReconstructionAgent : "responsible_agent" +ReconstructionActivity ||--|o TimeSpan : "temporal_extent" +ReconstructionActivity ||--}| CustodianObservation : "used" +ReconstructionActivity ||--|o ConfidenceMeasure : "confidence_score" +OrganizationalStructure ||--|o OrganizationalUnitTypeEnum : "unit_type" +OrganizationalStructure ||--|o OrganizationalStructure : "parent_unit" +OrganizationalStructure ||--}o PersonObservation : "staff_members" +OrganizationalStructure ||--}o CustodianCollection : "managed_collections" +OrganizationalStructure ||--}o AuxiliaryPlace : "located_at" +OrganizationalStructure ||--|| Custodian : "refers_to_custodian" +OrganizationBranch ||--|| ReconstructedEntity : "inherits" +OrganizationBranch ||--|o OrganizationBranchTypeEnum : "branch_type" +OrganizationBranch ||--}o AuxiliaryPlace : "located_at" +OrganizationBranch ||--}o OrganizationalStructure : "has_operational_unit" +OrganizationBranch ||--}o OrganizationBranch : "has_sub_branch" +OrganizationBranch ||--|o TimeSpan : "temporal_extent" +OrganizationBranch ||--}o CustodianObservation : "was_derived_from" +OrganizationBranch ||--|o ReconstructionActivity : "was_generated_by" +OrganizationBranch ||--|| Custodian : "refers_to_custodian" +AuxiliaryDigitalPlatform ||--|| ReconstructedEntity : "inherits" +AuxiliaryDigitalPlatform ||--|o 
AuxiliaryDigitalPlatformTypeEnum : "auxiliary_platform_type" +AuxiliaryDigitalPlatform ||--|| DigitalPlatform : "is_auxiliary_of_platform" +AuxiliaryDigitalPlatform ||--|o TimeSpan : "temporal_extent" +AuxiliaryDigitalPlatform ||--}o CollectionManagementSystem : "powered_by_cms" +AuxiliaryDigitalPlatform ||--}o CustodianObservation : "was_derived_from" +AuxiliaryDigitalPlatform ||--|o ReconstructionActivity : "was_generated_by" +AuxiliaryDigitalPlatform ||--|| Custodian : "refers_to_custodian" +CustodianCollection ||--|| ReconstructedEntity : "inherits" +CustodianCollection ||--|o TimeSpan : "temporal_coverage" +CustodianCollection ||--}o CollectionManagementSystem : "managed_by_cms" +CustodianCollection ||--|o OrganizationalStructure : "managing_unit" +CustodianCollection ||--|| Custodian : "refers_to_custodian" +CustodianCollection ||--}| CustodianObservation : "was_derived_from" +CustodianCollection ||--|o ReconstructionActivity : "was_generated_by" +LegalResponsibilityCollection ||--|| CustodianCollection : "inherits" +LegalResponsibilityCollection ||--|| CustodianLegalStatus : "responsible_legal_entity" +LegalResponsibilityCollection ||--|o TimeSpan : "temporal_coverage" +LegalResponsibilityCollection ||--}o CollectionManagementSystem : "managed_by_cms" +LegalResponsibilityCollection ||--|o OrganizationalStructure : "managing_unit" +LegalResponsibilityCollection ||--|| Custodian : "refers_to_custodian" +LegalResponsibilityCollection ||--}| CustodianObservation : "was_derived_from" +LegalResponsibilityCollection ||--|o ReconstructionActivity : "was_generated_by" +GeoSpatialPlace ||--|o GeometryTypeEnum : "geometry_type" +OrganizationalChangeEvent ||--|o OrganizationalChangeEventTypeEnum : "event_type" +OrganizationalChangeEvent ||--}o OrganizationalStructure : "affected_units" +OrganizationalChangeEvent ||--}o OrganizationalStructure : "resulting_units" +OrganizationalChangeEvent ||--|| Custodian : "parent_custodian" +OrganizationalChangeEvent ||--|o 
CustodianPlace : "event_location" +OrganizationalChangeEvent ||--|o CustodianPlace : "from_location" +OrganizationalChangeEvent ||--|o CustodianPlace : "to_location" +OrganizationalChangeEvent ||--}o GeoSpatialPlace : "affected_territory" +PersonObservation ||--|o StaffRoleTypeEnum : "staff_role" +PersonObservation ||--|o OrganizationalStructure : "unit_affiliation" +PersonObservation ||--|o SourceDocument : "observation_source" +PersonObservation ||--|o OrganizationalChangeEvent : "affected_by_event" +CustodianIdentifier ||--|o Custodian : "identifies_custodian" +CustodianIdentifier ||--|o Standard : "defined_by_standard" +CustodianIdentifier ||--|o AllocationAgency : "allocated_by" +CustodianIdentifier ||--|o IdentifierFormat : "identifier_format_used" +CustodianIdentifier ||--|o CustodianName : "also_identifies_name" +SourceDocument ||--|o SourceDocumentTypeEnum : "source_type" +LegalForm ||--|| Country : "country_code" +LegalForm ||--|| LegalEntityType : "legal_entity_type" +LegalForm ||--|o LegalForm : "parent_form" +LegalName ||--|o TimeSpan : "temporal_validity" +RegistrationNumber ||--|o TradeRegister : "trade_register" +RegistrationNumber ||--|| TimeSpan : "temporal_validity" +LegalStatus ||--|| TimeSpan : "temporal_validity" +LegalStatus ||--|o Jurisdiction : "jurisdiction" +RegistrationAuthority ||--|| Country : "country" +RegistrationAuthority ||--|o RegistrationAuthorityGovernanceEnum : "governance_type" +RegistrationAuthority ||--|o RegistrationAuthority : "predecessor" +RegistrationAuthority ||--}o Standard : "standards_maintained" +RegistrationAuthority ||--}o AllocationAgency : "allocation_agencies" +Subregion ||--|| Country : "country" +Settlement ||--|| Country : "country" +Settlement ||--|o Subregion : "subregion" +DataLicensePolicy ||--|| DataLicense : "default_license" +DataLicensePolicy ||--}o ServiceLicense : "service_specific_licenses" +DataLicensePolicy ||--|o OpennessStanceEnum : "openness_stance" +DataLicense ||--|o DataLicenseTypeEnum : 
"license_type" +DataLicense ||--|o DataOpennessLevelEnum : "openness_level" +ServiceLicense ||--|| DataLicense : "license" +Project ||--|o ProjectStatusEnum : "project_status" +Jurisdiction ||--|o JurisdictionTypeEnum : "jurisdiction_type" +Jurisdiction ||--|o Country : "country" +Jurisdiction ||--|o Subregion : "subregion" +Jurisdiction ||--|o Settlement : "settlement" +Jurisdiction ||--|o LegalSystemTypeEnum : "legal_system_type" +EncompassingBody ||--|o EncompassingBodyTypeEnum : "organization_type" +EncompassingBody ||--|o DataLicensePolicy : "data_license_policy" +EncompassingBody ||--}o Project : "projects" +EncompassingBody ||--|o Jurisdiction : "legal_jurisdiction" +UmbrellaOrganisation ||--|| EncompassingBody : "inherits" +UmbrellaOrganisation ||--|o EncompassingBodyTypeEnum : "organization_type" +UmbrellaOrganisation ||--|o DataLicensePolicy : "data_license_policy" +UmbrellaOrganisation ||--}o Project : "projects" +UmbrellaOrganisation ||--|| Jurisdiction : "legal_jurisdiction" +NetworkOrganisation ||--|| EncompassingBody : "inherits" +NetworkOrganisation ||--|o EncompassingBodyTypeEnum : "organization_type" +NetworkOrganisation ||--|o DataLicensePolicy : "data_license_policy" +NetworkOrganisation ||--}o Project : "projects" +NetworkOrganisation ||--|o Jurisdiction : "legal_jurisdiction" +Consortium ||--|| EncompassingBody : "inherits" +Consortium ||--|o EncompassingBodyTypeEnum : "organization_type" +Consortium ||--|o DataLicensePolicy : "data_license_policy" +Consortium ||--}o Project : "projects" +Consortium ||--|o Jurisdiction : "legal_jurisdiction" +Cooperative ||--|| EncompassingBody : "inherits" +Cooperative ||--|o EncompassingBodyTypeEnum : "organization_type" +Cooperative ||--|o DataLicensePolicy : "data_license_policy" +Cooperative ||--}o Project : "projects" +Cooperative ||--|o Jurisdiction : "legal_jurisdiction" +SocialMovement ||--|| EncompassingBody : "inherits" +SocialMovement ||--|o EncompassingBodyTypeEnum : "organization_type" 
+SocialMovement ||--|| DataLicensePolicy : "data_license_policy" +SocialMovement ||--}o Project : "projects" +SocialMovement ||--|o Jurisdiction : "legal_jurisdiction" +FundingOrganisation ||--|| EncompassingBody : "inherits" +FundingOrganisation ||--|o TimeSpan : "programme_period" +FundingOrganisation ||--|o EncompassingBodyTypeEnum : "organization_type" +FundingOrganisation ||--|o DataLicensePolicy : "data_license_policy" +FundingOrganisation ||--}o Project : "projects" +FundingOrganisation ||--|o Jurisdiction : "legal_jurisdiction" +FeaturePlace ||--|| ReconstructedEntity : "inherits" +FeaturePlace ||--|o FeatureTypeEnum : "feature_type" +FeaturePlace ||--|| CustodianPlace : "classifies_place" +FeaturePlace ||--}| CustodianObservation : "was_derived_from" +FeaturePlace ||--|o ReconstructionActivity : "was_generated_by" +DigitalPlatform ||--|| ReconstructedEntity : "inherits" +DigitalPlatform ||--}| DigitalPlatformType : "platform_type" +DigitalPlatform ||--}o CollectionManagementSystem : "powered_by_cms" +DigitalPlatform ||--}o AuxiliaryDigitalPlatform : "auxiliary_platforms" +DigitalPlatform ||--|o TimeSpan : "temporal_extent" +DigitalPlatform ||--}o CustodianObservation : "was_derived_from" +DigitalPlatform ||--|o ReconstructionActivity : "was_generated_by" +DigitalPlatform ||--|| Custodian : "refers_to_custodian" +CollectionManagementSystem ||--|| ReconstructedEntity : "inherits" +CollectionManagementSystem ||--}o DigitalPlatform : "powers_platform" +CollectionManagementSystem ||--}o CustodianCollection : "manages_collection" +CollectionManagementSystem ||--}o Custodian : "used_by_custodian" +CollectionManagementSystem ||--|o TimeSpan : "temporal_extent" +CollectionManagementSystem ||--}o CustodianObservation : "was_derived_from" +CollectionManagementSystem ||--|o ReconstructionActivity : "was_generated_by" +CollectionManagementSystem ||--|| Custodian : "refers_to_custodian" +TradeRegister ||--|o RegisterTypeEnum : "register_type" +TradeRegister ||--|| 
Jurisdiction : "jurisdiction" +TradeRegister ||--|| RegistrationAuthority : "maintained_by" +StandardsOrganization ||--|o StandardsOrganizationTypeEnum : "organization_type" +StandardsOrganization ||--}o Standard : "standards_maintained" +Standard ||--|| StandardsOrganization : "defined_by" +Standard ||--|o RegistrationAuthority : "registration_authority" +Standard ||--}o Country : "country_scope" +Standard ||--|o StandardScopeTypeEnum : "scope_type" +Standard ||--|o IdentifierDomainEnum : "identifier_domain" +Standard ||--}o IdentifierFormat : "formats" +Standard ||--|o IdentifierFormat : "canonical_format" +Standard ||--|o StandardTypeEnum : "standard_type" +Standard ||--|o GovernanceModelEnum : "governance_model" +Standard ||--}o ContributingAgency : "contributing_agencies" +Standard ||--|o StandardsOrganization : "governance_council" +AllocationAgency ||--}| Country : "country_scope" +AllocationAgency ||--}o Subregion : "subregion_scope" +AllocationAgency ||--}o AllocationDomainEnum : "allocation_domain" +AllocationAgency ||--}| Standard : "allocates_for" +AllocationAgency ||--|o RegistrationAuthority : "parent_registration_authority" +ContributingAgency ||--|| Country : "country" +ContributingAgency ||--|o AuthorityRecordFormatEnum : "record_format" +ContributingAgency ||--}o AuthorityEntityTypeEnum : "entity_types_covered" +ContributingAgency ||--}| Standard : "contributes_to" +ContributingAgency ||--|o AllocationAgency : "also_allocation_agency" +ContributingAgency ||--}o StandardsOrganization : "member_of" +ContributingAgency ||--|o ConsortiumGovernanceRoleEnum : "governance_role" +CustodianArchive ||--|| ReconstructedEntity : "inherits" +CustodianArchive ||--|o ArchiveProcessingStatusEnum : "processing_status" +CustodianArchive ||--}o Storage : "storage_location" +CustodianArchive ||--}o CollectionManagementSystem : "tracked_in_cms" +CustodianArchive ||--|o OrganizationalStructure : "managing_unit" +CustodianArchive ||--|| Custodian : "refers_to_custodian" 
+CustodianArchive ||--}o CustodianObservation : "was_derived_from" +CustodianArchive ||--|o ReconstructionActivity : "was_generated_by" +ArticlesOfAssociation ||--|| ReconstructedEntity : "inherits" +ArticlesOfAssociation ||--|o ArticlesOfAssociation : "supersedes" +ArticlesOfAssociation ||--|o ArticlesOfAssociation : "superseded_by" +ArticlesOfAssociation ||--|o RecordsLifecycleStageEnum : "current_archival_stage" +ArticlesOfAssociation ||--|o CustodianArchive : "archived_in" +ArticlesOfAssociation ||--|o CustodianCollection : "collected_in" +ArticlesOfAssociation ||--|| CustodianLegalStatus : "refers_to_legal_status" +ArticlesOfAssociation ||--|| Custodian : "refers_to_custodian" +ArticlesOfAssociation ||--|o LegalForm : "legal_form" +ArticlesOfAssociation ||--|o Jurisdiction : "jurisdiction" +ArticlesOfAssociation ||--}o CustodianObservation : "was_derived_from" +ArticlesOfAssociation ||--|o ReconstructionActivity : "was_generated_by" +SocialMediaProfile ||--|| ReconstructedEntity : "inherits" +SocialMediaProfile ||--|o SocialMediaPlatformTypeEnum : "platform_type" +SocialMediaProfile ||--}o PrimaryDigitalPresenceAssertion : "primary_presence_assertions" +SocialMediaProfile ||--|o DigitalPlatform : "associated_digital_platform" +SocialMediaProfile ||--|o AuxiliaryDigitalPlatform : "associated_auxiliary_platform" +SocialMediaProfile ||--|o TimeSpan : "temporal_extent" +SocialMediaProfile ||--}o CustodianObservation : "was_derived_from" +SocialMediaProfile ||--|o ReconstructionActivity : "was_generated_by" +SocialMediaProfile ||--|| Custodian : "refers_to_custodian" +InternetOfThings ||--|| ReconstructedEntity : "inherits" +InternetOfThings ||--|o DigitalPresenceTypeEnum : "device_type" +InternetOfThings ||--|o CustodianPlace : "installed_at_place" +InternetOfThings ||--|o TimeSpan : "temporal_extent" +InternetOfThings ||--}o CustodianObservation : "was_derived_from" +InternetOfThings ||--|o ReconstructionActivity : "was_generated_by" +InternetOfThings ||--|| 
Custodian : "refers_to_custodian" +FundingRequirement ||--|o FundingRequirementTypeEnum : "requirement_type" +CallForApplication ||--|o CallForApplicationStatusEnum : "call_status" +CallForApplication ||--}o FundingRequirement : "requirements" +WebObservation ||--}o WebClaim : "claims" +FundingAgenda ||--|o TimeSpan : "validity_period" +FundingAgenda ||--}o ThematicRoute : "thematic_routes" +WebPortal ||--|| ReconstructedEntity : "inherits" +WebPortal ||--|o WebPortalTypeEnum : "portal_type" +WebPortal ||--|o TimeSpan : "temporal_extent" +WebPortal ||--}| CustodianObservation : "was_derived_from" +WebPortal ||--|o ReconstructionActivity : "was_generated_by" +PrimaryDigitalPresenceAssertion ||--|o DigitalPresenceTypeEnum : "digital_presence_type" +PrimaryDigitalPresenceAssertion ||--|o TimeSpan : "temporal_extent" +PrimaryDigitalPresenceAssertion ||--}o WebObservation : "based_on_observations" +GiftShop ||--|| ReconstructedEntity : "inherits" +GiftShop ||--|o GiftShopTypeEnum : "shop_type" +GiftShop ||--}o AuxiliaryPlace : "physical_location" +GiftShop ||--}o AuxiliaryDigitalPlatform : "online_shop" +GiftShop ||--}o ProductCategoryEnum : "product_categories" +GiftShop ||--|o TimeSpan : "temporal_extent" +GiftShop ||--}o CustodianObservation : "was_derived_from" +GiftShop ||--|o ReconstructionActivity : "was_generated_by" +GiftShop ||--|| Custodian : "refers_to_custodian" +Storage ||--|o StorageTypeEnum : "storage_type" +Storage ||--|o AuxiliaryPlace : "storage_location" +Storage ||--}o CustodianCollection : "stores_collections" +Storage ||--}o StorageStandardEnum : "standards_applied" +Storage ||--|o StorageConditionPolicy : "condition_policy" +Storage ||--}o StorageCondition : "storage_conditions" +Storage ||--|o TimeSpan : "temporal_extent" +Storage ||--|| Custodian : "refers_to_custodian" +StorageCondition ||--|| Storage : "refers_to_storage" +StorageCondition ||--|o TimeSpan : "observation_period" +StorageCondition ||--|o StorageObserverTypeEnum : 
"observer_type" +StorageCondition ||--|o StorageConditionStatusEnum : "overall_status" +StorageCondition ||--}o StorageConditionCategoryAssessment : "category_assessments" +StorageCondition ||--|o StorageCondition : "supersedes" +StorageConditionCategoryAssessment ||--|o StorageConditionStatusEnum : "category_status" +StorageConditionPolicy ||--}o StorageStandardEnum : "standards_compliance" +CustodianAdministration ||--|| ReconstructedEntity : "inherits" +CustodianAdministration ||--|o OrganizationalStructure : "managing_unit" +CustodianAdministration ||--|o DigitalPlatform : "primary_system" +CustodianAdministration ||--}o DigitalPlatform : "secondary_systems" +CustodianAdministration ||--|| Custodian : "refers_to_custodian" +CustodianAdministration ||--}o CustodianObservation : "was_derived_from" +CustodianAdministration ||--|o ReconstructionActivity : "was_generated_by" +Budget ||--|| ReconstructedEntity : "inherits" +Budget ||--|o OrganizationalStructure : "managing_unit" +Budget ||--|| Custodian : "refers_to_custodian" +Budget ||--}o CustodianObservation : "was_derived_from" +Budget ||--|o ReconstructionActivity : "was_generated_by" +WebClaim ||--|o ClaimTypeEnum : "claim_type" + +``` diff --git a/scripts/enrich_youtube.py b/scripts/enrich_youtube.py new file mode 100755 index 0000000000..716f38b3be --- /dev/null +++ b/scripts/enrich_youtube.py @@ -0,0 +1,678 @@ +#!/usr/bin/env python3 +""" +YouTube Enrichment Script for Heritage Custodian Entries + +This script enriches heritage custodian YAML entries with YouTube channel/video data. +It finds YouTube channels from existing web_claims (social_youtube) and fetches: +- Channel info (subscribers, video count, description, etc.) +- Recent videos (title, description, views, likes, comments) +- Video transcripts (when available) +- Comments on videos + +All data includes full provenance with URLs and timestamps. 
def extract_channel_id_or_username(youtube_url: str) -> Tuple[Optional[str], str]:
    """
    Extract a YouTube channel identifier from a channel URL.

    Supports the URL formats YouTube has used over time:
    ``/channel/<id>``, ``/@handle``, ``/user/<name>``, ``/c/<name>`` and
    bare ``youtube.com/<name>`` custom URLs.

    Args:
        youtube_url: Any YouTube channel URL (may be empty or None).

    Returns:
        Tuple of (identifier, identifier_type) where identifier_type is one
        of 'channel_id', 'handle', 'username' or 'custom_url'.
        Returns (None, "") when nothing can be parsed.
    """
    if not youtube_url:
        return None, ""

    # Channel ID format: /channel/UCxxxxx
    # Channel IDs are exactly 24 characters: the literal prefix "UC" plus 22
    # URL-safe base64 characters. The previous pattern used the character
    # class [UC] (ONE char, 'U' or 'C') followed by 22 chars, so it captured
    # only 23 characters and silently dropped the last character of every
    # channel ID, producing "Channel not found" API errors downstream.
    match = re.search(r'youtube\.com/channel/(UC[0-9A-Za-z_-]{22})', youtube_url)
    if match:
        return match.group(1), "channel_id"

    # Handle format: /@username
    match = re.search(r'youtube\.com/@([^/?&]+)', youtube_url)
    if match:
        return match.group(1), "handle"

    # Legacy user format: /user/username
    match = re.search(r'youtube\.com/user/([^/?&]+)', youtube_url)
    if match:
        return match.group(1), "username"

    # Custom URL format: /c/customname
    match = re.search(r'youtube\.com/c/([^/?&]+)', youtube_url)
    if match:
        return match.group(1), "custom_url"

    # Direct custom URL format: youtube.com/customname (no prefix).
    # Must be after all other patterns to avoid false matches.
    match = re.search(r'youtube\.com/([a-zA-Z][a-zA-Z0-9_-]{2,})(?:[/?]|$)', youtube_url)
    if match:
        name = match.group(1)
        # Exclude known site paths that aren't custom URLs.
        excluded = {'watch', 'playlist', 'channel', 'user', 'c', 'results',
                    'feed', 'gaming', 'shorts', 'live'}
        if name.lower() not in excluded:
            return name, "custom_url"

    return None, ""
+ """ + if id_type == "channel_id": + return identifier + + # Use search to find channel + search_params = { + "part": "snippet", + "type": "channel", + "maxResults": 1, + "key": api_key + } + + if id_type == "handle": + search_params["q"] = f"@{identifier}" + else: + search_params["q"] = identifier + + try: + response = httpx.get( + f"{YOUTUBE_API_BASE}/search", + params=search_params, + headers={"User-Agent": USER_AGENT}, + timeout=30.0 + ) + response.raise_for_status() + data = response.json() + + if data.get("items"): + return data["items"][0]["id"]["channelId"] + except Exception as e: + print(f" Warning: Could not resolve {id_type} '{identifier}': {e}") + + return None + + +def get_channel_info(channel_id: str, api_key: str) -> Dict[str, Any]: + """ + Get detailed channel information from YouTube Data API. + """ + params = { + "part": "snippet,statistics,brandingSettings,contentDetails", + "id": channel_id, + "key": api_key + } + + response = httpx.get( + f"{YOUTUBE_API_BASE}/channels", + params=params, + headers={"User-Agent": USER_AGENT}, + timeout=30.0 + ) + response.raise_for_status() + data = response.json() + + if not data.get("items"): + return {"error": f"Channel not found: {channel_id}"} + + item = data["items"][0] + snippet = item.get("snippet", {}) + stats = item.get("statistics", {}) + branding = item.get("brandingSettings", {}) + + return { + "channel_id": channel_id, + "channel_url": f"https://www.youtube.com/channel/{channel_id}", + "title": snippet.get("title"), + "description": snippet.get("description"), + "custom_url": snippet.get("customUrl"), + "published_at": snippet.get("publishedAt"), + "country": snippet.get("country"), + "default_language": snippet.get("defaultLanguage"), + "thumbnail_url": snippet.get("thumbnails", {}).get("high", {}).get("url"), + "banner_url": branding.get("image", {}).get("bannerExternalUrl"), + "subscriber_count": int(stats.get("subscriberCount", 0)) if stats.get("subscriberCount") else None, + "video_count": 
int(stats.get("videoCount", 0)) if stats.get("videoCount") else None, + "view_count": int(stats.get("viewCount", 0)) if stats.get("viewCount") else None, + "subscriber_count_hidden": stats.get("hiddenSubscriberCount", False), + "uploads_playlist_id": item.get("contentDetails", {}).get("relatedPlaylists", {}).get("uploads"), + } + + +def get_channel_videos(channel_id: str, api_key: str, max_results: int = 20) -> List[Dict[str, Any]]: + """ + Get recent videos from a YouTube channel. + """ + # First, search for videos from this channel + search_params = { + "part": "snippet", + "channelId": channel_id, + "type": "video", + "order": "date", + "maxResults": min(max_results, 50), + "key": api_key + } + + response = httpx.get( + f"{YOUTUBE_API_BASE}/search", + params=search_params, + headers={"User-Agent": USER_AGENT}, + timeout=30.0 + ) + response.raise_for_status() + search_data = response.json() + + video_ids = [item["id"]["videoId"] for item in search_data.get("items", [])] + + if not video_ids: + return [] + + # Get detailed video info + video_params = { + "part": "snippet,contentDetails,statistics", + "id": ",".join(video_ids), + "key": api_key + } + + response = httpx.get( + f"{YOUTUBE_API_BASE}/videos", + params=video_params, + headers={"User-Agent": USER_AGENT}, + timeout=30.0 + ) + response.raise_for_status() + video_data = response.json() + + videos = [] + for item in video_data.get("items", []): + snippet = item.get("snippet", {}) + stats = item.get("statistics", {}) + content = item.get("contentDetails", {}) + + videos.append({ + "video_id": item["id"], + "video_url": f"https://www.youtube.com/watch?v={item['id']}", + "title": snippet.get("title"), + "description": snippet.get("description", "")[:500], # Truncate long descriptions + "published_at": snippet.get("publishedAt"), + "duration": content.get("duration"), + "definition": content.get("definition"), + "caption_available": content.get("caption") == "true", + "view_count": int(stats.get("viewCount", 0)) 
if stats.get("viewCount") else None, + "like_count": int(stats.get("likeCount", 0)) if stats.get("likeCount") else None, + "comment_count": int(stats.get("commentCount", 0)) if stats.get("commentCount") else None, + "tags": snippet.get("tags", [])[:10], # Limit tags + "thumbnail_url": snippet.get("thumbnails", {}).get("high", {}).get("url"), + "default_language": snippet.get("defaultLanguage"), + "default_audio_language": snippet.get("defaultAudioLanguage"), + }) + + return videos + + +def get_video_comments(video_id: str, api_key: str, max_results: int = 50) -> List[Dict[str, Any]]: + """ + Get top-level comments on a video. + """ + params = { + "part": "snippet", + "videoId": video_id, + "order": "relevance", + "maxResults": min(max_results, 100), + "textFormat": "plainText", + "key": api_key + } + + try: + response = httpx.get( + f"{YOUTUBE_API_BASE}/commentThreads", + params=params, + headers={"User-Agent": USER_AGENT}, + timeout=30.0 + ) + response.raise_for_status() + data = response.json() + + comments = [] + for item in data.get("items", []): + snippet = item.get("snippet", {}).get("topLevelComment", {}).get("snippet", {}) + comments.append({ + "comment_id": item["id"], + "author_display_name": snippet.get("authorDisplayName"), + "author_channel_url": snippet.get("authorChannelUrl"), + "text": snippet.get("textDisplay", "")[:1000], # Truncate + "like_count": snippet.get("likeCount", 0), + "published_at": snippet.get("publishedAt"), + "updated_at": snippet.get("updatedAt"), + "reply_count": item.get("snippet", {}).get("totalReplyCount", 0), + }) + + return comments + + except httpx.HTTPStatusError as e: + if e.response.status_code == 403: + # Comments disabled for this video + return [] + raise + + +def get_video_transcript(video_id: str, language: str = "en") -> Optional[Dict[str, Any]]: + """ + Get video transcript using yt-dlp. 
+ """ + video_url = f"https://www.youtube.com/watch?v={video_id}" + + try: + with tempfile.TemporaryDirectory() as tmpdir: + result = subprocess.run( + [ + "yt-dlp", + "--write-subs", + "--write-auto-subs", + "--sub-langs", f"{language},nl,en", + "--sub-format", "vtt", + "--skip-download", + "--output", f"{tmpdir}/%(id)s", + video_url + ], + capture_output=True, + text=True, + timeout=60 + ) + + import glob + vtt_files = glob.glob(f"{tmpdir}/*.vtt") + + if vtt_files: + with open(vtt_files[0], 'r', encoding='utf-8') as f: + vtt_content = f.read() + + # Parse VTT to extract text + lines = [] + for line in vtt_content.split('\n'): + line = line.strip() + if line and not line.startswith('WEBVTT') and not line.startswith('Kind:') \ + and not line.startswith('Language:') and '-->' not in line \ + and not re.match(r'^\d+$', line): + clean_line = re.sub(r'<[^>]+>', '', line) + if clean_line: + lines.append(clean_line) + + # Remove duplicate consecutive lines + deduped = [] + for line in lines: + if not deduped or line != deduped[-1]: + deduped.append(line) + + transcript = ' '.join(deduped) + + # Determine language from filename + detected_lang = "unknown" + if ".nl." in vtt_files[0]: + detected_lang = "nl" + elif ".en." in vtt_files[0]: + detected_lang = "en" + + return { + "video_id": video_id, + "language": detected_lang, + "transcript_type": "auto" if ".auto." in vtt_files[0] else "manual", + "transcript_text": transcript[:10000], # Truncate very long transcripts + "transcript_length_chars": len(transcript), + "extraction_method": "yt-dlp", + } + + return None + + except FileNotFoundError: + return {"error": "yt-dlp not installed"} + except subprocess.TimeoutExpired: + return {"error": "Transcript extraction timed out"} + except Exception as e: + return {"error": str(e)} + + +def find_youtube_url_in_entry(entry: Dict[str, Any]) -> Optional[str]: + """ + Find YouTube URL from web_claims or wikidata in an entry. 
def create_youtube_enrichment(
    youtube_url: str,
    api_key: str,
    fetch_videos: int = 10,
    fetch_comments_per_video: int = 20,
    fetch_transcripts: bool = True
) -> Dict[str, Any]:
    """
    Create full YouTube enrichment data with provenance.

    Resolves the channel behind ``youtube_url``, then fetches channel info,
    recent videos, comments for the top videos, and transcripts where
    captions are available.

    Args:
        youtube_url: Channel URL found in the entry.
        api_key: YouTube Data API v3 key.
        fetch_videos: Number of recent videos to fetch (0 disables).
        fetch_comments_per_video: Comments to fetch per video (0 disables).
        fetch_transcripts: Whether to attempt transcript extraction.

    Returns:
        Enrichment dict with provenance fields (source_url, fetch_timestamp,
        api_endpoint, api_version) and a ``status`` of SUCCESS or FAILED.
    """
    timestamp = datetime.now(timezone.utc).isoformat()

    enrichment = {
        "source_url": youtube_url,
        "fetch_timestamp": timestamp,
        "api_endpoint": YOUTUBE_API_BASE,
        "api_version": "v3",
    }

    # Extract channel identifier
    identifier, id_type = extract_channel_id_or_username(youtube_url)

    if not identifier:
        enrichment["error"] = f"Could not parse YouTube URL: {youtube_url}"
        enrichment["status"] = "FAILED"
        return enrichment

    enrichment["identifier_type"] = id_type
    enrichment["identifier_value"] = identifier

    # Resolve to channel ID
    channel_id = resolve_channel_id(identifier, id_type, api_key)

    if not channel_id:
        enrichment["error"] = f"Could not resolve channel ID for: {identifier}"
        enrichment["status"] = "FAILED"
        return enrichment

    try:
        # Get channel info
        print(f" Fetching channel info for {channel_id}...")
        channel_info = get_channel_info(channel_id, api_key)
        enrichment["channel"] = channel_info

        # BUG FIX: a failed lookup ({"error": "Channel not found: ..."})
        # previously fell through and was still marked SUCCESS, producing
        # entries with an error payload but status SUCCESS — which the skip
        # logic then treated as "already enriched", preventing retries.
        if channel_info.get("error"):
            enrichment["error"] = channel_info["error"]
            enrichment["videos"] = []
            enrichment["videos_count"] = 0
            enrichment["status"] = "FAILED"
            return enrichment

        # Get recent videos
        if fetch_videos > 0:
            print(f" Fetching {fetch_videos} recent videos...")
            videos = get_channel_videos(channel_id, api_key, fetch_videos)
            enrichment["videos"] = videos
            enrichment["videos_count"] = len(videos)

            # Get comments for top videos
            if fetch_comments_per_video > 0 and videos:
                print(f" Fetching comments for top videos...")
                for i, video in enumerate(videos[:5]):  # Only first 5 videos
                    video_id = video["video_id"]
                    comments = get_video_comments(video_id, api_key, fetch_comments_per_video)
                    videos[i]["comments"] = comments
                    videos[i]["comments_fetched"] = len(comments)

            # Get transcripts for videos with captions
            if fetch_transcripts and videos:
                print(f" Fetching transcripts for videos with captions...")
                for i, video in enumerate(videos[:3]):  # Only first 3 videos
                    if video.get("caption_available"):
                        video_id = video["video_id"]
                        transcript = get_video_transcript(video_id)
                        if transcript and not transcript.get("error"):
                            videos[i]["transcript"] = transcript

        enrichment["status"] = "SUCCESS"

    except httpx.HTTPStatusError as e:
        enrichment["error"] = f"YouTube API error: {e.response.status_code}"
        enrichment["status"] = "FAILED"
    except Exception as e:
        enrichment["error"] = str(e)
        enrichment["status"] = "FAILED"

    return enrichment
+ """ + if "provenance" not in entry: + entry["provenance"] = {"sources": {}} + + if "sources" not in entry["provenance"]: + entry["provenance"]["sources"] = {} + + if "youtube" not in entry["provenance"]["sources"]: + entry["provenance"]["sources"]["youtube"] = [] + + source_entry = { + "source_type": "youtube_data_api", + "fetch_timestamp": enrichment.get("fetch_timestamp"), + "api_endpoint": enrichment.get("api_endpoint"), + "channel_id": enrichment.get("channel", {}).get("channel_id"), + "claims_extracted": [ + "channel_info", + "subscriber_count", + "video_count", + "view_count", + "recent_videos", + "video_comments", + "video_transcripts", + ] + } + + entry["provenance"]["sources"]["youtube"].append(source_entry) + + +def process_entry(entry_path: Path, api_key: str, dry_run: bool = False) -> bool: + """ + Process a single entry file and add YouTube enrichment. + """ + print(f"\nProcessing: {entry_path.name}") + + # Load entry + with open(entry_path, 'r', encoding='utf-8') as f: + entry = yaml.safe_load(f) + + # Check if already enriched + if entry.get("youtube_enrichment", {}).get("status") == "SUCCESS": + print(f" Already enriched, skipping...") + return False + + # Find YouTube URL + youtube_url = find_youtube_url_in_entry(entry) + + if not youtube_url: + print(f" No YouTube URL found, skipping...") + return False + + print(f" Found YouTube URL: {youtube_url}") + + if dry_run: + print(f" [DRY RUN] Would enrich with YouTube data") + return True + + # Create enrichment + enrichment = create_youtube_enrichment( + youtube_url=youtube_url, + api_key=api_key, + fetch_videos=10, + fetch_comments_per_video=20, + fetch_transcripts=True + ) + + # Add to entry + entry["youtube_enrichment"] = enrichment + + # Update provenance + if enrichment.get("status") == "SUCCESS": + update_provenance(entry, enrichment) + + # Save entry + with open(entry_path, 'w', encoding='utf-8') as f: + yaml.dump(entry, f, allow_unicode=True, default_flow_style=False, sort_keys=False) + + 
status = enrichment.get("status", "UNKNOWN") + print(f" Status: {status}") + + if status == "SUCCESS": + channel = enrichment.get("channel", {}) + videos = enrichment.get("videos", []) + print(f" Channel: {channel.get('title')}") + print(f" Subscribers: {channel.get('subscriber_count'):,}" if channel.get('subscriber_count') else " Subscribers: Hidden") + print(f" Videos fetched: {len(videos)}") + + return status == "SUCCESS" + + +def main(): + parser = argparse.ArgumentParser( + description="Enrich heritage custodian entries with YouTube channel data" + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Show what would be done without making changes" + ) + parser.add_argument( + "--limit", + type=int, + default=None, + help="Limit number of entries to process" + ) + parser.add_argument( + "--entry", + type=str, + default=None, + help="Process a specific entry file (filename or full path)" + ) + parser.add_argument( + "--skip-existing", + action="store_true", + default=True, + help="Skip entries that already have YouTube enrichment (default: True)" + ) + + args = parser.parse_args() + + # Check API key + if not YOUTUBE_API_KEY: + print("ERROR: YOUTUBE_API_KEY environment variable not set") + print("\nTo get an API key:") + print("1. Go to https://console.cloud.google.com/") + print("2. Create a project and enable YouTube Data API v3") + print("3. Create an API key under Credentials") + print("4. 
Set: export YOUTUBE_API_KEY='your-key-here'") + sys.exit(1) + + print("=" * 60) + print("YouTube Enrichment Script for Heritage Custodians") + print("=" * 60) + print(f"API Key: {YOUTUBE_API_KEY[:8]}...{YOUTUBE_API_KEY[-4:]}") + print(f"Entries directory: {ENTRIES_DIR}") + print(f"Dry run: {args.dry_run}") + + # Collect entries to process + if args.entry: + entry_path = Path(args.entry) + if not entry_path.exists(): + entry_path = ENTRIES_DIR / args.entry + if not entry_path.exists(): + print(f"ERROR: Entry not found: {args.entry}") + sys.exit(1) + entries = [entry_path] + else: + entries = sorted(ENTRIES_DIR.glob("*.yaml")) + + if args.limit: + entries = entries[:args.limit] + + print(f"Entries to process: {len(entries)}") + print("=" * 60) + + # Process entries + success_count = 0 + skip_count = 0 + error_count = 0 + + for entry_path in entries: + try: + result = process_entry(entry_path, YOUTUBE_API_KEY, args.dry_run) + if result: + success_count += 1 + else: + skip_count += 1 + except Exception as e: + print(f" ERROR: {e}") + error_count += 1 + + # Rate limiting + import time + time.sleep(REQUEST_DELAY) + + # Summary + print("\n" + "=" * 60) + print("Summary") + print("=" * 60) + print(f"Entries processed: {len(entries)}") + print(f"Successfully enriched: {success_count}") + print(f"Skipped (no YouTube / already done): {skip_count}") + print(f"Errors: {error_count}") + + +if __name__ == "__main__": + main() diff --git a/scripts/export_nde_map_json.py b/scripts/export_nde_map_json.py index 911daddba0..2cb1d81ff1 100644 --- a/scripts/export_nde_map_json.py +++ b/scripts/export_nde_map_json.py @@ -288,6 +288,7 @@ def extract_institution_data(entry_data: dict) -> dict | None: result['ghcid'] = { 'current': ghcid_data.get('ghcid_current', ''), 'uuid': ghcid_data.get('ghcid_uuid', ''), + 'numeric': ghcid_data.get('ghcid_numeric', ''), } # Add standardized identifiers diff --git a/scripts/generate_custodian_type_enums.py b/scripts/generate_custodian_type_enums.py new 
#!/usr/bin/env python3
"""
Generate LinkML enum files for CustodianType hyponyms from Wikidata data.

This script reads the curated hyponyms data and generates LinkML enum YAML files
for each GLAMORCUBESFIXPHDNT category (Museum, Archive, Library, Gallery, etc.)

Usage:
    python3 scripts/generate_custodian_type_enums.py

Output:
    Creates/updates files in schemas/20251121/linkml/modules/enums/
"""

import re
from pathlib import Path
from datetime import datetime, timezone
from collections import defaultdict

# Type code to enum name mapping. One entry per configured custodian type
# code; codes in the data with no entry here (e.g. 'F') are reported by
# main() but produce no enum file.
TYPE_CONFIG = {
    'M': {
        'enum_name': 'MuseumTypeEnum',
        'title': 'Museum Type Classification',
        'base_wikidata': 'Q33506',
        'description': 'Types of museums extracted from Wikidata hyponyms of Q33506 (museum).',
    },
    'A': {
        'enum_name': 'ArchiveTypeEnum',
        'title': 'Archive Type Classification',
        'base_wikidata': 'Q166118',
        'description': 'Types of archives extracted from Wikidata hyponyms of Q166118 (archive).',
    },
    'L': {
        'enum_name': 'LibraryTypeEnum',
        'title': 'Library Type Classification',
        'base_wikidata': 'Q7075',
        'description': 'Types of libraries extracted from Wikidata hyponyms of Q7075 (library).',
    },
    'G': {
        'enum_name': 'GalleryTypeEnum',
        'title': 'Gallery Type Classification',
        'base_wikidata': 'Q1007870',
        'description': 'Types of galleries extracted from Wikidata hyponyms of Q1007870 (art gallery).',
    },
    'B': {
        'enum_name': 'BioCustodianTypeEnum',
        'title': 'Bio Custodian Type Classification',
        'base_wikidata': 'Q473972',
        'description': 'Types of botanical gardens, zoos, and living collections from Wikidata.',
    },
    'O': {
        'enum_name': 'OfficialInstitutionTypeEnum',
        'title': 'Official Institution Type Classification',
        'base_wikidata': 'Q895526',
        'description': 'Types of official/government heritage institutions from Wikidata.',
    },
    'R': {
        'enum_name': 'ResearchCenterTypeEnum',
        'title': 'Research Center Type Classification',
        'base_wikidata': 'Q136410232',
        'description': 'Types of research organizations and documentation centers from Wikidata.',
    },
    'C': {
        'enum_name': 'CommercialCustodianTypeEnum',
        'title': 'Commercial Custodian Type Classification',
        'base_wikidata': 'Q21980538',
        'description': 'Types of commercial/corporate heritage custodians from Wikidata.',
    },
    'E': {
        'enum_name': 'EducationProviderTypeEnum',
        'title': 'Education Provider Type Classification',
        'base_wikidata': 'Q5341295',
        'description': 'Types of educational institutions with heritage collections from Wikidata.',
    },
    'S': {
        'enum_name': 'HeritageSocietyTypeEnum',
        'title': 'Heritage Society Type Classification',
        'base_wikidata': 'Q5774403',
        'description': 'Types of heritage societies and collecting organizations from Wikidata.',
    },
    'H': {
        'enum_name': 'HolySiteTypeEnum',
        'title': 'Holy/Sacred Site Type Classification',
        'base_wikidata': 'Q4588528',
        'description': 'Types of religious sites with heritage collections from Wikidata.',
    },
    'I': {
        'enum_name': 'IntangibleHeritageTypeEnum',
        'title': 'Intangible Heritage Group Type Classification',
        'base_wikidata': 'Q105815710',
        'description': 'Types of organizations preserving intangible cultural heritage from Wikidata.',
    },
    'N': {
        'enum_name': 'NonProfitCustodianTypeEnum',
        'title': 'Non-Profit Custodian Type Classification',
        'base_wikidata': 'Q163740',
        'description': 'Types of non-profit heritage organizations from Wikidata.',
    },
    'D': {
        'enum_name': 'DigitalPlatformTypeEnum',
        'title': 'Digital Platform Type Classification',
        'base_wikidata': 'Q28017710',
        'description': 'Types of digital heritage platforms from Wikidata.',
    },
    'P': {
        'enum_name': 'PersonalCollectionTypeEnum',
        'title': 'Personal Collection Type Classification',
        'base_wikidata': 'Q134886297',
        'description': 'Types of personal/private heritage collections from Wikidata.',
    },
    'T': {
        'enum_name': 'TasteScentHeritageTypeEnum',
        'title': 'Taste/Scent Heritage Type Classification',
        'base_wikidata': None,
        'description': 'Types of culinary and olfactory heritage custodians.',
    },
}


def sanitize_enum_value(label: str) -> str:
    """Convert a label to a valid enum value name (UPPER_SNAKE_CASE).

    Strips punctuation, maps spaces/hyphens to underscores, uppercases,
    prefixes 'TYPE_' when the result would not start with a letter, and
    falls back to 'UNKNOWN' for empty results.
    """
    # Remove special characters, keep alphanumeric, spaces and hyphens.
    clean = re.sub(r'[^\w\s-]', '', label)
    # Replace runs of spaces/hyphens with a single underscore.
    clean = re.sub(r'[\s-]+', '_', clean)
    clean = clean.upper().strip('_')
    # LinkML enum value names must start with a letter.
    if clean and not clean[0].isalpha():
        clean = 'TYPE_' + clean
    return clean or 'UNKNOWN'


def load_hyponyms(filepath: Path) -> list:
    """Load the curated hyponym records from the YAML file.

    NOTE(review): records are read from the 'hypernym' key of the data
    file — confirm this spelling is intentional (the rest of the script
    speaks of "hyponyms"). The whole file is loaded into memory; the
    original's "stream loading" comment did not match the code.
    """
    import yaml  # local import: keeps the module importable without PyYAML

    print(f"Loading hyponyms from {filepath}...")
    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
    return data.get('hypernym', [])


def group_by_type(hyponyms: list) -> dict:
    """Group hyponym records by their curated type codes.

    Returns a mapping of type code -> list of dicts with keys
    'qid', 'label', 'description', 'labels'. An item with several type
    codes appears under each of them.
    """
    by_type = defaultdict(list)

    for item in hyponyms:
        curated = item.get('curated', {})
        wikidata = item.get('wikidata', {})
        types = curated.get('type', [])

        # Prefer the curated label as QID, else the raw Wikidata id.
        qid = curated.get('label', wikidata.get('id', ''))
        labels = wikidata.get('labels', {})
        descriptions = wikidata.get('descriptions', {})

        # English label preferred, then Dutch, then any, then the QID.
        en_label = labels.get('en', labels.get('nl', list(labels.values())[0] if labels else qid))
        en_desc = descriptions.get('en', descriptions.get('nl', ''))

        for t in types:
            by_type[t].append({
                'qid': qid,
                'label': en_label,
                'description': en_desc,
                'labels': labels,
            })

    return by_type


def generate_enum_yaml(type_code: str, items: list, config: dict, output_dir: Path) -> Path:
    """Generate a LinkML enum YAML file for a custodian type.

    Values are sorted case-insensitively by label; duplicate value names
    are disambiguated with a numeric suffix (_1, _2, ...). Returns the
    path of the written file.
    """
    import yaml  # local import: keeps the module importable without PyYAML

    enum_name = config['enum_name']
    output_file = output_dir / f"{enum_name}.yaml"

    permissible_values = {}
    seen_names = set()

    for item in sorted(items, key=lambda x: x['label'].lower()):
        qid = item['qid']
        label = item['label']
        description = item['description']

        value_name = sanitize_enum_value(label)

        # Disambiguate duplicates with a numeric counter suffix.
        # (The original comment said "appending QID", which the code
        # never did.)
        original_name = value_name
        counter = 1
        while value_name in seen_names:
            value_name = f"{original_name}_{counter}"
            counter += 1
        seen_names.add(value_name)

        value_def = {
            'description': description if description else f"{label} ({qid})",
            'meaning': f"wikidata:{qid}",
        }

        # Attach up to three non-English translations as comments.
        if item.get('labels'):
            other_labels = [
                f"{lbl} ({lang})"
                for lang, lbl in sorted(item['labels'].items())
                if lang != 'en' and lang in ['nl', 'de', 'fr', 'es', 'it']
            ]
            if other_labels:
                value_def['comments'] = other_labels[:3]  # limit to 3 translations

        permissible_values[value_name] = value_def

    yaml_content = {
        'id': f"https://nde.nl/ontology/hc/enum/{enum_name}",
        'name': enum_name,
        'title': config['title'],
        'description': (
            f"{config['description']}\n\n"
            f"Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')}\n"
            f"Total values: {len(permissible_values)}"
        ),
        'enums': {
            enum_name: {
                'permissible_values': permissible_values
            }
        }
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        yaml.dump(yaml_content, f,
                  default_flow_style=False,
                  allow_unicode=True,
                  sort_keys=False,
                  width=100)

    return output_file


def main():
    """Generate one LinkML enum file per configured custodian type code."""
    project_root = Path(__file__).parent.parent
    hyponyms_file = project_root / 'data/wikidata/GLAMORCUBEPSXHFN/hyponyms_curated_full.yaml'
    output_dir = project_root / 'schemas/20251121/linkml/modules/enums'

    if not hyponyms_file.exists():
        print(f"❌ Hyponyms file not found: {hyponyms_file}")
        return 1

    hyponyms = load_hyponyms(hyponyms_file)
    print(f"✅ Loaded {len(hyponyms)} hyponyms")

    by_type = group_by_type(hyponyms)
    print(f"✅ Grouped into {len(by_type)} type categories")

    generated = []
    skipped = []

    for type_code, config in TYPE_CONFIG.items():
        items = by_type.get(type_code, [])
        if not items:
            skipped.append(f"{type_code} ({config['enum_name']}): no items")
            continue
        output_file = generate_enum_yaml(type_code, items, config, output_dir)
        generated.append(f"{type_code} ({config['enum_name']}): {len(items)} values → {output_file.name}")

    # Report data type codes with no TYPE_CONFIG entry (e.g. 'F' /
    # FeatureTypeEnum, which already exists) instead of dropping them
    # silently. The previous 'F' check inside the loop above was dead
    # code: the loop iterates TYPE_CONFIG, which has no 'F' key.
    for type_code in sorted(set(by_type) - set(TYPE_CONFIG)):
        skipped.append(f"{type_code}: no enum config ({len(by_type[type_code])} items not generated)")

    # Summary
    print("\n" + "=" * 60)
    print("GENERATION SUMMARY")
    print("=" * 60)
    print(f"\n✅ Generated {len(generated)} enum files:")
    for g in generated:
        print(f"   {g}")

    if skipped:
        print(f"\n⏭️  Skipped {len(skipped)} types:")
        for s in skipped:
            print(f"   {s}")

    print("\n✅ Done!")
    return 0


if __name__ == '__main__':
    exit(main())
#!/usr/bin/env python3
"""
Generate Mermaid ER diagrams with instance data from LinkML schemas.

Includes all classes and relationships, enum values from the LinkML
schema, and instance data (from instances/enums/*.yaml) as annotations.
The instance data provides semantically meaningful "allowed values" for
CustodianType classes like MuseumType, LibraryType, HeritageSocietyType, etc.

Usage:
    python3 scripts/generate_mermaid_with_instances.py

Output:
    frontend/public/data/heritage_custodian_ontology.mmd
    schemas/20251121/uml/mermaid/complete_schema_with_instances_YYYYMMDD_HHMMSS.mmd
"""
import sys
import yaml
from pathlib import Path
from datetime import datetime
from linkml_runtime.utils.schemaview import SchemaView

# Configuration
SCHEMA_PATH = "schemas/20251121/linkml/01_custodian_name_modular.yaml"
INSTANCES_DIR = "schemas/20251121/linkml/instances/enums"
OUTPUT_DIR = "schemas/20251121/uml/mermaid"
FRONTEND_OUTPUT = "frontend/public/data/heritage_custodian_ontology.mmd"

# Classes to exclude from diagrams (technical artifacts with no semantic significance)
EXCLUDED_CLASSES = {
    "Container",  # LinkML tree_root for validation only, not part of ontology
}

# Maximum number of enum values to show in diagram (for readability)
MAX_ENUM_VALUES_IN_DIAGRAM = 10

# Maximum number of instance values to show (for readability)
MAX_INSTANCE_VALUES = 15

# Mapping from enum names to their instance files
ENUM_INSTANCE_FILES = {
    "CustodianPrimaryTypeEnum": "custodian_primary_type.yaml",
    "AppellationTypeEnum": "appellation_type.yaml",
    "OrganizationalChangeEventTypeEnum": "organizational_change_event_type.yaml",
    "StaffRoleTypeEnum": "staff_role_type.yaml",
    "OrganizationalUnitTypeEnum": "organizational_unit_type.yaml",
    "LegalStatusEnum": "legal_status_type.yaml",
    "PlaceSpecificityEnum": "place_specificity.yaml",
    "EncompassingBodyTypeEnum": "encompassing_body_type.yaml",
    "AuxiliaryDigitalPlatformTypeEnum": "auxiliary_digital_platform_type.yaml",
    "AgentTypeEnum": "agent_type.yaml",
    "EntityTypeEnum": "entity_type.yaml",
    "SourceDocumentTypeEnum": "source_document_type.yaml",
    "ReconstructionActivityTypeEnum": "reconstruction_activity_type.yaml",
    "WebPortalTypeEnum": "web_portal_type.yaml",
    "SocialMediaPlatformTypeEnum": "social_media_platform_type.yaml",
    "RecordsLifecycleStageEnum": "records_lifecycle_stage.yaml",
    "ArchiveProcessingStatusEnum": "archive_processing_status.yaml",
    "StorageTypeEnum": "storage_type.yaml",
    "DigitalPresenceTypeEnum": "digital_presence_type.yaml",
    "FeatureTypeEnum": "feature_type.yaml",
    "ProjectStatusEnum": "project_status.yaml",
    "FinancialStatementTypeEnum": "financial_statement_type.yaml",
    "StorageConditionStatusEnum": "storage_condition_status.yaml",
    "AuxiliaryPlaceTypeEnum": "auxiliary_place_type.yaml",
    "GiftShopTypeEnum": "gift_shop_type.yaml",
    "FundingRequirementTypeEnum": "funding_requirement_type.yaml",
    "OrganizationBranchTypeEnum": "organization_branch_type.yaml",
}


def load_instance_data(instances_dir: Path) -> dict:
    """Load all instance data from YAML files.

    Returns a mapping of enum name -> {'name', 'description', 'values'},
    where each value entry carries 'value', 'code', 'label' (English
    skos:prefLabel if present) and 'wikidata' (entity QID if present).
    Missing files are skipped; unreadable files log a warning.
    """
    instance_data = {}

    for enum_name, filename in ENUM_INSTANCE_FILES.items():
        filepath = instances_dir / filename
        if not filepath.exists():
            continue
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
            if data and 'instances' in data:
                # Extract value names and their English labels.
                values = []
                for instance in data['instances']:
                    value = instance.get('value', '')
                    code = instance.get('code', '')

                    # English label if available (prefLabel may be a plain string).
                    pref_label = instance.get('skos:prefLabel', {})
                    en_label = pref_label.get('en', '') if isinstance(pref_label, dict) else ''

                    # Wikidata entity QID if available.
                    wikidata = instance.get('wikidata', {})
                    qid = wikidata.get('entity', '') if isinstance(wikidata, dict) else ''

                    values.append({
                        'value': value,
                        'code': code,
                        'label': en_label,
                        'wikidata': qid,
                    })

                instance_data[enum_name] = {
                    'name': data.get('name', enum_name),
                    'description': data.get('description', ''),
                    'values': values,
                }
                # The source filename was mangled to "(unknown)" in the
                # original message; restored from the loop variable.
                print(f"  ✓ Loaded {len(values)} instances from {filename}", file=sys.stderr)
        except Exception as e:
            print(f"  ⚠ Warning: Could not load {filename}: {e}", file=sys.stderr)

    return instance_data


def generate_mermaid_with_instances(sv: SchemaView, instance_data: dict, include_enums: bool = True) -> str:
    """Generate a Mermaid ER diagram string with instance data annotations.

    Emits one entity per class (excluding EXCLUDED_CLASSES), one entity per
    enum — enriched with instance values where available — and inheritance
    plus association/enum relationships.
    """
    lines = ["```mermaid"]
    lines.append("erDiagram")
    lines.append("")
    lines.append(" %% Heritage Custodian Ontology - Complete Schema with Instance Data")
    lines.append(f" %% Generated: {datetime.now().isoformat()}")
    lines.append(f" %% Schema: {sv.schema.name}")
    lines.append("")

    # All classes except excluded technical artifacts.
    all_classes = [c for c in sv.all_classes() if c not in EXCLUDED_CLASSES]
    all_enums = list(sv.all_enums()) if include_enums else []

    # Class entities with all induced slots.
    for class_name in all_classes:
        lines.append(f"{class_name} {{")

        for slot_name in sv.class_slots(class_name):
            slot = sv.induced_slot(slot_name, class_name)
            if slot:
                slot_range = slot.range if slot.range else "string"

                # Skip slots whose range is an excluded class.
                if slot_range in EXCLUDED_CLASSES:
                    continue

                # Format: type attribute_name (List suffix for multivalued,
                # PK marker for required slots).
                multivalued_marker = "List" if slot.multivalued else ""
                required_marker = " PK" if slot.required else ""
                lines.append(f" {slot_range}{multivalued_marker} {slot_name}{required_marker}")

        lines.append("}")

    # Enum entities, enriched with instance data where available.
    if include_enums and all_enums:
        lines.append("")
        lines.append(" %% Enumerations with Instance Data")
        for enum_name in all_enums:
            enum_def = sv.get_enum(enum_name)
            if enum_def and enum_def.permissible_values:
                lines.append(f"{enum_name} {{")
                lines.append(" string enum_type PK")

                if enum_name in instance_data:
                    values = instance_data[enum_name]['values']

                    for val_info in values[:MAX_INSTANCE_VALUES]:
                        value = val_info['value']
                        code = val_info.get('code', '')
                        # Mermaid ER entities cannot carry comments, so the
                        # short code is appended to the value name for context.
                        if code:
                            lines.append(f" string {value}_{code}")
                        else:
                            lines.append(f" string {value}")

                    if len(values) > MAX_INSTANCE_VALUES:
                        remaining = len(values) - MAX_INSTANCE_VALUES
                        lines.append(f" string _and_{remaining}_more")
                else:
                    # Fall back to the schema's own permissible values.
                    values = list(enum_def.permissible_values.keys())
                    for value_name in values[:MAX_ENUM_VALUES_IN_DIAGRAM]:
                        lines.append(f" string {value_name}")

                    if len(values) > MAX_ENUM_VALUES_IN_DIAGRAM:
                        remaining = len(values) - MAX_ENUM_VALUES_IN_DIAGRAM
                        lines.append(f" string _and_{remaining}_more")

                lines.append("}")

    lines.append("")

    # Relationships: inheritance, class associations, enum usages.
    for class_name in all_classes:
        cls = sv.get_class(class_name)

        if cls.is_a and cls.is_a not in EXCLUDED_CLASSES:
            lines.append(f'{class_name} ||--|| {cls.is_a} : "inherits"')

        for slot_name in sv.class_slots(class_name):
            slot = sv.induced_slot(slot_name, class_name)

            if slot and slot.range:
                if slot.range in all_classes:
                    if slot.multivalued:
                        cardinality = "||--}|" if slot.required else "||--}o"
                    else:
                        cardinality = "||--||" if slot.required else "||--|o"
                    lines.append(f'{class_name} {cardinality} {slot.range} : "{slot_name}"')
                elif include_enums and slot.range in all_enums:
                    cardinality = "||--}o" if slot.multivalued else "||--|o"
                    lines.append(f'{class_name} {cardinality} {slot.range} : "{slot_name}"')

    lines.append("")
    lines.append("```")
    lines.append("")

    return '\n'.join(lines)


def main():
    """Main entry point: load schema + instances, write both output files."""
    print("=" * 60, file=sys.stderr)
    print("Mermaid ER Diagram Generator with Instance Data", file=sys.stderr)
    print("=" * 60, file=sys.stderr)

    # Load schema
    print(f"\nLoading schema: {SCHEMA_PATH}", file=sys.stderr)
    sv = SchemaView(SCHEMA_PATH)
    print(f"✓ Loaded schema: {sv.schema.name}", file=sys.stderr)
    print(f"  Classes: {len(list(sv.all_classes()))}", file=sys.stderr)
    print(f"  Enums: {len(list(sv.all_enums()))}", file=sys.stderr)

    # Load instance data
    instances_dir = Path(INSTANCES_DIR)
    print(f"\nLoading instance data from: {instances_dir}", file=sys.stderr)
    instance_data = load_instance_data(instances_dir)
    print(f"✓ Loaded {len(instance_data)} enum instance files", file=sys.stderr)

    # Generate Mermaid
    print("\nGenerating Mermaid ER diagram...", file=sys.stderr)
    mermaid = generate_mermaid_with_instances(sv, instance_data)

    # Timestamped filename for the archived copy.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Ensure output directories exist
    output_dir = Path(OUTPUT_DIR)
    output_dir.mkdir(parents=True, exist_ok=True)
    frontend_path = Path(FRONTEND_OUTPUT)
    frontend_path.parent.mkdir(parents=True, exist_ok=True)

    # Write to schemas directory (timestamped)
    schema_output = output_dir / f"complete_schema_with_instances_{timestamp}.mmd"
    schema_output.write_text(mermaid)
    print(f"\n✓ Generated: {schema_output}", file=sys.stderr)
    print(f"  Size: {len(mermaid)} bytes", file=sys.stderr)

    # Write to frontend directory (overwrite)
    frontend_path.write_text(mermaid)
    print(f"✓ Updated frontend: {frontend_path}", file=sys.stderr)

    print("\n" + "=" * 60, file=sys.stderr)
    print("Done! The UML diagram now includes instance data.", file=sys.stderr)
    print("=" * 60, file=sys.stderr)


if __name__ == '__main__':
    main()