From 51554947a0d089a8229138e33536effbae2b858c Mon Sep 17 00:00:00 2001 From: kempersc Date: Tue, 16 Dec 2025 20:03:17 +0100 Subject: [PATCH] feat(schema): Add video content schema with comprehensive examples Video Schema Classes (9 files): - VideoPost, VideoComment: Social media video modeling - VideoTextContent: Base class for text content extraction - VideoTranscript, VideoSubtitle: Text with timing and formatting - VideoTimeSegment: Time code handling with ISO 8601 duration - VideoAnnotation: Base annotation with W3C Web Annotation alignment - VideoAnnotationTypes: Scene, Object, OCR detection annotations - VideoChapter, VideoChapterList: Navigation and chapter structure - VideoAudioAnnotation: Speaker diarization, music, sound events Enumerations (15 enums): - VideoDefinitionEnum, LiveBroadcastStatusEnum - TranscriptFormatEnum, SubtitleFormatEnum, SubtitlePositionEnum - AnnotationTypeEnum, AnnotationMotivationEnum - DetectionLevelEnum, SceneTypeEnum, TransitionTypeEnum, TextTypeEnum - ChapterSourceEnum, AudioEventTypeEnum, SoundEventTypeEnum, MusicTypeEnum Examples (904 lines, 10 comprehensive heritage-themed examples): - Rijksmuseum virtual tour chapters (5 chapters with heritage entity refs) - Operation Night Watch documentary chapters (5 chapters) - VideoAudioAnnotation: curator interview, exhibition promo, museum lecture All examples reference real heritage entities with Wikidata IDs: Q5598 (Rembrandt), Q41264 (Vermeer), Q219831 (The Night Watch) --- .../examples/video_content_examples.yaml | 904 ++++++++++++ .../modules/classes/VideoAnnotation.yaml | 542 +++++++ .../modules/classes/VideoAnnotationTypes.yaml | 1312 +++++++++++++++++ .../modules/classes/VideoAudioAnnotation.yaml | 1108 ++++++++++++++ .../linkml/modules/classes/VideoChapter.yaml | 621 ++++++++ .../linkml/modules/classes/VideoPost.yaml | 763 ++++++++++ .../linkml/modules/classes/VideoSubtitle.yaml | 632 ++++++++ .../modules/classes/VideoTextContent.yaml | 524 +++++++ 
.../modules/classes/VideoTimeSegment.yaml | 375 +++++ .../modules/classes/VideoTranscript.yaml | 469 ++++++ 10 files changed, 7250 insertions(+) create mode 100644 schemas/20251121/linkml/examples/video_content_examples.yaml create mode 100644 schemas/20251121/linkml/modules/classes/VideoAnnotation.yaml create mode 100644 schemas/20251121/linkml/modules/classes/VideoAnnotationTypes.yaml create mode 100644 schemas/20251121/linkml/modules/classes/VideoAudioAnnotation.yaml create mode 100644 schemas/20251121/linkml/modules/classes/VideoChapter.yaml create mode 100644 schemas/20251121/linkml/modules/classes/VideoPost.yaml create mode 100644 schemas/20251121/linkml/modules/classes/VideoSubtitle.yaml create mode 100644 schemas/20251121/linkml/modules/classes/VideoTextContent.yaml create mode 100644 schemas/20251121/linkml/modules/classes/VideoTimeSegment.yaml create mode 100644 schemas/20251121/linkml/modules/classes/VideoTranscript.yaml diff --git a/schemas/20251121/linkml/examples/video_content_examples.yaml b/schemas/20251121/linkml/examples/video_content_examples.yaml new file mode 100644 index 0000000000..d0889a0efb --- /dev/null +++ b/schemas/20251121/linkml/examples/video_content_examples.yaml @@ -0,0 +1,904 @@ +# Video Content Examples +# Instance data demonstrating video schema classes for heritage institutions +# Covers: VideoPost, VideoComment, VideoTranscript, VideoSubtitle, VideoAnnotation types +# +# Part of Heritage Custodian Ontology v0.9.10 +# +# HERITAGE INSTITUTION VIDEO USE CASES: +# - Virtual museum tours +# - Conservation documentation +# - Curator interviews +# - Collection spotlights +# - Educational content +# - Live event recordings + +# ============================================================================ +# EXAMPLE 1: Museum Virtual Tour Video +# Complete VideoPost with transcript, subtitles, and scene annotations +# ============================================================================ + +video_posts: + + - post_id: 
"https://nde.nl/ontology/hc/video/nl/rijksmuseum-gallery-honour" + platform_type: YOUTUBE + platform_id: "UCo2sQFl0mV4K2v6D4d8Z9bQ" + platform_post_id: "dQw4w9WgXcQ" + post_url: "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + post_title: "The Gallery of Honour - Rijksmuseum Virtual Tour" + post_description: | + Take a virtual walk through the famous Gallery of Honour at the Rijksmuseum + in Amsterdam. This corridor displays masterpieces of Dutch Golden Age painting, + culminating in Rembrandt's Night Watch. Our curator guides you through the + history and significance of these iconic works. + + # Video technical properties + duration: "PT15M42S" + definition: hd + aspect_ratio: "16:9" + frame_rate: 30.0 + + # Caption and language + caption_available: true + default_language: "nl" + default_audio_language: "nl" + available_caption_languages: + - "nl" + - "en" + - "de" + - "fr" + - "zh" + + # Engagement metrics (observational) + view_count: 125847 + like_count: 3421 + dislike_count: 42 + comment_count: 287 + favorite_count: 892 + metrics_observed_at: "2025-12-15T10:30:00Z" + + # Platform-specific + video_category_id: "27" # Education + live_broadcast_content: none + is_licensed_content: false + is_embeddable: true + is_made_for_kids: false + + # Publishing info (inherited from SocialMediaPost) + published_at: "2023-03-15T14:00:00Z" + last_updated_at: "2023-03-15T14:00:00Z" + + # Comments + comments_fetched: 50 + video_comments: + - comment_id: "Ugw3x9K2mL8f7nPqR1" + comment_author: "ArtHistoryFan" + comment_author_channel_id: "UC7f8n2p3m4x5L6qR7sT8vW" + comment_text: "This virtual tour is amazing! I visited last year and seeing it again brings back wonderful memories. The Night Watch looks even more spectacular in 4K." 
+ comment_published_at: "2023-03-16T09:22:15Z" + comment_like_count: 45 + comment_reply_count: 3 + comment_replies: + - comment_id: "Ugw3x9K2mL8f7nPqR1.8nRq" + comment_author: "Rijksmuseum" + comment_author_channel_id: "UCo2sQFl0mV4K2v6D4d8Z9bQ" + comment_text: "Thank you for visiting and for your kind words! We hope to see you again soon." + comment_published_at: "2023-03-16T11:45:30Z" + comment_like_count: 12 + comment_reply_count: 0 + + - comment_id: "Ugw5y7T4nM9g8oPsS2" + comment_author: "DutchHeritageExplorer" + comment_author_channel_id: "UC9g0n3p4m5x6L7qR8sT9vX" + comment_text: "Great explanation of the Vermeer paintings! Would love to see more content about the restoration process." + comment_published_at: "2023-03-17T16:33:45Z" + comment_like_count: 28 + comment_reply_count: 1 + +# ============================================================================ +# EXAMPLE 2: Video Transcript (Full Text) +# ============================================================================ + +video_transcripts: + + - content_id: "https://nde.nl/ontology/hc/transcript/nl/rijksmuseum-gallery-honour-full" + source_video_url: "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + content_language: "nl" + + full_transcript: | + Welkom in de Eregalerij van het Rijksmuseum. Deze iconische gang is het hart + van het museum en herbergt de grootste meesterwerken uit de Gouden Eeuw. + + We beginnen onze wandeling bij de ingang, waar we direct worden begroet door + Frans Hals' portret van Isaac Massa en Beatrix van der Laen. Dit schilderij + uit 1622 toont de levendige penseelstreek waarmee Hals bekend staat. + + Verderop zien we werken van Jan Steen, bekend om zijn humoristische taferelen + van het dagelijks leven. Zijn schilderij "De vrolijke huishouding" illustreert + het Nederlandse spreekwoord "een huishouden van Jan Steen." + + Aan het einde van de galerie staat het beroemdste schilderij van Nederland: + De Nachtwacht van Rembrandt. 
Dit monumentale werk uit 1642 toont de + schutterij van kapitein Frans Banninck Cocq in actie. + + word_count: 142 + generation_method: AUTOMATIC + generation_model: "whisper-large-v3" + generation_confidence: 0.94 + manual_corrections: true + + # Provenance + generated_by: "OpenAI Whisper" + generation_timestamp: "2025-12-01T08:15:00Z" + reviewed_by: "Rijksmuseum Digital Team" + review_timestamp: "2025-12-02T14:30:00Z" + + transcript_format: PLAIN_TEXT + +# ============================================================================ +# EXAMPLE 3: Video Subtitles (Time-Coded) +# ============================================================================ + +video_subtitles: + + - content_id: "https://nde.nl/ontology/hc/subtitle/nl/rijksmuseum-gallery-honour-en" + source_video_url: "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + content_language: "en" + + subtitle_format: VTT + total_cues: 45 + + subtitle_entries: + - sequence_number: 1 + start_time: "00:00:00.000" + end_time: "00:00:04.500" + text: "Welcome to the Gallery of Honour at the Rijksmuseum." + speaker_label: "Curator" + + - sequence_number: 2 + start_time: "00:00:04.500" + end_time: "00:00:09.200" + text: "This iconic corridor is the heart of the museum" + speaker_label: "Curator" + + - sequence_number: 3 + start_time: "00:00:09.200" + end_time: "00:00:14.800" + text: "and houses the greatest masterpieces from the Golden Age." + speaker_label: "Curator" + + - sequence_number: 4 + start_time: "00:00:14.800" + end_time: "00:00:20.500" + text: "We begin our walk at the entrance, where we are immediately greeted" + speaker_label: "Curator" + + - sequence_number: 5 + start_time: "00:00:20.500" + end_time: "00:00:27.000" + text: "by Frans Hals' portrait of Isaac Massa and Beatrix van der Laen." 
+ speaker_label: "Curator" + + is_closed_captions: false + is_sdh: false + + generation_method: HUMAN + reviewed_by: "Rijksmuseum Translation Team" + review_timestamp: "2023-03-10T16:00:00Z" + +# ============================================================================ +# EXAMPLE 4: Scene Annotations (Computer Vision) +# ============================================================================ + +video_scene_annotations: + + - annotation_id: "https://nde.nl/ontology/hc/annotation/scene/rijksmuseum-01" + source_video_url: "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + annotation_type: SCENE + annotation_motivation: DESCRIBING + + time_segment: + segment_id: "scene-01" + start_time: "00:00:00.000" + end_time: "00:00:45.000" + duration_seconds: 45.0 + + scene_type: ESTABLISHING + scene_label: "Gallery Entrance Introduction" + scene_description: | + Wide shot of the Gallery of Honour entrance. Camera slowly pans + from left to right, revealing the long corridor with paintings + on both walls. Natural light streams in from skylights above. + + detected_elements: + - "architectural interior" + - "museum gallery" + - "natural lighting" + - "oil paintings" + - "parquet flooring" + + dominant_colors: + - "#8B7355" # Brown/wood tones + - "#F5F5DC" # Cream walls + - "#DAA520" # Golden frames + + confidence_score: 0.92 + detection_model: "google-video-intelligence-v1" + detection_timestamp: "2025-12-01T09:00:00Z" + + - annotation_id: "https://nde.nl/ontology/hc/annotation/scene/rijksmuseum-02" + source_video_url: "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + annotation_type: SCENE + annotation_motivation: DESCRIBING + + time_segment: + segment_id: "scene-02" + start_time: "00:00:45.000" + end_time: "00:02:30.000" + duration_seconds: 105.0 + + scene_type: CLOSE_UP + scene_label: "Frans Hals Portrait Detail" + scene_description: | + Close-up shots of Frans Hals' portrait painting showing + brushwork detail and color palette. 
Camera moves slowly + across canvas surface highlighting texture. + + detected_elements: + - "oil painting" + - "portrait" + - "17th century costume" + - "lace collar" + - "dark background" + + confidence_score: 0.88 + detection_model: "google-video-intelligence-v1" + detection_timestamp: "2025-12-01T09:00:00Z" + +# ============================================================================ +# EXAMPLE 5: Object Annotations (Artwork Detection) +# ============================================================================ + +video_object_annotations: + + - annotation_id: "https://nde.nl/ontology/hc/annotation/object/rijksmuseum-night-watch" + source_video_url: "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + annotation_type: OBJECT + annotation_motivation: IDENTIFYING + + time_segment: + segment_id: "night-watch-segment" + start_time: "00:12:30.000" + end_time: "00:15:42.000" + duration_seconds: 192.0 + + detected_objects: + - object_id: "obj-night-watch-001" + object_label: "The Night Watch" + object_category: "painting" + confidence: 0.98 + bounding_box_x: 120 + bounding_box_y: 80 + bounding_box_width: 1680 + bounding_box_height: 920 + wikidata_entity: "Q219831" + artist: "Rembrandt van Rijn" + creation_year: 1642 + + - object_id: "obj-captain-001" + object_label: "Captain Frans Banninck Cocq" + object_category: "person (depicted)" + confidence: 0.91 + bounding_box_x: 450 + bounding_box_y: 150 + bounding_box_width: 380 + bounding_box_height: 720 + wikidata_entity: "Q467089" + + detection_level: FRAME + confidence_score: 0.95 + detection_model: "artwork-recognition-v2" + detection_timestamp: "2025-12-01T09:15:00Z" + +# ============================================================================ +# EXAMPLE 6: OCR Annotations (Text in Video) +# ============================================================================ + +video_ocr_annotations: + + - annotation_id: "https://nde.nl/ontology/hc/annotation/ocr/rijksmuseum-label-01" + source_video_url: 
"https://www.youtube.com/watch?v=dQw4w9WgXcQ" + annotation_type: OCR + annotation_motivation: TRANSCRIBING + + time_segment: + segment_id: "label-segment-01" + start_time: "00:05:15.000" + end_time: "00:05:22.000" + duration_seconds: 7.0 + + detected_text_regions: + - region_id: "text-001" + detected_text: "Johannes Vermeer" + text_language: "nl" + text_type: ARTWORK_LABEL + bounding_box_x: 100 + bounding_box_y: 650 + bounding_box_width: 280 + bounding_box_height: 35 + confidence: 0.97 + + - region_id: "text-002" + detected_text: "Het melkmeisje, ca. 1660" + text_language: "nl" + text_type: ARTWORK_LABEL + bounding_box_x: 100 + bounding_box_y: 690 + bounding_box_width: 320 + bounding_box_height: 30 + confidence: 0.94 + + - region_id: "text-003" + detected_text: "Olieverf op doek" + text_language: "nl" + text_type: CAPTION + bounding_box_x: 100 + bounding_box_y: 725 + bounding_box_width: 200 + bounding_box_height: 25 + confidence: 0.91 + + detection_level: FRAME + confidence_score: 0.94 + detection_model: "google-cloud-vision-ocr" + detection_timestamp: "2025-12-01T09:20:00Z" + +# ============================================================================ +# EXAMPLE 7: Conservation Documentation Video +# Archive use case with technical annotations +# ============================================================================ + +conservation_videos: + + - post_id: "https://nde.nl/ontology/hc/video/nl/rijksmuseum-night-watch-restoration" + platform_type: YOUTUBE + platform_id: "UCo2sQFl0mV4K2v6D4d8Z9bQ" + platform_post_id: "abcd1234efgh" + post_url: "https://www.youtube.com/watch?v=abcd1234efgh" + post_title: "Operation Night Watch - Restoration Process Documentary" + post_description: | + Follow the largest and most detailed art research and conservation project + ever undertaken on a single painting. Operation Night Watch uses cutting-edge + technology to study and restore Rembrandt's masterpiece. 
+ + duration: "PT45M30S" + definition: uhd + aspect_ratio: "16:9" + frame_rate: 24.0 + + caption_available: true + default_language: "en" + default_audio_language: "en" + available_caption_languages: + - "en" + - "nl" + - "de" + - "ja" + + view_count: 892341 + like_count: 28456 + comment_count: 1523 + metrics_observed_at: "2025-12-15T10:30:00Z" + + video_category_id: "28" # Science & Technology + live_broadcast_content: none + is_licensed_content: false + is_embeddable: true + is_made_for_kids: false + + published_at: "2021-06-22T12:00:00Z" + +# ============================================================================ +# EXAMPLE 8: Video Chapters (Navigation Segments) +# YouTube chapters, virtual tour sections, conservation phases +# ============================================================================ + +video_chapters: + + # Rijksmuseum Virtual Tour - Gallery of Honour chapters + - chapter_id: "dQw4w9WgXcQ_chapter_0" + chapter_title: "Introduction - Welcome to the Rijksmuseum" + chapter_index: 0 + chapter_start_seconds: 0.0 + chapter_end_seconds: 45.0 + chapter_start_time: "PT0S" + chapter_end_time: "PT45S" + chapter_description: | + Opening shot of the Gallery of Honour entrance with curator introduction. + Overview of what visitors will see during the virtual tour. + auto_generated: false + chapter_source: MANUAL + chapter_thumbnail_url: "https://i.ytimg.com/vi/dQw4w9WgXcQ/hqdefault.jpg?sqp=-oaymwEjCNACELwBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLBp1" + + - chapter_id: "dQw4w9WgXcQ_chapter_1" + chapter_title: "Frans Hals and Early Portraits" + chapter_index: 1 + chapter_start_seconds: 45.0 + chapter_end_seconds: 180.0 + chapter_start_time: "PT45S" + chapter_end_time: "PT3M" + chapter_description: | + Exploration of Frans Hals' portrait of Isaac Massa and Beatrix van der Laen. + Discussion of Hals' innovative brushwork techniques. 
+ auto_generated: false + chapter_source: MANUAL + heritage_entities_mentioned: + - entity_id: "Q167654" # Frans Hals + entity_type: "Person" + entity_label: "Frans Hals" + - entity_id: "Q2628540" # Portrait of Isaac Massa and Beatrix van der Laen + entity_type: "Artwork" + entity_label: "Portrait of Isaac Massa and Beatrix van der Laen" + + - chapter_id: "dQw4w9WgXcQ_chapter_2" + chapter_title: "Jan Steen's Household Scenes" + chapter_index: 2 + chapter_start_seconds: 180.0 + chapter_end_seconds: 360.0 + chapter_start_time: "PT3M" + chapter_end_time: "PT6M" + chapter_description: | + The humorous domestic scenes of Jan Steen and the meaning behind + the Dutch expression "een huishouden van Jan Steen." + auto_generated: false + chapter_source: MANUAL + heritage_entities_mentioned: + - entity_id: "Q205863" # Jan Steen + entity_type: "Person" + entity_label: "Jan Steen" + + - chapter_id: "dQw4w9WgXcQ_chapter_3" + chapter_title: "Vermeer's Masterpieces" + chapter_index: 3 + chapter_start_seconds: 360.0 + chapter_end_seconds: 600.0 + chapter_start_time: "PT6M" + chapter_end_time: "PT10M" + chapter_description: | + Close examination of Johannes Vermeer's The Milkmaid and other works. + Analysis of Vermeer's distinctive use of light and color. + auto_generated: false + chapter_source: MANUAL + heritage_entities_mentioned: + - entity_id: "Q41264" # Johannes Vermeer + entity_type: "Person" + entity_label: "Johannes Vermeer" + - entity_id: "Q154349" # The Milkmaid + entity_type: "Artwork" + entity_label: "Het melkmeisje (The Milkmaid)" + + - chapter_id: "dQw4w9WgXcQ_chapter_4" + chapter_title: "The Night Watch - Rembrandt's Masterpiece" + chapter_index: 4 + chapter_start_seconds: 600.0 + chapter_end_seconds: 942.0 + chapter_start_time: "PT10M" + chapter_end_time: "PT15M42S" + chapter_description: | + Culmination of the tour at Rembrandt's iconic Night Watch. + Discussion of the painting's history, composition, and restoration. 
+ auto_generated: false + chapter_source: MANUAL + heritage_entities_mentioned: + - entity_id: "Q5598" # Rembrandt + entity_type: "Person" + entity_label: "Rembrandt van Rijn" + - entity_id: "Q219831" # The Night Watch + entity_type: "Artwork" + entity_label: "De Nachtwacht (The Night Watch)" + + # Conservation Documentary - Operation Night Watch chapters + - chapter_id: "abcd1234efgh_chapter_0" + chapter_title: "Project Overview" + chapter_index: 0 + chapter_start_seconds: 0.0 + chapter_end_seconds: 300.0 + chapter_start_time: "PT0S" + chapter_end_time: "PT5M" + chapter_description: | + Introduction to Operation Night Watch, the most extensive research + and conservation project ever undertaken on a single painting. + auto_generated: false + chapter_source: MANUAL + + - chapter_id: "abcd1234efgh_chapter_1" + chapter_title: "Technical Imaging and Analysis" + chapter_index: 1 + chapter_start_seconds: 300.0 + chapter_end_seconds: 900.0 + chapter_start_time: "PT5M" + chapter_end_time: "PT15M" + chapter_description: | + Multi-spectral imaging, X-ray analysis, and macro photography + revealing hidden layers and underdrawings in the painting. + auto_generated: false + chapter_source: MANUAL + conservation_phase: "DOCUMENTATION" + + - chapter_id: "abcd1234efgh_chapter_2" + chapter_title: "Condition Assessment" + chapter_index: 2 + chapter_start_seconds: 900.0 + chapter_end_seconds: 1500.0 + chapter_start_time: "PT15M" + chapter_end_time: "PT25M" + chapter_description: | + Detailed examination of the painting's condition, including + craquelure patterns, varnish degradation, and previous restorations. 
+ auto_generated: false + chapter_source: MANUAL + conservation_phase: "ASSESSMENT" + + - chapter_id: "abcd1234efgh_chapter_3" + chapter_title: "Cleaning Process" + chapter_index: 3 + chapter_start_seconds: 1500.0 + chapter_end_seconds: 2100.0 + chapter_start_time: "PT25M" + chapter_end_time: "PT35M" + chapter_description: | + The meticulous cleaning process using specialized solvents and + techniques to remove centuries of accumulated dirt and varnish. + auto_generated: false + chapter_source: MANUAL + conservation_phase: "TREATMENT" + + - chapter_id: "abcd1234efgh_chapter_4" + chapter_title: "AI-Assisted Reconstruction" + chapter_index: 4 + chapter_start_seconds: 2100.0 + chapter_end_seconds: 2730.0 + chapter_start_time: "PT35M" + chapter_end_time: "PT45M30S" + chapter_description: | + How artificial intelligence was used to digitally reconstruct + missing portions of the painting that were cut off in 1715. + auto_generated: false + chapter_source: MANUAL + conservation_phase: "DIGITAL_RECONSTRUCTION" + +# ============================================================================ +# EXAMPLE 9: Video Chapter Lists (Complete Sets) +# ============================================================================ + +video_chapter_lists: + + # Complete chapter list for Rijksmuseum virtual tour + - list_id: "https://nde.nl/ontology/hc/chapterlist/rijksmuseum-gallery-honour" + video_id: "dQw4w9WgXcQ" + video_url: "https://www.youtube.com/watch?v=dQw4w9WgXcQ" + video_title: "The Gallery of Honour - Rijksmuseum Virtual Tour" + + chapters: + - "dQw4w9WgXcQ_chapter_0" + - "dQw4w9WgXcQ_chapter_1" + - "dQw4w9WgXcQ_chapter_2" + - "dQw4w9WgXcQ_chapter_3" + - "dQw4w9WgXcQ_chapter_4" + + total_chapters: 5 + chapters_source: MANUAL + covers_full_video: true + video_duration_seconds: 942.0 + + extraction_timestamp: "2025-12-15T14:00:00Z" + extraction_method: "YouTube Data API v3" + + # Complete chapter list for Operation Night Watch documentary + - list_id: 
"https://nde.nl/ontology/hc/chapterlist/operation-night-watch" + video_id: "abcd1234efgh" + video_url: "https://www.youtube.com/watch?v=abcd1234efgh" + video_title: "Operation Night Watch - Restoration Process Documentary" + + chapters: + - "abcd1234efgh_chapter_0" + - "abcd1234efgh_chapter_1" + - "abcd1234efgh_chapter_2" + - "abcd1234efgh_chapter_3" + - "abcd1234efgh_chapter_4" + + total_chapters: 5 + chapters_source: MANUAL + covers_full_video: true + video_duration_seconds: 2730.0 + + extraction_timestamp: "2025-12-15T14:00:00Z" + extraction_method: "YouTube Data API v3" + +# ============================================================================ +# EXAMPLE 10: Video Audio Annotations (Speech, Music, Sound Events) +# ============================================================================ + +video_audio_annotations: + + # Example 1: Curator Interview with Speaker Diarization + - annotation_id: "https://nde.nl/ontology/hc/annotation/audio/rijksmuseum-interview-01" + source_video_url: "https://www.youtube.com/watch?v=xyz789curator" + annotation_type: AUDIO + annotation_motivation: TRANSCRIBING + + # Primary audio characteristics + primary_audio_event_type: SPEECH + speech_detected: true + speech_language: "nl" + languages_detected: + - "nl" + - "en" # Some English art terminology used + + # Speaker diarization (who spoke when) + diarization_enabled: true + speaker_count: 2 + speaker_labels: + - "Dr. Taco Dibbits" + - "Interviewer" + + diarization_segments: + - segment_id: "diar-001" + diarization_start_seconds: 0.0 + diarization_end_seconds: 8.5 + diarization_start_time: "PT0S" + diarization_end_time: "PT8.5S" + diarization_speaker_id: "spk_001" + diarization_speaker_label: "Interviewer" + diarization_confidence: 0.94 + transcript_snippet: "Welkom bij het Rijksmuseum. Vandaag spreken we met de directeur..." 
+ + - segment_id: "diar-002" + diarization_start_seconds: 8.5 + diarization_end_seconds: 45.0 + diarization_start_time: "PT8.5S" + diarization_end_time: "PT45S" + diarization_speaker_id: "spk_002" + diarization_speaker_label: "Dr. Taco Dibbits" + diarization_confidence: 0.97 + transcript_snippet: "Dank u wel. Het is een bijzonder moment voor het museum..." + + - segment_id: "diar-003" + diarization_start_seconds: 45.0 + diarization_end_seconds: 52.0 + diarization_start_time: "PT45S" + diarization_end_time: "PT52S" + diarization_speaker_id: "spk_001" + diarization_speaker_label: "Interviewer" + diarization_confidence: 0.92 + transcript_snippet: "Kunt u ons meer vertellen over de nieuwe tentoonstelling?" + + - segment_id: "diar-004" + diarization_start_seconds: 52.0 + diarization_end_seconds: 180.0 + diarization_start_time: "PT52S" + diarization_end_time: "PT3M" + diarization_speaker_id: "spk_002" + diarization_speaker_label: "Dr. Taco Dibbits" + diarization_confidence: 0.96 + transcript_snippet: "Jazeker. Deze tentoonstelling is uniek omdat we voor het eerst..." 
+ + # Audio quality metrics + audio_quality_score: 0.92 + snr_db: 28.0 + has_clipping: false + audio_channels: 2 + sample_rate_hz: 48000 + + # No music in this interview + music_detected: false + + # Detection metadata + detection_model: "whisper-large-v3-diarize" + detection_timestamp: "2025-12-15T16:00:00Z" + confidence_score: 0.94 + + # Example 2: Exhibition Promotional Video with Music + - annotation_id: "https://nde.nl/ontology/hc/annotation/audio/vangogh-exhibition-promo" + source_video_url: "https://www.youtube.com/watch?v=promo2025vgm" + annotation_type: AUDIO + annotation_motivation: DESCRIBING + + # Mixed speech and music + primary_audio_event_type: MIXED + speech_detected: true + music_detected: true + + speech_language: "en" + languages_detected: + - "en" + - "nl" + + # Speech segments (voiceover narration) + speech_segments: + - segment_id: "speech-001" + speech_start_seconds: 5.0 + speech_end_seconds: 25.0 + speech_start_time: "PT5S" + speech_end_time: "PT25S" + speaker_id: "narrator" + speaker_label: "Voiceover Narrator" + speech_type: NARRATION + transcript_snippet: "This spring, the Van Gogh Museum presents a groundbreaking exhibition..." + + - segment_id: "speech-002" + speech_start_seconds: 45.0 + speech_end_seconds: 60.0 + speech_start_time: "PT45S" + speech_end_time: "PT1M" + speaker_id: "curator" + speaker_label: "Exhibition Curator" + speech_type: INTERVIEW + transcript_snippet: "Van Gogh's use of color was revolutionary..." 
+ + # Music segments (background and featured) + music_segments: + - segment_id: "music-001" + music_start_seconds: 0.0 + music_end_seconds: 120.0 + music_start_time: "PT0S" + music_end_time: "PT2M" + music_type: BACKGROUND + music_genre: "classical" + is_background: true + volume_level: "low" + music_title: null # Unknown background track + + - segment_id: "music-002" + music_start_seconds: 90.0 + music_end_seconds: 115.0 + music_start_time: "PT1M30S" + music_end_time: "PT1M55S" + music_type: DRAMATIC + music_genre: "orchestral" + is_background: false + volume_level: "medium" + music_description: "Dramatic orchestral swell accompanying visual climax" + + music_genres_detected: + - "classical" + - "orchestral" + + # Audio quality metrics + audio_quality_score: 0.88 + snr_db: 22.0 # Lower due to music mixing + audio_channels: 2 + sample_rate_hz: 48000 + + detection_model: "audio-analysis-v2" + detection_timestamp: "2025-12-15T16:30:00Z" + confidence_score: 0.86 + + # Example 3: Museum Lecture Recording with Audience Reactions + - annotation_id: "https://nde.nl/ontology/hc/annotation/audio/stedelijk-lecture-2024" + source_video_url: "https://www.youtube.com/watch?v=lecture2024sted" + annotation_type: AUDIO + annotation_motivation: TRANSCRIBING + + primary_audio_event_type: SPEECH + speech_detected: true + music_detected: false + + speech_language: "nl" + languages_detected: + - "nl" + + # Main lecture content + diarization_enabled: true + speaker_count: 1 + speaker_labels: + - "Prof. Dr. Beatrix Ruf" + + diarization_segments: + - segment_id: "lecture-001" + diarization_start_seconds: 0.0 + diarization_end_seconds: 1800.0 + diarization_start_time: "PT0S" + diarization_end_time: "PT30M" + diarization_speaker_id: "spk_main" + diarization_speaker_label: "Prof. Dr. 
Beatrix Ruf" + diarization_confidence: 0.98 + + # Sound events detected (audience reactions) + sound_events_detected: true + sound_event_types: + - APPLAUSE + - LAUGHTER + - CROWD_NOISE + + sound_event_segments: + - segment_id: "sound-001" + sound_start_seconds: 420.0 + sound_end_seconds: 425.0 + sound_start_time: "PT7M" + sound_end_time: "PT7M5S" + sound_event_type: LAUGHTER + sound_confidence: 0.89 + sound_description: "Audience laughter in response to humorous anecdote" + + - segment_id: "sound-002" + sound_start_seconds: 1795.0 + sound_end_seconds: 1810.0 + sound_start_time: "PT29M55S" + sound_end_time: "PT30M10S" + sound_event_type: APPLAUSE + sound_confidence: 0.96 + sound_description: "Audience applause at conclusion of lecture" + + - segment_id: "sound-003" + sound_start_seconds: 1200.0 + sound_end_seconds: 1203.0 + sound_start_time: "PT20M" + sound_end_time: "PT20M3S" + sound_event_type: CROWD_NOISE + sound_confidence: 0.72 + sound_description: "Brief audience murmuring during slide transition" + + # Audio quality metrics (live recording) + audio_quality_score: 0.78 + snr_db: 18.0 # Lower due to room acoustics + has_reverb: true + audio_channels: 2 + sample_rate_hz: 44100 + + detection_model: "audio-event-detector-v1" + detection_timestamp: "2025-12-15T17:00:00Z" + confidence_score: 0.82 + +# ============================================================================ +# PROVENANCE METADATA +# ============================================================================ + +provenance: + data_source: EXAMPLE_INSTANCES + data_tier: TIER_4_INFERRED + extraction_date: "2025-12-16T00:00:00Z" + extraction_method: "Manual example creation for schema documentation" + confidence_score: 1.0 + notes: | + Example instances demonstrating video content modeling capabilities. + Based on real heritage institution video patterns but with synthetic data. 
+ + Classes demonstrated: + - VideoPost (with VideoComment) + - VideoTranscript + - VideoSubtitle + - VideoSceneAnnotation + - VideoObjectAnnotation + - VideoOCRAnnotation + - VideoChapter (NEW in v0.9.10) + - VideoChapterList (NEW in v0.9.10) + - VideoAudioAnnotation (NEW in v0.9.10) + - SpeechSegment + - DiarizationSegment + - MusicSegment + - SoundEventSegment + + Heritage use cases covered: + - Virtual museum tours + - Conservation documentation + - Artwork recognition + - Museum label OCR + - Video chapter navigation (NEW) + - Speaker diarization in interviews (NEW) + - Music detection in promotional content (NEW) + - Audience reaction detection in lectures (NEW) + + Enumerations demonstrated: + - ChapterSourceEnum: MANUAL, AUTO_GENERATED, YOUTUBE_API + - AudioEventTypeEnum: SPEECH, MUSIC, MIXED, AMBIENT, SILENCE + - SoundEventTypeEnum: APPLAUSE, LAUGHTER, CROWD_NOISE + - MusicTypeEnum: BACKGROUND, FOREGROUND, DRAMATIC + + Heritage entities referenced (Wikidata): + - Q5598 (Rembrandt van Rijn) + - Q41264 (Johannes Vermeer) + - Q167654 (Frans Hals) + - Q205863 (Jan Steen) + - Q219831 (The Night Watch) + - Q154349 (The Milkmaid) + - Q2628540 (Portrait of Isaac Massa and Beatrix van der Laen) diff --git a/schemas/20251121/linkml/modules/classes/VideoAnnotation.yaml b/schemas/20251121/linkml/modules/classes/VideoAnnotation.yaml new file mode 100644 index 0000000000..15229d97d7 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/VideoAnnotation.yaml @@ -0,0 +1,542 @@ +# Video Annotation Class +# Abstract base class for computer vision and multimodal video annotations +# +# Part of Heritage Custodian Ontology v0.9.5 +# +# HIERARCHY: +# E73_Information_Object (CIDOC-CRM) +# │ +# └── VideoTextContent (abstract base) +# │ +# ├── VideoTranscript (audio-derived) +# │ │ +# │ └── VideoSubtitle (time-coded captions) +# │ +# └── VideoAnnotation (this class - ABSTRACT) +# │ +# ├── VideoSceneAnnotation (scene/shot detection) +# ├── VideoObjectAnnotation 
(object/face/logo detection) +# └── VideoOCRAnnotation (text-in-video extraction) +# +# DESIGN RATIONALE: +# VideoAnnotation is the abstract parent for all annotations derived from +# visual analysis of video content. Unlike VideoTranscript (audio-derived), +# these annotations come from computer vision, multimodal AI, or manual +# visual analysis. +# +# Key differences from transcript branch: +# - Frame-based rather than audio-based analysis +# - Spatial information (bounding boxes, regions) +# - Detection thresholds and frame sampling +# - Multiple detection types per segment +# +# ONTOLOGY ALIGNMENT: +# - W3C Web Annotation (oa:Annotation) for annotation structure +# - CIDOC-CRM E13_Attribute_Assignment for attribution activities +# - IIIF Presentation API for spatial/temporal selectors + +id: https://nde.nl/ontology/hc/class/VideoAnnotation +name: video_annotation_class +title: Video Annotation Class + +imports: + - linkml:types + - ./VideoTextContent + - ./VideoTimeSegment + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + dcterms: http://purl.org/dc/terms/ + prov: http://www.w3.org/ns/prov# + crm: http://www.cidoc-crm.org/cidoc-crm/ + oa: http://www.w3.org/ns/oa# + as: https://www.w3.org/ns/activitystreams# + +default_prefix: hc + +classes: + + VideoAnnotation: + is_a: VideoTextContent + class_uri: oa:Annotation + abstract: true + description: | + Abstract base class for computer vision and multimodal video annotations. + + **DEFINITION**: + + VideoAnnotation represents structured information derived from visual + analysis of video content. 
+          dcterms:conformsTo <http://www.w3.org/TR/media-frags/> ;
Thresholds filter noise: + + | Threshold | Use Case | + |-----------|----------| + | 0.9+ | High precision, production display | + | 0.7-0.9 | Balanced, general use | + | 0.5-0.7 | High recall, research/review | + | < 0.5 | Raw output, needs filtering | + + **MODEL ARCHITECTURE TRACKING**: + + Different model architectures have different characteristics: + + | Architecture | Examples | Strengths | + |--------------|----------|-----------| + | CNN | ResNet, VGG | Fast inference, good for objects | + | Transformer | ViT, CLIP | Better context, multimodal | + | Hybrid | DETR, Swin | Balance of speed and accuracy | + + **HERITAGE INSTITUTION CONTEXT**: + + Video annotations enable: + - **Discovery**: Find videos containing specific objects/artworks + - **Accessibility**: Scene descriptions for visually impaired + - **Research**: Analyze visual content at scale + - **Preservation**: Document visual content as text + - **Linking**: Connect detected artworks to collection records + + **CIDOC-CRM E13_Attribute_Assignment**: + + Annotations are attribute assignments - asserting properties about + video segments. The CV model or human annotator is the assigning agent. + + exact_mappings: + - oa:Annotation + + close_mappings: + - crm:E13_Attribute_Assignment + + related_mappings: + - as:Activity + - schema:ClaimReview + + slots: + # Annotation structure + - annotation_type + - annotation_segments + + # Detection parameters + - detection_threshold + - detection_count + + # Frame analysis + - frame_sample_rate + - total_frames_analyzed + - keyframe_extraction + + # Model details + - model_architecture + - model_task + + # Spatial information + - includes_bounding_boxes + - includes_segmentation_masks + + # Annotation motivation + - annotation_motivation + + slot_usage: + annotation_type: + slot_uri: dcterms:type + description: | + High-level type classification for this annotation. + + Dublin Core: type for resource categorization. 
+ + **Standard Types**: + - SCENE_DETECTION: Shot/scene boundary detection + - OBJECT_DETECTION: Object, face, logo detection + - OCR: Text-in-video extraction + - ACTION_RECOGNITION: Human action detection + - SEMANTIC_SEGMENTATION: Pixel-level classification + - MULTIMODAL: Combined audio+visual analysis + range: AnnotationTypeEnum + required: true + examples: + - value: "OBJECT_DETECTION" + description: "Object and face detection annotation" + + annotation_segments: + slot_uri: oa:hasBody + description: | + List of temporal segments with detection results. + + Web Annotation: hasBody links annotation to its content. + + Each segment contains: + - Time boundaries (start/end) + - Detection text/description + - Per-segment confidence + + Reuses VideoTimeSegment for consistent temporal modeling. + range: VideoTimeSegment + multivalued: true + required: false + inlined_as_list: true + examples: + - value: "[{start_seconds: 30.0, end_seconds: 35.0, segment_text: 'Night Watch painting visible'}]" + description: "Object detection segment" + + detection_threshold: + slot_uri: hc:detectionThreshold + description: | + Minimum confidence threshold used for detection filtering. + + Detections below this threshold were excluded from results. + + Range: 0.0 to 1.0 + + **Common Values**: + - 0.5: Standard threshold (balanced) + - 0.7: High precision mode + - 0.3: High recall mode (includes uncertain detections) + range: float + required: false + minimum_value: 0.0 + maximum_value: 1.0 + examples: + - value: 0.5 + description: "Standard detection threshold" + + detection_count: + slot_uri: hc:detectionCount + description: | + Total number of detections across all analyzed frames. + + Useful for: + - Understanding annotation density + - Quality assessment + - Performance metrics + + Note: May be higher than annotation_segments count if segments + are aggregated or filtered. 
+ range: integer + required: false + minimum_value: 0 + examples: + - value: 342 + description: "342 total detections found" + + frame_sample_rate: + slot_uri: hc:frameSampleRate + description: | + Number of frames analyzed per second of video. + + **Common Values**: + - 1.0: One frame per second (efficient) + - 5.0: Five frames per second (balanced) + - 30.0: Every frame at 30fps (thorough but expensive) + - 0.1: One frame every 10 seconds (overview only) + + Higher rates catch more content but increase compute cost. + range: float + required: false + minimum_value: 0.0 + examples: + - value: 1.0 + description: "Analyzed 1 frame per second" + + total_frames_analyzed: + slot_uri: hc:totalFramesAnalyzed + description: | + Total number of video frames that were analyzed. + + Calculated as: video_duration_seconds × frame_sample_rate + + Useful for: + - Understanding analysis coverage + - Cost estimation + - Reproducibility + range: integer + required: false + minimum_value: 0 + examples: + - value: 1800 + description: "Analyzed 1,800 frames (30 min video at 1 fps)" + + keyframe_extraction: + slot_uri: hc:keyframeExtraction + description: | + Whether keyframe extraction was used instead of uniform sampling. + + **Keyframe extraction** selects visually distinct frames + (scene changes, significant motion) rather than uniform intervals. + + - true: Keyframes extracted (variable frame selection) + - false: Uniform sampling at frame_sample_rate + + Keyframe extraction is more efficient but may miss content + between scene changes. + range: boolean + required: false + examples: + - value: true + description: "Used keyframe extraction" + + model_architecture: + slot_uri: hc:modelArchitecture + description: | + Architecture type of the CV/ML model used. 
+ + **Common Architectures**: + - CNN: Convolutional Neural Network (ResNet, VGG, EfficientNet) + - Transformer: Vision Transformer (ViT, Swin, CLIP) + - Hybrid: Combined architectures (DETR, ConvNeXt) + - RNN: Recurrent (for temporal analysis) + - GAN: Generative (for reconstruction tasks) + + Useful for understanding model capabilities and limitations. + range: string + required: false + examples: + - value: "Transformer" + description: "Vision Transformer architecture" + - value: "CNN" + description: "Convolutional Neural Network" + + model_task: + slot_uri: hc:modelTask + description: | + Specific task the model was trained for. + + **Common Tasks**: + - classification: Image/frame classification + - detection: Object detection with bounding boxes + - segmentation: Pixel-level classification + - captioning: Image/video captioning + - embedding: Feature extraction for similarity + + A model's task determines its output format. + range: string + required: false + examples: + - value: "detection" + description: "Object detection task" + - value: "captioning" + description: "Video captioning task" + + includes_bounding_boxes: + slot_uri: hc:includesBoundingBoxes + description: | + Whether annotation includes spatial bounding box coordinates. + + Bounding boxes define rectangular regions in frames where + objects/faces/text were detected. + + Format typically: [x, y, width, height] or [x1, y1, x2, y2] + + - true: Spatial coordinates available in segment data + - false: Only temporal information (no spatial) + range: boolean + required: false + examples: + - value: true + description: "Includes bounding box coordinates" + + includes_segmentation_masks: + slot_uri: hc:includesSegmentationMasks + description: | + Whether annotation includes pixel-level segmentation masks. + + Segmentation masks provide precise object boundaries + (more detailed than bounding boxes). 
+ + - true: Pixel masks available (typically as separate files) + - false: No segmentation data + + Masks are memory-intensive; often stored externally. + range: boolean + required: false + examples: + - value: false + description: "No segmentation masks included" + + annotation_motivation: + slot_uri: oa:motivatedBy + description: | + The motivation or purpose for creating this annotation. + + Web Annotation: motivatedBy describes why annotation was created. + + **Standard Motivations** (from W3C Web Annotation): + - classifying: Categorizing content + - describing: Adding description + - identifying: Identifying depicted things + - tagging: Adding tags/keywords + - linking: Linking to external resources + + **Heritage-Specific**: + - accessibility: For accessibility services + - discovery: For search/discovery + - preservation: For digital preservation + range: AnnotationMotivationEnum + required: false + examples: + - value: "CLASSIFYING" + description: "Annotation for classification purposes" + + comments: + - "Abstract base for all CV/multimodal video annotations" + - "Extends VideoTextContent with frame-based analysis parameters" + - "W3C Web Annotation compatible structure" + - "Supports both temporal and spatial annotation" + - "Tracks detection thresholds and model architecture" + + see_also: + - "https://www.w3.org/TR/annotation-model/" + - "http://www.cidoc-crm.org/cidoc-crm/E13_Attribute_Assignment" + - "https://iiif.io/api/presentation/3.0/" + +# ============================================================================ +# Enumerations +# ============================================================================ + +enums: + + AnnotationTypeEnum: + description: | + Types of video annotation based on analysis method. 
+ permissible_values: + SCENE_DETECTION: + description: Shot and scene boundary detection + OBJECT_DETECTION: + description: Object, face, and logo detection + OCR: + description: Optical character recognition (text-in-video) + ACTION_RECOGNITION: + description: Human action and activity detection + SEMANTIC_SEGMENTATION: + description: Pixel-level semantic classification + POSE_ESTIMATION: + description: Human body pose detection + EMOTION_RECOGNITION: + description: Facial emotion/expression analysis + MULTIMODAL: + description: Combined audio-visual analysis + CAPTIONING: + description: Automated video captioning/description + CUSTOM: + description: Custom annotation type + + AnnotationMotivationEnum: + description: | + Motivation for creating annotation (W3C Web Annotation aligned). + permissible_values: + CLASSIFYING: + description: Categorizing or classifying content + meaning: oa:classifying + DESCRIBING: + description: Adding descriptive information + meaning: oa:describing + IDENTIFYING: + description: Identifying depicted entities + meaning: oa:identifying + TAGGING: + description: Adding tags or keywords + meaning: oa:tagging + LINKING: + description: Linking to external resources + meaning: oa:linking + COMMENTING: + description: Adding commentary + meaning: oa:commenting + ACCESSIBILITY: + description: Providing accessibility support + DISCOVERY: + description: Enabling search and discovery + PRESERVATION: + description: Supporting digital preservation + RESEARCH: + description: Supporting research and analysis + +# ============================================================================ +# Slot Definitions +# ============================================================================ + +slots: + annotation_type: + description: High-level type of video annotation + range: AnnotationTypeEnum + + annotation_segments: + description: List of temporal segments with detection results + range: VideoTimeSegment + multivalued: true + + detection_threshold: 
+# Part of Heritage Custodian Ontology v0.9.10
+# │ - Face detection and recognition +# │ - Logo and landmark detection +# │ +# └── VideoOCRAnnotation (this file) +# - Text-in-video extraction +# - Title cards, captions, signs +# - Document and handwriting recognition +# +# HERITAGE INSTITUTION CONTEXT: +# These annotation types enable rich discovery and accessibility: +# - Find videos showing specific artworks or artifacts +# - Identify speakers and staff members in videos +# - Extract and index on-screen text +# - Navigate videos by scene or content type + +id: https://nde.nl/ontology/hc/class/VideoAnnotationTypes +name: video_annotation_types +title: Video Annotation Types + +imports: + - linkml:types + - ./VideoAnnotation + - ./VideoTimeSegment + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + dcterms: http://purl.org/dc/terms/ + crm: http://www.cidoc-crm.org/cidoc-crm/ + oa: http://www.w3.org/ns/oa# + +default_prefix: hc + +# ============================================================================ +# Classes +# ============================================================================ + +classes: + + # ========================================================================== + # VideoSceneAnnotation - Shot/Scene Detection + # ========================================================================== + + VideoSceneAnnotation: + is_a: VideoAnnotation + class_uri: hc:VideoSceneAnnotation + abstract: false + description: | + Annotation for video scene and shot boundary detection. + + **DEFINITION**: + + VideoSceneAnnotation captures the temporal structure of video content + by identifying shot boundaries, scene changes, and transitions. 
+ + **TERMINOLOGY**: + + | Term | Definition | + |------|------------| + | **Shot** | Continuous footage from a single camera take | + | **Scene** | Semantic unit (may contain multiple shots) | + | **Transition** | Visual effect between shots (cut, fade, dissolve) | + + **SHOT vs SCENE**: + + - **Shot detection**: Technical boundary (camera edit points) + - **Scene detection**: Semantic boundary (content/location change) + + This class supports both levels of granularity via `detection_level`. + + **DETECTION METHODS**: + + | Method | Approach | Accuracy | + |--------|----------|----------| + | Histogram diff | Color histogram changes | Good for cuts | + | CNN-based | Learned visual features | Best for all types | + | Motion-based | Optical flow analysis | Good for dissolves | + | Audio-visual | Combined modalities | Best for scenes | + + **TRANSITION TYPES**: + + | Type | Visual Effect | + |------|---------------| + | CUT | Instantaneous change (most common) | + | FADE_IN | Gradual appearance from black | + | FADE_OUT | Gradual disappearance to black | + | DISSOLVE | One shot blends into next | + | WIPE | Geometric transition effect | + + **HERITAGE USE CASES**: + + - **Video navigation**: Jump to specific scenes/chapters + - **Highlight extraction**: Key scenes for previews + - **Content analysis**: Understanding video structure + - **Preservation**: Document editing decisions + + **SCENE TYPE CLASSIFICATION**: + + Heritage videos often contain predictable scene types: + - Exhibition halls and galleries + - Close-ups of artworks/artifacts + - Interviews with curators/experts + - Exterior establishing shots + - Archival footage or photographs + + exact_mappings: + - hc:VideoSceneAnnotation + + close_mappings: + - schema:Clip + + slots: + # Scene structure + - scene_segments + - scene_count + - average_scene_duration_seconds + + # Detection granularity + - detection_level + + # Scene classification + - scene_types_detected + - transition_types_detected + + # 
+        slot_uri: hc:averageSceneDurationSeconds
+ + **Common Heritage Scene Types**: + - INTERIOR: Indoor shots + - EXTERIOR: Outdoor shots + - CLOSEUP: Detail shots of objects + - INTERVIEW: Talking head / interview + - ARCHIVAL: Historical footage/photos + - TITLE_CARD: Text overlays + - B_ROLL: Supplementary footage + range: SceneTypeEnum + multivalued: true + required: false + examples: + - value: "[INTERIOR, CLOSEUP, INTERVIEW]" + description: "Scene types found in video" + + transition_types_detected: + slot_uri: hc:transitionTypesDetected + description: | + Types of transitions detected between scenes. + + Most heritage videos use simple cuts; complex + transitions may indicate professional production. + range: TransitionTypeEnum + multivalued: true + required: false + examples: + - value: "[CUT, FADE_IN, FADE_OUT]" + description: "Transitions found in video" + + cut_count: + slot_uri: hc:cutCount + description: | + Number of hard cuts (instantaneous transitions). + + Cuts are the most common transition type. + range: integer + required: false + minimum_value: 0 + examples: + - value: 42 + description: "42 cuts detected" + + fade_count: + slot_uri: hc:fadeCount + description: | + Number of fade transitions (fade in + fade out). + + Fades often indicate section boundaries or time passage. + range: integer + required: false + minimum_value: 0 + examples: + - value: 5 + description: "5 fades detected" + + dissolve_count: + slot_uri: hc:dissolveCount + description: | + Number of dissolve/crossfade transitions. + + Dissolves create smooth blending between scenes. 
+ range: integer + required: false + minimum_value: 0 + examples: + - value: 3 + description: "3 dissolves detected" + + comments: + - "Scene and shot boundary detection" + - "Supports both technical (shot) and semantic (scene) analysis" + - "Transition type classification" + - "Scene type labeling for heritage content" + + see_also: + - "https://en.wikipedia.org/wiki/Shot_(filmmaking)" + + # ========================================================================== + # VideoObjectAnnotation - Object/Face/Logo Detection + # ========================================================================== + + VideoObjectAnnotation: + is_a: VideoAnnotation + class_uri: hc:VideoObjectAnnotation + abstract: false + description: | + Annotation for object, face, and entity detection in video. + + **DEFINITION**: + + VideoObjectAnnotation captures visual entities detected in video frames: + + | Detection Type | Examples | + |----------------|----------| + | Objects | Paintings, sculptures, artifacts, furniture | + | Faces | People, staff, visitors | + | Logos | Institution logos, brand marks | + | Landmarks | Buildings, monuments, locations | + | Text regions | Signs, labels (see VideoOCRAnnotation for text extraction) | + + **OBJECT DETECTION vs CLASSIFICATION**: + + - **Classification**: What is in the frame? (labels only) + - **Detection**: What + where? (labels + bounding boxes) + - **Segmentation**: What + precise boundary? (pixel masks) + + This class supports all three via inherited flags. 
+ + **HERITAGE-SPECIFIC OBJECT CLASSES**: + + | Category | Objects | + |----------|---------| + | **Art** | Painting, sculpture, drawing, print, photograph | + | **Artifacts** | Pottery, jewelry, tools, textiles, furniture | + | **Documents** | Books, manuscripts, letters, maps | + | **Architecture** | Columns, arches, facades, interiors | + | **Natural** | Specimens, fossils, botanical samples | + + **FACE DETECTION AND RECOGNITION**: + + Two distinct capabilities: + - **Detection**: Locate faces (bounding boxes) + - **Recognition**: Identify who (requires reference database) + + Heritage use cases: + - Identify curators, directors, experts in videos + - Find videos featuring specific people + - Accessibility: Announce speaker changes + + **LINKING TO COLLECTION RECORDS**: + + Detected objects can be linked to collection database: + + ```yaml + detected_objects: + - label: "The Night Watch" + wikidata_id: Q219831 + collection_id: "SK-C-5" # Rijksmuseum ID + confidence: 0.95 + ``` + + **BOUNDING BOX FORMAT**: + + Coordinates are normalized (0.0-1.0) relative to frame dimensions: + - `x`: Left edge (0.0 = left, 1.0 = right) + - `y`: Top edge (0.0 = top, 1.0 = bottom) + - `width`: Box width as fraction of frame width + - `height`: Box height as fraction of frame height + + exact_mappings: + - hc:VideoObjectAnnotation + + close_mappings: + - crm:E1_CRM_Entity + + related_mappings: + - schema:ImageObject + + slots: + # Detected entities + - detected_objects + - detected_faces + - detected_logos + - detected_landmarks + + # Detection statistics + - unique_object_count + - unique_face_count + - object_classes_detected + + # Tracking + - includes_object_tracking + - tracking_ids_assigned + + # Linking + - linked_to_collection + + slot_usage: + detected_objects: + slot_uri: hc:detectedObjects + description: | + List of detected objects with labels and locations. 
+ + Each detection includes: + - Object class/label + - Confidence score + - Temporal segment (when visible) + - Bounding box (if includes_bounding_boxes=true) + + For heritage: paintings, artifacts, specimens, etc. + range: DetectedObject + multivalued: true + required: false + inlined_as_list: true + examples: + - value: "[{label: 'painting', confidence: 0.92, segment: {...}}]" + description: "Detected painting object" + + detected_faces: + slot_uri: hc:detectedFaces + description: | + List of detected faces with optional identity. + + Each detection includes: + - Face bounding box and confidence + - Temporal segment (when visible) + - Person identity (if recognized) + - Facial landmarks (if extracted) + range: DetectedFace + multivalued: true + required: false + inlined_as_list: true + examples: + - value: "[{confidence: 0.88, person_id: 'curator_001'}]" + description: "Detected and identified face" + + detected_logos: + slot_uri: hc:detectedLogos + description: | + List of detected logos or brand marks. + + Heritage use cases: + - Institution logos + - Sponsor logos + - Historical brand marks on artifacts + range: DetectedLogo + multivalued: true + required: false + inlined_as_list: true + examples: + - value: "[{label: 'Rijksmuseum logo', confidence: 0.95}]" + description: "Detected institution logo" + + detected_landmarks: + slot_uri: hc:detectedLandmarks + description: | + List of detected landmarks or buildings. + + Uses landmark recognition to identify: + - Famous buildings and monuments + - Museum facades + - Heritage sites + range: DetectedLandmark + multivalued: true + required: false + inlined_as_list: true + examples: + - value: "[{label: 'Rijksmuseum building', wikidata_id: 'Q190804'}]" + description: "Detected landmark with Wikidata link" + + unique_object_count: + slot_uri: hc:uniqueObjectCount + description: | + Number of unique objects detected (deduplicated). + + Same object appearing in multiple frames counts once. 
+ Requires object tracking or deduplication. + range: integer + required: false + minimum_value: 0 + examples: + - value: 15 + description: "15 unique objects identified" + + unique_face_count: + slot_uri: hc:uniqueFaceCount + description: | + Number of unique faces detected (deduplicated). + + Same person appearing multiple times counts once. + Requires face clustering or recognition. + range: integer + required: false + minimum_value: 0 + examples: + - value: 3 + description: "3 unique people identified" + + object_classes_detected: + slot_uri: hc:objectClassesDetected + description: | + List of unique object class labels detected. + + Vocabulary depends on model training: + - COCO: 80 common object categories + - ImageNet: 1000 categories + - Custom: Heritage-specific categories + range: string + multivalued: true + required: false + examples: + - value: "[painting, person, sculpture, book]" + description: "Object classes found in video" + + includes_object_tracking: + slot_uri: hc:includesObjectTracking + description: | + Whether objects are tracked across frames. + + Tracking maintains object identity over time: + - true: Same object has consistent ID across frames + - false: Independent per-frame detections + + Tracking enables counting unique objects and + understanding object movement/presence. + range: boolean + required: false + examples: + - value: true + description: "Objects tracked across frames" + + tracking_ids_assigned: + slot_uri: hc:trackingIdsAssigned + description: | + Number of unique tracking IDs assigned. + + Each tracked entity gets a unique ID maintained + across its visible duration. + range: integer + required: false + minimum_value: 0 + examples: + - value: 23 + description: "23 unique tracking IDs assigned" + + linked_to_collection: + slot_uri: hc:linkedToCollection + description: | + Whether detected objects are linked to collection database. 
+ + When true, detected objects have collection_id or + wikidata_id linking them to authoritative records. + + Enables: + - "Find videos showing artwork X" + - Rich metadata for detected items + - Cross-referencing with collection management + range: boolean + required: false + examples: + - value: true + description: "Objects linked to collection records" + + comments: + - "Object, face, and logo detection in video" + - "Supports bounding boxes and tracking" + - "Heritage-specific object vocabulary" + - "Enables linking to collection database" + + see_also: + - "https://cocodataset.org/" + - "https://iiif.io/api/presentation/3.0/" + + # ========================================================================== + # VideoOCRAnnotation - Text-in-Video Extraction + # ========================================================================== + + VideoOCRAnnotation: + is_a: VideoAnnotation + class_uri: hc:VideoOCRAnnotation + abstract: false + description: | + Annotation for extracting text visible in video frames (OCR). + + **DEFINITION**: + + VideoOCRAnnotation captures text that appears on-screen in video: + + | Text Type | Examples | + |-----------|----------| + | **Titles** | Opening titles, chapter headings | + | **Captions** | Burned-in subtitles, name cards | + | **Signs** | Museum signage, room labels | + | **Documents** | Letters, manuscripts, books shown | + | **Labels** | Artifact labels, exhibition text | + | **Graphics** | Infographics, charts, timelines | + + **OCR vs SUBTITLES**: + + - **VideoSubtitle**: Text derived from AUDIO (speech-to-text) + - **VideoOCRAnnotation**: Text derived from VIDEO (image-to-text) + + OCR captures text VISIBLE in frames, not spoken. + + **TEXT DETECTION PIPELINE**: + + 1. **Detection**: Locate text regions (bounding boxes) + 2. **Recognition**: Extract characters from regions + 3. 
**Post-processing**: Correct, normalize, structure
+
+      **HERITAGE USE CASES**:
+
+      | Use Case | Value |
+      |----------|-------|
+      | **Name cards** | Identify speakers automatically |
+      | **Document digitization** | Extract text from filmed documents |
+      | **Exhibition text** | Capture interpretive panels |
+      | **Historical signs** | Archive street names, shop signs |
+      | **Handwritten text** | Extract letters, diaries, notes |
+
+      **LANGUAGE DETECTION**:
+
+      OCR can detect and extract text in multiple languages:
+      - `text_languages_detected`: Languages found in video
+      - Mixed-language content is common in heritage videos
+
+      **TEXT REGION TYPES** (see TextTypeEnum for the full vocabulary):
+
+      | Type | Appearance |
+      |------|------------|
+      | TITLE_CARD / LOWER_THIRD | Digitally added text (titles, lower thirds) |
+      | SIGN / LABEL | Text in physical scene (signs, exhibition labels) |
+      | DOCUMENT | Text from filmed documents, books, archival material |
+      | HANDWRITTEN | Manuscript, notes, signatures |
+
+    exact_mappings:
+      - hc:VideoOCRAnnotation
+
+    close_mappings:
+      - schema:TextDigitalDocument
+
+    slots:
+      # Extracted text
+      - text_segments
+      - full_extracted_text
+
+      # Text classification
+      - text_types_detected
+      - text_languages_detected
+
+      # Statistics
+      - text_region_count
+      - total_characters_extracted
+
+      # Quality
+      - includes_handwriting
+      - handwriting_confidence
+
+      # Spatial
+      - text_regions
+
+    slot_usage:
+      text_segments:
+        slot_uri: hc:textSegments
+        description: |
+          Time-coded segments with extracted text.
+
+          Each segment contains:
+          - `start_seconds` / `end_seconds`: When text is visible
+          - `segment_text`: The extracted text content
+          - `confidence`: OCR confidence score
+
+          Segments may overlap if multiple text regions visible.
+        range: VideoTimeSegment
+        multivalued: true
+        required: true
+        inlined_as_list: true
+        examples:
+          - value: "[{start_seconds: 0.0, end_seconds: 5.0, segment_text: 'Rijksmuseum Presents'}]"
+            description: "Title card text extraction"
+
+      full_extracted_text:
+        slot_uri: hc:fullExtractedText
+        description: |
+          All extracted text concatenated as single string.
+
+          Useful for:
+          - Full-text search indexing
+          - Text analysis (NLP, keyword extraction)
+          - Quick review of all on-screen text
+
+          Ordered chronologically by appearance.
+        range: string
+        required: false
+        examples:
+          - value: "Rijksmuseum Presents... The Night Watch... Rembrandt van Rijn, 1642..."
+            description: "All text from video"
+
+      text_types_detected:
+        slot_uri: hc:textTypesDetected
+        description: |
+          Types of text regions found in video.
+
+          Classifying text type helps with:
+          - Filtering (e.g., show only name cards)
+          - Priority (titles more important than background signs)
+          - Accuracy expectations (overlays clearer than handwriting)
+        range: TextTypeEnum
+        multivalued: true
+        required: false
+        examples:
+          - value: "[TITLE_CARD, LOWER_THIRD, DOCUMENT]"
+            description: "Text types found in video"
+
+      text_languages_detected:
+        slot_uri: dcterms:language
+        description: |
+          Languages of detected text (ISO 639-1 codes).
+
+          Heritage videos often contain multilingual text:
+          - Exhibition labels in multiple languages
+          - Historical documents in period languages
+          - Modern overlays vs historical content
+        range: string
+        multivalued: true
+        required: false
+        examples:
+          - value: "[nl, en, la]"
+            description: "Dutch, English, and Latin text detected"
+
+      text_region_count:
+        slot_uri: hc:textRegionCount
+        description: |
+          Total number of text regions detected.
+
+          A region is a contiguous area of text.
+          Multiple regions may be visible simultaneously.
+ range: integer + required: false + minimum_value: 0 + examples: + - value: 28 + description: "28 text regions detected" + + total_characters_extracted: + slot_uri: hc:totalCharactersExtracted + description: | + Total character count of all extracted text. + + Useful for: + - Understanding OCR output volume + - Cost estimation (some OCR APIs charge per character) + range: integer + required: false + minimum_value: 0 + examples: + - value: 3456 + description: "3,456 characters extracted" + + includes_handwriting: + slot_uri: hc:includesHandwriting + description: | + Whether handwritten text was detected. + + Handwriting OCR is more challenging and typically + has lower confidence than printed text. + + Heritage relevance: manuscripts, letters, diaries, + annotations, signatures. + range: boolean + required: false + examples: + - value: true + description: "Handwritten text detected" + + handwriting_confidence: + slot_uri: hc:handwritingConfidence + description: | + Average confidence for handwriting recognition. + + Typically lower than printed text confidence. + + Useful for quality assessment and filtering. + range: float + required: false + minimum_value: 0.0 + maximum_value: 1.0 + examples: + - value: 0.68 + description: "Moderate handwriting recognition confidence" + + text_regions: + slot_uri: hc:textRegions + description: | + Detailed text region data with spatial coordinates. + + Each region includes: + - Bounding box coordinates + - Extracted text + - Region type classification + - Language detection + + For detailed spatial analysis beyond time segments. 
+        range: TextRegion
+        multivalued: true
+        required: false
+        inlined_as_list: true
+        examples:
+          - value: "[{text: 'Welcome', bbox: [0.1, 0.9, 0.4, 0.1], type: 'TITLE_CARD'}]"
+            description: "Text region with coordinates"
+
+    comments:
+      - "OCR extraction for text visible in video frames"
+      - "Distinct from subtitles (audio-derived)"
+      - "Supports printed and handwritten text"
+      - "Heritage use: documents, labels, signage, name cards"
+
+    see_also:
+      - "https://en.wikipedia.org/wiki/Optical_character_recognition"
+
+# ============================================================================
+# Supporting Classes (Embedded Types)
+# ============================================================================
+
+  DetectedObject:
+    class_uri: hc:DetectedObject
+    description: |
+      A single detected object with label, confidence, and location.
+    slots:
+      - object_label
+      - object_confidence
+      - object_segment
+      - object_bbox
+      - object_wikidata_id
+      - object_collection_id
+
+    slot_usage:
+      object_label:
+        slot_uri: rdfs:label
+        description: Object class label (e.g., "painting", "sculpture")
+        range: string
+        required: true
+      object_confidence:
+        slot_uri: hc:confidence
+        description: Detection confidence (0.0-1.0)
+        range: float
+        required: true
+      object_segment:
+        slot_uri: hc:segment
+        description: Time segment when object is visible
+        range: VideoTimeSegment
+        required: false
+      object_bbox:
+        slot_uri: hc:boundingBox
+        description: Bounding box as [x, y, width, height] normalized 0-1
+        range: float
+        multivalued: true
+        required: false
+      object_wikidata_id:
+        slot_uri: hc:wikidataId
+        description: Wikidata ID if object is identified
+        range: string
+        required: false
+      object_collection_id:
+        slot_uri: hc:collectionId
+        description: Collection database ID for artwork/artifact
+        range: string
+        required: false
+
+  DetectedFace:
+    class_uri: hc:DetectedFace
+    description: |
+      A detected face with optional identity and attributes.
+    slots:
+      - face_confidence
+      - face_segment
+      - face_bbox
+      - person_id
+      - person_name
+      - is_recognized
+
+    slot_usage:
+      face_confidence:
+        slot_uri: hc:confidence
+        description: Face detection confidence (0.0-1.0)
+        range: float
+        required: true
+      face_segment:
+        slot_uri: hc:segment
+        description: Time segment when face is visible
+        range: VideoTimeSegment
+        required: false
+      face_bbox:
+        slot_uri: hc:boundingBox
+        description: Face bounding box as [x, y, width, height]
+        range: float
+        multivalued: true
+        required: false
+      person_id:
+        slot_uri: hc:personId
+        description: Unique identifier for recognized person
+        range: string
+        required: false
+      person_name:
+        slot_uri: schema:name
+        description: Name of recognized person
+        range: string
+        required: false
+      is_recognized:
+        slot_uri: hc:isRecognized
+        description: Whether face was matched to known person
+        range: boolean
+        required: false
+
+  DetectedLogo:
+    class_uri: hc:DetectedLogo
+    description: |
+      A detected logo or brand mark.
+    slots:
+      - logo_label
+      - logo_confidence
+      - logo_segment
+      - logo_bbox
+      - logo_organization
+
+    slot_usage:
+      logo_label:
+        slot_uri: rdfs:label
+        description: Logo name or brand
+        range: string
+        required: true
+      logo_confidence:
+        slot_uri: hc:confidence
+        description: Detection confidence (0.0-1.0)
+        range: float
+        required: true
+      logo_segment:
+        slot_uri: hc:segment
+        description: Time segment when logo is visible
+        range: VideoTimeSegment
+        required: false
+      logo_bbox:
+        slot_uri: hc:boundingBox
+        description: Logo bounding box
+        range: float
+        multivalued: true
+        required: false
+      logo_organization:
+        slot_uri: hc:logoOrganization
+        description: Organization associated with logo
+        range: string
+        required: false
+
+  DetectedLandmark:
+    class_uri: hc:DetectedLandmark
+    description: |
+      A detected landmark or notable building.
+ slots: + - landmark_label + - landmark_confidence + - landmark_segment + - landmark_wikidata_id + - landmark_geonames_id + + slot_usage: + landmark_label: + slot_uri: rdfs:label + description: Landmark name + range: string + required: true + landmark_confidence: + slot_uri: hc:confidence + description: Detection confidence (0.0-1.0) + range: float + required: true + landmark_segment: + slot_uri: hc:segment + description: Time segment when landmark is visible + range: VideoTimeSegment + required: false + landmark_wikidata_id: + slot_uri: hc:wikidataId + description: Wikidata ID for landmark + range: string + required: false + landmark_geonames_id: + slot_uri: hc:geonamesId + description: GeoNames ID for location + range: string + required: false + + TextRegion: + class_uri: hc:TextRegion + description: | + A detected text region with extracted content and location. + slots: + - region_text + - region_confidence + - region_bbox + - region_type + - region_language + + slot_usage: + region_text: + slot_uri: oa:bodyValue + description: Extracted text content + range: string + required: true + region_confidence: + slot_uri: hc:confidence + description: OCR confidence (0.0-1.0) + range: float + required: true + region_bbox: + slot_uri: hc:boundingBox + description: Text region bounding box + range: float + multivalued: true + required: false + region_type: + slot_uri: dcterms:type + description: Type of text region + range: TextTypeEnum + required: false + region_language: + slot_uri: dcterms:language + description: Detected language (ISO 639-1) + range: string + required: false + +# ============================================================================ +# Enumerations +# ============================================================================ + +enums: + + DetectionLevelEnum: + description: Granularity of scene/shot detection + permissible_values: + SHOT: + description: Technical camera edit boundaries + SCENE: + description: Semantic content/location 
boundaries + BOTH: + description: Both shot and scene detection + + SceneTypeEnum: + description: Types of scenes in heritage videos + permissible_values: + INTERIOR: + description: Indoor/interior shots + EXTERIOR: + description: Outdoor/exterior shots + CLOSEUP: + description: Detail shots of objects/artworks + WIDE_SHOT: + description: Establishing or wide-angle shots + INTERVIEW: + description: Talking head / interview format + ARCHIVAL: + description: Historical footage or photographs + ANIMATION: + description: Animated or graphics sequence + TITLE_CARD: + description: Text overlay or title sequence + B_ROLL: + description: Supplementary/cutaway footage + DEMONSTRATION: + description: Process or technique demonstration + TOUR: + description: Walking tour or navigation sequence + + TransitionTypeEnum: + description: Types of video transitions + permissible_values: + CUT: + description: Instantaneous transition (hard cut) + FADE_IN: + description: Gradual appearance from black + FADE_OUT: + description: Gradual disappearance to black + DISSOLVE: + description: Cross-fade between shots + WIPE: + description: Geometric wipe transition + MORPH: + description: Morphing transition effect + FLASH: + description: Flash or strobe transition + OTHER: + description: Other transition type + + TextTypeEnum: + description: Types of on-screen text regions + permissible_values: + TITLE_CARD: + description: Opening/closing titles + LOWER_THIRD: + description: Name/title overlay at bottom + SUBTITLE: + description: Burned-in subtitles/captions + SIGN: + description: Physical signs in scene + LABEL: + description: Museum/exhibition labels + DOCUMENT: + description: Text from documents/books + HANDWRITTEN: + description: Handwritten text + GRAPHIC: + description: Infographic or chart text + WATERMARK: + description: Video watermark or logo + URL: + description: Website URL display + CREDITS: + description: Credits or attribution text + OTHER: + description: Other text type + +# 
============================================================================ +# Slot Definitions +# ============================================================================ + +slots: + # Scene annotation slots + scene_segments: + description: List of detected scene/shot segments + range: VideoTimeSegment + multivalued: true + scene_count: + description: Number of scenes detected + range: integer + average_scene_duration_seconds: + description: Average scene duration + range: float + detection_level: + description: Shot vs scene detection granularity + range: DetectionLevelEnum + scene_types_detected: + description: Scene type labels found + range: SceneTypeEnum + multivalued: true + transition_types_detected: + description: Transition types found + range: TransitionTypeEnum + multivalued: true + cut_count: + description: Number of hard cuts + range: integer + fade_count: + description: Number of fades + range: integer + dissolve_count: + description: Number of dissolves + range: integer + + # Object annotation slots + detected_objects: + description: List of detected objects + range: DetectedObject + multivalued: true + detected_faces: + description: List of detected faces + range: DetectedFace + multivalued: true + detected_logos: + description: List of detected logos + range: DetectedLogo + multivalued: true + detected_landmarks: + description: List of detected landmarks + range: DetectedLandmark + multivalued: true + unique_object_count: + description: Number of unique objects + range: integer + unique_face_count: + description: Number of unique faces + range: integer + object_classes_detected: + description: Object class labels found + range: string + multivalued: true + includes_object_tracking: + description: Whether objects tracked across frames + range: boolean + tracking_ids_assigned: + description: Number of tracking IDs + range: integer + linked_to_collection: + description: Whether linked to collection database + range: boolean + + # OCR annotation 
slots + text_segments: + description: Time-coded text extraction segments + range: VideoTimeSegment + multivalued: true + full_extracted_text: + description: All extracted text concatenated + range: string + text_types_detected: + description: Types of text regions found + range: TextTypeEnum + multivalued: true + text_languages_detected: + description: Languages detected in text + range: string + multivalued: true + text_region_count: + description: Number of text regions + range: integer + total_characters_extracted: + description: Total characters extracted + range: integer + includes_handwriting: + description: Whether handwriting detected + range: boolean + handwriting_confidence: + description: Handwriting OCR confidence + range: float + text_regions: + description: Detailed text region data + range: TextRegion + multivalued: true + + # Supporting class slots + object_label: + description: Object class label + range: string + object_confidence: + description: Object detection confidence + range: float + object_segment: + description: Object visibility segment + range: VideoTimeSegment + object_bbox: + description: Object bounding box + range: float + multivalued: true + object_wikidata_id: + description: Object Wikidata ID + range: string + object_collection_id: + description: Object collection database ID + range: string + + face_confidence: + description: Face detection confidence + range: float + face_segment: + description: Face visibility segment + range: VideoTimeSegment + face_bbox: + description: Face bounding box + range: float + multivalued: true + person_id: + description: Recognized person identifier + range: string + person_name: + description: Recognized person name + range: string + is_recognized: + description: Whether face was recognized + range: boolean + + logo_label: + description: Logo name + range: string + logo_confidence: + description: Logo detection confidence + range: float + logo_segment: + description: Logo visibility segment + 
range: VideoTimeSegment + logo_bbox: + description: Logo bounding box + range: float + multivalued: true + logo_organization: + description: Organization for logo + range: string + + landmark_label: + description: Landmark name + range: string + landmark_confidence: + description: Landmark detection confidence + range: float + landmark_segment: + description: Landmark visibility segment + range: VideoTimeSegment + landmark_wikidata_id: + description: Landmark Wikidata ID + range: string + landmark_geonames_id: + description: Landmark GeoNames ID + range: string + + region_text: + description: Extracted text content + range: string + region_confidence: + description: OCR confidence + range: float + region_bbox: + description: Text region bounding box + range: float + multivalued: true + region_type: + description: Text region type + range: TextTypeEnum + region_language: + description: Detected language + range: string diff --git a/schemas/20251121/linkml/modules/classes/VideoAudioAnnotation.yaml b/schemas/20251121/linkml/modules/classes/VideoAudioAnnotation.yaml new file mode 100644 index 0000000000..083ce1db6e --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/VideoAudioAnnotation.yaml @@ -0,0 +1,1108 @@ +# Video Audio Annotation Class +# Models audio event detection in video content (speech, music, silence, diarization) +# +# Part of Heritage Custodian Ontology v0.9.10 +# +# HIERARCHY: +# VideoAnnotation (abstract base) +# │ +# ├── VideoSceneAnnotation (scene/shot detection) +# ├── VideoObjectAnnotation (object/face/logo detection) +# ├── VideoOCRAnnotation (text-in-video extraction) +# └── VideoAudioAnnotation (this class) +# - Speech detection and diarization +# - Music detection and classification +# - Sound event detection +# - Silence/noise detection +# +# HERITAGE INSTITUTION USE CASES: +# - Speaker identification in curator interviews +# - Music detection in promotional videos +# - Silence detection for video quality analysis +# - Language 
detection for multilingual content +# - Applause/audience reaction in lecture recordings +# - Sound effects in exhibition media +# +# ONTOLOGY ALIGNMENT: +# - W3C Web Annotation for annotation structure +# - CIDOC-CRM E13_Attribute_Assignment for attribution +# - W3C Media Ontology for audio properties +# - Speech-to-Text standards for diarization + +id: https://nde.nl/ontology/hc/class/VideoAudioAnnotation +name: video_audio_annotation_class +title: Video Audio Annotation Class + +imports: + - linkml:types + - ./VideoAnnotation + - ./VideoTimeSegment + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + dcterms: http://purl.org/dc/terms/ + prov: http://www.w3.org/ns/prov# + crm: http://www.cidoc-crm.org/cidoc-crm/ + oa: http://www.w3.org/ns/oa# + ma: http://www.w3.org/ns/ma-ont# + wikidata: http://www.wikidata.org/entity/ + +default_prefix: hc + +# ============================================================================ +# Classes +# ============================================================================ + +classes: + + VideoAudioAnnotation: + is_a: VideoAnnotation + class_uri: hc:VideoAudioAnnotation + abstract: false + description: | + Annotation for audio events detected in video content. + + **DEFINITION**: + + VideoAudioAnnotation captures structured information derived from audio + analysis of video content. This includes speech, music, silence, and + various sound events. + + **AUDIO ANALYSIS TYPES**: + + | Type | Description | Use Case | + |------|-------------|----------| + | **Speech Detection** | Identify spoken segments | Transcript alignment | + | **Speaker Diarization** | Who spoke when | Interview navigation | + | **Music Detection** | Identify musical segments | Content classification | + | **Sound Events** | Applause, laughter, etc. 
| Audience engagement | + | **Silence Detection** | Find quiet segments | Quality assessment | + | **Language Detection** | Identify spoken languages | Multilingual content | + + **SPEAKER DIARIZATION**: + + Diarization answers "who spoke when": + + ``` + 0:00-0:15 Speaker 1 (Curator) + 0:15-0:45 Speaker 2 (Artist) + 0:45-1:00 Speaker 1 (Curator) + 1:00-1:30 Speaker 3 (Museum Director) + ``` + + Heritage applications: + - Navigate to specific speakers in interviews + - Count speaking time per person + - Identify unnamed speakers for annotation + - Build speaker databases for recognition + + **MUSIC DETECTION**: + + Music detection classifies audio segments as containing music: + + | Category | Examples | + |----------|----------| + | **Background music** | Documentary soundtracks | + | **Featured music** | Concert recordings, performances | + | **Historical music** | Archival recordings | + | **Licensed music** | Rights-managed content | + + Music segments may also include: + - Genre classification (classical, jazz, folk) + - Mood/tempo analysis + - Fingerprinting for identification + + **SOUND EVENT DETECTION**: + + Non-speech, non-music audio events: + + | Event Type | Heritage Context | + |------------|------------------| + | APPLAUSE | Lecture recordings, openings | + | LAUGHTER | Tour guides, educational content | + | CROWD_NOISE | Event documentation | + | DOOR/FOOTSTEPS | Ambient archive recordings | + | NATURE_SOUNDS | Outdoor heritage site recordings | + | MACHINERY | Industrial heritage, conservation | + + **LANGUAGE DETECTION**: + + Multilingual heritage content requires language identification: + + ```yaml + speech_segments: + - start: 0.0 + end: 120.0 + language: nl + speaker_id: speaker_001 + - start: 120.0 + end: 240.0 + language: en + speaker_id: speaker_001 # Same speaker, switched language + ``` + + **AUDIO QUALITY ANALYSIS**: + + Audio quality metrics for preservation and accessibility: + + | Metric | Description | Threshold | + 
|--------|-------------|-----------| + | SNR | Signal-to-noise ratio | > 20 dB good | + | Clipping | Peak distortion | None ideal | + | Noise floor | Background noise level | < -50 dB good | + | Frequency response | Bandwidth | Full-range ideal | + + **HERITAGE INSTITUTION USE CASES**: + + | Content Type | Audio Analysis Need | + |--------------|---------------------| + | Oral histories | Diarization, transcription alignment | + | Curator interviews | Speaker identification, language | + | Virtual tours | Background music, voiceover detection | + | Lecture recordings | Audience reactions, Q&A segments | + | Conservation videos | Narration vs demonstration audio | + | Archival footage | Speech recovery, noise reduction | + + **RELATIONSHIP TO VideoTranscript**: + + VideoAudioAnnotation is complementary to VideoTranscript: + + - **VideoTranscript**: The text content of speech (WHAT was said) + - **VideoAudioAnnotation**: Audio structure (WHO spoke, music, sounds) + + Together they provide complete audio understanding: + + ``` + VideoAudioAnnotation: Speaker 1 spoke 0:00-0:15 + VideoTranscript: "Welcome to the Rijksmuseum..." (0:00-0:15) + → Combined: Curator said "Welcome to the Rijksmuseum..." 
+ ``` + + exact_mappings: + - hc:VideoAudioAnnotation + + close_mappings: + - ma:AudioTrack + - crm:E13_Attribute_Assignment + + related_mappings: + - wikidata:Q11028 # Speech + - wikidata:Q638 # Music + + slots: + # Audio event detection + - audio_event_segments + - primary_audio_event_type + + # Speech analysis + - speech_detected + - speech_segments + - speech_language + - speech_language_confidence + - languages_detected + + # Speaker diarization + - diarization_enabled + - diarization_segments + - speaker_count + - speaker_labels + + # Music detection + - music_detected + - music_segments + - music_genres_detected + - music_confidence + + # Sound events + - sound_events_detected + - sound_event_types + + # Silence/noise + - silence_segments + - silence_total_seconds + - noise_floor_db + + # Audio quality + - audio_quality_score + - snr_db + - has_clipping + + slot_usage: + audio_event_segments: + slot_uri: oa:hasBody + description: | + Time-coded segments with detected audio events. + + Web Annotation: hasBody links annotation to content. + + Each segment contains: + - Start/end time boundaries + - Event type (SPEECH, MUSIC, SILENCE, etc.) + - Confidence score + - Additional metadata (speaker ID, language, etc.) + + Segments may overlap (e.g., speech over background music). + range: VideoTimeSegment + multivalued: true + required: false + inlined_as_list: true + examples: + - value: "[{start_seconds: 0.0, end_seconds: 15.0, segment_text: 'Speech detected - Speaker 1'}]" + description: "Speech detection segment" + + primary_audio_event_type: + slot_uri: dcterms:type + description: | + The primary type of audio analysis performed. + + Dublin Core: type for categorization. 
+ + **Types**: + - SPEECH: Speech detection and diarization + - MUSIC: Music detection and classification + - SOUND_EVENTS: Environmental sound detection + - MIXED: Multiple analysis types combined + range: AudioEventTypeEnum + required: true + examples: + - value: "SPEECH" + description: "Primary focus on speech analysis" + + speech_detected: + slot_uri: hc:speechDetected + description: | + Whether speech was detected in the video audio. + + High-level flag for presence of speech content. + + - true: At least one speech segment detected + - false: No speech detected (music-only, silent, etc.) + range: boolean + required: false + examples: + - value: true + description: "Speech is present in video" + + speech_segments: + slot_uri: hc:speechSegments + description: | + Detailed speech segments with speaker and language info. + + Each segment represents continuous speech from one speaker. + + Used for: + - Transcript alignment + - Speaker navigation + - Language segmentation + range: SpeechSegment + multivalued: true + required: false + inlined_as_list: true + examples: + - value: "[{start_seconds: 0.0, end_seconds: 15.0, speaker_id: 'spk_001', language: 'nl'}]" + description: "Dutch speech from speaker 1" + + speech_language: + slot_uri: dcterms:language + description: | + Primary language of speech content (ISO 639-1 code). + + Dublin Core: language for primary language. + + For multilingual content, this is the predominant language. + See `languages_detected` for all languages. + range: string + required: false + examples: + - value: "nl" + description: "Dutch is primary language" + - value: "en" + description: "English is primary language" + + speech_language_confidence: + slot_uri: hc:languageConfidence + description: | + Confidence score for language detection (0.0-1.0). 
+ + Higher confidence when: + - Longer speech segments + - Clear audio quality + - Distinct language features + + Lower confidence when: + - Short utterances + - Background noise + - Code-switching + range: float + required: false + minimum_value: 0.0 + maximum_value: 1.0 + examples: + - value: 0.95 + description: "High confidence language detection" + + languages_detected: + slot_uri: hc:languagesDetected + description: | + All languages detected in speech (ISO 639-1 codes). + + Heritage content often includes multiple languages: + - Exhibition videos with translations + - Interviews with multilingual speakers + - Historical content with period languages + + Ordered by speaking time (most spoken first). + range: string + multivalued: true + required: false + examples: + - value: "[nl, en, de]" + description: "Dutch, English, and German detected" + + diarization_enabled: + slot_uri: hc:diarizationEnabled + description: | + Whether speaker diarization was performed. + + Diarization = identifying distinct speakers and their segments. + + - true: Speaker IDs assigned to speech segments + - false: Speech detected but speakers not distinguished + range: boolean + required: false + examples: + - value: true + description: "Diarization was performed" + + diarization_segments: + slot_uri: hc:diarizationSegments + description: | + Detailed diarization results with speaker assignments. + + Each segment identifies: + - Time boundaries + - Speaker ID (anonymous: "spk_001", "spk_002") + - Optional speaker name (if identified) + - Confidence score + + Enables "who spoke when" analysis. + range: DiarizationSegment + multivalued: true + required: false + inlined_as_list: true + examples: + - value: "[{start_seconds: 0.0, end_seconds: 15.0, speaker_id: 'spk_001', speaker_label: 'Curator'}]" + description: "Curator speaking for first 15 seconds" + + speaker_count: + slot_uri: hc:speakerCount + description: | + Number of distinct speakers detected. 
+ + Useful for: + - Interview classification (1 = monologue, 2+ = dialog) + - Content type inference + - Accessibility planning + range: integer + required: false + minimum_value: 0 + examples: + - value: 3 + description: "Three distinct speakers detected" + + speaker_labels: + slot_uri: hc:speakerLabels + description: | + Labels or names assigned to detected speakers. + + May be: + - Anonymous: ["Speaker 1", "Speaker 2"] + - Identified: ["Dr. Taco Dibbits", "Interviewer"] + - Role-based: ["Curator", "Artist", "Host"] + + Ordered by speaking time (most speaking first). + range: string + multivalued: true + required: false + examples: + - value: "[Curator, Artist, Museum Director]" + description: "Three identified speakers" + + music_detected: + slot_uri: hc:musicDetected + description: | + Whether music was detected in the audio. + + - true: Musical content detected (any amount) + - false: No music detected (speech-only, silence) + range: boolean + required: false + examples: + - value: true + description: "Music present in video" + + music_segments: + slot_uri: hc:musicSegments + description: | + Time segments containing music. + + Each segment includes: + - Time boundaries + - Music type (background, featured) + - Genre classification (if detected) + - Confidence score + range: MusicSegment + multivalued: true + required: false + inlined_as_list: true + examples: + - value: "[{start_seconds: 0.0, end_seconds: 30.0, music_type: 'BACKGROUND', genre: 'classical'}]" + description: "Classical background music" + + music_genres_detected: + slot_uri: hc:musicGenresDetected + description: | + Music genres detected in audio. 
+ + **Common Heritage Genres**: + - classical: Art music, orchestral + - baroque: Period-specific classical + - jazz: Jazz performances + - folk: Traditional/folk music + - ambient: Background/atmospheric + - electronic: Modern electronic music + range: string + multivalued: true + required: false + examples: + - value: "[classical, baroque]" + description: "Classical and baroque music detected" + + music_confidence: + slot_uri: hc:musicConfidence + description: | + Overall confidence of music detection (0.0-1.0). + + Average confidence across all music segments. + range: float + required: false + minimum_value: 0.0 + maximum_value: 1.0 + examples: + - value: 0.88 + description: "High confidence music detection" + + sound_events_detected: + slot_uri: hc:soundEventsDetected + description: | + Whether non-speech, non-music sound events were detected. + + Sound events include applause, laughter, environmental sounds, etc. + range: boolean + required: false + examples: + - value: true + description: "Sound events detected" + + sound_event_types: + slot_uri: hc:soundEventTypes + description: | + Types of sound events detected. + + **Heritage-Relevant Events**: + - APPLAUSE: Lecture endings, openings + - LAUGHTER: Tour guide humor + - CROWD_NOISE: Event atmosphere + - FOOTSTEPS: Gallery ambiance + - NATURE_SOUNDS: Outdoor heritage sites + - BELLS: Church/temple recordings + range: SoundEventTypeEnum + multivalued: true + required: false + examples: + - value: "[APPLAUSE, CROWD_NOISE]" + description: "Applause and crowd sounds detected" + + silence_segments: + slot_uri: hc:silenceSegments + description: | + Time segments containing silence or very low audio. + + Silence detection useful for: + - Finding pauses between segments + - Quality assessment (unexpected silence) + - Identifying chapter/scene boundaries + + Threshold typically: audio below -40 dB for > 2 seconds. 
+ range: VideoTimeSegment + multivalued: true + required: false + inlined_as_list: true + examples: + - value: "[{start_seconds: 45.0, end_seconds: 48.0}]" + description: "3-second silence" + + silence_total_seconds: + slot_uri: hc:silenceTotalSeconds + description: | + Total duration of silence in the video (seconds). + + High silence percentage may indicate: + - Extended pauses + - Silent segments (B-roll without audio) + - Audio issues + range: float + required: false + minimum_value: 0.0 + examples: + - value: 15.5 + description: "15.5 seconds of total silence" + + noise_floor_db: + slot_uri: hc:noiseFloorDb + description: | + Background noise floor level in decibels. + + **Quality Guidelines**: + - < -60 dB: Excellent (studio quality) + - -60 to -40 dB: Good (professional recording) + - -40 to -30 dB: Acceptable (field recording) + - > -30 dB: Poor (noisy environment) + range: float + required: false + examples: + - value: -45.0 + description: "Good quality, moderate noise floor" + + audio_quality_score: + slot_uri: hc:audioQualityScore + description: | + Overall audio quality score (0.0-1.0). + + Composite score based on: + - Signal-to-noise ratio + - Clipping presence + - Frequency response + - Clarity of speech + + **Interpretation**: + - > 0.8: High quality, suitable for all uses + - 0.6-0.8: Good quality, minor issues + - 0.4-0.6: Acceptable, some degradation + - < 0.4: Poor quality, may need enhancement + range: float + required: false + minimum_value: 0.0 + maximum_value: 1.0 + examples: + - value: 0.85 + description: "High audio quality" + + snr_db: + slot_uri: hc:snrDb + description: | + Signal-to-noise ratio in decibels. 
+ + Higher is better: + - > 30 dB: Excellent + - 20-30 dB: Good + - 10-20 dB: Acceptable + - < 10 dB: Poor (speech intelligibility affected) + range: float + required: false + examples: + - value: 25.0 + description: "Good signal-to-noise ratio" + + has_clipping: + slot_uri: hc:hasClipping + description: | + Whether audio clipping (peak distortion) was detected. + + Clipping occurs when audio exceeds maximum level: + - true: Clipping detected (distortion present) + - false: No clipping (clean audio) + + Clipping is permanent quality loss. + range: boolean + required: false + examples: + - value: false + description: "No clipping detected" + + comments: + - "Audio event detection for video content" + - "Supports speech, music, silence, and sound event detection" + - "Speaker diarization for interview navigation" + - "Language detection for multilingual heritage content" + - "Audio quality metrics for preservation assessment" + + see_also: + - "https://www.w3.org/TR/annotation-model/" + - "https://arxiv.org/abs/2111.08085" # Speaker diarization survey + +# ============================================================================ +# Supporting Classes +# ============================================================================ + + SpeechSegment: + class_uri: hc:SpeechSegment + description: | + A speech segment with speaker and language information. + + Extends VideoTimeSegment with speech-specific metadata. + + slots: + - segment_start_seconds + - segment_end_seconds + - speaker_id + - speaker_label + - segment_language + - segment_confidence + - speech_text + + slot_usage: + segment_start_seconds: + slot_uri: ma:hasStartTime + description: Start time in seconds + range: float + required: true + minimum_value: 0.0 + + segment_end_seconds: + slot_uri: ma:hasEndTime + description: End time in seconds + range: float + required: true + minimum_value: 0.0 + + speaker_id: + slot_uri: hc:speakerId + description: | + Unique identifier for the speaker. 
+ + Format: "spk_001", "spk_002", etc. (anonymous) + Or: "taco_dibbits" (identified) + range: string + required: false + + speaker_label: + slot_uri: schema:name + description: Human-readable speaker name or role + range: string + required: false + + segment_language: + slot_uri: dcterms:language + description: Language of speech in this segment (ISO 639-1) + range: string + required: false + + segment_confidence: + slot_uri: hc:confidence + description: Confidence score for this segment (0.0-1.0) + range: float + required: false + minimum_value: 0.0 + maximum_value: 1.0 + + speech_text: + slot_uri: hc:speechText + description: | + Transcript text for this segment (if available). + + Links to VideoTranscript for full transcript. + range: string + required: false + + + DiarizationSegment: + class_uri: hc:DiarizationSegment + description: | + A diarization segment identifying speaker and time boundaries. + + Focused on "who spoke when" rather than transcript content. + + slots: + - diarization_start_seconds + - diarization_end_seconds + - diarization_speaker_id + - diarization_speaker_label + - diarization_confidence + - is_overlapping + + slot_usage: + diarization_start_seconds: + slot_uri: ma:hasStartTime + description: Start time in seconds + range: float + required: true + minimum_value: 0.0 + + diarization_end_seconds: + slot_uri: ma:hasEndTime + description: End time in seconds + range: float + required: true + minimum_value: 0.0 + + diarization_speaker_id: + slot_uri: hc:speakerId + description: Anonymous speaker identifier (spk_001, spk_002, etc.) 
+ range: string + required: true + + diarization_speaker_label: + slot_uri: schema:name + description: Optional identified name or role + range: string + required: false + + diarization_confidence: + slot_uri: hc:confidence + description: Diarization confidence (0.0-1.0) + range: float + required: false + minimum_value: 0.0 + maximum_value: 1.0 + + is_overlapping: + slot_uri: hc:isOverlapping + description: | + Whether this segment overlaps with another speaker. + + Overlapping speech occurs when multiple people speak simultaneously. + range: boolean + required: false + + + MusicSegment: + class_uri: hc:MusicSegment + description: | + A segment of detected music with classification. + + slots: + - music_start_seconds + - music_end_seconds + - music_type + - music_genre + - music_segment_confidence + - is_background + + slot_usage: + music_start_seconds: + slot_uri: ma:hasStartTime + description: Start time in seconds + range: float + required: true + minimum_value: 0.0 + + music_end_seconds: + slot_uri: ma:hasEndTime + description: End time in seconds + range: float + required: true + minimum_value: 0.0 + + music_type: + slot_uri: dcterms:type + description: Type of music (BACKGROUND, FEATURED, ARCHIVAL) + range: MusicTypeEnum + required: false + + music_genre: + slot_uri: hc:genre + description: Detected music genre + range: string + required: false + + music_segment_confidence: + slot_uri: hc:confidence + description: Music detection confidence (0.0-1.0) + range: float + required: false + minimum_value: 0.0 + maximum_value: 1.0 + + is_background: + slot_uri: hc:isBackground + description: | + Whether music is background (under speech) vs featured. 
+ + - true: Music is background/ambient + - false: Music is primary audio + range: boolean + required: false + + +# ============================================================================ +# Enumerations +# ============================================================================ + +enums: + + AudioEventTypeEnum: + description: | + Types of audio events detected in video. + permissible_values: + SPEECH: + description: Speech/voice detection and analysis + MUSIC: + description: Music detection and classification + SILENCE: + description: Silence or very low audio + SOUND_EVENT: + description: Non-speech, non-music sound events + NOISE: + description: Noise detection (for quality assessment) + MIXED: + description: Multiple audio event types analyzed + + SoundEventTypeEnum: + description: | + Types of non-speech, non-music sound events. + permissible_values: + APPLAUSE: + description: Clapping, applause + LAUGHTER: + description: Laughter from audience or speakers + CROWD_NOISE: + description: General crowd/audience noise + FOOTSTEPS: + description: Walking, footsteps + DOOR: + description: Door opening/closing sounds + NATURE_SOUNDS: + description: Birds, wind, water, etc. + TRAFFIC: + description: Vehicles, urban sounds + BELLS: + description: Church bells, temple bells, etc. + MACHINERY: + description: Industrial, mechanical sounds + COUGHING: + description: Coughing, clearing throat + PAPER: + description: Paper rustling + TYPING: + description: Keyboard typing + PHONE: + description: Phone ringing or notification + MUSIC_INSTRUMENT: + description: Individual instrument sounds + OTHER: + description: Other sound event type + + MusicTypeEnum: + description: | + Types of music presence in audio. 
+ permissible_values: + BACKGROUND: + description: Background/ambient music under other content + FEATURED: + description: Primary audio is music (performance, recording) + ARCHIVAL: + description: Historical/archival music recording + INTRO_OUTRO: + description: Opening or closing music/jingle + TRANSITION: + description: Music used for scene transitions + DIEGETIC: + description: Music from within the scene (radio, live performance) + NON_DIEGETIC: + description: Music added in post-production + + +# ============================================================================ +# Slot Definitions +# ============================================================================ + +slots: + # Audio event slots + audio_event_segments: + description: Time-coded segments with detected audio events + range: VideoTimeSegment + multivalued: true + + primary_audio_event_type: + description: Primary type of audio analysis performed + range: AudioEventTypeEnum + + # Speech slots + speech_detected: + description: Whether speech was detected + range: boolean + + speech_segments: + description: Detailed speech segments with speaker info + range: SpeechSegment + multivalued: true + + speech_language: + description: Primary language of speech (ISO 639-1) + range: string + + speech_language_confidence: + description: Confidence of language detection + range: float + + languages_detected: + description: All languages detected in speech + range: string + multivalued: true + + # Diarization slots + diarization_enabled: + description: Whether speaker diarization was performed + range: boolean + + diarization_segments: + description: Detailed diarization results + range: DiarizationSegment + multivalued: true + + speaker_count: + description: Number of distinct speakers detected + range: integer + + speaker_labels: + description: Labels or names for detected speakers + range: string + multivalued: true + + # Music slots + music_detected: + description: Whether music was detected + range: 
boolean + + music_segments: + description: Time segments containing music + range: MusicSegment + multivalued: true + + music_genres_detected: + description: Music genres detected + range: string + multivalued: true + + music_confidence: + description: Overall music detection confidence + range: float + + # Sound event slots + sound_events_detected: + description: Whether sound events were detected + range: boolean + + sound_event_types: + description: Types of sound events detected + range: SoundEventTypeEnum + multivalued: true + + # Silence/noise slots + silence_segments: + description: Time segments with silence + range: VideoTimeSegment + multivalued: true + + silence_total_seconds: + description: Total silence duration + range: float + + noise_floor_db: + description: Background noise floor in dB + range: float + + # Audio quality slots + audio_quality_score: + description: Overall audio quality (0.0-1.0) + range: float + + snr_db: + description: Signal-to-noise ratio in dB + range: float + + has_clipping: + description: Whether audio clipping was detected + range: boolean + + # SpeechSegment slots + segment_start_seconds: + description: Segment start time + range: float + + segment_end_seconds: + description: Segment end time + range: float + + speaker_id: + description: Speaker identifier + range: string + + speaker_label: + description: Speaker name or role + range: string + + segment_language: + description: Language of segment + range: string + + segment_confidence: + description: Segment confidence score + range: float + + speech_text: + description: Transcript text for segment + range: string + + # DiarizationSegment slots + diarization_start_seconds: + description: Diarization segment start + range: float + + diarization_end_seconds: + description: Diarization segment end + range: float + + diarization_speaker_id: + description: Speaker ID in diarization + range: string + + diarization_speaker_label: + description: Speaker label in diarization + 
range: string + + diarization_confidence: + description: Diarization confidence + range: float + + is_overlapping: + description: Whether segment has overlapping speech + range: boolean + + # MusicSegment slots + music_start_seconds: + description: Music segment start + range: float + + music_end_seconds: + description: Music segment end + range: float + + music_type: + description: Type of music presence + range: MusicTypeEnum + + music_genre: + description: Detected music genre + range: string + + music_segment_confidence: + description: Music segment confidence + range: float + + is_background: + description: Whether music is background + range: boolean diff --git a/schemas/20251121/linkml/modules/classes/VideoChapter.yaml b/schemas/20251121/linkml/modules/classes/VideoChapter.yaml new file mode 100644 index 0000000000..0ff4fe5d5d --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/VideoChapter.yaml @@ -0,0 +1,621 @@ +# Video Chapter Class +# Models video chapter markers (YouTube chapters, manual/auto-generated sections) +# +# Part of Heritage Custodian Ontology v0.9.10 +# +# STRUCTURE: +# VideoChapter (this class) +# - chapter_title, chapter_index +# - start/end times (via VideoTimeSegment composition) +# - auto_generated flag +# - thumbnail references +# +# USE CASES: +# - YouTube video chapters (manual creator-defined) +# - Auto-generated chapters (YouTube AI, third-party tools) +# - Museum virtual tour sections +# - Conservation documentation phases +# - Interview segments +# +# ONTOLOGY ALIGNMENT: +# - Schema.org Clip for media segments +# - W3C Media Fragments for temporal addressing +# - CIDOC-CRM E52_Time-Span for temporal extent + +id: https://nde.nl/ontology/hc/class/VideoChapter +name: video_chapter_class +title: Video Chapter Class + +imports: + - linkml:types + - ./VideoTimeSegment + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + dcterms: http://purl.org/dc/terms/ + prov: 
http://www.w3.org/ns/prov# + crm: http://www.cidoc-crm.org/cidoc-crm/ + oa: http://www.w3.org/ns/oa# + ma: http://www.w3.org/ns/ma-ont# + wikidata: http://www.wikidata.org/entity/ + +default_prefix: hc + +classes: + + VideoChapter: + class_uri: schema:Clip + abstract: false + description: | + A named chapter or section within a video, defined by temporal boundaries. + + **DEFINITION**: + + VideoChapter represents a titled segment of video content, typically used for + navigation and content organization. Chapters appear in video player interfaces + (YouTube chapters, Vimeo chapters) allowing viewers to jump to specific sections. + + **PLATFORM SUPPORT**: + + | Platform | Chapter Support | Auto-Generated | Custom Thumbnails | + |----------|-----------------|----------------|-------------------| + | YouTube | Yes (2020+) | Yes | No (keyframe) | + | Vimeo | Yes | No | Yes | + | Facebook | Limited | No | No | + | Wistia | Yes | No | Yes | + + **YOUTUBE CHAPTER REQUIREMENTS**: + + For YouTube to recognize chapters: + - First chapter MUST start at 0:00 + - Minimum 3 chapters required + - Each chapter must be at least 10 seconds + - Timestamps in description in `MM:SS` or `HH:MM:SS` format + + **HERITAGE INSTITUTION USE CASES**: + + | Content Type | Chapter Examples | + |--------------|------------------| + | Virtual tour | "Main Hall", "Dutch Masters", "Gift Shop" | + | Conservation | "Assessment", "Cleaning", "Retouching", "Varnishing" | + | Interview | "Introduction", "Early Career", "Major Works", "Legacy" | + | Exhibition | "Curator Introduction", "Theme 1", "Theme 2", "Conclusion" | + | Lecture | "Overview", "Case Study 1", "Case Study 2", "Q&A" | + + **AUTO-GENERATED VS MANUAL CHAPTERS**: + + | Source | Characteristics | Quality | + |--------|-----------------|---------| + | Manual (creator) | Semantic, meaningful titles | High | + | YouTube AI | Scene-based, generic titles | Variable | + | Third-party tools | Transcript-based, keyword titles | Medium | + + The 
`auto_generated` flag distinguishes these sources. + + **RELATIONSHIP TO VideoTimeSegment**: + + VideoChapter USES VideoTimeSegment for temporal boundaries rather than + extending it. This composition pattern allows: + - Reuse of segment validation (start < end) + - Consistent time representation across schema + - Separation of structural (chapter) and temporal (segment) concerns + + **MEDIA FRAGMENTS URI**: + + Chapters can be addressed via W3C Media Fragments: + ``` + https://youtube.com/watch?v=ABC123#t=120,300 + ``` + Corresponds to chapter starting at 2:00, ending at 5:00. + + **NESTED CHAPTERS**: + + Some platforms support hierarchical chapters (parent/child). + Use `parent_chapter_id` for nested structure: + + ``` + Chapter 1: Dutch Golden Age + └─ 1.1: Rembrandt + └─ 1.2: Vermeer + Chapter 2: Modern Art + ``` + + exact_mappings: + - schema:Clip + + close_mappings: + - ma:MediaFragment + - crm:E52_Time-Span + + related_mappings: + - wikidata:Q1454986 # Chapter (division of a book/document) + + slots: + # Chapter identification + - chapter_id + - chapter_title + - chapter_index + - chapter_description + + # Temporal boundaries (composition with VideoTimeSegment) + - chapter_start_seconds + - chapter_end_seconds + - chapter_start_time + - chapter_end_time + + # Generation metadata + - auto_generated + - chapter_source + + # Visual + - chapter_thumbnail_url + - chapter_thumbnail_timestamp + + # Hierarchy + - parent_chapter_id + - nesting_level + + slot_usage: + chapter_id: + slot_uri: dcterms:identifier + description: | + Unique identifier for this chapter. + + Dublin Core: identifier for unique identification. 
+ + **Format**: Platform-specific or UUID + - YouTube: No native chapter ID (use index) + - Generated: `{video_id}_chapter_{index}` + range: string + required: true + examples: + - value: "ABC123_chapter_0" + description: "First chapter of video ABC123" + - value: "550e8400-e29b-41d4-a716-446655440000" + description: "UUID-based chapter ID" + + chapter_title: + slot_uri: schema:name + description: | + Title of the chapter as displayed to viewers. + + Schema.org: name for the chapter's title. + + **Best Practices**: + - Keep titles concise (under 50 characters) + - Use descriptive, meaningful titles + - Avoid timestamps in title (redundant) + + **Auto-Generated Titles**: + - YouTube AI: Often generic ("Introduction", "Main Content") + - May need manual refinement for heritage content + range: string + required: true + examples: + - value: "De Nachtwacht (The Night Watch)" + description: "Chapter about specific artwork" + - value: "Curator Interview: Conservation Process" + description: "Interview segment chapter" + + chapter_index: + slot_uri: hc:chapterIndex + description: | + Zero-based index of this chapter within the video. + + **Ordering**: + - 0: First chapter (typically starts at 0:00) + - Subsequent chapters in temporal order + + Used for: + - Reconstruction of chapter sequence + - Navigation (previous/next) + - Display ordering + range: integer + required: true + minimum_value: 0 + examples: + - value: 0 + description: "First chapter" + - value: 5 + description: "Sixth chapter (zero-indexed)" + + chapter_description: + slot_uri: schema:description + description: | + Optional detailed description of chapter content. + + Schema.org: description for chapter details. + + Longer-form description than title. May include: + - Topics covered + - Featured artworks + - Key points discussed + + Not all platforms display chapter descriptions. + range: string + required: false + examples: + - value: "Dr. 
Dibbits discusses the restoration of Rembrandt's masterpiece, including the controversial 2019 operation." + description: "Detailed chapter description" + + chapter_start_seconds: + slot_uri: ma:hasStartTime + description: | + Start time of chapter in seconds from video beginning. + + Media Ontology: hasStartTime for temporal start. + + **First Chapter Rule**: + For YouTube chapters to be recognized, the first chapter + MUST start at 0.0 seconds. + + Floating point for millisecond precision. + range: float + required: true + minimum_value: 0.0 + examples: + - value: 0.0 + description: "First chapter starts at video beginning" + - value: 120.5 + description: "Chapter starts at 2:00.5" + + chapter_end_seconds: + slot_uri: ma:hasEndTime + description: | + End time of chapter in seconds from video beginning. + + Media Ontology: hasEndTime for temporal end. + + **Chapter Boundaries**: + - End time of chapter N = start time of chapter N+1 + - Last chapter ends at video duration + - No gaps between chapters (continuous coverage) + range: float + required: false + minimum_value: 0.0 + examples: + - value: 120.0 + description: "Chapter ends at 2:00" + + chapter_start_time: + slot_uri: hc:chapterStartTime + description: | + Start time as ISO 8601 duration for display/serialization. + + Derived from chapter_start_seconds. + + **Format**: ISO 8601 duration (e.g., "PT2M30S" = 2:30) + range: string + required: false + pattern: "^PT(\\d+H)?(\\d+M)?(\\d+(\\.\\d+)?S)?$" + examples: + - value: "PT0S" + description: "Start of video" + - value: "PT10M30S" + description: "10 minutes 30 seconds" + + chapter_end_time: + slot_uri: hc:chapterEndTime + description: | + End time as ISO 8601 duration for display/serialization. + + Derived from chapter_end_seconds. 
+ range: string + required: false + pattern: "^PT(\\d+H)?(\\d+M)?(\\d+(\\.\\d+)?S)?$" + examples: + - value: "PT5M0S" + description: "5 minutes" + + auto_generated: + slot_uri: hc:autoGenerated + description: | + Whether this chapter was auto-generated by AI/ML. + + **Sources**: + - true: YouTube AI chapters, third-party tools, ASR-based + - false: Manual creator-defined chapters + + Auto-generated chapters may have: + - Generic titles + - Less semantic meaning + - Scene-based rather than topic-based boundaries + range: boolean + required: false + examples: + - value: false + description: "Manual creator-defined chapter" + - value: true + description: "YouTube AI auto-generated" + + chapter_source: + slot_uri: prov:wasAttributedTo + description: | + Source or method that created this chapter. + + PROV-O: wasAttributedTo for attribution. + + **Common Values**: + - MANUAL: Creator-defined in video description + - YOUTUBE_AI: YouTube auto-chapters feature + - WHISPER_CHAPTERS: Generated from Whisper transcript + - SCENE_DETECTION: Based on visual scene changes + - THIRD_PARTY: External tool (specify in notes) + range: ChapterSourceEnum + required: false + examples: + - value: "MANUAL" + description: "Creator manually added chapters" + + chapter_thumbnail_url: + slot_uri: schema:thumbnailUrl + description: | + URL to thumbnail image for this chapter. + + Schema.org: thumbnailUrl for preview image. + + **Platform Behavior**: + - YouTube: Auto-selects keyframe from chapter start + - Vimeo: Allows custom chapter thumbnails + + Thumbnail helps viewers preview chapter content. + range: uri + required: false + examples: + - value: "https://i.ytimg.com/vi/ABC123/hq1.jpg" + description: "YouTube chapter thumbnail" + + chapter_thumbnail_timestamp: + slot_uri: hc:thumbnailTimestamp + description: | + Timestamp (in seconds) of frame used for thumbnail. + + May differ slightly from chapter_start_seconds if + a more visually representative frame was selected. 
+ range: float + required: false + minimum_value: 0.0 + examples: + - value: 122.5 + description: "Thumbnail from 2:02.5" + + parent_chapter_id: + slot_uri: dcterms:isPartOf + description: | + Reference to parent chapter for hierarchical chapters. + + Dublin Core: isPartOf for containment relationship. + + Enables nested chapter structure: + ``` + Chapter 1: Dutch Masters + └─ 1.1: Rembrandt + └─ 1.2: Vermeer + ``` + + null/empty for top-level chapters. + range: string + required: false + examples: + - value: "ABC123_chapter_0" + description: "This is a sub-chapter of chapter 0" + + nesting_level: + slot_uri: hc:nestingLevel + description: | + Depth level in chapter hierarchy. + + - 0: Top-level chapter + - 1: First-level sub-chapter + - 2: Second-level sub-chapter + - etc. + + Most platforms only support level 0 (flat chapters). + range: integer + required: false + minimum_value: 0 + examples: + - value: 0 + description: "Top-level chapter" + - value: 1 + description: "Sub-chapter" + + comments: + - "Models video chapters for navigation (YouTube chapters, etc.)" + - "Supports both manual and auto-generated chapters" + - "Temporal boundaries via composition with VideoTimeSegment pattern" + - "Hierarchical chapters supported via parent_chapter_id" + - "Schema.org Clip alignment for semantic web compatibility" + + see_also: + - "https://support.google.com/youtube/answer/9884579" + - "https://schema.org/Clip" + - "https://www.w3.org/TR/media-frags/" + + + # ========================================================================== + # Supporting Class: VideoChapterList + # ========================================================================== + + VideoChapterList: + class_uri: schema:ItemList + description: | + A collection of chapters for a video. + + Groups all chapters for a video with metadata about the chapter set. 
+ + Enables bulk operations on chapters: + - Import/export of chapter lists + - Validation of chapter coverage + - Source tracking for entire chapter set + + exact_mappings: + - schema:ItemList + + slots: + - video_id + - chapters + - total_chapters + - chapters_source + - chapters_generated_at + - covers_full_video + + slot_usage: + video_id: + slot_uri: schema:isPartOf + description: Reference to the parent video + range: string + required: true + + chapters: + slot_uri: schema:itemListElement + description: Ordered list of chapters + range: VideoChapter + multivalued: true + required: true + inlined_as_list: true + + total_chapters: + slot_uri: hc:totalChapters + description: Total number of chapters + range: integer + required: false + minimum_value: 0 + + chapters_source: + slot_uri: prov:wasAttributedTo + description: Primary source for this chapter list + range: ChapterSourceEnum + required: false + + chapters_generated_at: + slot_uri: prov:generatedAtTime + description: When chapters were generated/extracted + range: datetime + required: false + + covers_full_video: + slot_uri: hc:coversFullVideo + description: | + Whether chapters cover the entire video duration. + + - true: No gaps, first chapter at 0:00, last ends at video end + - false: Partial coverage (gaps between chapters) + range: boolean + required: false + +# ============================================================================ +# Enumerations +# ============================================================================ + +enums: + + ChapterSourceEnum: + description: | + Source or method that created video chapters. 
+ permissible_values: + MANUAL: + description: Creator manually defined chapters in video description + YOUTUBE_AI: + description: YouTube auto-chapters feature (AI-generated) + WHISPER_CHAPTERS: + description: Generated from Whisper transcript analysis + SCENE_DETECTION: + description: Based on visual scene change detection + TRANSCRIPT_ANALYSIS: + description: Topic segmentation from transcript + THIRD_PARTY: + description: External tool or service + IMPORTED: + description: Imported from another platform/format + UNKNOWN: + description: Chapter source not determined + +# ============================================================================ +# Slot Definitions +# ============================================================================ + +slots: + chapter_id: + description: Unique identifier for chapter + range: string + + chapter_title: + description: Display title of chapter + range: string + + chapter_index: + description: Zero-based index in chapter sequence + range: integer + + chapter_description: + description: Detailed description of chapter content + range: string + + chapter_start_seconds: + description: Start time in seconds + range: float + + chapter_end_seconds: + description: End time in seconds + range: float + + chapter_start_time: + description: Start time as ISO 8601 duration + range: string + + chapter_end_time: + description: End time as ISO 8601 duration + range: string + + auto_generated: + description: Whether chapter was auto-generated by AI + range: boolean + + chapter_source: + description: Source that created this chapter + range: ChapterSourceEnum + + chapter_thumbnail_url: + description: URL to chapter thumbnail image + range: uri + + chapter_thumbnail_timestamp: + description: Timestamp of thumbnail frame + range: float + + parent_chapter_id: + description: Reference to parent chapter for nesting + range: string + + nesting_level: + description: Depth level in chapter hierarchy + range: integer + + # VideoChapterList slots 
+
+ video_id:
+ description: Reference to parent video
+ range: string
+
+ chapters:
+ description: Ordered list of video chapters
+ range: VideoChapter
+ multivalued: true
+
+ total_chapters:
+ description: Total number of chapters
+ range: integer
+
+ chapters_source:
+ description: Primary source for chapter list
+ range: ChapterSourceEnum
+
+ chapters_generated_at:
+ description: When chapters were generated
+ range: datetime
+
+ covers_full_video:
+ description: Whether chapters cover entire video
+ range: boolean
diff --git a/schemas/20251121/linkml/modules/classes/VideoPost.yaml b/schemas/20251121/linkml/modules/classes/VideoPost.yaml
new file mode 100644
index 0000000000..228aec6055
--- /dev/null
+++ b/schemas/20251121/linkml/modules/classes/VideoPost.yaml
@@ -0,0 +1,763 @@
+# Video Post Class
+# Concrete subclass of SocialMediaPost for video content with platform-specific properties
+#
+# Part of Heritage Custodian Ontology v0.9.10
+#
+# STRUCTURE:
+# SocialMediaPost (parent)
+# └── VideoPost (this class)
+# - duration, definition, captions
+# - view/like/comment metrics
+# - YouTube-specific fields
+#
+# DATA SOURCE EXAMPLE:
+# From data/custodian/NL-GE-AAL-M-NOM-nationaal_onderduikmuseum.yaml:
+# youtube_enrichment:
+# videos:
+# - video_id: FbIoC-Owy-M
+# duration: PT10M59S
+# definition: hd
+# caption_available: false
+# view_count: 132
+# like_count: 2
+# comment_count: 0
+
+id: https://nde.nl/ontology/hc/class/VideoPost
+name: video_post_class
+title: Video Post Class
+
+imports:
+ - linkml:types
+ - ./SocialMediaPost
+ - ./SocialMediaPostTypes
+ - ../slots/language
+
+prefixes:
+ linkml: https://w3id.org/linkml/
+ hc: https://nde.nl/ontology/hc/
+ schema: http://schema.org/
+ dcterms: http://purl.org/dc/terms/
+ prov: http://www.w3.org/ns/prov#
+ crm: http://www.cidoc-crm.org/cidoc-crm/
+ skos: http://www.w3.org/2004/02/skos/core#
+ as: https://www.w3.org/ns/activitystreams#
+ wikidata: http://www.wikidata.org/entity/
+
+default_prefix: hc
+
+classes:
+ + VideoPost: + is_a: SocialMediaPost + class_uri: as:Video + abstract: false + description: | + Concrete class for video content with platform-specific properties. + + **DEFINITION**: + + VideoPost is a specialized SocialMediaPost for video content. It extends + the base post class with video-specific slots for duration, resolution, + captions, and engagement metrics. + + **EXTENDS**: SocialMediaPost + + This class adds: + - Video technical properties (duration, definition, aspect ratio) + - Caption and subtitle availability + - Engagement metrics (views, likes, comments) + - Platform-specific fields (YouTube category, live broadcast status) + - Temporal markers (chapters, segments) + + **ONTOLOGY MAPPINGS**: + + | Property | Activity Streams | Schema.org | + |----------|------------------|------------| + | Class | as:Video | schema:VideoObject | + | duration | as:duration | schema:duration | + | definition | - | schema:videoQuality | + | caption | - | schema:caption | + | view_count | - | schema:interactionStatistic | + + **PLATFORM SUPPORT**: + + | Platform | Duration Limit | Resolution | Captions | + |----------|----------------|------------|----------| + | YouTube | 12 hours (verified) | Up to 8K | VTT, SRT | + | Vimeo | Varies by plan | Up to 8K | VTT, SRT | + | Facebook | 4 hours | Up to 4K | Auto-generated | + | TikTok | 10 minutes | 1080p | Auto-generated | + | Instagram Reels | 90 seconds | 1080p | Auto-generated | + + **HERITAGE INSTITUTION USE CASES**: + + | Content Type | Typical Duration | Platform | + |--------------|------------------|----------| + | Virtual tours | 10-30 min | YouTube | + | Conservation docs | 5-20 min | YouTube, Vimeo | + | Curator interviews | 15-60 min | YouTube | + | Object spotlights | 2-5 min | YouTube, Instagram | + | Short clips | 15-60 sec | TikTok, Reels | + | Live recordings | 30-120 min | YouTube | + + **METRICS OBSERVATION**: + + Video metrics (views, likes, comments) are observational data that change + constantly. 
Each metric reading should include: + - `metrics_observed_at`: When metrics were recorded + - `retrieval_timestamp`: When API call was made + + **RELATIONSHIP TO VideoPostType**: + + - VideoPost is a **concrete post instance** with video content + - VideoPostType is a **type classification** for categorizing posts + - A VideoPost typically has `post_types: [VideoPostType]` + - But may also have multiple types: `[LiveStreamPostType, VideoPostType]` + + **CAPTION AND SUBTITLE DISTINCTION**: + + Related classes for textual content derived from video: + - VideoSubtitle: Time-coded text (SRT/VTT format) + - VideoTranscript: Full text without timestamps + - VideoAnnotation: Computer vision derived content + + See VideoTextContent hierarchy for detailed modeling. + + exact_mappings: + - as:Video + - schema:VideoObject + + close_mappings: + - crm:E73_Information_Object + + related_mappings: + - wikidata:Q34508 # Video + - wikidata:Q604644 # Online video + + slots: + # ======================================== + # Video Technical Properties + # ======================================== + - duration + - definition + - aspect_ratio + - frame_rate + + # ======================================== + # Caption and Subtitle Availability + # ======================================== + - caption_available + - default_language + - default_audio_language + - available_caption_languages + + # ======================================== + # Engagement Metrics + # ======================================== + - view_count + - like_count + - dislike_count + - comment_count + - favorite_count + - metrics_observed_at + + # ======================================== + # Platform-Specific + # ======================================== + - video_category_id + - live_broadcast_content + - is_licensed_content + - is_embeddable + - is_made_for_kids + + # ======================================== + # Comments/Replies + # ======================================== + - comments_fetched + - video_comments + + 
+    slot_usage:
+      # --- Video Technical Properties ---
+
+      duration:
+        slot_uri: schema:duration
+        description: |
+          Duration of the video in ISO 8601 format.
+
+          Schema.org: duration for media length.
+
+          **Format**: ISO 8601 duration (e.g., "PT10M59S" = 10 minutes 59 seconds)
+
+          **Common Patterns**:
+          - PT30S = 30 seconds
+          - PT5M = 5 minutes
+          - PT1H30M = 1 hour 30 minutes
+          - PT2H15M30S = 2 hours 15 minutes 30 seconds
+        range: string
+        required: false
+        pattern: "^P(\\d+D)?(T(\\d+H)?(\\d+M)?(\\d+S)?)?$"
+        examples:
+          - value: "PT10M59S"
+            description: "10 minutes and 59 seconds"
+          - value: "PT1H30M"
+            description: "1 hour 30 minutes"
+
+      definition:
+        slot_uri: schema:videoQuality
+        description: |
+          Video resolution/definition quality.
+
+          Schema.org: videoQuality for resolution class.
+
+          **Values**:
+          - sd: Standard definition (480p or lower)
+          - hd: High definition (720p, 1080p)
+          - uhd / 4k: Ultra HD (2160p)
+          - 8k: Full Ultra HD (4320p)
+        range: VideoDefinitionEnum
+        required: false
+        examples:
+          - value: "hd"
+            description: "High definition (720p/1080p)"
+
+      aspect_ratio:
+        slot_uri: hc:aspectRatio
+        description: |
+          Video aspect ratio.
+
+          **Common Values**:
+          - 16:9: Standard widescreen (YouTube default)
+          - 9:16: Vertical (Shorts, Reels, TikTok)
+          - 4:3: Classic TV format
+          - 1:1: Square (Instagram legacy)
+          - 21:9: Cinematic ultrawide
+        range: string
+        required: false
+        examples:
+          - value: "16:9"
+            description: "Standard widescreen"
+          - value: "9:16"
+            description: "Vertical format for Shorts/Reels"
+
+      frame_rate:
+        slot_uri: hc:frameRate
+        description: |
+          Video frame rate in frames per second.
+
+          **Common Values**:
+          - 24: Cinema standard
+          - 25: PAL standard
+          - 30: NTSC standard
+          - 60: High frame rate
+        range: float
+        required: false
+        examples:
+          - value: 30.0
+            description: "30 frames per second"
+
+      # --- Caption and Subtitle Availability ---
+
+      caption_available:
+        slot_uri: hc:captionAvailable
+        description: |
+          Whether captions/subtitles are available for this video.
+
+          Indicates if the video has any caption tracks (auto-generated or manual).
+
+          Related: Use `available_caption_languages` for specific languages.
+        range: boolean
+        required: false
+        examples:
+          - value: true
+            description: "Video has captions available"
+          - value: false
+            description: "No captions available"
+
+      default_language:
+        slot_uri: schema:inLanguage
+        description: |
+          Default/primary language of the video content.
+
+          Schema.org: inLanguage for content language.
+
+          ISO 639-1 code (e.g., "nl", "en", "de").
+
+          Refers to on-screen text, title, description language.
+        range: string
+        required: false
+        examples:
+          - value: "nl"
+            description: "Dutch language content"
+
+      default_audio_language:
+        slot_uri: hc:defaultAudioLanguage
+        description: |
+          Language of the video's default audio track.
+
+          ISO 639-1 code. May differ from `default_language` for
+          dubbed or multilingual content.
+        range: string
+        required: false
+        examples:
+          - value: "nl"
+            description: "Dutch audio track"
+
+      available_caption_languages:
+        slot_uri: hc:availableCaptionLanguages
+        description: |
+          List of languages for which captions are available.
+
+          ISO 639-1 codes for all caption tracks.
+        range: string
+        multivalued: true
+        required: false
+        examples:
+          - value: "nl"
+            description: "Dutch caption track (full example list also includes en, de)"
+
+      # --- Engagement Metrics ---
+
+      view_count:
+        slot_uri: schema:interactionStatistic
+        description: |
+          Number of views for this video.
+
+          Schema.org: interactionStatistic for view statistic.
+
+          **OBSERVATIONAL**: This value changes constantly.
+ Always record `metrics_observed_at` timestamp. + range: integer + required: false + minimum_value: 0 + examples: + - value: 132 + description: "132 views at observation time" + + like_count: + slot_uri: hc:likeCount + description: | + Number of likes/upvotes for this video. + + Platform-specific: YouTube likes, Facebook reactions, etc. + + **OBSERVATIONAL**: Record with `metrics_observed_at`. + range: integer + required: false + minimum_value: 0 + examples: + - value: 2 + description: "2 likes at observation time" + + dislike_count: + slot_uri: hc:dislikeCount + description: | + Number of dislikes/downvotes (if available). + + Note: YouTube hid public dislike counts in Nov 2021. + API may still return dislike data for channel owners. + range: integer + required: false + minimum_value: 0 + + comment_count: + slot_uri: hc:commentCount + description: | + Number of comments on this video. + + **OBSERVATIONAL**: Record with `metrics_observed_at`. + range: integer + required: false + minimum_value: 0 + examples: + - value: 0 + description: "No comments at observation time" + + favorite_count: + slot_uri: hc:favoriteCount + description: | + Number of times video was favorited/saved. + + Platform-specific availability. + range: integer + required: false + minimum_value: 0 + + metrics_observed_at: + slot_uri: prov:atTime + description: | + Timestamp when engagement metrics were recorded. + + PROV-O: atTime for observation timestamp. + + **CRITICAL**: Metrics change constantly. This timestamp + indicates when view_count, like_count, etc. were observed. + range: datetime + required: false + examples: + - value: "2025-12-01T23:16:22.294232+00:00" + description: "Metrics observed December 1, 2025" + + # --- Platform-Specific --- + + video_category_id: + slot_uri: hc:videoCategoryId + description: | + Platform-specific category identifier. 
+ + **YouTube Category IDs**: + - 1: Film & Animation + - 2: Autos & Vehicles + - 10: Music + - 15: Pets & Animals + - 17: Sports + - 19: Travel & Events + - 20: Gaming + - 22: People & Blogs + - 23: Comedy + - 24: Entertainment + - 25: News & Politics + - 26: Howto & Style + - 27: Education + - 28: Science & Technology + - 29: Nonprofits & Activism + range: string + required: false + examples: + - value: "22" + description: "YouTube: People & Blogs" + - value: "27" + description: "YouTube: Education" + + live_broadcast_content: + slot_uri: hc:liveBroadcastContent + description: | + Live broadcast status of the video. + + **Values**: + - none: Not a live broadcast (standard video) + - live: Currently broadcasting live + - upcoming: Scheduled live stream not yet started + + When `live` or `upcoming` becomes `none`, video is archived. + range: LiveBroadcastStatusEnum + required: false + examples: + - value: "none" + description: "Standard video (not live)" + - value: "live" + description: "Currently broadcasting" + + is_licensed_content: + slot_uri: hc:isLicensedContent + description: | + Whether the video contains licensed content (music, clips). + + Affects monetization and regional availability. + range: boolean + required: false + + is_embeddable: + slot_uri: hc:isEmbeddable + description: | + Whether the video can be embedded on external sites. + + Publisher-controlled setting. + range: boolean + required: false + + is_made_for_kids: + slot_uri: hc:isMadeForKids + description: | + Whether the video is designated as made for children. + + COPPA compliance flag. Affects comments, ads, features. + range: boolean + required: false + + # --- Comments --- + + comments_fetched: + slot_uri: hc:commentsFetched + description: | + Number of comments actually fetched/archived. + + May be less than `comment_count` due to API limits, + deleted comments, or pagination. 
+ range: integer + required: false + minimum_value: 0 + examples: + - value: 0 + description: "No comments fetched" + + video_comments: + slot_uri: hc:videoComments + description: | + Collection of comments on this video. + + Structured comment data with author, text, timestamp, likes. + + Note: Comments may contain nested replies. + range: VideoComment + multivalued: true + required: false + inlined: true + + comments: + - "Extends SocialMediaPost with video-specific properties" + - "Maps to as:Video and schema:VideoObject" + - "Metrics are observational - always include metrics_observed_at" + - "Caption availability signals but not content (see VideoSubtitle)" + - "YouTube is primary platform for heritage institution video content" + + see_also: + - "https://www.w3.org/ns/activitystreams#Video" + - "https://schema.org/VideoObject" + - "https://developers.google.com/youtube/v3/docs/videos" + + # ========================================================================== + # Supporting Class: VideoComment + # ========================================================================== + + VideoComment: + class_uri: schema:Comment + description: | + A comment on a video post. + + Models user-generated comments with author, text, timestamp, + and engagement metrics. Supports nested reply threads. 
+ + exact_mappings: + - schema:Comment + - as:Note + + slots: + - comment_id + - comment_author + - comment_author_channel_id + - comment_text + - comment_published_at + - comment_updated_at + - comment_like_count + - comment_reply_count + - comment_replies + + slot_usage: + comment_id: + slot_uri: dcterms:identifier + description: Unique identifier for the comment + range: string + required: true + + comment_author: + slot_uri: schema:author + description: Display name of comment author + range: string + required: true + + comment_author_channel_id: + slot_uri: hc:authorChannelId + description: Platform channel/account ID of author + range: string + required: false + + comment_text: + slot_uri: schema:text + description: Full text content of the comment + range: string + required: true + + comment_published_at: + slot_uri: dcterms:created + description: When comment was originally posted + range: datetime + required: true + + comment_updated_at: + slot_uri: dcterms:modified + description: When comment was last edited + range: datetime + required: false + + comment_like_count: + slot_uri: hc:likeCount + description: Number of likes on this comment + range: integer + required: false + minimum_value: 0 + + comment_reply_count: + slot_uri: hc:replyCount + description: Number of replies to this comment + range: integer + required: false + minimum_value: 0 + + comment_replies: + slot_uri: schema:comment + description: Nested reply comments + range: VideoComment + multivalued: true + required: false + inlined: true + +# ============================================================================ +# Supporting Enumerations +# ============================================================================ + +enums: + + VideoDefinitionEnum: + description: | + Video resolution/definition quality categories. + + Based on common platform standards. 
+    permissible_values:
+      sd:
+        description: Standard definition (480p or lower)
+      hd:
+        description: High definition (720p, 1080p)
+      uhd:
+        description: Ultra HD (2160p/4K)
+      4k:
+        description: 4K resolution (2160p) - alias for uhd
+      8k:
+        description: Full Ultra HD (4320p)
+
+  LiveBroadcastStatusEnum:
+    description: |
+      Live broadcast status values for video content.
+
+      Based on YouTube API liveBroadcastContent values.
+    permissible_values:
+      none:
+        description: Not a live broadcast (standard uploaded video)
+      live:
+        description: Currently broadcasting live
+      upcoming:
+        description: Scheduled live stream that hasn't started yet
+
+# ============================================================================
+# Slot Definitions
+# ============================================================================
+
+slots:
+  duration:
+    description: Duration in ISO 8601 format
+    range: string
+
+  definition:
+    description: Video resolution quality (sd, hd, uhd, 4k, 8k)
+    range: VideoDefinitionEnum
+
+  aspect_ratio:
+    description: Video aspect ratio (16:9, 9:16, 4:3, etc.)
+ range: string + + frame_rate: + description: Frame rate in FPS + range: float + + caption_available: + description: Whether captions are available + range: boolean + + default_audio_language: + description: Language of default audio track + range: string + + available_caption_languages: + description: Languages for which captions exist + range: string + multivalued: true + + view_count: + description: Number of views + range: integer + + like_count: + description: Number of likes + range: integer + + dislike_count: + description: Number of dislikes + range: integer + + comment_count: + description: Number of comments + range: integer + + favorite_count: + description: Number of favorites/saves + range: integer + + metrics_observed_at: + description: When metrics were recorded + range: datetime + + video_category_id: + description: Platform category identifier + range: string + + live_broadcast_content: + description: Live broadcast status + range: LiveBroadcastStatusEnum + + is_licensed_content: + description: Contains licensed content + range: boolean + + is_embeddable: + description: Can be embedded externally + range: boolean + + is_made_for_kids: + description: COPPA kids content flag + range: boolean + + comments_fetched: + description: Number of comments actually retrieved + range: integer + + video_comments: + description: Collection of video comments + range: VideoComment + multivalued: true + + # VideoComment slots + comment_id: + description: Unique comment identifier + range: string + + comment_author: + description: Comment author display name + range: string + + comment_author_channel_id: + description: Author's channel/account ID + range: string + + comment_text: + description: Comment text content + range: string + + comment_published_at: + description: When comment was posted + range: datetime + + comment_updated_at: + description: When comment was edited + range: datetime + + comment_like_count: + description: Likes on this comment + range: 
integer + + comment_reply_count: + description: Number of replies + range: integer + + comment_replies: + description: Nested reply comments + range: VideoComment + multivalued: true diff --git a/schemas/20251121/linkml/modules/classes/VideoSubtitle.yaml b/schemas/20251121/linkml/modules/classes/VideoSubtitle.yaml new file mode 100644 index 0000000000..e7335daba7 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/VideoSubtitle.yaml @@ -0,0 +1,632 @@ +# Video Subtitle Class +# Time-coded caption/subtitle content extending VideoTranscript +# +# Part of Heritage Custodian Ontology v0.9.5 +# +# HIERARCHY: +# E73_Information_Object (CIDOC-CRM) +# │ +# └── VideoTextContent (abstract - provenance) +# │ +# └── VideoTranscript (full text transcription) +# │ +# └── VideoSubtitle (this class - time-coded captions) +# +# DESIGN RATIONALE: +# VideoSubtitle extends VideoTranscript because subtitles ARE transcripts +# with additional time-coding and display metadata: +# +# 1. A subtitle file (SRT, VTT) contains complete spoken content (transcript) +# 2. Plus precise start/end times for each caption +# 3. Plus display formatting (position, styling in some formats) +# +# You can always derive a plain transcript from subtitles by stripping times. +# This inheritance enables polymorphic handling: treat subtitles as transcripts +# when time-coding isn't needed. 
+# +# SUBTITLE FORMATS SUPPORTED: +# - SRT (SubRip): Most common, simple time + text +# - VTT (WebVTT): W3C standard, supports styling +# - TTML (DFXP): XML-based, broadcast standard +# - SBV (YouTube): YouTube's native format +# - ASS/SSA: Advanced styling, anime subtitles + +id: https://nde.nl/ontology/hc/class/VideoSubtitle +name: video_subtitle_class +title: Video Subtitle Class + +imports: + - linkml:types + - ./VideoTranscript + - ./VideoTimeSegment + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + dcterms: http://purl.org/dc/terms/ + prov: http://www.w3.org/ns/prov# + crm: http://www.cidoc-crm.org/cidoc-crm/ + skos: http://www.w3.org/2004/02/skos/core# + ma: http://www.w3.org/ns/ma-ont# + +default_prefix: hc + +classes: + + VideoSubtitle: + is_a: VideoTranscript + class_uri: hc:VideoSubtitle + abstract: false + description: | + Time-coded caption/subtitle content for video. + + **DEFINITION**: + + VideoSubtitle represents caption/subtitle tracks that provide time-coded + text synchronized with video playback. It extends VideoTranscript because + subtitles contain complete transcription PLUS temporal synchronization. + + **INHERITANCE FROM VideoTranscript**: + + VideoSubtitle inherits all transcript capabilities: + - `full_text`: Complete subtitle text concatenated + - `segments`: Time-coded entries (REQUIRED for subtitles) + - `includes_timestamps`: Always true for subtitles + - `content_language`: Language of subtitle text + - All provenance from VideoTextContent + + And adds subtitle-specific properties: + - `subtitle_format`: SRT, VTT, TTML, SBV, ASS + - `is_closed_caption`: CC vs regular subtitles + - `is_sdh`: Subtitles for Deaf/Hard-of-Hearing + - `includes_sound_descriptions`: Non-speech audio descriptions + + **SCHEMA.ORG ALIGNMENT**: + + Maps to `schema:caption` property: + > "For downloadable machine formats (closed caption, subtitles etc.) 
+ > use the MediaObject.encodingFormat property." + + **SUBTITLE vs CAPTION vs TRANSCRIPT**: + + | Type | Time-coded | Purpose | Audience | + |------|------------|---------|----------| + | Transcript | Optional | Reading, search | Everyone | + | Subtitle | Required | Language translation | Hearing viewers | + | Caption (CC) | Required | Accessibility | Deaf/HoH viewers | + | SDH | Required | Full accessibility | Deaf viewers, noisy environments | + + **SDH (Subtitles for Deaf/Hard-of-Hearing)**: + + SDH differs from regular subtitles by including: + - Speaker identification: "(John) Hello" + - Sound effects: "[door slams]", "[music playing]" + - Music descriptions: "♪ upbeat jazz ♪" + - Emotional cues: "[laughing]", "[whispering]" + + **SUBTITLE FORMATS**: + + | Format | Extension | Features | Use Case | + |--------|-----------|----------|----------| + | SRT | .srt | Simple, universal | Most video players | + | VTT | .vtt | W3C standard, styling | HTML5 video, web | + | TTML | .ttml/.dfxp | XML, rich styling | Broadcast, streaming | + | SBV | .sbv | YouTube native | YouTube uploads | + | ASS | .ass | Advanced styling | Anime, complex layouts | + + **SRT FORMAT EXAMPLE**: + + ``` + 1 + 00:00:00,000 --> 00:00:03,500 + Welcome to the Rijksmuseum. + + 2 + 00:00:03,500 --> 00:00:08,200 + Today we'll explore the Night Watch gallery. + ``` + + **VTT FORMAT EXAMPLE**: + + ``` + WEBVTT + + 00:00:00.000 --> 00:00:03.500 + Welcome to the Rijksmuseum. + + 00:00:03.500 --> 00:00:08.200 + Today we'll explore the Night Watch gallery. + ``` + + **HERITAGE INSTITUTION CONTEXT**: + + Subtitles are critical for heritage video accessibility: + + 1. **Accessibility Compliance**: WCAG 2.1, Section 508 + 2. **Multilingual Access**: Translate for international audiences + 3. **Silent Viewing**: Social media, public displays, quiet spaces + 4. **Search Discovery**: Subtitle text is indexed by platforms + 5. 
**Preservation**: Text outlasts video format obsolescence + + **YOUTUBE API INTEGRATION**: + + Subtitle tracks from YouTube API populate: + - `subtitle_format`: Typically VTT or SRT + - `generation_method`: PLATFORM_PROVIDED or ASR_AUTOMATIC + - `content_language`: From track language code + - `is_auto_generated`: YouTube auto-caption flag + + **SEGMENTS ARE REQUIRED**: + + Unlike VideoTranscript where segments are optional, VideoSubtitle + REQUIRES the `segments` slot to be populated with VideoTimeSegment + entries that include start_seconds, end_seconds, and segment_text. + + exact_mappings: + - schema:caption + + close_mappings: + - ma:CaptioningFormat + + related_mappings: + - schema:transcript + + slots: + # Subtitle-specific format + - subtitle_format + - raw_subtitle_content + + # Accessibility metadata + - is_closed_caption + - is_sdh + - includes_sound_descriptions + - includes_music_descriptions + - includes_speaker_identification + + # Source/generation info + - is_auto_generated + - track_name + - track_id + + # Positioning (for formats that support it) + - default_position + + # Entry counts + - entry_count + - average_entry_duration_seconds + + slot_usage: + # Override segments to be required for subtitles + segments: + required: true + description: | + Time-coded subtitle entries as VideoTimeSegment objects. + + **REQUIRED for VideoSubtitle** (optional in parent VideoTranscript). + + Each segment represents one caption display unit: + - start_seconds: When caption appears + - end_seconds: When caption disappears + - segment_text: Caption text content + - segment_index: Order in subtitle track + - confidence: For auto-generated captions + + Segments are ordered by start_seconds for proper playback. + + # Override includes_timestamps to default true + includes_timestamps: + ifabsent: "true" + description: | + Whether subtitle includes time markers. + + **Always true for VideoSubtitle** - time-coding is definitional. 
+ + subtitle_format: + slot_uri: dcterms:format + description: | + Subtitle file format. + + Dublin Core: format for resource format. + + Specifies the encoding format of the subtitle content. + Affects parsing and rendering capabilities. + range: SubtitleFormatEnum + required: true + examples: + - value: "VTT" + description: "WebVTT format (W3C standard)" + - value: "SRT" + description: "SubRip format (most common)" + + raw_subtitle_content: + slot_uri: hc:rawSubtitleContent + description: | + Original subtitle file content as raw string. + + Preserves the complete subtitle file in its native format. + Useful for: + - Format conversion + - Re-parsing with different tools + - Archive preservation + + May be large - consider storing separately for large files. + range: string + required: false + examples: + - value: | + WEBVTT + + 00:00:00.000 --> 00:00:03.500 + Welcome to the museum. + description: "Complete VTT file content" + + is_closed_caption: + slot_uri: hc:isClosedCaption + description: | + Whether this is a closed caption track (CC). + + Closed captions differ from subtitles: + - **CC (true)**: Designed for Deaf/HoH, includes non-speech audio + - **Subtitles (false)**: Translation of dialogue only + + CC typically includes [MUSIC], [APPLAUSE], speaker ID, etc. + range: boolean + required: false + ifabsent: "false" + examples: + - value: true + description: "This is a closed caption track" + + is_sdh: + slot_uri: hc:isSDH + description: | + Whether these are Subtitles for Deaf/Hard-of-Hearing (SDH). + + SDH combines subtitle translation with CC-style annotations: + - Dialogue translation (like subtitles) + - Sound descriptions (like CC) + - Speaker identification + + Typically marked "[SDH]" on streaming platforms. 
+ range: boolean + required: false + ifabsent: "false" + examples: + - value: true + description: "SDH subtitle track" + + includes_sound_descriptions: + slot_uri: hc:includesSoundDescriptions + description: | + Whether subtitle includes non-speech sound descriptions. + + Examples of sound descriptions: + - [door slams] + - [phone ringing] + - [thunder] + - [footsteps approaching] + + Characteristic of CC and SDH tracks. + range: boolean + required: false + ifabsent: "false" + examples: + - value: true + description: "Contains sound effect descriptions" + + includes_music_descriptions: + slot_uri: hc:includesMusicDescriptions + description: | + Whether subtitle includes music/song descriptions. + + Examples: + - ♪ upbeat jazz playing ♪ + - [classical music] + - ♪ singing in Dutch ♪ + - [somber orchestral music] + + Important for heritage content with significant musical elements. + range: boolean + required: false + ifabsent: "false" + examples: + - value: true + description: "Contains music descriptions" + + includes_speaker_identification: + slot_uri: hc:includesSpeakerIdentification + description: | + Whether subtitle identifies speakers. + + Speaker identification patterns: + - (John): Hello there. + - NARRATOR: Welcome to the museum. + - [Curator] This painting dates from 1642. + + Different from transcript speaker_id which is per-segment; + this indicates whether the TEXT CONTENT includes labels. + range: boolean + required: false + ifabsent: "false" + examples: + - value: true + description: "Subtitle text includes speaker labels" + + is_auto_generated: + slot_uri: hc:isAutoGenerated + description: | + Whether subtitle was auto-generated by the platform. + + Distinct from generation_method (inherited from VideoTextContent): + - `is_auto_generated`: Platform flag (YouTube, Vimeo) + - `generation_method`: How WE know it was generated + + Auto-generated captions typically have lower accuracy. 
+ range: boolean + required: false + ifabsent: "false" + examples: + - value: true + description: "YouTube auto-generated caption" + + track_name: + slot_uri: schema:name + description: | + Human-readable name of the subtitle track. + + Schema.org: name for track label. + + Examples from YouTube: + - "English" + - "English (auto-generated)" + - "Dutch - Nederlands" + - "English (United Kingdom)" + range: string + required: false + examples: + - value: "English (auto-generated)" + description: "YouTube auto-caption track name" + + track_id: + slot_uri: dcterms:identifier + description: | + Platform-specific identifier for this subtitle track. + + Dublin Core: identifier for unique ID. + + Used to fetch/update specific tracks via API. + range: string + required: false + examples: + - value: "en.3OWxR1w4QfE" + description: "YouTube caption track ID" + + default_position: + slot_uri: hc:defaultPosition + description: | + Default display position for captions. + + For formats that support positioning (VTT, TTML, ASS): + - BOTTOM: Default, below video content + - TOP: Above video content + - MIDDLE: Center of video + + May be overridden per-segment in advanced formats. + range: SubtitlePositionEnum + required: false + ifabsent: "string(BOTTOM)" + examples: + - value: "BOTTOM" + description: "Standard bottom caption position" + + entry_count: + slot_uri: hc:entryCount + description: | + Number of subtitle entries (caption cues). + + Equals length of segments array. + Useful for content sizing without loading full segments. + range: integer + required: false + minimum_value: 0 + examples: + - value: 127 + description: "127 caption cues in this track" + + average_entry_duration_seconds: + slot_uri: hc:averageEntryDuration + description: | + Average duration of subtitle entries in seconds. 
+ + Typical ranges: + - 2-4 seconds: Normal speech rate + - < 2 seconds: Rapid dialogue + - > 5 seconds: Slow narration or long displays + + Useful for quality assessment - very short or long entries + may indicate timing issues. + range: float + required: false + minimum_value: 0.0 + examples: + - value: 3.2 + description: "Average 3.2 seconds per caption" + + rules: + - postconditions: + description: | + segments must be populated for VideoSubtitle. + This is enforced by making segments required in slot_usage. + + comments: + - "Time-coded caption/subtitle content" + - "Extends VideoTranscript - subtitles ARE transcripts plus time codes" + - "Supports multiple formats: SRT, VTT, TTML, SBV, ASS" + - "Accessibility metadata: CC, SDH, sound/music descriptions" + - "Critical for heritage video accessibility compliance" + + see_also: + - "https://schema.org/caption" + - "https://www.w3.org/TR/webvtt1/" + - "https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API" + - "https://www.3playmedia.com/learn/popular-topics/closed-captioning/" + +# ============================================================================ +# Enumerations +# ============================================================================ + +enums: + + SubtitleFormatEnum: + description: | + Subtitle/caption file formats. + + Each format has different capabilities for timing precision, + styling, positioning, and metadata. + permissible_values: + SRT: + description: | + SubRip subtitle format (.srt). + Most widely supported format. + Simple: sequence number, timecode, text. + No styling or positioning support. + VTT: + description: | + WebVTT format (.vtt). + W3C standard for HTML5 video. + Supports styling (CSS), positioning, cue settings. + Recommended for web delivery. + TTML: + description: | + Timed Text Markup Language (.ttml/.dfxp/.xml). + W3C XML-based standard. + Rich styling, regions, timing. + Used in broadcast and streaming (Netflix, Amazon). 
+ SBV: + description: | + YouTube SubViewer format (.sbv). + Simple format similar to SRT. + Native YouTube caption format. + ASS: + description: | + Advanced SubStation Alpha (.ass). + Advanced styling, positioning, effects. + Popular for anime subtitles. + Includes SSA (.ssa) as predecessor. + STL: + description: | + EBU STL format (.stl). + European Broadcasting Union standard. + Used in broadcast television. + Binary format with teletext compatibility. + CAP: + description: | + Scenarist Closed Caption (.scc/.cap). + Used for broadcast closed captioning. + CEA-608/CEA-708 compliant. + SAMI: + description: | + Synchronized Accessible Media Interchange (.smi/.sami). + Microsoft format for Windows Media. + HTML-like markup with timing. + LRC: + description: | + LRC lyrics format (.lrc). + Simple format for song lyrics. + Line-by-line timing, no duration. + JSON: + description: | + JSON-based subtitle format. + Used by some APIs (YouTube transcript API). + Structure varies by source. + UNKNOWN: + description: | + Unknown or unrecognized format. + May require manual parsing or conversion. + + SubtitlePositionEnum: + description: | + Default caption display position on video. + + May be overridden by format-specific positioning (VTT, TTML, ASS). + permissible_values: + BOTTOM: + description: | + Bottom of video frame (standard position). + Most common for subtitles and captions. + Typically in lower 10-15% of frame. + TOP: + description: | + Top of video frame. + Used when bottom is occluded. + Common for some broadcast formats. + MIDDLE: + description: | + Center of video frame. + Rarely used except for specific effects. + LEFT: + description: | + Left side of frame (vertical text). + Rare, used for specific languages/effects. + RIGHT: + description: | + Right side of frame (vertical text). + Rare, used for specific languages/effects. 
+ +# ============================================================================ +# Slot Definitions +# ============================================================================ + +slots: + subtitle_format: + description: Subtitle file format (SRT, VTT, TTML, etc.) + range: SubtitleFormatEnum + + raw_subtitle_content: + description: Original subtitle file content as raw string + range: string + + is_closed_caption: + description: Whether this is a closed caption (CC) track + range: boolean + + is_sdh: + description: Whether these are Subtitles for Deaf/Hard-of-Hearing + range: boolean + + includes_sound_descriptions: + description: Whether subtitle includes non-speech sound descriptions + range: boolean + + includes_music_descriptions: + description: Whether subtitle includes music descriptions + range: boolean + + includes_speaker_identification: + description: Whether subtitle text includes speaker labels + range: boolean + + is_auto_generated: + description: Whether subtitle was auto-generated by platform + range: boolean + + track_name: + description: Human-readable name of subtitle track + range: string + + track_id: + description: Platform-specific identifier for subtitle track + range: string + + default_position: + description: Default display position for captions + range: SubtitlePositionEnum + + entry_count: + description: Number of subtitle entries (caption cues) + range: integer + + average_entry_duration_seconds: + description: Average duration of subtitle entries in seconds + range: float diff --git a/schemas/20251121/linkml/modules/classes/VideoTextContent.yaml b/schemas/20251121/linkml/modules/classes/VideoTextContent.yaml new file mode 100644 index 0000000000..1b798c2a53 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/VideoTextContent.yaml @@ -0,0 +1,524 @@ +# Video Text Content Class +# Abstract base class for all textual/derived content from videos +# +# Part of Heritage Custodian Ontology v0.9.5 +# +# HIERARCHY: +# 
E73_Information_Object (CIDOC-CRM) +# │ +# └── VideoTextContent (this class - ABSTRACT) +# │ +# ├── VideoTranscript (full text transcription) +# │ │ +# │ └── VideoSubtitle (time-coded captions) +# │ +# └── VideoAnnotation (CV/multimodal derived) +# │ +# ├── VideoSceneAnnotation +# ├── VideoObjectAnnotation +# └── VideoOCRAnnotation +# +# DESIGN RATIONALE: +# All text derived from video (transcripts, subtitles, annotations) shares +# common provenance requirements: +# - Source video reference +# - Generation method (ASR, manual, CV model) +# - Generation timestamp +# - Model/tool version +# - Overall confidence score +# +# This abstract base ensures consistent provenance tracking across all +# video-derived text content types. + +id: https://nde.nl/ontology/hc/class/VideoTextContent +name: video_text_content_class +title: Video Text Content Class + +imports: + - linkml:types + - ./VideoPost + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + dcterms: http://purl.org/dc/terms/ + prov: http://www.w3.org/ns/prov# + crm: http://www.cidoc-crm.org/cidoc-crm/ + skos: http://www.w3.org/2004/02/skos/core# + oa: http://www.w3.org/ns/oa# + +default_prefix: hc + +classes: + + VideoTextContent: + class_uri: crm:E73_Information_Object + abstract: true + description: | + Abstract base class for all textual/derived content from videos. + + **DEFINITION**: + + VideoTextContent is the abstract parent for all text that is extracted, + transcribed, or derived from video content. This includes: + + | Subclass | Source | Description | + |----------|--------|-------------| + | VideoTranscript | Audio | Full text transcription of spoken content | + | VideoSubtitle | Audio | Time-coded caption entries (SRT/VTT) | + | VideoAnnotation | Visual | CV/multimodal-derived descriptions | + + **PROVENANCE REQUIREMENTS**: + + All video-derived text MUST include comprehensive provenance: + + 1. 
**Source**: Which video was processed (`source_video`) + 2. **Method**: How was content generated (`generation_method`) + 3. **Agent**: Who/what generated it (`generated_by`) + 4. **Time**: When was it generated (`generation_timestamp`) + 5. **Version**: Tool/model version (`model_version`) + 6. **Quality**: Overall confidence (`overall_confidence`) + + **PROV-O ALIGNMENT**: + + Maps to W3C PROV-O for provenance tracking: + + ```turtle + :transcript a hc:VideoTranscript ; + prov:wasGeneratedBy :asr_activity ; + prov:wasAttributedTo :whisper_model ; + prov:generatedAtTime "2025-12-01T10:00:00Z" ; + prov:wasDerivedFrom :source_video . + ``` + + **CIDOC-CRM E73_Information_Object**: + + - E73 is the base for all identifiable immaterial items + - Includes texts, computer programs, songs, recipes + - VideoTextContent are E73 instances derived from video (E73) + + **GENERATION METHODS**: + + | Method | Description | Typical Confidence | + |--------|-------------|-------------------| + | ASR_AUTOMATIC | Automatic speech recognition | 0.75-0.95 | + | ASR_ENHANCED | ASR with post-processing | 0.85-0.98 | + | MANUAL_TRANSCRIPTION | Human transcription | 0.98-1.0 | + | MANUAL_CORRECTION | Human-corrected ASR | 0.95-1.0 | + | CV_AUTOMATIC | Computer vision detection | 0.60-0.90 | + | MULTIMODAL | Combined audio+visual AI | 0.70-0.95 | + | OCR | Optical character recognition | 0.80-0.98 | + | PLATFORM_PROVIDED | From YouTube/Vimeo API | 0.85-0.95 | + + **HERITAGE INSTITUTION CONTEXT**: + + Video text content is critical for: + - **Accessibility**: Deaf/HoH users need accurate captions + - **Discovery**: Full-text search over video collections + - **Preservation**: Text outlasts video format obsolescence + - **Research**: Analyzing spoken content at scale + - **Translation**: Multilingual access to heritage content + + **LANGUAGE SUPPORT**: + + - `content_language`: Primary language of text content + - May differ from video's default_audio_language if translated + - ISO 639-1 
codes (e.g., "nl", "en", "de") + + exact_mappings: + - crm:E73_Information_Object + + close_mappings: + - prov:Entity + + related_mappings: + - schema:CreativeWork + - dcterms:Text + + slots: + # Source reference + - source_video + - source_video_url + + # Content metadata + - content_language + - content_title + + # Provenance - Generation + - generated_by + - generation_method + - generation_timestamp + - model_version + - model_provider + + # Quality + - overall_confidence + - is_verified + - verified_by + - verification_date + + # Processing metadata + - processing_duration_seconds + - word_count + - character_count + + slot_usage: + source_video: + slot_uri: prov:wasDerivedFrom + description: | + Reference to the VideoPost from which this content was derived. + + PROV-O: wasDerivedFrom links derived content to source. + + Links to the video's unique identifier (post_id). + range: string + required: true + examples: + - value: "FbIoC-Owy-M" + description: "YouTube video ID as source reference" + + source_video_url: + slot_uri: schema:url + description: | + URL of the source video. + + Convenience field for direct video access. + Derived from source_video but stored for quick reference. + range: uri + required: false + examples: + - value: "https://www.youtube.com/watch?v=FbIoC-Owy-M" + description: "Full YouTube video URL" + + content_language: + slot_uri: dcterms:language + description: | + Primary language of the text content. + + Dublin Core: language for content language. + + ISO 639-1 code. May differ from video's audio language + if this is a translation or localization. + range: string + required: true + examples: + - value: "nl" + description: "Dutch language content" + - value: "en" + description: "English translation" + + content_title: + slot_uri: dcterms:title + description: | + Title or label for this text content. + + Dublin Core: title for content name. 
+ + Examples: + - "Rijksmuseum Tour - Full Transcript" + - "Dutch Subtitles - Auto-generated" + - "Scene Annotations - CV Model v2.1" + range: string + required: false + examples: + - value: "De Vrijheidsroute Ep.3 - Dutch Transcript" + description: "Descriptive title for transcript" + + generated_by: + slot_uri: prov:wasAttributedTo + description: | + The agent (model, service, person) that generated this content. + + PROV-O: wasAttributedTo identifies the responsible agent. + + **Examples**: + - AI Models: "openai/whisper-large-v3", "google/speech-to-text" + - Services: "YouTube Auto-captions", "Rev.com" + - Human: "transcriber:jane.doe@museum.nl" + range: string + required: true + examples: + - value: "openai/whisper-large-v3" + description: "OpenAI Whisper ASR model" + - value: "YouTube Auto-captions" + description: "Platform-provided captions" + - value: "manual:curator@rijksmuseum.nl" + description: "Human transcriber" + + generation_method: + slot_uri: prov:wasGeneratedBy + description: | + The method used to generate this content. + + PROV-O: wasGeneratedBy for generation activity type. + + See GenerationMethodEnum for standardized values. + range: GenerationMethodEnum + required: true + examples: + - value: "ASR_AUTOMATIC" + description: "Automatic speech recognition" + - value: "MANUAL_TRANSCRIPTION" + description: "Human transcription" + + generation_timestamp: + slot_uri: prov:generatedAtTime + description: | + When this content was generated. + + PROV-O: generatedAtTime for creation timestamp. + + ISO 8601 datetime. Critical for versioning and reproducibility. + range: datetime + required: true + examples: + - value: "2025-12-01T10:30:00Z" + description: "Generated December 1, 2025 at 10:30 UTC" + + model_version: + slot_uri: schema:softwareVersion + description: | + Version of the model or tool used for generation. + + Schema.org: softwareVersion for version tracking. + + Critical for reproducibility and quality assessment. 
+ range: string + required: false + examples: + - value: "large-v3" + description: "Whisper model version" + - value: "v2.3.1" + description: "Software version number" + + model_provider: + slot_uri: schema:provider + description: | + Provider or vendor of the generation model/service. + + Schema.org: provider for service provider. + range: string + required: false + examples: + - value: "OpenAI" + description: "Model provider" + - value: "Google Cloud" + description: "Cloud service provider" + + overall_confidence: + slot_uri: hc:overallConfidence + description: | + Overall confidence score for the generated content. + + Range: 0.0 (no confidence) to 1.0 (complete certainty) + + Aggregated from per-segment confidence scores or + provided by the generation model. + + **Thresholds** (suggested): + - > 0.9: High quality, production-ready + - 0.75-0.9: Good, may have minor errors + - 0.6-0.75: Usable, should be reviewed + - < 0.6: Low quality, needs significant correction + range: float + required: false + minimum_value: 0.0 + maximum_value: 1.0 + examples: + - value: 0.92 + description: "High confidence ASR output" + + is_verified: + slot_uri: hc:isVerified + description: | + Whether content has been verified by a human. + + - **true**: Human-reviewed and approved + - **false**: Not yet verified (default for AI-generated) + + Critical for quality assurance in heritage contexts. + range: boolean + required: false + ifabsent: "false" + examples: + - value: true + description: "Human-verified transcript" + + verified_by: + slot_uri: prov:wasAttributedTo + description: | + Identity of the person who verified the content. + + Only populated when is_verified = true. + range: string + required: false + examples: + - value: "curator@rijksmuseum.nl" + description: "Staff member who verified" + + verification_date: + slot_uri: dcterms:dateAccepted + description: | + Date when content was verified. + + Dublin Core: dateAccepted for approval date. 
+ range: datetime + required: false + examples: + - value: "2025-12-02T15:00:00Z" + description: "Verified December 2, 2025" + + processing_duration_seconds: + slot_uri: hc:processingDuration + description: | + Time taken to generate this content, in seconds. + + Useful for performance monitoring and cost estimation. + range: float + required: false + minimum_value: 0.0 + examples: + - value: 45.3 + description: "Processed in 45.3 seconds" + + word_count: + slot_uri: hc:wordCount + description: | + Total number of words in the text content. + + Useful for content sizing and analysis. + range: integer + required: false + minimum_value: 0 + examples: + - value: 1523 + description: "1,523 words in transcript" + + character_count: + slot_uri: hc:characterCount + description: | + Total number of characters in the text content. + + Includes spaces. Useful for storage estimation. + range: integer + required: false + minimum_value: 0 + examples: + - value: 8742 + description: "8,742 characters" + + comments: + - "Abstract base for all video-derived text content" + - "Comprehensive PROV-O provenance tracking" + - "Confidence scoring for AI-generated content" + - "Verification workflow support" + - "Critical for heritage accessibility and discovery" + + see_also: + - "https://www.w3.org/TR/prov-o/" + - "http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object" + +# ============================================================================ +# Enumerations +# ============================================================================ + +enums: + + GenerationMethodEnum: + description: | + Methods for generating video-derived text content. + + Standardized values for provenance tracking. 
+ permissible_values: + ASR_AUTOMATIC: + description: Automatic speech recognition (raw output) + ASR_ENHANCED: + description: ASR with post-processing (punctuation, normalization) + MANUAL_TRANSCRIPTION: + description: Fully human-transcribed content + MANUAL_CORRECTION: + description: Human-corrected ASR output + CV_AUTOMATIC: + description: Computer vision detection (raw output) + CV_ENHANCED: + description: CV with post-processing or filtering + MULTIMODAL: + description: Combined audio+visual AI processing + OCR: + description: Optical character recognition from video frames + PLATFORM_PROVIDED: + description: Content from platform API (YouTube, Vimeo captions) + HYBRID: + description: Combination of automated and manual methods + UNKNOWN: + description: Generation method not recorded + +# ============================================================================ +# Slot Definitions +# ============================================================================ + +slots: + source_video: + description: Reference to source VideoPost (video ID) + range: string + + source_video_url: + description: URL of the source video + range: uri + + content_language: + description: Primary language of text content (ISO 639-1) + range: string + + content_title: + description: Title or label for this text content + range: string + + generated_by: + description: Agent that generated this content (model, service, person) + range: string + + generation_method: + description: Method used to generate content + range: GenerationMethodEnum + + generation_timestamp: + description: When content was generated + range: datetime + + model_version: + description: Version of model/tool used + range: string + + model_provider: + description: Provider of model/service + range: string + + overall_confidence: + description: Overall confidence score (0.0-1.0) + range: float + + is_verified: + description: Whether content has been human-verified + range: boolean + + verified_by: + description: 
Person who verified the content + range: string + + verification_date: + description: Date content was verified + range: datetime + + processing_duration_seconds: + description: Time taken to generate content + range: float + + word_count: + description: Total word count + range: integer + + character_count: + description: Total character count + range: integer diff --git a/schemas/20251121/linkml/modules/classes/VideoTimeSegment.yaml b/schemas/20251121/linkml/modules/classes/VideoTimeSegment.yaml new file mode 100644 index 0000000000..0f5d252455 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/VideoTimeSegment.yaml @@ -0,0 +1,375 @@ +# Video Time Segment Class +# Reusable temporal segment for video content (subtitles, annotations, chapters) +# +# Part of Heritage Custodian Ontology v0.9.5 +# +# STRUCTURE: +# VideoTimeSegment (this class) +# - start_time, end_time (ISO 8601 duration) +# - start_seconds, end_seconds (float for computation) +# - segment_text (text content for this segment) +# - confidence (for ASR/CV generated content) +# +# USED BY: +# - VideoSubtitle (time-coded caption entries) +# - VideoAnnotation (scene/object detection segments) +# - VideoChapter (user-defined chapters) +# +# ONTOLOGY ALIGNMENT: +# - Maps to Media Fragments URI 1.0 (W3C) for temporal addressing +# - CIDOC-CRM E52_Time-Span for temporal extent +# - Web Annotation oa:FragmentSelector for annotation targets + +id: https://nde.nl/ontology/hc/class/VideoTimeSegment +name: video_time_segment_class +title: Video Time Segment Class + +imports: + - linkml:types + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + dcterms: http://purl.org/dc/terms/ + crm: http://www.cidoc-crm.org/cidoc-crm/ + oa: http://www.w3.org/ns/oa# + ma: http://www.w3.org/ns/ma-ont# + +default_prefix: hc + +classes: + + VideoTimeSegment: + class_uri: crm:E52_Time-Span + abstract: false + description: | + A temporal segment within a video, 
defined by start and end times. + + **DEFINITION**: + + VideoTimeSegment represents a bounded temporal portion of video content. + It is the foundational unit for time-coded content including: + - Subtitle/caption entries (text displayed at specific times) + - Annotation segments (detected scenes, objects, faces) + - Chapter markers (user-defined content sections) + + **DUAL TIME REPRESENTATION**: + + Times are stored in two formats for different use cases: + + | Format | Example | Use Case | + |--------|---------|----------| + | ISO 8601 duration | PT0M30S | Human-readable, serialization | + | Seconds (float) | 30.0 | Computation, synchronization | + + Both representations MUST be kept in sync. The seconds format is + primary for computation; ISO 8601 is derived for display/storage. + + **MEDIA FRAGMENTS URI (W3C)**: + + VideoTimeSegment aligns with W3C Media Fragments URI 1.0 specification + for addressing temporal fragments of video: + + ``` + https://example.com/video.mp4#t=30,35 + ``` + + The `start_seconds` and `end_seconds` map directly to the `t=` parameter. 
+ + **WEB ANNOTATION COMPATIBILITY**: + + When used as an annotation target selector: + - Maps to `oa:FragmentSelector` with `conformsTo` Media Fragments + - Enables interoperability with W3C Web Annotation Data Model + + **CIDOC-CRM E52_Time-Span**: + + In cultural heritage documentation: + - E52_Time-Span is the extent of a time-span + - Used for temporal properties of cultural objects + - VideoTimeSegment extends this to media-specific temporal segments + + **CONFIDENCE SCORING**: + + For segments generated by ASR (speech recognition) or CV (computer vision): + - `confidence`: 0.0-1.0 score for segment accuracy + - Enables filtering by quality threshold + - Critical for AI-generated transcripts and annotations + + **HERITAGE USE CASES**: + + | Use Case | Example | Start | End | + |----------|---------|-------|-----| + | Subtitle entry | "Welcome to the museum" | 0:30 | 0:35 | + | Scene annotation | "Exhibition hall panorama" | 1:00 | 1:30 | + | Chapter marker | "Introduction" | 0:00 | 2:00 | + | Object detection | "Painting: Night Watch" | 3:15 | 3:20 | + | Speaker change | "Curator speaking" | 5:00 | 7:30 | + + exact_mappings: + - crm:E52_Time-Span + - oa:FragmentSelector + + close_mappings: + - ma:MediaFragment + + related_mappings: + - schema:Clip + + slots: + # Time boundaries (ISO 8601 duration format) + - start_time + - end_time + + # Time boundaries (seconds for computation) + - start_seconds + - end_seconds + + # Content + - segment_text + - segment_index + + # Quality + - confidence + + # Metadata + - speaker_id + - speaker_label + + slot_usage: + start_time: + slot_uri: ma:hasStartTime + description: | + Start time of segment as ISO 8601 duration from video beginning. + + Media Ontology: hasStartTime for temporal start. 
+
+          **Format**: ISO 8601 duration (e.g., "PT0M30S" = 30 seconds from start)
+
+          **Common Patterns**:
+          - PT0S = Start of video (0 seconds)
+          - PT30S = 30 seconds
+          - PT1M30S = 1 minute 30 seconds
+          - PT1H15M30S = 1 hour 15 minutes 30 seconds
+        range: string
+        required: false
+        pattern: "^PT(?=\\d)(\\d+H)?(\\d+M)?(\\d+(\\.\\d+)?S)?$"
+        examples:
+          - value: "PT0M30S"
+            description: "30 seconds from video start"
+          - value: "PT1H15M30S"
+            description: "1 hour 15 minutes 30 seconds"
+
+      end_time:
+        slot_uri: ma:hasEndTime
+        description: |
+          End time of segment as ISO 8601 duration from video beginning.
+
+          Media Ontology: hasEndTime for temporal end.
+
+          Must be greater than or equal to start_time.
+        range: string
+        required: false
+        pattern: "^PT(?=\\d)(\\d+H)?(\\d+M)?(\\d+(\\.\\d+)?S)?$"
+        examples:
+          - value: "PT0M35S"
+            description: "35 seconds from video start"
+
+      start_seconds:
+        slot_uri: hc:startSeconds
+        description: |
+          Start time in seconds (floating point) from video beginning.
+
+          **PRIMARY for computation**. Use for:
+          - Video player synchronization
+          - Duration calculations
+          - Time-based sorting and filtering
+
+          Precision to milliseconds (3 decimal places) is typical.
+        range: float
+        required: true
+        minimum_value: 0.0
+        examples:
+          - value: 30.0
+            description: "30 seconds from start"
+          - value: "30.500"
+            description: "30.5 seconds (millisecond precision)"
+
+      end_seconds:
+        slot_uri: hc:endSeconds
+        description: |
+          End time in seconds (floating point) from video beginning.
+
+          Must be greater than start_seconds.
+
+          For single-frame annotations (e.g., object detection in one frame),
+          end_seconds may equal start_seconds or be slightly greater.
+        range: float
+        required: true
+        minimum_value: 0.0
+        examples:
+          - value: 35.0
+            description: "35 seconds from start"
+
+      segment_text:
+        slot_uri: oa:bodyValue
+        description: |
+          Text content for this segment.
+
+          Web Annotation: bodyValue for textual content.
+ + **Usage by content type**: + - Subtitles: Displayed caption text + - Transcripts: Spoken words during this segment + - Annotations: Description of detected content + - Chapters: Chapter title/description + range: string + required: false + examples: + - value: "Welkom bij het Rijksmuseum" + description: "Dutch subtitle text" + - value: "The curator explains the painting's history" + description: "Transcript segment" + + segment_index: + slot_uri: hc:segmentIndex + description: | + Sequential index of this segment within the parent content. + + Zero-based index for ordering segments: + - Subtitle: Order in which captions appear + - Annotation: Detection sequence + + Enables reconstruction of segment order when times overlap + or for stable sorting. + range: integer + required: false + minimum_value: 0 + examples: + - value: 0 + description: "First segment" + - value: 42 + description: "43rd segment (zero-indexed)" + + confidence: + slot_uri: hc:confidence + description: | + Confidence score for AI-generated content. + + Range: 0.0 (no confidence) to 1.0 (complete certainty) + + **Applies to**: + - ASR-generated transcript/subtitle segments + - CV-detected scene or object annotations + - OCR-extracted text from video frames + + **Thresholds** (suggested): + - > 0.9: High confidence, suitable for display + - 0.7-0.9: Medium, may need review + - < 0.7: Low, flag for human verification + range: float + required: false + minimum_value: 0.0 + maximum_value: 1.0 + examples: + - value: 0.95 + description: "High confidence ASR segment" + - value: 0.72 + description: "Medium confidence, may contain errors" + + speaker_id: + slot_uri: hc:speakerId + description: | + Identifier for the speaker during this segment. + + For transcripts with speaker diarization: + - Links to identified speaker (e.g., "SPEAKER_01") + - May be resolved to actual person identity + + Enables multi-speaker transcript navigation. 
+ range: string + required: false + examples: + - value: "SPEAKER_01" + description: "First identified speaker" + - value: "curator_taco_dibbits" + description: "Resolved speaker identity" + + speaker_label: + slot_uri: hc:speakerLabel + description: | + Human-readable label for the speaker. + + Display name for the speaker during this segment: + - May be generic ("Narrator", "Interviewer") + - May be specific ("Dr. Taco Dibbits, Museum Director") + + Distinguished from speaker_id which is a machine identifier. + range: string + required: false + examples: + - value: "Narrator" + description: "Generic speaker label" + - value: "Dr. Taco Dibbits, Museum Director" + description: "Specific identified speaker" + + rules: + - postconditions: + description: end_seconds must be >= start_seconds + # Note: LinkML doesn't support direct comparison rules, + # but this documents the constraint for validation + + comments: + - "Reusable time segment for subtitles, annotations, chapters" + - "Dual time format: ISO 8601 for serialization, seconds for computation" + - "Aligns with W3C Media Fragments URI specification" + - "Confidence scoring for AI-generated content" + - "Speaker diarization support for multi-speaker transcripts" + + see_also: + - "https://www.w3.org/TR/media-frags/" + - "https://www.w3.org/TR/annotation-model/" + - "https://www.w3.org/ns/ma-ont" + - "http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span" + +# ============================================================================ +# Slot Definitions +# ============================================================================ + +slots: + start_time: + description: Start time as ISO 8601 duration from video beginning + range: string + + end_time: + description: End time as ISO 8601 duration from video beginning + range: string + + start_seconds: + description: Start time in seconds (float) from video beginning + range: float + + end_seconds: + description: End time in seconds (float) from video beginning + 
range: float + + segment_text: + description: Text content for this time segment + range: string + + segment_index: + description: Sequential index of segment within parent + range: integer + + confidence: + description: Confidence score for AI-generated content (0.0-1.0) + range: float + + speaker_id: + description: Identifier for speaker during this segment + range: string + + speaker_label: + description: Human-readable label for speaker + range: string diff --git a/schemas/20251121/linkml/modules/classes/VideoTranscript.yaml b/schemas/20251121/linkml/modules/classes/VideoTranscript.yaml new file mode 100644 index 0000000000..e847949fda --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/VideoTranscript.yaml @@ -0,0 +1,469 @@ +# Video Transcript Class +# Full text transcription of video audio content +# +# Part of Heritage Custodian Ontology v0.9.5 +# +# HIERARCHY: +# E73_Information_Object (CIDOC-CRM) +# │ +# └── VideoTextContent (abstract base - provenance) +# │ +# └── VideoTranscript (this class) +# │ +# └── VideoSubtitle (time-coded extension) +# +# DESIGN RATIONALE: +# VideoTranscript represents the complete textual representation of spoken +# content in a video. It extends VideoTextContent to inherit comprehensive +# provenance tracking and adds transcript-specific slots: +# +# - full_text: Complete transcript as single text block +# - transcript_format: How the text is structured (plain, paragraphed, etc.) +# - segments: Optional structured breakdown into VideoTimeSegments +# - includes_timestamps/speakers: Metadata about content structure +# +# VideoSubtitle extends this because subtitles ARE transcripts plus time-codes. 
+ +id: https://nde.nl/ontology/hc/class/VideoTranscript +name: video_transcript_class +title: Video Transcript Class + +imports: + - linkml:types + - ./VideoTextContent + - ./VideoTimeSegment + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + dcterms: http://purl.org/dc/terms/ + prov: http://www.w3.org/ns/prov# + crm: http://www.cidoc-crm.org/cidoc-crm/ + skos: http://www.w3.org/2004/02/skos/core# + +default_prefix: hc + +classes: + + VideoTranscript: + is_a: VideoTextContent + class_uri: crm:E33_Linguistic_Object + abstract: false + description: | + Full text transcription of video audio content. + + **DEFINITION**: + + A VideoTranscript is the complete textual representation of all spoken + content in a video. It extends VideoTextContent with transcript-specific + properties and inherits all provenance tracking capabilities. + + **RELATIONSHIP TO VideoSubtitle**: + + VideoSubtitle is a subclass of VideoTranscript because: + 1. A subtitle file contains everything a transcript needs PLUS time codes + 2. You can derive a plain transcript from subtitles by stripping times + 3. This inheritance allows polymorphic handling of text content + + ``` + VideoTranscript VideoSubtitle (is_a VideoTranscript) + ├── full_text ├── full_text (inherited) + ├── segments[] ├── segments[] (required, with times) + └── (optional times) └── subtitle_format (SRT, VTT, etc.) + ``` + + **SCHEMA.ORG ALIGNMENT**: + + Maps to `schema:transcript` property: + > "If this MediaObject is an AudioObject or VideoObject, + > the transcript of that object." + + **CIDOC-CRM E33_Linguistic_Object**: + + E33 is the class comprising: + > "identifiable expressions in natural language or code" + + A transcript is a linguistic object derived from the audio track of + a video (which is itself an E73_Information_Object). 
+ + **TRANSCRIPT FORMATS**: + + | Format | Description | Use Case | + |--------|-------------|----------| + | PLAIN_TEXT | Continuous text, no structure | Simple search indexing | + | PARAGRAPHED | Text broken into paragraphs | Human reading | + | STRUCTURED | Segments with speaker labels | Research, analysis | + | TIMESTAMPED | Segments with time markers | Navigation, subtitling | + + **GENERATION METHODS** (inherited from VideoTextContent): + + | Method | Typical Use | Quality | + |--------|-------------|---------| + | ASR_AUTOMATIC | Whisper, Google STT | 0.80-0.95 | + | MANUAL_TRANSCRIPTION | Human transcriber | 0.98-1.0 | + | PLATFORM_PROVIDED | YouTube auto-captions | 0.75-0.90 | + | HYBRID | ASR + human correction | 0.95-1.0 | + + **HERITAGE INSTITUTION CONTEXT**: + + Transcripts are critical for heritage video collections: + + 1. **Discovery**: Full-text search over video content + 2. **Accessibility**: Deaf/HoH access to spoken content + 3. **Preservation**: Text outlasts video format obsolescence + 4. **Research**: Corpus analysis, keyword extraction + 5. **Translation**: Base for multilingual access + 6. **SEO**: Search engine indexing of video content + + **STRUCTURED SEGMENTS**: + + When `segments` is populated, the transcript has structural breakdown: + + ```yaml + segments: + - segment_index: 0 + start_seconds: 0.0 + end_seconds: 5.5 + segment_text: "Welcome to the Rijksmuseum." + speaker_label: "Narrator" + confidence: 0.94 + - segment_index: 1 + start_seconds: 5.5 + end_seconds: 12.3 + segment_text: "Today we'll explore the Night Watch gallery." + speaker_label: "Narrator" + confidence: 0.91 + ``` + + **PROVENANCE** (inherited from VideoTextContent): + + All transcripts include: + - `source_video`: Which video was transcribed + - `generated_by`: Tool/person that created transcript + - `generation_method`: ASR_AUTOMATIC, MANUAL_TRANSCRIPTION, etc. 
+ - `generation_timestamp`: When transcript was created + - `overall_confidence`: Aggregate quality score + - `is_verified`: Whether human-reviewed + + exact_mappings: + - crm:E33_Linguistic_Object + + close_mappings: + - schema:transcript + + related_mappings: + - dcterms:Text + + slots: + # Core content + - full_text + - transcript_format + + # Structural information + - includes_timestamps + - includes_speakers + - segments + + # Speaker metadata + - speaker_count + - primary_speaker + + # Additional metadata + - source_language_auto_detected + - paragraph_count + - sentence_count + + slot_usage: + full_text: + slot_uri: schema:text + description: | + Complete transcript text as a single string. + + Schema.org: text for primary textual content. + + Contains all spoken content from the video, concatenated. + May include: + - Speaker labels (if includes_speakers = true) + - Timestamps (if includes_timestamps = true) + - Paragraph breaks (if format = PARAGRAPHED or STRUCTURED) + + For structured access, use the `segments` slot instead. + range: string + required: true + examples: + - value: | + Welcome to the Rijksmuseum. Today we'll explore the masterpieces + of Dutch Golden Age painting. Our first stop is the Night Watch + by Rembrandt van Rijn, painted in 1642. + description: "Plain text transcript excerpt" + - value: | + [Narrator] Welcome to the Rijksmuseum. + [Narrator] Today we'll explore the masterpieces of Dutch Golden Age painting. + [Curator] Our first stop is the Night Watch by Rembrandt van Rijn. + description: "Transcript with speaker labels" + + transcript_format: + slot_uri: dcterms:format + description: | + Format/structure of the transcript text. + + Dublin Core: format for resource format. 
+
+          Indicates how the full_text is structured (see TranscriptFormatEnum for VERBATIM and CLEAN as well):
+          - PLAIN_TEXT: Continuous text without breaks
+          - PARAGRAPHED: Broken into paragraphs
+          - STRUCTURED: Includes speaker labels, times, or both
+          - TIMESTAMPED: Includes inline time markers
+        range: TranscriptFormatEnum
+        required: false
+        ifabsent: "string(PLAIN_TEXT)"
+        examples:
+          - value: "STRUCTURED"
+            description: "Text with speaker labels and paragraph breaks"
+
+      includes_timestamps:
+        slot_uri: hc:includesTimestamps
+        description: |
+          Whether the transcript includes time markers.
+
+          - **true**: Timestamps are embedded in full_text or segments have times
+          - **false**: No temporal information (default)
+
+          If true, prefer using `segments` for programmatic access.
+        range: boolean
+        required: false
+        ifabsent: "false"
+        examples:
+          - value: true
+            description: "Transcript has time codes"
+
+      includes_speakers:
+        slot_uri: hc:includesSpeakers
+        description: |
+          Whether the transcript includes speaker identification.
+
+          - **true**: Speaker labels/diarization available
+          - **false**: Single speaker or no identification (default)
+
+          When true, check `speaker_count` for number of distinct speakers.
+        range: boolean
+        required: false
+        ifabsent: "false"
+        examples:
+          - value: true
+            description: "Multi-speaker transcript with diarization"
+
+      segments:
+        slot_uri: hc:transcriptSegments
+        description: |
+          Structured breakdown of transcript into time-coded segments.
+
+          Optional for VideoTranscript (plain transcripts may not have times).
+          Required for VideoSubtitle (subtitles must have time codes).
+ + Each segment is a VideoTimeSegment with: + - start_seconds / end_seconds: Time boundaries + - segment_text: Text for this segment + - confidence: Per-segment accuracy score + - speaker_id / speaker_label: Speaker identification + + Use segments for: + - Video player synchronization + - Jump-to-time navigation + - Per-segment quality analysis + - Speaker-separated views + range: VideoTimeSegment + required: false + multivalued: true + inlined: true + inlined_as_list: true + examples: + - value: | + - segment_index: 0 + start_seconds: 0.0 + end_seconds: 3.5 + segment_text: "Welcome to the museum." + confidence: 0.95 + description: "Single structured segment" + + speaker_count: + slot_uri: hc:speakerCount + description: | + Number of distinct speakers identified in the transcript. + + Only meaningful when includes_speakers = true. + + 0 = Unknown/not analyzed + 1 = Single speaker (monologue) + 2+ = Multi-speaker (dialogue, panel, interview) + range: integer + required: false + minimum_value: 0 + examples: + - value: 3 + description: "Three speakers identified" + + primary_speaker: + slot_uri: hc:primarySpeaker + description: | + Identifier or name of the main/dominant speaker. + + For interviews: the interviewee (not interviewer) + For presentations: the presenter + For tours: the guide + + May be generic ("Narrator") or specific ("Dr. Taco Dibbits"). + range: string + required: false + examples: + - value: "Narrator" + description: "Generic primary speaker" + - value: "Dr. Taco Dibbits, Museum Director" + description: "Named primary speaker" + + source_language_auto_detected: + slot_uri: hc:sourceLanguageAutoDetected + description: | + Whether the content_language was auto-detected by ASR. + + - **true**: Language detected by ASR model + - **false**: Language was specified/known (default) + + Useful for quality assessment - auto-detection may be wrong. 
+ range: boolean + required: false + ifabsent: "false" + examples: + - value: true + description: "Language was auto-detected" + + paragraph_count: + slot_uri: hc:paragraphCount + description: | + Number of paragraphs in the transcript. + + Only meaningful when transcript_format = PARAGRAPHED or STRUCTURED. + + Useful for content sizing and readability assessment. + range: integer + required: false + minimum_value: 0 + examples: + - value: 15 + description: "Transcript has 15 paragraphs" + + sentence_count: + slot_uri: hc:sentenceCount + description: | + Approximate number of sentences in the transcript. + + Derived from punctuation analysis or NLP sentence segmentation. + + Useful for content analysis and readability metrics. + range: integer + required: false + minimum_value: 0 + examples: + - value: 47 + description: "Transcript has ~47 sentences" + + comments: + - "Full text transcription of video audio content" + - "Extends VideoTextContent with transcript-specific properties" + - "Base class for VideoSubtitle (subtitles are transcripts + time codes)" + - "Supports both plain text and structured segment-based transcripts" + - "Critical for accessibility, discovery, and preservation" + + see_also: + - "https://schema.org/transcript" + - "http://www.cidoc-crm.org/cidoc-crm/E33_Linguistic_Object" + +# ============================================================================ +# Enumerations +# ============================================================================ + +enums: + + TranscriptFormatEnum: + description: | + Format/structure of transcript text content. + + Indicates how the full_text is organized. + permissible_values: + PLAIN_TEXT: + description: | + Continuous text without structural markers. + No speaker labels, no timestamps, no paragraph breaks. + Suitable for simple full-text search indexing. + PARAGRAPHED: + description: | + Text broken into paragraphs. + May be based on topic changes, speaker pauses, or semantic units. 
+ Improves human readability. + STRUCTURED: + description: | + Text with speaker labels and/or section markers. + Format: "[Speaker] Text content" or similar. + Enables speaker-specific analysis. + TIMESTAMPED: + description: | + Text with inline time markers. + Format: "[00:30] Text content" or similar. + Enables temporal navigation in text view. + VERBATIM: + description: | + Exact transcription including fillers, false starts, overlaps. + "[um]", "[pause]", "[crosstalk]" markers. + Used for linguistic analysis or legal transcripts. + CLEAN: + description: | + Edited for readability - fillers removed, grammar corrected. + May diverge slightly from literal spoken content. + Suitable for publication or accessibility. + +# ============================================================================ +# Slot Definitions +# ============================================================================ + +slots: + full_text: + description: Complete transcript text as single string + range: string + + transcript_format: + description: Format/structure of transcript text + range: TranscriptFormatEnum + + includes_timestamps: + description: Whether transcript includes time markers + range: boolean + + includes_speakers: + description: Whether transcript includes speaker identification + range: boolean + + segments: + description: Structured breakdown into time-coded segments + range: VideoTimeSegment + multivalued: true + + speaker_count: + description: Number of distinct speakers identified + range: integer + + primary_speaker: + description: Identifier/name of main speaker + range: string + + source_language_auto_detected: + description: Whether language was auto-detected by ASR + range: boolean + + paragraph_count: + description: Number of paragraphs in transcript + range: integer + + sentence_count: + description: Number of sentences in transcript + range: integer