# glam/schemas/20251121/linkml/modules/classes/VideoTextContent.yaml

# Video Text Content Class
# Abstract base class for all textual/derived content from videos
#
# Part of Heritage Custodian Ontology v0.9.5
#
# HIERARCHY:
# E73_Information_Object (CIDOC-CRM)
#     │
#     └── VideoTextContent (this class - ABSTRACT)
#             │
#             ├── VideoTranscript (full text transcription)
#             │       │
#             │       └── VideoSubtitle (time-coded captions)
#             │
#             └── VideoAnnotation (CV/multimodal derived)
#                     │
#                     ├── VideoSceneAnnotation
#                     ├── VideoObjectAnnotation
#                     └── VideoOCRAnnotation
#
# DESIGN RATIONALE:
# All text derived from video (transcripts, subtitles, annotations) shares
# common provenance requirements:
# - Source video reference
# - Generation method (ASR, manual, CV model)
# - Generation timestamp
# - Model/tool version
# - Overall confidence score
#
# This abstract base ensures consistent provenance tracking across all
# video-derived text content types.

# Schema identity: `id` is the schema URI, `name` the machine-readable name,
# and `title` the human-readable label.
id: https://nde.nl/ontology/hc/class/VideoTextContent
name: video_text_content_class
title: Video Text Content Class

# Imported schemas: the LinkML built-in types plus the sibling VideoPost module.
imports:
  - linkml:types
  - ./VideoPost

# CURIE prefix expansions available to class_uri, slot_uri and mapping values.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  oa: http://www.w3.org/ns/oa#

# Elements declared without an explicit URI fall under the hc: prefix.
default_prefix: hc

classes:

  # Abstract parent of every video-derived text class; it carries the shared
  # provenance, quality and processing slots so that subclasses inherit them.
  VideoTextContent:
    class_uri: crm:E73_Information_Object
    abstract: true
    description: |
      Abstract base class for all textual/derived content from videos.

      **DEFINITION**:

      VideoTextContent is the abstract parent for all text that is extracted,
      transcribed, or derived from video content. This includes:

      | Subclass | Source | Description |
      |----------|--------|-------------|
      | VideoTranscript | Audio | Full text transcription of spoken content |
      | VideoSubtitle | Audio | Time-coded caption entries (SRT/VTT) |
      | VideoAnnotation | Visual | CV/multimodal-derived descriptions |

      **PROVENANCE REQUIREMENTS**:

      All video-derived text MUST include comprehensive provenance:

      1. **Source**: Which video was processed (`source_video`)
      2. **Method**: How was content generated (`generation_method`)
      3. **Agent**: Who/what generated it (`generated_by`)
      4. **Time**: When was it generated (`generation_timestamp`)
      5. **Version**: Tool/model version (`model_version`)
      6. **Quality**: Overall confidence (`overall_confidence`)

      **PROV-O ALIGNMENT**:

      Maps to W3C PROV-O for provenance tracking:

      ```turtle
      :transcript a hc:VideoTranscript ;
          prov:wasGeneratedBy :asr_activity ;
          prov:wasAttributedTo :whisper_model ;
          prov:generatedAtTime "2025-12-01T10:00:00Z"^^xsd:dateTime ;
          prov:wasDerivedFrom :source_video .
      ```

      **CIDOC-CRM E73_Information_Object**:

      - E73 is the base for all identifiable immaterial items
      - Includes texts, computer programs, songs, recipes
      - VideoTextContent are E73 instances derived from video (E73)

      **GENERATION METHODS**:

      | Method | Description | Typical Confidence |
      |--------|-------------|-------------------|
      | ASR_AUTOMATIC | Automatic speech recognition | 0.75-0.95 |
      | ASR_ENHANCED | ASR with post-processing | 0.85-0.98 |
      | MANUAL_TRANSCRIPTION | Human transcription | 0.98-1.0 |
      | MANUAL_CORRECTION | Human-corrected ASR | 0.95-1.0 |
      | CV_AUTOMATIC | Computer vision detection | 0.60-0.90 |
      | MULTIMODAL | Combined audio+visual AI | 0.70-0.95 |
      | OCR | Optical character recognition | 0.80-0.98 |
      | PLATFORM_PROVIDED | From YouTube/Vimeo API | 0.85-0.95 |

      See `GenerationMethodEnum` for the complete list of permissible values;
      it additionally defines CV_ENHANCED, HYBRID and UNKNOWN.

      **HERITAGE INSTITUTION CONTEXT**:

      Video text content is critical for:
      - **Accessibility**: Deaf/HoH users need accurate captions
      - **Discovery**: Full-text search over video collections
      - **Preservation**: Text outlasts video format obsolescence
      - **Research**: Analyzing spoken content at scale
      - **Translation**: Multilingual access to heritage content

      **LANGUAGE SUPPORT**:

      - `content_language`: Primary language of text content
      - May differ from video's default_audio_language if translated
      - ISO 639-1 codes (e.g., "nl", "en", "de")

    exact_mappings:
      - crm:E73_Information_Object

    close_mappings:
      - prov:Entity

    related_mappings:
      - schema:CreativeWork
      - dcterms:Text

    slots:
      # Source reference
      - source_video
      - source_video_url

      # Content metadata
      - content_language
      - content_title

      # Provenance - Generation
      - generated_by
      - generation_method
      - generation_timestamp
      - model_version
      - model_provider

      # Quality
      - overall_confidence
      - is_verified
      - verified_by
      - verification_date

      # Processing metadata
      - processing_duration_seconds
      - word_count
      - character_count

    # Class-specific refinements: each entry binds the slot to an RDF predicate
    # (slot_uri) and sets cardinality, bounds and examples for this hierarchy.
    # Example values are quoted so their lexical form is kept verbatim.
    slot_usage:
      source_video:
        slot_uri: prov:wasDerivedFrom
        description: |
          Reference to the VideoPost from which this content was derived.

          PROV-O: wasDerivedFrom links derived content to source.

          Links to the video's unique identifier (post_id).
        range: string
        required: true
        examples:
          - value: "FbIoC-Owy-M"
            description: "YouTube video ID as source reference"

      source_video_url:
        slot_uri: schema:url
        description: |
          URL of the source video.

          Convenience field for direct video access.
          Derived from source_video but stored for quick reference.
        range: uri
        required: false
        examples:
          - value: "https://www.youtube.com/watch?v=FbIoC-Owy-M"
            description: "Full YouTube video URL"

      content_language:
        slot_uri: dcterms:language
        description: |
          Primary language of the text content.

          Dublin Core: language for content language.

          ISO 639-1 code. May differ from video's audio language
          if this is a translation or localization.
        range: string
        required: true
        examples:
          - value: "nl"
            description: "Dutch language content"
          - value: "en"
            description: "English translation"

      content_title:
        slot_uri: dcterms:title
        description: |
          Title or label for this text content.

          Dublin Core: title for content name.

          Examples:
          - "Rijksmuseum Tour - Full Transcript"
          - "Dutch Subtitles - Auto-generated"
          - "Scene Annotations - CV Model v2.1"
        range: string
        required: false
        examples:
          - value: "De Vrijheidsroute Ep.3 - Dutch Transcript"
            description: "Descriptive title for transcript"

      generated_by:
        slot_uri: prov:wasAttributedTo
        description: |
          The agent (model, service, person) that generated this content.

          PROV-O: wasAttributedTo identifies the responsible agent.

          **Examples**:
          - AI Models: "openai/whisper-large-v3", "google/speech-to-text"
          - Services: "YouTube Auto-captions", "Rev.com"
          - Human: "transcriber:jane.doe@museum.nl"
        range: string
        required: true
        examples:
          - value: "openai/whisper-large-v3"
            description: "OpenAI Whisper ASR model"
          - value: "YouTube Auto-captions"
            description: "Platform-provided captions"
          - value: "manual:curator@rijksmuseum.nl"
            description: "Human transcriber"

      generation_method:
        slot_uri: prov:wasGeneratedBy
        description: |
          The method used to generate this content.

          PROV-O: wasGeneratedBy for generation activity type.

          See GenerationMethodEnum for standardized values.
        range: GenerationMethodEnum
        required: true
        examples:
          - value: "ASR_AUTOMATIC"
            description: "Automatic speech recognition"
          - value: "MANUAL_TRANSCRIPTION"
            description: "Human transcription"

      generation_timestamp:
        slot_uri: prov:generatedAtTime
        description: |
          When this content was generated.

          PROV-O: generatedAtTime for creation timestamp.

          ISO 8601 datetime. Critical for versioning and reproducibility.
        range: datetime
        required: true
        examples:
          - value: "2025-12-01T10:30:00Z"
            description: "Generated December 1, 2025 at 10:30 UTC"

      model_version:
        slot_uri: schema:softwareVersion
        description: |
          Version of the model or tool used for generation.

          Schema.org: softwareVersion for version tracking.

          Critical for reproducibility and quality assessment.
        range: string
        required: false
        examples:
          - value: "large-v3"
            description: "Whisper model version"
          - value: "v2.3.1"
            description: "Software version number"

      model_provider:
        slot_uri: schema:provider
        description: |
          Provider or vendor of the generation model/service.

          Schema.org: provider for service provider.
        range: string
        required: false
        examples:
          - value: "OpenAI"
            description: "Model provider"
          - value: "Google Cloud"
            description: "Cloud service provider"

      overall_confidence:
        slot_uri: hc:overallConfidence
        description: |
          Overall confidence score for the generated content.

          Range: 0.0 (no confidence) to 1.0 (complete certainty)

          Aggregated from per-segment confidence scores or
          provided by the generation model.

          **Thresholds** (suggested):
          - > 0.9: High quality, production-ready
          - 0.75-0.9: Good, may have minor errors
          - 0.6-0.75: Usable, should be reviewed
          - < 0.6: Low quality, needs significant correction
        range: float
        required: false
        minimum_value: 0.0
        maximum_value: 1.0
        examples:
          - value: "0.92"
            description: "High confidence ASR output"

      is_verified:
        slot_uri: hc:isVerified
        description: |
          Whether content has been verified by a human.

          - **true**: Human-reviewed and approved
          - **false**: Not yet verified (default for AI-generated)

          Critical for quality assurance in heritage contexts.
        range: boolean
        required: false
        ifabsent: "false"
        examples:
          - value: "true"
            description: "Human-verified transcript"

      verified_by:
        # Dedicated hc: predicate. generated_by already maps to
        # prov:wasAttributedTo; sharing that URI would make the generating
        # agent and the verifying person indistinguishable in RDF output.
        # The PROV-O relationship is retained as a related mapping.
        slot_uri: hc:verifiedBy
        description: |
          Identity of the person who verified the content.

          Only populated when is_verified = true.
        range: string
        required: false
        related_mappings:
          - prov:wasAttributedTo
        examples:
          - value: "curator@rijksmuseum.nl"
            description: "Staff member who verified"

      verification_date:
        slot_uri: dcterms:dateAccepted
        description: |
          Date when content was verified.

          Dublin Core: dateAccepted for approval date.
        range: datetime
        required: false
        examples:
          - value: "2025-12-02T15:00:00Z"
            description: "Verified December 2, 2025"

      processing_duration_seconds:
        slot_uri: hc:processingDuration
        description: |
          Time taken to generate this content, in seconds.

          Useful for performance monitoring and cost estimation.
        range: float
        required: false
        minimum_value: 0.0
        examples:
          - value: "45.3"
            description: "Processed in 45.3 seconds"

      word_count:
        slot_uri: hc:wordCount
        description: |
          Total number of words in the text content.

          Useful for content sizing and analysis.
        range: integer
        required: false
        minimum_value: 0
        examples:
          - value: "1523"
            description: "1,523 words in transcript"

      character_count:
        slot_uri: hc:characterCount
        description: |
          Total number of characters in the text content.

          Includes spaces. Useful for storage estimation.
        range: integer
        required: false
        minimum_value: 0
        examples:
          - value: "8742"
            description: "8,742 characters"

    comments:
      - "Abstract base for all video-derived text content"
      - "Comprehensive PROV-O provenance tracking"
      - "Confidence scoring for AI-generated content"
      - "Verification workflow support"
      - "Critical for heritage accessibility and discovery"

    see_also:
      - "https://www.w3.org/TR/prov-o/"
      - "http://www.cidoc-crm.org/cidoc-crm/E73_Information_Object"

# ============================================================================
# Enumerations
# ============================================================================

enums:

  # Controlled vocabulary used as the range of the `generation_method` slot.
  GenerationMethodEnum:
    description: |
      Methods for generating video-derived text content.

      Standardized values for provenance tracking.
    permissible_values:
      # --- Speech recognition ---
      ASR_AUTOMATIC:
        description: "Automatic speech recognition (raw output)"
      ASR_ENHANCED:
        description: "ASR with post-processing (punctuation, normalization)"

      # --- Human-produced or human-corrected ---
      MANUAL_TRANSCRIPTION:
        description: "Fully human-transcribed content"
      MANUAL_CORRECTION:
        description: "Human-corrected ASR output"

      # --- Computer vision ---
      CV_AUTOMATIC:
        description: "Computer vision detection (raw output)"
      CV_ENHANCED:
        description: "CV with post-processing or filtering"

      # --- Other automated sources ---
      MULTIMODAL:
        description: "Combined audio+visual AI processing"
      OCR:
        description: "Optical character recognition from video frames"
      PLATFORM_PROVIDED:
        description: "Content from platform API (YouTube, Vimeo captions)"

      # --- Mixed / unrecorded ---
      HYBRID:
        description: "Combination of automated and manual methods"
      UNKNOWN:
        description: "Generation method not recorded"

# ============================================================================
# Slot Definitions
# ============================================================================

slots:
  # Global slot definitions. These give each slot its base description and
  # range; VideoTextContent.slot_usage layers slot_uri, cardinality and
  # examples on top. Numeric bounds are declared here as well (mirroring the
  # values in that slot_usage) so that the documented ranges are enforced for
  # any class that uses a slot without repeating the refinement.

  # --- Source reference ---
  source_video:
    description: Reference to source VideoPost (video ID)
    range: string

  source_video_url:
    description: URL of the source video
    range: uri

  # --- Content metadata ---
  content_language:
    description: Primary language of text content (ISO 639-1)
    range: string

  content_title:
    description: Title or label for this text content
    range: string

  # --- Provenance: generation ---
  generated_by:
    description: Agent that generated this content (model, service, person)
    range: string

  generation_method:
    description: Method used to generate content
    range: GenerationMethodEnum

  generation_timestamp:
    description: When content was generated
    range: datetime

  model_version:
    description: Version of model/tool used
    range: string

  model_provider:
    description: Provider of model/service
    range: string

  # --- Quality ---
  overall_confidence:
    description: Overall confidence score (0.0-1.0)
    range: float
    # Enforce the interval stated in the description.
    minimum_value: 0.0
    maximum_value: 1.0

  is_verified:
    description: Whether content has been human-verified
    range: boolean

  verified_by:
    description: Person who verified the content
    range: string

  verification_date:
    description: Date content was verified
    range: datetime

  # --- Processing metadata ---
  processing_duration_seconds:
    description: Time taken to generate content, in seconds
    range: float
    # A duration cannot be negative.
    minimum_value: 0.0

  word_count:
    description: Total word count
    range: integer
    # Counts cannot be negative.
    minimum_value: 0

  character_count:
    description: Total character count
    range: integer
    # Counts cannot be negative.
    minimum_value: 0