id: https://nde.nl/ontology/hc/class/VideoAnnotation
name: video_annotation_class
title: Video Annotation Class

imports:
  - linkml:types
  - ./VideoTextContent
  - ./VideoTimeSegment
  - ./AnnotationMotivationType
  - ./AnnotationMotivationTypes
  - ../slots/has_annotation_motivation
  - ../slots/has_annotation_segment
  - ../slots/has_annotation_type
  # MIGRATED 2026-01-25: detection_count, detection_threshold → filters_or_filtered + DetectedEntity + Quantity/DetectionThreshold (Rule 53)
  - ../slots/filters_or_filtered
  - ./DetectedEntity
  - ./DetectionThreshold
  - ../slots/has_or_had_treshold
  # MIGRATED 2026-01-22: frame_sample_rate → analyzes_or_analyzed + VideoFrame + has_or_had_quantity + Quantity (Rule 53)
  - ./VideoFrame
  - ../slots/has_or_had_quantity
  - ../slots/has_or_had_unit
  - ./Quantity
  - ./Unit
  - ../slots/includes_bounding_box
  - ../slots/includes_segmentation_mask
  - ../slots/keyframe_extraction
  - ../slots/model_architecture
  - ../slots/model_task
  - ../slots/specificity_annotation
  - ../slots/has_or_had_score # was: template_specificity - migrated per Rule 53 (2026-01-17)
  - ../slots/analyzes_or_analyzed
  - ./SpecificityAnnotation
  - ./TemplateSpecificityScore # was: TemplateSpecificityScores - migrated per Rule 53 (2026-01-17)
  - ./TemplateSpecificityType
  - ./TemplateSpecificityTypes
  - ../enums/AnnotationTypeEnum
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  oa: http://www.w3.org/ns/oa#
  as: https://www.w3.org/ns/activitystreams#

default_prefix: hc
classes:
  VideoAnnotation:
    is_a: VideoTextContent
    class_uri: oa:Annotation
    abstract: true
    description: |
      Abstract base class for computer vision and multimodal video annotations.

      **DEFINITION**:

      VideoAnnotation represents structured information derived from visual
      analysis of video content. This includes:

      | Subclass | Analysis Type | Output |
      |----------|---------------|--------|
      | VideoSceneAnnotation | Shot/scene detection | Scene boundaries, types |
      | VideoObjectAnnotation | Object detection | Objects, faces, logos |
      | VideoOCRAnnotation | Text extraction | On-screen text (OCR) |

      **RELATIONSHIP TO W3C WEB ANNOTATION**:

      VideoAnnotation aligns with the W3C Web Annotation Data Model:

      ```turtle
      :annotation a oa:Annotation ;
          oa:hasBody :detection_result ;
          oa:hasTarget [
              oa:hasSource :video ;
              oa:hasSelector [
                  a oa:FragmentSelector ;
                  dcterms:conformsTo <http://www.w3.org/TR/media-frags/> ;
                  rdf:value "t=30,35"
              ]
          ] ;
          oa:motivatedBy oa:classifying .
      ```

      **FRAME-BASED ANALYSIS**:

      Unlike audio transcription (a continuous stream), video annotation is
      typically frame-based:

      - `frame_sample_rate`: Frames analyzed per second (e.g., 1 fps, 5 fps)
      - `analyzes_or_analyzed`: Total frames processed
      - Higher sample rates yield more detections but higher compute cost

      **DETECTION THRESHOLDS**:

      CV models output confidence scores. Thresholds filter noise:

      | Threshold | Use Case |
      |-----------|----------|
      | 0.9+ | High precision, production display |
      | 0.7-0.9 | Balanced, general use |
      | 0.5-0.7 | High recall, research/review |
      | < 0.5 | Raw output, needs filtering |

      **MODEL ARCHITECTURE TRACKING**:

      Different model architectures have different characteristics:

      | Architecture | Examples | Strengths |
      |--------------|----------|-----------|
      | CNN | ResNet, VGG | Fast inference, good for objects |
      | Transformer | ViT, CLIP | Better context, multimodal |
      | Hybrid | DETR, Swin | Balance of speed and accuracy |

      **HERITAGE INSTITUTION CONTEXT**:

      Video annotations enable:

      - **Discovery**: Find videos containing specific objects/artworks
      - **Accessibility**: Scene descriptions for visually impaired users
      - **Research**: Analyze visual content at scale
      - **Preservation**: Document visual content as text
      - **Linking**: Connect detected artworks to collection records

      **CIDOC-CRM E13_Attribute_Assignment**:

      Annotations are attribute assignments: they assert properties about
      video segments. The CV model or human annotator is the assigning agent.
    exact_mappings:
      - oa:Annotation
    close_mappings:
      - crm:E13_Attribute_Assignment
    related_mappings:
      - as:Activity
      - schema:ClaimReview
    slots:
      - has_or_had_rationale
      - contains_or_contained
      - has_or_had_type
      # MIGRATED 2026-01-25: detection_count, detection_threshold → filters_or_filtered (Rule 53)
      - filters_or_filtered
      # REMOVED 2026-01-22: frame_sample_rate - migrated to analyzes_or_analyzed + VideoFrame + has_or_had_quantity (Rule 53)
      - includes_bounding_box
      - includes_segmentation_mask
      - keyframe_extraction
      - model_architecture
      - model_task
      - specificity_annotation
      - has_or_had_score # was: template_specificity - migrated per Rule 53 (2026-01-17)
      - analyzes_or_analyzed
    slot_usage:
      has_or_had_type:
        range: AnnotationType
        required: true
        description: Type of annotation (Object detection, Scene detection, etc.)
        examples:
          - value:
              has_or_had_code: OBJECT_DETECTION
              has_or_had_label: Object Detection
            description: Object and face detection annotation
      contains_or_contained:
        range: Segment
        multivalued: true
        required: false
        inlined_as_list: true
        description: >-
          Segments (temporal or spatial) identified by the annotation.
          MIGRATED from has_annotation_segment per Rule 53.
        examples:
          - value:
              has_or_had_label: 'Night Watch painting visible'
              has_or_had_description: '30.0 - 35.0 seconds'
            description: Object detection segment
      has_or_had_rationale:
        range: Rationale
        required: false
        description: Motivation for the annotation.
        examples:
          - value:
              has_or_had_label: ClassifyingMotivation
            description: Annotation for classification purposes
      # DEPRECATED 2026-01-25: detection_threshold, detection_count → filters_or_filtered + DetectedEntity (Rule 53)
      # Old: detection_threshold: 0.5, detection_count: 342
      # New: filters_or_filtered with DetectedEntity containing Quantity and DetectionThreshold
      filters_or_filtered:
        description: |
          MIGRATED 2026-01-25: Replaces detection_count and detection_threshold slots.

          Links to DetectedEntity, which contains:
          - has_or_had_quantity → Quantity (for detection_count)
          - has_or_had_treshold → DetectionThreshold (for detection_threshold)

          **Migration Pattern**:
          - Old: detection_count: 342, detection_threshold: 0.5
          - New: filters_or_filtered → DetectedEntity with structured data
        range: DetectedEntity
        inlined: true
        required: false
        examples:
          - value:
              has_or_had_quantity:
                quantity_value: 342
                has_or_had_unit:
                  unit_value: "detections"
              has_or_had_treshold:
                threshold_value: 0.5
                threshold_type: MINIMUM
            description: "342 detections at 0.5 confidence threshold"
          - value:
              has_or_had_quantity:
                quantity_value: 89
                has_or_had_unit:
                  unit_value: "detections"
              has_or_had_treshold:
                threshold_value: 0.9
                threshold_type: MINIMUM
              has_or_had_label: "High Precision"
            description: "89 high-confidence detections"
      # MIGRATED 2026-01-22: frame_sample_rate → analyzes_or_analyzed + VideoFrame + has_or_had_quantity (Rule 53)
      analyzes_or_analyzed:
        description: |
          MIGRATED 2026-01-22: Now supports the VideoFrame class for the frame_sample_rate migration.

          Frame analysis information, including:
          - Total frames analyzed (integer, legacy pattern)
          - Frame sample rate and analysis parameters (VideoFrame class)

          MIGRATED SLOTS:
          - frame_sample_rate → VideoFrame.has_or_had_quantity with unit "samples per second"
        range: VideoFrame
        inlined: true
        required: false
        examples:
          - value:
              has_or_had_quantity:
                quantity_value: 1.0
                quantity_type: FRAME_SAMPLE_RATE
                has_or_had_unit:
                  unit_value: "samples per second"
              frame_count: 1800
            description: Analyzed 1,800 frames at 1 fps (30 min video)
          - value:
              has_or_had_quantity:
                quantity_value: 5.0
                quantity_type: FRAME_SAMPLE_RATE
                has_or_had_unit:
                  unit_value: "fps"
            description: 5 frames per second sample rate
      keyframe_extraction:
        range: boolean
        required: false
        examples:
          - value: true
            description: Used keyframe extraction
      model_architecture:
        range: string
        required: false
        examples:
          - value: Transformer
            description: Vision Transformer architecture
          - value: CNN
            description: Convolutional Neural Network
      model_task:
        range: string
        required: false
        examples:
          - value: detection
            description: Object detection task
          - value: captioning
            description: Video captioning task
      includes_bounding_box:
        range: boolean
        required: false
        examples:
          - value: true
            description: Includes bounding box coordinates
      includes_segmentation_mask:
        range: boolean
        required: false
        examples:
          - value: false
            description: No segmentation masks included
    comments:
      - Abstract base for all CV/multimodal video annotations
      - Extends VideoTextContent with frame-based analysis parameters
      - W3C Web Annotation compatible structure
      - Supports both temporal and spatial annotation
      - Tracks detection thresholds and model architecture
    see_also:
      - https://www.w3.org/TR/annotation-model/
      - http://www.cidoc-crm.org/cidoc-crm/E13_Attribute_Assignment
      - https://iiif.io/api/presentation/3.0/
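
# EXAMPLE DATA (non-normative sketch): hypothetical instance data for a
# concrete subclass (e.g., VideoObjectAnnotation, per the table in the class
# description), showing how the migrated filters_or_filtered and
# analyzes_or_analyzed patterns compose. Values are drawn from the examples
# above; the instance name and overall shape are illustrative assumptions,
# not part of the schema. Kept as comments so this file remains valid YAML.
#
# video_annotation_example:
#   has_or_had_type:
#     has_or_had_code: OBJECT_DETECTION
#     has_or_had_label: Object Detection
#   contains_or_contained:
#     - has_or_had_label: 'Night Watch painting visible'
#       has_or_had_description: '30.0 - 35.0 seconds'
#   filters_or_filtered:                  # replaces old detection_count/detection_threshold
#     has_or_had_quantity:
#       quantity_value: 342
#       has_or_had_unit:
#         unit_value: "detections"
#     has_or_had_treshold:
#       threshold_value: 0.5
#       threshold_type: MINIMUM
#   analyzes_or_analyzed:                 # replaces old frame_sample_rate
#     has_or_had_quantity:
#       quantity_value: 1.0
#       quantity_type: FRAME_SAMPLE_RATE
#       has_or_had_unit:
#         unit_value: "samples per second"
#     frame_count: 1800
#   model_architecture: Transformer
#   model_task: detection
#   includes_bounding_box: true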