# glam/schemas/20251121/linkml/modules/classes/VideoAudioAnnotation.yaml

# Schema module identity: `id` is the module URI, `name` its machine name and
# `title` the human-readable label.
id: https://nde.nl/ontology/hc/class/VideoAudioAnnotation
name: video_audio_annotation_class
title: Video Audio Annotation Class
imports:
- linkml:types
- ../enums/AudioEventTypeEnum
- ../enums/MusicTypeEnum
- ../enums/SoundEventTypeEnum
- ../slots/contain
- ../slots/end_of_the_end
- ../slots/has_score
- ../slots/identified_by
- ../slots/has_label
- ../slots/has_provenance
- ../slots/has_segment
- ../slots/has_type
- ../slots/in_background
- ../slots/diarized
- ../slots/overlap_with
- ../slots/has_language
- ../slots/has_confidence_measure
- ../slots/has_music
- ../slots/has_genre
- ../slots/has_sound
- ../slots/in_language
- ../slots/begin_of_the_begin
- ../slots/has_silence
- ../slots/has_ratio
- ../slots/has_speaker
- ../slots/has_spoken_words
- ../slots/temporal_extent
# CURIE prefix expansions used by class_uri and the *_mappings entries in this
# module.
prefixes:
  linkml: https://w3id.org/linkml/
  # Local namespace; also selected as default_prefix below.
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  oa: http://www.w3.org/ns/oa#
  ma: http://www.w3.org/ns/ma-ont#
  wd: http://www.wikidata.org/entity/
# Elements without an explicit prefix are placed in the hc namespace.
default_prefix: hc
classes:
  # VideoAudioAnnotation: concrete subclass of VideoAnnotation that records the
  # audio structure detected in a video (event segments, diarization segments,
  # languages, and a set of boolean/numeric summary slots).
  # NOTE(review): VideoAnnotation, AudioEventSegment and Language are used as
  # parent/ranges here, but this module's imports list only enum and slot
  # modules -- confirm these classes are resolved by another import.
  VideoAudioAnnotation:
    is_a: VideoAnnotation
    class_uri: hc:VideoAudioAnnotation
    abstract: false
    description: >-
      Annotation capturing audio-structure features detected in a video (speech,
      speakers, music, sound events, silence, and related metrics).
    # Translations of `description`, keyed by language code.
    alt_descriptions:
      nl: Annotatie die audio-structuur in video vastlegt (spraak, sprekers, muziek, geluid, stilte).
      de: Annotation zur Erfassung der Audiostruktur in Videos (Sprache, Sprecher, Musik, Geräusche, Stille).
      fr: Annotation capturant la structure audio d'une vidéo (parole, locuteurs, musique, sons, silence).
      es: Anotación que captura la estructura de audio en un video (habla, hablantes, música, sonidos, silencio).
      ar: حاشية تلتقط بنية الصوت في الفيديو (كلام، متحدثون، موسيقى، أصوات، صمت).
      id: Anotasi yang menangkap struktur audio dalam video (ucapan, pembicara, musik, suara, hening).
      zh: 记录视频音频结构特征的注释（语音、说话人、音乐、声音事件、静音等）。
    # Per-language alternative labels for the class.
    structured_aliases:
    - {literal_form: audio-annotatie, in_language: nl}
    - {literal_form: Audio-Annotation, in_language: de}
    - {literal_form: annotation audio, in_language: fr}
    - {literal_form: anotación de audio, in_language: es}
    - {literal_form: وسم صوتي, in_language: ar}
    - {literal_form: anotasi audio, in_language: id}
    - {literal_form: 音频注释, in_language: zh}
    exact_mappings:
    - hc:VideoAudioAnnotation
    close_mappings:
    - ma:AudioTrack
    - crm:E13_Attribute_Assignment
    related_mappings:
    - wd:Q11028
    - wd:Q638
    # Slots without an entry under slot_usage (has_type, has_spoken_words,
    # has_provenance, temporal_extent, identified_by, has_label, has_score)
    # are used with their imported definitions unchanged.
    slots:
    - has_segment
    - contain
    - diarized
    - has_language
    - has_music
    - has_genre
    - has_sound
    - has_type
    - has_silence
    - has_ratio
    - has_speaker
    - has_spoken_words
    - has_confidence_measure
    - has_provenance
    - temporal_extent
    - identified_by
    - has_label
    - has_score
    # Class-local refinements of the imported slots. Example values are quoted
    # because LinkML's Example.value is a string slot; unquoted booleans and
    # numbers would be re-stringified by the loader.
    slot_usage:
      # Optional list of inlined audio event segments.
      # NOTE(review): the example keys (start_seconds, end_seconds,
      # segment_text, confidence) do not match slot names used elsewhere in
      # this module -- verify against the AudioEventSegment definition.
      has_segment:
        range: AudioEventSegment
        multivalued: true
        required: false
        inlined_as_list: true
        examples:
        - value: '[{has_type: SPEECH, start_seconds: 0.0, end_seconds: 15.0, segment_text: "Speech detected", confidence: 0.95}]'
        - value: '[{has_type: MUSIC, start_seconds: 30.0, end_seconds: 60.0, segment_text: "Background music", confidence: 0.88}]'
      # In this class `contain` holds the inlined list of diarization segments.
      contain:
        range: DiarizationSegment
        multivalued: true
        required: false
        inlined: true
        inlined_as_list: true
      # Optional boolean flag; defaults to false when absent.
      diarized:
        range: boolean
        required: false
        ifabsent: 'false'
        examples:
        - value: 'true'
      # Optional boolean flag; defaults to false when absent.
      has_music:
        range: boolean
        required: false
        ifabsent: 'false'
        examples:
        - value: 'true'
      # Optional list of free-text genre labels.
      has_genre:
        range: string
        multivalued: true
        required: false
        examples:
        - value: classical
        - value: baroque
      # Optional float with no bounds declared here.
      # NOTE(review): unit/meaning is not stated in this module (the example is
      # negative) -- confirm against the has_sound slot definition.
      has_sound:
        range: float
        required: false
        examples:
        - value: '-45.0'
      # Optional non-negative float.
      has_silence:
        range: float
        required: false
        minimum_value: 0.0
        examples:
        - value: '15.5'
      # Optional float with no bounds declared here.
      # NOTE(review): confirm whether this is a fraction, percentage or dB.
      has_ratio:
        range: float
        required: false
        examples:
        - value: '25.0'
      # Optional non-negative integer.
      has_speaker:
        range: integer
        required: false
        minimum_value: 0
        examples:
        - value: '3'
      # Optional list of inlined Language objects.
      has_language:
        range: Language
        required: false
        multivalued: true
        inlined: true
        inlined_as_list: true
      # Optional float constrained to the closed interval [0.0, 1.0].
      has_confidence_measure:
        range: float
        required: false
        minimum_value: 0.0
        maximum_value: 1.0
        examples:
        - value: '0.88'
    comments:
    - Audio event detection for video content
    - Supports speech, music, silence, and sound event detection
    - Speaker diarization for interview navigation
    - Language detection for multilingual heritage content
    - Audio quality metrics for preservation assessment
    see_also:
    - https://www.w3.org/TR/annotation-model/
    - https://arxiv.org/abs/2111.08085
    # Project-specific annotations; custodian_types is stored as a string, not
    # as a YAML list.
    annotations:
      specificity_score: 0.1
      specificity_rationale: Generic utility class/slot created during migration
      custodian_types: "['*']"
      modeling_notes: |
        VideoAudioAnnotation complements VideoTranscript:
        - VideoAudioAnnotation: audio structure (who spoke when; music and sound events)
        - VideoTranscript: the text content of speech

        Typical use cases
        - diarization for interviews
        - music and sound-event detection for content classification
        - audio quality metrics for preservation assessment

      legacy_description: |
        Preserved from earlier, more verbose description.
        It contained detailed tables and examples for diarization, music detection,
        sound events, language detection, and audio quality analysis.

  # SpeechSegment: a bounded stretch of speech carrying optional speaker,
  # language, spoken-words and confidence information.
  SpeechSegment:
    description: Speech segment with speaker and language information.
    class_uri: hc:SpeechSegment
    slots:
    - begin_of_the_begin
    - end_of_the_end
    - has_speaker
    - in_language
    - has_spoken_words
    - has_confidence_measure
    - has_score
    # Overrides follow the order of the slots list; has_score has no local
    # override.
    slot_usage:
      # Both bounds are mandatory, non-negative floats.
      begin_of_the_begin:
        required: true
        range: float
        minimum_value: 0.0
      end_of_the_end:
        required: true
        range: float
        minimum_value: 0.0
      # Optional free-text fields.
      has_speaker:
        required: false
        range: string
      in_language:
        required: false
        range: string
      has_spoken_words:
        required: false
        range: string
      # Optional float constrained to [0.0, 1.0].
      has_confidence_measure:
        required: false
        range: float
        minimum_value: 0.0
        maximum_value: 1.0

  # DiarizationSegment: pairs a required time span with a required speaker,
  # plus optional provenance and an overlap flag.
  DiarizationSegment:
    description: Diarization segment identifying the speaker and time boundaries.
    class_uri: hc:DiarizationSegment
    slots:
    - has_provenance
    - temporal_extent
    - contain
    - overlap_with
    - has_score
    # Overrides follow the order of the slots list; has_score has no local
    # override.
    slot_usage:
      # Optional inlined Provenance object.
      has_provenance:
        required: false
        inlined: true
        range: Provenance
      # Mandatory inlined TimeSpan.
      temporal_extent:
        required: true
        inlined: true
        range: TimeSpan
      # In this class `contain` holds the mandatory inlined Speaker.
      contain:
        required: true
        inlined: true
        range: Speaker
      # Optional boolean flag.
      overlap_with:
        required: false
        range: boolean

  # MusicSegment: a bounded stretch of detected music with optional type,
  # genre, background flag and confidence.
  MusicSegment:
    description: Segment of detected music with optional classification and confidence.
    class_uri: hc:MusicSegment
    slots:
    - begin_of_the_begin
    - end_of_the_end
    - has_type
    - has_genre
    - in_background
    - has_confidence_measure
    - has_score
    # Overrides follow the order of the slots list; has_score has no local
    # override.
    slot_usage:
      # Both bounds are mandatory, non-negative floats.
      begin_of_the_begin:
        required: true
        range: float
        minimum_value: 0.0
      end_of_the_end:
        required: true
        range: float
        minimum_value: 0.0
      # Optional value drawn from MusicTypeEnum.
      has_type:
        required: false
        range: MusicTypeEnum
      # Optional single free-text genre label (not multivalued here).
      has_genre:
        required: false
        range: string
      # Optional boolean flag.
      in_background:
        required: false
        range: boolean
      # Optional float constrained to [0.0, 1.0].
      has_confidence_measure:
        required: false
        range: float
        minimum_value: 0.0
        maximum_value: 1.0