# glam/frontend/public/schemas/20251121/linkml/modules/classes/AudioEventSegment.yaml

# Schema-level metadata for the module that defines the AudioEventSegment class.
# `id` is this module's identifier URI; `name` and `title` are its machine and
# human-readable labels.
id: https://nde.nl/ontology/hc/class/AudioEventSegment
name: audio_event_segment_class
title: Audio Event Segment Class
# Module summary. Written as a literal block scalar (`|`), so the line breaks
# and the blank line inside it are part of the stored text.
description: |
  A temporal segment of audio containing a detected audio event (speech, music, silence, etc.).

  MIGRATED from audio_event_segments slot (Rule 53).
  Uses generic has_or_had_segment slot with range narrowed to AudioEventSegment.
imports:
  - linkml:types
  - ../slots/start_seconds
  - ../slots/end_seconds
  - ../slots/start_time
  - ../slots/end_time
  - ../slots/segment_index
  - ../slots/segment_text
  - ../slots/confidence
  - ../slots/specificity_annotation
  - ../slots/template_specificity
  - ./SpecificityAnnotation
  - ./TemplateSpecificityScores
  - ../enums/AudioEventTypeEnum
# CURIE prefix table for this module.
# Referenced in this file: `linkml` (the linkml:types import), `hc` (class_uri
# and exact_mappings), `crm` and `ma` (close_mappings), `oa` (related_mappings).
# NOTE(review): `schema` and `dcterms` are not referenced anywhere in this
# file as shown; presumably kept for consistency with sibling modules — confirm
# before removing.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  oa: http://www.w3.org/ns/oa#
  ma: http://www.w3.org/ns/ma-ont#
# `hc` is declared as the default prefix for this schema.
default_prefix: hc

classes:
  # AudioEventSegment: a time-bounded portion of audio labelled with the type
  # of audio event detected in it. Temporal bounds are required in seconds
  # (start_seconds / end_seconds) and optional as ISO 8601 durations
  # (start_time / end_time); the event type itself is the required inline
  # attribute `audio_event_type`.
  AudioEventSegment:
    class_uri: hc:AudioEventSegment
    description: |
      A temporal segment of audio containing a detected audio event.

      **DEFINITION**:

      AudioEventSegment represents a bounded temporal portion of audio content
      where a specific type of audio event has been detected. This includes:
      - Speech segments (with optional speaker/language info)
      - Music segments (with optional genre/type info)
      - Silence segments (gaps between audio)
      - Sound event segments (applause, laughter, ambient sounds)
      - Noise segments (for quality assessment)

      **RELATIONSHIP TO VideoTimeSegment**:

      AudioEventSegment is a specialized sibling of VideoTimeSegment:
      - Both extend CIDOC-CRM E52_Time-Span concept
      - VideoTimeSegment: general video temporal segments
      - AudioEventSegment: audio-specific event segments

      **AUDIO EVENT TYPES**:

      | Event Type | Description | Example |
      |------------|-------------|---------|
      | SPEECH | Human speech detected | Interview segment |
      | MUSIC | Music detected | Background soundtrack |
      | SILENCE | Very low or no audio | Gap between segments |
      | SOUND_EVENT | Non-speech/music sounds | Applause, footsteps |
      | NOISE | Noise/interference | Quality issue marker |
      | MIXED | Multiple event types | Overlapping audio |

      **HERITAGE USE CASES**:

      | Content Type | Application |
      |--------------|-------------|
      | Oral histories | Speech segment identification |
      | Virtual tours | Background music detection |
      | Lecture recordings | Audience reaction segments |
      | Conservation videos | Narration vs ambient sound |
      | Archival footage | Audio quality assessment |

      **PROVENANCE**:

      Created as part of slot migration (Rule 53) from deprecated
      `audio_event_segments` slot to generic `has_or_had_segment` pattern.
    # Ontology alignments, from strongest (exact) to weakest (related).
    exact_mappings:
      - hc:AudioEventSegment
    close_mappings:
      - crm:E52_Time-Span
      - ma:MediaFragment
    related_mappings:
      - oa:FragmentSelector
    # Slots reused from the imported slot modules. Class-specific constraints
    # for most of them are given under `slot_usage` below; segment_index,
    # specificity_annotation and template_specificity are used as imported.
    slots:
      - start_seconds
      - end_seconds
      - start_time
      - end_time
      - segment_index
      - segment_text
      - confidence
      - specificity_annotation
      - template_specificity
    # Attribute declared inline on this class rather than as an imported slot.
    attributes:
      audio_event_type:
        range: AudioEventTypeEnum
        required: true
        description: The type of audio event detected in this segment.
        examples:
          - value: SPEECH
            description: Speech detected in this segment
          - value: MUSIC
            description: Music detected in this segment
    # Per-class refinements of the reused slots.
    slot_usage:
      # Required numeric bounds, both non-negative.
      # NOTE(review): nothing in this class enforces end_seconds >= start_seconds;
      # presumably that is checked by the producer — confirm.
      start_seconds:
        range: float
        required: true
        minimum_value: 0.0
        description: Start time in seconds for this audio event segment.
        examples:
          - value: 0.0
            description: Audio event starts at beginning
          - value: 45.5
            description: Audio event starts at 45.5 seconds
      end_seconds:
        range: float
        required: true
        minimum_value: 0.0
        description: End time in seconds for this audio event segment.
        examples:
          - value: 15.0
            description: Audio event ends at 15 seconds
          - value: 60.0
            description: Audio event ends at 1 minute
      # Optional ISO 8601 time-only durations: PT[nH][nM][n(.n)S].
      # The pattern is an explicit alternation (hours-first | minutes-first |
      # seconds-only) so that at least one component is required and a bare
      # "PT" is rejected. No lookahead is used, keeping the expression within
      # the subset shared by common regex dialects.
      start_time:
        range: string
        required: false
        pattern: "^PT(\\d+H(\\d+M)?(\\d+(\\.\\d+)?S)?|\\d+M(\\d+(\\.\\d+)?S)?|\\d+(\\.\\d+)?S)$"
        description: Start time in ISO 8601 duration format.
        examples:
          - value: PT0M30S
            description: 30 seconds from start
      end_time:
        range: string
        required: false
        pattern: "^PT(\\d+H(\\d+M)?(\\d+(\\.\\d+)?S)?|\\d+M(\\d+(\\.\\d+)?S)?|\\d+(\\.\\d+)?S)$"
        description: End time in ISO 8601 duration format.
        examples:
          - value: PT0M45S
            description: 45 seconds from start
      segment_text:
        range: string
        required: false
        description: Text content for this segment (e.g., speech transcript, music description).
        examples:
          - value: "Welcome to the Rijksmuseum"
            description: Speech transcript text
          - value: "Classical background music"
            description: Music segment description
      # Optional detection score, constrained to the closed interval [0.0, 1.0].
      confidence:
        range: float
        required: false
        minimum_value: 0.0
        maximum_value: 1.0
        description: Confidence score (0.0-1.0) for the audio event detection.
        examples:
          - value: 0.95
            description: High confidence detection
          - value: 0.72
            description: Medium confidence detection
    comments:
      - Audio event segment for speech, music, silence, sound event detection
      - Temporal boundaries with start/end seconds (primary) and ISO 8601 (secondary)
      - Confidence scoring for AI-generated detections
      - Part of Rule 53 slot migration from audio_event_segments
    see_also:
      - https://www.w3.org/TR/media-frags/
      - https://www.w3.org/ns/ma-ont