glam/schemas/20251121/linkml/modules/classes/VideoTimeSegment.yaml
kempersc 7cf10084b4 Implement scripts for schema modifications and ontology verification
- Added `fix_dual_class_link.py` to remove dual class link references from specified YAML files.
- Created `fix_specific_ghosts.py` to apply specific replacements in YAML files based on defined mappings.
- Introduced `migrate_staff_count.py` to migrate staff count references to a new structure in specified YAML files.
- Developed `migrate_type_slots.py` to replace type-related slots with new identifiers across YAML files.
- Implemented `scan_ghost_references.py` to identify and report ghost references to archived slots and classes in YAML files.
- Added `verify_ontology_terms.py` to verify the presence of ontology terms in specified ontology files against schema definitions.
2026-01-29 17:10:25 +01:00

265 lines
7.6 KiB
YAML

# LinkML schema module header: identity of this module, the modules it
# imports, and the CURIE prefixes available to the definitions below.
id: https://nde.nl/ontology/hc/class/VideoTimeSegment
name: video_time_segment_class
title: Video Time Segment Class

# Imported modules. Paths starting with ../slots/ are slot modules; paths
# starting with ./ are sibling class modules.
imports:
  - linkml:types
  - ../slots/has_or_had_time_interval
  - ./TimeInterval
  - ../slots/segment_index
  - ../slots/segment_text
  - ../slots/speaker_id
  - ../slots/speaker_label
  - ../slots/specificity_annotation
  - ../slots/has_or_had_score
  - ./SpecificityAnnotation
  - ./TemplateSpecificityScore
  - ./TemplateSpecificityType
  - ./TemplateSpecificityTypes
  - ../slots/is_or_was_generated_by
  - ./GenerationEvent
  - ./ConfidenceScore

# CURIE prefix -> namespace IRI expansions.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  oa: http://www.w3.org/ns/oa#
  ma: http://www.w3.org/ns/ma-ont#
  prov: http://www.w3.org/ns/prov#
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#
  xsd: http://www.w3.org/2001/XMLSchema#

# Prefix applied to names that are written without an explicit prefix.
default_prefix: hc
# Classes declared by this module.
classes:
  # VideoTimeSegment: a time-coded portion of a video, described by a start
  # offset (start_seconds / start_time) plus a duration held in
  # has_or_had_time_interval, with optional text, speaker and generation
  # (confidence) metadata.
  VideoTimeSegment:
    class_uri: crm:E52_Time-Span
    abstract: false
    # Literal block scalar (|): keeps the line breaks that the Markdown
    # tables, bullet lists and fenced example below depend on. A quoted
    # scalar would fold every line break into a single space.
    description: |
      A temporal segment within a video, defined by a start time and a duration.

      **DEFINITION**:
      VideoTimeSegment represents a bounded temporal portion of video content.
      It is the foundational unit for time-coded content including:
      - Subtitle/caption entries (text displayed at specific times)
      - Annotation segments (detected scenes, objects, faces)
      - Chapter markers (user-defined content sections)

      **DUAL TIME REPRESENTATION**:
      Times are stored in two formats for different use cases:

      | Format | Example | Use Case |
      |--------|---------|----------|
      | ISO 8601 duration | PT0M30S | Human-readable, serialization |
      | Seconds (float) | 30.0 | Computation, synchronization |

      Both representations MUST be kept in sync. The seconds format is
      primary for computation; ISO 8601 is derived for display/storage.

      **MEDIA FRAGMENTS URI (W3C)**:
      VideoTimeSegment aligns with W3C Media Fragments URI 1.0 specification
      for addressing temporal fragments of video:

      ```
      https://example.com/video.mp4#t=30,35
      ```

      `start_seconds` maps to the start of the `t=` parameter; the end is
      `start_seconds` plus the duration in `has_or_had_time_interval`.

      **WEB ANNOTATION COMPATIBILITY**:
      When used as an annotation target selector:
      - Maps to `oa:FragmentSelector` with `conformsTo` Media Fragments
      - Enables interoperability with W3C Web Annotation Data Model

      **CIDOC-CRM E52_Time-Span**:
      In cultural heritage documentation:
      - E52_Time-Span is the extent of a time-span
      - Used for temporal properties of cultural objects
      - VideoTimeSegment extends this to media-specific temporal segments

      **CONFIDENCE SCORING**:
      For segments generated by ASR (speech recognition) or CV (computer vision):
      - `is_or_was_generated_by`: generation event carrying a 0.0-1.0
        confidence score for segment accuracy
      - Enables filtering by quality threshold
      - Critical for AI-generated transcripts and annotations

      **HERITAGE USE CASES**:

      | Use Case | Example | Start | End |
      |----------|---------|-------|-----|
      | Subtitle entry | "Welcome to the museum" | 0:30 | 0:35 |
      | Scene annotation | "Exhibition hall panorama" | 1:00 | 1:30 |
      | Chapter marker | "Introduction" | 0:00 | 2:00 |
      | Object detection | "Painting: Night Watch" | 3:15 | 3:20 |
      | Speaker change | "Curator speaking" | 5:00 | 7:30 |

    # Ontology alignments, from strongest to weakest.
    exact_mappings:
      - crm:E52_Time-Span
      - oa:FragmentSelector
    close_mappings:
      - ma:MediaFragment
    related_mappings:
      - schema:Clip

    # Shared slots (defined in the imported slot modules) used by this class;
    # class-specific constraints are applied in slot_usage below.
    slots:
      - has_or_had_time_interval
      - segment_index
      - segment_text
      - speaker_id
      - speaker_label
      - specificity_annotation
      - has_or_had_score
      - is_or_was_generated_by

    # Class-local attributes, declared here rather than as shared slots
    # because the former start_time / start_seconds slots are archived.
    attributes:
      start_time:
        range: string
        required: false
        # Written as an alternation so at least one of H / M / S must be
        # present; three independent optional groups would also accept a
        # bare "PT". No lookahead is used, for regex-engine portability.
        pattern: '^PT(\d+H(\d+M)?(\d+(\.\d+)?S)?|\d+M(\d+(\.\d+)?S)?|\d+(\.\d+)?S)$'
        description: >-
          Start time of the segment in ISO 8601 duration format (e.g. PT30S).
          Defined as attribute to avoid dependency on archived start_time slot.
        examples:
          - value: PT0M30S
            description: 30 seconds from video start
          - value: PT1H15M30S
            description: 1 hour 15 minutes 30 seconds
      start_seconds:
        range: float
        required: true
        minimum_value: 0.0
        description: >-
          Start time of the segment in seconds.
          Defined as attribute to avoid dependency on archived start_seconds slot.
        examples:
          - value: 30.0
            description: 30 seconds from start
          - value: 30.5
            description: 30.5 seconds (millisecond precision)

    # Per-class refinements of the shared slots listed above.
    slot_usage:
      has_or_had_time_interval:
        range: TimeInterval
        required: true
        inlined: true
        description: >-
          Duration of the segment (replaces end_time/end_seconds).
          Use duration_value for ISO 8601 duration (e.g., PT5S).
        examples:
          - value:
              duration_value: PT0M05S
            description: 5 second duration
      segment_text:
        range: string
        required: false
        examples:
          - value: Welkom bij het Rijksmuseum
            description: Dutch subtitle text
          - value: The curator explains the painting's history
            description: Transcript segment
      segment_index:
        range: integer
        required: false
        minimum_value: 0
        examples:
          - value: 0
            description: First segment
          - value: 42
            description: 43rd segment (zero-indexed)
      is_or_was_generated_by:
        range: GenerationEvent
        required: false
        inlined: true
        description: >-
          Generation event containing confidence score for ASR/CV segment
          accuracy. MIGRATED 2026-01-19: Replaces confidence slot with
          structured pattern.
        # NOTE(review): has_or_had_method is nested inside the score object
        # here; confirm against GenerationEvent / ConfidenceScore whether it
        # belongs there or one level up, on the generation event itself.
        examples:
          - value:
              has_or_had_score:
                has_or_had_score: 0.95
                has_or_had_method: asr_transcription
            description: High confidence ASR segment
          - value:
              has_or_had_score:
                has_or_had_score: 0.72
                has_or_had_method: cv_detection
            description: Medium confidence, may contain errors
      speaker_id:
        range: string
        required: false
        examples:
          - value: SPEAKER_01
            description: First identified speaker
          - value: curator_taco_dibbits
            description: Resolved speaker identity
      speaker_label:
        range: string
        required: false
        examples:
          - value: Narrator
            description: Generic speaker label
          - value: Dr. Taco Dibbits, Museum Director
            description: Specific identified speaker

    # Descriptive rule only: it carries no slot conditions, so the constraint
    # is stated for readers rather than expressed as a checkable expression.
    rules:
      - postconditions:
          description: >-
            The segment end (start_seconds plus the duration given in
            has_or_had_time_interval) must be >= start_seconds, i.e. the
            duration must not be negative.

    comments:
      - Reusable time segment for subtitles, annotations, chapters
      - 'Dual time format: ISO 8601 for serialization, seconds for computation'
      - Aligns with W3C Media Fragments URI specification
      - Confidence scoring for AI-generated content
      - Speaker diarization support for multi-speaker transcripts
      - "MIGRATED 2026-01-19: confidence \u2192 is_or_was_generated_by + ConfidenceScore"
    see_also:
      - https://www.w3.org/TR/media-frags/
      - https://www.w3.org/TR/annotation-model/
      - https://www.w3.org/ns/ma-ont
      - http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span
    annotations:
      specificity_score: 0.1
      specificity_rationale: Generic utility class/slot created during migration
      custodian_types: "['*']"
      custodian_types_rationale: Universal utility concept