#!/usr/bin/env python3
"""
Create missing slot files for the LinkML schema.

This script creates slot files for slots that are used in class files
but don't have corresponding slot files in modules/slots/.
"""

import os
import re
from pathlib import Path

import yaml

# Missing slots from lint output, one name per line.
# Parsed with a whitespace-agnostic split() so trailing spaces or blank
# lines in the block can never leak into a slot name (and thus a filename).
MISSING_SLOTS = """
actual_end
actual_start
annex_description
annex_id
annex_name
annex_reason
annotation_motivation
annotation_segments
annotation_type
aspect_ratio
available_caption_languages
caption_available
character_count
climate_control_type
comment_count
comments_fetched
common_variants
content_title
contents_description
default_audio_language
default_language
detection_count
detection_threshold
dislike_count
example_portals
favorite_count
frame_rate
frame_sample_rate
full_text
generated_by
generation_method
generation_timestamp
has_forklift_access
has_loading_dock
includes_bounding_boxes
includes_segmentation_masks
includes_speakers
is_annex_of_reading_room
is_embeddable
is_licensed_content
is_made_for_kids
is_temporary
is_verified
keyframe_extraction
like_count
live_broadcast_content
material_specialization
metrics_observed_at
model_architecture
model_provider
model_task
model_version
overall_confidence
paragraph_count
planned_closure_date
planned_end
planned_start
portal_type_description
portal_type_id
portal_type_name
primary_speaker
processing_duration_seconds
reason_description
replaces_primary_location
requires_qualification
requires_separate_registration
role_id
role_name
role_name_local
sentence_count
serves_function_of
shares_catalog_with_main
source_language_auto_detected
source_video
source_video_url
temp_location_description
temp_location_id
temp_location_name
temp_location_reason
total_frames_analyzed
transcript_format
typical_responsibilities
verification_date
verified_by
video_category_id
video_comments
view_count
warehouse_description
warehouse_floor_area_sqm
warehouse_id
warehouse_managed_by
warehouse_name
warehouse_security_level
warehouse_type
word_count
""".split()

# Target directory for the generated slot files.
# NOTE: this is an absolute, machine-specific path; main() does not create it.
SLOTS_DIR = Path('/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/slots')

# Slot type inference patterns.
# Checked in insertion order; the first regex that matches wins.
TYPE_PATTERNS = {
    r'_id$': 'uriorcurie',
    r'_url$': 'uri',
    r'_date$': 'date',
    r'_count$': 'integer',
    r'_seconds$': 'float',
    r'^is_': 'boolean',
    r'^has_': 'boolean',
    r'^includes_': 'boolean',
    r'^requires_': 'boolean',
    r'_sqm$': 'float',
    r'_timestamp$': 'datetime',
    r'_at$': 'datetime',
}

# Slot URI mappings based on name patterns.
# Checked in insertion order; the first regex that matches wins.
URI_PATTERNS = {
    r'_id$': 'dcterms:identifier',
    r'_description$': 'dcterms:description',
    r'_name$': 'skos:prefLabel',
    r'_type$': 'dcterms:type',
    r'_date$': 'dcterms:date',
    r'_url$': 'schema:url',
    r'_count$': 'schema:interactionCount',
    r'_timestamp$': 'prov:atTime',
    r'_at$': 'prov:atTime',
    r'^verified': 'prov:wasAttributedTo',
}


def infer_range(slot_name: str) -> str:
    """Infer the range type from slot name.

    Returns the range of the first entry in TYPE_PATTERNS whose regex
    matches ``slot_name``, or ``'string'`` when none matches.
    """
    for pattern, range_type in TYPE_PATTERNS.items():
        if re.search(pattern, slot_name):
            return range_type
    return 'string'


def infer_slot_uri(slot_name: str) -> str:
    """Infer slot_uri from slot name.

    Returns the URI of the first entry in URI_PATTERNS whose regex matches
    ``slot_name``. When none matches, falls back to ``hc:<lowerCamelCase>``
    built from the underscore-separated words of the name.
    """
    for pattern, uri in URI_PATTERNS.items():
        if re.search(pattern, slot_name):
            return uri

    # Default fallback: snake_case -> lowerCamelCase in the hc: namespace.
    camel = ''.join(word.capitalize() for word in slot_name.split('_'))
    camel = camel[0].lower() + camel[1:]
    return f'hc:{camel}'


def humanize_name(slot_name: str) -> str:
    """Convert slot_name to human readable title.

    Each underscore-separated word is capitalized and the words are joined
    with single spaces, e.g. ``'view_count'`` -> ``'View Count'``.
    """
    return ' '.join(word.capitalize() for word in slot_name.split('_'))


def create_slot_file(slot_name: str) -> dict:
    """Create a slot file content.

    Builds the mapping that is later serialized to ``<slot_name>.yaml``:
    schema id/name/title, the prefix map, the ``linkml:types`` import and a
    single slot definition whose ``slot_uri`` and ``range`` are inferred
    from the name. Key order is significant because the YAML is dumped with
    ``sort_keys=False``.
    """
    range_type = infer_range(slot_name)
    slot_uri = infer_slot_uri(slot_name)
    title = humanize_name(slot_name)

    content = {
        'id': f'https://nde.nl/ontology/hc/slot/{slot_name}',
        'name': f'{slot_name}_slot',
        'title': f'{title} Slot',
        'prefixes': {
            'linkml': 'https://w3id.org/linkml/',
            'hc': 'https://nde.nl/ontology/hc/',
            'dcterms': 'http://purl.org/dc/terms/',
            'schema': 'http://schema.org/',
            'skos': 'http://www.w3.org/2004/02/skos/core#',
            'prov': 'http://www.w3.org/ns/prov#',
        },
        'imports': ['linkml:types'],
        'default_prefix': 'hc',
        'slots': {
            slot_name: {
                'slot_uri': slot_uri,
                'description': f'{title} for heritage custodian entities.',
                'range': range_type,
            }
        }
    }

    # Add multivalued for certain patterns
    if '_comments' in slot_name or '_segments' in slot_name or 'languages' in slot_name:
        content['slots'][slot_name]['multivalued'] = True

    return content


def main() -> None:
    """Write a YAML slot file for every name in MISSING_SLOTS.

    Files that already exist in SLOTS_DIR are left untouched and counted as
    skipped. A per-file line and a final summary are printed to stdout.
    """
    created = 0
    skipped = 0

    for slot_name in MISSING_SLOTS:
        slot_file = SLOTS_DIR / f'{slot_name}.yaml'

        if slot_file.exists():
            print(f'SKIP: {slot_name}.yaml already exists')
            skipped += 1
            continue

        content = create_slot_file(slot_name)

        # Explicit UTF-8: allow_unicode=True lets PyYAML emit non-ASCII
        # characters verbatim, which must not depend on the platform's
        # default text encoding.
        with open(slot_file, 'w', encoding='utf-8') as f:
            yaml.dump(content, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

        print(f'CREATE: {slot_name}.yaml')
        created += 1

    print('\n=== Summary ===')
    print(f'Created: {created}')
    print(f'Skipped: {skipped}')
    print(f'Total: {len(MISSING_SLOTS)}')


if __name__ == '__main__':
    main()