#!/usr/bin/env python3
"""
Extract inline slots from LinkML class files to individual slot files.

Per Rule 38 (AGENTS.md), all LinkML slots MUST be centralized in
schemas/20251121/linkml/modules/slots/, never inline in class files.

Usage:
    python scripts/extract_inline_slots.py [--dry-run] [--file PATH]

Options:
    --dry-run    Show what would be done without making changes
    --file PATH  Process only a single file
"""

import argparse
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import yaml

# Schema paths, relative to the project root (main() chdirs there first).
SCHEMA_ROOT = Path("schemas/20251121/linkml")
CLASSES_DIR = SCHEMA_ROOT / "modules" / "classes"
SLOTS_DIR = SCHEMA_ROOT / "modules" / "slots"

# Standard prefix -> namespace URI map used when generating slot files.
STANDARD_PREFIXES = {
    "linkml": "https://w3id.org/linkml/",
    "hc": "https://nde.nl/ontology/hc/",
    "schema": "http://schema.org/",
    "dcterms": "http://purl.org/dc/terms/",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "rico": "https://www.ica.org/standards/RiC/ontology#",
    "prov": "http://www.w3.org/ns/prov#",
    "crm": "http://www.cidoc-crm.org/cidoc-crm/",
    "foaf": "http://xmlns.com/foaf/0.1/",
    "bf": "http://id.loc.gov/ontologies/bibframe/",
}


def extract_slot_prefix(slot_uri: str) -> Optional[str]:
    """Extract the prefix from a CURIE like 'schema:description'.

    Returns None for full URIs (anything starting with 'http') and for
    strings that contain no colon at all.
    """
    if ":" in slot_uri and not slot_uri.startswith("http"):
        return slot_uri.split(":")[0]
    return None


def get_required_prefixes(slot_def: Dict[str, Any]) -> Dict[str, str]:
    """Determine which namespace prefixes are needed for this slot.

    'linkml' and 'hc' are always included; any additional known prefix
    referenced by slot_uri or the *_mappings lists is added on top.
    """
    prefixes = {"linkml": STANDARD_PREFIXES["linkml"], "hc": STANDARD_PREFIXES["hc"]}

    # Check slot_uri
    if "slot_uri" in slot_def:
        prefix = extract_slot_prefix(slot_def["slot_uri"])
        if prefix and prefix in STANDARD_PREFIXES:
            prefixes[prefix] = STANDARD_PREFIXES[prefix]

    # Check all mapping lists for additional CURIE prefixes.
    for mapping_type in ["exact_mappings", "close_mappings", "related_mappings",
                         "narrow_mappings", "broad_mappings"]:
        if mapping_type in slot_def:
            for mapping in slot_def[mapping_type]:
                prefix = extract_slot_prefix(mapping)
                if prefix and prefix in STANDARD_PREFIXES:
                    prefixes[prefix] = STANDARD_PREFIXES[prefix]

    return prefixes


def create_slot_file_content(slot_name: str, slot_def: Dict[str, Any]) -> str:
    """Create the YAML content for an individual slot file.

    Wraps the slot definition in a minimal LinkML schema document with the
    prefixes it needs, an hc-based id, and a human-readable title.
    """
    prefixes = get_required_prefixes(slot_def)

    # Build the slot file structure
    slot_file = {
        "id": f"https://nde.nl/ontology/hc/slot/{slot_name}",
        "name": f"{slot_name}_slot",
        "title": f"{slot_name.replace('_', ' ').title()} Slot",
        "prefixes": prefixes,
        "imports": ["linkml:types"],
        "default_prefix": "hc",
        "slots": {
            slot_name: slot_def
        }
    }

    # Preserve insertion order (sort_keys=False) so the file reads naturally.
    return yaml.dump(slot_file, default_flow_style=False, allow_unicode=True,
                     sort_keys=False, width=120)


def parse_class_file(file_path: Path) -> Tuple[Dict[str, Any], Dict[str, Dict[str, Any]]]:
    """
    Parse a class file and extract inline slots.

    Returns:
        Tuple of (full_yaml_dict, {slot_name: slot_definition})

    Raises:
        Whatever yaml.safe_load raises on malformed YAML (caller handles it).
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = yaml.safe_load(f)

    slots = {}
    if content and "slots" in content:
        slots = content.get("slots", {})
    return content, slots


def slot_file_exists(slot_name: str) -> bool:
    """Check if a centralized slot file already exists for this slot name."""
    slot_file = SLOTS_DIR / f"{slot_name}.yaml"
    return slot_file.exists()


def process_class_file(file_path: Path, dry_run: bool = False) -> Dict[str, Any]:
    """
    Process a single class file, extracting inline slots.

    Existing slot files are never overwritten (counted as skipped).
    Note: this only creates slot files; it does not remove the inline
    definitions from the class file.

    Returns:
        Dict with statistics: {created: [], skipped: [], errors: []}
    """
    stats = {"created": [], "skipped": [], "errors": []}

    try:
        content, slots = parse_class_file(file_path)
    except Exception as e:
        stats["errors"].append(f"Failed to parse {file_path}: {e}")
        return stats

    if not slots:
        return stats

    for slot_name, slot_def in slots.items():
        if slot_file_exists(slot_name):
            stats["skipped"].append(slot_name)
            continue

        try:
            slot_content = create_slot_file_content(slot_name, slot_def)
            slot_file_path = SLOTS_DIR / f"{slot_name}.yaml"

            if dry_run:
                print(f"  Would create: {slot_file_path}")
                stats["created"].append(slot_name)
            else:
                with open(slot_file_path, 'w', encoding='utf-8') as f:
                    f.write(slot_content)
                print(f"  Created: {slot_file_path}")
                stats["created"].append(slot_name)
        except Exception as e:
            stats["errors"].append(f"Failed to create slot {slot_name}: {e}")

    return stats


def find_class_files_with_inline_slots() -> List[Path]:
    """Find all class files that have a non-empty inline 'slots' section."""
    files_with_slots = []
    for yaml_file in CLASSES_DIR.glob("*.yaml"):
        try:
            with open(yaml_file, 'r', encoding='utf-8') as f:
                content = yaml.safe_load(f)
            if content and "slots" in content and content["slots"]:
                files_with_slots.append(yaml_file)
        except Exception:
            # Unparseable files are simply skipped here; per-file errors are
            # surfaced later when the file is actually processed.
            continue
    return sorted(files_with_slots)


def main():
    """CLI entry point. Returns 0 on success, 1 if any errors occurred."""
    parser = argparse.ArgumentParser(description="Extract inline slots from LinkML class files")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show what would be done without making changes")
    parser.add_argument("--file", type=str, help="Process only a single file")
    args = parser.parse_args()

    # Resolve --file against the caller's cwd BEFORE chdir'ing, otherwise a
    # relative path would silently be reinterpreted against the project root.
    explicit_file = Path(args.file).resolve() if args.file else None

    # Change to project root so the relative SCHEMA_ROOT paths work.
    project_root = Path(__file__).resolve().parent.parent
    os.chdir(project_root)

    # Ensure slots directory exists
    SLOTS_DIR.mkdir(parents=True, exist_ok=True)

    total_stats = {"created": [], "skipped": [], "errors": []}

    if explicit_file:
        files_to_process = [explicit_file]
    else:
        files_to_process = find_class_files_with_inline_slots()

    print(f"Processing {len(files_to_process)} class file(s)...")
    if args.dry_run:
        print("(DRY RUN - no changes will be made)\n")

    for class_file in files_to_process:
        print(f"\nProcessing: {class_file.name}")
        stats = process_class_file(class_file, dry_run=args.dry_run)
        total_stats["created"].extend(stats["created"])
        total_stats["skipped"].extend(stats["skipped"])
        total_stats["errors"].extend(stats["errors"])

    # Summary
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"Slot files created: {len(total_stats['created'])}")
    print(f"Slots skipped (already exist): {len(total_stats['skipped'])}")
    print(f"Errors: {len(total_stats['errors'])}")

    if total_stats["errors"]:
        print("\nErrors encountered:")
        for error in total_stats["errors"]:
            print(f"  - {error}")

    return 0 if not total_stats["errors"] else 1


if __name__ == "__main__":
    sys.exit(main())