222 lines
7.1 KiB
Python
222 lines
7.1 KiB
Python
#!/usr/bin/env python3
"""
Extract inline slots from LinkML class files to individual slot files.

Per Rule 38 (AGENTS.md), all LinkML slots MUST be centralized in
schemas/20251121/linkml/modules/slots/, never inline in class files.

Usage:
    python scripts/extract_inline_slots.py [--dry-run] [--file PATH]

Options:
    --dry-run     Show what would be done without making changes
    --file PATH   Process only a single file
"""
|
|
|
|
import argparse
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import yaml
|
|
|
|
# Schema paths (relative to the project root; main() chdirs there first).
SCHEMA_ROOT = Path("schemas/20251121/linkml")
CLASSES_DIR = SCHEMA_ROOT / "modules" / "classes"  # class files that may hold inline slots
SLOTS_DIR = SCHEMA_ROOT / "modules" / "slots"      # destination for extracted slot files

# Standard prefixes for slot files.
# Namespace URIs for the CURIE prefixes this project recognizes; only
# prefixes listed here are carried into a generated slot file's header.
STANDARD_PREFIXES = {
    "linkml": "https://w3id.org/linkml/",
    "hc": "https://nde.nl/ontology/hc/",
    "schema": "http://schema.org/",
    "dcterms": "http://purl.org/dc/terms/",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "rico": "https://www.ica.org/standards/RiC/ontology#",
    "prov": "http://www.w3.org/ns/prov#",
    "crm": "http://www.cidoc-crm.org/cidoc-crm/",
    "foaf": "http://xmlns.com/foaf/0.1/",
    "bf": "http://id.loc.gov/ontologies/bibframe/",
}
|
|
|
|
|
|
def extract_slot_prefix(slot_uri: str) -> Optional[str]:
    """Return the CURIE prefix of *slot_uri*, e.g. 'schema' for 'schema:description'.

    Args:
        slot_uri: A CURIE ('prefix:local') or a full URL.

    Returns:
        The prefix string, or None when the value is a full URL or contains
        no ':' separator.  (The original annotation said ``str`` but the
        function has always returned None on the fallthrough path.)
    """
    # Full URLs also contain ':' (after the scheme), so exclude them explicitly.
    if ":" in slot_uri and not slot_uri.startswith("http"):
        # Split only on the first colon so local names containing ':' survive.
        return slot_uri.split(":", 1)[0]
    return None
|
|
|
|
|
|
def get_required_prefixes(slot_def: Dict[str, Any]) -> Dict[str, str]:
    """Collect the prefix map a standalone slot file needs for *slot_def*.

    Every slot file starts with the LinkML and project ('hc') namespaces;
    additional known prefixes are added for the slot_uri and any mappings.
    """
    required = {
        "linkml": STANDARD_PREFIXES["linkml"],
        "hc": STANDARD_PREFIXES["hc"],
    }

    # Gather every CURIE that may reference an external vocabulary:
    # the slot_uri itself, then each mapping list in a fixed order.
    candidates: List[str] = []
    if "slot_uri" in slot_def:
        candidates.append(slot_def["slot_uri"])
    for mapping_key in (
        "exact_mappings",
        "close_mappings",
        "related_mappings",
        "narrow_mappings",
        "broad_mappings",
    ):
        candidates.extend(slot_def.get(mapping_key, []))

    # Only prefixes we know a namespace URI for are carried over.
    for curie in candidates:
        prefix = extract_slot_prefix(curie)
        if prefix and prefix in STANDARD_PREFIXES:
            required[prefix] = STANDARD_PREFIXES[prefix]

    return required
|
|
|
|
|
|
def create_slot_file_content(slot_name: str, slot_def: Dict[str, Any]) -> str:
    """Render a standalone LinkML slot file for *slot_name* as a YAML string."""
    # Assemble the full document in the key order the schema files use.
    document = {
        "id": f"https://nde.nl/ontology/hc/slot/{slot_name}",
        "name": f"{slot_name}_slot",
        "title": f"{slot_name.replace('_', ' ').title()} Slot",
        "prefixes": get_required_prefixes(slot_def),
        "imports": ["linkml:types"],
        "default_prefix": "hc",
        # Each generated file carries exactly one slot definition.
        "slots": {slot_name: slot_def},
    }

    # sort_keys=False preserves the insertion order defined above.
    return yaml.dump(
        document,
        default_flow_style=False,
        allow_unicode=True,
        sort_keys=False,
        width=120,
    )
|
|
|
|
|
|
def parse_class_file(file_path: Path) -> Tuple[Dict[str, Any], Dict[str, Dict[str, Any]]]:
    """
    Parse a class file and extract inline slots.

    Args:
        file_path: Path to a LinkML class YAML file.

    Returns:
        Tuple of (full_yaml_dict, {slot_name: slot_definition}).  The slot
        mapping is always a dict, never None.
    """
    # Schema files are expected to be UTF-8; don't depend on the platform default.
    with open(file_path, 'r', encoding="utf-8") as f:
        content = yaml.safe_load(f)

    slots: Dict[str, Dict[str, Any]] = {}
    if content and "slots" in content:
        # A bare `slots:` key with no entries parses to None; normalize to {}
        # so the declared return type holds for callers iterating the result.
        slots = content.get("slots") or {}

    return content, slots
|
|
|
|
|
|
def slot_file_exists(slot_name: str) -> bool:
    """Return True when an extracted file for *slot_name* already exists."""
    return (SLOTS_DIR / f"{slot_name}.yaml").exists()
|
|
|
|
|
|
def process_class_file(file_path: Path, dry_run: bool = False) -> Dict[str, Any]:
    """
    Process a single class file, extracting inline slots.

    Args:
        file_path: Class file to scan for inline slot definitions.
        dry_run: When True, report intended actions without writing files.

    Returns:
        Dict with statistics: {created: [], skipped: [], errors: []}
    """
    stats: Dict[str, List[str]] = {"created": [], "skipped": [], "errors": []}

    try:
        _, inline_slots = parse_class_file(file_path)
    except Exception as exc:
        stats["errors"].append(f"Failed to parse {file_path}: {exc}")
        return stats

    if not inline_slots:
        return stats

    for name, definition in inline_slots.items():
        # Never overwrite a slot file that was already extracted.
        if slot_file_exists(name):
            stats["skipped"].append(name)
            continue

        try:
            rendered = create_slot_file_content(name, definition)
            target = SLOTS_DIR / f"{name}.yaml"

            if dry_run:
                print(f" Would create: {target}")
            else:
                with open(target, 'w') as f:
                    f.write(rendered)
                print(f" Created: {target}")
            stats["created"].append(name)
        except Exception as exc:
            stats["errors"].append(f"Failed to create slot {name}: {exc}")

    return stats
|
|
|
|
|
|
def find_class_files_with_inline_slots() -> List[Path]:
    """Find all class files that have inline slots.

    Files that cannot be read or parsed are skipped rather than aborting
    the scan.

    Returns:
        Sorted list of class-file paths whose YAML contains a non-empty
        'slots' mapping.
    """
    files_with_slots = []

    for yaml_file in CLASSES_DIR.glob("*.yaml"):
        try:
            with open(yaml_file, 'r', encoding="utf-8") as f:
                content = yaml.safe_load(f)
        except (OSError, yaml.YAMLError):
            # Was a bare `except:`, which also swallowed KeyboardInterrupt /
            # SystemExit; only I/O and parse failures should be tolerated here.
            continue
        # A truthy "slots" value means the file still has inline definitions.
        if content and content.get("slots"):
            files_with_slots.append(yaml_file)

    return sorted(files_with_slots)
|
|
|
|
|
|
def main():
    """CLI entry point: extract inline slots and print a summary report."""
    parser = argparse.ArgumentParser(description="Extract inline slots from LinkML class files")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be done without making changes")
    parser.add_argument("--file", type=str, help="Process only a single file")
    args = parser.parse_args()

    # Run relative to the project root so the schema paths resolve.
    os.chdir(Path(__file__).parent.parent)

    # The destination directory must exist before any slot file is written.
    SLOTS_DIR.mkdir(parents=True, exist_ok=True)

    if args.file:
        targets = [Path(args.file)]
    else:
        targets = find_class_files_with_inline_slots()

    print(f"Processing {len(targets)} class file(s)...")
    if args.dry_run:
        print("(DRY RUN - no changes will be made)\n")

    # Accumulate per-file statistics into one overall tally.
    totals: Dict[str, List[str]] = {"created": [], "skipped": [], "errors": []}
    for class_file in targets:
        print(f"\nProcessing: {class_file.name}")
        result = process_class_file(class_file, dry_run=args.dry_run)
        for key in totals:
            totals[key].extend(result[key])

    banner = "=" * 60
    print("\n" + banner)
    print("SUMMARY")
    print(banner)
    print(f"Slot files created: {len(totals['created'])}")
    print(f"Slots skipped (already exist): {len(totals['skipped'])}")
    print(f"Errors: {len(totals['errors'])}")

    if totals["errors"]:
        print("\nErrors encountered:")
        for error in totals["errors"]:
            print(f" - {error}")

    # Non-zero exit status signals that at least one slot failed to extract.
    return 0 if not totals["errors"] else 1
|
|
|
|
|
|
# Allow use both as a script and as an importable module.
if __name__ == "__main__":
    sys.exit(main())
|