All checks were successful
Deploy Frontend / build-and-deploy (push) Successful in 3m57s
- Remove inline slot definitions from 144 class files
- Create 7 new centralized slot files in modules/slots/:
  - custodian_type_broader.yaml
  - custodian_type_narrower.yaml
  - custodian_type_related.yaml
  - definition.yaml
  - finding_aid_access_restriction.yaml
  - finding_aid_description.yaml
  - finding_aid_temporal_coverage.yaml
- Add centralize_inline_slots.py automation script
- Update manifest with new timestamp

Rule 48: Class files must NOT define inline slots - all slots must be imported from modules/slots/ directory.

Note: Pre-existing IdentifierFormat duplicate class definition (in Standard.yaml and IdentifierFormat.yaml) not addressed in this commit - requires separate schema refactor.
286 lines
10 KiB
Python
286 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Centralize inline slot definitions from class files to modules/slots/
|
|
|
|
This script:
|
|
1. Extracts all inline slot definitions from modules/classes/*.yaml
|
|
2. Checks which slots already exist in modules/slots/
|
|
3. Generates missing slot files
|
|
4. Updates class files to import centralized slots instead of inline definitions
|
|
|
|
Usage:
|
|
python scripts/centralize_inline_slots.py --dry-run # Preview changes
|
|
python scripts/centralize_inline_slots.py # Apply changes
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import yaml
|
|
import argparse
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
from datetime import datetime
|
|
|
|
# Schema tree locations, resolved relative to the parent of the directory
# that contains this script.
SCHEMA_ROOT = Path(__file__).parent.parent.joinpath("schemas", "20251121", "linkml")
CLASSES_DIR = SCHEMA_ROOT.joinpath("modules", "classes")  # class files that are scanned and rewritten
SLOTS_DIR = SCHEMA_ROOT.joinpath("modules", "slots")  # centralized slot files
|
|
|
|
|
|
def load_yaml(path: Path) -> dict:
    """Parse the YAML document stored at *path*.

    The file is read as UTF-8 and parsed with ``yaml.safe_load``.  A document
    that parses to a falsy value (for example an empty file, which yields
    ``None``) is reported as an empty dict instead.
    """
    with open(path, mode='r', encoding='utf-8') as stream:
        parsed = yaml.safe_load(stream)
    if not parsed:
        return {}
    return parsed
|
|
|
|
|
|
def save_yaml(path: Path, data: dict, dry_run: bool = False):
    """Write *data* to *path* as YAML, or only announce the write on a dry run.

    The dump keeps key insertion order, uses block style, writes non-ASCII
    characters unescaped and asks for a line width of 120.

    Args:
        path: Destination file; it is opened for writing as UTF-8.
        data: Document to serialize.
        dry_run: When true, nothing is written and a ``[DRY-RUN]`` line is
            printed instead.
    """
    if not dry_run:
        dump_options = {
            'allow_unicode': True,
            'default_flow_style': False,
            'sort_keys': False,
            'width': 120,
        }
        with open(path, 'w', encoding='utf-8') as stream:
            yaml.dump(data, stream, **dump_options)
        return

    print(f" [DRY-RUN] Would write: {path}")
|
|
|
|
|
|
def get_existing_centralized_slots() -> set:
    """Collect the names of all slots already defined under ``SLOTS_DIR``.

    Every ``*.yaml`` file directly inside ``SLOTS_DIR`` is loaded and the keys
    of its top-level ``slots`` mapping are gathered.  A file that cannot be
    loaded is reported with a warning and skipped.  A file whose ``slots``
    entry is missing, empty (``slots:`` with no value) or not a mapping simply
    contributes no names, without a warning.

    Returns:
        Set of slot names found across all loadable slot files.
    """
    existing = set()
    for slot_file in SLOTS_DIR.glob("*.yaml"):
        # Only the load sits inside the try, so the warning below always means
        # "this file could not be read/parsed" and never hides a shape problem.
        try:
            data = load_yaml(slot_file)
        except Exception as e:  # best-effort scan: one bad file must not abort the run
            print(f"Warning: Could not parse {slot_file}: {e}")
            continue

        # An empty `slots:` key parses to None; a top-level list/scalar has no
        # slots at all.  Neither is an error.
        slots = data.get('slots') if isinstance(data, dict) else None
        if isinstance(slots, dict):
            existing.update(slots)
    return existing
|
|
|
|
|
|
def extract_inline_slots(class_file: Path) -> dict:
    """Extract inline slot definitions from a class file.

    Only entries of the file's top-level ``slots`` mapping whose value is
    itself a mapping (i.e. carries properties such as range, slot_uri or
    description) count as definitions; anything else is ignored.

    Args:
        class_file: Path of the class YAML file to inspect.

    Returns:
        Dict mapping slot_name -> slot_definition.  Empty when the file cannot
        be loaded, when its top level is not a mapping, or when ``slots`` is
        missing, empty (``slots:`` with no value) or not a mapping.
    """
    try:
        data = load_yaml(class_file)
    except Exception as e:  # best-effort scan: report and carry on with other files
        print(f"Warning: Could not parse {class_file}: {e}")
        return {}

    if not isinstance(data, dict):
        return {}

    # `data.get('slots', {})` would hand back None for an empty `slots:` key,
    # so check the actual type instead of relying on the default.
    inline_slots = data.get('slots')
    if not isinstance(inline_slots, dict):
        return {}

    # Has properties = is a definition
    return {
        slot_name: slot_def
        for slot_name, slot_def in inline_slots.items()
        if isinstance(slot_def, dict)
    }
|
|
|
|
|
|
def generate_slot_file_content(slot_name: str, slot_def: dict, source_class: str) -> dict:
    """Generate content for a centralized slot file.

    The document always declares the ``linkml`` and ``hc`` prefixes.  In
    addition, every prefix from the built-in table below that is used by the
    slot's ``slot_uri`` or by one of its mapping fields (``mappings``,
    ``exact_mappings``, ``close_mappings``, ``related_mappings``,
    ``narrow_mappings``, ``broad_mappings``) is declared.  Prefixes that are
    not in the table are left out.

    Args:
        slot_name: Name of the slot; used for the id, name, title and as the
            single key of the ``slots`` section.
        slot_def: The slot definition; it is embedded as-is (not copied).
        source_class: Name of the class file the definition came from,
            recorded in the ``comments`` entry.

    Returns:
        Dict with the keys ``id``, ``name``, ``title``, ``prefixes``,
        ``imports``, ``default_prefix``, ``slots`` and ``comments``, in that
        order.  ``comments`` holds one line with the origin and the current
        local timestamp.
    """
    # Common prefix mappings
    known_prefixes = {
        'schema': 'http://schema.org/',
        'dcterms': 'http://purl.org/dc/terms/',
        'prov': 'http://www.w3.org/ns/prov#',
        'crm': 'http://www.cidoc-crm.org/cidoc-crm/',
        'rico': 'https://www.ica.org/standards/RiC/ontology#',
        'org': 'http://www.w3.org/ns/org#',
        'foaf': 'http://xmlns.com/foaf/0.1/',
        'skos': 'http://www.w3.org/2004/02/skos/core#',
        'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
        'owl': 'http://www.w3.org/2002/07/owl#',
        'sosa': 'http://www.w3.org/ns/sosa/',
        'premis': 'http://www.loc.gov/premis/rdf/v3/',
        'odrl': 'http://www.w3.org/ns/odrl/2/',
        'frapo': 'http://purl.org/cerif/frapo/',
        'dcat': 'http://www.w3.org/ns/dcat#',
        'bf': 'http://id.loc.gov/ontologies/bibframe/',
    }
    mapping_keys = (
        'mappings',
        'exact_mappings',
        'close_mappings',
        'related_mappings',
        'narrow_mappings',
        'broad_mappings',
    )

    prefixes = {
        'linkml': 'https://w3id.org/linkml/',
        'hc': 'https://nde.nl/ontology/hc/',
    }

    # Gather every CURIE-like value the slot carries: slot_uri first so its
    # prefix keeps its position right after the two fixed prefixes.
    candidates = [slot_def.get('slot_uri')]
    for key in mapping_keys:
        value = slot_def.get(key)
        if isinstance(value, str):
            candidates.append(value)
        elif isinstance(value, (list, tuple)):
            candidates.extend(value)

    for candidate in candidates:
        # Missing/None entries and non-string values carry no prefix
        if not isinstance(candidate, str):
            continue
        prefix_match = re.match(r'^([a-z]+):', candidate)
        if not prefix_match:
            continue
        prefix = prefix_match.group(1)
        # Full URIs match as "http"/"https", which are not in the table
        if prefix in known_prefixes:
            prefixes.setdefault(prefix, known_prefixes[prefix])

    content = {
        'id': f'https://nde.nl/ontology/hc/slot/{slot_name}',
        'name': f'{slot_name}_slot',
        'title': f'{slot_name.replace("_", " ").title()} Slot',
        'prefixes': prefixes,
        'imports': ['linkml:types'],
        'default_prefix': 'hc',
        'slots': {
            slot_name: slot_def,
        },
    }

    # Add comment about origin
    content['comments'] = [f'Centralized from {source_class} - {datetime.now().isoformat()}']

    return content
|
|
|
|
|
|
def update_class_file(class_file: Path, slots_to_remove: list, dry_run: bool = False):
    """Update class file to remove inline slots and add imports.

    Entries of the file's top-level ``slots`` mapping are dropped when their
    name is listed in *slots_to_remove* and their value is a mapping (an
    actual definition).  For every dropped slot an import of
    ``../slots/<slot_name>`` is appended to the file's ``imports`` unless it
    is already listed.  If no slot ends up being dropped the file is left
    untouched.

    Args:
        class_file: Path of the class YAML file to rewrite.
        slots_to_remove: Names of the slots whose inline definitions should go.
        dry_run: When true, only report what would change; nothing is written.

    Note:
        The file is rewritten from the parsed data via ``save_yaml``.
        NOTE(review): this function does not check that the imported slot
        file exists - callers are presumably expected to ensure that.
    """
    try:
        data = load_yaml(class_file)
    except Exception as e:  # best-effort: report and leave this file alone
        print(f"Warning: Could not parse {class_file}: {e}")
        return

    if not isinstance(data, dict):
        return

    # A missing, empty (`slots:` with no value) or non-mapping slots section
    # has nothing that could be removed.
    original_slots = data.get('slots')
    if not isinstance(original_slots, dict):
        return

    # Remove inline slot definitions
    removable = set(slots_to_remove)
    removed = [
        slot_name
        for slot_name, slot_def in original_slots.items()
        if slot_name in removable and isinstance(slot_def, dict)
    ]
    if not removed:
        return

    dropped = set(removed)
    remaining_slots = {
        slot_name: slot_def
        for slot_name, slot_def in original_slots.items()
        if slot_name not in dropped
    }

    # Update slots section
    if remaining_slots:
        data['slots'] = remaining_slots
    else:
        del data['slots']

    # Add imports for centralized slots; tolerate `imports:` being empty or a
    # single scalar instead of a list.
    imports = data.get('imports') or []
    if not isinstance(imports, list):
        imports = [imports]
    for slot_name in removed:
        import_path = f'../slots/{slot_name}'
        if import_path not in imports:
            imports.append(import_path)
    data['imports'] = imports

    # Save updated file
    if dry_run:
        print(f" [DRY-RUN] Would update {class_file.name}: remove {len(removed)} inline slots, add {len(removed)} imports")
    else:
        save_yaml(class_file, data)
        print(f" Updated {class_file.name}: removed {len(removed)} inline slots")
|
|
|
|
|
|
def main():
    """Run the centralization workflow from the command line.

    Steps:
        1. Collect the slot names already defined in ``SLOTS_DIR``.
        2. Collect inline slot definitions from every ``*.yaml`` file in
           ``CLASSES_DIR``.  When several class files define the same slot,
           the first file (in sorted order) wins; differing later definitions
           are reported as warnings.
        3. Split the inline slots into those already centralized and those
           that still need a slot file.
        4. Create the missing slot files.  If a file with the target name
           already exists although the slot is not known as centralized, the
           file is not overwritten and the slot is treated as unresolved.
        5. Rewrite the class files: inline definitions are removed and
           imports added, except for unresolved slots, whose inline
           definitions stay in place so that no definition is lost.

    Command-line options:
        --dry-run: preview changes without writing.
        --verbose / -v: show detailed output.
    """
    parser = argparse.ArgumentParser(description='Centralize inline slot definitions')
    parser.add_argument('--dry-run', action='store_true', help='Preview changes without writing')
    parser.add_argument('--verbose', '-v', action='store_true', help='Show detailed output')
    args = parser.parse_args()

    print("=" * 60)
    print("Centralizing Inline Slot Definitions")
    print("=" * 60)

    # Get existing centralized slots
    print("\n1. Checking existing centralized slots...")
    existing_slots = get_existing_centralized_slots()
    print(f" Found {len(existing_slots)} slots already in modules/slots/")

    # Extract inline slots from all class files
    print("\n2. Extracting inline slot definitions from class files...")
    all_inline_slots = {}  # slot_name -> (definition, source_file)
    class_files_with_inline = {}  # file -> list of slot names
    conflicts = []  # (slot_name, later_file, first_file) where definitions differ

    for class_file in sorted(CLASSES_DIR.glob("*.yaml")):
        inline_slots = extract_inline_slots(class_file)
        if not inline_slots:
            continue
        class_files_with_inline[class_file] = list(inline_slots)
        for slot_name, slot_def in inline_slots.items():
            if slot_name not in all_inline_slots:
                all_inline_slots[slot_name] = (slot_def, class_file.name)
                continue
            first_def, first_source = all_inline_slots[slot_name]
            if slot_def != first_def:
                conflicts.append((slot_name, class_file.name, first_source))

    print(f" Found {len(all_inline_slots)} unique inline slot definitions")
    print(f" Across {len(class_files_with_inline)} class files")

    # Only one definition per slot name survives, so make dropped variants visible
    for slot_name, later_source, first_source in conflicts:
        print(
            f" Warning: slot '{slot_name}' in {later_source} differs from the definition "
            f"in {first_source}; the definition from {first_source} is used"
        )

    # Find slots that need to be created
    print("\n3. Identifying slots to create...")
    slots_to_create = {}
    slots_already_exist = set()

    for slot_name, (slot_def, source) in all_inline_slots.items():
        if slot_name in existing_slots:
            slots_already_exist.add(slot_name)
        else:
            slots_to_create[slot_name] = (slot_def, source)

    print(f" {len(slots_already_exist)} slots already exist (will remove inline, keep import)")
    print(f" {len(slots_to_create)} slots need to be created")

    if args.verbose:
        print("\n Slots to create:")
        for name in sorted(slots_to_create)[:20]:
            print(f" - {name}")
        if len(slots_to_create) > 20:
            print(f" ... and {len(slots_to_create) - 20} more")

    # Create missing slot files
    print("\n4. Creating centralized slot files...")
    created_count = 0
    unresolved = set()  # slots that could not be centralized; their inline definitions must stay
    for slot_name in sorted(slots_to_create):
        slot_def, source = slots_to_create[slot_name]
        slot_file = SLOTS_DIR / f"{slot_name}.yaml"
        if slot_file.exists():
            # The slot is not among the names read from SLOTS_DIR, so this
            # file either failed to load or defines something else.  Do not
            # overwrite it, and do not strip the inline definition either.
            unresolved.add(slot_name)
            print(
                f" Warning: {slot_file.name} exists but does not provide slot '{slot_name}'; "
                f"keeping its inline definition"
            )
            continue

        content = generate_slot_file_content(slot_name, slot_def, source)

        if args.dry_run:
            print(f" [DRY-RUN] Would create: {slot_file.name}")
        else:
            save_yaml(slot_file, content)
            if args.verbose:
                print(f" Created: {slot_file.name}")
        created_count += 1

    print(f" {'Would create' if args.dry_run else 'Created'} {created_count} slot files")

    # Update class files
    print("\n5. Updating class files to remove inline definitions...")
    updated_count = 0
    for class_file, slot_names in sorted(class_files_with_inline.items()):
        removable = [name for name in slot_names if name not in unresolved]
        if not removable:
            # Nothing in this file can be centralized; leave it untouched and uncounted
            continue
        update_class_file(class_file, removable, args.dry_run)
        updated_count += 1

    print(f" {'Would update' if args.dry_run else 'Updated'} {updated_count} class files")

    # Summary
    print("\n" + "=" * 60)
    print("Summary")
    print("=" * 60)
    print(f" Slots already centralized: {len(slots_already_exist)}")
    print(f" New slot files {'to create' if args.dry_run else 'created'}: {created_count}")
    print(f" Class files {'to update' if args.dry_run else 'updated'}: {updated_count}")

    if args.dry_run:
        print("\n Run without --dry-run to apply changes.")
    else:
        print("\n Done! Remember to:")
        print(" 1. Run linkml-validate to verify schema integrity")
        print(" 2. Update manifest.json")
        print(" 3. Commit changes")
|
|
|
|
|
|
# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|