Summary:
- Create 46 missing slot definition files with proper slot_uri values
- Add slot imports to main schema (01_custodian_name_modular.yaml)
- Fix YAML examples sections in 116+ class and slot files
- Fix PersonObservation.yaml examples section (nested objects → string literals)

Technical changes:
- All slots now have explicit slot_uri mapping to base ontologies (RiC-O, Schema.org, SKOS)
- Eliminates malformed URIs like 'custodian/:slot_name' in generated RDF
- gen-owl now produces valid Turtle with 153,166 triples

New slot files (46):
- RiC-O slots: rico_note, rico_organizational_principle, rico_has_or_had_holder, etc.
- Scope slots: scope_includes, scope_excludes, archive_scope
- Organization slots: organization_type, governance_authority, area_served
- Platform slots: platform_type_category, portal_type_category
- Social media slots: social_media_platform_category, post_type_*
- Type hierarchy slots: broader_type, narrower_types, custodian_type_broader
- Wikidata slots: wikidata_equivalent, wikidata_mapping

Generated output:
- schemas/20251121/rdf/01_custodian_name_modular_20260107_134534_clean.owl.ttl (6.9MB)
- Validated with rdflib: 153,166 triples, no malformed URIs
109 lines
3.6 KiB
Python
109 lines
3.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
Fix YAML examples formatting issues in LinkML slot files.

The common issue is examples like:

    examples:
      - value: "some content..."
      description: Some description

When it should be:

    examples:
      - value: "some content..."
        description: Some description

(description should be indented under the list item, aligned with "value:",
not at the same level as the "-" of "- value:")
"""
|
|
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def fix_yaml_file(filepath: Path) -> bool:
    """Repair mis-indented ``examples`` entries in one YAML file, in place.

    Two textual repairs are applied, in this order:

    1. Inside an ``examples:`` section, a ``description:`` key written at the
       same column as the ``-`` of a preceding ``- value:`` item is moved two
       columns to the right so that it lines up with ``value:``.
    2. The list-item line that directly follows a bare ``- value:`` line is
       indented two more columns so that it nests under ``value``.

    Args:
        filepath: YAML file to read and, if anything changed, overwrite.

    Returns:
        True if the content changed and the file was rewritten, else False.
    """
    # Explicit encoding so the result does not depend on the platform locale.
    content = filepath.read_text(encoding='utf-8')
    original = content

    # --- Repair 1: line-by-line scan of each "examples:" section -----------
    fixed_lines = []
    in_examples = False       # currently inside an "examples:" section
    in_example_item = False   # saw a "- value:" item whose description may follow
    example_indent = 0        # column of the current "examples:" key
    item_re = None            # matches "- value:" / "- description:" items
    desc_re = None            # matches a "description:" key at the item column

    for line in content.split('\n'):
        # Detect entering an examples section.
        header = re.match(r'^(\s*)examples:\s*$', line)
        if header:
            in_examples = True
            # A flag left over from an earlier section must not leak into
            # this one, otherwise a correctly placed description could be
            # shifted.
            in_example_item = False
            example_indent = len(header.group(1))
            item_indent = example_indent + 2
            # Build each pattern in a single rf-string: "{{" / "}}" are only
            # unescaped inside an f-string, so splitting the pattern across a
            # plain raw string leaves a stray literal "}" that never matches.
            item_re = re.compile(
                rf'^\s{{{item_indent}}}-\s+(value:|description:)'
            )
            desc_re = re.compile(rf'^\s{{{item_indent}}}(description:.*)')
            fixed_lines.append(line)
            continue

        if in_examples:
            stripped = line.strip()

            # Check whether this line ends the section: it is not indented
            # below "examples:" and is either dedented further or is a
            # mapping key (not a "-" list item) at the same column.
            if stripped and not line.startswith(' ' * (example_indent + 2)):
                dedented = not line.startswith(' ' * example_indent)
                is_key = (
                    not stripped.startswith('-')
                    and ':' in line.split('#')[0]
                )
                if dedented or is_key:
                    in_examples = False
                    in_example_item = False
                    fixed_lines.append(line)
                    continue

            # A new example item ("- value:" or "- description:").
            if item_re.match(line):
                in_example_item = True
                fixed_lines.append(line)
                continue

            # A "description:" sitting at the item's dash column belongs to
            # the preceding item: push it two columns right.
            desc_match = desc_re.match(line)
            if desc_match and in_example_item:
                fixed_lines.append(
                    ' ' * (example_indent + 4) + desc_match.group(1)
                )
                in_example_item = False
                continue

        fixed_lines.append(line)

    content = '\n'.join(fixed_lines)

    # --- Repair 2: list that should nest under a bare "- value:" ------------
    # NOTE(review): this adds two columns on every match, including input
    # that is already nested, so it is not idempotent across repeated runs,
    # and it only touches the first list line after "- value:". Left as-is
    # to preserve existing behaviour; confirm whether that is intended.
    content = re.sub(
        r'(\s+- value:)\n(\s+)(- [^\n]+)',
        lambda m: f'{m.group(1)}\n{" " * (len(m.group(2)) + 2)}{m.group(3)}',
        content
    )

    if content != original:
        filepath.write_text(content, encoding='utf-8')
        return True
    return False
|
|
|
|
|
|
def main():
    """Apply ``fix_yaml_file`` to every ``*.yaml`` slot file and report.

    Exits with status 1 if the slots directory does not exist. Otherwise
    prints one line per rewritten file followed by a total count.
    """
    slots_dir = Path('schemas/20251121/linkml/modules/slots')

    # Guard clause: nothing to do without the slots directory.
    if not slots_dir.exists():
        print(f"Directory not found: {slots_dir}")
        sys.exit(1)

    fixed_names = []
    # Sorted so the report order is stable between runs.
    for yaml_file in sorted(slots_dir.glob('*.yaml')):
        if not fix_yaml_file(yaml_file):
            continue
        print(f"Fixed: {yaml_file.name}")
        fixed_names.append(yaml_file.name)

    modified = len(fixed_names)
    print(f"\nModified {modified} files")
|
|
|
|
|
|
# Run the fixer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|