Summary:
- Create 46 missing slot definition files with proper slot_uri values
- Add slot imports to main schema (01_custodian_name_modular.yaml)
- Fix YAML examples sections in 116+ class and slot files
- Fix PersonObservation.yaml examples section (nested objects → string literals)

Technical changes:
- All slots now have explicit slot_uri mapping to base ontologies (RiC-O, Schema.org, SKOS)
- Eliminates malformed URIs like 'custodian/:slot_name' in generated RDF
- gen-owl now produces valid Turtle with 153,166 triples

New slot files (46):
- RiC-O slots: rico_note, rico_organizational_principle, rico_has_or_had_holder, etc.
- Scope slots: scope_includes, scope_excludes, archive_scope
- Organization slots: organization_type, governance_authority, area_served
- Platform slots: platform_type_category, portal_type_category
- Social media slots: social_media_platform_category, post_type_*
- Type hierarchy slots: broader_type, narrower_types, custodian_type_broader
- Wikidata slots: wikidata_equivalent, wikidata_mapping

Generated output:
- schemas/20251121/rdf/01_custodian_name_modular_20260107_134534_clean.owl.ttl (6.9MB)
- Validated with rdflib: 153,166 triples, no malformed URIs
152 lines
4.9 KiB
Python
152 lines
4.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
Fix YAML indentation issues in examples sections.

The ruamel.yaml library introduced a bug where example descriptions
are not properly indented under the example item:

BROKEN:
  examples:
    - value: https://example.com
    description: Some description  # WRONG - should be indented

CORRECT:
  examples:
    - value: https://example.com
      description: Some description  # Correct - indented under value

This script fixes the indentation by parsing the file as text and
correcting the pattern.
"""
|
|
|
|
import argparse
|
|
import re
|
|
from pathlib import Path
|
|
|
|
|
|
def fix_yaml_examples(content: str) -> tuple[str, int]:
    """
    Re-indent example keys that sit at the dash column instead of under it.

    The text is scanned line by line. A line of the form
    ``<indent>- value:`` (with at least one leading whitespace character)
    opens a list item. While an item is open:

    * a line that starts with exactly ``<indent>`` spaces followed by
      ``description:`` or ``value_description:`` is rewritten two columns
      deeper, with a single space after the colon and trailing whitespace
      removed, and counted as one fix;
    * blank lines and lines indented deeper than the dash are copied
      unchanged;
    * any other line (including the next ``- value:``) closes the item and
      is then handled as an ordinary line.

    NOTE(review): the match is on any ``- value:`` line, not only those
    under an ``examples:`` key, and a ``description:`` key at the dash
    column is always moved. If inputs can contain sequences that are not
    indented relative to their parent key, such a ``description:`` may be a
    legitimate sibling of that parent -- confirm against the real files.

    Returns:
        Tuple of (fixed_content, number_of_fixes)
    """
    item_opener = re.compile(r'^(\s+)-\s+value:')

    output = []
    repaired = 0
    dash_column = None      # width of the indent before the open item's dash
    misplaced_probe = None  # detects a key sitting at the dash column
    key_and_value = None    # splits such a line into key and value

    for raw in content.split('\n'):
        if dash_column is not None:
            if misplaced_probe.match(raw):
                parts = key_and_value.match(raw)
                if parts:
                    nested = ' ' * (dash_column + 2)
                    output.append(
                        f"{nested}{parts.group(1)}: {parts.group(2)}".rstrip()
                    )
                    repaired += 1
                    continue

            if raw.strip() == '':
                # Blank lines never close an item.
                output.append(raw)
                continue

            still_inside = (
                not item_opener.match(raw)
                and raw.startswith(' ' * (dash_column + 1))
            )
            if still_inside:
                output.append(raw)
                continue

            # The item is over; handle this same line as an ordinary line.
            dash_column = None

        opener = item_opener.match(raw)
        if opener:
            # The character after group(1) is the dash, so group(1) is the
            # line's complete leading-whitespace run.
            dash_column = len(opener.group(1))
            misplaced_probe = re.compile(
                rf'^{" " * dash_column}(description|value_description):'
            )
            key_and_value = re.compile(rf'^{" " * dash_column}(\w+):\s*(.*)')
        output.append(raw)

    return '\n'.join(output), repaired
|
|
|
|
|
|
def fix_yaml_file(filepath: Path, dry_run: bool = False, verbose: bool = False) -> int:
    """Fix a single YAML file and return the number of fixes made.

    The file is read, passed through ``fix_yaml_examples``, and written
    back only when at least one fix was made and ``dry_run`` is false.

    Args:
        filepath: Path of the YAML file to process.
        dry_run: When true, never write to ``filepath``.
        verbose: When true, print one line for a file that has fixes.

    Returns:
        Number of lines that were (or, in a dry run, would be) re-indented.
    """
    # Read and write explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (which is not UTF-8 on every system).
    content = filepath.read_text(encoding="utf-8")
    fixed_content, fixes = fix_yaml_examples(content)

    if fixes > 0:
        if verbose:
            print(f" {filepath.name}: {fixes} fix(es)")
        if not dry_run:
            filepath.write_text(fixed_content, encoding="utf-8")

    return fixes
|
|
|
|
|
|
def main():
    """Parse command-line options, process the selected files, and report.

    With ``--file`` only that file is processed; otherwise every ``*.yaml``
    file directly inside ``--slots-dir`` is. Files are handled in sorted
    order and a summary line is printed at the end.
    """
    cli = argparse.ArgumentParser(description="Fix YAML examples indentation issues")
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be changed without making changes",
    )
    cli.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show detailed output",
    )
    cli.add_argument(
        "--file",
        type=Path,
        help="Fix a single file instead of all slot files",
    )
    cli.add_argument(
        "--slots-dir",
        type=Path,
        default=Path("schemas/20251121/linkml/modules/slots"),
        help="Directory containing slot YAML files",
    )
    args = cli.parse_args()

    files = [args.file] if args.file else list(args.slots_dir.glob("*.yaml"))

    print(f"{'[DRY RUN] ' if args.dry_run else ''}Checking {len(files)} files...")

    # One count per file, gathered in the same sorted order the files are
    # processed in.
    per_file_counts = [
        fix_yaml_file(filepath, dry_run=args.dry_run, verbose=args.verbose)
        for filepath in sorted(files)
    ]
    changed = [count for count in per_file_counts if count > 0]
    total_fixes = sum(changed)
    files_fixed = len(changed)

    print(f"\n{'Would fix' if args.dry_run else 'Fixed'} {total_fixes} indentation issue(s) in {files_fixed} file(s)")

    if args.dry_run and total_fixes > 0:
        print("\nRun without --dry-run to apply fixes.")


if __name__ == "__main__":
    main()
|