glam/scripts/fix_dual_class_link.py
kempersc 7cf10084b4 Implement scripts for schema modifications and ontology verification
- Added `fix_dual_class_link.py` to remove dual class link references from specified YAML files.
- Created `fix_specific_ghosts.py` to apply specific replacements in YAML files based on defined mappings.
- Introduced `migrate_staff_count.py` to migrate staff count references to a new structure in specified YAML files.
- Developed `migrate_type_slots.py` to replace type-related slots with new identifiers across YAML files.
- Implemented `scan_ghost_references.py` to identify and report ghost references to archived slots and classes in YAML files.
- Added `verify_ontology_terms.py` to verify the presence of ontology terms in specified ontology files against schema definitions.
2026-01-29 17:10:25 +01:00

75 lines
2.3 KiB
Python

import os
# Directory that holds the LinkML class modules edited by this script.
SCHEMA_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/classes/"

# Class module files (relative to SCHEMA_DIR) that are processed, in order,
# when the script is run directly.
DUAL_CLASS_FILES = [
    "IconographicArchives.yaml",
    "ReligiousArchive.yaml",
    "FoundationArchiveRecordSetType.yaml",
    "BankArchiveRecordSetType.yaml",
    "ArchivesForBuildingRecordsRecordSetType.yaml",
    "PressArchive.yaml",
    "PhotoArchiveRecordSetType.yaml",
    "MuseumArchiveRecordSetType.yaml",
    "NotarialArchiveRecordSetType.yaml",
    "ProvincialArchive.yaml",
    "SectorOfArchivesInSwedenRecordSetType.yaml",
    "AudiovisualArchive.yaml",
    "MediaArchive.yaml",
    "FreeArchive.yaml",
    "MuseumArchive.yaml",
]
def _strip_dual_class_link_lines(lines):
    """Return a copy of *lines* with every dual-class-link reference removed.

    Three kinds of lines are dropped:

    * list items (stripped line starts with ``-``) whose text contains
      ``dual_class_link`` -- e.g. an import such as
      ``- ../slots/dual_class_link`` or a slot list entry;
    * list items whose text contains ``DualClassLink``;
    * a ``dual_class_link:`` mapping key together with every following line
      that is indented deeper than the key.

    The list-item match is a plain substring test, so any list item that
    merely contains one of the two names is removed as well.

    Args:
        lines: The file content as a list of lines, line endings included.

    Returns:
        A new list holding the lines that are kept, in their original order.
    """
    kept = []
    # Blank lines and shallow comments seen while skipping a block.  Their
    # fate is only known once the next structural line is read: they are
    # dropped if the block continues and kept if the block has ended.
    pending = []
    skip_block = False
    block_indent = 0

    for line in lines:
        stripped = line.strip()

        if skip_block:
            if not stripped:
                # A blank line carries no indentation information, so it
                # must not be allowed to terminate the block.
                pending.append(line)
                continue
            indent = len(line) - len(line.lstrip())
            if indent > block_indent:
                # Still inside the block: drop the line and whatever blank
                # or comment lines were buffered before it.
                pending.clear()
                continue
            if stripped.startswith("#"):
                # A comment at or below the key's indent is ambiguous for
                # the same reason as a blank line.
                pending.append(line)
                continue
            # First structural line at or below the key's indent: the block
            # is over, and the buffered lines belong to what follows.
            skip_block = False
            kept.extend(pending)
            pending.clear()

        if stripped.startswith("-") and (
            "dual_class_link" in stripped or "DualClassLink" in stripped
        ):
            continue

        if stripped == "dual_class_link:":
            skip_block = True
            block_indent = len(line) - len(line.lstrip())
            continue

        kept.append(line)

    # The file ended while still skipping: trailing blank/comment lines are
    # not part of the removed block's content, so keep them.
    kept.extend(pending)
    return kept


def remove_dual_class_link(filename):
    """Strip dual-class-link references from one YAML file in ``SCHEMA_DIR``.

    The file is read as UTF-8, filtered line by line and rewritten in place
    only when the filter actually removed something.  Existing line endings
    are preserved.  Progress and problems are reported with ``print``; a
    missing file or an I/O / decoding error does not raise.

    Args:
        filename: File name relative to ``SCHEMA_DIR``.

    Returns:
        None.
    """
    filepath = os.path.join(SCHEMA_DIR, filename)
    if not os.path.exists(filepath):
        print(f"File not found: {filename}")
        return

    print(f"Processing {filename}...")
    try:
        # newline="" disables newline translation so the original line
        # endings are written back untouched.
        with open(filepath, "r", encoding="utf-8", newline="") as f:
            lines = f.readlines()

        new_lines = _strip_dual_class_link_lines(lines)

        # Leave the file alone when there is nothing to remove.
        if new_lines != lines:
            with open(filepath, "w", encoding="utf-8", newline="") as f:
                f.writelines(new_lines)
    except (OSError, UnicodeError) as e:
        print(f"Error processing {filename}: {e}")
# Script entry point: process every file named in DUAL_CLASS_FILES, in order.
if __name__ == "__main__":
    for schema_file in DUAL_CLASS_FILES:
        remove_dual_class_link(schema_file)