glam/scripts/fix_specific_ghosts.py
kempersc 7cf10084b4 Implement scripts for schema modifications and ontology verification
- Added `fix_dual_class_link.py` to remove dual class link references from specified YAML files.
- Created `fix_specific_ghosts.py` to apply specific replacements in YAML files based on defined mappings.
- Introduced `migrate_staff_count.py` to migrate staff count references to a new structure in specified YAML files.
- Developed `migrate_type_slots.py` to replace type-related slots with new identifiers across YAML files.
- Implemented `scan_ghost_references.py` to identify and report ghost references to archived slots and classes in YAML files.
- Added `verify_ontology_terms.py` to verify the presence of ontology terms in specified ontology files against schema definitions.
2026-01-29 17:10:25 +01:00

132 lines
5 KiB
Python

import os
import re
# Directory holding the LinkML class modules that this script edits in place.
SCHEMA_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/classes/"

# Per-file replacement tables, keyed by the YAML file name inside SCHEMA_DIR.
#
# Within each table:
#   * a plain "old": "new" entry renames a slot list item or slot-usage key;
#   * "imports" maps an import name to its replacement, or to None when the
#     matching line should be removed altogether;
#   * "range_overrides" is recorded alongside the renames, but apply_fixes
#     does not read it.
REPLACEMENTS = {
    "StorageConditionPolicy.yaml": {
        "has_or_had_access_restriction": "has_or_had_policy",
        "imports": {
            "has_or_had_access_restriction": "has_or_had_policy",
            "en": None,  # en is archived too
        },
        "range_overrides": {
            "has_or_had_policy": "AccessPolicy",
        },
    },
    "VereinsarchivRecordSetType.yaml": {
        "geographic_restriction": "is_or_was_applicable_in",
        "imports": {
            "geographic_restriction": "is_or_was_applicable_in",
            "Location": "../classes/Location",
        },
        "range_overrides": {
            "is_or_was_applicable_in": "Location",
        },
    },
    "VideoPost.yaml": {
        "available_caption_languages": "has_available_caption_language",
        "imports": {
            "available_caption_languages": "has_available_caption_language",
        },
    },
    "RegistrationAuthority.yaml": {
        "website": "has_or_had_url",
        "imports": {
            "website": "has_or_had_url",
            "URL": "../classes/URL",
            "en": None,
            "de": None,
            "description": "has_or_had_description",
        },
    },
    "EncompassingBodyTypes.yaml": {
        "website": "has_or_had_url",
        "founding_date": "begin_of_the_begin",
        "programme_period": "has_or_had_time_interval",
        "funding_focus": "has_or_had_focus",
        "funding_scheme": "has_or_had_scheme",
        "funding_source": "has_or_had_source",
        "fiscal_year_start": "begin_of_the_begin",  # approximated
        "fiscal_year_end": "end_of_the_end",  # approximated
        "imports": {
            "website": "has_or_had_url",
            "founding_date": "begin_of_the_begin",
            "programme_period": "has_or_had_time_interval",
            "funding_focus": "has_or_had_focus",
            "funding_scheme": "has_or_had_scheme",
            "funding_source": "has_or_had_source",
            "en": None,
            "de": None,
        },
    },
    "GoogleMapsEnrichment.yaml": {
        "website": "has_or_had_url",
        "imports": {
            "website": "has_or_had_url",
        },
    },
    "CustodianTimelineEvent.yaml": {
        "extraction_method": "has_or_had_method",
        "extraction_note": "has_or_had_note",
        "approximation_level": "has_or_had_status",  # ApproximationStatus
        "imports": {
            "extraction_method": "has_or_had_method",
            "extraction_note": "has_or_had_note",
            "approximation_level": "has_or_had_status",
            "en": None,
            "de": None,
            "has_archive_path": "has_or_had_file_path",
        },
    },
    "WebObservation.yaml": {
        "extraction_note": "has_or_had_note",
        "topic": "has_or_had_topic",
        "imports": {
            "extraction_note": "has_or_had_note",
            "topic": "has_or_had_topic",
        },
    },
}
# Keys of a per-file fixes table that hold nested tables rather than renames.
_RESERVED_FIX_KEYS = ("imports", "range_overrides")

# A simple YAML sequence entry: "<indent>- <token>", optionally followed by a
# trailing "# comment". Entries with more than one token are not matched.
_LIST_ITEM_RE = re.compile(r"^(\s*-\s+)(\S+)(\s*(?:#.*)?)$")


def _rewrite_line(line, fixes):
    """Return *line* with the fixes applied, or None if it must be dropped.

    Import entries are matched on the exact final path component (or the
    whole token when it has no path), never on a substring, so a short name
    such as ``en`` cannot hit ``../slots/end_of_the_end``.
    """
    stripped = line.strip()
    item = None
    entry = _LIST_ITEM_RE.match(line)
    if entry:
        prefix, item, suffix = entry.groups()
        # Anything the pattern left unconsumed (at most the line terminator).
        tail = line[entry.end():]
        directory, slash, leaf = item.rpartition("/")
        for old, new in fixes.get("imports", {}).items():
            if leaf != old:
                continue
            if new is None:
                return None  # archived import: remove the line
            # A replacement that is itself a path replaces the whole token,
            # which keeps the rewrite idempotent for "- ../classes/URL".
            new_item = new if "/" in new else f"{directory}{slash}{new}"
            return f"{prefix}{new_item}{suffix}{tail}"

    for old, new in fixes.items():
        if old in _RESERVED_FIX_KEYS:
            continue
        # Slot list item.
        if item is not None and item == old:
            return f"{prefix}{new}{suffix}{tail}"
        # Slot usage key: rename the key only, never text inside the value.
        if stripped.startswith(f"{old}:"):
            return line.replace(f"{old}:", f"{new}:", 1)
    return line


def apply_fixes(filename, fixes, schema_dir=None):
    """Apply one file's replacement table to a YAML file, editing it in place.

    Args:
        filename: Name of the YAML file, relative to the schema directory.
        fixes: Mapping of old slot name to new slot name. The optional
            ``"imports"`` entry maps an import name to its replacement, or to
            None to delete the matching line. ``"range_overrides"`` is
            accepted but not applied.
        schema_dir: Directory containing *filename*. Defaults to the
            module-level ``SCHEMA_DIR``.

    Returns:
        None. Progress and problems are reported with ``print``; a missing
        or unreadable file is reported, not raised.
    """
    base_dir = SCHEMA_DIR if schema_dir is None else schema_dir
    filepath = os.path.join(base_dir, filename)
    if not os.path.exists(filepath):
        print(f"File not found: {filename}")
        return
    print(f"Processing {filename}...")
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
        rewritten = (_rewrite_line(line, fixes) for line in lines)
        new_lines = [line for line in rewritten if line is not None]
        # Leave the file untouched when there is nothing to change.
        if new_lines != lines:
            with open(filepath, "w", encoding="utf-8") as f:
                f.writelines(new_lines)
    except (OSError, UnicodeError) as e:
        print(f"Error processing {filename}: {e}")
if __name__ == "__main__":
    # Walk the replacement tables in declaration order, one YAML file each.
    for target_file, file_fixes in REPLACEMENTS.items():
        apply_fixes(target_file, file_fixes)