- Added `fix_dual_class_link.py` to remove dual class link references from specified YAML files. - Created `fix_specific_ghosts.py` to apply specific replacements in YAML files based on defined mappings. - Introduced `migrate_staff_count.py` to migrate staff count references to a new structure in specified YAML files. - Developed `migrate_type_slots.py` to replace type-related slots with new identifiers across YAML files. - Implemented `scan_ghost_references.py` to identify and report ghost references to archived slots and classes in YAML files. - Added `verify_ontology_terms.py` to verify the presence of ontology terms in specified ontology files against schema definitions.
132 lines
5 KiB
Python
132 lines
5 KiB
Python
import os
|
|
import re
|
|
|
|
# Absolute directory whose YAML files apply_fixes() opens; each key of
# REPLACEMENTS is a file name joined onto this path.
SCHEMA_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/classes/"
|
|
|
|
# Per-file fix sets, keyed by YAML file name (relative to SCHEMA_DIR).
#
# Inside each fix set:
#   * plain "old": "new" entries rename slot list items and slot-usage keys;
#   * "imports" maps an import name to its replacement, or to None when the
#     import line must be removed;
#   * "range_overrides" records the intended range for a renamed slot
#     (apply_fixes skips this key).
REPLACEMENTS = {
    "StorageConditionPolicy.yaml": {
        "has_or_had_access_restriction": "has_or_had_policy",
        "imports": {
            "has_or_had_access_restriction": "has_or_had_policy",
            "en": None,  # "en" is archived as well, so its import is dropped
        },
        "range_overrides": {"has_or_had_policy": "AccessPolicy"},
    },
    "VereinsarchivRecordSetType.yaml": {
        "geographic_restriction": "is_or_was_applicable_in",
        "imports": {
            "geographic_restriction": "is_or_was_applicable_in",
            "Location": "../classes/Location",
        },
        "range_overrides": {"is_or_was_applicable_in": "Location"},
    },
    "VideoPost.yaml": {
        "available_caption_languages": "has_available_caption_language",
        "imports": {
            "available_caption_languages": "has_available_caption_language",
        },
    },
    "RegistrationAuthority.yaml": {
        "website": "has_or_had_url",
        "imports": {
            "website": "has_or_had_url",
            "URL": "../classes/URL",
            "en": None,
            "de": None,
            "description": "has_or_had_description",
        },
    },
    "EncompassingBodyTypes.yaml": {
        "website": "has_or_had_url",
        "founding_date": "begin_of_the_begin",
        "programme_period": "has_or_had_time_interval",
        "funding_focus": "has_or_had_focus",
        "funding_scheme": "has_or_had_scheme",
        "funding_source": "has_or_had_source",
        # The fiscal-year bounds are only approximated by these slots.
        "fiscal_year_start": "begin_of_the_begin",
        "fiscal_year_end": "end_of_the_end",
        "imports": {
            "website": "has_or_had_url",
            "founding_date": "begin_of_the_begin",
            "programme_period": "has_or_had_time_interval",
            "funding_focus": "has_or_had_focus",
            "funding_scheme": "has_or_had_scheme",
            "funding_source": "has_or_had_source",
            "en": None,
            "de": None,
        },
    },
    "GoogleMapsEnrichment.yaml": {
        "website": "has_or_had_url",
        "imports": {"website": "has_or_had_url"},
    },
    "CustodianTimelineEvent.yaml": {
        "extraction_method": "has_or_had_method",
        "extraction_note": "has_or_had_note",
        "approximation_level": "has_or_had_status",  # see ApproximationStatus
        "imports": {
            "extraction_method": "has_or_had_method",
            "extraction_note": "has_or_had_note",
            "approximation_level": "has_or_had_status",
            "en": None,
            "de": None,
            "has_archive_path": "has_or_had_file_path",
        },
    },
    "WebObservation.yaml": {
        "extraction_note": "has_or_had_note",
        "topic": "has_or_had_topic",
        "imports": {
            "extraction_note": "has_or_had_note",
            "topic": "has_or_had_topic",
        },
    },
}
|
|
|
|
# Keys of a fix set that hold configuration rather than slot renames.
_NON_SLOT_KEYS = ("imports", "range_overrides")

# Splits a YAML list-item line into: indentation plus "- " marker, the first
# whitespace-delimited token, and whatever follows (comment, line ending).
_LIST_ITEM_RE = re.compile(r"^([ \t]*-[ \t]+)(\S+)(.*)$", re.DOTALL)


def _rewrite_list_item(line, imports):
    """Apply the ``imports`` mapping to a single line.

    Returns ``(handled, new_line)``. ``handled`` is False when *line* is not a
    list item whose final path component is a key of *imports*. When handled,
    ``new_line`` is the rewritten line, or None if the line must be deleted.
    """
    match = _LIST_ITEM_RE.match(line)
    if match is None:
        return False, line
    prefix, item, suffix = match.groups()

    # Compare the *whole* final path component. A substring test would let
    # "de" swallow "../slots/description" or "website" hit "website_url".
    head, sep, name = item.rpartition("/")
    if name not in imports:
        return False, line

    target = imports[name]
    if target is None:
        return True, None  # archived import: drop the line

    if "/" in target:
        # The replacement is already a full import path; using it verbatim
        # keeps an existing "- ../classes/URL" line stable instead of
        # turning it into "- ../classes/../classes/URL".
        new_item = target
    else:
        new_item = f"{head}{sep}{target}"
    return True, f"{prefix}{new_item}{suffix}"


def _rename_slot(line, fixes):
    """Rename a slot list item (``- old``) or slot-usage key (``old:``)."""
    stripped = line.strip()
    for old, new in fixes.items():
        if old in _NON_SLOT_KEYS:
            continue
        if stripped == f"- {old}":
            return line.replace(old, new, 1)
        if stripped.startswith(f"{old}:"):
            # Only the leading key is renamed; text in the value is kept.
            return line.replace(f"{old}:", f"{new}:", 1)
    return line


def _rewrite_lines(lines, fixes):
    """Return a new list of lines with every fix in *fixes* applied."""
    imports = fixes.get("imports", {})
    result = []
    for line in lines:
        handled, new_line = _rewrite_list_item(line, imports)
        if handled:
            if new_line is not None:
                result.append(new_line)
            continue
        result.append(_rename_slot(line, fixes))
    return result


def apply_fixes(filename, fixes):
    """Rewrite one YAML file under ``SCHEMA_DIR`` according to *fixes*.

    Args:
        filename: File name, joined onto ``SCHEMA_DIR``.
        fixes: Mapping of ``old -> new`` slot names. The optional
            ``"imports"`` entry maps an import name to its replacement (a
            bare name replaces the last path component, a value containing
            ``/`` replaces the whole import path, ``None`` deletes the line).
            The ``"range_overrides"`` entry is ignored here.

    Returns:
        None. Progress and errors are reported on stdout; a missing file or a
        failure while processing is reported and does not raise.
    """
    filepath = os.path.join(SCHEMA_DIR, filename)
    if not os.path.exists(filepath):
        print(f"File not found: {filename}")
        return

    print(f"Processing {filename}...")
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()

        new_lines = _rewrite_lines(lines, fixes)

        # Leave untouched files alone so their mtime is not bumped.
        if new_lines != lines:
            with open(filepath, "w", encoding="utf-8") as f:
                f.writelines(new_lines)
    except Exception as e:
        # Best effort per file: report and let the caller continue with the
        # remaining files.
        print(f"Error processing {filename}: {e}")
|
|
|
|
if __name__ == "__main__":
    # Run every configured fix set against its schema file, in table order.
    for schema_file, file_fixes in REPLACEMENTS.items():
        apply_fixes(schema_file, file_fixes)
|