- Updated imports in FindingAid.yaml to remove unnecessary entries and added new slots for arrangement level and provenance path. - Replaced 'full_name' with 'has_or_had_label' in LegalName.yaml and ProfileData.yaml for uniformity. - Enhanced slot definitions in various YAML files, including ceases_or_ceased_through, has_or_had_arrangement_level, has_or_had_assessment, and others, to include metadata and improve structure. - Removed the script fix_linkml_metadata.py as it is no longer needed. - Added new script fix_specific_dead_links.py to handle specific mapping updates for extraction metadata and full name fields across multiple YAML files.
93 lines
3.7 KiB
Python
93 lines
3.7 KiB
Python
import os
|
|
import re
|
|
import glob
|
|
|
|
# Directory listed by get_archived_slot_names(): every "*.yaml" file name in
# here is treated as the name of an archived slot.
# NOTE(review): absolute, machine-specific path -- adjust before running on
# another checkout.
ARCHIVE_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/slots/archive/"

# Directory whose "*.yaml" files are scanned by find_references() for lines
# that still mention an archived slot.
CLASSES_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/classes/"
|
|
|
|
def get_archived_slot_names(archive_dir=None):
    """Return the set of slot names that have a YAML file in the archive.

    File names follow one of two patterns: ``name.yaml`` or
    ``name_archived_YYYYMMDD.yaml``.  In both cases the slot name is the
    part before ``.yaml`` / ``_archived_``.

    Args:
        archive_dir: Directory to list.  Defaults to the module-level
            ``ARCHIVE_DIR`` when omitted.

    Returns:
        A set of slot-name strings.  Entries not ending in ``.yaml`` are
        ignored, as are files whose derived name is empty (e.g. ``.yaml``
        or ``_archived_20250101.yaml``): an empty slot name would match
        every ``../slots/`` import line during the reference scan.

    Raises:
        OSError: If the directory cannot be listed (propagated from
            ``os.listdir``).
    """
    if archive_dir is None:
        archive_dir = ARCHIVE_DIR

    suffix = ".yaml"
    slots = set()
    for entry in os.listdir(archive_dir):
        if not entry.endswith(suffix):
            continue
        # Drop the extension, then any "_archived_<date>" tail.  split()
        # returns the whole string when the marker is absent.
        name = entry[:-len(suffix)].split("_archived_")[0]
        if name:
            slots.add(name)
    return slots
|
|
|
|
def find_references(archived_slots, classes_dir=None):
    """Find lines in class YAML files that still mention archived slots.

    Each non-comment line of every ``*.yaml`` file in the classes directory
    is checked for three kinds of reference:

    * ``Import`` -- the line contains ``../slots/<slot>``.  The slot name
      is matched as a whole token, so archived slot ``full`` is *not*
      reported for an import of ``../slots/full_name``.
    * ``Usage in slots list`` -- the stripped line is exactly ``- <slot>``.
    * ``Usage as key`` -- the stripped line starts with ``<slot>:``, unless
      the name is one of the structural metadata keys listed below.

    A slot reported as ``Import`` on a line is not reported again for the
    other two kinds on that same line.

    Args:
        archived_slots: Iterable of archived slot names.
        classes_dir: Directory containing the class YAML files.  Defaults
            to the module-level ``CLASSES_DIR`` when omitted.

    Returns:
        A dict mapping slot name to a list of strings of the form
        ``"<file> (line <n>): <kind>"``.  Files are visited in sorted
        order, so the result is deterministic.  Slots without references
        are absent from the dict.
    """
    # Metadata keys that mimic slot names but are valid LinkML structure.
    # "Usage as key" is ignored for these.
    safe_metadata_keys = frozenset({
        "title", "description", "name", "id", "status", "notes", "comments",
        "examples", "todos", "see_also", "range", "slot_usage", "required",
        "multivalued", "inlined", "identifier", "value", "unit", "prefixes",
        "imports", "classes", "slots", "attributes", "exact_mappings",
        "close_mappings", "related_mappings",
    })

    if classes_dir is None:
        classes_dir = CLASSES_DIR

    # Set membership replaces the per-line loop over every archived slot.
    archived = frozenset(archived_slots)

    # Captures the full slot token after "../slots/" so that only exact
    # names match.  NOTE: slot names containing characters other than
    # letters, digits, "_" or "-" are not recognised in import lines.
    import_target = re.compile(r"\.\./slots/([\w\-]+)")

    references = {}

    def record(slot, path, line_no, kind):
        references.setdefault(slot, []).append(
            f"{path} (line {line_no}): {kind}"
        )

    for cls_file in sorted(glob.glob(os.path.join(classes_dir, "*.yaml"))):
        with open(cls_file, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                stripped = line.strip()
                # Blank lines and YAML comments can never be references.
                if not stripped or stripped.startswith("#"):
                    continue

                # Import check: "- ../slots/slotname".  dict.fromkeys
                # de-duplicates while keeping first-seen order.
                imported = [
                    slot
                    for slot in dict.fromkeys(import_target.findall(stripped))
                    if slot in archived
                ]
                for slot in imported:
                    record(slot, cls_file, line_no, "Import")

                # Usage in slots list: "- slotname" (exact match only).
                if stripped.startswith("- "):
                    item = stripped[2:]
                    if item in archived and item not in imported:
                        record(item, cls_file, line_no, "Usage in slots list")

                # Usage as key: "slotname:".  This also covers slot_usage
                # entries: an archived slot should not be refined there.
                key, colon, _ = stripped.partition(":")
                if (
                    colon
                    and key in archived
                    and key not in imported
                    and key not in safe_metadata_keys
                ):
                    record(key, cls_file, line_no, "Usage as key")

    return references
|
|
|
|
def main():
    """Print a report of archived slots that class files still reference.

    Collects the archived slot names, scans the class files for them, and
    prints each referenced slot followed by its locations.  When nothing
    is found, a single "no dead links" message is printed instead.
    """
    print("Identifying archived slots...")
    archived = get_archived_slot_names()
    print(f"Found {len(archived)} archived slots.")

    # No names are filtered out up front: even a common metadata word such
    # as 'description' is checked if it exists as an archived slot.
    print("Scanning class files for references...")
    dead_links = find_references(archived)

    if not dead_links:
        print("No dead links found.")
        return

    print(f"Found {len(dead_links)} archived slots still referenced in classes:")
    for slot_name in dead_links:
        print(f"\nSLOT: {slot_name}")
        for location in dead_links[slot_name]:
            print(f" {location}")
|
|
|
|
# Run the report only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|