glam/find_cycles.py
kempersc fc405445c6 Refactor and update schema definitions
- Removed obsolete slots: `has_or_had_custodian_observation`, `provider`, and `specificity_annotation`.
- Updated `has_or_had_score` slot to use `SpecificityScore` class and modified its description and examples.
- Added new slots: `end_seconds`, `end_time`, `has_archive_path`, `has_or_had_custodian_name`, `protocol_name`, and `protocol_version`.
- Introduced a script `check_annotation_types.py` to validate the presence and structure of `custodian_types` in YAML files.
- Added a script `update_specificity.py` to automate the update from `SpecificityAnnotation` to `SpecificityScore`.
2026-02-01 19:55:38 +01:00

89 lines
2.6 KiB
Python

import os
import networkx as nx
def _clean_import_entry(entry):
    """Return the scalar value of one ``- value`` list line, or ``""`` if empty."""
    value = entry.lstrip("- ").strip()
    if value.startswith("#"):
        # "- # note" is an empty item followed by a comment.
        return ""
    if value[:1] in ("'", '"'):
        closing = value.find(value[0], 1)
        if closing != -1:
            # Quoted scalar: keep only the text between the quotes.
            return value[1:closing]
        return value
    # Unquoted scalar: a '#' preceded by whitespace starts a YAML comment.
    for index, char in enumerate(value):
        if char == "#" and index > 0 and value[index - 1] in " \t":
            return value[:index].rstrip()
    return value


def get_imports(path):
    """Return the entries listed under ``imports:`` in the YAML file at *path*.

    The file is scanned line by line rather than parsed as YAML. A line whose
    stripped content is exactly ``imports:`` opens the section; every following
    line that starts with ``-`` contributes one entry. Blank lines and ``#``
    comment lines inside the section are skipped, and any other line closes
    the section.

    Surrounding quotes and trailing ``# comment`` text are removed from each
    entry, and empty list items are dropped so callers never receive ``""``.

    Args:
        path: Path of the YAML file to scan.

    Returns:
        A list of import strings in file order. The list is empty when the
        file cannot be read or decoded as UTF-8 (best-effort behaviour).
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeError):
        # Best effort: an unreadable file simply contributes no imports.
        return []

    imports = []
    in_imports = False
    for line in lines:
        stripped = line.strip()
        if stripped == "imports:":
            in_imports = True
            continue
        if not in_imports:
            continue
        if stripped.startswith("-"):
            imp = _clean_import_entry(stripped)
            if imp:
                imports.append(imp)
        elif stripped and not stripped.startswith("#"):
            # First non-blank, non-comment, non-item line ends the section.
            in_imports = False
    return imports
def build_graph(root_dir):
    """Build the import graph of the ``.yaml`` files found under *root_dir*.

    Every ``.yaml`` file below *root_dir* becomes a node named by its path
    relative to *root_dir*. An edge ``A -> B`` is added when file ``A`` lists
    an import that resolves to file ``B``.

    Import entries are resolved relative to the directory of the importing
    file, and ``.yaml`` is appended when the entry does not already end with
    it. Entries starting with ``linkml:`` are skipped, as are entries that do
    not resolve to a file found under *root_dir*.

    Args:
        root_dir: Directory to scan, e.g. ``schemas/20251121/linkml``.

    Returns:
        A ``networkx.DiGraph`` of the imports between the scanned files.
    """
    G = nx.DiGraph()

    # Normalise the root first so that the keys of ``files_map`` have the same
    # form as the ``os.path.normpath`` results they are compared with below.
    # Without this, a root such as "./schemas" or "schemas/" makes every
    # lookup miss and the graph silently ends up without edges.
    root_dir = os.path.abspath(root_dir)

    files_map = {}  # absolute path -> path relative to root_dir (node name)
    for current_dir, _dirs, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.endswith(".yaml"):
                abs_path = os.path.join(current_dir, filename)
                rel_path = os.path.relpath(abs_path, root_dir)
                files_map[abs_path] = rel_path
                G.add_node(rel_path)

    for abs_path, rel_path in files_map.items():
        base_dir = os.path.dirname(abs_path)
        for imp in get_imports(abs_path):
            if imp.startswith("linkml:"):
                continue
            # Imports are written relative to the importing file.
            imp_abs = os.path.normpath(os.path.join(base_dir, imp))
            if not imp_abs.endswith(".yaml"):
                imp_abs += ".yaml"
            target = files_map.get(imp_abs)
            if target is not None:
                G.add_edge(rel_path, target)
            # Imports that do not resolve to a scanned file are ignored.
    return G
def find_cycles(G, max_cycles=10):
    """Print the simple cycles of *G*, stopping after *max_cycles* of them.

    Cycles are enumerated with ``nx.simple_cycles``. Each one is printed as a
    ``Cycle found:`` line followed by the cycle itself. When no cycle exists,
    ``No cycles found.`` is printed instead. Any exception raised during
    enumeration is reported on stdout rather than propagated.

    Args:
        G: Directed graph to inspect.
        max_cycles: Positive number of cycles to report before stopping, or
            ``None`` to report all of them. Defaults to 10.

    Returns:
        The number of cycles that were printed.
    """
    count = 0
    try:
        for cycle in nx.simple_cycles(G):
            print("Cycle found:")
            print(cycle)
            count += 1
            if max_cycles is not None and count >= max_cycles:
                print(f"Stopping after {max_cycles} cycles.")
                break
        if count == 0:
            print("No cycles found.")
    except Exception as e:
        print(f"Error finding cycles: {e}")
    return count
if __name__ == "__main__":
    # Scan the LinkML schema tree (path is relative to the current working
    # directory) and print any import cycles it contains.
    find_cycles(build_graph("schemas/20251121/linkml"))