diff --git a/frontend/src/pages/Visualize.css b/frontend/src/pages/Visualize.css index b77f9a35b9..2641f7ab2c 100644 --- a/frontend/src/pages/Visualize.css +++ b/frontend/src/pages/Visualize.css @@ -1050,6 +1050,111 @@ cursor: not-allowed; } +/* UML Density Section */ +.uml-density-section { + padding: 1rem 1.5rem; + background: #f0f4ff; + border-bottom: 1px solid #4a7dff; +} + +.uml-density-section h3 { + margin: 0 0 0.5rem 0; + font-size: 0.875rem; + color: #172a59; + font-weight: 600; +} + +.uml-density-desc { + margin: 0 0 0.75rem 0; + font-size: 0.75rem; + color: #666; +} + +.uml-density-options { + display: flex; + flex-direction: column; + gap: 0.5rem; +} + +.uml-density-option { + display: flex; + flex-direction: column; + gap: 0.125rem; + width: 100%; + padding: 0.625rem 0.75rem; + background: white; + border: 1px solid #e0e0e0; + border-radius: 6px; + text-align: left; + cursor: pointer; + transition: all 0.15s ease; +} + +.uml-density-option:hover { + border-color: #4a7dff; + background: #f8f9ff; +} + +.uml-density-option.active { + border-color: #4a7dff; + background: #ebefff; +} + +.uml-density-label { + font-size: 0.8125rem; + font-weight: 500; + color: #172a59; +} + +.uml-density-hint { + font-size: 0.6875rem; + color: #888; +} + +.uml-density-option.active .uml-density-label { + color: #4a7dff; +} + +.uml-density-info { + margin: 0.625rem 0 0 0; + font-size: 0.75rem; + color: #4a7dff; + font-weight: 500; +} + +.uml-module-picker { + margin-top: 0.75rem; +} + +.uml-module-label { + display: block; + margin: 0 0 0.375rem 0; + font-size: 0.75rem; + color: #172a59; + font-weight: 500; +} + +.uml-module-select { + width: 100%; + padding: 0.5rem; + border: 1px solid #c8d4ff; + border-radius: 4px; + background: white; + font-size: 0.8125rem; + color: #172a59; + cursor: pointer; +} + +.uml-module-select:hover { + border-color: #4a7dff; +} + +.uml-module-select:focus { + outline: none; + border-color: #4a7dff; + box-shadow: 0 0 0 2px rgba(74, 125, 255, 0.2); 
+} + /* Node Info Section */ .node-info-section { padding: 1rem 1.5rem; @@ -2346,6 +2451,64 @@ body:has(.visualize-page.is-mobile .sidebar--mobile:not(.collapsed)) { background: #6b9eff; } +[data-theme="dark"] .uml-density-section { + background: #2d2d4a; + border-bottom-color: #4a7dff; +} + +[data-theme="dark"] .uml-density-section h3 { + color: #e0e0e0; +} + +[data-theme="dark"] .uml-density-desc { + color: #a0a0b0; +} + +[data-theme="dark"] .uml-density-option { + background: #1e1e32; + border-color: #3d3d5c; +} + +[data-theme="dark"] .uml-density-option:hover { + border-color: #4a7dff; + background: #2d2d4a; +} + +[data-theme="dark"] .uml-density-option.active { + border-color: #4a7dff; + background: #2d2d4a; +} + +[data-theme="dark"] .uml-density-label { + color: #e0e0e0; +} + +[data-theme="dark"] .uml-density-hint { + color: #888; +} + +[data-theme="dark"] .uml-density-option.active .uml-density-label { + color: #6b9eff; +} + +[data-theme="dark"] .uml-density-info { + color: #6b9eff; +} + +[data-theme="dark"] .uml-module-label { + color: #e0e0e0; +} + +[data-theme="dark"] .uml-module-select { + background: #1e1e32; + border-color: #3d3d5c; + color: #e0e0e0; +} + +[data-theme="dark"] .uml-module-select:hover { + border-color: #4a7dff; +} + /* Node Info Section */ [data-theme="dark"] .node-info-section { background: #2d2d4a; diff --git a/frontend/src/pages/Visualize.tsx b/frontend/src/pages/Visualize.tsx index 9d568b8974..3ee28dbe7e 100644 --- a/frontend/src/pages/Visualize.tsx +++ b/frontend/src/pages/Visualize.tsx @@ -3,7 +3,7 @@ * Supports both RDF (Turtle, N-Triples) and UML (Mermaid, PlantUML, GraphViz) formats */ -import React, { useState, useCallback, useRef, useEffect } from 'react'; +import React, { useState, useCallback, useRef, useEffect, useMemo } from 'react'; import { useDatabase } from '@/hooks/useDatabase'; import { useRdfParser } from '@/hooks/useRdfParser'; import { useGraphData } from '@/hooks/useGraphData'; @@ -60,6 +60,99 @@ function 
/** Display modes offered by the UML density selector. */
type UmlDensityMode = 'full' | 'streamlined' | 'module';

/** One module-picker entry: raw module id, display label and number of nodes. */
type UmlModuleOption = { id: string; label: string; count: number };

/**
 * Turns a snake_case module id into a display label.
 *
 * Underscores become spaces and the first character of every word is
 * upper-cased, e.g. `heritage_custodian` -> `Heritage Custodian`.
 *
 * @param moduleId - Raw module identifier.
 * @returns The label shown in the module picker.
 */
function humanizeModuleName(moduleId: string): string {
  return moduleId
    .replace(/_/g, ' ')
    .replace(/\b\w/g, (c) => c.toUpperCase());
}

/**
 * Produces a reduced copy of a large UML diagram that keeps only the
 * well-connected nodes.
 *
 * Diagrams with 300 nodes or fewer are returned unchanged. For larger ones a
 * node is kept when its degree (counting only links whose both ends exist in
 * the diagram) reaches a threshold that grows with diagram size, or when it is
 * one of a few hard-coded anchor classes. If fewer than 50 nodes survive, the
 * original diagram is returned instead.
 *
 * @param diagram - Full diagram; never mutated.
 * @returns The original diagram or a shallow copy with filtered nodes/links.
 */
function buildStreamlinedUmlDiagram(diagram: UMLDiagram): UMLDiagram {
  if (!diagram || !diagram.nodes || !diagram.links) {
    return diagram;
  }

  const nodeCount = diagram.nodes.length;
  if (nodeCount <= 300) {
    return diagram;
  }

  const degree = new Map<string, number>();
  diagram.nodes.forEach((node) => degree.set(node.id, 0));

  diagram.links.forEach((link) => {
    // Links pointing at unknown nodes do not count towards connectivity.
    if (!degree.has(link.source) || !degree.has(link.target)) return;
    degree.set(link.source, (degree.get(link.source) ?? 0) + 1);
    degree.set(link.target, (degree.get(link.target) ?? 0) + 1);
  });

  // The bigger the diagram, the more connections a node needs to be kept.
  const threshold = nodeCount > 1200 ? 3 : nodeCount > 600 ? 2 : 1;
  const keepIds = new Set<string>();

  degree.forEach((d, id) => {
    if (d >= threshold) keepIds.add(id);
  });

  // Anchor classes are kept whenever they exist, regardless of degree.
  const coreAnchors = ['Custodian', 'CustodianType', 'Organization', 'Person'];
  coreAnchors.forEach((id) => {
    if (degree.has(id)) keepIds.add(id);
  });

  const nodes = diagram.nodes.filter((node) => keepIds.has(node.id));
  const links = diagram.links.filter((link) => keepIds.has(link.source) && keepIds.has(link.target));

  // Too aggressive a cut: fall back to the full diagram.
  if (nodes.length < 50) {
    return diagram;
  }

  return {
    ...diagram,
    nodes,
    links,
  };
}

/**
 * Restricts a UML diagram to one module plus its direct (one-hop) neighbours.
 *
 * The diagram is returned unchanged when `moduleId` is empty or `'__all__'`,
 * or when no node belongs to the module. Nodes without a module are treated
 * as belonging to `'other'`. When the module plus its neighbours amount to
 * fewer than 20 nodes, only the module's own nodes and the links between them
 * are returned.
 *
 * @param diagram - Full diagram; never mutated.
 * @param moduleId - Module to focus on.
 * @returns The original diagram or a shallow copy with filtered nodes/links.
 */
function buildModuleFocusedUmlDiagram(diagram: UMLDiagram, moduleId: string): UMLDiagram {
  if (!diagram || !diagram.nodes || !diagram.links) {
    return diagram;
  }

  if (!moduleId || moduleId === '__all__') {
    return diagram;
  }

  const primaryNodes = diagram.nodes.filter((node) => (node.module || 'other') === moduleId);
  if (primaryNodes.length === 0) {
    return diagram;
  }

  const primaryIds = new Set(primaryNodes.map((n) => n.id));
  const keepIds = new Set(primaryIds);

  // Include one-hop neighbours for context around the selected module.
  // Membership is tested against the fixed set of primary ids: testing against
  // the growing keepIds set would pull in neighbours of neighbours and make
  // the result depend on the order of the links.
  for (const link of diagram.links) {
    if (primaryIds.has(link.source)) keepIds.add(link.target);
    if (primaryIds.has(link.target)) keepIds.add(link.source);
  }

  let nodes = diagram.nodes.filter((node) => keepIds.has(node.id));
  let links = diagram.links.filter((link) => keepIds.has(link.source) && keepIds.has(link.target));

  // Small result: drop the neighbour context and keep only module internals.
  if (nodes.length < 20) {
    nodes = primaryNodes;
    links = diagram.links.filter((link) => primaryIds.has(link.source) && primaryIds.has(link.target));
  }

  return {
    ...diagram,
    nodes,
    links,
  };
}
saved : 'full'; + }); + const [selectedUmlModule, setSelectedUmlModule] = useState(() => { + return localStorage.getItem('visualize-uml-module') || '__all__'; + }); // Dropdown state const [exportDropdownOpen, setExportDropdownOpen] = useState(false); @@ -1063,6 +1175,46 @@ export function Visualize() { const hasUmlContent = umlDiagram !== null; const hasContent = hasRdfContent || hasUmlContent; + const umlModuleOptions = useMemo(() => { + if (!umlDiagram) return []; + const counts = new Map(); + umlDiagram.nodes.forEach((node) => { + const moduleId = node.module || 'other'; + counts.set(moduleId, (counts.get(moduleId) || 0) + 1); + }); + + const options: UmlModuleOption[] = [{ id: '__all__', label: t('umlModuleAll'), count: umlDiagram.nodes.length }]; + const sorted = Array.from(counts.entries()).sort((a, b) => { + if (b[1] !== a[1]) return b[1] - a[1]; + return a[0].localeCompare(b[0]); + }); + + sorted.forEach(([id, count]) => { + options.push({ id, label: humanizeModuleName(id), count }); + }); + + return options; + }, [umlDiagram, t]); + + useEffect(() => { + if (!umlModuleOptions.length) return; + if (umlModuleOptions.some((option) => option.id === selectedUmlModule)) return; + + setSelectedUmlModule('__all__'); + localStorage.setItem('visualize-uml-module', '__all__'); + }, [umlModuleOptions, selectedUmlModule]); + + const displayUmlDiagram = useMemo(() => { + if (!umlDiagram) return null; + if (umlDensityMode === 'streamlined') { + return buildStreamlinedUmlDiagram(umlDiagram); + } + if (umlDensityMode === 'module') { + return buildModuleFocusedUmlDiagram(umlDiagram, selectedUmlModule); + } + return umlDiagram; + }, [umlDiagram, umlDensityMode, selectedUmlModule]); + return (
{/* Mobile overlay when sidebar is open */} @@ -1350,6 +1502,71 @@ export function Visualize() {
)} + {hasUmlContent && currentCategory === 'uml' && ( +
+

{t('umlDensity')}

+

{t('umlDensityDesc')}

+
+ + + +
+ {umlDensityMode === 'module' && umlModuleOptions.length > 0 && ( +
+ + +
+ )} + {displayUmlDiagram && umlDiagram && ( +

+ {displayUmlDiagram.nodes.length.toLocaleString()} / {umlDiagram.nodes.length.toLocaleString()} {t('umlShowingClasses')} +

+ )} +
# NOTE(review): reconstructed -- the named-group markers of this pattern were
# lost in the visible source. The group names follow the m.group("prefix") /
# m.group("local") calls in parse_mapping_curies; confirm against the original.
CURIE_RE = re.compile(r"^(?P<prefix>[a-z][a-z0-9_-]*):(?P<local>[A-Za-z0-9_./-]+)$")


def fetch_bytes(url: str, timeout: int = 60) -> bytes:
    """Download ``url`` and return the raw response body."""
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        return resp.read()


def fetch_text(url: str, timeout: int = 60) -> str:
    """Download ``url`` and return the body decoded as UTF-8.

    Bytes that are not valid UTF-8 are dropped rather than raising.
    """
    return fetch_bytes(url, timeout=timeout).decode("utf-8", errors="ignore")


def _is_mapping_key_line(stripped: str) -> bool:
    """Return True when ``stripped`` is a bare ``<mapping key>:`` block header."""
    if not stripped.endswith(":"):
        return False
    return stripped.split(":", 1)[0].strip() in MAPPING_KEYS


def parse_mapping_curies(file_path: Path) -> list[tuple[int, str, str]]:
    """Return (line_number, prefix, local) mapping CURIEs from mapping blocks.

    The file is scanned line by line (no YAML parser). A mapping block starts
    at a line consisting of one of ``MAPPING_KEYS`` followed by ``:`` and
    contains the ``- item`` lines below it. The block ends at the first
    non-list line indented at or above the key, or at a list item indented
    *less* than the key (which belongs to an enclosing sequence). Inline
    ``#`` comments and surrounding quotes are removed from items before they
    are matched against ``CURIE_RE``; items that do not match are ignored.
    """
    out: list[tuple[int, str, str]] = []
    lines = file_path.read_text(encoding="utf-8", errors="ignore").splitlines()

    in_block = False
    block_indent = -1

    for idx, line in enumerate(lines, start=1):
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            # Blank lines and comments never open, close or populate a block.
            continue

        indent = len(line) - len(line.lstrip(" "))
        is_item = stripped.startswith("-")

        if in_block:
            leaves_block = (
                indent < block_indent
                or (indent == block_indent and not is_item)
            )
            if leaves_block:
                in_block = False
                block_indent = -1

        if not in_block:
            # Either outside any block, or just left one: this line may be the
            # header of the next mapping block.
            if _is_mapping_key_line(stripped):
                in_block = True
                block_indent = indent
            continue

        if not is_item:
            continue

        item = stripped[1:].strip()
        # remove inline comment
        if " #" in item:
            item = item.split(" #", 1)[0].strip()
        # YAML scalars may be quoted; the quotes are not part of the CURIE.
        if len(item) >= 2 and item[0] == item[-1] and item[0] in "'\"":
            item = item[1:-1].strip()

        m = CURIE_RE.match(item)
        if m:
            out.append((idx, m.group("prefix"), m.group("local")))

    return out


def changed_yaml_files(repo_root: Path, scope: Path) -> list[Path]:
    """Collect changed and untracked YAML files inside scope.

    "Changed" covers both unstaged and staged modifications. Paths reported by
    git that no longer exist on disk (deletions) are skipped. If git is not
    installed or a git command fails, that command contributes no files.
    """

    def run(cmd: list[str]) -> list[str]:
        try:
            out = subprocess.check_output(cmd, cwd=repo_root)
        except (subprocess.CalledProcessError, OSError):
            # Not a repository, git failure, or git missing from PATH.
            return []
        return [x for x in out.decode().splitlines() if x]

    candidates = (
        run(["git", "diff", "--name-only"])
        + run(["git", "diff", "--name-only", "--cached"])
        + run(["git", "ls-files", "--others", "--exclude-standard"])
    )

    scope_root = scope.resolve()
    files: set[Path] = set()
    for rel in candidates:
        if not rel.endswith(".yaml"):
            continue
        p = (repo_root / rel).resolve()
        try:
            p.relative_to(scope_root)
        except ValueError:
            # Outside the requested scope.
            continue
        if p.is_file():
            files.add(p)

    return sorted(files)


def load_linked_art_terms() -> tuple[set[str], set[str]]:
    """Fetch the Linked Art terms document and return (properties, classes).

    Only top-level ``rdf:Property`` / ``rdfs:Class`` elements whose
    ``rdf:about`` starts with the Linked Art terms namespace are collected;
    each is reduced to the last path segment of its URI.
    """
    base = "https://linked.art/ns/terms/"
    about = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about"
    ns = {
        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    }

    root = ET.fromstring(fetch_bytes(base))

    def local_names(tag: str) -> set[str]:
        names: set[str] = set()
        for el in root.findall(tag, ns):
            uri = el.attrib.get(about, "")
            if uri.startswith(base):
                names.add(uri.rsplit("/", 1)[-1])
        return names

    return local_names("rdf:Property"), local_names("rdfs:Class")


def load_rda_ids(path: str, marker: str) -> set[str]:
    """Fetch an RDA Elements JSON-LD file and return every ``marker`` match.

    ``path`` is the element-set letter used in the registry URL and ``marker``
    a regular expression whose single group captures the identifier.
    """
    txt = fetch_text(f"https://www.rdaregistry.info/jsonld/Elements/{path}.jsonld")
    return set(re.findall(marker, txt))


def _verify_by_dereference(
    prefix: str,
    base_url: str,
    entries: list[tuple[Path, int, str]],
    failures: list[str],
) -> None:
    """Check each CURIE by fetching ``base_url + local`` and looking for ``local``.

    Every distinct local name is fetched once; the outcome is reused for all
    occurrences. Any fetch error is reported as a failure instead of aborting.
    """
    problems: dict[str, str] = {}  # local -> problem text, "" when verified
    for file_path, line_no, local in entries:
        if local not in problems:
            url = f"{base_url}{local}"
            try:
                txt = fetch_text(url)
            except Exception as e:  # pragma: no cover - network failures
                problems[local] = f"fetch error: {e}"
            else:
                problems[local] = "" if local in txt else f"not found at {url}"
        if problems[local]:
            failures.append(f"{file_path}:{line_no} {prefix}:{local} {problems[local]}")


def main() -> int:
    """Verify external mapping CURIEs in LinkML YAML files against their sources.

    Target files are the positional arguments, or every YAML file under
    ``--scope`` with ``--all``, or otherwise the changed/untracked YAML files
    under ``--scope``. Only ontologies whose prefix actually occurs are
    downloaded. Returns 0 when everything checked was verified (or nothing
    needed checking) and 1 when any CURIE or ontology download failed.
    """
    parser = argparse.ArgumentParser(description="Verify LinkML external mappings")
    parser.add_argument(
        "files",
        nargs="*",
        help="YAML files to verify (defaults to changed/untracked files under scope)",
    )
    parser.add_argument(
        "--scope",
        default="schemas/20251121/linkml",
        help="Default scope used when no files are provided",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Scan all YAML files under --scope (instead of changed/untracked files)",
    )
    args = parser.parse_args()

    repo_root = Path(__file__).resolve().parents[1]
    scope = (repo_root / args.scope).resolve()

    if args.files:
        files = [Path(f).resolve() for f in args.files]
    elif args.all:
        files = sorted(scope.rglob("*.yaml"))
    else:
        files = changed_yaml_files(repo_root, scope)

    if not files:
        print("No target YAML files found. Nothing to verify.")
        return 0

    occurrences: dict[str, list[tuple[Path, int, str]]] = {}
    for file_path in files:
        if not file_path.exists() or file_path.suffix != ".yaml":
            continue
        for line_no, pfx, local in parse_mapping_curies(file_path):
            if pfx not in SUPPORTED_PREFIXES:
                continue
            occurrences.setdefault(pfx, []).append((file_path, line_no, local))

    if not occurrences:
        print("No supported external mapping CURIEs found in selected files.")
        return 0

    failures: list[str] = []

    # Each source stays None unless its prefix occurs AND the download worked,
    # so an outage of an unused ontology cannot fail the run, and a failed
    # download is reported once instead of once per CURIE.
    la_terms = None
    rdac_ids = None
    rdau_ids = None
    pav_text = None

    if "la" in occurrences:
        try:
            la_props, la_classes = load_linked_art_terms()
            la_terms = la_props | la_classes
        except Exception as e:  # pragma: no cover - network failures
            failures.append(f"[load] Linked Art: {e}")

    if "rdac" in occurrences:
        try:
            rdac_ids = load_rda_ids("c", r"Elements/c/(C\d+)")
        except Exception as e:  # pragma: no cover
            failures.append(f"[load] RDA c.jsonld: {e}")

    if "rdau" in occurrences:
        try:
            rdau_ids = load_rda_ids("u", r"Elements/u/(P\d+)")
        except Exception as e:  # pragma: no cover
            failures.append(f"[load] RDA u.jsonld: {e}")

    if "pav" in occurrences:
        try:
            pav_text = fetch_text("https://purl.org/pav/2.3")
        except Exception as e:  # pragma: no cover
            failures.append(f"[load] PAV 2.3: {e}")

    print("Verifying mapping CURIEs:")
    for prefix in sorted(occurrences):
        locals_unique = sorted({x[2] for x in occurrences[prefix]})
        print(f"- {prefix}: {', '.join(locals_unique)}")

    # prefix-specific verification
    if la_terms is not None:
        for file_path, line_no, local in occurrences["la"]:
            if local not in la_terms:
                failures.append(f"{file_path}:{line_no} la:{local} not found in linked.art/ns/terms")

    if rdac_ids is not None:
        for file_path, line_no, local in occurrences["rdac"]:
            if local not in rdac_ids:
                failures.append(f"{file_path}:{line_no} rdac:{local} not found in RDA Elements/c.jsonld")

    if rdau_ids is not None:
        for file_path, line_no, local in occurrences["rdau"]:
            if local not in rdau_ids:
                failures.append(f"{file_path}:{line_no} rdau:{local} not found in RDA Elements/u.jsonld")

    if pav_text is not None:
        # NOTE(review): plain substring test against the ontology text, so a
        # short local name can match unrelated content -- consider tightening.
        for file_path, line_no, local in occurrences["pav"]:
            if local not in pav_text:
                failures.append(f"{file_path}:{line_no} pav:{local} not found in PAV 2.3 ontology")

    _verify_by_dereference(
        "ardo", "https://w3id.org/ardo/2.0/", occurrences.get("ardo", []), failures
    )
    _verify_by_dereference(
        "pca", "https://rds.posccaesar.org/ontology/plm/rdl/", occurrences.get("pca", []), failures
    )

    if failures:
        print("\nFAIL")
        for f in failures:
            print(f"- {f}")
        return 1

    print("\nOK: all checked mapping CURIEs were verified against source ontologies.")
    return 0


if __name__ == "__main__":
    sys.exit(main())