Add UML density section with display mode options and module picker
This commit is contained in:
parent
ee4f31ea1b
commit
2fd6f491ef
3 changed files with 664 additions and 2 deletions
|
|
@ -1050,6 +1050,111 @@
|
|||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
/* UML Density Section
   Sidebar panel offering three display-mode buttons plus a module picker. */
.uml-density-section {
  background: #f0f4ff;
  border-bottom: 1px solid #4a7dff;
  padding: 1rem 1.5rem;
}

/* Panel heading */
.uml-density-section h3 {
  color: #172a59;
  font-size: 0.875rem;
  font-weight: 600;
  margin: 0 0 0.5rem;
}

/* One-line description under the heading */
.uml-density-desc {
  color: #666;
  font-size: 0.75rem;
  margin: 0 0 0.75rem;
}

/* Vertical stack of mode buttons */
.uml-density-options {
  display: flex;
  flex-direction: column;
  gap: 0.5rem;
}

/* A single mode button: label on top, hint underneath */
.uml-density-option {
  display: flex;
  flex-direction: column;
  gap: 0.125rem;
  width: 100%;
  padding: 0.625rem 0.75rem;
  border: 1px solid #e0e0e0;
  border-radius: 6px;
  background: #fff;
  text-align: left;
  cursor: pointer;
  transition: all 0.15s ease;
}

.uml-density-option:hover {
  background: #f8f9ff;
  border-color: #4a7dff;
}

/* Declared after :hover so the selected button keeps its fill while hovered */
.uml-density-option.active {
  background: #ebefff;
  border-color: #4a7dff;
}

.uml-density-label {
  color: #172a59;
  font-size: 0.8125rem;
  font-weight: 500;
}

.uml-density-hint {
  color: #888;
  font-size: 0.6875rem;
}

/* Accent colour for the label of the selected mode */
.uml-density-option.active .uml-density-label {
  color: #4a7dff;
}

/* "shown / total" counter below the options */
.uml-density-info {
  color: #4a7dff;
  font-size: 0.75rem;
  font-weight: 500;
  margin: 0.625rem 0 0;
}

/* Module picker */
.uml-module-picker {
  margin-top: 0.75rem;
}

.uml-module-label {
  display: block;
  color: #172a59;
  font-size: 0.75rem;
  font-weight: 500;
  margin: 0 0 0.375rem;
}

.uml-module-select {
  width: 100%;
  padding: 0.5rem;
  border: 1px solid #c8d4ff;
  border-radius: 4px;
  background: #fff;
  color: #172a59;
  font-size: 0.8125rem;
  cursor: pointer;
}

.uml-module-select:hover {
  border-color: #4a7dff;
}

/* The default outline is replaced by a coloured border and a soft ring */
.uml-module-select:focus {
  border-color: #4a7dff;
  box-shadow: 0 0 0 2px rgba(74, 125, 255, 0.2);
  outline: none;
}
|
||||
|
||||
/* Node Info Section */
|
||||
.node-info-section {
|
||||
padding: 1rem 1.5rem;
|
||||
|
|
@ -2346,6 +2451,64 @@ body:has(.visualize-page.is-mobile .sidebar--mobile:not(.collapsed)) {
|
|||
background: #6b9eff;
|
||||
}
|
||||
|
||||
/* UML Density Section (dark theme overrides) */
[data-theme="dark"] .uml-density-section {
  background: #2d2d4a;
  border-bottom-color: #4a7dff;
}

[data-theme="dark"] .uml-density-section h3 {
  color: #e0e0e0;
}

[data-theme="dark"] .uml-density-desc {
  color: #a0a0b0;
}

[data-theme="dark"] .uml-density-option {
  background: #1e1e32;
  border-color: #3d3d5c;
}

[data-theme="dark"] .uml-density-option:hover {
  border-color: #4a7dff;
  background: #2d2d4a;
}

[data-theme="dark"] .uml-density-option.active {
  border-color: #4a7dff;
  background: #2d2d4a;
}

[data-theme="dark"] .uml-density-label {
  color: #e0e0e0;
}

[data-theme="dark"] .uml-density-hint {
  color: #888;
}

[data-theme="dark"] .uml-density-option.active .uml-density-label {
  color: #6b9eff;
}

[data-theme="dark"] .uml-density-info {
  color: #6b9eff;
}

[data-theme="dark"] .uml-module-label {
  color: #e0e0e0;
}

[data-theme="dark"] .uml-module-select {
  background: #1e1e32;
  border-color: #3d3d5c;
  color: #e0e0e0;
}

/*
 * The dark base rule above has the same specificity as the theme-agnostic
 * `.uml-module-select:hover` / `.uml-module-select:focus` rules and appears
 * later in the stylesheet, so it would win and reset their border colour.
 * Both states therefore need an explicit dark override.
 */
[data-theme="dark"] .uml-module-select:hover,
[data-theme="dark"] .uml-module-select:focus {
  border-color: #4a7dff;
}
|
||||
|
||||
/* Node Info Section */
|
||||
[data-theme="dark"] .node-info-section {
|
||||
background: #2d2d4a;
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
* Supports both RDF (Turtle, N-Triples) and UML (Mermaid, PlantUML, GraphViz) formats
|
||||
*/
|
||||
|
||||
import React, { useState, useCallback, useRef, useEffect } from 'react';
|
||||
import React, { useState, useCallback, useRef, useEffect, useMemo } from 'react';
|
||||
import { useDatabase } from '@/hooks/useDatabase';
|
||||
import { useRdfParser } from '@/hooks/useRdfParser';
|
||||
import { useGraphData } from '@/hooks/useGraphData';
|
||||
|
|
@ -60,6 +60,99 @@ function isAdvancedRdfLayout(layout: string): layout is RdfAdvancedLayoutType {
|
|||
return ['chord', 'radial-tree', 'sankey', 'edge-bundling', 'pack', 'tree', 'sunburst'].includes(layout);
|
||||
}
|
||||
|
||||
type UmlDensityMode = 'full' | 'streamlined' | 'module';
|
||||
type UmlModuleOption = { id: string; label: string; count: number };
|
||||
|
||||
/**
 * Turns a snake_case module id into a display label: underscores become
 * spaces and the first character of every word is upper-cased.
 *
 * @param moduleId - Raw module identifier, e.g. `core_classes`.
 * @returns The label, e.g. `Core Classes`.
 */
function humanizeModuleName(moduleId: string): string {
  const spaced = moduleId.split('_').join(' ');
  return spaced.replace(/\b\w/g, (initial) => initial.toUpperCase());
}
|
||||
|
||||
/**
 * Produces a reduced copy of a large UML diagram that keeps only the
 * better-connected classes.
 *
 * Diagrams with 300 nodes or fewer are returned untouched. For larger ones a
 * node is kept when the number of link endpoints touching it reaches a
 * threshold that grows with diagram size (1, 2 above 600 nodes, 3 above 1200).
 * A fixed list of anchor ids is kept whenever those nodes exist. If fewer than
 * 50 nodes would remain, the original diagram is returned instead.
 *
 * @param diagram - Diagram to reduce; returned as-is when it lacks nodes or links.
 * @returns The original diagram, or a shallow copy with filtered `nodes` and `links`.
 */
function buildStreamlinedUmlDiagram(diagram: UMLDiagram): UMLDiagram {
  const isIncomplete = !diagram || !diagram.nodes || !diagram.links;
  if (isIncomplete) return diagram;

  const total = diagram.nodes.length;
  if (total <= 300) return diagram;

  // Count link endpoints per node; links touching an unknown id are ignored.
  const linkCounts = new Map<string, number>();
  for (const node of diagram.nodes) {
    linkCounts.set(node.id, 0);
  }
  for (const link of diagram.links) {
    const bothKnown = linkCounts.has(link.source) && linkCounts.has(link.target);
    if (bothKnown) {
      linkCounts.set(link.source, (linkCounts.get(link.source) || 0) + 1);
      linkCounts.set(link.target, (linkCounts.get(link.target) || 0) + 1);
    }
  }

  let minLinks = 1;
  if (total > 1200) {
    minLinks = 3;
  } else if (total > 600) {
    minLinks = 2;
  }

  const retained = new Set<string>(
    Array.from(linkCounts.entries())
      .filter((entry) => entry[1] >= minLinks)
      .map((entry) => entry[0]),
  );

  // Anchor classes survive regardless of how many links they have.
  for (const anchor of ['Custodian', 'CustodianType', 'Organization', 'Person']) {
    if (linkCounts.has(anchor)) retained.add(anchor);
  }

  const nodes = diagram.nodes.filter((node) => retained.has(node.id));
  if (nodes.length < 50) return diagram;

  const links = diagram.links.filter(
    (link) => retained.has(link.source) && retained.has(link.target),
  );

  return { ...diagram, nodes, links };
}
|
||||
|
||||
/**
 * Restricts a UML diagram to one module plus its directly linked neighbours.
 *
 * Nodes without a `module` value are treated as belonging to `'other'`.
 * The diagram is returned unchanged when it lacks nodes or links, when
 * `moduleId` is empty or `'__all__'`, or when no node belongs to the module.
 *
 * @param diagram - Full diagram to filter.
 * @param moduleId - Module to focus on.
 * @returns The original diagram, or a shallow copy with filtered `nodes` and `links`.
 */
function buildModuleFocusedUmlDiagram(diagram: UMLDiagram, moduleId: string): UMLDiagram {
  if (!diagram || !diagram.nodes || !diagram.links) {
    return diagram;
  }

  if (!moduleId || moduleId === '__all__') {
    return diagram;
  }

  const primaryNodes = diagram.nodes.filter((node) => (node.module || 'other') === moduleId);
  if (primaryNodes.length === 0) {
    return diagram;
  }

  const primaryIds = new Set<string>(primaryNodes.map((n) => n.id));
  const knownIds = new Set<string>(diagram.nodes.map((n) => n.id));
  const keepIds = new Set<string>(primaryIds);

  // Include one-hop neighbours for context around the selected module.
  // Membership is tested against the fixed primary set, not the growing keep
  // set; otherwise a neighbour added by an earlier link would pull in its own
  // neighbours and the result would depend on link order.
  diagram.links.forEach((link) => {
    // A link whose endpoint has no node would otherwise survive as a dangling link.
    if (!knownIds.has(link.source) || !knownIds.has(link.target)) return;
    if (primaryIds.has(link.source)) keepIds.add(link.target);
    if (primaryIds.has(link.target)) keepIds.add(link.source);
  });

  let nodes = diagram.nodes.filter((node) => keepIds.has(node.id));
  let links = diagram.links.filter((link) => keepIds.has(link.source) && keepIds.has(link.target));

  // When module plus neighbours totals fewer than 20 nodes, show only the
  // module's own nodes and the links between them.
  if (nodes.length < 20) {
    nodes = primaryNodes;
    links = diagram.links.filter((link) => primaryIds.has(link.source) && primaryIds.has(link.target));
  }

  return {
    ...diagram,
    nodes,
    links,
  };
}
|
||||
|
||||
// Bilingual text object for translations
|
||||
const TEXT = {
|
||||
// Sidebar
|
||||
|
|
@ -90,6 +183,17 @@ const TEXT = {
|
|||
refreshUml: { nl: 'UML Vernieuwen', en: 'Refresh UML' },
|
||||
refreshRdf: { nl: 'RDF Vernieuwen', en: 'Refresh RDF' },
|
||||
refreshHint: { nl: 'Haal de nieuwste versie op', en: 'Fetch the latest version' },
|
||||
umlDensity: { nl: 'UML Weergavemodus', en: 'UML Display Mode' },
|
||||
umlDensityDesc: { nl: 'Kies tussen volledig en gestroomlijnd overzicht', en: 'Choose between full and streamlined overview' },
|
||||
umlModeFull: { nl: 'Volledig', en: 'Full' },
|
||||
umlModeFullHint: { nl: 'Toon alle klassen en relaties', en: 'Show all classes and relationships' },
|
||||
umlModeStreamlined: { nl: 'Gestroomlijnd', en: 'Streamlined' },
|
||||
umlModeStreamlinedHint: { nl: 'Toon de belangrijkste, meest verbonden klassen', en: 'Show key, high-connectivity classes' },
|
||||
umlModeModule: { nl: 'Module Focus', en: 'Module Focus' },
|
||||
umlModeModuleHint: { nl: 'Toon een domein met context', en: 'Show one domain with context' },
|
||||
umlModuleSelect: { nl: 'Module', en: 'Module' },
|
||||
umlModuleAll: { nl: 'Alle modules', en: 'All modules' },
|
||||
umlShowingClasses: { nl: 'klassen zichtbaar', en: 'classes visible' },
|
||||
|
||||
// View switcher
|
||||
viewUml: { nl: 'UML Weergave', en: 'UML View' },
|
||||
|
|
@ -364,6 +468,14 @@ export function Visualize() {
|
|||
}
|
||||
return null;
|
||||
});
|
||||
|
||||
const [umlDensityMode, setUmlDensityMode] = useState<UmlDensityMode>(() => {
|
||||
const saved = localStorage.getItem('visualize-uml-density-mode');
|
||||
return (saved === 'streamlined' || saved === 'module') ? saved : 'full';
|
||||
});
|
||||
const [selectedUmlModule, setSelectedUmlModule] = useState<string>(() => {
|
||||
return localStorage.getItem('visualize-uml-module') || '__all__';
|
||||
});
|
||||
|
||||
// Dropdown state
|
||||
const [exportDropdownOpen, setExportDropdownOpen] = useState<boolean>(false);
|
||||
|
|
@ -1063,6 +1175,46 @@ export function Visualize() {
|
|||
const hasUmlContent = umlDiagram !== null;
|
||||
const hasContent = hasRdfContent || hasUmlContent;
|
||||
|
||||
const umlModuleOptions = useMemo<UmlModuleOption[]>(() => {
|
||||
if (!umlDiagram) return [];
|
||||
const counts = new Map<string, number>();
|
||||
umlDiagram.nodes.forEach((node) => {
|
||||
const moduleId = node.module || 'other';
|
||||
counts.set(moduleId, (counts.get(moduleId) || 0) + 1);
|
||||
});
|
||||
|
||||
const options: UmlModuleOption[] = [{ id: '__all__', label: t('umlModuleAll'), count: umlDiagram.nodes.length }];
|
||||
const sorted = Array.from(counts.entries()).sort((a, b) => {
|
||||
if (b[1] !== a[1]) return b[1] - a[1];
|
||||
return a[0].localeCompare(b[0]);
|
||||
});
|
||||
|
||||
sorted.forEach(([id, count]) => {
|
||||
options.push({ id, label: humanizeModuleName(id), count });
|
||||
});
|
||||
|
||||
return options;
|
||||
}, [umlDiagram, t]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!umlModuleOptions.length) return;
|
||||
if (umlModuleOptions.some((option) => option.id === selectedUmlModule)) return;
|
||||
|
||||
setSelectedUmlModule('__all__');
|
||||
localStorage.setItem('visualize-uml-module', '__all__');
|
||||
}, [umlModuleOptions, selectedUmlModule]);
|
||||
|
||||
const displayUmlDiagram = useMemo(() => {
|
||||
if (!umlDiagram) return null;
|
||||
if (umlDensityMode === 'streamlined') {
|
||||
return buildStreamlinedUmlDiagram(umlDiagram);
|
||||
}
|
||||
if (umlDensityMode === 'module') {
|
||||
return buildModuleFocusedUmlDiagram(umlDiagram, selectedUmlModule);
|
||||
}
|
||||
return umlDiagram;
|
||||
}, [umlDiagram, umlDensityMode, selectedUmlModule]);
|
||||
|
||||
return (
|
||||
<div className={`visualize-page ${isFullscreen ? 'fullscreen-active' : ''} ${isMobile ? 'is-mobile' : ''}`}>
|
||||
{/* Mobile overlay when sidebar is open */}
|
||||
|
|
@ -1350,6 +1502,71 @@ export function Visualize() {
|
|||
</div>
|
||||
)}
|
||||
|
||||
{hasUmlContent && currentCategory === 'uml' && (
|
||||
<div className="uml-density-section">
|
||||
<h3>{t('umlDensity')}</h3>
|
||||
<p className="uml-density-desc">{t('umlDensityDesc')}</p>
|
||||
<div className="uml-density-options">
|
||||
<button
|
||||
className={`uml-density-option ${umlDensityMode === 'full' ? 'active' : ''}`}
|
||||
onClick={() => {
|
||||
setUmlDensityMode('full');
|
||||
localStorage.setItem('visualize-uml-density-mode', 'full');
|
||||
}}
|
||||
>
|
||||
<span className="uml-density-label">{t('umlModeFull')}</span>
|
||||
<span className="uml-density-hint">{t('umlModeFullHint')}</span>
|
||||
</button>
|
||||
<button
|
||||
className={`uml-density-option ${umlDensityMode === 'streamlined' ? 'active' : ''}`}
|
||||
onClick={() => {
|
||||
setUmlDensityMode('streamlined');
|
||||
localStorage.setItem('visualize-uml-density-mode', 'streamlined');
|
||||
}}
|
||||
>
|
||||
<span className="uml-density-label">{t('umlModeStreamlined')}</span>
|
||||
<span className="uml-density-hint">{t('umlModeStreamlinedHint')}</span>
|
||||
</button>
|
||||
<button
|
||||
className={`uml-density-option ${umlDensityMode === 'module' ? 'active' : ''}`}
|
||||
onClick={() => {
|
||||
setUmlDensityMode('module');
|
||||
localStorage.setItem('visualize-uml-density-mode', 'module');
|
||||
}}
|
||||
>
|
||||
<span className="uml-density-label">{t('umlModeModule')}</span>
|
||||
<span className="uml-density-hint">{t('umlModeModuleHint')}</span>
|
||||
</button>
|
||||
</div>
|
||||
{umlDensityMode === 'module' && umlModuleOptions.length > 0 && (
|
||||
<div className="uml-module-picker">
|
||||
<label htmlFor="uml-module-select" className="uml-module-label">{t('umlModuleSelect')}</label>
|
||||
<select
|
||||
id="uml-module-select"
|
||||
className="uml-module-select"
|
||||
value={selectedUmlModule}
|
||||
onChange={(e) => {
|
||||
const next = e.target.value;
|
||||
setSelectedUmlModule(next);
|
||||
localStorage.setItem('visualize-uml-module', next);
|
||||
}}
|
||||
>
|
||||
{umlModuleOptions.map((option) => (
|
||||
<option key={option.id} value={option.id}>
|
||||
{option.label} ({option.count.toLocaleString()})
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
)}
|
||||
{displayUmlDiagram && umlDiagram && (
|
||||
<p className="uml-density-info">
|
||||
{displayUmlDiagram.nodes.length.toLocaleString()} / {umlDiagram.nodes.length.toLocaleString()} {t('umlShowingClasses')}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Graph Controls (RDF only) */}
|
||||
{hasRdfContent && currentCategory === 'rdf' && (
|
||||
<GraphControls
|
||||
|
|
@ -1814,7 +2031,7 @@ export function Visualize() {
|
|||
{!isLoading && !umlError && hasUmlContent && currentCategory === 'uml' && (
|
||||
<div className="uml-canvas">
|
||||
<UMLVisualization
|
||||
diagram={umlDiagram!}
|
||||
diagram={displayUmlDiagram || umlDiagram!}
|
||||
width={1400}
|
||||
height={900}
|
||||
layoutType={layoutType}
|
||||
|
|
|
|||
282
scripts/verify_external_mappings.py
Executable file
282
scripts/verify_external_mappings.py
Executable file
|
|
@ -0,0 +1,282 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Verify external ontology mappings used in LinkML YAML files.
|
||||
|
||||
Default behavior targets changed/untracked YAML files under:
|
||||
schemas/20251121/linkml/
|
||||
|
||||
It validates mapping CURIEs under mapping keys:
|
||||
exact_mappings, close_mappings, broad_mappings, narrow_mappings, related_mappings
|
||||
|
||||
Supported prefixes:
|
||||
- la (Linked Art)
|
||||
- rdac (RDA classes)
|
||||
- rdau (RDA unconstrained properties)
|
||||
- pav (PAV 2.3)
|
||||
- ardo (ArDO)
|
||||
- pca (POSC Caesar RDS)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# YAML keys whose list values are scanned for mapping CURIEs.
MAPPING_KEYS = {
    "exact_mappings",
    "close_mappings",
    "broad_mappings",
    "narrow_mappings",
    "related_mappings",
}
# CURIE prefixes this script can verify; main() skips every other prefix.
SUPPORTED_PREFIXES = {"la", "rdac", "rdau", "pav", "ardo", "pca"}
# Matches a complete `prefix:local` CURIE. The prefix must start with a
# lowercase letter; the local part allows letters, digits and `_ . / -`.
CURIE_RE = re.compile(r"^(?P<prefix>[a-z][a-z0-9_-]*):(?P<local>[A-Za-z0-9_./-]+)$")
|
||||
|
||||
|
||||
def fetch_text(url: str, timeout: int = 60) -> str:
    """Download ``url`` and return its body as text.

    The body is decoded as UTF-8; bytes that are not valid UTF-8 are dropped.

    Args:
        url: Location to open with ``urllib.request.urlopen``.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The decoded response body.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        payload = response.read()
    return payload.decode("utf-8", errors="ignore")
|
||||
|
||||
|
||||
def fetch_bytes(url: str, timeout: int = 60) -> bytes:
    """Download ``url`` and return its raw, undecoded body.

    Args:
        url: Location to open with ``urllib.request.urlopen``.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The response body as bytes.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        payload = response.read()
    return payload
|
||||
|
||||
|
||||
def parse_mapping_curies(file_path: Path) -> list[tuple[int, str, str]]:
    """Return (line_number, prefix, local) mapping CURIEs from mapping blocks.

    This is a line-based scan, not a YAML parse. A mapping block starts at a
    line consisting of one of ``MAPPING_KEYS`` followed by a bare ``:`` and
    covers the ``- item`` lines that follow it. Only block-style lists are
    recognised.

    Args:
        file_path: YAML file to scan; read as UTF-8 with undecodable bytes ignored.

    Returns:
        One tuple per list item that matches ``CURIE_RE``, in file order, with
        1-based line numbers.
    """
    out: list[tuple[int, str, str]] = []
    lines = file_path.read_text(encoding="utf-8", errors="ignore").splitlines()

    in_block = False
    block_indent = -1

    def opens_mapping_block(text: str) -> bool:
        # True for a line such as "exact_mappings:" with nothing after the colon.
        if ":" not in text:
            return False
        key = text.split(":", 1)[0].strip()
        return key in MAPPING_KEYS and text.endswith(":")

    for idx, line in enumerate(lines, start=1):
        stripped = line.strip()
        # Blank lines and comments never open, close, or contribute to a block.
        if not stripped or stripped.startswith("#"):
            continue

        indent = len(line) - len(line.lstrip(" "))
        is_item = stripped.startswith("-")

        # Leave the block on any line indented less than the key, or on a
        # non-list line at the key's own indent. List items at the key's indent
        # still belong to the block; a shallower list item belongs to an
        # enclosing list and must not be read as a mapping.
        if in_block and (indent < block_indent or (indent == block_indent and not is_item)):
            in_block = False
            block_indent = -1

        if not in_block:
            # The line that closed a block may itself open the next one.
            if opens_mapping_block(stripped):
                in_block = True
                block_indent = indent
            continue

        if not is_item:
            continue

        item = stripped[1:].strip()
        # remove inline comment
        if " #" in item:
            item = item.split(" #", 1)[0].strip()
        # YAML allows scalars to be quoted; unwrap a matching pair of quotes so
        # quoted CURIEs are verified instead of silently ignored.
        if len(item) >= 2 and item[0] == item[-1] and item[0] in "'\"":
            item = item[1:-1].strip()

        m = CURIE_RE.match(item)
        if m:
            out.append((idx, m.group("prefix"), m.group("local")))

    return out
|
||||
|
||||
|
||||
def changed_yaml_files(repo_root: Path, scope: Path) -> list[Path]:
    """Collect changed and untracked YAML files inside scope.

    "Changed" covers both unstaged and staged modifications. Git is queried on
    a best-effort basis: if a git command fails or git cannot be started, that
    command contributes no files.

    Args:
        repo_root: Directory the git commands run in; reported paths are
            resolved relative to it.
        scope: Directory that a file must live under to be returned.

    Returns:
        Sorted, resolved paths of existing ``.yaml`` files under ``scope``.
    """
    files: set[Path] = set()

    def run(cmd: list[str]) -> list[str]:
        try:
            out = subprocess.check_output(cmd, cwd=repo_root)
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing git executable or an unusable cwd.
            return []
        return [x for x in out.decode(errors="replace").splitlines() if x]

    unstaged = run(["git", "diff", "--name-only"])
    # Plain `git diff` omits changes that were already added to the index.
    staged = run(["git", "diff", "--name-only", "--cached"])
    untracked = run(["git", "ls-files", "--others", "--exclude-standard"])

    scope_resolved = scope.resolve()

    for rel in unstaged + staged + untracked:
        if not rel.endswith(".yaml"):
            continue
        p = (repo_root / rel).resolve()
        try:
            p.relative_to(scope_resolved)
        except ValueError:
            # Outside the requested scope.
            continue
        # Deleted files still show up in `git diff`; only keep what exists.
        if p.is_file():
            files.add(p)

    return sorted(files)
|
||||
|
||||
|
||||
def load_linked_art_terms() -> tuple[set[str], set[str]]:
    """Fetch the Linked Art terms document and list the names it declares.

    Only top-level ``rdf:Property`` and ``rdfs:Class`` elements whose
    ``rdf:about`` URI starts with the Linked Art terms namespace are
    considered; the name is the final ``/``-separated segment of that URI.

    Returns:
        A ``(properties, classes)`` pair of local-name sets.
    """
    terms_base = "https://linked.art/ns/terms/"
    rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    rdfs_ns = "http://www.w3.org/2000/01/rdf-schema#"
    about_attr = f"{{{rdf_ns}}}about"

    document = ET.fromstring(fetch_bytes(terms_base))

    def local_names(qualified_tag: str) -> set[str]:
        # Direct children of the root carrying the given namespaced tag.
        names: set[str] = set()
        for element in document.findall(qualified_tag):
            uri = element.attrib.get(about_attr, "")
            if uri.startswith(terms_base):
                names.add(uri.rsplit("/", 1)[-1])
        return names

    props = local_names(f"{{{rdf_ns}}}Property")
    classes = local_names(f"{{{rdfs_ns}}}Class")
    return props, classes
|
||||
|
||||
|
||||
def load_rda_ids(path: str, marker: str) -> set[str]:
    """Download one RDA Registry element set and extract ids from its text.

    Args:
        path: Element-set name inserted into the registry URL
            (``.../jsonld/Elements/<path>.jsonld``).
        marker: Regular expression applied to the raw document with
            ``re.findall``.

    Returns:
        The distinct ``re.findall`` results for ``marker``.
    """
    url = f"https://www.rdaregistry.info/jsonld/Elements/{path}.jsonld"
    document = fetch_text(url)
    matches = re.findall(marker, document)
    return set(matches)
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: verify mapping CURIEs in the selected YAML files.

    Files come from the positional arguments, from every ``*.yaml`` under
    ``--scope`` when ``--all`` is given, or otherwise from the changed and
    untracked files under ``--scope``. CURIEs with a prefix outside
    ``SUPPORTED_PREFIXES`` are ignored.

    Reference data is downloaded only for prefixes that actually occur, so an
    outage of an ontology that is not used cannot fail the run.

    Returns:
        0 when there was nothing to check or every checked CURIE was found,
        1 when any reference source could not be loaded or any CURIE was not found.
    """
    parser = argparse.ArgumentParser(description="Verify LinkML external mappings")
    parser.add_argument(
        "files",
        nargs="*",
        help="YAML files to verify (defaults to changed/untracked files under scope)",
    )
    parser.add_argument(
        "--scope",
        default="schemas/20251121/linkml",
        help="Default scope used when no files are provided",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Scan all YAML files under --scope (instead of changed/untracked files)",
    )
    args = parser.parse_args()

    # The script lives one directory below the repository root.
    repo_root = Path(__file__).resolve().parents[1]
    scope = (repo_root / args.scope).resolve()

    if args.files:
        files = [Path(f).resolve() for f in args.files]
    elif args.all:
        files = sorted(scope.rglob("*.yaml"))
    else:
        files = changed_yaml_files(repo_root, scope)

    if not files:
        print("No target YAML files found. Nothing to verify.")
        return 0

    # prefix -> [(file, line number, local name)]
    occurrences: dict[str, list[tuple[Path, int, str]]] = {}
    for file_path in files:
        if not file_path.exists() or file_path.suffix != ".yaml":
            continue
        for line_no, pfx, local in parse_mapping_curies(file_path):
            if pfx not in SUPPORTED_PREFIXES:
                continue
            occurrences.setdefault(pfx, []).append((file_path, line_no, local))

    if not occurrences:
        print("No supported external mapping CURIEs found in selected files.")
        return 0

    failures: list[str] = []
    # Prefixes whose reference data could not be loaded. Their occurrences are
    # not checked individually: the load failure is already reported, and a
    # "not found" per occurrence would only be noise.
    load_failed: set[str] = set()

    la_props: set[str] = set()
    la_classes: set[str] = set()
    rdac_ids: set[str] = set()
    rdau_ids: set[str] = set()
    pav_text = ""

    if "la" in occurrences:
        try:
            la_props, la_classes = load_linked_art_terms()
        except Exception as e:  # pragma: no cover - network failures
            failures.append(f"[load] Linked Art: {e}")
            load_failed.add("la")

    if "rdac" in occurrences:
        try:
            rdac_ids = load_rda_ids("c", r"Elements/c/(C\d+)")
        except Exception as e:  # pragma: no cover
            failures.append(f"[load] RDA c.jsonld: {e}")
            load_failed.add("rdac")

    if "rdau" in occurrences:
        try:
            rdau_ids = load_rda_ids("u", r"Elements/u/(P\d+)")
        except Exception as e:  # pragma: no cover
            failures.append(f"[load] RDA u.jsonld: {e}")
            load_failed.add("rdau")

    if "pav" in occurrences:
        try:
            pav_text = fetch_text("https://purl.org/pav/2.3")
        except Exception as e:  # pragma: no cover
            failures.append(f"[load] PAV 2.3: {e}")
            load_failed.add("pav")

    print("Verifying mapping CURIEs:")
    for prefix in sorted(occurrences):
        locals_unique = sorted({x[2] for x in occurrences[prefix]})
        print(f"- {prefix}: {', '.join(locals_unique)}")

    # prefix-specific verification
    if "la" not in load_failed:
        for file_path, line_no, local in occurrences.get("la", []):
            if local not in la_props and local not in la_classes:
                failures.append(f"{file_path}:{line_no} la:{local} not found in linked.art/ns/terms")

    if "rdac" not in load_failed:
        for file_path, line_no, local in occurrences.get("rdac", []):
            if local not in rdac_ids:
                failures.append(f"{file_path}:{line_no} rdac:{local} not found in RDA Elements/c.jsonld")

    if "rdau" not in load_failed:
        for file_path, line_no, local in occurrences.get("rdau", []):
            if local not in rdau_ids:
                failures.append(f"{file_path}:{line_no} rdau:{local} not found in RDA Elements/u.jsonld")

    if "pav" not in load_failed:
        for file_path, line_no, local in occurrences.get("pav", []):
            # NOTE(review): this is a plain substring test against the whole
            # ontology text, so a short local name can match by accident.
            if local not in pav_text:
                failures.append(f"{file_path}:{line_no} pav:{local} not found in PAV 2.3 ontology")

    def verify_by_fetch(prefix: str, base_url: str) -> None:
        # Check each term by fetching its own URL and looking for the local name
        # in the response. Each URL is fetched once even when the term occurs
        # in several places.
        fetched: dict[str, tuple[bool, object]] = {}
        for file_path, line_no, local in occurrences.get(prefix, []):
            url = f"{base_url}{local}"
            if url not in fetched:
                try:
                    fetched[url] = (True, fetch_text(url))
                except Exception as e:  # pragma: no cover - network failures
                    # Timeouts and connection resets are not always URLError;
                    # record any failure rather than aborting the whole run.
                    fetched[url] = (False, e)
            ok, payload = fetched[url]
            if not ok:
                failures.append(f"{file_path}:{line_no} {prefix}:{local} fetch error: {payload}")
            elif local not in payload:
                failures.append(f"{file_path}:{line_no} {prefix}:{local} not found at {url}")

    verify_by_fetch("ardo", "https://w3id.org/ardo/2.0/")
    verify_by_fetch("pca", "https://rds.posccaesar.org/ontology/plm/rdl/")

    if failures:
        print("\nFAIL")
        for f in failures:
            print(f"- {f}")
        return 1

    print("\nOK: all checked mapping CURIEs were verified against source ontologies.")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Loading…
Reference in a new issue