Add UML density section with display mode options and module picker
This commit is contained in:
parent
ee4f31ea1b
commit
2fd6f491ef
3 changed files with 664 additions and 2 deletions
|
|
@ -1050,6 +1050,111 @@
|
||||||
cursor: not-allowed;
|
cursor: not-allowed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* UML Density Section */
|
||||||
|
.uml-density-section {
|
||||||
|
padding: 1rem 1.5rem;
|
||||||
|
background: #f0f4ff;
|
||||||
|
border-bottom: 1px solid #4a7dff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-density-section h3 {
|
||||||
|
margin: 0 0 0.5rem 0;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
color: #172a59;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-density-desc {
|
||||||
|
margin: 0 0 0.75rem 0;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: #666;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-density-options {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-density-option {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.125rem;
|
||||||
|
width: 100%;
|
||||||
|
padding: 0.625rem 0.75rem;
|
||||||
|
background: white;
|
||||||
|
border: 1px solid #e0e0e0;
|
||||||
|
border-radius: 6px;
|
||||||
|
text-align: left;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.15s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-density-option:hover {
|
||||||
|
border-color: #4a7dff;
|
||||||
|
background: #f8f9ff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-density-option.active {
|
||||||
|
border-color: #4a7dff;
|
||||||
|
background: #ebefff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-density-label {
|
||||||
|
font-size: 0.8125rem;
|
||||||
|
font-weight: 500;
|
||||||
|
color: #172a59;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-density-hint {
|
||||||
|
font-size: 0.6875rem;
|
||||||
|
color: #888;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-density-option.active .uml-density-label {
|
||||||
|
color: #4a7dff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-density-info {
|
||||||
|
margin: 0.625rem 0 0 0;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: #4a7dff;
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-module-picker {
|
||||||
|
margin-top: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-module-label {
|
||||||
|
display: block;
|
||||||
|
margin: 0 0 0.375rem 0;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: #172a59;
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-module-select {
|
||||||
|
width: 100%;
|
||||||
|
padding: 0.5rem;
|
||||||
|
border: 1px solid #c8d4ff;
|
||||||
|
border-radius: 4px;
|
||||||
|
background: white;
|
||||||
|
font-size: 0.8125rem;
|
||||||
|
color: #172a59;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-module-select:hover {
|
||||||
|
border-color: #4a7dff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.uml-module-select:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: #4a7dff;
|
||||||
|
box-shadow: 0 0 0 2px rgba(74, 125, 255, 0.2);
|
||||||
|
}
|
||||||
|
|
||||||
/* Node Info Section */
|
/* Node Info Section */
|
||||||
.node-info-section {
|
.node-info-section {
|
||||||
padding: 1rem 1.5rem;
|
padding: 1rem 1.5rem;
|
||||||
|
|
@ -2346,6 +2451,64 @@ body:has(.visualize-page.is-mobile .sidebar--mobile:not(.collapsed)) {
|
||||||
background: #6b9eff;
|
background: #6b9eff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-density-section {
|
||||||
|
background: #2d2d4a;
|
||||||
|
border-bottom-color: #4a7dff;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-density-section h3 {
|
||||||
|
color: #e0e0e0;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-density-desc {
|
||||||
|
color: #a0a0b0;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-density-option {
|
||||||
|
background: #1e1e32;
|
||||||
|
border-color: #3d3d5c;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-density-option:hover {
|
||||||
|
border-color: #4a7dff;
|
||||||
|
background: #2d2d4a;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-density-option.active {
|
||||||
|
border-color: #4a7dff;
|
||||||
|
background: #2d2d4a;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-density-label {
|
||||||
|
color: #e0e0e0;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-density-hint {
|
||||||
|
color: #888;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-density-option.active .uml-density-label {
|
||||||
|
color: #6b9eff;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-density-info {
|
||||||
|
color: #6b9eff;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-module-label {
|
||||||
|
color: #e0e0e0;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-module-select {
|
||||||
|
background: #1e1e32;
|
||||||
|
border-color: #3d3d5c;
|
||||||
|
color: #e0e0e0;
|
||||||
|
}
|
||||||
|
|
||||||
|
[data-theme="dark"] .uml-module-select:hover {
|
||||||
|
border-color: #4a7dff;
|
||||||
|
}
|
||||||
|
|
||||||
/* Node Info Section */
|
/* Node Info Section */
|
||||||
[data-theme="dark"] .node-info-section {
|
[data-theme="dark"] .node-info-section {
|
||||||
background: #2d2d4a;
|
background: #2d2d4a;
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
* Supports both RDF (Turtle, N-Triples) and UML (Mermaid, PlantUML, GraphViz) formats
|
* Supports both RDF (Turtle, N-Triples) and UML (Mermaid, PlantUML, GraphViz) formats
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import React, { useState, useCallback, useRef, useEffect } from 'react';
|
import React, { useState, useCallback, useRef, useEffect, useMemo } from 'react';
|
||||||
import { useDatabase } from '@/hooks/useDatabase';
|
import { useDatabase } from '@/hooks/useDatabase';
|
||||||
import { useRdfParser } from '@/hooks/useRdfParser';
|
import { useRdfParser } from '@/hooks/useRdfParser';
|
||||||
import { useGraphData } from '@/hooks/useGraphData';
|
import { useGraphData } from '@/hooks/useGraphData';
|
||||||
|
|
@ -60,6 +60,99 @@ function isAdvancedRdfLayout(layout: string): layout is RdfAdvancedLayoutType {
|
||||||
return ['chord', 'radial-tree', 'sankey', 'edge-bundling', 'pack', 'tree', 'sunburst'].includes(layout);
|
return ['chord', 'radial-tree', 'sankey', 'edge-bundling', 'pack', 'tree', 'sunburst'].includes(layout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type UmlDensityMode = 'full' | 'streamlined' | 'module';
|
||||||
|
type UmlModuleOption = { id: string; label: string; count: number };
|
||||||
|
|
||||||
|
/**
 * Convert a raw module identifier into a human-readable label.
 *
 * Every underscore is replaced by a space, then each character that begins a
 * word (regex `\b\w`) is upper-cased, e.g. `core_classes` -> `Core Classes`.
 */
function humanizeModuleName(moduleId: string): string {
  const spaced = moduleId.split('_').join(' ');
  return spaced.replace(/\b\w/g, (firstChar) => firstChar.toUpperCase());
}
|
||||||
|
|
||||||
|
/**
 * Produce a reduced copy of a large UML diagram that keeps only
 * well-connected classes.
 *
 * - Diagrams that are falsy, lack `nodes`/`links`, or have 300 nodes or fewer
 *   are returned unchanged (same object).
 * - A node's degree is the number of link endpoints that reference it; links
 *   whose source or target is not a known node id are ignored.
 * - The minimum degree to keep a node grows with diagram size
 *   (>1200 nodes: 3, >600 nodes: 2, otherwise 1).
 * - A fixed list of anchor class ids is always kept when present.
 * - If fewer than 50 nodes survive, the original diagram is returned instead.
 */
function buildStreamlinedUmlDiagram(diagram: UMLDiagram): UMLDiagram {
  if (!diagram || !diagram.nodes || !diagram.links) {
    return diagram;
  }

  const totalNodes = diagram.nodes.length;
  if (totalNodes <= 300) {
    return diagram;
  }

  // Start every known node at degree 0 so isolated nodes are represented too.
  const linkCountById = new Map<string, number>();
  for (const node of diagram.nodes) {
    linkCountById.set(node.id, 0);
  }

  for (const { source, target } of diagram.links) {
    const bothKnown = linkCountById.has(source) && linkCountById.has(target);
    if (bothKnown) {
      linkCountById.set(source, (linkCountById.get(source) || 0) + 1);
      linkCountById.set(target, (linkCountById.get(target) || 0) + 1);
    }
  }

  let minDegree = 1;
  if (totalNodes > 1200) {
    minDegree = 3;
  } else if (totalNodes > 600) {
    minDegree = 2;
  }

  const retainedIds = new Set<string>();
  linkCountById.forEach((count, id) => {
    if (count >= minDegree) retainedIds.add(id);
  });

  // Anchor classes stay visible regardless of their connectivity.
  for (const anchorId of ['Custodian', 'CustodianType', 'Organization', 'Person']) {
    if (linkCountById.has(anchorId)) retainedIds.add(anchorId);
  }

  const nodes = diagram.nodes.filter((node) => retainedIds.has(node.id));
  if (nodes.length < 50) {
    // Too aggressive a cut: fall back to the untouched diagram.
    return diagram;
  }

  const links = diagram.links.filter(
    (link) => retainedIds.has(link.source) && retainedIds.has(link.target),
  );

  return { ...diagram, nodes, links };
}
|
||||||
|
|
||||||
|
/**
 * Reduce a UML diagram to a single module plus its direct (one-hop) neighbors.
 *
 * The diagram is returned unchanged (same object) when it is falsy or lacks
 * `nodes`/`links`, when `moduleId` is empty or `'__all__'`, or when no node
 * belongs to the requested module. Nodes without a `module` are treated as
 * belonging to `'other'`.
 *
 * @param diagram  Full diagram to filter.
 * @param moduleId Module to focus on, or `'__all__'` for no filtering.
 * @returns A shallow copy of `diagram` with filtered `nodes` and `links`.
 */
function buildModuleFocusedUmlDiagram(diagram: UMLDiagram, moduleId: string): UMLDiagram {
  if (!diagram || !diagram.nodes || !diagram.links) {
    return diagram;
  }

  if (!moduleId || moduleId === '__all__') {
    return diagram;
  }

  const primaryNodes = diagram.nodes.filter((node) => (node.module || 'other') === moduleId);
  if (primaryNodes.length === 0) {
    return diagram;
  }

  const primaryIds = new Set<string>(primaryNodes.map((n) => n.id));
  const keepIds = new Set<string>(primaryIds);

  // Include one-hop neighbors for context around the selected module.
  // Membership is tested against the fixed `primaryIds` set, not the growing
  // `keepIds` set; otherwise a neighbor added by an earlier link would pull in
  // its own neighbors and the expansion would depend on link order.
  diagram.links.forEach((link) => {
    if (primaryIds.has(link.source)) keepIds.add(link.target);
    if (primaryIds.has(link.target)) keepIds.add(link.source);
  });

  let nodes = diagram.nodes.filter((node) => keepIds.has(node.id));
  let links = diagram.links.filter((link) => keepIds.has(link.source) && keepIds.has(link.target));

  // When module + neighbors is under 20 nodes, show only the module's own
  // classes and the links between them.
  // NOTE(review): this drops context for the smallest modules — confirm intent.
  if (nodes.length < 20) {
    nodes = primaryNodes;
    links = diagram.links.filter((link) => primaryIds.has(link.source) && primaryIds.has(link.target));
  }

  return {
    ...diagram,
    nodes,
    links,
  };
}
|
||||||
|
|
||||||
// Bilingual text object for translations
|
// Bilingual text object for translations
|
||||||
const TEXT = {
|
const TEXT = {
|
||||||
// Sidebar
|
// Sidebar
|
||||||
|
|
@ -90,6 +183,17 @@ const TEXT = {
|
||||||
refreshUml: { nl: 'UML Vernieuwen', en: 'Refresh UML' },
|
refreshUml: { nl: 'UML Vernieuwen', en: 'Refresh UML' },
|
||||||
refreshRdf: { nl: 'RDF Vernieuwen', en: 'Refresh RDF' },
|
refreshRdf: { nl: 'RDF Vernieuwen', en: 'Refresh RDF' },
|
||||||
refreshHint: { nl: 'Haal de nieuwste versie op', en: 'Fetch the latest version' },
|
refreshHint: { nl: 'Haal de nieuwste versie op', en: 'Fetch the latest version' },
|
||||||
|
umlDensity: { nl: 'UML Weergavemodus', en: 'UML Display Mode' },
|
||||||
|
umlDensityDesc: { nl: 'Kies tussen volledig en gestroomlijnd overzicht', en: 'Choose between full and streamlined overview' },
|
||||||
|
umlModeFull: { nl: 'Volledig', en: 'Full' },
|
||||||
|
umlModeFullHint: { nl: 'Toon alle klassen en relaties', en: 'Show all classes and relationships' },
|
||||||
|
umlModeStreamlined: { nl: 'Gestroomlijnd', en: 'Streamlined' },
|
||||||
|
umlModeStreamlinedHint: { nl: 'Toon de belangrijkste, meest verbonden klassen', en: 'Show key, high-connectivity classes' },
|
||||||
|
umlModeModule: { nl: 'Module Focus', en: 'Module Focus' },
|
||||||
|
umlModeModuleHint: { nl: 'Toon een domein met context', en: 'Show one domain with context' },
|
||||||
|
umlModuleSelect: { nl: 'Module', en: 'Module' },
|
||||||
|
umlModuleAll: { nl: 'Alle modules', en: 'All modules' },
|
||||||
|
umlShowingClasses: { nl: 'klassen zichtbaar', en: 'classes visible' },
|
||||||
|
|
||||||
// View switcher
|
// View switcher
|
||||||
viewUml: { nl: 'UML Weergave', en: 'UML View' },
|
viewUml: { nl: 'UML Weergave', en: 'UML View' },
|
||||||
|
|
@ -365,6 +469,14 @@ export function Visualize() {
|
||||||
return null;
|
return null;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const [umlDensityMode, setUmlDensityMode] = useState<UmlDensityMode>(() => {
|
||||||
|
const saved = localStorage.getItem('visualize-uml-density-mode');
|
||||||
|
return (saved === 'streamlined' || saved === 'module') ? saved : 'full';
|
||||||
|
});
|
||||||
|
const [selectedUmlModule, setSelectedUmlModule] = useState<string>(() => {
|
||||||
|
return localStorage.getItem('visualize-uml-module') || '__all__';
|
||||||
|
});
|
||||||
|
|
||||||
// Dropdown state
|
// Dropdown state
|
||||||
const [exportDropdownOpen, setExportDropdownOpen] = useState<boolean>(false);
|
const [exportDropdownOpen, setExportDropdownOpen] = useState<boolean>(false);
|
||||||
const [layoutDropdownOpen, setLayoutDropdownOpen] = useState<boolean>(false);
|
const [layoutDropdownOpen, setLayoutDropdownOpen] = useState<boolean>(false);
|
||||||
|
|
@ -1063,6 +1175,46 @@ export function Visualize() {
|
||||||
const hasUmlContent = umlDiagram !== null;
|
const hasUmlContent = umlDiagram !== null;
|
||||||
const hasContent = hasRdfContent || hasUmlContent;
|
const hasContent = hasRdfContent || hasUmlContent;
|
||||||
|
|
||||||
|
const umlModuleOptions = useMemo<UmlModuleOption[]>(() => {
|
||||||
|
if (!umlDiagram) return [];
|
||||||
|
const counts = new Map<string, number>();
|
||||||
|
umlDiagram.nodes.forEach((node) => {
|
||||||
|
const moduleId = node.module || 'other';
|
||||||
|
counts.set(moduleId, (counts.get(moduleId) || 0) + 1);
|
||||||
|
});
|
||||||
|
|
||||||
|
const options: UmlModuleOption[] = [{ id: '__all__', label: t('umlModuleAll'), count: umlDiagram.nodes.length }];
|
||||||
|
const sorted = Array.from(counts.entries()).sort((a, b) => {
|
||||||
|
if (b[1] !== a[1]) return b[1] - a[1];
|
||||||
|
return a[0].localeCompare(b[0]);
|
||||||
|
});
|
||||||
|
|
||||||
|
sorted.forEach(([id, count]) => {
|
||||||
|
options.push({ id, label: humanizeModuleName(id), count });
|
||||||
|
});
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}, [umlDiagram, t]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!umlModuleOptions.length) return;
|
||||||
|
if (umlModuleOptions.some((option) => option.id === selectedUmlModule)) return;
|
||||||
|
|
||||||
|
setSelectedUmlModule('__all__');
|
||||||
|
localStorage.setItem('visualize-uml-module', '__all__');
|
||||||
|
}, [umlModuleOptions, selectedUmlModule]);
|
||||||
|
|
||||||
|
const displayUmlDiagram = useMemo(() => {
|
||||||
|
if (!umlDiagram) return null;
|
||||||
|
if (umlDensityMode === 'streamlined') {
|
||||||
|
return buildStreamlinedUmlDiagram(umlDiagram);
|
||||||
|
}
|
||||||
|
if (umlDensityMode === 'module') {
|
||||||
|
return buildModuleFocusedUmlDiagram(umlDiagram, selectedUmlModule);
|
||||||
|
}
|
||||||
|
return umlDiagram;
|
||||||
|
}, [umlDiagram, umlDensityMode, selectedUmlModule]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className={`visualize-page ${isFullscreen ? 'fullscreen-active' : ''} ${isMobile ? 'is-mobile' : ''}`}>
|
<div className={`visualize-page ${isFullscreen ? 'fullscreen-active' : ''} ${isMobile ? 'is-mobile' : ''}`}>
|
||||||
{/* Mobile overlay when sidebar is open */}
|
{/* Mobile overlay when sidebar is open */}
|
||||||
|
|
@ -1350,6 +1502,71 @@ export function Visualize() {
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{hasUmlContent && currentCategory === 'uml' && (
|
||||||
|
<div className="uml-density-section">
|
||||||
|
<h3>{t('umlDensity')}</h3>
|
||||||
|
<p className="uml-density-desc">{t('umlDensityDesc')}</p>
|
||||||
|
<div className="uml-density-options">
|
||||||
|
<button
|
||||||
|
className={`uml-density-option ${umlDensityMode === 'full' ? 'active' : ''}`}
|
||||||
|
onClick={() => {
|
||||||
|
setUmlDensityMode('full');
|
||||||
|
localStorage.setItem('visualize-uml-density-mode', 'full');
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<span className="uml-density-label">{t('umlModeFull')}</span>
|
||||||
|
<span className="uml-density-hint">{t('umlModeFullHint')}</span>
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
className={`uml-density-option ${umlDensityMode === 'streamlined' ? 'active' : ''}`}
|
||||||
|
onClick={() => {
|
||||||
|
setUmlDensityMode('streamlined');
|
||||||
|
localStorage.setItem('visualize-uml-density-mode', 'streamlined');
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<span className="uml-density-label">{t('umlModeStreamlined')}</span>
|
||||||
|
<span className="uml-density-hint">{t('umlModeStreamlinedHint')}</span>
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
className={`uml-density-option ${umlDensityMode === 'module' ? 'active' : ''}`}
|
||||||
|
onClick={() => {
|
||||||
|
setUmlDensityMode('module');
|
||||||
|
localStorage.setItem('visualize-uml-density-mode', 'module');
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<span className="uml-density-label">{t('umlModeModule')}</span>
|
||||||
|
<span className="uml-density-hint">{t('umlModeModuleHint')}</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
{umlDensityMode === 'module' && umlModuleOptions.length > 0 && (
|
||||||
|
<div className="uml-module-picker">
|
||||||
|
<label htmlFor="uml-module-select" className="uml-module-label">{t('umlModuleSelect')}</label>
|
||||||
|
<select
|
||||||
|
id="uml-module-select"
|
||||||
|
className="uml-module-select"
|
||||||
|
value={selectedUmlModule}
|
||||||
|
onChange={(e) => {
|
||||||
|
const next = e.target.value;
|
||||||
|
setSelectedUmlModule(next);
|
||||||
|
localStorage.setItem('visualize-uml-module', next);
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{umlModuleOptions.map((option) => (
|
||||||
|
<option key={option.id} value={option.id}>
|
||||||
|
{option.label} ({option.count.toLocaleString()})
|
||||||
|
</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{displayUmlDiagram && umlDiagram && (
|
||||||
|
<p className="uml-density-info">
|
||||||
|
{displayUmlDiagram.nodes.length.toLocaleString()} / {umlDiagram.nodes.length.toLocaleString()} {t('umlShowingClasses')}
|
||||||
|
</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Graph Controls (RDF only) */}
|
{/* Graph Controls (RDF only) */}
|
||||||
{hasRdfContent && currentCategory === 'rdf' && (
|
{hasRdfContent && currentCategory === 'rdf' && (
|
||||||
<GraphControls
|
<GraphControls
|
||||||
|
|
@ -1814,7 +2031,7 @@ export function Visualize() {
|
||||||
{!isLoading && !umlError && hasUmlContent && currentCategory === 'uml' && (
|
{!isLoading && !umlError && hasUmlContent && currentCategory === 'uml' && (
|
||||||
<div className="uml-canvas">
|
<div className="uml-canvas">
|
||||||
<UMLVisualization
|
<UMLVisualization
|
||||||
diagram={umlDiagram!}
|
diagram={displayUmlDiagram || umlDiagram!}
|
||||||
width={1400}
|
width={1400}
|
||||||
height={900}
|
height={900}
|
||||||
layoutType={layoutType}
|
layoutType={layoutType}
|
||||||
|
|
|
||||||
282
scripts/verify_external_mappings.py
Executable file
282
scripts/verify_external_mappings.py
Executable file
|
|
@ -0,0 +1,282 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Verify external ontology mappings used in LinkML YAML files.
|
||||||
|
|
||||||
|
Default behavior targets changed/untracked YAML files under:
|
||||||
|
schemas/20251121/linkml/
|
||||||
|
|
||||||
|
It validates mapping CURIEs under mapping keys:
|
||||||
|
exact_mappings, close_mappings, broad_mappings, narrow_mappings, related_mappings
|
||||||
|
|
||||||
|
Supported prefixes:
|
||||||
|
- la (Linked Art)
|
||||||
|
- rdac (RDA classes)
|
||||||
|
- rdau (RDA unconstrained properties)
|
||||||
|
- pav (PAV 2.3)
|
||||||
|
- ardo (ArDO)
|
||||||
|
- pca (POSC Caesar RDS)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
MAPPING_KEYS = {
|
||||||
|
"exact_mappings",
|
||||||
|
"close_mappings",
|
||||||
|
"broad_mappings",
|
||||||
|
"narrow_mappings",
|
||||||
|
"related_mappings",
|
||||||
|
}
|
||||||
|
SUPPORTED_PREFIXES = {"la", "rdac", "rdau", "pav", "ardo", "pca"}
|
||||||
|
CURIE_RE = re.compile(r"^(?P<prefix>[a-z][a-z0-9_-]*):(?P<local>[A-Za-z0-9_./-]+)$")
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_text(url: str, timeout: int = 60) -> str:
    """Download ``url`` and return the response body as text.

    The body is decoded as UTF-8 and undecodable bytes are dropped
    (``errors="ignore"``). ``timeout`` is forwarded to ``urlopen``.
    """
    response = urllib.request.urlopen(url, timeout=timeout)
    with response:
        payload = response.read()
    return payload.decode("utf-8", errors="ignore")
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_bytes(url: str, timeout: int = 60) -> bytes:
    """Download ``url`` and return the raw, undecoded response body.

    ``timeout`` is forwarded to ``urlopen``.
    """
    response = urllib.request.urlopen(url, timeout=timeout)
    with response:
        payload = response.read()
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
def parse_mapping_curies(file_path: Path) -> list[tuple[int, str, str]]:
    """Return (line_number, prefix, local) mapping CURIEs from mapping blocks.

    This is a line-based scan, not a YAML parser. A mapping block starts at a
    line that consists of one of ``MAPPING_KEYS`` followed by a trailing colon,
    and contains the ``- item`` lines that follow it. Items that match
    ``CURIE_RE`` (after removing an inline ``#`` comment and one pair of
    surrounding quotes) are reported with their 1-based line number.

    A block ends at the first non-blank, non-comment line that is either
    indented less than the block's key, or indented the same as the key and
    not a list item (YAML allows list items at the key's own indentation).
    Only space characters count toward indentation.
    """

    def opens_mapping_block(text: str) -> bool:
        # A header is a bare ``<mapping key>:`` line with nothing after the colon.
        if not text.endswith(":"):
            return False
        return text.split(":", 1)[0].strip() in MAPPING_KEYS

    out: list[tuple[int, str, str]] = []
    lines = file_path.read_text(encoding="utf-8", errors="ignore").splitlines()

    in_block = False
    block_indent = -1

    for idx, line in enumerate(lines, start=1):
        stripped = line.strip()
        # Blank lines and comments never open, close, or contribute to a block.
        if not stripped or stripped.startswith("#"):
            continue

        indent = len(line) - len(line.lstrip(" "))
        is_item = stripped.startswith("-")

        if in_block:
            left_block = indent < block_indent or (indent == block_indent and not is_item)
            if not left_block:
                if is_item:
                    item = stripped[1:].strip()
                    # remove inline comment
                    if " #" in item:
                        item = item.split(" #", 1)[0].strip()
                    # accept quoted scalars such as - "la:Thing"
                    if len(item) >= 2 and item[0] == item[-1] and item[0] in "\"'":
                        item = item[1:-1].strip()
                    m = CURIE_RE.match(item)
                    if m:
                        out.append((idx, m.group("prefix"), m.group("local")))
                continue
            # The block ended on this line; fall through so the same line can
            # open a new mapping block.
            in_block = False
            block_indent = -1

        if opens_mapping_block(stripped):
            in_block = True
            block_indent = indent

    return out
|
||||||
|
|
||||||
|
|
||||||
|
def changed_yaml_files(repo_root: Path, scope: Path) -> list[Path]:
    """Collect changed and untracked YAML files inside scope.

    "Changed" covers both unstaged and staged modifications, so a newly added
    file that has already been ``git add``-ed is included. Git is invoked with
    ``repo_root`` as its working directory. Any git failure (non-zero exit,
    missing ``git`` executable, unusable working directory) is treated as
    "no files from that command" rather than raised.

    Returns the sorted, resolved paths of existing ``.yaml`` files located
    under ``scope``.
    """
    files: set[Path] = set()
    scope_root = scope.resolve()  # loop-invariant, resolve once

    def run(cmd: list[str]) -> list[str]:
        try:
            out = subprocess.check_output(cmd, cwd=repo_root)
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing git binary or an invalid cwd.
            return []
        return [x for x in out.decode().splitlines() if x]

    unstaged = run(["git", "diff", "--name-only"])
    # Staged changes are not part of a plain `git diff`.
    staged = run(["git", "diff", "--name-only", "--cached"])
    untracked = run(["git", "ls-files", "--others", "--exclude-standard"])

    for rel in unstaged + staged + untracked:
        if not rel.endswith(".yaml"):
            continue
        p = (repo_root / rel).resolve()
        try:
            p.relative_to(scope_root)
        except ValueError:
            # Outside the requested scope.
            continue
        if p.is_file():
            files.add(p)

    return sorted(files)
|
||||||
|
|
||||||
|
|
||||||
|
def load_linked_art_terms() -> tuple[set[str], set[str]]:
    """Fetch the Linked Art terms document and return ``(properties, classes)``.

    The document at ``https://linked.art/ns/terms/`` is parsed as XML. For
    every ``rdf:Property`` and ``rdfs:Class`` element directly under the root
    whose ``rdf:about`` URI starts with that base URL, the last path segment
    of the URI is collected into the corresponding set.
    """
    terms_base = "https://linked.art/ns/terms/"
    about_attr = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about"
    namespaces = {
        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    }

    document = ET.fromstring(fetch_bytes("https://linked.art/ns/terms/"))

    def local_names(tag: str) -> set[str]:
        # Collect the trailing URI segment of each matching element in the Linked Art namespace.
        about_uris = (el.attrib.get(about_attr, "") for el in document.findall(tag, namespaces))
        return {uri.rsplit("/", 1)[-1] for uri in about_uris if uri.startswith(terms_base)}

    property_names = local_names("rdf:Property")
    class_names = local_names("rdfs:Class")
    return property_names, class_names
|
||||||
|
|
||||||
|
|
||||||
|
def load_rda_ids(path: str, marker: str) -> set[str]:
    """Return the distinct matches of regex ``marker`` in an RDA element set.

    ``path`` selects the JSON-LD document under
    ``https://www.rdaregistry.info/jsonld/Elements/``. The document is scanned
    as plain text with ``re.findall`` semantics rather than parsed as JSON.
    """
    source_url = f"https://www.rdaregistry.info/jsonld/Elements/{path}.jsonld"
    document = fetch_text(source_url)
    id_pattern = re.compile(marker)
    return set(id_pattern.findall(document))
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Command-line entry point: verify external mapping CURIEs in YAML files.

    Target files are, in order of precedence: the positional ``files``
    arguments, every ``*.yaml`` under ``--scope`` when ``--all`` is given, or
    the changed/untracked YAML files under ``--scope``.

    Only CURIEs whose prefix is in ``SUPPORTED_PREFIXES`` are checked. An
    ontology is downloaded only when at least one CURIE with its prefix was
    found, so an unrelated network failure cannot fail the run. If a download
    fails, one ``[load]`` failure is recorded and the per-CURIE checks for
    that prefix are skipped, because they could not be decided.

    Returns:
        0 when there is nothing to verify or every checked CURIE was found,
        1 when any CURIE could not be verified or a required source could not
        be loaded.
    """
    parser = argparse.ArgumentParser(description="Verify LinkML external mappings")
    parser.add_argument(
        "files",
        nargs="*",
        help="YAML files to verify (defaults to changed/untracked files under scope)",
    )
    parser.add_argument(
        "--scope",
        default="schemas/20251121/linkml",
        help="Default scope used when no files are provided",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Scan all YAML files under --scope (instead of changed/untracked files)",
    )
    args = parser.parse_args()

    # The script lives one directory below the repository root.
    repo_root = Path(__file__).resolve().parents[1]
    scope = (repo_root / args.scope).resolve()

    if args.files:
        files = [Path(f).resolve() for f in args.files]
    elif args.all:
        files = sorted(scope.rglob("*.yaml"))
    else:
        files = changed_yaml_files(repo_root, scope)

    if not files:
        print("No target YAML files found. Nothing to verify.")
        return 0

    # prefix -> [(file, line number, local name)]
    occurrences: dict[str, list[tuple[Path, int, str]]] = {}
    for file_path in files:
        if not file_path.exists() or file_path.suffix != ".yaml":
            continue
        for line_no, pfx, local in parse_mapping_curies(file_path):
            if pfx not in SUPPORTED_PREFIXES:
                continue
            occurrences.setdefault(pfx, []).append((file_path, line_no, local))

    if not occurrences:
        print("No supported external mapping CURIEs found in selected files.")
        return 0

    failures: list[str] = []

    print("Verifying mapping CURIEs:")
    for prefix in sorted(occurrences):
        locals_unique = sorted({x[2] for x in occurrences[prefix]})
        print(f"- {prefix}: {', '.join(locals_unique)}")

    def check_against_source(prefix, load_label, loader, source_label):
        # Verify every CURIE of `prefix` against one downloaded source.
        # `loader()` returns a container supporting `in` (a set, or raw text).
        if prefix not in occurrences:
            return
        try:
            known = loader()
        except Exception as e:  # pragma: no cover - network failures
            failures.append(f"[load] {load_label}: {e}")
            return
        for file_path, line_no, local in occurrences[prefix]:
            if local not in known:
                failures.append(f"{file_path}:{line_no} {prefix}:{local} not found in {source_label}")

    def check_per_term_url(prefix, base_url):
        # Verify each CURIE of `prefix` by fetching its own URL, once per URL.
        fetched = {}  # url -> (text, error); error is None on success
        for file_path, line_no, local in occurrences.get(prefix, []):
            url = f"{base_url}{local}"
            if url not in fetched:
                try:
                    fetched[url] = (fetch_text(url), None)
                except Exception as e:  # pragma: no cover - network failures
                    fetched[url] = ("", e)
            txt, error = fetched[url]
            if error is not None:
                failures.append(f"{file_path}:{line_no} {prefix}:{local} fetch error: {error}")
            elif local not in txt:
                failures.append(f"{file_path}:{line_no} {prefix}:{local} not found at {url}")

    # prefix-specific verification
    check_against_source(
        "la",
        "Linked Art",
        lambda: set().union(*load_linked_art_terms()),  # properties and classes
        "linked.art/ns/terms",
    )
    check_against_source(
        "rdac",
        "RDA c.jsonld",
        lambda: load_rda_ids("c", r"Elements/c/(C\d+)"),
        "RDA Elements/c.jsonld",
    )
    check_against_source(
        "rdau",
        "RDA u.jsonld",
        lambda: load_rda_ids("u", r"Elements/u/(P\d+)"),
        "RDA Elements/u.jsonld",
    )
    # PAV is checked by substring search in the ontology text.
    check_against_source(
        "pav",
        "PAV 2.3",
        lambda: fetch_text("https://purl.org/pav/2.3"),
        "PAV 2.3 ontology",
    )
    check_per_term_url("ardo", "https://w3id.org/ardo/2.0/")
    check_per_term_url("pca", "https://rds.posccaesar.org/ontology/plm/rdl/")

    if failures:
        print("\nFAIL")
        for f in failures:
            print(f"- {f}")
        return 1

    print("\nOK: all checked mapping CURIEs were verified against source ontologies.")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
Loading…
Reference in a new issue