#!/usr/bin/env node
/*
 * Generate a Mermaid UML class diagram from the synced LinkML schemas.
 *
 * Output: frontend/public/data/heritage_custodian_ontology.mmd
 *
 * The Visualize page loads this file from /data/heritage_custodian_ontology.mmd.
 */

const fs = require('fs');
const path = require('path');

const SCHEMA_DIR = path.join(__dirname, '../public/schemas/20251121/linkml');
const MANIFEST_PATH = path.join(SCHEMA_DIR, 'manifest.json');
const OUTPUT_PATH = path.join(__dirname, '../public/data/heritage_custodian_ontology.mmd');

// Matches a YAML block-scalar header: `|` or `>` with an optional indentation
// digit and/or chomping indicator (`+` / `-`) in either order.
const BLOCK_SCALAR_HEADER = /^[|>](?:[1-9][+-]?|[+-][1-9]?)?$/;

/**
 * Read a file as UTF-8 and parse it as JSON.
 * @param {string} p - Path of the file to read.
 * @returns {*} The parsed JSON value.
 * @throws {Error} If the file cannot be read or is not valid JSON.
 */
function readJson(p) {
  return JSON.parse(fs.readFileSync(p, 'utf8'));
}

/**
 * Read a file as UTF-8 text.
 * @param {string} p - Path of the file to read.
 * @returns {string} The file contents.
 */
function readText(p) {
  return fs.readFileSync(p, 'utf8');
}

/**
 * Make sure the parent directory of `p` exists.
 * @param {string} p - Path of a file about to be written.
 */
function ensureDir(p) {
  // `recursive: true` creates missing parents and is a no-op when the
  // directory already exists, so no separate existence check is needed.
  fs.mkdirSync(path.dirname(p), { recursive: true });
}

/**
 * Return `v` when it is an array, otherwise an empty array.
 * @param {*} v - Any value.
 * @returns {Array} `v` itself or `[]`.
 */
function safeArray(v) {
  return Array.isArray(v) ? v : [];
}

/**
 * Escape regular-expression metacharacters so `s` matches literally.
 * @param {string} s - Text to embed in a RegExp source.
 * @returns {string} The escaped text.
 */
function escapeRegExp(s) {
  return String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Count the leading space characters of a line (tabs are not counted).
 * @param {string} line - A single line of text.
 * @returns {number} Number of leading spaces.
 */
function indentOf(line) {
  const m = line.match(/^ */);
  return m ? m[0].length : 0;
}

/**
 * Tell whether a line is empty/whitespace-only or a full-line `#` comment.
 * @param {string} line - A single line of text.
 * @returns {boolean} True for blank and comment lines.
 */
function isBlankOrComment(line) {
  const t = line.trim();
  return t === '' || t.startsWith('#');
}

/**
 * Remove a trailing `# comment` from a value and trim the result.
 * @param {string} v - Raw text following `key:` or `- `.
 * @returns {string} The value without its comment; may be empty.
 */
function stripInlineComment(v) {
  // Conservative: only strip a `#` that starts the value or is preceded by
  // whitespace. This avoids clobbering URLs like http://example.com#fragment
  // while still treating `key:   # note` as an empty value.
  return v.replace(/(?:^|\s+)#.*$/, '').trim();
}

/**
 * Remove one pair of matching single or double quotes around a value.
 * @param {string} v - Raw value text.
 * @returns {string} The trimmed value without surrounding quotes.
 */
function stripQuotes(v) {
  const s = v.trim();
  if (s.length < 2) {
    // A lone quote character is not a quoted string.
    return s;
  }
  const isDoubleQuoted = s.startsWith('"') && s.endsWith('"');
  const isSingleQuoted = s.startsWith("'") && s.endsWith("'");
  return isDoubleQuoted || isSingleQuoted ? s.slice(1, -1) : s;
}

/**
 * Collect the line at `startIndex` plus every following line that belongs to
 * it by indentation.
 *
 * Blank and comment lines are kept and never end the block; the block ends at
 * the first other line whose indent is less than or equal to `startIndent`.
 *
 * @param {string[]} lines - All lines of the file.
 * @param {number} startIndex - Index of the block's first (key) line.
 * @param {number} startIndent - Indent of that first line.
 * @returns {string[]} The lines of the block, starting with the key line.
 */
function collectIndentedBlock(lines, startIndex, startIndent) {
  const block = [lines[startIndex]];
  for (let i = startIndex + 1; i < lines.length; i++) {
    const line = lines[i];
    if (isBlankOrComment(line)) {
      block.push(line);
      continue;
    }
    if (indentOf(line) <= startIndent) {
      break;
    }
    block.push(line);
  }
  return block;
}

/**
 * Find the first single-line scalar value for `key: value` in a block.
 *
 * Lines indented less than `minIndent` are ignored. Entries whose value is
 * empty, comment-only, or a block-scalar header (`|`, `>`, `>-`, `|+`, ...)
 * are skipped, and the search continues with later lines.
 *
 * @param {string[]} blockLines - Lines to search.
 * @param {string} key - Mapping key to look for (matched literally).
 * @param {number} minIndent - Minimum indent a line needs to be considered.
 * @returns {string|null} The unquoted value, or null when none is found.
 */
function findScalarInBlock(blockLines, key, minIndent) {
  const pattern = new RegExp(`^\\s*${escapeRegExp(key)}:\\s*(.+)\\s*$`);
  for (const line of blockLines) {
    if (isBlankOrComment(line)) {
      continue;
    }
    if (indentOf(line) < minIndent) {
      continue;
    }
    const m = line.match(pattern);
    if (!m) {
      continue;
    }
    const raw = stripInlineComment(m[1]);
    if (raw === '' || BLOCK_SCALAR_HEADER.test(raw)) {
      continue;
    }
    return stripQuotes(raw);
  }
  return null;
}

/**
 * Collect the items of the first block-style list stored under `key`.
 *
 * Parses:
 *   key:
 *     - item
 *     - item2
 *
 * Only the first matching `key:` line (at indent >= `minIndent`) is used.
 * Every `- ` line indented deeper than that key is collected until a line at
 * the key's indent or shallower is reached.
 *
 * @param {string[]} blockLines - Lines to search.
 * @param {string} key - Mapping key that introduces the list.
 * @param {number} minIndent - Minimum indent a key line needs to be considered.
 * @returns {string[]} The unquoted list items; empty when the key is absent.
 */
function findListInBlock(blockLines, key, minIndent) {
  const out = [];
  const keyPattern = new RegExp(`^\\s*${escapeRegExp(key)}:\\s*$`);
  for (let i = 0; i < blockLines.length; i++) {
    const line = blockLines[i];
    if (isBlankOrComment(line)) {
      continue;
    }
    const keyIndent = indentOf(line);
    if (keyIndent < minIndent) {
      continue;
    }
    if (!keyPattern.test(line)) {
      continue;
    }
    for (let j = i + 1; j < blockLines.length; j++) {
      const l = blockLines[j];
      if (isBlankOrComment(l)) {
        continue;
      }
      if (indentOf(l) <= keyIndent) {
        break;
      }
      const m = l.match(/^\s*-\s*(.+)\s*$/);
      if (!m) {
        continue;
      }
      const item = stripQuotes(stripInlineComment(m[1]));
      // A `- # comment` entry carries no value; do not record it as an item.
      if (item !== '') {
        out.push(item);
      }
    }
    break;
  }
  return out;
}

/**
 * Read per-slot `range` overrides from a class block's `slot_usage` section.
 *
 * Parses:
 *   slot_usage:
 *     slot_name:
 *       range: SomeClass
 *
 * Slot keys are expected exactly two spaces deeper than `slot_usage:`.
 * Only the first `slot_usage:` line (at indent >= `minIndent`) is used.
 *
 * @param {string[]} classBlockLines - Lines of one class definition block.
 * @param {number} minIndent - Minimum indent for the `slot_usage:` line.
 * @returns {Map<string, string>} Slot name -> overriding range.
 */
function parseSlotUsageOverrides(classBlockLines, minIndent) {
  const overrides = new Map();
  for (let i = 0; i < classBlockLines.length; i++) {
    const line = classBlockLines[i];
    if (isBlankOrComment(line)) {
      continue;
    }
    const usageIndent = indentOf(line);
    if (usageIndent < minIndent) {
      continue;
    }
    if (!/^\s*slot_usage:\s*$/.test(line)) {
      continue;
    }

    let currentSlot = null;
    let currentSlotIndent = null;
    for (let j = i + 1; j < classBlockLines.length; j++) {
      const l = classBlockLines[j];
      if (isBlankOrComment(l)) {
        continue;
      }
      const li = indentOf(l);
      if (li <= usageIndent) {
        break;
      }

      // Slot key line: `  slot_name:` directly under `slot_usage:`.
      if (li === usageIndent + 2) {
        const mKey = l.trim().match(/^([^\s:]+):\s*$/);
        if (mKey) {
          currentSlot = mKey[1];
          currentSlotIndent = li;
          continue;
        }
      }

      if (currentSlot && currentSlotIndent !== null && li > currentSlotIndent) {
        const mRange = l.match(/^\s*range:\s*(.+)\s*$/);
        if (mRange) {
          const range = stripQuotes(stripInlineComment(mRange[1]));
          if (range) {
            overrides.set(currentSlot, range);
          }
        }
      }
    }
    break;
  }
  return overrides;
}

/**
 * Extract the block for one named entry under `rootKey:`.
 *
 * Extracts the YAML-ish block for `rootKey: { itemName: {...} }` without
 * parsing YAML, so invalid YAML elsewhere in the file does not matter.
 * The entry line must be exactly `itemName:` and sit two spaces deeper than
 * the root key. Every `rootKey:` line in the text is tried in order.
 *
 * @param {string} text - Full file contents.
 * @param {string} rootKey - Top-level mapping key, e.g. `classes`.
 * @param {string} itemName - Name of the entry to extract.
 * @returns {{blockLines: string[], baseIndent: number}|null} The entry's
 *   lines and the indent of its key line, or null when it is not found.
 */
function extractNamedMappingBlock(text, rootKey, itemName) {
  const lines = text.split(/\r?\n/);
  const rootPattern = new RegExp(`^\\s*${escapeRegExp(rootKey)}:\\s*$`);
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    if (isBlankOrComment(line)) {
      continue;
    }
    if (!rootPattern.test(line)) {
      continue;
    }
    const rootIndent = indentOf(line);
    for (let j = i + 1; j < lines.length; j++) {
      const l = lines[j];
      if (isBlankOrComment(l)) {
        continue;
      }
      const li = indentOf(l);
      if (li <= rootIndent) {
        break;
      }
      if (li === rootIndent + 2 && l.trim() === `${itemName}:`) {
        return {
          blockLines: collectIndentedBlock(lines, j, li),
          baseIndent: li,
        };
      }
    }
  }
  return null;
}

/**
 * Extract all mapping entry blocks under the first `rootKey:` line.
 *
 * Example:
 *   classes:
 *     Foo:
 *       ...
 *     Bar:
 *       ...
 *
 * Entry keys must sit exactly two spaces deeper than the root key and have
 * nothing after the colon.
 *
 * @param {string} text - Full file contents.
 * @param {string} rootKey - Top-level mapping key, e.g. `classes` or `slots`.
 * @returns {Array<{name: string, blockLines: string[], baseIndent: number}>}
 *   One record per entry, in file order; empty when the root key is absent.
 */
function extractAllNamedMappingBlocks(text, rootKey) {
  const lines = text.split(/\r?\n/);
  const rootPattern = new RegExp(`^\\s*${escapeRegExp(rootKey)}:\\s*$`);
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    if (isBlankOrComment(line)) {
      continue;
    }
    if (!rootPattern.test(line)) {
      continue;
    }
    const rootIndent = indentOf(line);
    const blocks = [];
    for (let j = i + 1; j < lines.length; j++) {
      const l = lines[j];
      if (isBlankOrComment(l)) {
        continue;
      }
      const li = indentOf(l);
      if (li <= rootIndent) {
        break;
      }
      if (li !== rootIndent + 2) {
        continue;
      }
      const mKey = l.trim().match(/^([^\s:]+):\s*$/);
      if (mKey) {
        blocks.push({
          name: mKey[1],
          blockLines: collectIndentedBlock(lines, j, li),
          baseIndent: li,
        });
      }
    }
    return blocks;
  }
  return [];
}

/**
 * Load the text of one manifest file entry, resolved against SCHEMA_DIR.
 *
 * Entries that cannot be loaded are skipped with a warning instead of
 * aborting the run, so one stale manifest entry does not block the diagram.
 *
 * @param {{path: string}} entry - A file record from the manifest.
 * @returns {string|null} The file contents, or null when the entry is skipped.
 */
function readSchemaFile(entry) {
  const relPath = entry && entry.path;
  if (typeof relPath !== 'string' || relPath === '') {
    console.warn(
      `[generate-ontology-uml] Skipping manifest entry without a path: ${JSON.stringify(entry)}`
    );
    return null;
  }
  const fullPath = path.join(SCHEMA_DIR, relPath);
  if (!fs.existsSync(fullPath)) {
    console.warn(`[generate-ontology-uml] Skipping missing schema file: ${fullPath}`);
    return null;
  }
  return readText(fullPath);
}

/**
 * Build the Mermaid class diagram and write it to OUTPUT_PATH.
 *
 * Steps: read the manifest, index every slot (range, multivalued) and class
 * (is_a, slots, slot_usage range overrides) from the files in the `slot` and
 * `class` categories, then emit one `class` line per class, an inheritance
 * edge for each `is_a` whose parent is a known class, and an association edge
 * for each slot whose effective range is a known class.
 *
 * Exits the process with status 1 when the manifest file does not exist.
 */
function main() {
  if (!fs.existsSync(MANIFEST_PATH)) {
    console.error(`[generate-ontology-uml] Missing manifest: ${MANIFEST_PATH}`);
    console.error('[generate-ontology-uml] Run pnpm run generate-manifest first.');
    process.exit(1);
  }

  const manifest = readJson(MANIFEST_PATH);
  const categories = safeArray(manifest.categories);
  const classCategory = categories.find((c) => c.name === 'class');
  const slotCategory = categories.find((c) => c.name === 'slot');
  const classFiles = safeArray(classCategory && classCategory.files);
  const slotFiles = safeArray(slotCategory && slotCategory.files);

  // Slot name -> { range, multivalued }. The first definition of a name wins.
  // `multivalued` is recorded but not used when emitting edges below.
  const slotIndex = new Map();
  for (const f of slotFiles) {
    const text = readSchemaFile(f);
    if (text === null) {
      continue;
    }
    for (const b of extractAllNamedMappingBlocks(text, 'slots')) {
      const range = findScalarInBlock(b.blockLines, 'range', b.baseIndent + 2);
      const multivaluedRaw = findScalarInBlock(b.blockLines, 'multivalued', b.baseIndent + 2);
      const multivalued = multivaluedRaw === 'true' || multivaluedRaw === 'True';
      if (!slotIndex.has(b.name)) {
        slotIndex.set(b.name, { range, multivalued });
      }
    }
  }

  // Class name -> { is_a, slots, slot_usage_overrides }. First definition wins.
  const classIndex = new Map();
  for (const f of classFiles) {
    const text = readSchemaFile(f);
    if (text === null) {
      continue;
    }
    for (const b of extractAllNamedMappingBlocks(text, 'classes')) {
      const childIndent = b.baseIndent + 2;
      const parent = findScalarInBlock(b.blockLines, 'is_a', childIndent);
      const slots = findListInBlock(b.blockLines, 'slots', childIndent);
      const overrides = parseSlotUsageOverrides(b.blockLines, childIndent);
      if (!classIndex.has(b.name)) {
        classIndex.set(b.name, {
          is_a: parent,
          slots,
          slot_usage_overrides: overrides,
        });
      }
    }
  }

  const classNames = [...classIndex.keys()].sort((a, b) => a.localeCompare(b));

  // The diagram is written wrapped in a ```mermaid fence.
  const lines = [];
  lines.push('```mermaid');
  lines.push('classDiagram');
  lines.push('%% Generated from LinkML manifest: /schemas/20251121/linkml/manifest.json');
  if (manifest.generated) {
    lines.push(`%% Manifest generated: ${manifest.generated}`);
  }
  lines.push('direction LR');

  for (const c of classNames) {
    lines.push(`class ${c}`);
  }

  // Edges are de-duplicated on (source, target, kind, label).
  const edgeSet = new Set();
  const addEdge = (a, b, kind, label) => {
    const key = `${a}|${b}|${kind}|${label || ''}`;
    if (edgeSet.has(key)) {
      return;
    }
    edgeSet.add(key);
    if (kind === 'inheritance') {
      lines.push(`${b} <|-- ${a}`);
      return;
    }
    if (kind === 'association') {
      lines.push(`${a} --> ${b} : ${label}`);
    }
  };

  for (const [className, classDef] of classIndex.entries()) {
    const parent = classDef.is_a;
    if (parent && classIndex.has(parent)) {
      addEdge(className, parent, 'inheritance');
    }

    const slots = safeArray(classDef.slots);
    const slotUsageOverrides =
      classDef.slot_usage_overrides instanceof Map ? classDef.slot_usage_overrides : new Map();

    for (const slotName of slots) {
      const slotDef = slotIndex.get(slotName) || {};
      // A class-level slot_usage range takes precedence over the slot's own range.
      const range = slotUsageOverrides.get(slotName) || slotDef.range;
      if (!range) {
        continue;
      }
      if (classIndex.has(range)) {
        addEdge(className, range, 'association', slotName);
      }
    }
  }

  lines.push('```');
  lines.push('');

  ensureDir(OUTPUT_PATH);
  fs.writeFileSync(OUTPUT_PATH, lines.join('\n'), 'utf8');
  console.log(`[generate-ontology-uml] Wrote ${OUTPUT_PATH}`);
  console.log(
    `[generate-ontology-uml] Classes: ${classIndex.size}, Slots indexed: ${slotIndex.size}, Edges: ${edgeSet.size}`
  );
}

main();