#!/usr/bin/env npx ts-node
/**
 * Test script to verify that all ontology files parse correctly.
 * Run with: npx ts-node scripts/test-ontology-parsing.ts
 */
|
|
|
|
import { readFileSync, existsSync } from 'fs';
import { join, dirname, extname } from 'path';
import { fileURLToPath } from 'url';
|
|
|
|
// This module reads `import.meta.url` (ES-module only), so the CommonJS-style
// `__filename` / `__dirname` values are derived from the module URL here.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
|
|
|
|
// Minimal implementation of parsing logic for testing.

/**
 * Common RDF vocabulary prefixes mapped to their namespace IRIs.
 *
 * NOTE(review): nothing in the visible part of this script reads this table
 * (hence the lint suppression directly below) - confirm before removing.
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const NAMESPACES: Record<string, string> = {
  rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
  rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
  owl: 'http://www.w3.org/2002/07/owl#',
  xsd: 'http://www.w3.org/2001/XMLSchema#',
  dc: 'http://purl.org/dc/elements/1.1/',
  dcterms: 'http://purl.org/dc/terms/',
  skos: 'http://www.w3.org/2004/02/skos/core#',
  foaf: 'http://xmlns.com/foaf/0.1/',
  schema: 'http://schema.org/',
  prov: 'http://www.w3.org/ns/prov#',
  org: 'http://www.w3.org/ns/org#',
};
|
|
|
|
/** Describes one ontology file that the script should check. */
interface OntologyFile {
  /** Human-readable name shown in the report. */
  name: string;
  /** File name, resolved relative to the ontology data directory. */
  path: string;
  /** Serialization format the file is declared to use. */
  format: 'ttl' | 'rdf' | 'owl' | 'jsonld' | 'csv';
  /** Grouping label used to organise the report (e.g. 'top-level', 'domain'). */
  category: string;
  /** Optional free-text description. */
  description?: string;
}
|
|
|
|
/** Outcome of checking a single ontology file. */
interface TestResult {
  /** Name copied from the corresponding ontology file entry. */
  name: string;
  /** Path copied from the corresponding ontology file entry. */
  path: string;
  /** True when the file was read and counted without an error. */
  success: boolean;
  /** Number of class declarations detected. */
  classCount: number;
  /** Number of property declarations detected. */
  propertyCount: number;
  /** Failure reason; set only when the check did not succeed. */
  error?: string;
  /** Non-fatal observations (format mismatch, nothing detected, ...). */
  warnings: string[];
}
|
|
|
|
// Copy of ONTOLOGY_FILES from ontology-loader.ts.
// Each entry names one file below ONTOLOGY_DIR together with its declared
// format and the report category it is listed under.
const ONTOLOGY_FILES: OntologyFile[] = [
  { name: 'PROV-O', path: 'prov-o.rdf', format: 'rdf', category: 'top-level' },
  { name: 'PROV', path: 'prov.ttl', format: 'ttl', category: 'top-level' },
  { name: 'SKOS', path: 'skos.rdf', format: 'rdf', category: 'top-level' },
  { name: 'FOAF', path: 'foaf.ttl', format: 'ttl', category: 'top-level' },
  { name: 'Dublin Core Elements', path: 'dublin_core_elements.rdf', format: 'rdf', category: 'top-level' },
  { name: 'Schema.org', path: 'schemaorg.owl', format: 'owl', category: 'top-level' },
  { name: 'ORG Ontology', path: 'org.rdf', format: 'rdf', category: 'top-level' },
  { name: 'DCAT 3', path: 'dcat3.ttl', format: 'ttl', category: 'top-level' },
  { name: 'CIDOC-CRM v7.1.3', path: 'CIDOC_CRM_v7.1.3.rdf', format: 'rdf', category: 'domain' },
  { name: 'RiC-O 1.1', path: 'RiC-O_1-1.rdf', format: 'rdf', category: 'domain' },
  { name: 'BIBFRAME', path: 'bibframe.rdf', format: 'rdf', category: 'domain' },
  { name: 'PREMIS 3', path: 'premis3.owl', format: 'owl', category: 'domain' },
  { name: 'CRMgeo', path: 'CRMgeo_v1_2.rdfs', format: 'rdf', category: 'domain' },
  { name: 'PiCo', path: 'pico.ttl', format: 'ttl', category: 'domain' },
  { name: 'TOOI', path: 'tooiont.ttl', format: 'ttl', category: 'domain' },
  { name: 'OASIS', path: 'oasis.owl', format: 'owl', category: 'domain' },
  { name: 'OMRSE', path: 'omrse.owl', format: 'owl', category: 'domain' },
  { name: 'ERA Ontology', path: 'era_ontology.ttl', format: 'ttl', category: 'domain' },
  { name: 'EBG Ontology', path: 'ebg-ontology.ttl', format: 'ttl', category: 'domain' },
  { name: 'FIBO', path: 'fibo.rdf', format: 'rdf', category: 'domain' },
  { name: 'GLEIF Base', path: 'gleif_base.ttl', format: 'ttl', category: 'domain' },
  { name: 'GLEIF L1', path: 'gleif_l1.ttl', format: 'ttl', category: 'domain' },
  { name: 'GLEIF L2', path: 'gleif_l2.ttl', format: 'ttl', category: 'domain' },
  { name: 'GLEIF Legal Form', path: 'gleif_legal_form.ttl', format: 'ttl', category: 'domain' },
  { name: 'GLEIF RA', path: 'gleif_ra.ttl', format: 'ttl', category: 'domain' },
  { name: 'GEO', path: 'geo.ttl', format: 'ttl', category: 'utility' },
  { name: 'TIME', path: 'time.rdf', format: 'rdf', category: 'utility' },
  { name: 'VCard', path: 'vcard.rdf', format: 'rdf', category: 'utility' },
  { name: 'PAV', path: 'pav.rdf', format: 'rdf', category: 'utility' },
  { name: 'DOAP', path: 'doap.rdf', format: 'rdf', category: 'utility' },
  { name: 'Hydra', path: 'hydra_cg.jsonld', format: 'jsonld', category: 'utility' },
  { name: 'RegOrg', path: 'regorg.ttl', format: 'ttl', category: 'utility' },
  { name: 'CPOV (Core Public Org)', path: 'core-public-organisation-ap.ttl', format: 'ttl', category: 'application' },
  { name: 'DBpedia Classes Sample', path: 'dbpedia_classes_sample.ttl', format: 'ttl', category: 'mapping' },
  { name: 'DBpedia Heritage Classes', path: 'dbpedia_heritage_classes.ttl', format: 'ttl', category: 'mapping' },
  { name: 'DBpedia-Wikidata Mappings', path: 'dbpedia_wikidata_mappings.ttl', format: 'ttl', category: 'mapping' },
  { name: 'WOD Thing', path: 'wod_thing.ttl', format: 'ttl', category: 'mapping' },
];

// Directory holding the ontology files, resolved relative to this script.
const ONTOLOGY_DIR = join(__dirname, '../../data/ontology');
|
|
|
|
/**
 * Guesses the serialization format of an ontology file.
 *
 * The leading characters of the (trimmed) content are inspected first; the
 * file extension is only used as a fallback when the content is not
 * recognised.
 *
 * @param content - Full text of the file.
 * @param path - File name or path; only its extension is consulted.
 * @returns `'jsonld'`, `'ttl'` or `'rdf'` when recognised from the content;
 *   otherwise the lower-cased extension without the dot (`'rdfs'` is reported
 *   as `'rdf'`), or `''` when the path has no extension.
 */
function detectFormat(content: string, path: string): string {
  const trimmedContent = content.trim();

  // JSON-LD documents are JSON objects or arrays.
  if (trimmedContent.startsWith('{') || trimmedContent.startsWith('[')) {
    return 'jsonld';
  }

  // Turtle directives, plus two header comments treated as Turtle markers.
  const turtleMarkers = ['@prefix', 'PREFIX', '@base', '# baseURI', '# generated from'];
  if (turtleMarkers.some((marker) => trimmedContent.startsWith(marker))) {
    return 'ttl';
  }

  // RDF/XML: an XML prolog, an RDF root element, or an rdf namespace
  // declaration anywhere in the document.
  if (
    trimmedContent.startsWith('<?xml') ||
    trimmedContent.startsWith('<rdf:RDF') ||
    trimmedContent.startsWith('<RDF') ||
    trimmedContent.includes('xmlns:rdf=')
  ) {
    return 'rdf';
  }

  // Fall back to the extension. extname() yields '' for extension-less names
  // and ignores dots in directory names, so the whole path is never returned
  // as a "format".
  const ext = extname(path).slice(1).toLowerCase();
  return ext === 'rdfs' ? 'rdf' : ext;
}
|
|
|
|
/**
 * Counts class and property declarations in an ontology document using
 * lightweight pattern matching (no real RDF parsing is performed).
 *
 * - `'ttl'`: counts `a` / `rdf:type` statements whose object is a class or
 *   property type.
 * - `'rdf'` / `'owl'`: counts opening XML elements named after a class or
 *   property type.
 * - `'jsonld'`: counts `@type` values containing `Class` / `Property` on the
 *   nodes of the document.
 * - any other format: nothing is counted.
 *
 * @param content - Full text of the ontology document.
 * @param format - Format identifier selecting the counting strategy.
 * @returns The two counts plus any warnings (a JSON parse failure, or a note
 *   that nothing was detected).
 */
function countClassesAndProperties(content: string, format: string): { classes: number; properties: number; warnings: string[] } {
  const warnings: string[] = [];
  let classes = 0;
  let properties = 0;

  if (format === 'ttl') {
    // The Turtle keyword `a` must stand alone: requiring a non-name character
    // (or start of input) before it prevents tokens that merely end in "a",
    // such as `ex:meta owl:Class`, from being counted as declarations.
    const classPattern = /(?:rdf:type|(?:^|[^\w:.%-])a)\s+(?:rdfs:Class|owl:Class)(?![\w-])/g;
    const propertyPattern = /(?:rdf:type|(?:^|[^\w:.%-])a)\s+(?:rdf:Property|owl:ObjectProperty|owl:DatatypeProperty|owl:AnnotationProperty)(?![\w-])/g;
    classes = content.match(classPattern)?.length ?? 0;
    properties = content.match(propertyPattern)?.length ?? 0;
  } else if (format === 'rdf' || format === 'owl') {
    // Opening tags only; the lookahead ends the element name so that longer
    // names sharing the same prefix are not counted.
    const classPattern = /<(?:owl:Class|rdfs:Class)(?=[\s/>])[^>]*>/g;
    const propertyPattern = /<(?:rdf:Property|owl:ObjectProperty|owl:DatatypeProperty|owl:AnnotationProperty)(?=[\s/>])[^>]*>/g;
    classes = content.match(classPattern)?.length ?? 0;
    properties = content.match(propertyPattern)?.length ?? 0;
  } else if (format === 'jsonld') {
    try {
      const json: unknown = JSON.parse(content);
      for (const node of collectJsonLdNodes(json)) {
        for (const type of collectJsonLdTypes(node)) {
          if (type.includes('Class')) classes++;
          if (type.includes('Property')) properties++;
        }
      }
    } catch (e) {
      // Only JSON.parse can throw here; node traversal is fully guarded.
      warnings.push(`JSON-LD parse error: ${e instanceof Error ? e.message : String(e)}`);
    }
  }

  if (classes === 0 && properties === 0) {
    warnings.push('No classes or properties detected - may need parser improvements');
  }

  return { classes, properties, warnings };
}

/** Returns the candidate nodes of a parsed JSON-LD document: `@graph`, else `defines`, else the document itself. */
function collectJsonLdNodes(json: unknown): unknown[] {
  if (Array.isArray(json)) return json;
  if (typeof json !== 'object' || json === null) return [];
  const doc = json as Record<string, unknown>;
  const container = doc['@graph'] || doc['defines'] || doc;
  return Array.isArray(container) ? container : [container];
}

/** Returns the string `@type` values of a JSON-LD node, ignoring non-object nodes and non-string types. */
function collectJsonLdTypes(node: unknown): string[] {
  if (typeof node !== 'object' || node === null) return [];
  const raw = (node as Record<string, unknown>)['@type'];
  const candidates: unknown[] = Array.isArray(raw) ? raw : [raw];
  return candidates.filter((t): t is string => typeof t === 'string');
}
|
|
|
|
/**
 * Checks a single ontology file: confirms it exists, reads it, sniffs its
 * format and counts the class and property declarations it contains.
 *
 * @param file - The ontology entry to check; `file.path` is resolved against
 *   the ontology directory.
 * @returns A result that is never thrown: failures are reported through
 *   `success: false` and `error`, non-fatal findings through `warnings`.
 */
function testOntology(file: OntologyFile): TestResult {
  const filePath = join(ONTOLOGY_DIR, file.path);
  const result: TestResult = {
    name: file.name,
    path: file.path,
    success: false,
    classCount: 0,
    propertyCount: 0,
    warnings: [],
  };

  // Check if file exists
  if (!existsSync(filePath)) {
    result.error = 'File not found';
    return result;
  }

  try {
    const content = readFileSync(filePath, 'utf-8');

    // Check for placeholder content
    // NOTE(review): presumably the body of a failed download - confirm.
    if (content.trim() === "Sorry can't find that!") {
      result.error = 'File contains placeholder content';
      return result;
    }

    // Detect format. A file declared as 'owl' whose content is RDF/XML is not
    // a mismatch: both formats are counted by the same XML rules, so warning
    // about it would only add noise to the report.
    const detectedFormat = detectFormat(content, file.path);
    const owlAsRdfXml = file.format === 'owl' && detectedFormat === 'rdf';
    if (detectedFormat !== file.format && detectedFormat !== 'owl' && !owlAsRdfXml) {
      result.warnings.push(`Format mismatch: declared ${file.format}, detected ${detectedFormat}`);
    }

    // Count classes and properties
    const counts = countClassesAndProperties(content, detectedFormat);
    result.classCount = counts.classes;
    result.propertyCount = counts.properties;
    result.warnings.push(...counts.warnings);

    result.success = true;
  } catch (e) {
    // Read failures (permissions, path is a directory, ...) end up here.
    result.error = e instanceof Error ? e.message : String(e);
  }

  return result;
}
|
|
|
|
/**
 * Runs the check for every entry in ONTOLOGY_FILES, prints a report grouped
 * by category followed by a summary, and terminates the process with exit
 * code 1 if any file failed (0 otherwise).
 */
function main(): void {
  console.log('Testing Ontology Parsing\n');
  console.log('='.repeat(80));

  // Report order for the known categories. Results are bucketed while they
  // are produced, so no per-result lookup in ONTOLOGY_FILES is needed, and a
  // category missing from this list is still reported (after the known ones)
  // instead of being counted in the summary but never printed.
  const categoryOrder = ['top-level', 'domain', 'utility', 'application', 'mapping'];
  const resultsByCategory = new Map<string, TestResult[]>();
  for (const category of categoryOrder) {
    resultsByCategory.set(category, []);
  }

  const results: TestResult[] = [];
  let passed = 0;
  let failed = 0;
  let warnings = 0;

  for (const file of ONTOLOGY_FILES) {
    const result = testOntology(file);
    results.push(result);

    const bucket = resultsByCategory.get(file.category);
    if (bucket) {
      bucket.push(result);
    } else {
      resultsByCategory.set(file.category, [result]);
    }

    if (result.success) {
      passed++;
      if (result.warnings.length > 0) warnings++;
    } else {
      failed++;
    }
  }

  // Print results by category
  for (const [category, catResults] of resultsByCategory) {
    if (catResults.length === 0) continue;

    console.log(`\n## ${category.toUpperCase()} ONTOLOGIES\n`);

    for (const result of catResults) {
      const status = result.success ? '✅' : '❌';
      const counts = result.success ? `(${result.classCount} classes, ${result.propertyCount} properties)` : '';
      console.log(`${status} ${result.name} ${counts}`);

      if (result.error) {
        console.log(` ERROR: ${result.error}`);
      }

      for (const warning of result.warnings) {
        console.log(` ⚠️ ${warning}`);
      }
    }
  }

  // Summary
  console.log('\n' + '='.repeat(80));
  console.log('\n## SUMMARY\n');
  console.log(`Total: ${results.length}`);
  console.log(`Passed: ${passed}`);
  console.log(`Failed: ${failed}`);
  console.log(`With Warnings: ${warnings}`);

  // Calculate totals
  const totalClasses = results.reduce((sum, r) => sum + r.classCount, 0);
  const totalProperties = results.reduce((sum, r) => sum + r.propertyCount, 0);
  console.log(`\nTotal Classes Found: ${totalClasses}`);
  console.log(`Total Properties Found: ${totalProperties}`);

  // A non-zero exit code signals failure to the calling shell.
  process.exit(failed > 0 ? 1 : 0);
}

main();
|