fix: add @base directive support for Turtle/RDF parsing

The VCard ontology file (and 3 others) uses an @base directive with relative
URIs like <#Address>. The Turtle parser was neither extracting @base nor
resolving relative URIs against it.

Changes:
- Extract @base directive in first pass alongside @prefix
- Add baseUri parameter to expandUri() function
- Handle relative URIs starting with # (resolve against base)
- Handle empty relative URI <> (returns base URI itself)
- Pass baseUri through to processSubject() function

This fixes the 'Term not found' error for vcard:Address and similar terms
that use relative URI notation in their ontology definitions.

Affected ontologies: vcard.rdf, prov.ttl, era_ontology.ttl, ebg-ontology.ttl
This commit is contained in:
kempersc 2026-01-13 15:54:29 +01:00
parent f2b10fca19
commit 6781073d06

View file

@ -623,8 +623,9 @@ function parseTurtleOntology(content: string): ParsedOntology {
let blankNodeDepth = 0; // Track depth of blank node blocks to skip let blankNodeDepth = 0; // Track depth of blank node blocks to skip
let inMultiLineString = false; // Track if we're inside a multi-line triple-quoted string let inMultiLineString = false; // Track if we're inside a multi-line triple-quoted string
let multiLineQuoteChar = ''; // The quote character(s) for the multi-line string let multiLineQuoteChar = ''; // The quote character(s) for the multi-line string
let baseUri: string | null = null; // @base directive for relative URI resolution
// First pass: extract prefixes // First pass: extract prefixes and @base
for (const line of lines) { for (const line of lines) {
const trimmed = line.trim(); const trimmed = line.trim();
if (trimmed.startsWith('@prefix') || trimmed.startsWith('PREFIX')) { if (trimmed.startsWith('@prefix') || trimmed.startsWith('PREFIX')) {
@ -632,9 +633,58 @@ function parseTurtleOntology(content: string): ParsedOntology {
if (match) { if (match) {
prefixes[match[1] || ''] = match[2]; prefixes[match[1] || ''] = match[2];
} }
} else if (trimmed.startsWith('@base') || trimmed.startsWith('BASE')) {
// Extract @base directive: @base <http://example.org/> or BASE <http://example.org/>
const match = trimmed.match(/@?base\s+<([^>]+)>/i);
if (match) {
baseUri = match[1];
}
} }
} }
/**
 * Expand a Turtle term to a full URI using the enclosing parser state
 * (`prefixes` and `baseUri`).
 *
 * Resolution order:
 *  1. `<>` resolves to the base URI itself (when a base is set).
 *  2. `<#frag>` resolves against the base URI; a fragment already present on
 *     the base is replaced rather than appended to (RFC 3986, section 5.2.2),
 *     so a base of `http://ex.org/ns#` yields `http://ex.org/ns#frag` and
 *     not `http://ex.org/ns##frag`.
 *  3. `http://` / `https://` URIs are returned unchanged.
 *  4. `prefix:local` is expanded when `prefix` maps to a non-empty namespace.
 *  5. `:local` is expanded with the default (empty) prefix when one is declared.
 * Anything else is returned trimmed, with its angle brackets stripped.
 *
 * @param uri - Term as written in the source (may be wrapped in `<...>`).
 * @returns The expanded URI, or the cleaned input when no rule applies.
 */
const expand = (uri: string): string => {
  if (!uri) return uri;

  // Remove angle brackets if present
  const cleaned = uri.trim().replace(/^</, '').replace(/>$/, '');

  if (baseUri) {
    // Empty relative URI <> refers to the base URI itself
    if (cleaned === '') {
      return baseUri;
    }

    // Fragment-only reference (like #Address): keep everything of the base up
    // to, but not including, its own fragment, then attach the new fragment.
    if (cleaned.startsWith('#')) {
      const fragmentStart = baseUri.indexOf('#');
      const baseWithoutFragment =
        fragmentStart === -1 ? baseUri : baseUri.slice(0, fragmentStart);
      return baseWithoutFragment + cleaned;
    }
  }

  // Already a full URI
  if (cleaned.startsWith('http://') || cleaned.startsWith('https://')) {
    return cleaned;
  }

  // Prefixed names (like vcard:Address, owl:Class)
  const colonIndex = cleaned.indexOf(':');
  if (colonIndex > 0) {
    const namespace = prefixes[cleaned.substring(0, colonIndex)];
    if (namespace) {
      return namespace + cleaned.substring(colonIndex + 1);
    }
  }

  // Default-prefix names, e.g. ":Address" with PREFIX : <http://example.org/#>
  if (cleaned.startsWith(':')) {
    const defaultNamespace = prefixes[''];
    if (defaultNamespace) {
      return defaultNamespace + cleaned.substring(1);
    }
  }

  return cleaned;
};
// Second pass: parse triples // Second pass: parse triples
let lineNum = 0; let lineNum = 0;
for (const line of lines) { for (const line of lines) {
@ -734,16 +784,16 @@ function parseTurtleOntology(content: string): ParsedOntology {
if (!trimmed.startsWith(';') && !trimmed.startsWith(',') && !isIndentedLine) { if (!trimmed.startsWith(';') && !trimmed.startsWith(',') && !isIndentedLine) {
// Process previous subject if exists // Process previous subject if exists
if (currentSubject && currentTriples.length > 0) { if (currentSubject && currentTriples.length > 0) {
processSubject(currentSubject, currentTriples, prefixes, classes, properties, individuals); processSubject(currentSubject, currentTriples, prefixes, classes, properties, individuals, baseUri);
} }
// Start new subject // Start new subject
const parts = splitTurtleLine(trimmed, prefixes); const parts = splitTurtleLine(trimmed, prefixes);
if (parts.length >= 3) { if (parts.length >= 3) {
// Full triple on one line: subject predicate object(s) // Full triple on one line: subject predicate object(s)
currentSubject = expandUri(parts[0], prefixes); currentSubject = expand(parts[0]);
// Handle 'a' shorthand for rdf:type // Handle 'a' shorthand for rdf:type
const predicate = parts[1] === 'a' ? NAMESPACES.rdf + 'type' : expandUri(parts[1], prefixes); const predicate = parts[1] === 'a' ? NAMESPACES.rdf + 'type' : expand(parts[1]);
lastPredicate = predicate; // Track for comma continuations lastPredicate = predicate; // Track for comma continuations
// Handle comma-separated values (e.g., "Subject a Class1, Class2") // Handle comma-separated values (e.g., "Subject a Class1, Class2")
currentTriples = []; currentTriples = [];
@ -752,7 +802,7 @@ function parseTurtleOntology(content: string): ParsedOntology {
} }
} else if (parts.length === 1 && !trimmed.endsWith('.')) { } else if (parts.length === 1 && !trimmed.endsWith('.')) {
// Subject alone on a line (DCAT3 style): dcat:Catalog // Subject alone on a line (DCAT3 style): dcat:Catalog
currentSubject = expandUri(parts[0], prefixes); currentSubject = expand(parts[0]);
currentTriples = []; currentTriples = [];
lastPredicate = null; lastPredicate = null;
} }
@ -761,7 +811,7 @@ function parseTurtleOntology(content: string): ParsedOntology {
const parts = splitTurtleLine(trimmed.substring(1).trim(), prefixes); const parts = splitTurtleLine(trimmed.substring(1).trim(), prefixes);
if (parts.length >= 2) { if (parts.length >= 2) {
// Handle 'a' shorthand for rdf:type // Handle 'a' shorthand for rdf:type
const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expandUri(parts[0], prefixes); const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expand(parts[0]);
lastPredicate = predicate; // Track for comma continuations lastPredicate = predicate; // Track for comma continuations
// Handle comma-separated values // Handle comma-separated values
for (let i = 1; i < parts.length; i++) { for (let i = 1; i < parts.length; i++) {
@ -774,7 +824,7 @@ function parseTurtleOntology(content: string): ParsedOntology {
const parts = splitTurtleLine(trimmed, prefixes); const parts = splitTurtleLine(trimmed, prefixes);
if (parts.length >= 2) { if (parts.length >= 2) {
// Handle 'a' shorthand for rdf:type // Handle 'a' shorthand for rdf:type
const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expandUri(parts[0], prefixes); const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expand(parts[0]);
lastPredicate = predicate; // Track for comma continuations lastPredicate = predicate; // Track for comma continuations
// Handle comma-separated values (e.g., "a rdfs:Class, owl:Class") // Handle comma-separated values (e.g., "a rdfs:Class, owl:Class")
// Each part after predicate is a separate object // Each part after predicate is a separate object
@ -894,7 +944,8 @@ function processSubject(
prefixes: Record<string, string>, prefixes: Record<string, string>,
classes: Map<string, OntologyClass>, classes: Map<string, OntologyClass>,
properties: Map<string, OntologyProperty>, properties: Map<string, OntologyProperty>,
individuals: Map<string, OntologyIndividual> individuals: Map<string, OntologyIndividual>,
baseUri?: string | null
): void { ): void {
const types: string[] = []; const types: string[] = [];
const labels: LangString[] = []; const labels: LangString[] = [];
@ -915,7 +966,7 @@ function processSubject(
for (const triple of triples) { for (const triple of triples) {
const { predicate, object } = triple; const { predicate, object } = triple;
const expandedObject = expandUri(object, prefixes); const expandedObject = expandUri(object, prefixes, baseUri);
if (predicate === NAMESPACES.rdf + 'type') { if (predicate === NAMESPACES.rdf + 'type') {
types.push(expandedObject); types.push(expandedObject);
@ -1506,19 +1557,33 @@ function parseJsonLdOntology(content: string): ParsedOntology {
/** /**
* Expand prefixed URI to full URI * Expand prefixed URI to full URI
* @param uri - The URI to expand (can be prefixed like "owl:Class", relative like "#Address", or full)
* @param prefixes - Map of prefix to namespace
* @param baseUri - Optional base URI for resolving relative URIs (from @base directive)
*/ */
export function expandUri(uri: string, prefixes: Record<string, string>): string { export function expandUri(uri: string, prefixes: Record<string, string>, baseUri?: string | null): string {
if (!uri) return uri; if (!uri) return uri;
// Remove angle brackets if present // Remove angle brackets if present
let cleaned = uri.trim().replace(/^</, '').replace(/>$/, ''); let cleaned = uri.trim().replace(/^</, '').replace(/>$/, '');
// Handle empty relative URI <> - refers to the base URI itself
if (cleaned === '' && baseUri) {
return baseUri;
}
// Handle relative URIs starting with # (like #Address)
// These resolve against the base URI
if (cleaned.startsWith('#') && baseUri) {
return baseUri + cleaned;
}
// Check if it's already a full URI // Check if it's already a full URI
if (cleaned.startsWith('http://') || cleaned.startsWith('https://')) { if (cleaned.startsWith('http://') || cleaned.startsWith('https://')) {
return cleaned; return cleaned;
} }
// Handle prefixed URIs // Handle prefixed URIs (like vcard:Address, owl:Class)
const colonIndex = cleaned.indexOf(':'); const colonIndex = cleaned.indexOf(':');
if (colonIndex > 0) { if (colonIndex > 0) {
const prefix = cleaned.substring(0, colonIndex); const prefix = cleaned.substring(0, colonIndex);
@ -1529,6 +1594,12 @@ export function expandUri(uri: string, prefixes: Record<string, string>): string
} }
} }
// Handle unprefixed URIs with default prefix (empty prefix ":")
// e.g., ":Address" with PREFIX : <http://example.org/#>
if (cleaned.startsWith(':') && prefixes['']) {
return prefixes[''] + cleaned.substring(1);
}
return cleaned; return cleaned;
} }