fix: add @base directive support for Turtle/RDF parsing
The VCard ontology file (and three others) use the @base directive together with relative URIs such as <#Address>. The Turtle parser was neither extracting @base nor resolving relative URIs against it.

Changes:
- Extract the @base directive in the first pass, alongside @prefix
- Add a baseUri parameter to the expandUri() function
- Handle relative URIs starting with # (resolved against the base URI)
- Handle the empty relative URI <> (returns the base URI itself)
- Pass baseUri through to the processSubject() function

This fixes the 'Term not found' error for vcard:Address and similar terms that use relative URI notation in their ontology definitions.

Affected ontologies: vcard.rdf, prov.ttl, era_ontology.ttl, ebg-ontology.ttl
This commit is contained in:
parent
f2b10fca19
commit
6781073d06
1 changed files with 82 additions and 11 deletions
|
|
@ -623,8 +623,9 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
|||
let blankNodeDepth = 0; // Track depth of blank node blocks to skip
|
||||
let inMultiLineString = false; // Track if we're inside a multi-line triple-quoted string
|
||||
let multiLineQuoteChar = ''; // The quote character(s) for the multi-line string
|
||||
let baseUri: string | null = null; // @base directive for relative URI resolution
|
||||
|
||||
// First pass: extract prefixes
|
||||
// First pass: extract prefixes and @base
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
if (trimmed.startsWith('@prefix') || trimmed.startsWith('PREFIX')) {
|
||||
|
|
@ -632,9 +633,58 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
|||
if (match) {
|
||||
prefixes[match[1] || ''] = match[2];
|
||||
}
|
||||
} else if (trimmed.startsWith('@base') || trimmed.startsWith('BASE')) {
|
||||
// Extract @base directive: @base <http://example.org/> or BASE <http://example.org/>
|
||||
const match = trimmed.match(/@?base\s+<([^>]+)>/i);
|
||||
if (match) {
|
||||
baseUri = match[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper to expand a subject/predicate token into a full URI, with @base support.
//
// Delegates to the shared expandUri() instead of carrying a private copy of its
// rules. processSubject() expands objects through expandUri(object, prefixes,
// baseUri), so routing subjects and predicates through the same function
// guarantees both sides of a triple are resolved identically (empty <>,
// #fragment relative URIs, absolute http(s) URIs, prefixed names and the
// default ":" prefix).
//
// `prefixes` and `baseUri` are captured from the enclosing scope and read at
// call time; both are filled in by the first pass above before any triple is
// parsed.
const expand = (uri: string): string => expandUri(uri, prefixes, baseUri);
|
||||
|
||||
// Second pass: parse triples
|
||||
let lineNum = 0;
|
||||
for (const line of lines) {
|
||||
|
|
@ -734,16 +784,16 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
|||
if (!trimmed.startsWith(';') && !trimmed.startsWith(',') && !isIndentedLine) {
|
||||
// Process previous subject if exists
|
||||
if (currentSubject && currentTriples.length > 0) {
|
||||
processSubject(currentSubject, currentTriples, prefixes, classes, properties, individuals);
|
||||
processSubject(currentSubject, currentTriples, prefixes, classes, properties, individuals, baseUri);
|
||||
}
|
||||
|
||||
// Start new subject
|
||||
const parts = splitTurtleLine(trimmed, prefixes);
|
||||
if (parts.length >= 3) {
|
||||
// Full triple on one line: subject predicate object(s)
|
||||
currentSubject = expandUri(parts[0], prefixes);
|
||||
currentSubject = expand(parts[0]);
|
||||
// Handle 'a' shorthand for rdf:type
|
||||
const predicate = parts[1] === 'a' ? NAMESPACES.rdf + 'type' : expandUri(parts[1], prefixes);
|
||||
const predicate = parts[1] === 'a' ? NAMESPACES.rdf + 'type' : expand(parts[1]);
|
||||
lastPredicate = predicate; // Track for comma continuations
|
||||
// Handle comma-separated values (e.g., "Subject a Class1, Class2")
|
||||
currentTriples = [];
|
||||
|
|
@ -752,7 +802,7 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
|||
}
|
||||
} else if (parts.length === 1 && !trimmed.endsWith('.')) {
|
||||
// Subject alone on a line (DCAT3 style): dcat:Catalog
|
||||
currentSubject = expandUri(parts[0], prefixes);
|
||||
currentSubject = expand(parts[0]);
|
||||
currentTriples = [];
|
||||
lastPredicate = null;
|
||||
}
|
||||
|
|
@ -761,7 +811,7 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
|||
const parts = splitTurtleLine(trimmed.substring(1).trim(), prefixes);
|
||||
if (parts.length >= 2) {
|
||||
// Handle 'a' shorthand for rdf:type
|
||||
const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expandUri(parts[0], prefixes);
|
||||
const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expand(parts[0]);
|
||||
lastPredicate = predicate; // Track for comma continuations
|
||||
// Handle comma-separated values
|
||||
for (let i = 1; i < parts.length; i++) {
|
||||
|
|
@ -774,7 +824,7 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
|||
const parts = splitTurtleLine(trimmed, prefixes);
|
||||
if (parts.length >= 2) {
|
||||
// Handle 'a' shorthand for rdf:type
|
||||
const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expandUri(parts[0], prefixes);
|
||||
const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expand(parts[0]);
|
||||
lastPredicate = predicate; // Track for comma continuations
|
||||
// Handle comma-separated values (e.g., "a rdfs:Class, owl:Class")
|
||||
// Each part after predicate is a separate object
|
||||
|
|
@ -894,7 +944,8 @@ function processSubject(
|
|||
prefixes: Record<string, string>,
|
||||
classes: Map<string, OntologyClass>,
|
||||
properties: Map<string, OntologyProperty>,
|
||||
individuals: Map<string, OntologyIndividual>
|
||||
individuals: Map<string, OntologyIndividual>,
|
||||
baseUri?: string | null
|
||||
): void {
|
||||
const types: string[] = [];
|
||||
const labels: LangString[] = [];
|
||||
|
|
@ -915,7 +966,7 @@ function processSubject(
|
|||
|
||||
for (const triple of triples) {
|
||||
const { predicate, object } = triple;
|
||||
const expandedObject = expandUri(object, prefixes);
|
||||
const expandedObject = expandUri(object, prefixes, baseUri);
|
||||
|
||||
if (predicate === NAMESPACES.rdf + 'type') {
|
||||
types.push(expandedObject);
|
||||
|
|
@ -1506,19 +1557,33 @@ function parseJsonLdOntology(content: string): ParsedOntology {
|
|||
|
||||
/**
|
||||
* Expand prefixed URI to full URI
|
||||
* @param uri - The URI to expand (can be prefixed like "owl:Class", relative like "#Address", or full)
|
||||
* @param prefixes - Map of prefix to namespace
|
||||
* @param baseUri - Optional base URI for resolving relative URIs (from @base directive)
|
||||
*/
|
||||
export function expandUri(uri: string, prefixes: Record<string, string>): string {
|
||||
export function expandUri(uri: string, prefixes: Record<string, string>, baseUri?: string | null): string {
|
||||
if (!uri) return uri;
|
||||
|
||||
// Remove angle brackets if present
|
||||
let cleaned = uri.trim().replace(/^</, '').replace(/>$/, '');
|
||||
|
||||
// Handle empty relative URI <> - refers to the base URI itself
|
||||
if (cleaned === '' && baseUri) {
|
||||
return baseUri;
|
||||
}
|
||||
|
||||
// Handle relative URIs starting with # (like #Address)
|
||||
// These resolve against the base URI
|
||||
if (cleaned.startsWith('#') && baseUri) {
|
||||
return baseUri + cleaned;
|
||||
}
|
||||
|
||||
// Check if it's already a full URI
|
||||
if (cleaned.startsWith('http://') || cleaned.startsWith('https://')) {
|
||||
return cleaned;
|
||||
}
|
||||
|
||||
// Handle prefixed URIs
|
||||
// Handle prefixed URIs (like vcard:Address, owl:Class)
|
||||
const colonIndex = cleaned.indexOf(':');
|
||||
if (colonIndex > 0) {
|
||||
const prefix = cleaned.substring(0, colonIndex);
|
||||
|
|
@ -1529,6 +1594,12 @@ export function expandUri(uri: string, prefixes: Record<string, string>): string
|
|||
}
|
||||
}
|
||||
|
||||
// Handle unprefixed URIs with default prefix (empty prefix ":")
|
||||
// e.g., ":Address" with PREFIX : <http://example.org/#>
|
||||
if (cleaned.startsWith(':') && prefixes['']) {
|
||||
return prefixes[''] + cleaned.substring(1);
|
||||
}
|
||||
|
||||
return cleaned;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue