fix: add @base directive support for Turtle/RDF parsing
The VCard ontology file (and three others) use the @base directive with relative URIs such as <#Address>. The Turtle parser neither extracted @base nor resolved relative URIs against it.

Changes:
- Extract the @base directive in the first pass, alongside @prefix
- Add a baseUri parameter to the expandUri() function
- Handle relative URIs starting with # (resolved against the base URI)
- Handle the empty relative URI <> (returns the base URI itself)
- Pass baseUri through to the processSubject() function

This fixes the 'Term not found' error for vcard:Address and similar terms that use relative URI notation in their ontology definitions.

Affected ontologies: vcard.rdf, prov.ttl, era_ontology.ttl, ebg-ontology.ttl
This commit is contained in:
parent
f2b10fca19
commit
6781073d06
1 changed files with 82 additions and 11 deletions
|
|
@ -623,8 +623,9 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
||||||
let blankNodeDepth = 0; // Track depth of blank node blocks to skip
|
let blankNodeDepth = 0; // Track depth of blank node blocks to skip
|
||||||
let inMultiLineString = false; // Track if we're inside a multi-line triple-quoted string
|
let inMultiLineString = false; // Track if we're inside a multi-line triple-quoted string
|
||||||
let multiLineQuoteChar = ''; // The quote character(s) for the multi-line string
|
let multiLineQuoteChar = ''; // The quote character(s) for the multi-line string
|
||||||
|
let baseUri: string | null = null; // @base directive for relative URI resolution
|
||||||
|
|
||||||
// First pass: extract prefixes
|
// First pass: extract prefixes and @base
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
const trimmed = line.trim();
|
const trimmed = line.trim();
|
||||||
if (trimmed.startsWith('@prefix') || trimmed.startsWith('PREFIX')) {
|
if (trimmed.startsWith('@prefix') || trimmed.startsWith('PREFIX')) {
|
||||||
|
|
@ -632,9 +633,58 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
||||||
if (match) {
|
if (match) {
|
||||||
prefixes[match[1] || ''] = match[2];
|
prefixes[match[1] || ''] = match[2];
|
||||||
}
|
}
|
||||||
|
} else if (trimmed.startsWith('@base') || trimmed.startsWith('BASE')) {
|
||||||
|
// Extract @base directive: @base <http://example.org/> or BASE <http://example.org/>
|
||||||
|
const match = trimmed.match(/@?base\s+<([^>]+)>/i);
|
||||||
|
if (match) {
|
||||||
|
baseUri = match[1];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper to expand a Turtle term into a full URI, with @base support.
//
// Reads `baseUri` and `prefixes` from the enclosing scope. Resolution order:
//   1. `<>` (empty reference)        -> the base URI itself (when a base is set)
//   2. `<#frag>` (fragment reference) -> base URI with its fragment replaced
//   3. `http://` / `https://` URIs   -> returned unchanged
//   4. `prefix:local`                -> namespace + local name, if prefix is known
//   5. `:local`                      -> default (empty) prefix namespace + local name
// Anything else is returned with only the angle brackets stripped.
const expand = (uri: string): string => {
  if (!uri) return uri;

  // Remove surrounding angle brackets if present
  const cleaned = uri.trim().replace(/^</, '').replace(/>$/, '');

  // Snapshot the base so the checks below narrow a stable local value
  const base = baseUri;
  if (base) {
    // Empty relative URI <> refers to the base URI itself
    if (cleaned === '') {
      return base;
    }

    // Fragment-only reference (like #Address): it replaces any fragment the
    // base already carries instead of being appended after it. Without this,
    // a base such as "http://example.org/ns#" would produce "ns##Address".
    if (cleaned.startsWith('#')) {
      const fragmentStart = base.indexOf('#');
      const baseWithoutFragment = fragmentStart === -1 ? base : base.substring(0, fragmentStart);
      return baseWithoutFragment + cleaned;
    }
  }

  // Already a full URI
  if (cleaned.startsWith('http://') || cleaned.startsWith('https://')) {
    return cleaned;
  }

  // Prefixed names (like vcard:Address, owl:Class)
  const colonIndex = cleaned.indexOf(':');
  if (colonIndex > 0) {
    const prefix = cleaned.substring(0, colonIndex);
    const localName = cleaned.substring(colonIndex + 1);

    if (prefixes[prefix]) {
      return prefixes[prefix] + localName;
    }
  }

  // Names using the default (empty) prefix,
  // e.g., ":Address" with PREFIX : <http://example.org/#>
  if (cleaned.startsWith(':') && prefixes['']) {
    return prefixes[''] + cleaned.substring(1);
  }

  return cleaned;
};
|
||||||
|
|
||||||
// Second pass: parse triples
|
// Second pass: parse triples
|
||||||
let lineNum = 0;
|
let lineNum = 0;
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
|
|
@ -734,16 +784,16 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
||||||
if (!trimmed.startsWith(';') && !trimmed.startsWith(',') && !isIndentedLine) {
|
if (!trimmed.startsWith(';') && !trimmed.startsWith(',') && !isIndentedLine) {
|
||||||
// Process previous subject if exists
|
// Process previous subject if exists
|
||||||
if (currentSubject && currentTriples.length > 0) {
|
if (currentSubject && currentTriples.length > 0) {
|
||||||
processSubject(currentSubject, currentTriples, prefixes, classes, properties, individuals);
|
processSubject(currentSubject, currentTriples, prefixes, classes, properties, individuals, baseUri);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start new subject
|
// Start new subject
|
||||||
const parts = splitTurtleLine(trimmed, prefixes);
|
const parts = splitTurtleLine(trimmed, prefixes);
|
||||||
if (parts.length >= 3) {
|
if (parts.length >= 3) {
|
||||||
// Full triple on one line: subject predicate object(s)
|
// Full triple on one line: subject predicate object(s)
|
||||||
currentSubject = expandUri(parts[0], prefixes);
|
currentSubject = expand(parts[0]);
|
||||||
// Handle 'a' shorthand for rdf:type
|
// Handle 'a' shorthand for rdf:type
|
||||||
const predicate = parts[1] === 'a' ? NAMESPACES.rdf + 'type' : expandUri(parts[1], prefixes);
|
const predicate = parts[1] === 'a' ? NAMESPACES.rdf + 'type' : expand(parts[1]);
|
||||||
lastPredicate = predicate; // Track for comma continuations
|
lastPredicate = predicate; // Track for comma continuations
|
||||||
// Handle comma-separated values (e.g., "Subject a Class1, Class2")
|
// Handle comma-separated values (e.g., "Subject a Class1, Class2")
|
||||||
currentTriples = [];
|
currentTriples = [];
|
||||||
|
|
@ -752,7 +802,7 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
||||||
}
|
}
|
||||||
} else if (parts.length === 1 && !trimmed.endsWith('.')) {
|
} else if (parts.length === 1 && !trimmed.endsWith('.')) {
|
||||||
// Subject alone on a line (DCAT3 style): dcat:Catalog
|
// Subject alone on a line (DCAT3 style): dcat:Catalog
|
||||||
currentSubject = expandUri(parts[0], prefixes);
|
currentSubject = expand(parts[0]);
|
||||||
currentTriples = [];
|
currentTriples = [];
|
||||||
lastPredicate = null;
|
lastPredicate = null;
|
||||||
}
|
}
|
||||||
|
|
@ -761,7 +811,7 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
||||||
const parts = splitTurtleLine(trimmed.substring(1).trim(), prefixes);
|
const parts = splitTurtleLine(trimmed.substring(1).trim(), prefixes);
|
||||||
if (parts.length >= 2) {
|
if (parts.length >= 2) {
|
||||||
// Handle 'a' shorthand for rdf:type
|
// Handle 'a' shorthand for rdf:type
|
||||||
const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expandUri(parts[0], prefixes);
|
const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expand(parts[0]);
|
||||||
lastPredicate = predicate; // Track for comma continuations
|
lastPredicate = predicate; // Track for comma continuations
|
||||||
// Handle comma-separated values
|
// Handle comma-separated values
|
||||||
for (let i = 1; i < parts.length; i++) {
|
for (let i = 1; i < parts.length; i++) {
|
||||||
|
|
@ -774,7 +824,7 @@ function parseTurtleOntology(content: string): ParsedOntology {
|
||||||
const parts = splitTurtleLine(trimmed, prefixes);
|
const parts = splitTurtleLine(trimmed, prefixes);
|
||||||
if (parts.length >= 2) {
|
if (parts.length >= 2) {
|
||||||
// Handle 'a' shorthand for rdf:type
|
// Handle 'a' shorthand for rdf:type
|
||||||
const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expandUri(parts[0], prefixes);
|
const predicate = parts[0] === 'a' ? NAMESPACES.rdf + 'type' : expand(parts[0]);
|
||||||
lastPredicate = predicate; // Track for comma continuations
|
lastPredicate = predicate; // Track for comma continuations
|
||||||
// Handle comma-separated values (e.g., "a rdfs:Class, owl:Class")
|
// Handle comma-separated values (e.g., "a rdfs:Class, owl:Class")
|
||||||
// Each part after predicate is a separate object
|
// Each part after predicate is a separate object
|
||||||
|
|
@ -894,7 +944,8 @@ function processSubject(
|
||||||
prefixes: Record<string, string>,
|
prefixes: Record<string, string>,
|
||||||
classes: Map<string, OntologyClass>,
|
classes: Map<string, OntologyClass>,
|
||||||
properties: Map<string, OntologyProperty>,
|
properties: Map<string, OntologyProperty>,
|
||||||
individuals: Map<string, OntologyIndividual>
|
individuals: Map<string, OntologyIndividual>,
|
||||||
|
baseUri?: string | null
|
||||||
): void {
|
): void {
|
||||||
const types: string[] = [];
|
const types: string[] = [];
|
||||||
const labels: LangString[] = [];
|
const labels: LangString[] = [];
|
||||||
|
|
@ -915,7 +966,7 @@ function processSubject(
|
||||||
|
|
||||||
for (const triple of triples) {
|
for (const triple of triples) {
|
||||||
const { predicate, object } = triple;
|
const { predicate, object } = triple;
|
||||||
const expandedObject = expandUri(object, prefixes);
|
const expandedObject = expandUri(object, prefixes, baseUri);
|
||||||
|
|
||||||
if (predicate === NAMESPACES.rdf + 'type') {
|
if (predicate === NAMESPACES.rdf + 'type') {
|
||||||
types.push(expandedObject);
|
types.push(expandedObject);
|
||||||
|
|
@ -1506,19 +1557,33 @@ function parseJsonLdOntology(content: string): ParsedOntology {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expand prefixed URI to full URI
|
* Expand prefixed URI to full URI
|
||||||
|
* @param uri - The URI to expand (can be prefixed like "owl:Class", relative like "#Address", or full)
|
||||||
|
* @param prefixes - Map of prefix to namespace
|
||||||
|
* @param baseUri - Optional base URI for resolving relative URIs (from @base directive)
|
||||||
*/
|
*/
|
||||||
export function expandUri(uri: string, prefixes: Record<string, string>): string {
|
export function expandUri(uri: string, prefixes: Record<string, string>, baseUri?: string | null): string {
|
||||||
if (!uri) return uri;
|
if (!uri) return uri;
|
||||||
|
|
||||||
// Remove angle brackets if present
|
// Remove angle brackets if present
|
||||||
let cleaned = uri.trim().replace(/^</, '').replace(/>$/, '');
|
let cleaned = uri.trim().replace(/^</, '').replace(/>$/, '');
|
||||||
|
|
||||||
|
// Handle empty relative URI <> - refers to the base URI itself
|
||||||
|
if (cleaned === '' && baseUri) {
|
||||||
|
return baseUri;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle relative URIs starting with # (like #Address)
|
||||||
|
// These resolve against the base URI
|
||||||
|
if (cleaned.startsWith('#') && baseUri) {
|
||||||
|
return baseUri + cleaned;
|
||||||
|
}
|
||||||
|
|
||||||
// Check if it's already a full URI
|
// Check if it's already a full URI
|
||||||
if (cleaned.startsWith('http://') || cleaned.startsWith('https://')) {
|
if (cleaned.startsWith('http://') || cleaned.startsWith('https://')) {
|
||||||
return cleaned;
|
return cleaned;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle prefixed URIs
|
// Handle prefixed URIs (like vcard:Address, owl:Class)
|
||||||
const colonIndex = cleaned.indexOf(':');
|
const colonIndex = cleaned.indexOf(':');
|
||||||
if (colonIndex > 0) {
|
if (colonIndex > 0) {
|
||||||
const prefix = cleaned.substring(0, colonIndex);
|
const prefix = cleaned.substring(0, colonIndex);
|
||||||
|
|
@ -1529,6 +1594,12 @@ export function expandUri(uri: string, prefixes: Record<string, string>): string
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle unprefixed URIs with default prefix (empty prefix ":")
|
||||||
|
// e.g., ":Address" with PREFIX : <http://example.org/#>
|
||||||
|
if (cleaned.startsWith(':') && prefixes['']) {
|
||||||
|
return prefixes[''] + cleaned.substring(1);
|
||||||
|
}
|
||||||
|
|
||||||
return cleaned;
|
return cleaned;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue