-- LinkML Schema Storage for Heritage Custodian Ontology
-- Migration: 001_linkml_schema.sql
-- Created: 2025-12-06
--
-- Stores LinkML schema elements (classes, slots, enums) with version tracking.
-- Enables querying the schema structure via SQL and exposing it via a REST API.

-- ============================================================================
-- Schema Version Tracking
-- ============================================================================

-- One row per LinkML schema release loaded into the database. The other
-- linkml_* tables point back at this table through a version_id foreign key.
CREATE TABLE IF NOT EXISTS linkml_schema_versions (
    id          SERIAL PRIMARY KEY,

    -- Release label, e.g. "20251121" or "v0.9.6"; unique across all rows.
    version     VARCHAR(50) NOT NULL UNIQUE,

    -- Name of the schema, e.g. "heritage_custodian".
    schema_name VARCHAR(255) NOT NULL,

    description TEXT,
    created_at  TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- Flags the active release. Uniqueness of the TRUE value is enforced by
    -- the partial unique index declared below, not by the column itself.
    is_current  BOOLEAN DEFAULT FALSE,

    -- Where the schema was loaded from, e.g. "schemas/20251121/linkml/".
    source_path TEXT,

    -- Git SHA for traceability.
    git_commit  VARCHAR(40),

    -- Who loaded this version.
    loaded_by   VARCHAR(255),

    -- Free-form additional metadata; defaults to an empty JSON object.
    metadata    JSONB DEFAULT '{}'::jsonb
);

-- Only rows with is_current = TRUE are indexed, so at most one such row can
-- exist while any number of rows may be FALSE (or NULL).
CREATE UNIQUE INDEX IF NOT EXISTS idx_linkml_schema_versions_current
    ON linkml_schema_versions (is_current)
    WHERE is_current = TRUE;

-- ============================================================================
-- Classes Table
-- ============================================================================

-- LinkML class definitions, one row per class per schema version.
-- Rows are removed automatically when their schema version is deleted.
CREATE TABLE IF NOT EXISTS linkml_classes (
    id              SERIAL PRIMARY KEY,
    version_id      INTEGER NOT NULL
                    REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,

    -- Core identity ----------------------------------------------------------
    -- Short class name, e.g. "Custodian", "WebPortal".
    class_name      VARCHAR(255) NOT NULL,
    -- Full URI, e.g. "https://nde.nl/ontology/hc/class/Custodian".
    class_id        TEXT NOT NULL,
    title           VARCHAR(500),

    -- Schema relationships ---------------------------------------------------
    -- Parent class, stored by name (not a foreign key); it is matched against
    -- class_name within the same version_id by the hierarchy view/functions.
    is_a            VARCHAR(255),
    -- Ontology mapping, e.g. "crm:E39_Actor".
    class_uri       TEXT,
    abstract        BOOLEAN DEFAULT FALSE,

    -- Documentation ----------------------------------------------------------
    description     TEXT,
    -- Array of comment strings.
    comments        TEXT[],

    -- Ontology mappings (arrays of URIs) -------------------------------------
    exact_mappings  TEXT[],
    close_mappings  TEXT[],
    broad_mappings  TEXT[],
    narrow_mappings TEXT[],

    -- Raw YAML content for full fidelity --------------------------------------
    yaml_content    TEXT,

    -- Metadata ---------------------------------------------------------------
    created_at      TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at      TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- A class name may appear only once per schema version.
    UNIQUE (version_id, class_name)
);

-- Lookup indexes.
CREATE INDEX IF NOT EXISTS idx_linkml_classes_name
    ON linkml_classes (class_name);
CREATE INDEX IF NOT EXISTS idx_linkml_classes_version
    ON linkml_classes (version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_classes_is_a
    ON linkml_classes (is_a);
CREATE INDEX IF NOT EXISTS idx_linkml_classes_class_uri
    ON linkml_classes (class_uri);

-- Full-text search over class name, title and description. The indexed
-- expression is the same one search_linkml_schema() evaluates for classes.
CREATE INDEX IF NOT EXISTS idx_linkml_classes_fts
    ON linkml_classes
    USING gin(to_tsvector('english', coalesce(class_name, '') || ' ' || coalesce(title, '') || ' ' || coalesce(description, '')));

-- ============================================================================
-- Slots Table
-- ============================================================================

-- LinkML slot (property) definitions, one row per slot per schema version.
-- Rows are removed automatically when their schema version is deleted.
CREATE TABLE IF NOT EXISTS linkml_slots (
    id              SERIAL PRIMARY KEY,
    version_id      INTEGER NOT NULL
                    REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,

    -- Core identity ----------------------------------------------------------
    -- Short slot name, e.g. "preferred_label", "hc_id".
    slot_name       VARCHAR(255) NOT NULL,
    -- Full URI.
    slot_id         TEXT NOT NULL,

    -- Type information -------------------------------------------------------
    -- Target type (class, enum, or primitive), stored by name.
    range           VARCHAR(255),
    -- Ontology property mapping.
    slot_uri        TEXT,

    -- Cardinality and constraints --------------------------------------------
    required        BOOLEAN DEFAULT FALSE,
    multivalued     BOOLEAN DEFAULT FALSE,
    -- Is this the class identifier?
    identifier      BOOLEAN DEFAULT FALSE,
    -- No default: NULL is stored when the value is not supplied.
    inlined         BOOLEAN,
    inlined_as_list BOOLEAN,

    -- Validation -------------------------------------------------------------
    -- Regex pattern for validation.
    pattern         TEXT,
    minimum_value   NUMERIC,
    maximum_value   NUMERIC,

    -- Documentation ----------------------------------------------------------
    description     TEXT,
    comments        TEXT[],
    -- Array of example objects.
    examples        JSONB,

    -- Raw YAML content -------------------------------------------------------
    yaml_content    TEXT,

    -- Metadata ---------------------------------------------------------------
    created_at      TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at      TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- A slot name may appear only once per schema version.
    UNIQUE (version_id, slot_name)
);

-- Lookup indexes.
CREATE INDEX IF NOT EXISTS idx_linkml_slots_name
    ON linkml_slots (slot_name);
CREATE INDEX IF NOT EXISTS idx_linkml_slots_version
    ON linkml_slots (version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_slots_range
    ON linkml_slots (range);
CREATE INDEX IF NOT EXISTS idx_linkml_slots_slot_uri
    ON linkml_slots (slot_uri);

-- Full-text search over slot name and description. The indexed expression is
-- the same one search_linkml_schema() evaluates for slots.
CREATE INDEX IF NOT EXISTS idx_linkml_slots_fts
    ON linkml_slots
    USING gin(to_tsvector('english', coalesce(slot_name, '') || ' ' || coalesce(description, '')));

-- ============================================================================
-- Class-Slot Association (which slots belong to which classes)
-- ============================================================================

-- Junction table linking a class to each slot it declares, per schema version.
-- Rows disappear when the version, the class, or the slot is deleted.
CREATE TABLE IF NOT EXISTS linkml_class_slots (
    id         SERIAL PRIMARY KEY,
    version_id INTEGER NOT NULL
               REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,
    class_id   INTEGER NOT NULL
               REFERENCES linkml_classes(id) ON DELETE CASCADE,
    slot_id    INTEGER NOT NULL
               REFERENCES linkml_slots(id) ON DELETE CASCADE,

    -- Slot usage overrides (class-specific customization), taken from the
    -- class's slot_usage section.
    slot_usage JSONB,

    -- Position of the slot within the class definition.
    slot_order INTEGER,

    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- A slot can be attached to a given class only once per version.
    -- NOTE(review): nothing here forces class_id and slot_id to belong to the
    -- same version as version_id -- presumably the loader guarantees it.
    UNIQUE (version_id, class_id, slot_id)
);

CREATE INDEX IF NOT EXISTS idx_linkml_class_slots_class
    ON linkml_class_slots (class_id);
CREATE INDEX IF NOT EXISTS idx_linkml_class_slots_slot
    ON linkml_class_slots (slot_id);

-- ============================================================================
-- Enums Table
-- ============================================================================

-- LinkML enumeration definitions, one row per enum per schema version.
-- The permissible values live in linkml_enum_values.
CREATE TABLE IF NOT EXISTS linkml_enums (
    id           SERIAL PRIMARY KEY,
    version_id   INTEGER NOT NULL
                 REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,

    -- Core identity ----------------------------------------------------------
    -- Short enum name, e.g. "CustodianPrimaryTypeEnum".
    enum_name    VARCHAR(255) NOT NULL,
    -- Full URI.
    enum_id      TEXT NOT NULL,
    title        VARCHAR(500),

    -- Documentation ----------------------------------------------------------
    description  TEXT,
    comments     TEXT[],

    -- Raw YAML content -------------------------------------------------------
    yaml_content TEXT,

    -- Metadata ---------------------------------------------------------------
    created_at   TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at   TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- An enum name may appear only once per schema version.
    UNIQUE (version_id, enum_name)
);

CREATE INDEX IF NOT EXISTS idx_linkml_enums_name
    ON linkml_enums (enum_name);
CREATE INDEX IF NOT EXISTS idx_linkml_enums_version
    ON linkml_enums (version_id);

-- Full-text search over enum name, title and description.
-- search_linkml_schema() filters enums with exactly this expression; classes
-- and slots already have a matching GIN index, enums did not, so the enum
-- branch of the search could not use an index.
CREATE INDEX IF NOT EXISTS idx_linkml_enums_fts
    ON linkml_enums
    USING gin(to_tsvector('english', coalesce(enum_name, '') || ' ' || coalesce(title, '') || ' ' || coalesce(description, '')));

-- ============================================================================
-- Enum Values Table
-- ============================================================================

-- Permissible values of each enum. Versioning is inherited through enum_id
-- (there is no version_id column); rows are removed with their parent enum.
CREATE TABLE IF NOT EXISTS linkml_enum_values (
    id          SERIAL PRIMARY KEY,
    enum_id     INTEGER NOT NULL
                REFERENCES linkml_enums(id) ON DELETE CASCADE,

    -- Value identity ---------------------------------------------------------
    -- e.g. "GALLERY", "MUSEUM".
    value_name  VARCHAR(255) NOT NULL,

    -- Semantics --------------------------------------------------------------
    -- Wikidata or other URI.
    meaning     TEXT,
    description TEXT,
    comments    TEXT[],

    -- Ordering ---------------------------------------------------------------
    value_order INTEGER,

    -- Metadata ---------------------------------------------------------------
    created_at  TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- A value name may appear only once within an enum.
    UNIQUE (enum_id, value_name)
);

CREATE INDEX IF NOT EXISTS idx_linkml_enum_values_enum
    ON linkml_enum_values (enum_id);
CREATE INDEX IF NOT EXISTS idx_linkml_enum_values_name
    ON linkml_enum_values (value_name);
CREATE INDEX IF NOT EXISTS idx_linkml_enum_values_meaning
    ON linkml_enum_values (meaning);

-- ============================================================================
-- Prefixes Table (namespace definitions)
-- ============================================================================

-- Namespace prefix -> URI bindings declared by a schema version.
CREATE TABLE IF NOT EXISTS linkml_prefixes (
    id         SERIAL PRIMARY KEY,
    version_id INTEGER NOT NULL
               REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,

    -- Short prefix, e.g. "crm", "schema", "hc".
    prefix     VARCHAR(50) NOT NULL,
    -- Namespace URI, e.g. "http://www.cidoc-crm.org/cidoc-crm/".
    uri        TEXT NOT NULL,

    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- A prefix may be bound only once per schema version.
    UNIQUE (version_id, prefix)
);

CREATE INDEX IF NOT EXISTS idx_linkml_prefixes_version
    ON linkml_prefixes (version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_prefixes_prefix
    ON linkml_prefixes (prefix);

-- ============================================================================
-- Imports Table (schema dependencies)
-- ============================================================================

-- Import statements recorded for a schema version, either at schema level
-- (class_id IS NULL) or attached to a specific class.
CREATE TABLE IF NOT EXISTS linkml_imports (
    id          SERIAL PRIMARY KEY,
    version_id  INTEGER NOT NULL
                REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,
    -- NULL = schema-level import.
    class_id    INTEGER
                REFERENCES linkml_classes(id) ON DELETE CASCADE,

    -- e.g. "linkml:types", "../slots/hc_id".
    import_path TEXT NOT NULL,
    -- "schema", "class", "slot", "enum".
    import_type VARCHAR(50),

    created_at  TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- Prevents duplicate imports for class-level rows. It does NOT cover
    -- schema-level rows: NULL class_id values are treated as distinct by a
    -- unique constraint, so those are handled by the partial index below.
    UNIQUE (version_id, class_id, import_path)
);

-- Closes the NULL gap in the table constraint: a schema-level import path may
-- appear only once per version.
CREATE UNIQUE INDEX IF NOT EXISTS idx_linkml_imports_schema_level_unique
    ON linkml_imports (version_id, import_path)
    WHERE class_id IS NULL;

CREATE INDEX IF NOT EXISTS idx_linkml_imports_version
    ON linkml_imports (version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_imports_class
    ON linkml_imports (class_id);

-- ============================================================================
-- Views for convenient querying
-- ============================================================================

-- View: Current schema version classes
-- Exposes every linkml_classes column, restricted to rows whose schema
-- version is flagged is_current.
CREATE OR REPLACE VIEW linkml_current_classes AS
SELECT cls.*
FROM linkml_classes AS cls
INNER JOIN linkml_schema_versions AS ver
    ON ver.id = cls.version_id
WHERE ver.is_current = TRUE;

-- View: Current schema version slots
-- Exposes every linkml_slots column, restricted to rows whose schema version
-- is flagged is_current.
CREATE OR REPLACE VIEW linkml_current_slots AS
SELECT slt.*
FROM linkml_slots AS slt
INNER JOIN linkml_schema_versions AS ver
    ON ver.id = slt.version_id
WHERE ver.is_current = TRUE;

-- View: Current schema version enums
-- Exposes every linkml_enums column, restricted to rows whose schema version
-- is flagged is_current.
CREATE OR REPLACE VIEW linkml_current_enums AS
SELECT enm.*
FROM linkml_enums AS enm
INNER JOIN linkml_schema_versions AS ver
    ON ver.id = enm.version_id
WHERE ver.is_current = TRUE;

-- View: Class hierarchy (with parent class resolved)
-- For each class of the current schema version, resolves the is_a name to the
-- parent's row id. The parent is looked up by class_name within the same
-- version; parent_class_id is NULL when is_a is NULL or matches no class.
CREATE OR REPLACE VIEW linkml_class_hierarchy AS
SELECT
    child.id,
    child.class_name,
    child.is_a  AS parent_class_name,
    parent.id   AS parent_class_id,
    child.class_uri,
    child.abstract,
    child.title,
    child.description,
    ver.version
FROM linkml_classes AS child
INNER JOIN linkml_schema_versions AS ver
    ON ver.id = child.version_id
LEFT JOIN linkml_classes AS parent
    ON parent.class_name = child.is_a
   AND parent.version_id = child.version_id
WHERE ver.is_current = TRUE;

-- View: Slots with their classes
-- One row per class/slot association of the current schema version, carrying
-- the slot's definition plus the class-specific slot_usage overrides.
-- Only directly attached slots are listed; inherited slots are not expanded.
CREATE OR REPLACE VIEW linkml_slots_by_class AS
SELECT
    cls.class_name,
    slt.slot_name,
    slt.range,
    slt.slot_uri,
    slt.required,
    slt.multivalued,
    slt.description,
    link.slot_usage
FROM linkml_class_slots AS link
INNER JOIN linkml_classes AS cls
    ON cls.id = link.class_id
INNER JOIN linkml_slots AS slt
    ON slt.id = link.slot_id
INNER JOIN linkml_schema_versions AS ver
    ON ver.id = link.version_id
WHERE ver.is_current = TRUE
ORDER BY
    cls.class_name,
    link.slot_order;

-- View: Enum values expanded
-- One row per permissible value of every enum in the current schema version,
-- labelled with the owning enum's name and the version string.
CREATE OR REPLACE VIEW linkml_enum_values_expanded AS
SELECT
    enm.enum_name,
    val.value_name,
    val.meaning,
    val.description,
    val.comments,
    ver.version
FROM linkml_enum_values AS val
INNER JOIN linkml_enums AS enm
    ON enm.id = val.enum_id
INNER JOIN linkml_schema_versions AS ver
    ON ver.id = enm.version_id
WHERE ver.is_current = TRUE
ORDER BY
    enm.enum_name,
    val.value_order;

-- ============================================================================
-- Functions for common queries
-- ============================================================================

-- Function: Get all slots for a class (including inherited)
--
-- Walks the is_a chain upward from p_class_name inside one schema version and
-- returns every slot attached (through linkml_class_slots) to the class itself
-- or to any of its ancestors.
--
-- Parameters:
--   p_class_name  Name of the class to inspect.
--   p_version_id  linkml_schema_versions.id to query. NULL (the default)
--                 selects the version flagged is_current.
--
-- Returns one row per class/slot association:
--   slot_name, range, slot_uri, required, multivalued, description
--                   Taken from linkml_slots.
--   inherited_from  NULL when the slot is attached to p_class_name itself,
--                   otherwise the name of the ancestor contributing it.
-- Directly attached slots are listed first, followed by inherited ones; each
-- group is sorted by slot_name. Returns no rows when the class or the version
-- cannot be found.
CREATE OR REPLACE FUNCTION get_class_slots(p_class_name VARCHAR, p_version_id INTEGER DEFAULT NULL)
RETURNS TABLE (
    slot_name      VARCHAR,
    range          VARCHAR,
    slot_uri       TEXT,
    required       BOOLEAN,
    multivalued    BOOLEAN,
    description    TEXT,
    inherited_from VARCHAR
) AS $$
    WITH RECURSIVE class_hierarchy AS (
        -- Base case: the class itself, in the requested (or current) version.
        SELECT
            c.id,
            c.class_name,
            c.is_a,
            c.version_id
        FROM linkml_classes AS c
        WHERE c.class_name = p_class_name
          AND c.version_id = COALESCE(
                  p_version_id,
                  (SELECT v.id FROM linkml_schema_versions AS v WHERE v.is_current = TRUE)
              )

        -- UNION (not UNION ALL) discards rows that were already produced, so a
        -- cyclic is_a chain (A -> B -> A) terminates instead of recursing
        -- forever. For an acyclic chain the result is unchanged.
        UNION

        -- Recursive case: the parent of each class found so far.
        SELECT
            parent.id,
            parent.class_name,
            parent.is_a,
            parent.version_id
        FROM linkml_classes AS parent
        INNER JOIN class_hierarchy AS child
            ON parent.class_name = child.is_a
           AND parent.version_id = child.version_id
    )
    SELECT
        s.slot_name,
        s.range,
        s.slot_uri,
        s.required,
        s.multivalued,
        s.description,
        CASE WHEN ch.class_name = p_class_name THEN NULL ELSE ch.class_name END AS inherited_from
    FROM class_hierarchy AS ch
    INNER JOIN linkml_class_slots AS cs
        ON cs.class_id = ch.id
    INNER JOIN linkml_slots AS s
        ON s.id = cs.slot_id
    -- TRUE sorts before FALSE under DESC: the class's own slots come first.
    ORDER BY ch.class_name = p_class_name DESC, s.slot_name;
$$ LANGUAGE SQL STABLE;

-- Function: Get class inheritance chain
--
-- Returns p_class_name followed by its ancestors, found by repeatedly matching
-- is_a against class_name within the same schema version.
--
-- Parameters:
--   p_class_name  Name of the class whose ancestry is wanted.
--   p_version_id  linkml_schema_versions.id to query. NULL (the default)
--                 selects the version flagged is_current.
--
-- Returns, ordered by level:
--   level       0 for the class itself, 1 for its parent, 2 for the
--               grandparent, and so on.
--   class_name, class_uri, abstract
--               Taken from linkml_classes.
-- Returns no rows when the class or the version cannot be found.
CREATE OR REPLACE FUNCTION get_class_inheritance(p_class_name VARCHAR, p_version_id INTEGER DEFAULT NULL)
RETURNS TABLE (
    level      INTEGER,
    class_name VARCHAR,
    class_uri  TEXT,
    abstract   BOOLEAN
) AS $$
    WITH RECURSIVE inheritance AS (
        -- Base case: the class itself, in the requested (or current) version.
        SELECT
            c.id,
            c.class_name,
            c.is_a,
            c.class_uri,
            c.abstract,
            c.version_id,
            0 AS level,
            ARRAY[c.id] AS visited_ids
        FROM linkml_classes AS c
        WHERE c.class_name = p_class_name
          AND c.version_id = COALESCE(
                  p_version_id,
                  (SELECT v.id FROM linkml_schema_versions AS v WHERE v.is_current = TRUE)
              )

        UNION ALL

        -- Recursive case: step to the parent of the previous row.
        SELECT
            parent.id,
            parent.class_name,
            parent.is_a,
            parent.class_uri,
            parent.abstract,
            parent.version_id,
            child.level + 1,
            child.visited_ids || parent.id
        FROM linkml_classes AS parent
        INNER JOIN inheritance AS child
            ON parent.class_name = child.is_a
           AND parent.version_id = child.version_id
        -- Cycle guard: level changes on every step, so duplicate elimination
        -- cannot stop a cyclic is_a chain. Refuse to revisit a class that is
        -- already on the path instead.
        WHERE parent.id <> ALL (child.visited_ids)
    )
    SELECT
        inh.level,
        inh.class_name,
        inh.class_uri,
        inh.abstract
    FROM inheritance AS inh
    ORDER BY inh.level;
$$ LANGUAGE SQL STABLE;

-- Function: Search across all schema elements
--
-- Full-text search (English configuration) over classes, slots and enums of
-- one schema version.
--
-- Parameters:
--   p_query       Free-text search string; parsed with plainto_tsquery.
--   p_version_id  linkml_schema_versions.id to search. NULL (the default)
--                 selects the version flagged is_current.
--
-- Returns, best match first:
--   element_type  'class', 'slot' or 'enum'.
--   element_name  class_name / slot_name / enum_name.
--   element_uri   class_uri for classes, slot_uri for slots, enum_id for enums.
--   description   The element's description.
--   rank          ts_rank score of the element against the query.
-- Ties on rank are broken by element_type, then element_name, so the output
-- order is deterministic. Returns no rows when p_query is NULL or contains no
-- searchable terms, or when no version can be resolved.
CREATE OR REPLACE FUNCTION search_linkml_schema(p_query TEXT, p_version_id INTEGER DEFAULT NULL)
RETURNS TABLE (
    element_type VARCHAR,
    element_name VARCHAR,
    element_uri  TEXT,
    description  TEXT,
    rank         REAL
) AS $$
DECLARE
    v_version_id INTEGER := p_version_id;
    -- Parsed once and reused by every branch below.
    v_tsquery    tsquery := plainto_tsquery('english', p_query);
BEGIN
    -- Get version ID (current if not specified).
    IF v_version_id IS NULL THEN
        SELECT v.id
        INTO v_version_id
        FROM linkml_schema_versions AS v
        WHERE v.is_current = TRUE;
    END IF;

    -- Nothing can match without a version or without any query terms
    -- (numnode() = 0 means the query reduced to nothing, e.g. only stop words).
    IF v_version_id IS NULL OR v_tsquery IS NULL OR numnode(v_tsquery) = 0 THEN
        RETURN;
    END IF;

    -- The to_tsvector expressions are kept textually identical to the GIN
    -- expression indexes on the underlying tables so those indexes stay usable.
    -- Output columns are referenced through the "hits" alias because their
    -- names are also PL/pgSQL output variables of this function.
    RETURN QUERY
    SELECT
        hits.element_type,
        hits.element_name,
        hits.element_uri,
        hits.description,
        hits.rank
    FROM (
        -- Search classes
        SELECT
            CAST('class' AS VARCHAR) AS element_type,
            c.class_name             AS element_name,
            c.class_uri              AS element_uri,
            c.description            AS description,
            ts_rank(
                to_tsvector('english', coalesce(c.class_name, '') || ' ' || coalesce(c.title, '') || ' ' || coalesce(c.description, '')),
                v_tsquery
            )                        AS rank
        FROM linkml_classes AS c
        WHERE c.version_id = v_version_id
          AND to_tsvector('english', coalesce(c.class_name, '') || ' ' || coalesce(c.title, '') || ' ' || coalesce(c.description, ''))
              @@ v_tsquery

        UNION ALL

        -- Search slots
        SELECT
            CAST('slot' AS VARCHAR),
            s.slot_name,
            s.slot_uri,
            s.description,
            ts_rank(
                to_tsvector('english', coalesce(s.slot_name, '') || ' ' || coalesce(s.description, '')),
                v_tsquery
            )
        FROM linkml_slots AS s
        WHERE s.version_id = v_version_id
          AND to_tsvector('english', coalesce(s.slot_name, '') || ' ' || coalesce(s.description, ''))
              @@ v_tsquery

        UNION ALL

        -- Search enums
        SELECT
            CAST('enum' AS VARCHAR),
            e.enum_name,
            e.enum_id,
            e.description,
            ts_rank(
                to_tsvector('english', coalesce(e.enum_name, '') || ' ' || coalesce(e.title, '') || ' ' || coalesce(e.description, '')),
                v_tsquery
            )
        FROM linkml_enums AS e
        WHERE e.version_id = v_version_id
          AND to_tsvector('english', coalesce(e.enum_name, '') || ' ' || coalesce(e.title, '') || ' ' || coalesce(e.description, ''))
              @@ v_tsquery
    ) AS hits
    ORDER BY
        hits.rank DESC,
        hits.element_type,
        hits.element_name;
END;
$$ LANGUAGE plpgsql STABLE;

-- ============================================================================
-- Statistics View
-- ============================================================================

-- One row per schema version (current or not) with the number of classes,
-- slots, enums, enum values and prefixes stored for it; newest version first.
-- Enum values carry no version_id of their own, so they are counted through
-- their parent enum.
CREATE OR REPLACE VIEW linkml_schema_stats AS
SELECT
    ver.version,
    ver.schema_name,
    ver.is_current,
    ver.created_at,
    (
        SELECT COUNT(*)
        FROM linkml_classes AS cls
        WHERE cls.version_id = ver.id
    ) AS class_count,
    (
        SELECT COUNT(*)
        FROM linkml_slots AS slt
        WHERE slt.version_id = ver.id
    ) AS slot_count,
    (
        SELECT COUNT(*)
        FROM linkml_enums AS enm
        WHERE enm.version_id = ver.id
    ) AS enum_count,
    (
        SELECT COUNT(*)
        FROM linkml_enum_values AS val
        INNER JOIN linkml_enums AS enm
            ON enm.id = val.enum_id
        WHERE enm.version_id = ver.id
    ) AS enum_value_count,
    (
        SELECT COUNT(*)
        FROM linkml_prefixes AS pfx
        WHERE pfx.version_id = ver.id
    ) AS prefix_count
FROM linkml_schema_versions AS ver
ORDER BY ver.created_at DESC;

-- ============================================================================
-- Permissions
-- ============================================================================

-- Grant access to the glam_api user.
-- NOTE(review): these grants cover every table, sequence and function in the
-- public schema, not only the linkml_* objects created by this migration.
-- Confirm that breadth is intended, and that the glam_api role exists before
-- this file runs.
GRANT SELECT, INSERT, UPDATE, DELETE
    ON ALL TABLES IN SCHEMA public
    TO glam_api;

GRANT USAGE, SELECT
    ON ALL SEQUENCES IN SCHEMA public
    TO glam_api;

GRANT EXECUTE
    ON ALL FUNCTIONS IN SCHEMA public
    TO glam_api;

-- Object comments
-- Catalog descriptions for the objects created above (this section issues
-- COMMENT ON statements only; it does not commit a transaction).
COMMENT ON TABLE linkml_schema_versions IS 'Tracks LinkML schema versions loaded into the database';
COMMENT ON TABLE linkml_classes IS 'LinkML class definitions with ontology mappings';
COMMENT ON TABLE linkml_slots IS 'LinkML slot (property) definitions';
COMMENT ON TABLE linkml_class_slots IS 'Class-slot associations (which slots belong to which classes) with slot_usage overrides';
COMMENT ON TABLE linkml_enums IS 'LinkML enumeration definitions';
COMMENT ON TABLE linkml_enum_values IS 'Permissible values for LinkML enumerations';
COMMENT ON TABLE linkml_prefixes IS 'Namespace prefix definitions for each schema version';
COMMENT ON TABLE linkml_imports IS 'Schema-level and class-level imports (schema dependencies)';
COMMENT ON VIEW linkml_schema_stats IS 'Summary statistics for each schema version';