-- glam/infrastructure/sql/001_linkml_schema.sql
-- 2025-12-07 00:26:01 +01:00
--
-- 498 lines
-- 18 KiB
-- PL/PgSQL

-- LinkML Schema Storage for Heritage Custodian Ontology
-- Migration: 001_linkml_schema.sql
-- Created: 2025-12-06
--
-- Stores LinkML schema elements (classes, slots, enums) with version tracking
-- Enables querying schema structure via SQL and exposing via REST API
-- ============================================================================
-- Schema Version Tracking
-- ============================================================================
-- Registry of loaded schema versions; one row per version label.
CREATE TABLE IF NOT EXISTS linkml_schema_versions (
    id          SERIAL,
    version     VARCHAR(50)  NOT NULL,      -- version label, e.g., "20251121", "v0.9.6"
    schema_name VARCHAR(255) NOT NULL,      -- e.g., "heritage_custodian"
    description TEXT,
    created_at  TIMESTAMPTZ DEFAULT NOW(),
    is_current  BOOLEAN DEFAULT FALSE,      -- flags the active version
    source_path TEXT,                       -- e.g., "schemas/20251121/linkml/"
    git_commit  VARCHAR(40),                -- Git SHA for traceability
    loaded_by   VARCHAR(255),               -- who loaded this version
    metadata    JSONB DEFAULT '{}'::jsonb,  -- free-form additional metadata
    PRIMARY KEY (id),
    UNIQUE (version)
);
-- Partial unique index: only rows with is_current = TRUE are indexed, so at
-- most one version can be flagged as current at any time.
CREATE UNIQUE INDEX IF NOT EXISTS idx_linkml_schema_versions_current
    ON linkml_schema_versions (is_current)
    WHERE is_current = TRUE;
-- ============================================================================
-- Classes Table
-- ============================================================================
-- LinkML class definitions; a class name is unique within a schema version.
CREATE TABLE IF NOT EXISTS linkml_classes (
    id              SERIAL,
    version_id      INTEGER NOT NULL,
    -- Identity
    class_name      VARCHAR(255) NOT NULL,  -- short name, e.g., "Custodian", "WebPortal"
    class_id        TEXT NOT NULL,          -- full URI, e.g., "https://nde.nl/ontology/hc/class/Custodian"
    title           VARCHAR(500),
    -- Position in the schema
    is_a            VARCHAR(255),           -- name of the parent class
    class_uri       TEXT,                   -- ontology mapping, e.g., "crm:E39_Actor"
    abstract        BOOLEAN DEFAULT FALSE,
    -- Human-readable documentation
    description     TEXT,
    comments        TEXT[],                 -- array of comment strings
    -- Ontology mappings (arrays of URIs)
    exact_mappings  TEXT[],
    close_mappings  TEXT[],
    broad_mappings  TEXT[],
    narrow_mappings TEXT[],
    -- Original YAML, kept for full fidelity
    yaml_content    TEXT,
    -- Bookkeeping
    created_at      TIMESTAMPTZ DEFAULT NOW(),
    updated_at      TIMESTAMPTZ DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (version_id) REFERENCES linkml_schema_versions (id) ON DELETE CASCADE,
    UNIQUE (version_id, class_name)
);
-- Lookup indexes
CREATE INDEX IF NOT EXISTS idx_linkml_classes_name
    ON linkml_classes (class_name);
CREATE INDEX IF NOT EXISTS idx_linkml_classes_version
    ON linkml_classes (version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_classes_is_a
    ON linkml_classes (is_a);
CREATE INDEX IF NOT EXISTS idx_linkml_classes_class_uri
    ON linkml_classes (class_uri);
-- Full-text index over name, title and description (English configuration)
CREATE INDEX IF NOT EXISTS idx_linkml_classes_fts
    ON linkml_classes
    USING gin (
        to_tsvector(
            'english',
            coalesce(class_name, '') || ' ' || coalesce(title, '') || ' ' || coalesce(description, '')
        )
    );
-- ============================================================================
-- Slots Table
-- ============================================================================
-- LinkML slot (property) definitions; a slot name is unique within a schema version.
CREATE TABLE IF NOT EXISTS linkml_slots (
    id              SERIAL,
    version_id      INTEGER NOT NULL,
    -- Identity
    slot_name       VARCHAR(255) NOT NULL,  -- e.g., "preferred_label", "hc_id"
    slot_id         TEXT NOT NULL,          -- full URI
    -- Typing
    range           VARCHAR(255),           -- target type (class, enum, or primitive)
    slot_uri        TEXT,                   -- ontology property mapping
    -- Cardinality and structural flags
    required        BOOLEAN DEFAULT FALSE,
    multivalued     BOOLEAN DEFAULT FALSE,
    identifier      BOOLEAN DEFAULT FALSE,  -- marks the class identifier slot
    inlined         BOOLEAN,
    inlined_as_list BOOLEAN,
    -- Value validation
    pattern         TEXT,                   -- regex pattern for validation
    minimum_value   NUMERIC,
    maximum_value   NUMERIC,
    -- Human-readable documentation
    description     TEXT,
    comments        TEXT[],
    examples        JSONB,                  -- array of example objects
    -- Original YAML
    yaml_content    TEXT,
    -- Bookkeeping
    created_at      TIMESTAMPTZ DEFAULT NOW(),
    updated_at      TIMESTAMPTZ DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (version_id) REFERENCES linkml_schema_versions (id) ON DELETE CASCADE,
    UNIQUE (version_id, slot_name)
);
-- Lookup indexes
CREATE INDEX IF NOT EXISTS idx_linkml_slots_name
    ON linkml_slots (slot_name);
CREATE INDEX IF NOT EXISTS idx_linkml_slots_version
    ON linkml_slots (version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_slots_range
    ON linkml_slots (range);
CREATE INDEX IF NOT EXISTS idx_linkml_slots_slot_uri
    ON linkml_slots (slot_uri);
-- Full-text index over slot name and description (English configuration)
CREATE INDEX IF NOT EXISTS idx_linkml_slots_fts
    ON linkml_slots
    USING gin (
        to_tsvector(
            'english',
            coalesce(slot_name, '') || ' ' || coalesce(description, '')
        )
    );
-- ============================================================================
-- Class-Slot Association (which slots belong to which classes)
-- ============================================================================
-- Junction table linking classes to the slots they use, per schema version.
CREATE TABLE IF NOT EXISTS linkml_class_slots (
    id         SERIAL,
    version_id INTEGER NOT NULL,
    class_id   INTEGER NOT NULL,
    slot_id    INTEGER NOT NULL,
    slot_usage JSONB,                      -- class-specific overrides from the slot_usage section
    slot_order INTEGER,                    -- position within the class definition
    created_at TIMESTAMPTZ DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (version_id) REFERENCES linkml_schema_versions (id) ON DELETE CASCADE,
    FOREIGN KEY (class_id) REFERENCES linkml_classes (id) ON DELETE CASCADE,
    FOREIGN KEY (slot_id) REFERENCES linkml_slots (id) ON DELETE CASCADE,
    UNIQUE (version_id, class_id, slot_id)
);
-- Indexes for navigating the association from either side
CREATE INDEX IF NOT EXISTS idx_linkml_class_slots_class
    ON linkml_class_slots (class_id);
CREATE INDEX IF NOT EXISTS idx_linkml_class_slots_slot
    ON linkml_class_slots (slot_id);
-- ============================================================================
-- Enums Table
-- ============================================================================
-- LinkML enumeration definitions; an enum name is unique within a schema version.
CREATE TABLE IF NOT EXISTS linkml_enums (
    id SERIAL PRIMARY KEY,
    version_id INTEGER NOT NULL REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,
    -- Core identity
    enum_name VARCHAR(255) NOT NULL, -- e.g., "CustodianPrimaryTypeEnum"
    enum_id TEXT NOT NULL, -- Full URI
    title VARCHAR(500),
    -- Documentation
    description TEXT,
    comments TEXT[],
    -- Raw YAML content
    yaml_content TEXT,
    -- Metadata
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    UNIQUE(version_id, enum_name)
);
CREATE INDEX IF NOT EXISTS idx_linkml_enums_name ON linkml_enums(enum_name);
CREATE INDEX IF NOT EXISTS idx_linkml_enums_version ON linkml_enums(version_id);
-- Full-text search on enum names, titles and descriptions.
-- The expression is the one search_linkml_schema() filters enums with, so the
-- planner can use this index for that predicate (classes and slots already
-- have equivalent indexes).
CREATE INDEX IF NOT EXISTS idx_linkml_enums_fts
    ON linkml_enums USING gin(to_tsvector('english', coalesce(enum_name, '') || ' ' || coalesce(title, '') || ' ' || coalesce(description, '')));
-- ============================================================================
-- Enum Values Table
-- ============================================================================
-- Permissible values of each enum; a value name is unique within its enum.
CREATE TABLE IF NOT EXISTS linkml_enum_values (
    id          SERIAL,
    enum_id     INTEGER NOT NULL,
    value_name  VARCHAR(255) NOT NULL,  -- e.g., "GALLERY", "MUSEUM"
    meaning     TEXT,                   -- Wikidata or other URI
    description TEXT,
    comments    TEXT[],
    value_order INTEGER,                -- ordering of the value
    created_at  TIMESTAMPTZ DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (enum_id) REFERENCES linkml_enums (id) ON DELETE CASCADE,
    UNIQUE (enum_id, value_name)
);
-- Lookup indexes
CREATE INDEX IF NOT EXISTS idx_linkml_enum_values_enum
    ON linkml_enum_values (enum_id);
CREATE INDEX IF NOT EXISTS idx_linkml_enum_values_name
    ON linkml_enum_values (value_name);
CREATE INDEX IF NOT EXISTS idx_linkml_enum_values_meaning
    ON linkml_enum_values (meaning);
-- ============================================================================
-- Prefixes Table (namespace definitions)
-- ============================================================================
-- Namespace prefix declarations; a prefix is unique within a schema version.
CREATE TABLE IF NOT EXISTS linkml_prefixes (
    id         SERIAL,
    version_id INTEGER NOT NULL,
    prefix     VARCHAR(50) NOT NULL,  -- e.g., "crm", "schema", "hc"
    uri        TEXT NOT NULL,         -- e.g., "http://www.cidoc-crm.org/cidoc-crm/"
    created_at TIMESTAMPTZ DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (version_id) REFERENCES linkml_schema_versions (id) ON DELETE CASCADE,
    UNIQUE (version_id, prefix)
);
-- Lookup indexes
CREATE INDEX IF NOT EXISTS idx_linkml_prefixes_version
    ON linkml_prefixes (version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_prefixes_prefix
    ON linkml_prefixes (prefix);
-- ============================================================================
-- Imports Table (schema dependencies)
-- ============================================================================
-- Import statements recorded per schema version. A row with class_id set is
-- an import attached to that class; class_id NULL marks a schema-level import.
CREATE TABLE IF NOT EXISTS linkml_imports (
    id SERIAL PRIMARY KEY,
    version_id INTEGER NOT NULL REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,
    class_id INTEGER REFERENCES linkml_classes(id) ON DELETE CASCADE, -- NULL = schema-level import
    import_path TEXT NOT NULL, -- e.g., "linkml:types", "../slots/hc_id"
    import_type VARCHAR(50), -- "schema", "class", "slot", "enum"
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    -- Deduplicates class-level imports only: rows with class_id NULL never
    -- collide here because NULLs compare as distinct in a unique constraint.
    UNIQUE(version_id, class_id, import_path)
);
-- Closes the gap left by the table constraint: a schema-level import
-- (class_id IS NULL) may appear only once per version and path.
CREATE UNIQUE INDEX IF NOT EXISTS idx_linkml_imports_schema_level_unique
    ON linkml_imports(version_id, import_path) WHERE class_id IS NULL;
CREATE INDEX IF NOT EXISTS idx_linkml_imports_version ON linkml_imports(version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_imports_class ON linkml_imports(class_id);
-- ============================================================================
-- Views for convenient querying
-- ============================================================================
-- View: all columns of the classes that belong to the version flagged is_current
CREATE OR REPLACE VIEW linkml_current_classes AS
SELECT cls.*
FROM linkml_schema_versions AS ver
INNER JOIN linkml_classes AS cls
    ON cls.version_id = ver.id
WHERE ver.is_current IS TRUE;
-- View: all columns of the slots that belong to the version flagged is_current
CREATE OR REPLACE VIEW linkml_current_slots AS
SELECT slt.*
FROM linkml_schema_versions AS ver
INNER JOIN linkml_slots AS slt
    ON slt.version_id = ver.id
WHERE ver.is_current IS TRUE;
-- View: all columns of the enums that belong to the version flagged is_current
CREATE OR REPLACE VIEW linkml_current_enums AS
SELECT enm.*
FROM linkml_schema_versions AS ver
INNER JOIN linkml_enums AS enm
    ON enm.version_id = ver.id
WHERE ver.is_current IS TRUE;
-- View: classes of the current version together with their direct parent.
-- The parent is matched by name within the same version; parent_class_id is
-- NULL when is_a is empty or names a class that has no row in that version.
CREATE OR REPLACE VIEW linkml_class_hierarchy AS
SELECT
    child.id,
    child.class_name,
    child.is_a       AS parent_class_name,
    parent.id        AS parent_class_id,
    child.class_uri,
    child.abstract,
    child.title,
    child.description,
    ver.version
FROM linkml_schema_versions AS ver
INNER JOIN linkml_classes AS child
    ON child.version_id = ver.id
LEFT JOIN linkml_classes AS parent
    ON parent.class_name = child.is_a
   AND parent.version_id = child.version_id
WHERE ver.is_current IS TRUE;
-- View: one row per class/slot association of the current version, listing
-- the slot's definition next to the class-specific slot_usage overrides.
CREATE OR REPLACE VIEW linkml_slots_by_class AS
SELECT
    cls.class_name,
    slt.slot_name,
    slt.range,
    slt.slot_uri,
    slt.required,
    slt.multivalued,
    slt.description,
    link.slot_usage
FROM linkml_class_slots AS link
INNER JOIN linkml_classes AS cls
    ON cls.id = link.class_id
INNER JOIN linkml_slots AS slt
    ON slt.id = link.slot_id
INNER JOIN linkml_schema_versions AS ver
    ON ver.id = link.version_id
WHERE ver.is_current IS TRUE
ORDER BY
    cls.class_name,
    link.slot_order;
-- View: permissible values of every enum in the current version, labelled
-- with the enum name and the version string.
CREATE OR REPLACE VIEW linkml_enum_values_expanded AS
SELECT
    enm.enum_name,
    val.value_name,
    val.meaning,
    val.description,
    val.comments,
    ver.version
FROM linkml_enum_values AS val
INNER JOIN linkml_enums AS enm
    ON enm.id = val.enum_id
INNER JOIN linkml_schema_versions AS ver
    ON ver.id = enm.version_id
WHERE ver.is_current IS TRUE
ORDER BY
    enm.enum_name,
    val.value_order;
-- ============================================================================
-- Functions for common queries
-- ============================================================================
-- Function: Get all slots for a class (including inherited)
--
-- Parameters:
--   p_class_name  name of the class to inspect
--   p_version_id  schema version to look in; when NULL, the version flagged
--                 is_current is used (no rows are returned if there is none)
-- Returns one row per class/slot association found on the class itself or on
-- any ancestor reached through is_a. inherited_from is NULL for slots attached
-- directly to the class, otherwise the name of the ancestor supplying the slot.
-- Directly attached slots sort first, then by slot name.
--
-- Declared STABLE because the function only reads tables.
CREATE OR REPLACE FUNCTION get_class_slots(p_class_name VARCHAR, p_version_id INTEGER DEFAULT NULL)
RETURNS TABLE (
    slot_name VARCHAR,
    range VARCHAR,
    slot_uri TEXT,
    required BOOLEAN,
    multivalued BOOLEAN,
    description TEXT,
    inherited_from VARCHAR
) AS $$
    WITH RECURSIVE class_hierarchy AS (
        -- Base case: the class itself, in the requested (or current) version
        SELECT c.id, c.class_name, c.is_a, c.version_id
        FROM linkml_classes AS c
        WHERE c.class_name = p_class_name
          AND c.version_id = COALESCE(
                p_version_id,
                (SELECT v.id FROM linkml_schema_versions AS v WHERE v.is_current = TRUE)
              )
        -- UNION (not UNION ALL): a class that was already visited is discarded,
        -- so a cyclic is_a chain terminates instead of recursing forever.
        UNION
        -- Recursive case: the parent named by is_a, within the same version
        SELECT parent.id, parent.class_name, parent.is_a, parent.version_id
        FROM linkml_classes AS parent
        INNER JOIN class_hierarchy AS h
            ON parent.class_name = h.is_a
           AND parent.version_id = h.version_id
    )
    SELECT
        s.slot_name,
        s.range,
        s.slot_uri,
        s.required,
        s.multivalued,
        s.description,
        -- NULL when the slot comes from the requested class itself
        NULLIF(ch.class_name, p_class_name) AS inherited_from
    FROM class_hierarchy AS ch
    INNER JOIN linkml_class_slots AS cs
        ON cs.class_id = ch.id
    INNER JOIN linkml_slots AS s
        ON s.id = cs.slot_id
    ORDER BY ch.class_name = p_class_name DESC, s.slot_name;
$$ LANGUAGE SQL STABLE;
-- Function: Get class inheritance chain
--
-- Parameters:
--   p_class_name  name of the class whose ancestry is wanted
--   p_version_id  schema version to look in; when NULL, the version flagged
--                 is_current is used (no rows are returned if there is none)
-- Returns the class itself at level 0, its parent at level 1, and so on up the
-- is_a chain, ordered by level.
--
-- Declared STABLE because the function only reads tables.
CREATE OR REPLACE FUNCTION get_class_inheritance(p_class_name VARCHAR, p_version_id INTEGER DEFAULT NULL)
RETURNS TABLE (
    level INTEGER,
    class_name VARCHAR,
    class_uri TEXT,
    abstract BOOLEAN
) AS $$
    WITH RECURSIVE inheritance AS (
        -- Base case: the requested class
        SELECT
            c.id, c.class_name, c.is_a, c.class_uri, c.abstract, c.version_id,
            0 AS level,
            ARRAY[c.id] AS visited_ids
        FROM linkml_classes AS c
        WHERE c.class_name = p_class_name
          AND c.version_id = COALESCE(
                p_version_id,
                (SELECT v.id FROM linkml_schema_versions AS v WHERE v.is_current = TRUE)
              )
        UNION ALL
        -- Recursive case: step to the parent named by is_a
        SELECT
            parent.id, parent.class_name, parent.is_a, parent.class_uri, parent.abstract, parent.version_id,
            i.level + 1,
            i.visited_ids || parent.id
        FROM linkml_classes AS parent
        INNER JOIN inheritance AS i
            ON parent.class_name = i.is_a
           AND parent.version_id = i.version_id
        -- Stop when is_a loops back to a class already on the path; because
        -- level changes on every step, UNION alone would not end a cycle.
        WHERE parent.id <> ALL (i.visited_ids)
    )
    SELECT i.level, i.class_name, i.class_uri, i.abstract
    FROM inheritance AS i
    ORDER BY i.level;
$$ LANGUAGE SQL STABLE;
-- Function: Search across all schema elements
--
-- Parameters:
--   p_query       free-text search terms, parsed with plainto_tsquery('english', ...)
--   p_version_id  schema version to search; when NULL, the version flagged
--                 is_current is used (no rows are returned if there is none)
-- Returns matching classes, slots and enums of that version, best rank first.
-- element_uri carries class_uri for classes, slot_uri for slots and enum_id
-- for enums.
--
-- Declared STABLE because the function only reads tables.
CREATE OR REPLACE FUNCTION search_linkml_schema(p_query TEXT, p_version_id INTEGER DEFAULT NULL)
RETURNS TABLE (
    element_type VARCHAR,
    element_name VARCHAR,
    element_uri TEXT,
    description TEXT,
    rank REAL
) AS $$
DECLARE
    v_version_id INTEGER;
    -- Parsed once and reused by every branch below
    v_tsquery TSQUERY := plainto_tsquery('english', p_query);
BEGIN
    -- Get version ID (current if not specified)
    IF p_version_id IS NULL THEN
        SELECT v.id INTO v_version_id
        FROM linkml_schema_versions AS v
        WHERE v.is_current = TRUE;
    ELSE
        v_version_id := p_version_id;
    END IF;

    -- Each to_tsvector expression below is kept textually aligned with the
    -- one used for ranking in the same branch.
    RETURN QUERY
    -- Search classes
    SELECT
        'class'::VARCHAR,
        c.class_name,
        c.class_uri,
        c.description,
        ts_rank(
            to_tsvector('english', coalesce(c.class_name, '') || ' ' || coalesce(c.title, '') || ' ' || coalesce(c.description, '')),
            v_tsquery
        ) AS rank
    FROM linkml_classes c
    WHERE c.version_id = v_version_id
      AND to_tsvector('english', coalesce(c.class_name, '') || ' ' || coalesce(c.title, '') || ' ' || coalesce(c.description, ''))
          @@ v_tsquery
    UNION ALL
    -- Search slots
    SELECT
        'slot'::VARCHAR,
        s.slot_name,
        s.slot_uri,
        s.description,
        ts_rank(
            to_tsvector('english', coalesce(s.slot_name, '') || ' ' || coalesce(s.description, '')),
            v_tsquery
        ) AS rank
    FROM linkml_slots s
    WHERE s.version_id = v_version_id
      AND to_tsvector('english', coalesce(s.slot_name, '') || ' ' || coalesce(s.description, ''))
          @@ v_tsquery
    UNION ALL
    -- Search enums
    SELECT
        'enum'::VARCHAR,
        e.enum_name,
        e.enum_id,
        e.description,
        ts_rank(
            to_tsvector('english', coalesce(e.enum_name, '') || ' ' || coalesce(e.title, '') || ' ' || coalesce(e.description, '')),
            v_tsquery
        ) AS rank
    FROM linkml_enums e
    WHERE e.version_id = v_version_id
      AND to_tsvector('english', coalesce(e.enum_name, '') || ' ' || coalesce(e.title, '') || ' ' || coalesce(e.description, ''))
          @@ v_tsquery
    ORDER BY rank DESC;
END;
$$ LANGUAGE plpgsql STABLE;
-- ============================================================================
-- Statistics View
-- ============================================================================
-- Per-version element counts, newest version first. Every lateral subquery is
-- an ungrouped aggregate, so it yields exactly one row per version and the
-- cross joins neither drop nor multiply version rows.
CREATE OR REPLACE VIEW linkml_schema_stats AS
SELECT
    ver.version,
    ver.schema_name,
    ver.is_current,
    ver.created_at,
    cls.class_count,
    slt.slot_count,
    enm.enum_count,
    val.enum_value_count,
    pfx.prefix_count
FROM linkml_schema_versions AS ver
CROSS JOIN LATERAL (
    SELECT COUNT(*) AS class_count
    FROM linkml_classes AS c
    WHERE c.version_id = ver.id
) AS cls
CROSS JOIN LATERAL (
    SELECT COUNT(*) AS slot_count
    FROM linkml_slots AS s
    WHERE s.version_id = ver.id
) AS slt
CROSS JOIN LATERAL (
    SELECT COUNT(*) AS enum_count
    FROM linkml_enums AS e
    WHERE e.version_id = ver.id
) AS enm
CROSS JOIN LATERAL (
    -- Enum values carry no version of their own; count them through their enum
    SELECT COUNT(*) AS enum_value_count
    FROM linkml_enum_values AS ev
    INNER JOIN linkml_enums AS e
        ON e.id = ev.enum_id
    WHERE e.version_id = ver.id
) AS val
CROSS JOIN LATERAL (
    SELECT COUNT(*) AS prefix_count
    FROM linkml_prefixes AS p
    WHERE p.version_id = ver.id
) AS pfx
ORDER BY ver.created_at DESC;
-- ============================================================================
-- Permissions
-- ============================================================================
-- Grant access to the glam_api user
-- These grants are schema-wide: they name every table, sequence and function
-- in schema "public", not only the linkml_* objects defined in this file.
-- NOTE(review): confirm that this breadth is intended.
-- NOTE(review): role glam_api is not created in this file; presumably it is
-- provisioned elsewhere before this migration runs -- confirm.
GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO glam_api;
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO glam_api;
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO glam_api;
-- Catalog comments (stored in the database and shown by tools such as psql's \d+)
COMMENT ON TABLE linkml_schema_versions IS 'Tracks LinkML schema versions loaded into the database';
COMMENT ON TABLE linkml_classes IS 'LinkML class definitions with ontology mappings';
COMMENT ON TABLE linkml_slots IS 'LinkML slot (property) definitions';
COMMENT ON TABLE linkml_class_slots IS 'Association of LinkML classes with the slots they use, including slot_usage overrides';
COMMENT ON TABLE linkml_enums IS 'LinkML enumeration definitions';
COMMENT ON TABLE linkml_enum_values IS 'Permissible values for LinkML enumerations';
COMMENT ON TABLE linkml_prefixes IS 'Namespace prefix definitions per schema version';
COMMENT ON TABLE linkml_imports IS 'Schema-level and class-level import statements per schema version';
COMMENT ON VIEW linkml_schema_stats IS 'Summary statistics for each schema version';