-- LinkML Schema Storage for Heritage Custodian Ontology
-- Migration: 001_linkml_schema.sql
-- Created:   2025-12-06
--
-- Persists LinkML schema elements (classes, slots, enums) together with the
-- schema version they belong to, so the schema structure can be queried via
-- SQL and exposed via a REST API.

-- ============================================================================
-- Schema Version Tracking
-- ============================================================================

-- One row per loaded schema version. Every other linkml_* table hangs off
-- this one through version_id (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS linkml_schema_versions (
    id          SERIAL PRIMARY KEY,

    -- Version label, e.g. "20251121" or "v0.9.6"
    version     VARCHAR(50) NOT NULL UNIQUE,

    -- Schema name, e.g. "heritage_custodian"
    schema_name VARCHAR(255) NOT NULL,

    description TEXT,
    created_at  TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- Marks the active version; uniqueness of TRUE is enforced by
    -- idx_linkml_schema_versions_current below
    is_current  BOOLEAN DEFAULT FALSE,

    -- Where the schema was loaded from, e.g. "schemas/20251121/linkml/"
    source_path TEXT,

    -- Git SHA for traceability
    git_commit  VARCHAR(40),

    -- Who loaded this version
    loaded_by   VARCHAR(255),

    -- Free-form additional metadata
    metadata    JSONB DEFAULT '{}'::jsonb
);

-- Partial unique index: at most one row may have is_current = TRUE.
CREATE UNIQUE INDEX IF NOT EXISTS idx_linkml_schema_versions_current
    ON linkml_schema_versions (is_current)
    WHERE is_current = TRUE;

-- ============================================================================
-- Classes Table
-- ============================================================================

-- One row per LinkML class per schema version.
CREATE TABLE IF NOT EXISTS linkml_classes (
    id              SERIAL PRIMARY KEY,
    version_id      INTEGER NOT NULL
                        REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,

    -- ---- Core identity ----
    -- Short name, e.g. "Custodian", "WebPortal"
    class_name      VARCHAR(255) NOT NULL,
    -- Full URI, e.g. "https://nde.nl/ontology/hc/class/Custodian"
    class_id        TEXT NOT NULL,
    title           VARCHAR(500),

    -- ---- Schema relationships ----
    -- Parent class name (stored by name, not as a foreign key)
    is_a            VARCHAR(255),
    -- Ontology mapping, e.g. "crm:E39_Actor"
    class_uri       TEXT,
    abstract        BOOLEAN DEFAULT FALSE,

    -- ---- Documentation ----
    description     TEXT,
    -- Array of comment strings
    comments        TEXT[],

    -- ---- Ontology mappings (arrays of URIs) ----
    exact_mappings  TEXT[],
    close_mappings  TEXT[],
    broad_mappings  TEXT[],
    narrow_mappings TEXT[],

    -- ---- Raw YAML content, kept for full fidelity ----
    yaml_content    TEXT,

    -- ---- Row metadata ----
    created_at      TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at      TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- A class name may appear only once within a schema version
    UNIQUE (version_id, class_name)
);

CREATE INDEX IF NOT EXISTS idx_linkml_classes_name
    ON linkml_classes (class_name);
CREATE INDEX IF NOT EXISTS idx_linkml_classes_version
    ON linkml_classes (version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_classes_is_a
    ON linkml_classes (is_a);
CREATE INDEX IF NOT EXISTS idx_linkml_classes_class_uri
    ON linkml_classes (class_uri);

-- Full-text search over class name, title and description.
CREATE INDEX IF NOT EXISTS idx_linkml_classes_fts
    ON linkml_classes
    USING gin(
        to_tsvector(
            'english',
            coalesce(class_name, '') || ' ' || coalesce(title, '') || ' ' || coalesce(description, '')
        )
    );

-- ============================================================================
-- Slots Table
-- ============================================================================

-- One row per LinkML slot (property) per schema version.
CREATE TABLE IF NOT EXISTS linkml_slots (
    id              SERIAL PRIMARY KEY,
    version_id      INTEGER NOT NULL
                        REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,

    -- ---- Core identity ----
    -- Short name, e.g. "preferred_label", "hc_id"
    slot_name       VARCHAR(255) NOT NULL,
    -- Full URI
    slot_id         TEXT NOT NULL,

    -- ---- Type information ----
    -- Target type (class, enum, or primitive)
    range           VARCHAR(255),
    -- Ontology property mapping
    slot_uri        TEXT,

    -- ---- Cardinality and constraints ----
    required        BOOLEAN DEFAULT FALSE,
    multivalued     BOOLEAN DEFAULT FALSE,
    -- Is this the class identifier?
    identifier      BOOLEAN DEFAULT FALSE,
    inlined         BOOLEAN,
    inlined_as_list BOOLEAN,

    -- ---- Validation ----
    -- Regex pattern for validation
    pattern         TEXT,
    minimum_value   NUMERIC,
    maximum_value   NUMERIC,

    -- ---- Documentation ----
    description     TEXT,
    comments        TEXT[],
    -- Array of example objects
    examples        JSONB,

    -- ---- Raw YAML content ----
    yaml_content    TEXT,

    -- ---- Row metadata ----
    created_at      TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at      TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- A slot name may appear only once within a schema version
    UNIQUE (version_id, slot_name)
);

CREATE INDEX IF NOT EXISTS idx_linkml_slots_name
    ON linkml_slots (slot_name);
CREATE INDEX IF NOT EXISTS idx_linkml_slots_version
    ON linkml_slots (version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_slots_range
    ON linkml_slots (range);
CREATE INDEX IF NOT EXISTS idx_linkml_slots_slot_uri
    ON linkml_slots (slot_uri);

-- Full-text search over slot name and description.
CREATE INDEX IF NOT EXISTS idx_linkml_slots_fts
    ON linkml_slots
    USING gin(
        to_tsvector(
            'english',
            coalesce(slot_name, '') || ' ' || coalesce(description, '')
        )
    );

-- ============================================================================
-- Class-Slot Association (which slots belong to which classes)
-- ============================================================================

-- Junction table linking a class row to a slot row within a schema version.
CREATE TABLE IF NOT EXISTS linkml_class_slots (
    id         SERIAL PRIMARY KEY,
    version_id INTEGER NOT NULL
                   REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,
    class_id   INTEGER NOT NULL
                   REFERENCES linkml_classes(id) ON DELETE CASCADE,
    slot_id    INTEGER NOT NULL
                   REFERENCES linkml_slots(id) ON DELETE CASCADE,

    -- Class-specific overrides taken from the class's slot_usage section
    slot_usage JSONB,

    -- Position of the slot within the class definition
    slot_order INTEGER,

    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- A slot can be attached to a class only once per version
    UNIQUE (version_id, class_id, slot_id)
);

CREATE INDEX IF NOT EXISTS idx_linkml_class_slots_class
    ON linkml_class_slots (class_id);
CREATE INDEX IF NOT EXISTS idx_linkml_class_slots_slot
    ON linkml_class_slots (slot_id);

-- ============================================================================
-- Enums Table
-- ============================================================================

-- One row per LinkML enumeration per schema version.
CREATE TABLE IF NOT EXISTS linkml_enums (
    id           SERIAL PRIMARY KEY,
    version_id   INTEGER NOT NULL
                     REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,

    -- Core identity
    enum_name    VARCHAR(255) NOT NULL,  -- e.g., "CustodianPrimaryTypeEnum"
    enum_id      TEXT NOT NULL,          -- Full URI
    title        VARCHAR(500),

    -- Documentation
    description  TEXT,
    comments     TEXT[],

    -- Raw YAML content
    yaml_content TEXT,

    -- Metadata
    created_at   TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at   TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    UNIQUE(version_id, enum_name)
);

CREATE INDEX IF NOT EXISTS idx_linkml_enums_name ON linkml_enums(enum_name);
CREATE INDEX IF NOT EXISTS idx_linkml_enums_version ON linkml_enums(version_id);

-- Full-text search on enums. The expression must stay textually identical to
-- the one used for enums in search_linkml_schema() so the planner can use this
-- index for that query (classes and slots already have equivalent indexes).
CREATE INDEX IF NOT EXISTS idx_linkml_enums_fts ON linkml_enums
    USING gin(to_tsvector('english', coalesce(enum_name, '') || ' ' || coalesce(title, '') || ' ' || coalesce(description, '')));

-- ============================================================================
-- Enum Values Table
-- ============================================================================

-- Permissible values of an enum; removed together with their enum.
CREATE TABLE IF NOT EXISTS linkml_enum_values (
    id          SERIAL PRIMARY KEY,
    enum_id     INTEGER NOT NULL
                    REFERENCES linkml_enums(id) ON DELETE CASCADE,

    -- Value identity
    value_name  VARCHAR(255) NOT NULL,  -- e.g., "GALLERY", "MUSEUM"

    -- Semantics
    meaning     TEXT,                   -- Wikidata or other URI
    description TEXT,
    comments    TEXT[],

    -- Ordering
    value_order INTEGER,

    -- Metadata
    created_at  TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    UNIQUE(enum_id, value_name)
);

CREATE INDEX IF NOT EXISTS idx_linkml_enum_values_enum ON linkml_enum_values(enum_id);
CREATE INDEX IF NOT EXISTS idx_linkml_enum_values_name ON linkml_enum_values(value_name);
CREATE INDEX IF NOT EXISTS idx_linkml_enum_values_meaning ON linkml_enum_values(meaning);

-- ============================================================================
-- Prefixes Table (namespace definitions)
-- ============================================================================

CREATE TABLE IF NOT EXISTS linkml_prefixes (
    id         SERIAL PRIMARY KEY,
    version_id INTEGER NOT NULL
                   REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,

    prefix     VARCHAR(50) NOT NULL,  -- e.g., "crm", "schema", "hc"
    uri        TEXT NOT NULL,         -- e.g., "http://www.cidoc-crm.org/cidoc-crm/"

    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    UNIQUE(version_id, prefix)
);

CREATE INDEX IF NOT EXISTS idx_linkml_prefixes_version ON linkml_prefixes(version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_prefixes_prefix ON linkml_prefixes(prefix);

-- ============================================================================
-- Imports Table (schema dependencies)
-- ============================================================================

CREATE TABLE IF NOT EXISTS linkml_imports (
    id          SERIAL PRIMARY KEY,
    version_id  INTEGER NOT NULL
                    REFERENCES linkml_schema_versions(id) ON DELETE CASCADE,
    class_id    INTEGER
                    REFERENCES linkml_classes(id) ON DELETE CASCADE,  -- NULL = schema-level import

    import_path TEXT NOT NULL,  -- e.g., "linkml:types", "../slots/hc_id"
    import_type VARCHAR(50),    -- "schema", "class", "slot", "enum"

    created_at  TIMESTAMP WITH TIME ZONE DEFAULT NOW(),

    -- Only effective for class-level imports: rows with class_id IS NULL never
    -- collide here because NULLs compare as distinct in a UNIQUE constraint.
    UNIQUE(version_id, class_id, import_path)
);

CREATE INDEX IF NOT EXISTS idx_linkml_imports_version ON linkml_imports(version_id);
CREATE INDEX IF NOT EXISTS idx_linkml_imports_class ON linkml_imports(class_id);

-- Closes the gap left by the table-level UNIQUE constraint: a schema-level
-- import (class_id IS NULL) may appear only once per version and path.
CREATE UNIQUE INDEX IF NOT EXISTS idx_linkml_imports_schema_level
    ON linkml_imports (version_id, import_path)
    WHERE class_id IS NULL;

-- ============================================================================
-- Views for convenient querying
-- ============================================================================

-- View: classes of the current schema version.
-- Note: "c.*" is expanded to the column list when the view is created.
CREATE OR REPLACE VIEW linkml_current_classes AS
SELECT c.*
FROM linkml_classes AS c
INNER JOIN linkml_schema_versions AS v
    ON c.version_id = v.id
WHERE v.is_current = TRUE;

-- View: slots of the current schema version.
CREATE OR REPLACE VIEW linkml_current_slots AS
SELECT s.*
FROM linkml_slots AS s
INNER JOIN linkml_schema_versions AS v
    ON s.version_id = v.id
WHERE v.is_current = TRUE;

-- View: enums of the current schema version.
CREATE OR REPLACE VIEW linkml_current_enums AS
SELECT e.*
FROM linkml_enums AS e
INNER JOIN linkml_schema_versions AS v
    ON e.version_id = v.id
WHERE v.is_current = TRUE;

-- View: class hierarchy of the current version, with the parent class row
-- resolved by name within the same version (parent_class_id is NULL when the
-- class has no is_a or the named parent is not stored).
CREATE OR REPLACE VIEW linkml_class_hierarchy AS
SELECT
    c.id,
    c.class_name,
    c.is_a AS parent_class_name,
    p.id   AS parent_class_id,
    c.class_uri,
    c.abstract,
    c.title,
    c.description,
    v.version
FROM linkml_classes AS c
INNER JOIN linkml_schema_versions AS v
    ON c.version_id = v.id
LEFT JOIN linkml_classes AS p
    ON c.is_a = p.class_name
   AND c.version_id = p.version_id
WHERE v.is_current = TRUE;

-- View: slots attached directly to each class in the current version
-- (inherited slots are not expanded here; see get_class_slots()).
CREATE OR REPLACE VIEW linkml_slots_by_class AS
SELECT
    c.class_name,
    s.slot_name,
    s.range,
    s.slot_uri,
    s.required,
    s.multivalued,
    s.description,
    cs.slot_usage
FROM linkml_class_slots AS cs
INNER JOIN linkml_classes AS c
    ON cs.class_id = c.id
INNER JOIN linkml_slots AS s
    ON cs.slot_id = s.id
INNER JOIN linkml_schema_versions AS v
    ON cs.version_id = v.id
WHERE v.is_current = TRUE
ORDER BY c.class_name, cs.slot_order;

-- View: enum values of the current version, one row per permissible value.
CREATE OR REPLACE VIEW linkml_enum_values_expanded AS
SELECT
    e.enum_name,
    ev.value_name,
    ev.meaning,
    ev.description,
    ev.comments,
    v.version
FROM linkml_enum_values AS ev
INNER JOIN linkml_enums AS e
    ON ev.enum_id = e.id
INNER JOIN linkml_schema_versions AS v
    ON e.version_id = v.id
WHERE v.is_current = TRUE
ORDER BY e.enum_name, ev.value_order;

-- ============================================================================
-- Functions for common queries
-- ============================================================================

-- Function: Get all slots for a class (including inherited).
--
-- Parameters:
--   p_class_name  class whose slots are wanted
--   p_version_id  schema version id; NULL (default) means the version flagged
--                 is_current = TRUE
-- Returns one row per class/slot association found on the class itself or on
-- any ancestor reached through is_a within the same version. inherited_from is
-- NULL for the class's own slots, otherwise the ancestor's class_name.
-- Rows for the class's own slots come first, then ordered by slot_name.
-- Returns no rows if the class (or a current version) does not exist.
CREATE OR REPLACE FUNCTION get_class_slots(
    p_class_name VARCHAR,
    p_version_id INTEGER DEFAULT NULL
)
RETURNS TABLE (
    slot_name      VARCHAR,
    range          VARCHAR,
    slot_uri       TEXT,
    required       BOOLEAN,
    multivalued    BOOLEAN,
    description    TEXT,
    inherited_from VARCHAR
) AS $$
    WITH RECURSIVE class_hierarchy AS (
        -- Base case: the class itself, in the requested (or current) version
        SELECT
            c.id,
            c.class_name,
            c.is_a,
            c.version_id,
            ARRAY[c.id] AS visited_ids
        FROM linkml_classes AS c
        WHERE c.class_name = p_class_name
          AND c.version_id = COALESCE(
                  p_version_id,
                  (SELECT v.id FROM linkml_schema_versions AS v WHERE v.is_current = TRUE)
              )

        UNION ALL

        -- Recursive case: parent classes, resolved by name within the version
        SELECT
            parent.id,
            parent.class_name,
            parent.is_a,
            parent.version_id,
            child.visited_ids || parent.id
        FROM linkml_classes AS parent
        INNER JOIN class_hierarchy AS child
            ON parent.class_name = child.is_a
           AND parent.version_id = child.version_id
        -- Cycle guard: is_a is free text, so a self-referential or cyclic
        -- chain is possible; never revisit a class already on this path.
        WHERE parent.id <> ALL (child.visited_ids)
    )
    SELECT
        s.slot_name,
        s.range,
        s.slot_uri,
        s.required,
        s.multivalued,
        s.description,
        CASE
            WHEN ch.class_name = p_class_name THEN NULL
            ELSE ch.class_name
        END AS inherited_from
    FROM class_hierarchy AS ch
    INNER JOIN linkml_class_slots AS cs
        ON cs.class_id = ch.id
    INNER JOIN linkml_slots AS s
        ON cs.slot_id = s.id
    ORDER BY (ch.class_name = p_class_name) DESC, s.slot_name;
$$ LANGUAGE SQL STABLE;

-- Function: Get class inheritance chain.
--
-- Parameters:
--   p_class_name  class to start from
--   p_version_id  schema version id; NULL (default) means the version flagged
--                 is_current = TRUE
-- Returns the class itself at level 0 followed by each ancestor reached
-- through is_a (level 1 = parent, 2 = grandparent, ...), ordered by level.
-- Returns no rows if the class (or a current version) does not exist.
CREATE OR REPLACE FUNCTION get_class_inheritance(
    p_class_name VARCHAR,
    p_version_id INTEGER DEFAULT NULL
)
RETURNS TABLE (
    level      INTEGER,
    class_name VARCHAR,
    class_uri  TEXT,
    abstract   BOOLEAN
) AS $$
    WITH RECURSIVE inheritance AS (
        -- Base case: the starting class
        SELECT
            c.id,
            c.class_name,
            c.is_a,
            c.class_uri,
            c.abstract,
            c.version_id,
            0 AS level,
            ARRAY[c.id] AS visited_ids
        FROM linkml_classes AS c
        WHERE c.class_name = p_class_name
          AND c.version_id = COALESCE(
                  p_version_id,
                  (SELECT v.id FROM linkml_schema_versions AS v WHERE v.is_current = TRUE)
              )

        UNION ALL

        -- Recursive case: step to the parent named by is_a
        SELECT
            parent.id,
            parent.class_name,
            parent.is_a,
            parent.class_uri,
            parent.abstract,
            parent.version_id,
            child.level + 1,
            child.visited_ids || parent.id
        FROM linkml_classes AS parent
        INNER JOIN inheritance AS child
            ON parent.class_name = child.is_a
           AND parent.version_id = child.version_id
        -- Cycle guard: stop instead of looping forever on a cyclic is_a chain.
        WHERE parent.id <> ALL (child.visited_ids)
    )
    SELECT
        i.level,
        i.class_name,
        i.class_uri,
        i.abstract
    FROM inheritance AS i
    ORDER BY i.level;
$$ LANGUAGE SQL STABLE;

-- Function: Search across all schema elements.
--
-- Parameters:
--   p_query       free-text query, parsed with plainto_tsquery('english', ...)
--   p_version_id  schema version id; NULL (default) means the version flagged
--                 is_current = TRUE
-- Returns matching classes, slots and enums of that version with their
-- ts_rank score, best match first; ties are ordered by element type and name.
-- element_uri is class_uri for classes, slot_uri for slots and enum_id for
-- enums. Returns no rows if no version can be resolved.
CREATE OR REPLACE FUNCTION search_linkml_schema(
    p_query      TEXT,
    p_version_id INTEGER DEFAULT NULL
)
RETURNS TABLE (
    element_type VARCHAR,
    element_name VARCHAR,
    element_uri  TEXT,
    description  TEXT,
    rank         REAL
) AS $$
DECLARE
    v_version_id INTEGER;
    v_query      TSQUERY;
BEGIN
    -- Get version ID (current if not specified)
    v_version_id := COALESCE(
        p_version_id,
        (SELECT v.id FROM linkml_schema_versions AS v WHERE v.is_current = TRUE)
    );

    -- Parse the query once; it is reused by every branch below.
    v_query := plainto_tsquery('english', p_query);

    -- The to_tsvector(...) expressions below intentionally repeat the index
    -- expressions of idx_linkml_classes_fts / idx_linkml_slots_fts /
    -- idx_linkml_enums_fts verbatim so those indexes remain usable.
    RETURN QUERY
    -- Search classes
    SELECT
        'class'::VARCHAR AS element_type,
        c.class_name     AS element_name,
        c.class_uri      AS element_uri,
        c.description    AS description,
        ts_rank(
            to_tsvector('english', coalesce(c.class_name, '') || ' ' || coalesce(c.title, '') || ' ' || coalesce(c.description, '')),
            v_query
        ) AS rank
    FROM linkml_classes AS c
    WHERE c.version_id = v_version_id
      AND to_tsvector('english', coalesce(c.class_name, '') || ' ' || coalesce(c.title, '') || ' ' || coalesce(c.description, ''))
          @@ v_query

    UNION ALL

    -- Search slots
    SELECT
        'slot'::VARCHAR,
        s.slot_name,
        s.slot_uri,
        s.description,
        ts_rank(
            to_tsvector('english', coalesce(s.slot_name, '') || ' ' || coalesce(s.description, '')),
            v_query
        )
    FROM linkml_slots AS s
    WHERE s.version_id = v_version_id
      AND to_tsvector('english', coalesce(s.slot_name, '') || ' ' || coalesce(s.description, ''))
          @@ v_query

    UNION ALL

    -- Search enums
    SELECT
        'enum'::VARCHAR,
        e.enum_name,
        e.enum_id,
        e.description,
        ts_rank(
            to_tsvector('english', coalesce(e.enum_name, '') || ' ' || coalesce(e.title, '') || ' ' || coalesce(e.description, '')),
            v_query
        )
    FROM linkml_enums AS e
    WHERE e.version_id = v_version_id
      AND to_tsvector('english', coalesce(e.enum_name, '') || ' ' || coalesce(e.title, '') || ' ' || coalesce(e.description, ''))
          @@ v_query

    -- Best match first; type and name break ties so output is deterministic.
    ORDER BY rank DESC, element_type, element_name;
END;
$$ LANGUAGE plpgsql STABLE;

-- ============================================================================
-- Statistics View
-- ============================================================================

-- One row per schema version with element counts, newest version first.
CREATE OR REPLACE VIEW linkml_schema_stats AS
SELECT
    v.version,
    v.schema_name,
    v.is_current,
    v.created_at,
    (SELECT COUNT(*) FROM linkml_classes WHERE version_id = v.id) AS class_count,
    (SELECT COUNT(*) FROM linkml_slots WHERE version_id = v.id) AS slot_count,
    (SELECT COUNT(*) FROM linkml_enums WHERE version_id = v.id) AS enum_count,
    (SELECT COUNT(*)
     FROM linkml_enum_values AS ev
     INNER JOIN linkml_enums AS e
         ON ev.enum_id = e.id
     WHERE e.version_id = v.id) AS enum_value_count,
    (SELECT COUNT(*) FROM linkml_prefixes WHERE version_id = v.id) AS prefix_count
FROM linkml_schema_versions AS v
ORDER BY v.created_at DESC;

-- ============================================================================
-- Permissions
-- ============================================================================

-- Grant access to the glam_api user.
-- NOTE(review): these grants cover every table, sequence and function in the
-- public schema (not only the linkml_* objects created here) and fail if the
-- glam_api role does not exist - confirm both are intended.
GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO glam_api;
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO glam_api;
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO glam_api;

-- ============================================================================
-- Catalog comments (object descriptions stored in the database)
-- ============================================================================

COMMENT ON TABLE linkml_schema_versions IS 'Tracks LinkML schema versions loaded into the database';
COMMENT ON TABLE linkml_classes IS 'LinkML class definitions with ontology mappings';
COMMENT ON TABLE linkml_slots IS 'LinkML slot (property) definitions';
COMMENT ON TABLE linkml_class_slots IS 'Associates LinkML slots with the classes that use them';
COMMENT ON TABLE linkml_enums IS 'LinkML enumeration definitions';
COMMENT ON TABLE linkml_enum_values IS 'Permissible values for LinkML enumerations';
COMMENT ON TABLE linkml_prefixes IS 'Namespace prefix definitions per LinkML schema version';
COMMENT ON TABLE linkml_imports IS 'Schema-level and class-level LinkML import dependencies';
COMMENT ON VIEW linkml_schema_stats IS 'Summary statistics for each schema version';