glam/backend/postgres/schema_geo.sql
2025-12-10 13:01:13 +01:00

408 lines
13 KiB
PL/PgSQL

-- ============================================================================
-- PostGIS Schema for GLAM Heritage Institutions
-- Database: glam_geo
-- ============================================================================
-- Enable the PostGIS extensions this schema relies on. IF NOT EXISTS turns both
-- statements into no-ops when the extensions are already installed (the expected case).
-- postgis supplies the GEOMETRY column type and the ST_* functions used below.
CREATE EXTENSION IF NOT EXISTS postgis;
-- NOTE(review): nothing in the visible part of this file references topology
-- objects -- confirm postgis_topology is still required.
CREATE EXTENSION IF NOT EXISTS postgis_topology;
-- ============================================================================
-- Administrative Boundaries
-- ============================================================================
-- Provinces: first-level administrative units (admin level 1).
-- Polygon and centroid geometries are stored with SRID 4326.
CREATE TABLE IF NOT EXISTS provinces (
    id            SERIAL,
    province_code VARCHAR(10)  NOT NULL,                -- e.g., "PV27" for Noord-Holland
    iso_code      VARCHAR(5),                           -- ISO 3166-2 code, e.g., "NH"
    name          VARCHAR(100) NOT NULL,
    name_local    VARCHAR(100),
    country_code  CHAR(2)      NOT NULL DEFAULT 'NL',
    geom          GEOMETRY(MULTIPOLYGON, 4326),
    centroid      GEOMETRY(POINT, 4326),
    area_km2      NUMERIC(12, 2),
    created_at    TIMESTAMPTZ  DEFAULT NOW(),
    updated_at    TIMESTAMPTZ  DEFAULT NOW(),
    PRIMARY KEY (id),
    UNIQUE (province_code)
);

-- Spatial lookup on the province polygon.
CREATE INDEX IF NOT EXISTS idx_provinces_geom
    ON provinces USING GIST (geom);

-- Filter provinces by country.
CREATE INDEX IF NOT EXISTS idx_provinces_country
    ON provinces (country_code);
-- Municipalities: second-level administrative units (admin level 2).
-- A municipality code is only unique within its country.
CREATE TABLE IF NOT EXISTS municipalities (
    id                SERIAL,
    municipality_code VARCHAR(10)  NOT NULL,            -- CBS code
    name              VARCHAR(100) NOT NULL,
    name_local        VARCHAR(100),
    province_id       INTEGER,                          -- parent province (optional)
    country_code      CHAR(2)      NOT NULL DEFAULT 'NL',
    geom              GEOMETRY(MULTIPOLYGON, 4326),
    centroid          GEOMETRY(POINT, 4326),
    area_km2          NUMERIC(12, 2),
    population        INTEGER,
    created_at        TIMESTAMPTZ  DEFAULT NOW(),
    updated_at        TIMESTAMPTZ  DEFAULT NOW(),
    PRIMARY KEY (id),
    FOREIGN KEY (province_id) REFERENCES provinces (id),
    UNIQUE (country_code, municipality_code)
);

-- Spatial lookup on the municipality polygon.
CREATE INDEX IF NOT EXISTS idx_municipalities_geom
    ON municipalities USING GIST (geom);

-- Join/filter municipalities by parent province.
CREATE INDEX IF NOT EXISTS idx_municipalities_province
    ON municipalities (province_id);
-- Historical boundaries (for historical maps).
-- Each row is one boundary polygon tied to a reference year; boundary_code is
-- required but not declared unique.
CREATE TABLE IF NOT EXISTS historical_boundaries (
    id             SERIAL,
    boundary_code  VARCHAR(50)  NOT NULL,
    name           VARCHAR(200) NOT NULL,
    name_local     VARCHAR(200),
    boundary_type  VARCHAR(50)  NOT NULL,               -- 'territory', 'county', 'diocese', etc.
    valid_from     DATE,                                -- Start of validity period
    valid_to       DATE,                                -- End of validity period (NULL = current)
    reference_year INTEGER      NOT NULL,               -- e.g., 1500 for historical map
    country_code   CHAR(2)      NOT NULL DEFAULT 'NL',
    geom           GEOMETRY(MULTIPOLYGON, 4326),
    centroid       GEOMETRY(POINT, 4326),
    area_km2       NUMERIC(12, 2),
    source_dataset VARCHAR(100),
    created_at     TIMESTAMPTZ  DEFAULT NOW(),
    PRIMARY KEY (id)
);

-- Spatial lookup on the boundary polygon.
CREATE INDEX IF NOT EXISTS idx_historical_geom
    ON historical_boundaries USING GIST (geom);

-- Select all boundaries belonging to one reference year.
CREATE INDEX IF NOT EXISTS idx_historical_year
    ON historical_boundaries (reference_year);

-- Select boundaries by kind (territory, county, ...).
CREATE INDEX IF NOT EXISTS idx_historical_type
    ON historical_boundaries (boundary_type);
-- ============================================================================
-- Heritage Institutions
-- ============================================================================
-- Institution types enum: one single-letter code per heritage institution category.
-- CREATE TYPE has no IF NOT EXISTS form, so the statement is wrapped in a DO block
-- that ignores the "already exists" error. This keeps the whole file re-runnable,
-- like the CREATE ... IF NOT EXISTS statements around it.
-- Note: an existing type is left untouched, even if its labels differ from this list.
DO $$
BEGIN
    CREATE TYPE institution_type AS ENUM (
        'G', -- Gallery
        'L', -- Library
        'A', -- Archive
        'M', -- Museum
        'O', -- Official institution
        'R', -- Research center
        'C', -- Corporation
        'U', -- Unknown
        'B', -- Botanical/Zoo
        'E', -- Education provider
        'S', -- Collecting society
        'F', -- Features
        'I', -- Intangible heritage group
        'X', -- Mixed
        'P', -- Personal collection
        'H', -- Holy sites
        'D', -- Digital platform
        'N', -- NGO
        'T'  -- Taste/smell heritage
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL; -- type was created by an earlier run; nothing to do
END
$$;
-- Main institutions table: one row per heritage institution, identified by its
-- current GHCID string and a GHCID UUID (both unique).
CREATE TABLE IF NOT EXISTS institutions (
    id                 SERIAL PRIMARY KEY,
    ghcid_current      VARCHAR(50) NOT NULL UNIQUE, -- e.g., "NL-NH-AMS-M-RM"
    ghcid_uuid         UUID NOT NULL UNIQUE,
    ghcid_numeric      NUMERIC(20, 0), -- Larger than BIGINT to handle 64-bit unsigned hashes
    -- Names
    name               VARCHAR(500) NOT NULL,
    name_verified      VARCHAR(500),
    name_source        VARCHAR(50),
    -- Type and classification
    institution_type   institution_type NOT NULL,
    type_name          VARCHAR(100),
    wikidata_types     TEXT[],
    -- Location (point geometry, SRID 4326; province is kept both as free text
    -- and as an optional reference to the provinces table)
    geom               GEOMETRY(POINT, 4326),
    address            TEXT,
    city               VARCHAR(100),
    province           VARCHAR(100),
    province_id        INTEGER REFERENCES provinces(id),
    municipality_id    INTEGER REFERENCES municipalities(id),
    country_code       CHAR(2) NOT NULL DEFAULT 'NL',
    -- Metadata
    description        TEXT,
    website            VARCHAR(500),
    phone              VARCHAR(50),
    -- External identifiers
    wikidata_id        VARCHAR(20),
    google_place_id    VARCHAR(100),
    isil_code          VARCHAR(20),
    -- Enrichment data (JSONB for flexibility)
    reviews            JSONB,
    rating             NUMERIC(2, 1),
    total_ratings      INTEGER,
    photos             JSONB,
    -- Genealogy/Archive linkage
    genealogiewerkbalk JSONB,
    -- Business status
    business_status    VARCHAR(50),
    founding_year      INTEGER,
    founding_decade    INTEGER,
    -- Timestamps
    created_at         TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at         TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Geometry index: serves bounding-box (&&) and other geometry predicates.
CREATE INDEX IF NOT EXISTS idx_institutions_geom ON institutions USING GIST (geom);

-- Geography index: find_institutions_near_point() filters with
-- ST_DWithin(geom::geography, ...). The geometry index above cannot serve that
-- cast expression, so an expression index on the same cast is required for
-- radius searches to avoid a sequential scan.
CREATE INDEX IF NOT EXISTS idx_institutions_geog ON institutions USING GIST ((geom::geography));

-- NOTE(review): ghcid_current already has an index through its UNIQUE constraint;
-- this extra index looks redundant. Kept so existing deployments stay unchanged.
CREATE INDEX IF NOT EXISTS idx_institutions_ghcid ON institutions (ghcid_current);
CREATE INDEX IF NOT EXISTS idx_institutions_type ON institutions (institution_type);
CREATE INDEX IF NOT EXISTS idx_institutions_province ON institutions (province_id);
CREATE INDEX IF NOT EXISTS idx_institutions_municipality ON institutions (municipality_id);
CREATE INDEX IF NOT EXISTS idx_institutions_wikidata ON institutions (wikidata_id);
CREATE INDEX IF NOT EXISTS idx_institutions_country ON institutions (country_code);

-- Full text search on names and descriptions.
-- Queries must use exactly this expression (same 'simple' configuration and
-- concatenation) for the planner to pick the index.
CREATE INDEX IF NOT EXISTS idx_institutions_name_fts
    ON institutions USING GIN (to_tsvector('simple', name || ' ' || COALESCE(description, '')));
-- ============================================================================
-- Archive Service Areas (werkgebied mapping)
-- ============================================================================
-- Maps an archive (by ISIL code) to each municipality in its service area
-- ("werkgebied"). One row per (archive, municipality) pair.
CREATE TABLE IF NOT EXISTS archive_service_areas (
    id                SERIAL,
    archive_isil      VARCHAR(20)  NOT NULL,
    archive_name      VARCHAR(200) NOT NULL,
    archive_website   VARCHAR(500),
    municipality_code VARCHAR(10)  NOT NULL,
    municipality_name VARCHAR(100) NOT NULL,
    is_municipal      BOOLEAN      DEFAULT FALSE,
    is_provincial     BOOLEAN      DEFAULT FALSE,
    created_at        TIMESTAMPTZ  DEFAULT NOW(),
    PRIMARY KEY (id),
    UNIQUE (archive_isil, municipality_code)
);

-- All municipalities served by a given archive.
CREATE INDEX IF NOT EXISTS idx_archive_service_isil
    ON archive_service_areas (archive_isil);

-- All archives serving a given municipality.
CREATE INDEX IF NOT EXISTS idx_archive_service_municipality
    ON archive_service_areas (municipality_code);
-- ============================================================================
-- Spatial Query Functions
-- ============================================================================
-- Find institutions within a bounding box.
--
-- Parameters:
--   min_lon, min_lat, max_lon, max_lat  corners of the envelope (SRID 4326)
--   inst_type    optional type filter; NULL (default) matches every type
--   max_results  maximum number of rows returned (default 1000)
--
-- Returns one row per matching institution with its coordinates extracted as
-- lon/lat, ordered by name. Rows with a NULL geom never match the envelope test.
--
-- The function only reads data, so it is declared STABLE. The id tiebreaker
-- makes the ordering (and therefore the LIMIT cut-off) deterministic when
-- several institutions share a name.
CREATE OR REPLACE FUNCTION find_institutions_in_bbox(
    min_lon FLOAT,
    min_lat FLOAT,
    max_lon FLOAT,
    max_lat FLOAT,
    inst_type institution_type DEFAULT NULL,
    max_results INTEGER DEFAULT 1000
)
RETURNS TABLE (
    id INTEGER,
    ghcid_current VARCHAR,
    name VARCHAR,
    institution_type institution_type,
    type_name VARCHAR,
    lon FLOAT,
    lat FLOAT,
    city VARCHAR,
    province VARCHAR,
    rating NUMERIC,
    wikidata_id VARCHAR
) AS $$
BEGIN
    -- Every column reference is qualified with the table alias because the
    -- RETURNS TABLE column names are also visible as PL/pgSQL variables.
    RETURN QUERY
    SELECT
        i.id,
        i.ghcid_current,
        i.name,
        i.institution_type,
        i.type_name,
        ST_X(i.geom)::FLOAT AS lon,
        ST_Y(i.geom)::FLOAT AS lat,
        i.city,
        i.province,
        i.rating,
        i.wikidata_id
    FROM institutions AS i
    -- && is the bounding-box overlap operator; it can use the GiST index on geom.
    WHERE i.geom && ST_MakeEnvelope(min_lon, min_lat, max_lon, max_lat, 4326)
      AND (inst_type IS NULL OR i.institution_type = inst_type)
    ORDER BY i.name, i.id
    LIMIT max_results;
END;
$$ LANGUAGE plpgsql STABLE;
-- Find institutions within a radius of a point.
--
-- Parameters:
--   lon, lat     search centre (SRID 4326)
--   radius_km    search radius in kilometres (default 10)
--   inst_type    optional type filter; NULL (default) matches every type
--   max_results  maximum number of rows returned (default 100)
--
-- Returns matching institutions with their distance to the centre in
-- kilometres, nearest first. Distances are computed on the geography type,
-- i.e. in metres, and divided by 1000.
--
-- The function only reads data, so it is declared STABLE. The id tiebreaker
-- keeps the result deterministic when several institutions are equally far away.
CREATE OR REPLACE FUNCTION find_institutions_near_point(
    lon FLOAT,
    lat FLOAT,
    radius_km FLOAT DEFAULT 10,
    inst_type institution_type DEFAULT NULL,
    max_results INTEGER DEFAULT 100
)
RETURNS TABLE (
    id INTEGER,
    ghcid_current VARCHAR,
    name VARCHAR,
    institution_type institution_type,
    type_name VARCHAR,
    distance_km FLOAT,
    city VARCHAR,
    province VARCHAR,
    rating NUMERIC
) AS $$
DECLARE
    -- Search centre, built once and reused for both the filter and the distance.
    search_origin geography := ST_SetSRID(ST_Point(lon, lat), 4326)::geography;
BEGIN
    RETURN QUERY
    SELECT
        i.id,
        i.ghcid_current,
        i.name,
        i.institution_type,
        i.type_name,
        (ST_Distance(i.geom::geography, search_origin) / 1000)::FLOAT AS distance_km,
        i.city,
        i.province,
        i.rating
    FROM institutions AS i
    -- ST_DWithin on geography takes its tolerance in metres.
    WHERE ST_DWithin(i.geom::geography, search_origin, radius_km * 1000)
      AND (inst_type IS NULL OR i.institution_type = inst_type)
    -- distance_km here is the output column of this SELECT.
    ORDER BY distance_km, i.id
    LIMIT max_results;
END;
$$ LANGUAGE plpgsql STABLE;
-- Find which municipality/province contains a point.
--
-- Parameters:
--   lon, lat  point to look up (SRID 4326)
--
-- Returns at most one row with the codes and names of the containing
-- municipality and its province; no row when no municipality polygon contains
-- the point. Municipalities without a province_id are never returned because
-- of the inner join. ST_Contains does not match points lying exactly on a
-- polygon boundary.
--
-- The function only reads data, so it is declared STABLE. ORDER BY m.id makes
-- the LIMIT 1 pick deterministic should municipality polygons overlap.
CREATE OR REPLACE FUNCTION find_admin_for_point(
    lon FLOAT,
    lat FLOAT
)
RETURNS TABLE (
    province_code VARCHAR,
    province_name VARCHAR,
    municipality_code VARCHAR,
    municipality_name VARCHAR
) AS $$
BEGIN
    -- Column references are table-qualified because province_code and
    -- municipality_code are also output-parameter names of this function.
    RETURN QUERY
    SELECT
        p.province_code,
        p.name AS province_name,
        m.municipality_code,
        m.name AS municipality_name
    FROM municipalities AS m
    INNER JOIN provinces AS p
        ON m.province_id = p.id
    WHERE ST_Contains(m.geom, ST_SetSRID(ST_Point(lon, lat), 4326))
    ORDER BY m.id
    LIMIT 1;
END;
$$ LANGUAGE plpgsql STABLE;
-- Get institutions by province as GeoJSON.
--
-- Parameters:
--   province_filter  optional exact match on institutions.province (free-text
--                    column); NULL (default) matches every province
--   type_filter      optional institution type; NULL (default) matches every type
--
-- Returns a single JSON FeatureCollection. Each institution becomes one
-- Feature whose geometry comes from ST_AsGeoJSON(geom). No filter on geom is
-- applied, so institutions without a location are included as well. When
-- nothing matches, "features" is an empty array rather than null.
--
-- The function only reads data, so it is declared STABLE.
CREATE OR REPLACE FUNCTION get_institutions_geojson(
    province_filter VARCHAR DEFAULT NULL,
    type_filter institution_type DEFAULT NULL
)
RETURNS JSON AS $$
BEGIN
    RETURN (
        SELECT json_build_object(
            'type', 'FeatureCollection',
            -- json_agg yields NULL over zero rows; COALESCE turns that into [].
            'features', COALESCE(json_agg(
                json_build_object(
                    'type', 'Feature',
                    'geometry', ST_AsGeoJSON(i.geom)::json,
                    'properties', json_build_object(
                        'id', i.id,
                        'ghcid', i.ghcid_current,
                        'name', i.name,
                        'type', i.institution_type,
                        'type_name', i.type_name,
                        'city', i.city,
                        'province', i.province,
                        'rating', i.rating,
                        'wikidata_id', i.wikidata_id,
                        'website', i.website
                    )
                )
            ), '[]')
        )
        FROM institutions AS i
        WHERE (province_filter IS NULL OR i.province = province_filter)
          AND (type_filter IS NULL OR i.institution_type = type_filter)
    );
END;
$$ LANGUAGE plpgsql STABLE;
-- ============================================================================
-- Views for common queries
-- ============================================================================
-- Institutions with province info.
-- Every institution row, extended with the name and ISO code of its linked
-- province and the name of its linked municipality (NULL when not linked).
--
-- The institution columns are listed explicitly, in table order, instead of
-- using i.*: with i.* a later column added to institutions would shift the
-- position of province_name and make CREATE OR REPLACE VIEW fail on re-run.
CREATE OR REPLACE VIEW v_institutions_with_admin AS
SELECT
    i.id,
    i.ghcid_current,
    i.ghcid_uuid,
    i.ghcid_numeric,
    i.name,
    i.name_verified,
    i.name_source,
    i.institution_type,
    i.type_name,
    i.wikidata_types,
    i.geom,
    i.address,
    i.city,
    i.province,
    i.province_id,
    i.municipality_id,
    i.country_code,
    i.description,
    i.website,
    i.phone,
    i.wikidata_id,
    i.google_place_id,
    i.isil_code,
    i.reviews,
    i.rating,
    i.total_ratings,
    i.photos,
    i.genealogiewerkbalk,
    i.business_status,
    i.founding_year,
    i.founding_decade,
    i.created_at,
    i.updated_at,
    p.name AS province_name,
    p.iso_code AS province_iso,
    m.name AS municipality_name
FROM institutions AS i
LEFT JOIN provinces AS p
    ON i.province_id = p.id
LEFT JOIN municipalities AS m
    ON i.municipality_id = m.id;
-- Institution counts by type and province.
-- Only institutions that have a location (geom) are counted. Grouping uses the
-- free-text province column; avg_rating is rounded to two decimals.
CREATE OR REPLACE VIEW v_institution_stats AS
SELECT
    inst.province,
    inst.institution_type,
    inst.type_name,
    COUNT(*)                   AS count,
    ROUND(AVG(inst.rating), 2) AS avg_rating
FROM institutions AS inst
WHERE inst.geom IS NOT NULL
GROUP BY
    inst.province,
    inst.institution_type,
    inst.type_name
ORDER BY
    inst.province,
    count DESC;
-- ============================================================================
-- Metadata table
-- ============================================================================
-- Key/value store for schema-level metadata (version, creation time, description).
CREATE TABLE IF NOT EXISTS geo_metadata (
    id         SERIAL PRIMARY KEY,
    key        VARCHAR(100) NOT NULL UNIQUE,
    value      TEXT,
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Seed / refresh the metadata entries. Re-running the file updates
-- schema_version and description, but the WHERE clause on the conflict action
-- leaves an existing 'created_at' entry untouched so it keeps recording when
-- the schema was first created rather than when it was last applied.
INSERT INTO geo_metadata (key, value) VALUES
    ('schema_version', '1.0.0'),
    ('created_at', NOW()::TEXT),
    ('description', 'PostGIS schema for GLAM heritage institutions')
ON CONFLICT (key) DO UPDATE
    SET value = EXCLUDED.value,
        updated_at = NOW()
    WHERE geo_metadata.key <> 'created_at';
-- ============================================================================
-- Grants (adjust user as needed)
-- ============================================================================
-- Create API user if not exists.
--
-- The password can be supplied at deploy time through the custom setting
-- glam.api_password, e.g. run `SET glam.api_password = '...';` in the same
-- session before executing this file. An existing role is never modified.
--
-- SECURITY NOTE: when the setting is absent or empty, the role is still created
-- with the historical default password below so existing deployments keep
-- working. That default is committed to source control and must be treated as
-- public; a warning is raised whenever it is used. Supply the setting, or
-- rotate the password with ALTER ROLE immediately after installation.
DO $$
DECLARE
    -- missing_ok = true: returns NULL instead of raising when the setting is unset.
    api_password TEXT := NULLIF(current_setting('glam.api_password', true), '');
BEGIN
    IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'glam_api') THEN
        IF api_password IS NULL THEN
            RAISE WARNING 'glam.api_password is not set; creating role glam_api with the default password from source control. Change it with ALTER ROLE.';
            api_password := 'glam_secret_2025';
        END IF;
        -- CREATE ROLE takes no bind parameters; %L quotes the value as a SQL literal.
        EXECUTE format('CREATE ROLE glam_api WITH LOGIN PASSWORD %L', api_password);
    END IF;
END
$$;
-- Grant permissions to the API role.
-- Read side: schema access plus SELECT/EXECUTE on everything that exists in
-- the public schema at the time this file runs.
GRANT USAGE ON SCHEMA public TO glam_api;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO glam_api;
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO glam_api;
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO glam_api;

-- Write side: allow inserts for data loading (revoke in production if needed).
GRANT INSERT, UPDATE ON TABLE
    provinces,
    municipalities,
    historical_boundaries,
    institutions,
    archive_service_areas
TO glam_api;