glam/data/isil/nl/nan/linkml/schema.yaml
2025-11-19 23:25:22 +01:00

252 lines
7.7 KiB
YAML

# Schema identity and human-readable metadata.
id: https://w3id.org/heritage/isil/nl/nan/schema
name: dutch-national-archive-isil-schema
title: Dutch National Archive ISIL Registry - LinkML Schema
# Folded scalar: the three lines below are joined into a single line.
description: >-
  LinkML schema for mapping Dutch National Archive ISIL registry CSV data
  to HeritageCustodian schema. This schema documents the transformation from
  the ISIL-codes_2025-11-06.csv file (371 records) to LinkML-compliant YAML.
version: 1.0.0
license: CC0-1.0

# CURIE prefixes used by slot_uri values and imports in this schema.
prefixes:
  linkml: https://w3id.org/linkml/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  isil: https://isil.org/
  hc: https://w3id.org/heritage/custodian/

# Prefix applied to this schema's own elements.
default_prefix: hc
# Range used by any slot that does not declare one explicitly.
default_range: string

# Brings in the built-in LinkML types (string, integer, date, uri, ...).
imports:
  - linkml:types
classes:
  # One row of the source CSV together with the fields derived from it.
  ISILRegistryRecord:
    description: >-
      A record from the Dutch National Archive ISIL registry CSV.
      Source: https://www.nationaalarchief.nl/isil
    attributes:
      # Original CSV fields (preserved with csv_ prefix)
      csv_row_number:
        description: Sequential row number in original CSV (1-371)
        range: integer
        required: true
      csv_plaats:
        description: City/location name from CSV "Plaats" column
        range: string
        required: true
        examples:
          - value: Amsterdam
          - value: Den Haag
          - value: Leiden
      csv_instelling:
        description: Institution name from CSV "Instelling" column
        range: string
        required: true
        examples:
          - value: Rijksmuseum
          - value: Stadsarchief Amsterdam
          - value: KB, nationale bibliotheek
      csv_isil_code:
        description: >-
          ISIL code from CSV "ISIL code" column.
          Format: NL-{CityAbbrev}{InstitutionAbbrev}
          Length: 7-17 characters (variable)
        range: string
        required: true
        # Anchored at both ends: without the trailing "$" any text after a
        # valid "NL-x" prefix would still match.
        # NOTE(review): assumes registry codes are purely alphanumeric after
        # "NL-" -- confirm against the CSV before relying on this.
        pattern: '^NL-[A-Za-z0-9]+$'
        examples:
          - value: NL-AsdRM
          - value: NL-HaNa
          - value: NL-LlsBatavialand
      csv_toegekend_op:
        description: >-
          Assignment date from CSV "Toegekend op" column.
          Format: YYYY-MM-DD
          Range: 2008-10-10 to 2025-09-18
        range: date
        # Not marked required, so records without an assignment date validate.
        examples:
          - value: "2013-03-07"
          - value: "2025-09-18"
      csv_opmerking:
        description: >-
          Remarks/notes from CSV "Opmerking" column.
          Contains organizational history (mergers, name changes, closures).
          Present in 18/371 records (4.9%).
        range: string
        examples:
          - value: "n.b. in 2020 ontstaan uit een fusie tussen het RHCL en Rijckheyt"
          - value: "n.b. Per 2021-10-08 in onbruik a.g.v. naamswijziging / fusie"

      # LinkML mapped fields
      name:
        description: Institution name (mapped from csv_instelling)
        range: string
        required: true
        slot_uri: schema:name
      institution_type:
        description: >-
          Institution type classification (not in original CSV,
          would need manual classification or NLP)
        range: InstitutionTypeEnum
        examples:
          - value: MUSEUM
          - value: ARCHIVE
          - value: LIBRARY
      locations:
        description: Geographic location (mapped from csv_plaats)
        range: Location
        multivalued: true
        required: true
      identifiers:
        description: ISIL identifier (mapped from csv_isil_code)
        range: Identifier
        multivalued: true
        required: true
      # The attribute itself is named "description"; the nested key of the
      # same name is that attribute's documentation.
      description:
        description: >-
          Generated from csv_opmerking when present.
          Format: "Opmerking: {csv_opmerking}"
        range: string
        slot_uri: dcterms:description
      provenance:
        description: Data source provenance metadata
        range: Provenance
        required: true

  # Target of ISILRegistryRecord.locations.
  Location:
    description: Geographic location of the institution
    attributes:
      city:
        description: City name (from csv_plaats)
        range: string
        required: true
        slot_uri: schema:addressLocality
      country:
        description: ISO 3166-1 alpha-2 country code (always "NL")
        range: string
        required: true
        pattern: '^NL$'
        slot_uri: schema:addressCountry

  # Target of ISILRegistryRecord.identifiers.
  Identifier:
    description: ISIL code identifier structure
    attributes:
      identifier_scheme:
        description: Identifier scheme name (always "ISIL")
        range: string
        required: true
        pattern: '^ISIL$'
        slot_uri: dcterms:type
      identifier_value:
        description: ISIL code value (from csv_isil_code)
        range: string
        required: true
        # Kept identical to the csv_isil_code pattern, including the end
        # anchor, so both fields accept exactly the same codes.
        pattern: '^NL-[A-Za-z0-9]+$'
        slot_uri: dcterms:identifier
      identifier_url:
        description: URL to ISIL registry entry
        range: uri
        required: true
        # The dot is escaped so it matches only a literal "."; an unescaped
        # dot would also accept hosts such as "isilXorg". Single quotes are
        # required because "\." is not a valid escape in a YAML
        # double-quoted scalar.
        pattern: '^https://isil\.org/NL-'
        slot_uri: schema:url
      assigned_date:
        description: Date ISIL code was assigned (from csv_toegekend_op)
        range: date
        slot_uri: dcterms:issued

  # Target of ISILRegistryRecord.provenance.
  Provenance:
    description: Data source and extraction metadata
    attributes:
      data_source:
        description: Source type (always "ISIL_REGISTRY")
        range: DataSourceEnum
        required: true
      data_tier:
        description: Data quality tier (always "TIER_1_AUTHORITATIVE")
        range: DataTierEnum
        required: true
      extraction_date:
        description: Timestamp of CSV-to-YAML conversion
        range: datetime
        required: true
        slot_uri: dcterms:created
      extraction_method:
        description: Conversion method description
        range: string
        required: true
      source_url:
        description: URL to ISIL registry website
        range: uri
        required: true
      confidence_score:
        description: Data confidence (always 1.0 for authoritative source)
        range: float
        required: true
        # Equal bounds pin the value to exactly 1.0.
        minimum_value: 1.0
        maximum_value: 1.0
enums:
  # Value set for ISILRegistryRecord.institution_type.
  # NOTE(review): the taxonomy name in the description suggests more
  # categories than the six listed here -- confirm whether this subset is
  # intentional.
  InstitutionTypeEnum:
    description: GLAMORCUBESFIXPHDNT institution type taxonomy
    permissible_values:
      GALLERY:
        description: Art gallery or exhibition space
      LIBRARY:
        description: Library (public, academic, specialized)
      ARCHIVE:
        description: Archive (government, corporate, personal)
      MUSEUM:
        description: Museum (art, history, science, etc.)
      OFFICIAL_INSTITUTION:
        description: Government heritage agencies
      RESEARCH_CENTER:
        description: Research institutes and documentation centers

  # Value set for Provenance.data_source; a single value is defined.
  DataSourceEnum:
    description: Data source types
    permissible_values:
      ISIL_REGISTRY:
        description: Official ISIL code registry

  # Value set for Provenance.data_tier; a single value is defined.
  DataTierEnum:
    description: Data quality tiers
    permissible_values:
      TIER_1_AUTHORITATIVE:
        description: Authoritative primary sources (registries, databases)
# Transformation rules
# Free-text notes on how the CSV was parsed and mapped. Every entry is
# quoted because each contains ": ", which would otherwise be read as a
# mapping separator.
comments:
  # Parsing quirks of the source file.
  - "CSV Parsing: File uses latin-1 encoding, malformed structure with fields in single cell"
  # NOTE(review): the delimiter in the next entry decodes to '",'" --
  # possibly intended as '","'; confirm against the source CSV.
  - "CSV Structure: All fields separated by '\",'\" pattern, trailing semicolons removed"
  - "Header Row: Contains sequence number as first field, actual headers start at index 1"
  # Field handling and mapping onto the record attributes.
  - "Field Preservation: All 6 CSV fields preserved with csv_ prefix (100% preservation)"
  - "Mapping: csv_instelling → name, csv_plaats → locations[0].city, csv_isil_code → identifiers[0]"
  - "Organizational History: csv_opmerking contains merger/closure info (18 records)"
  - "Date Format: csv_toegekend_op converted from string to date type"
  - "ISIL URL: Generated as https://isil.org/{csv_isil_code}"
  # Provenance defaults and dataset size.
  - "Provenance: All records marked as TIER_1_AUTHORITATIVE with confidence 1.0"
  - "Total Records: 371 institutions (201 unique cities)"