# File listing metadata: 252 lines, 7.7 KiB, YAML
# Schema-level metadata: identity, human-readable title/description,
# version and license of this LinkML schema.
id: https://w3id.org/heritage/isil/nl/nan/schema
name: dutch-national-archive-isil-schema
title: Dutch National Archive ISIL Registry - LinkML Schema
# Folded scalar (>-): the lines below are joined with spaces and the
# trailing newline is stripped.
description: >-
  LinkML schema for mapping Dutch National Archive ISIL registry CSV data
  to HeritageCustodian schema. This schema documents the transformation from
  the ISIL-codes_2025-11-06.csv file (371 records) to LinkML-compliant YAML.

# Quoted so the version is unambiguously a string for every YAML parser.
version: "1.0.0"
license: CC0-1.0

# CURIE prefix -> namespace expansions available to this schema
# (e.g. the schema: and dcterms: prefixes are referenced by slot_uri values).
prefixes:
  linkml: https://w3id.org/linkml/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  isil: https://isil.org/
  hc: https://w3id.org/heritage/custodian/

# Schema-wide defaults: hc is the default prefix, and string is the range
# applied to any slot that does not declare one.
default_prefix: hc
default_range: string

# Built-in LinkML types (string, integer, date, datetime, uri, float, ...).
imports:
  - linkml:types

# Class definitions: one registry record plus the nested structures it
# references (Location, Identifier, Provenance).
classes:

  # One row of the registry CSV: the raw csv_* columns plus the mapped
  # fields derived from them.
  ISILRegistryRecord:
    description: >-
      A record from the Dutch National Archive ISIL registry CSV.
      Source: https://www.nationaalarchief.nl/isil
    attributes:
      # Original CSV fields (preserved with csv_ prefix)
      csv_row_number:
        description: Sequential row number in original CSV (1-371)
        range: integer
        required: true

      csv_plaats:
        description: City/location name from CSV "Plaats" column
        range: string
        required: true
        examples:
          - value: Amsterdam
          - value: Den Haag
          - value: Leiden

      csv_instelling:
        description: Institution name from CSV "Instelling" column
        range: string
        required: true
        examples:
          - value: Rijksmuseum
          - value: Stadsarchief Amsterdam
          - value: KB, nationale bibliotheek

      csv_isil_code:
        description: >-
          ISIL code from CSV "ISIL code" column.
          Format: NL-{CityAbbrev}{InstitutionAbbrev}
          Length: 7-17 characters (variable)
        range: string
        required: true
        # NOTE(review): no trailing "$", so characters after the first
        # alphanumeric run are not constrained - confirm this is intended.
        pattern: '^NL-[A-Za-z0-9]+'
        examples:
          - value: NL-AsdRM
          - value: NL-HaNa
          - value: NL-LlsBatavialand

      csv_toegekend_op:
        description: >-
          Assignment date from CSV "Toegekend op" column.
          Format: YYYY-MM-DD
          Range: 2008-10-10 to 2025-09-18
        range: date
        examples:
          - value: "2013-03-07"
          - value: "2025-09-18"

      csv_opmerking:
        description: >-
          Remarks/notes from CSV "Opmerking" column.
          Contains organizational history (mergers, name changes, closures).
          Present in 18/371 records (4.9%).
        range: string
        examples:
          - value: "n.b. in 2020 ontstaan uit een fusie tussen het RHCL en Rijckheyt"
          - value: "n.b. Per 2021-10-08 in onbruik a.g.v. naamswijziging / fusie"

      # LinkML mapped fields
      name:
        description: Institution name (mapped from csv_instelling)
        range: string
        required: true
        slot_uri: schema:name

      institution_type:
        description: >-
          Institution type classification (not in original CSV,
          would need manual classification or NLP)
        range: InstitutionTypeEnum
        examples:
          - value: MUSEUM
          - value: ARCHIVE
          - value: LIBRARY

      locations:
        description: Geographic location (mapped from csv_plaats)
        range: Location
        multivalued: true
        required: true

      identifiers:
        description: ISIL identifier (mapped from csv_isil_code)
        range: Identifier
        multivalued: true
        required: true

      description:
        description: >-
          Generated from csv_opmerking when present.
          Format: "Opmerking: {csv_opmerking}"
        range: string
        slot_uri: dcterms:description

      provenance:
        description: Data source provenance metadata
        range: Provenance
        required: true

  # Where the institution is located; the country is pinned to "NL".
  Location:
    description: Geographic location of the institution
    attributes:
      city:
        description: City name (from csv_plaats)
        range: string
        required: true
        slot_uri: schema:addressLocality

      country:
        description: ISO 3166-1 alpha-2 country code (always "NL")
        range: string
        required: true
        pattern: '^NL$'
        slot_uri: schema:addressCountry

  # Structured form of the ISIL code: scheme, value, URL and assignment date.
  Identifier:
    description: ISIL code identifier structure
    attributes:
      identifier_scheme:
        description: Identifier scheme name (always "ISIL")
        range: string
        required: true
        pattern: '^ISIL$'
        slot_uri: dcterms:type

      identifier_value:
        description: ISIL code value (from csv_isil_code)
        range: string
        required: true
        # NOTE(review): same unanchored-tail pattern as csv_isil_code -
        # confirm whether a trailing "$" is wanted.
        pattern: '^NL-[A-Za-z0-9]+'
        slot_uri: dcterms:identifier

      identifier_url:
        description: URL to ISIL registry entry
        range: uri
        required: true
        # The dot is escaped so only the literal host "isil.org" matches;
        # single quotes keep the backslash literal in YAML.
        pattern: '^https://isil\.org/NL-'
        slot_uri: schema:url

      assigned_date:
        description: Date ISIL code was assigned (from csv_toegekend_op)
        range: date
        slot_uri: dcterms:issued

  # How and when the record was produced from the source CSV.
  Provenance:
    description: Data source and extraction metadata
    attributes:
      data_source:
        description: Source type (always "ISIL_REGISTRY")
        range: DataSourceEnum
        required: true

      data_tier:
        description: Data quality tier (always "TIER_1_AUTHORITATIVE")
        range: DataTierEnum
        required: true

      extraction_date:
        description: Timestamp of CSV-to-YAML conversion
        range: datetime
        required: true
        slot_uri: dcterms:created

      extraction_method:
        description: Conversion method description
        range: string
        required: true

      source_url:
        description: URL to ISIL registry website
        range: uri
        required: true

      confidence_score:
        description: Data confidence (always 1.0 for authoritative source)
        range: float
        required: true
        # minimum == maximum, so 1.0 is the only accepted value.
        minimum_value: 1.0
        maximum_value: 1.0

# Controlled vocabularies referenced by the class attributes
# institution_type, data_source and data_tier.
enums:

  InstitutionTypeEnum:
    description: GLAMORCUBESFIXPHDNT institution type taxonomy
    permissible_values:
      GALLERY:
        description: Art gallery or exhibition space
      LIBRARY:
        description: Library (public, academic, specialized)
      ARCHIVE:
        description: Archive (government, corporate, personal)
      MUSEUM:
        description: Museum (art, history, science, etc.)
      OFFICIAL_INSTITUTION:
        description: Government heritage agencies
      RESEARCH_CENTER:
        description: Research institutes and documentation centers

  # Single-valued: every record in this schema comes from the ISIL registry.
  DataSourceEnum:
    description: Data source types
    permissible_values:
      ISIL_REGISTRY:
        description: Official ISIL code registry

  # Single-valued: only the authoritative tier is defined here.
  DataTierEnum:
    description: Data quality tiers
    permissible_values:
      TIER_1_AUTHORITATIVE:
        description: Authoritative primary sources (registries, databases)

# Transformation rules
# Free-text notes on how the source CSV was parsed and mapped; stored as
# schema-level comments, one rule per list item.
comments:
  - "CSV Parsing: File uses latin-1 encoding, malformed structure with fields in single cell"
  # NOTE(review): the delimiter below decodes to '",'" - confirm the exact
  # separator that was intended.
  - "CSV Structure: All fields separated by '\",'\" pattern, trailing semicolons removed"
  - "Header Row: Contains sequence number as first field, actual headers start at index 1"
  - "Field Preservation: All 6 CSV fields preserved with csv_ prefix (100% preservation)"
  - "Mapping: csv_instelling → name, csv_plaats → locations[0].city, csv_isil_code → identifiers[0]"
  - "Organizational History: csv_opmerking contains merger/closure info (18 records)"
  - "Date Format: csv_toegekend_op converted from string to date type"
  - "ISIL URL: Generated as https://isil.org/{csv_isil_code}"
  - "Provenance: All records marked as TIER_1_AUTHORITATIVE with confidence 1.0"
  - "Total Records: 371 institutions (201 unique cities)"