# glam/data/isil/nl/kb/linkml/mapping.yaml
# Repository viewer timestamp: 2025-11-19 23:25:22 +01:00
# NOTE(review): the repository viewer flagged this file as containing
# ambiguous Unicode characters (look-alikes of other characters); confirm
# they are intentional.
---

# Document header: identity, human-readable title, and versioning of this
# mapping description.
id: https://w3id.org/heritage/isil/nl/kb/mapping
name: dutch-library-network-isil-csv-mapping
title: KB Bnetwerk Library ISIL CSV to LinkML Mapping
# Folded scalar: the lines below are joined into a single paragraph.
description: >-
  Field-by-field mapping documentation for converting Dutch Library Network
  (KB Bnetwerk) ISIL registry CSV to LinkML HeritageCustodian schema.
  Data snapshot: April 1, 2025 (Stand 1 april 2025).
version: 1.0.0
created: 2025-11-17
conversion_script: /scripts/convert_library_isil_csv_to_yaml.py
# Input side of the conversion: the CSV file and its headline figures.
source:
  file: data/isil/nl/kb/20250401 Bnetwerk overzicht ISIL-codes Bibliotheken Nederland.csv
  encoding: utf-8
  format: standard_csv
  total_records: 153
  unique_cities: 134
  data_date: 2025-04-01
  authority: Koninklijke Bibliotheek (KB) - National Library of the Netherlands
  url: https://www.kb.nl/organisatie/bibliotheken-in-nederland/isil-codes
# Output side of the conversion: the generated YAML file and the schema it
# is written against.
target:
  file: data/isil/nl/kb/20250401_Bnetwerk_ISIL_Bibliotheken_Nederland.yaml
  schema: schemas/heritage_custodian.yaml
  format: LinkML-compliant YAML
# Physical layout of the source CSV.
csv_structure:
  encoding: UTF-8 (clean, standard)
  columns: 4
  # Column order here matches the csv_index values used in field_mappings.
  header_row: ['ISIL code', 'Naam bibliotheek', 'Vestigingsplaats', 'Opmerking']
  data_rows: 153
  delimiter: comma
  quote_char: double quotes
  notes: Well-formed CSV, no parsing challenges
# One entry per output field. Each entry names the CSV column it comes from,
# the intermediate yaml_field it is stored under, example values, and the
# target-schema fields it feeds (mappings).
field_mappings:
  # CSV Column 1: ISIL code
  - csv_field: ISIL code
    csv_index: 0
    yaml_field: csv_isil_code
    data_type: string
    required: true
    pattern: "^NL-[0-9]{10}$"
    examples:
      - csv_value: NL-0100030000
        yaml_value: NL-0100030000
      - csv_value: NL-0800070000
        yaml_value: NL-0800070000
      - csv_value: NL-0700130000
        yaml_value: NL-0700130000
    mappings:
      - target_field: identifiers[0].identifier_scheme
        transformation: constant
        value: ISIL
      - target_field: identifiers[0].identifier_value
        transformation: direct_copy
      - target_field: identifiers[0].identifier_url
        transformation: url_construction
        template: https://isil.org/{csv_isil_code}
    notes: >-
      ISIL code with numeric encoding (sequential assignment, not semantic).
      Uniform length: 13 characters (NL-XXXXXXXXXX, where X is a digit).
      Always starts with "NL-" followed by 10 digits.
      Mapped to Identifier object with scheme, value, and URL.

  # CSV Column 2: Library name
  - csv_field: Naam bibliotheek
    csv_index: 1
    yaml_field: csv_naam_bibliotheek
    data_type: string
    required: true
    examples:
      - csv_value: KB, nationale bibliotheek
        yaml_value: KB, nationale bibliotheek
      - csv_value: OBA
        yaml_value: OBA
      - csv_value: Zeeuwse Bibliotheken
        yaml_value: Zeeuwse Bibliotheken
    mappings:
      - target_field: name
        transformation: direct_copy
      - target_field: library_type
        transformation: classification
        method: keyword_matching
        # Rules are listed in the same order as the if/elif chain documented
        # under transformation_rules.library_type_classification.
        rules:
          - keyword: "KB, nationale bibliotheek"
            value: national_library
          # This rule reads the remarks column, not the name column.
          - keyword: "POI"
            value: library_automation_system
            source_field: csv_opmerking
          - keyword_pattern: "Bibliotheekservice|Bibliotheek Totaal"
            value: national_library_organization
          - keyword_pattern: "Provinciale"
            value: provincial_library_organization
          - default: public_library
    notes: >-
      Library name. Maps directly to HeritageCustodian.name.
      Also used for automated library_type classification via keyword matching.
      No normalization applied (preserves original spelling and abbreviations).

  # CSV Column 3: City/location
  - csv_field: Vestigingsplaats
    csv_index: 2
    yaml_field: csv_vestigingsplaats
    data_type: string
    required: true
    examples:
      - csv_value: Den Haag
        yaml_value: Den Haag
      - csv_value: Amsterdam
        yaml_value: Amsterdam
      - csv_value: Middelburg
        yaml_value: Middelburg
    mappings:
      - target_field: locations[0].city
        transformation: direct_copy
      - target_field: locations[0].country
        transformation: constant
        value: NL
    notes: >-
      City name. Mapped to Location.city. Country code "NL" added automatically.
      134 unique cities in dataset.

  # CSV Column 4: Remarks
  - csv_field: Opmerking
    csv_index: 3
    yaml_field: csv_opmerking
    data_type: string
    required: false
    examples:
      - csv_value: POI
        yaml_value: POI
      - csv_value: KB, Nationale Bibliotheek
        yaml_value: KB, Nationale Bibliotheek
      - csv_value: Bibliotheekservice Fryslân
        yaml_value: Bibliotheekservice Fryslân
      - csv_value: ""
        yaml_value: ""
    mappings:
      - target_field: library_type
        transformation: classification_keyword
        keywords:
          POI: library_automation_system
      - target_field: description
        transformation: conditional_formatting
        condition: csv_opmerking is not empty OR library_type is set
        # Literal block: line breaks in the pseudo-code are preserved.
        template: |
          if csv_opmerking:
            "Type: {csv_opmerking}. Library classification: {library_type}"
          else:
            "Library classification: {library_type}"
    notes: >-
      Organization type indicators and remarks.
      Present in 19/153 records (12.4%).
      Key values: "POI" (11 records), "KB, Nationale Bibliotheek" (1),
      "Bibliotheekservice" variants (5), "Provinciale" (2).
      Used for automated library_type classification.

  # Row number generation (no csv_index: the value is not read from the CSV)
  - csv_field: (generated)
    yaml_field: csv_row_number
    data_type: integer
    required: true
    examples:
      - csv_value: (row 1)
        yaml_value: 1
      - csv_value: (row 153)
        yaml_value: 153
    mappings:
      - target_field: csv_row_number
        transformation: row_enumeration
    notes: >-
      Sequential row number (1-153) generated during conversion.
      Not present in original CSV. Added for traceability.
# Provenance values recorded for the converted records.
provenance_mapping:
  data_source:
    value: ISIL_REGISTRY
    enum: DataSource.ISIL_REGISTRY
  data_tier:
    value: TIER_1_AUTHORITATIVE
    enum: DataTier.TIER_1_AUTHORITATIVE
    rationale: Official registry maintained by KB (National Library)
  extraction_date:
    source: system_timestamp
    format: ISO 8601 with timezone
    example: "2025-11-17T12:42:48.429874+00:00"
  extraction_method:
    value: "CSV to YAML conversion (KB Bnetwerk library ISIL codes)"
  source_url:
    value: https://www.kb.nl/organisatie/bibliotheken-in-nederland/isil-codes
  source_date:
    value: "Stand 1 april 2025"
    rationale: Data snapshot timestamp from CSV source
  confidence_score:
    value: 1.0
    rationale: Authoritative source, no inference or estimation
# Python snippets illustrating each conversion step. The snippets use literal
# block scalars (|-) so that line breaks are preserved; a folded scalar (>-)
# would join equally-indented lines (e.g. the two csv_parsing lines) into one.
transformation_rules:
  encoding_handling:
    description: CSV uses UTF-8 encoding (standard, no issues)
    implementation: |-
      with open(csv_path, 'r', encoding='utf-8') as f:
  csv_parsing:
    description: Standard CSV structure using csv.DictReader
    implementation: |-
      reader = csv.DictReader(f)
      for row in reader:
  row_numbering:
    description: Generate sequential row numbers during iteration
    implementation: |-
      for idx, row in enumerate(reader, start=1):
          record['csv_row_number'] = idx
  isil_url_generation:
    description: Construct ISIL URL from code value
    implementation: |-
      identifier_url = f"https://isil.org/{isil_code}"
  library_type_classification:
    description: Automated classification using keyword matching
    # Branch order matters: the first matching rule wins.
    implementation: |-
      if name == "KB, nationale bibliotheek":
          library_type = "national_library"
      elif "POI" in remark:
          library_type = "library_automation_system"
      elif re.search(r"Bibliotheekservice|Bibliotheek Totaal", name):
          library_type = "national_library_organization"
      elif "Provinciale" in name:
          library_type = "provincial_library_organization"
      else:
          library_type = "public_library"
  description_generation:
    description: Create description from remarks and library_type
    implementation: |-
      if remark:
          description = f"Type: {remark}. Library classification: {library_type}"
      else:
          description = f"Library classification: {library_type}"
# Per-category breakdown of the library_type classification: the keyword and
# source field that trigger each category, record counts, and examples.
library_type_classification:
  description: >-
    Automated classification of 153 libraries into 5 specialized types
    using keyword matching on csv_naam_bibliotheek and csv_opmerking fields.
  classification_rules:
    national_library:
      keyword: "KB, nationale bibliotheek"
      field: csv_naam_bibliotheek
      count: 1
      percentage: 0.7%
      rationale: Only the KB (Koninklijke Bibliotheek) qualifies
      examples:
        - name: KB, nationale bibliotheek
          city: Den Haag
    national_library_organization:
      keywords:
        - Bibliotheekservice
        - Bibliotheek Totaal
      field: csv_naam_bibliotheek
      count: 5
      percentage: 3.3%
      rationale: National service organizations supporting library sector
      examples:
        - name: Bibliotheek Totaal
          city: Zoetermeer
        - name: Bibliotheekservice Fryslân
          city: Leeuwarden
        - name: Bibliotheekservice Overijssel
          city: Deventer
    provincial_library_organization:
      keyword: "Provinciale"
      field: csv_naam_bibliotheek
      count: 2
      percentage: 1.3%
      rationale: Provincial coordination organizations
      examples:
        - name: Provinciale Bibliotheekcentrale Zuid-Holland
          city: Voorburg
        - name: Provinciale Bibliotheek Centrale Noord-Brabant
          city: Eindhoven
    # NOTE(review): in the Dutch public-library sector "POI" stands for
    # "Provinciale Ondersteuningsinstelling" (provincial support institution).
    # The value name library_automation_system is kept unchanged because the
    # field mappings and classification rules in this file refer to it;
    # confirm whether it should be renamed.
    library_automation_system:
      keyword: "POI"
      field: csv_opmerking
      count: 11
      percentage: 7.2%
      rationale: >-
        POI (Provinciale Ondersteuningsinstelling, provincial support
        institution) organizations providing shared support services,
        including library automation infrastructure, across regions
      examples:
        - name: Zeeuwse Bibliotheken
          city: Middelburg
          remark: POI
        - name: FERS Friesland
          city: Leeuwarden
          remark: POI
        - name: Rijnbrink Gelderland
          city: Deventer
          remark: POI
    # Fallback category: applied when no rule above matches.
    public_library:
      keyword: (default)
      field: (no match)
      count: 134
      percentage: 87.6%
      rationale: All libraries not matching specialized categories
      examples:
        - name: OBA
          city: Amsterdam
        - name: Bibliotheek Rotterdam
          city: Rotterdam
        - name: Bibliotheek Utrecht
          city: Utrecht
# Field-level completeness and preservation figures for the conversion.
data_quality:
  field_preservation: 100%
  total_fields: 765  # 153 records × 5 fields (4 CSV + 1 generated)
  preserved_fields: 765
  validation_errors: 0
  missing_values:
    csv_opmerking: >-
      134 records have empty remarks (87.6%)
      Only 19 records have values (library type indicators)
  # Share of records with a non-empty value, per field.
  data_completeness:
    csv_row_number: 100%  # 153/153 (generated)
    csv_isil_code: 100%  # 153/153
    csv_naam_bibliotheek: 100%  # 153/153
    csv_vestigingsplaats: 100%  # 153/153
    csv_opmerking: 12.4%  # 19/153
    library_type: 100%  # 153/153 (classified)
# Shape of the ISIL codes in this dataset: fixed "NL-" prefix plus 10 digits.
isil_code_analysis:
  format: NL-XXXXXXXXXX
  length: 13 characters (uniform)
  encoding: numeric_sequential
  # Quoted to match the same pattern as written under field_mappings.
  pattern: "^NL-[0-9]{10}$"
  examples:
    - code: NL-0100030000
      institution: KB, nationale bibliotheek
      notes: National library has lowest numeric code
    - code: NL-0800070000
      institution: OBA
      notes: Amsterdam public library
    - code: NL-9909670000
      institution: Bibliotheek DenBerchten
      notes: Highest numeric code in dataset
  # The 10-digit part read as an integer (leading zero dropped for min).
  numeric_range:
    min: 100030000  # NL-0100030000
    max: 9909670000  # NL-9909670000
  # NOTE(review): placement of this key under isil_code_analysis is inferred
  # from its content (code-format comparison); confirm against the original.
  comparison_with_national_archive:
    national_archive_format: NL-{CityAbbrev}{InstitutionAbbrev}
    library_network_format: NL-XXXXXXXXXX
    national_archive_length: 7-17 characters (variable)
    library_network_length: 13 characters (uniform)
    national_archive_encoding: semantic (city+institution)
    library_network_encoding: numeric (sequential assignment)
# Dataset-level counts: records, cities, and the per-category totals.
statistics:
  total_records: 153
  unique_cities: 134
  unique_isil_codes: 153  # All unique (no duplicates)
  # Category totals sum to total_records (1 + 5 + 2 + 11 + 134 = 153).
  library_network_structure:
    national_library: 1
    national_library_organizations: 5
    provincial_library_organizations: 2
    library_automation_systems_poi: 11
    public_libraries: 134
  top_cities:
    - city: Culemborg
      count: 2
      libraries:
        - Bibliotheek Culemborg
        - Bibliotheek Rivierenland
    - city: Den Haag
      count: 2
      libraries:
        - KB, nationale bibliotheek
        - Bibliotheek Den Haag
  # NOTE(review): the figures in this note do not reconcile. 132 cities with
  # one library plus 2 cities with two gives 136 records, not 153, and the
  # examples under library_type_classification list two organizations each
  # in Leeuwarden and Deventer. Verify against the source CSV.
  notes: >-
    Most cities (132/134) have exactly one library.
    Only Culemborg and Den Haag have 2 libraries each.
# Checks reported for the converted output.
validation:
  schema_validation: linkml-validate (schemas/heritage_custodian.yaml)
  field_count_check: All 153 records have 5 fields preserved
  isil_pattern_check: All codes match ^NL-[0-9]{10}$
  no_duplicates: All ISIL codes are unique
  library_type_coverage: All 153 records have library_type assigned
  classification_accuracy: Manual review of 20 samples shows 100% accuracy
# Record-count comparison between this dataset and the national archive
# ISIL dataset (total_dutch_isil_codes = 371 + 153).
comparison_with_national_archive_dataset:
  national_archive_records: 371
  library_network_records: 153
  total_dutch_isil_codes: 524
  overlap: 0
  # Quoted because the value starts with "~", the YAML null indicator.
  geographic_overlap: "~50 cities appear in both datasets"
  complementary_coverage:
    national_archive_focus: museums, archives, historical societies, mixed
    library_network_focus: public libraries, library services, POI systems
    combined_institution_types: >-
      Museums (M), Archives (A), Libraries (L), Historical Societies (S),
      Official Institutions (O), Mixed (X)
# Pointers to the files that accompany this mapping.
# NOTE(review): paths here start with "/" while source.file and target.file
# are written without a leading slash; confirm which form is intended.
related_documentation:
  conversion_report: /docs/LIBRARY_ISIL_CSV_TO_YAML_CONVERSION_REPORT.md
  schema_definition: /data/isil/nl/kb/linkml/schema.yaml
  source_csv: /data/isil/nl/kb/20250401 Bnetwerk overzicht ISIL-codes Bibliotheken Nederland.csv
  output_yaml: /data/isil/nl/kb/20250401_Bnetwerk_ISIL_Bibliotheken_Nederland.yaml