Refactor LinkML schemas and slots for consistency and clarity

- Updated imports in FindingAid.yaml to remove unnecessary entries and added new slots for arrangement level and provenance path.
- Replaced 'full_name' with 'has_or_had_label' in LegalName.yaml and ProfileData.yaml for uniformity.
- Enhanced slot definitions in various YAML files, including ceases_or_ceased_through, has_or_had_arrangement_level, has_or_had_assessment, and others, to include metadata and improve structure.
- Removed the script fix_linkml_metadata.py as it is no longer needed.
- Added new script fix_specific_dead_links.py to handle specific mapping updates for extraction metadata and full name fields across multiple YAML files.
This commit is contained in:
kempersc 2026-01-29 18:17:47 +01:00
parent fcb704c97e
commit 2f44857028
24 changed files with 552 additions and 359 deletions

View file

@ -1,5 +1,5 @@
{
"generated": "2026-01-29T16:40:47.585Z",
"generated": "2026-01-29T17:17:48.016Z",
"schemaRoot": "/schemas/20251121/linkml",
"totalFiles": 3003,
"categoryCounts": {

View file

@ -26,7 +26,7 @@ classes:
legal_form:
range: string
description: Legal form of the organization (Municipal library, Foundation, etc.)
full_name:
has_or_had_label:
range: string
description: Full official name of the organization
closure_date:

View file

@ -142,6 +142,7 @@ classes:
'
range: EducationFacilityType
inlined: true
equals_expression: '["hc:EducationProviderType"]'
examples:
- value:
has_or_had_label: EDUCATION_CENTER
@ -265,8 +266,6 @@ classes:
is_or_was_generated_by:
range: ReconstructionActivity
required: false
has_or_had_type:
equals_expression: '["hc:EducationProviderType"]'
comments:
- EducationCenter models educational facilities of heritage custodians
- Schema.org EducationalOrganization for education facilities
@ -308,10 +307,16 @@ classes:
av_equipped: true
flexible_seating: true
has_or_had_quantity:
value: 2
unit: workshop_spaces
- numeric_value: 2
has_or_had_unit:
unit_text: workshop_spaces
- numeric_value: 12
has_or_had_unit:
unit_text: staff_members
max_group_size: 30
has_av_equipment: true
has_or_had_equipment:
- has_or_had_name: 4K Projector
has_or_had_type: Projector
provides_or_provided:
has_or_had_description: Hands-on learning lab
has_or_had_accessibility_feature:
@ -321,10 +326,6 @@ classes:
- has_or_had_quantity: 75000
has_or_had_time_interval:
has_or_had_label: Annual
has_or_had_quantity:
- numeric_value: 12
has_or_had_unit:
unit_text: staff_members
is_or_was_required: true
description: Major museum education center
- value:
@ -353,7 +354,9 @@ classes:
seating_capacity: 40
av_equipped: true
max_group_size: 20
has_av_equipment: true
has_or_had_equipment:
- has_or_had_name: Projector
has_or_had_type: Projector
is_or_was_required: true
description: Archive learning center
annotations:

View file

@ -18,100 +18,78 @@ imports:
- ./TemplateSpecificityType
- ./TemplateSpecificityTypes
- ../enums/ProfileExtractionMethodEnum
- ../slots/extraction_agent
- ../slots/extraction_method
- ../slots/cost_usd
- ../slots/source_file
- ../slots/staff_id
- ../slots/extraction_date
- ../slots/linkedin_url
- ../slots/request_id
- ../slots/is_or_was_retrieved_by
- ../slots/has_or_had_method
- ../slots/has_or_had_expense
- ../slots/has_or_had_source
- ../slots/has_or_had_identifier
- ../slots/retrieval_timestamp
- ../slots/has_or_had_url
- ../slots/llm_response
- ../slots/specificity_annotation
- ../slots/has_or_had_score
- ../slots/cost_usd
- ../slots/extraction_agent
- ../slots/extraction_date
- ../slots/extraction_method
- ../slots/linkedin_url
- ../slots/llm_response
- ../slots/request_id
- ../slots/source_file
- ../slots/specificity_annotation
- ../slots/staff_id
- ../slots/has_or_had_score
- ../slots/cost_usd
- ../slots/extraction_agent
- ../slots/extraction_date
- ../slots/extraction_method
- ../slots/linkedin_url
- ../slots/llm_response
- ../slots/request_id
- ../slots/source_file
- ../slots/specificity_annotation
- ../slots/staff_id
- ../slots/has_or_had_score
default_range: string
classes:
ExtractionMetadata:
class_uri: prov:Activity
description: "Provenance metadata for data extraction activities.\n\nRecords how, when, and by what agent data was extracted from \nexternal sources (LinkedIn, web scraping, APIs).\n\n**PROV-O Alignment**:\n- ExtractionMetadata IS a prov:Activity (the extraction process)\n- The extracted data IS the prov:Entity (output of the activity)\n- extraction_agent IS the prov:Agent (software/AI that performed extraction)\n- source_file/linkedin_url IS prov:used (input to the activity)\n\n**Use Cases**:\n- LinkedIn profile extractions via Exa API\n- Web scraping provenance\n- Staff list parsing provenance\n- Connection network extraction\n\n**Example JSON Structure**:\n```json\n{\n \"extraction_metadata\": {\n \"source_file\": \"/path/to/source.json\",\n \"staff_id\": \"org_staff_0001_name\",\n \"extraction_date\": \"2025-12-12T22:00:00Z\",\n \"extraction_method\": \"exa_crawling_exa\",\n \"extraction_agent\": \"claude-opus-4.5\",\n \"linkedin_url\": \"https://www.linkedin.com/in/...\"\
,\n \"cost_usd\": 0.001\n }\n}\n```\n"
description: "Provenance metadata for data extraction activities.\n\nRecords how, when, and by what agent data was extracted from \nexternal sources (LinkedIn, web scraping, APIs).\n\n**PROV-O Alignment**:\n- ExtractionMetadata IS a prov:Activity (the extraction process)\n- The extracted data IS the prov:Entity (output of the activity)\n- is_or_was_retrieved_by IS the prov:Agent (software/AI that performed extraction)\n- has_or_had_source/has_or_had_url IS prov:used (input to the activity)\n\n**Use Cases**:\n- LinkedIn profile extractions via Exa API\n- Web scraping provenance\n- Staff list parsing provenance\n- Connection network extraction\n\n**Example JSON Structure**:\n```json\n{\n \"extraction_metadata\": {\n \"has_or_had_source\": \"/path/to/source.json\",\n \"has_or_had_identifier\": \"org_staff_0001_name\",\n \"retrieval_timestamp\": \"2025-12-12T22:00:00Z\",\n \"has_or_had_method\": \"exa_crawling_exa\",\n \"is_or_was_retrieved_by\": \"claude-opus-4.5\",\n \"has_or_had_url\": \"https://www.linkedin.com/in/...\"\
,\n \"has_or_had_expense\": 0.001\n }\n}\n```\n"
exact_mappings:
- prov:Activity
close_mappings:
- schema:Action
- dct:ProvenanceStatement
slots:
- cost_usd
- extraction_agent
- extraction_date
- extraction_method
- linkedin_url
- has_or_had_expense
- is_or_was_retrieved_by
- retrieval_timestamp
- has_or_had_method
- has_or_had_url
- llm_response
- request_id
- source_file
- has_or_had_identifier
- has_or_had_source
- specificity_annotation
- staff_id
- has_or_had_score
slot_usage:
source_file:
has_or_had_source:
range: string
examples:
- value: /data/custodian/person/affiliated/parsed/rijksmuseum_staff_20251210T155416Z.json
description: Path to parsed staff list JSON
staff_id:
has_or_had_identifier:
range: string
pattern: ^[a-z0-9-]+_staff_[a-z0-9-_]+$
examples:
- value: rijksmuseum_staff_0042_jan_van_der_berg
description: Staff ID with org prefix, index, and name slug
extraction_date:
- value: exa_12345678-abcd-efgh-ijkl-mnopqrstuv
description: Exa API request ID
retrieval_timestamp:
range: datetime
required: true
examples:
- value: '2025-12-12T22:00:00Z'
description: UTC timestamp of extraction
extraction_method:
has_or_had_method:
range: ProfileExtractionMethodEnum
required: true
examples:
- value: exa_crawling_exa
description: Extracted via Exa AI crawling API
extraction_agent:
is_or_was_retrieved_by:
range: string
examples:
- value: claude-opus-4.5
description: Extracted by Claude Opus 4.5
- value: ''
description: Empty string for fully automated extraction
linkedin_url:
has_or_had_url:
range: uri
pattern: ^https://www\.linkedin\.com/in/[a-z0-9-]+/?$
examples:
- value: https://www.linkedin.com/in/jan-van-der-berg-12345
description: LinkedIn profile URL
cost_usd:
has_or_had_expense:
range: float
minimum_value: 0.0
examples:
@ -119,11 +97,6 @@ classes:
description: Exa API call cost
- value: 0.0
description: Free extraction (cached/local)
request_id:
range: string
examples:
- value: exa_12345678-abcd-efgh-ijkl-mnopqrstuv
description: Exa API request ID
llm_response:
range: LLMResponse
required: false
@ -134,8 +107,8 @@ classes:
comments:
- Every person entity file MUST have extraction_metadata
- See AGENTS.md Rule 20 for required fields
- extraction_agent should be 'claude-opus-4.5' for manual extraction
- cost_usd enables budget tracking for API-heavy extractions
- is_or_was_retrieved_by should be 'claude-opus-4.5' for manual extraction
- has_or_had_expense enables budget tracking for API-heavy extractions
see_also:
- https://www.w3.org/TR/prov-o/
- https://docs.exa.ai/

View file

@ -47,55 +47,9 @@ imports:
- ./ConfidenceMethod
- ../slots/html_file
- ../slots/has_or_had_identifier
- ./Identifier
- ../slots/source_url
- ../slots/has_or_had_label
- ../slots/date
- ../slots/note
- ../slots/creator
- ../slots/has_or_had_publisher
- ./Publisher
- ../slots/is_or_was_published_at
- ./PublicationEvent
- ./Quantity
- ../slots/isbn
- ../slots/has_or_had_access_condition
- ../slots/is_or_was_access_restricted
- ../slots/has_or_had_overview
- ../slots/has_or_had_image
- ./Image
- ../slots/has_or_had_quantity
- ../slots/includes_or_included
- ./GeoSpatialPlace
- ../slots/is_or_was_categorized_as
- ./ColonialStatus
- ../slots/content_block
- ../slots/crawler_version
- ../slots/custodian
- ../slots/was_retrieved_at
- ./Timestamp
- ../slots/de
- ../slots/has_or_had_identifier
- ./Identifier
- ./EADIdentifier
- ../slots/ead_id
- ../slots/en
- ../slots/end
- ../slots/has_or_had_external_resource
- ../slots/has_or_had_featured_item
- ../slots/has_or_had_note
- ./Note
- ../slots/has_or_had_scope
- ./Scope
- ../slots/has_or_had_type
- ../slots/has_or_had_format
- ./Format
- ../slots/has_or_had_language
- ./Language
- ../slots/full_name
- ../slots/has_or_had_geographic_extent
- ../slots/has_or_had_identifier
- ./GHCIdentifier
- ../slots/has_or_had_level
- ../slots/has_or_had_provenance_path
- ../slots/contains_or_contained
- ../slots/contains_or_contained
- ../slots/contains_or_contained_en
@ -535,36 +489,28 @@ classes:
'
slots:
- has_or_had_access_condition
- full_name
- has_or_had_label
- geographic_coverage
- has_or_had_identifier
- has_or_had_level
- contains_or_contained
- contains_or_contained
- contains_or_contained_en
- list_item
- section_id
- contains_or_contained
- specificity_annotation
- contains_or_contained
- has_or_had_score
- has_or_had_provenance_path
slot_usage:
contains_or_contained:
has_or_had_level:
range: integer
minimum_value: 1
maximum_value: 6
description: Heading level (1-6). MIGRATED from heading_level/contains_or_contained duplicate.
contains_or_contained:
required: true
content_block:
multivalued: true
contains_or_contained:
range: PageSection
multivalued: true
inlined_as_list: true
contains_or_contained:
range: PageLink
multivalued: true
inlined_as_list: true
any_of:
- range: PageSection
- range: PageLink
has_or_had_featured_item:
range: FeaturedCard
multivalued: true

View file

@ -56,7 +56,7 @@ classes:
description: Unique identifier for this legal name record
range: uriorcurie
required: true
full_name:
has_or_had_label:
slot_uri: rov:legalName
description: 'Complete legal name including organizational type.

View file

@ -29,7 +29,7 @@ classes:
- schema:Person
- prov:Entity
attributes:
full_name:
has_or_had_label:
range: string
description: Person's full name
has_or_had_title:

View file

@ -1,9 +1,27 @@
id: https://nde.nl/ontology/hc/slot/ceases_or_ceased_through
name: ceases_or_ceased_through
description: >-
The event through which an entity ceases or ceased to exist/operate.
MIGRATED from cessation_observed_in (Rule 53).
range: CeasingEvent
slot_uri: prov:wasInvalidatedBy
exact_mappings:
- crm:P93i_was_taken_out_of_existence_by
multivalued: true
title: Ceases Or Ceased Through
prefixes:
linkml: https://w3id.org/linkml/
hc: https://nde.nl/ontology/hc/
schema: http://schema.org/
dcterms: http://purl.org/dc/terms/
prov: http://www.w3.org/ns/prov#
crm: http://www.cidoc-crm.org/cidoc-crm/
skos: http://www.w3.org/2004/02/skos/core#
rdfs: http://www.w3.org/2000/01/rdf-schema#
org: http://www.w3.org/ns/org#
xsd: http://www.w3.org/2001/XMLSchema#
imports:
- linkml:types
default_prefix: hc
slots:
ceases_or_ceased_through:
description: >-
The event through which an entity ceases or ceased to exist/operate.
MIGRATED from cessation_observed_in (Rule 53).
range: CeasingEvent
slot_uri: prov:wasInvalidatedBy
exact_mappings:
- crm:P93i_was_taken_out_of_existence_by
multivalued: true

View file

@ -1,10 +1,28 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_arrangement_level
name: has_or_had_arrangement_level
description: The level of arrangement of the record set or information carrier.
title: has or had arrangement level
slot_uri: rico:hasRecordSetType
range: ArrangementLevel
multivalued: false
exact_mappings:
- isad:level_of_description
close_mappings:
- rico:RecordSetType
title: Has Or Had Arrangement Level
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#
  xsd: http://www.w3.org/2001/XMLSchema#
  # Declared because this module's slot_uri and close_mappings use rico: CURIEs.
  rico: https://www.ica.org/standards/RiC/ontology#
  # NOTE(review): exact_mappings also uses an isad: CURIE, which is still
  # undeclared; add its namespace IRI here once confirmed.
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_arrangement_level:
description: The level of arrangement of the record set or information carrier.
title: has or had arrangement level
slot_uri: rico:hasRecordSetType
range: ArrangementLevel
multivalued: false
exact_mappings:
- isad:level_of_description
close_mappings:
- rico:RecordSetType

View file

@ -1,8 +1,26 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_assessment
name: has_or_had_assessment
description: >-
Assessment associated with the entity.
Range should be an Assessment class.
MIGRATED from heritage_relevance (for LinkedInProfile) per Rule 53.
slot_uri: crm:P140i_was_attributed_by
range: Any
multivalued: true
title: Has Or Had Assessment
prefixes:
linkml: https://w3id.org/linkml/
hc: https://nde.nl/ontology/hc/
schema: http://schema.org/
dcterms: http://purl.org/dc/terms/
prov: http://www.w3.org/ns/prov#
crm: http://www.cidoc-crm.org/cidoc-crm/
skos: http://www.w3.org/2004/02/skos/core#
rdfs: http://www.w3.org/2000/01/rdf-schema#
org: http://www.w3.org/ns/org#
xsd: http://www.w3.org/2001/XMLSchema#
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_assessment:
description: >-
Assessment associated with the entity.
Range should be an Assessment class.
MIGRATED from heritage_relevance (for LinkedInProfile) per Rule 53.
slot_uri: crm:P140i_was_attributed_by
range: Any
multivalued: true

View file

@ -1,7 +1,25 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_citation
name: has_or_had_citation
description: >-
A bibliographic citation for the resource.
MIGRATED from citation (Rule 53).
range: string
slot_uri: schema:citation
multivalued: true
title: Has Or Had Citation
prefixes:
linkml: https://w3id.org/linkml/
hc: https://nde.nl/ontology/hc/
schema: http://schema.org/
dcterms: http://purl.org/dc/terms/
prov: http://www.w3.org/ns/prov#
crm: http://www.cidoc-crm.org/cidoc-crm/
skos: http://www.w3.org/2004/02/skos/core#
rdfs: http://www.w3.org/2000/01/rdf-schema#
org: http://www.w3.org/ns/org#
xsd: http://www.w3.org/2001/XMLSchema#
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_citation:
description: >-
A bibliographic citation for the resource.
MIGRATED from citation (Rule 53).
range: string
slot_uri: schema:citation
multivalued: true

View file

@ -1,7 +1,25 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_city_code
name: has_or_had_city_code
description: >-
The 3-letter city/settlement code (e.g., AMS for Amsterdam).
MIGRATED from city_code (Rule 53).
range: string
slot_uri: schema:code
multivalued: false
title: Has Or Had City Code
prefixes:
linkml: https://w3id.org/linkml/
hc: https://nde.nl/ontology/hc/
schema: http://schema.org/
dcterms: http://purl.org/dc/terms/
prov: http://www.w3.org/ns/prov#
crm: http://www.cidoc-crm.org/cidoc-crm/
skos: http://www.w3.org/2004/02/skos/core#
rdfs: http://www.w3.org/2000/01/rdf-schema#
org: http://www.w3.org/ns/org#
xsd: http://www.w3.org/2001/XMLSchema#
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_city_code:
description: >-
The 3-letter city/settlement code (e.g., AMS for Amsterdam).
MIGRATED from city_code (Rule 53).
range: string
slot_uri: schema:code
multivalued: false

View file

@ -1,11 +1,29 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_embargo_end_date
name: has_or_had_embargo_end_date
description: >-
The date when an embargo or restriction ends.
title: has or had embargo end date
slot_uri: premis:endDate
range: date
multivalued: false
exact_mappings:
- premis:endDate
close_mappings:
- rico:hasEndDate
title: Has Or Had Embargo End Date
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#
  xsd: http://www.w3.org/2001/XMLSchema#
  # Declared because this module's close_mappings use a rico: CURIE.
  rico: https://www.ica.org/standards/RiC/ontology#
  # NOTE(review): slot_uri and exact_mappings use a premis: CURIE, which is
  # still undeclared; add its namespace IRI here once confirmed.
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_embargo_end_date:
description: >-
The date when an embargo or restriction ends.
title: has or had embargo end date
slot_uri: premis:endDate
range: date
multivalued: false
exact_mappings:
- premis:endDate
close_mappings:
- rico:hasEndDate

View file

@ -1,7 +1,25 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_embargo_reason
name: has_or_had_embargo_reason
description: >-
The reason for an embargo or restriction.
title: has or had embargo reason
slot_uri: rico:conditionsOfAccess
range: string
multivalued: true
title: Has Or Had Embargo Reason
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#
  xsd: http://www.w3.org/2001/XMLSchema#
  # Declared because this module's slot_uri uses a rico: CURIE.
  rico: https://www.ica.org/standards/RiC/ontology#
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_embargo_reason:
description: >-
The reason for an embargo or restriction.
title: has or had embargo reason
slot_uri: rico:conditionsOfAccess
range: string
multivalued: true

View file

@ -1,9 +1,27 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_exhibition_type
name: has_or_had_exhibition_type
description: >-
The type or category of an exhibition (e.g., Permanent, Temporary, Traveling).
title: has or had exhibition type
slot_uri: rico:hasEventType
close_mappings:
- crm:P2_has_type
range: string
multivalued: true
title: Has Or Had Exhibition Type
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#
  xsd: http://www.w3.org/2001/XMLSchema#
  # Declared because this module's slot_uri uses a rico: CURIE.
  rico: https://www.ica.org/standards/RiC/ontology#
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_exhibition_type:
description: >-
The type or category of an exhibition (e.g., Permanent, Temporary, Traveling).
title: has or had exhibition type
slot_uri: rico:hasEventType
close_mappings:
- crm:P2_has_type
range: string
multivalued: true

View file

@ -1,9 +1,27 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_extent_text
name: has_or_had_extent_text
description: >-
Textual description of the extent of an entity (e.g., '300 boxes', '2 linear meters').
title: has or had extent text
slot_uri: rico:hasExtent
exact_mappings:
- rico:hasExtent
range: string
multivalued: true
title: Has Or Had Extent Text
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#
  xsd: http://www.w3.org/2001/XMLSchema#
  # Declared because this module's slot_uri and exact_mappings use rico: CURIEs.
  rico: https://www.ica.org/standards/RiC/ontology#
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_extent_text:
description: >-
Textual description of the extent of an entity (e.g., '300 boxes', '2 linear meters').
title: has or had extent text
slot_uri: rico:hasExtent
exact_mappings:
- rico:hasExtent
range: string
multivalued: true

View file

@ -1,7 +1,25 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_file_location
name: has_or_had_file_location
description: >-
The location of a file.
MIGRATED from html_snapshot_path (Rule 53).
range: FileLocation
slot_uri: skos:note
multivalued: true
title: Has Or Had File Location
prefixes:
linkml: https://w3id.org/linkml/
hc: https://nde.nl/ontology/hc/
schema: http://schema.org/
dcterms: http://purl.org/dc/terms/
prov: http://www.w3.org/ns/prov#
crm: http://www.cidoc-crm.org/cidoc-crm/
skos: http://www.w3.org/2004/02/skos/core#
rdfs: http://www.w3.org/2000/01/rdf-schema#
org: http://www.w3.org/ns/org#
xsd: http://www.w3.org/2001/XMLSchema#
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_file_location:
description: >-
The location of a file.
MIGRATED from html_snapshot_path (Rule 53).
range: FileLocation
slot_uri: skos:note
multivalued: true

View file

@ -1,7 +1,25 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_parent
name: has_or_had_parent
description: >-
The parent entity of this entity.
MIGRATED from parent_chapter_id (Rule 53).
range: Any
slot_uri: schema:parent
multivalued: false
title: Has Or Had Parent
prefixes:
linkml: https://w3id.org/linkml/
hc: https://nde.nl/ontology/hc/
schema: http://schema.org/
dcterms: http://purl.org/dc/terms/
prov: http://www.w3.org/ns/prov#
crm: http://www.cidoc-crm.org/cidoc-crm/
skos: http://www.w3.org/2004/02/skos/core#
rdfs: http://www.w3.org/2000/01/rdf-schema#
org: http://www.w3.org/ns/org#
xsd: http://www.w3.org/2001/XMLSchema#
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_parent:
description: >-
The parent entity of this entity.
MIGRATED from parent_chapter_id (Rule 53).
range: Any
slot_uri: schema:parent
multivalued: false

View file

@ -1,9 +1,27 @@
id: https://nde.nl/ontology/hc/slot/has_or_had_sequence_index
name: has_or_had_sequence_index
description: >-
The sequence index or order of an item (e.g. chapter number, page number).
MIGRATED from chapter_index (Rule 53).
range: integer
slot_uri: schema:position
exact_mappings:
- schema:position
multivalued: false
title: Has Or Had Sequence Index
prefixes:
linkml: https://w3id.org/linkml/
hc: https://nde.nl/ontology/hc/
schema: http://schema.org/
dcterms: http://purl.org/dc/terms/
prov: http://www.w3.org/ns/prov#
crm: http://www.cidoc-crm.org/cidoc-crm/
skos: http://www.w3.org/2004/02/skos/core#
rdfs: http://www.w3.org/2000/01/rdf-schema#
org: http://www.w3.org/ns/org#
xsd: http://www.w3.org/2001/XMLSchema#
imports:
- linkml:types
default_prefix: hc
slots:
has_or_had_sequence_index:
description: >-
The sequence index or order of an item (e.g. chapter number, page number).
MIGRATED from chapter_index (Rule 53).
range: integer
slot_uri: schema:position
exact_mappings:
- schema:position
multivalued: false

View file

@ -1,11 +1,29 @@
id: https://nde.nl/ontology/hc/slot/is_or_was_documented_by
name: is_or_was_documented_by
description: >-
Indicates that the entity is or was documented by another resource (e.g., a FinancialStatement documenting a Budget).
title: is or was documented by
slot_uri: schema:subjectOf
range: ReconstructedEntity
multivalued: true
exact_mappings:
- crm:P70i_is_documented_in
close_mappings:
- rico:isOrWasSubjectOf
title: Is Or Was Documented By
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#
  xsd: http://www.w3.org/2001/XMLSchema#
  # Declared because this module's close_mappings use a rico: CURIE.
  rico: https://www.ica.org/standards/RiC/ontology#
imports:
- linkml:types
default_prefix: hc
slots:
is_or_was_documented_by:
description: >-
Indicates that the entity is or was documented by another resource (e.g., a FinancialStatement documenting a Budget).
title: is or was documented by
slot_uri: schema:subjectOf
range: ReconstructedEntity
multivalued: true
exact_mappings:
- crm:P70i_is_documented_in
close_mappings:
- rico:isOrWasSubjectOf

View file

@ -1,9 +1,27 @@
id: https://nde.nl/ontology/hc/slot/is_or_was_observed_by
name: is_or_was_observed_by
description: >-
The observation that documented this event or state.
MIGRATED from cessation_observed_in (Rule 53).
range: CustodianObservation
slot_uri: prov:wasGeneratedBy
exact_mappings:
- prov:wasGeneratedBy
multivalued: true
title: Is Or Was Observed By
prefixes:
linkml: https://w3id.org/linkml/
hc: https://nde.nl/ontology/hc/
schema: http://schema.org/
dcterms: http://purl.org/dc/terms/
prov: http://www.w3.org/ns/prov#
crm: http://www.cidoc-crm.org/cidoc-crm/
skos: http://www.w3.org/2004/02/skos/core#
rdfs: http://www.w3.org/2000/01/rdf-schema#
org: http://www.w3.org/ns/org#
xsd: http://www.w3.org/2001/XMLSchema#
imports:
- linkml:types
default_prefix: hc
slots:
is_or_was_observed_by:
description: >-
The observation that documented this event or state.
MIGRATED from cessation_observed_in (Rule 53).
range: CustodianObservation
slot_uri: prov:wasGeneratedBy
exact_mappings:
- prov:wasGeneratedBy
multivalued: true

View file

@ -1,111 +0,0 @@
import os
import re
# Directory of LinkML class modules to process; a relative path, so it is
# resolved against the current working directory.
directory = "schemas/20251121/linkml/modules/classes/"
# Text added to every generated header right after the description line.
# NOTE(review): the entries show no leading indentation in this view; confirm
# against the original file that they were nested under `prefixes:`.
prefixes_block = """prefixes:
linkml: https://w3id.org/linkml/
schema: http://schema.org/
skos: http://www.w3.org/2004/02/skos/core#
rico: https://www.ica.org/standards/RiC/ontology#
wd: http://www.wikidata.org/entity/
"""
# Text added to the generated header only when the file has no `imports:` line.
imports_block = """imports:
- linkml:types
"""
def split_camel_case(name):
    """Turn a CamelCase identifier into space-separated words.

    A space is inserted where a lowercase letter or digit is followed by an
    uppercase letter ("FindingAid" -> "Finding Aid"), and also between an
    acronym run and the capitalised word after it
    ("EADIdentifier" -> "EAD Identifier"). Names without such boundaries are
    returned unchanged.

    Args:
        name: The identifier to split.

    Returns:
        The identifier with single spaces inserted at word boundaries.
    """
    # Split acronym runs first: "LLMResponse" -> "LLM Response". The greedy
    # run backtracks by one capital so the last capital starts the next word.
    spaced = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', name)
    # Then the ordinary lower/digit -> upper boundary.
    return re.sub('([a-z0-9])([A-Z])', r'\1 \2', spaced)
# Driver loop: for every .yaml file in `directory` that does not already start
# with "id:", build a header (id, name, title, description, prefixes, plus
# imports when the file has none), prepend it to the file content and rewrite
# the file in place. `count` tallies the rewritten files.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the loops and of the three consecutive `break` statements below should be
# confirmed against the original file.
count = 0
for filename in os.listdir(directory):
if not filename.endswith(".yaml"):
continue
filepath = os.path.join(directory, filename)
with open(filepath, 'r') as f:
content = f.read()
if content.startswith("id:"):
continue # Already has metadata
# Check if imports already exist in the file (even if unstructured)
has_imports = re.search(r"^imports:", content, re.MULTILINE)
# The only statement under this condition is `pass`, so it has no effect.
if not content.strip().startswith("classes:") and not has_imports:
# Some files might have comments at the top?
# If it doesn't start with classes: or id:, we should check.
# But my grep showed files starting with classes:
pass
# Simple parsing
lines = content.splitlines()
class_name = None
description = None
# Determine class name from filename first as fallback/confirmation
filename_class = filename.replace(".yaml", "")
# found_class_in_content is assigned here and below but never read in the
# code shown here.
found_class_in_content = False
for i, line in enumerate(lines):
if line.strip().startswith("classes:"):
# Look for class name in subsequent lines
# Only the (at most) four lines following `classes:` are examined.
for j in range(i+1, min(i+5, len(lines))):
# Matches " ClassName:"
match = re.match(r"^ ([a-zA-Z0-9_]+):", lines[j])
if match:
class_name = match.group(1)
found_class_in_content = True
# Look for description inside the class
# Only the (at most) 14 lines following the class line are examined.
for k in range(j+1, min(j+15, len(lines))):
# Matches " description: Value"
desc_match = re.match(r"^ description:\s+(.*)", lines[k])
if desc_match:
description = desc_match.group(1).strip()
# Handle multi-line description if needed?
# A value starting with ">" or "|" is discarded, so the generic fallback
# description further down is used instead.
if description.startswith(">") or description.startswith("|"):
description = None
break
break
break
if not class_name:
# Fallback to filename if parsing failed (e.g. if file is empty or weird)
class_name = filename_class
# Ensure class name matches filename (convention)
# On a mismatch a warning is printed and the filename-derived name is used.
if class_name != filename_class:
print(f"Warning: Class name '{class_name}' in content differs from filename '{filename_class}'. Using filename.")
class_name = filename_class
title = split_camel_case(class_name)
if not description:
description = f"LinkML class definition for {title}"
else:
# Strip quotes if present
if (description.startswith('"') and description.endswith('"')) or (description.startswith("'") and description.endswith("'")):
description = description[1:-1]
# Construct new content
# NOTE(review): title and description are interpolated as plain text with no
# YAML quoting or escaping applied.
new_header = f"id: https://nde.nl/ontology/hc/class/{class_name}\n"
new_header += f"name: {class_name}\n"
new_header += f"title: {title}\n"
new_header += f"description: {description}\n"
new_header += prefixes_block
if not has_imports:
new_header += imports_block
new_content = new_header + content
with open(filepath, 'w') as f:
f.write(new_content)
count += 1
# print(f"Updated {filename}")
print(f"Total files updated: {count}")

View file

@ -0,0 +1,119 @@
import os
import re
# Directory containing the LinkML class modules that this script edits in place.
# NOTE(review): this is an absolute path under one user's home directory, so
# the script only finds its files on a machine with that layout.
SCHEMA_DIR = "/Users/kempersc/apps/glam/schemas/20251121/linkml/modules/classes/"

# Old slot name -> replacement slot name for ExtractionMetadata.yaml.
EXTRACTION_METADATA_MAP = {
    "extraction_agent": "is_or_was_retrieved_by",
    "extraction_method": "has_or_had_method",
    "extraction_date": "retrieval_timestamp",
    "cost_usd": "has_or_had_expense",
    "source_file": "has_or_had_source",
    "staff_id": "has_or_had_identifier",
    "linkedin_url": "has_or_had_url",
    # request_id shares its replacement with staff_id.
    "request_id": "has_or_had_identifier",
}

# Old slot name -> replacement slot name for the full_name rename.
FULL_NAME_MAP = {"full_name": "has_or_had_label"}
def fix_extraction_metadata(schema_dir=None, mapping=None):
    """Rename archived slots in ExtractionMetadata.yaml to their replacements.

    Lines are matched on their stripped text and rewritten in three forms:
    slot imports (``- ../slots/<old>``), slot-list entries (``- <old>``) and
    mapping keys (``<old>:``). When several old slots map to the same new slot,
    repeated import or slot-list entries within one contiguous list are
    dropped. Mapping keys that end up sharing a name are only reported,
    because merging their bodies needs a human decision.

    The file is read and written as UTF-8 and is only rewritten when at least
    one line changed. Nothing happens if the file does not exist.

    Args:
        schema_dir: Directory containing ExtractionMetadata.yaml. Defaults to
            the module-level SCHEMA_DIR.
        mapping: Dict of old slot name -> new slot name. Defaults to the
            module-level EXTRACTION_METADATA_MAP.

    Returns:
        None.
    """
    if schema_dir is None:
        schema_dir = SCHEMA_DIR
    if mapping is None:
        mapping = EXTRACTION_METADATA_MAP

    filepath = os.path.join(schema_dir, "ExtractionMetadata.yaml")
    if not os.path.exists(filepath):
        return
    print(f"Fixing {filepath}...")

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    new_lines, collisions = _rename_extraction_slots(lines, mapping)

    # Duplicate mapping keys are silently "last one wins" in most YAML loaders,
    # so make the collision visible instead of leaving it to be discovered.
    for new_key, old_keys in sorted(collisions.items()):
        print(
            f"Warning: {filepath}: {', '.join(old_keys)} were all renamed to "
            f"'{new_key}:'; check for duplicate keys and merge them manually."
        )

    if new_lines != lines:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)


def _rename_extraction_slots(lines, mapping):
    """Apply `mapping` to `lines`; return (new lines, {new key: [old keys]} collisions)."""
    # Only entries the renaming can produce are ever de-duplicated, so other
    # repeated list items (e.g. example values) are left alone.
    dedup_targets = set()
    for new_slot in mapping.values():
        dedup_targets.add(f"- ../slots/{new_slot}")
        dedup_targets.add(f"- {new_slot}")

    out = []
    run_entries = set()   # list items emitted in the current contiguous "- " run
    renamed_keys = {}     # (indent, new key) -> old keys renamed to it

    for line in lines:
        stripped = line.strip()

        if not stripped.startswith("- "):
            run_entries = set()  # any non-item line ends the current list
            key = stripped[:-1] if stripped.endswith(":") else None
            if key in mapping:
                new_key = mapping[key]
                indent = len(line) - len(line.lstrip())
                renamed_keys.setdefault((indent, new_key), []).append(key)
                line = line.replace(key, new_key)
            out.append(line)
            continue

        # List item: either an import path or a bare slot name.
        if stripped.startswith("- ../slots/"):
            old_slot = stripped.split("/")[-1]
        else:
            old_slot = stripped[2:]
        if old_slot in mapping:
            line = line.replace(old_slot, mapping[old_slot])

        entry = line.strip()
        if entry in dedup_targets and entry in run_entries:
            continue  # same replacement slot already listed in this run
        run_entries.add(entry)
        out.append(line)

    collisions = {}
    for (_indent, new_key), old_keys in renamed_keys.items():
        distinct = sorted(set(old_keys))
        if len(distinct) > 1:
            collisions[new_key] = distinct
    return out, collisions
def fix_full_name(filename, schema_dir=None, mapping=None):
    """Rename the archived full_name slot in one class file.

    A line is rewritten when its stripped text is exactly a slot import
    (``- ../slots/<old>``), a slot-list entry (``- <old>``) or a mapping key
    (``<old>:``). If the replacement is already present in the same contiguous
    list, the repeated entry is dropped rather than listed twice. Mapping keys
    are renamed without checking sibling keys.

    The file is read and written as UTF-8 and is only rewritten when at least
    one line changed. Nothing happens if the file does not exist.

    Args:
        filename: Name of the class file inside the schema directory.
        schema_dir: Directory containing the file. Defaults to the
            module-level SCHEMA_DIR.
        mapping: Dict of old slot name -> new slot name. Defaults to the
            module-level FULL_NAME_MAP.

    Returns:
        None.
    """
    if schema_dir is None:
        schema_dir = SCHEMA_DIR
    if mapping is None:
        mapping = FULL_NAME_MAP

    filepath = os.path.join(schema_dir, filename)
    if not os.path.exists(filepath):
        return
    print(f"Fixing {filepath}...")

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Exact stripped-line forms to rewrite, and what each becomes.
    renames = {}
    for old_slot, new_slot in mapping.items():
        renames[f"- ../slots/{old_slot}"] = f"- ../slots/{new_slot}"
        renames[f"- {old_slot}"] = f"- {new_slot}"
        renames[f"{old_slot}:"] = f"{new_slot}:"
    list_targets = {new for new in renames.values() if new.startswith("- ")}

    new_lines = []
    run_entries = set()  # list items emitted in the current contiguous "- " run
    for line in lines:
        stripped = line.strip()
        target = renames.get(stripped)
        if target is not None:
            line = line.replace(stripped, target)

        entry = line.strip()
        if entry.startswith("- "):
            if entry in list_targets and entry in run_entries:
                continue  # replacement slot already listed in this run
            run_entries.add(entry)
        else:
            run_entries = set()  # any non-item line ends the current list
        new_lines.append(line)

    if new_lines != lines:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)
def main():
    """Run the ExtractionMetadata renames, then the full_name rename on each listed class file."""
    fix_extraction_metadata()

    # Class files processed for the full_name rename, in the original order.
    class_files = (
        "FindingAid.yaml",
        "OrganizationBranch.yaml",
        "DigitalPlatformV2OrganizationStatus.yaml",  # Also flagged
        "LegalName.yaml",  # Also flagged
        "CustodianLegalStatus.yaml",  # Also flagged
        "ProfileData.yaml",  # Also flagged
    )
    for class_file in class_files:
        fix_full_name(class_file)


if __name__ == "__main__":
    main()

View file

@ -21,6 +21,15 @@ def get_archived_slot_names():
def find_references(archived_slots):
references = {} # {slot_name: [file_paths]}
# Metadata keys that mimic slot names but are valid LinkML structure
# We ignore "Usage as key" for these
SAFE_METADATA_KEYS = {
"title", "description", "name", "id", "status", "notes", "comments", "examples",
"todos", "see_also", "range", "slot_usage", "required", "multivalued",
"inlined", "identifier", "value", "unit", "prefixes", "imports", "classes",
"slots", "attributes", "exact_mappings", "close_mappings", "related_mappings"
}
class_files = glob.glob(os.path.join(CLASSES_DIR, "*.yaml"))
for cls_file in class_files:
@ -30,28 +39,30 @@ def find_references(archived_slots):
for i, line in enumerate(lines):
stripped = line.strip()
# Check for imports: "- ../slots/slotname"
# Check for slot usage: "- slotname" (in slots list)
# Check for slot_usage keys: "slotname:"
for slot in archived_slots:
# Import check
if f"../slots/{slot}" in stripped and not stripped.strip().startswith("#"):
# Import check: "- ../slots/slotname"
if f"../slots/{slot}" in stripped and not stripped.startswith("#"):
if slot not in references: references[slot] = []
references[slot].append(f"{cls_file} (line {i+1}): Import")
continue
# Loose usage check (can be false positive if slot name is common word like 'description')
# But we restrict to archived slots.
# Check for "- slotname"
# Usage in slots list: "- slotname"
# Must be exact match to avoid partials
if stripped == f"- {slot}":
if slot not in references: references[slot] = []
references[slot].append(f"{cls_file} (line {i+1}): Usage in slots list")
continue
# Check for "slotname:"
# Usage as key: "slotname:"
if stripped.startswith(f"{slot}:"):
# Check if it's a safe metadata key
if slot in SAFE_METADATA_KEYS:
continue
# Also, if we are inside a slot_usage block, "slotname:" is valid ONLY IF
# we are refining that slot. But if the slot is archived, we shouldn't be refining it!
# So "Usage as key" is actually relevant for slot_usage of archived slots.
if slot not in references: references[slot] = []
references[slot].append(f"{cls_file} (line {i+1}): Usage as key")
continue