Refactor LinkML schemas and slots for consistency and clarity
- Updated imports in FindingAid.yaml to remove unnecessary entries and added new slots for arrangement level and provenance path.
- Replaced 'full_name' with 'has_or_had_label' in LegalName.yaml and ProfileData.yaml for uniformity.
- Enhanced slot definitions in various YAML files, including ceases_or_ceased_through, has_or_had_arrangement_level, has_or_had_assessment, and others, to include metadata and improve structure.
- Removed the script fix_linkml_metadata.py as it is no longer needed.
- Added new script fix_specific_dead_links.py to handle specific mapping updates for extraction metadata and full name fields across multiple YAML files.
This commit is contained in:
parent
fcb704c97e
commit
2f44857028
24 changed files with 552 additions and 359 deletions
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"generated": "2026-01-29T16:40:47.585Z",
|
||||
"generated": "2026-01-29T17:17:48.016Z",
|
||||
"schemaRoot": "/schemas/20251121/linkml",
|
||||
"totalFiles": 3003,
|
||||
"categoryCounts": {
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ classes:
|
|||
legal_form:
|
||||
range: string
|
||||
description: Legal form of the organization (Municipal library, Foundation, etc.)
|
||||
full_name:
|
||||
has_or_had_label:
|
||||
range: string
|
||||
description: Full official name of the organization
|
||||
closure_date:
|
||||
|
|
|
|||
|
|
@ -142,6 +142,7 @@ classes:
|
|||
'
|
||||
range: EducationFacilityType
|
||||
inlined: true
|
||||
equals_expression: '["hc:EducationProviderType"]'
|
||||
examples:
|
||||
- value:
|
||||
has_or_had_label: EDUCATION_CENTER
|
||||
|
|
@ -265,8 +266,6 @@ classes:
|
|||
is_or_was_generated_by:
|
||||
range: ReconstructionActivity
|
||||
required: false
|
||||
has_or_had_type:
|
||||
equals_expression: '["hc:EducationProviderType"]'
|
||||
comments:
|
||||
- EducationCenter models educational facilities of heritage custodians
|
||||
- Schema.org EducationalOrganization for education facilities
|
||||
|
|
@ -308,10 +307,16 @@ classes:
|
|||
av_equipped: true
|
||||
flexible_seating: true
|
||||
has_or_had_quantity:
|
||||
value: 2
|
||||
unit: workshop_spaces
|
||||
- numeric_value: 2
|
||||
has_or_had_unit:
|
||||
unit_text: workshop_spaces
|
||||
- numeric_value: 12
|
||||
has_or_had_unit:
|
||||
unit_text: staff_members
|
||||
max_group_size: 30
|
||||
has_av_equipment: true
|
||||
has_or_had_equipment:
|
||||
- has_or_had_name: 4K Projector
|
||||
has_or_had_type: Projector
|
||||
provides_or_provided:
|
||||
has_or_had_description: Hands-on learning lab
|
||||
has_or_had_accessibility_feature:
|
||||
|
|
@ -321,10 +326,6 @@ classes:
|
|||
- has_or_had_quantity: 75000
|
||||
has_or_had_time_interval:
|
||||
has_or_had_label: Annual
|
||||
has_or_had_quantity:
|
||||
- numeric_value: 12
|
||||
has_or_had_unit:
|
||||
unit_text: staff_members
|
||||
is_or_was_required: true
|
||||
description: Major museum education center
|
||||
- value:
|
||||
|
|
@ -353,7 +354,9 @@ classes:
|
|||
seating_capacity: 40
|
||||
av_equipped: true
|
||||
max_group_size: 20
|
||||
has_av_equipment: true
|
||||
has_or_had_equipment:
|
||||
- has_or_had_name: Projector
|
||||
has_or_had_type: Projector
|
||||
is_or_was_required: true
|
||||
description: Archive learning center
|
||||
annotations:
|
||||
|
|
|
|||
|
|
@ -18,100 +18,78 @@ imports:
|
|||
- ./TemplateSpecificityType
|
||||
- ./TemplateSpecificityTypes
|
||||
- ../enums/ProfileExtractionMethodEnum
|
||||
- ../slots/extraction_agent
|
||||
- ../slots/extraction_method
|
||||
- ../slots/cost_usd
|
||||
- ../slots/source_file
|
||||
- ../slots/staff_id
|
||||
- ../slots/extraction_date
|
||||
- ../slots/linkedin_url
|
||||
- ../slots/request_id
|
||||
- ../slots/is_or_was_retrieved_by
|
||||
- ../slots/has_or_had_method
|
||||
- ../slots/has_or_had_expense
|
||||
- ../slots/has_or_had_source
|
||||
- ../slots/has_or_had_identifier
|
||||
- ../slots/retrieval_timestamp
|
||||
- ../slots/has_or_had_url
|
||||
- ../slots/llm_response
|
||||
- ../slots/specificity_annotation
|
||||
- ../slots/has_or_had_score
|
||||
- ../slots/cost_usd
|
||||
- ../slots/extraction_agent
|
||||
- ../slots/extraction_date
|
||||
- ../slots/extraction_method
|
||||
- ../slots/linkedin_url
|
||||
- ../slots/llm_response
|
||||
- ../slots/request_id
|
||||
- ../slots/source_file
|
||||
- ../slots/specificity_annotation
|
||||
- ../slots/staff_id
|
||||
- ../slots/has_or_had_score
|
||||
- ../slots/cost_usd
|
||||
- ../slots/extraction_agent
|
||||
- ../slots/extraction_date
|
||||
- ../slots/extraction_method
|
||||
- ../slots/linkedin_url
|
||||
- ../slots/llm_response
|
||||
- ../slots/request_id
|
||||
- ../slots/source_file
|
||||
- ../slots/specificity_annotation
|
||||
- ../slots/staff_id
|
||||
- ../slots/has_or_had_score
|
||||
default_range: string
|
||||
classes:
|
||||
ExtractionMetadata:
|
||||
class_uri: prov:Activity
|
||||
description: "Provenance metadata for data extraction activities.\n\nRecords how, when, and by what agent data was extracted from \nexternal sources (LinkedIn, web scraping, APIs).\n\n**PROV-O Alignment**:\n- ExtractionMetadata IS a prov:Activity (the extraction process)\n- The extracted data IS the prov:Entity (output of the activity)\n- extraction_agent IS the prov:Agent (software/AI that performed extraction)\n- source_file/linkedin_url IS prov:used (input to the activity)\n\n**Use Cases**:\n- LinkedIn profile extractions via Exa API\n- Web scraping provenance\n- Staff list parsing provenance\n- Connection network extraction\n\n**Example JSON Structure**:\n```json\n{\n \"extraction_metadata\": {\n \"source_file\": \"/path/to/source.json\",\n \"staff_id\": \"org_staff_0001_name\",\n \"extraction_date\": \"2025-12-12T22:00:00Z\",\n \"extraction_method\": \"exa_crawling_exa\",\n \"extraction_agent\": \"claude-opus-4.5\",\n \"linkedin_url\": \"https://www.linkedin.com/in/...\"\
|
||||
,\n \"cost_usd\": 0.001\n }\n}\n```\n"
|
||||
description: "Provenance metadata for data extraction activities.\n\nRecords how, when, and by what agent data was extracted from \nexternal sources (LinkedIn, web scraping, APIs).\n\n**PROV-O Alignment**:\n- ExtractionMetadata IS a prov:Activity (the extraction process)\n- The extracted data IS the prov:Entity (output of the activity)\n- is_or_was_retrieved_by IS the prov:Agent (software/AI that performed extraction)\n- has_or_had_source/has_or_had_url IS prov:used (input to the activity)\n\n**Use Cases**:\n- LinkedIn profile extractions via Exa API\n- Web scraping provenance\n- Staff list parsing provenance\n- Connection network extraction\n\n**Example JSON Structure**:\n```json\n{\n \"extraction_metadata\": {\n \"has_or_had_source\": \"/path/to/source.json\",\n \"has_or_had_identifier\": \"org_staff_0001_name\",\n \"retrieval_timestamp\": \"2025-12-12T22:00:00Z\",\n \"has_or_had_method\": \"exa_crawling_exa\",\n \"is_or_was_retrieved_by\": \"claude-opus-4.5\",\n \"has_or_had_url\": \"https://www.linkedin.com/in/...\"\
|
||||
,\n \"has_or_had_expense\": 0.001\n }\n}\n```\n"
|
||||
exact_mappings:
|
||||
- prov:Activity
|
||||
close_mappings:
|
||||
- schema:Action
|
||||
- dct:ProvenanceStatement
|
||||
slots:
|
||||
- cost_usd
|
||||
- extraction_agent
|
||||
- extraction_date
|
||||
- extraction_method
|
||||
- linkedin_url
|
||||
- has_or_had_expense
|
||||
- is_or_was_retrieved_by
|
||||
- retrieval_timestamp
|
||||
- has_or_had_method
|
||||
- has_or_had_url
|
||||
- llm_response
|
||||
- request_id
|
||||
- source_file
|
||||
- has_or_had_identifier
|
||||
- has_or_had_source
|
||||
- specificity_annotation
|
||||
- staff_id
|
||||
- has_or_had_score
|
||||
slot_usage:
|
||||
source_file:
|
||||
has_or_had_source:
|
||||
range: string
|
||||
examples:
|
||||
- value: /data/custodian/person/affiliated/parsed/rijksmuseum_staff_20251210T155416Z.json
|
||||
description: Path to parsed staff list JSON
|
||||
staff_id:
|
||||
has_or_had_identifier:
|
||||
range: string
|
||||
pattern: ^[a-z0-9-]+_staff_[a-z0-9-_]+$
|
||||
examples:
|
||||
- value: rijksmuseum_staff_0042_jan_van_der_berg
|
||||
description: Staff ID with org prefix, index, and name slug
|
||||
extraction_date:
|
||||
- value: exa_12345678-abcd-efgh-ijkl-mnopqrstuv
|
||||
description: Exa API request ID
|
||||
retrieval_timestamp:
|
||||
range: datetime
|
||||
required: true
|
||||
examples:
|
||||
- value: '2025-12-12T22:00:00Z'
|
||||
description: UTC timestamp of extraction
|
||||
extraction_method:
|
||||
has_or_had_method:
|
||||
range: ProfileExtractionMethodEnum
|
||||
required: true
|
||||
examples:
|
||||
- value: exa_crawling_exa
|
||||
description: Extracted via Exa AI crawling API
|
||||
extraction_agent:
|
||||
is_or_was_retrieved_by:
|
||||
range: string
|
||||
examples:
|
||||
- value: claude-opus-4.5
|
||||
description: Extracted by Claude Opus 4.5
|
||||
- value: ''
|
||||
description: Empty string for fully automated extraction
|
||||
linkedin_url:
|
||||
has_or_had_url:
|
||||
range: uri
|
||||
pattern: ^https://www\.linkedin\.com/in/[a-z0-9-]+/?$
|
||||
examples:
|
||||
- value: https://www.linkedin.com/in/jan-van-der-berg-12345
|
||||
description: LinkedIn profile URL
|
||||
cost_usd:
|
||||
has_or_had_expense:
|
||||
range: float
|
||||
minimum_value: 0.0
|
||||
examples:
|
||||
|
|
@ -119,11 +97,6 @@ classes:
|
|||
description: Exa API call cost
|
||||
- value: 0.0
|
||||
description: Free extraction (cached/local)
|
||||
request_id:
|
||||
range: string
|
||||
examples:
|
||||
- value: exa_12345678-abcd-efgh-ijkl-mnopqrstuv
|
||||
description: Exa API request ID
|
||||
llm_response:
|
||||
range: LLMResponse
|
||||
required: false
|
||||
|
|
@ -134,8 +107,8 @@ classes:
|
|||
comments:
|
||||
- Every person entity file MUST have extraction_metadata
|
||||
- See AGENTS.md Rule 20 for required fields
|
||||
- extraction_agent should be 'claude-opus-4.5' for manual extraction
|
||||
- cost_usd enables budget tracking for API-heavy extractions
|
||||
- is_or_was_retrieved_by should be 'claude-opus-4.5' for manual extraction
|
||||
- has_or_had_expense enables budget tracking for API-heavy extractions
|
||||
see_also:
|
||||
- https://www.w3.org/TR/prov-o/
|
||||
- https://docs.exa.ai/
|
||||
|
|
|
|||
|
|
@ -47,55 +47,9 @@ imports:
|
|||
- ./ConfidenceMethod
|
||||
- ../slots/html_file
|
||||
- ../slots/has_or_had_identifier
|
||||
- ./Identifier
|
||||
- ../slots/source_url
|
||||
- ../slots/has_or_had_label
|
||||
- ../slots/date
|
||||
- ../slots/note
|
||||
- ../slots/creator
|
||||
- ../slots/has_or_had_publisher
|
||||
- ./Publisher
|
||||
- ../slots/is_or_was_published_at
|
||||
- ./PublicationEvent
|
||||
- ./Quantity
|
||||
- ../slots/isbn
|
||||
- ../slots/has_or_had_access_condition
|
||||
- ../slots/is_or_was_access_restricted
|
||||
- ../slots/has_or_had_overview
|
||||
- ../slots/has_or_had_image
|
||||
- ./Image
|
||||
- ../slots/has_or_had_quantity
|
||||
- ../slots/includes_or_included
|
||||
- ./GeoSpatialPlace
|
||||
- ../slots/is_or_was_categorized_as
|
||||
- ./ColonialStatus
|
||||
- ../slots/content_block
|
||||
- ../slots/crawler_version
|
||||
- ../slots/custodian
|
||||
- ../slots/was_retrieved_at
|
||||
- ./Timestamp
|
||||
- ../slots/de
|
||||
- ../slots/has_or_had_identifier
|
||||
- ./Identifier
|
||||
- ./EADIdentifier
|
||||
- ../slots/ead_id
|
||||
- ../slots/en
|
||||
- ../slots/end
|
||||
- ../slots/has_or_had_external_resource
|
||||
- ../slots/has_or_had_featured_item
|
||||
- ../slots/has_or_had_note
|
||||
- ./Note
|
||||
- ../slots/has_or_had_scope
|
||||
- ./Scope
|
||||
- ../slots/has_or_had_type
|
||||
- ../slots/has_or_had_format
|
||||
- ./Format
|
||||
- ../slots/has_or_had_language
|
||||
- ./Language
|
||||
- ../slots/full_name
|
||||
- ../slots/has_or_had_geographic_extent
|
||||
- ../slots/has_or_had_identifier
|
||||
- ./GHCIdentifier
|
||||
- ../slots/has_or_had_level
|
||||
- ../slots/has_or_had_provenance_path
|
||||
- ../slots/contains_or_contained
|
||||
- ../slots/contains_or_contained
|
||||
- ../slots/contains_or_contained_en
|
||||
|
|
@ -535,36 +489,28 @@ classes:
|
|||
'
|
||||
slots:
|
||||
- has_or_had_access_condition
|
||||
- full_name
|
||||
- has_or_had_label
|
||||
- geographic_coverage
|
||||
- has_or_had_identifier
|
||||
- has_or_had_level
|
||||
- contains_or_contained
|
||||
- contains_or_contained
|
||||
- contains_or_contained_en
|
||||
- list_item
|
||||
- section_id
|
||||
- contains_or_contained
|
||||
- specificity_annotation
|
||||
- contains_or_contained
|
||||
- has_or_had_score
|
||||
- has_or_had_provenance_path
|
||||
slot_usage:
|
||||
contains_or_contained:
|
||||
has_or_had_level:
|
||||
range: integer
|
||||
minimum_value: 1
|
||||
maximum_value: 6
|
||||
description: Heading level (1-6). MIGRATED from heading_level/contains_or_contained duplicate.
|
||||
contains_or_contained:
|
||||
required: true
|
||||
content_block:
|
||||
multivalued: true
|
||||
contains_or_contained:
|
||||
range: PageSection
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
contains_or_contained:
|
||||
range: PageLink
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
any_of:
|
||||
- range: PageSection
|
||||
- range: PageLink
|
||||
has_or_had_featured_item:
|
||||
range: FeaturedCard
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ classes:
|
|||
description: Unique identifier for this legal name record
|
||||
range: uriorcurie
|
||||
required: true
|
||||
full_name:
|
||||
has_or_had_label:
|
||||
slot_uri: rov:legalName
|
||||
description: 'Complete legal name including organizational type.
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ classes:
|
|||
- schema:Person
|
||||
- prov:Entity
|
||||
attributes:
|
||||
full_name:
|
||||
has_or_had_label:
|
||||
range: string
|
||||
description: Person's full name
|
||||
has_or_had_title:
|
||||
|
|
|
|||
|
|
@ -1,9 +1,27 @@
|
|||
id: https://nde.nl/ontology/hc/slot/ceases_or_ceased_through
|
||||
name: ceases_or_ceased_through
|
||||
description: >-
|
||||
The event through which an entity ceases or ceased to exist/operate.
|
||||
MIGRATED from cessation_observed_in (Rule 53).
|
||||
range: CeasingEvent
|
||||
slot_uri: prov:wasInvalidatedBy
|
||||
exact_mappings:
|
||||
- crm:P93i_was_taken_out_of_existence_by
|
||||
multivalued: true
|
||||
title: Ceases Or Ceased Through
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
ceases_or_ceased_through:
|
||||
description: >-
|
||||
The event through which an entity ceases or ceased to exist/operate.
|
||||
MIGRATED from cessation_observed_in (Rule 53).
|
||||
range: CeasingEvent
|
||||
slot_uri: prov:wasInvalidatedBy
|
||||
exact_mappings:
|
||||
- crm:P93i_was_taken_out_of_existence_by
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -1,10 +1,28 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_arrangement_level
|
||||
name: has_or_had_arrangement_level
|
||||
description: The level of arrangement of the record set or information carrier.
|
||||
title: has or had arrangement level
|
||||
slot_uri: rico:hasRecordSetType
|
||||
range: ArrangementLevel
|
||||
multivalued: false
|
||||
exact_mappings:
|
||||
- isad:level_of_description
|
||||
close_mappings:
|
||||
- rico:RecordSetType
|
||||
title: Has Or Had Arrangement Level
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_arrangement_level:
|
||||
description: The level of arrangement of the record set or information carrier.
|
||||
title: has or had arrangement level
|
||||
slot_uri: rico:hasRecordSetType
|
||||
range: ArrangementLevel
|
||||
multivalued: false
|
||||
exact_mappings:
|
||||
- isad:level_of_description
|
||||
close_mappings:
|
||||
- rico:RecordSetType
|
||||
|
|
|
|||
|
|
@ -1,8 +1,26 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_assessment
|
||||
name: has_or_had_assessment
|
||||
description: >-
|
||||
Assessment associated with the entity.
|
||||
Range should be an Assessment class.
|
||||
MIGRATED from heritage_relevance (for LinkedInProfile) per Rule 53.
|
||||
slot_uri: crm:P140i_was_attributed_by
|
||||
range: Any
|
||||
multivalued: true
|
||||
title: Has Or Had Assessment
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_assessment:
|
||||
description: >-
|
||||
Assessment associated with the entity.
|
||||
Range should be an Assessment class.
|
||||
MIGRATED from heritage_relevance (for LinkedInProfile) per Rule 53.
|
||||
slot_uri: crm:P140i_was_attributed_by
|
||||
range: Any
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -1,7 +1,25 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_citation
|
||||
name: has_or_had_citation
|
||||
description: >-
|
||||
A bibliographic citation for the resource.
|
||||
MIGRATED from citation (Rule 53).
|
||||
range: string
|
||||
slot_uri: schema:citation
|
||||
multivalued: true
|
||||
title: Has Or Had Citation
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_citation:
|
||||
description: >-
|
||||
A bibliographic citation for the resource.
|
||||
MIGRATED from citation (Rule 53).
|
||||
range: string
|
||||
slot_uri: schema:citation
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -1,7 +1,25 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_city_code
|
||||
name: has_or_had_city_code
|
||||
description: >-
|
||||
The 3-letter city/settlement code (e.g., AMS for Amsterdam).
|
||||
MIGRATED from city_code (Rule 53).
|
||||
range: string
|
||||
slot_uri: schema:code
|
||||
multivalued: false
|
||||
title: Has Or Had City Code
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_city_code:
|
||||
description: >-
|
||||
The 3-letter city/settlement code (e.g., AMS for Amsterdam).
|
||||
MIGRATED from city_code (Rule 53).
|
||||
range: string
|
||||
slot_uri: schema:code
|
||||
multivalued: false
|
||||
|
|
|
|||
|
|
@ -1,11 +1,29 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_embargo_end_date
|
||||
name: has_or_had_embargo_end_date
|
||||
description: >-
|
||||
The date when an embargo or restriction ends.
|
||||
title: has or had embargo end date
|
||||
slot_uri: premis:endDate
|
||||
range: date
|
||||
multivalued: false
|
||||
exact_mappings:
|
||||
- premis:endDate
|
||||
close_mappings:
|
||||
- rico:hasEndDate
|
||||
title: Has Or Had Embargo End Date
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_embargo_end_date:
|
||||
description: >-
|
||||
The date when an embargo or restriction ends.
|
||||
title: has or had embargo end date
|
||||
slot_uri: premis:endDate
|
||||
range: date
|
||||
multivalued: false
|
||||
exact_mappings:
|
||||
- premis:endDate
|
||||
close_mappings:
|
||||
- rico:hasEndDate
|
||||
|
|
|
|||
|
|
@ -1,7 +1,25 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_embargo_reason
|
||||
name: has_or_had_embargo_reason
|
||||
description: >-
|
||||
The reason for an embargo or restriction.
|
||||
title: has or had embargo reason
|
||||
slot_uri: rico:conditionsOfAccess
|
||||
range: string
|
||||
multivalued: true
|
||||
title: Has Or Had Embargo Reason
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_embargo_reason:
|
||||
description: >-
|
||||
The reason for an embargo or restriction.
|
||||
title: has or had embargo reason
|
||||
slot_uri: rico:conditionsOfAccess
|
||||
range: string
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -1,9 +1,27 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_exhibition_type
|
||||
name: has_or_had_exhibition_type
|
||||
description: >-
|
||||
The type or category of an exhibition (e.g., Permanent, Temporary, Traveling).
|
||||
title: has or had exhibition type
|
||||
slot_uri: rico:hasEventType
|
||||
close_mappings:
|
||||
- crm:P2_has_type
|
||||
range: string
|
||||
multivalued: true
|
||||
title: Has Or Had Exhibition Type
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_exhibition_type:
|
||||
description: >-
|
||||
The type or category of an exhibition (e.g., Permanent, Temporary, Traveling).
|
||||
title: has or had exhibition type
|
||||
slot_uri: rico:hasEventType
|
||||
close_mappings:
|
||||
- crm:P2_has_type
|
||||
range: string
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -1,9 +1,27 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_extent_text
|
||||
name: has_or_had_extent_text
|
||||
description: >-
|
||||
Textual description of the extent of an entity (e.g., '300 boxes', '2 linear meters').
|
||||
title: has or had extent text
|
||||
slot_uri: rico:hasExtent
|
||||
exact_mappings:
|
||||
- rico:hasExtent
|
||||
range: string
|
||||
multivalued: true
|
||||
title: Has Or Had Extent Text
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_extent_text:
|
||||
description: >-
|
||||
Textual description of the extent of an entity (e.g., '300 boxes', '2 linear meters').
|
||||
title: has or had extent text
|
||||
slot_uri: rico:hasExtent
|
||||
exact_mappings:
|
||||
- rico:hasExtent
|
||||
range: string
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -1,7 +1,25 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_file_location
|
||||
name: has_or_had_file_location
|
||||
description: >-
|
||||
The location of a file.
|
||||
MIGRATED from html_snapshot_path (Rule 53).
|
||||
range: FileLocation
|
||||
slot_uri: skos:note
|
||||
multivalued: true
|
||||
title: Has Or Had File Location
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_file_location:
|
||||
description: >-
|
||||
The location of a file.
|
||||
MIGRATED from html_snapshot_path (Rule 53).
|
||||
range: FileLocation
|
||||
slot_uri: skos:note
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -1,7 +1,25 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_parent
|
||||
name: has_or_had_parent
|
||||
description: >-
|
||||
The parent entity of this entity.
|
||||
MIGRATED from parent_chapter_id (Rule 53).
|
||||
range: Any
|
||||
slot_uri: schema:parent
|
||||
multivalued: false
|
||||
title: Has Or Had Parent
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_parent:
|
||||
description: >-
|
||||
The parent entity of this entity.
|
||||
MIGRATED from parent_chapter_id (Rule 53).
|
||||
range: Any
|
||||
slot_uri: schema:parent
|
||||
multivalued: false
|
||||
|
|
|
|||
|
|
@ -1,9 +1,27 @@
|
|||
id: https://nde.nl/ontology/hc/slot/has_or_had_sequence_index
|
||||
name: has_or_had_sequence_index
|
||||
description: >-
|
||||
The sequence index or order of an item (e.g. chapter number, page number).
|
||||
MIGRATED from chapter_index (Rule 53).
|
||||
range: integer
|
||||
slot_uri: schema:position
|
||||
exact_mappings:
|
||||
- schema:position
|
||||
multivalued: false
|
||||
title: Has Or Had Sequence Index
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
has_or_had_sequence_index:
|
||||
description: >-
|
||||
The sequence index or order of an item (e.g. chapter number, page number).
|
||||
MIGRATED from chapter_index (Rule 53).
|
||||
range: integer
|
||||
slot_uri: schema:position
|
||||
exact_mappings:
|
||||
- schema:position
|
||||
multivalued: false
|
||||
|
|
|
|||
|
|
@ -1,11 +1,29 @@
|
|||
id: https://nde.nl/ontology/hc/slot/is_or_was_documented_by
|
||||
name: is_or_was_documented_by
|
||||
description: >-
|
||||
Indicates that the entity is or was documented by another resource (e.g., a FinancialStatement documenting a Budget).
|
||||
title: is or was documented by
|
||||
slot_uri: schema:subjectOf
|
||||
range: ReconstructedEntity
|
||||
multivalued: true
|
||||
exact_mappings:
|
||||
- crm:P70i_is_documented_in
|
||||
close_mappings:
|
||||
- rico:isOrWasSubjectOf
|
||||
title: Is Or Was Documented By
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
is_or_was_documented_by:
|
||||
description: >-
|
||||
Indicates that the entity is or was documented by another resource (e.g., a FinancialStatement documenting a Budget).
|
||||
title: is or was documented by
|
||||
slot_uri: schema:subjectOf
|
||||
range: ReconstructedEntity
|
||||
multivalued: true
|
||||
exact_mappings:
|
||||
- crm:P70i_is_documented_in
|
||||
close_mappings:
|
||||
- rico:isOrWasSubjectOf
|
||||
|
|
|
|||
|
|
@ -1,9 +1,27 @@
|
|||
id: https://nde.nl/ontology/hc/slot/is_or_was_observed_by
|
||||
name: is_or_was_observed_by
|
||||
description: >-
|
||||
The observation that documented this event or state.
|
||||
MIGRATED from cessation_observed_in (Rule 53).
|
||||
range: CustodianObservation
|
||||
slot_uri: prov:wasGeneratedBy
|
||||
exact_mappings:
|
||||
- prov:wasGeneratedBy
|
||||
multivalued: true
|
||||
title: Is Or Was Observed By
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rdfs: http://www.w3.org/2000/01/rdf-schema#
|
||||
org: http://www.w3.org/ns/org#
|
||||
xsd: http://www.w3.org/2001/XMLSchema#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
is_or_was_observed_by:
|
||||
description: >-
|
||||
The observation that documented this event or state.
|
||||
MIGRATED from cessation_observed_in (Rule 53).
|
||||
range: CustodianObservation
|
||||
slot_uri: prov:wasGeneratedBy
|
||||
exact_mappings:
|
||||
- prov:wasGeneratedBy
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -1,111 +0,0 @@
|
|||
import os
|
||||
import re
|
||||
|
||||
directory = "schemas/20251121/linkml/modules/classes/"
|
||||
|
||||
prefixes_block = """prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
schema: http://schema.org/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
rico: https://www.ica.org/standards/RiC/ontology#
|
||||
wd: http://www.wikidata.org/entity/
|
||||
"""
|
||||
|
||||
imports_block = """imports:
|
||||
- linkml:types
|
||||
"""
|
||||
|
||||
def split_camel_case(name):
    """Return *name* with a space inserted wherever a lowercase letter or
    digit is directly followed by an uppercase letter.

    >>> split_camel_case("FindingAid")
    'Finding Aid'
    """
    boundary = re.compile('([a-z0-9])([A-Z])')
    return boundary.sub(lambda hit: hit.group(1) + ' ' + hit.group(2), name)
|
||||
|
||||
# Indented "key:" line; group 1 is the indentation, group 2 the key.
_KEY_LINE = re.compile(r"^([ \t]+)([a-zA-Z0-9_]+):")
# Indented single-line "description: value"; group 2 is the raw value text.
_DESCRIPTION_LINE = re.compile(r"^([ \t]+)description:\s+(.*)")


def _find_description(lines, class_index, class_indent):
    """Return the class-level single-line description, or None.

    Scans at most 14 lines after the class line. Only keys at the indentation
    of the first class member are considered, and scanning stops as soon as a
    line leaves the class body, so nested or foreign descriptions are ignored.
    """
    member_indent = None
    for k in range(class_index + 1, min(class_index + 15, len(lines))):
        candidate = lines[k]
        text = candidate.strip()
        if not text or text.startswith("#"):
            continue
        indent = len(candidate) - len(candidate.lstrip(" \t"))
        if indent <= class_indent:
            return None  # left the class body
        if member_indent is None:
            member_indent = indent
        if indent != member_indent:
            continue  # nested content, not a direct class member
        found = _DESCRIPTION_LINE.match(candidate)
        if found:
            value = found.group(2).strip()
            # Block scalars (">" / "|") span several lines; not supported.
            if value.startswith((">", "|")):
                return None
            return value
    return None


def _parse_class_header(lines):
    """Return ``(class_name, description)`` for the first ``classes:`` section.

    Either element is None when it cannot be determined. The class name is
    looked up in the four lines following ``classes:``.
    """
    for i, line in enumerate(lines):
        if not line.strip().startswith("classes:"):
            continue
        for j in range(i + 1, min(i + 5, len(lines))):
            key = _KEY_LINE.match(lines[j])
            if key:
                return key.group(2), _find_description(lines, j, len(key.group(1)))
        break  # only the first "classes:" line is inspected
    return None, None


count = 0

for filename in os.listdir(directory):
    if not filename.endswith(".yaml"):
        continue

    filepath = os.path.join(directory, filename)
    with open(filepath, 'r', encoding="utf-8") as f:
        content = f.read()

    if content.startswith("id:"):
        continue  # Already has metadata

    # Check if imports already exist in the file (even if unstructured)
    has_imports = re.search(r"^imports:", content, re.MULTILINE)

    class_name, description = _parse_class_header(content.splitlines())

    # The filename is authoritative for the class name (convention); the name
    # found in the content is only used to report a mismatch.
    filename_class = os.path.splitext(filename)[0]
    if class_name is not None and class_name != filename_class:
        print(f"Warning: Class name '{class_name}' in content differs from filename '{filename_class}'. Using filename.")
    class_name = filename_class

    title = split_camel_case(class_name)
    if not description:
        description = f"LinkML class definition for {title}"
    # A description taken from the file is written back verbatim, quotes
    # included: it was a valid YAML scalar in the source, whereas stripping
    # its quotes could expose characters such as ": " and break the header.

    # Construct new content
    new_header = (
        f"id: https://nde.nl/ontology/hc/class/{class_name}\n"
        f"name: {class_name}\n"
        f"title: {title}\n"
        f"description: {description}\n"
        + prefixes_block
    )
    if not has_imports:
        new_header += imports_block

    with open(filepath, 'w', encoding="utf-8") as f:
        f.write(new_header + content)

    count += 1

print(f"Total files updated: {count}")
|
||||
119
scripts/fix_specific_dead_links.py
Normal file
119
scripts/fix_specific_dead_links.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
import os
|
||||
import re
|
||||
|
||||
# Directory holding the class schema files. Resolved relative to this script
# (expected at <repo>/scripts/) instead of a machine-specific absolute path, so
# the script works from any checkout. The trailing separator is kept so the
# value stays usable in plain string concatenation.
SCHEMA_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "schemas", "20251121", "linkml", "modules", "classes", "",
)

# Mappings for ExtractionMetadata.yaml: old slot name -> replacement slot name.
EXTRACTION_METADATA_MAP = {
    "extraction_agent": "is_or_was_retrieved_by",
    "extraction_method": "has_or_had_method",
    "extraction_date": "retrieval_timestamp",
    "cost_usd": "has_or_had_expense",
    "source_file": "has_or_had_source",
    "staff_id": "has_or_had_identifier",
    "linkedin_url": "has_or_had_url",
    "request_id": "has_or_had_identifier",  # request_id also maps to identifier
}

# General mapping for full_name
FULL_NAME_MAP = {
    "full_name": "has_or_had_label",
}
|
||||
|
||||
def fix_extraction_metadata():
    """Rename old slot references in ExtractionMetadata.yaml in place.

    Every line of the file is inspected in stripped form and rewritten
    according to EXTRACTION_METADATA_MAP when it is one of:

    * an import line ``- ../slots/<old>``,
    * a slots-list entry ``- <old>``,
    * a key line ``<old>:``.

    Import lines are de-duplicated: when the replacement slot is already
    imported (by an untouched line of the file or by an earlier rename), the
    old import line is dropped instead of being rewritten. Slots-list entries
    and keys are replaced one-for-one, so two old slots mapping to the same
    replacement still yield two entries.

    Does nothing when the file does not exist. Returns None.
    """
    filepath = os.path.join(SCHEMA_DIR, "ExtractionMetadata.yaml")
    if not os.path.exists(filepath):
        return

    print(f"Fixing {filepath}...")
    with open(filepath, 'r', encoding="utf-8") as f:
        lines = f.readlines()

    import_prefix = "- ../slots/"

    # Slots whose import will survive unchanged; seeded from the file itself so
    # a rename never introduces a second import of an already-imported slot.
    imported = set()
    for line in lines:
        stripped = line.strip()
        if stripped.startswith(import_prefix):
            name = stripped.split("/")[-1]
            if name not in EXTRACTION_METADATA_MAP:
                imported.add(name)

    new_lines = []
    for line in lines:
        stripped = line.strip()

        # 1. Imports
        if stripped.startswith(import_prefix):
            slot_name = stripped.split("/")[-1]
            new_slot = EXTRACTION_METADATA_MAP.get(slot_name)
            if new_slot is None:
                new_lines.append(line)
            elif new_slot not in imported:
                new_lines.append(line.replace(slot_name, new_slot))
                imported.add(new_slot)
            # else: replacement already imported -> drop the stale import
            continue

        # 2. Slots list ("- name") and 3. slot usage keys ("name:")
        if stripped.startswith("- "):
            slot_name = stripped[2:]
        elif stripped.endswith(":"):
            slot_name = stripped[:-1]
        else:
            slot_name = None

        if slot_name in EXTRACTION_METADATA_MAP:
            new_lines.append(line.replace(slot_name, EXTRACTION_METADATA_MAP[slot_name]))
        else:
            new_lines.append(line)

    with open(filepath, 'w', encoding="utf-8") as f:
        f.writelines(new_lines)
|
||||
|
||||
def fix_full_name(filename):
    """Replace ``full_name`` references with ``has_or_had_label`` in one file.

    Args:
        filename: Name of a schema file inside SCHEMA_DIR.

    Lines are matched in stripped form and must match exactly:

    * ``- ../slots/full_name`` (import) -> ``- ../slots/has_or_had_label``;
      the line is dropped instead when that import already exists in the
      file, so no duplicate import is created,
    * ``- full_name`` (slots list)      -> ``- has_or_had_label``,
    * ``full_name:`` (key)              -> ``has_or_had_label:``.

    All other lines are written back unchanged. Does nothing when the file
    does not exist. Returns None.
    """
    filepath = os.path.join(SCHEMA_DIR, filename)
    if not os.path.exists(filepath):
        return

    print(f"Fixing {filepath}...")
    with open(filepath, 'r', encoding="utf-8") as f:
        lines = f.readlines()

    old_slot = "full_name"
    new_slot = "has_or_had_label"
    old_import = f"- ../slots/{old_slot}"
    new_import = f"- ../slots/{new_slot}"

    # True once the replacement import is known to be present in the output.
    label_imported = any(line.strip() == new_import for line in lines)

    new_lines = []
    for line in lines:
        stripped = line.strip()

        if stripped == old_import:
            # Imports
            if not label_imported:
                new_lines.append(line.replace(old_slot, new_slot))
                label_imported = True
        elif stripped == f"- {old_slot}":
            # Slots list
            new_lines.append(line.replace(old_slot, new_slot))
        elif stripped == f"{old_slot}:":
            # Slot usage key
            new_lines.append(line.replace(f"{old_slot}:", f"{new_slot}:"))
        else:
            new_lines.append(line)

    with open(filepath, 'w', encoding="utf-8") as f:
        f.writelines(new_lines)
|
||||
|
||||
def main():
    """Run all fixes: ExtractionMetadata.yaml first, then the full_name
    rename on each listed class file, in order."""
    fix_extraction_metadata()

    # Class files whose full_name references are rewritten; every entry after
    # the first two was marked "also flagged" when the list was assembled.
    full_name_files = (
        "FindingAid.yaml",
        "OrganizationBranch.yaml",
        "DigitalPlatformV2OrganizationStatus.yaml",
        "LegalName.yaml",
        "CustodianLegalStatus.yaml",
        "ProfileData.yaml",
    )
    for schema_file in full_name_files:
        fix_full_name(schema_file)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -21,6 +21,15 @@ def get_archived_slot_names():
|
|||
def find_references(archived_slots):
|
||||
references = {} # {slot_name: [file_paths]}
|
||||
|
||||
# Metadata keys that mimic slot names but are valid LinkML structure
|
||||
# We ignore "Usage as key" for these
|
||||
SAFE_METADATA_KEYS = {
|
||||
"title", "description", "name", "id", "status", "notes", "comments", "examples",
|
||||
"todos", "see_also", "range", "slot_usage", "required", "multivalued",
|
||||
"inlined", "identifier", "value", "unit", "prefixes", "imports", "classes",
|
||||
"slots", "attributes", "exact_mappings", "close_mappings", "related_mappings"
|
||||
}
|
||||
|
||||
class_files = glob.glob(os.path.join(CLASSES_DIR, "*.yaml"))
|
||||
|
||||
for cls_file in class_files:
|
||||
|
|
@ -30,28 +39,30 @@ def find_references(archived_slots):
|
|||
for i, line in enumerate(lines):
|
||||
stripped = line.strip()
|
||||
|
||||
# Check for imports: "- ../slots/slotname"
|
||||
# Check for slot usage: "- slotname" (in slots list)
|
||||
# Check for slot_usage keys: "slotname:"
|
||||
|
||||
for slot in archived_slots:
|
||||
# Import check
|
||||
if f"../slots/{slot}" in stripped and not stripped.strip().startswith("#"):
|
||||
# Import check: "- ../slots/slotname"
|
||||
if f"../slots/{slot}" in stripped and not stripped.startswith("#"):
|
||||
if slot not in references: references[slot] = []
|
||||
references[slot].append(f"{cls_file} (line {i+1}): Import")
|
||||
continue
|
||||
|
||||
# Loose usage check (can be false positive if slot name is common word like 'description')
|
||||
# But we restrict to archived slots.
|
||||
|
||||
# Check for "- slotname"
|
||||
# Usage in slots list: "- slotname"
|
||||
# Must be exact match to avoid partials
|
||||
if stripped == f"- {slot}":
|
||||
if slot not in references: references[slot] = []
|
||||
references[slot].append(f"{cls_file} (line {i+1}): Usage in slots list")
|
||||
continue
|
||||
|
||||
# Check for "slotname:"
|
||||
# Usage as key: "slotname:"
|
||||
if stripped.startswith(f"{slot}:"):
|
||||
# Check if it's a safe metadata key
|
||||
if slot in SAFE_METADATA_KEYS:
|
||||
continue
|
||||
|
||||
# Also, if we are inside a slot_usage block, "slotname:" is valid ONLY IF
|
||||
# we are refining that slot. But if the slot is archived, we shouldn't be refining it!
|
||||
# So "Usage as key" is actually relevant for slot_usage of archived slots.
|
||||
|
||||
if slot not in references: references[slot] = []
|
||||
references[slot].append(f"{cls_file} (line {i+1}): Usage as key")
|
||||
continue
|
||||
|
|
|
|||
Loading…
Reference in a new issue