# NDE Enriched Entry Schema
# This schema defines the structure of NDE (Netwerk Digitaal Erfgoed) enriched entry files.
# These are intermediate working files containing original CSV data + enrichments from
# Wikidata, Google Maps, OpenStreetMap, and other sources.
#
# Use: linkml-convert -s nde_enriched_entry.yaml -t json input.yaml -o output.json

id: https://nde.nl/ontology/enriched-entry
name: nde_enriched_entry
title: NDE Enriched Entry Schema
# Literal block scalar: line breaks and the bullet list are kept verbatim.
description: |
  Schema for NDE (Netwerk Digitaal Erfgoed) enriched heritage custodian entries.

  These entries contain:
  - Original CSV data from NDE bronhouder spreadsheet
  - Wikidata enrichment (labels, descriptions, claims, coordinates)
  - Google Maps enrichment (ratings, reviews, place data)
  - OpenStreetMap enrichment (OSM IDs, tags)
  - GHCID persistent identifier assignment
  - Location resolution from GeoNames
  - Web scraping enrichment (website claims with XPath provenance)

# Quoted so the value stays a string even if it is later shortened to a
# two-part version such as 1.1, which YAML would otherwise read as a float.
version: "1.0.0"
license: https://creativecommons.org/licenses/by-sa/4.0/

prefixes:
  # CURIE prefix -> namespace URI. `linkml` is the prefix used by the
  # `linkml:types` import; the remaining prefixes are declared for use in
  # CURIEs (none is referenced by the part of the schema visible here).
  linkml: https://w3id.org/linkml/
  schema: http://schema.org/
  wd: http://www.wikidata.org/entity/
  wdt: http://www.wikidata.org/prop/direct/
  geo: http://www.w3.org/2003/01/geo/wgs84_pos#
  prov: http://www.w3.org/ns/prov#

# Range applied to every attribute that does not declare its own `range`.
default_range: string

# Provides the built-in scalar types used as ranges in this schema
# (string, integer, float, boolean, datetime, uri).
imports:
  - linkml:types

classes:
  # Conventions used throughout this section:
  #   * An attribute without an explicit `range` takes the schema-level
  #     default range.
  #   * `attributes` are local to their class, so the same attribute name may
  #     appear in several classes with different ranges (for example
  #     `precision` is a float in Coordinates and an integer in WikidataTime).
  #   * NOTE(review): several slots with `range: string` also set
  #     `inlined_as_list: true`. Inlining only affects class-ranged slots, so
  #     the flag is presumably a no-op there; it is kept as-is to avoid any
  #     change in generated artefacts — confirm before removing.

  # Root record: one entry per heritage custodian, grouping the original CSV
  # row with each enrichment source.
  NDEEnrichedEntry:
    description: A single enriched heritage custodian entry from the NDE bronhouder dataset.
    tree_root: true
    attributes:
      entry_index:
        description: Sequential index of the entry in the original CSV
        range: integer
      processing_timestamp:
        description: ISO 8601 timestamp when this entry was processed
        range: datetime
      enrichment_status:
        description: Current enrichment status
        range: EnrichmentStatusEnum
      original_entry:
        description: Original data from NDE CSV
        range: OriginalEntry
      wikidata_enrichment:
        description: Data enriched from Wikidata
        range: WikidataEnrichment
      google_maps_enrichment:
        description: Data enriched from Google Maps
        range: GoogleMapsEnrichment
      osm_enrichment:
        description: Data enriched from OpenStreetMap
        range: OSMEnrichment
      ghcid:
        description: Global Heritage Custodian Identifier assignment
        range: GHCIDAssignment
      location:
        description: Resolved location data
        range: Location
      location_resolution:
        description: GeoNames-based location resolution metadata
        range: LocationResolution
      web_enrichment:
        description: Data from website scraping with XPath provenance
        range: WebEnrichment
      custodian_name:
        description: Standardized emic name (legal form filtered)
        range: string

  # Attribute names mirror the (Dutch) column names of the source spreadsheet.
  OriginalEntry:
    description: Original data from NDE bronhouder CSV spreadsheet
    attributes:
      plaatsnaam_bezoekadres:
        description: City/place of the visiting address
      straat_en_huisnummer_bezoekadres:
        description: Street and house number of visiting address
      organisatie:
        description: Organization name
      webadres_organisatie:
        description: Website URL
        range: uri
      type_organisatie:
        description: Type of organization (museum, archief, bibliotheek, etc.)
      systeem:
        description: Collection management system used
      versnellen:
        description: Part of Versnellen digitization project
      museum_register:
        description: Registered in Museum Register
      in_scope_voor_dc4eu:
        description: In scope for DC4EU project
      linked_data:
        description: Publishes linked data
      datasetregister:
        description: Dataset register category
      versnellen_project:
        description: Specific Versnellen project
      wikidata_id:
        description: Wikidata entity ID from original CSV
      type:
        description: GLAMORCUBESFIXPHDNT type codes
        multivalued: true

  WikidataEnrichment:
    description: Enrichment data from Wikidata REST API
    attributes:
      wikidata_entity_id:
        description: Wikidata Q-ID
        pattern: "^Q[0-9]+$"
      api_metadata:
        description: API request metadata
        range: APIMetadata
      # NOTE(review): the description says "in multiple languages", but the
      # slot is a flat list of strings. If the data is a language-keyed map
      # it will not validate against this definition — confirm. The same
      # question applies to wikidata_descriptions, wikidata_aliases and
      # wikidata_sitelinks below.
      wikidata_labels:
        description: Labels in multiple languages
        range: string
        multivalued: true
        inlined_as_list: true
      wikidata_label_nl:
        description: Dutch label
      wikidata_label_en:
        description: English label
      wikidata_descriptions:
        description: Descriptions in multiple languages
        range: string
        multivalued: true
        inlined_as_list: true
      wikidata_description_nl:
        description: Dutch description
      wikidata_description_en:
        description: English description
      wikidata_aliases:
        description: Alternative names by language
        range: string
        multivalued: true
        inlined_as_list: true
      wikidata_instance_of:
        description: Instance of (P31) claims
        range: WikidataEntity
        multivalued: true
        inlined_as_list: true
      wikidata_country:
        description: Country (P17) claim
        range: WikidataEntity
      wikidata_located_in:
        description: Located in (P131) claim
        range: WikidataEntity
      wikidata_coordinates:
        description: Coordinate location (P625)
        range: Coordinates
      wikidata_inception:
        description: Inception/founding date (P571)
        range: WikidataTime
      wikidata_official_website:
        description: Official website (P856)
        range: uri
      wikidata_image:
        description: Image filename (P18)
      wikidata_sitelinks:
        description: Wikipedia and other sitelinks
        range: string
        multivalued: true
        inlined_as_list: true
      wikidata_claims:
        description: Additional Wikidata claims
        range: WikidataClaims

  # `id` is a plain attribute here (it is not declared as an identifier), so
  # WikidataEntity values are nested objects rather than references.
  WikidataEntity:
    description: A Wikidata entity reference with labels
    attributes:
      id:
        description: Wikidata Q-ID
        pattern: "^Q[0-9]+$"
      label_en:
        description: English label
      label_nl:
        description: Dutch label
      description_en:
        description: English description
      description_nl:
        description: Dutch description
      instance_of:
        description: Instance of IDs
        range: string
        multivalued: true
        inlined_as_list: true

  Coordinates:
    description: Geographic coordinates from Wikidata
    attributes:
      latitude:
        range: float
      longitude:
        range: float
      precision:
        range: float
      globe:
        description: Globe entity (usually Q2 for Earth)
        range: uri

  WikidataTime:
    description: Wikidata time value
    attributes:
      time:
        description: ISO 8601 time string with Wikidata prefix
      precision:
        description: Precision level (9=year, 10=month, 11=day)
        range: integer
      calendarmodel:
        description: Calendar model (usually Gregorian)
        range: uri

  WikidataClaims:
    description: Container for additional Wikidata claims
    attributes:
      commons_category:
        description: Wikimedia Commons category
      postal_code:
        description: Postal code from Wikidata
      phone:
        description: Phone number from Wikidata
      described_at_url:
        description: URLs describing this entity
        range: uri
        multivalued: true
        inlined_as_list: true

  APIMetadata:
    description: Metadata about API requests
    attributes:
      api_endpoint:
        range: uri
      request_url:
        range: uri
      response_status:
        range: integer
      response_time_ms:
        range: float
      fetch_timestamp:
        range: datetime
      user_agent:
        description: User agent string used
      authenticated:
        range: boolean
      # NOTE(review): unit is not stated anywhere in the schema — confirm
      # (seconds vs. milliseconds) against the producing script.
      rate_limit_delay_used:
        range: float

  GoogleMapsEnrichment:
    description: Enrichment data from Google Maps Places API
    attributes:
      place_id:
        description: Google Place ID
      name:
        description: Name from Google Maps
      formatted_address:
        description: Full formatted address
      rating:
        description: Average rating (1-5)
        range: float
      user_ratings_total:
        description: Total number of ratings
        range: integer
      reviews_count:
        description: Number of reviews
        range: integer
      photo_count:
        description: Number of photos
        range: integer
      business_status:
        description: Business status (OPERATIONAL, CLOSED_TEMPORARILY, etc.)
      opening_hours:
        description: Opening hours
        range: string
        multivalued: true
        inlined_as_list: true
      website:
        range: uri
      phone:
        description: Phone number
      types:
        description: Google place types
        range: string
        multivalued: true
        inlined_as_list: true
      latitude:
        range: float
      longitude:
        range: float

  OSMEnrichment:
    description: Enrichment data from OpenStreetMap
    attributes:
      osm_id:
        description: OpenStreetMap ID
      osm_type:
        description: OSM element type (node, way, relation)
        range: OSMTypeEnum
      name:
        description: Name from OSM
      amenity:
        description: Amenity tag value
      building:
        description: Building tag value
      heritage:
        description: Heritage tag value
      wikidata:
        description: Wikidata tag from OSM
      wikipedia:
        description: Wikipedia tag from OSM
      website:
        range: uri
      osm_tags:
        description: Additional OSM tags
        range: string
        multivalued: true
        inlined_as_list: true

  GHCIDAssignment:
    description: Global Heritage Custodian Identifier assignment
    attributes:
      ghcid_string:
        description: Human-readable GHCID (e.g., NL-DR-BOR-M-HC)
      ghcid_uuid:
        description: UUID v5 derived from GHCID string
      ghcid_uuid_sha256:
        description: UUID v8 (SHA-256) for future-proofing
      # NOTE(review): a 64-bit value can exceed the range that JSON/JavaScript
      # consumers represent exactly (2^53) — confirm downstream handling.
      ghcid_numeric:
        description: 64-bit numeric identifier
        range: integer
      generation_timestamp:
        range: datetime
      collision_resolution:
        description: Method used if collision occurred

  Location:
    description: Resolved geographic location
    attributes:
      city:
        description: City/settlement name
      address:
        description: Full street address
      postal_code:
        description: Postal code
      region:
        description: Province/state/region
      country:
        description: ISO 3166-1 alpha-2 country code
        pattern: "^[A-Z]{2}$"
      latitude:
        range: float
      longitude:
        range: float

  LocationResolution:
    description: GeoNames-based location resolution metadata
    attributes:
      method:
        description: Resolution method used
        range: LocationResolutionMethodEnum
      geonames_id:
        description: GeoNames ID of resolved settlement
        range: integer
      geonames_name:
        description: Settlement name from GeoNames
      feature_code:
        description: GeoNames feature code (PPL, PPLA, etc.)
      settlement_code:
        description: 3-letter settlement code for GHCID
      admin1_code:
        description: GeoNames admin1 code
      region_code:
        description: ISO 3166-2 region code
      # NOTE(review): unlike Location.country, no pattern is enforced here
      # although both are described as ISO 3166-1 alpha-2 — confirm whether
      # that is intentional.
      country_code:
        description: ISO 3166-1 alpha-2 country code
      resolution_date:
        range: datetime

  WebEnrichment:
    description: Data from website scraping with XPath provenance
    attributes:
      claims:
        description: Verified claims with XPath provenance
        range: WebClaim
        multivalued: true
        inlined_as_list: true
      removed_unverified_claims:
        description: Claims removed due to missing XPath verification
        range: WebClaim
        multivalued: true
        inlined_as_list: true
      scrape_timestamp:
        range: datetime
      html_file:
        description: Path to archived HTML file

  WebClaim:
    description: A claim extracted from a webpage with XPath provenance
    attributes:
      claim_type:
        description: Type of claim (full_name, description, email, etc.)
      claim_value:
        description: The extracted value
      source_url:
        range: uri
      retrieved_on:
        range: datetime
      xpath:
        description: XPath to the element containing this value
      html_file:
        description: Relative path to archived HTML file
      xpath_match_score:
        description: Match confidence (1.0 = exact)
        range: float

  # Batch wrapper around NDEEnrichedEntry. It is not marked `tree_root`;
  # NDEEnrichedEntry is.
  NDEEnrichedEntryCollection:
    description: Collection of NDE enriched entries for batch processing
    attributes:
      entries:
        range: NDEEnrichedEntry
        multivalued: true
        inlined_as_list: true
      generated_at:
        range: datetime
      entry_count:
        range: integer
      source_directory:
        description: Source directory path

enums:
  # Controlled vocabularies referenced as attribute ranges by the classes of
  # this schema. Permissible value names are matched literally, so their case
  # is significant.

  EnrichmentStatusEnum:
    description: Enrichment status of an entry
    permissible_values:
      pending:
        description: Not yet enriched
      enriched:
        description: Successfully enriched
      partial:
        description: Partially enriched (some sources failed)
      failed:
        description: Enrichment failed
      unknown:
        description: Status unknown

  OSMTypeEnum:
    description: OSM element type (node, way, relation)
    permissible_values:
      node:
        description: OSM node
      way:
        description: OSM way
      relation:
        description: OSM relation

  LocationResolutionMethodEnum:
    description: Method used to resolve a location
    permissible_values:
      REVERSE_GEOCODE:
        description: Resolved by reverse geocoding coordinates
      NAME_LOOKUP:
        description: Resolved by name lookup in GeoNames
      MANUAL:
        description: Manually assigned
      INHERITED:
        description: Inherited from parent entry