# Source listing header (viewer residue): 1252 lines, 31 KiB, YAML
id: https://w3id.org/heritage/nde/enriched-source
name: nde-enriched-source
title: NDE Enriched Heritage Organizations Source Schema
description: >-
  LinkML schema for enriched NDE heritage institution entries.
  This schema represents the structure of enriched YAML files in data/nde/enriched/entries/
  after Google Maps, Wikidata, website scraping, and GHCID enrichment.
# Quoted so the version is always read as a string, never as a number.
version: '1.0.0'

# Namespace prefixes used for CURIE expansion.
prefixes:
  linkml: https://w3id.org/linkml/
  nde: https://w3id.org/heritage/nde/
  schema: http://schema.org/
  wdt: http://www.wikidata.org/prop/direct/
  wd: http://www.wikidata.org/entity/

# Elements without an explicit prefix or range fall back to these defaults.
default_prefix: nde
default_range: string

imports:
  - linkml:types

# =============================================================================
# CLASSES
# =============================================================================
classes:

# ---------------------------------------------------------------------------
# ROOT CONTAINER
# ---------------------------------------------------------------------------
  # Tree root: one instance per enriched entry file; every slot below is a
  # top-level key of that entry.
  NDEEnrichedEntry:
    description: >-
      Root container for a single enriched NDE heritage institution entry.
      Contains all enrichment data from multiple sources.
    tree_root: true
    slots:
      - entry_index
      - processing_timestamp
      - enrichment_status
      - enrichment_timestamp
      - enrichment_source
      - original_entry
      - organization
      - location
      - contact
      - digital_presence
      - opening_hours
      - publications
      - collections
      - historical_documentation
      - activities
      - historical_sites
      - nde_metadata
      - google_maps_enrichment
      - wikidata_enrichment
      - nan_isil_enrichment
      - url_enrichment
      - web_enrichment
      - web_claims
      - custodian_name
      - identifiers
      - ghcid
      - provenance

# ---------------------------------------------------------------------------
# ORIGINAL ENTRY (from CSV)
# ---------------------------------------------------------------------------
  # Slot names mirror the (Dutch) column names of the NDE CSV source.
  OriginalEntry:
    description: Original entry data from NDE CSV source
    slots:
      - plaatsnaam_bezoekadres
      - straat_en_huisnummer_bezoekadres
      - organisatie
      - koepelorganisatie
      - webadres_organisatie
      - type_organisatie
      - systeem
      - isil_code_na
      - collectie_nederland
      - museum_register
      - rijkscollectie
      - wikidata_id
      - type
      - opmerkingen

# ---------------------------------------------------------------------------
# ORGANIZATION
# ---------------------------------------------------------------------------
  Organization:
    description: Structured organization information derived from enrichment
    slots:
      - name
      - alternative_names
      - type
      - institution_type
      - description
      - legal_form
      - kvk_number

# ---------------------------------------------------------------------------
# LOCATION
# ---------------------------------------------------------------------------
  Location:
    description: Physical location of the institution
    slots:
      - location_name
      - street_address
      - postal_code
      - city
      - municipality
      - sublocality
      - province
      - country
      - coordinates

  # Nested value object used by the `coordinates` slot.
  Coordinates:
    description: Geographic coordinates
    slots:
      - latitude
      - longitude
      - precision
      - globe

# ---------------------------------------------------------------------------
# CONTACT
# ---------------------------------------------------------------------------
  Contact:
    description: Contact information for the institution
    slots:
      - address
      - email
      - phone
      - website
      - iban

# ---------------------------------------------------------------------------
# DIGITAL PRESENCE
# ---------------------------------------------------------------------------
  DigitalPresence:
    description: Online presence information
    slots:
      - website
      - social_media
      - systems

  # Item type of the `social_media` list.
  SocialMediaAccount:
    description: A social media account
    slots:
      - platform
      - name
      - url
      - username

  # Item type of the `systems` list.
  DigitalSystem:
    description: Digital system used by the institution
    slots:
      - name
      - type
      - url

# ---------------------------------------------------------------------------
# OPENING HOURS
# ---------------------------------------------------------------------------
  OpeningHours:
    description: Opening hours information
    slots:
      - venue_hours

  # One string slot per weekday plus a free-text note.
  VenueHours:
    description: Hours for a specific venue
    slots:
      - monday
      - tuesday
      - wednesday
      - thursday
      - friday
      - saturday
      - sunday
      - note

# ---------------------------------------------------------------------------
# PUBLICATIONS
# ---------------------------------------------------------------------------
  # Item type of the `publications` list.
  Publication:
    description: Publication produced by the institution
    slots:
      - name
      - type
      - description

# ---------------------------------------------------------------------------
# COLLECTIONS
# ---------------------------------------------------------------------------
  # Item type of the `collections` list.
  Collection:
    description: Collection held by the institution
    slots:
      - name
      - type
      - description
      - note
      - temporal_coverage
      - extent

# ---------------------------------------------------------------------------
# HISTORICAL DOCUMENTATION
# ---------------------------------------------------------------------------
  # Item type of the `historical_documentation` list.
  HistoricalDocumentation:
    description: Historical documentation projects or resources
    slots:
      - name
      - description

# ---------------------------------------------------------------------------
# ACTIVITIES
# ---------------------------------------------------------------------------
  # Item type of the `activities` list.
  Activity:
    description: Activity or service provided by the institution
    slots:
      - type
      - name
      - description

# ---------------------------------------------------------------------------
# GOOGLE MAPS ENRICHMENT
# ---------------------------------------------------------------------------
  GoogleMapsEnrichment:
    description: Enrichment data from Google Maps Places API
    slots:
      - place_id
      - name
      - business_status
      - rating
      - total_ratings
      - photo_count
      - coordinates

# ---------------------------------------------------------------------------
# WIKIDATA ENRICHMENT
# ---------------------------------------------------------------------------
  WikidataEnrichment:
    description: Enrichment data from Wikidata REST API
    slots:
      - wikidata_entity_id
      - api_metadata
      - wikidata_labels
      - wikidata_label_nl
      - wikidata_label_en
      - wikidata_descriptions
      - wikidata_description_nl
      - wikidata_description_en
      - wikidata_aliases
      - wikidata_identifiers
      - wikidata_instance_of
      - wikidata_country
      - wikidata_located_in
      - wikidata_coordinates
      - wikidata_inception
      - wikidata_official_website
      - wikidata_image
      - wikidata_logo
      - wikidata_sitelinks
      - wikidata_claims

  # Value type of the `api_metadata` slot.
  WikidataApiMetadata:
    description: API request metadata from Wikidata fetch
    slots:
      - api_endpoint
      - request_url
      - response_status
      - response_time_ms
      - fetch_timestamp
      - user_agent
      - authenticated
      - rate_limit_delay_used

  # Used by wikidata_instance_of, wikidata_country and wikidata_located_in.
  WikidataEntityReference:
    description: Reference to a Wikidata entity with labels
    slots:
      - id
      - label_en
      - label_nl
      - description_en
      - description_nl
      - instance_of

  # Value type of the `wikidata_coordinates` slot.
  WikidataCoordinates:
    description: Geographic coordinates from Wikidata
    slots:
      - latitude
      - longitude
      - precision
      - globe

  # Value type of the `wikidata_inception` slot.
  WikidataTime:
    description: Time value from Wikidata
    slots:
      - time
      - precision
      - calendarmodel

  # Value type of the `wikidata_identifiers` slot.
  WikidataIdentifiers:
    description: External identifiers from Wikidata
    slots:
      - gnd
      - viaf
      - lcnaf
      - geonames
      - isni
      - ringgold
      - bnf
      - idref

# ---------------------------------------------------------------------------
# NAN ISIL ENRICHMENT
# ---------------------------------------------------------------------------
  NANISILEnrichment:
    description: Enrichment from Nationaal Archief ISIL Registry
    slots:
      - source
      - source_file
      - source_url
      - data_tier
      - extraction_date
      - enrichment_timestamp
      - enrichment_method
      - match_confidence
      - isil_code
      - nan_name
      - nan_city
      - nan_toegekend_op
      - nan_opmerking

# ---------------------------------------------------------------------------
# URL ENRICHMENT
# ---------------------------------------------------------------------------
  URLEnrichment:
    description: URL discovery enrichment
    slots:
      - discovered_url
      - search_query
      - enrichment_timestamp
      - enrichment_method
      - status

# ---------------------------------------------------------------------------
# WEB ENRICHMENT
# ---------------------------------------------------------------------------
  WebEnrichment:
    description: Website archiving and scraping enrichment
    slots:
      - web_archives
      - full_site_archive_timestamp

  # Item type of the `web_archives` list.
  WebArchive:
    description: Archived website information
    slots:
      - url
      - directory
      - pages_archived
      - archive_method
      - warc_file
      - warc_size_bytes
      - warc_format

# ---------------------------------------------------------------------------
# WEB CLAIMS
# ---------------------------------------------------------------------------
  # Container: extraction metadata plus the list of individual claims.
  WebClaims:
    description: Claims extracted from archived websites with XPath provenance
    slots:
      - extraction_timestamp
      - source_archive
      - claims_count
      - claims

  # Item type of the `claims` list.
  WebClaim:
    description: A single claim extracted from a website with XPath provenance
    slots:
      - claim_type
      - claim_value
      - raw_value
      - source_url
      - retrieved_on
      - xpath
      - html_file
      - xpath_match_score
      - extraction_method
      - extraction_timestamp

# ---------------------------------------------------------------------------
# CUSTODIAN NAME (Consensus)
# ---------------------------------------------------------------------------
  CustodianNameConsensus:
    description: Consensus custodian name derived from multiple sources
    slots:
      - claim_type
      - claim_value
      - source
      - confidence
      - consensus_method
      - sources_checked
      - sources_matched
      - extraction_timestamp
      - matching_sources

  # Item type of the `matching_sources` list.
  MatchingSource:
    description: A source that matched in consensus derivation
    slots:
      - source
      - name
      - score

# ---------------------------------------------------------------------------
# IDENTIFIERS
# ---------------------------------------------------------------------------
  # Item type of the `identifiers` list.
  Identifier:
    description: An identifier for the institution
    slots:
      - identifier_scheme
      - identifier_value
      - identifier_url
      - assigned_date
      - source

# ---------------------------------------------------------------------------
# GHCID (Global Heritage Custodian ID)
# ---------------------------------------------------------------------------
  GHCIDInfo:
    description: GHCID persistent identifier information
    slots:
      - ghcid_current
      - ghcid_original
      - ghcid_uuid
      - ghcid_uuid_sha256
      - ghcid_numeric
      - record_id
      - generation_timestamp
      - ghcid_history
      - location_resolution
      - geonames_id

  # Item type of the `ghcid_history` list.
  GHCIDHistoryEntry:
    description: Historical GHCID entry for tracking changes
    slots:
      - ghcid
      - ghcid_numeric
      - valid_from
      - valid_to
      - reason
    # The schema-level `ghcid` slot has range GHCIDInfo (the container that
    # itself holds ghcid_history); reusing it unchanged here would nest the
    # container recursively inside each history entry. Override it locally.
    # NOTE(review): assumes history entries store the GHCID as a plain
    # string — confirm against the enriched data.
    slot_usage:
      ghcid:
        description: GHCID string that applied during this validity period
        range: string
        inlined: false

  # Value type of the `location_resolution` slot.
  LocationResolution:
    description: GeoNames location resolution information
    slots:
      - method
      - geonames_id
      - geonames_name
      - feature_code
      - admin1_code
      - region_code
      - country_code
      - source_coordinates
      - distance_km

  # Value type of the `source_coordinates` slot.
  SourceCoordinates:
    description: Source coordinates used for location resolution
    slots:
      - latitude
      - longitude
      - source

# ---------------------------------------------------------------------------
# PROVENANCE
# ---------------------------------------------------------------------------
  Provenance:
    description: Provenance and data quality tracking
    slots:
      - schema_version
      - generated_at
      - sources
      - data_tier_summary
      - notes

  # Value type of the `sources` slot: one key per enrichment source.
  ProvenanceSources:
    description: Sources used in enrichment
    slots:
      - google_maps
      - wikidata_api
      - website_scrape

  ProvenanceSourceEntry:
    description: A single provenance source entry
    slots:
      - source_type
      - api_endpoint
      - place_id
      - entity_id
      - claims_extracted

  # Value type of the `data_tier_summary` slot.
  DataTierSummary:
    description: Summary of data tier classifications
    slots:
      - tier_1_authoritative
      - tier_2_verified
      - tier_3_crowd_sourced
      - tier_4_inferred

# =============================================================================
# SLOTS
# =============================================================================
slots:

# Root level slots
  entry_index:
    description: Index number from original NDE CSV
    range: integer
  processing_timestamp:
    description: When entry was processed
    range: datetime
  enrichment_status:
    description: Status of enrichment (enriched, pending, failed)
    range: string
  enrichment_timestamp:
    description: When enrichment was performed
    range: datetime
  enrichment_source:
    description: Source of enrichment data
    range: string

# Original entry slots
  plaatsnaam_bezoekadres:
    description: City/town from original CSV
    range: string
  straat_en_huisnummer_bezoekadres:
    description: Street address from original CSV
    range: string
  organisatie:
    description: Organization name from original CSV
    range: string
  koepelorganisatie:
    description: Parent organization from original CSV
    range: string
  webadres_organisatie:
    description: Website URL from original CSV
    range: string
  type_organisatie:
    description: Organization type from original CSV
    range: string
  systeem:
    description: Collection management system from original CSV
    range: string
  isil_code_na:
    description: ISIL code from original CSV
    range: string
  collectie_nederland:
    description: Collectie Nederland participation
    range: string
  museum_register:
    description: Museum register participation
    range: string
  rijkscollectie:
    description: Rijkscollectie participation
    range: string
  wikidata_id:
    description: Wikidata ID from original CSV
    range: string
  # NOTE(review): this slot is shared; `multivalued: true` therefore also
  # applies to Organization, DigitalSystem, Publication, Collection and
  # Activity, which all list `type` — confirm those hold lists too.
  type:
    description: Institution type codes (list)
    range: string
    multivalued: true
  opmerkingen:
    description: Remarks from original CSV
    range: string

# Organization slots
  # `name` and `description` are generic slots reused by many classes.
  name:
    description: Name of the entity
    range: string
  alternative_names:
    description: Alternative names or aliases
    range: string
    multivalued: true
  institution_type:
    description: GLAMORCUBESFIXPHDNT institution type code
    range: string
  description:
    description: Description text
    range: string
  legal_form:
    description: Legal form (vereniging, stichting, etc.)
    range: string
  kvk_number:
    description: Dutch Chamber of Commerce number
    range: string

# Location slots
  location_name:
    description: Name of location/venue
    range: string
  street_address:
    description: Street address
    range: string
  postal_code:
    description: Postal code
    range: string
  city:
    description: City name
    range: string
  municipality:
    description: Municipality name
    range: string
  sublocality:
    description: Sublocality or district
    range: string
  province:
    description: Province name
    range: string
  country:
    description: Country code (ISO 3166-1)
    range: string
  coordinates:
    description: Geographic coordinates
    range: Coordinates
    # Stated explicitly for consistency with the other class-ranged slots.
    inlined: true
  # latitude/longitude/precision/globe are shared by Coordinates and
  # WikidataCoordinates; precision is also listed by WikidataTime.
  latitude:
    description: Latitude coordinate
    range: float
  longitude:
    description: Longitude coordinate
    range: float
  precision:
    description: Coordinate precision
    range: float
  globe:
    description: Reference globe (typically Earth)
    range: string

# Contact slots
  address:
    description: Full address string
    range: string
  email:
    description: Email address
    range: string
  phone:
    description: Phone number
    range: string
  # Shared by Contact and DigitalPresence.
  website:
    description: Website URL
    range: uri
  iban:
    description: IBAN bank account number
    range: string

# Digital presence slots
  social_media:
    description: Social media accounts
    range: SocialMediaAccount
    multivalued: true
    inlined_as_list: true
  systems:
    description: Digital systems used
    range: DigitalSystem
    multivalued: true
    inlined_as_list: true
  platform:
    description: Platform name
    range: string
  # Shared by SocialMediaAccount, DigitalSystem and WebArchive.
  url:
    description: URL
    range: uri
  username:
    description: Username on platform
    range: string

# Opening hours slots
  venue_hours:
    description: Hours for specific venues
    range: VenueHours
    inlined: true
  # Weekday slots hold free-form strings; no time format is enforced here.
  monday:
    description: Opening hours on Monday
    range: string
  tuesday:
    description: Opening hours on Tuesday
    range: string
  wednesday:
    description: Opening hours on Wednesday
    range: string
  thursday:
    description: Opening hours on Thursday
    range: string
  friday:
    description: Opening hours on Friday
    range: string
  saturday:
    description: Opening hours on Saturday
    range: string
  sunday:
    description: Opening hours on Sunday
    range: string
  # Shared by VenueHours and Collection.
  note:
    description: Free-text note
    range: string

# Collection/publication slots
  publications:
    description: Publications by the institution
    range: Publication
    multivalued: true
    inlined_as_list: true
  collections:
    description: Collections held
    range: Collection
    multivalued: true
    inlined_as_list: true
  temporal_coverage:
    description: Time period covered
    range: string
  extent:
    description: Size/extent of collection
    range: string

# Historical slots
  historical_documentation:
    description: Historical documentation projects
    range: HistoricalDocumentation
    multivalued: true
    inlined_as_list: true
  # Plain list of strings, unlike the class-ranged lists around it.
  historical_sites:
    description: Historical sites associated with institution
    range: string
    multivalued: true
  activities:
    description: Activities and services
    range: Activity
    multivalued: true
    inlined_as_list: true

# NDE metadata
  # NOTE(review): the range reuses OriginalEntry although the description
  # says "NDE-specific metadata" — confirm this reuse is intended.
  nde_metadata:
    description: NDE-specific metadata
    range: OriginalEntry
    inlined: true

# Google Maps enrichment slots
  google_maps_enrichment:
    description: Google Maps enrichment data
    range: GoogleMapsEnrichment
    inlined: true
  # Shared by GoogleMapsEnrichment and ProvenanceSourceEntry.
  place_id:
    description: Google Maps place ID
    range: string
  business_status:
    description: Business operational status
    range: string
  rating:
    description: Google Maps rating
    range: float
  total_ratings:
    description: Total number of ratings
    range: integer
  photo_count:
    description: Number of photos
    range: integer

# Wikidata enrichment slots
  wikidata_enrichment:
    description: Wikidata enrichment data
    range: WikidataEnrichment
    inlined: true
  wikidata_entity_id:
    description: Wikidata entity ID (Q-number)
    range: string
  api_metadata:
    description: API request metadata
    range: WikidataApiMetadata
    inlined: true
  # Shared by WikidataApiMetadata and ProvenanceSourceEntry.
  api_endpoint:
    description: API endpoint URL
    range: string
  request_url:
    description: Full request URL
    range: string
  response_status:
    description: HTTP response status
    range: integer
  response_time_ms:
    description: Response time in milliseconds
    range: float
  fetch_timestamp:
    description: When data was fetched
    range: datetime
  user_agent:
    description: User agent string used
    range: string
  authenticated:
    description: Whether request was authenticated
    range: boolean
  rate_limit_delay_used:
    description: Rate limit delay in seconds
    range: float

# Wikidata label/description slots
  wikidata_labels:
    description: Labels in multiple languages
    range: string
    # Note: This is actually a dict in source data
  wikidata_label_nl:
    description: Dutch label
    range: string
  wikidata_label_en:
    description: English label
    range: string
  wikidata_descriptions:
    description: Descriptions in multiple languages
    range: string
    # Note: This is actually a dict in source data
  wikidata_description_nl:
    description: Dutch description
    range: string
  wikidata_description_en:
    description: English description
    range: string
  wikidata_aliases:
    description: Aliases in multiple languages
    range: string
    # Note: This is actually a dict of lists in source data

# Wikidata identifier slots
  wikidata_identifiers:
    description: External identifiers from Wikidata
    range: WikidataIdentifiers
    inlined: true
  # All external identifiers are kept as strings.
  gnd:
    description: GND (German National Library) ID
    range: string
  viaf:
    description: VIAF ID
    range: string
  lcnaf:
    description: Library of Congress Name Authority File ID
    range: string
  geonames:
    description: GeoNames ID
    range: string
  isni:
    description: ISNI ID
    range: string
  ringgold:
    description: Ringgold ID
    range: string
  bnf:
    description: BnF (French National Library) ID
    range: string
  idref:
    description: IdRef ID
    range: string

# Wikidata entity reference slots
  # Only wikidata_instance_of is a list; country and located_in are
  # single inlined references.
  wikidata_instance_of:
    description: Instance of (P31) values
    range: WikidataEntityReference
    multivalued: true
    inlined_as_list: true
  wikidata_country:
    description: Country (P17) value
    range: WikidataEntityReference
    inlined: true
  wikidata_located_in:
    description: Located in (P131) value
    range: WikidataEntityReference
    inlined: true
  wikidata_coordinates:
    description: Coordinates from Wikidata
    range: WikidataCoordinates
    inlined: true
  wikidata_inception:
    description: Inception date
    range: WikidataTime
    inlined: true
  wikidata_official_website:
    description: Official website from Wikidata
    range: uri
  wikidata_image:
    description: Wikimedia Commons image filename
    range: string
  wikidata_logo:
    description: Wikimedia Commons logo filename
    range: string
  wikidata_sitelinks:
    description: Wikipedia sitelinks
    range: string
    # Note: This is actually a dict in source data
  wikidata_claims:
    description: Additional Wikidata claims
    range: string
    # Note: This is actually a complex nested dict in source data

# Wikidata entity reference internal slots
  # Declared as a plain string slot, not as a LinkML identifier.
  id:
    description: Entity ID (Q-number or P-number)
    range: string
  label_en:
    description: English label
    range: string
  label_nl:
    description: Dutch label
    range: string
  description_en:
    description: English description
    range: string
  description_nl:
    description: Dutch description
    range: string
  instance_of:
    description: Instance of values (Q-numbers)
    range: string
    multivalued: true

# Wikidata time slots
  # Kept as a string rather than a datetime range.
  time:
    description: ISO 8601 time value
    range: string
  calendarmodel:
    description: Calendar model URI
    range: string

# NAN ISIL enrichment slots
  nan_isil_enrichment:
    description: Nationaal Archief ISIL enrichment
    range: NANISILEnrichment
    inlined: true
  # Generic slot, also listed by CustodianNameConsensus, MatchingSource,
  # Identifier and SourceCoordinates.
  source:
    description: Source identifier
    range: string
  source_file:
    description: Source file path
    range: string
  source_url:
    description: Source URL
    range: uri
  data_tier:
    description: Data tier classification
    range: string
  extraction_date:
    description: When data was extracted
    range: datetime
  enrichment_method:
    description: Method used for enrichment
    range: string
  match_confidence:
    description: Confidence score for match
    range: float
  isil_code:
    description: ISIL code
    range: string
  nan_name:
    description: Name from NAN registry
    range: string
  nan_city:
    description: City from NAN registry
    range: string
  nan_toegekend_op:
    description: Date ISIL was assigned
    range: date
  nan_opmerking:
    description: Remarks from NAN registry
    range: string

# URL enrichment slots
  url_enrichment:
    description: URL discovery enrichment
    range: URLEnrichment
    inlined: true
  discovered_url:
    description: Discovered URL
    range: uri
  search_query:
    description: Search query used
    range: string
  status:
    description: Status of operation
    range: string

# Web enrichment slots
  web_enrichment:
    description: Web archiving enrichment
    range: WebEnrichment
    inlined: true
  web_archives:
    description: Archived websites
    range: WebArchive
    multivalued: true
    inlined_as_list: true
  full_site_archive_timestamp:
    description: When full site was archived
    range: datetime
  directory:
    description: Archive directory path
    range: string
  pages_archived:
    description: Number of pages archived
    range: integer
  archive_method:
    description: Archive method used
    range: string
  warc_file:
    description: WARC filename
    range: string
  warc_size_bytes:
    description: WARC file size in bytes
    range: integer
  warc_format:
    description: WARC format standard
    range: string

# Web claims slots
  web_claims:
    description: Claims extracted from website
    range: WebClaims
    inlined: true
  source_archive:
    description: Source archive directory
    range: string
  claims_count:
    description: Number of claims
    range: integer
  claims:
    description: List of claims
    range: WebClaim
    multivalued: true
    inlined_as_list: true
  # claim_type and claim_value are shared by WebClaim and
  # CustodianNameConsensus.
  claim_type:
    description: Type of claim
    range: string
  claim_value:
    description: Extracted claim value
    range: string
  raw_value:
    description: Raw value before processing
    range: string
  retrieved_on:
    description: When source was retrieved
    range: datetime
  xpath:
    description: XPath to source element
    range: string
  html_file:
    description: Source HTML file path
    range: string
  xpath_match_score:
    description: XPath match confidence score
    range: float
  extraction_method:
    description: Method used for extraction
    range: string
  # Shared by WebClaims, WebClaim and CustodianNameConsensus.
  extraction_timestamp:
    description: When extraction occurred
    range: datetime

# Custodian name consensus slots
  custodian_name:
    description: Consensus custodian name
    range: CustodianNameConsensus
    inlined: true
  confidence:
    description: Confidence score
    range: float
  # NOTE(review): the name suggests a method label, yet the slot is declared
  # as a boolean "whether consensus was achieved" — confirm against the data.
  consensus_method:
    description: Whether consensus was achieved
    range: boolean
  sources_checked:
    description: Number of sources checked
    range: integer
  sources_matched:
    description: Number of sources that matched
    range: integer
  matching_sources:
    description: Sources that matched
    range: MatchingSource
    multivalued: true
    inlined_as_list: true
  score:
    description: Match score
    range: float

# Identifier slots
  identifiers:
    description: Institution identifiers
    range: Identifier
    multivalued: true
    inlined_as_list: true
  identifier_scheme:
    description: Identifier scheme (ISIL, GHCID, Wikidata, etc.)
    range: string
  identifier_value:
    description: Identifier value
    range: string
  identifier_url:
    description: URL for identifier
    range: uri
  assigned_date:
    description: When identifier was assigned
    range: date

# GHCID slots
  # NOTE(review): GHCIDHistoryEntry also lists this slot; unless that class
  # overrides the range, it inherits the nested GHCIDInfo range from here.
  ghcid:
    description: GHCID information
    range: GHCIDInfo
    inlined: true
  ghcid_current:
    description: Current GHCID string
    range: string
  ghcid_original:
    description: Original GHCID string
    range: string
  ghcid_uuid:
    description: GHCID as UUID v5
    range: string
  ghcid_uuid_sha256:
    description: GHCID as UUID v8 (SHA-256)
    range: string
  # Shared by GHCIDInfo and GHCIDHistoryEntry.
  ghcid_numeric:
    description: GHCID as 64-bit numeric
    range: integer
  record_id:
    description: Database record UUID v7
    range: string
  generation_timestamp:
    description: When GHCID was generated
    range: datetime
  ghcid_history:
    description: GHCID change history
    range: GHCIDHistoryEntry
    multivalued: true
    inlined_as_list: true
  location_resolution:
    description: Location resolution details
    range: LocationResolution
    inlined: true
  # Integer here, whereas the Wikidata `geonames` identifier slot is a string.
  geonames_id:
    description: GeoNames ID for location
    range: integer

# GHCID history slots
  valid_from:
    description: Start of validity period
    range: datetime
  valid_to:
    description: End of validity period
    range: datetime
  reason:
    description: Reason for change
    range: string

# Location resolution slots
  method:
    description: Resolution method used
    range: string
  geonames_name:
    description: Name from GeoNames
    range: string
  feature_code:
    description: GeoNames feature code
    range: string
  admin1_code:
    description: Admin level 1 code
    range: string
  region_code:
    description: Region code (ISO 3166-2)
    range: string
  country_code:
    description: Country code (ISO 3166-1)
    range: string
  source_coordinates:
    description: Source coordinates used
    range: SourceCoordinates
    inlined: true
  distance_km:
    description: Distance in kilometers
    range: float

# Provenance slots
  provenance:
    description: Provenance information
    range: Provenance
    inlined: true
  schema_version:
    description: Schema version used
    range: string
  generated_at:
    description: When record was generated
    range: datetime
  sources:
    description: Sources used
    range: ProvenanceSources
    inlined: true
  data_tier_summary:
    description: Data tier classifications
    range: DataTierSummary
    inlined: true
  notes:
    description: Provenance notes
    range: string
    multivalued: true

# Provenance source slots
  # The three slots below are listed by the ProvenanceSources class and were
  # previously undefined, leaving the schema with dangling slot references.
  # NOTE(review): range and list cardinality are assumptions, inferred from
  # ProvenanceSourceEntry (otherwise unused) — confirm against the data.
  google_maps:
    description: Provenance entries for the Google Maps source
    range: ProvenanceSourceEntry
    multivalued: true
    inlined_as_list: true
  wikidata_api:
    description: Provenance entries for the Wikidata API source
    range: ProvenanceSourceEntry
    multivalued: true
    inlined_as_list: true
  website_scrape:
    description: Provenance entries for the website scrape source
    range: ProvenanceSourceEntry
    multivalued: true
    inlined_as_list: true
  source_type:
    description: Type of source
    range: string
  entity_id:
    description: Entity ID at source
    range: string
  claims_extracted:
    description: Claims extracted from source
    range: string
    multivalued: true

# Data tier summary slots
  # Each tier is a list of strings.
  tier_1_authoritative:
    description: Tier 1 authoritative sources
    range: string
    multivalued: true
  tier_2_verified:
    description: Tier 2 verified sources
    range: string
    multivalued: true
  tier_3_crowd_sourced:
    description: Tier 3 crowd-sourced sources
    range: string
    multivalued: true
  tier_4_inferred:
    description: Tier 4 inferred sources
    range: string
    multivalued: true

# Cross-reference slots for nested structures
  # Single inlined objects attached to the root entry.
  original_entry:
    description: Original CSV entry data
    range: OriginalEntry
    inlined: true
  organization:
    description: Organization information
    range: Organization
    inlined: true
  location:
    description: Location information
    range: Location
    inlined: true
  contact:
    description: Contact information
    range: Contact
    inlined: true
  digital_presence:
    description: Digital presence information
    range: DigitalPresence
    inlined: true
  opening_hours:
    description: Opening hours
    range: OpeningHours
    inlined: true