# SourceRecord - Individual source record with claims
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08

id: https://nde.nl/ontology/hc/classes/SourceRecord
name: SourceRecord
title: SourceRecord

# CURIE prefixes available to this module.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  pav: http://purl.org/pav/
  dcat: http://www.w3.org/ns/dcat#

imports:
  - linkml:types
  # Enum module; the data_tier attribute uses DataTierEnum as its range.
  - ../enums/DataTierEnum

# Range applied to any slot that does not declare one explicitly.
default_range: string

classes:
  SourceRecord:
    description: >-
      Individual source record with claims, representing a data extraction from a specific
      source (API, registry, web scrape, etc.). Contains metadata about the source type,
      data tier, fetch timestamp, and extracted claims. Used to track provenance of
      individual data points.

      Ontology mapping rationale:
      - class_uri is prov:Entity because this represents a discrete data entity with
      provenance (when fetched, from where, by what method)
      - close_mappings includes dcat:Distribution as this is similar to a specific
      manifestation/representation of data from a source
      - related_mappings includes pav:retrievedFrom conceptually (the source was retrieved)
      and prov:PrimarySource (the record may be from a primary source)
    class_uri: prov:Entity
    close_mappings:
      - dcat:Distribution
    # NOTE(review): the description also names pav:retrievedFrom, but only
    # prov:PrimarySource is actually listed here.
    related_mappings:
      - prov:PrimarySource
    attributes:
      source_type:
        range: string
        description: Type identifier (nde_csv_registry, google_maps_api, etc.)
      data_tier:
        range: DataTierEnum
        description: Quality tier of this source
      # Typed as a plain string; retrieved_at (below) is documented as its
      # alias but is typed as datetime.
      fetch_timestamp:
        range: string
        description: When data was fetched (ISO datetime string)
      has_or_had_api_endpoint:
        range: uri
        description: API endpoint used
      api_endpoint:
        range: uri
        description: API endpoint used (alias for has_or_had_api_endpoint for backward compatibility)
      place_id:
        range: string
        description: Google Maps place ID
      data_url:
        range: uri
        description: Data source URL
      match_method:
        range: string
        description: Method used for matching
      # NOTE(review): `Any` is not defined in this module and is not one of the
      # linkml:types; presumably it is declared in another schema module that
      # is merged with this one - confirm.
      claims_extracted:
        range: Any
        multivalued: true
        inlined_as_list: true
        description: List of claim fields extracted (can be strings or structured objects)
      # entity_id and wikidata_id both carry the Wikidata Q-number.
      entity_id:
        range: string
        description: Wikidata entity ID (Q-number)
      wikidata_id:
        range: string
        description: Wikidata entity ID (Q-number) - alternative key to entity_id
      source_url:
        range: uri
        description: Source URL for the data
      extraction_source:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of extraction source methods (e.g., archiveslab_llm_extraction)
      retrieved_at:
        range: datetime
        description: When data was retrieved (alias for fetch_timestamp)
      search_result:
        range: string
        description: Result of search operation (found, not_found, etc.)
      search_queries:
        range: string
        multivalued: true
        inlined_as_list: true
        description: Search queries attempted
      # `note` and `notes` (further below) are separate attributes.
      note:
        range: string
        description: Additional notes about this source record
      source_file:
        range: string
        description: Source file name
      research_date:
        range: string
        description: Date of research (YYYY-MM-DD format)
      url:
        range: uri
        description: URL of the source (website URL, etc.)
      data_extracted:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of data types/fields extracted from this source
      merge_note:
        range: string
        description: Note about merge operations involving this source record
      extraction_timestamp:
        range: string
        description: When extraction was performed (ISO datetime string)
      api_version:
        range: string
        description: API version used for the request (e.g., v1, v2)
      # Single-valued counterpart of the multivalued search_queries above.
      search_query:
        range: string
        description: Search query used to find the source
      # Multivalued counterpart of source_url above.
      source_urls:
        range: uri
        multivalued: true
        inlined_as_list: true
        description: Multiple source URLs (for sources with multiple pages)
      archive_path:
        range: string
        description: Path to archived source data file
      extraction_method:
        range: string
        description: Method used for data extraction (e.g., docling_pdf_table_extraction, linkup_markdown_extraction)
      notes:
        range: string
        description: Additional notes about the source record extraction
      pdf_count:
        range: integer
        description: Number of PDF files processed in this source record