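# File-viewer header captured with this source: "133 lines, 5 KiB, YAML".
# It describes the file as originally displayed and is not part of the schema.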
# LinkML schema module for the SourceRecord class.
# `id` is the schema URI; `name` and `title` are its short and display labels.
id: https://nde.nl/ontology/hc/classes/SourceRecord
name: SourceRecord
title: SourceRecord

# CURIE prefix -> namespace expansions usable anywhere in this schema.
# NOTE(review): in the part of the file visible here only `linkml`, `prov`
# and `dcat` are used as CURIE prefixes; the rest may serve importing
# schemas - confirm before pruning.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  pav: http://purl.org/pav/
  dcat: http://www.w3.org/ns/dcat#
  dcterms: http://purl.org/dc/terms/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#

# Imported schemas: the LinkML built-in types, plus the enum referenced by
# the `data_tier` slot (path is relative to this file).
imports:
  - linkml:types
  - ../enums/DataTierEnum

# Range assumed for any slot that does not declare one explicitly.
default_range: string
# Classes defined by this schema module.
classes:
  # One record per data extraction from a single source (see `description`).
  SourceRecord:
    # Literal block scalar: the line breaks and the single leading space on
    # the continuation lines are part of the value - do not re-wrap.
    description: |-
      Individual source record with claims, representing a data extraction from a specific source (API, registry, web scrape, etc.). Contains metadata about the source type, data tier, fetch timestamp, and extracted claims. Used to track provenance of individual data points.
      Ontology mapping rationale: - class_uri is prov:Entity because this represents a discrete data entity with
       provenance (when fetched, from where, by what method)
      - close_mappings includes dcat:Distribution as this is similar to a specific
       manifestation/representation of data from a source
      - related_mappings includes pav:retrievedFrom conceptually (the source was retrieved)
       and prov:PrimarySource (the record may be from a primary source)
    class_uri: prov:Entity
    close_mappings:
      - dcat:Distribution
    # NOTE(review): the description above also names pav:retrievedFrom, but
    # only prov:PrimarySource is listed here - confirm that is intended.
    related_mappings:
      - prov:PrimarySource

    # Slots local to SourceRecord. None is marked required or identifier.
    attributes:
      # --- Source identity and quality ---
      source_type:
        range: string
        description: Type identifier (nde_csv_registry, google_maps_api, etc.)
      data_tier:
        # Enum comes from the ../enums/DataTierEnum import.
        range: DataTierEnum
        description: Quality tier of this source

      # Stored as a plain string, not `datetime` (compare `retrieved_at`).
      fetch_timestamp:
        range: string
        description: When data was fetched (ISO datetime string)

      # --- API access ---
      has_or_had_api_endpoint:
        range: uri
        description: API endpoint used
      # Legacy spelling of `has_or_had_api_endpoint`, kept per its description.
      api_endpoint:
        range: uri
        description: API endpoint used (alias for has_or_had_api_endpoint for backward compatibility)
      place_id:
        range: string
        description: Google Maps place ID
      data_url:
        range: uri
        description: Data source URL
      match_method:
        range: string
        description: Method used for matching

      # NOTE(review): the description allows structured objects, but the
      # declared range is `string` - confirm which one the data follows.
      claims_extracted:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of claim fields extracted (can be strings or structured objects)

      # --- External identifiers ---
      # `entity_id` and `wikidata_id` are two keys for the same Q-number.
      entity_id:
        range: string
        description: Wikidata entity ID (Q-number)
      wikidata_id:
        range: string
        description: Wikidata entity ID (Q-number) - alternative key to entity_id
      source_url:
        range: uri
        description: Source URL for the data
      extraction_source:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of extraction source methods (e.g., archiveslab_llm_extraction)

      # NOTE(review): described as an alias for `fetch_timestamp`, yet this
      # slot is `datetime` while `fetch_timestamp` is `string` - confirm the
      # difference is deliberate.
      retrieved_at:
        range: datetime
        description: When data was retrieved (alias for fetch_timestamp)

      # --- Search bookkeeping ---
      search_result:
        range: string
        description: Result of search operation (found, not_found, etc.)
      # Plural form; a single-valued `search_query` slot is declared below.
      search_queries:
        range: string
        multivalued: true
        inlined_as_list: true
        description: Search queries attempted

      # Singular form; a separate `notes` slot is declared further down.
      note:
        range: string
        description: Additional notes about this source record
      source_file:
        range: string
        description: Source file name
      # Stored as a plain string, not `date`.
      research_date:
        range: string
        description: Date of research (YYYY-MM-DD format)
      url:
        range: uri
        description: URL of the source (website URL, etc.)
      data_extracted:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of data types/fields extracted from this source
      merge_note:
        range: string
        description: Note about merge operations involving this source record
      # Stored as a plain string, not `datetime`.
      extraction_timestamp:
        range: string
        description: When extraction was performed (ISO datetime string)
      api_version:
        range: string
        description: API version used for the request (e.g., v1, v2)
      search_query:
        range: string
        description: Search query used to find the source
      # Multivalued counterpart of `source_url`.
      source_urls:
        range: uri
        multivalued: true
        inlined_as_list: true
        description: Multiple source URLs (for sources with multiple pages)
      archive_path:
        range: string
        description: Path to archived source data file
      extraction_method:
        range: string
        description: Method used for data extraction (e.g., docling_pdf_table_extraction, linkup_markdown_extraction)
      notes:
        range: string
        description: Additional notes about the source record extraction
      pdf_count:
        range: integer
        description: Number of PDF files processed in this source record

    # NOTE(review): placed at class level on the strength of the rationale
    # text ("class/slot created during migration") - confirm it was not
    # nested under `pdf_count` in the original file.
    annotations:
      specificity_score: 0.1
      specificity_rationale: Generic utility class/slot created during migration
      # Quoted on purpose: the value is the string "['*']", not a YAML list.
      custodian_types: "['*']"