# glam/schemas/20251121/linkml/modules/classes/SourceRecord.yaml
# 2026-01-31 00:15:53 +01:00

# 133 lines
# 5 KiB
# YAML

# LinkML schema module that defines the SourceRecord class.
id: https://nde.nl/ontology/hc/classes/SourceRecord
name: SourceRecord
title: SourceRecord

# CURIE prefix map; every entry must be nested under `prefixes`.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  pav: http://purl.org/pav/
  dcat: http://www.w3.org/ns/dcat#
  dcterms: http://purl.org/dc/terms/
  crm: http://www.cidoc-crm.org/cidoc-crm/
  skos: http://www.w3.org/2004/02/skos/core#
  rdfs: http://www.w3.org/2000/01/rdf-schema#
  org: http://www.w3.org/ns/org#

# Standard LinkML types plus the enum used as the range of `data_tier`.
imports:
  - linkml:types
  - ../enums/DataTierEnum

# Range applied to any slot that does not declare one explicitly.
default_range: string
classes:
  # One record per data extraction from a single source; carries the
  # provenance metadata (source type, tier, timestamps, URLs) for that fetch.
  SourceRecord:
    # Literal block scalar so the rationale renders as a real bullet list:
    # heading on its own line, one bullet per line, continuations indented.
    description: |-
      Individual source record with claims, representing a data extraction from a specific source (API, registry, web scrape, etc.). Contains metadata about the source type, data tier, fetch timestamp, and extracted claims. Used to track provenance of individual data points.

      Ontology mapping rationale:
      - class_uri is prov:Entity because this represents a discrete data entity with
        provenance (when fetched, from where, by what method)
      - close_mappings includes dcat:Distribution as this is similar to a specific
        manifestation/representation of data from a source
      - related_mappings includes pav:retrievedFrom conceptually (the source was retrieved)
        and prov:PrimarySource (the record may be from a primary source)
    class_uri: prov:Entity
    close_mappings:
      - dcat:Distribution
    # pav:retrievedFrom is mentioned in the description as a conceptual
    # relation only; it is not declared as a mapping here.
    related_mappings:
      - prov:PrimarySource
    attributes:
      source_type:
        range: string
        description: Type identifier (nde_csv_registry, google_maps_api, etc.)
      data_tier:
        range: DataTierEnum
        description: Quality tier of this source
      # NOTE(review): typed as string, while its documented alias
      # `retrieved_at` is typed as datetime - confirm this is intentional.
      fetch_timestamp:
        range: string
        description: When data was fetched (ISO datetime string)
      has_or_had_api_endpoint:
        range: uri
        description: API endpoint used
      api_endpoint:
        range: uri
        description: API endpoint used (alias for has_or_had_api_endpoint for backward compatibility)
      place_id:
        range: string
        description: Google Maps place ID
      data_url:
        range: uri
        description: Data source URL
      match_method:
        range: string
        description: Method used for matching
      # NOTE(review): the description allows structured objects, but the
      # declared range only admits strings - confirm which one is intended.
      claims_extracted:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of claim fields extracted (can be strings or structured objects)
      entity_id:
        range: string
        description: Wikidata entity ID (Q-number)
      wikidata_id:
        range: string
        description: Wikidata entity ID (Q-number) - alternative key to entity_id
      source_url:
        range: uri
        description: Source URL for the data
      extraction_source:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of extraction source methods (e.g., archiveslab_llm_extraction)
      retrieved_at:
        range: datetime
        description: When data was retrieved (alias for fetch_timestamp)
      search_result:
        range: string
        description: Result of search operation (found, not_found, etc.)
      search_queries:
        range: string
        multivalued: true
        inlined_as_list: true
        description: Search queries attempted
      note:
        range: string
        description: Additional notes about this source record
      source_file:
        range: string
        description: Source file name
      research_date:
        range: string
        description: Date of research (YYYY-MM-DD format)
      url:
        range: uri
        description: URL of the source (website URL, etc.)
      data_extracted:
        range: string
        multivalued: true
        inlined_as_list: true
        description: List of data types/fields extracted from this source
      merge_note:
        range: string
        description: Note about merge operations involving this source record
      extraction_timestamp:
        range: string
        description: When extraction was performed (ISO datetime string)
      api_version:
        range: string
        description: API version used for the request (e.g., v1, v2)
      search_query:
        range: string
        description: Search query used to find the source
      source_urls:
        range: uri
        multivalued: true
        inlined_as_list: true
        description: Multiple source URLs (for sources with multiple pages)
      archive_path:
        range: string
        description: Path to archived source data file
      extraction_method:
        range: string
        description: Method used for data extraction (e.g., docling_pdf_table_extraction, linkup_markdown_extraction)
      # NOTE(review): near-duplicate of `note` above; both are kept as
      # separate slots - confirm whether they should be consolidated.
      notes:
        range: string
        description: Additional notes about the source record extraction
      pdf_count:
        range: integer
        description: Number of PDF files processed in this source record
    # NOTE(review): placed at class level; the original nesting of this
    # `annotations` mapping could not be determined - confirm.
    annotations:
      specificity_score: 0.1
      specificity_rationale: Generic utility class/slot created during migration
      custodian_types: "['*']"