# File: glam/schemas/20251121/linkml/modules/classes/RawSource.yaml
# Listing metadata: 78 lines, 2.5 KiB, YAML

# RawSource - Raw source information for web enrichment
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08
# Schema-level metadata identifying the RawSource module.
id: https://nde.nl/ontology/hc/classes/RawSource
name: RawSource
title: RawSource

# CURIE prefix expansions available to this module. The entries must be
# nested under `prefixes`; at column 0 they would be read as unrelated
# top-level keys and `prefixes` itself would be null.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  pav: http://purl.org/pav/

# linkml:types supplies the built-in scalar types used as slot ranges
# (string, uri, datetime, integer, float).
imports:
  - linkml:types

# Slots that do not declare a `range` are treated as strings.
default_range: string
classes:
  # Record describing one fetched web source: where it came from, when and
  # how the fetch went, and which excerpts the search step highlighted.
  RawSource:
    # Literal block scalar (`|-`) so the rationale bullets keep their line
    # breaks; a folded scalar would merge them into a single paragraph.
    description: |-
      Raw source information for web enrichment including URL, fetch timestamp,
      HTTP status, content hash, and extracted highlights from search results.

      Ontology mapping rationale:
      - class_uri is prov:PrimarySource because this represents the original
        source material fetched from the web with full provenance metadata
      - close_mappings includes pav:RetrievedFrom for web retrieval provenance
      - close_mappings includes schema:WebPage as it captures web page data
      - related_mappings includes prov:Entity for general provenance entity
    # NOTE(review): in PROV-O, prov:PrimarySource appears to be the qualified
    # form of prov:hadPrimarySource (a kind of prov:Derivation) rather than a
    # class of source entities - confirm this is the intended class_uri.
    class_uri: prov:PrimarySource
    close_mappings:
      # NOTE(review): PAV publishes the property pav:retrievedFrom (lower
      # camel case); confirm this CURIE resolves to an existing term.
      - pav:RetrievedFrom
      - schema:WebPage
    related_mappings:
      - prov:Entity
    attributes:
      # --- Identity and location ---
      source_id:
        range: string
        description: Unique identifier for this source
      url:
        range: uri
        description: Source URL

      # --- Timing ---
      fetch_timestamp:
        range: datetime
        description: When the source was fetched
      published_date:
        range: datetime
        description: When the source content was published

      # --- Classification and fetch outcome ---
      # source_type and fetch_status are free-form strings; the values named
      # in their descriptions are examples, not an enforced enumeration.
      source_type:
        range: string
        description: Type of source (official_website, etc.)
      fetch_status:
        range: string
        description: Status of fetch (SUCCESS, FAILED, etc.)
      http_status:
        range: integer
        description: HTTP response status code

      # --- Content ---
      title:
        range: string
        description: Page title
      raw_markdown_hash:
        range: string
        description: SHA-256 hash of the raw markdown content

      # --- Exa search highlights ---
      # NOTE(review): inlined_as_list normally governs class-ranged slots; on
      # these scalar ranges it looks redundant - confirm before removing.
      exa_highlights:
        range: string
        multivalued: true
        inlined_as_list: true
        description: Highlighted excerpts from Exa search results
      # Presumably index-aligned with exa_highlights (one score per excerpt);
      # the schema does not enforce this - verify against the producer.
      exa_highlight_scores:
        range: float
        multivalued: true
        inlined_as_list: true
        description: Relevance scores for Exa highlights