# Source: glam/schemas/20251121/linkml/modules/classes/RawSource.yaml
# Last modified: 2026-01-31 00:15:53 +01:00
#
# 64 lines, 2.3 KiB, YAML

# LinkML schema module that defines the RawSource class.
id: https://nde.nl/ontology/hc/classes/RawSource
name: RawSource
title: RawSource

# CURIE prefixes used by the class_uri and mapping declarations in this module.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  pav: http://purl.org/pav/

# linkml:types supplies the built-in ranges (string, uri, datetime, ...).
imports:
  - linkml:types

# Slots that omit an explicit range fall back to string.
default_range: string
classes:
  # One fetched web source together with its retrieval metadata
  # (URL, fetch time/status, content hash) and Exa search highlights.
  RawSource:
    description: |-
      Raw source information for web enrichment including URL, fetch
      timestamp, HTTP status, content hash, and extracted highlights from
      search results.

      Ontology mapping rationale:

      - class_uri is prov:PrimarySource because this represents the original
        source material fetched from the web with full provenance metadata
      - close_mappings includes pav:retrievedFrom for web retrieval provenance
      - close_mappings includes schema:WebPage as it captures web page data
      - related_mappings includes prov:Entity for general provenance entity
    # NOTE(review): in PROV-O, prov:PrimarySource appears to be a qualified
    # derivation class rather than an entity type - confirm this class_uri is
    # intended. Left unchanged to keep generated RDF stable.
    class_uri: prov:PrimarySource
    close_mappings:
      # PAV spells this term in lower camel case (pav:retrievedFrom).
      - pav:retrievedFrom
      - schema:WebPage
    related_mappings:
      - prov:Entity
    attributes:
      source_id:
        range: string
        description: Unique identifier for this source
      url:
        range: uri
        description: Source URL
      fetch_timestamp:
        range: datetime
        description: When the source was fetched
      published_date:
        range: datetime
        description: When the source content was published
      source_type:
        range: string
        description: Type of source (official_website, etc.)
      fetch_status:
        range: string
        description: Status of fetch (SUCCESS, FAILED, etc.)
      http_status:
        range: integer
        description: HTTP response status code
      title:
        range: string
        description: Page title
      raw_markdown_hash:
        range: string
        description: SHA-256 hash of the raw markdown content
      exa_highlights:
        range: string
        multivalued: true
        inlined_as_list: true
        description: Highlighted excerpts from Exa search results
      # NOTE(review): presumably index-aligned with exa_highlights - confirm
      # against the producer of this data.
      exa_highlight_scores:
        range: float
        multivalued: true
        inlined_as_list: true
        description: Relevance scores for Exa highlights
    annotations:
      specificity_score: 0.1
      specificity_rationale: Generic utility class/slot created during migration
      # Kept as a quoted string (not a YAML list), exactly as in the original.
      custodian_types: "['*']"