# RawSource - Raw source information for web enrichment
# Extracted from custodian_source.yaml per Rule 38 (modular schema files)
# Extraction date: 2026-01-08

id: https://nde.nl/ontology/hc/classes/RawSource
name: RawSource
title: RawSource

# Namespace prefixes for the CURIEs used in this schema.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  prov: http://www.w3.org/ns/prov#
  xsd: http://www.w3.org/2001/XMLSchema#
  pav: http://purl.org/pav/

imports:
  - linkml:types

# Range applied to any slot that does not declare one explicitly.
default_range: string

classes:
  RawSource:
    description: >-
      Raw source information for web enrichment including URL, fetch timestamp,
      HTTP status, content hash, and extracted highlights from search results.

      Ontology mapping rationale:
      - class_uri is prov:PrimarySource because this represents the original
        source material fetched from the web with full provenance metadata
      - close_mappings includes pav:RetrievedFrom for web retrieval provenance
      - close_mappings includes schema:WebPage as it captures web page data
      - related_mappings includes prov:Entity for general provenance entity
    # NOTE(review): in PROV-O, prov:PrimarySource is a qualified-derivation
    # class rather than an entity class - confirm it is the intended class_uri.
    class_uri: prov:PrimarySource
    close_mappings:
      # NOTE(review): PAV spells this term pav:retrievedFrom (a property, lower
      # camel case) - confirm the CURIE before relying on it resolving.
      - pav:RetrievedFrom
      - schema:WebPage
    related_mappings:
      - prov:Entity
    attributes:
      # --- Identity and location of the source ---
      source_id:
        range: string
        description: Unique identifier for this source
      url:
        range: uri
        description: Source URL

      # --- Timestamps ---
      fetch_timestamp:
        range: datetime
        description: When the source was fetched
      published_date:
        range: datetime
        description: When the source content was published

      # --- Classification and fetch outcome ---
      source_type:
        range: string
        description: Type of source (official_website, etc.)
      fetch_status:
        range: string
        description: Status of fetch (SUCCESS, FAILED, etc.)
      http_status:
        range: integer
        description: HTTP response status code

      # --- Page content metadata ---
      title:
        range: string
        description: Page title
      raw_markdown_hash:
        range: string
        description: SHA-256 hash of the raw markdown content

      # --- Exa search highlights (both slots are multivalued lists) ---
      exa_highlights:
        range: string
        multivalued: true
        inlined_as_list: true
        description: Highlighted excerpts from Exa search results
      # Presumably one score per entry in exa_highlights, in the same order -
      # TODO confirm against the producer of this data.
      exa_highlight_scores:
        range: float
        multivalued: true
        inlined_as_list: true
        description: Relevance scores for Exa highlights