# glam/schemas/20251121/linkml/modules/classes/XPath.yaml
# 2026-01-30 23:56:19 +01:00
#
# 96 lines
# 3 KiB
# YAML

# LinkML schema module that defines the XPath class.
id: https://nde.nl/ontology/hc/classes/XPath
name: XPath
title: XPath

# CURIE prefix declarations available to this module.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  prov: http://www.w3.org/ns/prov#
  schema: http://schema.org/
  xsd: http://www.w3.org/2001/XMLSchema#

# Built-in LinkML types referenced by the attribute ranges in this module.
imports:
  - linkml:types

# Range applied to any slot or attribute that does not declare its own.
default_range: string
classes:
  # XPath: a locator for one element inside an HTML or XML document,
  # exposed with the class URI prov:Location.
  XPath:
    # Literal block (|-) with blank lines between sections so the markdown
    # headings stay on their own lines instead of folding into one paragraph.
    description: |-
      An XPath expression used to locate a specific element within an HTML or XML document.

      **CRITICAL PROVENANCE FIELD**:
      XPath expressions provide the essential link between extracted data values and their
      original source location in archived documents. Without an XPath, a claim extracted
      from a webpage is unverifiable.

      **FORMAT**: Standard XPath 1.0 expressions

      **EXAMPLE**: `/html[1]/body[1]/div[6]/div[1]/table[3]/tbody[1]/tr[1]/td[1]/p[6]`

      **USAGE CONTEXT**:
      Used with `has_or_had_provenance_path` slot to link provenance records to
      specific locations in source documents.
    class_uri: prov:Location
    close_mappings:
      - schema:xpath
    related_mappings:
      - prov:atLocation
    attributes:
      # The only required attribute. The pattern accepts only values that
      # begin with "/" (so both "/html[1]/..." and "//meta/..." pass).
      # NOTE(review): relative XPath 1.0 expressions (e.g. "div/p") are
      # rejected by this pattern although the class description says
      # "Standard XPath 1.0 expressions" - confirm this is intended.
      expression:
        range: string
        required: true
        description: >-
          The XPath expression string.
          Example: /html[1]/body[1]/div[6]/div[1]/table[3]/tbody[1]/tr[1]/td[1]/p[6]
        pattern: "^/.*"
      # Optional copy of the text located by the expression.
      matched_text:
        range: string
        description: >-
          The text content found at this XPath location.
          Used for verification and debugging.
      # Optional score, bounded to the closed interval [0.0, 1.0].
      match_score:
        range: float
        minimum_value: 0.0
        maximum_value: 1.0
        description: >-
          Confidence score (0.0 to 1.0) for the XPath match.
          1.0 = exact match, <1.0 = fuzzy match.
      # Optional pointer to the document the expression applies to.
      # NOTE(review): the range is uriorcurie, but the description and the
      # examples use relative file paths such as "web/.../rendered.html" -
      # confirm these validate as intended, or relax the range.
      source_document:
        range: uriorcurie
        description: >-
          URI or path to the source document where this XPath applies.
          Example: web/GHCID/example.org/rendered.html
    # Project-specific annotations. custodian_types holds a JSON-encoded
    # list as a string; "*" presumably means "all custodian types" (the
    # rationale below says the class applies to any custodian type).
    annotations:
      custodian_types: '["*"]'
      custodian_types_rationale: >-
        XPath provenance is relevant for any custodian type where web content
        is extracted and archived.
      custodian_types_primary: "*"
      specificity_score: 0.7
      specificity_rationale: >-
        High specificity - only relevant for web-extracted data with HTML archival.
    # Each example value is an embedded YAML snippet kept as a literal string.
    examples:
      - value: |
          XPath:
            expression: "/html[1]/body[1]/div[6]/div[1]/table[3]/tbody[1]/tr[1]/td[1]/p[6]"
            matched_text: "Historische Vereniging Nijeveen"
            match_score: 1.0
            source_document: "web/0021/historischeverenigingnijeveen.nl/rendered.html"
        description: >-
          XPath extraction pointing to an institution name in archived HTML.
      - value: |
          XPath:
            expression: "//meta[@property='og:title']/@content"
            matched_text: "Amsterdam Museum - Official Website"
            match_score: 0.95
        description: >-
          XPath to OpenGraph metadata in a webpage header.