# XPath - An XPath expression for locating elements in HTML/XML documents
# Created per slot_fixes.yaml migration for: xpath
# Creation date: 2026-01-14

# Schema identity: `id` is the schema URI, `name` its short identifier,
# `title` its human-readable label.
id: https://nde.nl/ontology/hc/classes/XPath
name: XPath
title: XPath

# CURIE prefix expansions. In this file `prov:` and `schema:` are used by the
# XPath class URI and mappings, and `linkml:` by the import below; `hc:` and
# `xsd:` are declared but not referenced as CURIEs in this file.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  prov: http://www.w3.org/ns/prov#
  schema: http://schema.org/
  xsd: http://www.w3.org/2001/XMLSchema#

# linkml:types supplies the built-in types used as attribute ranges in this
# schema (string, float, uriorcurie).
imports:
  - linkml:types

# Range assumed for any slot/attribute that does not declare its own `range`.
default_range: string

classes:
  # XPath: a provenance pointer from an extracted value to the node it came
  # from in an archived HTML/XML document.
  XPath:
    # Literal block scalar (|-) on purpose: the text is Markdown, and a folded
    # scalar (>-) would collapse each blank line to a single newline and join
    # adjacent lines, losing the paragraph breaks between the sections.
    description: |-
      An XPath expression used to locate a specific element within an HTML or XML document.

      **CRITICAL PROVENANCE FIELD**:
      XPath expressions provide the essential link between extracted data values and their
      original source location in archived documents. Without an XPath, a claim extracted
      from a webpage is unverifiable.

      **FORMAT**: Standard XPath 1.0 expressions

      **EXAMPLE**: `/html[1]/body[1]/div[6]/div[1]/table[3]/tbody[1]/tr[1]/td[1]/p[6]`

      **USAGE CONTEXT**:
      Used with `has_or_had_provenance_path` slot to link provenance records to
      specific locations in source documents.

    # Instances of this class are typed as PROV-O locations.
    class_uri: prov:Location

    close_mappings:
      - schema:xpath

    related_mappings:
      - prov:atLocation

    attributes:
      # The only required attribute: the XPath string itself.
      expression:
        range: string
        required: true
        description: >-
          The XPath expression string.
          Example: /html[1]/body[1]/div[6]/div[1]/table[3]/tbody[1]/tr[1]/td[1]/p[6]
        # Anchored at the start only: the value must begin with "/", which
        # covers absolute paths (/html[1]/...) and descendant searches
        # (//meta[...]) but rejects relative expressions.
        pattern: "^/.*"

      # Optional copy of the text found at the location.
      matched_text:
        range: string
        description: >-
          The text content found at this XPath location.
          Used for verification and debugging.

      # Optional match confidence, constrained to the closed interval [0.0, 1.0].
      match_score:
        range: float
        minimum_value: 0.0
        maximum_value: 1.0
        description: >-
          Confidence score (0.0 to 1.0) for the XPath match.
          1.0 = exact match, <1.0 = fuzzy match.

      # Optional pointer to the document the expression applies to.
      # NOTE(review): the range is uriorcurie, but the description and the
      # first example below use a relative file path - confirm that the
      # validators in use accept such values, or relax the range.
      source_document:
        range: uriorcurie
        description: >-
          URI or path to the source document where this XPath applies.
          Example: web/GHCID/example.org/rendered.html

    # Project-specific annotations. custodian_types is a JSON-encoded list kept
    # as a string; "*" is the wildcard matching the rationale's "any custodian
    # type".
    annotations:
      custodian_types: '["*"]'
      custodian_types_rationale: >-
        XPath provenance is relevant for any custodian type where web content
        is extracted and archived.
      custodian_types_primary: "*"
      specificity_score: 0.7
      specificity_rationale: >-
        High specificity - only relevant for web-extracted data with HTML archival.

    # Each example `value` is a literal block holding a YAML rendering of an
    # XPath instance (a string, not a nested mapping).
    examples:
      - value: |
          XPath:
            expression: "/html[1]/body[1]/div[6]/div[1]/table[3]/tbody[1]/tr[1]/td[1]/p[6]"
            matched_text: "Historische Vereniging Nijeveen"
            match_score: 1.0
            source_document: "web/0021/historischeverenigingnijeveen.nl/rendered.html"
        description: >-
          XPath extraction pointing to an institution name in archived HTML.

      - value: |
          XPath:
            expression: "//meta[@property='og:title']/@content"
            matched_text: "Amsterdam Museum - Official Website"
            match_score: 0.95
        description: >-
          XPath to OpenGraph metadata in a webpage header.