# glam/schemas/20251121/linkml/modules/classes/PersonWebClaim.yaml

# Module identity: canonical IRI, machine-readable name, and display title.
id: https://nde.nl/ontology/hc/class/PersonWebClaim
name: PersonWebClaim
title: 'Person Web Claim - Verifiable Person Data Claims'

# CURIE prefix -> namespace IRI expansions declared by this module.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  dcterms: http://purl.org/dc/terms/
  prov: http://www.w3.org/ns/prov#
  pico: https://personsincontext.org/model#
  foaf: http://xmlns.com/foaf/0.1/
imports:
  - linkml:types
  # Slots
  - ../slots/has_or_had_note  # was: person_claim_note - migrated per Rule 53 (2026-01-18)
  - ../slots/has_or_had_provenance_path
  - ../slots/has_or_had_score  # was: template_specificity - migrated per Rule 53 (2026-01-17)
  - ../slots/person_claim_id
  - ../slots/person_claim_type
  - ../slots/person_claim_value
  - ../slots/person_html_file
  - ../slots/retrieval_agent
  - ../slots/retrieved_on
  - ../slots/source_url
  - ../slots/specificity_annotation
  # Classes
  - ./Note  # for has_or_had_note range
  - ./SpecificityAnnotation
  - ./TemplateSpecificityScore  # was: TemplateSpecificityScores - migrated per Rule 53 (2026-01-17)
  - ./TemplateSpecificityType
  - ./TemplateSpecificityTypes
  - ./XPath
  # Enums
  - ../enums/PersonClaimTypeEnum
  - ../enums/RetrievalAgentEnum
default_prefix: hc
classes:
  # PersonWebClaim: a single person-related claim extracted from a web source,
  # typed as a prov:Entity (see class_uri).
  PersonWebClaim:
    class_uri: prov:Entity
    # Written as a literal block scalar so the Markdown body stays readable and
    # diffable. The embedded PersonObservation example uses this class's own
    # slot names (person_claim_type / person_claim_value) and carries every
    # slot that slot_usage marks as required.
    # NOTE(review): the prose now names the linking slot `has_or_had_web_claim`
    # to agree with the example; confirm against the PersonObservation class.
    description: |
      A verifiable claim about a person extracted from a web page with provenance.

      **RULE 26 COMPLIANCE: Person Data Provenance**

      All person/staff data associated with heritage custodians MUST have
      web claim provenance. This includes:
      - Staff names, titles, and roles
      - Contact information (if publicly available)
      - Professional history and education
      - Affiliations and expertise areas

      **VERIFIABILITY PRINCIPLE**

      Like the base WebClaim class, PersonWebClaim follows the "XPath or Remove" principle:
      - Claims from web pages MUST have has_or_had_provenance_path with XPath expression
      - Claims from APIs (LinkedIn via Exa) may omit has_or_had_provenance_path but MUST have source_url
      - Claims without any verifiable source are FABRICATED and must be removed

      **SOURCE HIERARCHY**

      When multiple sources provide the same information:
      1. Official institutional website (highest reliability)
      2. LinkedIn profile (high reliability)
      3. News articles/press releases (medium-high)
      4. Conference programs (medium)
      5. Academic publications (medium)
      6. Third-party databases (lower)

      Document all sources when available; note conflicts.

      **CLAIM TYPES**

      Common claim types for heritage institution staff:
      - **full_name**: Complete name as displayed
      - **role_title**: Job title (e.g., "Senior Curator")
      - **department**: Organizational unit
      - **email**: Professional contact (only if public)
      - **biography**: Professional bio text
      - **specialization**: Expertise areas
      - **education**: Degrees and institutions

      **EXTRACTION WORKFLOW**

      1. SCRAPE institutional staff/team pages with FireCrawl
      2. EXTRACT names and roles with XPath locations stored in has_or_had_provenance_path
      3. SEARCH LinkedIn for additional profile data
      4. CREATE PersonWebClaim for each extracted fact
      5. LINK claims to PersonObservation records

      **INTEGRATION WITH PERSONOBSERVATION**

      PersonObservation (the PiCo-based staff role record) references
      PersonWebClaim instances via the `has_or_had_web_claim` slot:

      ```yaml
      PersonObservation:
        person_name: "Dr. Jane Smith"
        staff_role: CONSERVATOR
        has_or_had_web_claim:
          - person_claim_type: full_name
            person_claim_value: "Dr. Jane Smith"
            source_url: https://museum.org/team
            retrieved_on: "2025-01-15T10:30:00Z"
            has_or_had_provenance_path:
              expression: /html/body/main/div[2]/h3
              match_score: 1.0
            retrieval_agent: firecrawl
      ```

      **LINKEDIN PROFILE HANDLING**

      For LinkedIn data, create separate profile files (per Rule 12, Rule 20)
      and reference them:

      ```yaml
      linkedin_claims:
        linkedin_url: https://www.linkedin.com/in/jane-smith
        profile_data_path: data/custodian/person/entity/jane-smith_20250115.json
        retrieved_on: "2025-01-15T10:30:00Z"
        retrieval_agent: exa_crawling_exa
      ```
    # Vocabulary alignments: one exact equivalent and two looser matches.
    exact_mappings:
      - prov:Entity
    close_mappings:
      - schema:PropertyValue
      - foaf:Document
    # Slots carried by this class; each one is imported at the top of the module.
    slots:
      # formerly person_claim_note (Rule 53 migration, 2026-01-18)
      - has_or_had_note
      - has_or_had_provenance_path
      # formerly template_specificity (Rule 53 migration, 2026-01-17)
      - has_or_had_score
      - person_claim_id
      - person_claim_type
      - person_claim_value
      - person_html_file
      - retrieval_agent
      - retrieved_on
      - source_url
      - specificity_annotation
    # Class-specific refinements (range, cardinality, requiredness) of the
    # slots listed under `slots`.
    slot_usage:
      # formerly person_claim_note (Rule 53 migration, 2026-01-18)
      has_or_had_note:
        description: |
          MIGRATED from person_claim_note per Rule 53 (2026-01-18).
          Notes about this claim extraction using the Note class.
          Document any issues, conflicts, or special circumstances.
          Use note_type: "extraction" for extraction-related notes.
        range: Note
        multivalued: true
        inlined: true
        inlined_as_list: true
        examples:
          - description: Extraction note about truncated content
            value:
              - note_type: extraction
                note_content: 'Biography truncated from longer text on page'
          - description: LinkedIn API extraction note
            value:
              - note_type: extraction
                note_content: 'Profile data stored in person/entity/taco-dibbits_20250115.json. No XPath for API extraction.'
      # The five slots below are mandatory on every PersonWebClaim.
      person_claim_type:
        required: true
      person_claim_value:
        required: true
      source_url:
        required: true
      retrieved_on:
        required: true
      retrieval_agent:
        required: true
        range: RetrievalAgentEnum
      # Not marked required here: the description allows API-sourced claims
      # to omit the XPath provenance.
      has_or_had_provenance_path:
        description: |
          XPath provenance path documenting the exact location in the source HTML
          where this person claim was extracted from. Contains the XPath expression,
          match score, and matched text.
          REQUIRED for web page claims. May be omitted for API-sourced claims (e.g., LinkedIn via Exa).
        range: XPath
        inlined: true
    # Free-text maintainer notes attached to the class (migration history,
    # governing rules, related records).
    comments:
      - 'MIGRATION (2026-01-18): Replaced person_claim_note with has_or_had_note using Note class per Rule 53'
      - 'MIGRATION (2026-01-15): Replaced person_xpath/person_xpath_match_score slots with has_or_had_provenance_path using XPath class per slot_fixes.yaml'
      - PersonWebClaim extends WebClaim pattern for person-specific data
      - 'XPATH OR REMOVE: Claims without verifiable source must be removed'
      # was: "via web_claims slot" - renamed to match the has_or_had_web_claim key
      # used by the PersonObservation example in this class's description.
      # NOTE(review): confirm the slot name against the PersonObservation class.
      - Links to PersonObservation via has_or_had_web_claim slot
      - LinkedIn data stored separately in person/entity/ files (Rule 12, 20)
      - See Rule 26 in AGENTS.md for complete documentation
    # Sample instances: three claims scraped from one institutional web page
    # (each with an XPath provenance path) and one LinkedIn claim without one.
    # NOTE(review): LinkML's Example metaclass appears to type `value` as a
    # string and to offer `object` for structured instances - confirm whether
    # the tooling in this repository relies on `value` before changing it.
    examples:
      - description: Exact match for museum director name
        value:
          person_claim_type: full_name
          person_claim_value: Taco Dibbits
          source_url: https://www.rijksmuseum.nl/en/about-us/organisation
          retrieved_on: '2025-01-15T10:30:00Z'
          has_or_had_provenance_path:
            expression: /html/body/main/section[2]/div[1]/h2
            match_score: 1.0
            source_document: web/NL-NH-AMS-M-RM/rijksmuseum.nl/organisation.html
          person_html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/organisation.html
          retrieval_agent: firecrawl
      - description: Role title from institutional page
        value:
          person_claim_type: role_title
          person_claim_value: General Director
          source_url: https://www.rijksmuseum.nl/en/about-us/organisation
          retrieved_on: '2025-01-15T10:30:00Z'
          has_or_had_provenance_path:
            expression: /html/body/main/section[2]/div[1]/p[1]
            match_score: 1.0
            source_document: web/NL-NH-AMS-M-RM/rijksmuseum.nl/organisation.html
          person_html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/organisation.html
          retrieval_agent: firecrawl
      - description: Biography text with partial match score
        value:
          person_claim_type: biography
          person_claim_value: 'Taco Dibbits has been General Director since 2016...'
          source_url: https://www.rijksmuseum.nl/en/about-us/organisation
          retrieved_on: '2025-01-15T10:30:00Z'
          has_or_had_provenance_path:
            expression: /html/body/main/section[2]/div[1]/div[2]
            match_score: 0.92
            matched_text: 'Taco Dibbits has been General Director since 2016...'
            source_document: web/NL-NH-AMS-M-RM/rijksmuseum.nl/organisation.html
          person_html_file: web/NL-NH-AMS-M-RM/rijksmuseum.nl/organisation.html
          retrieval_agent: firecrawl
          # formerly person_claim_note (Rule 53 migration, 2026-01-18)
          has_or_had_note:
            - note_type: extraction
              note_content: 'Biography truncated from longer text on page'
      - description: LinkedIn claim - No XPath for API extraction
        value:
          person_claim_type: linkedin_url
          person_claim_value: https://www.linkedin.com/in/taco-dibbits
          source_url: https://www.linkedin.com/in/taco-dibbits
          retrieved_on: '2025-01-15T11:00:00Z'
          retrieval_agent: exa_crawling_exa
          # formerly person_claim_note (Rule 53 migration, 2026-01-18)
          has_or_had_note:
            - note_type: extraction
              note_content: 'Profile data stored in person/entity/taco-dibbits_20250115.json. No XPath for API extraction.'