# glam/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml

# LinkedIn Profile Class
# Complete LinkedIn profile extraction for person entity files

# Schema identity: `id` is the schema URI, `name` the machine-readable
# identifier, `title` the human-readable label.
id: https://nde.nl/ontology/hc/class/LinkedInProfile
name: linkedin_profile_class
title: LinkedIn Profile Class
version: 1.0.0

# CURIE prefixes available to class_uri / slot_uri / mapping values below.
# NOTE(review): `dct` is declared but not referenced anywhere in this file.
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  schema: http://schema.org/
  foaf: http://xmlns.com/foaf/0.1/
  prov: http://www.w3.org/ns/prov#
  dct: http://purl.org/dc/terms/

# ExtractionMetadata, WorkExperience, EducationCredential, HeritageRelevance
# and LanguageProficiency are used as ranges below but are not defined in this
# file; presumably the same-named modules imported here provide them.
imports:
  - linkml:types
  - ../metadata
  - ./ExtractionMetadata
  - ./WorkExperience
  - ./EducationCredential
  - ./HeritageRelevance
  - ./LanguageProficiency

# Range applied to any slot that does not declare one explicitly.
default_range: string

classes:

  # Root container for one person entity file: bundles extraction provenance,
  # the LinkedIn profile content, and optional classification/enrichment data.
  LinkedInProfile:
    class_uri: schema:ProfilePage
    description: |
      Complete LinkedIn profile extraction for a person.

      Models the content of person entity JSON files stored at
      `data/custodian/person/entity/*.json`. This is the root class
      for LinkedIn profile data extracted via Exa API or HTML parsing.

      **Relationship to PersonObservation**:
      - PersonObservation.linkedin_profile_path references the file containing
        this LinkedInProfile data
      - PersonObservation.linkedin_profile_url links to the source URL
      - This class models the CONTENT of that file

      **Relationship to SocialMediaProfile**:
      - SocialMediaProfile (in same schema) is for CUSTODIAN social media accounts
        (e.g., Rijksmuseum's Instagram, Nationaal Archief's Twitter)
      - LinkedInProfile is for PERSON LinkedIn profiles (staff members)
      - These are complementary, not overlapping classes

      **Data Flow**:
      ```
      LinkedIn URL → Exa API → JSON file → LinkedInProfile (this class)
                                    ↑
      PersonObservation.linkedin_profile_path references this file
      ```

      **Example JSON Structure**:
      ```json
      {
        "extraction_metadata": {
          "source_file": "...",
          "staff_id": "org_staff_0001_name",
          "extraction_date": "2025-12-12T22:00:00Z",
          "extraction_method": "exa_crawling_exa",
          "extraction_agent": "claude-opus-4.5",
          "linkedin_url": "https://www.linkedin.com/in/...",
          "cost_usd": 0.001
        },
        "profile_data": {
          "name": "Sander Hulleman",
          "headline": "Stafadviseur PO",
          "location": "Arnhem, Gelderland, Netherlands",
          "connections": "246 connections • 248 followers",
          "about": "Third year student...",
          "experience": [...],
          "education": [...],
          "skills": ["education"],
          "languages": ["English - Native or bilingual"],
          "profile_image_url": "https://media.licdn.com/..."
        },
        "heritage_relevance": {
          "is_heritage_relevant": true,
          "heritage_types": ["E"],
          "rationale": "Education sector professional..."
        },
        "source_organization": "the-dutch-inspectorate-of-education",
        "whatsapp_enrichment": {...}
      }
      ```

    # Declares this class as the root element of serialized instance documents.
    tree_root: true

    exact_mappings:
      - schema:ProfilePage
    # NOTE(review): schema:Person denotes the person rather than the profile
    # document; confirm that a close (vs. related) mapping is intended.
    close_mappings:
      - foaf:PersonalProfileDocument
      - schema:Person

    # The five top-level keys of the example JSON structure in the description.
    slots:
      - extraction_metadata
      - profile_data
      - heritage_relevance
      - source_organization
      - whatsapp_enrichment

    # Class-specific refinements of the slots declared in the top-level
    # `slots:` section of this file.
    slot_usage:
      # Mandatory; nested in place as an object.
      extraction_metadata:
        description: |
          Provenance metadata for the extraction activity.
          Records how, when, and by what agent this profile was extracted.
          See ExtractionMetadata class for field definitions.
        range: ExtractionMetadata
        required: true
        inlined: true

      # Mandatory; nested in place as an object.
      profile_data:
        description: |
          Core profile data extracted from LinkedIn.
          Contains personal info, career history, education, skills, languages.
          See LinkedInProfileData class for field definitions.
        range: LinkedInProfileData
        required: true
        inlined: true

      # Not marked required, so the key may be absent.
      heritage_relevance:
        description: |
          Classification of this person's relevance to heritage sectors.
          See HeritageRelevance class for scoring guidelines.
        range: HeritageRelevance
        inlined: true

      # The pattern admits only lowercase ASCII letters, digits and hyphens.
      # NOTE(review): prov:wasInfluencedBy is paired with a plain string range
      # here; confirm this mapping is intended for a slug literal.
      source_organization:
        description: |
          Slug identifier of the organization from which this profile was discovered.
          Matches the custodian slug used in staff list parsing.
          Format: lowercase with hyphens (e.g., "rijksmuseum", "nationaal-archief")
        slot_uri: prov:wasInfluencedBy
        range: string
        pattern: "^[a-z0-9-]+$"
        examples:
          - value: "the-dutch-inspectorate-of-education"
            description: "Organization where person was discovered as staff"
          - value: "rijksmuseum"
            description: "Heritage institution employer"

      # Not marked required, so the key may be absent.
      whatsapp_enrichment:
        description: |
          Optional WhatsApp business likelihood enrichment.
          Added by enrichment scripts to assess digital communication capabilities.
        range: WhatsAppEnrichment
        inlined: true

    comments:
      - "This is the root class for person entity JSON files"
      - "PersonObservation.linkedin_profile_path references files containing this data"
      - "See AGENTS.md Rule 20 for person entity file requirements"
      - "See AGENTS.md Rule 27 for person-custodian data architecture"

    see_also:
      - "https://schema.org/ProfilePage"
      - "https://nde.nl/ontology/hc/class/PersonObservation"
      - "https://nde.nl/ontology/hc/class/SocialMediaProfile"

  # Person-level content of a LinkedIn profile; used as the range of the
  # `profile_data` slot on LinkedInProfile.
  # NOTE(review): the example JSON in the LinkedInProfile description uses the
  # keys `name`, `location`, `connections`, `about` and a string-valued
  # `languages`, while the slots here are named profile_name, profile_location,
  # connections_text, about_text and languages_raw. Confirm how raw keys are
  # mapped before validating entity files directly against this class.
  LinkedInProfileData:
    class_uri: schema:Person
    description: |
      Core profile data extracted from a LinkedIn profile.

      Contains the person's professional information including:
      - Basic info (name, headline, location, connections)
      - About/summary text
      - Career history (experience array)
      - Education history (education array)
      - Skills and languages
      - Profile image URL

      **Note on Data Representation**:
      - Raw strings are preserved for provenance (e.g., connections text)
      - Nested objects use defined classes (WorkExperience, EducationCredential)
      - Skills are simple strings (not structured objects)
      - Languages may be raw strings or LanguageProficiency objects

    exact_mappings:
      - schema:Person
    close_mappings:
      - foaf:Person

    slots:
      - profile_name
      - profile_linkedin_url
      - headline
      - profile_location
      - connections_text
      - about_text
      - experience
      - education
      - skills
      - languages_raw
      - languages
      - profile_image_url

    # Class-specific refinements of the slots declared in the top-level
    # `slots:` section of this file.
    slot_usage:
      # The only required slot of this class.
      profile_name:
        description: |
          Full name of the person as displayed on LinkedIn.
        slot_uri: schema:name
        range: string
        required: true
        examples:
          - value: "Sander Hulleman"
          - value: "Jan van der Berg"

      # The pattern accepts only https://www.linkedin.com/in/<slug> with an
      # optional trailing slash, where <slug> is lowercase ASCII letters,
      # digits and hyphens.
      # NOTE(review): uppercase or percent-encoded slugs and other LinkedIn
      # host names fail this pattern; confirm against real data.
      profile_linkedin_url:
        description: |
          LinkedIn profile URL for this person.
          Duplicated from extraction_metadata for convenience.
        slot_uri: schema:url
        range: uri
        pattern: "^https://www\\.linkedin\\.com/in/[a-z0-9-]+/?$"
        examples:
          - value: "https://www.linkedin.com/in/sander-hulleman-5017b9105"

      headline:
        description: |
          Professional headline/tagline from LinkedIn.
          Typically includes current job title and/or professional identity.
        slot_uri: schema:jobTitle
        range: string
        examples:
          - value: "Stafadviseur PO"
            description: "Dutch job title"
          - value: "Senior Curator | Rijksmuseum"
            description: "Title with organization"
          - value: "Digital Archivist | Heritage Data Specialist"
            description: "Multiple roles"

      profile_location:
        description: |
          Location as displayed on LinkedIn profile.
          Format varies: "City, Region, Country" or "City, Country"
        slot_uri: schema:homeLocation
        range: string
        examples:
          - value: "Arnhem, Gelderland, Netherlands"
          - value: "Amsterdam, Netherlands"

      # NOTE(review): shares slot_uri schema:description with about_text, so
      # the two slots are indistinguishable by predicate; confirm intended.
      connections_text:
        description: |
          Raw connections/followers text from LinkedIn.
          Format: "X connections • Y followers"
          Preserved as-is for provenance.
        slot_uri: schema:description
        range: string
        examples:
          - value: "246 connections • 248 followers"
          - value: "500+ connections"

      about_text:
        description: |
          About/summary section text from LinkedIn profile.
          May be absent if person hasn't written a summary.
        slot_uri: schema:description
        range: string
        examples:
          - value: "Third year student at Stenden University..."

      # Multivalued and serialized as a list of WorkExperience objects.
      experience:
        description: |
          Work experience entries from LinkedIn.
          Array of WorkExperience objects with job title, company, dates, location.
        range: WorkExperience
        multivalued: true
        inlined_as_list: true

      # Multivalued and serialized as a list of EducationCredential objects.
      education:
        description: |
          Education entries from LinkedIn.
          Array of EducationCredential objects with school, degree, years.
        range: EducationCredential
        multivalued: true
        inlined_as_list: true

      # Each example below is a single element of the multivalued slot: an
      # example `value` is one string, not a list.
      skills:
        description: |
          Skills listed on LinkedIn profile.
          Simple string array (not structured objects).
        slot_uri: schema:knowsAbout
        range: string
        multivalued: true
        examples:
          - value: "education"
          - value: "teaching"
          - value: "curriculum development"

      # Each example below is a single element of the multivalued slot: an
      # example `value` is one string, not a list.
      languages_raw:
        description: |
          Raw language strings as extracted from LinkedIn.
          Format: "Language - Proficiency level"
          Use this when storing unprocessed data.
        range: string
        multivalued: true
        examples:
          - value: "English - Native or bilingual"
          - value: "Dutch - Native or bilingual"

      # Structured counterpart of languages_raw.
      languages:
        description: |
          Parsed language proficiency entries.
          Array of LanguageProficiency objects with language name, code, level.
          Use this when storing processed/structured data.
        range: LanguageProficiency
        multivalued: true
        inlined_as_list: true

      # The pattern requires the URL to start with https://media.licdn.com/.
      profile_image_url:
        description: |
          URL to the LinkedIn profile photo.
          Should be the actual CDN URL (media.licdn.com), not overlay page.
          See AGENTS.md Rule 16 for photo URL requirements.
        slot_uri: schema:image
        range: uri
        pattern: "^https://media\\.licdn\\.com/.*$"
        examples:
          - value: "https://media.licdn.com/dms/image/v2/C4E03AQHoGyR6G0kphA/profile-displayphoto-shrink_200_200/..."

    comments:
      - "Inlined within LinkedInProfile as profile_data"
      - "experience and education use inlined_as_list for JSON array representation"
      - "languages_raw preserves original strings; languages has parsed objects"
      - "profile_image_url must be CDN URL per AGENTS.md Rule 16"

  # Container for the optional `whatsapp_enrichment` section; groups three
  # nested objects, none of which is marked required.
  WhatsAppEnrichment:
    class_uri: hc:WhatsAppEnrichment
    description: |
      WhatsApp business likelihood enrichment data.

      Added by enrichment scripts to assess whether a person is likely
      to use WhatsApp for professional/business communication.

      **Assessment Factors**:
      - Digital technology indicators in profile
      - Role type (customer-facing, technical, etc.)
      - Industry/sector norms
      - Geographic region (WhatsApp prevalence varies)

    slots:
      - digital_professional
      - whatsapp_business_likelihood
      - enrichment_metadata_whatsapp

    # Each slot points at a class defined later in this file and is nested
    # in place (inlined) rather than referenced.
    slot_usage:
      digital_professional:
        description: |
          Assessment of digital/technology proficiency.
        range: DigitalProfessionalAssessment
        inlined: true

      whatsapp_business_likelihood:
        description: |
          Likelihood score for WhatsApp business usage.
        range: WhatsAppLikelihood
        inlined: true

      enrichment_metadata_whatsapp:
        description: |
          Metadata about the enrichment process.
        range: WhatsAppEnrichmentMetadata
        inlined: true

  # Digital-proficiency verdict nested under WhatsAppEnrichment: a boolean
  # flag, the supporting indicators, and a free-text confidence label.
  DigitalProfessionalAssessment:
    class_uri: hc:DigitalProfessionalAssessment
    description: |
      Assessment of a person's digital/technology proficiency.
    slots:
      - likely_whatsapp_proficient
      - digital_indicators
      - digital_confidence

    slot_usage:
      # Boolean flag.
      likely_whatsapp_proficient:
        description: 'Whether person is likely proficient with WhatsApp'
        range: boolean

      # List of free-text indicators.
      digital_indicators:
        description: 'Indicators of digital proficiency from profile'
        range: string
        multivalued: true

      # Plain string: the three levels named in the description are not
      # enforced by an enum or pattern.
      digital_confidence:
        description: 'Confidence level: low, medium, high'
        range: string

  # Scored likelihood nested under WhatsAppEnrichment: a bounded numeric
  # score, its scale, categorical level, confidence, factors and timestamp.
  WhatsAppLikelihood:
    class_uri: hc:WhatsAppLikelihood
    description: |
      Likelihood score for WhatsApp business usage.
    slots:
      - likelihood_score
      - max_likelihood_score
      - likelihood_level
      - likelihood_confidence
      - likelihood_factors
      - assessment_date

    slot_usage:
      # Integer constrained to the closed interval 0..100.
      likelihood_score:
        description: 'Numeric score (0-100)'
        range: integer
        minimum_value: 0
        maximum_value: 100

      # Integer with no bounds declared.
      max_likelihood_score:
        description: 'Maximum possible score (typically 100)'
        range: integer

      # Plain string: the levels named in the description are not enforced.
      likelihood_level:
        description: 'Categorical level: low, medium, high'
        range: string

      # Float constrained to the closed interval 0.0..1.0.
      likelihood_confidence:
        description: 'Confidence in the assessment (0.0-1.0)'
        range: float
        minimum_value: 0.0
        maximum_value: 1.0

      # List of free-text factors.
      likelihood_factors:
        description: 'Factors contributing to the score'
        range: string
        multivalued: true

      # Timestamp of the assessment.
      assessment_date:
        description: 'When the assessment was performed (ISO 8601)'
        range: datetime

  # Bookkeeping for an enrichment run nested under WhatsAppEnrichment:
  # timestamp, method, data source, and two boolean attestations.
  WhatsAppEnrichmentMetadata:
    class_uri: hc:WhatsAppEnrichmentMetadata
    description: |
      Metadata about the WhatsApp enrichment process.
    slots:
      - enriched_date
      - enrichment_method_whatsapp
      - data_source_whatsapp
      - no_fabrication
      - all_data_real

    slot_usage:
      # Timestamp of the enrichment run.
      enriched_date:
        description: 'When enrichment was performed (ISO 8601)'
        range: datetime

      # Free-text method label.
      enrichment_method_whatsapp:
        description: 'Method used for enrichment'
        range: string
        examples:
          - value: 'linkedin_profile_analysis'

      # Free-text source label.
      data_source_whatsapp:
        description: 'Source of data for enrichment'
        range: string
        examples:
          - value: 'public_linkedin_profile'

      # Boolean attestation.
      no_fabrication:
        description: 'Confirms no data was fabricated'
        range: boolean

      # Boolean attestation.
      all_data_real:
        description: 'Confirms all data is from real sources'
        range: boolean

# Top-level slot definitions.
# Baseline description and range for every slot listed by the classes in this
# file; those classes refine the slots further through `slot_usage`.
slots:
  # --- LinkedInProfile slots ---
  extraction_metadata:
    description: 'Provenance metadata for the extraction activity'
    range: ExtractionMetadata

  profile_data:
    description: 'Core profile data from LinkedIn'
    range: LinkedInProfileData

  heritage_relevance:
    description: 'Heritage sector classification'
    range: HeritageRelevance

  source_organization:
    description: 'Organization slug where person was discovered'
    range: string

  whatsapp_enrichment:
    description: 'WhatsApp business likelihood enrichment'
    range: WhatsAppEnrichment

  # --- LinkedInProfileData slots ---
  profile_name:
    description: 'Full name of the person'
    range: string

  profile_linkedin_url:
    description: 'LinkedIn profile URL'
    range: uri

  headline:
    description: 'Professional headline/tagline'
    range: string

  profile_location:
    description: 'Location as displayed on profile'
    range: string

  connections_text:
    description: 'Raw connections/followers text'
    range: string

  about_text:
    description: 'About/summary section text'
    range: string

  experience:
    description: 'Work experience entries'
    range: WorkExperience
    multivalued: true

  education:
    description: 'Education entries'
    range: EducationCredential
    multivalued: true

  skills:
    description: 'Skills listed on profile'
    range: string
    multivalued: true

  languages_raw:
    description: 'Raw language strings'
    range: string
    multivalued: true

  languages:
    description: 'Parsed language proficiency entries'
    range: LanguageProficiency
    multivalued: true

  profile_image_url:
    description: 'Profile photo URL'
    range: uri

  # --- WhatsAppEnrichment slots ---
  digital_professional:
    description: 'Digital proficiency assessment'
    range: DigitalProfessionalAssessment

  whatsapp_business_likelihood:
    description: 'WhatsApp business usage likelihood'
    range: WhatsAppLikelihood

  enrichment_metadata_whatsapp:
    description: 'WhatsApp enrichment metadata'
    range: WhatsAppEnrichmentMetadata

  # --- DigitalProfessionalAssessment slots ---
  likely_whatsapp_proficient:
    description: 'Whether person is likely WhatsApp proficient'
    range: boolean

  digital_indicators:
    description: 'Indicators of digital proficiency'
    range: string
    multivalued: true

  digital_confidence:
    description: 'Digital proficiency confidence level'
    range: string

  # --- WhatsAppLikelihood slots ---
  likelihood_score:
    description: 'Numeric likelihood score'
    range: integer

  max_likelihood_score:
    description: 'Maximum possible score'
    range: integer

  likelihood_level:
    description: 'Categorical likelihood level'
    range: string

  likelihood_confidence:
    description: 'Confidence in the assessment'
    range: float

  likelihood_factors:
    description: 'Factors contributing to score'
    range: string
    multivalued: true

  assessment_date:
    description: 'When assessment was performed'
    range: datetime

  # --- WhatsAppEnrichmentMetadata slots ---
  enriched_date:
    description: 'When enrichment was performed'
    range: datetime

  enrichment_method_whatsapp:
    description: 'Method used for enrichment'
    range: string

  data_source_whatsapp:
    description: 'Data source for enrichment'
    range: string

  no_fabrication:
    description: 'Confirms no data was fabricated'
    range: boolean

  all_data_real:
    description: 'Confirms all data is real'
    range: boolean