diff --git a/schemas/20251121/linkml/modules/classes/ConnectionNetwork.yaml b/schemas/20251121/linkml/modules/classes/ConnectionNetwork.yaml new file mode 100644 index 0000000000..c0f89dce9d --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/ConnectionNetwork.yaml @@ -0,0 +1,403 @@ +# Connection Network Class +# Collection of LinkedIn connections with source metadata + +id: https://nde.nl/ontology/hc/class/ConnectionNetwork +name: connection_network_class +title: Connection Network Class +version: 1.0.0 + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + prov: http://www.w3.org/ns/prov# + dct: http://purl.org/dc/terms/ + xsd: http://www.w3.org/2001/XMLSchema# + +imports: + - linkml:types + - ../metadata + - ./PersonConnection + +default_range: string + +classes: + + ConnectionNetwork: + class_uri: schema:ItemList + tree_root: true + description: | + Collection of LinkedIn network connections with source metadata. + + This is the root class for connection JSON files stored at: + `data/custodian/person/connection/bu/{linkedin_slug}_connections_{timestamp}.json` + + Each file contains: + - **source_metadata**: Provenance about the extraction (who, when, how) + - **connections**: Array of PersonConnection entries (the actual network data) + - **network_analysis**: Optional aggregated statistics + + **Use Cases**: + - Heritage sector network analysis + - Cross-custodian relationship discovery + - Staff member connection patterns + - Professional community mapping + + **File Naming Convention**: + `{linkedin-slug}_connections_{ISO-timestamp}.json` + + Example: `giovannafossati_connections_20251209T220000Z.json` + + **Example JSON Structure**: + ```json + { + "source_metadata": { + "source_url": "https://www.linkedin.com/search/results/people/...", + "scraped_timestamp": "2025-12-09T22:00:00Z", + "scrape_method": "manual_linkedin_browse", + "target_profile": "giovannafossati", + "target_name": "Giovanna 
Fossati", + "connections_extracted": 776 + }, + "connections": [ + { "connection_id": "...", "name": "...", ... } + ], + "network_analysis": { + "total_connections_extracted": 776, + "heritage_relevant_count": 456, + "heritage_relevant_percentage": 58.8 + } + } + ``` + + exact_mappings: + - schema:ItemList + close_mappings: + - prov:Collection + + slots: + - source_metadata + - connections + - network_analysis + + slot_usage: + source_metadata: + description: "Provenance metadata about the connection extraction" + range: ConnectionSourceMetadata + required: true + inlined: true + + connections: + description: "Array of connection entries from the LinkedIn network" + range: PersonConnection + required: true + multivalued: true + inlined: true + inlined_as_list: true + + network_analysis: + description: "Aggregated statistics about the connection network" + range: NetworkAnalysis + inlined: true + + comments: + - "Root class for connection network JSON files (tree_root: true)" + - "Per AGENTS.md Rule 15: ALL connections must be fully registered" + - "Enables heritage sector network analysis" + - "File naming: {linkedin-slug}_connections_{timestamp}.json" + + see_also: + - "https://schema.org/ItemList" + + ConnectionSourceMetadata: + class_uri: prov:Activity + description: | + Provenance metadata about how the connections were extracted. 
+ + Records the extraction context including: + - Source URL (LinkedIn search or profile page) + - When the extraction occurred + - Which method was used (manual browse, automated scrape) + - Target profile being analyzed + - Count of connections extracted + + **Scrape Methods**: + - manual_linkedin_browse: Manual copy-paste while logged in + - linkedin_html_parser: Parsed from saved HTML file + - exa_search: Extracted via Exa API + + exact_mappings: + - prov:Activity + + slots: + - source_url + - scraped_timestamp + - scrape_method + - target_profile + - target_name + - connections_extracted + - notes + + slot_usage: + source_url: + description: | + URL of the LinkedIn page where connections were extracted from. + Usually a LinkedIn search results URL or profile connections page. + slot_uri: prov:used + range: uri + required: true + examples: + - value: "https://www.linkedin.com/search/results/people/?network=%5B%22F%22%2C%22S%22%2C%22O%22%5D" + description: "LinkedIn connection search URL" + + scraped_timestamp: + description: | + ISO 8601 timestamp when the connections were extracted. + Critical for tracking network changes over time. + slot_uri: prov:endedAtTime + range: datetime + required: true + examples: + - value: "2025-12-09T22:00:00Z" + + scrape_method: + description: | + Method used to extract the connection data. + + Values: + - manual_linkedin_browse: Manual extraction while logged in + - linkedin_html_parser: Parsed from saved HTML file + - exa_search: Extracted via Exa API + slot_uri: prov:wasAssociatedWith + range: ScrapeMethodEnum + required: true + examples: + - value: "manual_linkedin_browse" + + target_profile: + description: | + LinkedIn slug of the profile whose connections were extracted. + Format: lowercase alphanumeric with hyphens. 
+ slot_uri: dct:subject + range: string + required: true + pattern: "^[a-z0-9-]+$" + examples: + - value: "giovannafossati" + - value: "alexandr-belov-bb547b46" + + target_name: + description: | + Full display name of the target profile. + The person whose connections were extracted. + slot_uri: schema:name + range: string + required: true + examples: + - value: "Giovanna Fossati" + - value: "Alexandr Belov" + + connections_extracted: + description: | + Total number of connections extracted from this source. + Used for validation and completeness tracking. + slot_uri: schema:numberOfItems + range: integer + required: true + minimum_value: 0 + examples: + - value: 776 + + notes: + description: | + Optional notes about the extraction process. + May reference raw source files or explain any issues. + slot_uri: schema:description + range: string + examples: + - value: "Raw scrape in giovannafossati_connections_20251209T220000Z_note-max100p-1st2nd3th.md" + + comments: + - "Aligns with PROV-O Activity pattern" + - "scraped_timestamp maps to prov:endedAtTime" + - "target_profile is the LinkedIn slug being analyzed" + + NetworkAnalysis: + class_uri: schema:DataFeedItem + description: | + Aggregated statistics about the connection network. 
+ + Provides summary metrics for quick analysis: + - Total connections extracted + - Heritage-relevant count and percentage + - Breakdown by heritage type (GLAMORCUBESFIXPHDNT) + + **Example**: + ```json + { + "total_connections_extracted": 776, + "heritage_relevant_count": 456, + "heritage_relevant_percentage": 58.8, + "connections_by_heritage_type": { + "A": 45, + "M": 89, + "D": 112, + "R": 78 + } + } + ``` + + slots: + - total_connections_extracted + - heritage_relevant_count + - heritage_relevant_percentage + - connections_by_heritage_type + + slot_usage: + total_connections_extracted: + description: "Total number of connections in the network" + slot_uri: schema:numberOfItems + range: integer + required: true + minimum_value: 0 + + heritage_relevant_count: + description: "Number of connections marked as heritage-relevant" + slot_uri: hc:heritageRelevantCount + range: integer + required: true + minimum_value: 0 + + heritage_relevant_percentage: + description: "Percentage of connections that are heritage-relevant (0-100)" + slot_uri: hc:heritageRelevantPercentage + range: float + minimum_value: 0.0 + maximum_value: 100.0 + examples: + - value: 58.8 + + connections_by_heritage_type: + description: | + Breakdown of heritage-relevant connections by type code. + Keys are single-letter GLAMORCUBESFIXPHDNT codes. + slot_uri: hc:connectionsByHeritageType + range: HeritageTypeCount + multivalued: true + inlined: true + inlined_as_list: false # serialized as a dict keyed by heritage_type_code, as in the documented JSON example + + comments: + - "Optional aggregation - can be computed from connections array" + - "Useful for quick heritage sector analysis" + + HeritageTypeCount: + class_uri: schema:PropertyValue + description: | + Count of connections for a specific heritage type. + Used in network_analysis.connections_by_heritage_type. 
+ + slots: + - heritage_type_code + - count + + slot_usage: + heritage_type_code: + description: "Single-letter heritage type code (G,L,A,M,O,R,C,U,B,E,S,F,I,X,P,H,D,N,T)" + slot_uri: schema:propertyID + range: string + key: true # key implies required; it is what allows the compact {code: count} dict form + pattern: "^[GLAMORCUBESFIXPHDNT]$" + + count: + description: "Number of connections of this heritage type" + slot_uri: schema:value + range: integer + required: true + minimum_value: 0 + +enums: + ScrapeMethodEnum: + description: | + Methods used to extract LinkedIn connection data. + Determines data quality and potential limitations. + permissible_values: + manual_linkedin_browse: + description: "Manual extraction while logged into LinkedIn" + meaning: prov:Person # performed by a human, not software (same mapping as ExtractionMethodEnum.manual) + linkedin_html_parser: + description: "Parsed from saved LinkedIn HTML file" + meaning: prov:SoftwareAgent + exa_search: + description: "Extracted via Exa API search" + meaning: prov:SoftwareAgent + automated_scraper: + description: "Automated scraping tool" + meaning: prov:SoftwareAgent + +slots: + source_metadata: + description: "Provenance metadata about the extraction" + range: ConnectionSourceMetadata + + connections: + description: "Array of connection entries" + range: PersonConnection + multivalued: true + + network_analysis: + description: "Aggregated network statistics" + range: NetworkAnalysis + + source_url: + description: "URL where data was extracted from" + range: uri + + scraped_timestamp: + description: "When the extraction occurred" + range: datetime + + scrape_method: + description: "Method used for extraction" + range: ScrapeMethodEnum + + target_profile: + description: "LinkedIn slug of target profile" + range: string + + target_name: + description: "Display name of target profile" + range: string + + connections_extracted: + description: "Number of connections extracted" + range: integer + + notes: + description: "Optional notes about extraction" + range: string + + total_connections_extracted: + description: "Total connection count" + range: 
integer + + heritage_relevant_count: + description: "Count of heritage-relevant connections" + range: integer + + heritage_relevant_percentage: + description: "Percentage of heritage-relevant connections" + range: float + + connections_by_heritage_type: + description: "Breakdown by heritage type code" + range: HeritageTypeCount + multivalued: true + + heritage_type_code: + description: "Single-letter heritage type code" + range: string + + count: + description: "Count value" + range: integer diff --git a/schemas/20251121/linkml/modules/classes/EducationCredential.yaml b/schemas/20251121/linkml/modules/classes/EducationCredential.yaml new file mode 100644 index 0000000000..f228c414b9 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/EducationCredential.yaml @@ -0,0 +1,242 @@ +# Education Credential Class +# Education entries from academic history + +id: https://nde.nl/ontology/hc/class/EducationCredential +name: education_credential_class +title: Education Credential Class +version: 1.0.0 + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + org: http://www.w3.org/ns/org# + ceds: http://purl.org/ceds/ + xsd: http://www.w3.org/2001/XMLSchema# + +imports: + - linkml:types + - ../metadata + +default_range: string + +classes: + + EducationCredential: + class_uri: schema:EducationalOccupationalCredential + description: | + A single education entry from a person's academic history. + + Models educational credentials with institution, degree, field of study, + and date range. Used for LinkedIn education sections. + + **Schema.org Alignment**: + - Represents schema:EducationalOccupationalCredential + - Institution is schema:CollegeOrUniversity or schema:EducationalOrganization + + **Use Cases**: + - LinkedIn profile education entries + - CV/resume academic history + - Heritage education background tracking (museology, archival science, etc.) 
+ + **Example JSON Structure**: + ```json + { + "school": "Stenden", + "degree": "Bachelor of Education - BEd, Elementary Education and Teaching", + "years": "2013 - 2017" + } + ``` + + **Heritage Relevance**: + - Track heritage-related degrees (museology, archival science, art history) + - Identify heritage education pathways + + exact_mappings: + - schema:EducationalOccupationalCredential + close_mappings: + - schema:alumniOf # schema.org property IRIs are lowerCamelCase; schema:AlumniOf does not resolve + - ceds:Credential + related_mappings: + - schema:Course + - schema:EducationalOccupationalProgram # schema.org defines no Degree type + + slots: + - institution_name + - institution_linkedin_url + - degree_name + - field_of_study + - education_years_raw + - education_start_year + - education_end_year + - activities_societies + - education_description + - heritage_education + + slot_usage: + institution_name: + description: | + Name of the educational institution. + Schema.org: schema:name of schema:EducationalOrganization + slot_uri: schema:recognizedBy # the organization behind the credential; educationalCredentialAwarded describes what a course awards + range: string + required: true + examples: + - value: "Stenden" + description: "Short institutional name" + - value: "University of Amsterdam" + description: "Full university name" + - value: "Reinwardt Academy" + description: "Heritage-focused institution" + + institution_linkedin_url: + description: | + LinkedIn school/university page URL for the institution. + slot_uri: schema:url + range: uri + pattern: "^https://www\\.linkedin\\.com/school/[a-z0-9-]+/?$" + examples: + - value: "https://www.linkedin.com/school/university-of-amsterdam" + + degree_name: + description: | + Full degree name as displayed on LinkedIn. + May include degree type, field, and specialization combined. 
+ slot_uri: schema:credentialCategory + range: string + examples: + - value: "Bachelor of Education - BEd, Elementary Education and Teaching" + description: "Combined degree type and field" + - value: "Master of Arts - MA" + description: "Degree type only" + - value: "PhD in Museology" + description: "Doctoral degree with specialization" + + field_of_study: + description: | + Primary field of study or major (extracted from degree_name if possible). + slot_uri: schema:about # subject matter of the credential; educationalProgramMode means delivery mode (online/onsite/blended) + range: string + examples: + - value: "Elementary Education and Teaching" + description: "Extracted field of study" + - value: "Art History" + description: "Heritage-related field" + - value: "Archival Science" + description: "Heritage information management" + + education_years_raw: + description: | + Raw year range as extracted from LinkedIn. + Format: "YYYY - YYYY" or "YYYY" for ongoing. + Preserved for provenance; use parsed fields for queries. + slot_uri: schema:description + range: string + examples: + - value: "2013 - 2017" + description: "Completed degree with year range" + - value: "2023" + description: "Ongoing education (current student)" + + education_start_year: + description: | + Year when education began. + slot_uri: schema:startDate + range: integer + minimum_value: 1900 + maximum_value: 2100 + examples: + - value: 2013 + description: "Started in 2013" + + education_end_year: + description: | + Year when education completed. + Null/absent indicates ongoing education. + slot_uri: schema:endDate + range: integer + minimum_value: 1900 + maximum_value: 2100 + examples: + - value: 2017 + description: "Completed in 2017" + - value: null + description: "Ongoing (current student)" + + activities_societies: + description: | + Activities and societies during education. + LinkedIn field for extracurricular involvement. 
+ slot_uri: schema:member + range: string + examples: + - value: "Student Museum Association, Heritage Preservation Club" + + education_description: + description: | + Additional description of educational experience. + May include thesis topics, research focus, etc. + slot_uri: schema:description + range: string + + heritage_education: + description: | + Whether this is heritage-related education. + True for museology, archival science, conservation, art history, etc. + slot_uri: hc:heritageRelevant + range: boolean + ifabsent: "false" + comments: + - "Set to true for heritage-focused degrees" + - "Examples: Museology, Archival Science, Art History, Conservation" + - "Reinwardt Academy, Getty Conservation Institute, etc." + + comments: + - "Inlined in LinkedInProfile.education[] as multivalued list" + - "Preserves raw LinkedIn year formats for provenance" + - "heritage_education enables filtering for heritage-trained professionals" + + see_also: + - "https://schema.org/EducationalOccupationalCredential" + - "https://schema.org/CollegeOrUniversity" + +slots: + institution_name: + description: "Name of the educational institution" + range: string + + institution_linkedin_url: + description: "LinkedIn page URL for the institution" + range: uri + + degree_name: + description: "Full degree name as displayed" + range: string + + field_of_study: + description: "Primary field of study or major" + range: string + + education_years_raw: + description: "Raw year range as extracted" + range: string + + education_start_year: + description: "Year when education began" + range: integer + + education_end_year: + description: "Year when education completed" + range: integer + + activities_societies: + description: "Activities and societies during education" + range: string + + education_description: + description: "Additional description of educational experience" + range: string + + heritage_education: + description: "Whether this is heritage-related education" + range: boolean 
diff --git a/schemas/20251121/linkml/modules/classes/ExtractionMetadata.yaml b/schemas/20251121/linkml/modules/classes/ExtractionMetadata.yaml new file mode 100644 index 0000000000..a9283d2371 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/ExtractionMetadata.yaml @@ -0,0 +1,253 @@ +# Extraction Metadata Class +# Provenance for LinkedIn/web extractions with PROV-O alignment + +id: https://nde.nl/ontology/hc/class/ExtractionMetadata +name: extraction_metadata_class +title: Extraction Metadata Class +version: 1.0.0 + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + prov: http://www.w3.org/ns/prov# + dct: http://purl.org/dc/terms/ + xsd: http://www.w3.org/2001/XMLSchema# + +imports: + - linkml:types + - ../metadata + +default_range: string + +classes: + + ExtractionMetadata: + class_uri: prov:Activity + description: | + Provenance metadata for data extraction activities. + + Records how, when, and by what agent data was extracted from + external sources (LinkedIn, web scraping, APIs). 
+ + **PROV-O Alignment**: + - ExtractionMetadata IS a prov:Activity (the extraction process) + - The extracted data IS the prov:Entity (output of the activity) + - extraction_agent IS the prov:Agent (software/AI that performed extraction) + - source_file/linkedin_url IS prov:used (input to the activity) + + **Use Cases**: + - LinkedIn profile extractions via Exa API + - Web scraping provenance + - Staff list parsing provenance + - Connection network extraction + + **Example JSON Structure**: + ```json + { + "extraction_metadata": { + "source_file": "/path/to/source.json", + "staff_id": "org_staff_0001_name", + "extraction_date": "2025-12-12T22:00:00Z", + "extraction_method": "exa_crawling_exa", + "extraction_agent": "claude-opus-4.5", + "linkedin_url": "https://www.linkedin.com/in/...", + "cost_usd": 0.001 + } + } + ``` + + exact_mappings: + - prov:Activity + close_mappings: + - schema:Action + - dct:ProvenanceStatement + + slots: + - source_file + - staff_id + - extraction_date + - extraction_method + - extraction_agent + - linkedin_url + - cost_usd + - request_id + + slot_usage: + source_file: + description: | + Path to the source file from which data was derived. + PROV-O: prov:used - the entity that was used as input. + slot_uri: prov:used + range: string + examples: + - value: "/data/custodian/person/affiliated/parsed/rijksmuseum_staff_20251210T155416Z.json" + description: "Path to parsed staff list JSON" + + staff_id: + description: | + Unique identifier for the staff member within the source organization. + Format: {org_slug}_staff_{index}_{name_slug} + slot_uri: dct:identifier + range: string + pattern: "^[a-z0-9-]+_staff_[a-z0-9_-]+$" # literal hyphen kept last in the class so no regex dialect reads 9-_ as a range + examples: + - value: "rijksmuseum_staff_0042_jan_van_der_berg" + description: "Staff ID with org prefix, index, and name slug" + + extraction_date: + description: | + ISO 8601 timestamp when the extraction was performed. + PROV-O: prov:endedAtTime - when the activity completed. 
+ slot_uri: prov:endedAtTime + range: datetime + required: true + examples: + - value: "2025-12-12T22:00:00Z" + description: "UTC timestamp of extraction" + + extraction_method: + description: | + The method/tool used to extract the data. + PROV-O: prov:wasAssociatedWith via software agent. + + **Common Values**: + - exa_crawling_exa: Exa AI crawling API + - exa_contents: Exa contents endpoint + - exa_crawling_glm46: Exa + GLM 4.6 extraction + - linkedin_html_parser: Local HTML parsing + - manual: Manual data entry + - firecrawl: Firecrawl web scraping + - playwright: Playwright browser automation + slot_uri: hc:extractionMethod # prov:wasGeneratedBy runs Entity -> Activity and cannot be asserted on this prov:Activity; wasAssociatedWith is already taken by extraction_agent + range: ExtractionMethodEnum + required: true + examples: + - value: "exa_crawling_exa" + description: "Extracted via Exa AI crawling API" + + extraction_agent: + description: | + The AI agent or software that performed the extraction. + PROV-O: prov:wasAssociatedWith - agent associated with the activity. + + **Common Values**: + - claude-opus-4.5: Claude Opus 4.5 (manual extraction) + - glm-4.6: ZhipuAI GLM 4.6 + - automated: Fully automated script (no LLM) + slot_uri: prov:wasAssociatedWith + range: string + examples: + - value: "claude-opus-4.5" + description: "Extracted by Claude Opus 4.5" + - value: "" + description: "Empty string for fully automated extraction" + + linkedin_url: + description: | + LinkedIn profile URL that was extracted. + PROV-O: prov:used - the source entity. + slot_uri: schema:url + range: uri + pattern: "^https://www\\.linkedin\\.com/in/[a-z0-9-]+/?$" + examples: + - value: "https://www.linkedin.com/in/jan-van-der-berg-12345" + description: "LinkedIn profile URL" + + cost_usd: + description: | + API cost in USD for the extraction operation. + Used for tracking extraction costs (Exa API, etc.). 
+ slot_uri: schema:price + range: float + minimum_value: 0.0 + examples: + - value: 0.001 + description: "Exa API call cost" + - value: 0.0 + description: "Free extraction (cached/local)" + + request_id: + description: | + Unique request ID from the extraction service (for tracing). + slot_uri: dct:identifier + range: string + examples: + - value: "exa_12345678-abcd-efgh-ijkl-mnopqrstuv" + description: "Exa API request ID" + + comments: + - "Every person entity file MUST have extraction_metadata" + - "See AGENTS.md Rule 20 for required fields" + - "extraction_agent should be 'claude-opus-4.5' for manual extraction" + - "cost_usd enables budget tracking for API-heavy extractions" + + see_also: + - "https://www.w3.org/TR/prov-o/" + - "https://docs.exa.ai/" + +enums: + ExtractionMethodEnum: + description: | + Enumeration of extraction methods/tools used for person data extraction. + permissible_values: + exa_crawling_exa: + description: "Exa AI crawling API - primary LinkedIn extraction" + meaning: schema:SoftwareApplication + exa_contents: + description: "Exa contents endpoint - cached content retrieval" + meaning: schema:SoftwareApplication + exa_crawling_glm46: + description: "Exa crawling with GLM 4.6 processing" + meaning: schema:SoftwareApplication + linkedin_html_parser: + description: "Local HTML parsing of saved LinkedIn pages" + meaning: schema:SoftwareApplication + manual: + description: "Manual data entry by human operator" + meaning: prov:Person + firecrawl: + description: "Firecrawl web scraping service" + meaning: schema:SoftwareApplication + playwright: + description: "Playwright browser automation" + meaning: schema:SoftwareApplication + web_archive: + description: "Internet Archive Wayback Machine" + meaning: schema:SoftwareApplication + institutional_website: + description: "Direct scraping from institutional website" + meaning: schema:SoftwareApplication + +slots: + source_file: + description: "Path to the source file from which data was derived" + 
range: string + + staff_id: + description: "Unique identifier for staff member within source organization" + range: string + + extraction_date: + description: "ISO 8601 timestamp when extraction was performed" + range: datetime + + extraction_method: + description: "Method/tool used to extract the data" + range: ExtractionMethodEnum + + extraction_agent: + description: "AI agent or software that performed extraction" + range: string + + linkedin_url: + description: "LinkedIn profile URL that was extracted" + range: uri + + cost_usd: + description: "API cost in USD for the extraction operation" + range: float + + request_id: + description: "Unique request ID from extraction service" + range: string diff --git a/schemas/20251121/linkml/modules/classes/HeritageRelevance.yaml b/schemas/20251121/linkml/modules/classes/HeritageRelevance.yaml new file mode 100644 index 0000000000..ef99cc9c96 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/HeritageRelevance.yaml @@ -0,0 +1,168 @@ +# Heritage Relevance Class +# Classification of person's relevance to heritage sectors + +id: https://nde.nl/ontology/hc/class/HeritageRelevance +name: heritage_relevance_class +title: Heritage Relevance Class +version: 1.0.0 + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + skos: http://www.w3.org/2004/02/skos/core# + +imports: + - linkml:types + - ../metadata + - ../enums/HeritageTypeEnum + +default_range: string + +classes: + + HeritageRelevance: + class_uri: hc:HeritageRelevance + description: | + Classification of a person's professional relevance to heritage sectors. + + Captures whether a person works in or is connected to heritage institutions, + which heritage types they relate to, and the reasoning for classification. 
+ + **Use Cases**: + - LinkedIn profile heritage sector classification + - Staff affiliation analysis + - Heritage professional network mapping + + **Example JSON Structure**: + ```json + { + "heritage_relevance": { + "is_heritage_relevant": true, + "heritage_types": ["E"], + "rationale": "Education sector professional at government education inspectorate" + } + } + ``` + + **Scoring Guidelines** (per AGENTS.md Rule 30): + - 0.90-0.95: Senior heritage role, clear title, named institution + - 0.75-0.85: Mid-level role, good institutional context + - 0.60-0.70: Entry-level/support role, technical role + - 0.50-0.55: Intern, unclear relationship + + close_mappings: + - skos:Concept + - schema:DefinedTerm + + slots: + - is_heritage_relevant + - heritage_types + - heritage_relevance_score + - rationale + - primary_heritage_type + + slot_usage: + is_heritage_relevant: + description: | + Whether this person is professionally relevant to heritage sectors. + True if they work at, consult for, or research heritage institutions. + slot_uri: hc:heritageRelevant + range: boolean + required: true + examples: + - value: true + description: "Works at a museum, archive, library, etc." + - value: false + description: "No apparent heritage sector connection" + + heritage_types: + description: | + Single-letter heritage sector codes applicable to this person. + Uses HeritageTypeEnum values (G,L,A,M,O,R,C,U,B,E,S,F,I,X,P,H,D,N,T). + Multiple types possible for cross-domain professionals. + slot_uri: hc:heritageType + range: HeritageTypeEnum + multivalued: true + examples: + - value: ["M"] + description: "Museum sector professional" + - value: ["A", "D"] + description: "Archive + digital heritage (cross-domain)" + - value: ["E"] + description: "Education sector" + + heritage_relevance_score: + description: | + Confidence score for heritage sector classification (0.50-0.95). + NOT the same as heritage_sector_relevance.score (domain expertise). 
+ + This measures EXTRACTION QUALITY/CONFIDENCE, not domain expertise. + + **Scoring Rubric**: + - 0.90-0.95: Senior role, clear title, named heritage institution + - 0.75-0.85: Mid-level role, good institutional context + - 0.60-0.70: Entry-level, technical role, limited details + - 0.50-0.55: Intern, unclear relationship, abbreviated name + slot_uri: hc:confidenceScore + range: float + minimum_value: 0.0 + maximum_value: 1.0 + examples: + - value: 0.85 + description: "High confidence - clear heritage role" + - value: 0.55 + description: "Low confidence - intern position" + + rationale: + description: | + Human-readable explanation of the heritage classification. + Documents why the person was classified with specific types/score. + slot_uri: skos:note + range: string + required: true + examples: + - value: "Education sector professional at government education inspectorate" + - value: "Senior curator at Rijksmuseum with 15+ years experience" + - value: "Digital archivist specializing in AV heritage preservation" + + primary_heritage_type: + description: | + Primary heritage type if multiple apply. + The single most relevant type for this person's current role. 
+ slot_uri: hc:primaryHeritageType + range: HeritageTypeEnum + examples: + - value: "A" + description: "Primary focus is archival work" + + comments: + - "Every person entity profile should have heritage_relevance" + - "See AGENTS.md Rule 30 for scoring guidelines" + - "is_heritage_relevant=false for non-heritage professionals" + - "Multiple heritage_types indicate cross-domain expertise" + + see_also: + - "https://nde.nl/ontology/hc/enum/HeritageTypeEnum" + +slots: + is_heritage_relevant: + description: "Whether person is professionally relevant to heritage sectors" + range: boolean + + heritage_types: + description: "Single-letter heritage sector codes applicable to person" + range: HeritageTypeEnum + multivalued: true + + heritage_relevance_score: + description: "Confidence score for heritage sector classification" + range: float + + rationale: + description: "Explanation of the heritage classification" + range: string + + primary_heritage_type: + description: "Primary heritage type if multiple apply" + range: HeritageTypeEnum diff --git a/schemas/20251121/linkml/modules/classes/LanguageProficiency.yaml b/schemas/20251121/linkml/modules/classes/LanguageProficiency.yaml new file mode 100644 index 0000000000..b610b0a5b4 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/LanguageProficiency.yaml @@ -0,0 +1,164 @@ +# Language Proficiency Class +# Language skills with proficiency level + +id: https://nde.nl/ontology/hc/class/LanguageProficiency +name: language_proficiency_class +title: Language Proficiency Class +version: 1.0.0 + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + dct: http://purl.org/dc/terms/ + +imports: + - linkml:types + - ../metadata + +default_range: string + +classes: + + LanguageProficiency: + class_uri: hc:LanguageProficiency # schema:knowsLanguage is a property IRI and cannot type instances; a project class IRI is used instead + description: | + A language skill with proficiency level. 
+ + Models language abilities as extracted from LinkedIn profiles, + with both raw string and parsed components. + + **Schema.org Alignment**: + - Represents schema:knowsLanguage relation + - Language is schema:Language + + **Use Cases**: + - LinkedIn profile language sections + - Multilingual staff identification + - Heritage institution language capabilities + + **Example JSON Values**: + ```json + [ + "English - Native or bilingual", + "Dutch - Native or bilingual", + "French - Professional working proficiency" + ] + ``` + + **LinkedIn Proficiency Levels**: + - Native or bilingual proficiency + - Full professional proficiency + - Professional working proficiency + - Limited working proficiency + - Elementary proficiency + + exact_mappings: + - schema:knowsLanguage + close_mappings: + - dct:language + + slots: + - language_raw + - language_name + - language_code + - proficiency_level + + slot_usage: + language_raw: + description: | + Raw language string as extracted from LinkedIn. + Format: "Language - Proficiency level" + Preserved for provenance. + slot_uri: schema:description + range: string + examples: + - value: "English - Native or bilingual" + description: "Native English speaker" + - value: "Dutch - Professional working proficiency" + description: "Professional Dutch" + + language_name: + description: | + Language name (parsed from raw string). + slot_uri: schema:name + range: string + examples: + - value: "English" + - value: "Dutch" + - value: "French" + + language_code: + description: | + ISO 639-1 two-letter language code. + Derived from language_name lookup. + slot_uri: dct:language + range: string + pattern: "^[a-z]{2}$" + examples: + - value: "en" + description: "English" + - value: "nl" + description: "Dutch" + - value: "fr" + description: "French" + + proficiency_level: + description: | + Proficiency level as parsed from LinkedIn. + Uses LanguageProficiencyEnum values. 
+ slot_uri: schema:proficiencyLevel + range: LanguageProficiencyEnum + examples: + - value: "NATIVE_BILINGUAL" + description: "Native or bilingual proficiency" + + comments: + - "Inlined in LinkedInProfile.languages[] as multivalued list" + - "Preserves raw LinkedIn format for provenance" + - "ISO 639-1 codes enable language-based filtering" + + see_also: + - "https://schema.org/knowsLanguage" + - "https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes" + +enums: + LanguageProficiencyEnum: + description: | + LinkedIn language proficiency levels. + Based on LinkedIn's standard proficiency descriptions. + permissible_values: + NATIVE_BILINGUAL: + description: "Native or bilingual proficiency" + meaning: schema:Expert + FULL_PROFESSIONAL: + description: "Full professional proficiency" + meaning: schema:Advanced + PROFESSIONAL_WORKING: + description: "Professional working proficiency" + meaning: schema:Intermediate + LIMITED_WORKING: + description: "Limited working proficiency" + meaning: schema:Intermediate + ELEMENTARY: + description: "Elementary proficiency" + meaning: schema:Beginner + UNKNOWN: + description: "Proficiency level not specified" + +slots: + language_raw: + description: "Raw language string as extracted" + range: string + + language_name: + description: "Language name" + range: string + + language_code: + description: "ISO 639-1 language code" + range: string + + proficiency_level: + description: "Proficiency level" + range: LanguageProficiencyEnum diff --git a/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml b/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml new file mode 100644 index 0000000000..21ab5c49a5 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/LinkedInProfile.yaml @@ -0,0 +1,612 @@ +# LinkedIn Profile Class +# Complete LinkedIn profile extraction for person entity files + +id: https://nde.nl/ontology/hc/class/LinkedInProfile +name: linkedin_profile_class +title: LinkedIn Profile Class +version: 1.0.0 + 
+prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + foaf: http://xmlns.com/foaf/0.1/ + prov: http://www.w3.org/ns/prov# + dct: http://purl.org/dc/terms/ + +imports: + - linkml:types + - ../metadata + - ./ExtractionMetadata + - ./WorkExperience + - ./EducationCredential + - ./HeritageRelevance + - ./LanguageProficiency + +default_range: string + +classes: + + LinkedInProfile: + class_uri: schema:ProfilePage + description: | + Complete LinkedIn profile extraction for a person. + + Models the content of person entity JSON files stored at + `data/custodian/person/entity/*.json`. This is the root class + for LinkedIn profile data extracted via Exa API or HTML parsing. + + **Relationship to PersonObservation**: + - PersonObservation.linkedin_profile_path references the file containing + this LinkedInProfile data + - PersonObservation.linkedin_profile_url links to the source URL + - This class models the CONTENT of that file + + **Relationship to SocialMediaProfile**: + - SocialMediaProfile (in same schema) is for CUSTODIAN social media accounts + (e.g., Rijksmuseum's Instagram, Nationaal Archief's Twitter) + - LinkedInProfile is for PERSON LinkedIn profiles (staff members) + - These are complementary, not overlapping classes + + **Data Flow**: + ``` + LinkedIn URL → Exa API → JSON file → LinkedInProfile (this class) + ↑ + PersonObservation.linkedin_profile_path references this file + ``` + + **Example JSON Structure**: + ```json + { + "extraction_metadata": { + "source_file": "...", + "staff_id": "org_staff_0001_name", + "extraction_date": "2025-12-12T22:00:00Z", + "extraction_method": "exa_crawling_exa", + "extraction_agent": "claude-opus-4.5", + "linkedin_url": "https://www.linkedin.com/in/...", + "cost_usd": 0.001 + }, + "profile_data": { + "name": "Sander Hulleman", + "headline": "Stafadviseur PO", + "location": "Arnhem, Gelderland, Netherlands", + "connections": "246 connections • 248 followers", + "about": 
"Third year student...", + "experience": [...], + "education": [...], + "skills": ["education"], + "languages": ["English - Native or bilingual"], + "profile_image_url": "https://media.licdn.com/..." + }, + "heritage_relevance": { + "is_heritage_relevant": true, + "heritage_types": ["E"], + "rationale": "Education sector professional..." + }, + "source_organization": "the-dutch-inspectorate-of-education", + "whatsapp_enrichment": {...} + } + ``` + + tree_root: true + + exact_mappings: + - schema:ProfilePage + close_mappings: + - foaf:PersonalProfileDocument + - schema:Person + + slots: + - extraction_metadata + - profile_data + - heritage_relevance + - source_organization + - whatsapp_enrichment + + slot_usage: + extraction_metadata: + description: | + Provenance metadata for the extraction activity. + Records how, when, and by what agent this profile was extracted. + See ExtractionMetadata class for field definitions. + range: ExtractionMetadata + required: true + inlined: true + + profile_data: + description: | + Core profile data extracted from LinkedIn. + Contains personal info, career history, education, skills, languages. + See LinkedInProfileData class for field definitions. + range: LinkedInProfileData + required: true + inlined: true + + heritage_relevance: + description: | + Classification of this person's relevance to heritage sectors. + See HeritageRelevance class for scoring guidelines. + range: HeritageRelevance + inlined: true + + source_organization: + description: | + Slug identifier of the organization from which this profile was discovered. + Matches the custodian slug used in staff list parsing. 
+ Format: lowercase with hyphens (e.g., "rijksmuseum", "nationaal-archief") + slot_uri: prov:wasInfluencedBy + range: string + pattern: "^[a-z0-9-]+$" + examples: + - value: "the-dutch-inspectorate-of-education" + description: "Organization where person was discovered as staff" + - value: "rijksmuseum" + description: "Heritage institution employer" + + whatsapp_enrichment: + description: | + Optional WhatsApp business likelihood enrichment. + Added by enrichment scripts to assess digital communication capabilities. + range: WhatsAppEnrichment + inlined: true + + comments: + - "This is the root class for person entity JSON files" + - "PersonObservation.linkedin_profile_path references files containing this data" + - "See AGENTS.md Rule 20 for person entity file requirements" + - "See AGENTS.md Rule 27 for person-custodian data architecture" + + see_also: + - "https://schema.org/ProfilePage" + - "https://nde.nl/ontology/hc/class/PersonObservation" + - "https://nde.nl/ontology/hc/class/SocialMediaProfile" + + LinkedInProfileData: + class_uri: schema:Person + description: | + Core profile data extracted from a LinkedIn profile. 
+ + Contains the person's professional information including: + - Basic info (name, headline, location, connections) + - About/summary text + - Career history (experience array) + - Education history (education array) + - Skills and languages + - Profile image URL + + **Note on Data Representation**: + - Raw strings are preserved for provenance (e.g., connections text) + - Nested objects use defined classes (WorkExperience, EducationCredential) + - Skills are simple strings (not structured objects) + - Languages may be raw strings or LanguageProficiency objects + + exact_mappings: + - schema:Person + close_mappings: + - foaf:Person + + slots: + - profile_name + - profile_linkedin_url + - headline + - profile_location + - connections_text + - about_text + - experience + - education + - skills + - languages_raw + - languages + - profile_image_url + + slot_usage: + profile_name: + description: | + Full name of the person as displayed on LinkedIn. + slot_uri: schema:name + range: string + required: true + examples: + - value: "Sander Hulleman" + - value: "Jan van der Berg" + + profile_linkedin_url: + description: | + LinkedIn profile URL for this person. + Duplicated from extraction_metadata for convenience. + slot_uri: schema:url + range: uri + pattern: "^https://www\\.linkedin\\.com/in/[a-z0-9-]+/?$" + examples: + - value: "https://www.linkedin.com/in/sander-hulleman-5017b9105" + + headline: + description: | + Professional headline/tagline from LinkedIn. + Typically includes current job title and/or professional identity. + slot_uri: schema:jobTitle + range: string + examples: + - value: "Stafadviseur PO" + description: "Dutch job title" + - value: "Senior Curator | Rijksmuseum" + description: "Title with organization" + - value: "Digital Archivist | Heritage Data Specialist" + description: "Multiple roles" + + profile_location: + description: | + Location as displayed on LinkedIn profile. 
+ Format varies: "City, Region, Country" or "City, Country" + slot_uri: schema:homeLocation + range: string + examples: + - value: "Arnhem, Gelderland, Netherlands" + - value: "Amsterdam, Netherlands" + + connections_text: + description: | + Raw connections/followers text from LinkedIn. + Format: "X connections • Y followers" + Preserved as-is for provenance. + slot_uri: schema:description + range: string + examples: + - value: "246 connections • 248 followers" + - value: "500+ connections" + + about_text: + description: | + About/summary section text from LinkedIn profile. + May be absent if person hasn't written a summary. + slot_uri: schema:description + range: string + examples: + - value: "Third year student at Stenden University..." + + experience: + description: | + Work experience entries from LinkedIn. + Array of WorkExperience objects with job title, company, dates, location. + range: WorkExperience + multivalued: true + inlined_as_list: true + + education: + description: | + Education entries from LinkedIn. + Array of EducationCredential objects with school, degree, years. + range: EducationCredential + multivalued: true + inlined_as_list: true + + skills: + description: | + Skills listed on LinkedIn profile. + Simple string array (not structured objects). + slot_uri: schema:knowsAbout + range: string + multivalued: true + # Each example shows ONE list member; an example value is a scalar string, not a YAML list + examples: + - value: "education" + - value: "teaching" + - value: "curriculum development" + + languages_raw: + description: | + Raw language strings as extracted from LinkedIn. + Format: "Language - Proficiency level" + Use this when storing unprocessed data. + range: string + multivalued: true + # Each example shows ONE list member; an example value is a scalar string, not a YAML list + examples: + - value: "English - Native or bilingual" + - value: "Dutch - Native or bilingual" + + languages: + description: | + Parsed language proficiency entries. + Array of LanguageProficiency objects with language name, code, level. + Use this when storing processed/structured data.
+ range: LanguageProficiency + multivalued: true + inlined_as_list: true + + profile_image_url: + description: | + URL to the LinkedIn profile photo. + Should be the actual CDN URL (media.licdn.com), not overlay page. + See AGENTS.md Rule 16 for photo URL requirements. + slot_uri: schema:image + range: uri + pattern: "^https://media\\.licdn\\.com/.*$" + examples: + - value: "https://media.licdn.com/dms/image/v2/C4E03AQHoGyR6G0kphA/profile-displayphoto-shrink_200_200/..." + + comments: + - "Inlined within LinkedInProfile as profile_data" + - "experience and education use inlined_as_list for JSON array representation" + - "languages_raw preserves original strings; languages has parsed objects" + - "profile_image_url must be CDN URL per AGENTS.md Rule 16" + + WhatsAppEnrichment: + class_uri: hc:WhatsAppEnrichment + description: | + WhatsApp business likelihood enrichment data. + + Added by enrichment scripts to assess whether a person is likely + to use WhatsApp for professional/business communication. + + **Assessment Factors**: + - Digital technology indicators in profile + - Role type (customer-facing, technical, etc.) + - Industry/sector norms + - Geographic region (WhatsApp prevalence varies) + + slots: + - digital_professional + - whatsapp_business_likelihood + - enrichment_metadata_whatsapp + + slot_usage: + digital_professional: + description: | + Assessment of digital/technology proficiency. + range: DigitalProfessionalAssessment + inlined: true + + whatsapp_business_likelihood: + description: | + Likelihood score for WhatsApp business usage. + range: WhatsAppLikelihood + inlined: true + + enrichment_metadata_whatsapp: + description: | + Metadata about the enrichment process. + range: WhatsAppEnrichmentMetadata + inlined: true + + DigitalProfessionalAssessment: + class_uri: hc:DigitalProfessionalAssessment + description: | + Assessment of a person's digital/technology proficiency. 
+ slots: + - likely_whatsapp_proficient + - digital_indicators + - digital_confidence + + slot_usage: + likely_whatsapp_proficient: + description: "Whether person is likely proficient with WhatsApp" + range: boolean + + digital_indicators: + description: "Indicators of digital proficiency from profile" + range: string + multivalued: true + + digital_confidence: + description: "Confidence level: low, medium, high" + range: string + + WhatsAppLikelihood: + class_uri: hc:WhatsAppLikelihood + description: | + Likelihood score for WhatsApp business usage. + slots: + - likelihood_score + - max_likelihood_score + - likelihood_level + - likelihood_confidence + - likelihood_factors + - assessment_date + + slot_usage: + likelihood_score: + description: "Numeric score (0-100)" + range: integer + minimum_value: 0 + maximum_value: 100 + + max_likelihood_score: + description: "Maximum possible score (typically 100)" + range: integer + + likelihood_level: + description: "Categorical level: low, medium, high" + range: string + + likelihood_confidence: + description: "Confidence in the assessment (0.0-1.0)" + range: float + minimum_value: 0.0 + maximum_value: 1.0 + + likelihood_factors: + description: "Factors contributing to the score" + range: string + multivalued: true + + assessment_date: + description: "When the assessment was performed (ISO 8601)" + range: datetime + + WhatsAppEnrichmentMetadata: + class_uri: hc:WhatsAppEnrichmentMetadata + description: | + Metadata about the WhatsApp enrichment process. 
+ slots: + - enriched_date + - enrichment_method_whatsapp + - data_source_whatsapp + - no_fabrication + - all_data_real + + slot_usage: + enriched_date: + description: "When enrichment was performed (ISO 8601)" + range: datetime + + enrichment_method_whatsapp: + description: "Method used for enrichment" + range: string + examples: + - value: "linkedin_profile_analysis" + + data_source_whatsapp: + description: "Source of data for enrichment" + range: string + examples: + - value: "public_linkedin_profile" + + no_fabrication: + description: "Confirms no data was fabricated" + range: boolean + + all_data_real: + description: "Confirms all data is from real sources" + range: boolean + +# Top-level slot definitions +slots: + extraction_metadata: + description: "Provenance metadata for the extraction activity" + range: ExtractionMetadata + + profile_data: + description: "Core profile data from LinkedIn" + range: LinkedInProfileData + + heritage_relevance: + description: "Heritage sector classification" + range: HeritageRelevance + + source_organization: + description: "Organization slug where person was discovered" + range: string + + whatsapp_enrichment: + description: "WhatsApp business likelihood enrichment" + range: WhatsAppEnrichment + + profile_name: + description: "Full name of the person" + range: string + + profile_linkedin_url: + description: "LinkedIn profile URL" + range: uri + + headline: + description: "Professional headline/tagline" + range: string + + profile_location: + description: "Location as displayed on profile" + range: string + + connections_text: + description: "Raw connections/followers text" + range: string + + about_text: + description: "About/summary section text" + range: string + + experience: + description: "Work experience entries" + range: WorkExperience + multivalued: true + + education: + description: "Education entries" + range: EducationCredential + multivalued: true + + skills: + description: "Skills listed on profile" + range: 
string + multivalued: true + + languages_raw: + description: "Raw language strings" + range: string + multivalued: true + + languages: + description: "Parsed language proficiency entries" + range: LanguageProficiency + multivalued: true + + profile_image_url: + description: "Profile photo URL" + range: uri + + digital_professional: + description: "Digital proficiency assessment" + range: DigitalProfessionalAssessment + + whatsapp_business_likelihood: + description: "WhatsApp business usage likelihood" + range: WhatsAppLikelihood + + enrichment_metadata_whatsapp: + description: "WhatsApp enrichment metadata" + range: WhatsAppEnrichmentMetadata + + likely_whatsapp_proficient: + description: "Whether person is likely WhatsApp proficient" + range: boolean + + digital_indicators: + description: "Indicators of digital proficiency" + range: string + multivalued: true + + digital_confidence: + description: "Digital proficiency confidence level" + range: string + + likelihood_score: + description: "Numeric likelihood score" + range: integer + + max_likelihood_score: + description: "Maximum possible score" + range: integer + + likelihood_level: + description: "Categorical likelihood level" + range: string + + likelihood_confidence: + description: "Confidence in the assessment" + range: float + + likelihood_factors: + description: "Factors contributing to score" + range: string + multivalued: true + + assessment_date: + description: "When assessment was performed" + range: datetime + + enriched_date: + description: "When enrichment was performed" + range: datetime + + enrichment_method_whatsapp: + description: "Method used for enrichment" + range: string + + data_source_whatsapp: + description: "Data source for enrichment" + range: string + + no_fabrication: + description: "Confirms no data was fabricated" + range: boolean + + all_data_real: + description: "Confirms all data is real" + range: boolean diff --git a/schemas/20251121/linkml/modules/classes/PersonConnection.yaml 
b/schemas/20251121/linkml/modules/classes/PersonConnection.yaml new file mode 100644 index 0000000000..6d8da93444 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/PersonConnection.yaml @@ -0,0 +1,319 @@ +# Person Connection Class +# Single network connection entry from LinkedIn connection lists + +id: https://nde.nl/ontology/hc/class/PersonConnection +name: person_connection_class +title: Person Connection Class +version: 1.0.0 + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + foaf: http://xmlns.com/foaf/0.1/ + dct: http://purl.org/dc/terms/ + +imports: + - linkml:types + - ../metadata + - ../enums/HeritageTypeEnum + +default_range: string + +classes: + + PersonConnection: + class_uri: schema:Person + description: | + A single connection entry from a person's LinkedIn network. + + Represents one person in another person's connection list, including + their relationship degree, professional headline, and heritage sector + classification. + + **Relationship to ConnectionNetwork**: + - ConnectionNetwork contains an array of PersonConnection entries + - Each PersonConnection represents one network connection + - connection_id links back to the target profile + + **Connection Degrees**: + - 1st: Direct connection (mutual connection) + - 2nd: Connected through one mutual connection + - 3rd+: Connected through two or more people + + **Name Types** (per AGENTS.md Rule 17): + - full: Complete first and last name visible + - abbreviated: Contains single initial (e.g., "Amy B.", "S. 
Buse") + - anonymous: Privacy-hidden profile ("LinkedIn Member") + + **Example JSON Structure**: + ```json + { + "connection_id": "giovannafossati_conn_0000_maddalena_ghiotto", + "name": "Maddalena Ghiotto", + "name_type": "full", + "degree": "2nd", + "headline": "Archiving nerd in the digital era.", + "location": "Utrecht, Netherlands", + "organization": "National Archive", + "heritage_relevant": true, + "heritage_type": "D" + } + ``` + + exact_mappings: + - schema:Person + close_mappings: + - foaf:Person + related_mappings: + - schema:knows + + slots: + - connection_id + - connection_name + - name_type + - connection_degree + - connection_headline + - connection_location + - connection_organization + - connection_heritage_relevant + - connection_heritage_type + - connection_linkedin_url + - mutual_connections_text + + slot_usage: + connection_id: + description: | + Unique identifier for this connection entry. + Format: {target_slug}_conn_{index:04d}_{name_slug} + + The connection_id enables: + - Deduplication across connection lists + - Linking to full profile if extracted later + - Tracking same person across multiple target profiles + slot_uri: dct:identifier + range: string + required: true + pattern: "^[a-z0-9-]+_conn_[0-9]{4}_[a-z0-9_]+$" + examples: + - value: "giovannafossati_conn_0042_amy_b" + description: "Connection #42 from Giovanna Fossati's network" + - value: "rijksmuseum_conn_0001_jan_van_berg" + description: "Connection #1 from the 'rijksmuseum' target profile" + + connection_name: + description: | + Name of the connected person as displayed on LinkedIn. + May be abbreviated ("Amy B.") or anonymous ("LinkedIn Member") + depending on privacy settings. + slot_uri: schema:name + range: string + required: true + examples: + - value: "Maddalena Ghiotto" + description: "Full name visible" + - value: "Amy B."
+ description: "Abbreviated last name" + - value: "LinkedIn Member" + description: "Anonymous/privacy-protected" + + name_type: + description: | + Classification of the name visibility level. + + Per AGENTS.md Rule 17: + - full: Complete first and last name + - abbreviated: Contains single initial (e.g., "Amy B.") + - anonymous: Privacy-hidden profile ("LinkedIn Member") + slot_uri: hc:nameType + range: NameTypeEnum + required: true + examples: + - value: "full" + description: "Complete name visible" + - value: "abbreviated" + description: "Partial name (privacy setting)" + + connection_degree: + description: | + LinkedIn connection degree relative to the viewer. + + **Important**: The degree is relative to the VIEWER (person conducting + the search), NOT the target profile being analyzed. See AGENTS.md Rule 17. + + Values: + - 1st: Direct mutual connection + - 2nd: One person between viewer and connection + - 3rd+: Two or more people between + slot_uri: hc:connectionDegree + range: ConnectionDegreeEnum + required: true + examples: + - value: "2nd" + description: "Second-degree connection" + + connection_headline: + description: | + Professional headline/tagline from the connection's profile. + Contains current job title and/or professional identity. + slot_uri: schema:jobTitle + range: string + examples: + - value: "Archiving nerd in the digital era." + - value: "Senior Curator at Rijksmuseum" + - value: "PhD candidate Critical audiovisual heritage" + + connection_location: + description: | + Location as displayed on the connection's profile. + Format varies: "City, Region, Country" or "Country" only. + slot_uri: schema:homeLocation + range: string + examples: + - value: "Utrecht, Utrecht, Netherlands" + - value: "Netherlands" + - value: "Amsterdam, North Holland, Netherlands" + + connection_organization: + description: | + Primary organization extracted from headline (when identifiable). + May be absent if headline doesn't clearly indicate organization. 
+ slot_uri: schema:memberOf + range: string + examples: + - value: "Vrije Universiteit Amsterdam" + - value: "Digital Infrastructure department of the KNAW Humanities Cluster" + + connection_heritage_relevant: + description: | + Whether this connection is professionally relevant to heritage sectors. + Determined by analyzing headline for heritage-related keywords. + slot_uri: hc:heritageRelevant + range: boolean + required: true + examples: + - value: true + description: "Works in museum, archive, library, etc." + - value: false + description: "No apparent heritage sector connection" + + connection_heritage_type: + description: | + Single-letter heritage type code if heritage_relevant is true. + Uses GLAMORCUBESFIXPHDNT taxonomy (G,L,A,M,O,R,C,U,B,E,S,F,I,X,P,H,D,N,T). + slot_uri: hc:heritageType + range: HeritageTypeEnum + examples: + - value: "A" + description: "Archive sector" + - value: "M" + description: "Museum sector" + - value: "D" + description: "Digital heritage" + + connection_linkedin_url: + description: | + LinkedIn profile URL for this connection (if extractable). + May be absent for privacy-restricted or abbreviated name profiles. + slot_uri: schema:url + range: uri + pattern: "^https://www\\.linkedin\\.com/in/[a-z0-9-]+/?$" + examples: + - value: "https://www.linkedin.com/in/maddalena-ghiotto-12345" + + mutual_connections_text: + description: | + Raw mutual connections text from LinkedIn. + Format: "X mutual connections" or "Name and X others" + Preserved for network analysis. 
+ slot_uri: schema:description + range: string + examples: + - value: "Thomas van Maaren, Bob Coret, and 4 other mutual connections" + - value: "12 mutual connections" + + comments: + - "Inlined in ConnectionNetwork.connections[] as multivalued list" + - "connection_id enables deduplication across multiple connection lists" + - "name_type classification per AGENTS.md Rule 17" + - "connection_degree is relative to VIEWER, not target profile" + + see_also: + - "https://schema.org/Person" + - "https://schema.org/knows" + +enums: + NameTypeEnum: + description: | + Classification of name visibility level in LinkedIn data. + Per AGENTS.md Rule 17. + permissible_values: + # Deliberately no "meaning" URIs: one shared URI would make the values indistinguishable in RDF + full: + description: "Complete first and last name visible" + abbreviated: + description: "Contains single initial (e.g., 'Amy B.', 'S. Buse')" + anonymous: + description: "Privacy-hidden profile ('LinkedIn Member')" + + ConnectionDegreeEnum: + description: | + LinkedIn connection degree values. + Indicates network distance from the viewer.
+ permissible_values: + # Deliberately no "meaning" URIs: one shared URI would make the values indistinguishable in RDF + 1st: + description: "Direct mutual connection" + 2nd: + description: "Connected through one mutual connection" + 3rd+: + description: "Connected through two or more people" + +slots: + connection_id: + description: "Unique identifier for this connection entry" + range: string + + connection_name: + description: "Name of the connected person" + range: string + + name_type: + description: "Classification of name visibility level" + range: NameTypeEnum + + connection_degree: + description: "LinkedIn connection degree (1st, 2nd, 3rd+)" + range: ConnectionDegreeEnum + + connection_headline: + description: "Professional headline from connection's profile" + range: string + + connection_location: + description: "Location from connection's profile" + range: string + + connection_organization: + description: "Primary organization from headline" + range: string + + connection_heritage_relevant: + description: "Whether connection is heritage-sector relevant" + range: boolean + + connection_heritage_type: + description: "Heritage type code if heritage_relevant" + range: HeritageTypeEnum + + connection_linkedin_url: + description: "LinkedIn profile URL for connection" + range: uri + + mutual_connections_text: + description: "Raw mutual connections text" + range: string diff --git a/schemas/20251121/linkml/modules/classes/PersonName.yaml b/schemas/20251121/linkml/modules/classes/PersonName.yaml new file mode 100644 index 0000000000..6a46c63a28 --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/PersonName.yaml @@ -0,0 +1,283 @@ +id: https://nde.nl/ontology/hc/class/PersonName +name: PersonName +title: Person Name Class + +prefixes: + pnv: https://w3id.org/pnv# + pico: https://personsincontext.org/model# + sdo: https://schema.org/ + prov: http://www.w3.org/ns/prov# + skos: http://www.w3.org/2004/02/skos/core# + foaf: http://xmlns.com/foaf/0.1/ + crm: 
http://www.cidoc-crm.org/cidoc-crm/ + # linkml prefix is declared so the "linkml:types" import CURIE below can be expanded + linkml: https://w3id.org/linkml/ + +imports: + - linkml:types + +classes: + PersonName: + class_uri: pnv:PersonName + description: | + Structured person name following the Person Name Vocabulary (PNV). + + PNV is a Dutch heritage standard for representing person names with their + constituent parts. It is designed for historical records where names may + be incomplete, spelled variably, or follow naming conventions different + from modern Western norms. + + =========================================================================== + PNV DESIGN PRINCIPLES (from https://w3id.org/pnv) + =========================================================================== + + 1. A name is a resource in its own right - not just a string + 2. Names have structure that can be decomposed into components + 3. Components may be missing, uncertain, or abbreviated + 4. Patronymics and surname prefixes are distinct name elements + 5. Unknown/unnamed persons are explicitly marked, not left blank + + =========================================================================== + REQUIRED vs OPTIONAL PROPERTIES + =========================================================================== + + - literal_name: REQUIRED unless name_specification is set + - name_specification: REQUIRED if literal_name is blank ("unknown" or "unnamed") + - All other properties: OPTIONAL + + =========================================================================== + DUTCH NAME EXAMPLES + =========================================================================== + + Example 1: "Pieter Corneliszoon van der Berg" + - literal_name: "Pieter Corneliszoon van der Berg" + - given_name: "Pieter" + - patronym: "Corneliszoon" (son of Cornelis) + - surname_prefix: "van der" + - base_surname: "Berg" + + Example 2: "H.A.F.M.O. (Hans) van Mierlo" + - literal_name: "H.A.F.M.O. (Hans) van Mierlo" + - initials: "H.A.F.M.O."
+ - given_name: "Hans" + - surname_prefix: "van" + - base_surname: "Mierlo" + + Example 3: "Maria de Vries" + - literal_name: "Maria de Vries" + - given_name: "Maria" + - surname_prefix: "de" + - base_surname: "Vries" (used for alphabetical sorting under V) + + Example 4: "Jan Janszoon" (historical record with patronymic only) + - literal_name: "Jan Janszoon" + - given_name: "Jan" + - patronym: "Janszoon" (son of Jan) + - base_surname: null (no family surname) + + Example 5: Unknown person in historical record + - literal_name: null + - name_specification: "unknown" + + Example 6: Unnamed child who died shortly after birth + - literal_name: null + - name_specification: "unnamed" + + =========================================================================== + INTERNATIONAL NAME PATTERNS + =========================================================================== + + Icelandic Patronymic: + - "Björk Guðmundsdóttir" → patronym: "Guðmundsdóttir" + + Spanish Compound Surname: + - "Pablo Ruiz Picasso" → given_name: "Pablo", base_surname: "Ruiz Picasso" + + Arabic Name with Nasab: + - "Muhammad ibn Abdullah" → given_name: "Muhammad", patronym: "ibn Abdullah" + + Indonesian Single Name: + - "Sukarno" → literal_name: "Sukarno", given_name: "Sukarno" + + =========================================================================== + SORTING BEHAVIOR + =========================================================================== + + The base_surname property exists specifically for sorting. Dutch names with + prefixes (van, de, van der, etc.) 
are sorted by the main surname element: + + - "Johannes de Vries" sorts under V (base_surname: "Vries") + - "Anna van den Berg" sorts under B (base_surname: "Berg") + + However, Belgian Dutch and some other conventions sort by prefix: + - "Jan De Smedt" (Belgian) sorts under D (use surname_prefix in sorting) + + =========================================================================== + ONTOLOGY ALIGNMENT + =========================================================================== + + PNV is the PRIMARY ontology for structured name representation. + + Additional mappings: + - CIDOC-CRM: crm:E41_Appellation (name as identifying entity) + - Schema.org: sdo:name (simple string fallback) + - FOAF: foaf:name (simple string fallback) + + exact_mappings: + - pnv:PersonName + close_mappings: + - crm:E41_Appellation + - foaf:name + - sdo:name + related_mappings: + - skos:prefLabel + - sdo:givenName + - sdo:familyName + + slots: + - literal_name + - given_name + - base_surname + - surname_prefix + - patronym + - initials + - name_specification + + slot_usage: + literal_name: + slot_uri: pnv:literalName + description: | + Full personal name as written. REQUIRED unless name_specification is set. + + This is the complete name string as it appears in the source material. + It may include given names, patronyms, surname prefixes, and surnames. + + Examples: + - "Pieter Corneliszoon van der Berg" + - "H.A.F.M.O. (Hans) van Mierlo" + - "Maria de Vries" + + Leave blank ONLY if: + - Person's name was unknown (set name_specification: "unknown") + - Person was unnamed (set name_specification: "unnamed") + range: string + required: false + + given_name: + slot_uri: pnv:givenName + description: | + The name(s) given to someone at birth or any other name-giving event. + + May include multiple given names, nicknames in parentheses, or call names. + + Examples: + - "Pieter" (single given name) + - "Hans" (from "H.A.F.M.O. 
(Hans)") + - "Jan Willem" (multiple given names) + - "Pietje" (diminutive/nickname) + range: string + + base_surname: + slot_uri: pnv:baseSurname + description: | + Family name WITHOUT prefixes, used for alphabetical sorting. + + Dutch naming convention sorts by the main surname element, ignoring + prefixes like "van", "de", "van der", etc. + + Examples: + - "de Vries" → base_surname: "Vries" (sorts under V) + - "van den Berg" → base_surname: "Berg" (sorts under B) + - "van der Waals" → base_surname: "Waals" (sorts under W) + + Equivalent to BioDes:geslachtsnaam in Dutch heritage standards. + range: string + + surname_prefix: + slot_uri: pnv:surnamePrefix + description: | + Prefix before the surname (tussenvoegsel in Dutch). + + Common Dutch prefixes: van, de, het, ter, ten, van de, van der, van den, + in 't, op den, etc. + + German: von, zu, von und zu + French: de, du, de la, des + + Examples: + - "van der Berg" → surname_prefix: "van der" + - "de Vries" → surname_prefix: "de" + - "in 't Veld" → surname_prefix: "in 't" + + Equivalent to A2A:PersonNamePrefixLastName. + range: string + + patronym: + slot_uri: pnv:patronym + description: | + Name element based on the given name of one's father (patronym) + or mother (matronym). + + Common patterns: + - Dutch: -zoon, -szoon, -z., -sen, -dochter (e.g., "Janszoon", "Pietersdochter") + - Icelandic: -son, -dóttir (e.g., "Jónsson", "Guðmundsdóttir") + - Arabic: ibn, bin, bint (e.g., "ibn Abdullah", "bint Fatima") + - Hebrew: ben, bat (e.g., "ben David") + - Slavic: -ovich, -ovna (e.g., "Ivanovich", "Petrovna") + + Examples: + - "Jan Pieterszoon" → patronym: "Pieterszoon" + - "Björk Guðmundsdóttir" → patronym: "Guðmundsdóttir" + - "Muhammad ibn Abdullah" → patronym: "ibn Abdullah" + range: string + + initials: + slot_uri: pnv:initials + description: | + Initial letter(s) of given name(s), each followed by a period. + + Used when only initials are known, or to record initials alongside + the full given name. 
+ + Format: Each initial followed by period, no spaces between. + + Examples: + - "P.R." (for "Peter R.") + - "H.A.F.M.O." (for Hans van Mierlo's full initials) + - "C.Joh." (unusual abbreviation format) + - "J." (single initial) + + If only initials are known (not full given name), use this property. + The given_name property may also contain the initials as a fallback. + range: string + pattern: "^[A-Z][a-z]*\\.([A-Z][a-z]*\\.)*$" + + name_specification: + slot_uri: pnv:nameSpecification + description: | + Indicates why literal_name is blank: "unknown" or "unnamed". + + REQUIRED when literal_name is empty. Prevents ambiguity between + missing data and deliberately unnamed/unknown persons. + + Values: + - "unknown": Person existed but name was not recorded or is illegible + - "unnamed": Person was never given a name (e.g., infant who died at birth) + + Examples: + - Historical record mentions "a servant" without name → "unknown" + - Baptism record for stillborn child → "unnamed" + - Illegible signature on document → "unknown" + range: string + pattern: "^(unknown|unnamed)$" + + rules: + - postconditions: + slot_conditions: + literal_name: + required: true + preconditions: + slot_conditions: + name_specification: + none_of: + - equals_string: "unknown" + - equals_string: "unnamed" + description: "literal_name is REQUIRED unless name_specification is set" diff --git a/schemas/20251121/linkml/modules/classes/WorkExperience.yaml b/schemas/20251121/linkml/modules/classes/WorkExperience.yaml new file mode 100644 index 0000000000..6ecb7ffe9d --- /dev/null +++ b/schemas/20251121/linkml/modules/classes/WorkExperience.yaml @@ -0,0 +1,245 @@ +# Work Experience Class +# Career history entries with temporal information + +id: https://nde.nl/ontology/hc/class/WorkExperience +name: work_experience_class +title: Work Experience Class +version: 1.0.0 + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + org: 
http://www.w3.org/ns/org# + prov: http://www.w3.org/ns/prov# + crm: http://www.cidoc-crm.org/cidoc-crm/ + xsd: http://www.w3.org/2001/XMLSchema# + +imports: + - linkml:types + - ../metadata + - TimeSpan + +default_range: string + +classes: + + WorkExperience: + class_uri: schema:OrganizationRole + description: | + A single work experience entry from a person's career history. + + Models employment positions with job title, employer, dates, and location. + Uses simplified date representation (not full TimeSpan) since LinkedIn + data typically has precise month/year values. + + **Schema.org Alignment**: + - Represents a schema:OrganizationRole (role at an organization) + - Related to org:Membership (W3C Organization Ontology) + + **Use Cases**: + - LinkedIn profile experience entries + - CV/resume work history + - Staff affiliation tracking + + **Example JSON Structure**: + ```json + { + "title": "Staff Adviseur", + "company": "The Dutch Inspectorate of Education", + "dates": "Apr 2025 - Present", + "location": "Utrecht, Netherlands" + } + ``` + + **Heritage Relevance**: + - Each work experience can be tagged for heritage sector relevance + - Links to CustodianName if employer is a known heritage custodian + + exact_mappings: + - schema:OrganizationRole + close_mappings: + - org:Membership + - crm:E7_Activity + related_mappings: + - schema:EmployeeRole + - prov:Association + + slots: + - job_title + - employer_name + - employer_linkedin_url + - employment_dates_raw + - employment_start_date + - employment_end_date + - is_current_position + - work_location + - job_description + - heritage_employer + + slot_usage: + job_title: + description: | + The job title or role held at this position. 
+ Schema.org: schema:roleName + slot_uri: schema:roleName + range: string + required: true + examples: + - value: "Staff Adviseur" + description: "Dutch job title" + - value: "Senior Curator, Asian Art" + description: "Curatorial role with specialization" + + employer_name: + description: | + Name of the employing organization. + Schema.org: schema:name of the schema:Organization + slot_uri: schema:memberOf + range: string + required: true + examples: + - value: "The Dutch Inspectorate of Education" + description: "Full organization name" + - value: "Rijksmuseum" + description: "Heritage institution employer" + + employer_linkedin_url: + description: | + LinkedIn company page URL for the employer. + slot_uri: schema:url + range: uri + pattern: "^https://www\\.linkedin\\.com/company/[a-z0-9-]+/?$" + examples: + - value: "https://www.linkedin.com/company/rijksmuseum" + + employment_dates_raw: + description: | + Raw date string as extracted from LinkedIn. + Format varies: "Apr 2025 - Present", "2020 - 2023", etc. + Preserved for provenance; use parsed fields for queries. + slot_uri: schema:description + range: string + examples: + - value: "Apr 2025 - Present" + description: "Current position with start month" + - value: "Aug 2017 - Apr 2025" + description: "Completed position with month precision" + - value: "2015 - 2020" + description: "Year-only precision" + + employment_start_date: + description: | + Parsed start date of employment (ISO 8601). + May be year-only (YYYY) or month-precision (YYYY-MM), so the value is kept as a string (xsd:date cannot hold reduced-precision dates). + slot_uri: schema:startDate + range: string + examples: + - value: "2025-04" + description: "April 2025 start" + - value: "2017" + description: "Year-only precision" + + employment_end_date: + description: | + Parsed end date of employment (ISO 8601). + Null/absent indicates current position.
+ slot_uri: schema:endDate + range: string + examples: + - value: "2025-04" + description: "Ended April 2025" + - value: null + description: "Current position (ongoing)" + + is_current_position: + description: | + Whether this is the person's current position. + True if employment_dates_raw contains "Present" or end_date is null. + slot_uri: schema:activeStatus + range: boolean + ifabsent: "false" + examples: + - value: true + description: "Currently employed at this position" + + work_location: + description: | + Location of the work position (city, region, country). + Raw string as extracted; use Location class for structured data. + slot_uri: schema:workLocation + range: string + examples: + - value: "Utrecht, Netherlands" + description: "City and country" + - value: "Amsterdam, Noord-Holland, Netherlands" + description: "City, region, country" + + job_description: + description: | + Description of responsibilities and achievements (if available). + Often not present in LinkedIn basic profile data. + slot_uri: schema:description + range: string + + heritage_employer: + description: | + Whether the employer is a known heritage custodian. + If true, employer_name should match a CustodianName. + slot_uri: hc:heritageRelevant + range: boolean + ifabsent: "false" + comments: + - "Set to true if employer is museum, archive, library, etc."
+ - "Links to HeritageTypeEnum for classification" + + comments: + - "Inlined in LinkedInProfile.experience[] as multivalued list" + - "Preserves raw LinkedIn date formats for provenance" + - "Current positions have is_current_position=true and null end_date" + - "heritage_employer enables filtering for heritage sector careers" + + see_also: + - "https://schema.org/OrganizationRole" + - "https://www.w3.org/TR/vocab-org/#class-membership" + +slots: + job_title: + description: "Job title or role held at this position" + range: string + + employer_name: + description: "Name of the employing organization" + range: string + + employer_linkedin_url: + description: "LinkedIn company page URL for the employer" + range: uri + + employment_dates_raw: + description: "Raw date string as extracted from LinkedIn" + range: string + + employment_start_date: + description: "Parsed start date of employment (ISO 8601; YYYY, YYYY-MM or YYYY-MM-DD)" + range: string + + employment_end_date: + description: "Parsed end date of employment (ISO 8601; YYYY, YYYY-MM or YYYY-MM-DD)" + range: string + + is_current_position: + description: "Whether this is a current position" + range: boolean + + work_location: + description: "Location of the work position" + range: string + + job_description: + description: "Description of responsibilities and achievements" + range: string + + heritage_employer: + description: "Whether employer is a known heritage custodian" + range: boolean diff --git a/schemas/20251121/linkml/modules/enums/HeritageTypeEnum.yaml b/schemas/20251121/linkml/modules/enums/HeritageTypeEnum.yaml new file mode 100644 index 0000000000..90a0079425 --- /dev/null +++ b/schemas/20251121/linkml/modules/enums/HeritageTypeEnum.yaml @@ -0,0 +1,186 @@ +id: https://nde.nl/ontology/hc/enum/HeritageTypeEnum +name: HeritageTypeEnum +title: Person Heritage Type Single-Letter Codes +version: 1.0.0 + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + schema: http://schema.org/ + wikidata: http://www.wikidata.org/entity/ + +default_range: string +
+description: | + Single-letter codes classifying a person's relevance to heritage sectors. + Used in LinkedIn profile analysis, connection network data, and staff + heritage relevance classification. + + These codes are derived from the GLAMORCUBESFIXPHDNT taxonomy + (see CustodianPrimaryTypeEnum for full institutional classification). + + **Usage Context**: + - `heritage_relevance.heritage_types[]` in person entity profiles + - `heritage_type` in PersonConnection records + - Staff classification in custodian files + + **Mapping to CustodianPrimaryTypeEnum**: + Each single-letter code maps to a corresponding full enum value: + - G → GALLERY + - L → LIBRARY + - A → ARCHIVE + - M → MUSEUM + - O → OFFICIAL_INSTITUTION + - R → RESEARCH_CENTER + - C → COMMERCIAL + - U → UNSPECIFIED + - B → BIO_CUSTODIAN + - E → EDUCATION_PROVIDER + - S → HERITAGE_SOCIETY + - F → FEATURE_CUSTODIAN + - I → INTANGIBLE_HERITAGE_GROUP + - X → MIXED + - P → PERSONAL_COLLECTION + - H → HOLY_SACRED_SITE + - D → DIGITAL_PLATFORM + - N → NON_PROFIT + - T → TASTE_SCENT_HERITAGE + +enums: + HeritageTypeEnum: + description: | + Single-letter heritage sector classification codes for person data. + Used to classify individuals' professional relevance to heritage sectors. 
+ permissible_values: + G: + description: "Gallery - Works in gallery/exhibition sector" + meaning: hc:CustodianPrimaryTypeEnum.GALLERY + annotations: + full_label: Gallery + example_roles: "Gallery director, Curator, Exhibition designer" + + L: + description: "Library - Works in library sector" + meaning: hc:CustodianPrimaryTypeEnum.LIBRARY + annotations: + full_label: Library + example_roles: "Librarian, Collection manager, Digital librarian" + + A: + description: "Archive - Works in archival/audiovisual heritage sector" + meaning: hc:CustodianPrimaryTypeEnum.ARCHIVE + annotations: + full_label: Archive + example_roles: "Archivist, Records manager, AV specialist" + + M: + description: "Museum - Works in museum sector" + meaning: hc:CustodianPrimaryTypeEnum.MUSEUM + annotations: + full_label: Museum + example_roles: "Museum curator, Conservator, Registrar" + + O: + description: "Official - Works in government/official heritage institution" + meaning: hc:CustodianPrimaryTypeEnum.OFFICIAL_INSTITUTION + annotations: + full_label: Official Institution + example_roles: "Policy advisor, Heritage officer, Government archivist" + + R: + description: "Research - Works in heritage research/academia" + meaning: hc:CustodianPrimaryTypeEnum.RESEARCH_CENTER + annotations: + full_label: Research Center + example_roles: "Researcher, Professor, Documentation specialist" + + C: + description: "Commercial - Works in corporate heritage/brand archives" + meaning: hc:CustodianPrimaryTypeEnum.COMMERCIAL + annotations: + full_label: Commercial/Corporate + example_roles: "Corporate archivist, Brand historian" + + U: + description: "Unknown - Heritage sector affiliation unclear" + meaning: hc:CustodianPrimaryTypeEnum.UNSPECIFIED + annotations: + full_label: Unknown/Unspecified + example_roles: "Unable to determine from available data" + + B: + description: "Bio - Works in botanical garden, zoo, or living collections" + meaning: hc:CustodianPrimaryTypeEnum.BIO_CUSTODIAN + annotations: + 
full_label: Botanical/Zoo + example_roles: "Botanist, Zoologist, Living collections curator" + + E: + description: "Education - Works in heritage education/universities with collections" + meaning: hc:CustodianPrimaryTypeEnum.EDUCATION_PROVIDER + annotations: + full_label: Education Provider + example_roles: "University librarian, Academic archivist, Museum educator" + + S: + description: "Society - Works with heritage/historical societies" + meaning: hc:CustodianPrimaryTypeEnum.HERITAGE_SOCIETY + annotations: + full_label: Heritage Society + example_roles: "Society curator, Heritage volunteer, Local historian" + + F: + description: "Feature - Works with geographic feature heritage sites" + meaning: hc:CustodianPrimaryTypeEnum.FEATURE_CUSTODIAN + annotations: + full_label: Feature Custodian + example_roles: "Site manager, Monument keeper, Landscape curator" + + I: + description: "Intangible - Works with intangible cultural heritage" + meaning: hc:CustodianPrimaryTypeEnum.INTANGIBLE_HERITAGE_GROUP + annotations: + full_label: Intangible Heritage + example_roles: "Oral historian, Folklore specialist, Traditional crafts preservationist" + + X: + description: "Mixed - Works across multiple heritage sectors" + meaning: hc:CustodianPrimaryTypeEnum.MIXED + annotations: + full_label: Mixed/Multiple + example_roles: "Cross-domain specialist, Multi-institution consultant" + + P: + description: "Personal - Works with private/personal collections" + meaning: hc:CustodianPrimaryTypeEnum.PERSONAL_COLLECTION + annotations: + full_label: Personal Collection + example_roles: "Private collection manager, Family archive keeper" + + H: + description: "Holy - Works with religious heritage sites/collections" + meaning: hc:CustodianPrimaryTypeEnum.HOLY_SACRED_SITE + annotations: + full_label: Holy/Sacred Site + example_roles: "Church archivist, Temple curator, Religious heritage officer" + + D: + description: "Digital - Works in digital heritage/data/technology" + meaning: 
hc:CustodianPrimaryTypeEnum.DIGITAL_PLATFORM + annotations: + full_label: Digital Platform + example_roles: "Digital archivist, Data engineer, Heritage technologist" + + N: + description: "NGO - Works with non-profit heritage organizations" + meaning: hc:CustodianPrimaryTypeEnum.NON_PROFIT + annotations: + full_label: Non-Profit Organization + example_roles: "NGO program manager, Heritage advocate, Conservation officer" + + T: + description: "Taste/Scent - Works with culinary/olfactory heritage" + meaning: hc:CustodianPrimaryTypeEnum.TASTE_SCENT_HERITAGE + annotations: + full_label: Taste/Scent Heritage + example_roles: "Culinary historian, Food heritage specialist, Perfume archivist" diff --git a/schemas/20251121/linkml/modules/slots/age.yaml b/schemas/20251121/linkml/modules/slots/age.yaml new file mode 100644 index 0000000000..179da8d07c --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/age.yaml @@ -0,0 +1,121 @@ +# Age Slot +# String representing the age of a person as mentioned in source +# +# PiCo Pattern: Used when birth date is unknown but age is recorded. +# Combined with source date, a birth date range can be derived. + +id: https://nde.nl/ontology/hc/slot/age +name: age_slot +title: Age Slot + +prefixes: + pico: https://personsincontext.org/model# + sdo: https://schema.org/ + foaf: http://xmlns.com/foaf/0.1/ + +imports: + - linkml:types + +slots: + age: + slot_uri: pico:hasAge + description: | + The age of a person as mentioned in the source. 
+ + =========================================================================== + PiCo ONTOLOGY ALIGNMENT (pico.ttl lines 531-546) + =========================================================================== + + Per PiCo (Persons in Context) ontology: + - Only used for PersonObservations when birth date is unknown + - Age is recorded as mentioned on the source + - Combined with source document date, a birth date range can be derived + + =========================================================================== + FORMAT RULES + =========================================================================== + + **Default unit**: Years (unless otherwise specified) + - "4" → interpreted as 4 years old + - "4 months" → interpreted as 4 months old + - "4 weeks" → interpreted as 4 weeks old + + **Preferred format**: Numerical ages + - "4" (preferred) + - "four" (acceptable but less precise for computation) + + **Approximate ages**: + - "ca. 30" or "about 30" (approximate) + - "30-35" (range) + - "adult" or "child" (categorical) + + =========================================================================== + BIRTH DATE DERIVATION + =========================================================================== + + When age is known and source date is known, birth date range can be derived: + + **Example**: + - Source document date: 1850-06-15 + - Age mentioned: "35" + - Derived birth date range: 1814-06-16 to 1815-06-15 + + This derivation happens during PersonReconstruction, not in the + PersonObservation (which just records what the source says). + + =========================================================================== + EXAMPLES + =========================================================================== + + Simple numeric age: + ```yaml + age: "35" + ``` + + Age with unit: + ```yaml + age: "6 months" + ``` + + Approximate age: + ```yaml + age: "ca. 
40" + ``` + + Age range: + ```yaml + age: "25-30" + ``` + + Categorical age: + ```yaml + age: "infant" + ``` + + =========================================================================== + RELATIONSHIP TO birth_date SLOT + =========================================================================== + + - If birth_date is known, use birth_date (more precise) + - If only age is known from source, use age slot + - Do NOT use both for the same observation (redundant) + - Age + source date → derived birth_date in PersonReconstruction + + =========================================================================== + ONTOLOGY MAPPINGS + =========================================================================== + + - PiCo: pico:hasAge (primary) + - FOAF: foaf:age (simple age property) + + range: string + required: false + exact_mappings: + - pico:hasAge + close_mappings: + - foaf:age + comments: + - "Only use when birth_date is unknown" + - "Default unit is years unless specified" + - "Numerical ages preferred over text" + - "Combined with source date to derive birth date range" diff --git a/schemas/20251121/linkml/modules/slots/base_surname.yaml b/schemas/20251121/linkml/modules/slots/base_surname.yaml new file mode 100644 index 0000000000..db81994dc4 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/base_surname.yaml @@ -0,0 +1,48 @@ +id: https://nde.nl/ontology/hc/slot/base_surname +name: base_surname +title: Base Surname + +prefixes: + pnv: https://w3id.org/pnv# + sdo: https://schema.org/ + foaf: http://xmlns.com/foaf/0.1/ + +imports: + - linkml:types + +slots: + base_surname: + slot_uri: pnv:baseSurname + description: | + Family name WITHOUT prefixes, used for alphabetical sorting. + + In Dutch naming conventions, surnames with prefixes (tussenvoegsels) like + "van", "de", "van der", etc. are sorted by the main surname element, + ignoring the prefix.
+ + This property captures just the sortable surname component: + + Examples: + - "de Vries" → base_surname: "Vries" (sorts under V) + - "van den Berg" → base_surname: "Berg" (sorts under B) + - "van der Waals" → base_surname: "Waals" (sorts under W) + - "in 't Veld" → base_surname: "Veld" (sorts under V) + + Equivalent to BioDes:geslachtsnaam in Dutch heritage standards. + + IMPORTANT: Belgian Dutch and some international conventions may sort + by the full surname including prefix. In those cases, use the full + surname here or handle sorting differently in your application. + + For surnames without prefixes, base_surname equals the full surname: + - "Jansen" → base_surname: "Jansen" + - "Bakker" → base_surname: "Bakker" + + range: string + exact_mappings: + - pnv:baseSurname + close_mappings: + - sdo:familyName + - foaf:familyName + related_mappings: + - foaf:lastName diff --git a/schemas/20251121/linkml/modules/slots/birth_date.yaml b/schemas/20251121/linkml/modules/slots/birth_date.yaml new file mode 100644 index 0000000000..6a091f8643 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/birth_date.yaml @@ -0,0 +1,112 @@ +# Birth Date Slot +# Date or string representing when a person was born +# +# PiCo Pattern: Allows both xsd:date and xsd:string to preserve original +# source formats while enabling date-based queries. + +id: https://nde.nl/ontology/hc/slot/birth_date +name: birth_date_slot +title: Birth Date Slot + +prefixes: + sdo: https://schema.org/ + pico: https://personsincontext.org/model# + crm: http://www.cidoc-crm.org/cidoc-crm/ + wikidata: http://www.wikidata.org/entity/ + xsd: http://www.w3.org/2001/XMLSchema# + +imports: + - linkml:types + +slots: + birth_date: + slot_uri: sdo:birthDate + description: | + The birth date of a person. 
+ + =========================================================================== + PiCo ONTOLOGY ALIGNMENT (pico.ttl lines 365-380) + =========================================================================== + + Per PiCo (Persons in Context) ontology, birth dates can be recorded as: + - **xsd:date**: ISO 8601 format for structured date processing + - **xsd:string**: Original format from source for preservation + + Both formats may be used together when source fidelity is important. + + =========================================================================== + INCOMPLETE DATES + =========================================================================== + + Incomplete dates are ALLOWED when truncated from small to large: + + **VALID formats**: + - "1970-08-15" (full date) + - "1970-08" (year and month only) + - "1970" (year only) + + **INVALID formats** (never use): + - "1970-00-15" (missing month with day present) + - "08-15" (missing year) + - "15" (day only) + + =========================================================================== + SOURCE PRESERVATION + =========================================================================== + + Sometimes the original source format should be preserved exactly as written: + - Archival records: "born in the year of our Lord 1823" + - Historical documents: "circa 1750" + - Genealogical notes: "abt. 1800" + - Fuzzy dates: "late 18th century" + + In these cases, use the string format and optionally provide a structured + date interpretation in a separate field or as part of TimeSpan processing. + + =========================================================================== + EXAMPLES + =========================================================================== + + Precise date known: + ```yaml + birth_date: "1970-08-15" + ``` + + Only year known: + ```yaml + birth_date: "1970" + ``` + + Source format preservation: + ```yaml + birth_date: "ca. 
1750" # String format preserves source notation + ``` + + =========================================================================== + RELATIONSHIP TO age SLOT + =========================================================================== + + When birth_date is unknown but age is known (from source), use the `age` + slot instead. Combined with the source document date, a birth date range + can be derived during PersonReconstruction. + + =========================================================================== + ONTOLOGY MAPPINGS + =========================================================================== + + - Schema.org: sdo:birthDate (primary) + - CIDOC-CRM: crm:P98i_was_born via crm:E67_Birth event + - Wikidata: P569 (date of birth) + + range: string + required: false + exact_mappings: + - sdo:birthDate + - wikidata:P569 + close_mappings: + - crm:P98i_was_born + comments: + - "Allows both date and string formats per PiCo pattern" + - "Incomplete dates valid when truncated from small to large" + - "Use age slot when birth date unknown but age is recorded" + - "String format can preserve original source notation" diff --git a/schemas/20251121/linkml/modules/slots/birth_place.yaml b/schemas/20251121/linkml/modules/slots/birth_place.yaml new file mode 100644 index 0000000000..97e9650ef8 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/birth_place.yaml @@ -0,0 +1,111 @@ +# Birth Place Slot +# String or URI representing where a person was born +# +# PiCo Pattern: Allows both literal place names and URIs for thesaurus linking +# (GeoNames, Wikidata, etc.) 
+ +id: https://nde.nl/ontology/hc/slot/birth_place +name: birth_place_slot +title: Birth Place Slot + +prefixes: + sdo: https://schema.org/ + pico: https://personsincontext.org/model# + crm: http://www.cidoc-crm.org/cidoc-crm/ + wikidata: http://www.wikidata.org/entity/ + geonames: http://sws.geonames.org/ + +imports: + - linkml:types + +slots: + birth_place: + slot_uri: sdo:birthPlace + description: | + The place where a person was born. + + =========================================================================== + PiCo ONTOLOGY ALIGNMENT (pico.ttl lines 382-394) + =========================================================================== + + Per PiCo (Persons in Context) ontology, birth places can be recorded as: + - **xsd:string**: Literal place name as mentioned in source + - **xsd:anyURI**: Link to geographic thesaurus (GeoNames, Wikidata) + + Both formats may be used together: + - Literal: Preserves original source notation + - URI: Enables geographic linking and reconciliation + + =========================================================================== + THESAURUS LINKING + =========================================================================== + + Preferably link to established geographic thesauri: + + **GeoNames** (preferred for geographic entities): + - Format: http://sws.geonames.org/{geonames_id}/ + - Example: http://sws.geonames.org/2759794/ (Amsterdam) + + **Wikidata** (preferred for historical places): + - Format: http://www.wikidata.org/entity/Q{number} + - Example: http://www.wikidata.org/entity/Q727 (Amsterdam) + + **TGN** (Getty Thesaurus of Geographic Names): + - For art history contexts + + =========================================================================== + SOURCE PRESERVATION + =========================================================================== + + The literal place name as mentioned on the source should be preserved: + - Historical spellings: "Amsteldam" (historical name for Amsterdam) + - Demolished places: "East 
Prussia" (no longer exists) + - Vernacular names: "'s-Hertogenbosch" vs "Den Bosch" + - Regional specificity: "Jordaan, Amsterdam" (neighborhood level) + + =========================================================================== + EXAMPLES + =========================================================================== + + Literal place name only: + ```yaml + birth_place: "Amsterdam, Noord-Holland, Netherlands" + ``` + + URI to GeoNames: + ```yaml + birth_place: "http://sws.geonames.org/2759794/" + ``` + + Both literal and URI (using separate fields): + ```yaml + birth_place: "Amsteldam" # Source spelling + birth_place_uri: "http://sws.geonames.org/2759794/" # Reconciled + ``` + + Historical place (since renamed; URI points to the modern equivalent): + ```yaml + birth_place: "Königsberg, East Prussia" + birth_place_uri: "http://www.wikidata.org/entity/Q1829" # Now Kaliningrad + ``` + + =========================================================================== + ONTOLOGY MAPPINGS + =========================================================================== + + - Schema.org: sdo:birthPlace (primary) + - CIDOC-CRM: crm:P7_took_place_at via crm:E67_Birth event + - Wikidata: P19 (place of birth) + + range: string + required: false + exact_mappings: + - sdo:birthPlace + - wikidata:P19 + close_mappings: + - crm:P7_took_place_at + comments: + - "Allows both string and URI formats per PiCo pattern" + - "Prefer thesaurus links (GeoNames, Wikidata) when possible" + - "Preserve literal source notation alongside reconciled URI" + - "Handle historical place names that may not have modern equivalents" diff --git a/schemas/20251121/linkml/modules/slots/death_place.yaml b/schemas/20251121/linkml/modules/slots/death_place.yaml new file mode 100644 index 0000000000..0da8203592 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/death_place.yaml @@ -0,0 +1,129 @@ +# Death Place Slot +# String or URI representing where a person died +# +# PiCo Pattern: Allows both literal place names and URIs for thesaurus
linking +# (GeoNames, Wikidata, etc.) + +id: https://nde.nl/ontology/hc/slot/death_place +name: death_place_slot +title: Death Place Slot + +prefixes: + sdo: https://schema.org/ + pico: https://personsincontext.org/model# + crm: http://www.cidoc-crm.org/cidoc-crm/ + wikidata: http://www.wikidata.org/entity/ + geonames: http://sws.geonames.org/ + +imports: + - linkml:types + +slots: + death_place: + slot_uri: sdo:deathPlace + description: | + The place where a person died. + + =========================================================================== + PiCo ONTOLOGY ALIGNMENT (pico.ttl lines 436-447) + =========================================================================== + + Per PiCo (Persons in Context) ontology, death places can be recorded as: + - **xsd:string**: Literal place name as mentioned in source + - **xsd:anyURI**: Link to geographic thesaurus (GeoNames, Wikidata) + + Both formats may be used together: + - Literal: Preserves original source notation + - URI: Enables geographic linking and reconciliation + + =========================================================================== + THESAURUS LINKING + =========================================================================== + + Preferably link to established geographic thesauri: + + **GeoNames** (preferred for geographic entities): + - Format: http://sws.geonames.org/{geonames_id}/ + - Example: http://sws.geonames.org/2759794/ (Amsterdam) + + **Wikidata** (preferred for historical places): + - Format: http://www.wikidata.org/entity/Q{number} + - Example: http://www.wikidata.org/entity/Q727 (Amsterdam) + + =========================================================================== + SPECIAL CASES + =========================================================================== + + **Conflict zones**: + - Place may be approximate: "Gaza Strip" rather than specific city + - Multiple possible locations: "somewhere in Auschwitz complex" + + **At sea / in transit**: + - "At sea, North Atlantic Ocean" + 
- "En route between Amsterdam and Batavia" + + **Unknown location**: + - If deceased is true but place unknown, leave death_place empty + - Use circumstances_of_death to document known context + + **Historical places**: + - Use Wikidata for places that no longer exist + - Document modern equivalent if applicable + + =========================================================================== + EXAMPLES + =========================================================================== + + Literal place name only: + ```yaml + death_place: "Auschwitz concentration camp, Poland" + ``` + + URI to Wikidata: + ```yaml + death_place: "http://www.wikidata.org/entity/Q7341" # Auschwitz + ``` + + Approximate location (conflict zone): + ```yaml + death_place: "Gaza Strip, Palestinian Territories" + death_place_uri: "http://www.wikidata.org/entity/Q39760" + ``` + + Historical place: + ```yaml + death_place: "Batavia, Dutch East Indies" + death_place_uri: "http://www.wikidata.org/entity/Q3630" # Now Jakarta + ``` + + =========================================================================== + RELATIONSHIP TO OTHER DEATH SLOTS + =========================================================================== + + death_place works together with: + - `deceased`: Boolean flag (true if person has died) + - `date_of_death`: TimeSpan for when death occurred + - `circumstances_of_death`: Description of how/why death occurred + - `martyred`: Boolean if death due to conflict/persecution + + =========================================================================== + ONTOLOGY MAPPINGS + =========================================================================== + + - Schema.org: sdo:deathPlace (primary) + - CIDOC-CRM: crm:P7_took_place_at via crm:E69_Death event + - Wikidata: P20 (place of death) + + range: string + required: false + exact_mappings: + - sdo:deathPlace + - wikidata:P20 + close_mappings: + - crm:P7_took_place_at + comments: + - "Allows both string and URI formats per PiCo pattern" + 
- "Prefer thesaurus links (GeoNames, Wikidata) when possible" + - "Handle conflict zones and approximate locations gracefully" + - "Only populate when deceased is true" + - "Use circumstances_of_death for context when location is approximate" diff --git a/schemas/20251121/linkml/modules/slots/extraction_metadata.yaml b/schemas/20251121/linkml/modules/slots/extraction_metadata.yaml new file mode 100644 index 0000000000..853a77c3df --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/extraction_metadata.yaml @@ -0,0 +1,57 @@ +# Extraction Metadata Slot +# Reusable slot for linking to ExtractionMetadata provenance + +id: https://nde.nl/ontology/hc/slot/extraction_metadata +name: extraction_metadata_slot +title: Extraction Metadata Slot +version: 1.0.0 + +prefixes: + linkml: https://w3id.org/linkml/ + hc: https://nde.nl/ontology/hc/ + prov: http://www.w3.org/ns/prov# + +imports: + - linkml:types + - ../classes/ExtractionMetadata + +slots: + extraction_metadata: + slot_uri: prov:wasGeneratedBy + description: | + Provenance metadata for how this observation/record was extracted. + + Records the extraction activity including: + - source_file: Path to input data source + - extraction_date: When extraction occurred + - extraction_method: Tool/API used (exa, firecrawl, manual, etc.) 
+ - extraction_agent: AI agent that performed extraction + - cost_usd: API cost for the extraction + + **PROV-O Alignment**: + - This slot represents prov:wasGeneratedBy relationship + - Links the observation (prov:Entity) to the extraction (prov:Activity) + + **Use Cases**: + - PersonObservation: Track how staff data was extracted + - WebObservation: Track web scraping provenance + - CustodianObservation: Track custodian data extraction + - LinkedInProfile: Track LinkedIn profile extraction + - ConnectionNetwork: Track connection list extraction + + **Example**: + ```yaml + extraction_metadata: + source_file: data/custodian/person/affiliated/parsed/rijksmuseum_staff.json + extraction_date: "2025-12-12T22:00:00Z" + extraction_method: exa_crawling_exa + extraction_agent: claude-opus-4.5 + cost_usd: 0.001 + ``` + + **See Also**: + - ExtractionMetadata class for full field definitions + - AGENTS.md Rule 20 for required provenance fields + range: ExtractionMetadata + inlined: true + required: false diff --git a/schemas/20251121/linkml/modules/slots/gender_identity.yaml b/schemas/20251121/linkml/modules/slots/gender_identity.yaml new file mode 100644 index 0000000000..a4f2426e29 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/gender_identity.yaml @@ -0,0 +1,81 @@ +# Gender Identity Slot +# +# DESIGN PRINCIPLE: Inclusive, self-determined gender expression +# +# This slot follows Schema.org's explicit guidance that "text strings are also +# acceptable for people who are not a binary gender" (schema.org/gender). +# +# We use FREE-TEXT rather than an enum to: +# 1. Respect self-identification (people define their own gender) +# 2. Support cultural/historical variations in gender concepts +# 3. Avoid forcing people into categories that don't fit them +# 4. 
Allow for terms that may not exist yet or are culturally specific +# +# See also: FOAF's gender property which notes it is "typically but not +# necessarily 'male' or 'female'" (foaf:gender) + +id: https://nde.nl/ontology/hc/slot/gender_identity +name: gender_identity_slot +title: Gender Identity Slot + +prefixes: + schema: https://schema.org/ + foaf: http://xmlns.com/foaf/0.1/ + pico: https://personsincontext.org/model# + +imports: + - linkml:types + +slots: + gender_identity: + slot_uri: schema:gender + description: >- + The person's gender identity as they define it. This is a free-text field + that respects self-identification and does not impose binary categories. + comments: + - >- + **Inclusive Design**: This field intentionally uses free text rather than + a restrictive enumeration. People should be able to express their gender + in their own terms. + - >- + **Common values** (not exhaustive, not required): man, woman, non-binary, + genderqueer, genderfluid, agender, bigender, two-spirit, transgender, + cisgender, or any self-identified term. + - >- + **Historical records**: For historical persons, use terms as recorded in + sources where possible, or use contextually appropriate terms. + - >- + **Unknown/Not recorded**: Leave empty if unknown; do not assume. + - >- + **Privacy**: This field may be left empty by choice. Absence of data + should not be interpreted as absence of identity. 
+ range: string + examples: + - value: "non-binary" + description: "Self-identified non-binary person" + - value: "woman" + description: "Self-identified woman" + - value: "man" + description: "Self-identified man" + - value: "genderfluid" + description: "Self-identified genderfluid person" + - value: "two-spirit" + description: "Indigenous North American gender identity" + - value: "transgender woman" + description: "Self-identified transgender woman" + - value: "not specified" + description: "Person chose not to specify" + notes: + - >- + Schema.org explicitly states: "While https://schema.org/Male and + https://schema.org/Female may be used, text strings are also acceptable + for people who are not a binary gender." + - >- + FOAF defines foaf:gender as "The gender of this Agent (typically but + not necessarily 'male' or 'female')." + - >- + This approach aligns with modern data protection principles (GDPR) + regarding sensitive personal data and self-determination. + annotations: + custodian_types: '["*"]' + custodian_types_rationale: "Applicable to all custodian types that track person data" diff --git a/schemas/20251121/linkml/modules/slots/given_name.yaml b/schemas/20251121/linkml/modules/slots/given_name.yaml new file mode 100644 index 0000000000..aeaf0ea375 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/given_name.yaml @@ -0,0 +1,47 @@ +id: https://nde.nl/ontology/hc/slot/given_name +name: given_name +title: Given Name + +prefixes: + pnv: https://w3id.org/pnv# + sdo: https://schema.org/ + foaf: http://xmlns.com/foaf/0.1/ + +imports: + - linkml:types + +slots: + given_name: + slot_uri: pnv:givenName + description: | + The name(s) given to someone at birth or any other name-giving event. + + In the Person Name Vocabulary (PNV), this represents the given name + component of a structured person name. It may include: + + - Single given name: "Pieter" + - Multiple given names: "Jan Willem" + - Nicknames/call names: "Hans" (from "H.A.F.M.O. 
(Hans)") + - Diminutives: "Pietje" + + For historical Dutch records, this typically corresponds to the + "voornaam" (first name) or "roepnaam" (call name). + + If only initials are known, either: + - Use the initials slot (preferred): "P.R." + - Or put initials here as fallback: "P.R." + + Examples: + - "Maria" (simple given name) + - "Johannes Wilhelmus" (multiple given names) + - "Hans" (extracted from "H.A.F.M.O. (Hans) van Mierlo") + - "Geertje" (diminutive of Geertruid) + + range: string + exact_mappings: + - pnv:givenName + close_mappings: + - sdo:givenName + - foaf:firstName + related_mappings: + - foaf:givenName diff --git a/schemas/20251121/linkml/modules/slots/has_person_name.yaml b/schemas/20251121/linkml/modules/slots/has_person_name.yaml new file mode 100644 index 0000000000..d9542815a3 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/has_person_name.yaml @@ -0,0 +1,131 @@ +# has_person_name slot +# Links a PersonObservation to a structured PersonName (PNV-aligned) +# +# This slot enables linking the simple person_name string to a fully +# structured PersonName class with PNV components (given_name, patronym, +# surname_prefix, base_surname, initials, etc.) + +id: https://nde.nl/ontology/hc/slot/has_person_name +name: has_person_name_slot +title: Has Person Name (Structured PNV) + +prefixes: + pnv: https://w3id.org/pnv# + pico: https://personsincontext.org/model# + sdo: https://schema.org/ + foaf: http://xmlns.com/foaf/0.1/ + +imports: + - linkml:types + +slots: + has_person_name: + slot_uri: pnv:hasName + description: | + Structured name of the person following Person Name Vocabulary (PNV). + + =========================================================================== + RELATIONSHIP TO person_name SLOT + =========================================================================== + + PersonObservation has TWO name-related slots: + + 1. **person_name** (string): Simple full name as recorded in source + - Example: "Dr. 
Jane Smith" + - Always present for human-readable display + - Quick access without parsing structured components + + 2. **has_person_name** (PersonName): Structured name with PNV components + - Optional but recommended for Dutch/historical names + - Enables sorting by base_surname (Dutch convention) + - Supports patronymics, tussenvoegsels, initials + - Links to PersonName class with full PNV structure + + =========================================================================== + USE CASES FOR STRUCTURED NAMES + =========================================================================== + + 1. **Dutch Name Sorting**: + - "Maria de Vries" sorts under V (base_surname: "Vries") + - "Jan van den Berg" sorts under B (base_surname: "Berg") + + 2. **Historical Records with Patronymics**: + - "Jan Pieterszoon van der Waals" + - given_name: "Jan" + - patronym: "Pieterszoon" + - surname_prefix: "van der" + - base_surname: "Waals" + + 3. **Initial-Based Names (Common in NL)**: + - "H.A.F.M.O. (Hans) van Mierlo" + - initials: "H.A.F.M.O." + - given_name: "Hans" + - surname_prefix: "van" + - base_surname: "Mierlo" + + 4. 
**Unknown/Unnamed Persons** (historical records): + - name_specification: "unknown" or "unnamed" + - Prevents ambiguity between missing data and genuinely unnamed persons + + =========================================================================== + WHEN TO USE has_person_name + =========================================================================== + + ALWAYS use has_person_name when: + - Name has Dutch surname prefix (tussenvoegsel) + - Name has patronymic component + - Name contains initials alongside given name + - Historical name with uncertain/variable spelling + - Need to sort by base_surname (Dutch alphabetization) + + OPTIONAL (person_name string sufficient) when: + - Simple Western name: "John Smith" + - No special components to parse + - Quick data entry without structured analysis + + =========================================================================== + ONTOLOGY ALIGNMENT + =========================================================================== + + - PNV: `pnv:hasName` (primary - links person to PersonName) + - Schema.org: `sdo:name` (fallback for simple string via person_name slot) + - FOAF: `foaf:name` (fallback for simple string) + - CIDOC-CRM: `crm:P1_is_identified_by` (general identification relationship) + + range: PersonName + required: false + inlined: true + + comments: + - "PNV standard: https://w3id.org/pnv" + - "Use alongside person_name (string) for both quick access and structured parsing" + - "Essential for Dutch names with tussenvoegsels (van, de, van der, etc.)" + - "Enables proper alphabetical sorting by base_surname" + + examples: + - value: + literal_name: "Prof. dr. 
Willem van der Berg" + given_name: "Willem" + surname_prefix: "van der" + base_surname: "Berg" + description: "Dutch name with title and tussenvoegsel, sorts under B" + + - value: + literal_name: "Jan Pieterszoon Sweelinck" + given_name: "Jan" + patronym: "Pieterszoon" + base_surname: "Sweelinck" + description: "Historical Dutch name with patronymic" + + - value: + literal_name: "H.A.F.M.O. (Hans) van Mierlo" + initials: "H.A.F.M.O." + given_name: "Hans" + surname_prefix: "van" + base_surname: "Mierlo" + description: "Dutch name with initials and call name" + + - value: + literal_name: null + name_specification: "unknown" + description: "Unknown person from historical record" diff --git a/schemas/20251121/linkml/modules/slots/initials.yaml b/schemas/20251121/linkml/modules/slots/initials.yaml new file mode 100644 index 0000000000..c2ed24edab --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/initials.yaml @@ -0,0 +1,52 @@ +id: https://nde.nl/ontology/hc/slot/initials +name: initials +title: Initials + +prefixes: + pnv: https://w3id.org/pnv# + +imports: + - linkml:types + +slots: + initials: + slot_uri: pnv:initials + description: | + Initial letter(s) of given name(s), each followed by a period. + + An initial is the first letter of a person's given name, or sometimes + a combination of letters. Use this property to record initials + separately from (or in addition to) the full given name. + + Format: Each initial should be followed by a period (dot). + + Use cases: + 1. Record initials alongside known given name + 2. Record initials when full given name is unknown + 3. Capture unusual abbreviation patterns from historical sources + + Examples from PNV specification: + + Example 1: "Peter R. de Vries" + - initials: "P.R." + - given_name: "Peter R." + + Example 2: "C.Joh. Kieviet" + - initials: "C.Joh." (unusual abbreviation format) + - OR given_name: "C.Joh." (alternative approach) + + Example 3: "H.A.F.M.O. (Hans) van Mierlo" + - initials: "H.A.F.M.O." 
+ - given_name: "Hans" + + If only initials are known (not the full given name), use this property. + The given_name property may also contain the initials as a fallback + when no other representation is available. + + Note: Store initials exactly as they appear in the source, including + unusual formatting patterns like "C.Joh." for historical accuracy. + + range: string + pattern: "^[A-Z][a-zA-Z]*\\.([A-Z][a-zA-Z]*\\.)*$" + exact_mappings: + - pnv:initials diff --git a/schemas/20251121/linkml/modules/slots/literal_name.yaml b/schemas/20251121/linkml/modules/slots/literal_name.yaml new file mode 100644 index 0000000000..2a0e5e56b1 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/literal_name.yaml @@ -0,0 +1,45 @@ +id: https://nde.nl/ontology/hc/slot/literal_name +name: literal_name +title: Literal Name + +prefixes: + pnv: https://w3id.org/pnv# + sdo: https://schema.org/ + foaf: http://xmlns.com/foaf/0.1/ + rdfs: http://www.w3.org/2000/01/rdf-schema# + +imports: + - linkml:types + +slots: + literal_name: + slot_uri: pnv:literalName + description: | + Full personal name as written in the source material. + + This is the complete name string exactly as it appears, including all + name components: given names, patronyms, surname prefixes, and surnames. + + REQUIRED for PersonName unless name_specification indicates the person + was "unknown" or "unnamed". + + Per PNV specification: + "This property may only be left blank if a person's name was unknown + or if a person was unnamed (e.g. a child that died shortly after being + born), in which cases the property pnv:nameSpecification should state + 'unknown' or 'unnamed'." + + Examples: + - "Pieter Corneliszoon van der Berg" + - "H.A.F.M.O. 
(Hans) van Mierlo" + - "Maria de Vries" + - "Jan Janszoon" + - "Björk Guðmundsdóttir" + + range: string + exact_mappings: + - pnv:literalName + close_mappings: + - foaf:name + - sdo:name + - rdfs:label diff --git a/schemas/20251121/linkml/modules/slots/name_specification.yaml b/schemas/20251121/linkml/modules/slots/name_specification.yaml new file mode 100644 index 0000000000..61e12ed942 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/name_specification.yaml @@ -0,0 +1,55 @@ +id: https://nde.nl/ontology/hc/slot/name_specification +name: name_specification +title: Name Specification + +prefixes: + pnv: https://w3id.org/pnv# + +imports: + - linkml:types + +slots: + name_specification: + slot_uri: pnv:nameSpecification + description: | + Indicates why literal_name is blank: "unknown" or "unnamed". + + REQUIRED when literal_name is empty. This property prevents ambiguity + between missing data and persons whose names were genuinely not known + or never given. + + Allowed values: + + "unknown": + Use when the person's name was not recorded, is illegible, or cannot + be determined from the source. The person had a name, but we don't + know what it was. + + Examples: + - Historical record mentions "a servant" without specifying name + - Signature on document is illegible + - Record says "name not given" + - Photograph of unidentified person + + "unnamed": + Use when the person was never given a name. This is distinct from + unknown - the person genuinely did not have a name. + + Examples: + - Stillborn infant who was never named + - Child who died shortly after birth before being named + - Baptism record explicitly states child was not yet named + + Per PNV specification: + "Literal name [...] may only be left blank if a person's name was + unknown or if a person was unnamed (e.g. a child that died shortly + after being born), in which cases the property pnv:nameSpecification + should state 'unknown' or 'unnamed'." 
+ + IMPORTANT: If literal_name is populated, do NOT set name_specification. + This property is only for cases where literal_name must be blank. + + range: string + pattern: "^(unknown|unnamed)$" + exact_mappings: + - pnv:nameSpecification diff --git a/schemas/20251121/linkml/modules/slots/occupation.yaml b/schemas/20251121/linkml/modules/slots/occupation.yaml new file mode 100644 index 0000000000..9977d70bd8 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/occupation.yaml @@ -0,0 +1,137 @@ +# Occupation Slot +# String or URI representing a person's occupation as mentioned in source +# +# PiCo Pattern: Occupation as recorded in the source document. +# Can be linked to occupational thesauri for standardization. + +id: https://nde.nl/ontology/hc/slot/occupation +name: occupation_slot +title: Occupation Slot + +prefixes: + sdo: https://schema.org/ + pico: https://personsincontext.org/model# + wikidata: http://www.wikidata.org/entity/ + +imports: + - linkml:types + +slots: + occupation: + slot_uri: sdo:hasOccupation + description: | + The occupational title or profession of a person as mentioned in the source. 
+ + =========================================================================== + PiCo ONTOLOGY ALIGNMENT (pico.ttl lines 548-556) + =========================================================================== + + Per PiCo (Persons in Context) ontology: + - Records occupation as mentioned on the source + - Can be literal string or URI to thesaurus + - Part of PersonObservation (evidence from source) + + =========================================================================== + SOURCE PRESERVATION + =========================================================================== + + Record the occupation exactly as stated in the source: + - Historical terms: "wheelwright", "cordwainer", "cooper" + - Period-specific titles: "Keeper of the King's Pictures" + - Vernacular forms: "schilder" (Dutch for painter) + + =========================================================================== + THESAURUS LINKING + =========================================================================== + + When possible, link to occupational thesauri: + + **Wikidata** (general occupations): + - Q1028181 (painter) + - Q635734 (archivist) + - Q674426 (curator) + + **HISCO** (Historical International Standard Classification of Occupations): + - For historical occupations + + **AAT** (Getty Art & Architecture Thesaurus): + - For heritage-related occupations + + =========================================================================== + HERITAGE-SPECIFIC OCCUPATIONS + =========================================================================== + + Common heritage sector occupations: + - Archivist, Librarian, Curator, Conservator + - Museum Director, Collection Manager + - Registrar, Preparator, Educator + - Digital Preservation Specialist + + For staff roles at heritage institutions, prefer the `staff_role` slot + which uses the StaffRole class hierarchy for controlled vocabulary. 
+ + =========================================================================== + EXAMPLES + =========================================================================== + + Simple literal: + ```yaml + occupation: "painter" + ``` + + Historical occupation: + ```yaml + occupation: "court painter to His Majesty King Willem I" + ``` + + With thesaurus URI: + ```yaml + occupation: "http://www.wikidata.org/entity/Q1028181" # painter + ``` + + Dutch source: + ```yaml + occupation: "schilder" # Preserve source language + occupation_normalized: "painter" # Optional normalized form + ``` + + =========================================================================== + RELATIONSHIP TO staff_role SLOT + =========================================================================== + + - `occupation`: General profession (from any source, any context) + - `staff_role`: Specific role at heritage institution (StaffRole class) + + A person may have: + - occupation: "art historian" (general profession) + - staff_role: Curator (specific institutional role) + + =========================================================================== + MULTIVALUED + =========================================================================== + + A person may have multiple occupations: + - Primary occupation + - Secondary occupation + - Historical occupations (changed over time) + + Use separate PersonObservations for different time periods. 
+ + =========================================================================== + ONTOLOGY MAPPINGS + =========================================================================== + + - Schema.org: sdo:hasOccupation (primary) + - Wikidata: P106 (occupation) + + range: string + required: false + multivalued: true + exact_mappings: + - sdo:hasOccupation + - wikidata:P106 + comments: + - "Record occupation as mentioned in source" + - "Prefer thesaurus links when possible" + - "Preserve historical/vernacular occupation terms" + - "Use staff_role for heritage institution positions" diff --git a/schemas/20251121/linkml/modules/slots/patronym.yaml b/schemas/20251121/linkml/modules/slots/patronym.yaml new file mode 100644 index 0000000000..9d9443334f --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/patronym.yaml @@ -0,0 +1,56 @@ +id: https://nde.nl/ontology/hc/slot/patronym +name: patronym +title: Patronym + +prefixes: + pnv: https://w3id.org/pnv# + +imports: + - linkml:types + +slots: + patronym: + slot_uri: pnv:patronym + description: | + Name element based on the given name of one's father (patronym) + or mother (matronym). + + A patronym is a name component derived from the father's (or sometimes + mother's) given name. This was the primary naming system in many cultures + before hereditary surnames became common. + + Patronymic patterns by culture: + + Dutch Historical: + - -zoon, -szoon, -z. 
(son of): "Janszoon" = son of Jan + - -sen (son of): "Pietersen" = son of Pieter + - -dochter (daughter of): "Jansdochter" = daughter of Jan + + Icelandic (still in use): + - -son (son of): "Jónsson" = son of Jón + - -dóttir (daughter of): "Guðmundsdóttir" = daughter of Guðmundur + + Arabic: + - ibn, bin (son of): "ibn Abdullah" = son of Abdullah + - bint (daughter of): "bint Fatima" = daughter of Fatima + + Hebrew: + - ben (son of): "ben David" = son of David + - bat (daughter of): "bat Sarah" = daughter of Sarah + + Slavic: + - -ovich, -evich (son of): "Ivanovich" = son of Ivan + - -ovna, -evna (daughter of): "Petrovna" = daughter of Petr + + Examples: + - "Jan Pieterszoon" → patronym: "Pieterszoon" + - "Björk Guðmundsdóttir" → patronym: "Guðmundsdóttir" + - "Muhammad ibn Abdullah" → patronym: "ibn Abdullah" + - "Anna Jansdochter" → patronym: "Jansdochter" + + Note: Store the complete patronymic element including any prefix + like "ibn" or "ben". + + range: string + exact_mappings: + - pnv:patronym diff --git a/schemas/20251121/linkml/modules/slots/pronouns.yaml b/schemas/20251121/linkml/modules/slots/pronouns.yaml new file mode 100644 index 0000000000..0a444ee801 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/pronouns.yaml @@ -0,0 +1,131 @@ +# Pronouns Slot +# +# DESIGN PRINCIPLE: Self-determination of pronouns +# +# Schema.org explicitly supports this with schema:pronouns, noting: +# "Typically the person concerned is the best authority as pronouns are a +# critical part of personal identity and expression." +# +# This is a free-text field to support all pronoun configurations including +# neopronouns and culturally-specific forms. +# +# LINGUISTIC DIVERSITY NOTE: +# Pronouns are LANGUAGE-SPECIFIC. 
Many languages have gender-neutral pronouns +# by default and do not encode gender in third-person pronouns at all: +# - Malay/Indonesian: "dia" (singular), "mereka" (plural) - no gender distinction +# - Finnish: "hän" - gender-neutral third-person singular +# - Turkish: "o" - gender-neutral third-person singular +# - Hungarian: "ő" - gender-neutral third-person singular +# - Persian/Farsi: "او" (u) - gender-neutral third-person singular +# - Estonian: "tema/ta" - gender-neutral third-person singular +# - Swahili: "yeye" - gender-neutral third-person singular +# - Bengali: "সে" (se/she) - gender-neutral colloquially +# - Armenian: "նա" (na) - gender-neutral third-person singular +# - Georgian: "ის" (is) - gender-neutral third-person singular +# - Yoruba: "ó/o" - gender-neutral third-person singular +# - Chinese: "他/她/它" (tā) - same pronunciation, different characters (modern written distinction) +# - Japanese: Context-based, pronouns often omitted entirely +# +# In these languages, pronouns do NOT indicate gender identity. The concept of +# "preferred pronouns" as a gender expression is primarily relevant in languages +# with gendered third-person pronouns (English, Romance, Slavic, Germanic, etc.). + +id: https://nde.nl/ontology/hc/slot/pronouns +name: pronouns_slot +title: Pronouns Slot + +prefixes: + schema: https://schema.org/ + +imports: + - linkml:types + +slots: + pronouns: + slot_uri: schema:pronouns + description: >- + The person's preferred pronouns as they specify them. This is a free-text + field that respects self-determination. + comments: + - >- + **Self-Authority**: As Schema.org notes, "the person concerned is the + best authority as pronouns are a critical part of personal identity + and expression." + - >- + **Format**: May be formatted as the person prefers, e.g., "she/her", + "they/them", "he/him", "ze/zir", "any pronouns", etc. + - >- + **Multiple Sets**: Some people use multiple pronoun sets (e.g., + "she/they" or "he/they"). 
Record as specified. + - >- + **Neopronouns**: Fully supported - use as specified by the person + (e.g., "xe/xem", "fae/faer", "ey/em"). + - >- + **Linguistic Diversity**: Many languages use gender-neutral pronouns + by default (e.g., Malay/Indonesian "dia", Finnish "hän", Turkish "o", + Hungarian "ő", Persian "او", Estonian "tema", Swahili "yeye"). In these + languages, pronouns do NOT indicate gender identity. A value of "dia" + (Malay/Indonesian) is gender-neutral by nature, not a gender statement. + - >- + **Multilingual Context**: This field should be interpreted within the + person's linguistic/cultural context. Consider adding the language + (e.g., "dia [Malay]" or "hän [Finnish]") for clarity when relevant. + - >- + **Historical/Cultural Records**: Historical and non-Western sources may + not have "pronoun preferences" as a concept. Leave empty rather than + assuming. For historical persons, pronouns were typically assigned by + observers based on perceived gender, not self-determined. + - >- + **Privacy**: This field may be left empty by choice. 
+ range: string + examples: + - value: "she/her" + description: "Feminine pronouns (English)" + - value: "he/him" + description: "Masculine pronouns (English)" + - value: "they/them" + description: "Gender-neutral singular they (English)" + - value: "she/they" + description: "Person uses both she/her and they/them (English)" + - value: "ze/zir" + description: "Neopronoun set (English)" + - value: "any pronouns" + description: "Person is comfortable with any pronouns" + - value: "avoid pronouns, use name" + description: "Person prefers name instead of pronouns" + - value: "dia" + description: "Gender-neutral by default (Malay/Indonesian) - NOT a gender identity statement" + - value: "hän" + description: "Gender-neutral by default (Finnish) - standard third-person singular" + - value: "o" + description: "Gender-neutral by default (Turkish) - standard third-person singular" + - value: "ő" + description: "Gender-neutral by default (Hungarian) - standard third-person singular" + - value: "او" + description: "Gender-neutral by default (Persian/Farsi) - standard third-person singular" + - value: "elle/elles" + description: "Gender-neutral Spanish neologism (alternative to él/ella)" + - value: "iel" + description: "Gender-neutral French neologism (alternative to il/elle)" + notes: + - >- + Schema.org (schema:pronouns): "A short string listing or describing + pronouns for a person... Publishers and consumers of this information + are reminded to treat this data responsibly." + - >- + This slot supports professional contexts where correct pronoun usage + is important for respectful communication. + - >- + **IMPORTANT: Pronouns ≠ Gender Identity in many languages**. In languages + with gender-neutral default pronouns (Malay, Finnish, Turkish, Hungarian, + Persian, Estonian, Swahili, etc.), recording pronouns is NOT equivalent + to recording gender identity. 
The concept of "pronoun preference" as + gender expression is primarily relevant in gendered-pronoun languages + (English, Spanish, French, German, Russian, Arabic, Hindi, etc.). + - >- + For global heritage data, consider that many historical and contemporary + cultures do not have the same pronoun/gender relationship as Western + gendered languages. Interpret this field accordingly. + annotations: + custodian_types: '["*"]' + custodian_types_rationale: "Applicable to all custodian types that track person data" diff --git a/schemas/20251121/linkml/modules/slots/religion.yaml b/schemas/20251121/linkml/modules/slots/religion.yaml new file mode 100644 index 0000000000..cf0df4769b --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/religion.yaml @@ -0,0 +1,136 @@ +# Religion Slot +# String or URI representing a person's religious conviction as mentioned in source +# +# PiCo Pattern: Religion as recorded in the source document. +# Can be linked to religious thesauri for standardization. + +id: https://nde.nl/ontology/hc/slot/religion +name: religion_slot +title: Religion Slot + +prefixes: + pico: https://personsincontext.org/model# + sdo: https://schema.org/ + wikidata: http://www.wikidata.org/entity/ + +imports: + - linkml:types + +slots: + religion: + slot_uri: pico:hasReligion + description: | + The religious conviction of a person as mentioned in the source. 
+ + =========================================================================== + PiCo ONTOLOGY ALIGNMENT (pico.ttl lines 558-567) + =========================================================================== + + Per PiCo (Persons in Context) ontology: + - Records religion as mentioned on the source + - Can be literal string or URI to thesaurus + - Part of PersonObservation (evidence from source) + + =========================================================================== + SOURCE PRESERVATION + =========================================================================== + + Record the religious affiliation exactly as stated in the source: + - Historical terms: "Nederduits Gereformeerd" (Dutch Reformed) + - Period-specific: "of the Roman persuasion" + - Vernacular forms: "Rooms-Katholiek" (Dutch for Roman Catholic) + + =========================================================================== + THESAURUS LINKING + =========================================================================== + + When possible, link to thesauri for religions: + + **Wikidata** (recommended): + - Q5043 (Christianity) + - Q432 (Islam) + - Q9268 (Judaism) + - Q748 (Buddhism) + - Q9089 (Hinduism) + - Q23540 (Protestantism) + - Q1841 (Catholicism) + - Q170208 (Dutch Reformed Church) + + **AAT** (Getty Art & Architecture Thesaurus): + - For religious contexts in art history + + =========================================================================== + HERITAGE CONTEXT + =========================================================================== + + Religion is particularly relevant for heritage institutions when: + - Holy sites (H type): Churches, temples, mosques managing collections + - Religious archives: Parish records, ecclesiastical documents + - Art history: Religious patronage, iconography research + - Genealogy: Baptism, marriage, burial records + - Provenance: Persecution, confiscation, restitution research + +
=========================================================================== + EXAMPLES + =========================================================================== + + Simple literal: + ```yaml + religion: "Roman Catholic" + ``` + + Historical Dutch source: + ```yaml + religion: "Nederduits Gereformeerd" + ``` + + With thesaurus URI: + ```yaml + religion: "http://www.wikidata.org/entity/Q170208" # Dutch Reformed + ``` + + Combined literal and URI: + ```yaml + religion: "Nederduits Gereformeerd" + religion_uri: "http://www.wikidata.org/entity/Q170208" + ``` + + =========================================================================== + SENSITIVITY CONSIDERATIONS + =========================================================================== + + Religious data is sensitive personal information: + - Only record when relevant to heritage/archival context + - Follow GDPR and local privacy regulations + - Historical records (pre-1900) generally less restricted + - Living persons: Only use publicly available information + + =========================================================================== + TEMPORAL ASPECTS + =========================================================================== + + A person's religion may change over time: + - Conversion (voluntary or forced) + - Apostasy + - Multiple affiliations + + Use separate PersonObservations for different time periods, + each with its own source and observation date. 
+ + =========================================================================== + ONTOLOGY MAPPINGS + =========================================================================== + + - PiCo: pico:hasReligion (primary) + - Wikidata: P140 (religion or worldview) + + range: string + required: false + exact_mappings: + - pico:hasReligion + - wikidata:P140 + comments: + - "Record religion as mentioned in source" + - "Prefer Wikidata links for standardization" + - "Sensitive data - follow privacy regulations" + - "Use separate observations for temporal changes" diff --git a/schemas/20251121/linkml/modules/slots/surname_prefix.yaml b/schemas/20251121/linkml/modules/slots/surname_prefix.yaml new file mode 100644 index 0000000000..c07897a365 --- /dev/null +++ b/schemas/20251121/linkml/modules/slots/surname_prefix.yaml @@ -0,0 +1,50 @@ +id: https://nde.nl/ontology/hc/slot/surname_prefix +name: surname_prefix +title: Surname Prefix + +prefixes: + pnv: https://w3id.org/pnv# + +imports: + - linkml:types + +slots: + surname_prefix: + slot_uri: pnv:surnamePrefix + description: | + Prefix before the surname (tussenvoegsel in Dutch). + + A surname prefix is a name part prefixing the surname. In Dutch, these + are called "tussenvoegsels" and include words like "van", "de", "het", + "ter", "ten", "van de", "van der", "van den", etc. + + Common prefixes by language/region: + + Dutch: + - van, de, het, 't + - ter, ten, van de, van der, van den + - in 't, op den, uit het + + German: + - von, zu, von und zu + + French: + - de, du, de la, des, le, la + + Spanish/Portuguese: + - de, del, de la, dos, da, das + + Examples: + - "Pieter van der Berg" → surname_prefix: "van der" + - "Maria de Vries" → surname_prefix: "de" + - "Jan in 't Veld" → surname_prefix: "in 't" + - "Otto von Bismarck" → surname_prefix: "von" + + Equivalent to A2A:PersonNamePrefixLastName. + + Note: Store the prefix exactly as written in the source, including + spacing and capitalization.
+ + range: string + exact_mappings: + - pnv:surnamePrefix