Refactor schema slots and classes for improved clarity and structure
- Migrated `archived_at` to `is_or_was_archived_at` in AuxiliaryDigitalPlatform, WebObservation, and other relevant classes to better reflect historical archival status. - Removed `bold_id` slot and replaced it with `has_or_had_identifier` linked to the new `BOLDIdentifier` class in BiologicalObject. - Introduced `Bookplate` and `Approver` classes to enhance provenance tracking and ownership documentation. - Updated `InformationCarrier` to replace `bookplate` with `includes_or_included` for better representation of ownership marks. - Added new slots `is_or_was_approved_by` and `is_or_was_archived_at` to capture historical approval and archival locations. - Archived old slot definitions for `archived_at` and `bold_id` to maintain schema integrity. - Enhanced LinkedIn profile extraction functionality by integrating Linkup API alongside Exa API.
This commit is contained in:
parent
c8471d3a02
commit
21c207c9da
17 changed files with 779 additions and 113 deletions
|
|
@ -269,6 +269,28 @@ export interface SlotExportInfo {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Forward dependencies ("imports") of a single slot: the schema elements
 * the slot itself refers to through its range or its any_of alternatives.
 */
export interface SlotImportInfo {
  /** Name of the slot this record describes. */
  slotName: string;

  /**
   * Alternatives listed under any_of (union types).
   * Each entry names the referenced element and flags whether it is a class and/or an enum.
   */
  anyOfTypes: { name: string; isClass: boolean; isEnum: boolean }[];

  /**
   * The slot's range when it is a class or enum (the main dependency).
   * Has the same shape as one any_of entry; absent when there is no such range.
   */
  rangeType?: SlotImportInfo['anyOfTypes'][number];
}
|
||||
|
||||
const SCHEMA_BASE_PATH = '/schemas/20251121/linkml';
|
||||
|
||||
/**
|
||||
|
|
@ -1561,6 +1583,68 @@ class LinkMLSchemaService {
|
|||
return exportInfo;
|
||||
}
|
||||
|
||||
/**
 * Get import/dependency information for a slot.
 * This finds what a slot depends ON (forward dependencies) and is the
 * inverse of getSlotExportInfo.
 *
 * Analyzes:
 * - Range type (class or enum this slot references)
 * - Any_of types (union types if applicable)
 *
 * Ranges that are neither a known class nor a known enum are ignored.
 * Malformed any_of entries (null, non-objects, entries without a string
 * `range`) are skipped, and each any_of type is reported at most once.
 *
 * @param slotName - Name of the slot to analyze.
 * @returns The slot's import info; when the slot is unknown, a record with
 *   no range type and an empty any_of list.
 */
async getSlotImportInfo(slotName: string): Promise<SlotImportInfo> {
  await this.initialize();

  const importInfo: SlotImportInfo = {
    slotName,
    rangeType: undefined,
    anyOfTypes: [],
  };

  // Get the slot definition
  const slotDef = this.slotSchemas.get(slotName);
  if (!slotDef) {
    return importInfo;
  }

  // Map a range name to a dependency entry, or undefined when it is not a
  // non-empty string naming a loaded class or enum.
  const resolveRange = (rangeName: unknown): SlotImportInfo['rangeType'] => {
    if (typeof rangeName !== 'string' || rangeName === '') {
      return undefined;
    }
    const isClass = this.classSchemas.has(rangeName);
    const isEnum = this.enumSchemas.has(rangeName);
    return isClass || isEnum ? { name: rangeName, isClass, isEnum } : undefined;
  };

  // Check range type
  importInfo.rangeType = resolveRange(slotDef.range);

  // Check any_of types (union types). The cast is confined to this single
  // read; the value is treated as unknown and narrowed from here on.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const anyOf: unknown = (slotDef as any).any_of;
  if (Array.isArray(anyOf)) {
    // Consumers key rendered entries by type name, so report each name once.
    const seenNames = new Set<string>();
    for (const entry of anyOf as readonly unknown[]) {
      // Guard against null / primitive entries instead of throwing on `.range`.
      const rangeName =
        typeof entry === 'object' && entry !== null
          ? (entry as { range?: unknown }).range
          : undefined;
      const resolved = resolveRange(rangeName);
      if (resolved && !seenNames.has(resolved.name)) {
        seenNames.add(resolved.name);
        importInfo.anyOfTypes.push(resolved);
      }
    }
  }

  return importInfo;
}
|
||||
|
||||
/**
|
||||
* Get import/dependency information for a class.
|
||||
* This finds what a class depends ON (forward dependencies).
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ import {
|
|||
extractSlots,
|
||||
extractEnums,
|
||||
} from '../lib/linkml/schema-loader';
|
||||
import { linkmlSchemaService, type ClassExportInfo, type ClassImportInfo, type ClassDependencyCounts, type SlotDefinition, type SlotExportInfo } from '../lib/linkml/linkml-schema-service';
|
||||
import { linkmlSchemaService, type ClassExportInfo, type ClassImportInfo, type ClassDependencyCounts, type SlotDefinition, type SlotExportInfo, type SlotImportInfo } from '../lib/linkml/linkml-schema-service';
|
||||
import { useLanguage } from '../contexts/LanguageContext';
|
||||
import { useSchemaLoadingProgress } from '../hooks/useSchemaLoadingProgress';
|
||||
import { CustodianTypeBadge } from '../components/uml/CustodianTypeIndicator';
|
||||
|
|
@ -1130,6 +1130,11 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
const [slotExports, setSlotExports] = useState<Record<string, SlotExportInfo>>({});
|
||||
const [loadingSlotExports, setLoadingSlotExports] = useState<Set<string>>(new Set());
|
||||
|
||||
// State for expandable Imports section in slot details (what this slot depends on)
|
||||
const [expandedSlotImports, setExpandedSlotImports] = useState<Set<string>>(new Set());
|
||||
const [slotImports, setSlotImports] = useState<Record<string, SlotImportInfo>>({});
|
||||
const [loadingSlotImports, setLoadingSlotImports] = useState<Set<string>>(new Set());
|
||||
|
||||
// State for expandable UML diagram section in class details
|
||||
const [expandedUML, setExpandedUML] = useState<Set<string>>(new Set());
|
||||
|
||||
|
|
@ -1409,6 +1414,37 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
}
|
||||
}, [slotExports, loadingSlotExports, isSchemaServiceComplete]);
|
||||
|
||||
// Expand or collapse the Imports section of a slot; the slot's import data
// is fetched lazily the first time it is needed.
const toggleSlotImports = useCallback(async (slotName: string) => {
  // Flip this slot's membership in the expanded set
  setExpandedSlotImports(current => {
    const updated = new Set(current);
    // delete() reports whether the slot was present; if it was not, expand it
    if (!updated.delete(slotName)) {
      updated.add(slotName);
    }
    return updated;
  });

  // Nothing to fetch when the data is already there, a request is in
  // flight, or the schema service has not finished loading
  const alreadyLoaded = Boolean(slotImports[slotName]);
  if (alreadyLoaded || loadingSlotImports.has(slotName) || !isSchemaServiceComplete) {
    return;
  }

  // Mark as loading, fetch, and always clear the loading mark afterwards
  setLoadingSlotImports(current => new Set(current).add(slotName));
  try {
    const loaded = await linkmlSchemaService.getSlotImportInfo(slotName);
    setSlotImports(current => ({ ...current, [slotName]: loaded }));
  } catch (error) {
    console.error(`Error loading import info for slot ${slotName}:`, error);
  } finally {
    setLoadingSlotImports(current => {
      const remaining = new Set(current);
      remaining.delete(slotName);
      return remaining;
    });
  }
}, [slotImports, loadingSlotImports, isSchemaServiceComplete]);
|
||||
|
||||
// Toggle UML diagram section for a class
|
||||
// Loads both exports AND imports data since UML diagram can show both directions
|
||||
const toggleUML = useCallback(async (className: string) => {
|
||||
|
|
@ -3113,107 +3149,143 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
</div>
|
||||
)}
|
||||
|
||||
{/* Exports section - Only show for standalone slots (not within class context) */}
|
||||
{!className && (
|
||||
<div className="linkml-viewer__accordion">
|
||||
{/* Imports Section - Shows forward dependencies (what this slot depends on) */}
|
||||
{!className && isSchemaServiceComplete && (
|
||||
<div className="linkml-viewer__imports-section">
|
||||
<button
|
||||
className={`linkml-viewer__accordion-header ${expandedSlotExports.has(slot.name) ? 'linkml-viewer__accordion-header--expanded' : ''}`}
|
||||
onClick={() => toggleSlotExports(slot.name)}
|
||||
className={`linkml-viewer__imports-toggle ${expandedSlotImports.has(slot.name) ? 'linkml-viewer__imports-toggle--expanded' : ''}`}
|
||||
onClick={() => toggleSlotImports(slot.name)}
|
||||
title={t('slotImportsTooltip')}
|
||||
>
|
||||
<span className="linkml-viewer__accordion-icon">
|
||||
{expandedSlotExports.has(slot.name) ? '▼' : '▶'}
|
||||
</span>
|
||||
{t('exports')}
|
||||
{slotExports[slot.name] && (
|
||||
<span className="linkml-viewer__accordion-count">
|
||||
({slotExports[slot.name].classesUsingSlot.length + slotExports[slot.name].classesWithSlotUsage.length})
|
||||
<span className="linkml-viewer__imports-icon">{expandedSlotImports.has(slot.name) ? '▼' : '▶'}</span>
|
||||
<span className="linkml-viewer__label">{t('slotImports')}</span>
|
||||
{slotImports[slot.name] && (
|
||||
<span className="linkml-viewer__imports-count">
|
||||
{(slotImports[slot.name].rangeType ? 1 : 0) + slotImports[slot.name].anyOfTypes.length}
|
||||
</span>
|
||||
)}
|
||||
{loadingSlotExports.has(slot.name) && (
|
||||
<span className="linkml-viewer__accordion-loading">...</span>
|
||||
)}
|
||||
{loadingSlotImports.has(slot.name) && <span className="linkml-viewer__imports-loading">Loading...</span>}
|
||||
</button>
|
||||
|
||||
{expandedSlotExports.has(slot.name) && (
|
||||
<div className="linkml-viewer__accordion-content">
|
||||
{loadingSlotExports.has(slot.name) ? (
|
||||
<div className="linkml-viewer__loading-text">{t('loading')}...</div>
|
||||
) : slotExports[slot.name] ? (
|
||||
<>
|
||||
{/* Range Type - Navigation to class/enum */}
|
||||
{slotExports[slot.name].rangeType && (
|
||||
<div className="linkml-viewer__export-section">
|
||||
<span className="linkml-viewer__label">{t('range')}</span>
|
||||
<button
|
||||
className="linkml-viewer__link-button"
|
||||
{expandedSlotImports.has(slot.name) && slotImports[slot.name] && (
|
||||
<div className="linkml-viewer__imports-content">
|
||||
{/* Range Type (main dependency) */}
|
||||
{slotImports[slot.name].rangeType && (
|
||||
<div className="linkml-viewer__imports-category">
|
||||
<span className="linkml-viewer__imports-category-label">{t('rangeTypeLabel')}</span>
|
||||
<div className="linkml-viewer__imports-list">
|
||||
<button
|
||||
className="linkml-viewer__imports-link"
|
||||
onClick={() => {
|
||||
const range = slotImports[slot.name].rangeType!;
|
||||
if (range.isClass) {
|
||||
navigateToClass(range.name);
|
||||
} else if (range.isEnum) {
|
||||
navigateToEnum(range.name);
|
||||
}
|
||||
}}
|
||||
>
|
||||
{slotImports[slot.name].rangeType!.name}
|
||||
{!slotImports[slot.name].rangeType!.isClass && <span className="linkml-viewer__imports-badge linkml-viewer__imports-badge--enum">enum</span>}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{/* Any_of Types (union types) */}
|
||||
{slotImports[slot.name].anyOfTypes.length > 0 && (
|
||||
<div className="linkml-viewer__imports-category">
|
||||
<span className="linkml-viewer__imports-category-label">{t('anyOfTypesLabel')} ({slotImports[slot.name].anyOfTypes.length})</span>
|
||||
<div className="linkml-viewer__imports-list">
|
||||
{slotImports[slot.name].anyOfTypes.map(type => (
|
||||
<button
|
||||
key={type.name}
|
||||
className="linkml-viewer__imports-link"
|
||||
onClick={() => {
|
||||
const range = slotExports[slot.name].rangeType!;
|
||||
if (range.isClass) {
|
||||
navigateToClass(range.name);
|
||||
} else if (range.isEnum) {
|
||||
navigateToEnum(range.name);
|
||||
if (type.isClass) {
|
||||
navigateToClass(type.name);
|
||||
} else if (type.isEnum) {
|
||||
navigateToEnum(type.name);
|
||||
}
|
||||
}}
|
||||
title={`Navigate to ${slotExports[slot.name].rangeType!.isClass ? 'class' : 'enum'}: ${slotExports[slot.name].rangeType!.name}`}
|
||||
>
|
||||
<code>{slotExports[slot.name].rangeType!.name}</code>
|
||||
<span className="linkml-viewer__badge linkml-viewer__badge--small">
|
||||
{slotExports[slot.name].rangeType!.isClass ? 'class' : 'enum'}
|
||||
</span>
|
||||
{type.name}
|
||||
{!type.isClass && <span className="linkml-viewer__imports-badge linkml-viewer__imports-badge--enum">enum</span>}
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Classes that use this slot */}
|
||||
{slotExports[slot.name].classesUsingSlot.length > 0 && (
|
||||
<div className="linkml-viewer__export-section">
|
||||
<span className="linkml-viewer__label">{t('classesUsingSlot')}</span>
|
||||
<div className="linkml-viewer__export-list">
|
||||
{slotExports[slot.name].classesUsingSlot.map(cls => (
|
||||
<button
|
||||
key={cls}
|
||||
className="linkml-viewer__link-button"
|
||||
onClick={() => navigateToClass(cls)}
|
||||
title={`Navigate to class: ${cls}`}
|
||||
>
|
||||
{cls}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Classes with slot_usage overrides */}
|
||||
{slotExports[slot.name].classesWithSlotUsage.length > 0 && (
|
||||
<div className="linkml-viewer__export-section">
|
||||
<span className="linkml-viewer__label">{t('classesWithSlotUsage')}</span>
|
||||
<div className="linkml-viewer__export-list">
|
||||
{slotExports[slot.name].classesWithSlotUsage.map(({ className: cls, overrides }) => (
|
||||
<button
|
||||
key={cls}
|
||||
className="linkml-viewer__link-button linkml-viewer__link-button--with-meta"
|
||||
onClick={() => navigateToClass(cls)}
|
||||
title={`Navigate to class: ${cls}\nOverrides: ${overrides.join(', ')}`}
|
||||
>
|
||||
{cls}
|
||||
<span className="linkml-viewer__badge linkml-viewer__badge--usage linkml-viewer__badge--small">
|
||||
{overrides.length} {overrides.length === 1 ? 'override' : 'overrides'}
|
||||
</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Empty state */}
|
||||
{slotExports[slot.name].classesUsingSlot.length === 0 &&
|
||||
slotExports[slot.name].classesWithSlotUsage.length === 0 && (
|
||||
<div className="linkml-viewer__export-empty">
|
||||
{t('noClassesUsingSlot')}
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
) : null}
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{/* No imports message */}
|
||||
{!slotImports[slot.name].rangeType && slotImports[slot.name].anyOfTypes.length === 0 && (
|
||||
<div className="linkml-viewer__imports-empty">
|
||||
{t('noSlotImports')}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Exports Section - Shows reverse dependencies (what classes use this slot) */}
|
||||
{!className && isSchemaServiceComplete && (
|
||||
<div className="linkml-viewer__exports-section">
|
||||
<button
|
||||
className={`linkml-viewer__exports-toggle ${expandedSlotExports.has(slot.name) ? 'linkml-viewer__exports-toggle--expanded' : ''}`}
|
||||
onClick={() => toggleSlotExports(slot.name)}
|
||||
title={t('slotExportsTooltip')}
|
||||
>
|
||||
<span className="linkml-viewer__exports-icon">{expandedSlotExports.has(slot.name) ? '▼' : '▶'}</span>
|
||||
<span className="linkml-viewer__label">{t('slotExports')}</span>
|
||||
{slotExports[slot.name] && (
|
||||
<span className="linkml-viewer__exports-count">
|
||||
{slotExports[slot.name].classesUsingSlot.length + slotExports[slot.name].classesWithSlotUsage.length}
|
||||
</span>
|
||||
)}
|
||||
{loadingSlotExports.has(slot.name) && <span className="linkml-viewer__exports-loading">Loading...</span>}
|
||||
</button>
|
||||
{expandedSlotExports.has(slot.name) && slotExports[slot.name] && (
|
||||
<div className="linkml-viewer__exports-content">
|
||||
{/* Classes that use this slot */}
|
||||
{slotExports[slot.name].classesUsingSlot.length > 0 && (
|
||||
<div className="linkml-viewer__exports-category">
|
||||
<span className="linkml-viewer__exports-category-label">{t('classesUsingSlot')} ({slotExports[slot.name].classesUsingSlot.length})</span>
|
||||
<div className="linkml-viewer__exports-list">
|
||||
{slotExports[slot.name].classesUsingSlot.map(cls => (
|
||||
<button
|
||||
key={cls}
|
||||
className="linkml-viewer__exports-link"
|
||||
onClick={() => navigateToClass(cls)}
|
||||
>
|
||||
{cls}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{/* Classes with slot_usage overrides */}
|
||||
{slotExports[slot.name].classesWithSlotUsage.length > 0 && (
|
||||
<div className="linkml-viewer__exports-category">
|
||||
<span className="linkml-viewer__exports-category-label">{t('classesWithSlotUsage')} ({slotExports[slot.name].classesWithSlotUsage.length})</span>
|
||||
<div className="linkml-viewer__exports-list">
|
||||
{slotExports[slot.name].classesWithSlotUsage.map(({ className: cls, overrides }) => (
|
||||
<button
|
||||
key={cls}
|
||||
className="linkml-viewer__exports-link"
|
||||
onClick={() => navigateToClass(cls)}
|
||||
title={`Overrides: ${overrides.join(', ')}`}
|
||||
>
|
||||
{cls} <span className="linkml-viewer__exports-via">{overrides.length} {overrides.length === 1 ? 'override' : 'overrides'}</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{/* No exports message */}
|
||||
{slotExports[slot.name].classesUsingSlot.length === 0 &&
|
||||
slotExports[slot.name].classesWithSlotUsage.length === 0 && (
|
||||
<div className="linkml-viewer__exports-empty">
|
||||
{t('noClassesUsingSlot')}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"generated": "2026-01-14T12:09:32.173Z",
|
||||
"generated": "2026-01-14T12:28:33.699Z",
|
||||
"schemaRoot": "/schemas/20251121/linkml",
|
||||
"totalFiles": 2884,
|
||||
"categoryCounts": {
|
||||
|
|
|
|||
72
schemas/20251121/linkml/modules/classes/Approver.yaml
Normal file
72
schemas/20251121/linkml/modules/classes/Approver.yaml
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
id: https://nde.nl/ontology/hc/class/Approver
|
||||
name: approver_class
|
||||
title: Approver Class
|
||||
imports:
|
||||
- linkml:types
|
||||
- ../slots/has_or_had_label
|
||||
- ../slots/has_or_had_identifier
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
foaf: http://xmlns.com/foaf/0.1/
|
||||
default_prefix: hc
|
||||
classes:
|
||||
Approver:
|
||||
class_uri: prov:Agent
|
||||
description: >-
|
||||
An agent (person or organization) that approves or authorizes something.
|
||||
|
||||
**DEFINITION**:
|
||||
|
||||
Approver represents the agent responsible for approving decisions,
|
||||
policies, budgets, or other organizational actions. This replaces
|
||||
simple string fields like `approved_by` with a structured class
|
||||
that can link to person or organization entities.
|
||||
|
||||
**ONTOLOGY ALIGNMENT**:
|
||||
|
||||
- PROV-O: `prov:Agent` - entity that bears responsibility
|
||||
- FOAF: `foaf:Agent` - agent (person or organization)
|
||||
- Schema.org: `schema:Person` or `schema:Organization`
|
||||
|
||||
**USE CASES**:
|
||||
|
||||
1. **Budget Approval**: Who approved the budget allocation
|
||||
2. **Policy Approval**: Who authorized the policy
|
||||
3. **Decision Records**: Documenting approval chains
|
||||
|
||||
exact_mappings:
|
||||
- prov:Agent
|
||||
close_mappings:
|
||||
- foaf:Agent
|
||||
- schema:Person
|
||||
- schema:Organization
|
||||
slots:
|
||||
- has_or_had_label
|
||||
- has_or_had_identifier
|
||||
slot_usage:
|
||||
has_or_had_label:
|
||||
range: string
|
||||
examples:
|
||||
- value: "Board of Directors"
|
||||
description: Organizational approver
|
||||
- value: "Museum Director"
|
||||
description: Role-based approver
|
||||
has_or_had_identifier:
|
||||
range: uriorcurie
|
||||
examples:
|
||||
- value: "https://nde.nl/ontology/hc/person/jan-de-vries"
|
||||
description: Link to person entity
|
||||
comments:
|
||||
- Generic approver class for approval provenance
|
||||
- Can represent individuals or organizational bodies
|
||||
- Aligns with PROV-O Agent for provenance tracking
|
||||
see_also:
|
||||
- https://www.w3.org/TR/prov-o/#Agent
|
||||
examples:
|
||||
- value:
|
||||
has_or_had_label: "Museum Director"
|
||||
has_or_had_identifier: "https://nde.nl/ontology/hc/person/example-director"
|
||||
description: Individual approver
|
||||
|
|
@ -20,7 +20,7 @@ imports:
|
|||
- ../slots/technology_stack
|
||||
- ../slots/funding_source
|
||||
- ../slots/has_or_had_powered_by_cm
|
||||
- ../slots/archived_at
|
||||
- ../slots/is_or_was_archived_at # MIGRATED: was ../slots/archived_at (2026-01-15)
|
||||
- ../slots/serves_finding_aid
|
||||
- ../slots/has_or_had_data_service_endpoint
|
||||
- ../slots/has_or_had_documentation # MIGRATED: was ../slots/api_documentation (2026-01-15)
|
||||
|
|
@ -113,7 +113,7 @@ classes:
|
|||
slots:
|
||||
- has_or_had_documentation # MIGRATED: was api_documentation (2026-01-15)
|
||||
- has_or_had_archival_status
|
||||
- archived_at
|
||||
- is_or_was_archived_at # MIGRATED: was archived_at (2026-01-15)
|
||||
- has_or_had_identifier
|
||||
- has_auxiliary_platform_type
|
||||
- cms_detected
|
||||
|
|
@ -266,7 +266,8 @@ classes:
|
|||
description: Still accessible but not maintained
|
||||
- value: MIGRATED
|
||||
description: Content migrated to successor platform
|
||||
was_archived_at:
|
||||
is_or_was_archived_at: # MIGRATED: was was_archived_at (2026-01-15)
|
||||
description: URL where this platform is or was archived (e.g., Wayback Machine)
|
||||
range: uri
|
||||
examples:
|
||||
- value: https://web.archive.org/web/20211231/https://example.nl/exhibition/
|
||||
|
|
|
|||
108
schemas/20251121/linkml/modules/classes/BOLDIdentifier.yaml
Normal file
108
schemas/20251121/linkml/modules/classes/BOLDIdentifier.yaml
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
id: https://nde.nl/ontology/hc/class/BOLDIdentifier
|
||||
name: bold_identifier_class
|
||||
title: BOLD Identifier Class
|
||||
description: >-
|
||||
Barcode of Life Data System (BOLD) identifier for biological specimens.
|
||||
|
||||
Links heritage biological objects to their DNA barcode records in BOLD.
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
dcterms: http://purl.org/dc/terms/
|
||||
schema: http://schema.org/
|
||||
|
||||
default_prefix: hc
|
||||
|
||||
imports:
|
||||
- linkml:types
|
||||
- ../slots/id
|
||||
- ../slots/identifier_value
|
||||
- ../slots/identifier_url
|
||||
- ../slots/description
|
||||
- ../slots/specificity_annotation
|
||||
- ../slots/template_specificity
|
||||
- ./SpecificityAnnotation
|
||||
- ./TemplateSpecificityScores
|
||||
|
||||
classes:
|
||||
BOLDIdentifier:
|
||||
class_uri: schema:PropertyValue
|
||||
description: >-
|
||||
A Barcode of Life Data System (BOLD) identifier linking a biological
|
||||
specimen to its DNA barcode record.
|
||||
|
||||
**WHAT IS BOLD?**
|
||||
|
||||
BOLD (Barcode of Life Data System) is an online workbench and database
|
||||
for DNA barcoding. It stores specimen data and DNA barcode sequences,
|
||||
enabling species identification through DNA.
|
||||
|
||||
**USE CASES**:
|
||||
|
||||
1. **Specimen Identification**: Link natural history specimens to DNA data
|
||||
2. **Species Verification**: Cross-reference morphological IDs with DNA barcodes
|
||||
3. **Research Provenance**: Document genetic sampling of collection objects
|
||||
|
||||
**IDENTIFIER FORMAT**:
|
||||
|
||||
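BOLD uses several identifier forms; `identifier_value` holds a process ID or sample ID:
|
||||
- Process IDs: project code, sequence number, and two-digit year (e.g. NLNAT001-21); the BOLD:AAA0001 form is a BIN (Barcode Index Number), not a process ID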
|
||||
- Sample IDs: Institution-specific prefixes
|
||||
|
||||
**EXTERNAL LINKS**:
|
||||
|
||||
- BOLD Systems: https://boldsystems.org/
|
||||
- Record URL pattern: https://boldsystems.org/index.php/Public_RecordView?processid={id}
|
||||
|
||||
exact_mappings:
|
||||
- schema:PropertyValue
|
||||
|
||||
close_mappings:
|
||||
- dcterms:identifier
|
||||
|
||||
slots:
|
||||
- id
|
||||
- identifier_value
|
||||
- identifier_url
|
||||
- description
|
||||
- specificity_annotation
|
||||
- template_specificity
|
||||
|
||||
slot_usage:
|
||||
id:
|
||||
identifier: true
|
||||
required: true
|
||||
range: uriorcurie
|
||||
pattern: ^https://nde\.nl/ontology/hc/bold-id/[A-Z0-9-]+$
|
||||
examples:
|
||||
- value: https://nde.nl/ontology/hc/bold-id/NLNAT001-21
|
||||
description: Dutch natural history specimen BOLD ID
|
||||
identifier_value:
|
||||
description: The BOLD process ID or sample ID value.
|
||||
range: string
|
||||
required: true
|
||||
pattern: ^[A-Z]{2,5}[0-9]{3,7}(-[0-9]{2})?$
|
||||
examples:
|
||||
- value: NLNAT001-21
|
||||
description: Netherlands natural history specimen 2021
|
||||
- value: GBMIN12345-19
|
||||
description: UK specimen from 2019
|
||||
identifier_url:
|
||||
description: URL to the BOLD record page.
|
||||
range: uri
|
||||
examples:
|
||||
- value: https://boldsystems.org/index.php/Public_RecordView?processid=NLNAT001-21
|
||||
|
||||
comments:
|
||||
- Used for DNA barcode identifiers in natural history collections
|
||||
- Links physical specimens to molecular data
|
||||
- Part of global biodiversity identification infrastructure
|
||||
|
||||
examples:
|
||||
- value:
|
||||
id: https://nde.nl/ontology/hc/bold-id/NLNAT001-21
|
||||
identifier_value: NLNAT001-21
|
||||
identifier_url: https://boldsystems.org/index.php/Public_RecordView?processid=NLNAT001-21
|
||||
description: DNA barcode for Naturalis specimen
|
||||
description: BOLD identifier for a Dutch natural history specimen
|
||||
|
|
@ -21,7 +21,9 @@ imports:
|
|||
# associated_taxa REMOVED - migrated to is_or_was_associated_with (Rule 53)
|
||||
- ../slots/is_or_was_associated_with
|
||||
- ./Taxon
|
||||
- ../slots/bold_id
|
||||
# bold_id REMOVED - migrated to has_or_had_identifier with BOLDIdentifier class (Rule 53)
|
||||
- ../slots/has_or_had_identifier
|
||||
- ./BOLDIdentifier
|
||||
- ../slots/cites_appendix
|
||||
- ../slots/collection_date
|
||||
- ../slots/collection_locality_text
|
||||
|
|
@ -104,7 +106,8 @@ classes:
|
|||
slots:
|
||||
# associated_taxa REMOVED - migrated to is_or_was_associated_with (Rule 53)
|
||||
- is_or_was_associated_with
|
||||
- bold_id
|
||||
# bold_id REMOVED - migrated to has_or_had_identifier with BOLDIdentifier (Rule 53)
|
||||
- has_or_had_identifier
|
||||
- cites_appendix
|
||||
- collection_date
|
||||
- collection_locality_text
|
||||
|
|
@ -152,6 +155,19 @@ classes:
|
|||
examples:
|
||||
- value: https://nde.nl/ontology/hc/taxon/raphus-cucullatus
|
||||
description: Associated with Dodo taxon
|
||||
has_or_had_identifier:
|
||||
description: >-
|
||||
MIGRATED from bold_id (Rule 53).
|
||||
BOLD (Barcode of Life Data System) identifier for DNA barcode records.
|
||||
Range narrowed to BOLDIdentifier class.
|
||||
range: BOLDIdentifier
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
examples:
|
||||
- value:
|
||||
id: https://nde.nl/ontology/hc/bold-id/NLNAT001-21
|
||||
identifier_value: NLNAT001-21
|
||||
description: BOLD identifier for specimen
|
||||
taxon_name:
|
||||
required: true
|
||||
range: string
|
||||
|
|
|
|||
119
schemas/20251121/linkml/modules/classes/Bookplate.yaml
Normal file
119
schemas/20251121/linkml/modules/classes/Bookplate.yaml
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
id: https://nde.nl/ontology/hc/class/Bookplate
|
||||
name: bookplate_class
|
||||
title: Bookplate Class
|
||||
description: >-
|
||||
Bookplate (ex libris) marking ownership of a book or manuscript.
|
||||
|
||||
Records provenance information through ownership marks in heritage library items.
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
crm: http://www.cidoc-crm.org/cidoc-crm/
|
||||
bf: http://id.loc.gov/ontologies/bibframe/
|
||||
|
||||
default_prefix: hc
|
||||
|
||||
imports:
|
||||
- linkml:types
|
||||
- ../slots/id
|
||||
- ../slots/has_or_had_label
|
||||
- ../slots/description
|
||||
- ../slots/has_or_had_owner
|
||||
- ../slots/specificity_annotation
|
||||
- ../slots/template_specificity
|
||||
- ./SpecificityAnnotation
|
||||
- ./TemplateSpecificityScores
|
||||
|
||||
classes:
|
||||
Bookplate:
|
||||
class_uri: bf:Bookplate
|
||||
description: >-
|
||||
A bookplate (ex libris) or ownership mark found in a book, manuscript,
|
||||
or other library material.
|
||||
|
||||
**WHAT IS A BOOKPLATE?**
|
||||
|
||||
A bookplate is a printed or decorative label pasted inside a book,
|
||||
typically on the front endpaper, indicating ownership. Also known
|
||||
as "ex libris" (Latin: "from the books of").
|
||||
|
||||
**PROVENANCE SIGNIFICANCE**:
|
||||
|
||||
Bookplates are crucial for provenance research:
|
||||
- Document historical ownership chains
|
||||
- Connect items to notable collectors
|
||||
- Evidence of institutional vs. personal ownership
|
||||
- May indicate stolen/looted heritage
|
||||
|
||||
**USE CASES**:
|
||||
|
||||
1. **Provenance Research**: Track ownership history of rare books
|
||||
2. **Collection Documentation**: Record all bookplates in a volume
|
||||
3. **Restitution Claims**: Document pre-war ownership evidence
|
||||
|
||||
**TYPES OF BOOKPLATES**:
|
||||
|
||||
- Printed pictorial bookplates
|
||||
- Armorial bookplates (with coat of arms)
|
||||
- Typographic bookplates (text only)
|
||||
- Stamps and ink marks
|
||||
- Manuscript ownership inscriptions
|
||||
|
||||
exact_mappings:
|
||||
- bf:Bookplate
|
||||
|
||||
close_mappings:
|
||||
- crm:E37_Mark
|
||||
- schema:Thing
|
||||
|
||||
slots:
|
||||
- id
|
||||
- has_or_had_label
|
||||
- description
|
||||
- has_or_had_owner
|
||||
- specificity_annotation
|
||||
- template_specificity
|
||||
|
||||
slot_usage:
|
||||
id:
|
||||
identifier: true
|
||||
required: true
|
||||
range: uriorcurie
|
||||
pattern: ^https://nde\.nl/ontology/hc/bookplate/[a-z0-9-]+$
|
||||
examples:
|
||||
- value: https://nde.nl/ontology/hc/bookplate/kb-exlibris-001
|
||||
description: KB bookplate record
|
||||
has_or_had_label:
|
||||
description: Text or name on the bookplate.
|
||||
range: string
|
||||
required: true
|
||||
examples:
|
||||
- value: "Ex Libris Johann Wolfgang von Goethe"
|
||||
description: Goethe's bookplate
|
||||
- value: "Bibliotheca Regia"
|
||||
description: Royal library bookplate
|
||||
description:
|
||||
range: string
|
||||
examples:
|
||||
- value: Armorial bookplate with three lions, gilt border, 18th century
|
||||
has_or_had_owner:
|
||||
description: Person or institution who owned the book according to this bookplate.
|
||||
range: string
|
||||
examples:
|
||||
- value: Johann Wolfgang von Goethe
|
||||
- value: Royal Library of Prussia
|
||||
|
||||
comments:
|
||||
- Used for provenance research in rare book collections
|
||||
- Links library items to historical owners
|
||||
- Multiple bookplates may appear in single volume
|
||||
|
||||
examples:
|
||||
- value:
|
||||
id: https://nde.nl/ontology/hc/bookplate/kb-exlibris-goethe-001
|
||||
has_or_had_label: "Ex Libris J.W. von Goethe"
|
||||
description: Armorial bookplate with oak wreath, early 19th century
|
||||
has_or_had_owner: Johann Wolfgang von Goethe
|
||||
description: Goethe bookplate in rare book collection
|
||||
|
|
@ -26,7 +26,9 @@ imports:
|
|||
- ../slots/has_or_had_description
|
||||
- ../slots/has_or_had_provenance
|
||||
- ../slots/has_or_had_type
|
||||
- ../slots/bookplate
|
||||
# bookplate REMOVED - migrated to includes_or_included with Bookplate class (Rule 53)
|
||||
- ../slots/includes_or_included
|
||||
- ./Bookplate
|
||||
- ./BindingType
|
||||
- ../slots/call_number
|
||||
- ../slots/carrier_type
|
||||
|
|
@ -112,7 +114,8 @@ classes:
|
|||
- has_or_had_description
|
||||
- has_or_had_provenance
|
||||
- has_or_had_type
|
||||
- bookplate
|
||||
# bookplate REMOVED - migrated to includes_or_included (Rule 53)
|
||||
- includes_or_included
|
||||
- call_number
|
||||
- carrier_type
|
||||
- carries_information
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ prefixes:
|
|||
imports:
|
||||
- linkml:types
|
||||
- ./WebClaim
|
||||
- ../slots/archived_at
|
||||
- ../slots/is_or_was_archived_at  # MIGRATED: was ../slots/archived_at (2026-01-15)
|
||||
- ../slots/extraction_confidence
|
||||
- ../slots/extraction_note
|
||||
- ../slots/source_url
|
||||
|
|
@ -58,7 +58,7 @@ classes:
|
|||
\ DETECTION**:\n\nWebObservation supports tracking changes over time:\n- Link to `previous_observation` for same URL\n\
|
||||
- `content_changed` flag for quick change detection\n- `content_hash` for integrity verification\n- Compare `last_modified`\
|
||||
\ and `etag` across observations\n\n**ARCHIVAL INTEGRATION**:\n\nFor long-term preservation, link to archived copies:\n\
|
||||
- `archived_at` can point to Wayback Machine, Archive.today, etc.\n- Ensures cited web content remains accessible\n\n\
|
||||
- `is_or_was_archived_at` can point to Wayback Machine, Archive.today, etc.\n- Ensures cited web content remains accessible\n\n\
|
||||
**EXAMPLES**:\n\n1. **EU Funding Portal Observation**\n - source_url: https://ec.europa.eu/.../topic-details/horizon-cl2-2025-heritage-01\n\
|
||||
\ - retrieved_on: 2025-11-29T10:30:00Z\n - retrieved_by: \"glam-harvester/1.0\"\n - extraction_confidence: 0.95\n\
|
||||
\ \n2. **Heritage Organisation Website**\n - source_url: https://www.heritagefund.org.uk/funding/medium-grants\n\
|
||||
|
|
@ -75,7 +75,7 @@ classes:
|
|||
- pav:sourceAccessedAt
|
||||
- dcterms:source
|
||||
slots:
|
||||
- archived_at
|
||||
- is_or_was_archived_at  # MIGRATED: was archived_at (2026-01-15)
|
||||
- claim
|
||||
- content_changed
|
||||
- content_hash
|
||||
|
|
@ -99,7 +99,7 @@ classes:
|
|||
- WebObservation is a prov:Activity documenting web content retrieval
|
||||
- Integrates PROV-O for provenance and PAV for retrieval-specific properties
|
||||
- Supports change detection via content_hash, previous_observation, content_changed
|
||||
- Links to archived copies via archived_at for long-term citation
|
||||
- Links to archived copies via is_or_was_archived_at for long-term citation
|
||||
- observed_entities links observation to extracted data (prov:generated)
|
||||
see_also:
|
||||
- https://www.w3.org/TR/prov-o/
|
||||
|
|
@ -121,7 +121,7 @@ classes:
|
|||
stated. Eligibility criteria parsed from HTML sections.
|
||||
observed_entity:
|
||||
- https://nde.nl/ontology/hc/call/ec/cl2-2025-heritage-01
|
||||
archived_at: https://web.archive.org/web/20251129103000/https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/horizon-cl2-2025-heritage-01
|
||||
is_or_was_archived_at: https://web.archive.org/web/20251129103000/https://ec.europa.eu/info/funding-tenders/opportunities/portal/screen/opportunities/topic-details/horizon-cl2-2025-heritage-01
|
||||
description: Web observation of Horizon Europe CL2 2025 heritage call
|
||||
- value:
|
||||
observation_id: https://nde.nl/ontology/hc/observation/web/2025-11-28/nlhf-medium-grants
|
||||
|
|
|
|||
|
|
@ -0,0 +1,44 @@
|
|||
id: https://nde.nl/ontology/hc/slot/is_or_was_approved_by
|
||||
name: is_or_was_approved_by_slot
|
||||
title: Is Or Was Approved By Slot
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
schema: http://schema.org/
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
is_or_was_approved_by:
|
||||
description: >-
|
||||
The agent (person or organization) that approved or authorized something.
|
||||
|
||||
**SEMANTIC PATTERN**:
|
||||
|
||||
This slot follows the RiC-O temporal predicate pattern (is_or_was_*)
|
||||
to indicate that approval is a historical event - something was approved
|
||||
by someone at some point in time.
|
||||
|
||||
**REPLACES**:
|
||||
|
||||
- `approved_by` - Simple string field for approver name
|
||||
|
||||
**RANGE OPTIONS**:
|
||||
|
||||
- string: Simple approver name (backwards compatible)
|
||||
- Approver: Structured approver with identity link
|
||||
|
||||
Classes should use slot_usage to specify appropriate range.
|
||||
|
||||
slot_uri: prov:wasAttributedTo
|
||||
range: string
|
||||
exact_mappings:
|
||||
- prov:wasAttributedTo
|
||||
close_mappings:
|
||||
- schema:author
|
||||
examples:
|
||||
- value: "Museum Director"
|
||||
description: Role-based approver
|
||||
- value: "Board of Directors"
|
||||
description: Organizational body approver
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
id: https://nde.nl/ontology/hc/slot/is_or_was_archived_at
|
||||
name: is_or_was_archived_at_slot
|
||||
title: Is Or Was Archived At Slot
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
hc: https://nde.nl/ontology/hc/
|
||||
schema: http://schema.org/
|
||||
prov: http://www.w3.org/ns/prov#
|
||||
imports:
|
||||
- linkml:types
|
||||
default_prefix: hc
|
||||
slots:
|
||||
is_or_was_archived_at:
|
||||
description: >-
|
||||
Location or URL where content was archived or preserved.
|
||||
|
||||
**SEMANTIC PATTERN**:
|
||||
|
||||
This slot follows the RiC-O temporal predicate pattern (is_or_was_*)
|
||||
to indicate that archival location may change over time or refer to
|
||||
historical archival events.
|
||||
|
||||
**REPLACES**:
|
||||
|
||||
- `archived_at` - URL to archived version (e.g., Wayback Machine)
|
||||
- `was_archived_at` - Similar pattern
|
||||
|
||||
**USE CASES**:
|
||||
|
||||
1. **Web Archival**: Link to Internet Archive/Wayback Machine snapshots
|
||||
2. **Platform Preservation**: Where deprecated platforms are preserved
|
||||
3. **Content Snapshots**: Historical versions of web content
|
||||
|
||||
slot_uri: schema:archivedAt
|
||||
range: uri
|
||||
exact_mappings:
|
||||
- schema:archivedAt
|
||||
close_mappings:
|
||||
- prov:atLocation
|
||||
examples:
|
||||
- value: "https://web.archive.org/web/20211231/https://example.nl/exhibition/"
|
||||
description: Wayback Machine archived URL
|
||||
- value: "https://archive.org/details/example-collection"
|
||||
description: Internet Archive collection
|
||||
|
|
@ -44,7 +44,6 @@
|
|||
"bio_type_classification.yaml",
|
||||
"birth_date.yaml",
|
||||
"birth_place.yaml",
|
||||
"bold_id.yaml",
|
||||
"booking_required.yaml",
|
||||
"bookplate.yaml",
|
||||
"borrower.yaml",
|
||||
|
|
|
|||
|
|
@ -851,10 +851,14 @@ fixes:
|
|||
|
||||
- original_slot_id: https://nde.nl/ontology/hc/slot/bold_id
|
||||
processed:
|
||||
status: false
|
||||
timestamp: null
|
||||
session: null
|
||||
notes: "Requires BOLDIdentifier class creation"
|
||||
status: true
|
||||
timestamp: '2026-01-14T23:00:00Z'
|
||||
session: "session-2026-01-14-identifier-migration"
|
||||
notes: >-
|
||||
FULLY MIGRATED: BiologicalObject - bold_id REMOVED.
|
||||
Created BOLDIdentifier class (Barcode of Life Data System identifier).
|
||||
Replaced with has_or_had_identifier slot with range BOLDIdentifier.
|
||||
Slot archived to modules/slots/archive/bold_id_archived_20260114.yaml (Rule 53).
|
||||
revision:
|
||||
- label: has_or_had_identifier
|
||||
type: slot
|
||||
|
|
|
|||
|
|
@ -32,12 +32,17 @@ from pydantic import BaseModel
|
|||
# Linkup API configuration
|
||||
LINKUP_API_KEY = os.getenv("LINKUP_API_KEY", "")
|
||||
LINKUP_API_URL = "https://api.linkup.so/v1/search"
|
||||
LINKUP_FETCH_URL = "https://api.linkup.so/v1/fetch"
|
||||
|
||||
# Exa API configuration for LinkedIn profile extraction
|
||||
EXA_API_KEY = os.getenv("EXA_API_KEY", "")
|
||||
EXA_API_URL = "https://api.exa.ai/contents"
|
||||
ENTITY_DIR = Path(os.getenv("ENTITY_DIR", "/Users/kempersc/apps/glam/data/custodian/person/entity"))
|
||||
|
||||
# LinkedIn profile fetch provider configuration
|
||||
# Options: "exa", "linkup", "exa,linkup" (try exa first, fallback to linkup), "linkup,exa"
|
||||
LINKEDIN_FETCH_PROVIDERS = os.getenv("LINKEDIN_FETCH_PROVIDERS", "exa,linkup")
|
||||
|
||||
# Email semantics for on-demand analysis
|
||||
try:
|
||||
from glam_extractor.entity_resolution.email_semantics import parse_email_semantics
|
||||
|
|
@ -1075,7 +1080,7 @@ async def linkup_search(request: LinkupSearchRequest):
|
|||
|
||||
|
||||
# ============================================================================
|
||||
# LinkedIn Profile Extraction via Exa API
|
||||
# LinkedIn Profile Extraction (Exa and Linkup providers)
|
||||
# ============================================================================
|
||||
|
||||
import re as regex_module # Avoid shadowing
|
||||
|
|
@ -1087,6 +1092,7 @@ async def fetch_linkedin_profile_exa(linkedin_url: str) -> Optional[dict]:
|
|||
Returns parsed profile data or None if extraction fails.
|
||||
"""
|
||||
if not EXA_API_KEY:
|
||||
print("Exa API: No API key configured")
|
||||
return None
|
||||
|
||||
try:
|
||||
|
|
@ -1111,13 +1117,15 @@ async def fetch_linkedin_profile_exa(linkedin_url: str) -> Optional[dict]:
|
|||
|
||||
data = response.json()
|
||||
if not data.get('results') or len(data['results']) == 0:
|
||||
print("Exa API: No results returned")
|
||||
return None
|
||||
|
||||
result = data['results'][0]
|
||||
return {
|
||||
'raw_result': result,
|
||||
'request_id': data.get('requestId', ''),
|
||||
'cost': data.get('costDollars', {}).get('total', 0)
|
||||
'cost': data.get('costDollars', {}).get('total', 0),
|
||||
'provider': 'exa'
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -1125,6 +1133,98 @@ async def fetch_linkedin_profile_exa(linkedin_url: str) -> Optional[dict]:
|
|||
return None
|
||||
|
||||
|
||||
async def fetch_linkedin_profile_linkup(linkedin_url: str) -> Optional[dict]:
    """
    Fetch LinkedIn profile data using Linkup Fetch API.

    Args:
        linkedin_url: URL of the LinkedIn profile page to fetch.

    Returns:
        A dict with the keys ``raw_result`` (``text``, ``url``, ``title``,
        ``author``, ``image``), ``request_id``, ``cost`` and ``provider``
        (always ``'linkup'``), or None when no API key is configured, the
        HTTP status is not 200, the response carries no page text, or any
        exception is raised while calling the API.
    """
    if not LINKUP_API_KEY:
        print("Linkup API: No API key configured")
        return None

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                LINKUP_FETCH_URL,
                headers={
                    "Authorization": f"Bearer {LINKUP_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "url": linkedin_url,
                    "outputType": "markdown",
                },
                timeout=60.0,
            )

            if response.status_code != 200:
                print(f"Linkup API error: HTTP {response.status_code}")
                return None

            data = response.json()

            # Accept the page text under either key so a response that uses
            # `markdown` is not misreported as empty.
            # NOTE(review): Linkup's fetch endpoint appears to return the page
            # under `markdown` rather than `content` - confirm against the
            # current API reference and drop whichever key is never sent.
            content = data.get('content') or data.get('markdown') or ''

            if not content:
                print("Linkup API: No content returned")
                return None

            # Transform to Exa-like format for consistent parsing
            return {
                'raw_result': {
                    'text': content,
                    'url': linkedin_url,
                    'title': data.get('title', ''),
                    'author': '',  # Will be extracted from content
                    'image': '',
                },
                'request_id': '',
                'cost': 0,  # Linkup doesn't report cost per request
                'provider': 'linkup',
            }

    except Exception as e:
        # Best-effort fetch: any failure (network, JSON decoding, unexpected
        # payload shape) is logged and reported to the caller as None.
        print(f"Linkup API exception: {e}")
        return None
|
||||
|
||||
|
||||
async def fetch_linkedin_profile(linkedin_url: str) -> Optional[dict]:
    """
    Fetch LinkedIn profile using configured providers with fallback.

    Uses LINKEDIN_FETCH_PROVIDERS setting to determine order.
    Examples:
        "exa" - Use only Exa
        "linkup" - Use only Linkup
        "exa,linkup" - Try Exa first, fallback to Linkup
        "linkup,exa" - Try Linkup first, fallback to Exa

    Blank entries (an empty setting, a trailing comma) are ignored.
    Unrecognised provider names are logged and skipped.

    Args:
        linkedin_url: URL of the LinkedIn profile page to fetch.

    Returns:
        The first truthy result returned by a provider fetcher, or None
        when every configured provider fails or none is configured.
    """
    # Provider key -> (display name used in log output, fetcher coroutine).
    fetchers = {
        'exa': ('Exa', fetch_linkedin_profile_exa),
        'linkup': ('Linkup', fetch_linkedin_profile_linkup),
    }

    providers = [p.strip().lower() for p in LINKEDIN_FETCH_PROVIDERS.split(',')]

    for provider in providers:
        if not provider:
            # Skip blanks so they are not reported as an unknown provider.
            continue

        print(f"Trying LinkedIn profile fetch with: {provider}")

        entry = fetchers.get(provider)
        if entry is None:
            print(f"Unknown provider: {provider}")
            continue

        label, fetch = entry
        result = await fetch(linkedin_url)
        if result:
            print(f"Successfully fetched profile with {label}")
            return result

    print(f"All providers failed for: {linkedin_url}")
    return None
|
||||
|
||||
|
||||
def parse_linkedin_profile_from_exa(raw_data: dict) -> dict:
|
||||
"""Parse Exa response into structured profile data."""
|
||||
result = raw_data.get('raw_result', {})
|
||||
|
|
@ -1283,7 +1383,7 @@ async def save_entity_profile(
|
|||
"source_file": "manual_add_candidate",
|
||||
"staff_id": f"manual_add_{linkedin_slug}",
|
||||
"extraction_date": datetime.now(timezone.utc).isoformat(),
|
||||
"extraction_method": "exa_contents",
|
||||
"extraction_method": f"{raw_response.get('provider', 'unknown')}_contents",
|
||||
"extraction_agent": "entity_review_api",
|
||||
"linkedin_url": source_info.get('linkedin_url', ''),
|
||||
"cost_usd": raw_response.get('cost', 0),
|
||||
|
|
|
|||
Loading…
Reference in a new issue