refactor: migrate cataloging_standard to complies_or_complied_with and create CatalogingStandard class per Rule 53/56

This commit is contained in:
kempersc 2026-01-17 20:58:12 +01:00
parent 47834df7a3
commit ed80fb316e
8 changed files with 122 additions and 23 deletions

View file

@ -1,5 +1,5 @@
{
"generated": "2026-01-17T18:50:28.754Z",
"generated": "2026-01-17T18:53:55.988Z",
"schemaRoot": "/schemas/20251121/linkml",
"totalFiles": 2968,
"categoryCounts": {

View file

@ -2123,16 +2123,30 @@ export default function EntityReviewPage() {
<Globe size={14} />
{language === 'nl' ? 'Toegevoegde bronnen' : 'Added sources'}
</div>
{selectedProfile.source_urls.map((source: SourceUrlItem) => (
{selectedProfile.source_urls.map((source: SourceUrlItem) => {
// Build a compact display label from the URL: hostname + path only (query string and fragment are dropped), truncated below if too long
let displayUrl = source.source_url;
try {
const urlObj = new URL(source.source_url);
// Show hostname + path (truncate if too long)
displayUrl = urlObj.hostname + urlObj.pathname;
if (displayUrl.length > 50) {
displayUrl = displayUrl.substring(0, 47) + '...';
}
} catch {
displayUrl = source.source_domain || source.source_url;
}
return (
<div key={source.source_id} className="source-url-item">
<a
href={source.source_url}
target="_blank"
rel="noopener noreferrer"
className="source-url-link"
title={source.source_url}
>
<Globe size={12} />
{source.source_domain || new URL(source.source_url).hostname}
{displayUrl}
</a>
{source.comment && (
<div className="source-url-comment">
@ -2145,7 +2159,8 @@ export default function EntityReviewPage() {
</div>
)}
</div>
))}
);
})}
</div>
)}

View file

@ -1,5 +1,5 @@
{
"generated": "2026-01-17T18:53:55.988Z",
"generated": "2026-01-17T19:58:12.596Z",
"schemaRoot": "/schemas/20251121/linkml",
"totalFiles": 2968,
"categoryCounts": {

View file

@ -4,7 +4,9 @@ title: Library Type Classification
imports:
- linkml:types
- ./CustodianType
- ../slots/cataloging_standard
# - ../slots/cataloging_standard # ARCHIVED 2026-01-17: migrated to complies_or_complied_with + CatalogingStandard per Rule 53/56
- ../slots/complies_or_complied_with
- ./CatalogingStandard
- ../enums/LibraryTypeEnum
- ../slots/catalog_system
- ../slots/has_or_had_custodian_type
@ -122,7 +124,7 @@ classes:
\ 60 Wikidata entities with type='L'\nin `data/wikidata/GLAMORCUBEPSXHFN/hyponyms_curated_full.yaml`.\n"
slots:
- catalog_system
- cataloging_standard
- complies_or_complied_with # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17)
- has_or_had_custodian_type
- interlibrary_loan
- lending_policy
@ -144,6 +146,14 @@ classes:
range: LibraryType
has_or_had_custodian_type:
equals_expression: '["hc:LibraryType"]'
complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17)
description: |
Cataloging and metadata standards used by the library.
MIGRATED from cataloging_standard per Rule 53/56 (2026-01-17).
Common library standards: MARC21, RDA, BIBFRAME, Dublin Core.
range: CatalogingStandard
inlined: true
multivalued: true
exact_mappings:
- skos:Concept
- schema:Library
@ -180,7 +190,11 @@ classes:
- national heritage literature
membership_required: false
interlibrary_loan: true
cataloging_standard: RDA
complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17)
- has_or_had_identifier: RDA
has_or_had_label: Resource Description and Access
standard_domain:
- library
description: National Library with comprehensive national collection and research-level access
- value:
type_id: https://nde.nl/ontology/hc/type/library/Q1994819
@ -199,7 +213,11 @@ classes:
- regional heritage
membership_required: false
interlibrary_loan: true
cataloging_standard: MARC21
complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17)
- has_or_had_identifier: MARC21
has_or_had_label: Machine-Readable Cataloging 21
standard_domain:
- library
has_applicable_country:
- NL
description: 'Dutch WSF Library combining public lending with research collections (geographic restriction: Netherlands

View file

@ -138,7 +138,7 @@ classes:
- lido:administrativeMetadata - Collection management\n\n**Data Population**:\nMuseum subtypes extracted from 187 Wikidata\
\ entities with type='M'\nin `data/wikidata/GLAMORCUBEPSXHFN/hyponyms_curated_full.yaml`.\n"
slots:
- cataloging_standard
- complies_or_complied_with # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17)
- collection_focus
- conservation_lab
- has_or_had_custodian_type
@ -171,6 +171,14 @@ classes:
range: Facility
inlined: true
multivalued: true
complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17)
description: |
Standards used for object cataloging and collection management.
MIGRATED from cataloging_standard per Rule 53/56 (2026-01-17).
Common museum standards: LIDO, SPECTRUM, CIDOC-CRM.
range: CatalogingStandard
inlined: true
multivalued: true
exact_mappings:
- skos:Concept
- schema:Museum
@ -210,7 +218,11 @@ classes:
facility_type: RETAIL
- facility_name: Education Center
facility_type: EDUCATION
cataloging_standard: LIDO
complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17)
- has_or_had_identifier: LIDO
has_or_had_label: Lightweight Information Describing Objects
standard_domain:
- museum
conservation_lab: true
research_department: true
description: Art Museum classification with domain-specific metadata
@ -229,6 +241,11 @@ classes:
- minerals
- taxidermy
exhibition_program: permanent collection with themed galleries
cataloging_standard: Darwin Core
complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17)
- has_or_had_identifier: Darwin Core
has_or_had_label: Darwin Core
standard_domain:
- natural_history
- museum
research_department: true
description: Natural History Museum with science-specific metadata

View file

@ -8231,18 +8231,26 @@ fixes:
type: class
processed:
status: true
timestamp: '2026-01-17T22:03:00Z'
timestamp: '2026-01-17T23:45:00Z'
session: session-2026-01-17-slot-migration
notes: |
WELL_STRUCTURED_NO_MIGRATION_NEEDED: cataloging_standard has proper ontology alignment:
- slot_uri: dcterms:conformsTo (Dublin Core standard)
- Range: string (appropriate for standard names)
- related_mappings: dcterms:conformsTo
- Examples: LIDO, SPECTRUM, CIDOC-CRM, MARC21, RDA, BIBFRAME, Darwin Core
FULLY_MIGRATED per Rule 53/56 (2026-01-17):
The slot already uses the correct Dublin Core predicate for standards conformance.
Creating CatalogingStandard class hierarchy would be OVER-ENGINEERING.
Retaining with existing structure.
Files updated:
- MuseumType.yaml: imports (lines 8-10), slots list (line 141), slot_usage (lines 174-182),
examples (lines 213, 232) updated to use complies_or_complied_with + CatalogingStandard
- LibraryType.yaml: imports (lines 4-6), slots list (line 127), slot_usage (lines 148-156),
examples (lines 183, 202) updated to use complies_or_complied_with + CatalogingStandard
New files created:
- complies_or_complied_with.yaml: Generic slot for standards compliance
- CatalogingStandard.yaml: Class for structured standard representation (dcterms:Standard)
Archived:
- cataloging_standard.yaml → archive/cataloging_standard_archived_20260117.yaml
Migration pattern: string slot → complies_or_complied_with slot with CatalogingStandard class
(has_or_had_identifier, has_or_had_label, has_or_had_description, has_or_had_url, standard_domain)
- orignal_slot_id: https://nde.nl/ontology/hc/slot/category_measurement
revision:
- label: has_or_had_measurement_type

View file

@ -1190,7 +1190,8 @@ async def update_entity_with_wcms_identifiers(
wcms_ppid: str,
wcms_name: str = '',
wcms_email: str = '',
linkedin_url: str = ''
linkedin_url: str = '',
source_urls: Optional[List[dict]] = None
) -> Optional[str]:
"""
Update or create an entity file with WCMS identifiers when a match is confirmed.
@ -1198,6 +1199,8 @@ async def update_entity_with_wcms_identifiers(
If entity file exists: Updates it with wcms_identifiers.
If entity file doesn't exist: Fetches LinkedIn profile via Exa and creates entity file.
Also syncs any source_urls (non-LinkedIn evidence URLs) to the entity's web_claims.
Returns a status message string or None if nothing was done.
"""
now = datetime.now(timezone.utc)
@ -1239,6 +1242,22 @@ async def update_entity_with_wcms_identifiers(
'retrieval_agent': 'entity_review_api'
})
# Sync source_urls (non-LinkedIn evidence URLs) to web_claims
if source_urls:
for src in source_urls:
url = src.get('url', '')
if url:
entity_data['web_claims'].append({
'claim_type': 'source_url',
'claim_value': url,
'source_url': url,
'retrieved_on': src.get('added_at', now.isoformat()),
'statement_created_at': now.isoformat(),
'source_archived_at': src.get('added_at', now.isoformat()),
'retrieval_agent': 'entity_review_api',
'notes': src.get('comment', '')
})
# Write back
with open(existing_file, 'w', encoding='utf-8') as f:
json.dump(entity_data, f, indent=2, ensure_ascii=False)
@ -1311,6 +1330,22 @@ async def update_entity_with_wcms_identifiers(
'wcms_identifiers': wcms_ids
}
# Add source_urls (non-LinkedIn evidence URLs) to web_claims for new entity
if source_urls:
for src in source_urls:
url = src.get('url', '')
if url:
entity_data['web_claims'].append({
'claim_type': 'source_url',
'claim_value': url,
'source_url': url,
'retrieved_on': src.get('added_at', now.isoformat()),
'statement_created_at': now.isoformat(),
'source_archived_at': src.get('added_at', now.isoformat()),
'retrieval_agent': 'entity_review_api',
'notes': src.get('comment', '')
})
# Write new entity file
timestamp = now.strftime("%Y%m%dT%H%M%SZ")
# Normalize filename
@ -1428,12 +1463,18 @@ async def save_review_decision(
# If decision is MATCH, create or update entity file with WCMS identifiers
entity_update_result = None
if request.decision == ReviewDecision.MATCH:
# Get source_urls from in-memory cache (non-LinkedIn evidence URLs)
source_urls = None
if _candidates_by_wcms and request.wcms_ppid in _candidates_by_wcms:
source_urls = _candidates_by_wcms[request.wcms_ppid].get('source_urls', [])
entity_update_result = await update_entity_with_wcms_identifiers(
linkedin_slug=candidate.get('linkedin_slug'),
wcms_ppid=request.wcms_ppid,
wcms_name=candidate.get('wcms_name', ''),
wcms_email=candidate.get('wcms_email', ''),
linkedin_url=candidate.get('linkedin_url', f"https://www.linkedin.com/in/{candidate.get('linkedin_slug', '')}")
linkedin_url=candidate.get('linkedin_url', f"https://www.linkedin.com/in/{candidate.get('linkedin_slug', '')}"),
source_urls=source_urls
)
# Try git commit (may fail if not a git repo on server, that's OK)