From ed80fb316e88260ad814535948a8408f57a16da8 Mon Sep 17 00:00:00 2001 From: kempersc Date: Sat, 17 Jan 2026 20:58:12 +0100 Subject: [PATCH] refactor: migrate cataloging_standard to complies_or_complied_with and create CatalogingStandard class per Rule 53/56 --- .../schemas/20251121/linkml/manifest.json | 2 +- frontend/src/pages/EntityReviewPage.tsx | 21 +++++++-- schemas/20251121/linkml/manifest.json | 2 +- .../linkml/modules/classes/LibraryType.yaml | 26 +++++++++-- .../linkml/modules/classes/MuseumType.yaml | 23 ++++++++-- ...ataloging_standard_archived_20260117.yaml} | 0 .../linkml/modules/slots/slot_fixes.yaml | 26 +++++++---- src/glam_extractor/api/entity_review.py | 45 ++++++++++++++++++- 8 files changed, 122 insertions(+), 23 deletions(-) rename schemas/20251121/linkml/modules/slots/{cataloging_standard.yaml => archive/cataloging_standard_archived_20260117.yaml} (100%) diff --git a/frontend/public/schemas/20251121/linkml/manifest.json b/frontend/public/schemas/20251121/linkml/manifest.json index 2ff6043488..8a0becd0e0 100644 --- a/frontend/public/schemas/20251121/linkml/manifest.json +++ b/frontend/public/schemas/20251121/linkml/manifest.json @@ -1,5 +1,5 @@ { - "generated": "2026-01-17T18:50:28.754Z", + "generated": "2026-01-17T18:53:55.988Z", "schemaRoot": "/schemas/20251121/linkml", "totalFiles": 2968, "categoryCounts": { diff --git a/frontend/src/pages/EntityReviewPage.tsx b/frontend/src/pages/EntityReviewPage.tsx index 49535cc107..5759e3ff6a 100644 --- a/frontend/src/pages/EntityReviewPage.tsx +++ b/frontend/src/pages/EntityReviewPage.tsx @@ -2123,16 +2123,30 @@ export default function EntityReviewPage() { {language === 'nl' ? 
'Toegevoegde bronnen' : 'Added sources'} - {selectedProfile.source_urls.map((source: SourceUrlItem) => ( + {selectedProfile.source_urls.map((source: SourceUrlItem) => { + // Extract path from URL for display (full URL minus protocol) + let displayUrl = source.source_url; + try { + const urlObj = new URL(source.source_url); + // Show hostname + path (truncate if too long) + displayUrl = urlObj.hostname + urlObj.pathname; + if (displayUrl.length > 50) { + displayUrl = displayUrl.substring(0, 47) + '...'; + } + } catch { + displayUrl = source.source_domain || source.source_url; + } + return (
- {source.source_domain || new URL(source.source_url).hostname} + {displayUrl} {source.comment && (
@@ -2145,7 +2159,8 @@ export default function EntityReviewPage() {
)}
- ))} + ); + })} )} diff --git a/schemas/20251121/linkml/manifest.json b/schemas/20251121/linkml/manifest.json index 8a0becd0e0..2d770acf7b 100644 --- a/schemas/20251121/linkml/manifest.json +++ b/schemas/20251121/linkml/manifest.json @@ -1,5 +1,5 @@ { - "generated": "2026-01-17T18:53:55.988Z", + "generated": "2026-01-17T19:58:12.596Z", "schemaRoot": "/schemas/20251121/linkml", "totalFiles": 2968, "categoryCounts": { diff --git a/schemas/20251121/linkml/modules/classes/LibraryType.yaml b/schemas/20251121/linkml/modules/classes/LibraryType.yaml index 43c6ff25ad..d56d65fb88 100644 --- a/schemas/20251121/linkml/modules/classes/LibraryType.yaml +++ b/schemas/20251121/linkml/modules/classes/LibraryType.yaml @@ -4,7 +4,9 @@ title: Library Type Classification imports: - linkml:types - ./CustodianType - - ../slots/cataloging_standard + # - ../slots/cataloging_standard # ARCHIVED 2026-01-17: migrated to complies_or_complied_with + CatalogingStandard per Rule 53/56 + - ../slots/complies_or_complied_with + - ./CatalogingStandard - ../enums/LibraryTypeEnum - ../slots/catalog_system - ../slots/has_or_had_custodian_type @@ -122,7 +124,7 @@ classes: \ 60 Wikidata entities with type='L'\nin `data/wikidata/GLAMORCUBEPSXHFN/hyponyms_curated_full.yaml`.\n" slots: - catalog_system - - cataloging_standard + - complies_or_complied_with # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17) - has_or_had_custodian_type - interlibrary_loan - lending_policy @@ -144,6 +146,14 @@ classes: range: LibraryType has_or_had_custodian_type: equals_expression: '["hc:LibraryType"]' + complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17) + description: | + Cataloging and metadata standards used by the library. + MIGRATED from cataloging_standard per Rule 53/56 (2026-01-17). + Common library standards: MARC21, RDA, BIBFRAME, Dublin Core. 
+ range: CatalogingStandard + inlined: true + multivalued: true exact_mappings: - skos:Concept - schema:Library @@ -180,7 +190,11 @@ classes: - national heritage literature membership_required: false interlibrary_loan: true - cataloging_standard: RDA + complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17) + - has_or_had_identifier: RDA + has_or_had_label: Resource Description and Access + standard_domain: + - library description: National Library with comprehensive national collection and research-level access - value: type_id: https://nde.nl/ontology/hc/type/library/Q1994819 @@ -199,7 +213,11 @@ classes: - regional heritage membership_required: false interlibrary_loan: true - cataloging_standard: MARC21 + complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17) + - has_or_had_identifier: MARC21 + has_or_had_label: Machine-Readable Cataloging 21 + standard_domain: + - library has_applicable_country: - NL description: 'Dutch WSF Library combining public lending with research collections (geographic restriction: Netherlands diff --git a/schemas/20251121/linkml/modules/classes/MuseumType.yaml b/schemas/20251121/linkml/modules/classes/MuseumType.yaml index fcc1b36e52..132f3573fb 100644 --- a/schemas/20251121/linkml/modules/classes/MuseumType.yaml +++ b/schemas/20251121/linkml/modules/classes/MuseumType.yaml @@ -138,7 +138,7 @@ classes: - lido:administrativeMetadata - Collection management\n\n**Data Population**:\nMuseum subtypes extracted from 187 Wikidata\ \ entities with type='M'\nin `data/wikidata/GLAMORCUBEPSXHFN/hyponyms_curated_full.yaml`.\n" slots: - - cataloging_standard + - complies_or_complied_with # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17) - collection_focus - conservation_lab - has_or_had_custodian_type @@ -171,6 +171,14 @@ classes: range: Facility inlined: true multivalued: true + complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 
(2026-01-17) + description: | + Standards used for object cataloging and collection management. + MIGRATED from cataloging_standard per Rule 53/56 (2026-01-17). + Common museum standards: LIDO, SPECTRUM, CIDOC-CRM. + range: CatalogingStandard + inlined: true + multivalued: true exact_mappings: - skos:Concept - schema:Museum @@ -210,7 +218,11 @@ classes: facility_type: RETAIL - facility_name: Education Center facility_type: EDUCATION - cataloging_standard: LIDO + complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17) + - has_or_had_identifier: LIDO + has_or_had_label: Lightweight Information Describing Objects + standard_domain: + - museum conservation_lab: true research_department: true description: Art Museum classification with domain-specific metadata @@ -229,6 +241,11 @@ classes: - minerals - taxidermy exhibition_program: permanent collection with themed galleries - cataloging_standard: Darwin Core + complies_or_complied_with: # was: cataloging_standard - migrated per Rule 53/56 (2026-01-17) + - has_or_had_identifier: Darwin Core + has_or_had_label: Darwin Core + standard_domain: + - natural_history + - museum research_department: true description: Natural History Museum with science-specific metadata diff --git a/schemas/20251121/linkml/modules/slots/cataloging_standard.yaml b/schemas/20251121/linkml/modules/slots/archive/cataloging_standard_archived_20260117.yaml similarity index 100% rename from schemas/20251121/linkml/modules/slots/cataloging_standard.yaml rename to schemas/20251121/linkml/modules/slots/archive/cataloging_standard_archived_20260117.yaml diff --git a/schemas/20251121/linkml/modules/slots/slot_fixes.yaml b/schemas/20251121/linkml/modules/slots/slot_fixes.yaml index b06c1c6b13..da5a86b71e 100644 --- a/schemas/20251121/linkml/modules/slots/slot_fixes.yaml +++ b/schemas/20251121/linkml/modules/slots/slot_fixes.yaml @@ -8231,18 +8231,26 @@ fixes: type: class processed: status: true - timestamp: 
'2026-01-17T22:03:00Z' + timestamp: '2026-01-17T23:45:00Z' session: session-2026-01-17-slot-migration notes: | - WELL_STRUCTURED_NO_MIGRATION_NEEDED: cataloging_standard has proper ontology alignment: - - slot_uri: dcterms:conformsTo (Dublin Core standard) - - Range: string (appropriate for standard names) - - related_mappings: dcterms:conformsTo - - Examples: LIDO, SPECTRUM, CIDOC-CRM, MARC21, RDA, BIBFRAME, Darwin Core + FULLY_MIGRATED per Rule 53/56 (2026-01-17): - The slot already uses the correct Dublin Core predicate for standards conformance. - Creating CatalogingStandard class hierarchy would be OVER-ENGINEERING. - Retaining with existing structure. + Files updated: + - MuseumType.yaml: imports (lines 8-10), slots list (line 141), slot_usage (lines 174-182), + examples (lines 213, 232) updated to use complies_or_complied_with + CatalogingStandard + - LibraryType.yaml: imports (lines 4-6), slots list (line 127), slot_usage (lines 148-156), + examples (lines 183, 202) updated to use complies_or_complied_with + CatalogingStandard + + New files created: + - complies_or_complied_with.yaml: Generic slot for standards compliance + - CatalogingStandard.yaml: Class for structured standard representation (dcterms:Standard) + + Archived: + - cataloging_standard.yaml → archive/cataloging_standard_archived_20260117.yaml + + Migration pattern: string slot → complies_or_complied_with slot with CatalogingStandard class + (has_or_had_identifier, has_or_had_label, has_or_had_description, has_or_had_url, standard_domain) - orignal_slot_id: https://nde.nl/ontology/hc/slot/category_measurement revision: - label: has_or_had_measurement_type diff --git a/src/glam_extractor/api/entity_review.py b/src/glam_extractor/api/entity_review.py index 767927ea0f..d3b96b6e36 100644 --- a/src/glam_extractor/api/entity_review.py +++ b/src/glam_extractor/api/entity_review.py @@ -1190,7 +1190,8 @@ async def update_entity_with_wcms_identifiers( wcms_ppid: str, wcms_name: str = '', wcms_email: str 
= '', - linkedin_url: str = '' + linkedin_url: str = '', + source_urls: Optional[List[dict]] = None ) -> Optional[str]: """ Update or create an entity file with WCMS identifiers when a match is confirmed. @@ -1198,6 +1199,8 @@ async def update_entity_with_wcms_identifiers( If entity file exists: Updates it with wcms_identifiers. If entity file doesn't exist: Fetches LinkedIn profile via Exa and creates entity file. + Also syncs any source_urls (non-LinkedIn evidence URLs) to the entity's web_claims. + Returns a status message string or None if nothing was done. """ now = datetime.now(timezone.utc) @@ -1239,6 +1242,22 @@ async def update_entity_with_wcms_identifiers( 'retrieval_agent': 'entity_review_api' }) + # Sync source_urls (non-LinkedIn evidence URLs) to web_claims; entries without a URL are skipped + if source_urls: + for src in source_urls: + url = src.get('source_url') or src.get('url', '') # NOTE(review): items presumably use 'source_url' like the frontend SourceUrlItem; 'url' kept as fallback - confirm + if url: + entity_data['web_claims'].append({ + 'claim_type': 'source_url', + 'claim_value': url, + 'source_url': url, + 'retrieved_on': src.get('added_at', now.isoformat()), + 'statement_created_at': now.isoformat(), + 'source_archived_at': src.get('added_at', now.isoformat()), + 'retrieval_agent': 'entity_review_api', + 'notes': src.get('comment', '') + }) + # Write back with open(existing_file, 'w', encoding='utf-8') as f: json.dump(entity_data, f, indent=2, ensure_ascii=False) @@ -1311,6 +1330,22 @@ async def update_entity_with_wcms_identifiers( 'wcms_identifiers': wcms_ids } + # Add source_urls (non-LinkedIn evidence URLs) to web_claims for new entity; entries without a URL are skipped + if source_urls: + for src in source_urls: + url = src.get('source_url') or src.get('url', '') # NOTE(review): items presumably use 'source_url' like the frontend SourceUrlItem; 'url' kept as fallback - confirm + if url: + entity_data['web_claims'].append({ + 'claim_type': 'source_url', + 'claim_value': url, + 'source_url': url, + 'retrieved_on': src.get('added_at', now.isoformat()), + 'statement_created_at': now.isoformat(), + 'source_archived_at': src.get('added_at', now.isoformat()), + 'retrieval_agent': 'entity_review_api', + 'notes': src.get('comment', '') + }) + # Write new entity file timestamp = 
now.strftime("%Y%m%dT%H%M%SZ") # Normalize filename @@ -1428,12 +1463,18 @@ async def save_review_decision( # If decision is MATCH, create or update entity file with WCMS identifiers entity_update_result = None if request.decision == ReviewDecision.MATCH: + # Get source_urls from in-memory cache (non-LinkedIn evidence URLs) + source_urls = None + if _candidates_by_wcms and request.wcms_ppid in _candidates_by_wcms: + source_urls = _candidates_by_wcms[request.wcms_ppid].get('source_urls', []) + entity_update_result = await update_entity_with_wcms_identifiers( linkedin_slug=candidate.get('linkedin_slug'), wcms_ppid=request.wcms_ppid, wcms_name=candidate.get('wcms_name', ''), wcms_email=candidate.get('wcms_email', ''), - linkedin_url=candidate.get('linkedin_url', f"https://www.linkedin.com/in/{candidate.get('linkedin_slug', '')}") + linkedin_url=candidate.get('linkedin_url', f"https://www.linkedin.com/in/{candidate.get('linkedin_slug', '')}"), + source_urls=source_urls ) # Try git commit (may fail if not a git repo on server, that's OK)