Refactor and archive deprecated slots; update migration records

- Removed deprecated slots: storage_security_level, version_number, video_comment, visiting_hour, was_asserted_by, was_revision_of, writing_system.
- Archived corresponding YAML files for deprecated slots with detailed migration notes.
- Updated slot definitions for has_collection and encompassing_body to reflect new naming conventions and temporal patterns.
- Enhanced metadata extraction in index_persons_qdrant.py to include WCMS registration and data sources.
- Modified hybrid_retriever and multi_embedding_retriever to support filtering by WCMS registration status.
This commit is contained in:
kempersc 2026-01-15 13:16:59 +01:00
parent 8174c9692e
commit 3fb27c15e2
34 changed files with 2484 additions and 2344 deletions

View file

@ -572,6 +572,7 @@ class PersonSearchRequest(BaseModel):
k: int = Field(default=10, ge=1, le=100, description="Number of results to return") k: int = Field(default=10, ge=1, le=100, description="Number of results to return")
filter_custodian: str | None = Field(default=None, description="Filter by custodian slug (e.g., 'nationaal-archief')") filter_custodian: str | None = Field(default=None, description="Filter by custodian slug (e.g., 'nationaal-archief')")
only_heritage_relevant: bool = Field(default=False, description="Only return heritage-relevant staff") only_heritage_relevant: bool = Field(default=False, description="Only return heritage-relevant staff")
only_wcms: bool = Field(default=False, description="Only return WCMS-registered profiles (heritage sector users)")
embedding_model: str | None = Field( embedding_model: str | None = Field(
default=None, default=None,
description="Embedding model to use (e.g., 'minilm_384', 'openai_1536'). If None, auto-selects best available." description="Embedding model to use (e.g., 'minilm_384', 'openai_1536'). If None, auto-selects best available."
@ -1657,6 +1658,7 @@ class MultiSourceRetriever:
k: int = 10, k: int = 10,
filter_custodian: str | None = None, filter_custodian: str | None = None,
only_heritage_relevant: bool = False, only_heritage_relevant: bool = False,
only_wcms: bool = False,
using: str | None = None, using: str | None = None,
) -> list[Any]: ) -> list[Any]:
"""Search for persons/staff in the heritage_persons collection. """Search for persons/staff in the heritage_persons collection.
@ -1668,6 +1670,7 @@ class MultiSourceRetriever:
k: Number of results k: Number of results
filter_custodian: Optional custodian slug to filter by filter_custodian: Optional custodian slug to filter by
only_heritage_relevant: Only return heritage-relevant staff only_heritage_relevant: Only return heritage-relevant staff
only_wcms: Only return WCMS-registered profiles
using: Optional embedding model to use (e.g., 'minilm_384', 'openai_1536') using: Optional embedding model to use (e.g., 'minilm_384', 'openai_1536')
Returns: Returns:
@ -1680,6 +1683,7 @@ class MultiSourceRetriever:
k=k, k=k,
filter_custodian=filter_custodian, filter_custodian=filter_custodian,
only_heritage_relevant=only_heritage_relevant, only_heritage_relevant=only_heritage_relevant,
only_wcms=only_wcms,
using=using, using=using,
) )
except Exception as e: except Exception as e:
@ -2754,6 +2758,7 @@ async def person_search(request: PersonSearchRequest) -> PersonSearchResponse:
k=request.k, k=request.k,
filter_custodian=request.filter_custodian, filter_custodian=request.filter_custodian,
only_heritage_relevant=request.only_heritage_relevant, only_heritage_relevant=request.only_heritage_relevant,
only_wcms=request.only_wcms,
using=request.embedding_model, # Pass embedding model using=request.embedding_model, # Pass embedding model
) )

View file

@ -1,5 +1,5 @@
{ {
"generated": "2026-01-15T11:37:46.717Z", "generated": "2026-01-15T12:00:28.038Z",
"schemaRoot": "/schemas/20251121/linkml", "schemaRoot": "/schemas/20251121/linkml",
"totalFiles": 3026, "totalFiles": 3026,
"categoryCounts": { "categoryCounts": {

View file

@ -60,7 +60,8 @@ imports:
- modules/slots/endorsement_source - modules/slots/endorsement_source
- modules/slots/extent - modules/slots/extent
- modules/slots/governance_structure - modules/slots/governance_structure
- modules/slots/has_collection # has_collection ARCHIVED (2026-01-15) - migrated to has_or_had_collection (Rule 53)
- modules/slots/has_or_had_collection
- modules/slots/digital_platform - modules/slots/digital_platform
- modules/slots/digitization_status - modules/slots/digitization_status
- modules/slots/has_auxiliary_place - modules/slots/has_auxiliary_place
@ -82,7 +83,8 @@ imports:
- modules/slots/staff_impact - modules/slots/staff_impact
- modules/slots/documentation_source - modules/slots/documentation_source
- modules/slots/has_or_had_organizational_change_event - modules/slots/has_or_had_organizational_change_event
- modules/slots/encompassing_body # encompassing_body ARCHIVED (2026-01-15) - migrated to is_or_was_encompassed_by (Rule 53)
- modules/slots/is_or_was_encompassed_by
- modules/slots/id - modules/slots/id
- modules/slots/identifier_scheme - modules/slots/identifier_scheme
- modules/slots/identifier_value - modules/slots/identifier_value
@ -138,7 +140,8 @@ imports:
# valid_from and valid_to ARCHIVED (2026-01-14) - migrated to temporal_extent (Rule 53) # valid_from and valid_to ARCHIVED (2026-01-14) - migrated to temporal_extent (Rule 53)
- modules/slots/was_derived_from - modules/slots/was_derived_from
- modules/slots/was_generated_by - modules/slots/was_generated_by
- modules/slots/was_revision_of # was_revision_of ARCHIVED (2026-01-15) - migrated to is_or_was_revision_of (Rule 53)
- modules/slots/is_or_was_revision_of
# Hub architecture slots # Hub architecture slots
- modules/slots/hc_id - modules/slots/hc_id

View file

@ -1,5 +1,5 @@
{ {
"generated": "2026-01-15T12:00:28.038Z", "generated": "2026-01-15T12:16:59.614Z",
"schemaRoot": "/schemas/20251121/linkml", "schemaRoot": "/schemas/20251121/linkml",
"totalFiles": 3026, "totalFiles": 3026,
"categoryCounts": { "categoryCounts": {

View file

@ -44,7 +44,7 @@ imports:
- ../slots/specificity_annotation - ../slots/specificity_annotation
- ../slots/template_specificity - ../slots/template_specificity
- ../slots/temporal_extent # was: valid_from + valid_to - migrated per Rule 53 - ../slots/temporal_extent # was: valid_from + valid_to - migrated per Rule 53
- ../slots/version_number - ../slots/has_or_had_version # was: version_number - migrated per Rule 53
- ../slots/was_derived_from - ../slots/was_derived_from
- ../slots/was_generated_by - ../slots/was_generated_by
- ./SpecificityAnnotation - ./SpecificityAnnotation
@ -151,7 +151,7 @@ classes:
- supersede - supersede
- template_specificity - template_specificity
- temporal_extent # was: valid_from + valid_to - migrated per Rule 53 - temporal_extent # was: valid_from + valid_to - migrated per Rule 53
- version_number - has_or_had_version # was: version_number - migrated per Rule 53
- was_derived_from - was_derived_from
- was_generated_by - was_generated_by
slot_usage: slot_usage:
@ -200,13 +200,17 @@ classes:
examples: examples:
- value: 2024/0315/001 - value: 2024/0315/001
description: Notarial deed reference number description: Notarial deed reference number
version_number: has_or_had_version: # was: version_number - migrated per Rule 53
range: integer description: |
Version number of the articles.
Original articles = 1, first amendment = 2, etc.
MIGRATED from version_number per Rule 53.
range: string
required: false required: false
examples: examples:
- value: 1 - value: "1"
description: Original founding articles description: Original founding articles
- value: 3 - value: "3"
description: Third version (two amendments) description: Third version (two amendments)
is_current_version: is_current_version:
range: boolean range: boolean
@ -335,7 +339,7 @@ classes:
notary_name: mr. H.J. van den Berg notary_name: mr. H.J. van den Berg
notary_office: Amsterdam notary_office: Amsterdam
notarial_deed_number: 1885/328 notarial_deed_number: 1885/328
version_number: 1 has_or_had_version: "1"
is_current_version: false is_current_version: false
superseded_by: https://nde.nl/ontology/hc/articles/rm/v15 superseded_by: https://nde.nl/ontology/hc/articles/rm/v15
purpose_clause: De stichting heeft ten doel het beheren en toegankelijk maken van het Rijksmuseum... purpose_clause: De stichting heeft ten doel het beheren en toegankelijk maken van het Rijksmuseum...
@ -363,7 +367,7 @@ classes:
effective_date: '2015-11-12' effective_date: '2015-11-12'
notary_name: mr. A.M. de Vries notary_name: mr. A.M. de Vries
notary_office: Haarlem notary_office: Haarlem
version_number: 3 has_or_had_version: "3"
is_current_version: true is_current_version: true
supersedes: https://nde.nl/ontology/hc/articles/nha/v2 supersedes: https://nde.nl/ontology/hc/articles/nha/v2
purpose_clause: De stichting heeft ten doel het beheren, behouden, en toegankelijk maken van archieven en collecties purpose_clause: De stichting heeft ten doel het beheren, behouden, en toegankelijk maken van archieven en collecties

View file

@ -24,10 +24,10 @@ imports:
- ../slots/legal_status - ../slots/legal_status
- ../slots/place_designation - ../slots/place_designation
- ../slots/digital_platform - ../slots/digital_platform
- ../slots/has_collection - ../slots/has_or_had_collection # was: has_collection - migrated per Rule 53 (2026-01-15)
- ../slots/organizational_structure - ../slots/organizational_structure
- ../slots/has_or_had_organizational_change_event - ../slots/has_or_had_organizational_change_event
- ../slots/encompassing_body - ../slots/is_or_was_encompassed_by # was: encompassing_body - migrated per Rule 53 (2026-01-15)
- ../slots/identifier - ../slots/identifier
- ../slots/created - ../slots/created
- ../slots/modified - ../slots/modified
@ -188,12 +188,12 @@ classes:
- has_or_had_custodian_type - has_or_had_custodian_type
- data_license_policy - data_license_policy
- digital_platform - digital_platform
- encompassing_body - is_or_was_encompassed_by # was: encompassing_body - migrated per Rule 53 (2026-01-15)
- has_or_had_exhibition - has_or_had_exhibition
- gift_shop - gift_shop
- has_administration - has_administration
- has_budget - has_budget
- has_collection - has_or_had_collection # was: has_collection - migrated per Rule 53 (2026-01-15)
- has_operational_archive - has_operational_archive
- hc_id - hc_id
- identifier - identifier
@ -232,10 +232,14 @@ classes:
multivalued: true multivalued: true
required: false required: false
inlined_as_list: true inlined_as_list: true
has_collection: has_or_had_collection: # was: has_collection - migrated per Rule 53 (2026-01-15)
range: uriorcurie range: CustodianCollection
multivalued: true multivalued: true
required: false required: false
inlined_as_list: true
description: |
Heritage collection(s) associated with this custodian.
MIGRATED from has_collection slot per slot_fixes.yaml (Rule 53, 2026-01-15).
organizational_structure: organizational_structure:
range: uriorcurie range: uriorcurie
multivalued: true multivalued: true
@ -246,11 +250,14 @@ classes:
multivalued: true multivalued: true
required: false required: false
inlined_as_list: true inlined_as_list: true
encompassing_body: is_or_was_encompassed_by: # was: encompassing_body - migrated per Rule 53 (2026-01-15)
range: uriorcurie range: EncompassingBody
multivalued: true multivalued: true
required: false required: false
inlined_as_list: true inlined_as_list: true
description: |
Governance bodies that encompass this custodian.
MIGRATED from encompassing_body slot per slot_fixes.yaml (Rule 53, 2026-01-15).
data_license_policy: data_license_policy:
range: DataLicensePolicy range: DataLicensePolicy
required: false required: false

View file

@ -53,7 +53,7 @@ imports:
- ../slots/reconstruction_method - ../slots/reconstruction_method
- ../slots/was_derived_from - ../slots/was_derived_from
- ../slots/was_generated_by - ../slots/was_generated_by
- ../slots/was_revision_of - ../slots/is_or_was_revision_of # was: was_revision_of - migrated per Rule 53 (2026-01-15)
- ../slots/identifier - ../slots/identifier
- ../slots/collections_under_responsibility - ../slots/collections_under_responsibility
- ../slots/has_articles_of_association - ../slots/has_articles_of_association
@ -131,7 +131,7 @@ classes:
- temporal_extent - temporal_extent
- was_derived_from - was_derived_from
- was_generated_by - was_generated_by
- was_revision_of - is_or_was_revision_of # was: was_revision_of - migrated per Rule 53 (2026-01-15)
slot_usage: slot_usage:
refers_to_custodian: refers_to_custodian:
required: true required: true
@ -270,8 +270,11 @@ classes:
was_generated_by: was_generated_by:
range: ReconstructionActivity range: ReconstructionActivity
required: true required: true
was_revision_of: is_or_was_revision_of:
range: CustodianLegalStatus range: CustodianLegalStatus
description: |
Previous version of this legal status.
MIGRATED from was_revision_of per Rule 39/53 (RiC-O temporal naming, 2026-01-15).
identifier: identifier:
range: CustodianIdentifier range: CustodianIdentifier
multivalued: true multivalued: true

View file

@ -61,7 +61,8 @@ imports:
- ../slots/specificity_annotation - ../slots/specificity_annotation
- ../slots/stewardship_model - ../slots/stewardship_model
- ../slots/template_specificity - ../slots/template_specificity
- ../slots/visitor_service - ../slots/has_or_had_service # was: visitor_service - migrated per Rule 53
- ./Service
- ./SpecificityAnnotation - ./SpecificityAnnotation
- ./TemplateSpecificityScores - ./TemplateSpecificityScores
prefixes: prefixes:
@ -236,7 +237,7 @@ classes:
- specificity_annotation - specificity_annotation
- stewardship_model - stewardship_model
- template_specificity - template_specificity
- visitor_service - has_or_had_service # was: visitor_service - migrated per Rule 53
slot_usage: slot_usage:
feature_type: feature_type:
range: string range: string
@ -259,15 +260,25 @@ classes:
description: Société d'Exploitation de la Tour Eiffel description: Société d'Exploitation de la Tour Eiffel
- value: 150+ National Trust properties - value: 150+ National Trust properties
description: UK heritage charity holdings description: UK heritage charity holdings
visitor_service: has_or_had_service:
range: string description: |
Visitor services provided by the feature custodian to facilitate public access
and engagement. Migrated from visitor_service per Rule 53.
range: Service
inlined: true
required: true required: true
examples: examples:
- value: Guided tours, Events, Gift shop, Café - value:
service_name: Guided Tours and Events
service_type: VISITOR
description: English Heritage typical site services description: English Heritage typical site services
- value: 24/7 public access, Interpretive panels - value:
service_name: Public Access and Interpretation
service_type: VISITOR
description: Open memorial site services description: Open memorial site services
- value: Timed entry tickets, Multilingual audio guide - value:
service_name: Timed Entry with Audio Guide
service_type: VISITOR
description: High-capacity monument services description: High-capacity monument services
has_or_had_activity: has_or_had_activity:
range: string range: string

View file

@ -38,7 +38,9 @@ imports:
- ../slots/retrieval_agent - ../slots/retrieval_agent
- ../slots/extraction_note - ../slots/extraction_note
- ../slots/extraction_method - ../slots/extraction_method
- ../slots/xpath # REMOVED: ../slots/xpath - migrated to has_or_had_provenance_path with XPath class (2026-01-15, Rule 53)
- ../slots/has_or_had_provenance_path
- ./XPath
- ../slots/css_selector - ../slots/css_selector
- ../slots/confidence - ../slots/confidence
- ../slots/html_file - ../slots/html_file
@ -774,7 +776,7 @@ classes:
- specificity_annotation - specificity_annotation
- has_or_had_sub_section - has_or_had_sub_section
- template_specificity - template_specificity
- xpath - has_or_had_provenance_path # was: xpath - migrated per Rule 53 (2026-01-15)
slot_usage: slot_usage:
heading_level: heading_level:
range: integer range: integer
@ -798,6 +800,12 @@ classes:
inlined_as_list: true inlined_as_list: true
list_item: list_item:
multivalued: true multivalued: true
has_or_had_provenance_path:
range: XPath
inlined: true
description: |
XPath provenance path documenting the exact location of the section in the source HTML.
MIGRATED from xpath slot per slot_fixes.yaml (Rule 53, 2026-01-15).
exact_mappings: exact_mappings:
- schema:WebPageElement - schema:WebPageElement
PageLink: PageLink:
@ -813,7 +821,7 @@ classes:
- link_url - link_url
- specificity_annotation - specificity_annotation
- template_specificity - template_specificity
- xpath - has_or_had_provenance_path # was: xpath - migrated per Rule 53 (2026-01-15)
slot_usage: slot_usage:
link_text: link_text:
required: true required: true
@ -824,6 +832,12 @@ classes:
range: LinkTypeEnum range: LinkTypeEnum
is_sub_guide: is_sub_guide:
range: boolean range: boolean
has_or_had_provenance_path:
range: XPath
inlined: true
description: |
XPath provenance path documenting the exact location of the link in the source HTML.
MIGRATED from xpath slot per slot_fixes.yaml (Rule 53, 2026-01-15).
FeaturedCard: FeaturedCard:
class_uri: schema:CreativeWork class_uri: schema:CreativeWork
description: 'A featured content card (often with image/icon). description: 'A featured content card (often with image/icon).
@ -841,7 +855,7 @@ classes:
- card_url - card_url
- specificity_annotation - specificity_annotation
- template_specificity - template_specificity
- xpath - has_or_had_provenance_path # was: xpath - migrated per Rule 53 (2026-01-15)
slot_usage: slot_usage:
card_title: card_title:
required: true required: true
@ -849,6 +863,12 @@ classes:
range: uri range: uri
card_image_url: card_image_url:
range: uri range: uri
has_or_had_provenance_path:
range: XPath
inlined: true
description: |
XPath provenance path documenting the exact location of the card in the source HTML.
MIGRATED from xpath slot per slot_fixes.yaml (Rule 53, 2026-01-15).
types: types:
TemporalValue: TemporalValue:
typeof: string typeof: string

View file

@ -68,7 +68,8 @@ imports:
- ../slots/membership_size - ../slots/membership_size
- ../slots/publication_activity - ../slots/publication_activity
- ../slots/collecting_scope - ../slots/collecting_scope
- ../slots/volunteer_program - ../slots/has_or_had_program # was: volunteer_program - migrated per Rule 53
- ./Program
- ../slots/community_engagement - ../slots/community_engagement
- ../slots/heritage_society_subtype - ../slots/heritage_society_subtype
prefixes: prefixes:
@ -429,102 +430,6 @@ classes:
range: string range: string
multivalued: true multivalued: true
slot_uri: schema:additionalProperty slot_uri: schema:additionalProperty
volunteer_program:
description: 'Volunteer roles and programs within the heritage society.
Governance Volunteers:
- Board of directors: Strategic leadership, fiduciary oversight
- Officers: President, vice-president, secretary, treasurer
- Committee chairs: Publications, events, collections, membership
Collections Volunteers:
- Curators: Managing society collections, exhibitions
- Catalogers: Inventorying, describing, digitizing items
- Conservators: Basic preservation, cleaning, rehousing
- Digitization: Scanning photographs, documents, objects
Research Volunteers:
- Archival researchers: Transcribing documents, indexing records
- Genealogical researchers: Assisting members with family history
- Historians: Writing articles, preparing talks, guiding tours
- Subject specialists: Numismatists, philatelists, local historians
Education and Outreach:
- Lecturers: Giving talks at meetings, schools, community groups
- Tour guides: Leading heritage walks, site visits
- Educators: School programs, workshops for public
- Social media managers: Website updates, Facebook posts
Operations Volunteers:
- Membership coordinators: Processing renewals, communications
- Event organizers: Planning meetings, field trips, symposia
- Fundraisers: Grant writing, donation campaigns, sales
- Facilities: Maintaining society office, library, storage
Volunteer Recognition:
- Awards: Volunteer of the year, lifetime achievement
- Publications: Acknowledging contributors in journals
- Events: Appreciation dinners, member recognition
- Honors: Honorary memberships, named collections
Volunteer Challenges:
- Aging volunteers: Difficulty recruiting younger members
- Burnout: Long-serving volunteers retiring
- Skills gaps: Need for digital skills, conservation training
- Succession planning: Ensuring continuity of leadership
Examples:
- "All-volunteer organization; 20 active volunteers manage collections and events"
- "1 paid part-time coordinator; 50 volunteers for research, publications, outreach"
- "Board of 9 elected members; 15 committee volunteers; 200 general members"
- "Struggling to recruit volunteers; 5 core volunteers (all 60+ years old)"
This field captures the human infrastructure of the society.
'
range: string
multivalued: true
slot_uri: schema:volunteer
community_engagement: community_engagement:
description: "Community outreach, public programs, and engagement activities.\n\nRegular Programs:\n- Membership meetings:\ description: "Community outreach, public programs, and engagement activities.\n\nRegular Programs:\n- Membership meetings:\
\ Monthly, quarterly, or annual gatherings\n - Lecture series: Guest speakers on heritage topics\n - Show-and-tell:\ \ Monthly, quarterly, or annual gatherings\n - Lecture series: Guest speakers on heritage topics\n - Show-and-tell:\
@ -557,8 +462,26 @@ classes:
slot_uri: schema:event slot_uri: schema:event
slots: slots:
- has_or_had_custodian_type - has_or_had_custodian_type
- has_or_had_program # was: volunteer_program - migrated per Rule 53
- specificity_annotation - specificity_annotation
- template_specificity - template_specificity
slot_usage: slot_usage:
has_or_had_custodian_type: has_or_had_custodian_type:
equals_expression: '["hc:HeritageSocietyType"]' equals_expression: '["hc:HeritageSocietyType"]'
has_or_had_program:
description: |
Volunteer programs and roles within the heritage society.
Migrated from volunteer_program (inline attribute) per Rule 53.
Covers governance volunteers (board, officers), collections volunteers
(curators, catalogers), research volunteers (archivists, historians),
education and outreach (lecturers, tour guides), and operations
(membership coordinators, event organizers, fundraisers).
range: Program
inlined: true
multivalued: true
examples:
- value:
program_name: Heritage Volunteer Program
program_type: VOLUNTEER
description: All-volunteer organization managing collections and events

View file

@ -28,7 +28,8 @@ imports:
- ../slots/monument_number - ../slots/monument_number
- ../slots/specificity_annotation - ../slots/specificity_annotation
- ../slots/template_specificity - ../slots/template_specificity
- ../slots/visiting_hour - ../slots/has_or_had_opening_hour # was: visiting_hour - migrated per Rule 53
- ./OpeningHour
- ../slots/was_derived_from - ../slots/was_derived_from
- ../slots/was_generated_by - ../slots/was_generated_by
- ./SpecificityAnnotation - ./SpecificityAnnotation
@ -97,7 +98,7 @@ classes:
- monument_number - monument_number
- specificity_annotation - specificity_annotation
- template_specificity - template_specificity
- visiting_hour - has_or_had_opening_hour # was: visiting_hour - migrated per Rule 53
- was_derived_from - was_derived_from
- was_generated_by - was_generated_by
slot_usage: slot_usage:
@ -190,12 +191,23 @@ classes:
description: Open to public description: Open to public
- value: false - value: false
description: Private/staff only description: Private/staff only
visiting_hour: has_or_had_opening_hour: # was: visiting_hour - migrated per Rule 53
range: string description: |
Visiting hours specification for public access.
MIGRATED from visiting_hour per Rule 53.
range: OpeningHour
inlined: true
multivalued: true
examples: examples:
- value: Sa-Su 11:00-17:00 - value:
description: Weekend opening day_of_week: "Saturday"
- value: By appointment only opens: "11:00"
closes: "17:00"
description: Weekend opening hours
- value:
day_of_week: "By appointment"
opens: "10:00"
closes: "16:00"
description: Appointment required description: Appointment required
is_part_of_complex: is_part_of_complex:
range: boolean range: boolean
@ -299,7 +311,10 @@ classes:
heritage_status: Rijksmonument heritage_status: Rijksmonument
monument_number: '521814' monument_number: '521814'
is_open_to_public: true is_open_to_public: true
visiting_hour: Tu-Su 11:00-17:00 has_or_had_opening_hour:
- day_of_week: "Tuesday-Sunday"
opens: "11:00"
closes: "17:00"
is_part_of_complex: true is_part_of_complex: true
complex_name: Kasteel Amerongen landgoed complex_name: Kasteel Amerongen landgoed
current_use: Museum and events current_use: Museum and events

View file

@ -66,7 +66,8 @@ imports:
- ../slots/text_direction - ../slots/text_direction
- ../slots/has_or_had_label # was: title_proper - migrated per Rule 53 - ../slots/has_or_had_label # was: title_proper - migrated per Rule 53
- ../slots/has_or_had_label # was: uniform_title - migrated per Rule 53 - ../slots/has_or_had_label # was: uniform_title - migrated per Rule 53
- ../slots/writing_system - ../slots/has_or_had_writing_system
- ./WritingSystem
- ./SpecificityAnnotation - ./SpecificityAnnotation
- ./TemplateSpecificityScores - ./TemplateSpecificityScores
default_prefix: hc default_prefix: hc
@ -153,7 +154,7 @@ classes:
- text_direction - text_direction
- has_or_had_label # was: title_proper - has_or_had_label # was: title_proper
- has_or_had_label # was: uniform_title - has_or_had_label # was: uniform_title
- writing_system - has_or_had_writing_system
slot_usage: slot_usage:
carrier_type: carrier_type:
required: true required: true
@ -257,13 +258,27 @@ classes:
- value: MANUSCRIPT - value: MANUSCRIPT
- value: PRINTED - value: PRINTED
- value: TYPESCRIPT - value: TYPESCRIPT
writing_system: has_or_had_writing_system:
required: false required: false
range: string range: WritingSystem
inlined: true
description: |
The writing system or script used on this information carrier.
MIGRATED from writing_system (2026-01-15) per Rule 53.
Uses WritingSystem class with ISO 15924 script codes.
examples: examples:
- value: Latin alphabet - value:
- value: Cuneiform has_or_had_code: "Latn"
- value: Chinese (traditional) script_name: "Latin"
description: Latin alphabet
- value:
has_or_had_code: "Xsux"
script_name: "Cuneiform"
description: Ancient Sumerian/Akkadian cuneiform
- value:
has_or_had_code: "Hant"
script_name: "Chinese (traditional)"
description: Traditional Chinese script
language: language:
required: false required: false
range: string range: string
@ -512,7 +527,9 @@ classes:
country: DE country: DE
date_of_publication: c. 1455 date_of_publication: c. 1455
script_type: PRINTED script_type: PRINTED
writing_system: Latin alphabet has_or_had_writing_system:
has_or_had_code: "Latn"
script_name: "Latin"
content_language: content_language:
- lat - lat
carries_information: Bible. Latin. Vulgate carries_information: Bible. Latin. Vulgate
@ -535,7 +552,9 @@ classes:
- Parchment (vellum) - Parchment (vellum)
folio_count: 347 folio_count: 347
script_type: MANUSCRIPT script_type: MANUSCRIPT
writing_system: Greek uncial has_or_had_writing_system:
has_or_had_code: "Grek"
script_name: "Greek uncial"
content_language: content_language:
- grc - grc
date_of_publication: 4th century CE date_of_publication: 4th century CE
@ -554,7 +573,9 @@ classes:
material_composition: material_composition:
- Parchment (sheepskin) - Parchment (sheepskin)
script_type: MANUSCRIPT script_type: MANUSCRIPT
writing_system: Latin alphabet has_or_had_writing_system:
has_or_had_code: "Latn"
script_name: "Latin"
content_language: content_language:
- eng - eng
date_of_publication: '1776-08-02' date_of_publication: '1776-08-02'

View file

@ -14,7 +14,8 @@ imports:
- ../slots/research_department - ../slots/research_department
- ../slots/specificity_annotation - ../slots/specificity_annotation
- ../slots/template_specificity - ../slots/template_specificity
- ../slots/visitor_facility - ../slots/has_or_had_facility # was: visitor_facility - migrated per Rule 53
- ./Facility
- ./SpecificityAnnotation - ./SpecificityAnnotation
- ./TemplateSpecificityScores - ./TemplateSpecificityScores
classes: classes:
@ -137,7 +138,7 @@ classes:
- research_department - research_department
- specificity_annotation - specificity_annotation
- template_specificity - template_specificity
- visitor_facility - has_or_had_facility # was: visitor_facility - migrated per Rule 53
slot_usage: slot_usage:
wikidata_entity: wikidata_entity:
pattern: ^Q[0-9]+$ pattern: ^Q[0-9]+$
@ -150,6 +151,13 @@ classes:
required: false required: false
has_or_had_custodian_type: has_or_had_custodian_type:
equals_expression: '["hc:MuseumType"]' equals_expression: '["hc:MuseumType"]'
has_or_had_facility:
description: |
Public facilities and amenities available at the museum.
Migrated from visitor_facility per Rule 53.
range: Facility
inlined: true
multivalued: true
exact_mappings: exact_mappings:
- skos:Concept - skos:Concept
- schema:Museum - schema:Museum
@ -182,10 +190,13 @@ classes:
- sculptures - sculptures
- decorative arts - decorative arts
exhibition_program: rotating exhibitions with permanent collection exhibition_program: rotating exhibitions with permanent collection
visitor_facility: has_or_had_facility: # was: visitor_facility
- cafe - facility_name: Museum Café
- museum shop facility_type: FOOD_SERVICE
- education center - facility_name: Museum Shop
facility_type: RETAIL
- facility_name: Education Center
facility_type: EDUCATION
cataloging_standard: LIDO cataloging_standard: LIDO
conservation_lab: true conservation_lab: true
research_department: true research_department: true

View file

@ -25,7 +25,7 @@ imports:
- ../slots/temporal_extent - ../slots/temporal_extent
- ./SpecificityAnnotation - ./SpecificityAnnotation
- ./TemplateSpecificityScores - ./TemplateSpecificityScores
- ../slots/was_asserted_by # was_asserted_by REMOVED - fully migrated to is_or_was_asserted_by (Rule 53, 2026-01-15)
prefixes: prefixes:
linkml: https://w3id.org/linkml/ linkml: https://w3id.org/linkml/
hc: https://nde.nl/ontology/hc/ hc: https://nde.nl/ontology/hc/
@ -191,12 +191,7 @@ classes:
asserter_type: HUMAN_ANALYST asserter_type: HUMAN_ANALYST
asserter_contact: jane.doe@heritage-org.nl asserter_contact: jane.doe@heritage-org.nl
description: Human analyst asserter description: Human analyst asserter
was_asserted_by: # was_asserted_by REMOVED - fully migrated to is_or_was_asserted_by (Rule 53, 2026-01-15)
range: string
required: false
examples:
- value: primary-presence-analyzer/1.0
- value: jane.doe@heritage-org.nl
confidence_score: confidence_score:
range: float range: float
required: false required: false

View file

@ -38,7 +38,7 @@ imports:
- ../slots/policy_review_date - ../slots/policy_review_date
- ../slots/specificity_annotation - ../slots/specificity_annotation
- ../slots/standards_compliance - ../slots/standards_compliance
- ../slots/storage_security_level - ../slots/has_or_had_security_level # was: storage_security_level - migrated per Rule 53 (2026-01-15)
# REMOVED 2026-01-15: temperature_max, temperature_min, temperature_target - migrated to has_or_had_setpoint with Setpoint class (Rule 53) # REMOVED 2026-01-15: temperature_max, temperature_min, temperature_target - migrated to has_or_had_setpoint with Setpoint class (Rule 53)
- ../slots/has_or_had_setpoint - ../slots/has_or_had_setpoint
- ./Setpoint - ./Setpoint
@ -104,7 +104,7 @@ classes:
- policy_review_date - policy_review_date
- specificity_annotation - specificity_annotation
- standards_compliance - standards_compliance
- storage_security_level - has_or_had_security_level # was: storage_security_level - migrated per Rule 53 (2026-01-15)
# REMOVED 2026-01-15: temperature_max, temperature_min, temperature_target - migrated to has_or_had_setpoint # REMOVED 2026-01-15: temperature_max, temperature_min, temperature_target - migrated to has_or_had_setpoint
- has_or_had_setpoint - has_or_had_setpoint
- temperature_tolerance - temperature_tolerance
@ -229,10 +229,16 @@ classes:
range: boolean range: boolean
examples: examples:
- value: true - value: true
storage_security_level: has_or_had_security_level: # was: storage_security_level - migrated per Rule 53 (2026-01-15)
range: string range: SecurityLevel
required: false
description: |
Security classification level for this storage policy.
MIGRATED from storage_security_level slot per slot_fixes.yaml (Rule 53, 2026-01-15).
examples: examples:
- value: HIGH - value:
level_code: HIGH
level_name: High Security
description: High-value collection storage description: High-value collection storage
has_or_had_access_restriction: has_or_had_access_restriction:
range: string range: string
@ -312,7 +318,9 @@ classes:
pest_management_required: true pest_management_required: true
fire_suppression_type: INERT_GAS fire_suppression_type: INERT_GAS
flood_protection_required: true flood_protection_required: true
storage_security_level: HIGH has_or_had_security_level: # was: storage_security_level
level_code: HIGH
level_name: High Security
standards_compliance: standards_compliance:
- EN_15757_2010 - EN_15757_2010
- EN_16893_2018 - EN_16893_2018
@ -348,7 +356,9 @@ classes:
pest_management_required: true pest_management_required: true
fire_suppression_type: INERT_GAS fire_suppression_type: INERT_GAS
flood_protection_required: true flood_protection_required: true
storage_security_level: HIGH has_or_had_security_level: # was: storage_security_level
level_code: HIGH
level_name: High Security
access_restrictions: Restricted access - Conservation staff only. Materials must acclimate 24 hours before and after access_restrictions: Restricted access - Conservation staff only. Materials must acclimate 24 hours before and after
access. Gloves required. access. Gloves required.
standards_compliance: standards_compliance:

View file

@ -38,7 +38,7 @@ imports:
# - ../slots/video_category_id # - ../slots/video_category_id
- ../slots/has_or_had_identifier - ../slots/has_or_had_identifier
- ./VideoCategoryIdentifier - ./VideoCategoryIdentifier
- ../slots/video_comment - ../slots/has_or_had_comment # was: video_comment - migrated per Rule 53 (2025-01-15)
- ../slots/has_or_had_quantity - ../slots/has_or_had_quantity
- ./Quantity - ./Quantity
# MIGRATED 2026-01-15: ../slots/view_count → ../slots/has_or_had_quantity per Rule 53 # MIGRATED 2026-01-15: ../slots/view_count → ../slots/has_or_had_quantity per Rule 53
@ -220,7 +220,7 @@ classes:
# REMOVED - migrated to has_or_had_identifier (2026-01-14, Rule 53) # REMOVED - migrated to has_or_had_identifier (2026-01-14, Rule 53)
# - video_category_id # - video_category_id
- has_or_had_identifier - has_or_had_identifier
- video_comment - has_or_had_comment # was: video_comment - migrated per Rule 53 (2025-01-15)
- has_or_had_quantity - has_or_had_quantity
slot_usage: slot_usage:
duration: duration:
@ -366,11 +366,14 @@ classes:
examples: examples:
- value: 0 - value: 0
description: No comments fetched description: No comments fetched
video_comment: has_or_had_comment: # was: video_comment - migrated per Rule 53 (2025-01-15)
range: VideoComment range: VideoComment
multivalued: true multivalued: true
required: false required: false
inlined: true inlined: true
description: |
Comments on this video post.
MIGRATED from video_comment slot per slot_fixes.yaml (Rule 53, 2025-01-15).
comments: comments:
- Extends SocialMediaPost with video-specific properties - Extends SocialMediaPost with video-specific properties
- Maps to as:Video and schema:VideoObject - Maps to as:Video and schema:VideoObject

View file

@ -43,7 +43,9 @@ imports:
- ../slots/link_text - ../slots/link_text
- ../slots/link_type - ../slots/link_type
- ../slots/link_context - ../slots/link_context
- ../slots/xpath # REMOVED: ../slots/xpath - migrated to has_or_had_provenance_path with XPath class (2026-01-15, Rule 53)
- ../slots/has_or_had_provenance_path
- ./XPath
# ARCHIVED: ../slots/valid_from - use temporal_extent instead # ARCHIVED: ../slots/valid_from - use temporal_extent instead
# ARCHIVED: ../slots/valid_to - use temporal_extent instead # ARCHIVED: ../slots/valid_to - use temporal_extent instead
- ../slots/temporal_extent # was: valid_from + valid_to - ../slots/temporal_extent # was: valid_from + valid_to
@ -102,7 +104,7 @@ classes:
- description - description
- link_type - link_type
- link_context - link_context
- xpath - has_or_had_provenance_path # was: xpath - migrated per Rule 53 (2026-01-15)
- temporal_extent # was: valid_from + valid_to - temporal_extent # was: valid_from + valid_to
- specificity_annotation - specificity_annotation
- template_specificity - template_specificity
@ -138,10 +140,12 @@ classes:
range: string range: string
description: >- description: >-
The surrounding text or context where the link appears. The surrounding text or context where the link appears.
xpath: has_or_had_provenance_path:
range: string range: XPath
inlined: true
description: >- description: >-
XPath location of the link in the source HTML (for provenance). XPath provenance path documenting the exact location of the link in the source HTML.
MIGRATED from xpath slot per slot_fixes.yaml (Rule 53, 2026-01-15).
annotations: annotations:
custodian_types: '["*"]' custodian_types: '["*"]'
@ -158,6 +162,7 @@ classes:
- Generic web link representation - Generic web link representation
- Used within Overview collections - Used within Overview collections
- Created from slot_fixes.yaml migration (2026-01-14) - Created from slot_fixes.yaml migration (2026-01-14)
- 'MIGRATION (2026-01-15): Replaced xpath slot with has_or_had_provenance_path using XPath class per slot_fixes.yaml'
see_also: see_also:
- http://schema.org/WebPage - http://schema.org/WebPage

File diff suppressed because it is too large Load diff

View file

@ -1553,6 +1553,13 @@ fixes:
person_xpath and person_xpath_match_score slots also archived. person_xpath and person_xpath_match_score slots also archived.
Updated: WebClaim.yaml, PersonWebClaim.yaml, InvalidWebClaim.yaml, MissionStatement.yaml Updated: WebClaim.yaml, PersonWebClaim.yaml, InvalidWebClaim.yaml, MissionStatement.yaml
- original_slot_id: https://nde.nl/ontology/hc/slot/writing_system - original_slot_id: https://nde.nl/ontology/hc/slot/writing_system
processed:
status: true
timestamp: '2026-01-15T12:00:00Z'
session: session-2026-01-15-writing-revision-assertion-migration
notes: 'FULLY MIGRATED: InformationCarrier - writing_system REMOVED, using has_or_had_writing_system
with WritingSystem class. Examples updated to use structured object format with
ISO 15924 script codes. Slot archived to archive/writing_system_archived_20260115.yaml.'
revision: revision:
- label: has_or_had_writing_system - label: has_or_had_writing_system
type: slot type: slot
@ -1755,6 +1762,12 @@ fixes:
- label: WebObservation - label: WebObservation
type: class type: class
- original_slot_id: https://nde.nl/ontology/hc/slot/was_revision_of - original_slot_id: https://nde.nl/ontology/hc/slot/was_revision_of
processed:
status: true
timestamp: '2026-01-15T12:05:00Z'
session: session-2026-01-15-writing-revision-assertion-migration
notes: 'FULLY MIGRATED: CustodianLegalStatus - was_revision_of REMOVED, using is_or_was_revision_of
per Rule 39 (RiC-O temporal naming). Slot archived to archive/was_revision_of_archived_20260115.yaml.'
revision: revision:
- label: is_or_was_revision_of - label: is_or_was_revision_of
type: slot type: slot
@ -1773,6 +1786,14 @@ fixes:
- label: SourceWork - label: SourceWork
type: class type: class
- original_slot_id: https://nde.nl/ontology/hc/slot/was_asserted_by - original_slot_id: https://nde.nl/ontology/hc/slot/was_asserted_by
processed:
status: true
timestamp: '2026-01-15T12:10:00Z'
session: session-2026-01-15-writing-revision-assertion-migration
notes: 'FULLY MIGRATED: PrimaryDigitalPresenceAssertion - was_asserted_by residual
import and slot_usage REMOVED (was partially migrated, now fully cleaned up).
Using is_or_was_asserted_by with Asserter class.
Slot archived to archive/was_asserted_by_archived_20260115.yaml.'
revision: revision:
- label: is_or_was_asserted_by - label: is_or_was_asserted_by
type: slot type: slot
@ -1785,6 +1806,13 @@ fixes:
- label: URL - label: URL
type: class type: class
- original_slot_id: https://nde.nl/ontology/hc/slot/was_approved_by - original_slot_id: https://nde.nl/ontology/hc/slot/was_approved_by
processed:
status: true
timestamp: '2026-01-15T12:15:00Z'
session: session-2026-01-15-writing-revision-assertion-migration
notes: 'ALREADY FULLY MIGRATED: Budget.yaml already uses is_or_was_approved_by
with Approver class. No slot file to archive (already archived).
Marked as processed for tracking.'
revision: revision:
- label: is_or_was_approved_by - label: is_or_was_approved_by
type: slot type: slot
@ -1916,6 +1944,11 @@ fixes:
type: slot type: slot
- label: ProgramTypes - label: ProgramTypes
type: class type: class
processed:
status: true
timestamp: "2026-01-15T16:00:00Z"
session: "session-0115-batch3"
notes: "Migrated inline attribute to has_or_had_program in HeritageSocietyType.yaml. Old slot archived."
- original_slot_id: https://nde.nl/ontology/hc/slot/visitor_service - original_slot_id: https://nde.nl/ontology/hc/slot/visitor_service
revision: revision:
- label: has_or_had_service - label: has_or_had_service
@ -1930,6 +1963,11 @@ fixes:
type: slot type: slot
- label: ServiceTypes - label: ServiceTypes
type: class type: class
processed:
status: true
timestamp: "2026-01-15T16:00:00Z"
session: "session-0115-batch3"
notes: "Migrated to has_or_had_service in FeatureCustodianType.yaml. Old slot archived."
- original_slot_id: https://nde.nl/ontology/hc/slot/visitor_facility - original_slot_id: https://nde.nl/ontology/hc/slot/visitor_facility
revision: revision:
- label: has_or_had_facility - label: has_or_had_facility
@ -1944,6 +1982,11 @@ fixes:
type: slot type: slot
- label: FacilityTypes - label: FacilityTypes
type: class type: class
processed:
status: true
timestamp: "2026-01-15T16:00:00Z"
session: "session-0115-batch3"
notes: "Migrated to has_or_had_facility in MuseumType.yaml. Old slot archived."
- original_slot_id: https://nde.nl/ontology/hc/slot/visitor_count - original_slot_id: https://nde.nl/ontology/hc/slot/visitor_count
revision: revision:
- label: has_or_had_quantity - label: has_or_had_quantity
@ -2029,6 +2072,13 @@ fixes:
type: slot type: slot
- label: OpeningHour - label: OpeningHour
type: class type: class
processed:
status: true
timestamp: '2026-01-15T15:00:00Z'
session: session-2026-01-15-version-visiting-migration
notes: 'FULLY MIGRATED: HistoricBuilding.yaml - visiting_hour replaced with has_or_had_opening_hour
using OpeningHour class with structured day_of_week/opens/closes fields. Slot
archived to archive/visiting_hour_archived_20260115.yaml.'
- original_slot_id: https://nde.nl/ontology/hc/slot/view_count - original_slot_id: https://nde.nl/ontology/hc/slot/view_count
revision: revision:
- label: has_or_had_quantity - label: has_or_had_quantity
@ -2080,6 +2130,12 @@ fixes:
- label: VideoIdentifier - label: VideoIdentifier
type: class type: class
- original_slot_id: https://nde.nl/ontology/hc/slot/video_comment - original_slot_id: https://nde.nl/ontology/hc/slot/video_comment
processed:
status: true
timestamp: '2025-01-15T00:00:00Z'
session: session-2025-01-15-slot-migration
notes: 'FULLY MIGRATED: VideoPost - video_comment replaced with has_or_had_comment
using Comment class. Slot archived to archive/video_comment_archived_20250115.yaml.'
revision: revision:
- label: has_or_had_comment - label: has_or_had_comment
type: slot type: slot
@ -4354,4 +4410,83 @@ fixes:
Files migrated: Files migrated:
- Collection.yaml (parent_collection → is_or_was_sub_collection_of) - Collection.yaml (parent_collection → is_or_was_sub_collection_of)
Archived: modules/slots/archive/parent_collection_archived_20250115.yaml Archived: modules/slots/archive/parent_collection_archived_20250115.yaml
- original_slot_id: https://nde.nl/ontology/hc/slot/has_collection
revision:
- label: has_or_had_collection
type: slot
- label: CustodianCollection
type: class
processed:
status: true
date: '2025-01-15'
notes: |
Migration completed for has_collection slot.
has_collection → has_or_had_collection (RiC-O temporal pattern)
Range updated: uriorcurie → CustodianCollection (proper class)
Files migrated:
- Custodian.yaml (has_collection → has_or_had_collection)
Archived: modules/slots/archive/has_collection_archived_20250115.yaml
- original_slot_id: https://nde.nl/ontology/hc/slot/encompassing_body
revision:
- label: is_or_was_encompassed_by
type: slot
- label: EncompassingBody
type: class
processed:
status: true
date: '2025-01-15'
notes: |
Migration completed for encompassing_body slot.
encompassing_body → is_or_was_encompassed_by (RiC-O temporal pattern)
Range updated: uriorcurie → EncompassingBody (proper class)
Files migrated:
- Custodian.yaml (encompassing_body → is_or_was_encompassed_by)
Archived: modules/slots/archive/encompassing_body_archived_20250115.yaml
- original_slot_id: https://nde.nl/ontology/hc/slot/stewardship_responsibility
revision:
- label: is_or_was_managed_by
type: slot
- label: StewardGroup
type: class
- label: has_or_had_responsibility
type: slot
link_branch: 1
- label: StewardshipResponsibility
type: class
link_branch: 1
- label: has_or_had_type
type: slot
link_branch: 2
- label: StewardGroupType
type: class
link_branch: 2
- label: includes_or_included
type: slot
link_branch: 2
- label: StewardGroupTypes
type: class
- original_slot_id: https://nde.nl/ontology/hc/slot/stewardship_model
revision:
- label: is_or_was_managed_by
type: slot
- label: StewardGroup
type: class
- label: abides_or_abided_to
type: slot
- label: StewardshipModel
type: class
- label: has_or_had_type
type: slot
- label: StewardshipModelType
type: class
- label: includes_or_included
type: slot
- label: StewardshipModelTypes
type: class

View file

@ -443,6 +443,16 @@ def extract_metadata(data: dict[str, Any], filepath: Path) -> dict[str, Any]:
# Calculate richness score for search ranking # Calculate richness score for search ranking
metadata["richness_score"] = calculate_richness_score(data) metadata["richness_score"] = calculate_richness_score(data)
# WCMS registration - critical for filtering to show only registered users
# WCMS = Web Content Management System - the heritage sector user registry
wcms_identifiers = data.get("wcms_identifiers")
metadata["has_wcms"] = bool(wcms_identifiers and isinstance(wcms_identifiers, dict) and wcms_identifiers.get("user_id"))
# Also track data sources for more granular filtering
data_sources = data.get("data_sources") or []
if data_sources:
metadata["data_sources"] = data_sources
return metadata return metadata

View file

@ -738,6 +738,7 @@ def build_schema_aware_person_filter(
heritage_type_code: str | None = None, heritage_type_code: str | None = None,
heritage_relevant_only: bool = False, heritage_relevant_only: bool = False,
custodian_slug: str | None = None, custodian_slug: str | None = None,
only_wcms: bool = False,
) -> dict[str, Any] | None: ) -> dict[str, Any] | None:
"""Build Qdrant filter conditions for schema-aware person search. """Build Qdrant filter conditions for schema-aware person search.
@ -745,6 +746,7 @@ def build_schema_aware_person_filter(
heritage_type_code: Single-letter heritage type code (M, A, L, etc.) heritage_type_code: Single-letter heritage type code (M, A, L, etc.)
heritage_relevant_only: Only return heritage-relevant staff heritage_relevant_only: Only return heritage-relevant staff
custodian_slug: Filter by specific custodian custodian_slug: Filter by specific custodian
only_wcms: Only return WCMS-registered profiles (heritage sector users)
Returns: Returns:
Dict of filter conditions for Qdrant, or None if no filters Dict of filter conditions for Qdrant, or None if no filters
@ -760,6 +762,9 @@ def build_schema_aware_person_filter(
if custodian_slug: if custodian_slug:
filters["custodian_slug"] = custodian_slug filters["custodian_slug"] = custodian_slug
if only_wcms:
filters["has_wcms"] = True
return filters if filters else None return filters if filters else None
@ -2167,6 +2172,7 @@ class HybridRetriever:
k: int | None = None, k: int | None = None,
filter_custodian: str | None = None, filter_custodian: str | None = None,
only_heritage_relevant: bool = False, only_heritage_relevant: bool = False,
only_wcms: bool = False,
using: str | None = None, using: str | None = None,
# Schema-aware filter parameters (from DSPy HeritageQueryRouter) # Schema-aware filter parameters (from DSPy HeritageQueryRouter)
target_role_category: str | None = None, target_role_category: str | None = None,
@ -2179,6 +2185,7 @@ class HybridRetriever:
k: Number of results to return (default: k_final) k: Number of results to return (default: k_final)
filter_custodian: Optional custodian slug to filter by filter_custodian: Optional custodian slug to filter by
only_heritage_relevant: Only return heritage-relevant staff only_heritage_relevant: Only return heritage-relevant staff
only_wcms: Only return WCMS-registered profiles (heritage sector users)
using: Optional embedding model name (for multi-embedding mode). using: Optional embedding model name (for multi-embedding mode).
One of: "openai_1536", "minilm_384", "bge_768" One of: "openai_1536", "minilm_384", "bge_768"
target_role_category: Role category from DSPy router (CURATORIAL, ARCHIVAL, etc.) target_role_category: Role category from DSPy router (CURATORIAL, ARCHIVAL, etc.)
@ -2197,6 +2204,7 @@ class HybridRetriever:
heritage_type_code=heritage_type_code, heritage_type_code=heritage_type_code,
heritage_relevant_only=only_heritage_relevant, heritage_relevant_only=only_heritage_relevant,
custodian_slug=filter_custodian, custodian_slug=filter_custodian,
only_wcms=only_wcms,
) )
logger.info(f"Person search for: {query[:50]}... (model: {using or 'auto'}, role_category: {target_role_category}, custodian_type: {target_custodian_type})") logger.info(f"Person search for: {query[:50]}... (model: {using or 'auto'}, role_category: {target_role_category}, custodian_type: {target_custodian_type})")

View file

@ -520,6 +520,7 @@ class MultiEmbeddingRetriever:
using: EmbeddingModel | str | None = None, using: EmbeddingModel | str | None = None,
filter_custodian: str | None = None, filter_custodian: str | None = None,
only_heritage_relevant: bool = False, only_heritage_relevant: bool = False,
only_wcms: bool = False,
) -> list[dict[str, Any]]: ) -> list[dict[str, Any]]:
"""Search for persons/staff in the heritage_persons collection. """Search for persons/staff in the heritage_persons collection.
@ -529,6 +530,7 @@ class MultiEmbeddingRetriever:
using: Embedding model to use using: Embedding model to use
filter_custodian: Optional custodian slug to filter by filter_custodian: Optional custodian slug to filter by
only_heritage_relevant: Only return heritage-relevant staff only_heritage_relevant: Only return heritage-relevant staff
only_wcms: Only return WCMS-registered profiles (heritage sector users)
Returns: Returns:
List of person results with scores List of person results with scores
@ -539,6 +541,8 @@ class MultiEmbeddingRetriever:
filters = {} filters = {}
if filter_custodian: if filter_custodian:
filters["custodian_slug"] = filter_custodian filters["custodian_slug"] = filter_custodian
if only_wcms:
filters["has_wcms"] = True
# Search with over-fetch for post-filtering # Search with over-fetch for post-filtering
results = self.search( results = self.search(