# FindingAidMetadata Class - Instance data schema for finding aid metadata
#
# This schema defines the structure for finding aid metadata YAML files, such as
# repository guides from the Nationaal Archief (NL-HaNA). It is distinct from
# FindingAidType.yaml which defines the TYPE hierarchy/classification.
#
# Purpose:
# - Validate YAML metadata files for individual finding aids
# - Capture multilingual titles, descriptions, and topics
# - Link finding aids to custodians via GHCID/ISIL
# - Track sub-guides and external resources
# - Maintain provenance for crawled/extracted data
# - Capture page structure with XPath-based web claims
#
# Example usage:
#   linkml-validate -s FindingAidMetadata.yaml familiegeschiedenis.yaml
#
# Generation date: 2025-12-18
# Updated: 2025-12-18 (comprehensive web claims and page structure support)

id: https://nde.nl/ontology/hc/class/FindingAidMetadata
name: finding_aid_metadata_class
title: FindingAidMetadata Class

# Every CURIE prefix used in class_uri / slot_uri / mappings below must be
# declared here. `rdf` is required by the claim_value slot (rdf:value).
prefixes:
  linkml: https://w3id.org/linkml/
  hc: https://nde.nl/ontology/hc/
  rico: https://www.ica.org/standards/RiC/ontology#
  skos: http://www.w3.org/2004/02/skos/core#
  dcterms: http://purl.org/dc/terms/
  schema: http://schema.org/
  wd: http://www.wikidata.org/entity/
  prov: http://www.w3.org/ns/prov#
  foaf: http://xmlns.com/foaf/0.1/
  xsd: http://www.w3.org/2001/XMLSchema#
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#

default_prefix: hc
default_range: string

imports:
  - linkml:types

# ============================================================================
# ENUMERATIONS
# ============================================================================

enums:
  # NOTE: FindingAidTypeEnum was REMOVED per Rule 9 (Enum-to-Class Promotion).
  # The FindingAidType class hierarchy in FindingAidType.yaml is the single source of truth.
  # The finding_aid_type slot now uses string range with class name values.

  ExternalResourceTypeEnum:
    description: |
      Types of external resources linked from finding aids.
    permissible_values:
      database:
        description: Searchable online database
      organization:
        description: Related organization or society
      portal:
        description: Discovery portal or aggregator
      archive:
        description: External archive holding related materials
      library:
        description: Library with related holdings
      museum:
        description: Museum with related collections
      website:
        description: General website resource
      api:
        description: API endpoint for programmatic access
      reference:
        description: Reference material or documentation
      research_institute:
        description: Research institution or academic center

  SubGuideTypeEnum:
    description: |
      Types of sub-guide references within a repository guide.
    permissible_values:
      sub_guide:
        description: Direct sub-guide within same repository
      cross_reference:
        description: Cross-reference to another main guide
      thematic:
        description: Thematic sub-collection
      regional:
        description: Regional sub-guide

  RelationshipTypeEnum:
    description: |
      Types of relationships between finding aids.
    permissible_values:
      cross_reference:
        description: Related guide on related topic
      parent:
        description: Parent guide containing this guide
      child:
        description: Child guide within this guide
      see_also:
        description: See also reference
      predecessor:
        description: Previous version of this guide
      successor:
        description: Newer version of this guide

  LinkTypeEnum:
    description: |
      Types of hyperlinks extracted from pages.
    permissible_values:
      internal:
        description: Link within same domain
      external:
        description: Link to external domain
      anchor:
        description: Same-page anchor link
      download:
        description: Link to downloadable file
      sub_guide:
        description: Link to a sub-guide finding aid
      featured:
        description: Featured/highlighted link

  ClaimTypeEnum:
    description: |
      Types of claims extracted from web pages.
    permissible_values:
      page_title:
        description: Page title from title tag or h1
      section_heading:
        description: Section heading (h2-h6)
      paragraph:
        description: Paragraph text content
      list_item:
        description: List item content
      link:
        description: Hyperlink text and URL
      featured_card:
        description: Featured content card
      metadata:
        description: Page metadata (description, keywords)
      date:
        description: Date or temporal reference
      temporal_range:
        description: Start-end date range
      geographic_scope:
        description: Geographic location or scope
      identifier:
        description: Identifier (ISIL, Wikidata, etc.)

# ============================================================================
# CLASSES
# ============================================================================

classes:
  # --------------------------------------------------------------------------
  # Root Container Class
  # --------------------------------------------------------------------------
  FindingAidMetadata:
    class_uri: rico:FindingAid
    tree_root: true
    description: |
      Metadata record for a single finding aid (inventory, guide, catalog, etc.).

      This class captures the descriptive metadata for finding aids produced by
      heritage custodians to facilitate access to their collections. It supports:

      - Multilingual titles and descriptions (nl, en, etc.)
      - Classification via FindingAidType hierarchy
      - Custodian linkage via GHCID, ISIL, Wikidata
      - Topic/subject indexing
      - Temporal and geographic coverage
      - References to sub-guides and external resources
      - Key archives and related archives
      - Related guides within the repository
      - Provenance tracking for web-harvested data
      - Page structure with web claims and XPath provenance

      **ONTOLOGY ALIGNMENT**:
      - RiC-O: rico:FindingAid
      - Dublin Core: dcterms:BibliographicResource
      - Schema.org: schema:CreativeWork

      **EXAMPLE FILES**:
      See data/custodian/findingaid/NL-ZH-DHA-A-NA/guide/repository_guide/metadata/
    exact_mappings:
      - rico:FindingAid
    close_mappings:
      - dcterms:BibliographicResource
      - schema:CreativeWork
    # Slot names here must match the keys under the top-level `slots:` section.
    # NOTE(review): this list previously named `access_restrictions` and
    # `all_links`; the slots defined in this file at those positions are
    # `has_or_had_access_restriction` and `has_link`. Confirm that instance
    # files use the defined names.
    slots:
      - id
      - slug
      - finding_aid_type
      - wikidata_class
      - title
      - url
      - custodian
      - description
      - topics
      - temporal_coverage
      - geographic_coverage
      - sub_guides
      - external_resources
      - key_archives
      - related_archives
      - related_guides
      - has_or_had_access_restriction
      - finding_aid_notes
      - notes
      - page_sections
      - web_claims
      - has_link
      - provenance
    slot_usage:
      id:
        identifier: true
        required: true
      slug:
        required: true
      finding_aid_type:
        required: true
      title:
        required: true
      url:
        required: true
      custodian:
        required: true
    comments:
      - "Root class for finding aid metadata YAML files"
      - "Designed for validation with linkml-validate"
      - "Supports Nationaal Archief zoekhulpen structure"
    see_also:
      - "https://www.ica.org/standards/RiC/ontology#FindingAid"
      - "https://www.nationaalarchief.nl/onderzoeken/zoekhulpen"

  # --------------------------------------------------------------------------
  # Multilingual Text Class
  # --------------------------------------------------------------------------
  MultilingualText:
    class_uri: skos:Concept
    description: |
      Container for multilingual text values (titles, descriptions).

      Supports any ISO 639-1 language code as a slot name.
      Common slots: nl (Dutch), en (English), de (German), fr (French).
    slots:
      - nl
      - en
      - de
      - fr
    slot_usage:
      nl:
        description: Dutch text
      en:
        description: English text
      de:
        description: German text
      fr:
        description: French text
    comments:
      - "At least one language slot should be populated"
      - "nl is primary for Dutch archives"

  # --------------------------------------------------------------------------
  # Custodian Reference Class
  # --------------------------------------------------------------------------
  CustodianReference:
    class_uri: rico:Agent
    description: |
      Reference to the heritage custodian that created/maintains this finding aid.

      Uses standard identifiers: GHCID, ISIL, Wikidata ID.
    slots:
      - ghcid
      - isil
      - name
      - wikidata
    slot_usage:
      ghcid:
        description: Global Heritage Custodian Identifier
        required: true
        pattern: "^[A-Z]{2}-[A-Z]{2}-[A-Z]{3}-[A-Z]-[A-Z0-9]+$"
      isil:
        description: International Standard Identifier for Libraries (ISO 15511)
        pattern: "^[A-Z]{2}-[A-Za-z0-9]+$"
      name:
        description: Human-readable name of custodian
        required: true
      wikidata:
        description: Wikidata entity ID (Q-number)
        pattern: "^Q[0-9]+$"
    exact_mappings:
      - rico:Agent
      - schema:Organization

  # --------------------------------------------------------------------------
  # Temporal Coverage Class (expanded)
  # --------------------------------------------------------------------------
  # NOTE(review): `TemporalValue` (used as a range below and in TemporalPeriod)
  # and the `periods` slot are not defined in the portion of the schema visible
  # here - confirm they are declared further down the file.
  TemporalCoverage:
    class_uri: dcterms:PeriodOfTime
    description: |
      Time period covered by the finding aid's materials.

      Supports both numeric years and special values like "present".
      Can include named periods (key_periods) and significant dates (key_dates).
    slots:
      - start
      - end
      - notes
      - periods
      - key_periods
      - key_dates
    slot_usage:
      start:
        description: Start year of coverage
        range: TemporalValue
        required: true
      end:
        description: End year of coverage (use "present" for ongoing)
        range: TemporalValue
        required: true
      notes:
        description: Additional notes about temporal coverage
      periods:
        description: Named periods within coverage
        range: TemporalPeriod
        multivalued: true
        inlined_as_list: true
      key_periods:
        description: Named historical periods (alias for periods)
        range: TemporalPeriod
        multivalued: true
        inlined_as_list: true
      key_dates:
        description: Significant historical dates
        range: KeyDate
        multivalued: true
        inlined_as_list: true
    exact_mappings:
      - dcterms:PeriodOfTime
      - schema:temporalCoverage

  # --------------------------------------------------------------------------
  # Temporal Period Class
  # --------------------------------------------------------------------------
  TemporalPeriod:
    class_uri: dcterms:PeriodOfTime
    description: A named historical period with optional start/end dates and description.
    slots:
      - period_name
      - period
      - period_start
      - period_end
      - period_description
    slot_usage:
      period_name:
        description: Name of the period
        range: string
        required: true
        aliases:
          - name
      period:
        description: Period as single string (e.g., "1945-1965")
        range: string
      period_start:
        description: Start year
        range: TemporalValue
        aliases:
          - start
      period_end:
        description: End year
        range: TemporalValue
        aliases:
          - end
      period_description:
        description: Description of the period
        range: string
        aliases:
          - description

  # --------------------------------------------------------------------------
  # Key Date Class
  # --------------------------------------------------------------------------
  KeyDate:
    class_uri: schema:Event
    description: |
      A significant historical date with event description.

      Used for key_dates within TemporalCoverage.
    slots:
      - date
      - event
      - event_en
    slot_usage:
      date:
        description: ISO 8601 date (YYYY-MM-DD or YYYY)
        range: string
        required: true
      event:
        description: Description of what happened (Dutch or original)
        range: string
        required: true
      event_en:
        description: English translation of event description
        range: string

  # --------------------------------------------------------------------------
  # Geographic Coverage Class (expanded)
  # --------------------------------------------------------------------------
  GeographicCoverage:
    class_uri: dcterms:Location
    description: |
      Geographic area covered by the finding aid's materials.

      Supports primary areas, secondary/related areas, and migration patterns.
    slots:
      - primary
      - scope
      - secondary
      - related
      - historical_regions
      - colonial
      - international
      - major_cities
      - inbound_from
      - outbound_to
    slot_usage:
      primary:
        description: Primary geographic areas covered
        multivalued: true
      scope:
        description: Description of geographic scope
      secondary:
        description: Secondary geographic areas (less prominent coverage)
        multivalued: true
      related:
        description: Related geographic areas (e.g., for WWII - Germany, Japan)
        multivalued: true
      historical_regions:
        description: Historical region names (may no longer exist)
        multivalued: true
      colonial:
        description: Colonial territories covered
        multivalued: true
      international:
        description: International scope descriptions
        multivalued: true
      major_cities:
        description: Major cities within coverage area
        multivalued: true
      inbound_from:
        description: Migration origin countries/regions (for migration guides)
        multivalued: true
      outbound_to:
        description: Migration destination countries/regions
        multivalued: true
    exact_mappings:
      - dcterms:spatial
      - schema:spatialCoverage

  # --------------------------------------------------------------------------
  # Sub-Guide Reference Class (expanded)
  # --------------------------------------------------------------------------
  SubGuideReference:
    class_uri: rico:FindingAid
    description: |
      Reference to a sub-guide or related finding aid within the same repository.

      Sub-guides are more specific finding aids linked from a repository guide.
      For example, a "Family History" repository guide links to specific
      sub-guides for "Civil Registry", "Notarial Records", etc.
    # The access flag is listed under the same name that slot_usage below and
    # the top-level slot definition use (is_or_was_access_restricted); a
    # slot_usage entry only takes effect for a slot the class actually has.
    slots:
      - slug
      - title_nl
      - title_en
      - url
      - sub_guide_temporal_coverage
      - sub_guide_description
      - sub_guide_type
      - is_or_was_access_restricted
    slot_usage:
      slug:
        description: URL slug identifier
        required: true
      title_nl:
        description: Dutch title
        required: true
      title_en:
        description: English title
      url:
        description: Relative or absolute URL to sub-guide
        required: true
      sub_guide_temporal_coverage:
        description: Time period covered (free text, e.g., "1811-1935", "before 1811")
      sub_guide_description:
        description: Brief description of the sub-guide content
      sub_guide_type:
        description: Type of sub-guide reference
        range: SubGuideTypeEnum
      is_or_was_access_restricted:
        description: Whether access to this sub-guide is restricted
        range: boolean
    exact_mappings:
      - rico:isOrWasPartOf

  # --------------------------------------------------------------------------
  # Archive Reference Class (NEW)
  # --------------------------------------------------------------------------
  ArchiveReference:
    class_uri: rico:RecordSet
    description: |
      Reference to an archive collection or institution.

      Used for key_archives (main archives for a topic) and
      related_archives (external archives with related holdings).
    # The access slot is listed under the same name that slot_usage below and
    # the top-level slot definition use (has_or_had_access_condition).
    slots:
      - name
      - full_name
      - resource_description
      - location
      - url
      - isil
      - sub_guide_temporal_coverage
      - has_or_had_access_condition
    slot_usage:
      name:
        description: Short name or abbreviation
        required: true
      full_name:
        description: Full official name
      resource_description:
        description: Description of the archive's relevance
      location:
        description: Physical location (city, country)
      url:
        description: URL to archive website
        range: uri
        # Overrides the global `url` slot, which is required by default.
        required: false
      isil:
        description: ISIL code of the archive (ISO 15511)
        pattern: "^[A-Z]{2}-[A-Za-z0-9]+$"
      sub_guide_temporal_coverage:
        description: Time period of holdings (free text)
      has_or_had_access_condition:
        description: Access restrictions or requirements
    exact_mappings:
      - rico:RecordSet
      - schema:ArchiveOrganization

  # --------------------------------------------------------------------------
  # Related Guide Reference Class (NEW)
  # --------------------------------------------------------------------------
  RelatedGuideReference:
    class_uri: rico:FindingAid
    description: |
      Reference to a related finding aid guide within the same repository.

      Different from sub_guides: these are peer-level guides on related topics,
      not hierarchically nested guides.
    slots:
      - slug
      - title_nl
      - title_en
      - relationship
    slot_usage:
      slug:
        description: URL slug identifier
        required: true
      title_nl:
        description: Dutch title
        required: true
      title_en:
        description: English title
      relationship:
        description: Type of relationship
        range: RelationshipTypeEnum

  # --------------------------------------------------------------------------
  # Access Restriction Class (NEW)
  # --------------------------------------------------------------------------
  AccessRestriction:
    class_uri: dcterms:RightsStatement
    description: |
      Access restriction information for materials.
    slots:
      - restriction_type
      - restriction_description
      - years_restricted
      - permission_required
    slot_usage:
      restriction_type:
        description: Type of restriction (privacy, cabr, closed, etc.)
        required: true
      restriction_description:
        description: Description of the restriction
      years_restricted:
        description: Number of years materials are restricted
        range: integer
      permission_required:
        description: Whether explicit permission is required
        range: boolean

  # --------------------------------------------------------------------------
  # External Resource Class (unchanged)
  # --------------------------------------------------------------------------
  ExternalResource:
    class_uri: foaf:Document
    description: |
      Reference to an external resource (database, organization, portal).

      Used to link finding aids to related external resources that complement
      the archive's holdings (e.g., genealogical databases, historical societies).
    slots:
      - name
      - url
      - type
      - resource_description
    slot_usage:
      name:
        description: Name of external resource
        required: true
      url:
        description: URL to external resource
        range: uri
        required: true
      type:
        description: Type of resource
        range: ExternalResourceTypeEnum
      resource_description:
        description: Brief description of the resource
    exact_mappings:
      - foaf:Document
      - schema:WebPage

  # --------------------------------------------------------------------------
  # Provenance Class (expanded)
  # --------------------------------------------------------------------------
  FindingAidProvenance:
    class_uri: prov:Activity
    description: |
      Provenance metadata for web-harvested finding aid information.

      Tracks when and how the metadata was retrieved, enabling
      data freshness assessment and re-crawling decisions.
    slots:
      - date_retrieved
      - retrieval_agent
      - source_url
      - extraction_notes
      - extraction_method
      - crawler_version
      - html_snapshot_path
      - claims_count
      - validation_status
    slot_usage:
      date_retrieved:
        description: ISO 8601 date when data was retrieved
        range: date
        required: true
      retrieval_agent:
        description: Tool/method used for retrieval
        required: true
      source_url:
        description: Original source URL
        range: uri
      extraction_notes:
        description: Notes about extraction process or quality
      extraction_method:
        description: Method used (manual, automated, hybrid)
      crawler_version:
        description: Version of crawling tool used
      html_snapshot_path:
        description: Relative path to archived HTML snapshot
      claims_count:
        description: Number of web claims extracted
        range: integer
      validation_status:
        description: Status of validation (validated, pending, failed)
    exact_mappings:
      - prov:Activity

  # --------------------------------------------------------------------------
  # Web Claim Class (NEW - per GLAM Rule 6)
  # --------------------------------------------------------------------------
  WebClaim:
    class_uri: prov:Entity
    description: |
      A claim extracted from a web page with XPath provenance.

      Per GLAM project Rule 6: Every claim extracted from a webpage MUST have
      an XPath pointer to the exact location in archived HTML where that value appears.
    slots:
      - claim_id
      - claim_type
      - claim_value
      - claim_source_url
      - xpath
      - css_selector
      - html_tag
      - claim_extraction_date
      - confidence
      - claim_context
      - html_file
    slot_usage:
      claim_id:
        description: Unique identifier for this claim
      claim_type:
        description: Type of claim
        range: ClaimTypeEnum
        required: true
      claim_value:
        description: The extracted text/data value
        required: true
      claim_source_url:
        description: URL where claim was extracted from
        range: uri
      xpath:
        description: XPath to the element containing this value
        required: true
      css_selector:
        description: CSS selector to the element (optional alternative)
      html_tag:
        description: HTML tag of the element (e.g., h2, p, a, li)
      claim_extraction_date:
        description: ISO 8601 timestamp when claim was extracted
        range: datetime
      confidence:
        description: Confidence score (0.0-1.0)
        range: float
      claim_context:
        description: Surrounding text for disambiguation
      html_file:
        description: Relative path to archived HTML file
    exact_mappings:
      - prov:Entity
    comments:
      - "XPath is required per GLAM Rule 6"
      - "Claims without XPath provenance should be rejected"

  # --------------------------------------------------------------------------
  # Page Section Class (NEW)
  # --------------------------------------------------------------------------
  PageSection:
    class_uri: schema:WebPageElement
    description: |
      A section of a finding aid page with heading and content.

      Captures the hierarchical structure of web pages for preservation
      and semantic understanding.
    slots:
      - section_id
      - heading_level
      - heading_text
      - heading_text_en
      - content_blocks
      - sub_sections
      - section_links
      - featured_items
      - list_items
      - xpath
    slot_usage:
      section_id:
        description: Unique identifier for this section
      heading_level:
        description: Heading level (1-6 for h1-h6)
        range: integer
        minimum_value: 1
        maximum_value: 6
      heading_text:
        description: Text of the section heading (original language)
        required: true
      heading_text_en:
        description: English translation of heading
      content_blocks:
        description: Paragraph content within this section
        multivalued: true
      sub_sections:
        description: Nested sub-sections
        range: PageSection
        multivalued: true
        inlined_as_list: true
      section_links:
        description: Links within this section
        range: PageLink
        multivalued: true
        inlined_as_list: true
      featured_items:
        description: Featured cards/items in this section
        range: FeaturedCard
        multivalued: true
        inlined_as_list: true
      list_items:
        description: List items (bulleted/numbered) in this section
        multivalued: true
      xpath:
        description: XPath to this section element
    exact_mappings:
      - schema:WebPageElement

  # --------------------------------------------------------------------------
  # Page Link Class (NEW)
  # --------------------------------------------------------------------------
  PageLink:
    class_uri: schema:WebPage
    description: |
      A hyperlink extracted from a page.
    slots:
      - link_text
      - link_url
      - link_type
      - link_context
      - is_sub_guide
      - xpath
    slot_usage:
      link_text:
        description: Visible text of the link
        required: true
      link_url:
        description: URL of the link
        range: uri
        required: true
      link_type:
        description: Type of link
        range: LinkTypeEnum
      link_context:
        description: Parent section or surrounding context
      is_sub_guide:
        description: Whether this is a link to a sub-guide
        range: boolean
      xpath:
        description: XPath to link element

  # --------------------------------------------------------------------------
  # Featured Card Class (NEW)
  # --------------------------------------------------------------------------
  FeaturedCard:
    class_uri: schema:CreativeWork
    description: |
      A featured content card (often with image/icon).

      Common on archive websites to highlight important sub-guides or resources.
    slots:
      - card_title
      - card_title_en
      - card_description
      - card_description_en
      - card_url
      - card_image_url
      - xpath
    slot_usage:
      card_title:
        description: Title of the card (original language)
        required: true
      card_title_en:
        description: English translation of title
      card_description:
        description: Description text (original language)
      card_description_en:
        description: English translation of description
      card_url:
        description: URL the card links to
        range: uri
      card_image_url:
        description: URL of card image/icon
        range: uri
      xpath:
        description: XPath to card element

# ============================================================================
# SLOTS
# ============================================================================

slots:
  # Core identification slots
  id:
    slot_uri: dcterms:identifier
    description: Unique identifier for this finding aid metadata record
    range: string
    required: true
    examples:
      - value: "na-guide-familiegeschiedenis"

  slug:
    slot_uri: schema:identifier
    description: URL-friendly slug identifier
    range: string
    required: true
    pattern: "^[a-z0-9-]+$"
    examples:
      - value: "familiegeschiedenis"

  # NOTE(review): the description below announces 28 concrete types, but the
  # branch lists enumerate 29 entries - confirm the intended count against
  # FindingAidType.yaml.
  finding_aid_type:
    slot_uri: rico:hasRecordSetType
    description: |
      Type classification referencing FindingAidType class hierarchy.

      Use UPPER_SNAKE_CASE class names derived from the class hierarchy
      in FindingAidType.yaml.

      **VALID VALUES** (28 concrete types with Wikidata mappings):

      INVENTORY Branch:
      - INVENTORY (Q65769447) - General inventory
      - ARCHIVAL_INVENTORY (Q108809848) - Archival materials inventory
      - CALENDAR (Q25221891) - Chronological abstracts
      - ANALYTICAL_INVENTORY (Q116872592) - Detailed item descriptions
      - MUSEUM_INVENTORY (Q6059909) - Museum objects inventory
      - LOGISTICS_INVENTORY (Q815410) - Storage/access management
      - PERIODIC_INVENTORY (Q7168640) - Regular interval counts
      - PERPETUAL_INVENTORY (Q7169552) - Continuous updates
      - PHANTOM_INVENTORY (Q7180610) - Missing item tracking
      - PHYSICAL_INVENTORY (Q475356) - Hands-on verification
      - VENDOR_MANAGED_INVENTORY (Q609498) - External party managed

      CATALOGUE Branch:
      - CATALOGUE (Q2352616) - Organized structured listing
      - LIBRARY_CATALOG (Q856638) - Library holdings (OPAC)
      - LOCATION_CATALOG (Q19309479) - Shelf/topographic list

      GUIDE Branch:
      - GUIDE (Q116767694) - High-level overview
      - REPOSITORY_GUIDE (Q117649202) - Institution-wide guide
      - SUMMARY_GUIDE (Q117833265) - Collection-level summary

      LIST Branch:
      - LIST (Q12139612) - Simple enumeration
      - SPECIAL_LIST (Q116972118) - Subject/format focused
      - BIBLIOGRAPHY (Q1631107) - Published works listing

      DATABASE Branch:
      - BIBLIOGRAPHIC_DATABASE (Q1789476) - Searchable bibliographic records
      - LITERATURE_DATABASE (Q1865123) - Scholarly literature

      REVIEW Branch:
      - LITERATURE_REVIEW (Q2412849) - Scholarly survey
      - BIBLIOGRAPHIC_REVIEW (Q59156132) - Bibliographic essay

      INDEX Branch:
      - INDEX (Q873506) - Alphabetical/systematic reference
      - BIBLIOGRAPHIC_INDEX (Q2033233) - Subject/author index
      - LOCATION_INDEX (Q117311578) - Geographic/place index

      INSTRUCTIONAL Branch:
      - INSTRUCTIONAL_MATERIALS (Q1348645) - Educational resources
      - REFERENCE_WORK (Q13136) - Encyclopedias, handbooks

      See FindingAidType.yaml for complete class definitions and ontology alignments.
    range: string
    required: true
    pattern: "^[A-Z][A-Z0-9]*(_[A-Z0-9]+)*$"
    examples:
      - value: "REPOSITORY_GUIDE"
        description: Institution-wide repository guide (wd:Q117649202)
      - value: "ARCHIVAL_INVENTORY"
        description: Detailed archival materials inventory (wd:Q108809848)
      - value: "LIBRARY_CATALOG"
        description: Library holdings catalog/OPAC (wd:Q856638)

  wikidata_class:
    slot_uri: skos:exactMatch
    description: Wikidata class ID for this finding aid type
    range: string
    pattern: "^Q[0-9]+$"
    examples:
      - value: "Q117649202"
        description: "Wikidata ID for RepositoryGuide"

  # Multilingual content slots
  title:
    slot_uri: dcterms:title
    description: Multilingual title of the finding aid
    range: MultilingualText
    required: true
    inlined: true

  description:
    slot_uri: dcterms:description
    description: Multilingual description of the finding aid
    range: MultilingualText
    inlined: true

  # URL slot
  url:
    slot_uri: schema:url
    description: URL to the finding aid
    range: uri
    required: true

  # Custodian reference
  custodian:
    slot_uri: rico:hasOrHadHolder
    description: Heritage custodian that created/maintains this finding aid
    range: CustodianReference
    required: true
    inlined: true

  # Subject/topic slots
  topics:
    slot_uri: dcterms:subject
    description: Subject topics covered by this finding aid
    range: string
    multivalued: true
    examples:
      - value: "genealogy"
      - value: "civil_registry"
      - value: "notarial_records"

  # Coverage slots
  temporal_coverage:
    slot_uri: dcterms:temporal
    description: Time period covered by the finding aid materials
    range: TemporalCoverage
    inlined: true

  geographic_coverage:
    slot_uri: dcterms:spatial
    description: Geographic area covered by the finding aid
    range: GeographicCoverage
    inlined: true

  # Related resources
  sub_guides:
    slot_uri: rico:includes
    description: Sub-guides or related finding aids within this repository
    range: SubGuideReference
    multivalued: true
    inlined_as_list: true

  external_resources:
    slot_uri: dcterms:references
    description: External resources related to this finding aid
    range: ExternalResource
    multivalued: true
    inlined_as_list: true

  key_archives:
    slot_uri: rico:hasOrHadConstituent
    description: Key archives relevant to this finding aid topic
    range: ArchiveReference
    multivalued: true
    inlined_as_list: true

  related_archives:
    slot_uri: rico:isRelatedTo
    description: External archives with related holdings
    range: ArchiveReference
    multivalued: true
    inlined_as_list: true

  related_guides:
    slot_uri: rico:isRelatedTo
    description: Related finding aid guides (peer-level, not hierarchical)
    range: RelatedGuideReference
    multivalued: true
    inlined_as_list: true

  has_or_had_access_restriction:
    slot_uri: dcterms:accessRights
    description: Access restrictions for materials covered
    range: AccessRestriction
    multivalued: true
    inlined_as_list: true

  finding_aid_notes:
    slot_uri: skos:note
    description: General notes about this finding aid
    range: string
    multivalued: true

  # Page structure slots
  page_sections:
    slot_uri: schema:hasPart
    description: Hierarchical page sections extracted from web page
    range: PageSection
    multivalued: true
    inlined_as_list: true

  web_claims:
    slot_uri: prov:wasGeneratedBy
    description: Web claims with XPath provenance
    range: WebClaim
    multivalued: true
    inlined_as_list: true

  has_link:
    slot_uri: schema:relatedLink
    description: All links extracted from the page
    range: PageLink
    multivalued: true
    inlined_as_list: true

  # Provenance
  provenance:
    slot_uri: prov:wasGeneratedBy
    description: Provenance information for this metadata record
    range: FindingAidProvenance
    inlined: true

  # Nested class slots (used within classes above)
  nl:
    description: Dutch language text
    range: string

  en:
    description: English language text
    range: string

  de:
    description: German language text
    range: string

  fr:
    description: French language text
    range: string

  ghcid:
    slot_uri: hc:ghcid
    description: Global Heritage Custodian Identifier
    range: string

  isil:
    slot_uri: dcterms:identifier
    description: ISIL code (ISO 15511)
    range: string

  name:
    slot_uri: schema:name
    description: Human-readable name
    range: string

  wikidata:
    slot_uri: skos:exactMatch
    description: Wikidata entity ID
    range: string

  start:
    slot_uri: schema:startDate
    description: Start of temporal coverage
    range: string

  end:
    slot_uri: schema:endDate
    description: End of temporal coverage
    range: string

  notes:
    slot_uri: skos:note
    description: Additional notes
    range: string
    multivalued: true

  primary:
    slot_uri: dcterms:spatial
    description: Primary geographic areas
    range: string
    multivalued: true

  scope:
    slot_uri: dcterms:description
    description: Scope description
    range: string

  secondary:
    slot_uri: dcterms:spatial
    description: Secondary geographic areas
    range: string
    multivalued: true

  related:
    slot_uri: dcterms:spatial
    description: Related geographic areas
    range: string
    multivalued: true

  historical_regions:
    slot_uri: dcterms:spatial
    description: Historical region names
    range: string
    multivalued: true

  colonial:
    slot_uri: dcterms:spatial
    description: Colonial territories
    range: string
    multivalued: true

  international:
    slot_uri: dcterms:spatial
    description: International scope
    range: string
    multivalued: true

  major_cities:
    slot_uri: dcterms:spatial
    description: Major cities
    range: string
    multivalued: true

  inbound_from:
    slot_uri: dcterms:spatial
    description: Migration origin countries/regions
    range: string
    multivalued: true

  outbound_to:
    slot_uri: dcterms:spatial
    description: Migration destination countries/regions
    range: string
    multivalued: true

  title_nl:
    slot_uri: dcterms:title
    description: Dutch title
    range: string

  title_en:
    slot_uri: dcterms:title
    description: English title
    range: string

  type:
    slot_uri: dcterms:type
    description: Resource type
    range: string

  date_retrieved:
    slot_uri: prov:endedAtTime
    description: Date when data was retrieved
    range: date

  retrieval_agent:
    slot_uri: prov:wasAssociatedWith
    description: Agent/tool that retrieved data
    range: string

  source_url:
    slot_uri: prov:used
    description: Source URL for retrieved data
    range: uri

  extraction_notes:
    slot_uri: skos:note
    description: Notes about extraction process
    range: string

  extraction_method:
    slot_uri: prov:wasGeneratedBy
    description: Extraction method used
    range: string

  crawler_version:
    slot_uri: prov:wasAssociatedWith
    description: Version of crawler used
    range: string

  html_snapshot_path:
    slot_uri: prov:used
    description: Path to HTML snapshot
    range: string

  claims_count:
    slot_uri: schema:numberOfItems
    description: Number of claims extracted
    range: integer

  validation_status:
    slot_uri: schema:status
    description: Validation status
    range: string

  sub_guide_temporal_coverage:
    slot_uri: dcterms:temporal
    description: Time period covered by sub-guide (free text)
    range: string
    examples:
      - value: "1811-1935"
      - value: "before 1811"
      - value: "1811-present"

  sub_guide_description:
    slot_uri: dcterms:description
    description: Brief description of sub-guide content
    range: string

  sub_guide_type:
    slot_uri: dcterms:type
    description: Type of sub-guide reference
    range: SubGuideTypeEnum

  is_or_was_access_restricted:
    slot_uri: dcterms:accessRights
    description: Whether access is restricted
    range: boolean

  resource_description:
    slot_uri: dcterms:description
    description: Brief description of an external resource
    range: string
    examples:
      - value: "Dutch genealogical database aggregator"
      - value: "Center for Family History"

  # Archive reference slots
  full_name:
    slot_uri: schema:legalName
    description: Full official name
    range: string

  location:
    slot_uri: schema:location
    description: Physical location
    range: string

  has_or_had_access_condition:
    slot_uri: dcterms:accessRights
    description: Access restrictions or requirements
    range: string

  # Related guide slots
  relationship:
    slot_uri: dcterms:relation
    description: Type of relationship
    range: RelationshipTypeEnum

  # Access restriction slots
  restriction_type:
    slot_uri: dcterms:type
    description: Type of access restriction
    range: string

  restriction_description:
    slot_uri: dcterms:description
    description: Description of restriction
    range: string

  years_restricted:
    slot_uri: schema:duration
    description: Years restricted
    range: integer

  permission_required:
    slot_uri: dcterms:accessRights
    description: Whether permission is required
    range: boolean

  # Temporal period slots
  period_name:
    slot_uri: skos:prefLabel
    description: Name of the period
    range: string

  period:
    slot_uri: dcterms:temporal
    description: Period as string
    range: string

  period_start:
    slot_uri: schema:startDate
    description: Start of period
    range: string

  period_end:
    slot_uri: schema:endDate
    description: End of period
    range: string

  period_description:
    slot_uri: dcterms:description
    description: Description of period
    range: string

  # Key date slots
  date:
    slot_uri: dcterms:date
    description: Date value
    range: string

  event:
    slot_uri: schema:description
    description: Event description
    range: string

  event_en:
    slot_uri: schema:description
    description: Event description in English
    range: string

  # Web claim slots
  claim_id:
    slot_uri: dcterms:identifier
    description: Claim identifier
    range: string

  claim_type:
    slot_uri: dcterms:type
    description: Type of claim
    range: ClaimTypeEnum

  claim_value:
    # Resolves through the `rdf` entry in the prefixes map at the top of the file.
    slot_uri: rdf:value
    description: Extracted value
    range: string

  claim_source_url:
    slot_uri: prov:used
    description: Source URL of claim
    range: uri

  xpath:
    slot_uri: hc:xpath
    description: XPath to element
    range: string

  css_selector:
    slot_uri: hc:cssSelector
    description: CSS selector to element
    range: string

  html_tag:
    slot_uri: hc:htmlTag
    description: HTML tag name
    range: string

  claim_extraction_date:
    slot_uri: prov:generatedAtTime
    description: When claim was extracted
    range: datetime

  confidence:
    slot_uri: hc:confidence
    description: Confidence score
    range: float

  claim_context:
    slot_uri: hc:context
    description: Surrounding context
    range: string

  html_file:
    slot_uri: prov:used
    description: Path to HTML file
    range: string

  # Page section slots
  section_id:
    slot_uri: dcterms:identifier
    description: Section identifier
    range: string

  heading_level:
    slot_uri: hc:headingLevel
    description: Heading level (1-6)
    range: integer

  heading_text:
    slot_uri: dcterms:title
    description: Heading text
    range: string

  heading_text_en:
    slot_uri: dcterms:title
    description: Heading text in English
    range: string

  content_blocks:
    slot_uri: schema:text
    description: Content paragraphs
    range: string
    multivalued: true

  sub_sections:
    slot_uri: schema:hasPart
    description: Nested sections
    range: PageSection
    multivalued: true

  section_links:
    slot_uri: schema:relatedLink
    description: Links in section
    range: PageLink
    multivalued: true

  featured_items:
    slot_uri: schema:hasPart
    description: Featured items
    range: FeaturedCard
    multivalued: true

  list_items:
    slot_uri: schema:itemListElement
    description: List items
    range: string
    multivalued: true

  # Page link slots
  link_text:
    slot_uri: schema:name
    description: Link text
    range: string

  link_url:
    slot_uri: schema:url
    description: Link URL
    range: uri

  link_type:
    slot_uri: dcterms:type
    description: Type of link
    range: LinkTypeEnum

  link_context:
    slot_uri: hc:context
    description: Link context
    range: string

  is_sub_guide:
    slot_uri: hc:isSubGuide
    description: Whether link is to sub-guide
    range: boolean

  # Featured card slots
  card_title:
    slot_uri: dcterms:title
    description: Card title
    range: string

  card_title_en:
    slot_uri: dcterms:title
    description: Card title in English
    range: string

  card_description:
    slot_uri: dcterms:description
    description: Card description
    range: string

  card_description_en:
    slot_uri: dcterms:description
    description: Card description in English
    range: string

  card_url:
    slot_uri: schema:url
    description: Card link URL
    range: uri

  card_image_url:
    slot_uri: schema:image
    description: Card image URL
    range: uri

  # Key periods slot (for TemporalCoverage)
  key_periods:
    slot_uri: dcterms:temporal
    description: Named historical periods
    range: TemporalPeriod
    multivalued: true

  # Key dates slot (for TemporalCoverage)
  key_dates:
    slot_uri: dcterms:date
    description: Significant historical dates
    range: KeyDate
    multivalued: true

  # Periods slot (for
TemporalCoverage) - uses TemporalPeriod for consistency periods: slot_uri: dcterms:temporal description: Named periods within coverage (uses TemporalPeriod class) range: TemporalPeriod multivalued: true # ============================================================================ # TYPES # ============================================================================ types: TemporalValue: typeof: string uri: xsd:string description: | A temporal value that can be either a year (integer) or special value like "present". Examples: 1572, 1811, "present", "ongoing"