---
# =============================================================================
# GLAM-NER: TOPONYM HYPERNYM MODULE
# =============================================================================
# Module: hypernyms/top.yaml
# Parent: entity_annotation_rules_v1.7.0_unified.yaml
# Purpose: TOPONYM entity type - named place references (nominal, not geometric)
# =============================================================================
# BREAKING CHANGE v1.7.0: Renamed from PLACE (PLC) to TOPONYM (TOP)
# Rationale: Distinguish between:
# - TOPONYM: Nominal place names ("Amsterdam", "the Alps") - textual references
# - GEOMETRY: Coordinates, polygons, spatial extents - see GEOMETRY hypernym
# =============================================================================

# Module identity: persistent URI and machine-readable name of this hypernym module.
id: https://w3id.org/glam/ner/hypernym/toponym
name: glam-ner-toponym-hypernym

# TOPONYM hypernym definition. Layout of this mapping:
#   code / definition / design_rationale - what the entity type is and why
#   ontology_mappings - CIDOC-CRM, LinkML, NERD and TEI alignments
#   subcategories     - TOP.* subtypes with examples and per-subtype mappings
#   inclusion_rules   - TOP_INC* rules: spans annotators should tag
#   exclusion_rules   - TOP_EXC* rules: spans annotators must not tag
TOPONYM:
  code: "TOP"
  definition: |
    Named references to places in text. Toponyms are NOMINAL - they are
    linguistic labels for places, not the places themselves. The same place
    may have multiple toponyms (historical names, variant spellings, exonyms).

    Key distinction:
    - TOPONYM: "Amsterdam", "Constantinople", "the Netherlands" (names in text)
    - GEOMETRY: "52.3676° N, 4.9041° E" (coordinates, see GEOMETRY hypernym)

    A toponym can reference:
    - A persistent place (conceptual entity with temporal extent)
    - A location (specific geometry at a point in time)
    - An uncertain or legendary place (may lack precise geometry)

  design_rationale: |
    The Pleiades gazetteer model distinguishes:
    - Place: A conceptual geographic entity with persistent identity
    - Name: A toponym (linguistic label) used for that place
    - Location: A geometry (coordinates) for that place at a specific time

    TEI P5 follows similar logic:
    - <placeName>: Nominal reference to a place
    - <geo>: Geographic coordinates (separate element)

    This separation is essential for:
    - Historical places with unknown geometry ("Atlantis", "El Dorado")
    - Places with changing geometry (coastlines, borders)
    - Places with multiple names over time (Constantinople→Istanbul)
    - Linking textual mentions to gazetteers (GeoNames, Pleiades, Wikidata)

  # ---------------------------------------------------------------------------
  # ONTOLOGY MAPPINGS
  # ---------------------------------------------------------------------------
  ontology_mappings:
    # NOTE(review): the definition above describes toponyms as nominal labels,
    # whereas crm:E53_Place (quoted below) denotes the spatial extent itself.
    # Confirm whether an appellation class (e.g. crm:E41_Appellation) should
    # be recorded alongside this mapping.
    primary_class: "crm:E53_Place"
    primary_class_definition: |
      CIDOC-CRM E53 Place: "This class comprises extents in space, in
      particular on the surface of the earth, in the pure sense of physics:
      independent from temporal phenomena and matter."
    alternative_classes:
      - "schema:Place"
      - "rico:Place"
      - "edm:Place"
      - "pleiades:Place"
    linkml_mapping:
      class_uri: "crm:E53_Place"
      exact_mappings:
        - "edm:Place"
        - "rico:Place"
      close_mappings:
        - "schema:Place"
      related_mappings:
        - "gn:Feature"  # GeoNames
    nerd_class: "nerd:Location"
    nerd_deprecation_note: |
      DEPRECATED: NERD's Location class conflates toponyms (names) with
      geometry (coordinates). For Digital Humanities, use crm:E53_Place for
      conceptual places and geo:Geometry for spatial data.
      Retain NERD mapping ONLY for NLP pipeline interchange.
    tei_mapping:
      element: "placeName"
      attributes:
        ref: "URI reference to gazetteer entry"
        # "mythological" is listed because subcategories.LEGENDARY.tei_mapping
        # sets type to that value.
        type: "settlement|region|country|address|building|natural|mythological"
        cert: "high|medium|low (certainty of identification)"
    note: |
      EDM Place is equivalent to CIDOC-CRM E53_Place and is used in
      Europeana cultural heritage contexts. Pleiades provides the
      Place/Name/Location model for historical geography.

  # ---------------------------------------------------------------------------
  # SUBCATEGORIES
  # ---------------------------------------------------------------------------
  # Each entry carries a dotted code (TOP.xxx), a definition, example mentions
  # and an ontology_class; some add alternative classes or extra mappings.
  subcategories:
    # ----- ADMINISTRATIVE PLACES -----
    SETTLEMENT:
      code: "TOP.SET"
      definition: "Cities, towns, villages, and other populated places"
      examples:
        - "Amsterdam"
        - "New York City"
        - "the village of Giethoorn"
        - "古都京都 (Kyoto)"
      ontology_class: "schema:City"
      alternative_classes:
        - "gn:P.PPL"  # GeoNames populated place
      linkml_mapping:
        class_uri: "crm:E53_Place"
        close_mappings:
          - "schema:City"
          - "gn:P.PPL"

    REGION:
      code: "TOP.REG"
      definition: "Provinces, states, counties, and administrative regions"
      examples:
        - "North Holland"
        - "Bavaria"
        - "California"
        - "Île-de-France"
      ontology_class: "schema:AdministrativeArea"
      alternative_classes:
        - "gn:A.ADM1"  # GeoNames first-order admin division

    COUNTRY:
      code: "TOP.CTY"
      definition: "Nations and sovereign states (modern and historical)"
      examples:
        - "The Netherlands"
        - "France"
        - "Japan"
        - "the Dutch Republic (historical)"
      ontology_class: "schema:Country"
      note: |
        For historical polities (kingdoms, empires, republics), use
        crm:P2_has_type to indicate temporal status. Link to Wikidata
        for historical state succession chains.

    # ----- PHYSICAL STRUCTURE PLACES -----
    ADDRESS:
      code: "TOP.ADR"
      definition: "Street addresses and postal locations"
      examples:
        - "Museumstraat 1, Amsterdam"
        - "1600 Pennsylvania Avenue"
        - "Postbus 74888, 1070 DN Amsterdam"
      ontology_class: "schema:PostalAddress"
      # Address components expressed as vCard properties.
      vcard_mapping:
        class: "vcard:Address"
        properties:
          street: "vcard:street-address"
          locality: "vcard:locality"
          region: "vcard:region"
          postal_code: "vcard:postal-code"
          country: "vcard:country-name"
      note: |
        Addresses are composite toponyms containing multiple components.
        Parse into structured vCard properties when possible.

    INSTADDR:
      code: "TOP.IAD"
      definition: "Full institutional addresses including building names"
      examples:
        - "Rijksmuseum, Museumstraat 1, 1071 XX Amsterdam"
        - "British Museum, Great Russell St, London WC1B 3DG"
      ontology_class: "schema:PostalAddress"
      org_mapping: "org:Site"
      note: |
        Links to GROUP hypernym via schema:address or org:hasSite.
        org:Site represents an office or premise at which the organization
        is located - use for physical institutional locations.

    BUILDING:
      code: "TOP.BLD"
      definition: "Named buildings, monuments, and architectural structures"
      examples:
        - "the Rijksmuseum building"
        - "Anne Frank House"
        - "Palace of Versailles"
        - "the Parthenon"
      ontology_class: "crm:E18_Physical_Thing"
      alternative_classes:
        - "edm:PhysicalThing"
        - "schema:LandmarksOrHistoricalBuildings"
      note: |
        Buildings are physical things (E18) that occupy places (E53).
        The building-as-place uses crm:P53_has_former_or_current_location.

    # ----- NATURAL FEATURE PLACES -----
    NATURAL:
      code: "TOP.NAT"
      definition: "Natural geographic features: mountains, rivers, lakes, etc."
      examples:
        - "the Alps"
        - "Amazon River"
        - "Mount Fuji"
        - "Lake Baikal"
        - "the Sahara Desert"
      ontology_class: "crm:E53_Place"
      alternative_classes:
        - "gn:T"  # GeoNames terrain features
        - "gn:H"  # GeoNames hydrographic features

    # ----- TEMPORAL/UNCERTAIN PLACES -----
    HISTORICAL:
      code: "TOP.HIS"
      definition: |
        Historical toponyms: places that no longer exist, have changed
        names, or have uncertain modern equivalents.
      examples:
        - "Constantinople (→ Istanbul)"
        - "Batavia (→ Jakarta)"
        - "New Amsterdam (→ New York)"
        - "Babylon"
        - "Tenochtitlan"
      ontology_class: "crm:E53_Place"
      pleiades_note: |
        Pleiades is the authoritative gazetteer for ancient world places.
        Use pleiades:Place identifiers for Greco-Roman and ancient Near
        Eastern toponyms. Link via @ref in TEI.
      # NOTE(review): as worded, "when identity is certain" can be read as
      # governing both owl:sameAs and skos:closeMatch. Confirm whether
      # closeMatch is meant for the less-certain case and reword if so.
      note: |
        Use crm:P2_has_type to indicate historical status.
        Link historical toponyms to modern equivalents via owl:sameAs
        or skos:closeMatch when identity is certain.

    LEGENDARY:
      code: "TOP.LEG"
      definition: |
        Legendary, mythological, or fictional places. These may have
        cultural significance but lack verifiable geometry.
      examples:
        - "Atlantis"
        - "El Dorado"
        - "Avalon"
        - "Middle-earth"
        - "Narnia"
      ontology_class: "crm:E53_Place"
      # Subtype-specific TEI attribute values for <placeName>.
      tei_mapping:
        element: "placeName"
        attributes:
          type: "mythological"
          cert: "low"
      note: |
        Tag legendary places when they are subjects of scholarly analysis
        (archaeology, literary studies, art history). Use crm:P2_has_type
        to indicate legendary/fictional status.

  # ---------------------------------------------------------------------------
  # INCLUSION RULES
  # ---------------------------------------------------------------------------
  # Each rule has a stable id, a one-line rule statement and example mentions.
  inclusion_rules:
    - id: "TOP_INC001"
      rule: "Tag place names even with directional or temporal modifiers"
      examples:
        - "northern France"
        - "East Berlin"
        - "southern Netherlands"
        - "medieval Paris"

    - id: "TOP_INC002"
      rule: "Tag complete addresses as single entities"
      examples:
        - "Prinsengracht 263, 1016 GV Amsterdam"

    - id: "TOP_INC003"
      rule: "Tag historical toponyms with their historical form"
      examples:
        - "Batavia (not Jakarta, unless both appear)"
        - "Constantinople (not Istanbul)"

    - id: "TOP_INC004"
      rule: "Tag exonyms (foreign names for places)"
      examples:
        - "The Hague (English exonym for Den Haag)"
        - "Florence (English for Firenze)"
        - "Cologne (English for Köln)"

    - id: "TOP_INC005"
      rule: "Tag legendary places when subjects of scholarly analysis"
      examples:
        - "the search for Atlantis"
        - "representations of Avalon in medieval art"

  # ---------------------------------------------------------------------------
  # EXCLUSION RULES
  # ---------------------------------------------------------------------------
  # Same shape as inclusion_rules; a rule may add a note describing exceptions.
  exclusion_rules:
    - id: "TOP_EXC001"
      rule: "Do NOT tag generic spatial references"
      examples:
        - "here, there, nearby"
        - "the city (without name)"
        - "the museum (use GROUP hypernym)"

    - id: "TOP_EXC002"
      rule: "Do NOT tag directional words alone"
      examples:
        - "north"
        - "south"
        - "east"
        - "west"

    - id: "TOP_EXC003"
      rule: "Do NOT tag room names within buildings (unless historically significant)"
      examples:
        - "Room 5"
        - "the lobby"
      note: "Exception: 'the Sistine Chapel' (historically significant)"

    - id: "TOP_EXC004"
      rule: "Do NOT tag coordinates or geometries (use GEOMETRY hypernym)"
      examples:
        - "52.3676° N, 4.9041° E (use GEO.PNT)"
        - "bounding box: 52.0, 4.5, 53.0, 5.0 (use GEO.BOX)"