# =============================================================================
# GLAM-NER: TOPONYM HYPERNYM MODULE
# =============================================================================
# Module: hypernyms/top.yaml
# Parent: entity_annotation_rules_v1.7.0_unified.yaml
# Purpose: TOPONYM entity type - named place references (nominal, not geometric)
# =============================================================================
# BREAKING CHANGE v1.7.0: Renamed from PLACE (PLC) to TOPONYM (TOP)
# Rationale: Distinguish between:
#   - TOPONYM: Nominal place names ("Amsterdam", "the Alps") - textual references
#   - GEOMETRY: Coordinates, polygons, spatial extents - see GEOMETRY hypernym
# =============================================================================
#
# Layout of this module:
#   id, name  - identifiers of the module itself
#   TOPONYM   - the hypernym: code, definition, design rationale, ontology
#               mappings, subcategories (TOP.*), inclusion and exclusion rules

id: https://w3id.org/glam/ner/hypernym/toponym
name: glam-ner-toponym-hypernym

TOPONYM:
  # Short code; every subcategory code below is prefixed with it ("TOP.<XXX>").
  code: "TOP"

  # Annotator-facing definition: a toponym is the NAME found in the text,
  # as opposed to coordinates, which belong to the GEOMETRY hypernym.
  definition: |
    Named references to places in text. Toponyms are NOMINAL - they are
    linguistic labels for places, not the places themselves. The same place
    may have multiple toponyms (historical names, variant spellings, exonyms).

    Key distinction:
    - TOPONYM: "Amsterdam", "Constantinople", "the Netherlands" (names in text)
    - GEOMETRY: "52.3676° N, 4.9041° E" (coordinates, see GEOMETRY hypernym)

    A toponym can reference:
    - A persistent place (conceptual entity with temporal extent)
    - A location (specific geometry at a point in time)
    - An uncertain or legendary place (may lack precise geometry)

  # Why names and geometries are kept as separate hypernyms.
  design_rationale: |
    The Pleiades gazetteer model distinguishes:
    - Place: A conceptual geographic entity with persistent identity
    - Name: A toponym (linguistic label) used for that place
    - Location: A geometry (coordinates) for that place at a specific time

    TEI P5 follows similar logic:
    - <placeName>: Nominal reference to a place
    - <geo>: Geographic coordinates (separate element)

    This separation is essential for:
    - Historical places with unknown geometry ("Atlantis", "El Dorado")
    - Places with changing geometry (coastlines, borders)
    - Places with multiple names over time (Constantinople→Istanbul)
    - Linking textual mentions to gazetteers (GeoNames, Pleiades, Wikidata)

  # ---------------------------------------------------------------------------
  # ONTOLOGY MAPPINGS
  # ---------------------------------------------------------------------------
  ontology_mappings:
    # NOTE(review): the definition above describes toponyms as names, while
    # crm:E53_Place models the place itself (CIDOC-CRM models names as
    # appellations). Confirm that E53 is the intended primary class.
    primary_class: "crm:E53_Place"
    primary_class_definition: |
      CIDOC-CRM E53 Place: "This class comprises extents in space, in
      particular on the surface of the earth, in the pure sense of physics:
      independent from temporal phenomena and matter."
    alternative_classes:
      - "schema:Place"
      - "rico:Place"
      - "edm:Place"
      - "pleiades:Place"

    # LinkML view of the same mapping, graded by match strength.
    linkml_mapping:
      class_uri: "crm:E53_Place"
      exact_mappings:
        - "edm:Place"
        - "rico:Place"
      close_mappings:
        - "schema:Place"
      related_mappings:
        - "gn:Feature"  # GeoNames

    # NERD mapping: kept for interchange only, see the deprecation note.
    nerd_class: "nerd:Location"
    nerd_deprecation_note: |
      DEPRECATED: NERD's Location class conflates toponyms (names) with
      geometry (coordinates). For Digital Humanities, use crm:E53_Place
      for conceptual places and geo:Geometry for spatial data.

      Retain NERD mapping ONLY for NLP pipeline interchange.

    # TEI encoding of a toponym mention.
    tei_mapping:
      element: "placeName"
      attributes:
        ref: "URI reference to gazetteer entry"
        # NOTE(review): the LEGENDARY subcategory uses type "mythological",
        # which is not part of this value list - confirm whether to add it.
        type: "settlement|region|country|address|building|natural"
        cert: "high|medium|low (certainty of identification)"

    # NOTE(review): this note is placed at ontology_mappings level because it
    # discusses EDM and Pleiades rather than TEI - confirm the nesting.
    note: |
      EDM Place is equivalent to CIDOC-CRM E53_Place and is used in
      Europeana cultural heritage contexts. Pleiades provides the
      Place/Name/Location model for historical geography.

  # ---------------------------------------------------------------------------
  # SUBCATEGORIES
  # ---------------------------------------------------------------------------
  # Each entry carries: code, definition, examples, ontology_class, and
  # optional alternative classes, extra mappings, and notes.
  subcategories:

    # ----- ADMINISTRATIVE PLACES -----
    SETTLEMENT:
      code: "TOP.SET"
      definition: "Cities, towns, villages, and other populated places"
      examples:
        - "Amsterdam"
        - "New York City"
        - "the village of Giethoorn"
        - "古都京都 (Kyoto)"
      ontology_class: "schema:City"
      alternative_classes:
        - "gn:P.PPL"  # GeoNames populated place
      linkml_mapping:
        class_uri: "crm:E53_Place"
        close_mappings:
          - "schema:City"
          - "gn:P.PPL"

    REGION:
      code: "TOP.REG"
      definition: "Provinces, states, counties, and administrative regions"
      examples:
        - "North Holland"
        - "Bavaria"
        - "California"
        - "Île-de-France"
      ontology_class: "schema:AdministrativeArea"
      alternative_classes:
        - "gn:A.ADM1"  # GeoNames first-order admin division

    COUNTRY:
      code: "TOP.CTY"
      definition: "Nations and sovereign states (modern and historical)"
      examples:
        - "The Netherlands"
        - "France"
        - "Japan"
        - "the Dutch Republic (historical)"
      ontology_class: "schema:Country"
      note: |
        For historical polities (kingdoms, empires, republics), use
        crm:P2_has_type to indicate temporal status. Link to Wikidata
        for historical state succession chains.

    # ----- PHYSICAL STRUCTURE PLACES -----
    ADDRESS:
      code: "TOP.ADR"
      definition: "Street addresses and postal locations"
      examples:
        - "Museumstraat 1, Amsterdam"
        - "1600 Pennsylvania Avenue"
        - "Postbus 74888, 1070 DN Amsterdam"
      ontology_class: "schema:PostalAddress"
      # Component-level mapping used when an address is parsed into parts.
      vcard_mapping:
        class: "vcard:Address"
        properties:
          street: "vcard:street-address"
          locality: "vcard:locality"
          region: "vcard:region"
          postal_code: "vcard:postal-code"
          country: "vcard:country-name"
      note: |
        Addresses are composite toponyms containing multiple components.
        Parse into structured vCard properties when possible.

    INSTADDR:
      code: "TOP.IAD"
      definition: "Full institutional addresses including building names"
      examples:
        - "Rijksmuseum, Museumstraat 1, 1071 XX Amsterdam"
        - "British Museum, Great Russell St, London WC1B 3DG"
      ontology_class: "schema:PostalAddress"
      org_mapping: "org:Site"
      note: |
        Links to GROUP hypernym via schema:address or org:hasSite.
        org:Site represents an office or premise at which the organization
        is located - use for physical institutional locations.

    BUILDING:
      code: "TOP.BLD"
      definition: "Named buildings, monuments, and architectural structures"
      examples:
        - "the Rijksmuseum building"
        - "Anne Frank House"
        - "Palace of Versailles"
        - "the Parthenon"
      ontology_class: "crm:E18_Physical_Thing"
      alternative_classes:
        - "edm:PhysicalThing"
        - "schema:LandmarksOrHistoricalBuildings"
      note: |
        Buildings are physical things (E18) that occupy places (E53).
        The building-as-place uses crm:P53_has_former_or_current_location.

    # ----- NATURAL FEATURE PLACES -----
    NATURAL:
      code: "TOP.NAT"
      definition: "Natural geographic features: mountains, rivers, lakes, etc."
      examples:
        - "the Alps"
        - "Amazon River"
        - "Mount Fuji"
        - "Lake Baikal"
        - "the Sahara Desert"
      ontology_class: "crm:E53_Place"
      alternative_classes:
        - "gn:T"  # GeoNames terrain features
        - "gn:H"  # GeoNames hydrographic features

    # ----- TEMPORAL/UNCERTAIN PLACES -----
    HISTORICAL:
      code: "TOP.HIS"
      definition: |
        Historical toponyms: places that no longer exist, have changed
        names, or have uncertain modern equivalents.
      examples:
        - "Constantinople (→ Istanbul)"
        - "Batavia (→ Jakarta)"
        - "New Amsterdam (→ New York)"
        - "Babylon"
        - "Tenochtitlan"
      ontology_class: "crm:E53_Place"
      pleiades_note: |
        Pleiades is the authoritative gazetteer for ancient world places.
        Use pleiades:Place identifiers for Greco-Roman and ancient
        Near Eastern toponyms. Link via @ref in TEI.
      note: |
        Use crm:P2_has_type to indicate historical status. Link historical
        toponyms to modern equivalents via owl:sameAs or skos:closeMatch
        when identity is certain.

    LEGENDARY:
      code: "TOP.LEG"
      definition: |
        Legendary, mythological, or fictional places. These may have
        cultural significance but lack verifiable geometry.
      examples:
        - "Atlantis"
        - "El Dorado"
        - "Avalon"
        - "Middle-earth"
        - "Narnia"
      ontology_class: "crm:E53_Place"
      # Subcategory-specific TEI attribute values.
      tei_mapping:
        element: "placeName"
        attributes:
          type: "mythological"
          cert: "low"
      note: |
        Tag legendary places when they are subjects of scholarly analysis
        (archaeology, literary studies, art history). Use crm:P2_has_type
        to indicate legendary/fictional status.

  # ---------------------------------------------------------------------------
  # INCLUSION RULES
  # ---------------------------------------------------------------------------
  # Spans that MUST be tagged as TOP. Each rule has an id, a rule text and
  # example spans.
  inclusion_rules:
    - id: "TOP_INC001"
      rule: "Tag place names even with directional or temporal modifiers"
      examples:
        - "northern France"
        - "East Berlin"
        - "southern Netherlands"
        - "medieval Paris"

    - id: "TOP_INC002"
      rule: "Tag complete addresses as single entities"
      examples:
        - "Prinsengracht 263, 1016 GV Amsterdam"

    - id: "TOP_INC003"
      rule: "Tag historical toponyms with their historical form"
      examples:
        - "Batavia (not Jakarta, unless both appear)"
        - "Constantinople (not Istanbul)"

    - id: "TOP_INC004"
      rule: "Tag exonyms (foreign names for places)"
      examples:
        - "The Hague (English exonym for Den Haag)"
        - "Florence (English for Firenze)"
        - "Cologne (English for Köln)"

    - id: "TOP_INC005"
      rule: "Tag legendary places when subjects of scholarly analysis"
      examples:
        - "the search for Atlantis"
        - "representations of Avalon in medieval art"

  # ---------------------------------------------------------------------------
  # EXCLUSION RULES
  # ---------------------------------------------------------------------------
  # Spans that must NOT be tagged as TOP. Where another hypernym applies,
  # the example names it in parentheses.
  exclusion_rules:
    - id: "TOP_EXC001"
      rule: "Do NOT tag generic spatial references"
      examples:
        - "here, there, nearby"
        - "the city (without name)"
        - "the museum (use GROUP hypernym)"

    - id: "TOP_EXC002"
      rule: "Do NOT tag directional words alone"
      examples:
        - "north"
        - "south"
        - "east"
        - "west"

    - id: "TOP_EXC003"
      rule: "Do NOT tag room names within buildings (unless historically significant)"
      examples:
        - "Room 5"
        - "the lobby"
      note: "Exception: 'the Sistine Chapel' (historically significant)"

    - id: "TOP_EXC004"
      rule: "Do NOT tag coordinates or geometries (use GEOMETRY hypernym)"
      examples:
        - "52.3676° N, 4.9041° E (use GEO.PNT)"
        - "bounding box: 52.0, 4.5, 53.0, 5.0 (use GEO.BOX)"