---
# =============================================================================
# GLAM-NER: GEOMETRY HYPERNYM MODULE
# =============================================================================
# Module: hypernyms/geo.yaml
# Parent: entity_annotation_rules_v1.7.0_unified.yaml
# Purpose: GEOMETRY entity type - spatial coordinates and geometric representations
# =============================================================================
# NEW in v1.7.0: Separated from PLACE to distinguish nominal (toponyms)
# from geometric (coordinates) representations of space.
# Authority: GeoSPARQL, OGC Simple Features, ISO 19107 (geographic info)
# =============================================================================

# Module identity: identifier URI and short machine-readable name.
id: "https://w3id.org/glam/ner/hypernym/geometry"
name: "glam-ner-geometry-hypernym"

# GEOMETRY hypernym: entity code, prose definition, ontology mappings,
# subcategories (POINT, BOX, POLYGON, LINE) and the annotator-facing
# inclusion/exclusion rules.
GEOMETRY:
  code: "GEO"
  definition: |
    Geometric representations of spatial extent: coordinates, bounding boxes,
    polygons, and other spatial primitives. Geometries are MATHEMATICAL
    representations, distinct from nominal toponyms.

    Key distinction:
    - TOPONYM: "Amsterdam" (name in text)
    - GEOMETRY: "52.3676° N, 4.9041° E" (coordinates)

    Geometries may be:
    - Extracted from text (coordinate mentions)
    - Resolved from toponyms (geocoding)
    - Imported from GIS data (shapefiles, GeoJSON)

  design_rationale: |
    GeoSPARQL (OGC standard) distinguishes:
    - geo:Feature: An entity with spatial extent (≈ crm:E53_Place)
    - geo:Geometry: A geometric representation of that extent

    A single Feature may have multiple Geometries:
    - Different precision levels
    - Different time periods (historical boundaries)
    - Different representations (point vs. polygon)

    This separation enables:
    - Linking toponyms to multiple coordinate systems
    - Representing uncertainty in historical geography
    - Integrating with GIS systems

  # ---------------------------------------------------------------------------
  # ONTOLOGY MAPPINGS
  # ---------------------------------------------------------------------------
  ontology_mappings:
    primary_class: "geo:Geometry"
    primary_class_definition: |
      GeoSPARQL geo:Geometry: "A coherent set of direct positions in space.
      The basic geometries are Point, Curve, Surface, Solid."
    alternative_classes:
      - "sf:Geometry"  # OGC Simple Features
      - "crm:E94_Space_Primitive"  # CIDOC-CRM spatial extension
    linkml_mapping:
      class_uri: "geo:Geometry"
      exact_mappings:
        - "sf:Geometry"
      related_mappings:
        - "schema:GeoCoordinates"
        - "schema:GeoShape"
    tei_mapping:
      element: "geo"
      note: |
        TEI P5 <geo> contains coordinates in decimal degrees (WGS84).
        Format: latitude,longitude or latitude longitude (space-separated).
    # Wording aligned with design_rationale, which calls GeoSPARQL an
    # OGC standard.
    note: |
      GeoSPARQL is the OGC standard for representing geographic
      information in RDF. Use geo:hasGeometry to link Features to Geometries.

  # ---------------------------------------------------------------------------
  # SUBCATEGORIES
  # ---------------------------------------------------------------------------
  # Each subcategory carries a code, a definition, examples, an ontology
  # class, and a geosparql_wkt sample literal.
  subcategories:
    POINT:
      code: "GEO.PNT"
      definition: "A single coordinate point (latitude/longitude)"
      examples:
        - "52.3676° N, 4.9041° E"
        - "52.3676, 4.9041"
        - "lat: 52.3676, lon: 4.9041"
        - "N 52° 22' 3.36\", E 4° 54' 14.76\""
      ontology_class: "sf:Point"
      alternative_classes:
        - "schema:GeoCoordinates"
      # WKT is written "x y", i.e. longitude first, then latitude; the
      # human-readable examples above put latitude first.
      geosparql_wkt: "POINT(4.9041 52.3676)"
      note: |
        Points may appear in various formats:
        - Decimal degrees (DD): 52.3676, 4.9041
        - Degrees minutes seconds (DMS): N 52° 22' 3.36"
        - Signed decimal: 52.3676, 4.9041 (N/E positive)

        Normalize to WGS84 decimal degrees for storage.

    BOX:
      code: "GEO.BOX"
      definition: "A bounding box (minimum bounding rectangle)"
      examples:
        - "bounding box: 52.0, 4.5, 53.0, 5.0"
        - "extent: SW 52.0, 4.5 to NE 53.0, 5.0"
      ontology_class: "geo:Geometry"
      alternative_classes:
        - "schema:GeoShape"
      # ENVELOPE(minX, maxX, maxY, minY) is not an OGC Simple Features /
      # GeoSPARQL WKT geometry type, so the same rectangle is written as a
      # closed polygon ring (lon lat order, matching the POINT literal).
      geosparql_wkt: "POLYGON((4.5 52.0, 5.0 52.0, 5.0 53.0, 4.5 53.0, 4.5 52.0))"
      note: |
        Bounding boxes define rectangular extents.
        Format conventions vary; normalize to: minLat, minLon, maxLat, maxLon.

    POLYGON:
      code: "GEO.PLY"
      definition: "A closed polygon (administrative boundary, parcel)"
      examples:
        - "POLYGON((4.8 52.3, 4.9 52.3, 4.9 52.4, 4.8 52.4, 4.8 52.3))"
        - "GeoJSON polygon for Noord-Holland province"
      ontology_class: "sf:Polygon"
      geosparql_wkt: "POLYGON((...))"
      note: |
        Polygons are typically imported from GIS data, not extracted from
        natural language text. May appear in technical documentation.

    LINE:
      code: "GEO.LIN"
      definition: "A line or path (route, river course, boundary segment)"
      # NOTE(review): the first example is a nominal route description built
      # from toponyms, not a coordinate geometry; confirm it should be tagged
      # GEO.LIN given exclusion rule GEO_EXC001.
      examples:
        - "the route from Amsterdam to Rotterdam"
        - "LINESTRING(4.9 52.4, 4.5 51.9)"
      ontology_class: "sf:LineString"
      # Added so LINE carries a sample literal like the other subcategories;
      # the value is the LINESTRING already listed in the examples.
      geosparql_wkt: "LINESTRING(4.9 52.4, 4.5 51.9)"
      note: |
        Lines appear in route descriptions or boundary definitions.
        Less common in heritage text; more typical in geographic data.

  # ---------------------------------------------------------------------------
  # INCLUSION RULES
  # ---------------------------------------------------------------------------
  inclusion_rules:
    - id: "GEO_INC001"
      rule: "Tag coordinate mentions in any standard format"
      examples:
        - "located at 52.3676° N, 4.9041° E"
        - "coordinates: 52.3676, 4.9041"
        - "GPS: N 52° 22' 3.36\", E 4° 54' 14.76\""

    - id: "GEO_INC002"
      rule: "Tag WKT (Well-Known Text) geometry literals"
      examples:
        - "POINT(4.9041 52.3676)"
        - "POLYGON((...))"

    - id: "GEO_INC003"
      rule: "Tag GeoJSON references when geometry is inline"
      examples:
        - '{"type": "Point", "coordinates": [4.9041, 52.3676]}'

  # ---------------------------------------------------------------------------
  # EXCLUSION RULES
  # ---------------------------------------------------------------------------
  exclusion_rules:
    - id: "GEO_EXC001"
      rule: "Do NOT tag place names (use TOPONYM hypernym)"
      examples:
        - "Amsterdam (use TOP.SET)"
        - "the Alps (use TOP.NAT)"

    - id: "GEO_EXC002"
      rule: "Do NOT tag references to external GIS files"
      examples:
        - "see shapefile boundaries.shp (external reference)"
        - "GeoJSON file: regions.geojson (external)"
      note: "Only tag inline geometry, not file references"