1629 lines
55 KiB
YAML
1629 lines
55 KiB
YAML
# Entity Annotation Rules Instance - Section 3 of Convention v1.5.0
|
|
# Captures all entity annotation rules for the Gado2 annotation scheme
|
|
# Version 1.5.0 - Introduced double tagging support and THING entity type
|
|
|
|
annotation_scheme: "Gado2"
|
|
no_double_tagging: false # CHANGED: Now allows overlapping annotations
|
|
context_sensitive: true
|
|
version: "1.5.0"
|
|
date_modified: "2025-11-12"
|
|
|
|
# General annotation policies
|
|
general_rules:
|
|
- Double tagging is now ALLOWED and ENCOURAGED for semantic richness
|
|
- The same span of text can receive multiple entity tags when it represents
|
|
multiple semantic concepts (e.g., 'Kapitein Jonker' = BEING + DENOMINATION + DENOMINATION)
|
|
- Double tagging applies to:
|
|
* Titles/denominations within personal names
|
|
* Places within personal descriptions
|
|
* Objects within quantity expressions
|
|
* Any semantically meaningful overlapping entities
|
|
- The exact same word can be tagged in different ways depending on its context
|
|
- When in doubt, check how words are tagged in previous ground truths
|
|
- This convention distinguishes titles from general designations:
|
|
* Titles are used/accepted by subjects (emic description)
|
|
* General designations are imposed upon subjects (etic description)
|
|
- Transcription status: Done → Final → Ground Truth (GT)
|
|
- Nested and overlapping annotations should capture all semantic layers
|
|
|
|
double_tagging_patterns:
|
|
- pattern: "Title + Personal Name"
|
|
description: "Tag the full person name, plus denomination for title, plus surname separately if historically significant"
|
|
examples:
|
|
- text: "Kapitein Jonker"
|
|
tags:
|
|
- entity: "Kapitein Jonker"
|
|
type: BEING
|
|
span: [0, 15]
|
|
- entity: "Kapitein"
|
|
type: DENOMINATION
|
|
span: [0, 8]
|
|
- entity: "Jonker"
|
|
type: DENOMINATION
|
|
span: [9, 15]
|
|
note: "Old spelling for Jonkheer (noble title/name)"
|
|
|
|
- pattern: "Being + Place"
|
|
description: "Tag the full person description plus embedded place reference"
|
|
examples:
|
|
- text: "Maria uit Bengaal"
|
|
tags:
|
|
- entity: "Maria uit Bengaal"
|
|
type: BEING
|
|
span: [0, 17]
|
|
- entity: "Bengaal"
|
|
type: PLACE
|
|
span: [10, 17]
|
|
|
|
- pattern: "Quantity + Thing"
|
|
description: "Tag the full quantity expression plus the object being counted"
|
|
examples:
|
|
- text: "een lepel"
|
|
tags:
|
|
- entity: "een lepel"
|
|
type: QUANTITY
|
|
span: [0, 9]
|
|
- entity: "lepel"
|
|
type: THING
|
|
span: [4, 9]
|
|
- text: "twee kandelaars"
|
|
tags:
|
|
- entity: "twee kandelaars"
|
|
type: QUANTITY
|
|
span: [0, 15]
|
|
- entity: "kandelaars"
|
|
type: THING
|
|
span: [5, 15]
|
|
|
|
entity_types:
|
|
# 3.1 BEING
|
|
- entity_type: BEING
|
|
key: BEI
|
|
label: BEING
|
|
description: >-
|
|
Personal names and specific references to persons (named or unnamed) or beings.
|
|
Includes persons, animals with proper names, fictional characters,
|
|
gods, spirits, saints and prophets. With double tagging enabled,
|
|
titles and denominations within person names are also tagged separately.
|
|
|
|
subcategories:
|
|
- name: personal_names
|
|
key: PNAME
|
|
label: Personal Names
|
|
description: Given name and surname, patronyms
|
|
examples:
|
|
- "Gouverneur-Generaal van Starkenborgh"
|
|
- "President Soekarno"
|
|
- "Sultan Muzaffar"
|
|
- "Dr. Raden Soetomo"
|
|
- "De Heer Balkenende"
|
|
- "mr. dr. Beel"
|
|
- "Tumenggung Sukapura"
|
|
- "Officier Suksapura"
|
|
- "Officier Tumenggung Sukapura"
|
|
- "Lieutenant Winkelaar"
|
|
- "Lieutenant Adolph Winkelaar"
|
|
- "Mr. van Agt"
|
|
- "Mej. den Uyl"
|
|
- "Marquis De La Chetardie"
|
|
- "Meneer Drees"
|
|
- "gesaghebber Van Arrewijne"
|
|
- "gewesen Sicacalsen Serlasker Abdul Nabij"
|
|
- "princesse Gamoelamoe (vertaling van Ratu Gamulamu)"
|
|
- "Engelschen vice admiraal Cornisch"
|
|
- "Japara 's resident Lepeltak"
|
|
- "Japaras resident Falck"
|
|
- "resident Falck"
|
|
- "vice admiraal Cornisch"
|
|
- "Kapitein Jonker" # ADDED: Also tag 'Kapitein' and 'Jonker' separately as DENOMINATION
|
|
- "Maria uit Bengaal" # ADDED: Also tag 'Bengaal' as PLACE
|
|
|
|
- name: animal_names
|
|
key: ANAME
|
|
label: Animal Names
|
|
description: Proper names of animals
|
|
examples:
|
|
- "Lassie"
|
|
- "Golden Retriever"
|
|
|
|
- name: fictional_characters
|
|
key: FICT
|
|
label: Fictional Characters
|
|
description: Proper names of fictional characters and artificial beings
|
|
examples:
|
|
- "Kuifje"
|
|
- "Sherlock Holmes"
|
|
- "Hercule Poirot"
|
|
- "{Suske} en {Wiske}"
|
|
|
|
- name: religious_figures
|
|
key: RELBEI
|
|
label: Religious Figures
|
|
description: Gods, spirits, saints and prophets
|
|
examples:
|
|
- "God"
|
|
- "Jesus"
|
|
- "Mohammed"
|
|
- "Heilige Geest"
|
|
- "Rama"
|
|
|
|
- name: specific_references
|
|
key: SPECBEI
|
|
label: Specific References
|
|
description: Specific references to a person without their name being mentioned. The denomination/title is also tagged separately with DENOMINATION
|
|
examples:
|
|
- "de Koning van Pruisen"
|
|
- "de Koningin van Nederland"
|
|
- "de Sultan van Ternate"
|
|
- "de Minister van Buitenlandse Zaken"
|
|
- "de Gouverneur-Generaal van Nederl.-Indië"
|
|
- "de Koning"
|
|
- "de Sultan"
|
|
- "de Directeur"
|
|
- "Tenno Heika"
|
|
- "zijn Vrouw"
|
|
- "haar slaaf"
|
|
- "de geseijde slavin"
|
|
- "de eerder genoemde leider"
|
|
- "den Capiteijn"
|
|
- "des Conincx"
|
|
- "dit Jongetie"
|
|
- "dat kind"
|
|
- "Zijne Majesteit"
|
|
- "haaren Coning"
|
|
- "dezen Prins"
|
|
- "dien Coning"
|
|
- "onsen Capiteijn"
|
|
- "Bapak itu" # Indonesian
|
|
- "gemelte gouvern:r"
|
|
- "De slaaf"
|
|
|
|
inclusion_rules:
|
|
- rule_id: BEI_INC001
|
|
description: Include titles with personal names - and tag titles separately as DENOMINATION
|
|
conditions:
|
|
- "Tag the full person name including titles"
|
|
- "ALSO tag titles/denominations separately as DENOMINATION (e.g., Jonker/Jonkheer)"
|
|
examples:
|
|
- "Gouverneur-Generaal van Starkenborgh → BEING (full), Gouverneur-Generaal → DENOMINATION"
|
|
- "President Soekarno → BEING (full), President → DENOMINATION"
|
|
- "Sultan Muzaffar → BEING (full), Sultan → DENOMINATION"
|
|
- "Kapitein Jonker → BEING (full), Kapitein → DENOMINATION, Jonker → DENOMINATION (noble name)"
|
|
|
|
- rule_id: BEI_INC002
|
|
description: Include words between title and personal name
|
|
conditions:
|
|
- "The distance between a title and personal name can vary within a single phrase"
|
|
- "If another word stands between the title and the personal name, it will be tagged along"
|
|
- "Tag intermediate words separately if they have semantic meaning (e.g., place/denomination)"
|
|
examples:
|
|
- "Engelschen vice admiraal Cornisch → BEING (full), Engelschen vice admiraal → DENOMINATION"
|
|
|
|
- rule_id: BEI_INC003
|
|
description: Include articles and demonstratives with specific references
|
|
conditions:
|
|
- "The article/demonstrative/genitive case/possessive pronouns are tagged along"
|
|
examples:
|
|
- "de Koning van Pruisen → BEING, Pruisen → PLACE (separate)"
|
|
- "zijn Vrouw"
|
|
- "des Conincx"
|
|
- "dit Jongetie"
|
|
|
|
- rule_id: BEI_INC004
|
|
description: Include backreferences
|
|
conditions:
|
|
- "References that point back to previously mentioned BEINGs"
|
|
examples:
|
|
- "gemelte gouvern:r"
|
|
|
|
- rule_id: BEI_INC005
|
|
description: Include embedded place references but also tag the places separately as PLACE
|
|
conditions:
|
|
- "When a BEING description includes a place reference (e.g., 'uit Bengaal'), tag the full BEING AND the place separately"
|
|
examples:
|
|
- "Maria uit Bengaal → BEING (full), Bengaal → PLACE"
|
|
- "Jan van Amsterdam → BEING (full), Amsterdam → PLACE"
|
|
|
|
- rule_id: BEI_INC006
|
|
description: Include embedded associated organisations but also tag the organisations separately as ORGANISATION
|
|
conditions:
|
|
- "When a specific reference to a BEING includes an associated organisation or place (e.g., 'de Minister van Buitenlandse Zaken'), tag the full BEING AND the organisation/place separately"
|
|
examples:
|
|
- "De Minister van buitenlandse zaken → BEING (De Minister van buitenlandse zaken), ORGANISATION (buitenlandse zaken)"
|
|
exclusion_rules:
|
|
- rule_id: BEI_EXC001
|
|
description: Do not tag abstract references
|
|
rationale: >-
|
|
Abstract references (often indicated by plural forms or indefinite
|
|
articles) are not tagged; these often need to be tagged as 'denomination'
|
|
examples:
|
|
- "Een President"
|
|
- "Een Premier"
|
|
- "Een resident"
|
|
- "Een generaal-majoor"
|
|
- "Een Voorzitter"
|
|
- "hij ontving de titel dr."
|
|
- "ambassadeurs"
|
|
- "Koninginnen"
|
|
- "Koningen"
|
|
- "draagt de titel Graaf"
|
|
- "die persoon"
|
|
- "de mannen"
|
|
- "de vrouwen"
|
|
- "die lieden"
|
|
- "de kindere"
|
|
- "Bapak" # Indonesian, without 'itu' (that) or other demonstrative
|
|
|
|
|
|
- rule_id: BEI_EXC002
|
|
description: Do not tag pronouns
|
|
rationale: >-
|
|
Pronouns are not tagged. They are consistent enough to be traced
|
|
through regular expressions and do not require entity processing.
|
|
examples:
|
|
- "hij"
|
|
- rule_id: BEI_EXC003
|
|
description: Tag general designations separately when they occur with complete personal names
|
|
rationale:
|
|
- We tag both the full BEING AND the designation separately
|
|
examples:
|
|
- "geseijde slavin tanjong → BEING (tanjong), DENOMINATION (geseijde slavin)"
|
|
- "de slaaf Anthonij → BEING (Anthonij or full), DENOMINATION (slaaf)"
|
|
- "de Javaanschen Cap:n Soeta Wangsa → BEING (full), DENOMINATION (Javaanschen Cap:n)"
|
|
|
|
# 3.2 PLACE
|
|
- entity_type: PLACE
|
|
key: PLC
|
|
label: Place
|
|
description: >-
|
|
Geographic locations including streets, cities, provinces, countries,
|
|
continents, infrastructure, landforms, public spaces, buildings, and
|
|
astronomical objects. With double tagging enabled, places embedded in
|
|
BEING descriptions, denominations, or other entities are also tagged.
|
|
|
|
subcategories:
|
|
- name: street_names_addresses
|
|
key: SNAME
|
|
label: Street Names and Addresses
|
|
description: Street names and addresses
|
|
examples:
|
|
- "Laan van Meerdervoort"
|
|
|
|
- name: trajectories_directions
|
|
key: TDIR
|
|
label: Trajectories and Directions
|
|
description: Trajectories and wind directions
|
|
examples:
|
|
- "schoone wegh met seer schoone water plaatsen versien, streckende meest O:Z:O. en W:N:W. aen de noordt zijde"
|
|
- "Oostzijde van Molenvliet"
|
|
|
|
- name: locations
|
|
key: LOC
|
|
label: Locations
|
|
description: Dorp, Stad, Provincie, Land, Continent, Bisdom, Zones
|
|
examples:
|
|
- "Sectie [0-9, I-V]"
|
|
- "Bengaal" # ADDED: Can appear within BEING descriptions
|
|
- "Indonesia" # ADDED: Can appear as adjective base in denominations
|
|
|
|
- name: infrastructure
|
|
key: INFRA
|
|
label: Infrastructure
|
|
description: Brug, Haven, Dam
|
|
examples:
|
|
- "de haven van Rotterdam"
|
|
- "de brug over de Amstel"
|
|
|
|
- name: landform
|
|
key: LANDF
|
|
label: Landform
|
|
description: Berg, Gebergte, Bos, Rivier, Bron, Rijstvelden, Vallei, Tuin, Natuurreservaat, Nationaal park, Plantage, Strand
|
|
examples:
|
|
- "de rivier de Maas"
|
|
- "het bos bij Arnhem"
|
|
- "de berg Merapi"
|
|
- "de plantage Deli"
|
|
|
|
- name: public_space
|
|
key: PUBSP
|
|
label: Public Space
|
|
description: >-
|
|
Plein, Veld, Theater, Museum, School, Markt, Vliegtuig, Station,
|
|
Zwembad, Ziekenhuis, Sportveld, Bioscoop, Tentoonstelling, Campus,
|
|
Lanceerplatform, Club, Huis, Universiteit, Bibliotheek, Bidruimte,
|
|
Medisch centrum, Parkeergarage, Speeltuin, Grafplaats, Bedrijvenpark
|
|
examples:
|
|
- "het museum van Rotterdam"
|
|
- "de school aan de Laan van Meerdervoort"
|
|
- "het ziekenhuis St. Antonius"
|
|
|
|
- name: companies_as_places
|
|
key: COMPPLC
|
|
label: Companies as Places
|
|
description: >-
|
|
Companies only if contextualised as places (otherwise organisation):
|
|
Apotheek, Bar, Restaurant, Eettent, Depot, Hotel, Hostel, Fabriek,
|
|
Nachtclub, Muziekpodium
|
|
examples:
|
|
- "het restaurant De Kromme Watergang"
|
|
- "het hotel Grand Hotel Krasnapolsky"
|
|
|
|
- name: buildings
|
|
key: BLDG
|
|
label: Buildings
|
|
description: >-
|
|
Huis, Wooncomplex, Klooster, Kleuterzaal, Flat, Kazerne, Fort,
|
|
Verzorgingshuis, Winkelcentrum, Paleis
|
|
examples:
|
|
- "het paleis op de Dam"
|
|
- "het fort aan de kust"
|
|
|
|
- name: astronomical_objects
|
|
key: ASTR
|
|
label: Astronomical Objects
|
|
description: Zon, Aarde, Maan, Planeten, Comets (often named after persons/dates)
|
|
examples:
|
|
- "de maan"
|
|
- "de aarde"
|
|
- "Mars"
|
|
- "Halley"
|
|
|
|
- name: coordinates
|
|
key: COORD
|
|
label: Coordinates
|
|
description: Geographic coordinates
|
|
examples:
|
|
- "Z. breete van 35 — 31 ten langte 5 — 15"
|
|
- "parabel van 36 — 50"
|
|
- "3½ graed of 42 mijlen bewesten"
|
|
- "10 graed noorder breedte"
|
|
- "de evenaar"
|
|
|
|
inclusion_rules:
|
|
- rule_id: PLC_INC001
|
|
description: Include relevant adjectives
|
|
conditions:
|
|
- "Directional and descriptive adjectives are tagged with place names"
|
|
examples:
|
|
- "Noord/Oost/Zuid/West(-kust)"
|
|
- "Aziatische-Afrikaanse landen"
|
|
- "Amerikaanse defensie-gebied"
|
|
- "Asia Raya"
|
|
- "Muang Thai"
|
|
- "Indonesische eilandenrijk"
|
|
- "Indonesische republiek"
|
|
- "Delische plantagegebied"
|
|
- "West-Javaanse Bandoeng"
|
|
- "Joodse gedeelte"
|
|
- "de Europese landen"
|
|
|
|
- rule_id: PLC_INC002
|
|
description: Include metonymy references
|
|
conditions:
|
|
- "Metonymy does not disqualify a place name. Metonyms can be double tagged as PLACE as well as ORGANISATION and/or BEING depending on context"
|
|
examples:
|
|
- "Moskou weigert"
|
|
- "Den Haag bepaalt"
|
|
- "Nederland scoort"
|
|
|
|
- rule_id: PLC_INC003
|
|
description: Include articles for generic place references
|
|
conditions:
|
|
- "Articles are included when the place reference is generic"
|
|
examples:
|
|
- "{De Laan van Meerdervoort}"
|
|
- "in {het comptoir}"
|
|
|
|
- rule_id: PLC_INC004
|
|
description: Tag places embedded in other entities
|
|
conditions:
|
|
- "When a place appears within a BEING description, denomination, or other entity, tag it separately"
|
|
examples:
|
|
- "Maria uit Bengaal → tag 'Bengaal' as PLACE"
|
|
- "Jan van Amsterdam → tag 'Amsterdam' as PLACE"
|
|
- "de Koning van Pruisen → tag 'Pruisen' as PLACE"
|
|
|
|
exclusion_rules:
|
|
- rule_id: PLC_EXC001
|
|
description: Do not include articles for specific place names
|
|
rationale: Articles are not part of proper place names and often indicate that the place refers to an OBJECT, ORGANISATION or other entity
|
|
examples:
|
|
- "Amsterdam (not 'de Amsterdam')"
|
|
|
|
- rule_id: PLC_EXC002
|
|
description: Do not tag representatives of places as places
|
|
rationale: These are being or organization references
|
|
examples:
|
|
- "De ambassadeur van Nederland (De ambassadeur = BEING, Nederland = PLACE separately)"
|
|
|
|
# 3.3 ORGANISATION
|
|
- entity_type: ORGANISATION
|
|
key: ORG
|
|
label: Organisation
|
|
description: >-
|
|
Organizations including companies, institutions, governments, branches,
|
|
associations, legislative bodies, political parties, military forces,
|
|
sports teams, meetings, bands, religious orders, and ships.
|
|
|
|
subcategories:
|
|
- name: companies
|
|
key: COMP
|
|
label: Companies
|
|
description: Studio, Bank, etc.
|
|
examples:
|
|
- "Philips"
|
|
- "ING"
|
|
- "Shell"
|
|
|
|
- name: branches
|
|
key: BRANCH
|
|
label: Branches
|
|
description: Departments and organizational branches
|
|
examples:
|
|
- "ING Rotterdam"
|
|
- "Rekenkamer Gemeente Rotterdam"
|
|
- "Afdelingsbestuur NVM afdeling Arnhem"
|
|
- "NVM Arnhem"
|
|
|
|
- name: associations
|
|
key: ASSOC
|
|
label: Associations
|
|
description: Coöperatie, Markt (if not place)
|
|
examples:
|
|
- "NVM"
|
|
- "de vakbond"
|
|
|
|
- name: public_facilities
|
|
key: PUBFAC
|
|
label: Public Facilities
|
|
description: School, Universiteit (if not already tagged as place)
|
|
examples:
|
|
- "Middelbare school"
|
|
- "Technische Universiteit Delft"
|
|
|
|
- name: legislative_body
|
|
key: LEGI
|
|
label: Legislative Bodies
|
|
description: Parliamentary bodies, legislatures, councils, chambers, etc.
|
|
examples:
|
|
- "Tweede Kamer"
|
|
- "Staten-Generaal"
|
|
|
|
- name: grand_residence
|
|
key: GRANDRES
|
|
label: Grand Residences
|
|
description: Paleis (when referring to institution)
|
|
examples:
|
|
- "Paleis op de Dam" # ADDED: when referring to institution, otherwise tagged as PLACE
|
|
|
|
- name: printer
|
|
key: PRINTER
|
|
label: Printers
|
|
description: Printing houses
|
|
examples:
|
|
- "Drukkerij De Standaard"
|
|
- "Uitgeverij Van Dale"
|
|
|
|
- name: news_agency
|
|
key: NEWSAG
|
|
label: News Agencies
|
|
description: News agencies and media organizations when referred to as an entity that takes actions or is a place of employment rather than one being consumed (the latter is tagged as TEXTUAL_REFERENCE)
|
|
examples:
|
|
- "De NRC besloot"
|
|
- "medewerkers bij De Expres"
|
|
- "(cor. Volkskrant)"
|
|
- "hoofdredacteur van het Bataviaasch Nieuwsblad"
|
|
|
|
- name: media_campaign
|
|
key: MEDIACAMP
|
|
label: Media Campaigns
|
|
description: PR campaigns
|
|
examples:
|
|
- "Hij organiseert {de PR campagne}"
|
|
|
|
- name: factory
|
|
key: FACTORY
|
|
label: Factories
|
|
description: Manufacturing facilities as organizations
|
|
examples:
|
|
- "de fabriek van Philips"
|
|
- "de Shell raffinaderij"
|
|
|
|
- name: political_party
|
|
key: POLPART
|
|
label: Political Parties
|
|
description: Political parties
|
|
examples:
|
|
- "CDA"
|
|
- "VVD"
|
|
- "PvdA"
|
|
- "Partij Nasional Indonesia"
|
|
|
|
- name: international_organisation
|
|
key: INTORG
|
|
label: International Organizations
|
|
description: International organizations
|
|
examples:
|
|
- "Verenigde Naties"
|
|
- "Europese Unie"
|
|
|
|
- name: resistance_movement
|
|
key: RESIST
|
|
label: Resistance Movements
|
|
description: Resistance movements
|
|
examples:
|
|
- "Het verzet"
|
|
- "Kelompok Gerilya"
|
|
|
|
- name: authorities
|
|
key: AUTH
|
|
label: Authorities
|
|
description: >-
|
|
Government, Ministries, Councils, Courts
|
|
examples:
|
|
- "Ministerie van Financiën"
|
|
- "Overzeese Rijksdelen"
|
|
- "Buitenlandse Zaken"
|
|
- "Raad voor Aangelegenheden van Indonesië (RAVI)"
|
|
|
|
- name: dynasties
|
|
key: DYN
|
|
label: Dynasties
|
|
description: Royal and ruling dynasties
|
|
examples:
|
|
- "Omajjaden dynastie"
|
|
- "Nasriden dynastie"
|
|
- "Hashemieten dynastie"
|
|
- "Oranje-Nassau"
|
|
|
|
- name: military_forces
|
|
key: MILFOR
|
|
label: Military Forces
|
|
description: Army, Army units
|
|
examples:
|
|
- "Indische leger"
|
|
- "KNIL"
|
|
- "Mariniers"
|
|
- "Luchtmacht"
|
|
- "Marine"
|
|
- "Infanterie"
|
|
- "Korps Speciale Troepen"
|
|
- "Tijdelijke Wet {Koninklijke Landmacht}" # note: the entire string is also tagged as TEXTUAL_REFERENCE
|
|
|
|
- name: sports_team
|
|
key: SPORTTEAM
|
|
label: Sports Teams
|
|
description: Sports teams
|
|
examples:
|
|
- "Ajax"
|
|
- "Feyenoord"
|
|
- "PSV"
|
|
|
|
- name: sport_tournament
|
|
key: SPORTTOUR
|
|
label: Sports Tournaments
|
|
description: Championship, Match
|
|
examples:
|
|
- "EK 2020"
|
|
- "WK 2018"
|
|
- "Olympische Spelen 2024"
|
|
|
|
- name: meeting
|
|
key: MEET
|
|
label: Meetings
|
|
description: Conference
|
|
examples:
|
|
- "VN Klimaatconferentie"
|
|
- "G20 Top"
|
|
- "de OM vergadering"
|
|
|
|
- name: band_orchestra
|
|
key: BANDORCH
|
|
label: Bands and Orchestras
|
|
description: Musical groups
|
|
examples:
|
|
- "Dé Carels"
|
|
- "Het Metropole Orkest"
|
|
|
|
- name: theatre_group
|
|
key: THEATREG
|
|
label: Theatre Groups
|
|
description: Theatre groups
|
|
examples:
|
|
- "Toneelgroep Amsterdam"
|
|
- "Het Nationale Toneel"
|
|
|
|
- name: religious_order
|
|
key: RELIGORD
|
|
label: Religious Orders
|
|
description: Religious orders
|
|
examples:
|
|
- "Jezuïeten"
|
|
- "Franciscanen"
|
|
- "Dominicanen"
|
|
- "Shattariyah"
|
|
- "Sufi orde"
|
|
- "Katholieke Kerk"
|
|
|
|
- name: ship
|
|
key: SHIP
|
|
label: Ships
|
|
description: Uniquely named ships or vehicles (not model names! those are THING)
|
|
examples:
|
|
- "Stoomschip Sumatra"
|
|
- "Hr.Ms. De Ruyter"
|
|
|
|
|
|
inclusion_rules:
|
|
- rule_id: ORG_INC001
|
|
description: Tag branches with placenames without prepositions
|
|
conditions:
|
|
- "Placenames indicating branches are only tagged if there is no preposition between the organization and the placename"
|
|
- "also tag the place separately"
|
|
examples:
|
|
- "ING Rotterdam → ORGANISATION (full), Rotterdam → PLACE"
|
|
- "Rekenkamer Gemeente Rotterdam → ORGANISATION (full), Rotterdam → PLACE"
|
|
- "NVM Arnhem → ORGANISATION (full), Arnhem → PLACE"
|
|
|
|
- rule_id: ORG_INC002
|
|
description: Tag frequently repeated references to organisational groups
|
|
conditions:
|
|
- "Frequently repeated references to denominations which refer to organisations are tagged"
|
|
examples:
|
|
- "aen generael en raden (raden = council itself)"
|
|
|
|
- rule_id: ORG_INC003
|
|
description: tag the place as part of the organisation when the organisation is defined by its location. The place is also tagged separately WITH double tagging
|
|
examples:
|
|
- "Rechtbank te Carcassonne → ORGANISATION (Rechtbank te Carcassonne), PLACE (Carcassonne)"
|
|
- "Regering van Oostenrijk → ORGANISATION (Regering van Oostenrijk), PLACE (Oostenrijk)"
|
|
|
|
exclusion_rules:
|
|
- rule_id: ORG_EXC001
|
|
description: Do not include articles
|
|
rationale: Articles are not part of organization names
|
|
examples:
|
|
- "Tweede Kamer (not 'de Tweede Kamer' in annotation)"
|
|
|
|
- rule_id: ORG_EXC002
|
|
description: Do not tag abbreviations separately
|
|
rationale: An abbreviation given alongside the full name is tagged together with it as one entity; a standalone abbreviation is tagged on its own
|
|
examples:
|
|
- "{Nederlandse Vereniging van Makelaars (NVM)}" # NVM is not tagged separately from Nederlandse Vereniging van Makelaars
|
|
- "{Koninklijk Nederlands Indisch Leger (KNIL)}" # KNIL is not tagged separately from Koninklijk Nederlands Indisch Leger
|
|
|
|
- rule_id: ORG_EXC003
|
|
description: Do not tag groups lacking formal structure or that are referred to in a generic sense
|
|
rationale: These are considered denominations
|
|
examples:
|
|
- "De jongerenbeweging"
|
|
- "De vakbond bewegingen"
|
|
|
|
- rule_id: ORG_EXC004
|
|
description: Do not tag representatives as organisations
|
|
rationale: These are beings or denominations
|
|
examples:
|
|
- "De Minister van Buitenlandse Zaken (De Minister van Buitenlandse Zaken = BEING, Buitenlandse Zaken = organisation separately)"
|
|
|
|
- rule_id: ORG_EXC005
|
|
description: Tag publications as textual references not organisations
|
|
conditions:
|
|
- "When the publication itself is referenced, not the company"
|
|
examples:
|
|
- "In de NRC stond ... (NRC = textual reference)"
|
|
|
|
- rule_id: ORG_EXC006
|
|
description: Do not tag general model names of ships/vehicles or other things as organisations
|
|
rationale: These are considered THING
|
|
examples:
|
|
- "Boeing 747 (not 'Boeing 747-400')"
|
|
- "Tesla Model S"
|
|
|
|
# 3.4 DENOMINATION
|
|
- entity_type: DENOMINATION
|
|
key: DEN
|
|
label: Denomination
|
|
description: >-
|
|
Ethnicity, profession, religion, demonym, ideology, language, community
|
|
references. Includes adjectives referring to places/organisations/religions,
|
|
demonyms, languages, pejorative terms, professions, ideological affiliations,
|
|
and group references. With double tagging, denominations within person names (BEING)
|
|
or other entities are also tagged separately.
|
|
|
|
subcategories:
|
|
- name: adjective_phrases
|
|
key: ADJPHR
|
|
label: Adjective Phrases
|
|
description: Phrases containing adjectives referring to place/organisation/religion/ideology/language/community
|
|
examples:
|
|
- "Islamitische gemeenschap"
|
|
- "Marxistische overtuiging"
|
|
- "Turkse taal"
|
|
|
|
- name: demonym
|
|
key: DEMO
|
|
label: Demonym
|
|
description: References to people from a place
|
|
examples:
|
|
- "Chinees"
|
|
- "China"
|
|
- "Arabier"
|
|
- "Westerlingen"
|
|
- "Bandunger"
|
|
- "Brabander"
|
|
- "Europeanen"
|
|
- "Europeesche dame"
|
|
- "Indonesische jeugd"
|
|
|
|
- name: titles_ranks
|
|
key: TITLERANK
|
|
label: Titles and Ranks
|
|
description: >-
|
|
Military, noble, and professional titles (now tagged separately from person names)
|
|
examples:
|
|
- "Kapitein" # ADDED: Tag separately from person name (BEING)
|
|
- "Gouverneur-Generaal"
|
|
- "President"
|
|
- "Sultan"
|
|
- "Jonker/Jonkheer" # ADDED: Noble title
|
|
- "Lieutenant"
|
|
- "Resident"
|
|
- "Vice admiraal"
|
|
|
|
- name: language
|
|
key: LANG
|
|
label: Languages
|
|
description: Language names as nouns
|
|
examples:
|
|
- "Duits"
|
|
- "Fries"
|
|
- "Azerbeidzjaans"
|
|
|
|
- name: abstract_bureaucratic
|
|
key: ABBUREAU
|
|
label: Abstract Bureaucratic Terms
|
|
description: >-
|
|
Implicitly refer to ideological groups or departments, typical for
|
|
late 20th/early 21st century texts
|
|
examples:
|
|
- "mix van communicatie- en beleidsverantwoordelijken"
|
|
|
|
- name: time_zone
|
|
key: TIMEZONE
|
|
label: Time Zones
|
|
description: Time zone references
|
|
examples:
|
|
- "Zuid-Sumatratijd"
|
|
|
|
- name: religion
|
|
key: RELIGION
|
|
label: Religions
|
|
description: Religion names as nouns
|
|
examples:
|
|
- "Protestantisme"
|
|
|
|
- name: pejorative
|
|
key: PEJOR
|
|
label: Pejorative Terms
|
|
description: Pejorative terms
|
|
examples:
|
|
- "Slaaf"
|
|
- "Koelie"
|
|
- "Inlander"
|
|
- "Zwarte"
|
|
- "Boschnegers"
|
|
- "Roodharige barbaren"
|
|
- "Totok"
|
|
- "Mohammedanen"
|
|
|
|
- name: profession
|
|
key: PROF
|
|
label: Professions
|
|
description: Professional titles and roles
|
|
examples:
|
|
- "Schrijver"
|
|
- "Klerk"
|
|
- "Agent"
|
|
- "Ministers"
|
|
- "Vertegenwoordiger"
|
|
- "Hoofden"
|
|
- "Mandoer"
|
|
|
|
- name: religious_ideological_members
|
|
key: RELIDEO
|
|
label: Religious and Ideological Members
|
|
description: Members of religions/ideologies
|
|
examples:
|
|
- "Kapitalist"
|
|
- "Communist"
|
|
- "Moslim"
|
|
- "Christen"
|
|
- "Pro-Russisch"
|
|
- "Anti-abortus"
|
|
|
|
- name: group_references
|
|
key: GROUPREF
|
|
label: Group References
|
|
description: General nouns referring to groups through prepositions/possessives
|
|
examples:
|
|
- "{Volk van West-Irian} (in addition: West-Irian = place)"
|
|
- "De {heren van de Volkskrant} (in addition: Volkskrant = organisation)"
|
|
- "{KPN medewerkers} (in addition: KPN = organisation)"
|
|
- "{die van sammadang} (in addition: sammadang = place)"
|
|
- "{Baccherachs volck} (in addition: Baccherach = BEING)"
|
|
|
|
inclusion_rules:
|
|
- rule_id: DEN_INC001
|
|
description: Tag both adjective and noun in denomination phrases
|
|
conditions:
|
|
- "When a phrase contains an adjective referring to place/organisation/religion/ideology"
|
|
examples:
|
|
- "Islamitische gemeenschap → DENOMINATION (full)"
|
|
- "Marxistische overtuiging → DENOMINATION (full)"
|
|
- "Indonesische jeugd → DENOMINATION (full)"
|
|
|
|
- rule_id: DEN_INC002
|
|
description: Tag profession/pejorative when appears alone without name
|
|
conditions:
|
|
- "Reference to profession or pejorative occurs without a personal name"
|
|
- "Follow rules from section 3.1.A"
|
|
- "double tag the denomination separately in case this also concerns a specific reference to a person"
|
|
examples:
|
|
- "De minister ('De minister' = BEING, 'minister' = denomination)"
|
|
- "De slaaf rent weg ('de slaaf' = BEING, 'slaaf' = denomination)"
|
|
|
|
- rule_id: DEN_INC003
|
|
description: Tag titles and denominations within person names separately
|
|
conditions:
|
|
- "When a person name includes a title, rank, or denomination, tag the full person AND the title/denomination separately"
|
|
examples:
|
|
- "Kapitein Jonker → BEING (full), Kapitein → DENOMINATION"
|
|
- "Gouverneur-Generaal van Starkenborgh → BEING (full), Gouverneur-Generaal → DENOMINATION"
|
|
- "de Javaanschen Cap:n Soeta Wangsa → BEING (full or name), Javaanschen Cap:n → DENOMINATION"
|
|
|
|
- rule_id: DEN_INC004
|
|
description: tag denominations in organisation names
|
|
rationale: Organisations have formal structure
|
|
examples:
|
|
- "Nederlandse groep --> organisation, 'Nederlandse' is denomination"
|
|
- "De Nederlandse Bank --> organisation, 'Nederlandse' is denomination"
|
|
|
|
- rule_id: DEN_INC005
|
|
description: Tag group references with denominations
|
|
rationale: These are denominations referring to groups
|
|
examples:
|
|
- "Volk van West-Irian → DENOMINATION (Volk van West-Irian), PLACE (West-Irian)"
|
|
- "KPN medewerkers → DENOMINATION (KPN medewerkers), ORGANISATION (KPN)"
|
|
|
|
- rule_id: DEN_INC006
|
|
description: Tag currencies as denominations in case denominations are used
|
|
rationale: Currencies are also textual references or quantities or objects
|
|
examples:
|
|
- "Spaenschen reael → DENOMINATION (Spaenschen), TEXTUAL_REFERENCE (Spaenschen reael)"
|
|
|
|
- rule_id: DEN_INC007
|
|
description: Tag denominations with numerals as quantities, but also tag denomination
|
|
rationale: These are quantities, but the denomination part is also tagged separately
|
|
examples:
|
|
- "Twee Nederlanders → QUANTITY (full), Nederlanders → DENOMINATION"
|
|
- "Een twintigtal soldaten → QUANTITY (full), soldaten → DENOMINATION"
|
|
|
|
|
|
# 3.5 QUANTITY
|
|
- entity_type: QUANTITY
|
|
key: QTY
|
|
label: Quantity
|
|
description: >-
|
|
Quantities including currency, merchandise counts, people counts,
|
|
age, school class, weapons, settlements, area, distance, calibre,
|
|
enumerations, carat, degree, and weight. With double tagging,
|
|
objects within quantity expressions are also tagged as THING.
|
|
|
|
subcategories:
|
|
- name: currency
|
|
key: CURRQ
|
|
label: Currency
|
|
description: Monetary amounts
|
|
examples:
|
|
- "ƒ 1.50"
|
|
- "twee gulden" # Also tag 'gulden' as THING
|
|
|
|
- name: merchandise
|
|
key: MERCHQ
|
|
label: Merchandise
|
|
description: Counts of goods
|
|
examples:
|
|
- "een lepel" # Also tag 'lepel' as THING
|
|
- "twee kandelaars" # Also tag 'kandelaars' as THING
|
|
- "drie schootels" # Also tag 'schootels' as THING
|
|
|
|
- name: people
|
|
key: PPLQ
|
|
label: People
|
|
description: Counts of people
|
|
examples:
|
|
- "23: inlandsche zieken" # ALSO tag 'inlandsche zieken' as DENOMINATION
|
|
|
|
- name: troops
|
|
key: TROOPQ
|
|
label: Troops
|
|
description: Military unit quantities
|
|
examples:
|
|
- "vijfhonderd weerbare mannen" # ALSO tag 'weerbare mannen' as DENOMINATION
|
|
- "4. van de macassaeren" # ALSO tag 'macassaeren' as DENOMINATION
|
|
|
|
- name: age
|
|
key: AGEQ
|
|
label: Age
|
|
description: Age expressions
|
|
examples:
|
|
- "honderdjarige leeftijd"
|
|
|
|
- name: school_class
|
|
key: SCHCLASSQ
|
|
label: School Class
|
|
description: School class levels
|
|
examples:
|
|
- "derde klas middelbare school" # ALSO tag 'middelbare school' as ORGANISATION
|
|
|
|
- name: weapon
|
|
key: WEAPQ
|
|
label: Weapon
|
|
description: Weapon quantities
|
|
examples:
|
|
- "ruijm 1000. stx:s schiet geweeren" # ALSO tag 'schiet geweeren' as THING
|
|
|
|
- name: settlement
|
|
key: SETTQ
|
|
label: Settlement
|
|
description: Settlement counts
|
|
examples:
|
|
- "twee negorijen" # ALSO tag 'negorijen' as PLACE
|
|
|
|
- name: area
|
|
key: AREAQ
|
|
label: Area
|
|
description: Area measurements
|
|
examples:
|
|
- "een landt van 40 vierkante mijlen"
|
|
|
|
- name: distance
|
|
key: DISTQ
|
|
label: Distance
|
|
description: Distance measurements
|
|
examples:
|
|
- "25 mijlen"
|
|
- "2 mijl in zee"
|
|
- "drie â vier dagen varens"
|
|
- "5 uuren oostwaartsheenen"
|
|
|
|
- name: calibre
|
|
key: CALQ
|
|
label: Calibre
|
|
description: Weapon calibre
|
|
examples:
|
|
- "kaliber 6.5"
|
|
|
|
- name: enumeration
|
|
key: ENUMQ
|
|
label: Enumeration
|
|
description: Lists of counted items
|
|
examples:
|
|
- "3 schootels, zadel, 2 stijgh beugels" # ALSO tag 'schootels', 'zadel', 'stijgh beugels' as THING
|
|
|
|
- name: carat
|
|
key: CARATQ
|
|
label: Carat
|
|
description: Gold/gem quality measure
|
|
examples:
|
|
- "gouden sieraden 22, 23 en 24 Kt" # ALSO tag 'gouden sieraden' as THING
|
|
|
|
- name: degree
|
|
key: DEGQ
|
|
label: Degree
|
|
description: Degree measurements
|
|
examples:
|
|
- "10 graed noorder breedte"
|
|
|
|
- name: weight
|
|
key: WEIGHTQ
|
|
label: Weight
|
|
description: Weight measurements
|
|
examples:
|
|
- "14940 石 quiksilver" # ALSO tag 'quiksilver' as THING
|
|
|
|
inclusion_rules:
|
|
- rule_id: QTY_INC001
|
|
description: Infer single items in enumerations
|
|
conditions:
|
|
- "In enumerations, items without explicit numbers are assumed to be singular"
|
|
examples:
|
|
- "3 schootels, zadel, 2 stijgh beugels (zadel = 1 saddle, tagged as quantity)" # ALSO tag 'schootels', 'zadel', 'stijgh beugels' as THING
|
|
- rule_id: QTY_INC002
|
|
description: Tag denominations with numerals as quantities
|
|
conditions:
|
|
- "Denominations preceded by numerals or quantitative adjectives become quantities"
|
|
- "ALSO tag the denomination separately"
|
|
examples:
|
|
- "Twee Nederlanders → QUANTITY (full), Nederlanders → DENOMINATION"
|
|
- "Een twintigtal soldaten → QUANTITY (full), soldaten → DENOMINATION"
|
|
|
|
- rule_id: QTY_INC003
|
|
description: Tag travel time as distance not temporal
|
|
conditions:
|
|
- "Time expressions measuring distance are quantities not temporal references"
|
|
examples:
|
|
- "{drie â vier dagen varens} (not temporal reference)"
|
|
- "{5 uuren oostwaartsheenen} (not temporal reference)"
|
|
|
|
- rule_id: QTY_INC004
|
|
description: Tag objects within quantity expressions as THING
|
|
conditions:
|
|
- "When a quantity expression includes a countable object, tag the full quantity AND the object separately as THING"
|
|
examples:
|
|
- "een lepel → QUANTITY (full), lepel → THING"
|
|
- "twee kandelaars → QUANTITY (full), kandelaars → THING"
|
|
- "3 schootels → QUANTITY (full), schootels → THING"
|
|
- "zadel → QUANTITY (inferred 1), zadel → THING"
|
|
- rule_id: QTY_INC005
|
|
description: Tag textual references as quantities if they contain numbers
|
|
rationale: Written sources are textual references but can also contain quantities
|
|
examples:
|
|
- "2 brieven --> textual reference (full), quantity (full)"
|
|
- rule_id: QTY_INC006
|
|
description: Organisations appearing within quantity expressions are included in the QUANTITY span and are also tagged separately as ORGANISATION
|
|
rationale: Organisation is tagged separately WITH the quantity
|
|
examples:
|
|
- "derde klas middelbare school → QUANTITY (full), ORGANISATION (middelbare school)"
|
|
|
|
# 3.6 TEMPORAL_REFERENCE
|
|
- entity_type: TEMPORAL_REFERENCE
|
|
key: TMP
|
|
label: Temporal Reference
|
|
description: >-
|
|
Temporal references including days, dates, campaigns/wars, holidays,
|
|
canonised periods, genitives, and temporal adjectives.
|
|
|
|
subcategories:
|
|
- name: days
|
|
key: DAY
|
|
label: Days
|
|
description: References to specific days
|
|
examples:
|
|
- "Deze Maandag"
|
|
|
|
- name: days_of_week
|
|
key: DOW
|
|
label: Days of the Week
|
|
description: Weekday names
|
|
examples:
|
|
- "Maandag"
|
|
|
|
- name: deictic_temporal
|
|
key: DEIC
|
|
label: Deictic Temporal Pronouns
|
|
description: Deictic temporal pronouns
|
|
examples:
|
|
- "gisteren"
|
|
- "morgen"
|
|
- "vandaag"
|
|
- "overmorgen"
|
|
- "eergisteren"
|
|
- "Hedenmiddag"
|
|
- "nu"
|
|
- "gisteravond"
|
|
|
|
- name: dates
|
|
key: DATE
|
|
label: Dates
|
|
description: Dates in every calendar
|
|
examples:
|
|
- "Vrijdag 8 November 1957"
|
|
|
|
- name: campaigns_wars
|
|
key: CAMPWAR
|
|
label: Campaigns and Wars
|
|
description: Campaigns/wars when referring to time periods
|
|
examples:
|
|
- "Hongitochten"
|
|
- "Tweede Wereldoorlog"
|
|
- "1ste Nederlandse militaire actie"
|
|
|
|
- name: holidays_festivals
|
|
key: HOLFEST
|
|
label: Holidays and Festivals
|
|
description: Holiday and festival names
|
|
examples:
|
|
- "Nieuwjaar"
|
|
- "geboortedag van de Profeet Mohammad"
|
|
- "Heldendag"
|
|
|
|
- name: canonised_periods
|
|
key: CANPER
|
|
label: Canonised Historical Periods
|
|
description: Canonised historical periods (any historiography)
|
|
examples:
|
|
- "Middeleeuwen (Western Europe)"
|
|
- "Periode van de Strijdende Staten (China)"
|
|
- "Zaman Hindu-budis (Indonesia)"
|
|
|
|
- name: genitives
|
|
key: GENIT
|
|
label: Genitives
|
|
description: Genitive temporal expressions
|
|
examples:
|
|
- "9 dezer"
|
|
- "Dezer dagen"
|
|
|
|
- name: temporal_adjectives
|
|
key: TEMPADJ
|
|
label: Temporal Adjectives
|
|
description: Temporal adjectives before place names
|
|
examples:
|
|
- "eighteenth-century Europe"
|
|
|
|
inclusion_rules:
|
|
- rule_id: TMP_INC001
|
|
description: Always tag days
|
|
conditions:
|
|
- "Days are always tagged as temporal references, even when part of a full date"
|
|
examples:
|
|
- "Afgelopen Vrijdag (tagged)"
|
|
- "Vrijdag 8 November 1957 (entire date tagged, including Vrijdag)"
|
|
|
|
- rule_id: TMP_INC002
|
|
description: Tag campaigns/wars as temporal when referring to period
|
|
conditions:
|
|
- "A campaign/war is tagged as a temporal reference when the context shows that it refers to a time period"
|
|
examples:
|
|
- "Hongitochten"
|
|
- "Tweede Wereldoorlog"
|
|
|
|
# 3.7 TEXTUAL_REFERENCE
|
|
- entity_type: TEXTUAL_REFERENCE
|
|
key: TXT
|
|
label: Textual Reference
|
|
description: >-
|
|
References to written sources, documents, laws, titles of cultural works,
|
|
inventory numbers, accounts, currency types, telephone numbers, URLs,
|
|
programs, policies, agreements, sanctions, statements, surveys,
|
|
stocks, registers, meeting minutes, activities with recorded minutes,
|
|
slogans, proverbs, flags, and honours.
|
|
|
|
subcategories:
|
|
- name: radio_frequencies
|
|
key: RADIO
|
|
label: Radio Frequencies
|
|
description: Radio frequencies
|
|
examples:
|
|
- "FM 105.3"
|
|
|
|
- name: board_games
|
|
key: BOARD
|
|
label: Board Games
|
|
description: Board game names
|
|
examples:
|
|
- "Monopoly"
|
|
|
|
- name: reports
|
|
key: REPORT
|
|
label: Reports
|
|
description: Reports and documents
|
|
examples:
|
|
- "Jaarverslag 1995"
|
|
|
|
- name: cultural_titles
|
|
key: CULTITLE
|
|
label: Cultural Titles
|
|
description: Titles of books, songs, movies, pamphlets, records, manuscripts, musicals, programs, magazines, newspapers, journals
|
|
examples:
|
|
- "Stabat Mater van Pergolesi --> TEXTUAL_REFERENCE (full), BEING (Pergolesi)"
|
|
|
|
- name: inventory_numbers
|
|
key: INVNUM
|
|
label: Inventory Numbers
|
|
description: Archive inventory numbers
|
|
examples:
|
|
- "AVS INV. 61855-3"
|
|
|
|
- name: accounts
|
|
key: ACCOUNT
|
|
label: Accounts
|
|
description: Bank and postal accounts
|
|
examples:
|
|
- "Giro 158225"
|
|
- "postgirorekening No. 400"
|
|
|
|
- name: currency_types
|
|
key: CURRTYPE
|
|
label: Currency Types
|
|
description: Currency types (not amounts)
|
|
examples:
|
|
- "Spaenschen reael --> TEXTUAL_REFERENCE (full), DENOMINATION (Spaenschen)"
|
|
- "reael pedangh ofte rycxdaelder --> TEXTUAL_REFERENCE (full), DENOMINATION (pedangh)"
|
|
|
|
- name: mint_runs
|
|
key: MINTRUN
|
|
label: Mint Runs
|
|
description: Print/mint runs of currency
|
|
examples:
|
|
- "aan gehaalde staven, namentlijk No 72, 73, 74, 76" # staven is also tagged as THING
|
|
|
|
- name: telephone_numbers
|
|
key: TELNUM
|
|
label: Telephone Numbers
|
|
description: Telephone numbers
|
|
examples:
|
|
- "Tel. 020 -72 84 61"
|
|
|
|
- name: mailing_lists
|
|
key: MAILLIST
|
|
label: Mailing Lists
|
|
description: Mailing lists and message systems
|
|
examples:
|
|
- "zij organiseerde de {'message box'}"
|
|
|
|
- name: academic_references
|
|
key: ACADREF
|
|
label: Academic References
|
|
description: Academic citations
|
|
examples:
|
|
- "(Scheveningen, 1914)"
|
|
- "(Scholte, 1995)"
|
|
|
|
- name: page_numbers
|
|
key: PAGENUM
|
|
label: Page Numbers
|
|
description: Page number references
|
|
examples:
|
|
- "Pagina 39"
|
|
|
|
- name: religious_texts
|
|
key: RELTEXT
|
|
label: Religious Texts
|
|
description: Religious text references
|
|
examples:
|
|
- "Surah 17:19"
|
|
|
|
- name: urls
|
|
key: URL
|
|
label: URLs
|
|
description: Web URLs
|
|
examples:
|
|
- "www.colonialarchitecture.eu"
|
|
- name: email_addresses
|
|
key: EMAILAD
|
|
label: Email Addresses
|
|
description: Email addresses
|
|
examples:
|
|
- "info@colonialarchitecture.eu"
|
|
|
|
- name: programs
|
|
key: PROGRAM
|
|
label: Programs
|
|
description: Software or organizational programs
|
|
examples:
|
|
- "Microsoft Word"
|
|
- "Adobe Photoshop"
|
|
|
|
- name: policies
|
|
key: POLICY
|
|
label: Policies
|
|
description: Publicly announced policies
|
|
examples:
|
|
- "passen-stelsel"
|
|
- "pers breidel"
|
|
- "Manokwari-plan" # also tagged as PLACE (Manokwari)
|
|
- "Pax Neerlandica" # also tagged as DENOMINATION (Neerlandica)
|
|
- "non-coöperatie"
|
|
- "presidentieel besluit No. 9" # also tagged as DENOMINATION (presidentieel)
|
|
- "Handvest der Verenigde Naties" # also tagged as ORGANISATION (Verenigde Naties)
|
|
|
|
- name: agreements
|
|
key: AGREEMENT
|
|
label: Agreements
|
|
description: Written agreements
|
|
examples:
|
|
- "Linggarjati-overeenkomst" # also tagged as PLACE (Linggarjati)
|
|
- "Renville-overeenkomst" # also tagged as PLACE (Renville)
|
|
|
|
- name: sanctions
|
|
key: SANCTION
|
|
label: Sanctions
|
|
description: Written sanctions
|
|
examples:
|
|
- "Poenale Sanctie"
|
|
|
|
- name: statements
|
|
key: STATEMENT
|
|
label: Statements
|
|
description: Written/recorded statements
|
|
examples:
|
|
- "communicatie-uitingen"
|
|
|
|
- name: laws
|
|
key: LAW
|
|
label: Laws
|
|
description: Legal references
|
|
examples:
|
|
- "artikel 156 alinea 2"
|
|
|
|
- name: land_surveys
|
|
key: LANDSURVEY
|
|
label: Land Surveys
|
|
description: Land survey documentation
|
|
examples:
|
|
- "verponding No. 63"
|
|
|
|
- name: stocks
|
|
key: STOCK
|
|
label: Stocks
|
|
description: Stock certificates and registered numbers
|
|
examples:
|
|
- "Controleursgrant no. 3"
|
|
- "acte ddo. 12 December 1927 No. 59" # also tagged as DATE (12 December 1927)
|
|
|
|
- name: advertisement_registers
|
|
key: ADVREGISTER
|
|
label: Advertisement Registers
|
|
description: Registers of advertisements
|
|
examples:
|
|
- "No. 245 44 regels" # also tagged as QUANTITY (44 regels)
|
|
|
|
- name: meeting_minutes
|
|
key: MEETINGMINUTES
|
|
label: Meeting Minutes
|
|
description: Meeting minutes
|
|
examples:
|
|
- "Notulen van de vergadering" # also tagged as ORGANISATION (de vergadering)
|
|
|
|
- name: recorded_activities
|
|
key: RECORDACT
|
|
label: Recorded Activities
|
|
description: Activities with recorded minutes/reports if not referred to as an event (TEMPORAL_REFERENCE) or organisation (ORGANISATION)
|
|
examples:
|
|
- "Conference"
|
|
- "Forum"
|
|
- "Concert"
|
|
|
|
- name: slogans
|
|
key: SLOGAN
|
|
label: Slogans
|
|
description: Political or advertising slogans
|
|
examples:
|
|
- "Eenheid in verscheidenheid"
|
|
- "Just do it"
|
|
- "Het kan wèl!"
|
|
|
|
- name: proverbs
|
|
key: PROVERB
|
|
label: Proverbs
|
|
description: Proverbs and adages (not idioms)
|
|
examples:
|
|
- "Wie honing wil eten moet lijden dat de bijen hem steken"
|
|
- "j'en passe et des meilleurs"
|
|
|
|
- name: flags_and_banners
|
|
key: FLAG
|
|
label: Flags and Banners
|
|
description: Flags and banners
|
|
examples:
|
|
- "Rood, wit, blauw" # also tagged as THING
|
|
- "Bendera Kokki" # also tagged as THING
|
|
|
|
- name: honours
|
|
key: HONOUR
|
|
label: Honours and Awards
|
|
description: Titles of honours and awards
|
|
examples:
|
|
- "Ridder in de orde van Oranje Nassau" # also tagged as ORGANISATION (Oranje Nassau) and DENOMINATION (Ridder)
|
|
|
|
inclusion_rules:
|
|
- rule_id: TXT_INC001
|
|
description: Tag currency types as textual references
|
|
conditions:
|
|
- "Currency types (not amounts) are textual references"
|
|
examples:
|
|
- "Spaenschen reael" # ALSO tag 'Spaenschen' as DENOMINATION
|
|
|
|
- rule_id: TXT_INC002
|
|
description: Tag publications as textual references not organisations
|
|
conditions:
|
|
- "When the publication itself is referred to, not the company"
|
|
examples:
|
|
- "In de NRC stond ... (NRC = textual reference)"
|
|
|
|
- rule_id: TXT_INC003
|
|
description: Tag recorded activities as textual references, not events or organisations
|
|
conditions:
|
|
- "Activities of which minutes or reports have been recorded"
|
|
examples:
|
|
- "Conference"
|
|
- "Forum"
|
|
- "Concert"
|
|
|
|
- rule_id: TXT_INC004
|
|
description: Quantities can contain textual references
|
|
rationale: Written sources are textual references but can also contain quantities
|
|
examples:
|
|
- "2 brieven --> textual reference (full), quantity (full)"
|
|
|
|
exclusion_rules:
|
|
- rule_id: TXT_EXC001
|
|
description: Do not tag currency amounts as textual references
|
|
rationale: Amounts are quantities
|
|
examples:
|
|
- "ƒ 1.50 (= quantity)"
|
|
|
|
- rule_id: TXT_EXC002
|
|
description: Do not tag news agencies as textual references when they are referred to as organisations
|
|
rationale: Context determines if organisation or textual reference
|
|
examples:
|
|
- "De NRC besloot ... (NRC = organisation)"
|
|
- "In de NRC stond ... (NRC = textual reference)"
|
|
|
|
# 3.8 THING
|
|
- entity_type: THING
|
|
key: THG
|
|
label: Thing
|
|
description: >-
|
|
Physical objects, goods, items, and countable entities. This category
|
|
captures concrete objects within quantity expressions, enumerations,
|
|
and descriptions. THING entities typically appear as double-tagged
|
|
elements within QUANTITY annotations.
|
|
|
|
subcategories:
|
|
- name: household_items
|
|
key: HOUSEHOLD
|
|
label: Household Items
|
|
description: Domestic objects and utensils
|
|
examples:
|
|
- "lepel" # spoon
|
|
- "schootels" # plates
|
|
- "kandelaars" # candlesticks
|
|
- "zadel" # saddle
|
|
- "stijgh beugels" # stirrups
|
|
|
|
- name: furniture
|
|
key: FURNIT
|
|
label: Furniture
|
|
description: Furniture and fixtures
|
|
examples:
|
|
- "tafel" # table
|
|
- "stoel" # chair
|
|
- "kast" # cabinet
|
|
|
|
- name: tools_equipment
|
|
key: TOOLS
|
|
label: Tools and Equipment
|
|
description: Tools, instruments, and equipment
|
|
examples:
|
|
- "hamer" # hammer
|
|
- "zaag" # saw
|
|
- "mes" # knife
|
|
|
|
- name: clothing_textiles
|
|
key: CLOTH
|
|
label: Clothing and Textiles
|
|
description: Clothing and fabric items
|
|
examples:
|
|
- "hemd" # shirt
|
|
- "broek" # trousers
|
|
- "doek" # cloth
|
|
|
|
- name: containers
|
|
key: CONTAINER
|
|
label: Containers and Vessels
|
|
description: Containers and vessels
|
|
examples:
|
|
- "kist" # chest
|
|
- "vat" # barrel
|
|
- "zak" # bag
|
|
|
|
- name: weapons
|
|
key: WEAPON
|
|
label: Weapons
|
|
description: Weapons as physical objects (not quantities of weapons)
|
|
examples:
|
|
- "geweeren" # rifles
|
|
- "zwaard" # sword
|
|
- "kanon" # cannon
|
|
|
|
- name: commodities
|
|
key: COMMOD
|
|
label: Commodities
|
|
description: Trade goods and commodities
|
|
examples:
|
|
- "koffie" # coffee
|
|
- "suiker" # sugar
|
|
- "specerijen" # spices
|
|
|
|
- name: animals
|
|
key: ANIMAL
|
|
label: Animals
|
|
description: Animals referred to generically (not with names)
|
|
examples:
|
|
- "paard" # horse
|
|
- "koe" # cow
|
|
- "kip" # chicken
|
|
|
|
- name: vehicles
|
|
key: VEHICLE
|
|
label: Vehicles
|
|
description: Transportation vehicles
|
|
examples:
|
|
- "wagen" # wagon
|
|
- "boot" # boat
|
|
- "fiets" # bicycle
|
|
|
|
- name: documents_as_objects
|
|
key: DOCOBJ
|
|
label: Documents as Physical Objects
|
|
description: Physical documents (not as textual references)
|
|
examples:
|
|
- "brief" # letter (when referring to physical object in enumeration)
|
|
- "boek" # book (when referring to physical object)
|
|
|
|
inclusion_rules:
|
|
- rule_id: THG_INC001
|
|
description: Tag objects within quantity expressions
|
|
conditions:
|
|
- "When an object appears in a quantity expression (e.g., 'een lepel'), tag the full quantity AND the object as THING"
|
|
examples:
|
|
- "een lepel → QUANTITY (full), lepel → THING"
|
|
- "twee kandelaars → QUANTITY (full), kandelaars → THING"
|
|
- "3 schootels → QUANTITY (full), schootels → THING"
|
|
|
|
- rule_id: THG_INC002
|
|
description: Tag objects in enumerations
|
|
conditions:
|
|
- "Objects listed in enumerations are tagged as THING even without explicit quantities"
|
|
examples:
|
|
- "3 schootels, zadel, 2 stijgh beugels → 'zadel' is THING (and QUANTITY with inferred 1)"
|
|
|
|
- rule_id: THG_INC003
|
|
description: Tag countable nouns referring to physical objects
|
|
conditions:
|
|
- "Physical, tangible objects that can be counted or enumerated"
|
|
examples:
|
|
- "de lepel ligt op tafel → 'lepel' is THING (contextual)"
|
|
- "hij droeg een zwaard → 'zwaard' is THING"
|
|
|
|
exclusion_rules:
|
|
- rule_id: THG_EXC001
|
|
description: Do not tag abstract concepts
|
|
rationale: THING is for concrete, physical objects only
|
|
examples:
|
|
- "een idee (abstract, not THING)"
|
|
- "liefde (abstract, not THING)"
|
|
- "vrijheid (abstract, not THING)"
|
|
|
|
- rule_id: THG_EXC002
|
|
description: Do not tag places or buildings as THING if they are unique
|
|
rationale: These are PLACE entities (or ORGANISATION, depending on context)
|
|
examples:
|
|
- "het huis (PLACE or building context)"
|
|
- "de fabriek (ORGANISATION or PLACE)"
|
|
|
|
- rule_id: THG_EXC003
|
|
description: Do not tag people or animals with names as THING
|
|
rationale: These are BEING entities
|
|
examples:
|
|
- "de slaaf Louis (BEING, not THING)"
|
|
- "Lassie (BEING - named animal)"
|
|
|
|
- rule_id: THG_EXC004
|
|
description: Do not tag documents when referring to content
|
|
rationale: These are TEXTUAL_REFERENCE when content is meant
|
|
examples:
|
|
- "In de brief stond ... (TEXTUAL_REFERENCE, not THING)"
|
|
- "Het boek vertelt ... (TEXTUAL_REFERENCE, not THING)"
|
|
- "But: '2 brieven op tafel' → 'brieven' can be THING if referring to physical objects"
|
|
|
|
usage_guidelines:
|
|
- "Always start with the broadest entity (e.g., full person name)"
|
|
- "Then identify and tag constituent semantic parts (title, place, object)"
|
|
- "Use span coordinates to track exact character positions of overlapping entities"
|
|
- "In ambiguous cases, prefer more semantic tags over fewer (enrich the data)"
|
|
- "Document rationale for double-tagging decisions in annotation metadata"
|
|
- "Maintain consistency within a single document or collection"
|