---
# Formulaic Phrases Instance - Section 4 of Convention v1.4.3
# Corpus-specific formulaic phrase patterns for 17th Century Dutch VOC archives

# Corpus identification metadata.
corpus: "VOC Archives (East India Company)"
language: "Dutch"
century: "17th Century"

# Each entry carries a pattern_id, the text_region_type it is listed under,
# the phrase pattern (entity placeholders in square brackets; see the legend
# after the last entry), a description, and one or more example strings.
phrase_patterns:
# Headers
- pattern_id: FP_HEADER_001
  text_region_type: HEADER
  pattern: "Int [LOC] [DAT]"
  description: Standard header format with location and date
  example:
  - "Int Casteel Batavia A:o 1684: 28:e november"

- pattern_id: FP_HEADER_002
  text_region_type: HEADER
  pattern: "Van [LOC] onder [LOC] In dato [DAT]"
  description: Header indicating location hierarchy and date
  example:
  - "Van Indramayou onder Batavia In dato 17' meert 1679"

# Salutations in letters
- pattern_id: FP_SALUTATION_001
  text_region_type: PARAGRAPH
  pattern: "Aen d' Edele Heer [PER] [DEN] en d' Edele Heeren [ORG] Van [LOC]"
  description: Formal salutation to Governor General and Council
  example:
  - "Aen d' Edele Heer Johannes Camphuijs Gouverneur Generael en d' Edele Heeren Raden Van India"

# Reports of parades
- pattern_id: FP_PARADE_001
  text_region_type: PARAGRAPH
  # NOTE(review): "gequaliliceerde" may be a transcription slip for
  # "gequalificeerde" - confirm against the source scan before changing.
  pattern: "[DAT] verschynen binnen [LOC] de respective [ORG] benevens alle de andre [DEN] soo van [ORG] als de [ORG] mitsgaders oock de gequaliliceerde [ORG] [DEN] en meest allerhande onser [DEN]"
  description: Report of military or official parade
  # The example is a template with descriptive placeholders, not a corpus quote.
  example:
  - "[Date] verschynen binnen [location] de respective [organization] benevens alle de andre [people] soo van [org] als de [org] mitsgaders oock de gequaliliceerde [org] [people] en meest allerhande onser [people]"

# Reports on shipping - Arrival
- pattern_id: FP_SHIPPING_001
  text_region_type: PARAGRAPH
  pattern: "Een [DAT] komt alhier direct uyt [LOC] te verschynen de fluyt, [ORG] eenlyck medebrengende een [REF] der [REF], gedaghtekent [DAT] waervan de [REF] op den [DAT] per het jaght [ORG] hier al aengebraght zy, ende bestaet de medegebraghte ladinge van dese fluyt in:"
  description: Report of ship arrival with cargo manifest
  # The example is an abbreviated template (trailing "..."), not a full quote.
  example:
  - "Een [date] komt alhier direct uyt [location] te verschynen de fluyt, [ship name] eenlyck medebrengende een [document] der [documents], gedaghtekent [date]..."

- pattern_id: FP_SHIPPING_002
  text_region_type: PARAGRAPH
  pattern: "[DAT] arriveert te deser rheede direct uyt [LOC] de fluyt [ORG], zynde met de bodems [ORG] en [ORG] en de fluyt [ORG], op [DAT] geciteert tegelyck uyt de [LOC] herwaerts gestevent"
  description: Report of multiple ships arriving together
  example:
  - "Tegen den avont arriveert te deser rheede direct uyt Bengale de fluyt Swanenburgh, zynde met de bodems Goet Begin en Beemster en de fluyt Wimmenum, op gisteren geciteert tegelyck uyt de Ganges herwaerts gestevent"

# Reports on delegations
- pattern_id: FP_DELEGATION_001
  text_region_type: PARAGRAPH
  pattern: "[DAT] komt den [DEN] [PER] binnen de [DEN] van den [PER] tot [LOC] en soomede die van den [PER] op [LOC]"
  description: Report of diplomatic delegation arrival
  # The example is a template with descriptive placeholders, not a corpus quote.
  example:
  - "[Date] komt den [title] [person] binnen de [title] van den [person] tot [location] en soomede die van den [person] op [location]"

# Reports on correspondence - General
- pattern_id: FP_CORRESPONDENCE_001
  text_region_type: PARAGRAPH
  pattern: "zijnde de [REF] van het [REF] door den [PER] aen ons gesonden als het [REF] van [PER]"
  description: Description of received correspondence
  example:
  - "zijnde de copie van het briefie door den luijtenant grevingh aen ons gesonden als het originele geschrift vanden Soesoehoenangh"

- pattern_id: FP_CORRESPONDENCE_002
  text_region_type: PARAGRAPH
  pattern: "Dese is van [PER] [DEN] van den opgenoemden [PER]"
  description: Identification of letter author
  example:
  - "Dese is van Aria Wierat Maddija broeder van den opgenoemden Manco Nagara"

- pattern_id: FP_CORRESPONDENCE_003
  text_region_type: PARAGRAPH
  pattern: "De tweede is van diverse inhoud, en spreekt int bijsonder van de [ORG]"
  description: Summary of letter content
  example:
  - "De tweede is van diverse inhoud, en spreekt int bijsonder van de Tegaalse Regeringe"

- pattern_id: FP_CORRESPONDENCE_004
  text_region_type: PARAGRAPH
  pattern: "Voorts waren nog eenige [REF] daar nevens gevoegd, dewelke volgens het [REF] gerespondeerd hebben"
  description: Reference to attached documents
  example:
  - "Voorts waren nog eenige bijlagen daar nevens gevoegd, dewelke volgens het regiter gerespondeerd hebben"

# Correspondence receipt and dispatch notations
- pattern_id: FP_CORRESPONDENCE_005
  text_region_type: PARAGRAPH
  pattern: "[REF] den [DAT] ontfangen en als voren door [PER]"
  description: Receipt notation of translated document
  example:
  - "Translaat briefje den 15 septb:r 1679 ontfangen en als voren door Crain glisson"

- pattern_id: FP_CORRESPONDENCE_006
  text_region_type: PARAGRAPH
  pattern: "[REF] gaande p:r den [DEN] [PER] gaande van hier naar [LOC] aen haar Ho: Ed:le de H:r [PER] [DEN] van [LOC]"
  description: Register of outgoing correspondence
  example:
  - "Registertie der papieren gaande p:r den corporaal H: Simson gaande van hier naar batavia aen haar Ho: Ed:le de H:r Rijckloff van goens gouvern:r generaal van Nederlant India"

- pattern_id: FP_CORRESPONDENCE_007
  text_region_type: PARAGRAPH
  pattern: "Per ' t schip [ORG] ontfangen den [DAT]"
  description: Simple receipt notation by ship
  example:
  - "Per ' t schip 't eylant mauritius ontfangen den 3 novemb:r a:o 1679"

- pattern_id: FP_CORRESPONDENCE_008
  text_region_type: PARAGRAPH
  pattern: "ontfangen tot [LOC] den [DAT] per d' fluijt [ORG]"
  description: Receipt notation with location
  example:
  - "ontfangen tot Batavia den 30 maij 1684 per d' fluijt Delfshaven"

# Reports (general)
- pattern_id: FP_REPORT_001
  text_region_type: PARAGRAPH
  pattern: "[REF] door [PER] aan den [DEN] [PER] gedaen"
  description: General report attribution
  # The example is a template with descriptive placeholders, not a corpus quote.
  example:
  - "[Report] door [person] aan den [title] [person] gedaen"

# Indices - Document lists
- pattern_id: FP_INDEX_001
  text_region_type: TABLE
  pattern: "[REF] door den [PER] en [PER] aen [PER] gesz: in dato [DAT]"
  description: Index entry with multiple authors
  # The example is a template with descriptive placeholders, not a corpus quote.
  example:
  - "[Document type] door den [person] en [person] aen [person] gesz: in dato [date]"

- pattern_id: FP_INDEX_002
  text_region_type: TABLE
  # NOTE(review): the pattern spells "aan den" while the example below has
  # "aen den" - confirm which spelling the matcher should expect.
  pattern: "[REF] door den [PER] tot [LOC] aan den [PER] ges:"
  description: Index entry with location and recipient
  example:
  - "Twee Translaat Javaense brieffies door den deman Timbanantingh tot bandong goelongon aen den Cap:n ruijs ges:"

- pattern_id: FP_INDEX_003
  text_region_type: TABLE
  pattern: "[REF] van gem: [PER] en [PER] en den [ORG] tot [LOC] aen [ORG] tot [LOC] gez: in dato [DAT]"
  description: Index entry for missive with full attribution
  example:
  - "Missive van gem: Cap:n ruijs en Coopman samson en den Raad tot Chirrebon aen haer Ed:le tot batavia gez: in dato 23 feb:r 1684"

- pattern_id: FP_INDEX_004
  text_region_type: TABLE
  pattern: "[REF] van een aengehaelt vaertuijgh tot [LOC] toe bekorende den [PER] dato [DAT]"
  description: Index entry for ship report
  example:
  - "Berigt van een aengehaelt vaertuijgh tot paccalongan toe bekorende den sabandaer sacradaer dato 17 Meij 1684"

# Treaties and daily registers
- pattern_id: FP_TREATY_001
  # HEADING (a title line) is distinct from the HEADER type used by the
  # FP_HEADER_* entries.
  text_region_type: HEADING
  pattern: "[REF] waerinne werden aangeteend de voornaemste voorallen te deser Plaetze met een beknopte inzertie van alle de [REF] die Gedurende dit [DAT] na [LOC] en [LOC] geschreven, en van daer ontfangen zijn, mitsgaders oock de [REF] van [REF] van de [DEN] en [DEN] etc."
  description: Title of daily register (Dagh Register)
  example:
  - "Batavias Dagh Register waerinne werden aangeteend de voornaemste voorallen te deser Plaetze met een beknopte inzertie van alle de brieven die Gedurende dit loopende Jaer na Javas oostcust en Bantam geschreven, en van daer ontfangen zijn, mitsgaders oock de Translaten van Hoofze biefkens van de Indiaenze vorsten en Coningen etc."

# Muster lists
- pattern_id: FP_MUSTER_001
  text_region_type: TABLE
  # NOTE(review): the example below has two officer lines and no leading
  # "Den", while the pattern expects three lines starting with "Den" -
  # confirm the intended shape.
  pattern: "Den [DEN] [PER]\n[DEN] [PER]\n[DEN] [PER]\nsterck [QTY]"
  description: Military muster list format
  example:
  - "Luijtenant adolf winckelaar.\nvaandrigh Joannes van Buijtenhem\nsterck 42. Coppen"

- pattern_id: FP_MUSTER_002
  text_region_type: TABLE
  pattern: "Te samen in't geheel van [QTY]"
  description: Total count in muster list
  example:
  - "Te samen in't geheel van 475."

- pattern_id: FP_MUSTER_003
  text_region_type: TABLE
  # Separator is an ASCII hyphen so the literal pattern agrees with its example.
  pattern: "Coppen weder terugge gecomen - [QTY]"
  description: Return count in muster list
  example:
  - "Coppen weder terugge gecomen - 314. Coppen"

# Extract references
- pattern_id: FP_EXTRACT_001
  text_region_type: PARAGRAPH
  pattern: "gelijck bij 't [REF] breeder staat te leesen"
  description: Reference to extract for more details
  # The example is a template with a descriptive placeholder, not a corpus quote.
  example:
  - "gelijck bij 't [extract] breeder staat te leesen"

# Marginalia
- pattern_id: FP_MARGINALIA_001
  text_region_type: MARGINALIA
  pattern: "de affgekomen boot om de schuijt vande [LOC] etc. op te soeken vrugteloos weder gekeert"
  description: Note about unsuccessful search mission
  example:
  - "de affgekomen boot om de schuijt vande Maronda etc. op te soeken vrugteloos weder gekeert"

- pattern_id: FP_MARGINALIA_002
  text_region_type: MARGINALIA
  pattern: "Nieuwe Jaers wensinge van den gewesen [DEN] van [LOC] [PER]"
  description: Note about New Year's greeting
  example:
  - "Nieuwe Jaers wensinge van den gewesen Con:k van Goa Crain bissee"

# Resolutions
- pattern_id: FP_RESOLUTION_001
  text_region_type: PARAGRAPH
  pattern: "[DAT] ter vergaderinge onder 't nalesen van [REF], is goet gevonden, den [PER]"
  description: Resolution format with decision
  # The example is a template with descriptive placeholders, not a corpus quote.
  example:
  - "[Date] ter vergaderinge onder 't nalesen van [document], is goet gevonden, den [person]"

# Entity type abbreviations used in patterns:
# [PER] = PERSON
# [LOC] = PLACE
# [ORG] = ORGANISATION
# [DEN] = DENOMINATION
# [QTY] = QUANTITY
# [DAT] = TEMPORAL_REFERENCE (date/time)
# [REF] = TEXTUAL_REFERENCE

# Free-text notes; folded scalar (>-) joins the lines below into one paragraph
# with no trailing newline.
usage_notes: >-
  These formulaic phrases are used to distinguish different types of text
  regions using regular expressions within PAGE-xml files. Over a million
  of these phrases have been compiled from the East India Company archives.
  The patterns use entity type placeholders to create flexible matching rules
  that can identify document structure and content type automatically.