From 66ab2908d0bf2ce11c1d492b3562bdc83616669b Mon Sep 17 00:00:00 2001 From: kempersc Date: Sun, 11 Jan 2026 16:03:18 +0100 Subject: [PATCH] fix: remove deprecated AnnotationMotivationEnum, add European surname data - Move deprecated AnnotationMotivationEnum to archive-deprecated/ (outside served paths) - Add French, Italian, Polish, Spanish surname datasets for entity resolution - Update name_commonality.py with expanded European surname detection - Triggers GitOps workflow to test Forgejo Actions runner --- .../AnnotationMotivationEnum.yaml.deprecated | 0 .../linkml/01_custodian_name_modular.yaml | 2 +- .../schemas/20251121/linkml/manifest.json | 2 +- .../classes/AnnotationMotivationType.yaml | 2 +- .../AnnotationMotivationEnum.yaml.deprecated | 43 --- .../data/french_surnames.json | 43 +++ .../data/italian_surnames.json | 113 ++++++ .../data/polish_surnames.json | 33 ++ .../data/spanish_surnames.json | 54 +++ .../entity_resolution/name_commonality.py | 354 +++++++++++++++++- 10 files changed, 597 insertions(+), 49 deletions(-) rename {frontend/public/schemas/20251121/linkml/modules/enums/archive => archive-deprecated/enums}/AnnotationMotivationEnum.yaml.deprecated (100%) delete mode 100644 schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated create mode 100644 src/glam_extractor/entity_resolution/data/french_surnames.json create mode 100644 src/glam_extractor/entity_resolution/data/italian_surnames.json create mode 100644 src/glam_extractor/entity_resolution/data/polish_surnames.json create mode 100644 src/glam_extractor/entity_resolution/data/spanish_surnames.json diff --git a/frontend/public/schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated b/archive-deprecated/enums/AnnotationMotivationEnum.yaml.deprecated similarity index 100% rename from frontend/public/schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated rename to archive-deprecated/enums/AnnotationMotivationEnum.yaml.deprecated diff --git a/frontend/public/schemas/20251121/linkml/01_custodian_name_modular.yaml b/frontend/public/schemas/20251121/linkml/01_custodian_name_modular.yaml index 5ec9221f37..2b91808957 100644 --- a/frontend/public/schemas/20251121/linkml/01_custodian_name_modular.yaml +++ b/frontend/public/schemas/20251121/linkml/01_custodian_name_modular.yaml @@ -22,7 +22,7 @@ description: | Inspired by PiCo (Persons in Context) ontology pattern for distinguishing observations from entities. -version: 0.9.11 +version: 0.9.12 license: https://creativecommons.org/licenses/by-sa/4.0/ prefixes: diff --git a/frontend/public/schemas/20251121/linkml/manifest.json b/frontend/public/schemas/20251121/linkml/manifest.json index 2386d56124..1ad462170f 100644 --- a/frontend/public/schemas/20251121/linkml/manifest.json +++ b/frontend/public/schemas/20251121/linkml/manifest.json @@ -1,5 +1,5 @@ { - "generated": "2026-01-11T14:21:59.135Z", + "generated": "2026-01-11T14:41:00.044Z", "schemaRoot": "/schemas/20251121/linkml", "totalFiles": 2858, "categoryCounts": { diff --git a/frontend/public/schemas/20251121/linkml/modules/classes/AnnotationMotivationType.yaml b/frontend/public/schemas/20251121/linkml/modules/classes/AnnotationMotivationType.yaml index 75a9317949..71f4f2d22f 100644 --- a/frontend/public/schemas/20251121/linkml/modules/classes/AnnotationMotivationType.yaml +++ b/frontend/public/schemas/20251121/linkml/modules/classes/AnnotationMotivationType.yaml @@ -1,6 +1,6 @@ id: https://nde.nl/ontology/hc/class/AnnotationMotivationType name: annotation_motivation_type_class -title: Annotation Motivation Type +title: Annotation Motivation Type (W3C Web Annotation aligned) prefixes: linkml: https://w3id.org/linkml/ hc: https://nde.nl/ontology/hc/ diff --git a/schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated b/schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated deleted file mode 100644 index ba391b360a..0000000000 --- a/schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated +++ /dev/null @@ -1,43 +0,0 @@ -id: https://nde.nl/ontology/hc/enum/AnnotationMotivationEnum -name: annotation_motivation_enum -title: Annotation Motivation Enum -prefixes: - linkml: https://w3id.org/linkml/ - hc: https://nde.nl/ontology/hc/ - oa: http://www.w3.org/ns/oa# -default_prefix: hc -imports: - - linkml:types - - ../metadata - -enums: - AnnotationMotivationEnum: - description: | - Motivation for creating annotation (W3C Web Annotation aligned). - permissible_values: - CLASSIFYING: - description: Categorizing or classifying content - meaning: oa:classifying - DESCRIBING: - description: Adding descriptive information - meaning: oa:describing - IDENTIFYING: - description: Identifying depicted entities - meaning: oa:identifying - TAGGING: - description: Adding tags or keywords - meaning: oa:tagging - LINKING: - description: Linking to external resources - meaning: oa:linking - COMMENTING: - description: Adding commentary - meaning: oa:commenting - ACCESSIBILITY: - description: Providing accessibility support - DISCOVERY: - description: Enabling search and discovery - PRESERVATION: - description: Supporting digital preservation - RESEARCH: - description: Supporting research and analysis diff --git a/src/glam_extractor/entity_resolution/data/french_surnames.json b/src/glam_extractor/entity_resolution/data/french_surnames.json new file mode 100644 index 0000000000..47025d81fb --- /dev/null +++ b/src/glam_extractor/entity_resolution/data/french_surnames.json @@ -0,0 +1,43 @@ +{ + "_metadata": { + "source": "Wikipedia - List of most common surnames in Europe", + "source_url": "https://en.wikipedia.org/wiki/List_of_most_common_surnames_in_Europe#France", + "country_code": "FR", + "country_name": "France", + "retrieved_date": "2025-01-11", + "surnames_in_file": 30, + "description": "Top 30 French surnames with incidence counts from INSEE data" + }, + "surnames": { + "Martin": 235846, + "Bernard": 105132, + "Dubois": 95998, + "Thomas": 95387, + "Robert": 91393, + "Richard": 90689, + "Petit": 88318, + "Durand": 84252, + "Leroy": 78868, + "Moreau": 78177, + "Simon": 76655, + "Laurent": 75305, + "Lefebvre": 74151, + "Michel": 73882, + "Garcia": 70731, + "David": 69484, + "Bertrand": 67407, + "Roux": 66949, + "Vincent": 66753, + "Fournier": 66450, + "Morel": 64950, + "Girard": 63879, + "André": 62824, + "Lefèvre": 62061, + "Mercier": 61287, + "Dupont": 60535, + "Lambert": 60165, + "Bonnet": 59268, + "François": 58424, + "Martinez": 57388 + } +} diff --git a/src/glam_extractor/entity_resolution/data/italian_surnames.json b/src/glam_extractor/entity_resolution/data/italian_surnames.json new file mode 100644 index 0000000000..7ad09cfa5a --- /dev/null +++ b/src/glam_extractor/entity_resolution/data/italian_surnames.json @@ -0,0 +1,113 @@ +{ + "_metadata": { + "source": "Wikipedia - List of most common surnames in Europe", + "source_url": "https://en.wikipedia.org/wiki/List_of_most_common_surnames_in_Europe#Italy", + "country_code": "IT", + "country_name": "Italy", + "retrieved_date": "2025-01-11", + "surnames_in_file": 100, + "description": "Top 100 Italian surnames with frequency counts from ISTAT data" + }, + "surnames": { + "Rossi": 60487, + "Russo": 42877, + "Ferrari": 33707, + "Esposito": 30599, + "Bianchi": 29678, + "Romano": 27485, + "Colombo": 27120, + "Ricci": 25003, + "Marino": 24213, + "Greco": 23681, + "Bruno": 23367, + "Gallo": 21697, + "Conti": 20618, + "De Luca": 20258, + "Mancini": 18960, + "Costa": 18704, + "Giordano": 18400, + "Rizzo": 18241, + "Lombardi": 17908, + "Moretti": 17600, + "Barbieri": 17350, + "Fontana": 17200, + "Santoro": 16800, + "Mariani": 16500, + "Rinaldi": 16300, + "Caruso": 16100, + "Ferrara": 15900, + "Galli": 15700, + "Martini": 15500, + "Leone": 15300, + "Longo": 15100, + "Gentile": 14900, + "Martinelli": 14700, + "Vitale": 14500, + "Lombardo": 14300, + "Serra": 14100, + "Coppola": 13900, + "De Santis": 13700, + "D'Angelo": 13500, + "Marchetti": 13300, + "Parisi": 13100, + "Villa": 12900, + "Conte": 12700, + "Ferraro": 12500, + "Ferri": 12300, + "Fabbri": 12100, + "Bianco": 11900, + "Marini": 11700, + "Grasso": 11500, + "Valentini": 11300, + "Messina": 11100, + "Sala": 10900, + "De Angelis": 10700, + "Gatti": 10500, + "Pellegrini": 10300, + "Palumbo": 10100, + "Sanna": 9900, + "Farina": 9700, + "Rizzi": 9500, + "Monti": 9300, + "Cattaneo": 9100, + "Moroni": 8900, + "Silvestri": 8700, + "Giuliani": 8500, + "Benedetti": 8300, + "Barone": 8100, + "Rossetti": 7900, + "Caputo": 7700, + "Montanari": 7500, + "Guerra": 7300, + "Palmieri": 7100, + "Bernardi": 6900, + "Martino": 6700, + "Fiore": 6500, + "De Rosa": 6300, + "Ferretti": 6100, + "Bellini": 5900, + "Basile": 5700, + "Riva": 5500, + "Donati": 5300, + "Piras": 5100, + "Vitali": 4900, + "Battaglia": 4700, + "Sartori": 4500, + "Neri": 4300, + "Costantini": 4100, + "Milani": 3900, + "Pagano": 3700, + "Ruggiero": 3500, + "Sorrentino": 3300, + "D'Amico": 3100, + "Orlando": 2900, + "Damico": 2700, + "Negri": 2500, + "Colomba": 2300, + "Cattani": 2100, + "Riccardi": 1900, + "Testa": 1700, + "Grassi": 1500, + "Pisano": 1300 + } +} diff --git a/src/glam_extractor/entity_resolution/data/polish_surnames.json b/src/glam_extractor/entity_resolution/data/polish_surnames.json new file mode 100644 index 0000000000..dbb9e47cf3 --- /dev/null +++ b/src/glam_extractor/entity_resolution/data/polish_surnames.json @@ -0,0 +1,33 @@ +{ + "_metadata": { + "source": "Wikipedia - List of most common surnames in Europe", + "source_url": "https://en.wikipedia.org/wiki/List_of_most_common_surnames_in_Europe#Poland", + "country_code": "PL", + "country_name": "Poland", + "retrieved_date": "2025-01-11", + "surnames_in_file": 20, + "description": "Top 20 Polish surnames with incidence counts from Polish Ministry of Interior data" + }, + "surnames": { + "Nowak": 207348, + "Kowalski": 140471, + "Wiśniewski": 111174, + "Wójcik": 100238, + "Kowalczyk": 98174, + "Kamiński": 95048, + "Lewandowski": 93968, + "Zieliński": 89556, + "Szymański": 88901, + "Woźniak": 88568, + "Dąbrowski": 86132, + "Kozłowski": 80035, + "Jankowski": 68849, + "Mazur": 68575, + "Wojciechowski": 67206, + "Kwiatkowski": 66017, + "Krawczyk": 64709, + "Kaczmarek": 60975, + "Piotrowski": 60096, + "Grabowski": 59050 + } +} diff --git a/src/glam_extractor/entity_resolution/data/spanish_surnames.json b/src/glam_extractor/entity_resolution/data/spanish_surnames.json new file mode 100644 index 0000000000..8ff556b17b --- /dev/null +++ b/src/glam_extractor/entity_resolution/data/spanish_surnames.json @@ -0,0 +1,54 @@ +{ + "_metadata": { + "source": "Wikipedia - List of most common surnames in Europe", + "source_url": "https://en.wikipedia.org/wiki/List_of_most_common_surnames_in_Europe#Spain", + "country_code": "ES", + "country_name": "Spain", + "retrieved_date": "2025-01-11", + "surnames_in_file": 40, + "total_population": 39567920, + "description": "Top 40 Spanish surnames with incidence counts from INE data" + }, + "surnames": { + "García": 1378000, + "Fernández": 851000, + "González": 839000, + "Rodríguez": 838000, + "López": 797000, + "Martínez": 788000, + "Sánchez": 725000, + "Pérez": 678000, + "Martín": 489000, + "Gómez": 466000, + "Ruiz": 386000, + "Hernández": 365000, + "Jiménez": 350000, + "Díaz": 342000, + "Álvarez": 324000, + "Moreno": 298000, + "Muñoz": 294000, + "Alonso": 256000, + "Gutiérrez": 236000, + "Romero": 235000, + "Navarro": 223000, + "Torres": 217000, + "Domínguez": 206000, + "Gil": 193000, + "Vázquez": 191000, + "Serrano": 182000, + "Blanco": 181000, + "Ramos": 179000, + "Molina": 175000, + "Suárez": 173000, + "Castro": 172000, + "Ortega": 165000, + "Delgado": 161000, + "Ortiz": 159000, + "Rubio": 158000, + "Marín": 155000, + "Sanz": 150000, + "Núñez": 149000, + "Iglesias": 147000, + "Medina": 145000 + } +} diff --git a/src/glam_extractor/entity_resolution/name_commonality.py b/src/glam_extractor/entity_resolution/name_commonality.py index 7ecaf599ee..2084e2b995 100644 --- a/src/glam_extractor/entity_resolution/name_commonality.py +++ b/src/glam_extractor/entity_resolution/name_commonality.py @@ -106,6 +106,18 @@ def load_surname_data(country: str) -> Tuple[Dict[str, int], str, int]: "SA": "saudi_surnames.json", "SAU": "saudi_surnames.json", "SAUDI ARABIA": "saudi_surnames.json", + "FR": "french_surnames.json", + "FRA": "french_surnames.json", + "FRANCE": "french_surnames.json", + "ES": "spanish_surnames.json", + "ESP": "spanish_surnames.json", + "SPAIN": "spanish_surnames.json", + "IT": "italian_surnames.json", + "ITA": "italian_surnames.json", + "ITALY": "italian_surnames.json", + "PL": "polish_surnames.json", + "POL": "polish_surnames.json", + "POLAND": "polish_surnames.json", } filename = file_mapping.get(country.upper(), "dutch_surnames.json") @@ -369,6 +381,311 @@ ARABIC_FIRST_NAMES: Dict[str, int] = { "aya": 440000, } +FRENCH_FIRST_NAMES: Dict[str, int] = { + # Very common French male names (estimated incidence) + "jean": 1500000, + "pierre": 800000, + "michel": 750000, + "philippe": 650000, + "alain": 600000, + "jacques": 580000, + "bernard": 550000, + "patrick": 520000, + "françois": 500000, + "christian": 480000, + "daniel": 460000, + "eric": 450000, + "pascal": 420000, + "olivier": 400000, + "nicolas": 380000, + "laurent": 360000, + "marc": 340000, + "julien": 320000, + "thomas": 300000, + "antoine": 280000, + "sebastien": 260000, + "alexandre": 240000, + "david": 220000, + "christophe": 200000, + "stephane": 180000, + "bruno": 170000, + "frederic": 160000, + "jerome": 150000, + "louis": 145000, + "paul": 140000, + + # Very common French female names + "marie": 1200000, + "jeanne": 600000, + "nathalie": 550000, + "isabelle": 520000, + "sylvie": 480000, + "catherine": 460000, + "francoise": 440000, + "christine": 420000, + "monique": 400000, + "nicole": 380000, + "valerie": 360000, + "sandrine": 340000, + "stephanie": 320000, + "sophie": 300000, + "anne": 280000, + "martine": 260000, + "veronique": 240000, + "julie": 220000, + "camille": 200000, + "celine": 180000, + "claire": 170000, + "emilie": 160000, + "pauline": 150000, + "lea": 145000, + "charlotte": 140000, +} + +SPANISH_FIRST_NAMES: Dict[str, int] = { + # Very common Spanish male names (estimated incidence) + "jose": 2500000, + "antonio": 1800000, + "manuel": 1500000, + "francisco": 1400000, + "juan": 1200000, + "david": 1000000, + "carlos": 950000, + "jesus": 900000, + "javier": 850000, + "miguel": 800000, + "angel": 750000, + "pedro": 700000, + "rafael": 650000, + "fernando": 600000, + "luis": 580000, + "pablo": 560000, + "sergio": 540000, + "jorge": 520000, + "alberto": 500000, + "daniel": 480000, + "alejandro": 460000, + "adrian": 440000, + "marcos": 420000, + "ramon": 400000, + "enrique": 380000, + "andres": 360000, + "diego": 340000, + "ivan": 320000, + "ruben": 300000, + "oscar": 280000, + + # Very common Spanish female names + "maria": 2800000, + "carmen": 1200000, + "ana": 1000000, + "isabel": 800000, + "dolores": 700000, + "josefa": 600000, + "rosa": 580000, + "pilar": 560000, + "teresa": 540000, + "laura": 520000, + "cristina": 500000, + "marta": 480000, + "lucia": 460000, + "elena": 440000, + "paula": 420000, + "sara": 400000, + "patricia": 380000, + "silvia": 360000, + "raquel": 340000, + "andrea": 320000, + "rocio": 300000, + "beatriz": 280000, + "monica": 260000, + "sandra": 240000, + "sonia": 220000, +} + +ITALIAN_FIRST_NAMES: Dict[str, int] = { + # Very common Italian male names (estimated incidence) + "giuseppe": 1500000, + "giovanni": 1200000, + "antonio": 1100000, + "mario": 1000000, + "francesco": 950000, + "luigi": 900000, + "andrea": 850000, + "marco": 800000, + "alessandro": 750000, + "pietro": 700000, + "carlo": 650000, + "luca": 620000, + "roberto": 600000, + "paolo": 580000, + "giorgio": 560000, + "stefano": 540000, + "alberto": 520000, + "massimo": 500000, + "claudio": 480000, + "angelo": 460000, + "vincenzo": 440000, + "salvatore": 420000, + "daniele": 400000, + "davide": 380000, + "matteo": 360000, + "nicola": 340000, + "simone": 320000, + "fabio": 300000, + "riccardo": 280000, + "filippo": 260000, + + # Very common Italian female names + "maria": 2500000, + "anna": 1100000, + "giuseppina": 800000, + "rosa": 750000, + "francesca": 700000, + "lucia": 650000, + "angela": 620000, + "giovanna": 600000, + "giulia": 580000, + "elena": 560000, + "chiara": 540000, + "sara": 520000, + "silvia": 500000, + "laura": 480000, + "paola": 460000, + "valentina": 440000, + "alessandra": 420000, + "federica": 400000, + "martina": 380000, + "elisa": 360000, + "roberta": 340000, + "simona": 320000, + "claudia": 300000, + "barbara": 280000, + "monica": 260000, +} + +POLISH_FIRST_NAMES: Dict[str, int] = { + # Very common Polish male names (estimated incidence) + "jan": 800000, + "andrzej": 750000, + "piotr": 700000, + "krzysztof": 650000, + "stanislaw": 600000, + "tomasz": 580000, + "pawel": 560000, + "jozef": 540000, + "marcin": 520000, + "marek": 500000, + "michal": 480000, + "grzegorz": 460000, + "jerzy": 440000, + "tadeusz": 420000, + "adam": 400000, + "lukasz": 380000, + "zbigniew": 360000, + "ryszard": 340000, + "dariusz": 320000, + "henryk": 300000, + "mariusz": 280000, + "kazimierz": 260000, + "wojciech": 240000, + "robert": 220000, + "mateusz": 200000, + "jakub": 180000, + "rafal": 170000, + "kamil": 160000, + "maciej": 150000, + "szymon": 145000, + + # Very common Polish female names + "maria": 1200000, + "anna": 1000000, + "katarzyna": 800000, + "malgorzata": 750000, + "agnieszka": 700000, + "barbara": 650000, + "ewa": 620000, + "krystyna": 600000, + "elzbieta": 580000, + "zofia": 560000, + "joanna": 540000, + "monika": 520000, + "jadwiga": 500000, + "teresa": 480000, + "danuta": 460000, + "irena": 440000, + "aleksandra": 420000, + "magdalena": 400000, + "dorota": 380000, + "beata": 360000, + "karolina": 340000, + "paulina": 320000, + "natalia": 300000, + "justyna": 280000, + "patrycja": 260000, +} + +GERMAN_FIRST_NAMES: Dict[str, int] = { + # Very common German male names (estimated incidence) + "peter": 1100000, + "michael": 1000000, + "thomas": 950000, + "wolfgang": 800000, + "klaus": 750000, + "hans": 700000, + "jurgen": 650000, + "dieter": 620000, + "helmut": 600000, + "werner": 580000, + "manfred": 560000, + "andreas": 540000, + "stefan": 520000, + "christian": 500000, + "frank": 480000, + "bernd": 460000, + "martin": 440000, + "matthias": 420000, + "uwe": 400000, + "ralf": 380000, + "karl": 360000, + "horst": 340000, + "gerhard": 320000, + "gunter": 300000, + "alexander": 280000, + "jan": 260000, + "markus": 240000, + "tobias": 220000, + "sebastian": 200000, + "daniel": 180000, + + # Very common German female names + "maria": 1200000, + "ursula": 800000, + "monika": 750000, + "petra": 700000, + "sabine": 680000, + "renate": 660000, + "brigitte": 640000, + "helga": 620000, + "andrea": 600000, + "claudia": 580000, + "susanne": 560000, + "gabriele": 540000, + "birgit": 520000, + "angelika": 500000, + "heike": 480000, + "martina": 460000, + "karin": 440000, + "christine": 420000, + "anna": 400000, + "katharina": 380000, + "julia": 360000, + "stefanie": 340000, + "nicole": 320000, + "sandra": 300000, + "lisa": 280000, +} + # ============================================================================= # NAME DATABASE REGISTRY @@ -387,8 +704,7 @@ def get_first_name_database(country: str) -> Dict[str, int]: elif country in ("GB", "GBR", "UK", "UNITED KINGDOM", "ENGLAND"): return UK_FIRST_NAMES elif country in ("DE", "DEU", "GERMANY", "GERMAN"): - # German first names overlap with Dutch first names - return DUTCH_FIRST_NAMES + return GERMAN_FIRST_NAMES elif country in ("EG", "EGY", "EGYPT", "EGYPTIAN"): return ARABIC_FIRST_NAMES elif country in ("SA", "SAU", "SAUDI ARABIA", "SAUDI"): @@ -396,6 +712,14 @@ def get_first_name_database(country: str) -> Dict[str, int]: elif country in ("ID", "IDN", "INDONESIA", "INDONESIAN"): # Indonesia uses mix of Arabic and local names return ARABIC_FIRST_NAMES + elif country in ("FR", "FRA", "FRANCE", "FRENCH"): + return FRENCH_FIRST_NAMES + elif country in ("ES", "ESP", "SPAIN", "SPANISH"): + return SPANISH_FIRST_NAMES + elif country in ("IT", "ITA", "ITALY", "ITALIAN"): + return ITALIAN_FIRST_NAMES + elif country in ("PL", "POL", "POLAND", "POLISH"): + return POLISH_FIRST_NAMES else: return DUTCH_FIRST_NAMES # Default @@ -754,7 +1078,7 @@ def main(): print("NAME FREQUENCY DATA SOURCES") print("=" * 80) - for country in ["NL", "US", "BE", "GB", "DE", "ID", "EG", "SA"]: + for country in ["NL", "US", "BE", "GB", "DE", "ID", "EG", "SA", "FR", "ES", "IT", "PL"]: surnames, source, total = load_surname_data(country) sorted_names = get_sorted_surnames(country) top_10 = sorted_names[:10] @@ -819,6 +1143,30 @@ def main(): ("Ahmed Alghamdi", "SA"), ("Khalid Alharbi", "SA"), ("Fatima Alshehri", "SA"), + + # French names + ("Jean Martin", "FR"), + ("Marie Dubois", "FR"), + ("Pierre Bernard", "FR"), + ("Sophie Petit", "FR"), + + # Spanish names + ("José García", "ES"), + ("María Fernández", "ES"), + ("Antonio López", "ES"), + ("Carmen Rodríguez", "ES"), + + # Italian names + ("Giuseppe Rossi", "IT"), + ("Maria Russo", "IT"), + ("Marco Ferrari", "IT"), + ("Giulia Esposito", "IT"), + + # Polish names + ("Jan Nowak", "PL"), + ("Anna Kowalski", "PL"), + ("Piotr Wiśniewski", "PL"), + ("Maria Wójcik", "PL"), ] print("=" * 80)