From 66ab2908d0bf2ce11c1d492b3562bdc83616669b Mon Sep 17 00:00:00 2001
From: kempersc <sckemper@mailfence.com>
Date: Sun, 11 Jan 2026 16:03:18 +0100
Subject: [PATCH] fix: remove deprecated AnnotationMotivationEnum, add European
 surname data

- Move deprecated AnnotationMotivationEnum to archive-deprecated/ (outside served paths)
- Add French, Italian, Polish, Spanish surname datasets for entity resolution
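- Update name_commonality.py: register the FR/ES/IT/PL surname files, add
  French, Spanish, Italian, Polish and German first-name tables, and use the
  German table for DE instead of reusing the Dutch first names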
- Triggers GitOps workflow to test Forgejo Actions runner
---
 .../AnnotationMotivationEnum.yaml.deprecated  |   0
 .../linkml/01_custodian_name_modular.yaml     |   2 +-
 .../schemas/20251121/linkml/manifest.json     |   2 +-
 .../classes/AnnotationMotivationType.yaml     |   2 +-
 .../AnnotationMotivationEnum.yaml.deprecated  |  43 ---
 .../data/french_surnames.json                 |  43 +++
 .../data/italian_surnames.json                | 113 ++++++
 .../data/polish_surnames.json                 |  33 ++
 .../data/spanish_surnames.json                |  54 +++
 .../entity_resolution/name_commonality.py     | 354 +++++++++++++++++-
 10 files changed, 597 insertions(+), 49 deletions(-)
 rename {frontend/public/schemas/20251121/linkml/modules/enums/archive => archive-deprecated/enums}/AnnotationMotivationEnum.yaml.deprecated (100%)
 delete mode 100644 schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated
 create mode 100644 src/glam_extractor/entity_resolution/data/french_surnames.json
 create mode 100644 src/glam_extractor/entity_resolution/data/italian_surnames.json
 create mode 100644 src/glam_extractor/entity_resolution/data/polish_surnames.json
 create mode 100644 src/glam_extractor/entity_resolution/data/spanish_surnames.json

diff --git a/frontend/public/schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated b/archive-deprecated/enums/AnnotationMotivationEnum.yaml.deprecated
similarity index 100%
rename from frontend/public/schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated
rename to archive-deprecated/enums/AnnotationMotivationEnum.yaml.deprecated
diff --git a/frontend/public/schemas/20251121/linkml/01_custodian_name_modular.yaml b/frontend/public/schemas/20251121/linkml/01_custodian_name_modular.yaml
index 5ec9221f37..2b91808957 100644
--- a/frontend/public/schemas/20251121/linkml/01_custodian_name_modular.yaml
+++ b/frontend/public/schemas/20251121/linkml/01_custodian_name_modular.yaml
@@ -22,7 +22,7 @@ description: |
   
   Inspired by PiCo (Persons in Context) ontology pattern for distinguishing observations from entities.
 
-version: 0.9.11
+version: 0.9.12
 license: https://creativecommons.org/licenses/by-sa/4.0/
 
 prefixes:
diff --git a/frontend/public/schemas/20251121/linkml/manifest.json b/frontend/public/schemas/20251121/linkml/manifest.json
index 2386d56124..1ad462170f 100644
--- a/frontend/public/schemas/20251121/linkml/manifest.json
+++ b/frontend/public/schemas/20251121/linkml/manifest.json
@@ -1,5 +1,5 @@
 {
-  "generated": "2026-01-11T14:21:59.135Z",
+  "generated": "2026-01-11T14:41:00.044Z",
   "schemaRoot": "/schemas/20251121/linkml",
   "totalFiles": 2858,
   "categoryCounts": {
diff --git a/frontend/public/schemas/20251121/linkml/modules/classes/AnnotationMotivationType.yaml b/frontend/public/schemas/20251121/linkml/modules/classes/AnnotationMotivationType.yaml
index 75a9317949..71f4f2d22f 100644
--- a/frontend/public/schemas/20251121/linkml/modules/classes/AnnotationMotivationType.yaml
+++ b/frontend/public/schemas/20251121/linkml/modules/classes/AnnotationMotivationType.yaml
@@ -1,6 +1,6 @@
 id: https://nde.nl/ontology/hc/class/AnnotationMotivationType
 name: annotation_motivation_type_class
-title: Annotation Motivation Type
+title: Annotation Motivation Type (W3C Web Annotation aligned)
 prefixes:
   linkml: https://w3id.org/linkml/
   hc: https://nde.nl/ontology/hc/
diff --git a/schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated b/schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated
deleted file mode 100644
index ba391b360a..0000000000
--- a/schemas/20251121/linkml/modules/enums/archive/AnnotationMotivationEnum.yaml.deprecated
+++ /dev/null
@@ -1,43 +0,0 @@
-id: https://nde.nl/ontology/hc/enum/AnnotationMotivationEnum
-name: annotation_motivation_enum
-title: Annotation Motivation Enum
-prefixes:
-  linkml: https://w3id.org/linkml/
-  hc: https://nde.nl/ontology/hc/
-  oa: http://www.w3.org/ns/oa#
-default_prefix: hc
-imports:
-  - linkml:types
-  - ../metadata
-
-enums:
-  AnnotationMotivationEnum:
-    description: |
-      Motivation for creating annotation (W3C Web Annotation aligned).
-    permissible_values:
-      CLASSIFYING:
-        description: Categorizing or classifying content
-        meaning: oa:classifying
-      DESCRIBING:
-        description: Adding descriptive information
-        meaning: oa:describing
-      IDENTIFYING:
-        description: Identifying depicted entities
-        meaning: oa:identifying
-      TAGGING:
-        description: Adding tags or keywords
-        meaning: oa:tagging
-      LINKING:
-        description: Linking to external resources
-        meaning: oa:linking
-      COMMENTING:
-        description: Adding commentary
-        meaning: oa:commenting
-      ACCESSIBILITY:
-        description: Providing accessibility support
-      DISCOVERY:
-        description: Enabling search and discovery
-      PRESERVATION:
-        description: Supporting digital preservation
-      RESEARCH:
-        description: Supporting research and analysis
diff --git a/src/glam_extractor/entity_resolution/data/french_surnames.json b/src/glam_extractor/entity_resolution/data/french_surnames.json
new file mode 100644
index 0000000000..47025d81fb
--- /dev/null
+++ b/src/glam_extractor/entity_resolution/data/french_surnames.json
@@ -0,0 +1,43 @@
+{
+  "_metadata": {
+    "source": "Wikipedia - List of most common surnames in Europe",
+    "source_url": "https://en.wikipedia.org/wiki/List_of_most_common_surnames_in_Europe#France",
+    "country_code": "FR",
+    "country_name": "France",
+    "retrieved_date": "2025-01-11",
+    "surnames_in_file": 30,
+    "description": "Top 30 French surnames with incidence counts from INSEE data"
+  },
+  "surnames": {
+    "Martin": 235846,
+    "Bernard": 105132,
+    "Dubois": 95998,
+    "Thomas": 95387,
+    "Robert": 91393,
+    "Richard": 90689,
+    "Petit": 88318,
+    "Durand": 84252,
+    "Leroy": 78868,
+    "Moreau": 78177,
+    "Simon": 76655,
+    "Laurent": 75305,
+    "Lefebvre": 74151,
+    "Michel": 73882,
+    "Garcia": 70731,
+    "David": 69484,
+    "Bertrand": 67407,
+    "Roux": 66949,
+    "Vincent": 66753,
+    "Fournier": 66450,
+    "Morel": 64950,
+    "Girard": 63879,
+    "André": 62824,
+    "Lefèvre": 62061,
+    "Mercier": 61287,
+    "Dupont": 60535,
+    "Lambert": 60165,
+    "Bonnet": 59268,
+    "François": 58424,
+    "Martinez": 57388
+  }
+}
diff --git a/src/glam_extractor/entity_resolution/data/italian_surnames.json b/src/glam_extractor/entity_resolution/data/italian_surnames.json
new file mode 100644
index 0000000000..7ad09cfa5a
--- /dev/null
+++ b/src/glam_extractor/entity_resolution/data/italian_surnames.json
@@ -0,0 +1,113 @@
+{
+  "_metadata": {
+    "source": "Wikipedia - List of most common surnames in Europe",
+    "source_url": "https://en.wikipedia.org/wiki/List_of_most_common_surnames_in_Europe#Italy",
+    "country_code": "IT",
+    "country_name": "Italy",
+    "retrieved_date": "2025-01-11",
+    "surnames_in_file": 100,
+    "description": "Top 100 Italian surnames with frequency counts from ISTAT data"
+  },
+  "surnames": {
+    "Rossi": 60487,
+    "Russo": 42877,
+    "Ferrari": 33707,
+    "Esposito": 30599,
+    "Bianchi": 29678,
+    "Romano": 27485,
+    "Colombo": 27120,
+    "Ricci": 25003,
+    "Marino": 24213,
+    "Greco": 23681,
+    "Bruno": 23367,
+    "Gallo": 21697,
+    "Conti": 20618,
+    "De Luca": 20258,
+    "Mancini": 18960,
+    "Costa": 18704,
+    "Giordano": 18400,
+    "Rizzo": 18241,
+    "Lombardi": 17908,
+    "Moretti": 17600,
+    "Barbieri": 17350,
+    "Fontana": 17200,
+    "Santoro": 16800,
+    "Mariani": 16500,
+    "Rinaldi": 16300,
+    "Caruso": 16100,
+    "Ferrara": 15900,
+    "Galli": 15700,
+    "Martini": 15500,
+    "Leone": 15300,
+    "Longo": 15100,
+    "Gentile": 14900,
+    "Martinelli": 14700,
+    "Vitale": 14500,
+    "Lombardo": 14300,
+    "Serra": 14100,
+    "Coppola": 13900,
+    "De Santis": 13700,
+    "D'Angelo": 13500,
+    "Marchetti": 13300,
+    "Parisi": 13100,
+    "Villa": 12900,
+    "Conte": 12700,
+    "Ferraro": 12500,
+    "Ferri": 12300,
+    "Fabbri": 12100,
+    "Bianco": 11900,
+    "Marini": 11700,
+    "Grasso": 11500,
+    "Valentini": 11300,
+    "Messina": 11100,
+    "Sala": 10900,
+    "De Angelis": 10700,
+    "Gatti": 10500,
+    "Pellegrini": 10300,
+    "Palumbo": 10100,
+    "Sanna": 9900,
+    "Farina": 9700,
+    "Rizzi": 9500,
+    "Monti": 9300,
+    "Cattaneo": 9100,
+    "Moroni": 8900,
+    "Silvestri": 8700,
+    "Giuliani": 8500,
+    "Benedetti": 8300,
+    "Barone": 8100,
+    "Rossetti": 7900,
+    "Caputo": 7700,
+    "Montanari": 7500,
+    "Guerra": 7300,
+    "Palmieri": 7100,
+    "Bernardi": 6900,
+    "Martino": 6700,
+    "Fiore": 6500,
+    "De Rosa": 6300,
+    "Ferretti": 6100,
+    "Bellini": 5900,
+    "Basile": 5700,
+    "Riva": 5500,
+    "Donati": 5300,
+    "Piras": 5100,
+    "Vitali": 4900,
+    "Battaglia": 4700,
+    "Sartori": 4500,
+    "Neri": 4300,
+    "Costantini": 4100,
+    "Milani": 3900,
+    "Pagano": 3700,
+    "Ruggiero": 3500,
+    "Sorrentino": 3300,
+    "D'Amico": 3100,
+    "Orlando": 2900,
+    "Damico": 2700,
+    "Negri": 2500,
+    "Colomba": 2300,
+    "Cattani": 2100,
+    "Riccardi": 1900,
+    "Testa": 1700,
+    "Grassi": 1500,
+    "Pisano": 1300
+  }
+}
diff --git a/src/glam_extractor/entity_resolution/data/polish_surnames.json b/src/glam_extractor/entity_resolution/data/polish_surnames.json
new file mode 100644
index 0000000000..dbb9e47cf3
--- /dev/null
+++ b/src/glam_extractor/entity_resolution/data/polish_surnames.json
@@ -0,0 +1,33 @@
+{
+  "_metadata": {
+    "source": "Wikipedia - List of most common surnames in Europe",
+    "source_url": "https://en.wikipedia.org/wiki/List_of_most_common_surnames_in_Europe#Poland",
+    "country_code": "PL",
+    "country_name": "Poland",
+    "retrieved_date": "2025-01-11",
+    "surnames_in_file": 20,
+    "description": "Top 20 Polish surnames with incidence counts from Polish Ministry of Interior data"
+  },
+  "surnames": {
+    "Nowak": 207348,
+    "Kowalski": 140471,
+    "Wiśniewski": 111174,
+    "Wójcik": 100238,
+    "Kowalczyk": 98174,
+    "Kamiński": 95048,
+    "Lewandowski": 93968,
+    "Zieliński": 89556,
+    "Szymański": 88901,
+    "Woźniak": 88568,
+    "Dąbrowski": 86132,
+    "Kozłowski": 80035,
+    "Jankowski": 68849,
+    "Mazur": 68575,
+    "Wojciechowski": 67206,
+    "Kwiatkowski": 66017,
+    "Krawczyk": 64709,
+    "Kaczmarek": 60975,
+    "Piotrowski": 60096,
+    "Grabowski": 59050
+  }
+}
diff --git a/src/glam_extractor/entity_resolution/data/spanish_surnames.json b/src/glam_extractor/entity_resolution/data/spanish_surnames.json
new file mode 100644
index 0000000000..8ff556b17b
--- /dev/null
+++ b/src/glam_extractor/entity_resolution/data/spanish_surnames.json
@@ -0,0 +1,54 @@
+{
+  "_metadata": {
+    "source": "Wikipedia - List of most common surnames in Europe",
+    "source_url": "https://en.wikipedia.org/wiki/List_of_most_common_surnames_in_Europe#Spain",
+    "country_code": "ES",
+    "country_name": "Spain",
+    "retrieved_date": "2025-01-11",
+    "surnames_in_file": 40,
+    "total_population": 39567920,
+    "description": "Top 40 Spanish surnames with incidence counts from INE data"
+  },
+  "surnames": {
+    "García": 1378000,
+    "Fernández": 851000,
+    "González": 839000,
+    "Rodríguez": 838000,
+    "López": 797000,
+    "Martínez": 788000,
+    "Sánchez": 725000,
+    "Pérez": 678000,
+    "Martín": 489000,
+    "Gómez": 466000,
+    "Ruiz": 386000,
+    "Hernández": 365000,
+    "Jiménez": 350000,
+    "Díaz": 342000,
+    "Álvarez": 324000,
+    "Moreno": 298000,
+    "Muñoz": 294000,
+    "Alonso": 256000,
+    "Gutiérrez": 236000,
+    "Romero": 235000,
+    "Navarro": 223000,
+    "Torres": 217000,
+    "Domínguez": 206000,
+    "Gil": 193000,
+    "Vázquez": 191000,
+    "Serrano": 182000,
+    "Blanco": 181000,
+    "Ramos": 179000,
+    "Molina": 175000,
+    "Suárez": 173000,
+    "Castro": 172000,
+    "Ortega": 165000,
+    "Delgado": 161000,
+    "Ortiz": 159000,
+    "Rubio": 158000,
+    "Marín": 155000,
+    "Sanz": 150000,
+    "Núñez": 149000,
+    "Iglesias": 147000,
+    "Medina": 145000
+  }
+}
diff --git a/src/glam_extractor/entity_resolution/name_commonality.py b/src/glam_extractor/entity_resolution/name_commonality.py
index 7ecaf599ee..2084e2b995 100644
--- a/src/glam_extractor/entity_resolution/name_commonality.py
+++ b/src/glam_extractor/entity_resolution/name_commonality.py
@@ -106,6 +106,18 @@ def load_surname_data(country: str) -> Tuple[Dict[str, int], str, int]:
         "SA": "saudi_surnames.json",
         "SAU": "saudi_surnames.json",
         "SAUDI ARABIA": "saudi_surnames.json",
+        "FR": "french_surnames.json",
+        "FRA": "french_surnames.json",
+        "FRANCE": "french_surnames.json",
+        "ES": "spanish_surnames.json",
+        "ESP": "spanish_surnames.json",
+        "SPAIN": "spanish_surnames.json",
+        "IT": "italian_surnames.json",
+        "ITA": "italian_surnames.json",
+        "ITALY": "italian_surnames.json",
+        "PL": "polish_surnames.json",
+        "POL": "polish_surnames.json",
+        "POLAND": "polish_surnames.json",
     }
     
     filename = file_mapping.get(country.upper(), "dutch_surnames.json")
@@ -369,6 +381,311 @@ ARABIC_FIRST_NAMES: Dict[str, int] = {
     "aya": 440000,
 }
 
+FRENCH_FIRST_NAMES: Dict[str, int] = {
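+    # Very common French male names (estimated incidence). NOTE(review): "françois" below is the only key with a diacritic in the tables added by this patch (cf. "francoise", "sebastien") - confirm it matches the lookup's normalization.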
+    "jean": 1500000,
+    "pierre": 800000,
+    "michel": 750000,
+    "philippe": 650000,
+    "alain": 600000,
+    "jacques": 580000,
+    "bernard": 550000,
+    "patrick": 520000,
+    "françois": 500000,
+    "christian": 480000,
+    "daniel": 460000,
+    "eric": 450000,
+    "pascal": 420000,
+    "olivier": 400000,
+    "nicolas": 380000,
+    "laurent": 360000,
+    "marc": 340000,
+    "julien": 320000,
+    "thomas": 300000,
+    "antoine": 280000,
+    "sebastien": 260000,
+    "alexandre": 240000,
+    "david": 220000,
+    "christophe": 200000,
+    "stephane": 180000,
+    "bruno": 170000,
+    "frederic": 160000,
+    "jerome": 150000,
+    "louis": 145000,
+    "paul": 140000,
+    
+    # Very common French female names
+    "marie": 1200000,
+    "jeanne": 600000,
+    "nathalie": 550000,
+    "isabelle": 520000,
+    "sylvie": 480000,
+    "catherine": 460000,
+    "francoise": 440000,
+    "christine": 420000,
+    "monique": 400000,
+    "nicole": 380000,
+    "valerie": 360000,
+    "sandrine": 340000,
+    "stephanie": 320000,
+    "sophie": 300000,
+    "anne": 280000,
+    "martine": 260000,
+    "veronique": 240000,
+    "julie": 220000,
+    "camille": 200000,
+    "celine": 180000,
+    "claire": 170000,
+    "emilie": 160000,
+    "pauline": 150000,
+    "lea": 145000,
+    "charlotte": 140000,
+}
+
+SPANISH_FIRST_NAMES: Dict[str, int] = {
+    # Very common Spanish male names (estimated incidence)
+    "jose": 2500000,
+    "antonio": 1800000,
+    "manuel": 1500000,
+    "francisco": 1400000,
+    "juan": 1200000,
+    "david": 1000000,
+    "carlos": 950000,
+    "jesus": 900000,
+    "javier": 850000,
+    "miguel": 800000,
+    "angel": 750000,
+    "pedro": 700000,
+    "rafael": 650000,
+    "fernando": 600000,
+    "luis": 580000,
+    "pablo": 560000,
+    "sergio": 540000,
+    "jorge": 520000,
+    "alberto": 500000,
+    "daniel": 480000,
+    "alejandro": 460000,
+    "adrian": 440000,
+    "marcos": 420000,
+    "ramon": 400000,
+    "enrique": 380000,
+    "andres": 360000,
+    "diego": 340000,
+    "ivan": 320000,
+    "ruben": 300000,
+    "oscar": 280000,
+    
+    # Very common Spanish female names
+    "maria": 2800000,
+    "carmen": 1200000,
+    "ana": 1000000,
+    "isabel": 800000,
+    "dolores": 700000,
+    "josefa": 600000,
+    "rosa": 580000,
+    "pilar": 560000,
+    "teresa": 540000,
+    "laura": 520000,
+    "cristina": 500000,
+    "marta": 480000,
+    "lucia": 460000,
+    "elena": 440000,
+    "paula": 420000,
+    "sara": 400000,
+    "patricia": 380000,
+    "silvia": 360000,
+    "raquel": 340000,
+    "andrea": 320000,
+    "rocio": 300000,
+    "beatriz": 280000,
+    "monica": 260000,
+    "sandra": 240000,
+    "sonia": 220000,
+}
+
+ITALIAN_FIRST_NAMES: Dict[str, int] = {
+    # Very common Italian male names (estimated incidence)
+    "giuseppe": 1500000,
+    "giovanni": 1200000,
+    "antonio": 1100000,
+    "mario": 1000000,
+    "francesco": 950000,
+    "luigi": 900000,
+    "andrea": 850000,
+    "marco": 800000,
+    "alessandro": 750000,
+    "pietro": 700000,
+    "carlo": 650000,
+    "luca": 620000,
+    "roberto": 600000,
+    "paolo": 580000,
+    "giorgio": 560000,
+    "stefano": 540000,
+    "alberto": 520000,
+    "massimo": 500000,
+    "claudio": 480000,
+    "angelo": 460000,
+    "vincenzo": 440000,
+    "salvatore": 420000,
+    "daniele": 400000,
+    "davide": 380000,
+    "matteo": 360000,
+    "nicola": 340000,
+    "simone": 320000,
+    "fabio": 300000,
+    "riccardo": 280000,
+    "filippo": 260000,
+    
+    # Very common Italian female names
+    "maria": 2500000,
+    "anna": 1100000,
+    "giuseppina": 800000,
+    "rosa": 750000,
+    "francesca": 700000,
+    "lucia": 650000,
+    "angela": 620000,
+    "giovanna": 600000,
+    "giulia": 580000,
+    "elena": 560000,
+    "chiara": 540000,
+    "sara": 520000,
+    "silvia": 500000,
+    "laura": 480000,
+    "paola": 460000,
+    "valentina": 440000,
+    "alessandra": 420000,
+    "federica": 400000,
+    "martina": 380000,
+    "elisa": 360000,
+    "roberta": 340000,
+    "simona": 320000,
+    "claudia": 300000,
+    "barbara": 280000,
+    "monica": 260000,
+}
+
+POLISH_FIRST_NAMES: Dict[str, int] = {
+    # Very common Polish male names (estimated incidence)
+    "jan": 800000,
+    "andrzej": 750000,
+    "piotr": 700000,
+    "krzysztof": 650000,
+    "stanislaw": 600000,
+    "tomasz": 580000,
+    "pawel": 560000,
+    "jozef": 540000,
+    "marcin": 520000,
+    "marek": 500000,
+    "michal": 480000,
+    "grzegorz": 460000,
+    "jerzy": 440000,
+    "tadeusz": 420000,
+    "adam": 400000,
+    "lukasz": 380000,
+    "zbigniew": 360000,
+    "ryszard": 340000,
+    "dariusz": 320000,
+    "henryk": 300000,
+    "mariusz": 280000,
+    "kazimierz": 260000,
+    "wojciech": 240000,
+    "robert": 220000,
+    "mateusz": 200000,
+    "jakub": 180000,
+    "rafal": 170000,
+    "kamil": 160000,
+    "maciej": 150000,
+    "szymon": 145000,
+    
+    # Very common Polish female names
+    "maria": 1200000,
+    "anna": 1000000,
+    "katarzyna": 800000,
+    "malgorzata": 750000,
+    "agnieszka": 700000,
+    "barbara": 650000,
+    "ewa": 620000,
+    "krystyna": 600000,
+    "elzbieta": 580000,
+    "zofia": 560000,
+    "joanna": 540000,
+    "monika": 520000,
+    "jadwiga": 500000,
+    "teresa": 480000,
+    "danuta": 460000,
+    "irena": 440000,
+    "aleksandra": 420000,
+    "magdalena": 400000,
+    "dorota": 380000,
+    "beata": 360000,
+    "karolina": 340000,
+    "paulina": 320000,
+    "natalia": 300000,
+    "justyna": 280000,
+    "patrycja": 260000,
+}
+
+GERMAN_FIRST_NAMES: Dict[str, int] = {
+    # Very common German male names (estimated incidence)
+    "peter": 1100000,
+    "michael": 1000000,
+    "thomas": 950000,
+    "wolfgang": 800000,
+    "klaus": 750000,
+    "hans": 700000,
+    "jurgen": 650000,
+    "dieter": 620000,
+    "helmut": 600000,
+    "werner": 580000,
+    "manfred": 560000,
+    "andreas": 540000,
+    "stefan": 520000,
+    "christian": 500000,
+    "frank": 480000,
+    "bernd": 460000,
+    "martin": 440000,
+    "matthias": 420000,
+    "uwe": 400000,
+    "ralf": 380000,
+    "karl": 360000,
+    "horst": 340000,
+    "gerhard": 320000,
+    "gunter": 300000,
+    "alexander": 280000,
+    "jan": 260000,
+    "markus": 240000,
+    "tobias": 220000,
+    "sebastian": 200000,
+    "daniel": 180000,
+    
+    # Very common German female names
+    "maria": 1200000,
+    "ursula": 800000,
+    "monika": 750000,
+    "petra": 700000,
+    "sabine": 680000,
+    "renate": 660000,
+    "brigitte": 640000,
+    "helga": 620000,
+    "andrea": 600000,
+    "claudia": 580000,
+    "susanne": 560000,
+    "gabriele": 540000,
+    "birgit": 520000,
+    "angelika": 500000,
+    "heike": 480000,
+    "martina": 460000,
+    "karin": 440000,
+    "christine": 420000,
+    "anna": 400000,
+    "katharina": 380000,
+    "julia": 360000,
+    "stefanie": 340000,
+    "nicole": 320000,
+    "sandra": 300000,
+    "lisa": 280000,
+}
+
 
 # =============================================================================
 # NAME DATABASE REGISTRY
@@ -387,8 +704,7 @@ def get_first_name_database(country: str) -> Dict[str, int]:
     elif country in ("GB", "GBR", "UK", "UNITED KINGDOM", "ENGLAND"):
         return UK_FIRST_NAMES
     elif country in ("DE", "DEU", "GERMANY", "GERMAN"):
-        # German first names overlap with Dutch first names
-        return DUTCH_FIRST_NAMES
+        return GERMAN_FIRST_NAMES
     elif country in ("EG", "EGY", "EGYPT", "EGYPTIAN"):
         return ARABIC_FIRST_NAMES
     elif country in ("SA", "SAU", "SAUDI ARABIA", "SAUDI"):
@@ -396,6 +712,14 @@ def get_first_name_database(country: str) -> Dict[str, int]:
     elif country in ("ID", "IDN", "INDONESIA", "INDONESIAN"):
         # Indonesia uses mix of Arabic and local names
         return ARABIC_FIRST_NAMES
+    elif country in ("FR", "FRA", "FRANCE", "FRENCH"):
+        return FRENCH_FIRST_NAMES
+    elif country in ("ES", "ESP", "SPAIN", "SPANISH"):
+        return SPANISH_FIRST_NAMES
+    elif country in ("IT", "ITA", "ITALY", "ITALIAN"):
+        return ITALIAN_FIRST_NAMES
+    elif country in ("PL", "POL", "POLAND", "POLISH"):
+        return POLISH_FIRST_NAMES
     else:
         return DUTCH_FIRST_NAMES  # Default
 
@@ -754,7 +1078,7 @@ def main():
         print("NAME FREQUENCY DATA SOURCES")
         print("=" * 80)
         
-        for country in ["NL", "US", "BE", "GB", "DE", "ID", "EG", "SA"]:
+        for country in ["NL", "US", "BE", "GB", "DE", "ID", "EG", "SA", "FR", "ES", "IT", "PL"]:
             surnames, source, total = load_surname_data(country)
             sorted_names = get_sorted_surnames(country)
             top_10 = sorted_names[:10]
@@ -819,6 +1143,30 @@ def main():
             ("Ahmed Alghamdi", "SA"),
             ("Khalid Alharbi", "SA"),
             ("Fatima Alshehri", "SA"),
+            
+            # French names
+            ("Jean Martin", "FR"),
+            ("Marie Dubois", "FR"),
+            ("Pierre Bernard", "FR"),
+            ("Sophie Petit", "FR"),
+            
+            # Spanish names
+            ("José García", "ES"),
+            ("María Fernández", "ES"),
+            ("Antonio López", "ES"),
+            ("Carmen Rodríguez", "ES"),
+            
+            # Italian names
+            ("Giuseppe Rossi", "IT"),
+            ("Maria Russo", "IT"),
+            ("Marco Ferrari", "IT"),
+            ("Giulia Esposito", "IT"),
+            
+            # Polish names
+            ("Jan Nowak", "PL"),
+            ("Anna Kowalski", "PL"),
+            ("Piotr Wiśniewski", "PL"),
+            ("Maria Wójcik", "PL"),
         ]
         
         print("=" * 80)