From 833bb568336a7efd3184a5766ff1612db4c63b9b Mon Sep 17 00:00:00 2001 From: kempersc Date: Tue, 13 Jan 2026 20:54:34 +0100 Subject: [PATCH] feat(entity-resolution): expand consumer email domain list Add more consumer webmail, Dutch ISP, and disposable email domains for better filtering: - gmail.nl, icloud.nl, aol.nl, aol.com - telfortglasvezel.nl, worldonline.nl, delta.nl, lijbrandt.nl - t-mobilethuis.nl, compaqnet.nl, filternet.nl, onsmail.nl, box.nl - mailinator.com (disposable email) --- frontend/public/schemas/20251121/linkml/manifest.json | 11 +++-------- .../entity_resolution/email_semantics.py | 11 ++++++----- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/frontend/public/schemas/20251121/linkml/manifest.json b/frontend/public/schemas/20251121/linkml/manifest.json index 714bc2fde1..a19017c806 100644 --- a/frontend/public/schemas/20251121/linkml/manifest.json +++ b/frontend/public/schemas/20251121/linkml/manifest.json @@ -1,12 +1,12 @@ { - "generated": "2026-01-13T19:50:03.785Z", + "generated": "2026-01-13T19:53:50.189Z", "schemaRoot": "/schemas/20251121/linkml", - "totalFiles": 2894, + "totalFiles": 2893, "categoryCounts": { "main": 4, "class": 632, "enum": 147, - "slot": 2107, + "slot": 2106, "module": 4 }, "categories": [ @@ -3967,11 +3967,6 @@ "path": "modules/slots/activities_societies.yaml", "category": "slot" }, - { - "name": "activity_id", - "path": "modules/slots/activity_id.yaml", - "category": "slot" - }, { "name": "actual_end", "path": "modules/slots/actual_end.yaml", diff --git a/src/glam_extractor/entity_resolution/email_semantics.py b/src/glam_extractor/entity_resolution/email_semantics.py index 0ba23fad1f..08125f4297 100644 --- a/src/glam_extractor/entity_resolution/email_semantics.py +++ b/src/glam_extractor/entity_resolution/email_semantics.py @@ -228,23 +228,24 @@ HERITAGE_DOMAIN_MAP: Dict[str, Tuple[str, str, Optional[str]]] = { # Consumer email domains (to filter out) CONSUMER_DOMAINS: Set[str] = { - 'gmail.com', 'hotmail.com', 'hotmail.nl', 
'outlook.com', 'outlook.nl', + 'gmail.com', 'gmail.nl', 'hotmail.com', 'hotmail.nl', 'outlook.com', 'outlook.nl', 'live.nl', 'live.com', 'msn.com', 'yahoo.com', 'yahoo.nl', 'yahoo.co.uk', - 'icloud.com', 'me.com', 'mac.com', + 'icloud.com', 'icloud.nl', 'me.com', 'mac.com', 'aol.nl', 'aol.com', # Dutch ISPs 'ziggo.nl', 'kpnmail.nl', 'kpnplanet.nl', 'planet.nl', 'hetnet.nl', 'xs4all.nl', 'casema.nl', 'home.nl', 'upcmail.nl', 'chello.nl', 'quicknet.nl', 'zonnet.nl', 'tele2.nl', 'solcon.nl', 'zeelandnet.nl', - 'wxs.nl', 'telfort.nl', 'online.nl', 'hccnet.nl', 'kabelfoon.nl', + 'wxs.nl', 'telfort.nl', 'telfortglasvezel.nl', 'online.nl', 'hccnet.nl', 'kabelfoon.nl', 'caiway.nl', 'tiscali.nl', 'versatel.nl', 'freeler.nl', 'kliksafe.nl', 'dds.nl', 'freedom.nl', 'xmsnet.nl', 'inter.nl.net', 'euronet.nl', - 'onsbrabantnet.nl', 'concepts.nl', + 'onsbrabantnet.nl', 'concepts.nl', 'worldonline.nl', 'delta.nl', 'lijbrandt.nl', + 't-mobilethuis.nl', 'compaqnet.nl', 'filternet.nl', 'onsmail.nl', 'box.nl', # Belgian 'telenet.be', 'skynet.be', 'proximus.be', # German 'gmx.de', 'web.de', 't-online.de', # Generic - 'mail.com', 'email.com', 'protonmail.com', 'pm.me', + 'mail.com', 'email.com', 'protonmail.com', 'pm.me', 'mailinator.com', } # Dutch name prefixes (tussenvoegsels)