feat(entity-resolution): expand consumer email domain list
All checks were successful
Deploy Frontend / build-and-deploy (push) Successful in 3m55s

Add additional consumer email domains (mostly Dutch ISPs) for better filtering:
- gmail.nl, icloud.nl, aol.nl, aol.com
- telfortglasvezel.nl, worldonline.nl, delta.nl, lijbrandt.nl
- t-mobilethuis.nl, compaqnet.nl, filternet.nl, onsmail.nl, box.nl
- mailinator.com (disposable email)
This commit is contained in:
kempersc 2026-01-13 20:54:34 +01:00
parent 6a3616beac
commit 833bb56833
2 changed files with 9 additions and 13 deletions

View file

@@ -1,12 +1,12 @@
{
"generated": "2026-01-13T19:50:03.785Z",
"generated": "2026-01-13T19:53:50.189Z",
"schemaRoot": "/schemas/20251121/linkml",
"totalFiles": 2894,
"totalFiles": 2893,
"categoryCounts": {
"main": 4,
"class": 632,
"enum": 147,
"slot": 2107,
"slot": 2106,
"module": 4
},
"categories": [
@@ -3967,11 +3967,6 @@
"path": "modules/slots/activities_societies.yaml",
"category": "slot"
},
{
"name": "activity_id",
"path": "modules/slots/activity_id.yaml",
"category": "slot"
},
{
"name": "actual_end",
"path": "modules/slots/actual_end.yaml",

View file

@@ -228,23 +228,24 @@ HERITAGE_DOMAIN_MAP: Dict[str, Tuple[str, str, Optional[str]]] = {
# Consumer email domains (to filter out)
CONSUMER_DOMAINS: Set[str] = {
'gmail.com', 'hotmail.com', 'hotmail.nl', 'outlook.com', 'outlook.nl',
'gmail.com', 'gmail.nl', 'hotmail.com', 'hotmail.nl', 'outlook.com', 'outlook.nl',
'live.nl', 'live.com', 'msn.com', 'yahoo.com', 'yahoo.nl', 'yahoo.co.uk',
'icloud.com', 'me.com', 'mac.com',
'icloud.com', 'icloud.nl', 'me.com', 'mac.com', 'aol.nl', 'aol.com',
# Dutch ISPs
'ziggo.nl', 'kpnmail.nl', 'kpnplanet.nl', 'planet.nl', 'hetnet.nl',
'xs4all.nl', 'casema.nl', 'home.nl', 'upcmail.nl', 'chello.nl',
'quicknet.nl', 'zonnet.nl', 'tele2.nl', 'solcon.nl', 'zeelandnet.nl',
'wxs.nl', 'telfort.nl', 'online.nl', 'hccnet.nl', 'kabelfoon.nl',
'wxs.nl', 'telfort.nl', 'telfortglasvezel.nl', 'online.nl', 'hccnet.nl', 'kabelfoon.nl',
'caiway.nl', 'tiscali.nl', 'versatel.nl', 'freeler.nl', 'kliksafe.nl',
'dds.nl', 'freedom.nl', 'xmsnet.nl', 'inter.nl.net', 'euronet.nl',
'onsbrabantnet.nl', 'concepts.nl',
'onsbrabantnet.nl', 'concepts.nl', 'worldonline.nl', 'delta.nl', 'lijbrandt.nl',
't-mobilethuis.nl', 'compaqnet.nl', 'filternet.nl', 'onsmail.nl', 'box.nl',
# Belgian
'telenet.be', 'skynet.be', 'proximus.be',
# German
'gmx.de', 'web.de', 't-online.de',
# Generic
'mail.com', 'email.com', 'protonmail.com', 'pm.me',
'mail.com', 'email.com', 'protonmail.com', 'pm.me', 'mailinator.com',
}
# Dutch name prefixes (tussenvoegsels)