# =============================================================================
# Universal Exclusions Module
# =============================================================================
# Version: 1.7.0
# Module: processing/exclusions.yaml
#
# These exclusion rules apply to ALL entity types across all text sources.
# =============================================================================

id: universal_exclusions
name: "Universal Exclusion Rules"
description: |
  Exclusion rules that apply to ALL entity types across all text sources.
  These patterns should NEVER be tagged as named entities.

# Rules are grouped by category. Each category carries:
#   id          - rule identifier (UNI_EXC###)
#   description - what the category covers
#   patterns    - literal phrases that must not be tagged as entities
#   note        - rationale for the exclusion
#
# NOTE(review): this file does not define how patterns are matched (case
# sensitivity, whole-string vs. substring). Confirm against the consumer
# before adding short patterns such as "I", "it", or "us".
exclusion_rules:

  # UI navigation labels and link text.
  navigation:
    id: "UNI_EXC001"
    description: "Navigation elements and menu items"
    patterns:
      - "Home"
      - "About Us"
      - "Contact"
      - "Menu"
      - "Search"
      - "Login"
      - "Sign Up"
      - "Back to top"
      - "Next"
      - "Previous"
      - "Read more"
      - "Learn more"
      - "Click here"
      - "See all"
      - "View more"
      - "Show less"
    note: "These are UI elements, not named entities"

  # Marketing prompts that ask the reader to act.
  calls_to_action:
    id: "UNI_EXC002"
    description: "Marketing and call-to-action phrases"
    patterns:
      - "Buy now"
      - "Subscribe"
      - "Book tickets"
      - "Plan your visit"
      - "Become a member"
      - "Donate"
      - "Shop"
      - "Get tickets"
      - "Reserve"
      - "Join us"
    note: "Action prompts, not entity references"

  # Generic social-sharing phrases (actual handles are tagged elsewhere).
  social_boilerplate:
    id: "UNI_EXC003"
    description: "Generic social media text"
    patterns:
      - "Follow us"
      - "Share"
      - "Like"
      - "Tweet"
      - "Pin it"
      - "Share on Facebook"
      - "Follow on Instagram"
    note: "Tag actual handles (WRK.SOC), not these generic phrases"

  # CMS-generated labels and legal boilerplate.
  technical_artifacts:
    id: "UNI_EXC004"
    description: "Content management system artifacts"
    patterns:
      - "Posted by"
      - "Last updated"
      - "Tags:"
      - "Categories:"
      - "Comments"
      - "Leave a reply"
      - "Related posts"
      - "Powered by"
      - "Cookie settings"
      - "Privacy policy"
      - "Terms of use"
      - "All rights reserved"
    note: "Technical/legal boilerplate, not content entities"

  # Section headers and generic labels.
  generic_words:
    id: "UNI_EXC005"
    description: "Common words that should not be tagged"
    patterns:
      - "Welcome"
      - "Information"
      - "Details"
      - "Overview"
      - "Introduction"
      - "Summary"
      - "Description"
      - "Features"
      - "Highlights"
      - "News"
      - "Events"
      - "Updates"
    note: "Section headers and generic labels, not named entities"

  # Pronouns and demonstratives.
  # NOTE(review): "here", "there", and "where" are adverbs rather than
  # pronouns; they appear to be grouped here as deictic words - confirm.
  pronouns:
    id: "UNI_EXC006"
    description: "Pronouns and demonstrative words"
    patterns:
      - "he"
      - "she"
      - "it"
      - "they"
      - "we"
      - "you"
      - "I"
      - "him"
      - "her"
      - "them"
      - "us"
      - "this"
      - "that"
      - "these"
      - "those"
      - "here"
      - "there"
      - "where"
    note: "Pronouns require coreference resolution, not NER"