# =============================================================================
# Universal Exclusions Module
# =============================================================================
# Version: 1.7.0
# Module: processing/exclusions.yaml
#
# These exclusion rules apply to ALL entity types across all text sources.
#
# Layout: every entry under `exclusion_rules` is a named rule group with
#   id          - rule identifier (UNI_EXC###)
#   description - one-line summary of what the group covers
#   patterns    - list of literal strings that must not be tagged
#   note        - rationale for the exclusion
#
# NOTE(review): how patterns are matched (case sensitivity, whole-token vs.
# substring) is decided by the consumer of this file and is not visible
# here -- confirm before adding short patterns.
# =============================================================================

id: universal_exclusions
name: "Universal Exclusion Rules"
description: |
  Exclusion rules that apply to ALL entity types across all text sources.
  These patterns should NEVER be tagged as named entities.

exclusion_rules:
  # Site navigation labels and generic link text.
  navigation:
    id: "UNI_EXC001"
    description: "Navigation elements and menu items"
    patterns:
      - "Home"
      - "About Us"
      - "Contact"
      - "Menu"
      - "Search"
      - "Login"
      - "Sign Up"
      - "Back to top"
      - "Next"
      - "Previous"
      - "Read more"
      - "Learn more"
      - "Click here"
      - "See all"
      - "View more"
      - "Show less"
    note: "These are UI elements, not named entities"

  # Marketing prompts that ask the reader to act.
  calls_to_action:
    id: "UNI_EXC002"
    description: "Marketing and call-to-action phrases"
    patterns:
      - "Buy now"
      - "Subscribe"
      - "Book tickets"
      - "Plan your visit"
      - "Become a member"
      - "Donate"
      - "Shop"
      - "Get tickets"
      - "Reserve"
      - "Join us"
    note: "Action prompts, not entity references"

  # Generic social-media button/link text.
  # NOTE(review): WRK.SOC in the note below is an entity type defined outside
  # this file -- presumably the tag for social handles; confirm.
  social_boilerplate:
    id: "UNI_EXC003"
    description: "Generic social media text"
    patterns:
      - "Follow us"
      - "Share"
      - "Like"
      - "Tweet"
      - "Pin it"
      - "Share on Facebook"
      - "Follow on Instagram"
    note: "Tag actual handles (WRK.SOC), not these generic phrases"

  # CMS-generated labels and legal footer text.
  # "Tags:" and "Categories:" include the trailing colon as part of the
  # pattern, so they must stay quoted.
  technical_artifacts:
    id: "UNI_EXC004"
    description: "Content management system artifacts"
    patterns:
      - "Posted by"
      - "Last updated"
      - "Tags:"
      - "Categories:"
      - "Comments"
      - "Leave a reply"
      - "Related posts"
      - "Powered by"
      - "Cookie settings"
      - "Privacy policy"
      - "Terms of use"
      - "All rights reserved"
    note: "Technical/legal boilerplate, not content entities"

  # Section headers and generic labels.
  generic_words:
    id: "UNI_EXC005"
    description: "Common words that should not be tagged"
    patterns:
      - "Welcome"
      - "Information"
      - "Details"
      - "Overview"
      - "Introduction"
      - "Summary"
      - "Description"
      - "Features"
      - "Highlights"
      - "News"
      - "Events"
      - "Updates"
    note: "Section headers and generic labels, not named entities"

  # Pronouns and demonstratives.
  # NOTE(review): entries are lowercase except "I". If the consumer matches
  # case-insensitively, "us" and "it" would also hit "US" and "IT" -- verify
  # the matching mode.
  pronouns:
    id: "UNI_EXC006"
    description: "Pronouns and demonstrative words"
    patterns:
      - "he"
      - "she"
      - "it"
      - "they"
      - "we"
      - "you"
      - "I"
      - "him"
      - "her"
      - "them"
      - "us"
      - "this"
      - "that"
      - "these"
      - "those"
      - "here"
      - "there"
      - "where"
    note: "Pronouns require coreference resolution, not NER"