# ============================================================================
# Heritage Classification (copied from main.py for experience item classification)
# ============================================================================

import re

# Heritage type detection keywords for the GLAMORCUBESFIXPHDNT taxonomy.
# Matching is case-insensitive substring matching against "role company";
# entries carrying a trailing space (e.g. 'KB ', 'IT ', 'data ') rely on that
# space so they do not match inside longer words.
HERITAGE_KEYWORDS = {
    'G': ['gallery', 'galerie', 'kunsthal', 'art dealer', 'art gallery', 'exhibition space'],
    'L': ['library', 'bibliotheek', 'bibliothek', 'librarian', 'bibliothecaris', 'KB ', 'national library'],
    'A': ['archive', 'archief', 'archivist', 'archivaris', 'archival', 'beeld en geluid', 'beeld & geluid',
          'NISV', 'filmmuseum', 'eye film', 'EYE ', 'audiovisual', 'nationaal archief', 'stadsarchief',
          'gemeentearchief', 'rijksarchief', 'NIOD', 'IISH', 'IISG', 'archiefspecialist'],
    'M': ['museum', 'musea', 'curator', 'conservator', 'collection manager', 'rijksmuseum', 'van gogh',
          'stedelijk', 'mauritshuis', 'tropenmuseum', 'allard pierson', 'museale', 'collectiebeheerder',
          'collectiespecialist', 'collectie'],
    'O': ['ministry', 'ministerie', 'government', 'overheid', 'gemeente', 'province', 'provincie', 'OCW'],
    'R': ['research', 'onderzoek', 'researcher', 'onderzoeker', 'KNAW', 'humanities cluster', 'NWO',
          'documentatie', 'documentation', 'kenniscentrum', 'historicus'],
    'C': ['corporate archive', 'bedrijfsarchief', 'company history'],
    'E': ['university', 'universiteit', 'professor', 'lecturer', 'docent', 'hogeschool', 'academy',
          'academie', 'PhD', 'phd candidate', 'student', 'teacher', 'onderwijs', 'education', 'UvA',
          'VU ', 'leiden university', 'reinwardt', 'film academy', 'graduate', 'assistant professor',
          'associate professor', 'hoogleraar', 'educatie', 'educator'],
    'S': ['society', 'vereniging', 'genootschap', 'historical society', 'historische vereniging'],
    'D': ['digital', 'digitaal', 'platform', 'software', 'IT ', 'tech', 'developer', 'engineer',
          'data ', 'AI ', 'machine learning', 'digitalisering', 'datamanagement', 'data analist'],
}

# Role indicators that mark a position as clearly outside the heritage sector.
NON_HERITAGE_KEYWORDS = [
    'marketing', 'sales', 'HR ', 'human resources', 'recruiter', 'finance', 'accounting',
    'legal', 'lawyer', 'advocaat', 'consultant', 'coach', 'therapy', 'health', 'medical',
    'food', 'restaurant', 'retail', 'fashion', 'real estate', 'insurance', 'banking',
    'investment', 'e-commerce', 'organiser', 'opruimhulp', 'verpleeg', 'nurse'
]

# Organizations that are explicitly NOT heritage institutions
NON_HERITAGE_ORGANIZATIONS = [
    # Banks & Financial
    'ing ', 'ing nederland', 'rabobank', 'abn amro', 'postbank', 'triodos',
    # Security companies
    'i-sec', 'g4s', 'securitas', 'trigion', 'chubb',
    # Police/Government (non-cultural)
    'politie', 'police', 'rijkswaterstaat', 'belastingdienst', 'douane', 'defensie',
    # Political parties
    'vvd', 'pvda', 'cda', 'd66', 'groenlinks', 'pvv', 'bbb', 'nsc', 'volt',
    'sp ', 'forum voor democratie', 'ja21', 'bij1', 'denk', 'sgp', 'cu ',
    # Tech companies (non-heritage)
    'google', 'microsoft', 'amazon', 'meta', 'facebook', 'apple', 'netflix',
    'uber', 'airbnb', 'booking.com', 'adyen', 'mollie', 'messagebird',
    'coolblue', 'bol.com', 'picnic', 'takeaway', 'just eat',
    # Telecom
    'kpn', 'vodafone', 't-mobile', 'ziggo',
    # Postal / Logistics
    'postnl', 'postkantoren', 'dhl', 'ups', 'fedex',
    # Healthcare
    'ziekenhuis', 'hospital', 'ggz', 'ggd', 'thuiszorg',
    # Retail
    'albert heijn', 'jumbo', 'lidl', 'aldi', 'ikea', 'hema', 'action',
    # Consulting / Professional services
    'deloitte', 'kpmg', 'pwc', 'ey ', 'ernst & young', 'mckinsey', 'bcg',
    'accenture', 'capgemini', 'ordina', 'atos', 'cgi ',
    # Recruitment / HR
    'randstad', 'tempo-team', 'manpower', 'hays', 'brunel',
    # Energy / Utilities
    'shell', 'bp ', 'eneco', 'vattenfall', 'essent', 'nuon',
    # Transport
    'ns ', 'prorail', 'schiphol', 'klm', 'transavia',
    # Other
    'freelance', 'zelfstandig', 'zzp', 'eigen bedrijf',
]

# Heritage organization keywords - organizations that ARE heritage institutions
HERITAGE_ORGANIZATION_KEYWORDS = [
    # Archives
    'archief', 'archive', 'nationaal archief', 'stadsarchief', 'regionaal archief',
    'beeld en geluid', 'beeld & geluid', 'niod', 'iish', 'iisg',
    # Museums
    'museum', 'musea', 'rijksmuseum', 'van gogh', 'stedelijk', 'mauritshuis',
    'tropenmuseum', 'allard pierson', 'kröller', 'boijmans',
    # Libraries
    'bibliotheek', 'library', 'koninklijke bibliotheek', 'kb ',
    # Film/AV heritage
    'eye film', 'filmmuseum', 'eye ', 'sound and vision',
    # Heritage platforms
    'erfgoed', 'heritage', 'cultural', 'cultureel',
    # Research institutes (heritage-focused)
    'knaw', 'humanities cluster', 'meertens', 'huygens',
]


def _compile_blocklist(terms: List[str]) -> re.Pattern:
    """Compile one alternation regex that word-boundary-matches any of *terms*.

    Terms are lowercased and stripped first (trailing spaces in entries like
    'ing ' exist only to hint intent; the \\b anchors do the real work and
    avoid false positives such as 'sharing' matching 'ing').
    """
    escaped = [re.escape(t.lower().strip()) for t in terms]
    return re.compile(r'\b(?:' + '|'.join(escaped) + r')\b')


# Precompiled once at import time. The previous implementation rebuilt and ran
# one regex per term on every call; a single alternation yields the identical
# accept/reject outcome (any term matching => blocked) without per-call work.
_NON_HERITAGE_ORG_RE = _compile_blocklist(NON_HERITAGE_ORGANIZATIONS)
_NON_HERITAGE_ROLE_RE = _compile_blocklist(NON_HERITAGE_KEYWORDS)

# Generic heritage terms: mark a position as relevant without a specific type.
_GENERIC_HERITAGE_TERMS = ['heritage', 'erfgoed', 'culture', 'cultuur', 'cultural', 'film',
                           'cinema', 'media', 'arts', 'kunst', 'creative', 'preservation',
                           'conservation', 'collection']


def detect_heritage_type(role: Optional[str], company: Optional[str]) -> tuple:
    """
    Detect whether a position is heritage-relevant and, if so, which type.

    Classification stages:
      1. Blocklist of explicitly non-heritage organizations (word-boundary match).
      2. Blocklist of non-heritage role indicators (word-boundary match).
      3. Typed heritage keywords, most specific types first. 'D' (Digital) is
         only awarded when the organization itself looks like a heritage
         institution, so generic IT workers at banks/police are not classified
         as heritage.
      4. Generic heritage terms -> relevant but untyped.

    Args:
        role: Job title/role text.
        company: Company/organization name.

    Returns:
        Tuple of (heritage_relevant: bool, heritage_type: Optional[str]).
    """
    combined = f"{role or ''} {company or ''}".lower()
    if not combined.strip():
        return (False, None)

    # Stages 1+2: organization blocklist first, then role blocklist (same
    # order as checking them separately; either match rejects the position).
    if _NON_HERITAGE_ORG_RE.search(combined) or _NON_HERITAGE_ROLE_RE.search(combined):
        return (False, None)

    # Is the employer itself a heritage institution? Needed for the 'D' rule.
    is_heritage_org = any(k.lower() in combined for k in HERITAGE_ORGANIZATION_KEYWORDS)

    # Stage 3: typed keywords, most specific types first.
    # 'D' is deliberately excluded here and handled below.
    for heritage_type in ('A', 'M', 'L', 'G', 'S', 'C', 'O', 'R', 'E'):
        if any(k.lower() in combined for k in HERITAGE_KEYWORDS.get(heritage_type, [])):
            return (True, heritage_type)

    # 'D' (Digital) requires BOTH a tech keyword AND a heritage organization.
    if is_heritage_org and any(k.lower() in combined for k in HERITAGE_KEYWORDS.get('D', [])):
        return (True, 'D')

    # Stage 4: generic heritage terms without a specific type.
    if any(k in combined for k in _GENERIC_HERITAGE_TERMS):
        return (True, None)

    return (False, None)


def enrich_experience_with_heritage(experience: List) -> List[Dict]:
    """
    Add heritage_relevant / heritage_type fields to every experience item.

    Accepts both dict items and JSON-string items (asyncpg can return jsonb
    array elements as strings); unparseable or non-dict items are dropped.

    Args:
        experience: List of experience items (dicts or JSON strings).

    Returns:
        New list of dicts, each with heritage_relevant and heritage_type set.
    """
    if not experience:
        return []

    enriched = []
    for exp in experience:
        # asyncpg sometimes hands back jsonb elements as raw JSON strings.
        if isinstance(exp, str):
            try:
                exp = json.loads(exp)
            except json.JSONDecodeError:
                continue
        if not isinstance(exp, dict):
            continue

        role = exp.get('title') or exp.get('role') or ''
        company = exp.get('company') or exp.get('organization') or ''
        heritage_relevant, heritage_type = detect_heritage_type(role, company)

        # Copy rather than mutate the caller's dict.
        enriched.append({**exp,
                         'heritage_relevant': heritage_relevant,
                         'heritage_type': heritage_type})
    return enriched


def parse_jsonb_list(data) -> List:
    """
    Parse a jsonb list field from PostgreSQL into plain Python objects.

    asyncpg returns jsonb columns in several shapes:
      - a proper Python list with dict elements,
      - a JSON string encoding the whole list,
      - a list whose elements are JSON strings,
      - a list whose elements are Python repr strings (single quotes).

    Args:
        data: A list, a JSON string representing a list, or None.

    Returns:
        Parsed list; [] if data is None, unparseable, or not a list.
    """
    import ast

    if data is None:
        return []

    # Whole-value JSON string -> parse it first.
    if isinstance(data, str):
        try:
            data = json.loads(data)
        except json.JSONDecodeError:
            return []

    if not isinstance(data, list):
        return []

    result = []
    for item in data:
        if not isinstance(item, str):
            result.append(item)
            continue
        # Element-level JSON string (double quotes).
        try:
            result.append(json.loads(item))
            continue
        except json.JSONDecodeError:
            pass
        # Python literal repr (single quotes) — tolerates malformed data.
        try:
            result.append(ast.literal_eval(item))
            continue
        except (ValueError, SyntaxError):
            pass
        # Plain string (e.g. a skill name) — keep as-is.
        result.append(item)
    return result
# ============================================================================
# Image Proxy (Avoid Hotlinking Issues)
# ============================================================================

# In-memory cache for proxied images (simple TTL-based).
# NOTE(review): plain dict, no lock — concurrent requests may race; acceptable
# for a best-effort cache (worst case is a duplicate upstream fetch).
_image_cache: Dict[str, tuple] = {}  # md5(url) -> (content, content_type, timestamp)
IMAGE_CACHE_TTL = 3600  # seconds (1 hour)

# Allowed image domains for security
ALLOWED_IMAGE_DOMAINS = {
    # Google Maps
    'lh3.googleusercontent.com',
    'lh4.googleusercontent.com',
    'lh5.googleusercontent.com',
    'lh6.googleusercontent.com',
    'maps.gstatic.com',
    'maps.googleapis.com',
    # Wikidata/Wikimedia
    'upload.wikimedia.org',
    'commons.wikimedia.org',
    # Institution domains (add as needed)
    # Generic patterns handled below
}


def is_allowed_image_url(url: str) -> bool:
    """Check whether *url* points at a domain we are willing to proxy.

    Policy: exact allowlist entries, any *.nl / *.org / *.museum domain
    (Dutch and heritage institutions), and Google user-content hosts.

    NOTE(review): allowing every .nl/.org host is a broad server-side fetch
    (SSRF) surface — confirm this is the intended policy.
    """
    try:
        domain = urlparse(url).netloc.lower()

        # Exact allowlist matches.
        if domain in ALLOWED_IMAGE_DOMAINS:
            return True

        # Heritage-friendly TLDs (.nl Dutch institutions, .org, .museum).
        if domain.endswith(('.nl', '.org', '.museum')):
            return True

        # Google user-content hosts. Bug fix: the previous substring test
        # ('googleusercontent.com' in domain) also accepted spoofed hosts such
        # as 'evilgoogleusercontent.com' or 'googleusercontent.com.evil.net';
        # require an exact host or a true dot-separated subdomain instead.
        if domain == 'googleusercontent.com' or domain.endswith('.googleusercontent.com'):
            return True

        return False
    except Exception:
        # Malformed URLs are simply not allowed.
        return False


@app.get("/image-proxy")
async def proxy_image(url: str = Query(..., description="Image URL to proxy")):
    """
    Proxy external images to avoid hotlinking issues.

    Many external servers block direct embedding (hotlinking) of their images.
    This endpoint fetches the image server-side and returns it with proper
    headers.

    Features:
    - Validates URL is from allowed domains (security)
    - Caches images in memory for 1 hour (performance)
    - Sets proper Content-Type headers
    - Avoids CORS issues

    Usage: /image-proxy?url=https://example.com/logo.png
    """
    # Security: only http(s) URLs on allowlisted domains.
    if not url or not url.startswith(('http://', 'https://')):
        raise HTTPException(status_code=400, detail="Invalid URL")

    if not is_allowed_image_url(url):
        raise HTTPException(status_code=403, detail="Domain not allowed for proxying")

    # Cache lookup. md5 is only a cache key here, not a security boundary.
    url_hash = hashlib.md5(url.encode()).hexdigest()
    if url_hash in _image_cache:
        content, content_type, timestamp = _image_cache[url_hash]
        if datetime.now().timestamp() - timestamp < IMAGE_CACHE_TTL:
            return Response(
                content=content,
                media_type=content_type,
                headers={
                    "Cache-Control": "public, max-age=3600",
                    "X-Proxy-Cache": "HIT",
                }
            )

    # Fetch the image upstream.
    try:
        async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
            response = await client.get(
                url,
                headers={
                    # Spoof browser headers to avoid hotlink detection
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                    "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
                    "Accept-Language": "en-US,en;q=0.9,nl;q=0.8",
                    "Referer": urlparse(url).scheme + "://" + urlparse(url).netloc + "/",
                }
            )

        if response.status_code != 200:
            raise HTTPException(status_code=502, detail=f"Failed to fetch image: {response.status_code}")

        content = response.content
        content_type = response.headers.get("content-type", "image/png")

        # Validate it's actually an image before serving it on.
        if not content_type.startswith("image/"):
            raise HTTPException(status_code=400, detail="URL does not point to an image")

        # Cache the result.
        _image_cache[url_hash] = (content, content_type, datetime.now().timestamp())

        # Bound the cache (simple LRU-like cleanup): once over 1000 entries,
        # drop the 500 oldest by stored timestamp.
        if len(_image_cache) > 1000:
            sorted_entries = sorted(_image_cache.items(), key=lambda x: x[1][2])
            for key, _ in sorted_entries[:500]:
                del _image_cache[key]

        return Response(
            content=content,
            media_type=content_type,
            headers={
                "Cache-Control": "public, max-age=3600",
                "X-Proxy-Cache": "MISS",
            }
        )

    except httpx.TimeoutException:
        raise HTTPException(status_code=504, detail="Timeout fetching image")
    except httpx.RequestError as e:
        raise HTTPException(status_code=502, detail=f"Error fetching image: {str(e)}")
# ============================================================================
# Heritage Classification
# ============================================================================

# Heritage type detection keywords for GLAMORCUBESFIXPHDNT taxonomy
HERITAGE_KEYWORDS = {
    'G': ['gallery', 'galerie', 'kunsthal', 'art dealer', 'art gallery', 'exhibition space'],
    'L': ['library', 'bibliotheek', 'bibliothek', 'librarian', 'bibliothecaris', 'KB ', 'national library'],
    'A': ['archive', 'archief', 'archivist', 'archivaris', 'archival', 'beeld en geluid', 'beeld & geluid',
          'NISV', 'filmmuseum', 'eye film', 'EYE ', 'audiovisual', 'nationaal archief', 'stadsarchief',
          'gemeentearchief', 'rijksarchief', 'NIOD', 'IISH', 'IISG', 'archiefspecialist'],
    'M': ['museum', 'musea', 'curator', 'conservator', 'collection manager', 'rijksmuseum', 'van gogh',
          'stedelijk', 'mauritshuis', 'tropenmuseum', 'allard pierson', 'museale', 'collectiebeheerder',
          'collectiespecialist', 'collectie'],
    'O': ['ministry', 'ministerie', 'government', 'overheid', 'gemeente', 'province', 'provincie', 'OCW'],
    'R': ['research', 'onderzoek', 'researcher', 'onderzoeker', 'KNAW', 'humanities cluster', 'NWO',
          'documentatie', 'documentation', 'kenniscentrum', 'historicus'],
    'C': ['corporate archive', 'bedrijfsarchief', 'company history'],
    'E': ['university', 'universiteit', 'professor', 'lecturer', 'docent', 'hogeschool', 'academy',
          'academie', 'PhD', 'phd candidate', 'student', 'teacher', 'onderwijs', 'education', 'UvA',
          'VU ', 'leiden university', 'reinwardt', 'film academy', 'graduate', 'assistant professor',
          'associate professor', 'hoogleraar', 'educatie', 'educator'],
    'S': ['society', 'vereniging', 'genootschap', 'historical society', 'historische vereniging'],
    'D': ['digital', 'digitaal', 'platform', 'software', 'IT ', 'tech', 'developer', 'engineer',
          'data ', 'AI ', 'machine learning', 'digitalisering', 'datamanagement', 'data analist'],
}

NON_HERITAGE_KEYWORDS = [
    'marketing', 'sales', 'HR ', 'human resources', 'recruiter', 'finance', 'accounting',
    'legal', 'lawyer', 'advocaat', 'consultant', 'coach', 'therapy', 'health', 'medical',
    'food', 'restaurant', 'retail', 'fashion', 'real estate', 'insurance', 'banking',
    'investment', 'e-commerce', 'organiser', 'opruimhulp', 'verpleeg', 'nurse'
]

# Organizations that are explicitly NOT heritage institutions
NON_HERITAGE_ORGANIZATIONS = [
    # Banks & Financial
    'ing ', 'ing nederland', 'rabobank', 'abn amro', 'postbank', 'triodos',
    # Security companies
    'i-sec', 'g4s', 'securitas', 'trigion', 'chubb',
    # Police/Government (non-cultural)
    'politie', 'police', 'rijkswaterstaat', 'belastingdienst', 'douane', 'defensie',
    # Political parties
    'vvd', 'pvda', 'cda', 'd66', 'groenlinks', 'pvv', 'bbb', 'nsc', 'volt',
    'sp ', 'forum voor democratie', 'ja21', 'bij1', 'denk', 'sgp', 'cu ',
    # Tech companies (non-heritage)
    'google', 'microsoft', 'amazon', 'meta', 'facebook', 'apple', 'netflix',
    'uber', 'airbnb', 'booking.com', 'adyen', 'mollie', 'messagebird',
    'coolblue', 'bol.com', 'picnic', 'takeaway', 'just eat',
    # Telecom
    'kpn', 'vodafone', 't-mobile', 'ziggo',
    # Postal / Logistics
    'postnl', 'postkantoren', 'dhl', 'ups', 'fedex',
    # Healthcare
    'ziekenhuis', 'hospital', 'ggz', 'ggd', 'thuiszorg',
    # Retail
    'albert heijn', 'jumbo', 'lidl', 'aldi', 'ikea', 'hema', 'action',
    # Consulting / Professional services
    'deloitte', 'kpmg', 'pwc', 'ey ', 'ernst & young', 'mckinsey', 'bcg',
    'accenture', 'capgemini', 'ordina', 'atos', 'cgi ',
    # Recruitment / HR
    'randstad', 'tempo-team', 'manpower', 'hays', 'brunel',
    # Energy / Utilities
    'shell', 'bp ', 'eneco', 'vattenfall', 'essent', 'nuon',
    # Transport
    'ns ', 'prorail', 'schiphol', 'klm', 'transavia',
    # Other
    'freelance', 'zelfstandig', 'zzp', 'eigen bedrijf',
]

# Heritage organization keywords - organizations that ARE heritage institutions
HERITAGE_ORGANIZATION_KEYWORDS = [
    # Archives
    'archief', 'archive', 'nationaal archief', 'stadsarchief', 'regionaal archief',
    'beeld en geluid', 'beeld & geluid', 'niod', 'iish', 'iisg',
    # Museums
    'museum', 'musea', 'rijksmuseum', 'van gogh', 'stedelijk', 'mauritshuis',
    'tropenmuseum', 'allard pierson', 'kröller', 'boijmans',
    # Libraries
    'bibliotheek', 'library', 'koninklijke bibliotheek', 'kb ',
    # Film/AV heritage
    'eye film', 'filmmuseum', 'eye ', 'sound and vision',
    # Heritage platforms
    'erfgoed', 'heritage', 'cultural', 'cultureel',
    # Research institutes (heritage-focused)
    'knaw', 'humanities cluster', 'meertens', 'huygens',
]


def detect_heritage_type(role: Optional[str], company: Optional[str]) -> tuple:
    """
    Decide whether a position is heritage-relevant and assign a heritage type.

    The check runs in order: organization blocklist, then role blocklist
    (word-boundary matches, so e.g. "sharing" never trips 'ing '); then typed
    heritage keywords, most specific types first. The 'D' (Digital) type
    needs BOTH a tech keyword AND an employer that looks like a heritage
    institution, keeping generic IT staff at banks/police out. Finally,
    generic heritage terms mark the position relevant without a type.

    Args:
        role: Job title/role text.
        company: Company/organization name.

    Returns:
        Tuple of (heritage_relevant: bool, heritage_type: Optional[str]).
    """
    import re

    combined = f"{role or ''} {company or ''}".lower()
    if not combined.strip():
        return (False, None)

    def blocked_by(terms):
        # Word-boundary match on any lowercased, stripped term.
        return any(
            re.search(r'\b' + re.escape(term.lower().strip()) + r'\b', combined)
            for term in terms
        )

    # Organization blocklist first, then role blocklist.
    if blocked_by(NON_HERITAGE_ORGANIZATIONS) or blocked_by(NON_HERITAGE_KEYWORDS):
        return (False, None)

    # Does the employer itself look like a heritage institution?
    is_heritage_org = any(k.lower() in combined for k in HERITAGE_ORGANIZATION_KEYWORDS)

    # Typed keywords, most specific types first; 'D' is handled separately.
    for heritage_type in ('A', 'M', 'L', 'G', 'S', 'C', 'O', 'R', 'E'):
        if any(k.lower() in combined for k in HERITAGE_KEYWORDS.get(heritage_type, [])):
            return (True, heritage_type)

    # 'D' (Digital) only at a heritage organization.
    if is_heritage_org and any(k.lower() in combined for k in HERITAGE_KEYWORDS.get('D', [])):
        return (True, 'D')

    # Generic heritage terms (without specific type).
    generic_terms = ('heritage', 'erfgoed', 'culture', 'cultuur', 'cultural', 'film', 'cinema',
                     'media', 'arts', 'kunst', 'creative', 'preservation', 'conservation',
                     'collection')
    if any(term in combined for term in generic_terms):
        return (True, None)

    return (False, None)
+ for job in career_history: + if job.get('heritage_relevant') is None: + needs_update = True + role = job.get('role') or job.get('title') + company = job.get('organization') or job.get('company') + heritage_relevant, heritage_type = detect_heritage_type(role, company) + job['heritage_relevant'] = heritage_relevant + job['heritage_type'] = heritage_type + if needs_update: + inner_profile['career_history'] = career_history + profile_data['profile_data'] = inner_profile + return ProfileResponse( profile_data=profile_data, linkedin_slug=result['linkedin_slug'], @@ -892,24 +1079,44 @@ async def get_profile( file_profile_data = data.get('profile_data', {}) # Transform experience → career_history for frontend compatibility - inner_profile = file_profile_data.get('profile_data', {}) + # Handle both nested (profile_data.profile_data) and flat (profile_data) structures + nested_profile = file_profile_data.get('profile_data', {}) + inner_profile = nested_profile if nested_profile else file_profile_data if inner_profile and 'experience' in inner_profile and 'career_history' not in inner_profile: experience = inner_profile.get('experience', []) if experience: + # Map field names: title→role, company→organization, date_range→dates + # Also classify each position as heritage-relevant or not career_history = [] for job in experience: + role = job.get('title') + company = job.get('company') + heritage_relevant, heritage_type = detect_heritage_type(role, company) career_item = { - 'role': job.get('title'), - 'organization': job.get('company'), - 'dates': job.get('duration'), + 'role': role, + 'organization': company, + 'dates': job.get('date_range') or job.get('duration'), # date_range has year info 'location': job.get('location'), 'description': job.get('description'), 'company_size': job.get('company_details'), 'current': job.get('current', False), + 'heritage_relevant': heritage_relevant, + 'heritage_type': heritage_type, } career_history.append(career_item) 
inner_profile['career_history'] = career_history - file_profile_data['profile_data'] = inner_profile + # career_history is now in inner_profile which is either nested or file_profile_data directly + + # Also add heritage classification to existing career_history entries that lack it + if inner_profile and 'career_history' in inner_profile: + career_history = inner_profile.get('career_history', []) + for job in career_history: + if job.get('heritage_relevant') is None: + role = job.get('role') or job.get('title') + company = job.get('organization') or job.get('company') + heritage_relevant, heritage_type = detect_heritage_type(role, company) + job['heritage_relevant'] = heritage_relevant + job['heritage_type'] = heritage_type return ProfileResponse( profile_data=file_profile_data, diff --git a/backend/rag/dspy_heritage_rag.py b/backend/rag/dspy_heritage_rag.py index 2f14d501ca..6da476f783 100644 --- a/backend/rag/dspy_heritage_rag.py +++ b/backend/rag/dspy_heritage_rag.py @@ -2250,11 +2250,27 @@ async def stream_heritage_rag( language: str = "nl", router: Optional[HeritageQueryRouter] = None, retriever: Optional[MultiHopHeritageRetriever] = None, + lm: Optional[Any] = None, ) -> AsyncIterator[str]: """Stream heritage RAG response with status updates. Yields NDJSON messages with status updates and final results. 
+ + Args: + question: The user's question + language: Language code (default "nl") + router: Optional pre-configured HeritageQueryRouter + retriever: Optional pre-configured MultiHopHeritageRetriever + lm: Optional DSPy LM instance for async-safe context (DSPy 3.x requirement) """ + from contextlib import nullcontext + + # Create DSPy context manager for async-safe LM access + def get_context(): + if lm is not None: + return dspy.context(lm=lm) + return nullcontext() + start_time = datetime.now(timezone.utc) # Initialize modules if not provided @@ -2271,8 +2287,9 @@ async def stream_heritage_rag( "timestamp": start_time.isoformat(), }) + "\n" - # Route query - routing = router(question=question, language=language) + # Route query (wrapped with DSPy context for async-safe LM access) + with get_context(): + routing = router(question=question, language=language) yield json.dumps({ "type": "routing", @@ -2323,12 +2340,13 @@ async def stream_heritage_rag( # Use dspy.streamify for token streaming (if available) try: - # Create streamified version of synthesizer - streamified = dspy.streamify(retriever.synthesizer) + # Create streamified version of synthesizer (wrapped with DSPy context) + with get_context(): + streamified = dspy.streamify(retriever.synthesizer) listener = HeritageStreamListener() - # Stream tokens + # Stream tokens (streamified retains context from creation) async for token in streamified( question=question, context="Sample context from retrieval", diff --git a/backend/rag/main.py b/backend/rag/main.py index e9321b0811..0d2060c1d9 100644 --- a/backend/rag/main.py +++ b/backend/rag/main.py @@ -1532,13 +1532,49 @@ async def dspy_query(request: DSPyQueryRequest) -> DSPyQueryResponse: pipeline = HeritageRAGPipeline(retriever=qdrant_retriever) # Execute query with conversation history - result = pipeline.forward( - embedding_model=request.embedding_model, - question=request.question, - language=request.language, - history=history, - 
include_viz=request.include_visualization, - ) + # Retry logic for transient API errors (e.g., Anthropic "Overloaded" errors) + max_retries = 3 + last_error: Exception | None = None + result = None + + for attempt in range(max_retries): + try: + result = pipeline.forward( + embedding_model=request.embedding_model, + question=request.question, + language=request.language, + history=history, + include_viz=request.include_visualization, + ) + break # Success, exit retry loop + except Exception as e: + last_error = e + error_str = str(e).lower() + # Check for retryable errors (API overload, rate limits, temporary failures) + is_retryable = any(keyword in error_str for keyword in [ + "overloaded", "rate_limit", "rate limit", "too many requests", + "529", "503", "502", "504", # HTTP status codes + "temporarily unavailable", "service unavailable", + "connection reset", "connection refused", "timeout" + ]) + + if is_retryable and attempt < max_retries - 1: + wait_time = 2 ** attempt # Exponential backoff: 1s, 2s, 4s + logger.warning( + f"Transient API error (attempt {attempt + 1}/{max_retries}): {e}. " + f"Retrying in {wait_time}s..." + ) + time.sleep(wait_time) + continue + else: + # Non-retryable error or max retries reached + raise + + # If we get here without a result (all retries exhausted), raise the last error + if result is None: + if last_error: + raise last_error + raise HTTPException(status_code=500, detail="Pipeline execution failed with no result") elapsed_ms = (time.time() - start_time) * 1000