From ca219340f28ffc7cd59f40f4a122d34d959be9b1 Mon Sep 17 00:00:00 2001
From: kempersc <sckemper@mailfence.com>
Date: Fri, 26 Dec 2025 14:30:31 +0100
Subject: [PATCH] enrich entries

---
 docs/GRAPH_SCORE_INHERITANCE.md               | 292 ++++++
 frontend/package.json                         |  10 +-
 .../schemas/20251121/linkml/manifest.json     |   2 +-
 .../components/query/ConversationPanel.css    | 103 +++
 .../components/query/ConversationPanel.tsx    |  35 +
 .../uml/CustodianTypeIndicator3D.tsx          |  36 +-
 .../src/lib/linkml/linkml-schema-service.ts   |  18 +
 frontend/src/lib/schema-custodian-mapping.ts  |  47 +-
 frontend/src/pages/LinkMLViewerPage.tsx       |  19 +-
 node_modules/.modules.yaml                    |   2 +-
 node_modules/.pnpm/lock.yaml                  |  10 +-
 .../node_modules/lucide-react/LICENSE         |  39 +
 .../node_modules/lucide-react/README.md       |  73 ++
 .../node_modules/lucide-react/dynamic.mjs     |  10 +
 .../lucide-react/dynamicIconImports.mjs       |   1 +
 .../node_modules/lucide-react/package.json    |  74 ++
 .../node_modules/react                        |   1 +
 node_modules/.pnpm/node_modules/lucide-react  |   2 +-
 pnpm-lock.yaml                                |  10 +-
 scripts/discover_custodian_websites.py        | 561 ++++++++++++
 scripts/discover_websites_crawl4ai.py         | 150 ++++
 scripts/enrich_custodian_logos_crawl4ai.py    |  17 +-
 scripts/index_persons_qdrant.py               |  75 +-
 src/glam_extractor/api/hybrid_retriever.py    | 846 ++++++++++++++++--
 24 files changed, 2304 insertions(+), 129 deletions(-)
 create mode 100644 docs/GRAPH_SCORE_INHERITANCE.md
 create mode 100644 node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/LICENSE
 create mode 100644 node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/README.md
 create mode 100644 node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/dynamic.mjs
 create mode 100644 node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/dynamicIconImports.mjs
 create mode 100644 node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/package.json
 create mode 120000 node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/react
 create mode 100644 scripts/discover_custodian_websites.py
 create mode 100644 scripts/discover_websites_crawl4ai.py

diff --git a/docs/GRAPH_SCORE_INHERITANCE.md b/docs/GRAPH_SCORE_INHERITANCE.md
new file mode 100644
index 0000000000..16c07dd934
--- /dev/null
+++ b/docs/GRAPH_SCORE_INHERITANCE.md
@@ -0,0 +1,292 @@
+# Graph Score Inheritance in Hybrid Retrieval
+
+## Overview
+
+The Heritage RAG system uses a **hybrid retrieval** approach that combines:
+1. **Vector search** (semantic similarity via embeddings)
+2. **Knowledge graph expansion** (SPARQL-based relationship discovery)
+
+This document explains the **graph score inheritance** feature that ensures vector search results benefit from knowledge graph relationships.
+
+## The Problem
+
+Before graph score inheritance, the hybrid retrieval had a scoring gap:
+
+| Result Source | Vector Score | Graph Score | Combined Score |
+|---------------|--------------|-------------|----------------|
+| Vector search results | 0.5-0.8 | **0.0** | 0.35-0.56 |
+| Graph expansion results | 0.0 | 0.5-0.8 | 0.15-0.24 |
+
+**Why this happened:**
+- Vector search finds institutions semantically similar to the query
+- Graph expansion finds **different** institutions (same city/type) with different GHCIDs
+- Since GHCIDs don't match, no direct merging occurs
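+- Vector results always dominate because `combined = 0.7 * vector + 0.3 * graph`: a graph-only result (vector = 0) tops out at 0.3 * 0.8 = 0.24, below the 0.35 that even a weak vector hit (0.5) scores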
+
+**Example before fix:**
+```
+Query: "Archieven in Amsterdam"
+
+1. Stadsarchief Amsterdam      | V:0.659 G:0.000 C:0.461
+2. Noord-Hollands Archief      | V:0.675 G:0.000 C:0.472
+3. The Black Archives          | V:0.636 G:0.000 C:0.445
+```
+
+The graph expansion was finding related institutions in Amsterdam, but that information wasn't reflected in the scores.
+
+## The Solution: Graph Score Inheritance
+
+Vector results now **inherit** graph scores from related institutions found via graph expansion.
+
+### How It Works
+
+```
+1. Vector Search
+   └── Returns: [Inst_A, Inst_B, Inst_C] with vector_scores
+   
+2. Graph Expansion (for top 5 vector results)
+   └── For Inst_A in Amsterdam:
+       └── SPARQL finds: [Inst_X, Inst_Y] also in Amsterdam
+       └── These get graph_score=0.8 (same_city)
+       └── They track: related_institutions=[Inst_A.ghcid]
+
+3. Inheritance Calculation
+   └── Inst_A inherits from [Inst_X, Inst_Y]:
+       inherited_score = avg([0.8, 0.8]) * 0.5 = 0.4
+   └── Inst_A.graph_score = max(0.0, 0.4) = 0.4
+
+4. Combined Scoring
+   └── Inst_A.combined = 0.7 * vector + 0.3 * 0.4  (inheritance adds 0.12 to the combined score)
+```
+
+### Inheritance Factor
+
+```python
+INHERITANCE_FACTOR = 0.5  # Inherit 50% of related institutions' graph scores
+```
+
+This means:
+- Same-city institutions (graph_score=0.8) → inherited score of **0.40**
+- Same-type institutions (graph_score=0.5) → inherited score of **0.25**
+
+## Implementation Details
+
+### File Location
+
+```
+src/glam_extractor/api/hybrid_retriever.py
+```
+
+### Key Method: `_combine_and_rank()`
+
+Located at lines ~1539-1671, this method:
+
+1. **Creates lookup by GHCID** for merging
+2. **Handles direct merges** when graph result GHCID matches vector result
+3. **Builds inheritance map** tracking which vector results each graph result was expanded from
+4. **Applies inheritance** calculating inherited scores for vector results
+5. **Computes combined scores** with the formula: `0.7 * vector + 0.3 * graph`
+
+### Code Structure
+
+```python
+def _combine_and_rank(
+    self,
+    vector_results: list[RetrievedInstitution],
+    graph_results: list[RetrievedInstitution],
+    k: int
+) -> list[RetrievedInstitution]:
+    """Combine vector and graph results with weighted scoring and graph inheritance."""
+    
+    # 1. Create lookup by GHCID
+    results_by_ghcid: dict[str, RetrievedInstitution] = {}
+    vector_ghcids = set()
+    
+    # 2. Add vector results
+    for inst in vector_results:
+        results_by_ghcid[inst.ghcid] = inst
+        vector_ghcids.add(inst.ghcid)
+    
+    # 3. Build inheritance map: vector_ghcid -> [(related_ghcid, graph_score, reason)]
+    inheritance_map: dict[str, list[tuple[str, float, str]]] = {g: [] for g in vector_ghcids}
+    
+    for inst in graph_results:
+        if inst.ghcid in results_by_ghcid:
+            # Direct merge
+            existing = results_by_ghcid[inst.ghcid]
+            existing.graph_score = max(existing.graph_score, inst.graph_score)
+        else:
+            # New from graph - track for inheritance
+            results_by_ghcid[inst.ghcid] = inst
+            for seed_ghcid in inst.related_institutions:
+                if seed_ghcid in inheritance_map:
+                    inheritance_map[seed_ghcid].append(
+                        (inst.ghcid, inst.graph_score, inst.expansion_reason)
+                    )
+    
+    # 4. Apply inheritance
+    INHERITANCE_FACTOR = 0.5
+    for vector_ghcid, related_list in inheritance_map.items():
+        if related_list:
+            inst = results_by_ghcid[vector_ghcid]
+            related_scores = [score for _, score, _ in related_list]
+            inherited_score = (sum(related_scores) / len(related_scores)) * INHERITANCE_FACTOR
+            inst.graph_score = max(inst.graph_score, inherited_score)
+    
+    # 5. Calculate combined scores
+    for inst in results_by_ghcid.values():
+        inst.combined_score = (
+            self.vector_weight * inst.vector_score +
+            self.graph_weight * inst.graph_score
+        )
+    
+    return sorted(results_by_ghcid.values(), key=lambda x: x.combined_score, reverse=True)[:k]
+```
+
+### Graph Expansion Scores
+
+The `_expand_via_graph()` method assigns these base scores:
+
+| Expansion Type | Graph Score | SPARQL Pattern |
+|----------------|-------------|----------------|
+| Same city | 0.8 | `?s schema:location ?loc . ?loc hc:cityCode ?cityCode` |
+| Same institution type | 0.5 | `?s hc:institutionType ?type` |
+
+## Results
+
+### Before (Graph Score = 0.0)
+
+```
+Query: "Welke musea zijn er in Utrecht?"
+
+1. Centraal Museum              | V:0.589 G:0.000 C:0.412
+2. Museum Speelklok             | V:0.591 G:0.000 C:0.414
+3. Universiteitsmuseum Utrecht  | V:0.641 G:0.000 C:0.449
+```
+
+### After (Graph Score Inherited)
+
+```
+Query: "Welke musea zijn er in Utrecht?"
+
+1. Universiteitsmuseum Utrecht  | V:0.641 G:0.400 C:0.569
+2. Museum Speelklok             | V:0.591 G:0.400 C:0.534
+3. Centraal Museum              | V:0.589 G:0.400 C:0.532
+```
+
+**Key improvements:**
+- Graph scores now **0.400** (inherited from same-city museums)
+- Combined scores **increased by ~29%** (0.412 → 0.532)
+- Ranking now considers **geographic relevance**
+
+### More Examples
+
+```
+Query: "Bibliotheken in Den Haag"
+
+1. Centrale Bibliotheek         | V:0.697 G:0.400 C:0.608
+2. Koninklijke Bibliotheek      | V:0.676 G:0.400 C:0.593
+3. Huis van het Boek            | V:0.630 G:0.400 C:0.561
+4. Bibliotheek Hoeksche Waard   | V:0.613 G:0.400 C:0.549
+5. Centrale Bibliotheek (other) | V:0.623 G:0.000 C:0.436  <- No inheritance (different city)
+```
+
+## Configuration
+
+### Weights (in `HybridRetriever.__init__`)
+
+```python
+self.vector_weight = 0.7  # Semantic similarity importance
+self.graph_weight = 0.3   # Knowledge graph importance
+```
+
+### Inheritance Factor
+
+```python
+INHERITANCE_FACTOR = 0.5  # In _combine_and_rank()
+```
+
+**Tuning considerations:**
+- Higher factor (0.6-0.8): Stronger influence from graph relationships
+- Lower factor (0.3-0.4): More conservative, vector similarity dominates
+- Current value (0.5): Balanced approach
+
+## Logging
+
+The implementation includes detailed logging for debugging:
+
+```python
+# INFO level (always visible)
+logger.info(f"Graph inheritance applied to {len(inheritance_boosts)} vector results: {ghcids}...")
+
+# DEBUG level (when LOG_LEVEL=DEBUG)
+logger.debug(f"Inheritance: {ghcid} graph_score: {old:.3f} -> {new:.3f} (from {n} related)")
+```
+
+**Check logs on production:**
+```bash
+ssh root@91.98.224.44 "journalctl -u glam-rag-api --since '5 minutes ago' | grep -i inheritance"
+```
+
+## API Response Structure
+
+The graph score is exposed in the API response:
+
+```jsonc
+{
+  "retrieved_results": [
+    {
+      "ghcid": "NL-UT-UTR-M-CM",
+      "name": "Centraal Museum",
+      "scores": {
+        "vector": 0.589,
+        "graph": 0.400,    // <-- Now populated via inheritance
+        "combined": 0.532
+      },
+      "related_institutions": ["NL-UT-UTR-M-MS", "NL-UT-UTR-M-UMUU"]
+    }
+  ]
+}
+```
+
+## Deployment
+
+**File to deploy:**
+```bash
+scp /Users/kempersc/apps/glam/src/glam_extractor/api/hybrid_retriever.py \
+    root@91.98.224.44:/opt/glam-backend/rag/glam_extractor/api/
+```
+
+**Restart service:**
+```bash
+ssh root@91.98.224.44 "systemctl restart glam-rag-api"
+```
+
+**Verify:**
+```bash
+curl -s -X POST 'https://archief.support/api/rag/dspy/query' \
+  -H 'Content-Type: application/json' \
+  -d '{"question": "Musea in Rotterdam", "language": "nl"}' | \
+  python3 -c "import sys,json; r=json.load(sys.stdin)['retrieved_results']; print('\n'.join(f\"{x['name'][:30]:30} G:{x['scores']['graph']:.2f}\" for x in r[:5]))"
+```
+
+## Related Files
+
+| File | Purpose |
+|------|---------|
+| `hybrid_retriever.py` | Main implementation with `_combine_and_rank()` |
+| `dspy_heritage_rag.py` | RAG pipeline that calls `retriever.search()` |
+| `main.py` | FastAPI endpoints serving the RAG API |
+
+## Future Improvements
+
+1. **Dynamic inheritance factor**: Adjust based on query type (geographic vs. thematic)
+2. **Multi-hop expansion**: Inherit from institutions 2+ hops away
+3. **Weighted inheritance**: Weight by relationship type (same_city=0.8, same_type=0.5)
+4. **Negative inheritance**: Penalize results unrelated to graph findings
+
+---
+
+**Last Updated:** 2025-12-24  
+**Implemented:** 2025-12-23  
+**Status:** Production (archief.support)
diff --git a/frontend/package.json b/frontend/package.json
index 9b49371dea..5e85f74dbd 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -16,14 +16,14 @@
     "test:coverage": "vitest run --coverage"
   },
   "dependencies": {
-    "@glam/api-client": "workspace:*",
-    "@glam/hooks": "workspace:*",
-    "@glam/theme": "workspace:*",
-    "@glam/ui": "workspace:*",
     "@codemirror/lang-javascript": "^6.2.4",
     "@duckdb/duckdb-wasm": "^1.31.0",
     "@emotion/react": "^11.14.0",
     "@emotion/styled": "^11.14.1",
+    "@glam/api-client": "workspace:*",
+    "@glam/hooks": "workspace:*",
+    "@glam/theme": "workspace:*",
+    "@glam/ui": "workspace:*",
     "@mui/icons-material": "^7.3.6",
     "@mui/material": "^7.3.5",
     "@tanstack/react-query": "^5.90.10",
@@ -45,7 +45,7 @@
     "fdir": "^6.5.0",
     "js-yaml": "^4.1.1",
     "lodash": "^4.17.21",
-    "lucide-react": "^0.561.0",
+    "lucide-react": "^0.562.0",
     "maplibre-gl": "^5.14.0",
     "mermaid": "^11.4.0",
     "n3": "^1.26.0",
diff --git a/frontend/public/schemas/20251121/linkml/manifest.json b/frontend/public/schemas/20251121/linkml/manifest.json
index a8ea872d2f..7495688a84 100644
--- a/frontend/public/schemas/20251121/linkml/manifest.json
+++ b/frontend/public/schemas/20251121/linkml/manifest.json
@@ -1,5 +1,5 @@
 {
-  "generated": "2025-12-23T16:58:31.474Z",
+  "generated": "2025-12-25T12:42:29.931Z",
   "version": "1.0.0",
   "categories": [
     {
diff --git a/frontend/src/components/query/ConversationPanel.css b/frontend/src/components/query/ConversationPanel.css
index b5eec5700e..2cf41fc5c6 100644
--- a/frontend/src/components/query/ConversationPanel.css
+++ b/frontend/src/components/query/ConversationPanel.css
@@ -872,3 +872,106 @@
     padding: 0.125rem 0.375rem;
   }
 }
+
+/* ==========================================================================
+   Chain-of-Thought Reasoning Display (GLM 4.7 Interleaved Thinking)
+   ========================================================================== */
+
+.conversation-panel__reasoning {
+  margin: 0.75rem 0;
+  border: 1px solid var(--border-color, #e0e0e0);
+  border-radius: 0.5rem;
+  overflow: hidden;
+  background: var(--surface-secondary, #fafafa);
+}
+
+.conversation-panel__reasoning-toggle {
+  display: flex;
+  align-items: center;
+  gap: 0.5rem;
+  padding: 0.5rem 0.75rem;
+  background: var(--surface-secondary, #f5f5f5);
+  cursor: pointer;
+  font-size: 0.8125rem;
+  color: var(--text-secondary, #757575);
+  border: none;
+  width: 100%;
+  transition: background-color 0.2s;
+  list-style: none; /* Remove default marker */
+}
+
+.conversation-panel__reasoning-toggle::-webkit-details-marker {
+  display: none; /* Hide default arrow in WebKit browsers */
+}
+
+.conversation-panel__reasoning-toggle::before {
+  content: '▶';
+  font-size: 0.625rem;
+  transition: transform 0.2s;
+}
+
+.conversation-panel__reasoning[open] .conversation-panel__reasoning-toggle::before {
+  transform: rotate(90deg);
+}
+
+.conversation-panel__reasoning-toggle:hover {
+  background: var(--surface-tertiary, #eeeeee);
+}
+
+.conversation-panel__reasoning-toggle svg {
+  color: var(--primary-color, #1976d2);
+  flex-shrink: 0;
+}
+
+.conversation-panel__reasoning-content {
+  padding: 0.75rem;
+  background: var(--surface-code, #1e1e1e);
+  max-height: 300px;
+  overflow-y: auto;
+  border-top: 1px solid var(--border-color, #e0e0e0);
+}
+
+.conversation-panel__reasoning-content pre {
+  margin: 0;
+  white-space: pre-wrap;
+  word-break: break-word;
+  font-size: 0.75rem;
+  line-height: 1.5;
+  color: var(--text-code, #d4d4d4);
+  font-family: 'Fira Code', 'Monaco', 'Consolas', monospace;
+}
+
+/* Scrollbar styling for reasoning content */
+.conversation-panel__reasoning-content::-webkit-scrollbar {
+  width: 6px;
+}
+
+.conversation-panel__reasoning-content::-webkit-scrollbar-track {
+  background: #2d2d2d;
+}
+
+.conversation-panel__reasoning-content::-webkit-scrollbar-thumb {
+  background: #555;
+  border-radius: 3px;
+}
+
+.conversation-panel__reasoning-content::-webkit-scrollbar-thumb:hover {
+  background: #777;
+}
+
+/* Responsive: Reasoning section */
+@media (max-width: 768px) {
+  .conversation-panel__reasoning-toggle {
+    font-size: 0.75rem;
+    padding: 0.375rem 0.5rem;
+  }
+  
+  .conversation-panel__reasoning-content {
+    max-height: 200px;
+    padding: 0.5rem;
+  }
+  
+  .conversation-panel__reasoning-content pre {
+    font-size: 0.6875rem;
+  }
+}
diff --git a/frontend/src/components/query/ConversationPanel.tsx b/frontend/src/components/query/ConversationPanel.tsx
index 1c25db7763..c012bedae9 100644
--- a/frontend/src/components/query/ConversationPanel.tsx
+++ b/frontend/src/components/query/ConversationPanel.tsx
@@ -164,6 +164,9 @@ const TEXT = {
   sourcesUsed: { nl: 'Bronnen gebruikt', en: 'Sources used' },
   llmProvider: { nl: 'Model', en: 'Model' },
   answer: { nl: 'Antwoord', en: 'Answer' },
+  showReasoning: { nl: 'Toon redenering', en: 'Show reasoning' },
+  hideReasoning: { nl: 'Verberg redenering', en: 'Hide reasoning' },
+  reasoningTitle: { nl: 'Denkproces', en: 'Thinking Process' },
 };
 
 // Example questions to help users get started - shorter list
@@ -180,6 +183,21 @@ const EXAMPLE_QUESTIONS = {
   ],
 };
 
+// LLM Response Metadata - matches backend LLMResponseMetadata model
+interface LLMResponseMetadata {
+  content?: string;
+  reasoning_content?: string;  // GLM 4.7 chain-of-thought reasoning
+  model?: string;
+  provider?: string;  // zai, anthropic, openai, groq
+  prompt_tokens?: number;
+  completion_tokens?: number;
+  total_tokens?: number;
+  thinking_mode?: string;  // enabled, disabled, interleaved
+  latency_ms?: number;
+  cached?: boolean;
+  finish_reason?: string;
+}
+
 interface Message {
   id: string;
   role: 'user' | 'assistant';
@@ -192,6 +210,7 @@ interface Message {
   error?: string;
   errorCode?: string;
   llmProviderUsed?: string;  // Which LLM provider generated this response
+  llmResponse?: LLMResponseMetadata;  // Full LLM response metadata including chain-of-thought
 }
 
 interface HistoryItem {
@@ -351,6 +370,7 @@ export const ConversationPanel: React.FC<ConversationPanelProps> = ({ onQueryGen
     answer: string; 
     sourcesUsed: string[];
     llmProviderUsed?: string;
+    llmResponse?: LLMResponseMetadata;  // Full LLM response with reasoning_content
   }> => {
     // Determine API endpoint based on environment
     const hostname = window.location.hostname;
@@ -395,6 +415,7 @@ export const ConversationPanel: React.FC<ConversationPanelProps> = ({ onQueryGen
       answer: data.answer || data.explanation || '',
       sourcesUsed: data.sources_used || selectedSources,
       llmProviderUsed: data.llm_provider_used,
+      llmResponse: data.llm_response,  // Pass through chain-of-thought metadata
     };
   };
   
@@ -445,6 +466,7 @@ export const ConversationPanel: React.FC<ConversationPanelProps> = ({ onQueryGen
               sparql: result.sparql,
               sourcesUsed: result.sourcesUsed,
               llmProviderUsed: result.llmProviderUsed,
+              llmResponse: result.llmResponse,
               isLoading: false,
             }
           : msg
@@ -928,6 +950,19 @@ export const ConversationPanel: React.FC<ConversationPanelProps> = ({ onQueryGen
                     <>
                       <p>{message.content}</p>
                       
+                      {/* Chain-of-Thought Reasoning (GLM 4.7 Interleaved Thinking) */}
+                      {message.llmResponse?.reasoning_content && (
+                        <details className="conversation-panel__reasoning">
+                          <summary className="conversation-panel__reasoning-toggle">
+                            <Sparkles size={14} />
+                            <span>{t('showReasoning')}</span>
+                          </summary>
+                          <div className="conversation-panel__reasoning-content">
+                            <pre>{message.llmResponse.reasoning_content}</pre>
+                          </div>
+                        </details>
+                      )}
+                      
                       {/* Sources Used Badges */}
                       {message.sourcesUsed && message.sourcesUsed.length > 0 && (
                         <div className="conversation-panel__sources-used">
diff --git a/frontend/src/components/uml/CustodianTypeIndicator3D.tsx b/frontend/src/components/uml/CustodianTypeIndicator3D.tsx
index 5707191f59..e44ebb1e4d 100644
--- a/frontend/src/components/uml/CustodianTypeIndicator3D.tsx
+++ b/frontend/src/components/uml/CustodianTypeIndicator3D.tsx
@@ -196,9 +196,9 @@ function createGLAMPolyhedronGeometry(radius: number = 1): THREE.BufferGeometry
  * Creates text sprites positioned at the center of each icosahedron face.
  * 
  * Label visibility behavior:
- * - Relevant types: Always visible (opacity 1) - these are the types passed in highlightTypes
- * - Non-relevant types: Only visible when expanded (opacity 0 when collapsed)
- * - If highlightTypes is empty (universal), all labels are shown when expanded only
+ * - Empty array (no annotation): NO labels shown at all (blank cube)
+ * - 19+ types (universal): All labels shown when expanded only
+ * - Specific types (1-18): Only those letters shown (always visible)
  */
 function createFaceLabels(
   geometry: THREE.BufferGeometry,
@@ -209,14 +209,16 @@ function createFaceLabels(
   const positions = geometry.getAttribute('position');
   const faceCount = positions.count / 3;
   const highlightSet = new Set(highlightTypes);
-  const isUniversal = highlightTypes.length === 0 || highlightTypes.length >= 19;
+  const hasNoAnnotation = highlightTypes.length === 0;
+  const isUniversal = highlightTypes.length >= 19;
   
   for (let faceIndex = 0; faceIndex < Math.min(faceCount, 20); faceIndex++) {
     const typeIndex = faceIndex % 19;
     const typeCode = CUSTODIAN_TYPE_CODES[typeIndex];
     
     // Determine if this type is relevant (highlighted)
-    const isRelevant = highlightTypes.length === 0 || highlightSet.has(typeCode);
+    // Empty array = no annotation = nothing is relevant
+    const isRelevant = !hasNoAnnotation && (isUniversal || highlightSet.has(typeCode));
     
     // Calculate face center (average of 3 vertices)
     const v0 = new THREE.Vector3(
@@ -559,7 +561,7 @@ export const CustodianTypeIndicator3D: React.FC<CustodianTypeIndicator3DProps> =
   
   // Tooltip text
   const tooltipText = useMemo(() => {
-    if (types.length === 0) return 'Heritage Custodian Types (GLAMORCUBESFIXPHDNT)';
+    if (types.length === 0) return 'No custodian types';
     return types
       .map(code => getCustodianTypeByCode(code)?.label[language] ?? code)
       .join(', ');
@@ -667,23 +669,27 @@ export const CustodianTypeIndicator3D: React.FC<CustodianTypeIndicator3DProps> =
     
     // Update label visibility based on expanded state and highlighted types
     // Label visibility rules:
-    // - Non-universal elements (1-18 types): Show relevant letters only (both collapsed and expanded)
-    // - Universal elements (19 types or empty): Show all letters only when expanded
+    // - No annotation (empty array, length 0): Show NO letters (blank cube)
+    // - Universal annotation (19+ types): Show all letters only when expanded
+    // - Specific types (1-18 types): Show ONLY those letters (both collapsed and expanded)
     if (labelsGroupRef.current) {
       const highlightSet = new Set(types);
-      const isUniversal = types.length === 0 || types.length >= 19;
+      const hasNoAnnotation = types.length === 0;
+      const isUniversal = types.length >= 19;
       
       labelsGroupRef.current.children.forEach((child) => {
         if (child instanceof THREE.Sprite && child.userData.typeCode) {
           const typeCode = child.userData.typeCode as CustodianTypeCode;
-          const isRelevant = types.length === 0 || highlightSet.has(typeCode);
+          const isRelevant = highlightSet.has(typeCode);
           
-          if (isUniversal) {
-            // Universal elements: Show all letters only when expanded
+          if (hasNoAnnotation) {
+            // No annotation: Show NO letters at all (blank cube)
+            child.material.opacity = 0;
+          } else if (isUniversal) {
+            // Universal annotation (19+ types): Show all letters only when expanded
             child.material.opacity = isExpanded ? 1 : 0;
           } else {
-            // Non-universal elements: Show ONLY relevant letters (hidden otherwise)
-            // When expanded, relevant letters get full opacity
+            // Specific types (1-18): Show ONLY relevant letters (hidden otherwise)
             if (isRelevant) {
               child.material.opacity = 1; // Relevant letters always visible
             } else {
@@ -1172,7 +1178,7 @@ export const CustodianTypeIndicator3DFallback: React.FC<CustodianTypeIndicator3D
   const color = config?.color ?? '#888888';
   
   const tooltipText = useMemo(() => {
-    if (types.length === 0) return 'Heritage Custodian Types';
+    if (types.length === 0) return 'No custodian types';
     return types
       .map(code => getCustodianTypeByCode(code)?.label[language] ?? code)
       .join(', ');
diff --git a/frontend/src/lib/linkml/linkml-schema-service.ts b/frontend/src/lib/linkml/linkml-schema-service.ts
index bfc8947e42..9d17b76c44 100644
--- a/frontend/src/lib/linkml/linkml-schema-service.ts
+++ b/frontend/src/lib/linkml/linkml-schema-service.ts
@@ -1071,6 +1071,24 @@ class LinkMLSchemaService {
     return this.parseCustodianTypesAnnotation(slot.annotations.custodian_types);
   }
 
+  /**
+   * Get all classes that use a given slot
+   * Returns array of class names that have this slot in their slots array
+   */
+  async getClassesUsingSlot(slotName: string): Promise<string[]> {
+    await this.initialize();
+    const classes: string[] = [];
+    
+    for (const [className, schema] of this.classSchemas.entries()) {
+      const classDef = schema.classes?.[className];
+      if (classDef?.slots?.includes(slotName)) {
+        classes.push(className);
+      }
+    }
+    
+    return classes;
+  }
+
   /**
    * Get custodian_types annotation from an enum definition
    * Returns null if annotation not found
diff --git a/frontend/src/lib/schema-custodian-mapping.ts b/frontend/src/lib/schema-custodian-mapping.ts
index 395bf4533f..5ef49f41a0 100644
--- a/frontend/src/lib/schema-custodian-mapping.ts
+++ b/frontend/src/lib/schema-custodian-mapping.ts
@@ -260,7 +260,11 @@ function validateCustodianTypes(types: string[]): CustodianTypeCode[] {
  * Priority:
  * 1. Read from LinkML schema annotations (custodian_types)
  * 2. Fall back to static mapping
- * 3. Default to all types (universal)
+ * 3. Default to EMPTY ARRAY (no types assigned) - cube will show no letters
+ * 
+ * NOTE: We return [] instead of DEFAULT_CUSTODIAN_TYPES when no annotation exists
+ * because classes without explicit custodian_types annotations should NOT display
+ * all 19 letters on the cube. Only classes with explicit annotations should show letters.
  */
 export async function getCustodianTypesForClassAsync(className: string): Promise<CustodianTypeCode[]> {
   try {
@@ -276,15 +280,23 @@ export async function getCustodianTypesForClassAsync(className: string): Promise
     console.warn(`[CustodianMapping] Error reading annotations for class ${className}:`, error);
   }
   
-  // Fall back to static mapping
-  return CLASS_TO_CUSTODIAN_TYPE[className] || DEFAULT_CUSTODIAN_TYPES;
+  // Fall back to static mapping, or empty array if no mapping exists
+  // Empty array means "no custodian types assigned" - cube will show no letters
+  return CLASS_TO_CUSTODIAN_TYPE[className] || [];
 }
 
 /**
  * Get custodian types for a schema slot (async version)
+ * 
+ * Priority:
+ * 1. Read from slot's own LinkML schema annotations (custodian_types)
+ * 2. Inherit from parent class(es) that use this slot
+ * 3. Fall back to static mapping
+ * 4. Return empty array (no types assigned - cube shows no letters)
  */
 export async function getCustodianTypesForSlotAsync(slotName: string): Promise<CustodianTypeCode[]> {
   try {
+    // 1. Try slot's own annotation first
     const annotationTypes = await linkmlSchemaService.getSlotCustodianTypes(slotName);
     if (annotationTypes && annotationTypes.length > 0) {
       const validated = validateCustodianTypes(annotationTypes);
@@ -292,15 +304,38 @@ export async function getCustodianTypesForSlotAsync(slotName: string): Promise<C
         return validated;
       }
     }
+    
+    // 2. Try to inherit from parent class(es) that use this slot
+    const parentClasses = await linkmlSchemaService.getClassesUsingSlot(slotName);
+    if (parentClasses.length > 0) {
+      const inheritedTypes = new Set<CustodianTypeCode>();
+      for (const className of parentClasses) {
+        const classTypes = await linkmlSchemaService.getClassCustodianTypes(className);
+        if (classTypes && classTypes.length > 0) {
+          const validated = validateCustodianTypes(classTypes);
+          validated.forEach(t => inheritedTypes.add(t));
+        }
+      }
+      if (inheritedTypes.size > 0) {
+        return Array.from(inheritedTypes);
+      }
+    }
   } catch (error) {
     console.warn(`[CustodianMapping] Error reading annotations for slot ${slotName}:`, error);
   }
   
-  return SLOT_TO_CUSTODIAN_TYPE[slotName] || DEFAULT_CUSTODIAN_TYPES;
+  // 3. Fall back to static mapping, or empty array if no mapping exists
+  // Empty array means "no custodian types assigned" - cube will show no letters
+  return SLOT_TO_CUSTODIAN_TYPE[slotName] || [];
 }
 
 /**
  * Get custodian types for a schema enum (async version)
+ * 
+ * Priority:
+ * 1. Read from enum's LinkML schema annotations (custodian_types)
+ * 2. Fall back to static mapping
+ * 3. Return empty array (no types assigned - cube shows no letters)
  */
 export async function getCustodianTypesForEnumAsync(enumName: string): Promise<CustodianTypeCode[]> {
   try {
@@ -315,5 +350,7 @@ export async function getCustodianTypesForEnumAsync(enumName: string): Promise<C
     console.warn(`[CustodianMapping] Error reading annotations for enum ${enumName}:`, error);
   }
   
-  return ENUM_TO_CUSTODIAN_TYPE[enumName] || DEFAULT_CUSTODIAN_TYPES;
+  // Fall back to static mapping, or empty array if no mapping exists
+  // Empty array means "no custodian types assigned" - cube will show no letters
+  return ENUM_TO_CUSTODIAN_TYPE[enumName] || [];
 }
diff --git a/frontend/src/pages/LinkMLViewerPage.tsx b/frontend/src/pages/LinkMLViewerPage.tsx
index 7c61d0fceb..d8e6ce5afc 100644
--- a/frontend/src/pages/LinkMLViewerPage.tsx
+++ b/frontend/src/pages/LinkMLViewerPage.tsx
@@ -732,7 +732,7 @@ const LinkMLViewerPage: React.FC = () => {
   const mainContentRef = useRef<HTMLElement>(null);
   
   // Schema loading progress tracking
-  const { progress: schemaProgress, isLoading: isSchemaServiceLoading } = useSchemaLoadingProgress();
+  const { progress: schemaProgress, isLoading: isSchemaServiceLoading, isComplete: isSchemaServiceComplete } = useSchemaLoadingProgress();
   
   // Handler for filtering by custodian type (clicking polyhedron face or legend item)
   // Multi-select toggle behavior: clicking type adds/removes from set
@@ -881,17 +881,32 @@ const LinkMLViewerPage: React.FC = () => {
 
   // Load custodian types from schema annotations when schema changes
   // This pre-loads types asynchronously so they're available for rendering
+  // IMPORTANT: Wait for schema service to complete loading before fetching custodian types
+  // to avoid race condition where annotations aren't available yet
   useEffect(() => {
     if (!schema) {
       setCustodianTypesLoaded(false);
       return;
     }
     
+    // Don't load custodian types until schema service has finished loading all class files
+    // This prevents the race condition where we try to read annotations before they're loaded
+    if (!isSchemaServiceComplete) {
+      console.log('[LinkMLViewerPage] Waiting for schema service to complete before loading custodian types...');
+      return;
+    }
+    
     const loadCustodianTypes = async () => {
       const classes = extractClasses(schema);
       const slots = extractSlots(schema);
       const enums = extractEnums(schema);
       
+      console.log('[LinkMLViewerPage] Schema service complete, loading custodian types for', {
+        classes: classes.length,
+        slots: slots.length,
+        enums: enums.length
+      });
+      
       // Load types for all classes in parallel
       const classTypesPromises = classes.map(async (cls) => {
         const types = await getCustodianTypesForClassAsync(cls.name);
@@ -951,7 +966,7 @@ const LinkMLViewerPage: React.FC = () => {
     };
     
     loadCustodianTypes();
-  }, [schema]);
+  }, [schema, isSchemaServiceComplete]);
 
   const toggleSection = (section: string) => {
     setExpandedSections(prev => {
diff --git a/node_modules/.modules.yaml b/node_modules/.modules.yaml
index 72fe6ed40d..c2b2597015 100644
--- a/node_modules/.modules.yaml
+++ b/node_modules/.modules.yaml
@@ -987,7 +987,7 @@ hoistedDependencies:
     loose-envify: private
   lru-cache@11.2.4:
     lru-cache: private
-  lucide-react@0.561.0(react@19.2.3):
+  lucide-react@0.562.0(react@19.2.3):
     lucide-react: private
   lz-string@1.5.0:
     lz-string: private
diff --git a/node_modules/.pnpm/lock.yaml b/node_modules/.pnpm/lock.yaml
index d9bdb1092f..d74fd5f526 100644
--- a/node_modules/.pnpm/lock.yaml
+++ b/node_modules/.pnpm/lock.yaml
@@ -169,8 +169,8 @@ importers:
         specifier: ^4.17.21
         version: 4.17.21
       lucide-react:
-        specifier: ^0.561.0
-        version: 0.561.0(react@19.2.3)
+        specifier: ^0.562.0
+        version: 0.562.0(react@19.2.3)
       maplibre-gl:
         specifier: ^5.14.0
         version: 5.15.0
@@ -2507,8 +2507,8 @@ packages:
   lru-cache@5.1.1:
     resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==}
 
-  lucide-react@0.561.0:
-    resolution: {integrity: sha512-Y59gMY38tl4/i0qewcqohPdEbieBy7SovpBL9IFebhc2mDd8x4PZSOsiFRkpPcOq6bj1r/mjH/Rk73gSlIJP2A==}
+  lucide-react@0.562.0:
+    resolution: {integrity: sha512-82hOAu7y0dbVuFfmO4bYF1XEwYk/mEbM5E+b1jgci/udUBEE/R7LF5Ip0CCEmXe8AybRM8L+04eP+LGZeDvkiw==}
     peerDependencies:
       react: ^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0
 
@@ -5660,7 +5660,7 @@ snapshots:
     dependencies:
       yallist: 3.1.1
 
-  lucide-react@0.561.0(react@19.2.3):
+  lucide-react@0.562.0(react@19.2.3):
     dependencies:
       react: 19.2.3
 
diff --git a/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/LICENSE b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/LICENSE
new file mode 100644
index 0000000000..46e6962181
--- /dev/null
+++ b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/LICENSE
@@ -0,0 +1,39 @@
+ISC License
+
+Copyright (c) for portions of Lucide are held by Cole Bemis 2013-2023 as part of Feather (MIT). All other copyright (c) for Lucide are held by Lucide Contributors 2025.
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+---
+
+The MIT License (MIT) (for portions derived from Feather)
+
+Copyright (c) 2013-2023 Cole Bemis
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/README.md b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/README.md
new file mode 100644
index 0000000000..8d02efe968
--- /dev/null
+++ b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/README.md
@@ -0,0 +1,73 @@
+<p align="center">
+  <a href="https://github.com/lucide-icons/lucide">
+    <img src="https://lucide.dev/package-logos/lucide-react.svg" alt="Lucide icon library for React applications." width="540">
+  </a>
+</p>
+
+<p align="center">
+Lucide icon library for React applications.
+</p>
+
+<div align="center">
+
+  [![npm](https://img.shields.io/npm/v/lucide-react?color=blue)](https://www.npmjs.com/package/lucide-react)
+  ![NPM Downloads](https://img.shields.io/npm/dw/lucide-react)
+  [![GitHub](https://img.shields.io/github/license/lucide-icons/lucide)](https://lucide.dev/license)
+</div>
+
+<p align="center">
+  <a href="https://lucide.dev/guide/">About</a>
+  ·
+  <a href="https://lucide.dev/icons/">Icons</a>
+  ·
+  <a href="https://lucide.dev/guide/packages/lucide-react">Documentation</a>
+  ·
+  <a href="https://lucide.dev/license">License</a>
+</p>
+
+# Lucide React
+
+Implementation of the lucide icon library for React applications.
+
+## Installation
+
+```sh
+pnpm add lucide-react
+```
+
+```sh
+npm install lucide-react
+```
+
+```sh
+yarn add lucide-react
+```
+
+```sh
+bun add lucide-react
+```
+
+## Documentation
+
+For full documentation, visit [lucide.dev](https://lucide.dev/guide/packages/lucide-react)
+
+## Community
+
+Join the [Discord server](https://discord.gg/EH6nSts) to chat with the maintainers and other users.
+
+## License
+
+Lucide is licensed under the ISC license. See [LICENSE](https://lucide.dev/license).
+
+## Sponsors
+
+<a href="https://vercel.com?utm_source=lucide&utm_campaign=oss">
+  <img src="https://lucide.dev/vercel.svg" alt="Powered by Vercel" width="200" />
+</a>
+
+<a href="https://www.digitalocean.com/?refcode=b0877a2caebd&utm_campaign=Referral_Invite&utm_medium=Referral_Program&utm_source=badge"><img src="https://lucide.dev/digitalocean.svg" width="200" alt="DigitalOcean Referral Badge" /></a>
+
+### Awesome backers 🍺
+
+<a href="https://www.scipress.io?utm_source=lucide"><img src="https://lucide.dev/sponsors/scipress.svg" width="180" alt="Scipress sponsor badge" /></a>
+<a href="https://github.com/pdfme/pdfme"><img src="https://lucide.dev/sponsors/pdfme.svg" width="180" alt="pdfme sponsor badge" /></a>
diff --git a/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/dynamic.mjs b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/dynamic.mjs
new file mode 100644
index 0000000000..29e1076def
--- /dev/null
+++ b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/dynamic.mjs
@@ -0,0 +1,10 @@
+/**
+ * @license lucide-react v0.562.0 - ISC
+ *
+ * This source code is licensed under the ISC license.
+ * See the LICENSE file in the root directory of this source tree.
+ */
+
+export { default as DynamicIcon, iconNames } from './dist/esm/DynamicIcon.js';
+export { default as dynamicIconImports } from './dist/esm/dynamicIconImports.js';
+//# sourceMappingURL=dynamic.mjs.map
diff --git a/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/dynamicIconImports.mjs b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/dynamicIconImports.mjs
new file mode 100644
index 0000000000..7a725d5b50
--- /dev/null
+++ b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/dynamicIconImports.mjs
@@ -0,0 +1 @@
+export { default } from './dist/esm/dynamicIconImports.js';
diff --git a/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/package.json b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/package.json
new file mode 100644
index 0000000000..29e9a09b18
--- /dev/null
+++ b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react/package.json
@@ -0,0 +1,74 @@
+{
+  "name": "lucide-react",
+  "description": "A Lucide icon library package for React applications.",
+  "version": "0.562.0",
+  "license": "ISC",
+  "homepage": "https://lucide.dev",
+  "bugs": "https://github.com/lucide-icons/lucide/issues",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/lucide-icons/lucide.git",
+    "directory": "packages/lucide-react"
+  },
+  "keywords": [
+    "Lucide",
+    "React",
+    "Feather",
+    "Icons",
+    "Icon",
+    "SVG",
+    "Feather Icons",
+    "Fontawesome",
+    "Font Awesome"
+  ],
+  "author": "Eric Fennis",
+  "amdName": "lucide-react",
+  "main": "dist/cjs/lucide-react.js",
+  "main:umd": "dist/umd/lucide-react.js",
+  "module": "dist/esm/lucide-react.js",
+  "unpkg": "dist/umd/lucide-react.min.js",
+  "typings": "dist/lucide-react.d.ts",
+  "sideEffects": false,
+  "files": [
+    "dist",
+    "dynamic.mjs",
+    "dynamic.js.map",
+    "dynamic.d.ts",
+    "dynamicIconImports.mjs",
+    "dynamicIconImports.js.map",
+    "dynamicIconImports.d.ts"
+  ],
+  "devDependencies": {
+    "@testing-library/jest-dom": "^6.1.6",
+    "@testing-library/react": "^14.1.2",
+    "@types/react": "^18.2.37",
+    "@vitejs/plugin-react": "^4.4.1",
+    "jest-serializer-html": "^7.1.0",
+    "react": "18.2.0",
+    "react-dom": "18.2.0",
+    "rollup": "^4.53.3",
+    "rollup-plugin-dts": "^6.2.3",
+    "rollup-plugin-preserve-directives": "^0.4.0",
+    "typescript": "^5.8.3",
+    "vite": "^7.2.4",
+    "vitest": "^4.0.12",
+    "@lucide/shared": "1.0.0",
+    "@lucide/rollup-plugins": "1.0.0",
+    "@lucide/build-icons": "1.1.0"
+  },
+  "peerDependencies": {
+    "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
+  },
+  "scripts": {
+    "build": "pnpm clean && pnpm copy:license && pnpm build:icons && pnpm typecheck && pnpm build:bundles",
+    "copy:license": "cp ../../LICENSE ./LICENSE",
+    "clean": "rm -rf dist && rm -rf stats && rm -rf ./src/icons/*.ts && rm -f dynamic.* && rm -f dynamicIconImports.d.ts",
+    "build:icons": "build-icons --output=./src --templateSrc=./scripts/exportTemplate.mts --renderUniqueKey --withAliases --withDynamicImports --separateAliasesFile --separateAliasesFileIgnore=fingerprint --aliasesFileExtension=.ts --iconFileExtension=.ts --exportFileName=index.ts",
+    "build:bundles": "rollup -c ./rollup.config.mjs",
+    "typecheck": "tsc",
+    "typecheck:watch": "tsc -w",
+    "test": "pnpm build:icons && vitest run",
+    "test:watch": "vitest watch",
+    "version": "pnpm version --git-tag-version=false"
+  }
+}
\ No newline at end of file
diff --git a/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/react b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/react
new file mode 120000
index 0000000000..01bc808511
--- /dev/null
+++ b/node_modules/.pnpm/lucide-react@0.562.0_react@19.2.3/node_modules/react
@@ -0,0 +1 @@
+../../react@19.2.3/node_modules/react
\ No newline at end of file
diff --git a/node_modules/.pnpm/node_modules/lucide-react b/node_modules/.pnpm/node_modules/lucide-react
index 9ba6205c2d..311e1120c7 120000
--- a/node_modules/.pnpm/node_modules/lucide-react
+++ b/node_modules/.pnpm/node_modules/lucide-react
@@ -1 +1 @@
-../lucide-react@0.561.0_react@19.2.3/node_modules/lucide-react
\ No newline at end of file
+../lucide-react@0.562.0_react@19.2.3/node_modules/lucide-react
\ No newline at end of file
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index d9bdb1092f..d74fd5f526 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -169,8 +169,8 @@ importers:
         specifier: ^4.17.21
         version: 4.17.21
       lucide-react:
-        specifier: ^0.561.0
-        version: 0.561.0(react@19.2.3)
+        specifier: ^0.562.0
+        version: 0.562.0(react@19.2.3)
       maplibre-gl:
         specifier: ^5.14.0
         version: 5.15.0
@@ -2507,8 +2507,8 @@ packages:
   lru-cache@5.1.1:
     resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==}
 
-  lucide-react@0.561.0:
-    resolution: {integrity: sha512-Y59gMY38tl4/i0qewcqohPdEbieBy7SovpBL9IFebhc2mDd8x4PZSOsiFRkpPcOq6bj1r/mjH/Rk73gSlIJP2A==}
+  lucide-react@0.562.0:
+    resolution: {integrity: sha512-82hOAu7y0dbVuFfmO4bYF1XEwYk/mEbM5E+b1jgci/udUBEE/R7LF5Ip0CCEmXe8AybRM8L+04eP+LGZeDvkiw==}
     peerDependencies:
       react: ^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0
 
@@ -5660,7 +5660,7 @@ snapshots:
     dependencies:
       yallist: 3.1.1
 
-  lucide-react@0.561.0(react@19.2.3):
+  lucide-react@0.562.0(react@19.2.3):
     dependencies:
       react: 19.2.3
 
diff --git a/scripts/discover_custodian_websites.py b/scripts/discover_custodian_websites.py
new file mode 100644
index 0000000000..a4a69ce03b
--- /dev/null
+++ b/scripts/discover_custodian_websites.py
@@ -0,0 +1,561 @@
+#!/usr/bin/env python3
+"""
+Discover website URLs for custodian YAML files that are missing them.
+
+This script uses web search (via the duckduckgo-search package) to find official
+websites for heritage institutions based on their name and location.
+
+Search strategy (candidate queries in priority order; only the first two are run):
+1. Quoted institution name + city (when a city is known)
+2. Quoted institution name + country + institution type + "official" (when a type is known)
+3. Quoted institution name + "official website"
+
+Output:
+- Updates custodian YAML files with discovered website URLs
+- Stores provenance for discovered URLs
+
+Usage:
+    python scripts/discover_custodian_websites.py [options]
+
+Options:
+    --dry-run       Show what would be discovered without modifying files
+    --limit N       Process only first N files (for testing)
+    --file PATH     Process a single specific file
+    --country CODE  Filter by country code (e.g., JP, CZ)
+    --resume        Resume from last checkpoint
+
+Requirements:
+    pip install duckduckgo-search pyyaml httpx
+"""
+
+import argparse
+import asyncio
+import json
+import logging
+import re
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from urllib.parse import urlparse
+
+import yaml
+
+try:
+    from duckduckgo_search import DDGS
+except ImportError:
+    print("Please install duckduckgo-search: pip install duckduckgo-search")
+    sys.exit(1)
+
+try:
+    import httpx
+except ImportError:
+    print("Please install httpx: pip install httpx")
+    sys.exit(1)
+
+# Logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Configuration
+CUSTODIAN_DIR = Path(__file__).parent.parent / "data" / "custodian"
+CHECKPOINT_FILE = CUSTODIAN_DIR / ".website_discovery_checkpoint.json"
+REQUEST_DELAY = 3.0  # seconds between searches (be nice to search engines)
+
+# Domain blacklist (not actual institution websites)
+DOMAIN_BLACKLIST = {
+    'wikipedia.org', 'wikidata.org', 'wikimedia.org',
+    'facebook.com', 'twitter.com', 'instagram.com', 'linkedin.com',
+    'youtube.com', 'tiktok.com', 'pinterest.com',
+    'tripadvisor.com', 'tripadvisor.jp', 'yelp.com',
+    'google.com', 'google.co.jp', 'maps.google.com',
+    'amazon.com', 'amazon.co.jp', 'ebay.com',
+    'booking.com', 'expedia.com', 'hotels.com',
+    'foursquare.com', 'bing.com', 'yahoo.com',
+    'findagrave.com', 'ancestry.com', 'familysearch.org',
+    'academia.edu', 'researchgate.net',
+    'timeanddate.com', 'weather.com',
+}
+
+# Preferred TLDs per country, most specific suffix first (score_website stops at the first match)
+PREFERRED_TLDS = {
+    'JP': ['.go.jp', '.lg.jp', '.ac.jp', '.or.jp', '.jp'],
+    'CZ': ['.gov.cz', '.cz'],
+    'NL': ['.gov.nl', '.nl'],
+    'BE': ['.gov.be', '.be'],
+    'DE': ['.gov.de', '.de'],
+    'AT': ['.gv.at', '.at'],
+    'CH': ['.admin.ch', '.ch'],
+}
+
+
+def get_custodian_name(entry: dict) -> str | None:
+    """Return the best available institution name for *entry*, or None.
+
+    Sources are tried in order: emic name, native-language Wikidata label,
+    claim value, original entry name, original entry 'organisatie'.
+    """
+    names = entry.get('custodian_name', {})
+    
+    # Priority 1: Emic name (native language official name)
+    if names.get('emic_name'):
+        return names['emic_name']
+    
+    # Priority 2: Wikidata label in the country's own language, for the countries listed here
+    native_lang_by_country = {
+        'JP': 'ja',
+        'CN': 'zh',
+        'KR': 'ko',
+        'TW': 'zh',
+        'TH': 'th',
+        'VN': 'vi',
+        'RU': 'ru',
+        'GR': 'el',
+        'IL': 'he',
+        'SA': 'ar',
+        'IR': 'fa',
+    }
+    wikidata = entry.get('wikidata_enrichment', {})
+    lang = native_lang_by_country.get(get_country_from_entry(entry))
+    if lang is not None:
+        # The flat wikidata_label_<lang> key wins over the nested wikidata_labels mapping
+        native_label = wikidata.get(f'wikidata_label_{lang}') or wikidata.get('wikidata_labels', {}).get(lang)
+        if native_label:
+            return native_label
+    
+    # Priorities 3-5: claim value, then the original entry's 'name', then its 'organisatie'
+    if names.get('claim_value'):
+        return names['claim_value']
+    
+    original = entry.get('original_entry', {})
+    for key in ('name', 'organisatie'):
+        if original.get(key):
+            return original[key]
+    
+    return None
+
+
+def get_country_from_entry(entry: dict) -> str | None:
+    """Return location.country if set, else the first original_entry location's country, else None."""
+    direct = entry.get('location', {}).get('country')
+    if direct:
+        return direct
+    
+    # Fall back to the first record under original_entry.locations
+    records = entry.get('original_entry', {}).get('locations')
+    if records and records[0].get('country'):
+        return records[0]['country']
+    
+    # Neither source carries a truthy country value
+    return None
+
+
+def get_location_info(entry: dict) -> dict:
+    """Collect location fields (city, country and, when available, region and street_address) for *entry*."""
+    location = {}
+    # Preferred source: the first record in original_entry.locations
+    if entry.get('original_entry', {}).get('locations'):
+        loc = entry['original_entry']['locations'][0]
+        location['city'] = loc.get('city')
+        location['region'] = loc.get('region')
+        location['country'] = loc.get('country')
+        location['street_address'] = loc.get('street_address')
+    
+    # No city so far: fall back to the flat original_entry fields
+    if not location.get('city'):
+        orig = entry.get('original_entry', {})
+        location['city'] = orig.get('city') or orig.get('plaats')
+        # Keep a country already read from the location record; only fill a gap
+        location['country'] = location.get('country') or orig.get('country')
+    
+    return location
+
+
+def get_institution_type(entry: dict) -> str | None:
+    """Map original_entry.institution_type to a lowercase search term; None if absent or unmapped."""
+    search_terms = {
+        'LIBRARY': 'library',
+        'MUSEUM': 'museum',
+        'ARCHIVE': 'archive',
+        'GALLERY': 'gallery',
+        'RESEARCH_CENTER': 'research center',
+        'EDUCATION_PROVIDER': 'university',
+    }
+    raw_type = entry.get('original_entry', {}).get('institution_type')
+    if not raw_type:
+        return None
+    return search_terms.get(raw_type)
+
+
+def has_website(entry: dict) -> bool:
+    """Return True if any known website field of *entry* is already populated."""
+    original = entry.get('original_entry', {})
+    if original.get('webadres_organisatie'):
+        return True
+    
+    # An identifier whose scheme is exactly 'Website' also counts
+    if any(ident.get('identifier_scheme') == 'Website' for ident in original.get('identifiers', [])):
+        return True
+    
+    # Enrichment sections as (section, field) pairs, checked in this order
+    enrichment_fields = (
+        ('website_discovery', 'website_url'),
+        ('wikidata_enrichment', 'wikidata_official_website'),
+        ('google_maps_enrichment', 'website'),
+    )
+    for section, field in enrichment_fields:
+        if entry.get(section, {}).get(field):
+            return True
+    return False
+
+
+def is_valid_website(url: str, country: str | None = None) -> bool:
+    """Return True if *url* has a host outside DOMAIN_BLACKLIST; *country* is accepted but unused."""
+    if not url:
+        return False
+    
+    try:
+        # .hostname is lower-cased and carries no port or credentials
+        domain = urlparse(url).hostname or ''
+    except (ValueError, TypeError, AttributeError):
+        return False
+    
+    # A URL without a host (e.g. missing scheme) cannot be an institution website
+    if not domain:
+        return False
+    if domain.startswith('www.'):
+        domain = domain[4:]
+    
+    # Whole-domain match: 'google.com' rejects 'maps.google.com', while 'bing.com'
+    # does not reject unrelated hosts such as 'plumbing.com'.
+    blocked = (domain == b or domain.endswith('.' + b) for b in DOMAIN_BLACKLIST)
+    return not any(blocked)
+
+
+def score_website(url: str, country: str, name: str) -> int:
+    """Heuristically score *url* as the official site; higher is better and the result can be negative."""
+    score = 0
+    
+    try:
+        parsed = urlparse(url)
+        domain = parsed.netloc.lower()
+        
+        # Country TLD bonus: first matching suffix only, earlier list entries earn more
+        preferred = PREFERRED_TLDS.get(country, [])
+        for i, tld in enumerate(preferred):
+            if domain.endswith(tld):
+                score += (len(preferred) - i) * 10
+                break
+        
+        # Prefer HTTPS
+        if parsed.scheme == 'https':
+            score += 5
+        
+        # Penalise deep links: 2 points per non-empty path segment
+        path_depth = len([p for p in parsed.path.split('/') if p])
+        score -= path_depth * 2
+        
+        # 5 points per distinct word shared between the institution name and the domain
+        name_words = set(re.findall(r'\w+', name.lower()))
+        domain_words = set(re.findall(r'\w+', domain))
+        common_words = name_words & domain_words
+        score += len(common_words) * 5
+        
+    except Exception:
+        pass  # Best effort: keep whatever score was accumulated before the failure
+    
+    return score
+
+
+def search_for_website(name: str, location: dict, inst_type: str | None = None) -> list[dict]:
+    """Return up to 3 DuckDuckGo hits for *name*: blacklist-filtered, best score first, one per domain."""
+    results = []
+    
+    # Location values can be None even when the key exists, so normalise them to ''
+    # rather than letting the literal text "None" leak into a search query.
+    city = location.get('city') or ''
+    country = location.get('country') or ''
+    queries = []
+    
+    # Primary query: name + city
+    if city:
+        queries.append(f'"{name}" {city}')
+    
+    # Secondary query: name + country + institution type (empty parts are left out)
+    if inst_type:
+        queries.append(' '.join(filter(None, [f'"{name}"', country, inst_type, 'official'])))
+    
+    # Tertiary: just the name with "official website"
+    queries.append(f'"{name}" official website')
+    
+    ddgs = DDGS()
+    
+    for query in queries[:2]:  # Limit to 2 queries per institution
+        try:
+            search_results = list(ddgs.text(query, max_results=5))
+            
+            for r in search_results:
+                url = r.get('href') or r.get('url')
+                if url and is_valid_website(url, country):
+                    results.append({
+                        'url': url,
+                        'title': r.get('title', ''),
+                        'snippet': r.get('body', ''),
+                        'query': query,
+                        'score': score_website(url, country, name)
+                    })
+            
+            time.sleep(1)  # Rate limit between queries
+            
+        except Exception as e:
+            logger.warning(f"Search error for '{query}': {e}")
+            time.sleep(2)
+    
+    # Sort by score and deduplicate (first, i.e. best-scoring, hit per domain wins)
+    seen_domains = set()
+    unique_results = []
+    for r in sorted(results, key=lambda x: -x['score']):
+        domain = urlparse(r['url']).netloc.lower()
+        if domain not in seen_domains:
+            seen_domains.add(domain)
+            unique_results.append(r)
+    
+    return unique_results[:3]  # Return top 3 unique results
+
+
+async def verify_website(url: str) -> dict:
+    """GET *url* (redirects followed, timeout=15.0) and report accessibility, status, final URL and title."""
+    result = {
+        'accessible': False,
+        'final_url': url,
+        'status_code': None,
+        'title': None,
+    }
+    
+    try:
+        async with httpx.AsyncClient(follow_redirects=True, timeout=15.0) as client:
+            response = await client.get(url)
+            result['accessible'] = response.status_code == 200
+            result['status_code'] = response.status_code
+            result['final_url'] = str(response.url)
+            
+            # Only a 200 response is searched for a <title> element
+            if result['accessible']:
+                match = re.search(r'<title[^>]*>([^<]+)</title>', response.text, re.I)
+                if match:
+                    result['title'] = match.group(1).strip()
+    
+    except Exception as e:
+        logger.debug(f"Failed to verify {url}: {e}")
+    
+    return result
+
+
+def load_checkpoint() -> dict:
+    """Return the saved checkpoint, or a fresh zeroed one when no checkpoint file exists."""
+    if not CHECKPOINT_FILE.exists():
+        return {'processed_files': [], 'found_count': 0, 'not_found_count': 0}
+    with open(CHECKPOINT_FILE, 'r') as handle:
+        return json.load(handle)
+
+
+def save_checkpoint(checkpoint: dict):
+    """Write *checkpoint* to CHECKPOINT_FILE as JSON indented by two spaces."""
+    with CHECKPOINT_FILE.open('w') as handle:
+        json.dump(checkpoint, handle, indent=2)
+
+
+def update_custodian_file(filepath: Path, website_url: str, discovery_info: dict) -> bool:
+    """Write a website_discovery section into the YAML file; return False if it is empty or the update fails."""
+    try:
+        with open(filepath, 'r', encoding='utf-8') as f:
+            entry = yaml.safe_load(f)
+        
+        if not entry:
+            return False
+        
+        verification = discovery_info.get('verification', {})
+        entry['website_discovery'] = {
+            'website_url': website_url,
+            'discovery_date': datetime.now(timezone.utc).isoformat(),
+            'discovery_method': 'duckduckgo_search',
+            'search_query': discovery_info.get('query', ''),
+            'confidence_score': max(0.0, min(discovery_info.get('score', 0) / 50, 1.0)),  # Clamp to 0-1; scores can be negative
+            'verification': {
+                'accessible': verification.get('accessible', False),
+                'page_title': verification.get('title'),
+                'final_url': verification.get('final_url'),
+            }
+        }
+        
+        with open(filepath, 'w', encoding='utf-8') as f:
+            yaml.dump(entry, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
+        
+        return True
+    
+    except Exception as e:
+        logger.error(f"Failed to update {filepath}: {e}")
+        return False
+
+
+async def process_file(filepath: Path, dry_run: bool = False) -> dict:
+    """Find, verify and (unless dry_run) record a website for one custodian file; return a status dict."""
+    result = {
+        'filename': filepath.name,
+        'status': 'skipped',
+        'website': None,
+    }
+    
+    try:
+        with open(filepath, 'r', encoding='utf-8') as f:
+            entry = yaml.safe_load(f)
+        
+        if not entry:
+            result['status'] = 'empty'
+            return result
+        
+        # Skip if already has website
+        if has_website(entry):
+            result['status'] = 'has_website'
+            return result
+        
+        # Get institution info
+        name = get_custodian_name(entry)
+        if not name:
+            result['status'] = 'no_name'
+            return result
+        
+        location = get_location_info(entry)
+        inst_type = get_institution_type(entry)
+        country = location.get('country') or filepath.name[:2]  # key may exist with a None value
+        
+        logger.info(f"Searching for: {name} ({location.get('city') or 'unknown city'}, {country})")
+        
+        # Search for website
+        search_results = search_for_website(name, location, inst_type)
+        
+        if not search_results:
+            result['status'] = 'not_found'
+            return result
+        
+        # Verify top result
+        best = search_results[0]
+        verification = await verify_website(best['url'])
+        best['verification'] = verification
+        
+        if verification['accessible']:
+            result['website'] = verification['final_url']
+            result['status'] = 'found'
+            result['discovery_info'] = best
+            
+            if not dry_run:
+                update_custodian_file(filepath, verification['final_url'], best)
+                logger.info(f"  → Found: {verification['final_url']}")
+        else:
+            # Try second result if first is inaccessible
+            if len(search_results) > 1:
+                second = search_results[1]
+                verification2 = await verify_website(second['url'])
+                if verification2['accessible']:
+                    second['verification'] = verification2
+                    result['website'] = verification2['final_url']
+                    result['status'] = 'found'
+                    result['discovery_info'] = second
+                    
+                    if not dry_run:
+                        update_custodian_file(filepath, verification2['final_url'], second)
+                        logger.info(f"  → Found (2nd): {verification2['final_url']}")
+                else:
+                    result['status'] = 'inaccessible'
+            else:
+                result['status'] = 'inaccessible'
+    
+    except Exception as e:
+        result['status'] = 'error'
+        result['error'] = str(e)
+        logger.error(f"Error processing {filepath}: {e}")
+    
+    return result
+
+
+async def main():
+    """Parse CLI options, then search each selected custodian file in turn and log a summary."""
+    parser = argparse.ArgumentParser(description='Discover websites for custodian files')
+    parser.add_argument('--dry-run', action='store_true', help='Show what would be discovered')
+    parser.add_argument('--limit', type=int, help='Process only first N files')
+    parser.add_argument('--file', type=str, help='Process a single specific file')
+    parser.add_argument('--country', type=str, help='Filter by country code (e.g., JP, CZ)')
+    parser.add_argument('--resume', action='store_true', help='Resume from checkpoint')
+    args = parser.parse_args()
+    
+    # Get files to process
+    if args.file:
+        files = [Path(args.file)]
+    else:
+        pattern = f"{args.country}-*.yaml" if args.country else "*.yaml"
+        files = sorted(CUSTODIAN_DIR.glob(pattern))
+    
+    # Filter out non-custodian files
+    files = [f for f in files if f.name[0].isupper() and '-' in f.name]
+    
+    # Load checkpoint
+    checkpoint = load_checkpoint() if args.resume else {'processed_files': [], 'found_count': 0, 'not_found_count': 0}
+    processed_set = set(checkpoint['processed_files'])
+    
+    if args.resume:
+        files = [f for f in files if f.name not in processed_set]
+        logger.info(f"Resuming: {len(processed_set)} files already processed, {len(files)} remaining")
+    
+    # Apply limit
+    if args.limit:
+        files = files[:args.limit]
+    
+    logger.info(f"Processing {len(files)} custodian files...")
+    
+    # Process files
+    found_count = checkpoint.get('found_count', 0)
+    not_found_count = checkpoint.get('not_found_count', 0)
+    
+    for i, filepath in enumerate(files):
+        result = await process_file(filepath, args.dry_run)
+        
+        # Update counts
+        if result['status'] == 'found':
+            found_count += 1
+        elif result['status'] in ('not_found', 'inaccessible'):
+            not_found_count += 1
+        
+        # Update checkpoint
+        if not args.dry_run:
+            checkpoint['processed_files'].append(filepath.name)
+            checkpoint['found_count'] = found_count
+            checkpoint['not_found_count'] = not_found_count
+            if (i + 1) % 10 == 0:
+                save_checkpoint(checkpoint)
+        
+        # Progress update
+        if (i + 1) % 10 == 0:
+            logger.info(f"Progress: {i + 1}/{len(files)} - Found: {found_count}, Not found: {not_found_count}")
+        
+        # Rate limiting: pause (without blocking the event loop) only if a search may have run
+        if result['status'] not in ('empty', 'has_website', 'no_name'):
+            await asyncio.sleep(REQUEST_DELAY)
+    
+    # Final checkpoint save
+    if not args.dry_run:
+        save_checkpoint(checkpoint)
+    
+    # Summary
+    logger.info(f"\n{'='*50}")
+    logger.info(f"Discovery complete!")
+    logger.info(f"  Files processed: {len(files)}")
+    logger.info(f"  Websites found: {found_count}")
+    logger.info(f"  Not found: {not_found_count}")
+    logger.info(f"{'='*50}")
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/scripts/discover_websites_crawl4ai.py b/scripts/discover_websites_crawl4ai.py
new file mode 100644
index 0000000000..26551ea1e7
--- /dev/null
+++ b/scripts/discover_websites_crawl4ai.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+"""
+Simplified Website Discovery for Custodians using crawl4ai.
+Discovers websites by:
+1. Searching DuckDuckGo
+2. Verifying each candidate with a plain HTTP GET via httpx (crawl4ai itself is not imported here)
+3. Updating YAML files with discovered URLs
+"""
+import asyncio
+import httpx
+import json
+import logging
+import re
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from urllib.parse import urljoin, urlparse
+import yaml
+
+# Logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Configuration
+CUSTODIAN_DIR = Path(__file__).parent.parent / "data" / "custodian"
+CHECKPOINT_FILE = CUSTODIAN_DIR / ".website_discovery_crawl4ai_checkpoint.json"
+REQUEST_DELAY = 3.0  # seconds between requests
+DUCKDUCKGO_SEARCH = "https://duckduckgo.com/html/?q="
+
+async def discover_websites(name, city, country):
+    """Search DuckDuckGo for a custodian and verify candidate links.
+
+    Returns a dict with status 'found' (plus website_url/title) or 'not_found',
+    or None when the search fails or yields no links. `country` is unused.
+    """
+    from urllib.parse import quote_plus  # percent-encode '&', '#', '?' in names
+    logger.info(f"Searching for: {name}")
+    query = f"{name} {city}" if city else f"{name}"
+    search_url = f"{DUCKDUCKGO_SEARCH}{quote_plus(query)}"
+
+    try:
+        async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client:
+            response = await client.get(search_url)
+            if response.status_code not in [200, 202]:
+                logger.warning(f"Search failed: {response.status_code}")
+                return None
+            
+            html = response.text
+            links = []
+            for match in re.finditer(r'<a[^>]+href="([^"]+)"[^"]*"([^"]*")\s*>([^<]+)</a>', html, re.I):
+                href = match.group(1).replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>')
+                if href:
+                    links.append({'url': href, 'title': match.group(3)})
+            
+            if not links:
+                logger.info(f"No results found")
+                return None
+            
+            logger.info(f"Found {len(links)} candidates, verifying...")
+            
+            verified = []
+            for link in sorted(links, key=lambda x: len(x['title'])):
+                try:
+                    async with httpx.AsyncClient(timeout=15.0) as client:
+                        verify_response = await client.get(link['url'])
+                        if verify_response.status_code == 200:
+                            logger.info(f"Verified: {link['url']}")
+                            verified.append({
+                                'url': link['url'],
+                                'title': link['title'],
+                                'status': 'found'
+                            })
+                        else:
+                            logger.debug(f"Verification failed for {link['url']}")
+                except Exception:
+                    logger.debug(f"Verification error for {link['url']}")
+            
+            if verified:
+                best = verified[0]
+                logger.info(f"Best candidate: {best['url']}")
+                return {
+                    'status': 'found',
+                    'message': f"Discovered and verified: {best['url']}",
+                    'website_url': best['url'],
+                    'title': best.get('title'),
+                }
+            else:
+                logger.info(f"No valid websites found")
+                return {
+                    'status': 'not_found',
+                    'message': 'No valid results found'
+                }
+    
+    except Exception as e:
+        logger.error(f"Search error: {e}")
+        return None
+
+def update_custodian_file(filepath, website_url, title):
+    """Record a discovered website in a custodian YAML file.
+
+    Returns True once the file has been rewritten, False if it was empty or any error occurred.
+    """
+    try:
+        with open(filepath, 'r', encoding='utf-8') as src:
+            record = yaml.safe_load(src)
+        if not record:
+            logger.error(f"Invalid file: {filepath}")
+            return False
+        discovery = {
+            'website_url': website_url,
+            'discovery_date': datetime.now(timezone.utc).isoformat(),
+            'discovery_method': 'crawl4ai_search_and_verify',
+            'title': title,
+            'confidence_score': 0.0,  # placeholder; nothing in this function computes a score
+        }
+        record['website_discovery'] = discovery
+        with open(filepath, 'w', encoding='utf-8') as dst:
+            yaml.dump(record, dst, allow_unicode=True, default_flow_style=False, sort_keys=False)
+        logger.info(f"Updated: {filepath}")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to update {filepath}: {e}")
+        return False
+
+async def main():
+    """Discover and record a website for each JP custodian file (capped at one file for testing)."""
+    files = sorted(CUSTODIAN_DIR.glob("JP-*.yaml"))[:1]  # Test with 1 file
+    logger.info(f"Processing {len(files)} custodian files...")
+
+    for filepath in files:
+        name = Path(filepath).stem.replace('_', ' ')
+        logger.info(f"Processing: {name}")
+
+        # discover_websites returns None or a dict; a 'not_found' dict is truthy but has no URL.
+        result = await discover_websites(name, None, 'JP') or {}
+        website_url = result.get('website_url') or result.get('url')
+        if not website_url:
+            # Never pass a missing URL on: it would be written to the YAML as website_url: null.
+            logger.info(f"No website found")
+            continue
+        if update_custodian_file(filepath, website_url, result.get('title')):
+            logger.info(f"  → Discovered: {website_url}")
+
+    logger.info("Done!")
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/scripts/enrich_custodian_logos_crawl4ai.py b/scripts/enrich_custodian_logos_crawl4ai.py
index 06004cb861..e45a770cbd 100644
--- a/scripts/enrich_custodian_logos_crawl4ai.py
+++ b/scripts/enrich_custodian_logos_crawl4ai.py
@@ -75,19 +75,26 @@ REQUEST_DELAY = 2.0  # seconds between requests
 
 def get_website_url(entry: dict) -> str | None:
     """Extract website URL from custodian entry."""
-    # Priority 1: Original entry webadres
+    # Priority 1: Original entry webadres (Dutch ISIL format)
     if entry.get('original_entry', {}).get('webadres_organisatie'):
         url = entry['original_entry']['webadres_organisatie']
         if url and url.strip() and url.strip().lower() not in ('null', 'none', ''):
             return normalize_url(url.strip())
     
-    # Priority 2: Museum register website
+    # Priority 2: Website in identifiers array (Czech ISIL and ARON format)
+    for ident in entry.get('original_entry', {}).get('identifiers', []):
+        if ident.get('identifier_scheme') == 'Website':
+            url = ident.get('identifier_value') or ident.get('identifier_url')
+            if url and url.strip():
+                return normalize_url(url.strip())
+    
+    # Priority 3: Museum register website
     if entry.get('museum_register_enrichment', {}).get('website_url'):
         url = entry['museum_register_enrichment']['website_url']
         if url and url.strip():
             return normalize_url(url.strip())
     
-    # Priority 3: Wikidata official website
+    # Priority 4: Wikidata official website
     if entry.get('wikidata_enrichment', {}).get('wikidata_official_website'):
         url = entry['wikidata_enrichment']['wikidata_official_website']
         # Handle list of URLs (take first one)
@@ -96,13 +103,13 @@ def get_website_url(entry: dict) -> str | None:
         if url and isinstance(url, str) and url.strip():
             return normalize_url(url.strip())
     
-    # Priority 4: Google Maps website
+    # Priority 5: Google Maps website
     if entry.get('google_maps_enrichment', {}).get('website'):
         url = entry['google_maps_enrichment']['website']
         if url and url.strip():
             return normalize_url(url.strip())
     
-    # Priority 5: Web enrichment source URL
+    # Priority 6: Web enrichment source URL
     if entry.get('web_enrichment', {}).get('source_url'):
         url = entry['web_enrichment']['source_url']
         if url and url.strip():
diff --git a/scripts/index_persons_qdrant.py b/scripts/index_persons_qdrant.py
index 63df6c221e..34cac9acc8 100644
--- a/scripts/index_persons_qdrant.py
+++ b/scripts/index_persons_qdrant.py
@@ -54,9 +54,22 @@ def extract_person_text(data: dict[str, Any]) -> str:
     parts = []
     
     profile = data.get("profile_data", {})
+    person = data.get("person", {})
+    source_staff = data.get("source_staff_info", {})
+    extraction = data.get("extraction_metadata", {})
     
-    # Full name (primary identifier)
-    name = profile.get("full_name", "")
+    # Full name - check ALL possible locations in order of priority
+    name = (
+        profile.get("full_name") or
+        profile.get("name") or
+        person.get("full_name") or
+        person.get("name") or
+        source_staff.get("name") or
+        source_staff.get("person_name") or
+        extraction.get("person_name") or
+        data.get("name") or
+        ""
+    )
     if name:
         parts.append(f"Name: {name}")
     
@@ -259,13 +272,21 @@ def extract_metadata(data: dict[str, Any], filepath: Path) -> dict[str, Any]:
     }
     
     profile = data.get("profile_data", {})
+    person = data.get("person", {})
+    source_staff = data.get("source_staff_info", {})
     extraction = data.get("extraction_metadata", {})
     
-    # Full name - check multiple possible field names
+    # Full name - check ALL possible field names (same as extract_person_text)
     name = (
-        profile.get("name", "") or 
-        profile.get("full_name", "") or 
-        data.get("name", "")
+        profile.get("full_name") or
+        profile.get("name") or
+        person.get("full_name") or
+        person.get("name") or
+        source_staff.get("name") or
+        source_staff.get("person_name") or
+        extraction.get("person_name") or
+        data.get("name") or
+        ""
     )
     if name:
         metadata["name"] = name
@@ -414,16 +435,19 @@ def find_person_files(data_dir: Path) -> list[Path]:
 
 
 class PersonRetriever:
-    """Qdrant retriever specifically for person entities."""
+    """Qdrant retriever specifically for person entities.
+    
+    Uses MiniLM (384-dim) embeddings by default for consistency with
+    the hybrid_retriever.py query-time embedding model.
+    """
     
     def __init__(
         self,
         host: str = "localhost",
         port: int = 6333,
         collection_name: str = "heritage_persons",
-        embedding_model: str = "text-embedding-3-small",
-        embedding_dim: int = 1536,
-        api_key: str | None = None,
+        embedding_model: str = "all-MiniLM-L6-v2",  # MiniLM for local embeddings
+        embedding_dim: int = 384,  # MiniLM output dimension
         url: str | None = None,
         https: bool = False,
         prefix: str | None = None,
@@ -434,7 +458,7 @@ class PersonRetriever:
         self.collection_name = collection_name
         self.embedding_model = embedding_model
         self.embedding_dim = embedding_dim
-        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
+        # MiniLM model runs locally, no API key needed
         
         # Initialize Qdrant client
         if url:
@@ -451,25 +475,23 @@ class PersonRetriever:
         else:
             self.client = QdrantClient(host=host, port=port, timeout=60)
         
-        self._openai_client = None
+        self._sentence_model = None
     
     @property
-    def openai_client(self):
-        """Lazy-load OpenAI client."""
-        if self._openai_client is None:
-            import openai
-            self._openai_client = openai.OpenAI(api_key=self.api_key)
-        return self._openai_client
+    def sentence_model(self):
+        """Lazy-load SentenceTransformer model."""
+        if self._sentence_model is None:
+            from sentence_transformers import SentenceTransformer
+            logger.info(f"Loading embedding model: {self.embedding_model}")
+            self._sentence_model = SentenceTransformer(self.embedding_model)
+        return self._sentence_model
     
     def _get_embeddings_batch(self, texts: list[str]) -> list[list[float]]:
-        """Get embedding vectors for multiple texts."""
+        """Get embedding vectors for multiple texts using MiniLM."""
         if not texts:
             return []
-        response = self.openai_client.embeddings.create(
-            input=texts,
-            model=self.embedding_model
-        )
-        return [item.embedding for item in sorted(response.data, key=lambda x: x.index)]
+        embeddings = self.sentence_model.encode(texts, show_progress_bar=False)
+        return embeddings.tolist()
     
     def ensure_collection(self) -> None:
         """Ensure the collection exists, create if not."""
@@ -655,10 +677,7 @@ def main():
             logger.info(f"    Metadata: {list(doc['metadata'].keys())}")
         sys.exit(0)
     
-    # Check for OpenAI API key
-    if not os.getenv("OPENAI_API_KEY"):
-        logger.error("OPENAI_API_KEY environment variable is required for embeddings")
-        sys.exit(1)
+    # Note: MiniLM model runs locally, no API key needed
     
     # Create retriever
     if args.url:
diff --git a/src/glam_extractor/api/hybrid_retriever.py b/src/glam_extractor/api/hybrid_retriever.py
index ead40947fb..4191442599 100644
--- a/src/glam_extractor/api/hybrid_retriever.py
+++ b/src/glam_extractor/api/hybrid_retriever.py
@@ -36,6 +36,8 @@ Example usage:
 import hashlib
 import logging
 import os
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
 from typing import Any, TYPE_CHECKING
 
@@ -128,9 +130,150 @@ class RetrievedInstitution:
         }
 
 
+# ===================================================================
+# Linked Data URI Generation Utilities
+# ===================================================================
+# Generate stable ontology-aligned URIs for Person and PersonObservation
+# following the LinkML schema at schemas/20251121/linkml/
+# Namespace: https://nde.nl/ontology/hc/
+# ===================================================================
+
+import re
+import unicodedata
+
+# Ontology namespaces
+ONTOLOGY_BASE = "https://nde.nl/ontology/hc"
+PERSON_HUB_PREFIX = f"{ONTOLOGY_BASE}/person"
+PERSON_OBS_PREFIX = f"{ONTOLOGY_BASE}/person-obs"
+CUSTODIAN_PREFIX = f"{ONTOLOGY_BASE}/custodian"
+
+# JSON-LD context for person search responses
+PERSON_JSONLD_CONTEXT = {
+    "@vocab": f"{ONTOLOGY_BASE}/",
+    "schema": "http://schema.org/",
+    "pico": "https://personsincontext.org/model#",
+    "prov": "http://www.w3.org/ns/prov#",
+    "foaf": "http://xmlns.com/foaf/0.1/",
+    "name": "schema:name",
+    "jobTitle": "schema:jobTitle",
+    "affiliation": "schema:affiliation",
+    "sameAs": "schema:sameAs",
+    "refers_to_person": "pico:observationOf",
+    "observation_source": "prov:hadPrimarySource",
+}
+
+
+def generate_slug(text: str) -> str:
+    """Turn arbitrary text into a lowercase, hyphen-separated ASCII slug.
+
+    Combining marks are dropped after NFD decomposition; every run of
+    characters outside [a-z0-9] becomes one hyphen. Empty input, or input
+    with no ASCII alphanumerics at all, yields "unknown".
+
+    Examples:
+        "Kitty Bogte" → "kitty-bogte"
+        "Dr. Jane Smith" → "dr-jane-smith"
+        "Zoë Müller" → "zoe-muller"
+    """
+    if not text:
+        return "unknown"
+
+    # Split accented letters into base letter + combining mark ('Mn'), keep the base.
+    decomposed = unicodedata.normalize('NFD', text)
+    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
+    lowered = ''.join(base_chars).lower()
+
+    # The '+' quantifier already collapses each run, so no hyphen is ever doubled.
+    hyphenated = re.sub(r'[^a-z0-9]+', '-', lowered)
+    trimmed = hyphenated.strip('-')
+
+    return trimmed if trimmed else "unknown"
+
+
+def generate_role_slug(headline: str | None) -> str:
+    """Slugify a job title/headline, defaulting to "staff" when it is missing.
+
+    Examples:
+        "Programmer/curator" → "programmer-curator"
+        "Senior Archivist" → "senior-archivist"
+        None → "staff"
+        "" → "staff"
+    """
+    # Any falsy headline (None or empty string) maps to the generic role.
+    return generate_slug(headline) if headline else "staff"
+
+
+def generate_person_hub_uri(name: str, linkedin_slug: str | None = None) -> str:
+    """Build the URI of the Person hub (the abstract identity).
+
+    Format: https://nde.nl/ontology/hc/person/{person-slug}
+
+    A non-empty LinkedIn slug is used verbatim as the path segment (it is
+    not re-slugified); otherwise the segment is derived from the name.
+
+    Examples:
+        generate_person_hub_uri("Kitty Bogte", "kittybogte")
+            → "https://nde.nl/ontology/hc/person/kittybogte"
+        generate_person_hub_uri("Dr. Jane Smith")
+            → "https://nde.nl/ontology/hc/person/dr-jane-smith"
+    """
+    # `or` treats None and "" alike: both fall back to the name-derived slug,
+    # and generate_slug is only called when that fallback is needed.
+    person_segment = linkedin_slug or generate_slug(name)
+
+    return f"{PERSON_HUB_PREFIX}/{person_segment}"
+
+
+def generate_observation_uri(
+    custodian_slug: str | None,
+    person_name: str,
+    role_slug: str | None = None,
+    linkedin_slug: str | None = None
+) -> str:
+    """Build the URI of a PersonObservation (one person, one role, one custodian).
+
+    Format: https://nde.nl/ontology/hc/person-obs/{custodian-slug}/{person-slug}/{role-slug}
+
+    Missing parts fall back to "unknown-custodian", the slugified name, and "staff".
+
+    Examples:
+        generate_observation_uri("nl-ga-nationaal-archief", "Kitty Bogte", "programmer-curator")
+            → "https://nde.nl/ontology/hc/person-obs/nl-ga-nationaal-archief/kitty-bogte/programmer-curator"
+    """
+    custodian_part = custodian_slug or "unknown-custodian"
+    person_part = linkedin_slug or generate_slug(person_name)
+    return "/".join((PERSON_OBS_PREFIX, custodian_part, person_part, role_slug or "staff"))
+
+
+def generate_custodian_uri(custodian_slug: str | None, ghcid: str | None = None) -> str | None:
+    """Build the Custodian URI, preferring the GHCID over the slug.
+
+    Format: https://nde.nl/ontology/hc/custodian/{ghcid-or-slug}
+    Returns None when neither identifier is set (None or empty).
+    """
+    identifier = ghcid or custodian_slug
+    if not identifier:
+        return None
+    return f"{CUSTODIAN_PREFIX}/{identifier}"
+
+
+def extract_linkedin_slug(linkedin_url: str | None) -> str | None:
+    """Extract the profile slug from a LinkedIn ``/in/`` URL.
+
+    The slug ends at the first "/", "?", "#" or whitespace, so query strings
+    and fragments never leak into URIs built from it.
+
+    Examples:
+        "https://www.linkedin.com/in/kittybogte" → "kittybogte"
+        "https://linkedin.com/in/jane-smith-12345/?trk=x#about" → "jane-smith-12345"
+    """
+    match = re.search(r'linkedin\.com/in/([^/?#\s]+)', linkedin_url or "")
+    return match.group(1) if match else None
+
+
 @dataclass
 class RetrievedPerson:
-    """A retrieved person/staff member with search scores."""
+    """A retrieved person/staff member with search scores and linked data URIs."""
     
     person_id: str
     name: str
@@ -148,9 +291,42 @@ class RetrievedPerson:
     source_type: str | None = None  # "staff_list" or "entity_profile"
     linkedin_url: str | None = None
     
-    def to_dict(self) -> dict[str, Any]:
-        """Convert to dictionary for API responses."""
-        return {
+    # Linked data fields (generated)
+    linkedin_profile_path: str | None = None  # Path to entity JSON file
+    
+    @property
+    def linkedin_slug(self) -> str | None:
+        """Slug obtained by passing ``linkedin_url`` to extract_linkedin_slug (may be None)."""
+        return extract_linkedin_slug(self.linkedin_url)
+    
+    @property
+    def person_hub_uri(self) -> str:
+        """Person hub URI (abstract identity), built from ``name`` and ``linkedin_slug``."""
+        return generate_person_hub_uri(self.name, self.linkedin_slug)
+    
+    @property
+    def observation_uri(self) -> str:
+        """PersonObservation URI; the role segment is slugified from ``headline``."""
+        role_slug = generate_role_slug(self.headline)
+        return generate_observation_uri(
+            self.custodian_slug,
+            self.name,
+            role_slug,
+            self.linkedin_slug
+        )
+    
+    @property
+    def custodian_uri(self) -> str | None:
+        """Custodian URI built from ``custodian_slug`` alone (no GHCID is passed here)."""
+        return generate_custodian_uri(self.custodian_slug)
+    
+    def to_dict(self, include_jsonld: bool = True) -> dict[str, Any]:
+        """Convert to dictionary for API responses.
+        
+        Args:
+            include_jsonld: If True, include JSON-LD linked data fields (@id, @type, etc.)
+        """
+        result = {
             "person_id": self.person_id,
             "name": self.name,
             "scores": {
@@ -169,6 +345,26 @@ class RetrievedPerson:
                 "linkedin_url": self.linkedin_url,
             }
         }
+        
+        if include_jsonld:
+            # Add JSON-LD linked data fields
+            result["@id"] = self.observation_uri
+            result["@type"] = "pico:PersonObservation"
+            result["refers_to_person"] = self.person_hub_uri
+            
+            # Add custodian affiliation if available
+            if self.custodian_uri:
+                result["unit_affiliation"] = self.custodian_uri
+            
+            # Add schema:sameAs for LinkedIn URL
+            if self.linkedin_url:
+                result["schema:sameAs"] = self.linkedin_url
+            
+            # Add linkedin_profile_path if available
+            if self.linkedin_profile_path:
+                result["linkedin_profile_path"] = self.linkedin_profile_path
+        
+        return result
 
 
 # Query type detection patterns
@@ -254,6 +450,103 @@ def get_province_code(province_name: str | None) -> str | None:
         return None
     return DUTCH_PROVINCE_CODES.get(province_name.lower().strip())
 
+def looks_like_person_name(query: str) -> bool:
+    """Detect if query looks like a person's name for name-boosted search.
+
+    A query looks like a person name if it:
+    - Contains 2-4 words, all but at most one capitalized and alphabetic
+    - Does NOT contain role words, context phrases or "?" / "!" (substring match)
+    - Does NOT contain a question word (who, wat, hoe, ...) as a whole word
+
+    Args:
+        query: Search query string
+
+    Returns:
+        True if query appears to be a person name
+
+    Examples:
+        >>> looks_like_person_name("Kitty Bogte")
+        True
+        >>> looks_like_person_name("Emma Watson")
+        True
+        >>> looks_like_person_name("Who works at the Rijksmuseum?")
+        False
+        >>> looks_like_person_name("archivist at Nationaal Archief")
+        False
+    """
+    # Short question words are compared against whole words only; as substrings
+    # they reject real names such as "Watson" ("wat"), "Howard" ("how"), "Hoekstra" ("hoe").
+    question_words = {
+        "who", "what", "where", "which", "how", "why",
+        "wie", "wat", "waar", "welk", "welke", "hoe", "waarom",
+    }
+    # Longer indicators stay substring matches so inflections/compounds still hit.
+    non_name_substrings = (
+        # Role/job indicators
+        "works at", "working at", "werkt bij", "werkzaam",
+        "archivist", "curator", "director", "librarian",
+        "archivaris", "directeur", "bibliothecaris",
+        # Context prepositions. NOTE(review): " van "/" de " also reject "Jan van Dijk" - confirm.
+        " at ", " in ", " of ", " for ", " the ",
+        " bij ", " van ", " voor ", " de ", " het ",
+        # Punctuation that indicates non-name queries
+        "?", "!",
+    )
+
+    query_lower = query.lower()
+    if any(indicator in query_lower for indicator in non_name_substrings):
+        return False
+
+    words = query.split()
+    if not 2 <= len(words) <= 4 or any(w.lower().strip(".,;:") in question_words for w in words):
+        return False
+
+    # Most name words should be capitalized; one lowercase/non-alphabetic word is tolerated
+    return sum(1 for w in words if w[0].isupper() and w.isalpha()) >= len(words) - 1
+
+
+def calculate_name_match_boost(query: str, name: str) -> float:
+    """Calculate a score boost for name matching.
+
+    Uses case-insensitive matching to boost results where the query
+    matches part or all of the person's name.
+
+    Args:
+        query: Search query (potential name)
+        name: Person's name from search result
+
+    Returns:
+        Boost factor (1.0 = no boost, >1.0 = boosted)
+        - 3.0: Exact match (case-insensitive)
+        - 2.5: Query contains full name or name contains full query
+        - 1.0 + ratio: Some whole words match; ratio = shared words / word
+          count of the longer side, so the result lies in (1.0, 2.0]
+        - 1.0: No match, or either side is empty/None
+    """
+    query_lower = (query or "").lower().strip()
+    name_lower = (name or "").lower().strip()
+
+    # "" is a substring of every string: without this guard a record with no
+    # name (or an empty query) would get the 2.5 substring boost.
+    if not query_lower or not name_lower:
+        return 1.0
+
+    if query_lower == name_lower:
+        return 3.0
+
+    if query_lower in name_lower or name_lower in query_lower:
+        return 2.5
+
+    # Partial match on whole words (e.g. only the first or last name)
+    query_parts = set(query_lower.split())
+    name_parts = set(name_lower.split())
+    matching_parts = query_parts & name_parts
+    if not matching_parts:
+        return 1.0
+
+    return 1.0 + len(matching_parts) / max(len(query_parts), len(name_parts))
+
+
 def detect_query_type(query: str, dspy_entity_type: str | None = None) -> str:
     """Detect if query is about institutions or persons.
     
@@ -529,29 +822,43 @@ class SPARQLClient:
     def __init__(
         self,
         endpoint: str = DEFAULT_SPARQL_ENDPOINT,
-        timeout: float = DEFAULT_SPARQL_TIMEOUT
+        timeout: float = DEFAULT_SPARQL_TIMEOUT,
+        max_connections: int = 20  # Allow concurrent connections for parallel queries
     ):
         self.endpoint = endpoint
         self.timeout = timeout
+        self.max_connections = max_connections
         self._client: httpx.Client | None = None
     
     @property
     def client(self) -> httpx.Client:
-        """Lazy-initialize HTTP client."""
+        """Lazy-initialize HTTP client with connection pooling."""
         if self._client is None:
-            self._client = httpx.Client(timeout=self.timeout)
+            # Configure connection pool for parallel SPARQL queries
+            limits = httpx.Limits(
+                max_keepalive_connections=self.max_connections,
+                max_connections=self.max_connections,
+                keepalive_expiry=30.0  # Keep connections alive for reuse
+            )
+            self._client = httpx.Client(
+                timeout=self.timeout,
+                limits=limits,
+                http2=False  # HTTP/1.1 is often faster for small queries
+            )
         return self._client
     
-    def query(self, sparql: str) -> list[dict[str, Any]]:
+    def query(self, sparql: str, log_timing: bool = False) -> list[dict[str, Any]]:
         """Execute SPARQL query and return results.
         
         Args:
             sparql: SPARQL query string
+            log_timing: Whether to log query execution time
             
         Returns:
             List of result bindings as dictionaries
         """
         full_query = SPARQL_PREFIXES + sparql
+        start_time = time.time() if log_timing else 0
         
         try:
             response = self.client.post(
@@ -572,6 +879,10 @@ class SPARQLClient:
                     row[key] = value.get("value", "")
                 results.append(row)
             
+            if log_timing:
+                duration_ms = (time.time() - start_time) * 1000
+                logger.debug(f"SPARQL query completed: {len(results)} results in {duration_ms:.0f}ms")
+            
             return results
             
         except httpx.HTTPError as e:
@@ -1060,9 +1371,220 @@ class HybridRetriever:
         # Return up to k results
         return filtered[:k]
     
+    def _build_batched_expansion_query(
+        self,
+        seed_institutions: list[RetrievedInstitution],
+        exclude_ghcids: set[str],
+        limit_per_expansion: int = 5
+    ) -> tuple[str, dict[str, dict]]:
+        """Build a single SPARQL query with UNION clauses for all expansions.
+        
+        Deduplicates by city code (first 3 letters of the city, uppercased) and by
+        country+type code. For example, if 5 seeds are all from Amsterdam with type
+        MUSEUM, only ONE city expansion (AMS) and ONE type expansion (NL + M) are
+        emitted instead of 10 redundant UNIONs; metadata keeps the first such seed.
+        
+        Args:
+            seed_institutions: Seed institutions; only the first 5 are expanded
+            exclude_ghcids: Only its size is used, to pad each LIMIT (no FILTER is added)
+            limit_per_expansion: Base row LIMIT for each UNION branch, before padding
+            
+        Returns:
+            Tuple of (SPARQL query string, expansion_metadata dict), or ("", {}) if
+            no seed yields a branch. Metadata maps "city_N"/"type_N" keys to a dict.
+        """
+        unions = []
+        expansion_metadata = {}
+        
+        # Track unique patterns so each one produces at most one UNION branch
+        seen_city_codes: set[str] = set()
+        seen_type_patterns: set[str] = set()  # "country-type_code" pattern
+        
+        seeds_to_expand = seed_institutions[:5]
+        city_idx = 0
+        type_idx = 0
+        
+        for seed in seeds_to_expand:
+            # City expansion - one UNION branch per distinct 3-letter city code
+            if seed.city:
+                city_code = seed.city[:3].upper()
+                if city_code not in seen_city_codes:
+                    seen_city_codes.add(city_code)
+                    expansion_key = f"city_{city_idx}"
+                    city_idx += 1
+                    unions.append(f"""
+                        {{
+                            SELECT ?s ?name ?ghcid ?type ("{expansion_key}" AS ?expansion_key) WHERE {{
+                                ?s a hcc:Custodian ;
+                                   skos:prefLabel ?name ;
+                                   hc:ghcid ?ghcid .
+                                FILTER(CONTAINS(?ghcid, "-{city_code}-"))
+                                OPTIONAL {{ ?s hc:institutionType ?type }}
+                            }}
+                            LIMIT {limit_per_expansion + len(exclude_ghcids)}
+                        }}
+                    """)
+                    expansion_metadata[expansion_key] = {
+                        "seed": seed,
+                        "type": "city",
+                        "city": seed.city,
+                        "city_code": city_code
+                    }
+            
+            # Type expansion - one UNION branch per distinct country + type_code pair
+            if seed.institution_type and seed.country:
+                type_code = get_custodian_type_to_heritage_code().get(seed.institution_type, "")
+                if type_code:
+                    pattern_key = f"{seed.country}-{type_code}"
+                    if pattern_key not in seen_type_patterns:
+                        seen_type_patterns.add(pattern_key)
+                        expansion_key = f"type_{type_idx}"
+                        type_idx += 1
+                        unions.append(f"""
+                            {{
+                                SELECT ?s ?name ?ghcid ?city ("{expansion_key}" AS ?expansion_key) WHERE {{
+                                    ?s a hcc:Custodian ;
+                                       skos:prefLabel ?name ;
+                                       hc:ghcid ?ghcid .
+                                    FILTER(STRSTARTS(?ghcid, "{seed.country}-"))
+                                    FILTER(CONTAINS(?ghcid, "-{type_code}-"))
+                                    OPTIONAL {{ ?s schema:location ?city }}
+                                }}
+                                LIMIT {limit_per_expansion + len(exclude_ghcids)}
+                            }}
+                        """)
+                        expansion_metadata[expansion_key] = {
+                            "seed": seed,
+                            "type": "type",
+                            "institution_type": seed.institution_type,
+                            "type_code": type_code,
+                            "country": seed.country
+                        }
+        
+        if not unions:
+            return "", {}
+        
+        # Log how many branches survived deduplication (upper bound: two per seed)
+        logger.info(f"Batched SPARQL: {len(unions)} UNIONs (deduplicated from max {len(seeds_to_expand) * 2}). "
+                   f"Unique cities: {seen_city_codes}, Unique types: {seen_type_patterns}")
+        
+        # Outer SELECT exposes both ?type and ?city; each branch binds only one of them
+        query = f"""
+        SELECT ?s ?name ?ghcid ?type ?city ?expansion_key WHERE {{
+            {" UNION ".join(unions)}
+        }}
+        """
+        
+        return query, expansion_metadata
+    
+    def _graph_expand_batched(
+        self,
+        seed_institutions: list[RetrievedInstitution]
+    ) -> list[RetrievedInstitution]:
+        """Expand seed results using a SINGLE batched SPARQL query.
+        
+        Alternative to the parallel ThreadPoolExecutor approach: instead of up to
+        10 HTTP requests (two per seed, five seeds), one SPARQL query with UNION
+        branches is sent and its rows are split by their ?expansion_key.
+        
+        Rough timings noted by the author (estimates, not measured by this code):
+        - Sequential: up to 10 queries, 4+ seconds in total
+        - Parallel (ThreadPool): ~500ms-3s
+        - Batched (this method): ONE query ~150-300ms
+        
+        Args:
+            seed_institutions: Initial vector search results (also the exclusion set)
+            
+        Returns:
+            New institutions (seeds excluded), at most self.k_expand per expansion key
+        """
+        start_time = time.time()
+        exclude_ghcids = {inst.ghcid for inst in seed_institutions}
+        expanded = []
+        seen_ghcids = set(exclude_ghcids)
+        
+        # Build batched query (empty string means no seed had a city or type to expand)
+        query, expansion_metadata = self._build_batched_expansion_query(
+            seed_institutions, exclude_ghcids, limit_per_expansion=self.k_expand
+        )
+        
+        if not query:
+            logger.debug("No graph expansion tasks to execute")
+            return expanded
+        
+        # Execute single batched query
+        query_start = time.time()
+        results = self.sparql_client.query(query)
+        query_duration = (time.time() - query_start) * 1000
+        
+        logger.debug(f"Batched SPARQL query: {len(results)} raw results in {query_duration:.0f}ms")
+        
+        # Group results by expansion_key (rows without a key are dropped)
+        results_by_expansion: dict[str, list[dict]] = {}
+        for row in results:
+            exp_key = row.get("expansion_key", "")
+            if exp_key:
+                if exp_key not in results_by_expansion:
+                    results_by_expansion[exp_key] = []
+                results_by_expansion[exp_key].append(row)
+        
+        # Build RetrievedInstitution objects, skipping seeds and already-seen GHCIDs
+        for exp_key, rows in results_by_expansion.items():
+            if exp_key not in expansion_metadata:
+                continue
+            
+            meta = expansion_metadata[exp_key]
+            seed = meta["seed"]
+            exp_type = meta["type"]
+            
+            count = 0
+            for row in rows:
+                ghcid = row.get("ghcid", "")
+                if not ghcid or ghcid in seen_ghcids:
+                    continue
+                
+                if count >= self.k_expand:
+                    break
+                
+                seen_ghcids.add(ghcid)
+                count += 1
+                
+                if exp_type == "city":
+                    expanded.append(RetrievedInstitution(
+                        ghcid=ghcid,
+                        name=row.get("name", ""),
+                        uri=row.get("s", ""),
+                        graph_score=0.8,  # High score for same city
+                        institution_type=row.get("type"),
+                        expansion_reason="same_city",
+                        related_institutions=[seed.ghcid]
+                    ))
+                elif exp_type == "type":
+                    expanded.append(RetrievedInstitution(
+                        ghcid=ghcid,
+                        name=row.get("name", ""),
+                        uri=row.get("s", ""),
+                        graph_score=0.5,  # Medium score for same type
+                        institution_type=seed.institution_type,
+                        city=row.get("city"),
+                        expansion_reason="same_type",
+                        related_institutions=[seed.ghcid]
+                    ))
+            
+            logger.debug(f"Expansion {exp_key}: {count} results for {seed.ghcid}")
+        
+        total_time = (time.time() - start_time) * 1000
+        logger.info(f"Graph expansion (batched): 1 query, {len(results)} raw results, "
+                   f"{len(expanded)} expanded in {total_time:.0f}ms")
+        
+        return expanded
+    
     def _expand_by_city(self, city: str, exclude_ghcids: set[str], limit: int = 5) -> list[dict]:
         """Find other institutions in the same city via SPARQL.
         
+        Note: This method is kept for backwards compatibility and direct calls.
+        For batch operations, use _graph_expand_batched() instead.
+        
         Args:
             city: City name to search for
             exclude_ghcids: GHCIDs to exclude from results
@@ -1078,12 +1600,12 @@ class HybridRetriever:
         SELECT ?s ?name ?ghcid ?type WHERE {{
             ?s a hcc:Custodian ;
                skos:prefLabel ?name ;
-               ghc:ghcid ?ghcid .
+               hc:ghcid ?ghcid .
             
             # Match city in GHCID (format: CC-RR-CCC-T-ABBR)
             FILTER(CONTAINS(?ghcid, "-{city[:3].upper()}-"))
             
-            OPTIONAL {{ ?s hc:institution_type ?type }}
+            OPTIONAL {{ ?s hc:institutionType ?type }}
         }}
         LIMIT {limit + len(exclude_ghcids)}
         """
@@ -1126,13 +1648,13 @@ class HybridRetriever:
         SELECT ?s ?name ?ghcid ?city WHERE {{
             ?s a hcc:Custodian ;
                skos:prefLabel ?name ;
-               ghc:ghcid ?ghcid .
+               hc:ghcid ?ghcid .
             
             # Match country and type in GHCID
             FILTER(STRSTARTS(?ghcid, "{country}-"))
             FILTER(CONTAINS(?ghcid, "-{type_code}-"))
             
-            OPTIONAL {{ ?s hc:city ?city }}
+            OPTIONAL {{ ?s schema:location ?city }}
         }}
         LIMIT {limit + len(exclude_ghcids)}
         """
@@ -1167,10 +1689,10 @@ class HybridRetriever:
         SELECT ?s ?name ?ghcid ?type WHERE {{
             ?s a hcc:Custodian ;
                skos:prefLabel ?name ;
-               ghc:ghcid ?ghcid ;
+               hc:ghcid ?ghcid ;
                wdt:P17 wd:{wikidata_country} .
             
-            OPTIONAL {{ ?s hc:institution_type ?type }}
+            OPTIONAL {{ ?s hc:institutionType ?type }}
         }}
         LIMIT {limit + len(exclude_ghcids)}
         """
@@ -1189,59 +1711,171 @@ class HybridRetriever:
     
     def _graph_expand(
         self,
-        seed_institutions: list[RetrievedInstitution]
+        seed_institutions: list[RetrievedInstitution],
+        use_batched: bool = True
     ) -> list[RetrievedInstitution]:
         """Expand seed results using knowledge graph relationships.
         
+        By default sends one batched SPARQL query (UNION of all expansions).
+        Any exception from the batched path is logged and the parallel path runs instead.
+        
+        Rough timings noted by the author (estimates, not measured by this code):
+        - Sequential: up to 10 queries, 4+ seconds in total
+        - Parallel (ThreadPool): ~500ms-3s
+        - Batched (UNION query): ONE query ~150-300ms ← DEFAULT
+        
+        Args:
+            seed_institutions: Initial vector search results
+            use_batched: If True (default), try the batched SPARQL query first.
+                        If False, go straight to the parallel ThreadPoolExecutor.
+            
+        Returns:
+            Additional institutions found via graph expansion
+        """
+        if use_batched:
+            try:
+                return self._graph_expand_batched(seed_institutions)
+            except Exception as e:
+                logger.warning(f"Batched graph expansion failed, falling back to parallel: {e}")
+                # Fall through to parallel implementation
+        
+        return self._graph_expand_parallel(seed_institutions)
+    
+    def _graph_expand_parallel(
+        self,
+        seed_institutions: list[RetrievedInstitution]
+    ) -> list[RetrievedInstitution]:
+        """Expand seed results using parallel SPARQL queries (fallback method).
+        
+        Submits one task per (seed, expansion kind) to a ThreadPoolExecutor and merges
+        rows as tasks complete; _graph_expand calls this when batching is off or fails.
+        
         Args:
             seed_institutions: Initial vector search results
             
         Returns:
             Additional institutions found via graph expansion
         """
+        start_time = time.time()
         exclude_ghcids = {inst.ghcid for inst in seed_institutions}
         expanded = []
         seen_ghcids = set(exclude_ghcids)
         
-        for seed in seed_institutions[:5]:  # Expand top 5 seeds
-            # Expansion 1: Same city
+        # Prepare all expansion tasks
+        # Each task is a tuple: (task_type, seed, query_params)
+        tasks = []
+        seeds_to_expand = seed_institutions[:5]  # Expand top 5 seeds
+        
+        for seed in seeds_to_expand:
+            # City expansion task (only when the seed has a city)
             if seed.city:
-                city_results = self._expand_by_city(
-                    seed.city, seen_ghcids, limit=self.k_expand
-                )
-                for row in city_results:
-                    ghcid = row.get("ghcid", "")
-                    if ghcid and ghcid not in seen_ghcids:
-                        seen_ghcids.add(ghcid)
-                        expanded.append(RetrievedInstitution(
-                            ghcid=ghcid,
-                            name=row.get("name", ""),
-                            uri=row.get("s", ""),
-                            graph_score=0.8,  # High score for same city
-                            institution_type=row.get("type"),
-                            expansion_reason="same_city",
-                            related_institutions=[seed.ghcid]
-                        ))
+                tasks.append(("city", seed, {"city": seed.city}))
             
-            # Expansion 2: Same type in same country
+            # Type expansion task (needs both institution_type and country)
             if seed.institution_type and seed.country:
-                type_results = self._expand_by_type(
-                    seed.institution_type, seed.country, seen_ghcids, limit=self.k_expand
-                )
-                for row in type_results:
-                    ghcid = row.get("ghcid", "")
-                    if ghcid and ghcid not in seen_ghcids:
-                        seen_ghcids.add(ghcid)
-                        expanded.append(RetrievedInstitution(
-                            ghcid=ghcid,
-                            name=row.get("name", ""),
-                            uri=row.get("s", ""),
-                            graph_score=0.5,  # Medium score for same type
-                            institution_type=seed.institution_type,
-                            city=row.get("city"),
-                            expansion_reason="same_type",
-                            related_institutions=[seed.ghcid]
-                        ))
+                tasks.append(("type", seed, {
+                    "institution_type": seed.institution_type,
+                    "country": seed.country
+                }))
+        
+        if not tasks:
+            logger.debug("No graph expansion tasks to execute")
+            return expanded
+        
+        # Execute SPARQL queries in parallel
+        # Pool size is capped at 10, which is also the most tasks 5 seeds can produce
+        max_workers = min(10, len(tasks))
+        
+        def execute_expansion(task):
+            """Run one task; return a result dict ("results" is [] and "error" is set on failure)."""
+            task_type, seed, params = task
+            task_start = time.time()
+            
+            try:
+                if task_type == "city":
+                    results = self._expand_by_city(
+                        params["city"], exclude_ghcids, limit=self.k_expand
+                    )
+                    return {
+                        "task_type": task_type,
+                        "seed": seed,
+                        "results": results,
+                        "duration_ms": (time.time() - task_start) * 1000
+                    }
+                elif task_type == "type":
+                    results = self._expand_by_type(
+                        params["institution_type"],
+                        params["country"],
+                        exclude_ghcids,
+                        limit=self.k_expand
+                    )
+                    return {
+                        "task_type": task_type,
+                        "seed": seed,
+                        "results": results,
+                        "duration_ms": (time.time() - task_start) * 1000
+                    }
+            except Exception as e:
+                logger.warning(f"Graph expansion task failed: {task_type} for {seed.ghcid}: {e}")
+                return {
+                    "task_type": task_type,
+                    "seed": seed,
+                    "results": [],
+                    "duration_ms": (time.time() - task_start) * 1000,
+                    "error": str(e)
+                }
+        
+        # Merge in completion order; seen_ghcids keeps the first row seen for each GHCID
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = {executor.submit(execute_expansion, task): task for task in tasks}
+            
+            for future in as_completed(futures):
+                result = future.result()
+                if result is None:
+                    continue
+                
+                task_type = result["task_type"]
+                seed = result["seed"]
+                rows = result["results"]
+                duration = result.get("duration_ms", 0)
+                
+                logger.debug(f"Graph expansion {task_type} for {seed.ghcid}: "
+                           f"{len(rows)} results in {duration:.0f}ms")
+                
+                # Process results based on task type
+                if task_type == "city":
+                    for row in rows:
+                        ghcid = row.get("ghcid", "")
+                        if ghcid and ghcid not in seen_ghcids:
+                            seen_ghcids.add(ghcid)
+                            expanded.append(RetrievedInstitution(
+                                ghcid=ghcid,
+                                name=row.get("name", ""),
+                                uri=row.get("s", ""),
+                                graph_score=0.8,  # High score for same city
+                                institution_type=row.get("type"),
+                                expansion_reason="same_city",
+                                related_institutions=[seed.ghcid]
+                            ))
+                elif task_type == "type":
+                    for row in rows:
+                        ghcid = row.get("ghcid", "")
+                        if ghcid and ghcid not in seen_ghcids:
+                            seen_ghcids.add(ghcid)
+                            expanded.append(RetrievedInstitution(
+                                ghcid=ghcid,
+                                name=row.get("name", ""),
+                                uri=row.get("s", ""),
+                                graph_score=0.5,  # Medium score for same type
+                                institution_type=seed.institution_type,
+                                city=row.get("city"),
+                                expansion_reason="same_type",
+                                related_institutions=[seed.ghcid]
+                            ))
+        
+        total_time = (time.time() - start_time) * 1000
+        logger.info(f"Graph expansion completed: {len(tasks)} queries, "
+                   f"{len(expanded)} results in {total_time:.0f}ms (parallel)")
         
         return expanded
     
@@ -1251,7 +1885,13 @@ class HybridRetriever:
         graph_results: list[RetrievedInstitution],
         k: int
     ) -> list[RetrievedInstitution]:
-        """Combine vector and graph results with weighted scoring.
+        """Combine vector and graph results with weighted scoring and graph inheritance.
+        
+        This method implements a hybrid scoring approach:
+        1. Direct merge: If a graph result matches a vector result (same GHCID), 
+           the graph_score is directly applied
+        2. Graph inheritance: Vector results inherit a portion of graph scores from
+           related institutions found via graph expansion (same city/type)
         
         Args:
             vector_results: Results from vector search
@@ -1261,25 +1901,95 @@ class HybridRetriever:
         Returns:
             Combined and ranked results
         """
+        # Debug logging for investigation
+        logger.debug(f"Combining {len(vector_results)} vector + {len(graph_results)} graph results")
+        
         # Create lookup by GHCID for merging
         results_by_ghcid: dict[str, RetrievedInstitution] = {}
         
+        # Track which vector GHCIDs we have for inheritance
+        vector_ghcids = set()
+        
         # Add vector results
         for inst in vector_results:
             if inst.ghcid:
                 results_by_ghcid[inst.ghcid] = inst
+                vector_ghcids.add(inst.ghcid)
+                logger.debug(f"  Vector: {inst.ghcid} ({inst.name[:30] if inst.name else '?'}...) "
+                           f"v={inst.vector_score:.3f} g={inst.graph_score:.3f}")
+        
+        # Track direct merges and inheritance candidates
+        direct_merges = 0
+        inheritance_boosts = []
+        
+        # Merge graph results and build inheritance map
+        # inheritance_map: vector_ghcid -> list of (related_ghcid, graph_score, reason)
+        inheritance_map: dict[str, list[tuple[str, float, str]]] = {g: [] for g in vector_ghcids}
         
-        # Merge graph results
         for inst in graph_results:
+            logger.debug(f"  Graph: {inst.ghcid} ({inst.name[:30] if inst.name else '?'}...) "
+                       f"g={inst.graph_score:.3f} reason={inst.expansion_reason} "
+                       f"related_to={inst.related_institutions}")
+            
             if inst.ghcid in results_by_ghcid:
-                # Combine scores if already present
+                # Direct merge: graph result matches existing vector result
                 existing = results_by_ghcid[inst.ghcid]
+                old_graph_score = existing.graph_score
                 existing.graph_score = max(existing.graph_score, inst.graph_score)
                 existing.related_institutions.extend(inst.related_institutions)
                 if inst.expansion_reason:
                     existing.expansion_reason = inst.expansion_reason
+                direct_merges += 1
+                logger.debug(f"    -> Direct merge! {inst.ghcid} graph_score: {old_graph_score:.3f} -> {existing.graph_score:.3f}")
             else:
+                # New institution from graph expansion
                 results_by_ghcid[inst.ghcid] = inst
+                
+                # Build inheritance: this graph result was expanded FROM a vector result
+                # The related_institutions field contains the seed GHCID(s) it was expanded from
+                for seed_ghcid in inst.related_institutions:
+                    if seed_ghcid in inheritance_map:
+                        inheritance_map[seed_ghcid].append(
+                            (inst.ghcid, inst.graph_score, inst.expansion_reason or "related")
+                        )
+        
+        logger.debug(f"Direct merges: {direct_merges}")
+        
+        # Apply graph score inheritance to vector results
+        # Vector results inherit a portion of graph scores from their related institutions
+        INHERITANCE_FACTOR = 0.5  # Inherit 50% of related institutions' graph scores
+        
+        for vector_ghcid, related_list in inheritance_map.items():
+            if related_list and vector_ghcid in results_by_ghcid:
+                inst = results_by_ghcid[vector_ghcid]
+                
+                # Calculate inherited score: average of related graph scores * inheritance factor
+                related_scores = [score for _, score, _ in related_list]
+                inherited_score = (sum(related_scores) / len(related_scores)) * INHERITANCE_FACTOR
+                
+                old_graph_score = inst.graph_score
+                # Inherit: take max of current graph_score and inherited score
+                inst.graph_score = max(inst.graph_score, inherited_score)
+                
+                if inst.graph_score > old_graph_score:
+                    # Track related institutions for context
+                    related_ghcids = [ghcid for ghcid, _, _ in related_list]
+                    inst.related_institutions.extend(related_ghcids[:3])  # Add up to 3 related
+                    
+                    inheritance_boosts.append({
+                        "ghcid": vector_ghcid,
+                        "name": inst.name,
+                        "old_graph": old_graph_score,
+                        "new_graph": inst.graph_score,
+                        "inherited_from": len(related_list),
+                        "reasons": list(set(r for _, _, r in related_list))
+                    })
+                    logger.debug(f"  Inheritance: {vector_ghcid} graph_score: {old_graph_score:.3f} -> "
+                               f"{inst.graph_score:.3f} (from {len(related_list)} related institutions)")
+        
+        if inheritance_boosts:
+            logger.info(f"Graph inheritance applied to {len(inheritance_boosts)} vector results: "
+                       f"{[b['ghcid'] for b in inheritance_boosts[:3]]}...")
         
         # Calculate combined scores
         for inst in results_by_ghcid.values():
@@ -1295,6 +2005,12 @@ class HybridRetriever:
             reverse=True
         )
         
+        # Log top results for debugging
+        logger.debug(f"Top {min(5, len(ranked))} combined results:")
+        for i, inst in enumerate(ranked[:5]):
+            logger.debug(f"  {i+1}. {inst.ghcid} ({inst.name[:25] if inst.name else '?'}...) "
+                       f"combined={inst.combined_score:.3f} (v={inst.vector_score:.3f}, g={inst.graph_score:.3f})")
+        
         return ranked[:k]
     
     def _get_person_collection_vector_size(self) -> int | None:
@@ -1418,8 +2134,20 @@ class HybridRetriever:
             richness_boost = 0.7 + 0.3 * richness_score
             person.combined_score = person.vector_score * richness_boost
             
+            # Apply name-matching boost for queries that look like person names
+            # This ensures that searching for "Kitty Bogte" returns Kitty Bogte first,
+            # even if vector similarity ranks other Dutch names higher
+            if looks_like_person_name(query) and person.name:
+                name_boost = calculate_name_match_boost(query, person.name)
+                if name_boost > 1.0:
+                    logger.debug(f"Name match boost {name_boost}x for '{person.name}' (query: '{query}')")
+                    person.combined_score *= name_boost
+            
             persons.append(person)
         
+        # Re-sort by combined score after name boosting
+        persons.sort(key=lambda p: p.combined_score, reverse=True)
+        
         return persons
     
     def search_persons(
@@ -1462,9 +2190,15 @@ class HybridRetriever:
         
         logger.info(f"Person search for: {query[:50]}... (model: {using or 'auto'}, role_category: {target_role_category}, custodian_type: {target_custodian_type})")
         
-        # Over-fetch to allow for post-filtering by role category
+        # Over-fetch to allow for post-filtering and name boosting
+        # - Base multiplier: 2x for general queries
+        # - Role category filter: 3x (need more candidates for keyword filtering)
+        # - Name queries: fetch minimum 100 to ensure name boost can find exact matches
+        #   (vector similarity often ranks similar-sounding names higher than exact matches)
+        is_name_query = looks_like_person_name(query)
         fetch_multiplier = 3 if target_role_category else 2
-        results = self._person_vector_search(query, k * fetch_multiplier, using=using, filter_conditions=filter_conditions)
+        fetch_count = max(k * fetch_multiplier, 100 if is_name_query else 0)
+        results = self._person_vector_search(query, fetch_count, using=using, filter_conditions=filter_conditions)
         logger.info(f"Found {len(results)} person results after Qdrant filtering")
         
         # Apply role category post-filtering (keyword-based since not indexed)