feat: Add legal form filtering rule for CustodianName
- Introduced LEGAL-FORM-FILTER rule to standardize CustodianName by removing legal form designations. - Documented rationale, examples, and implementation guidelines for the filtering process. docs: Create README for value standardization rules - Established a comprehensive README outlining various value standardization rules applicable to Heritage Custodian classes. - Categorized rules into Name Standardization, Geographic Standardization, Web Observation, and Schema Evolution. feat: Implement transliteration standards for non-Latin scripts - Added TRANSLIT-ISO rule to ensure GHCID abbreviations are generated from emic names using ISO standards for transliteration. - Included detailed guidelines for various scripts and languages, along with implementation examples. feat: Define XPath provenance rules for web observations - Created XPATH-PROVENANCE rule mandating XPath pointers for claims extracted from web sources. - Established a workflow for archiving websites and verifying claims against archived HTML. chore: Update records lifecycle diagram - Generated a new Mermaid diagram illustrating the records lifecycle for heritage custodians. - Included phases for active records, inactive archives, and processed heritage collections with key relationships and classifications.
This commit is contained in:
parent
7b42d720d5
commit
3a6ead8fde
64 changed files with 18017 additions and 466 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -14921,6 +14921,7 @@ hypernym:
|
|||
rico:
|
||||
- label: recordSetTypes
|
||||
- label: Q112796578
|
||||
class: True
|
||||
hypernym:
|
||||
- archive
|
||||
type:
|
||||
|
|
@ -15176,6 +15177,7 @@ hypernym:
|
|||
rico:
|
||||
- label: recordSetTypes
|
||||
- label: Q3621648
|
||||
class: True
|
||||
hypernym:
|
||||
- archive
|
||||
type:
|
||||
|
|
@ -15281,6 +15283,7 @@ hypernym:
|
|||
type:
|
||||
- A
|
||||
- label: Q9854379
|
||||
class: True
|
||||
country:
|
||||
- Portugal
|
||||
hypernym:
|
||||
|
|
@ -15714,6 +15717,7 @@ hypernym:
|
|||
rico:
|
||||
- label: recordSetTypes
|
||||
- label: Q11906844
|
||||
class: True
|
||||
hypernym:
|
||||
- archive
|
||||
type:
|
||||
|
|
@ -15845,6 +15849,7 @@ hypernym:
|
|||
type:
|
||||
- D
|
||||
- label: Q5177943
|
||||
class: True
|
||||
hypernym:
|
||||
- archive
|
||||
type:
|
||||
|
|
|
|||
392
frontend/package-lock.json
generated
392
frontend/package-lock.json
generated
|
|
@ -19,6 +19,7 @@
|
|||
"@types/dagre": "^0.7.53",
|
||||
"@types/js-yaml": "^4.0.9",
|
||||
"@types/lodash": "^4.17.20",
|
||||
"@types/three": "^0.181.0",
|
||||
"@uiw/react-codemirror": "^4.25.3",
|
||||
"axios": "^1.13.2",
|
||||
"chevrotain-allstar": "^0.3.1",
|
||||
|
|
@ -34,7 +35,7 @@
|
|||
"lodash": "^4.17.21",
|
||||
"lucide-react": "^0.554.0",
|
||||
"maplibre-gl": "^5.14.0",
|
||||
"mermaid": "^11.12.1",
|
||||
"mermaid": "^11.12.2",
|
||||
"n3": "^1.26.0",
|
||||
"react": "^19.2.0",
|
||||
"react-dom": "^19.2.0",
|
||||
|
|
@ -43,6 +44,8 @@
|
|||
"rehype-raw": "^7.0.0",
|
||||
"rehype-sanitize": "^6.0.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"three": "^0.181.2",
|
||||
"umap-js": "^1.4.0",
|
||||
"zustand": "^5.0.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
@ -605,9 +608,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@codemirror/view": {
|
||||
"version": "6.38.8",
|
||||
"resolved": "https://registry.npmjs.org/@codemirror/view/-/view-6.38.8.tgz",
|
||||
"integrity": "sha512-XcE9fcnkHCbWkjeKyi0lllwXmBLtyYb5dt89dJyx23I9+LSh5vZDIuk7OLG4VM1lgrXZQcY6cxyZyk5WVPRv/A==",
|
||||
"version": "6.39.1",
|
||||
"resolved": "https://registry.npmjs.org/@codemirror/view/-/view-6.39.1.tgz",
|
||||
"integrity": "sha512-yxpbDf9JwUgLVuAzOS1r0upM+f482FCYkcc+ZbJ34SGBppKL26giehibMEX+nAzLonlrJYiFi9zrftGDrO4mrQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@codemirror/state": "^6.5.0",
|
||||
|
|
@ -712,9 +715,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@csstools/css-syntax-patches-for-csstree": {
|
||||
"version": "1.0.20",
|
||||
"resolved": "https://registry.npmjs.org/@csstools/css-syntax-patches-for-csstree/-/css-syntax-patches-for-csstree-1.0.20.tgz",
|
||||
"integrity": "sha512-8BHsjXfSciZxjmHQOuVdW2b8WLUPts9a+mfL13/PzEviufUEW2xnvQuOlKs9dRBHgRqJ53SF/DUoK9+MZk72oQ==",
|
||||
"version": "1.0.14",
|
||||
"resolved": "https://registry.npmjs.org/@csstools/css-syntax-patches-for-csstree/-/css-syntax-patches-for-csstree-1.0.14.tgz",
|
||||
"integrity": "sha512-zSlIxa20WvMojjpCSy8WrNpcZ61RqfTfX3XTaOeVlGJrt/8HF3YbzgFZa01yTbT4GWQLwfTcC3EB8i3XnB647Q==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
{
|
||||
|
|
@ -729,6 +732,9 @@
|
|||
"license": "MIT-0",
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"postcss": "^8.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@csstools/css-tokenizer": {
|
||||
|
|
@ -751,6 +757,12 @@
|
|||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@dimforge/rapier3d-compat": {
|
||||
"version": "0.12.0",
|
||||
"resolved": "https://registry.npmjs.org/@dimforge/rapier3d-compat/-/rapier3d-compat-0.12.0.tgz",
|
||||
"integrity": "sha512-uekIGetywIgopfD97oDL5PfeezkFpNhwlzlaEYNOA0N6ghdsOvh/HYjSMek5Q2O1PYvRSDFcqFVJl4r4ZBwOow==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@duckdb/duckdb-wasm": {
|
||||
"version": "1.30.0",
|
||||
"resolved": "https://registry.npmjs.org/@duckdb/duckdb-wasm/-/duckdb-wasm-1.30.0.tgz",
|
||||
|
|
@ -1692,9 +1704,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@lezer/lr": {
|
||||
"version": "1.4.4",
|
||||
"resolved": "https://registry.npmjs.org/@lezer/lr/-/lr-1.4.4.tgz",
|
||||
"integrity": "sha512-LHL17Mq0OcFXm1pGQssuGTQFPPdxARjKM8f7GA5+sGtHi0K3R84YaSbmche0+RKWHnCsx9asEe5OWOI4FHfe4A==",
|
||||
"version": "1.4.5",
|
||||
"resolved": "https://registry.npmjs.org/@lezer/lr/-/lr-1.4.5.tgz",
|
||||
"integrity": "sha512-/YTRKP5yPPSo1xImYQk7AZZMAgap0kegzqCSYHjAL9x1AZ0ZQW+IpcEzMKagCsbTsLnVeWkxYrCNeXG8xEPrjg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@lezer/common": "^1.0.0"
|
||||
|
|
@ -2098,9 +2110,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@rolldown/pluginutils": {
|
||||
"version": "1.0.0-beta.47",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.47.tgz",
|
||||
"integrity": "sha512-8QagwMH3kNCuzD8EWL8R2YPW5e4OrHNSAHRFDdmFqEwEaD/KcNKjVoumo+gP2vW5eKB2UPbM6vTYiGZX0ixLnw==",
|
||||
"version": "1.0.0-beta.53",
|
||||
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.53.tgz",
|
||||
"integrity": "sha512-vENRlFU4YbrwVqNDZ7fLvy+JR1CRkyr01jhSiDpE1u6py3OMzQfztQU2jxykW3ALNxO4kSlqIDeYyD0Y9RcQeQ==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
|
@ -2544,6 +2556,12 @@
|
|||
"@testing-library/dom": ">=7.21.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@tweenjs/tween.js": {
|
||||
"version": "23.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@tweenjs/tween.js/-/tween.js-23.1.3.tgz",
|
||||
"integrity": "sha512-vJmvvwFxYuGnF2axRtPYocag6Clbb5YS7kLL+SO/TeVFzHqDIWrNKYtcsPMibjDx9O+bu+psAy9NKfWklassUA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/aria-query": {
|
||||
"version": "5.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/aria-query/-/aria-query-5.0.4.tgz",
|
||||
|
|
@ -2979,9 +2997,9 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "24.10.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz",
|
||||
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
|
||||
"version": "24.10.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.2.tgz",
|
||||
"integrity": "sha512-WOhQTZ4G8xZ1tjJTvKOpyEVSGgOTvJAfDK3FNFgELyaTpzhdgHVHeqW8V+UJvzF5BT+/B54T/1S2K6gd9c7bbA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
|
@ -3028,6 +3046,12 @@
|
|||
"@types/react": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/stats.js": {
|
||||
"version": "0.17.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/stats.js/-/stats.js-0.17.4.tgz",
|
||||
"integrity": "sha512-jIBvWWShCvlBqBNIZt0KAshWpvSjhkwkEu4ZUcASoAvhmrgAUI2t1dXrjSL4xXVLB4FznPrIsX3nKXFl/Dt4vA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/supercluster": {
|
||||
"version": "7.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/supercluster/-/supercluster-7.1.3.tgz",
|
||||
|
|
@ -3037,6 +3061,21 @@
|
|||
"@types/geojson": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/three": {
|
||||
"version": "0.181.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/three/-/three-0.181.0.tgz",
|
||||
"integrity": "sha512-MLF1ks8yRM2k71D7RprFpDb9DOX0p22DbdPqT/uAkc6AtQXjxWCVDjCy23G9t1o8HcQPk7woD2NIyiaWcWPYmA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@dimforge/rapier3d-compat": "~0.12.0",
|
||||
"@tweenjs/tween.js": "~23.1.3",
|
||||
"@types/stats.js": "*",
|
||||
"@types/webxr": "*",
|
||||
"@webgpu/types": "*",
|
||||
"fflate": "~0.8.2",
|
||||
"meshoptimizer": "~0.22.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/trusted-types": {
|
||||
"version": "2.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz",
|
||||
|
|
@ -3050,19 +3089,24 @@
|
|||
"integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/webxr": {
|
||||
"version": "0.5.24",
|
||||
"resolved": "https://registry.npmjs.org/@types/webxr/-/webxr-0.5.24.tgz",
|
||||
"integrity": "sha512-h8fgEd/DpoS9CBrjEQXR+dIDraopAEfu4wYVNY2tEPwk60stPWhvZMf4Foo5FakuQ7HFZoa8WceaWFervK2Ovg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@typescript-eslint/eslint-plugin": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.48.1.tgz",
|
||||
"integrity": "sha512-X63hI1bxl5ohelzr0LY5coufyl0LJNthld+abwxpCoo6Gq+hSqhKwci7MUWkXo67mzgUK6YFByhmaHmUcuBJmA==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.49.0.tgz",
|
||||
"integrity": "sha512-JXij0vzIaTtCwu6SxTh8qBc66kmf1xs7pI4UOiMDFVct6q86G0Zs7KRcEoJgY3Cav3x5Tq0MF5jwgpgLqgKG3A==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@eslint-community/regexpp": "^4.10.0",
|
||||
"@typescript-eslint/scope-manager": "8.48.1",
|
||||
"@typescript-eslint/type-utils": "8.48.1",
|
||||
"@typescript-eslint/utils": "8.48.1",
|
||||
"@typescript-eslint/visitor-keys": "8.48.1",
|
||||
"graphemer": "^1.4.0",
|
||||
"@typescript-eslint/scope-manager": "8.49.0",
|
||||
"@typescript-eslint/type-utils": "8.49.0",
|
||||
"@typescript-eslint/utils": "8.49.0",
|
||||
"@typescript-eslint/visitor-keys": "8.49.0",
|
||||
"ignore": "^7.0.0",
|
||||
"natural-compare": "^1.4.0",
|
||||
"ts-api-utils": "^2.1.0"
|
||||
|
|
@ -3075,22 +3119,22 @@
|
|||
"url": "https://opencollective.com/typescript-eslint"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@typescript-eslint/parser": "^8.48.1",
|
||||
"@typescript-eslint/parser": "^8.49.0",
|
||||
"eslint": "^8.57.0 || ^9.0.0",
|
||||
"typescript": ">=4.8.4 <6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/parser": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.48.1.tgz",
|
||||
"integrity": "sha512-PC0PDZfJg8sP7cmKe6L3QIL8GZwU5aRvUFedqSIpw3B+QjRSUZeeITC2M5XKeMXEzL6wccN196iy3JLwKNvDVA==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.49.0.tgz",
|
||||
"integrity": "sha512-N9lBGA9o9aqb1hVMc9hzySbhKibHmB+N3IpoShyV6HyQYRGIhlrO5rQgttypi+yEeKsKI4idxC8Jw6gXKD4THA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/scope-manager": "8.48.1",
|
||||
"@typescript-eslint/types": "8.48.1",
|
||||
"@typescript-eslint/typescript-estree": "8.48.1",
|
||||
"@typescript-eslint/visitor-keys": "8.48.1",
|
||||
"@typescript-eslint/scope-manager": "8.49.0",
|
||||
"@typescript-eslint/types": "8.49.0",
|
||||
"@typescript-eslint/typescript-estree": "8.49.0",
|
||||
"@typescript-eslint/visitor-keys": "8.49.0",
|
||||
"debug": "^4.3.4"
|
||||
},
|
||||
"engines": {
|
||||
|
|
@ -3106,14 +3150,14 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/project-service": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.48.1.tgz",
|
||||
"integrity": "sha512-HQWSicah4s9z2/HifRPQ6b6R7G+SBx64JlFQpgSSHWPKdvCZX57XCbszg/bapbRsOEv42q5tayTYcEFpACcX1w==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.49.0.tgz",
|
||||
"integrity": "sha512-/wJN0/DKkmRUMXjZUXYZpD1NEQzQAAn9QWfGwo+Ai8gnzqH7tvqS7oNVdTjKqOcPyVIdZdyCMoqN66Ia789e7g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/tsconfig-utils": "^8.48.1",
|
||||
"@typescript-eslint/types": "^8.48.1",
|
||||
"@typescript-eslint/tsconfig-utils": "^8.49.0",
|
||||
"@typescript-eslint/types": "^8.49.0",
|
||||
"debug": "^4.3.4"
|
||||
},
|
||||
"engines": {
|
||||
|
|
@ -3128,14 +3172,14 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/scope-manager": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.48.1.tgz",
|
||||
"integrity": "sha512-rj4vWQsytQbLxC5Bf4XwZ0/CKd362DkWMUkviT7DCS057SK64D5lH74sSGzhI6PDD2HCEq02xAP9cX68dYyg1w==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.49.0.tgz",
|
||||
"integrity": "sha512-npgS3zi+/30KSOkXNs0LQXtsg9ekZ8OISAOLGWA/ZOEn0ZH74Ginfl7foziV8DT+D98WfQ5Kopwqb/PZOaIJGg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/types": "8.48.1",
|
||||
"@typescript-eslint/visitor-keys": "8.48.1"
|
||||
"@typescript-eslint/types": "8.49.0",
|
||||
"@typescript-eslint/visitor-keys": "8.49.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
|
|
@ -3146,9 +3190,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/tsconfig-utils": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.48.1.tgz",
|
||||
"integrity": "sha512-k0Jhs4CpEffIBm6wPaCXBAD7jxBtrHjrSgtfCjUvPp9AZ78lXKdTR8fxyZO5y4vWNlOvYXRtngSZNSn+H53Jkw==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.49.0.tgz",
|
||||
"integrity": "sha512-8prixNi1/6nawsRYxet4YOhnbW+W9FK/bQPxsGB1D3ZrDzbJ5FXw5XmzxZv82X3B+ZccuSxo/X8q9nQ+mFecWA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
|
|
@ -3163,15 +3207,15 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/type-utils": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.48.1.tgz",
|
||||
"integrity": "sha512-1jEop81a3LrJQLTf/1VfPQdhIY4PlGDBc/i67EVWObrtvcziysbLN3oReexHOM6N3jyXgCrkBsZpqwH0hiDOQg==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.49.0.tgz",
|
||||
"integrity": "sha512-KTExJfQ+svY8I10P4HdxKzWsvtVnsuCifU5MvXrRwoP2KOlNZ9ADNEWWsQTJgMxLzS5VLQKDjkCT/YzgsnqmZg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/types": "8.48.1",
|
||||
"@typescript-eslint/typescript-estree": "8.48.1",
|
||||
"@typescript-eslint/utils": "8.48.1",
|
||||
"@typescript-eslint/types": "8.49.0",
|
||||
"@typescript-eslint/typescript-estree": "8.49.0",
|
||||
"@typescript-eslint/utils": "8.49.0",
|
||||
"debug": "^4.3.4",
|
||||
"ts-api-utils": "^2.1.0"
|
||||
},
|
||||
|
|
@ -3188,9 +3232,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/types": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.48.1.tgz",
|
||||
"integrity": "sha512-+fZ3LZNeiELGmimrujsDCT4CRIbq5oXdHe7chLiW8qzqyPMnn1puNstCrMNVAqwcl2FdIxkuJ4tOs/RFDBVc/Q==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.49.0.tgz",
|
||||
"integrity": "sha512-e9k/fneezorUo6WShlQpMxXh8/8wfyc+biu6tnAqA81oWrEic0k21RHzP9uqqpyBBeBKu4T+Bsjy9/b8u7obXQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
|
|
@ -3202,16 +3246,16 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.48.1.tgz",
|
||||
"integrity": "sha512-/9wQ4PqaefTK6POVTjJaYS0bynCgzh6ClJHGSBj06XEHjkfylzB+A3qvyaXnErEZSaxhIo4YdyBgq6j4RysxDg==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.49.0.tgz",
|
||||
"integrity": "sha512-jrLdRuAbPfPIdYNppHJ/D0wN+wwNfJ32YTAm10eJVsFmrVpXQnDWBn8niCSMlWjvml8jsce5E/O+86IQtTbJWA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/project-service": "8.48.1",
|
||||
"@typescript-eslint/tsconfig-utils": "8.48.1",
|
||||
"@typescript-eslint/types": "8.48.1",
|
||||
"@typescript-eslint/visitor-keys": "8.48.1",
|
||||
"@typescript-eslint/project-service": "8.49.0",
|
||||
"@typescript-eslint/tsconfig-utils": "8.49.0",
|
||||
"@typescript-eslint/types": "8.49.0",
|
||||
"@typescript-eslint/visitor-keys": "8.49.0",
|
||||
"debug": "^4.3.4",
|
||||
"minimatch": "^9.0.4",
|
||||
"semver": "^7.6.0",
|
||||
|
|
@ -3230,16 +3274,16 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/utils": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.48.1.tgz",
|
||||
"integrity": "sha512-fAnhLrDjiVfey5wwFRwrweyRlCmdz5ZxXz2G/4cLn0YDLjTapmN4gcCsTBR1N2rWnZSDeWpYtgLDsJt+FpmcwA==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.49.0.tgz",
|
||||
"integrity": "sha512-N3W7rJw7Rw+z1tRsHZbK395TWSYvufBXumYtEGzypgMUthlg0/hmCImeA8hgO2d2G4pd7ftpxxul2J8OdtdaFA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@eslint-community/eslint-utils": "^4.7.0",
|
||||
"@typescript-eslint/scope-manager": "8.48.1",
|
||||
"@typescript-eslint/types": "8.48.1",
|
||||
"@typescript-eslint/typescript-estree": "8.48.1"
|
||||
"@typescript-eslint/scope-manager": "8.49.0",
|
||||
"@typescript-eslint/types": "8.49.0",
|
||||
"@typescript-eslint/typescript-estree": "8.49.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
|
|
@ -3254,13 +3298,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/visitor-keys": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.48.1.tgz",
|
||||
"integrity": "sha512-BmxxndzEWhE4TIEEMBs8lP3MBWN3jFPs/p6gPm/wkv02o41hI6cq9AuSmGAaTTHPtA1FTi2jBre4A9rm5ZmX+Q==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.49.0.tgz",
|
||||
"integrity": "sha512-LlKaciDe3GmZFphXIc79THF/YYBugZ7FS1pO581E/edlVVNbZKDy93evqmrfQ9/Y4uN0vVhX4iuchq26mK/iiA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/types": "8.48.1",
|
||||
"@typescript-eslint/types": "8.49.0",
|
||||
"eslint-visitor-keys": "^4.2.1"
|
||||
},
|
||||
"engines": {
|
||||
|
|
@ -3344,16 +3388,16 @@
|
|||
"license": "ISC"
|
||||
},
|
||||
"node_modules/@vitejs/plugin-react": {
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-5.1.1.tgz",
|
||||
"integrity": "sha512-WQfkSw0QbQ5aJ2CHYw23ZGkqnRwqKHD/KYsMeTkZzPT4Jcf0DcBxBtwMJxnu6E7oxw5+JC6ZAiePgh28uJ1HBA==",
|
||||
"version": "5.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-5.1.2.tgz",
|
||||
"integrity": "sha512-EcA07pHJouywpzsoTUqNh5NwGayl2PPVEJKUSinGGSxFGYn+shYbqMGBg6FXDqgXum9Ou/ecb+411ssw8HImJQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/core": "^7.28.5",
|
||||
"@babel/plugin-transform-react-jsx-self": "^7.27.1",
|
||||
"@babel/plugin-transform-react-jsx-source": "^7.27.1",
|
||||
"@rolldown/pluginutils": "1.0.0-beta.47",
|
||||
"@rolldown/pluginutils": "1.0.0-beta.53",
|
||||
"@types/babel__core": "^7.20.5",
|
||||
"react-refresh": "^0.18.0"
|
||||
},
|
||||
|
|
@ -3497,6 +3541,12 @@
|
|||
"url": "https://opencollective.com/vitest"
|
||||
}
|
||||
},
|
||||
"node_modules/@webgpu/types": {
|
||||
"version": "0.1.67",
|
||||
"resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.67.tgz",
|
||||
"integrity": "sha512-uk53+2ECGUkWoDFez/hymwpRfdgdIn6y1ref70fEecGMe5607f4sozNFgBk0oxlr7j2CRGWBEc3IBYMmFdGGTQ==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/abort-controller": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
|
||||
|
|
@ -3605,9 +3655,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/apache-arrow/node_modules/@types/node": {
|
||||
"version": "20.19.25",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
||||
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
||||
"version": "20.19.26",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.26.tgz",
|
||||
"integrity": "sha512-0l6cjgF0XnihUpndDhk+nyD3exio3iKaYROSgvh/qSevPXax3L8p5DBRFjbvalnwatGgHEQn2R88y2fA3g4irg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
|
|
@ -3724,9 +3774,9 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/baseline-browser-mapping": {
|
||||
"version": "2.9.4",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.4.tgz",
|
||||
"integrity": "sha512-ZCQ9GEWl73BVm8bu5Fts8nt7MHdbt5vY9bP6WGnUh+r3l8M7CgfyTlwsgCbMC66BNxPr6Xoce3j66Ms5YUQTNA==",
|
||||
"version": "2.9.5",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.5.tgz",
|
||||
"integrity": "sha512-D5vIoztZOq1XM54LUdttJVc96ggEsIfju2JBvht06pSzpckp3C7HReun67Bghzrtdsq9XdMGbSSB3v3GhMNmAA==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
|
|
@ -3834,9 +3884,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/caniuse-lite": {
|
||||
"version": "1.0.30001759",
|
||||
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001759.tgz",
|
||||
"integrity": "sha512-Pzfx9fOKoKvevQf8oCXoyNRQ5QyxJj+3O0Rqx2V5oxT61KGx8+n6hV/IUyJeifUci2clnmmKVpvtiqRzgiWjSw==",
|
||||
"version": "1.0.30001760",
|
||||
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001760.tgz",
|
||||
"integrity": "sha512-7AAMPcueWELt1p3mi13HR/LHH0TJLT11cnwDJEs3xA4+CK/PLKeO9Kl1oru24htkyUKtkGCvAx4ohB0Ttry8Dw==",
|
||||
"dev": true,
|
||||
"funding": [
|
||||
{
|
||||
|
|
@ -4201,14 +4251,14 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/cssstyle": {
|
||||
"version": "5.3.3",
|
||||
"resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-5.3.3.tgz",
|
||||
"integrity": "sha512-OytmFH+13/QXONJcC75QNdMtKpceNk3u8ThBjyyYjkEcy/ekBwR1mMAuNvi3gdBPW3N5TlCzQ0WZw8H0lN/bDw==",
|
||||
"version": "5.3.4",
|
||||
"resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-5.3.4.tgz",
|
||||
"integrity": "sha512-KyOS/kJMEq5O9GdPnaf82noigg5X5DYn0kZPJTaAsCUaBizp6Xa1y9D4Qoqf/JazEXWuruErHgVXwjN5391ZJw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@asamuzakjp/css-color": "^4.0.3",
|
||||
"@csstools/css-syntax-patches-for-csstree": "^1.0.14",
|
||||
"@asamuzakjp/css-color": "^4.1.0",
|
||||
"@csstools/css-syntax-patches-for-csstree": "1.0.14",
|
||||
"css-tree": "^3.1.0"
|
||||
},
|
||||
"engines": {
|
||||
|
|
@ -4863,9 +4913,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/dompurify": {
|
||||
"version": "3.3.0",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.0.tgz",
|
||||
"integrity": "sha512-r+f6MYR1gGN1eJv0TVQbhA7if/U7P87cdPl3HN5rikqaBSBxLiCb/b9O+2eG0cxz0ghyU+mU1QkbsOwERMYlWQ==",
|
||||
"version": "3.3.1",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.1.tgz",
|
||||
"integrity": "sha512-qkdCKzLNtrgPFP1Vo+98FRzJnBRGe4ffyCea9IwHB1fyxPOeNTHpLKYGd4Uk9xvNoH0ZoOjwZxNptyMwqrId1Q==",
|
||||
"license": "(MPL-2.0 OR Apache-2.0)",
|
||||
"optionalDependencies": {
|
||||
"@types/trusted-types": "^2.0.7"
|
||||
|
|
@ -4892,9 +4942,9 @@
|
|||
"license": "ISC"
|
||||
},
|
||||
"node_modules/electron-to-chromium": {
|
||||
"version": "1.5.266",
|
||||
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.266.tgz",
|
||||
"integrity": "sha512-kgWEglXvkEfMH7rxP5OSZZwnaDWT7J9EoZCujhnpLbfi0bbNtRkgdX2E3gt0Uer11c61qCYktB3hwkAS325sJg==",
|
||||
"version": "1.5.267",
|
||||
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.267.tgz",
|
||||
"integrity": "sha512-0Drusm6MVRXSOJpGbaSVgcQsuB4hEkMpHXaVstcPmhu5LIedxs1xNK/nIxmQIU/RPC0+1/o0AVZfBTkTNJOdUw==",
|
||||
"dev": true,
|
||||
"license": "ISC"
|
||||
},
|
||||
|
|
@ -5371,9 +5421,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/expect-type": {
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.2.2.tgz",
|
||||
"integrity": "sha512-JhFGDVJ7tmDJItKhYgJCGLOWjuK9vPxiXoUFLwLDc99NlmklilbiQJwoctZtt13+xMw91MCk/REan6MWHqDjyA==",
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz",
|
||||
"integrity": "sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
|
|
@ -5435,7 +5485,6 @@
|
|||
"version": "0.8.2",
|
||||
"resolved": "https://registry.npmjs.org/fflate/-/fflate-0.8.2.tgz",
|
||||
"integrity": "sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/file-entry-cache": {
|
||||
|
|
@ -5682,13 +5731,6 @@
|
|||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/graphemer": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz",
|
||||
"integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/graphlib": {
|
||||
"version": "2.1.8",
|
||||
"resolved": "https://registry.npmjs.org/graphlib/-/graphlib-2.1.8.tgz",
|
||||
|
|
@ -6123,6 +6165,12 @@
|
|||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/is-any-array": {
|
||||
"version": "0.1.1",
|
||||
"resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-0.1.1.tgz",
|
||||
"integrity": "sha512-qTiELO+kpTKqPgxPYbshMERlzaFu29JDnpB8s3bjg+JkxBpw29/qqSaOdKv2pCdaG92rLGeG/zG2GauX58hfoA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/is-arrayish": {
|
||||
"version": "0.2.1",
|
||||
"resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz",
|
||||
|
|
@ -6232,15 +6280,15 @@
|
|||
}
|
||||
},
|
||||
"node_modules/jsdom": {
|
||||
"version": "27.2.0",
|
||||
"resolved": "https://registry.npmjs.org/jsdom/-/jsdom-27.2.0.tgz",
|
||||
"integrity": "sha512-454TI39PeRDW1LgpyLPyURtB4Zx1tklSr6+OFOipsxGUH1WMTvk6C65JQdrj455+DP2uJ1+veBEHTGFKWVLFoA==",
|
||||
"version": "27.3.0",
|
||||
"resolved": "https://registry.npmjs.org/jsdom/-/jsdom-27.3.0.tgz",
|
||||
"integrity": "sha512-GtldT42B8+jefDUC4yUKAvsaOrH7PDHmZxZXNgF2xMmymjUbRYJvpAybZAKEmXDGTM0mCsz8duOa4vTm5AY2Kg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@acemir/cssom": "^0.9.23",
|
||||
"@asamuzakjp/dom-selector": "^6.7.4",
|
||||
"cssstyle": "^5.3.3",
|
||||
"@acemir/cssom": "^0.9.28",
|
||||
"@asamuzakjp/dom-selector": "^6.7.6",
|
||||
"cssstyle": "^5.3.4",
|
||||
"data-urls": "^6.0.0",
|
||||
"decimal.js": "^10.6.0",
|
||||
"html-encoding-sniffer": "^4.0.0",
|
||||
|
|
@ -6338,9 +6386,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/katex": {
|
||||
"version": "0.16.26",
|
||||
"resolved": "https://registry.npmjs.org/katex/-/katex-0.16.26.tgz",
|
||||
"integrity": "sha512-LvYwQDwfcFB3rCkxwzqVFxhIB21x1JivrWAs3HT9NsmtgvQrcVCZ6xihnNwXwiQ8UhqRtDJRmwrRz5EgzQ2DuA==",
|
||||
"version": "0.16.27",
|
||||
"resolved": "https://registry.npmjs.org/katex/-/katex-0.16.27.tgz",
|
||||
"integrity": "sha512-aeQoDkuRWSqQN6nSvVCEFvfXdqo1OQiCmmW1kc9xSdjutPv7BGO7pqY9sQRJpMOGrEdfDgF2TfRXe5eUAD2Waw==",
|
||||
"funding": [
|
||||
"https://opencollective.com/katex",
|
||||
"https://github.com/sponsors/katex"
|
||||
|
|
@ -6920,6 +6968,12 @@
|
|||
"integrity": "sha512-yQ3rwFWRfwNUY7H5vpU0wfdkNSnvnJinhF9830Swlaxl03zsOjCfmX0ugac+3LtK0lYSgwL/KXc8oYL3mG4YFQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/meshoptimizer": {
|
||||
"version": "0.22.0",
|
||||
"resolved": "https://registry.npmjs.org/meshoptimizer/-/meshoptimizer-0.22.0.tgz",
|
||||
"integrity": "sha512-IebiK79sqIy+E4EgOr+CAw+Ke8hAspXKzBd0JdgEmPHiAwmvEj2S4h1rfvo+o/BnfEYd/jAOg5IeeIjzlzSnDg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/micromark": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/micromark/-/micromark-4.0.2.tgz",
|
||||
|
|
@ -7539,6 +7593,79 @@
|
|||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/ml-array-max": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/ml-array-max/-/ml-array-max-1.2.4.tgz",
|
||||
"integrity": "sha512-BlEeg80jI0tW6WaPyGxf5Sa4sqvcyY6lbSn5Vcv44lp1I2GR6AWojfUvLnGTNsIXrZ8uqWmo8VcG1WpkI2ONMQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"is-any-array": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ml-array-max/node_modules/is-any-array": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz",
|
||||
"integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ml-array-min": {
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/ml-array-min/-/ml-array-min-1.2.3.tgz",
|
||||
"integrity": "sha512-VcZ5f3VZ1iihtrGvgfh/q0XlMobG6GQ8FsNyQXD3T+IlstDv85g8kfV0xUG1QPRO/t21aukaJowDzMTc7j5V6Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"is-any-array": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ml-array-min/node_modules/is-any-array": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz",
|
||||
"integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ml-array-rescale": {
|
||||
"version": "1.3.7",
|
||||
"resolved": "https://registry.npmjs.org/ml-array-rescale/-/ml-array-rescale-1.3.7.tgz",
|
||||
"integrity": "sha512-48NGChTouvEo9KBctDfHC3udWnQKNKEWN0ziELvY3KG25GR5cA8K8wNVzracsqSW1QEkAXjTNx+ycgAv06/1mQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"is-any-array": "^2.0.0",
|
||||
"ml-array-max": "^1.2.4",
|
||||
"ml-array-min": "^1.2.3"
|
||||
}
|
||||
},
|
||||
"node_modules/ml-array-rescale/node_modules/is-any-array": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz",
|
||||
"integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ml-levenberg-marquardt": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/ml-levenberg-marquardt/-/ml-levenberg-marquardt-2.1.1.tgz",
|
||||
"integrity": "sha512-2+HwUqew4qFFFYujYlQtmFUrxCB4iJAPqnUYro3P831wj70eJZcANwcRaIMGUVaH9NDKzfYuA4N5u67KExmaRA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"is-any-array": "^0.1.0",
|
||||
"ml-matrix": "^6.4.1"
|
||||
}
|
||||
},
|
||||
"node_modules/ml-matrix": {
|
||||
"version": "6.12.1",
|
||||
"resolved": "https://registry.npmjs.org/ml-matrix/-/ml-matrix-6.12.1.tgz",
|
||||
"integrity": "sha512-TJ+8eOFdp+INvzR4zAuwBQJznDUfktMtOB6g/hUcGh3rcyjxbz4Te57Pgri8Q9bhSQ7Zys4IYOGhFdnlgeB6Lw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"is-any-array": "^2.0.1",
|
||||
"ml-array-rescale": "^1.3.7"
|
||||
}
|
||||
},
|
||||
"node_modules/ml-matrix/node_modules/is-any-array": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz",
|
||||
"integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/mlly": {
|
||||
"version": "1.8.0",
|
||||
"resolved": "https://registry.npmjs.org/mlly/-/mlly-1.8.0.tgz",
|
||||
|
|
@ -8742,6 +8869,12 @@
|
|||
"node": ">=12.17"
|
||||
}
|
||||
},
|
||||
"node_modules/three": {
|
||||
"version": "0.181.2",
|
||||
"resolved": "https://registry.npmjs.org/three/-/three-0.181.2.tgz",
|
||||
"integrity": "sha512-k/CjiZ80bYss6Qs7/ex1TBlPD11whT9oKfT8oTGiHa34W4JRd1NiH/Tr1DbHWQ2/vMUypxksLnF2CfmlmM5XFQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tinybench": {
|
||||
"version": "2.9.0",
|
||||
"resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz",
|
||||
|
|
@ -8923,16 +9056,16 @@
|
|||
}
|
||||
},
|
||||
"node_modules/typescript-eslint": {
|
||||
"version": "8.48.1",
|
||||
"resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.48.1.tgz",
|
||||
"integrity": "sha512-FbOKN1fqNoXp1hIl5KYpObVrp0mCn+CLgn479nmu2IsRMrx2vyv74MmsBLVlhg8qVwNFGbXSp8fh1zp8pEoC2A==",
|
||||
"version": "8.49.0",
|
||||
"resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.49.0.tgz",
|
||||
"integrity": "sha512-zRSVH1WXD0uXczCXw+nsdjGPUdx4dfrs5VQoHnUWmv1U3oNlAKv4FUNdLDhVUg+gYn+a5hUESqch//Rv5wVhrg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/eslint-plugin": "8.48.1",
|
||||
"@typescript-eslint/parser": "8.48.1",
|
||||
"@typescript-eslint/typescript-estree": "8.48.1",
|
||||
"@typescript-eslint/utils": "8.48.1"
|
||||
"@typescript-eslint/eslint-plugin": "8.49.0",
|
||||
"@typescript-eslint/parser": "8.49.0",
|
||||
"@typescript-eslint/typescript-estree": "8.49.0",
|
||||
"@typescript-eslint/utils": "8.49.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
|
|
@ -8961,6 +9094,15 @@
|
|||
"integrity": "sha512-9a4/uxlTWJ4+a5i0ooc1rU7C7YOw3wT+UGqdeNNHWnOF9qcMBgLRS+4IYUqbczewFx4mLEig6gawh7X6mFlEkA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/umap-js": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/umap-js/-/umap-js-1.4.0.tgz",
|
||||
"integrity": "sha512-xxpviF9wUO6Nxrx+C58SoDgea+h2PnVaRPKDelWv0HotmY6BeWeh0kAPJoumfqUkzUvowGsYfMbnsWI0b9do+A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ml-levenberg-marquardt": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "7.16.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||
|
|
@ -9152,9 +9294,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "7.2.6",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.2.6.tgz",
|
||||
"integrity": "sha512-tI2l/nFHC5rLh7+5+o7QjKjSR04ivXDF4jcgV0f/bTQ+OJiITy5S6gaynVsEM+7RqzufMnVbIon6Sr5x1SDYaQ==",
|
||||
"version": "7.2.7",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.2.7.tgz",
|
||||
"integrity": "sha512-ITcnkFeR3+fI8P1wMgItjGrR10170d8auB4EpMLPqmx6uxElH3a/hHGQabSHKdqd4FXWO1nFIp9rRn7JQ34ACQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
"@types/dagre": "^0.7.53",
|
||||
"@types/js-yaml": "^4.0.9",
|
||||
"@types/lodash": "^4.17.20",
|
||||
"@types/three": "^0.181.0",
|
||||
"@uiw/react-codemirror": "^4.25.3",
|
||||
"axios": "^1.13.2",
|
||||
"chevrotain-allstar": "^0.3.1",
|
||||
|
|
@ -42,7 +43,7 @@
|
|||
"lodash": "^4.17.21",
|
||||
"lucide-react": "^0.554.0",
|
||||
"maplibre-gl": "^5.14.0",
|
||||
"mermaid": "^11.12.1",
|
||||
"mermaid": "^11.12.2",
|
||||
"n3": "^1.26.0",
|
||||
"react": "^19.2.0",
|
||||
"react-dom": "^19.2.0",
|
||||
|
|
@ -51,6 +52,8 @@
|
|||
"rehype-raw": "^7.0.0",
|
||||
"rehype-sanitize": "^6.0.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"three": "^0.181.2",
|
||||
"umap-js": "^1.4.0",
|
||||
"zustand": "^5.0.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
|||
|
|
@ -185,7 +185,7 @@ imports:
|
|||
- modules/enums/ReconstructionActivityTypeEnum
|
||||
- modules/enums/SourceDocumentTypeEnum
|
||||
# StaffRoleTypeEnum REMOVED - replaced by StaffRole class hierarchy
|
||||
# See: .opencode/ENUM_TO_CLASS_PRINCIPLE.md for rationale
|
||||
# See: rules/ENUM_TO_CLASS_PRINCIPLE.md for rationale
|
||||
- modules/enums/CallForApplicationStatusEnum
|
||||
- modules/enums/FundingRequirementTypeEnum
|
||||
|
||||
|
|
@ -242,7 +242,7 @@ imports:
|
|||
- modules/classes/PersonObservation
|
||||
|
||||
# Staff role class hierarchy (replaces StaffRoleTypeEnum - Single Source of Truth)
|
||||
# See: .opencode/ENUM_TO_CLASS_PRINCIPLE.md
|
||||
# See: rules/ENUM_TO_CLASS_PRINCIPLE.md
|
||||
- modules/classes/StaffRole
|
||||
- modules/classes/StaffRoles
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"generated": "2025-12-09T10:49:54.625Z",
|
||||
"generated": "2025-12-09T15:58:27.582Z",
|
||||
"version": "1.0.0",
|
||||
"categories": [
|
||||
{
|
||||
|
|
|
|||
|
|
@ -4,11 +4,16 @@ title: Company Archives Type
|
|||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
schema: http://schema.org/
|
||||
org: http://www.w3.org/ns/org#
|
||||
rico: https://www.ica.org/standards/RiC/ontology#
|
||||
|
||||
imports:
|
||||
- linkml:types
|
||||
- ./ArchiveOrganizationType
|
||||
- ./CollectionType
|
||||
- ./Department
|
||||
- ./OrganizationBranch
|
||||
|
||||
classes:
|
||||
CompanyArchives:
|
||||
|
|
@ -32,6 +37,37 @@ classes:
|
|||
- Technical drawings and blueprints
|
||||
- Corporate publications
|
||||
|
||||
**Organizational Context**:
|
||||
Company archives are typically organized as:
|
||||
|
||||
1. **Departments within corporations** (`org:OrganizationalUnit`):
|
||||
- Archive department under Records Management division
|
||||
- Historical archives team within Communications/PR
|
||||
- Technical archives under Engineering department
|
||||
|
||||
2. **Branches at corporate facilities** (`org:OrganizationalUnit`):
|
||||
- Central archive at headquarters
|
||||
- Regional archive at manufacturing sites
|
||||
- Research archive at R&D centers
|
||||
|
||||
3. **Standalone heritage organizations** (rare):
|
||||
- Independent foundation managing corporate heritage
|
||||
- Heritage society for defunct companies
|
||||
|
||||
**Relationship to Parent Organization**:
|
||||
|
||||
| Pattern | Property | Example |
|
||||
|---------|----------|---------|
|
||||
| Archive as department | `schema:department` / `org:hasUnit` | Philips Archive is department of Philips N.V. |
|
||||
| Archive as branch | `org:hasSite` / `org:unitOf` | Shell Archive at The Hague HQ |
|
||||
| Archive with parent org | `schema:parentOrganization` | Unilever Historical Archives → Unilever PLC |
|
||||
|
||||
**W3C ORG / Schema.org Alignment**:
|
||||
- `schema:parentOrganization` - Links archive to the corporation it belongs to
|
||||
- `schema:department` - Corporation links to its archive department
|
||||
- `org:unitOf` - Archive is organizational unit of corporation
|
||||
- `org:hasUnit` - Corporation has archive as organizational unit
|
||||
|
||||
**Business Value**:
|
||||
Company archives support:
|
||||
- Legal and regulatory compliance
|
||||
|
|
@ -45,6 +81,8 @@ classes:
|
|||
- BankArchive (Q52718263) - Financial institution archives
|
||||
- EconomicArchive (Q27032167) - Economic history focus
|
||||
- InstitutionalArchive (Q124762372) - Institutional records
|
||||
- Department - Formal departmental structure within organization
|
||||
- OrganizationBranch - Physical branch locations of archive
|
||||
|
||||
**Professional Body**:
|
||||
Company archivists often belong to:
|
||||
|
|
@ -58,7 +96,8 @@ classes:
|
|||
|
||||
**Ontological Alignment**:
|
||||
- **SKOS**: skos:Concept with skos:broader Q166118 (archive)
|
||||
- **Schema.org**: schema:ArchiveOrganization
|
||||
- **Schema.org**: schema:ArchiveOrganization, schema:parentOrganization
|
||||
- **W3C ORG**: org:OrganizationalUnit, org:unitOf, org:hasUnit
|
||||
- **RiC-O**: rico:CorporateBody (as agent)
|
||||
|
||||
**Multilingual Labels**:
|
||||
|
|
@ -66,6 +105,11 @@ classes:
|
|||
- es: archivo empresarial
|
||||
- fr: archives d'entreprise
|
||||
|
||||
slots:
|
||||
- parent_corporation
|
||||
- archive_department_of
|
||||
- archive_branches
|
||||
|
||||
slot_usage:
|
||||
primary_type:
|
||||
description: |
|
||||
|
|
@ -90,23 +134,123 @@ classes:
|
|||
description: |
|
||||
Typically includes: governance records, financial records,
|
||||
product documentation, marketing materials, personnel files.
|
||||
|
||||
parent_corporation:
|
||||
slot_uri: schema:parentOrganization
|
||||
description: |
|
||||
The parent corporation that owns/operates this company archive.
|
||||
|
||||
**Schema.org Alignment**:
|
||||
`schema:parentOrganization` - "The larger organization that this
|
||||
organization is a subOrganization of, if any."
|
||||
|
||||
**Use Cases**:
|
||||
- Philips Company Archives → Philips N.V.
|
||||
- Shell Historical Archive → Shell PLC
|
||||
- Siemens Corporate Archives → Siemens AG
|
||||
|
||||
Can reference:
|
||||
- External URI for the parent corporation
|
||||
- Custodian instance if parent is also modeled as heritage custodian
|
||||
range: uriorcurie
|
||||
examples:
|
||||
- value: "https://www.wikidata.org/entity/Q163292"
|
||||
description: "Philips N.V. as parent of Philips Archives"
|
||||
- value: "https://nde.nl/ontology/hc/nl-corporation/shell-plc"
|
||||
description: "Shell PLC as parent organization"
|
||||
|
||||
archive_department_of:
|
||||
slot_uri: org:unitOf
|
||||
description: |
|
||||
Links this archive to the Department within which it operates.
|
||||
|
||||
**W3C ORG Alignment**:
|
||||
`org:unitOf` - "Indicates an Organization of which this Unit is a part."
|
||||
|
||||
Many company archives are organized as:
|
||||
- Sub-unit of Records Management department
|
||||
- Part of Corporate Communications
|
||||
- Under Legal/Compliance division
|
||||
|
||||
Links to Department class for formal departmental context.
|
||||
range: Department
|
||||
examples:
|
||||
- value:
|
||||
department_name: "Records Management Division"
|
||||
refers_to_custodian: "https://nde.nl/ontology/hc/nl-corporation/philips"
|
||||
description: "Archive is unit of Records Management"
|
||||
|
||||
archive_branches:
|
||||
slot_uri: org:hasSubOrganization
|
||||
description: |
|
||||
Physical branch locations of this company archive.
|
||||
|
||||
**W3C ORG Alignment**:
|
||||
`org:hasSubOrganization` - "Represents hierarchical containment of
|
||||
Organizations or Organizational Units."
|
||||
|
||||
Large corporations may have multiple archive locations:
|
||||
- Central archive at headquarters
|
||||
- Regional archives at major facilities
|
||||
- Research archives at R&D centers
|
||||
- Product archives at manufacturing sites
|
||||
|
||||
Links to OrganizationBranch class for physical locations.
|
||||
range: OrganizationBranch
|
||||
multivalued: true
|
||||
inlined_as_list: true
|
||||
examples:
|
||||
- value:
|
||||
- branch_name: "Philips Archives - Eindhoven"
|
||||
branch_type: REGIONAL_OFFICE
|
||||
- branch_name: "Philips Research Archives - High Tech Campus"
|
||||
branch_type: RESEARCH_CENTER
|
||||
description: "Multiple archive branches"
|
||||
|
||||
exact_mappings:
|
||||
- skos:Concept
|
||||
close_mappings:
|
||||
- schema:ArchiveOrganization
|
||||
- rico:CorporateBody
|
||||
- org:OrganizationalUnit
|
||||
related_mappings:
|
||||
- schema:parentOrganization
|
||||
- org:unitOf
|
||||
- org:hasSubOrganization
|
||||
|
||||
comments:
|
||||
- "Corporate archives preserving business heritage"
|
||||
- "Important for legal compliance and corporate identity"
|
||||
- "Part of dual-class pattern: custodian type + rico:RecordSetType"
|
||||
- "May have restricted access for commercial sensitivity"
|
||||
- "Typically organized as Department within larger corporation (org:unitOf)"
|
||||
- "May have multiple branch locations (org:hasSubOrganization)"
|
||||
- "Links to parent corporation via schema:parentOrganization"
|
||||
|
||||
see_also:
|
||||
- BankArchive
|
||||
- EconomicArchive
|
||||
- InstitutionalArchive
|
||||
- Department
|
||||
- OrganizationBranch
|
||||
|
||||
examples:
|
||||
- value:
|
||||
type_id: "https://nde.nl/ontology/hc/type/archive/company/philips"
|
||||
primary_type: "ARCHIVE"
|
||||
wikidata_entity: "Q10605195"
|
||||
type_label:
|
||||
- "Philips Company Archives@en"
|
||||
- "Philips Bedrijfsarchief@nl"
|
||||
parent_corporation: "https://www.wikidata.org/entity/Q163292"
|
||||
archive_department_of:
|
||||
department_name: "Corporate Communications & Heritage"
|
||||
archive_branches:
|
||||
- branch_name: "Philips Archives - Eindhoven HQ"
|
||||
branch_type: REGIONAL_OFFICE
|
||||
- branch_name: "Philips Research Archives"
|
||||
branch_type: RESEARCH_CENTER
|
||||
description: "Philips company archives with organizational context"
|
||||
|
||||
# rico:RecordSetType for collection classification
|
||||
CompanyArchivesRecordSetType:
|
||||
|
|
@ -123,3 +267,56 @@ classes:
|
|||
annotations:
|
||||
wikidata: Q10605195
|
||||
linked_custodian_type: CompanyArchives
|
||||
|
||||
# Slot definitions for organizational relationships
|
||||
slots:
|
||||
parent_corporation:
|
||||
slot_uri: schema:parentOrganization
|
||||
description: |
|
||||
The parent corporation that owns/operates this company archive.
|
||||
|
||||
Schema.org: parentOrganization - "The larger organization that this
|
||||
organization is a subOrganization of, if any."
|
||||
|
||||
Inverse of schema:subOrganization.
|
||||
range: uriorcurie
|
||||
exact_mappings:
|
||||
- schema:parentOrganization
|
||||
comments:
|
||||
- "Links company archive to owning corporation"
|
||||
- "Use Wikidata Q-number or organizational URI"
|
||||
|
||||
archive_department_of:
|
||||
slot_uri: org:unitOf
|
||||
description: |
|
||||
Links this archive to the Department within which it operates.
|
||||
|
||||
W3C ORG: unitOf - "Indicates an Organization of which this Unit is a part."
|
||||
|
||||
Company archives are often organized as sub-units of:
|
||||
- Records Management department
|
||||
- Corporate Communications
|
||||
- Legal/Compliance division
|
||||
range: Department
|
||||
exact_mappings:
|
||||
- org:unitOf
|
||||
comments:
|
||||
- "Links archive to formal department structure"
|
||||
- "Inverse of org:hasUnit"
|
||||
|
||||
archive_branches:
|
||||
slot_uri: org:hasSubOrganization
|
||||
description: |
|
||||
Physical branch locations of this company archive.
|
||||
|
||||
W3C ORG: hasSubOrganization - "Represents hierarchical containment of
|
||||
Organizations or Organizational Units."
|
||||
|
||||
Links to OrganizationBranch instances for each physical location.
|
||||
range: OrganizationBranch
|
||||
multivalued: true
|
||||
exact_mappings:
|
||||
- org:hasSubOrganization
|
||||
comments:
|
||||
- "Multiple archive branch locations"
|
||||
- "Each branch at different corporate facility"
|
||||
|
|
|
|||
|
|
@ -2,6 +2,9 @@ id: https://nde.nl/ontology/hc/class/Conservatoria
|
|||
name: Conservatoria
|
||||
title: Conservatória Type (Lusophone)
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
|
||||
imports:
|
||||
- linkml:types
|
||||
- ./ArchiveOrganizationType
|
||||
|
|
@ -16,7 +19,8 @@ classes:
|
|||
|
||||
**Wikidata**: Q9854379
|
||||
|
||||
**Geographic Restriction**: Portugal, Brazil, and other Lusophone countries
|
||||
**Geographic Restriction**: Lusophone countries (PT, BR, AO, MZ, CV, GW, ST, TL)
|
||||
This constraint is enforced via LinkML `rules` with `postconditions`.
|
||||
|
||||
**CUSTODIAN-ONLY**: This type does NOT have a corresponding rico:RecordSetType
|
||||
class. Conservatórias are administrative offices with registration functions,
|
||||
|
|
@ -59,6 +63,7 @@ classes:
|
|||
|
||||
**Multilingual Labels**:
|
||||
- pt: Conservatória
|
||||
- pt-BR: Cartório de Registro
|
||||
|
||||
slot_usage:
|
||||
primary_type:
|
||||
|
|
@ -70,10 +75,49 @@ classes:
|
|||
|
||||
wikidata_entity:
|
||||
description: |
|
||||
Should be Q9854379 for Conservatórias.
|
||||
MUST be Q9854379 for Conservatórias.
|
||||
Lusophone civil/property registration offices.
|
||||
pattern: "^Q[0-9]+$"
|
||||
equals_string: "Q9854379"
|
||||
|
||||
applicable_countries:
|
||||
description: |
|
||||
**Geographic Restriction**: Lusophone countries only.
|
||||
|
||||
Conservatórias exist in Portuguese-speaking countries:
|
||||
- PT (Portugal) - Conservatórias do Registo
|
||||
- BR (Brazil) - Cartórios de Registro
|
||||
- AO (Angola) - Conservatórias
|
||||
- MZ (Mozambique) - Conservatórias
|
||||
- CV (Cape Verde) - Conservatórias
|
||||
- GW (Guinea-Bissau) - Conservatórias
|
||||
- ST (São Tomé and Príncipe) - Conservatórias
|
||||
- TL (Timor-Leste) - Conservatórias (Portuguese legal heritage)
|
||||
|
||||
The `rules` section below enforces this constraint during validation.
|
||||
multivalued: true
|
||||
required: true
|
||||
minimum_cardinality: 1
|
||||
|
||||
# LinkML rules for geographic constraint validation
|
||||
rules:
|
||||
- description: >-
|
||||
Conservatoria MUST have applicable_countries containing at least one
|
||||
Lusophone country (PT, BR, AO, MZ, CV, GW, ST, TL).
|
||||
This is a mandatory geographic restriction for Portuguese-speaking
|
||||
civil registry and notarial archive offices.
|
||||
postconditions:
|
||||
slot_conditions:
|
||||
applicable_countries:
|
||||
any_of:
|
||||
- equals_string: "PT"
|
||||
- equals_string: "BR"
|
||||
- equals_string: "AO"
|
||||
- equals_string: "MZ"
|
||||
- equals_string: "CV"
|
||||
- equals_string: "GW"
|
||||
- equals_string: "ST"
|
||||
- equals_string: "TL"
|
||||
|
||||
exact_mappings:
|
||||
- skos:Concept
|
||||
|
|
@ -82,8 +126,10 @@ classes:
|
|||
- rico:CorporateBody
|
||||
|
||||
comments:
|
||||
- "Conservatória (pt)"
|
||||
- "Cartório de Registro (pt-BR)"
|
||||
- "CUSTODIAN-ONLY type: No corresponding rico:RecordSetType class"
|
||||
- "Geographic restriction: Lusophone countries (Portugal, Brazil, etc.)"
|
||||
- "Geographic restriction enforced via LinkML rules: Lusophone countries only"
|
||||
- "Government registration office, not traditional archive"
|
||||
- "Essential for genealogical and legal research"
|
||||
|
||||
|
|
|
|||
|
|
@ -2,21 +2,27 @@ id: https://nde.nl/ontology/hc/class/CountyRecordOffice
|
|||
name: CountyRecordOffice
|
||||
title: County Record Office Type
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
org: http://www.w3.org/ns/org#
|
||||
|
||||
imports:
|
||||
- linkml:types
|
||||
- ./ArchiveOrganizationType
|
||||
- ./OrganizationBranch
|
||||
|
||||
classes:
|
||||
CountyRecordOffice:
|
||||
is_a: ArchiveOrganizationType
|
||||
class_uri: skos:Concept
|
||||
description: |
|
||||
Local authority repository in the United Kingdom and similar jurisdictions,
|
||||
preserving historical records of the county and its communities.
|
||||
Local authority repository in the United Kingdom, preserving historical
|
||||
records of the county and its communities.
|
||||
|
||||
**Wikidata**: Q5177943
|
||||
|
||||
**Geographic Context**: Primarily United Kingdom
|
||||
**Geographic Restriction**: United Kingdom (GB) only.
|
||||
This constraint is enforced via LinkML `rules` with `postconditions`.
|
||||
|
||||
**CUSTODIAN-ONLY**: This type does NOT have a corresponding rico:RecordSetType
|
||||
class. County Record Offices are institutional types, not collection
|
||||
|
|
@ -40,16 +46,25 @@ classes:
|
|||
- Often designated as place of deposit for public records
|
||||
- Increasingly rebranded as "Archives and Local Studies"
|
||||
|
||||
In Scotland:
|
||||
- Similar functions performed by local authority archives
|
||||
- National Records of Scotland at national level
|
||||
|
||||
In Northern Ireland:
|
||||
- Public Record Office of Northern Ireland (PRONI)
|
||||
- Local council archives
|
||||
|
||||
**Related Types**:
|
||||
- LocalGovernmentArchive (Q118281267) - Local authority records
|
||||
- MunicipalArchive (Q604177) - City/town archives
|
||||
- LocalHistoryArchive (Q12324798) - Local history focus
|
||||
|
||||
**Notable Examples**:
|
||||
- The National Archives (Kew) - National level
|
||||
- London Metropolitan Archives
|
||||
- Oxfordshire History Centre
|
||||
- Lancashire Archives
|
||||
- West Yorkshire Archive Service
|
||||
- Surrey History Centre
|
||||
|
||||
**Ontological Alignment**:
|
||||
- **SKOS**: skos:Concept with skos:broader Q166118 (archive)
|
||||
|
|
@ -57,6 +72,8 @@ classes:
|
|||
- **RiC-O**: rico:CorporateBody (as agent)
|
||||
|
||||
**Multilingual Labels**:
|
||||
- en: County Record Office
|
||||
- en-GB: County Record Office
|
||||
- it: archivio pubblico territoriale
|
||||
|
||||
slot_usage:
|
||||
|
|
@ -67,7 +84,7 @@ classes:
|
|||
|
||||
wikidata_entity:
|
||||
description: |
|
||||
Should be Q5177943 for county record offices.
|
||||
MUST be Q5177943 for county record offices.
|
||||
UK local authority archive type.
|
||||
pattern: "^Q[0-9]+$"
|
||||
equals_string: "Q5177943"
|
||||
|
|
@ -76,6 +93,66 @@ classes:
|
|||
description: |
|
||||
Typically 'county' or 'local' for this archive type.
|
||||
Corresponds to UK county administrative level.
|
||||
|
||||
is_branch_of_authority:
|
||||
description: |
|
||||
**Organizational Relationship**: County Record Offices may be branches
|
||||
of larger local authority structures.
|
||||
|
||||
**Common Parent Organizations**:
|
||||
- County Councils (e.g., Oxfordshire County Council)
|
||||
- Unitary Authorities (e.g., Bristol City Council)
|
||||
- Combined Authorities (e.g., Greater Manchester)
|
||||
- Joint Archive Services (e.g., East Sussex / Brighton & Hove)
|
||||
|
||||
**Legal Context**:
|
||||
County Record Offices are typically:
|
||||
- Designated "place of deposit" under Public Records Act 1958
|
||||
- Part of local authority heritage/cultural services
|
||||
- May share governance with local studies libraries
|
||||
|
||||
**Use org:unitOf pattern** from OrganizationBranch to link to parent
|
||||
authority when modeled as formal organizational unit.
|
||||
|
||||
**Examples**:
|
||||
- Oxfordshire History Centre → part of Oxfordshire County Council
|
||||
- London Metropolitan Archives → part of City of London Corporation
|
||||
- West Yorkshire Archive Service → joint service of five councils
|
||||
range: uriorcurie
|
||||
multivalued: false
|
||||
required: false
|
||||
examples:
|
||||
- value: "https://nde.nl/ontology/hc/uk/oxfordshire-county-council"
|
||||
description: "Parent local authority"
|
||||
|
||||
applicable_countries:
|
||||
description: |
|
||||
**Geographic Restriction**: United Kingdom (GB) only.
|
||||
|
||||
County Record Offices are a UK-specific institution type within
|
||||
the local authority structure of England, Wales, Scotland, and
|
||||
Northern Ireland.
|
||||
|
||||
Note: Uses ISO 3166-1 alpha-2 code "GB" for United Kingdom
|
||||
(not "UK" which is not a valid ISO code).
|
||||
|
||||
The `rules` section below enforces this constraint during validation.
|
||||
ifabsent: "string(GB)"
|
||||
required: true
|
||||
minimum_cardinality: 1
|
||||
maximum_cardinality: 1
|
||||
|
||||
# LinkML rules for geographic constraint validation
|
||||
rules:
|
||||
- description: >-
|
||||
CountyRecordOffice MUST have applicable_countries containing "GB"
|
||||
(United Kingdom). This is a mandatory geographic restriction for
|
||||
UK county record offices and local authority archives.
|
||||
postconditions:
|
||||
slot_conditions:
|
||||
applicable_countries:
|
||||
any_of:
|
||||
- equals_string: "GB"
|
||||
|
||||
exact_mappings:
|
||||
- skos:Concept
|
||||
|
|
@ -84,7 +161,9 @@ classes:
|
|||
- rico:CorporateBody
|
||||
|
||||
comments:
|
||||
- "County Record Office (en-GB)"
|
||||
- "CUSTODIAN-ONLY type: No corresponding rico:RecordSetType class"
|
||||
- "Geographic restriction enforced via LinkML rules: United Kingdom (GB) only"
|
||||
- "UK local authority archive institution type"
|
||||
- "Often designated place of deposit for public records"
|
||||
- "Key resource for local and family history research"
|
||||
|
|
@ -93,3 +172,12 @@ classes:
|
|||
- LocalGovernmentArchive
|
||||
- MunicipalArchive
|
||||
- LocalHistoryArchive
|
||||
- OrganizationBranch
|
||||
|
||||
slots:
|
||||
is_branch_of_authority:
|
||||
slot_uri: org:unitOf
|
||||
description: |
|
||||
Parent local authority or governing body for this County Record Office.
|
||||
Uses W3C Org ontology org:unitOf relationship.
|
||||
range: uriorcurie
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ imports:
|
|||
- linkml:types
|
||||
- ./ArchiveOrganizationType
|
||||
- ./CustodianAdministration
|
||||
- ./CustodianArchive
|
||||
|
||||
classes:
|
||||
CurrentArchive:
|
||||
|
|
@ -63,6 +64,24 @@ classes:
|
|||
- HistoricalArchive (Q3621673) - non-current permanent records
|
||||
- RecordsCenter - semi-current storage facility
|
||||
|
||||
**RELATIONSHIP TO CustodianArchive**:
|
||||
|
||||
CurrentArchive (this class) is a TYPE classification (skos:Concept) for
|
||||
archives managing records in the active/current phase of the lifecycle.
|
||||
|
||||
CustodianArchive is an INSTANCE class (rico:RecordSet) representing the
|
||||
actual operational archives of a heritage custodian awaiting processing.
|
||||
|
||||
**Semantic Relationship**:
|
||||
- CurrentArchive is a HYPERNYM (broader type) for the concept of active records
|
||||
- CustodianArchive records MAY be typed as CurrentArchive when in active use
|
||||
- When CustodianArchive.processing_status = "UNPROCESSED", records may still
|
||||
be in the current/active phase conceptually
|
||||
|
||||
**SKOS Alignment**:
|
||||
- skos:broader: CurrentArchive → DepositArchive (lifecycle progression)
|
||||
- skos:narrower: CurrentArchive ← specific current archive types
|
||||
|
||||
**ONTOLOGICAL ALIGNMENT**:
|
||||
- **SKOS**: skos:Concept (type classification)
|
||||
- **RiC-O**: rico:RecordSet for active record groups
|
||||
|
|
@ -74,6 +93,7 @@ classes:
|
|||
- retention_schedule
|
||||
- creating_organization
|
||||
- transfer_policy
|
||||
- has_narrower_instance
|
||||
|
||||
slot_usage:
|
||||
wikidata_entity:
|
||||
|
|
@ -101,6 +121,25 @@ classes:
|
|||
Policy for transferring records to intermediate or permanent archives.
|
||||
Describes triggers, timelines, and procedures for transfer.
|
||||
range: string
|
||||
|
||||
has_narrower_instance:
|
||||
slot_uri: skos:narrowerTransitive
|
||||
description: |
|
||||
Links this archive TYPE to specific CustodianArchive INSTANCES
|
||||
that are classified under this lifecycle phase.
|
||||
|
||||
**SKOS**: skos:narrowerTransitive for type-instance relationship.
|
||||
|
||||
**Usage**:
|
||||
When a CustodianArchive contains records in the "current/active" phase,
|
||||
it can be linked from CurrentArchive via this property.
|
||||
|
||||
**Example**:
|
||||
- CurrentArchive (type) → has_narrower_instance →
|
||||
CustodianArchive "Director's Active Files 2020-2024" (instance)
|
||||
range: CustodianArchive
|
||||
multivalued: true
|
||||
required: false
|
||||
|
||||
exact_mappings:
|
||||
- wikidata:Q3621648
|
||||
|
|
@ -145,3 +184,11 @@ slots:
|
|||
transfer_policy:
|
||||
description: Policy for transferring to permanent archive
|
||||
range: string
|
||||
|
||||
has_narrower_instance:
|
||||
slot_uri: skos:narrowerTransitive
|
||||
description: |
|
||||
Links archive TYPE to specific CustodianArchive INSTANCES.
|
||||
SKOS narrowerTransitive for type-to-instance relationship.
|
||||
range: CustodianArchive
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ imports:
|
|||
- ../slots/access_restrictions
|
||||
- ../slots/storage_location
|
||||
- ./ReconstructedEntity
|
||||
- ./CurrentArchive
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
|
|
@ -31,6 +32,8 @@ prefixes:
|
|||
time: http://www.w3.org/2006/time#
|
||||
org: http://www.w3.org/ns/org#
|
||||
premis: http://www.loc.gov/premis/rdf/v3/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
wikidata: http://www.wikidata.org/entity/
|
||||
|
||||
classes:
|
||||
CustodianArchive:
|
||||
|
|
@ -122,6 +125,18 @@ classes:
|
|||
- **Storage**: Physical location of unprocessed archives
|
||||
- **OrganizationalStructure**: Unit responsible for processing
|
||||
|
||||
**RELATIONSHIP TO LIFECYCLE TYPE CLASSES**:
|
||||
|
||||
CustodianArchive (this class) is an INSTANCE class representing actual
|
||||
operational archives. It can be TYPED using lifecycle phase classifications:
|
||||
|
||||
- **CurrentArchive** (Q3621648): Active records in daily use
|
||||
- skos:broaderTransitive links CustodianArchive → CurrentArchive type
|
||||
- **DepositArchive** (Q244904): Intermediate/semi-current records
|
||||
- **HistoricalArchive** (Q3621673): Permanent archival records
|
||||
|
||||
Use `lifecycle_phase_type` slot to classify by lifecycle position.
|
||||
|
||||
exact_mappings:
|
||||
- rico:RecordSet
|
||||
|
||||
|
|
@ -162,6 +177,7 @@ classes:
|
|||
- was_generated_by
|
||||
- valid_from
|
||||
- valid_to
|
||||
- lifecycle_phase_type
|
||||
|
||||
slot_usage:
|
||||
id:
|
||||
|
|
@ -591,6 +607,33 @@ classes:
|
|||
required: false
|
||||
description: |
|
||||
End of validity period (typically = transfer_to_collection_date).
|
||||
|
||||
lifecycle_phase_type:
|
||||
slot_uri: skos:broaderTransitive
|
||||
range: uriorcurie
|
||||
required: false
|
||||
description: |
|
||||
Links this CustodianArchive INSTANCE to its lifecycle phase TYPE.
|
||||
|
||||
**SKOS**: skos:broaderTransitive for instance-to-type relationship.
|
||||
|
||||
**Archive Lifecycle Types (Wikidata)**:
|
||||
- Q3621648 (CurrentArchive) - Active records phase
|
||||
- Q244904 (DepositArchive) - Intermediate/semi-current phase
|
||||
- Q3621673 (HistoricalArchive) - Archival/permanent phase
|
||||
|
||||
**Usage**:
|
||||
Classify this operational archive by its position in the records lifecycle.
|
||||
Most CustodianArchive records are in the intermediate phase (awaiting processing).
|
||||
|
||||
**Example**:
|
||||
- CustodianArchive "Ministry Records 2010-2020" → lifecycle_phase_type →
|
||||
DepositArchive (Q244904) - semi-current, awaiting processing
|
||||
examples:
|
||||
- value: "wikidata:Q244904"
|
||||
description: "Deposit archive / semi-current records"
|
||||
- value: "wikidata:Q3621648"
|
||||
description: "Current archive / active records"
|
||||
|
||||
comments:
|
||||
- "Represents operational archives BEFORE integration into CustodianCollection"
|
||||
|
|
@ -719,3 +762,12 @@ slots:
|
|||
arrangement_notes:
|
||||
description: Notes from arrangement process
|
||||
range: string
|
||||
|
||||
lifecycle_phase_type:
|
||||
slot_uri: skos:broaderTransitive
|
||||
description: |
|
||||
Links CustodianArchive INSTANCE to lifecycle phase TYPE.
|
||||
SKOS broaderTransitive for instance-to-type relationship.
|
||||
Values: CurrentArchive (Q3621648), DepositArchive (Q244904),
|
||||
HistoricalArchive (Q3621673).
|
||||
range: uriorcurie
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ classes:
|
|||
- Portuguese: Fundação, Associação, Ltda., S.A.
|
||||
- Italian: Fondazione, Associazione, S.p.A., S.r.l.
|
||||
|
||||
See: .opencode/LEGAL_FORM_FILTERING_RULE.md for comprehensive global list
|
||||
See: rules/LEGAL_FORM_FILTERING_RULE.md for comprehensive global list
|
||||
|
||||
===========================================================================
|
||||
MANDATORY RULE: Special Characters MUST Be Excluded from Abbreviations
|
||||
|
|
@ -112,7 +112,7 @@ classes:
|
|||
- "Heritage@Digital" → "HD" (not "H@D")
|
||||
- "Archives (Historical)" → "AH" (not "A(H)")
|
||||
|
||||
See: .opencode/ABBREVIATION_SPECIAL_CHAR_RULE.md for complete documentation
|
||||
See: rules/ABBREVIATION_SPECIAL_CHAR_RULE.md for complete documentation
|
||||
|
||||
===========================================================================
|
||||
MANDATORY RULE: Diacritics MUST Be Normalized to ASCII in Abbreviations
|
||||
|
|
@ -152,7 +152,7 @@ classes:
|
|||
ascii_text = ''.join(c for c in normalized if unicodedata.category(c) != 'Mn')
|
||||
```
|
||||
|
||||
See: .opencode/ABBREVIATION_SPECIAL_CHAR_RULE.md for complete documentation
|
||||
See: rules/ABBREVIATION_SPECIAL_CHAR_RULE.md for complete documentation
|
||||
|
||||
Can be generated by:
|
||||
1. ReconstructionActivity (formal entity resolution) - was_generated_by link
|
||||
|
|
|
|||
|
|
@ -35,6 +35,11 @@ imports:
|
|||
- ./OrganizationalStructure
|
||||
- ./Collection
|
||||
- ./PersonObservation
|
||||
# Import global slots
|
||||
- ../slots/staff_members
|
||||
- ../slots/contact_point
|
||||
- ../slots/located_at
|
||||
- ../slots/refers_to_custodian
|
||||
|
||||
classes:
|
||||
Department:
|
||||
|
|
@ -456,6 +461,20 @@ slots:
|
|||
description: Person heading the department
|
||||
range: PersonObservation
|
||||
|
||||
# NOTE: staff_members imported from global slot ../slots/staff_members.yaml
|
||||
|
||||
manages_collections:
|
||||
slot_uri: rico:isManagerOf
|
||||
description: Collections managed by this department
|
||||
range: Collection
|
||||
multivalued: true
|
||||
|
||||
# NOTE: located_at imported from global slot ../slots/located_at.yaml
|
||||
|
||||
# NOTE: contact_point imported from global slot ../slots/contact_point.yaml
|
||||
|
||||
# NOTE: refers_to_custodian imported from global slot ../slots/refers_to_custodian.yaml
|
||||
|
||||
established_date:
|
||||
description: Date department was established
|
||||
range: date
|
||||
|
|
|
|||
|
|
@ -470,7 +470,7 @@ classes:
|
|||
- "Follows 4-stage GLAM-NER pipeline: recognition → layout → resolution → linking"
|
||||
|
||||
see_also:
|
||||
- ".opencode/WEB_OBSERVATION_PROVENANCE_RULES.md"
|
||||
- "rules/WEB_OBSERVATION_PROVENANCE_RULES.md"
|
||||
- "scripts/fetch_website_playwright.py"
|
||||
- "scripts/add_xpath_provenance.py"
|
||||
- "docs/convention/schema/20251202/entity_annotation_rules_v1.6.0_unified.yaml"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,303 @@
|
|||
# Abbreviation Character Filtering Rules
|
||||
|
||||
**Rule ID**: ABBREV-CHAR-FILTER
|
||||
**Status**: MANDATORY
|
||||
**Applies To**: GHCID abbreviation component generation
|
||||
**Created**: 2025-12-07
|
||||
**Updated**: 2025-12-08 (added diacritics rule)
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
**When generating abbreviations for GHCID, ONLY ASCII uppercase letters (A-Z) are permitted. Both special characters AND diacritics MUST be removed/normalized.**
|
||||
|
||||
This is a **MANDATORY** rule. Abbreviations containing special characters or diacritics are INVALID and must be regenerated.
|
||||
|
||||
### Two Mandatory Sub-Rules:
|
||||
|
||||
1. **ABBREV-SPECIAL-CHAR**: Remove all special characters and symbols
|
||||
2. **ABBREV-DIACRITICS**: Normalize all diacritics to ASCII equivalents
|
||||
|
||||
---
|
||||
|
||||
## Rule 1: Diacritics MUST Be Normalized to ASCII (ABBREV-DIACRITICS)
|
||||
|
||||
**Diacritics (accented characters) MUST be normalized to their ASCII base letter equivalents.**
|
||||
|
||||
### Example (Real Case)
|
||||
|
||||
```
|
||||
❌ WRONG: CZ-VY-TEL-L-VHSPAOČRZS (contains Č)
|
||||
✅ CORRECT: CZ-VY-TEL-L-VHSPAOCRZS (ASCII only)
|
||||
```
|
||||
|
||||
### Diacritics Normalization Table
|
||||
|
||||
| Diacritic | ASCII | Example |
|
||||
|-----------|-------|---------|
|
||||
| Á, À, Â, Ã, Ä, Å, Ā | A | "Ålborg" → A |
|
||||
| Č, Ć, Ç | C | "Český" → C |
|
||||
| Ď | D | "Ďáblice" → D |
|
||||
| É, È, Ê, Ë, Ě, Ē | E | "Éire" → E |
|
||||
| Í, Ì, Î, Ï, Ī | I | "Ísland" → I |
|
||||
| Ñ, Ń, Ň | N | "España" → N |
|
||||
| Ó, Ò, Ô, Õ, Ö, Ø, Ō | O | "Österreich" → O |
|
||||
| Ř | R | "Říčany" → R |
|
||||
| Š, Ś, Ş | S | "Šumperk" → S |
|
||||
| Ť | T | "Ťažký" → T |
|
||||
| Ú, Ù, Û, Ü, Ů, Ū | U | "Ústí" → U |
|
||||
| Ý, Ÿ | Y | "Ýmir" → Y |
|
||||
| Ž, Ź, Ż | Z | "Žilina" → Z |
|
||||
| Ł | L | "Łódź" → L |
|
||||
| Æ | AE | "Ærø" → AE |
|
||||
| Œ | OE | "Œuvre" → OE |
|
||||
| ß | SS | "Straße" → SS |
|
||||
|
||||
### Implementation
|
||||
|
||||
```python
|
||||
import unicodedata
|
||||
|
||||
def normalize_diacritics(text: str) -> str:
|
||||
"""
|
||||
Normalize diacritics to ASCII equivalents.
|
||||
|
||||
Examples:
|
||||
"Č" → "C"
|
||||
"Ř" → "R"
|
||||
"Ö" → "O"
|
||||
"ñ" → "n"
|
||||
"""
|
||||
# NFD decomposition separates base characters from combining marks
|
||||
normalized = unicodedata.normalize('NFD', text)
|
||||
# Remove combining marks (category 'Mn' = Mark, Nonspacing)
|
||||
ascii_text = ''.join(c for c in normalized if unicodedata.category(c) != 'Mn')
|
||||
return ascii_text
|
||||
|
||||
# Example
|
||||
normalize_diacritics("VHSPAOČRZS") # Returns "VHSPAOCRZS"
|
||||
```
|
||||
|
||||
### Languages Commonly Affected
|
||||
|
||||
| Language | Common Diacritics | Example Institution |
|
||||
|----------|-------------------|---------------------|
|
||||
| **Czech** | Č, Ř, Š, Ž, Ě, Ů | Vlastivědné muzeum → VM (the ě in "Vlastivědné" is normalized before extraction) |
|
||||
| **Polish** | Ł, Ń, Ó, Ś, Ź, Ż, Ą, Ę | Biblioteka Łódzka → BL |
|
||||
| **German** | Ä, Ö, Ü, ß | Österreichische Nationalbibliothek → ON |
|
||||
| **French** | É, È, Ê, Ç, Ô | Bibliothèque nationale → BN |
|
||||
| **Spanish** | Ñ, Á, É, Í, Ó, Ú | Museo Nacional → MN |
|
||||
| **Portuguese** | Ã, Õ, Ç, Á, É | Biblioteca Nacional → BN |
|
||||
| **Nordic** | Å, Ä, Ö, Ø, Æ | Nationalmuseet → N |
|
||||
| **Turkish** | Ç, Ğ, İ, Ö, Ş, Ü | İstanbul Üniversitesi → IU |
|
||||
| **Hungarian** | Á, É, Í, Ó, Ö, Ő, Ú, Ü, Ű | Országos Levéltár → OL |
|
||||
| **Romanian** | Ă, Â, Î, Ș, Ț | Biblioteca Națională → BN |
|
||||
|
||||
---
|
||||
|
||||
## Rule 2: Special Characters MUST Be Removed (ABBREV-SPECIAL-CHAR)
|
||||
|
||||
---
|
||||
|
||||
## Rationale
|
||||
|
||||
### 1. URL/URI Safety
|
||||
Special characters require percent-encoding in URIs. For example:
|
||||
- `&` becomes `%26`
|
||||
- `+` becomes `%2B`
|
||||
|
||||
This makes identifiers harder to share, copy, and verify.
|
||||
|
||||
### 2. Filename Safety
|
||||
Many special characters are invalid in filenames across operating systems:
|
||||
- Windows: `\ / : * ? " < > |`
|
||||
- macOS/Linux: `/` and null bytes
|
||||
|
||||
Files like `SX-XX-PHI-O-DR&IMSM.yaml` may cause issues on some systems.
|
||||
|
||||
### 3. Parsing Consistency
|
||||
Special characters can conflict with delimiters in data pipelines:
|
||||
- `&` is used in query strings
|
||||
- `:` is used in YAML, JSON
|
||||
- `/` is a path separator
|
||||
- `|` is a common CSV delimiter alternative
|
||||
|
||||
### 4. Cross-System Compatibility
|
||||
Identifiers should work across all systems:
|
||||
- Databases (SQL, TypeDB, Neo4j)
|
||||
- RDF/SPARQL endpoints
|
||||
- REST APIs
|
||||
- Command-line tools
|
||||
- Spreadsheets
|
||||
|
||||
### 5. Human Readability
|
||||
Clean identifiers are easier to:
|
||||
- Communicate verbally
|
||||
- Type correctly
|
||||
- Proofread
|
||||
- Remember
|
||||
|
||||
---
|
||||
|
||||
## Characters to Remove
|
||||
|
||||
The following characters MUST be completely removed (not replaced) when generating abbreviations:
|
||||
|
||||
| Character | Name | Example Issue |
|
||||
|-----------|------|---------------|
|
||||
| `&` | Ampersand | "R&A" in URLs, HTML entities |
|
||||
| `/` | Slash | Path separator confusion |
|
||||
| `\` | Backslash | Escape sequence issues |
|
||||
| `+` | Plus | URL encoding (`+` = space) |
|
||||
| `@` | At sign | Email/handle confusion |
|
||||
| `#` | Hash/Pound | Fragment identifier in URLs |
|
||||
| `%` | Percent | URL encoding prefix |
|
||||
| `$` | Dollar | Variable prefix in shells |
|
||||
| `*` | Asterisk | Glob/wildcard character |
|
||||
| `(` `)` | Parentheses | Grouping in regex, code |
|
||||
| `[` `]` | Square brackets | Array notation |
|
||||
| `{` `}` | Curly braces | Object notation |
|
||||
| `\|` | Pipe | Command chaining, OR operator |
|
||||
| `:` | Colon | YAML key-value, namespace separator |
|
||||
| `;` | Semicolon | Statement terminator |
|
||||
| `"` `'` `` ` `` | Quotes | String delimiters |
|
||||
| `,` | Comma | List separator |
|
||||
| `.` | Period | File extension, namespace |
|
||||
| `-` | Hyphen | Already used as GHCID component separator |
|
||||
| `_` | Underscore | Reserved for name suffix in collisions |
|
||||
| `=` | Equals | Assignment operator |
|
||||
| `?` | Question mark | Query string indicator |
|
||||
| `!` | Exclamation | Negation, shell history |
|
||||
| `~` | Tilde | Home directory, bitwise NOT |
|
||||
| `^` | Caret | Regex anchor, power operator |
|
||||
| `<` `>` | Angle brackets | HTML tags, redirects |
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
### Algorithm
|
||||
|
||||
When extracting abbreviation from institution name:
|
||||
|
||||
```python
|
||||
import re
|
||||
import unicodedata
|
||||
|
||||
def extract_abbreviation_from_name(name: str, skip_words: set) -> str:
|
||||
"""
|
||||
Extract abbreviation from institution name.
|
||||
|
||||
Args:
|
||||
name: Full institution name (emic)
|
||||
skip_words: Set of prepositions/articles to skip
|
||||
|
||||
Returns:
|
||||
Uppercase abbreviation with only A-Z characters
|
||||
"""
|
||||
# Step 1: Normalize unicode (remove diacritics)
|
||||
normalized = unicodedata.normalize('NFD', name)
|
||||
ascii_name = ''.join(c for c in normalized if unicodedata.category(c) != 'Mn')
|
||||
|
||||
# Step 2: Replace special characters with spaces (to split words)
|
||||
# This handles cases like "Records&Information" -> "Records Information"
|
||||
clean_name = re.sub(r'[^a-zA-Z\s]', ' ', ascii_name)
|
||||
|
||||
# Step 3: Split into words
|
||||
words = clean_name.split()
|
||||
|
||||
# Step 4: Filter out skip words (prepositions, articles)
|
||||
significant_words = [w for w in words if w.lower() not in skip_words]
|
||||
|
||||
# Step 5: Take first letter of each significant word
|
||||
abbreviation = ''.join(w[0].upper() for w in significant_words if w)
|
||||
|
||||
# Step 6: Limit to 10 characters
|
||||
return abbreviation[:10]
|
||||
```
|
||||
|
||||
### Handling Special Cases
|
||||
|
||||
**Case 1: "Records & Information Management"**
|
||||
1. Input: `"Records & Information Management"`
|
||||
2. After special char removal: `"Records Information Management"`
|
||||
3. After split: `["Records", "Information", "Management"]`
|
||||
4. Abbreviation: `RIM`
|
||||
|
||||
**Case 2: "Art/Design Museum"**
|
||||
1. Input: `"Art/Design Museum"`
|
||||
2. After special char removal: `"Art Design Museum"`
|
||||
3. After split: `["Art", "Design", "Museum"]`
|
||||
4. Abbreviation: `ADM`
|
||||
|
||||
**Case 3: "Culture+"**
|
||||
1. Input: `"Culture+"`
|
||||
2. After special char removal: `"Culture"`
|
||||
3. After split: `["Culture"]`
|
||||
4. Abbreviation: `C`
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
| Institution Name | Correct | Incorrect |
|
||||
|------------------|---------|-----------|
|
||||
| Department of Records & Information Management | DRIM | DR&IM |
|
||||
| Art + Culture Center | ACC | A+CC |
|
||||
| Museum/Gallery Amsterdam | MGA | M/GA |
|
||||
| Heritage@Digital | HD | H@D |
|
||||
| Archives (Historical) | AH | A(H) |
|
||||
| Research & Development Institute | RDI | R&DI |
|
||||
| Sint Maarten Records & Information | SMRI | SMR&I |
|
||||
|
||||
---
|
||||
|
||||
## Validation
|
||||
|
||||
### Check for Invalid Abbreviations
|
||||
|
||||
```bash
|
||||
# Find GHCID files with special characters in abbreviation
|
||||
find data/custodian -name "*.yaml" | xargs grep -l '[&+@#%$*|:;?!=~^<>]' | head -20
|
||||
|
||||
# Specifically check for & in filenames
|
||||
find data/custodian -name "*&*.yaml"
|
||||
```
|
||||
|
||||
### Programmatic Validation
|
||||
|
||||
```python
|
||||
import re
|
||||
|
||||
def validate_abbreviation(abbrev: str) -> bool:
|
||||
"""
|
||||
Validate that abbreviation contains only A-Z.
|
||||
|
||||
Returns True if valid, False if contains special characters.
|
||||
"""
|
||||
return bool(re.match(r'^[A-Z]+$', abbrev))
|
||||
|
||||
# Examples
|
||||
validate_abbreviation("DRIMSM") # True - valid
|
||||
validate_abbreviation("DR&IMSM") # False - contains &
|
||||
validate_abbreviation("A+CC") # False - contains +
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- `AGENTS.md` - Section "INSTITUTION ABBREVIATION: EMIC NAME FIRST-LETTER PROTOCOL"
|
||||
- `schemas/20251121/linkml/modules/classes/CustodianName.yaml` - Schema description
|
||||
- `rules/LEGAL_FORM_FILTERING_RULE.md` - Related filtering rule for legal forms
|
||||
- `docs/PERSISTENT_IDENTIFIERS.md` - GHCID specification
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Date | Change |
|
||||
|------|--------|
|
||||
| 2025-12-07 | Initial rule created after discovery of `&` in GHCID |
|
||||
| 2025-12-08 | Added diacritics normalization rule |
|
||||
237
frontend/public/schemas/20251121/linkml/rules/ENUM_TO_CLASS.md
Normal file
237
frontend/public/schemas/20251121/linkml/rules/ENUM_TO_CLASS.md
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
# Enum-to-Class Principle: Single Source of Truth
|
||||
|
||||
**Rule ID**: ENUM-TO-CLASS
|
||||
**Status**: ACTIVE
|
||||
**Applies To**: Schema evolution decisions
|
||||
**Version**: 1.0
|
||||
**Last Updated**: 2025-12-06
|
||||
|
||||
---
|
||||
|
||||
## Core Principle
|
||||
|
||||
**Enums are TEMPORARY scaffolding. Once an enum is promoted to a class hierarchy, the enum MUST be deleted to maintain a Single Source of Truth.**
|
||||
|
||||
---
|
||||
|
||||
## Rationale
|
||||
|
||||
### The Problem: Dual Representation
|
||||
|
||||
When both an enum AND a class hierarchy exist for the same concept:
|
||||
- **Data sync issues**: Enum values and class names can drift apart
|
||||
- **Maintenance burden**: Changes must be made in two places
|
||||
- **Developer confusion**: Which one should I use?
|
||||
- **Validation conflicts**: Enum constraints vs class ranges may diverge
|
||||
|
||||
### The Solution: Single Source of Truth
|
||||
|
||||
- **Enums**: Use for simple, fixed value constraints (e.g., `DataTierEnum: TIER_1, TIER_2, TIER_3, TIER_4`)
|
||||
- **Classes**: Use when the concept needs properties, relationships, or rich documentation
|
||||
- **NEVER BOTH**: Once promoted to classes, DELETE the enum
|
||||
|
||||
---
|
||||
|
||||
## When to Promote Enum to Classes
|
||||
|
||||
**Promote when the concept needs**:
|
||||
|
||||
| Need | Enum Can Do? | Class Required? |
|
||||
|------|-------------|-----------------|
|
||||
| Fixed value constraint | Yes | Yes |
|
||||
| Properties (e.g., `role_category`, `typical_domains`) | No | Yes |
|
||||
| Rich description per value | Limited | Yes |
|
||||
| Relationships to other entities | No | Yes |
|
||||
| Inheritance hierarchy | No | Yes |
|
||||
| Independent identity (URI) | Limited | Yes |
|
||||
| Ontology class mapping (`class_uri`) | Via `meaning` | Native |
|
||||
|
||||
**Rule of thumb**: If you're adding detailed documentation to each enum value, or want to attach properties, it's time to promote to classes.
|
||||
|
||||
---
|
||||
|
||||
## Promotion Workflow
|
||||
|
||||
### Step 1: Create Class Hierarchy
|
||||
|
||||
```yaml
|
||||
# modules/classes/StaffRole.yaml (base class)
|
||||
StaffRole:
|
||||
abstract: true
|
||||
description: Base class for staff role categories
|
||||
slots:
|
||||
- role_id
|
||||
- role_name
|
||||
- role_category
|
||||
- typical_domains
|
||||
|
||||
# modules/classes/StaffRoles.yaml (subclasses)
|
||||
Curator:
|
||||
is_a: StaffRole
|
||||
description: Museum curator specializing in collection research...
|
||||
|
||||
Conservator:
|
||||
is_a: StaffRole
|
||||
description: Conservator specializing in preservation...
|
||||
```
|
||||
|
||||
### Step 2: Update Slot Ranges
|
||||
|
||||
```yaml
|
||||
# BEFORE (enum)
|
||||
staff_role:
|
||||
range: StaffRoleTypeEnum
|
||||
|
||||
# AFTER (class)
|
||||
staff_role:
|
||||
range: StaffRole
|
||||
```
|
||||
|
||||
### Step 3: Update Modular Schema Imports
|
||||
|
||||
```yaml
|
||||
# REMOVE enum import
|
||||
# - modules/enums/StaffRoleTypeEnum # DELETED
|
||||
|
||||
# ADD class imports
|
||||
- modules/classes/StaffRole
|
||||
- modules/classes/StaffRoles
|
||||
```
|
||||
|
||||
### Step 4: Archive the Enum
|
||||
|
||||
```bash
|
||||
mkdir -p schemas/.../archive/enums
|
||||
mv modules/enums/OldEnum.yaml archive/enums/OldEnum.yaml.archived_$(date +%Y%m%d)
|
||||
```
|
||||
|
||||
### Step 5: Document the Change
|
||||
|
||||
- Update `archive/enums/README.md` with migration entry
|
||||
- Add comment in modular schema explaining removal
|
||||
- Update any documentation referencing the old enum
|
||||
|
||||
---
|
||||
|
||||
## Example: StaffRoleTypeEnum to StaffRole
|
||||
|
||||
**Before** (2025-12-05):
|
||||
```yaml
|
||||
# StaffRoleTypeEnum.yaml
|
||||
StaffRoleTypeEnum:
|
||||
permissible_values:
|
||||
CURATOR:
|
||||
description: Museum curator
|
||||
CONSERVATOR:
|
||||
description: Conservator
|
||||
# ... 51 values with limited documentation
|
||||
```
|
||||
|
||||
**After** (2025-12-06):
|
||||
```yaml
|
||||
# StaffRole.yaml (abstract base)
|
||||
StaffRole:
|
||||
abstract: true
|
||||
slots:
|
||||
- role_id
|
||||
- role_name
|
||||
- role_category
|
||||
- typical_domains
|
||||
- typical_responsibilities
|
||||
- requires_qualification
|
||||
|
||||
# StaffRoles.yaml (51 subclasses)
|
||||
Curator:
|
||||
is_a: StaffRole
|
||||
class_uri: schema:curator
|
||||
description: |
|
||||
Museum curator specializing in collection research...
|
||||
|
||||
**IMPORTANT - FORMAL TITLE vs DE FACTO WORK**:
|
||||
This is the OFFICIAL job appellation/title. Actual work may differ.
|
||||
slot_usage:
|
||||
role_category:
|
||||
equals_string: CURATORIAL
|
||||
typical_domains:
|
||||
equals_expression: "[Museums, Galleries]"
|
||||
```
|
||||
|
||||
**Why the promotion?**
|
||||
1. Need to distinguish FORMAL TITLE from DE FACTO WORK
|
||||
2. Each role has `role_category`, `common_variants`, `typical_domains`, `typical_responsibilities`
|
||||
3. Roles benefit from inheritance (`Curator is_a StaffRole`)
|
||||
4. Richer documentation per role
|
||||
|
||||
---
|
||||
|
||||
## Enums That Should REMAIN Enums
|
||||
|
||||
Some enums are appropriate as permanent fixtures:
|
||||
|
||||
| Enum | Why Keep as Enum |
|
||||
|------|------------------|
|
||||
| `DataTierEnum` | Simple 4-value tier (TIER_1 through TIER_4), no properties needed |
|
||||
| `DataSourceEnum` | Fixed source types, simple strings |
|
||||
| `CountryCodeEnum` | ISO 3166-1 standard, no custom properties |
|
||||
| `LanguageCodeEnum` | ISO 639 standard, no custom properties |
|
||||
|
||||
**Characteristics of "permanent" enums**:
|
||||
- Based on external standards (ISO, etc.)
|
||||
- Simple values with no need for properties
|
||||
- Unlikely to require rich per-value documentation
|
||||
- Used purely for validation/constraint
|
||||
|
||||
---
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
### WRONG: Keep Both Enum and Classes
|
||||
|
||||
```yaml
|
||||
# modules/enums/StaffRoleTypeEnum.yaml # Still exists!
|
||||
# modules/classes/StaffRole.yaml # Also exists!
|
||||
# Which one is authoritative? CONFUSION!
|
||||
```
|
||||
|
||||
### WRONG: Create Classes but Keep Enum "for backwards compatibility"
|
||||
|
||||
```yaml
|
||||
# "Let's keep the enum for old code"
|
||||
# Result: Two sources of truth, guaranteed drift
|
||||
```
|
||||
|
||||
### CORRECT: Delete Enum After Creating Classes
|
||||
|
||||
```yaml
|
||||
# modules/enums/StaffRoleTypeEnum.yaml # ARCHIVED
|
||||
# modules/classes/StaffRole.yaml # Single source of truth
|
||||
# modules/classes/StaffRoles.yaml # All 51 role subclasses
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Verification Checklist
|
||||
|
||||
After promoting an enum to classes:
|
||||
|
||||
- [ ] Old enum file moved to `archive/enums/`
|
||||
- [ ] Modular schema import removed for enum
|
||||
- [ ] Modular schema import added for new class(es)
|
||||
- [ ] All slot ranges updated from enum to class
|
||||
- [ ] No grep results for old enum name in active schema files
|
||||
- [ ] `archive/enums/README.md` updated with migration entry
|
||||
- [ ] Comment added in modular schema explaining removal
|
||||
|
||||
```bash
|
||||
# Verify enum is fully removed (should return only archive hits)
|
||||
grep -r "StaffRoleTypeEnum" schemas/20251121/linkml/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- `docs/ENUM_CLASS_SINGLE_SOURCE.md` - Extended documentation
|
||||
- `schemas/20251121/linkml/archive/enums/README.md` - Archive directory
|
||||
- LinkML documentation on enums: https://linkml.io/linkml/schemas/enums.html
|
||||
- LinkML documentation on classes: https://linkml.io/linkml/schemas/models.html
|
||||
|
|
@ -0,0 +1,436 @@
|
|||
# GeoNames Settlement Standardization Rules
|
||||
|
||||
**Rule ID**: GEONAMES-SETTLEMENT
|
||||
**Status**: MANDATORY
|
||||
**Applies To**: GHCID settlement component generation
|
||||
**Version**: 1.1.0
|
||||
**Effective Date**: 2025-12-01
|
||||
**Last Updated**: 2025-12-01
|
||||
|
||||
---
|
||||
|
||||
## Purpose
|
||||
|
||||
This document defines the rules for standardizing settlement names in GHCID (Global Heritage Custodian Identifier) generation using the GeoNames geographical database.
|
||||
|
||||
## Core Principle
|
||||
|
||||
**ALL settlement names in GHCID must be derived from GeoNames standardized names, not from source data.**
|
||||
|
||||
The GeoNames database serves as the **single source of truth** for:
|
||||
- Settlement names (cities, towns, villages)
|
||||
- Settlement abbreviations/codes
|
||||
- Administrative region codes (admin1)
|
||||
- Geographic coordinates validation
|
||||
|
||||
## Why GeoNames Standardization?
|
||||
|
||||
1. **Consistency**: Same settlement = same GHCID component, regardless of source data variations
|
||||
2. **Disambiguation**: Handles duplicate city names across regions
|
||||
3. **Internationalization**: Provides ASCII-safe names for identifiers
|
||||
4. **Authority**: GeoNames is a well-maintained, CC-licensed geographic database
|
||||
5. **Persistence**: Settlement names don't change frequently, ensuring GHCID stability
|
||||
|
||||
---
|
||||
|
||||
## CRITICAL: Feature Code Filtering
|
||||
|
||||
**NEVER use neighborhoods or districts (PPLX) for GHCID generation. ONLY use proper settlements (cities, towns, villages).**
|
||||
|
||||
GeoNames classifies populated places with feature codes. When reverse geocoding coordinates to find a settlement, you MUST filter by feature code.
|
||||
|
||||
### ALLOWED Feature Codes
|
||||
|
||||
| Code | Description | Example |
|
||||
|------|-------------|---------|
|
||||
| **PPL** | Populated place (city/town/village) | Apeldoorn, Hamont, Lelystad |
|
||||
| **PPLA** | Seat of first-order admin division | Provincial capitals |
|
||||
| **PPLA2** | Seat of second-order admin division | Municipal seats |
|
||||
| **PPLA3** | Seat of third-order admin division | District seats |
|
||||
| **PPLA4** | Seat of fourth-order admin division | Sub-district seats |
|
||||
| **PPLC** | Capital of a political entity | Amsterdam, Brussels |
|
||||
| **PPLS** | Populated places (multiple) | Settlement clusters |
|
||||
| **PPLG** | Seat of government | The Hague |
|
||||
|
||||
### EXCLUDED Feature Codes
|
||||
|
||||
| Code | Description | Why Excluded |
|
||||
|------|-------------|--------------|
|
||||
| **PPLX** | Section of populated place | Neighborhoods, districts, quarters (e.g., "Binnenstad", "Amsterdam Binnenstad") |
|
||||
|
||||
### Implementation
|
||||
|
||||
```python
|
||||
VALID_FEATURE_CODES = ('PPL', 'PPLA', 'PPLA2', 'PPLA3', 'PPLA4', 'PPLC', 'PPLS', 'PPLG')
|
||||
|
||||
query = """
|
||||
SELECT name, feature_code, geonames_id, ...
|
||||
FROM cities
|
||||
WHERE country_code = ?
|
||||
AND feature_code IN (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ORDER BY distance_sq
|
||||
LIMIT 1
|
||||
"""
|
||||
cursor.execute(query, (country_code, *VALID_FEATURE_CODES))
|
||||
```
|
||||
|
||||
### Verification
|
||||
|
||||
Always check `feature_code` in location_resolution metadata:
|
||||
|
||||
```yaml
|
||||
location_resolution:
|
||||
geonames_name: Apeldoorn
|
||||
feature_code: PPL # ← MUST be PPL, PPLA*, PPLC, PPLS, or PPLG
|
||||
```
|
||||
|
||||
**If you see `feature_code: PPLX`**, the GHCID is WRONG and must be regenerated.
|
||||
|
||||
---
|
||||
|
||||
## CRITICAL: Country Code Detection
|
||||
|
||||
**Determine country code from entry data BEFORE calling GeoNames reverse geocoding.**
|
||||
|
||||
GeoNames queries are country-specific. Using the wrong country code will return incorrect results.
|
||||
|
||||
### Country Code Resolution Priority
|
||||
|
||||
1. `zcbs_enrichment.country` - Most explicit source
|
||||
2. `location.country` - Direct location field
|
||||
3. `locations[].country` - Array location field
|
||||
4. `original_entry.country` - CSV source field
|
||||
5. `google_maps_enrichment.address` - Parse from address string
|
||||
6. `wikidata_enrichment.located_in.label` - Infer from Wikidata
|
||||
7. Default: `"NL"` (Netherlands) - Only if no other source
|
||||
|
||||
### Example
|
||||
|
||||
```python
|
||||
# Determine country code FIRST
|
||||
country_code = "NL" # Default
|
||||
|
||||
if entry.get('zcbs_enrichment', {}).get('country'):
|
||||
country_code = entry['zcbs_enrichment']['country']
|
||||
elif entry.get('google_maps_enrichment', {}).get('address', ''):
|
||||
address = entry['google_maps_enrichment']['address']
|
||||
if ', Belgium' in address:
|
||||
country_code = "BE"
|
||||
elif ', Germany' in address:
|
||||
country_code = "DE"
|
||||
|
||||
# THEN call reverse geocoding
|
||||
result = reverse_geocode_to_city(latitude, longitude, country_code)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Settlement Resolution Process
|
||||
|
||||
### Step 1: Coordinate-Based Resolution (Preferred)
|
||||
|
||||
When coordinates are available, use reverse geocoding to find the nearest GeoNames settlement:
|
||||
|
||||
```python
|
||||
def resolve_settlement_from_coordinates(latitude: float, longitude: float, country_code: str = "NL") -> dict:
|
||||
"""
|
||||
Find the GeoNames settlement nearest to given coordinates.
|
||||
|
||||
Returns:
|
||||
{
|
||||
'settlement_name': 'Lelystad', # GeoNames standardized name
|
||||
'settlement_code': 'LEL', # 3-letter abbreviation
|
||||
'admin1_code': '16', # GeoNames admin1 code
|
||||
'region_code': 'FL', # ISO 3166-2 region code
|
||||
'geonames_id': 2751792, # GeoNames ID for provenance
|
||||
'distance_km': 0.5 # Distance from coords to settlement center
|
||||
}
|
||||
"""
|
||||
```
|
||||
|
||||
### Step 2: Name-Based Resolution (Fallback)
|
||||
|
||||
When only a settlement name is available (no coordinates), look up in GeoNames:
|
||||
|
||||
```python
|
||||
def resolve_settlement_from_name(name: str, country_code: str = "NL") -> dict:
|
||||
"""
|
||||
Find the GeoNames settlement matching the given name.
|
||||
|
||||
Uses fuzzy matching and disambiguation when multiple matches exist.
|
||||
"""
|
||||
```
|
||||
|
||||
### Step 3: Manual Resolution (Last Resort)
|
||||
|
||||
If GeoNames lookup fails, flag the entry for manual review with:
|
||||
- `settlement_source: MANUAL`
|
||||
- `settlement_needs_review: true`
|
||||
|
||||
---
|
||||
|
||||
## GHCID Settlement Component Rules
|
||||
|
||||
### Format
|
||||
|
||||
The settlement component in GHCID uses a **3-letter uppercase code**:
|
||||
|
||||
```
|
||||
NL-{REGION}-{SETTLEMENT}-{TYPE}-{ABBREV}
|
||||
^^^^^^^^^^^
|
||||
3-letter code from GeoNames
|
||||
```
|
||||
|
||||
### Code Generation Rules
|
||||
|
||||
1. **Single-word settlements**: First 3 letters uppercase
|
||||
- `Amsterdam` → `AMS`
|
||||
- `Rotterdam` → `ROT`
|
||||
- `Lelystad` → `LEL`
|
||||
|
||||
2. **Settlements with Dutch articles** (`de`, `het`, `den`, `'s`):
|
||||
- First letter of article + first 2 letters of main word
|
||||
- `Den Haag` → `DHA`
|
||||
- `'s-Hertogenbosch` → `SHE`
|
||||
- `De Bilt` → `DBI`
|
||||
|
||||
3. **Multi-word settlements** (no article):
|
||||
- First letter of the first word, then subsequent letters of the last word, up to 3 characters total
|
||||
- `Nieuw Amsterdam` → `NAM`
|
||||
- `Oud Beijerland` → `OBE`
|
||||
|
||||
4. **GeoNames Disambiguation Database**:
|
||||
- For known problematic settlements, use pre-defined codes from disambiguation table
|
||||
- Example: Both `Zwolle` (OV) and `Zwolle` (LI) exist - use `ZWO` with region for uniqueness
|
||||
|
||||
### Measurement Point for Historical Custodians
|
||||
|
||||
**Rule**: For heritage custodians that no longer exist or have historical coordinates, the **modern-day settlement** (as of 2025-12-01) is used.
|
||||
|
||||
Rationale:
|
||||
- GHCIDs should be stable over time
|
||||
- Historical place names may have changed
|
||||
- Modern settlements are easier to verify and look up
|
||||
- GeoNames reflects current geographic reality
|
||||
|
||||
Example:
|
||||
- A museum that operated 1900-1950 in what was then "Nieuw Land" (before Flevoland province existed)
|
||||
- Modern coordinates fall within Lelystad municipality
|
||||
- GHCID uses `LEL` (Lelystad) as settlement code, not historical name
|
||||
|
||||
---
|
||||
|
||||
## GeoNames Database Integration
|
||||
|
||||
### Database Location
|
||||
|
||||
```
|
||||
/data/reference/geonames.db
|
||||
```
|
||||
|
||||
### Required Tables
|
||||
|
||||
```sql
|
||||
-- Cities/settlements table
|
||||
CREATE TABLE cities (
|
||||
geonames_id INTEGER PRIMARY KEY,
|
||||
name TEXT, -- Local name (may have diacritics)
|
||||
ascii_name TEXT, -- ASCII-safe name for identifiers
|
||||
country_code TEXT, -- ISO 3166-1 alpha-2
|
||||
admin1_code TEXT, -- First-level administrative division
|
||||
admin1_name TEXT, -- Region/province name
|
||||
latitude REAL,
|
||||
longitude REAL,
|
||||
population INTEGER,
|
||||
feature_code TEXT -- PPL, PPLA, PPLC, etc.
|
||||
);
|
||||
|
||||
-- Disambiguation table for problematic settlements
|
||||
CREATE TABLE settlement_codes (
|
||||
geonames_id INTEGER PRIMARY KEY,
|
||||
country_code TEXT,
|
||||
settlement_code TEXT, -- 3-letter code
|
||||
is_primary BOOLEAN, -- Primary code for this settlement
|
||||
notes TEXT
|
||||
);
|
||||
```
|
||||
|
||||
### Admin1 Code Mapping (Netherlands)
|
||||
|
||||
**IMPORTANT**: GeoNames admin1 codes differ from historical numbering. Use this mapping:
|
||||
|
||||
| GeoNames admin1 | Province | ISO 3166-2 |
|
||||
|-----------------|----------|------------|
|
||||
| 01 | Drenthe | NL-DR |
|
||||
| 02 | Friesland | NL-FR |
|
||||
| 03 | Gelderland | NL-GE |
|
||||
| 04 | Groningen | NL-GR |
|
||||
| 05 | Limburg | NL-LI |
|
||||
| 06 | Noord-Brabant | NL-NB |
|
||||
| 07 | Noord-Holland | NL-NH |
|
||||
| 09 | Utrecht | NL-UT |
|
||||
| 10 | Zeeland | NL-ZE |
|
||||
| 11 | Zuid-Holland | NL-ZH |
|
||||
| 15 | Overijssel | NL-OV |
|
||||
| 16 | Flevoland | NL-FL |
|
||||
|
||||
**Note**: Code 08 is not used in Netherlands (was assigned to former region).
|
||||
|
||||
---
|
||||
|
||||
## Validation Requirements
|
||||
|
||||
### Before GHCID Generation
|
||||
|
||||
Every entry MUST have:
|
||||
- [ ] Settlement name resolved via GeoNames
|
||||
- [ ] `geonames_id` recorded in entry metadata
|
||||
- [ ] Settlement code (3-letter) generated consistently
|
||||
- [ ] Admin1/region code mapped correctly
|
||||
|
||||
### Provenance Tracking
|
||||
|
||||
Record GeoNames resolution in entry metadata:
|
||||
|
||||
```yaml
|
||||
location_resolution:
|
||||
method: REVERSE_GEOCODE # or NAME_LOOKUP or MANUAL
|
||||
geonames_id: 2751792
|
||||
geonames_name: Lelystad
|
||||
settlement_code: LEL
|
||||
admin1_code: "16"
|
||||
region_code: FL
|
||||
resolution_date: "2025-12-01T00:00:00Z"
|
||||
source_coordinates:
|
||||
latitude: 52.52111
|
||||
longitude: 5.43722
|
||||
distance_to_settlement_km: 0.5
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CRITICAL: XXX Placeholders Are TEMPORARY - Research Required
|
||||
|
||||
**XXX placeholders for region/settlement codes are NEVER acceptable as a final state.**
|
||||
|
||||
When an entry has `XX` (unknown region) or `XXX` (unknown settlement), the agent MUST conduct research to resolve the location.
|
||||
|
||||
### Resolution Strategy by Institution Type
|
||||
|
||||
| Institution Type | Location Resolution Method |
|
||||
|------------------|---------------------------|
|
||||
| **Destroyed institution** | Use last known physical location before destruction |
|
||||
| **Historical (closed)** | Use last operating location |
|
||||
| **Refugee/diaspora org** | Use current headquarters OR original founding location |
|
||||
| **Digital-only platform** | Use parent/founding organization's headquarters |
|
||||
| **Decentralized initiative** | Use founding location or primary organizer location |
|
||||
| **Unknown city, known country** | Research via Wikidata, Google Maps, official website |
|
||||
|
||||
### Research Sources (Priority Order)
|
||||
|
||||
1. **Wikidata** - P131 (located in), P159 (headquarters location), P625 (coordinates)
|
||||
2. **Google Maps** - Search institution name
|
||||
3. **Official Website** - Contact page, about page
|
||||
4. **Web Archive** - archive.org for destroyed/closed institutions
|
||||
5. **Academic Sources** - Papers, reports
|
||||
6. **News Articles** - Particularly for destroyed heritage sites
|
||||
|
||||
### Location Resolution Metadata
|
||||
|
||||
When resolving XXX placeholders, update `location_resolution`:
|
||||
|
||||
```yaml
|
||||
location_resolution:
|
||||
method: MANUAL_RESEARCH # Previously was NAME_LOOKUP with XXX
|
||||
country_code: PS
|
||||
region_code: GZ
|
||||
region_name: Gaza Strip
|
||||
city_code: GAZ
|
||||
city_name: Gaza City
|
||||
geonames_id: 281133
|
||||
research_date: "2025-12-06T00:00:00Z"
|
||||
research_sources:
|
||||
- type: wikidata
|
||||
id: Q123456
|
||||
claim: P131
|
||||
- type: web_archive
|
||||
url: https://web.archive.org/web/20231001/https://institution-website.org/contact
|
||||
notes: "Located in Gaza City prior to destruction in 2024"
|
||||
```
|
||||
|
||||
### File Renaming After Resolution
|
||||
|
||||
When GHCID changes due to XXX resolution, the file MUST be renamed:
|
||||
|
||||
```bash
|
||||
# Before
|
||||
data/custodian/PS-XX-XXX-A-NAPR.yaml
|
||||
|
||||
# After
|
||||
data/custodian/PS-GZ-GAZ-A-NAPR.yaml
|
||||
```
|
||||
|
||||
### Prohibited Practices
|
||||
|
||||
- ❌ Leaving XXX placeholders in production data
|
||||
- ❌ Using "Online" or country name as location
|
||||
- ❌ Skipping research because it's difficult
|
||||
- ❌ Using XX/XXX for diaspora organizations
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
### No GeoNames Match
|
||||
|
||||
If a settlement cannot be resolved via automated lookup:
|
||||
1. Log warning with entry details
|
||||
2. Set `settlement_code: XXX` (temporary placeholder)
|
||||
3. Set `settlement_needs_review: true`
|
||||
4. Do NOT skip the entry - generate GHCID with XXX placeholder
|
||||
5. **IMMEDIATELY** begin manual research to resolve
|
||||
|
||||
### Multiple GeoNames Matches
|
||||
|
||||
When multiple settlements match a name:
|
||||
1. Use coordinates to disambiguate (if available)
|
||||
2. Use admin1/region context (if available)
|
||||
3. Use population as tiebreaker (prefer larger settlement)
|
||||
4. Flag for manual review if still ambiguous
|
||||
|
||||
### Coordinates Outside Country
|
||||
|
||||
If coordinates fall outside the expected country:
|
||||
1. Log warning
|
||||
2. Use nearest settlement within country
|
||||
3. Flag for manual review
|
||||
|
||||
---
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- `AGENTS.md` - Section on GHCID generation
|
||||
- `docs/PERSISTENT_IDENTIFIERS.md` - Complete GHCID specification
|
||||
- `docs/GHCID_PID_SCHEME.md` - PID scheme details
|
||||
- `scripts/enrich_nde_entries_ghcid.py` - Implementation
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
### v1.1.0 (2025-12-01)
|
||||
- **CRITICAL**: Added feature code filtering rules
|
||||
- MUST filter for PPL, PPLA, PPLA2, PPLA3, PPLA4, PPLC, PPLS, PPLG
|
||||
- MUST exclude PPLX (neighborhoods/districts)
|
||||
- Example: Apeldoorn (PPL) not "Binnenstad" (PPLX)
|
||||
- **CRITICAL**: Added country code detection rules
|
||||
- Must determine country from entry data BEFORE reverse geocoding
|
||||
- Priority: zcbs_enrichment.country > location.country > address parsing
|
||||
- Example: Belgian institutions use BE, not NL
|
||||
- Added Belgium admin1 code mapping (BRU, VLG, WAL)
|
||||
|
||||
### v1.0.0 (2025-12-01)
|
||||
- Initial version
|
||||
- Established GeoNames as authoritative source for settlement standardization
|
||||
- Defined measurement point rule for historical custodians
|
||||
- Documented admin1 code mapping for Netherlands
|
||||
|
|
@ -0,0 +1,346 @@
|
|||
# Legal Form Filtering Rule for CustodianName
|
||||
|
||||
**Rule ID**: LEGAL-FORM-FILTER
|
||||
**Status**: MANDATORY
|
||||
**Applies To**: CustodianName standardization
|
||||
**Created**: 2025-12-02
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
**CRITICAL RULE**: Legal form designations MUST ALWAYS be filtered from `CustodianName`, even when the custodian self-identifies with them.
|
||||
|
||||
This is the **ONE EXCEPTION** to the emic (insider name) principle in the Heritage Custodian Ontology.
|
||||
|
||||
## Rationale
|
||||
|
||||
### Why Legal Forms Are NOT Part of Identity
|
||||
|
||||
1. **Legal Form ≠ Identity**: The legal structure is administrative metadata, not the custodian's core identity
|
||||
- "Stichting Rijksmuseum" → Identity is "Rijksmuseum", legal form is "Stichting"
|
||||
|
||||
2. **Legal Forms Change Over Time**: Organizations transform while identity persists
|
||||
- Association → Foundation → Corporation (same museum, different legal structures)
|
||||
|
||||
3. **Cross-Jurisdictional Consistency**: Same organization may have different legal forms in different countries
|
||||
- "Getty Foundation" (US) = "Stichting Getty" (NL) = same identity
|
||||
|
||||
4. **Deduplication**: Prevents false duplicates
|
||||
- "Museum X" and "Stichting Museum X" should NOT be separate entities
|
||||
|
||||
5. **ISO 20275 Alignment**: The Legal Entity Identifier (LEI) standard explicitly separates legal form from entity name
|
||||
|
||||
### Where Legal Form IS Stored
|
||||
|
||||
Legal form information is NOT discarded - it is stored in appropriate metadata fields:
|
||||
|
||||
| Field | Location | Purpose |
|
||||
|-------|----------|---------|
|
||||
| `legal_form` | `CustodianLegalStatus` | ISO 20275 legal form code |
|
||||
| `legal_name` | `CustodianLegalStatus` | Full registered name including legal form |
|
||||
| `observed_name` | `CustodianObservation` | Original name as observed in source (may include legal form) |
|
||||
|
||||
## Examples
|
||||
|
||||
### Dutch Examples
|
||||
|
||||
| Source Name | CustodianName | Legal Form | Notes |
|
||||
|-------------|---------------|------------|-------|
|
||||
| Stichting Rijksmuseum | Rijksmuseum | Stichting | Prefix removal |
|
||||
| Hidde Nijland Stichting | Hidde Nijland | Stichting | Suffix removal |
|
||||
| Stichting Het Loo | Het Loo | Stichting | Preserve article "Het" |
|
||||
| Coöperatie Erfgoed | Erfgoed | Coöperatie | |
|
||||
| Vereniging Ons Huis | Ons Huis | Vereniging | |
|
||||
| Museum B.V. | Museum | B.V. | |
|
||||
|
||||
### International Examples
|
||||
|
||||
| Source Name | CustodianName | Legal Form | Language |
|
||||
|-------------|---------------|------------|----------|
|
||||
| The Getty Foundation | The Getty | Foundation | English |
|
||||
| British Museum Trust Ltd | British Museum | Trust Ltd | English |
|
||||
| Smithsonian Institution Inc. | Smithsonian Institution | Inc. | English |
|
||||
| Fundação Biblioteca Nacional | Biblioteca Nacional | Fundação | Portuguese |
|
||||
| Verein Deutsches Museum | Deutsches Museum | Verein | German |
|
||||
| Association des Amis du Louvre | Amis du Louvre | Association | French |
|
||||
| Fondazione Musei Civici | Musei Civici | Fondazione | Italian |
|
||||
| Fundación Museo del Prado | Museo del Prado | Fundación | Spanish |
|
||||
|
||||
---
|
||||
|
||||
## Global Legal Form Terms Reference
|
||||
|
||||
### Dutch (Netherlands, Belgium-Flanders)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Stichting (foundation)
|
||||
- Vereniging (association)
|
||||
- Coöperatie, Coöperatieve (cooperative)
|
||||
|
||||
**Business Entities:**
|
||||
- B.V., BV (besloten vennootschap - private limited company)
|
||||
- N.V., NV (naamloze vennootschap - public limited company)
|
||||
- V.O.F., VOF (vennootschap onder firma - general partnership)
|
||||
- C.V., CV (commanditaire vennootschap - limited partnership)
|
||||
- Maatschap (partnership)
|
||||
- Eenmanszaak (sole proprietorship)
|
||||
|
||||
### English (UK, US, Ireland, Australia, etc.)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Foundation
|
||||
- Trust
|
||||
- Association
|
||||
- Society
|
||||
- Institute
|
||||
- Institution (when followed by Inc./Ltd.)
|
||||
- Charity
|
||||
- Fund
|
||||
|
||||
**Business Entities:**
|
||||
- Inc., Incorporated
|
||||
- Ltd., Limited
|
||||
- LLC, L.L.C. (limited liability company)
|
||||
- LLP, L.L.P. (limited liability partnership)
|
||||
- Corp., Corporation
|
||||
- Co., Company
|
||||
- PLC, plc (public limited company - UK)
|
||||
- Pty Ltd (proprietary limited - Australia)
|
||||
|
||||
### German (Germany, Austria, Switzerland)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Stiftung (foundation)
|
||||
- Verein (association)
|
||||
- e.V., eingetragener Verein (registered association)
|
||||
- gGmbH (gemeinnützige GmbH - charitable limited company)
|
||||
|
||||
**Business Entities:**
|
||||
- GmbH (Gesellschaft mit beschränkter Haftung - limited liability company)
|
||||
- AG (Aktiengesellschaft - stock corporation)
|
||||
- KG (Kommanditgesellschaft - limited partnership)
|
||||
- OHG (offene Handelsgesellschaft - general partnership)
|
||||
- GmbH & Co. KG
|
||||
- UG (Unternehmergesellschaft - mini-GmbH)
|
||||
|
||||
### French (France, Belgium-Wallonia, Switzerland, Canada-Quebec)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Fondation (foundation)
|
||||
- Association (association)
|
||||
- Fonds (fund)
|
||||
|
||||
**Business Entities:**
|
||||
- S.A., SA (société anonyme - public limited company)
|
||||
- S.A.R.L., SARL (société à responsabilité limitée - private limited company)
|
||||
- S.A.S., SAS (société par actions simplifiée)
|
||||
- S.C.I., SCI (société civile immobilière)
|
||||
- S.N.C., SNC (société en nom collectif - general partnership)
|
||||
- S.C.S., SCS (société en commandite simple)
|
||||
- EURL (entreprise unipersonnelle à responsabilité limitée)
|
||||
|
||||
### Spanish (Spain, Latin America)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Fundación (foundation)
|
||||
- Asociación (association)
|
||||
- Sociedad (society) - when not followed by commercial designator
|
||||
|
||||
**Business Entities:**
|
||||
- S.A., SA (sociedad anónima - public limited company)
|
||||
- S.L., SL (sociedad limitada - private limited company)
|
||||
- S.L.L., SLL (sociedad limitada laboral)
|
||||
- S.Coop. (sociedad cooperativa)
|
||||
- S.C., SC (sociedad colectiva - general partnership)
|
||||
- S.Com., S. en C. (sociedad en comandita)
|
||||
|
||||
### Portuguese (Portugal, Brazil)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Fundação (foundation)
|
||||
- Associação (association)
|
||||
- Instituto (institute)
|
||||
|
||||
**Business Entities:**
|
||||
- Ltda., Limitada (limited liability company)
|
||||
- S.A., SA (sociedade anônima - corporation)
|
||||
- S/A
|
||||
- Cia., Companhia (company)
|
||||
- ME (microempresa)
|
||||
- EPP (empresa de pequeno porte)
|
||||
|
||||
### Italian (Italy, Switzerland-Ticino)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Fondazione (foundation)
|
||||
- Associazione (association)
|
||||
- Ente (entity/institution)
|
||||
- Onlus (non-profit organization)
|
||||
|
||||
**Business Entities:**
|
||||
- S.p.A., SpA (società per azioni - joint-stock company)
|
||||
- S.r.l., Srl (società a responsabilità limitata - limited liability company)
|
||||
- S.a.s., Sas (società in accomandita semplice)
|
||||
- S.n.c., Snc (società in nome collettivo)
|
||||
- S.c.a.r.l. (società cooperativa a responsabilità limitata)
|
||||
|
||||
### Scandinavian Languages
|
||||
|
||||
**Danish:**
|
||||
- Fond (foundation)
|
||||
- Forening (association)
|
||||
- A/S (aktieselskab - public limited company)
|
||||
- ApS (anpartsselskab - private limited company)
|
||||
|
||||
**Swedish:**
|
||||
- Stiftelse (foundation)
|
||||
- Förening (association)
|
||||
- AB (aktiebolag - limited company)
|
||||
|
||||
**Norwegian:**
|
||||
- Stiftelse (foundation)
|
||||
- Forening (association)
|
||||
- AS (aksjeselskap - limited company)
|
||||
- ASA (allmennaksjeselskap - public limited company)
|
||||
|
||||
### Other European Languages
|
||||
|
||||
**Polish:**
|
||||
- Fundacja (foundation)
|
||||
- Stowarzyszenie (association)
|
||||
- Sp. z o.o. (limited liability company)
|
||||
- S.A. (joint-stock company)
|
||||
|
||||
**Czech:**
|
||||
- Nadace (foundation)
|
||||
- Spolek (association)
|
||||
- s.r.o. (limited liability company)
|
||||
- a.s. (joint-stock company)
|
||||
|
||||
**Hungarian:**
|
||||
- Alapítvány (foundation)
|
||||
- Egyesület (association)
|
||||
- Kft. (limited liability company)
|
||||
- Zrt. (private limited company)
|
||||
- Nyrt. (public limited company)
|
||||
|
||||
**Greek:**
|
||||
- Ίδρυμα (Idryma - foundation)
|
||||
- Σύλλογος (Syllogos - association)
|
||||
- Α.Ε., ΑΕ (Ανώνυμη Εταιρεία - corporation)
|
||||
- Ε.Π.Ε., ΕΠΕ (limited liability company)
|
||||
|
||||
**Finnish:**
|
||||
- Säätiö (foundation)
|
||||
- Yhdistys (association)
|
||||
- Oy (osakeyhtiö - limited company)
|
||||
- Oyj (public limited company)
|
||||
|
||||
### Asian Languages
|
||||
|
||||
**Japanese:**
|
||||
- 財団法人 (zaidan hōjin - incorporated foundation)
|
||||
- 社団法人 (shadan hōjin - incorporated association)
|
||||
- 株式会社, K.K. (kabushiki kaisha - corporation)
|
||||
- 合同会社, G.K. (gōdō kaisha - LLC)
|
||||
- 有限会社, Y.K. (yūgen kaisha - limited company)
|
||||
|
||||
**Chinese:**
|
||||
- 基金会 (jījīn huì - foundation)
|
||||
- 协会 (xiéhuì - association)
|
||||
- 有限公司 (yǒuxiàn gōngsī - limited company)
|
||||
- 股份有限公司 (gǔfèn yǒuxiàn gōngsī - joint-stock company)
|
||||
|
||||
**Korean:**
|
||||
- 재단법인 (jaedan beobin - incorporated foundation)
|
||||
- 사단법인 (sadan beobin - incorporated association)
|
||||
- 주식회사 (jusik hoesa - corporation)
|
||||
- 유한회사 (yuhan hoesa - limited company)
|
||||
|
||||
### Middle Eastern Languages
|
||||
|
||||
**Arabic:**
|
||||
- مؤسسة (mu'assasa - foundation/institution)
|
||||
- جمعية (jam'iyya - association)
|
||||
- شركة (sharika - company)
|
||||
- ش.م.م (limited liability company)
|
||||
- ش.م.ع (public joint-stock company)
|
||||
|
||||
**Hebrew:**
|
||||
- עמותה (amuta - non-profit association)
|
||||
- חל"צ (company for public benefit)
|
||||
- בע"מ (limited company)
|
||||
|
||||
**Turkish:**
|
||||
- Vakıf (foundation)
|
||||
- Dernek (association)
|
||||
- A.Ş. (anonim şirket - joint-stock company)
|
||||
- Ltd. Şti. (limited şirket - limited company)
|
||||
|
||||
### Latin American Specific
|
||||
|
||||
**Brazilian Portuguese:**
|
||||
- OSCIP (organização da sociedade civil de interesse público)
|
||||
- ONG (organização não governamental)
|
||||
- EIRELI (empresa individual de responsabilidade limitada)
|
||||
|
||||
**Mexican Spanish:**
|
||||
- A.C. (asociación civil - civil association)
|
||||
- S.C. (sociedad civil)
|
||||
- S. de R.L. (sociedad de responsabilidad limitada)
|
||||
|
||||
---
|
||||
|
||||
## Implementation Guidelines
|
||||
|
||||
### Filtering Algorithm
|
||||
|
||||
```python
|
||||
def filter_legal_form(name: str, language: str = None) -> tuple[str, str | None]:
|
||||
"""
|
||||
Remove legal form terms from custodian name.
|
||||
|
||||
Returns:
|
||||
tuple: (filtered_name, legal_form_found)
|
||||
"""
|
||||
# Apply language-specific patterns first if language known
|
||||
# Then apply universal patterns
|
||||
# Handle both prefix and suffix positions
|
||||
# Preserve articles (the, het, de, la, le, etc.)
|
||||
pass
|
||||
```
|
||||
|
||||
### Position Handling
|
||||
|
||||
Legal forms can appear as:
|
||||
|
||||
1. **Prefix**: "Stichting Rijksmuseum" → Remove "Stichting "
|
||||
2. **Suffix**: "British Museum Trust Ltd" → Remove " Trust Ltd"
|
||||
3. **Infix** (rare): Handle case-by-case
|
||||
|
||||
### Edge Cases
|
||||
|
||||
1. **Multiple legal forms**: "Foundation Trust Ltd" → Remove all
|
||||
2. **Abbreviation variations**: "Inc." = "Inc" = "Incorporated"
|
||||
3. **Case insensitivity**: "STICHTING" = "Stichting" = "stichting"
|
||||
4. **With punctuation**: "B.V." = "BV" = "B.V"
|
||||
5. **Compound terms**: "GmbH & Co. KG" → Remove entire compound
|
||||
|
||||
### Validation Script
|
||||
|
||||
Use `scripts/validate_organization_names.py` to detect names that still contain legal form terms after filtering.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- ISO 20275:2017 - Financial services — Entity legal forms (ELF)
|
||||
- GLEIF Legal Entity Identifier documentation
|
||||
- LinkML Schema: `schemas/20251121/linkml/modules/classes/CustodianName.yaml`
|
||||
- AGENTS.md: Rule 8 (Legal Form Filtering)
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: 2025-12-02
|
||||
**Maintained By**: GLAM Heritage Custodian Ontology Project
|
||||
156
frontend/public/schemas/20251121/linkml/rules/README.md
Normal file
156
frontend/public/schemas/20251121/linkml/rules/README.md
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
# Value Standardization Rules
|
||||
|
||||
**Location**: `schemas/20251121/linkml/rules/`
|
||||
**Purpose**: Data transformation and processing rules for achieving standardized values required by Heritage Custodian (HC) classes.
|
||||
|
||||
---
|
||||
|
||||
## About These Rules
|
||||
|
||||
These rules are **formally outside the LinkML schema convention** but document HOW data values are:
|
||||
- Transformed
|
||||
- Converted
|
||||
- Processed
|
||||
- Normalized
|
||||
|
||||
to achieve the standardized values required by particular HC classes.
|
||||
|
||||
**IMPORTANT**: These are NOT LinkML validation rules. They are **processing instructions** for data pipelines and extraction agents.
|
||||
|
||||
---
|
||||
|
||||
## Rule Categories
|
||||
|
||||
### 1. Name Standardization Rules
|
||||
|
||||
| Rule ID | File | Applies To | Summary |
|
||||
|---------|------|------------|---------|
|
||||
| **LEGAL-FORM-FILTER** | [`LEGAL_FORM_FILTER.md`](LEGAL_FORM_FILTER.md) | `CustodianName` | Remove legal form terms (Stichting, Foundation, Inc.) from emic names |
|
||||
| **ABBREV-CHAR-FILTER** | [`ABBREVIATION_RULES.md`](ABBREVIATION_RULES.md) | GHCID abbreviation | Remove special characters (&, /, +, @) and normalize diacritics to ASCII |
|
||||
| **TRANSLIT-ISO** | [`TRANSLITERATION.md`](TRANSLITERATION.md) | GHCID abbreviation | Transliterate non-Latin scripts (Cyrillic, CJK, Arabic) using ISO standards |
|
||||
|
||||
### 2. Geographic Standardization Rules
|
||||
|
||||
| Rule ID | File | Applies To | Summary |
|
||||
|---------|------|------------|---------|
|
||||
| **GEONAMES-SETTLEMENT** | [`GEONAMES_SETTLEMENT.md`](GEONAMES_SETTLEMENT.md) | Settlement codes | Use GeoNames as single source for settlement names |
|
||||
| **FEATURE-CODE-FILTER** | [`GEONAMES_SETTLEMENT.md`](GEONAMES_SETTLEMENT.md) | Reverse geocoding | Only use PPL* feature codes, never PPLX (neighborhoods) |
|
||||
|
||||
### 3. Web Observation Rules
|
||||
|
||||
| Rule ID | File | Applies To | Summary |
|
||||
|---------|------|------------|---------|
|
||||
| **XPATH-PROVENANCE** | [`XPATH_PROVENANCE.md`](XPATH_PROVENANCE.md) | `WebClaim` | Every web claim MUST have XPath pointer to archived HTML |
|
||||
|
||||
### 4. Schema Evolution Rules
|
||||
|
||||
| Rule ID | File | Applies To | Summary |
|
||||
|---------|------|------------|---------|
|
||||
| **ENUM-TO-CLASS** | [`ENUM_TO_CLASS.md`](ENUM_TO_CLASS.md) | Enums/Classes | When enum promoted to class hierarchy, delete original enum |
|
||||
|
||||
---
|
||||
|
||||
## GLAMORCUBESFIXPHDNT Taxonomy Applicability
|
||||
|
||||
Each rule primarily applies to certain custodian types:
|
||||
|
||||
| Rule | Primary Types | All Types |
|
||||
|------|--------------|-----------|
|
||||
| LEGAL-FORM-FILTER | All | ✅ |
|
||||
| ABBREV-CHAR-FILTER (special characters) | All | ✅ |
|
||||
| ABBREV-CHAR-FILTER (diacritics) | All | ✅ |
|
||||
| TRANSLIT-ISO | International (non-Latin script countries) | Partial |
|
||||
| GEONAMES-SETTLEMENT | All | ✅ |
|
||||
| XPATH-PROVENANCE | D (Digital platforms) | Partial |
|
||||
|
||||
---
|
||||
|
||||
## Integration with bronhouder.nl
|
||||
|
||||
These rules are displayed under a separate "Regels" (Rules) category on the bronhouder.nl LinkML visualization page, distinct from:
|
||||
- Classes
|
||||
- Slots
|
||||
- Enums
|
||||
- Instances
|
||||
|
||||
Each rule includes:
|
||||
- Rule ID (short identifier)
|
||||
- Applicable class(es)
|
||||
- GLAMORCUBESFIXPHDNT type indicator
|
||||
- Transformation examples
|
||||
- Implementation code (Python)
|
||||
|
||||
---
|
||||
|
||||
## Rule Template
|
||||
|
||||
New rules should follow this template:
|
||||
|
||||
```markdown
|
||||
# Rule Title
|
||||
|
||||
**Rule ID**: SHORT-ID
|
||||
**Status**: MANDATORY | RECOMMENDED | OPTIONAL
|
||||
**Applies To**: Class or slot name
|
||||
**Created**: YYYY-MM-DD
|
||||
**Updated**: YYYY-MM-DD
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
One-paragraph summary of what this rule does.
|
||||
|
||||
---
|
||||
|
||||
## Rationale
|
||||
|
||||
Why this rule exists (numbered list of reasons).
|
||||
|
||||
---
|
||||
|
||||
## Specification
|
||||
|
||||
Detailed specification with examples.
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
Python code showing how to implement this rule.
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
| Input | Output | Explanation |
|
||||
|-------|--------|-------------|
|
||||
|
||||
---
|
||||
|
||||
## Related Rules
|
||||
|
||||
- Other related rules
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Date | Change |
|
||||
|------|--------|
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File List
|
||||
|
||||
```
|
||||
rules/
|
||||
├── README.md # This file (rule index)
|
||||
├── ABBREVIATION_RULES.md # ABBREV-CHAR-FILTER: Special char + diacritics normalization
|
||||
├── LEGAL_FORM_FILTER.md # LEGAL-FORM-FILTER: Legal form removal from emic names
|
||||
├── GEONAMES_SETTLEMENT.md # GEONAMES-SETTLEMENT: Geographic standardization via GeoNames
|
||||
├── XPATH_PROVENANCE.md # XPATH-PROVENANCE: WebClaim XPath requirements
|
||||
├── TRANSLITERATION.md # TRANSLIT-ISO: Non-Latin script transliteration
|
||||
└── ENUM_TO_CLASS.md # ENUM-TO-CLASS: Schema evolution pattern
|
||||
```
|
||||
337
frontend/public/schemas/20251121/linkml/rules/TRANSLITERATION.md
Normal file
337
frontend/public/schemas/20251121/linkml/rules/TRANSLITERATION.md
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
# Transliteration Standards for Non-Latin Scripts
|
||||
|
||||
**Rule ID**: TRANSLIT-ISO
|
||||
**Status**: MANDATORY
|
||||
**Applies To**: GHCID abbreviation generation from emic names in non-Latin scripts
|
||||
**Created**: 2025-12-08
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
**When generating GHCID abbreviations from institution names written in non-Latin scripts, the emic name MUST first be transliterated to Latin characters using the designated ISO or recognized standard for that script.**
|
||||
|
||||
This rule affects **170 institutions** across **21 languages** with non-Latin writing systems.
|
||||
|
||||
### Key Principles
|
||||
|
||||
1. **Emic name is preserved** - The original script is stored in `custodian_name.emic_name`
|
||||
2. **Transliteration is for processing only** - Used to generate abbreviations
|
||||
3. **ISO/recognized standards required** - No ad-hoc romanization
|
||||
4. **Deterministic output** - Same input always produces same Latin output
|
||||
5. **Existing GHCIDs grandfathered** - Only applies to NEW custodians
|
||||
|
||||
---
|
||||
|
||||
## Transliteration Standards by Script/Language
|
||||
|
||||
### Cyrillic Scripts
|
||||
|
||||
| Language | ISO Code | Standard | Library/Tool | Notes |
|
||||
|----------|----------|----------|--------------|-------|
|
||||
| **Russian** | ru | ISO 9:1995 | `transliterate` | Scientific transliteration |
|
||||
| **Ukrainian** | uk | ISO 9:1995 | `transliterate` | Includes Ukrainian-specific letters |
|
||||
| **Bulgarian** | bg | ISO 9:1995 | `transliterate` | Uses same Cyrillic base |
|
||||
| **Serbian** | sr | ISO 9:1995 | `transliterate` | Serbian Cyrillic variant |
|
||||
| **Kazakh** | kk | ISO 9:1995 | `transliterate` | Cyrillic-based (pre-2023) |
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: Институт восточных рукописей РАН
|
||||
ISO 9: Institut vostočnyh rukopisej RAN
|
||||
Abbrev: IVRRAN (after diacritic normalization)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### CJK Scripts
|
||||
|
||||
#### Chinese (Hanzi)
|
||||
|
||||
| Variant | Standard | Library/Tool | Notes |
|
||||
|---------|----------|--------------|-------|
|
||||
| Simplified | Hanyu Pinyin (ISO 7098) | `pypinyin` | Standard PRC romanization |
|
||||
| Traditional | Hanyu Pinyin | `pypinyin` | Same standard applies |
|
||||
|
||||
**Pinyin Rules**:
|
||||
- Tone marks are OMITTED for abbreviation (diacritics removed anyway)
|
||||
- Word boundaries follow natural spacing
|
||||
- Proper nouns capitalized
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: 东巴文化博物院
|
||||
Pinyin: Dongba Wenhua Bowuyuan
|
||||
ASCII: Dongba Wenhua Bowuyuan
|
||||
Abbrev: DWB
|
||||
```
|
||||
|
||||
#### Japanese (Kanji/Kana)
|
||||
|
||||
| Standard | Library/Tool | Notes |
|
||||
|----------|--------------|-------|
|
||||
| Modified Hepburn | `pykakasi`, `romkan` | Most widely used internationally |
|
||||
|
||||
**Hepburn Rules**:
|
||||
- Long vowels: ō, ū (normalized to o, u for abbreviation)
|
||||
- Particles: は → wa, を → o, へ → e
|
||||
- Syllabic n: ん → n (n' before vowels)
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: 国立中央博物館
|
||||
Romaji: Kokuritsu Chuo Hakubutsukan
|
||||
ASCII: Kokuritsu Chuo Hakubutsukan
|
||||
Abbrev: KCH
|
||||
```
|
||||
|
||||
#### Korean (Hangul)
|
||||
|
||||
| Standard | Library/Tool | Notes |
|
||||
|----------|--------------|-------|
|
||||
| Revised Romanization (RR) | `korean-romanizer`, `hangul-romanize` | Official South Korean standard (2000) |
|
||||
|
||||
**RR Rules**:
|
||||
- No diacritics (unlike McCune-Reischauer)
|
||||
- Consonant assimilation reflected in spelling
|
||||
- Word boundaries at natural breaks
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: 독립기념관
|
||||
RR: Dongnip Ginyeomgwan
|
||||
Abbrev: DG
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Arabic Script
|
||||
|
||||
| Language | ISO Code | Standard | Library/Tool | Notes |
|
||||
|----------|----------|----------|--------------|-------|
|
||||
| **Arabic** | ar | ISO 233-2:1993 | `arabic-transliteration` | Simplified standard |
|
||||
| **Persian/Farsi** | fa | ISO 233-3:1999 | `persian-transliteration` | Persian extensions |
|
||||
| **Urdu** | ur | ISO 233-3 + Urdu extensions | `urdu-transliteration` | Additional characters |
|
||||
|
||||
**Example (Arabic)**:
|
||||
```
|
||||
Input: المكتبة الوطنية للمملكة المغربية
|
||||
ISO: al-Maktaba al-Wataniya lil-Mamlaka al-Maghribiya
|
||||
ASCII: al-Maktaba al-Wataniya lil-Mamlaka al-Maghribiya
|
||||
Abbrev: MWMM (skip "al-" articles)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Hebrew Script
|
||||
|
||||
| Standard | Library/Tool | Notes |
|
||||
|----------|--------------|-------|
|
||||
| ISO 259-3:1999 | `hebrew-transliteration` | Simplified romanization |
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: ארכיון הסיפור העממי בישראל
|
||||
ISO: Arkhiyon ha-Sipur ha-Amami be-Yisrael
|
||||
ASCII: Arkhiyon ha-Sipur ha-Amami be-Yisrael
|
||||
Abbrev: ASAY (skip "ha-" and "be-" articles)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Greek Script
|
||||
|
||||
| Standard | Library/Tool | Notes |
|
||||
|----------|--------------|-------|
|
||||
| ISO 843:1997 | `greek-transliteration` | Romanization of Greek |
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: Αρχαιολογικό Μουσείο Θεσσαλονίκης
|
||||
ISO: Archaiologiko Mouseio Thessalonikis
|
||||
ASCII: Archaiologiko Mouseio Thessalonikis
|
||||
Abbrev: AMT
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Indic Scripts
|
||||
|
||||
| Language | Script | Standard | Library/Tool |
|
||||
|----------|--------|----------|--------------|
|
||||
| **Hindi** | Devanagari | ISO 15919 | `indic-transliteration` |
|
||||
| **Bengali** | Bengali | ISO 15919 | `indic-transliteration` |
|
||||
| **Nepali** | Devanagari | ISO 15919 | `indic-transliteration` |
|
||||
| **Sinhala** | Sinhala | ISO 15919 | `indic-transliteration` |
|
||||
|
||||
**Example (Hindi)**:
|
||||
```
|
||||
Input: राजस्थान प्राच्यविद्या प्रतिष्ठान
|
||||
ISO: Rajasthana Pracyavidya Pratishthana
|
||||
ASCII: Rajasthana Pracyavidya Pratishthana
|
||||
Abbrev: RPP
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Southeast Asian Scripts
|
||||
|
||||
| Language | Script | Standard | Library/Tool |
|
||||
|----------|--------|----------|--------------|
|
||||
| **Thai** | Thai | ISO 11940-2 | `thai-romanization` |
|
||||
| **Khmer** | Khmer | ALA-LC | `khmer-romanization` |
|
||||
|
||||
**Thai Example**:
|
||||
```
|
||||
Input: สำนักหอจดหมายเหตุแห่งชาติ
|
||||
ISO: Samnak Ho Chotmaihet Haeng Chat
|
||||
Abbrev: SHCHC
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Other Scripts
|
||||
|
||||
| Language | Script | Standard | Library/Tool |
|
||||
|----------|--------|----------|--------------|
|
||||
| **Armenian** | Armenian | ISO 9985 | `armenian-transliteration` |
|
||||
| **Georgian** | Georgian | ISO 9984 | `georgian-transliteration` |
|
||||
|
||||
**Georgian Example**:
|
||||
```
|
||||
Input: ხელნაწერთა ეროვნული ცენტრი
|
||||
ISO: Khelnawerti Erovnuli Centri
|
||||
ASCII: Khelnawerti Erovnuli Centri
|
||||
Abbrev: KEC
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
### Python Transliteration Utility
|
||||
|
||||
```python
|
||||
import unicodedata
|
||||
from typing import Optional
|
||||
|
||||
def detect_script(text: str) -> str:
    """
    Detect the primary script of the input text.

    Scans characters left to right and returns the script of the first
    character that falls in a known non-Latin Unicode block; text with no
    such character is reported as 'latin'.

    Returns one of: 'latin', 'cyrillic', 'chinese', 'japanese',
    'korean', 'arabic', 'hebrew', 'greek', 'devanagari', 'thai'.
    """
    # Basic Unicode block ranges per script (inclusive code points).
    # 'japanese' (Hiragana + Katakana) is checked in addition to the CJK
    # ideograph block: previously the docstring advertised 'japanese' but
    # no kana range existed, so it could never be returned. Kanji-only
    # text still resolves to 'chinese' because the CJK Unified Ideographs
    # block is shared between the languages.
    script_ranges = {
        'cyrillic': (0x0400, 0x04FF),
        'arabic': (0x0600, 0x06FF),
        'hebrew': (0x0590, 0x05FF),
        'devanagari': (0x0900, 0x097F),
        'thai': (0x0E00, 0x0E7F),
        'greek': (0x0370, 0x03FF),
        'korean': (0xAC00, 0xD7AF),
        'japanese': (0x3040, 0x30FF),  # Hiragana U+3040-309F, Katakana U+30A0-30FF
        'chinese': (0x4E00, 0x9FFF),   # CJK Unified Ideographs
    }

    for char in text:
        code = ord(char)
        for script, (start, end) in script_ranges.items():
            if start <= code <= end:
                return script

    return 'latin'
|
||||
|
||||
|
||||
def transliterate_for_abbreviation(emic_name: str, lang: str) -> str:
    """
    Transliterate emic name for GHCID abbreviation generation.

    Pipeline: script-specific transliteration -> diacritic stripping ->
    special-character removal -> whitespace normalization. The output is
    plain ASCII letters separated by single spaces, ready for
    initial-letter abbreviation extraction.

    Args:
        emic_name: Institution name in original script
        lang: ISO 639-1 language code

    Returns:
        Transliterated name ready for abbreviation extraction
    """
    import re

    # Step 1: Transliterate to Latin (implementation depends on script)
    # NOTE(review): `transliterate` is a per-script dispatcher defined
    # elsewhere (presumably selecting ISO 9, Pinyin, Hepburn, etc. by
    # `lang` — confirm against the dispatcher's implementation).
    latin = transliterate(emic_name, lang)

    # Step 2: Normalize diacritics
    # NFD splits base characters from combining marks (category 'Mn'),
    # which are then dropped: 'č' -> 'c', 'ō' -> 'o'.
    normalized = unicodedata.normalize('NFD', latin)
    ascii_text = ''.join(c for c in normalized if unicodedata.category(c) != 'Mn')

    # Step 3: Remove special characters (except spaces)
    # Non-letters become spaces (not deleted) so word boundaries survive,
    # e.g. "al-Maktaba" -> "al Maktaba".
    clean = re.sub(r'[^a-zA-Z\s]', ' ', ascii_text)

    # Step 4: Normalize whitespace
    # split()/join collapses runs of whitespace and trims the ends.
    clean = ' '.join(clean.split())

    return clean
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Skip Words by Language
|
||||
|
||||
When extracting abbreviations from transliterated text, skip these articles/prepositions:
|
||||
|
||||
### Arabic
|
||||
- `al-` (the definite article)
|
||||
- `bi-`, `li-`, `fi-` (prepositions)
|
||||
|
||||
### Hebrew
|
||||
- `ha-` (the)
|
||||
- `ve-` (and)
|
||||
- `be-`, `le-`, `me-` (prepositions)
|
||||
|
||||
### Persian
|
||||
- `-e`, `-ye` (ezafe connector)
|
||||
- `va` (and)
|
||||
|
||||
### CJK Languages
|
||||
- No skip words (particles are integral to meaning)
|
||||
|
||||
### Indic Languages
|
||||
- `ka`, `ki`, `ke` (Hindi: of)
|
||||
- `aur` (Hindi: and)
|
||||
|
||||
---
|
||||
|
||||
## Validation
|
||||
|
||||
### Check Transliteration Output
|
||||
|
||||
```python
|
||||
def validate_transliteration(result: str) -> bool:
|
||||
"""
|
||||
Validate that transliteration output contains only ASCII letters and spaces.
|
||||
"""
|
||||
import re
|
||||
return bool(re.match(r'^[a-zA-Z\s]+$', result))
|
||||
```
|
||||
|
||||
### Manual Review Queue
|
||||
|
||||
Non-Latin institutions should be flagged for manual review if:
|
||||
1. Transliteration library not available for that script
|
||||
2. Confidence in transliteration is low
|
||||
3. Institution has multiple official romanizations
|
||||
|
||||
---
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- `AGENTS.md` - Rule 12: Transliteration Standards
|
||||
- `rules/ABBREVIATION_RULES.md` - Character filtering after transliteration
|
||||
- `docs/TRANSLITERATION_CONVENTIONS.md` - Extended examples and edge cases
|
||||
- `scripts/transliterate_emic_names.py` - Production transliteration script
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Date | Change |
|
||||
|------|--------|
|
||||
| 2025-12-08 | Initial standards document created |
|
||||
|
|
@ -0,0 +1,210 @@
|
|||
# WebObservation XPath Provenance Rules
|
||||
|
||||
**Rule ID**: XPATH-PROVENANCE
|
||||
**Status**: MANDATORY
|
||||
**Applies To**: WebClaim extraction from websites
|
||||
**Created**: 2025-11-29
|
||||
|
||||
---
|
||||
|
||||
## Core Principle: Every Claim MUST Have Verifiable Provenance
|
||||
|
||||
**If a claim allegedly came from a webpage, it MUST have an XPath pointer to the exact location in the archived HTML where that value appears. Claims without XPath provenance are considered FABRICATED and must be removed.**
|
||||
|
||||
This is not about "confidence" or "uncertainty" - it's about **verifiability**. Either the claim value exists in the HTML at a specific XPath, or it was hallucinated/fabricated by an LLM.
|
||||
|
||||
---
|
||||
|
||||
## Required Fields for WebObservation Claims
|
||||
|
||||
Every claim in `web_enrichment.claims` MUST have:
|
||||
|
||||
| Field | Required | Description |
|
||||
|-------|----------|-------------|
|
||||
| `claim_type` | YES | Type of claim (full_name, description, email, etc.) |
|
||||
| `claim_value` | YES | The extracted value |
|
||||
| `source_url` | YES | URL the claim was extracted from |
|
||||
| `retrieved_on` | YES | ISO 8601 timestamp when page was archived |
|
||||
| `xpath` | YES | XPath to the element containing this value |
|
||||
| `html_file` | YES | Relative path to archived HTML file |
|
||||
| `xpath_match_score` | YES | 1.0 for exact match, <1.0 for fuzzy match |
|
||||
|
||||
### Example - CORRECT (Verifiable)
|
||||
|
||||
```yaml
|
||||
web_enrichment:
|
||||
claims:
|
||||
- claim_type: full_name
|
||||
claim_value: Historische Vereniging Nijeveen
|
||||
source_url: https://historischeverenigingnijeveen.nl/
|
||||
retrieved_on: "2025-11-29T12:28:00Z"
|
||||
xpath: /[document][1]/html[1]/body[1]/div[6]/div[1]/table[3]/tbody[1]/tr[1]/td[1]/p[6]
|
||||
html_file: web/0021/historischeverenigingnijeveen.nl/rendered.html
|
||||
xpath_match_score: 1.0
|
||||
```
|
||||
|
||||
### Example - WRONG (Fabricated - Must Be Removed)
|
||||
|
||||
```yaml
|
||||
web_enrichment:
|
||||
claims:
|
||||
- claim_type: full_name
|
||||
claim_value: Historische Vereniging Nijeveen
|
||||
confidence: 0.95 # ← NO! This is meaningless without XPath
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Forbidden: Confidence Scores Without XPath
|
||||
|
||||
**NEVER use arbitrary confidence scores for web-extracted claims.**
|
||||
|
||||
Confidence scores like `0.95`, `0.90`, `0.85` are meaningless because:
|
||||
1. There is NO methodology defining what these numbers mean
|
||||
2. They cannot be verified or reproduced
|
||||
3. They give false impression of rigor
|
||||
4. They mask the fact that claims may be fabricated
|
||||
|
||||
If a value appears in the HTML → `xpath_match_score: 1.0`
|
||||
If a value does NOT appear in the HTML → **REMOVE THE CLAIM**
|
||||
|
||||
---
|
||||
|
||||
## Website Archiving Workflow
|
||||
|
||||
### Step 1: Archive the Website
|
||||
|
||||
Use Playwright to archive websites with JavaScript rendering:
|
||||
|
||||
```bash
|
||||
python scripts/fetch_website_playwright.py <entry_number> <url>
|
||||
|
||||
# Example:
|
||||
python scripts/fetch_website_playwright.py 0021 https://historischeverenigingnijeveen.nl/
|
||||
```
|
||||
|
||||
This creates:
|
||||
```
|
||||
data/nde/enriched/entries/web/{entry_number}/{domain}/
|
||||
├── index.html # Raw HTML as received
|
||||
├── rendered.html # HTML after JS execution
|
||||
├── content.md # Markdown conversion
|
||||
└── metadata.yaml # XPath extractions for provenance
|
||||
```
|
||||
|
||||
### Step 2: Add XPath Provenance to Claims
|
||||
|
||||
Run the XPath migration script:
|
||||
|
||||
```bash
|
||||
python scripts/add_xpath_provenance.py
|
||||
|
||||
# Or for specific entries:
|
||||
python scripts/add_xpath_provenance.py --entries 0021,0022,0023
|
||||
```
|
||||
|
||||
This script:
|
||||
1. Reads each entry's `web_enrichment.claims`
|
||||
2. Searches archived HTML for each claim value
|
||||
3. Adds `xpath` + `html_file` if found
|
||||
4. **REMOVES claims that cannot be verified** (stores in `removed_unverified_claims`)
|
||||
|
||||
### Step 3: Audit Removed Claims
|
||||
|
||||
Check `removed_unverified_claims` in each entry file:
|
||||
|
||||
```yaml
|
||||
removed_unverified_claims:
|
||||
- claim_type: phone
|
||||
claim_value: "+31 6 12345678"
|
||||
reason: "Value not found in archived HTML - likely fabricated"
|
||||
removed_on: "2025-11-29T14:30:00Z"
|
||||
```
|
||||
|
||||
These claims were NOT in the HTML and should NOT be restored without proper sourcing.
|
||||
|
||||
---
|
||||
|
||||
## Claim Types and Expected Sources
|
||||
|
||||
| Claim Type | Expected Source | Notes |
|
||||
|------------|-----------------|-------|
|
||||
| `full_name` | Page title, heading, logo text | Usually in `<h1>`, `<title>`, or prominent `<div>` |
|
||||
| `description` | Meta description, about text | Check `<meta name="description">` first |
|
||||
| `email` | Contact page, footer | Often in `<a href="mailto:...">` |
|
||||
| `phone` | Contact page, footer | May need normalization |
|
||||
| `address` | Contact page, footer | Check for structured data too |
|
||||
| `social_media` | Footer, contact page | Links to social platforms |
|
||||
| `opening_hours` | Contact/visit page | May be in structured data |
|
||||
|
||||
---
|
||||
|
||||
## XPath Matching Strategy
|
||||
|
||||
The `add_xpath_provenance.py` script uses this matching strategy:
|
||||
|
||||
1. **Exact match**: Claim value appears exactly in element text
|
||||
2. **Normalized match**: After whitespace normalization
|
||||
3. **Substring match**: Claim value is substring of element text (score < 1.0)
|
||||
|
||||
Priority order for matching:
|
||||
1. `rendered.html` (after JS execution) - preferred
|
||||
2. `index.html` (raw HTML) - fallback
|
||||
|
||||
---
|
||||
|
||||
## Integration with LinkML Schema
|
||||
|
||||
The `WebClaim` class in the LinkML schema requires:
|
||||
|
||||
```yaml
|
||||
# schemas/20251121/linkml/modules/classes/WebClaim.yaml
|
||||
WebClaim:
|
||||
slots:
|
||||
- source_url # Required
|
||||
- retrieved_on # Required (timestamp)
|
||||
- xpath # Required for claims
|
||||
- html_archive_path # Path to archived HTML
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rules for AI Agents
|
||||
|
||||
### When Extracting Claims from Websites
|
||||
|
||||
1. **ALWAYS archive the website first** using Playwright
|
||||
2. **ALWAYS extract claims with XPath provenance** using the archived HTML
|
||||
3. **NEVER invent or infer claims** not present in the HTML
|
||||
4. **NEVER use confidence scores** without XPath backing
|
||||
|
||||
### When Processing Existing Claims
|
||||
|
||||
1. **Verify each claim** against archived HTML
|
||||
2. **Add XPath provenance** to verified claims
|
||||
3. **REMOVE fabricated claims** that cannot be verified
|
||||
4. **Document removed claims** in `removed_unverified_claims`
|
||||
|
||||
### When Reviewing Data Quality
|
||||
|
||||
1. Claims with `xpath` + `html_file` = **VERIFIED**
|
||||
2. Claims with only `confidence` = **SUSPECT** (migrate or remove)
|
||||
3. Claims in `removed_unverified_claims` = **FABRICATED** (do not restore)
|
||||
|
||||
---
|
||||
|
||||
## Scripts Reference
|
||||
|
||||
| Script | Purpose |
|
||||
|--------|---------|
|
||||
| `scripts/fetch_website_playwright.py` | Archive website with Playwright |
|
||||
| `scripts/add_xpath_provenance.py` | Add XPath to claims, remove fabricated |
|
||||
| `scripts/batch_fetch_websites.py` | Batch archive multiple entries |
|
||||
|
||||
---
|
||||
|
||||
## Version History
|
||||
|
||||
- **2025-11-29**: Initial version - established XPath provenance requirement
|
||||
- Replaced confidence scores with verifiable XPath pointers
|
||||
- Established policy of removing fabricated claims
|
||||
|
|
@ -25,6 +25,7 @@ import NDEMapPage from './pages/NDEMapPageMapLibre';
|
|||
import NDEStatsPage from './pages/NDEStatsPage';
|
||||
import ProjectPlanPage from './pages/ProjectPlanPage';
|
||||
import OverviewPage from './pages/OverviewPage';
|
||||
import GesprekPage from './pages/GesprekPage';
|
||||
import './App.css';
|
||||
|
||||
// Create router configuration with protected routes
|
||||
|
|
@ -88,6 +89,10 @@ const router = createBrowserRouter([
|
|||
path: 'overview',
|
||||
element: <OverviewPage />,
|
||||
},
|
||||
{
|
||||
path: 'gesprek',
|
||||
element: <GesprekPage />,
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
|
|
|||
1404
frontend/src/components/database/EmbeddingProjector.tsx
Normal file
1404
frontend/src/components/database/EmbeddingProjector.tsx
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -18,6 +18,8 @@ import { useState, useCallback, useMemo } from 'react';
|
|||
import { useQdrant } from '@/hooks/useQdrant';
|
||||
import type { QdrantCollection, QdrantPoint } from '@/hooks/useQdrant';
|
||||
import { useLanguage } from '@/contexts/LanguageContext';
|
||||
import { EmbeddingProjector } from './EmbeddingProjector';
|
||||
import type { EmbeddingPoint } from './EmbeddingProjector';
|
||||
|
||||
interface QdrantPanelProps {
|
||||
compact?: boolean;
|
||||
|
|
@ -85,65 +87,6 @@ const TEXT = {
|
|||
dimensions: { nl: 'dimensies', en: 'dimensions' },
|
||||
};
|
||||
|
||||
// Simple PCA implementation for initial visualization (UMAP/t-SNE would require additional libraries)
|
||||
function computePCA(vectors: number[][], dimensions: number = 2): number[][] {
|
||||
if (vectors.length === 0) return [];
|
||||
|
||||
const n = vectors.length;
|
||||
const d = vectors[0].length;
|
||||
|
||||
// Center the data
|
||||
const means = new Array(d).fill(0);
|
||||
for (const vec of vectors) {
|
||||
for (let i = 0; i < d; i++) {
|
||||
means[i] += vec[i] / n;
|
||||
}
|
||||
}
|
||||
|
||||
const centered = vectors.map(vec => vec.map((v, i) => v - means[i]));
|
||||
|
||||
// Power iteration for top eigenvectors (simplified PCA)
|
||||
const result: number[][] = [];
|
||||
|
||||
for (const vec of centered) {
|
||||
// Simple projection using first `dimensions` components
|
||||
const projected = vec.slice(0, dimensions);
|
||||
result.push(projected);
|
||||
}
|
||||
|
||||
// Normalize to [-1, 1] range
|
||||
const mins = new Array(dimensions).fill(Infinity);
|
||||
const maxs = new Array(dimensions).fill(-Infinity);
|
||||
|
||||
for (const point of result) {
|
||||
for (let i = 0; i < dimensions; i++) {
|
||||
mins[i] = Math.min(mins[i], point[i]);
|
||||
maxs[i] = Math.max(maxs[i], point[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return result.map(point =>
|
||||
point.map((v, i) => {
|
||||
const range = maxs[i] - mins[i];
|
||||
return range > 0 ? ((v - mins[i]) / range) * 2 - 1 : 0;
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
// Color palette for categorical data
|
||||
const COLORS = [
|
||||
'#6366f1', '#8b5cf6', '#a855f7', '#d946ef', '#ec4899',
|
||||
'#f43f5e', '#ef4444', '#f97316', '#f59e0b', '#eab308',
|
||||
'#84cc16', '#22c55e', '#10b981', '#14b8a6', '#06b6d4',
|
||||
'#0ea5e9', '#3b82f6', '#6366f1',
|
||||
];
|
||||
|
||||
// Get color for a category
|
||||
function getCategoryColor(value: string, categories: string[]): string {
|
||||
const index = categories.indexOf(value);
|
||||
return COLORS[index % COLORS.length];
|
||||
}
|
||||
|
||||
// Get status icon
|
||||
const getStatusIcon = (status: string): string => {
|
||||
switch (status) {
|
||||
|
|
@ -188,14 +131,21 @@ export function QdrantPanel({ compact = false }: QdrantPanelProps) {
|
|||
const [isLoadingPoints, setIsLoadingPoints] = useState(false);
|
||||
const [nextOffset, setNextOffset] = useState<string | number | null>(null);
|
||||
|
||||
// Visualization state
|
||||
const [projectedPoints, setProjectedPoints] = useState<number[][]>([]);
|
||||
const [projectionMethod, setProjectionMethod] = useState<'pca' | 'umap' | 'tsne'>('pca');
|
||||
const [colorByField, setColorByField] = useState<string>('');
|
||||
const [selectedPointIndex, setSelectedPointIndex] = useState<number | null>(null);
|
||||
const [isComputing, setIsComputing] = useState(false);
|
||||
// Visualization state (simplified - most logic moved to EmbeddingProjector)
|
||||
const [colorByField, _setColorByField] = useState<string>('');
|
||||
const [_selectedPointIndex, setSelectedPointIndex] = useState<number | null>(null);
|
||||
|
||||
// Extract unique payload fields for color coding
|
||||
// Convert QdrantPoints to EmbeddingPoints for the projector
|
||||
const embeddingPoints: EmbeddingPoint[] = useMemo(() => {
|
||||
return points.map(p => ({
|
||||
id: p.id,
|
||||
vector: p.vector,
|
||||
payload: p.payload,
|
||||
}));
|
||||
}, [points]);
|
||||
|
||||
// Extract unique payload fields for color coding (available for future use)
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
||||
const payloadFields = useMemo(() => {
|
||||
const fields = new Set<string>();
|
||||
for (const point of points) {
|
||||
|
|
@ -205,19 +155,9 @@ export function QdrantPanel({ compact = false }: QdrantPanelProps) {
|
|||
}
|
||||
return Array.from(fields).sort();
|
||||
}, [points]);
|
||||
|
||||
// Get unique values for selected field (for color legend)
|
||||
const fieldCategories = useMemo(() => {
|
||||
if (!colorByField) return [];
|
||||
const values = new Set<string>();
|
||||
for (const point of points) {
|
||||
const value = point.payload[colorByField];
|
||||
if (value !== undefined && value !== null) {
|
||||
values.add(String(value));
|
||||
}
|
||||
}
|
||||
return Array.from(values).slice(0, 20); // Limit to 20 categories
|
||||
}, [points, colorByField]);
|
||||
|
||||
// Suppress unused variable warning - available for future features
|
||||
void payloadFields;
|
||||
|
||||
// Load points from selected collection
|
||||
const loadPoints = useCallback(async (append: boolean = false) => {
|
||||
|
|
@ -244,33 +184,10 @@ export function QdrantPanel({ compact = false }: QdrantPanelProps) {
|
|||
}
|
||||
}, [selectedCollection, nextOffset, scrollPoints]);
|
||||
|
||||
// Compute 2D projection
|
||||
const computeProjection = useCallback(() => {
|
||||
if (points.length === 0) return;
|
||||
|
||||
setIsComputing(true);
|
||||
|
||||
// Use setTimeout to allow UI to update
|
||||
setTimeout(() => {
|
||||
const vectors = points.map(p => p.vector).filter(v => v.length > 0);
|
||||
|
||||
if (vectors.length === 0) {
|
||||
setIsComputing(false);
|
||||
return;
|
||||
}
|
||||
|
||||
// For now, use PCA. UMAP/t-SNE would require additional libraries
|
||||
const projected = computePCA(vectors, 2);
|
||||
setProjectedPoints(projected);
|
||||
setIsComputing(false);
|
||||
}, 100);
|
||||
}, [points]);
|
||||
|
||||
// Select a collection
|
||||
const selectCollection = useCallback(async (collection: QdrantCollection) => {
|
||||
setSelectedCollection(collection);
|
||||
setPoints([]);
|
||||
setProjectedPoints([]);
|
||||
setSelectedPointIndex(null);
|
||||
setExplorerView('data');
|
||||
}, []);
|
||||
|
|
@ -280,7 +197,6 @@ export function QdrantPanel({ compact = false }: QdrantPanelProps) {
|
|||
setExplorerView('list');
|
||||
setSelectedCollection(null);
|
||||
setPoints([]);
|
||||
setProjectedPoints([]);
|
||||
setSelectedPointIndex(null);
|
||||
};
|
||||
|
||||
|
|
@ -507,13 +423,8 @@ export function QdrantPanel({ compact = false }: QdrantPanelProps) {
|
|||
|
||||
{activeTab === 'visualize' && (
|
||||
<div className="visualization-panel">
|
||||
<div className="viz-header">
|
||||
<h3>{t('embeddingVisualization')}</h3>
|
||||
<p>{t('visualizationDescription')}</p>
|
||||
</div>
|
||||
|
||||
{/* Controls */}
|
||||
<div className="viz-controls">
|
||||
{/* Collection selector for visualization */}
|
||||
<div className="viz-collection-selector">
|
||||
<div className="control-group">
|
||||
<label>{t('collections')}:</label>
|
||||
<select
|
||||
|
|
@ -531,125 +442,61 @@ export function QdrantPanel({ compact = false }: QdrantPanelProps) {
|
|||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div className="control-group">
|
||||
<label>{t('projectionMethod')}:</label>
|
||||
<select
|
||||
value={projectionMethod}
|
||||
onChange={(e) => setProjectionMethod(e.target.value as 'pca' | 'umap' | 'tsne')}
|
||||
>
|
||||
<option value="pca">{t('pca')}</option>
|
||||
<option value="umap" disabled>{t('umap')} (coming soon)</option>
|
||||
<option value="tsne" disabled>{t('tsne')} (coming soon)</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div className="control-group">
|
||||
<label>{t('colorBy')}:</label>
|
||||
<select
|
||||
value={colorByField}
|
||||
onChange={(e) => setColorByField(e.target.value)}
|
||||
>
|
||||
<option value="">{t('noField')}</option>
|
||||
{payloadFields.map(field => (
|
||||
<option key={field} value={field}>{field}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div className="control-actions">
|
||||
<button
|
||||
className="primary-button"
|
||||
onClick={() => loadPoints(false)}
|
||||
disabled={!selectedCollection || isLoadingPoints}
|
||||
>
|
||||
{isLoadingPoints ? t('loadingVectors') : t('loadVectors')}
|
||||
</button>
|
||||
<button
|
||||
className="secondary-button"
|
||||
onClick={computeProjection}
|
||||
disabled={points.length === 0 || isComputing}
|
||||
>
|
||||
{isComputing ? t('computing') : t('computeProjection')}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Visualization Canvas */}
|
||||
<div className="viz-container">
|
||||
{projectedPoints.length > 0 ? (
|
||||
<div className="scatter-plot">
|
||||
<svg viewBox="-1.2 -1.2 2.4 2.4" className="viz-svg">
|
||||
{/* Grid lines */}
|
||||
<line x1="-1" y1="0" x2="1" y2="0" stroke="#e2e8f0" strokeWidth="0.01" />
|
||||
<line x1="0" y1="-1" x2="0" y2="1" stroke="#e2e8f0" strokeWidth="0.01" />
|
||||
|
||||
{/* Points */}
|
||||
{projectedPoints.map((point, idx) => {
|
||||
const payload = points[idx]?.payload || {};
|
||||
const colorValue = colorByField ? String(payload[colorByField] ?? '') : '';
|
||||
const color = colorByField && colorValue
|
||||
? getCategoryColor(colorValue, fieldCategories)
|
||||
: '#6366f1';
|
||||
const isSelected = selectedPointIndex === idx;
|
||||
|
||||
return (
|
||||
<circle
|
||||
key={idx}
|
||||
cx={point[0]}
|
||||
cy={-point[1]} // Flip Y axis
|
||||
r={isSelected ? 0.04 : 0.02}
|
||||
fill={color}
|
||||
opacity={isSelected ? 1 : 0.7}
|
||||
stroke={isSelected ? '#1e293b' : 'none'}
|
||||
strokeWidth="0.01"
|
||||
className="viz-point"
|
||||
onClick={() => setSelectedPointIndex(idx)}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
</svg>
|
||||
|
||||
{/* Legend */}
|
||||
{colorByField && fieldCategories.length > 0 && (
|
||||
<div className="viz-legend">
|
||||
<strong>{colorByField}:</strong>
|
||||
{fieldCategories.map((value, idx) => (
|
||||
<span key={value} className="legend-item">
|
||||
<span
|
||||
className="legend-color"
|
||||
style={{ backgroundColor: COLORS[idx % COLORS.length] }}
|
||||
/>
|
||||
{value.length > 20 ? value.slice(0, 17) + '...' : value}
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{selectedCollection && (
|
||||
<div className="viz-load-controls">
|
||||
<button
|
||||
className="primary-button"
|
||||
onClick={() => loadPoints(false)}
|
||||
disabled={isLoadingPoints}
|
||||
>
|
||||
{isLoadingPoints ? t('loadingVectors') : t('loadVectors')}
|
||||
</button>
|
||||
{points.length > 0 && (
|
||||
<span className="loaded-count">
|
||||
{points.length} {t('vectorsLoaded')}
|
||||
</span>
|
||||
)}
|
||||
{nextOffset !== null && points.length > 0 && (
|
||||
<button
|
||||
className="secondary-button"
|
||||
onClick={() => loadPoints(true)}
|
||||
disabled={isLoadingPoints}
|
||||
>
|
||||
Load more
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<div className="viz-placeholder">
|
||||
<p>{points.length > 0
|
||||
? t('computeProjection')
|
||||
: selectedCollection
|
||||
? t('loadVectors')
|
||||
: t('selectCollection')
|
||||
}</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Selected Point Details */}
|
||||
{selectedPointIndex !== null && points[selectedPointIndex] && (
|
||||
<div className="selected-point-details">
|
||||
<h4>{t('selectedPoint')}</h4>
|
||||
<div className="point-info">
|
||||
<div className="point-id">
|
||||
<strong>{t('id')}:</strong> {String(points[selectedPointIndex].id)}
|
||||
</div>
|
||||
<div className="point-payload">
|
||||
<strong>{t('payload')}:</strong>
|
||||
<pre>{JSON.stringify(points[selectedPointIndex].payload, null, 2)}</pre>
|
||||
</div>
|
||||
{/* Embedding Projector */}
|
||||
{points.length > 0 ? (
|
||||
<EmbeddingProjector
|
||||
points={embeddingPoints}
|
||||
onPointSelect={(point) => {
|
||||
if (point) {
|
||||
const idx = points.findIndex(p => p.id === point.id);
|
||||
setSelectedPointIndex(idx >= 0 ? idx : null);
|
||||
} else {
|
||||
setSelectedPointIndex(null);
|
||||
}
|
||||
}}
|
||||
colorByField={colorByField}
|
||||
height={600}
|
||||
/>
|
||||
) : (
|
||||
<div className="viz-placeholder">
|
||||
<div className="placeholder-content">
|
||||
<span className="placeholder-icon">⚡</span>
|
||||
<h3>{t('embeddingVisualization')}</h3>
|
||||
<p>{t('visualizationDescription')}</p>
|
||||
<p className="placeholder-hint">
|
||||
{selectedCollection
|
||||
? t('loadVectors')
|
||||
: t('selectCollection')
|
||||
}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -10,3 +10,5 @@ export { PostgreSQLPanel } from './PostgreSQLPanel';
|
|||
export { TypeDBPanel } from './TypeDBPanel';
|
||||
export { OxigraphPanel } from './OxigraphPanel';
|
||||
export { QdrantPanel } from './QdrantPanel';
|
||||
export { EmbeddingProjector } from './EmbeddingProjector';
|
||||
export type { EmbeddingPoint, ProjectedPoint, ProjectionMethod, ViewMode } from './EmbeddingProjector';
|
||||
|
|
|
|||
434
frontend/src/components/gesprek/GesprekBarChart.tsx
Normal file
434
frontend/src/components/gesprek/GesprekBarChart.tsx
Normal file
|
|
@ -0,0 +1,434 @@
|
|||
/**
|
||||
* GesprekBarChart.tsx - D3 Bar Chart Visualization for Gesprek Page
|
||||
*
|
||||
* Features:
|
||||
* - Horizontal and vertical bar charts
|
||||
* - Animated transitions
|
||||
* - Hover interactions with tooltips
|
||||
* - Responsive sizing
|
||||
* - NDE house style colors
|
||||
*
|
||||
* Uses D3.js v7 with React 19
|
||||
*/
|
||||
|
||||
import React, { useRef, useEffect, useState, useMemo } from 'react';
|
||||
import * as d3 from 'd3';
|
||||
import type { ChartData } from '../../hooks/useMultiDatabaseRAG';
|
||||
|
||||
// NDE House Style Colors
|
||||
const COLORS = {
|
||||
primary: '#154273',
|
||||
secondary: '#2E5A8B',
|
||||
accent: '#3B82F6',
|
||||
background: '#f8fafc',
|
||||
text: '#1e293b',
|
||||
textLight: '#64748b',
|
||||
grid: '#e2e8f0',
|
||||
barDefault: '#154273',
|
||||
barHover: '#3B82F6',
|
||||
};
|
||||
|
||||
// Default color palette for multiple datasets
|
||||
const COLOR_PALETTE = [
|
||||
'#154273', // Primary blue
|
||||
'#ef4444', // Red (museum)
|
||||
'#10b981', // Green (archive)
|
||||
'#f59e0b', // Amber (gallery)
|
||||
'#8b5cf6', // Purple (university)
|
||||
'#ec4899', // Pink
|
||||
'#06b6d4', // Cyan
|
||||
'#84cc16', // Lime
|
||||
];
|
||||
|
||||
export interface GesprekBarChartProps {
|
||||
data: ChartData;
|
||||
width?: number;
|
||||
height?: number;
|
||||
orientation?: 'vertical' | 'horizontal';
|
||||
showGrid?: boolean;
|
||||
showValues?: boolean;
|
||||
animate?: boolean;
|
||||
onBarClick?: (label: string, value: number, datasetIndex: number) => void;
|
||||
language?: 'nl' | 'en';
|
||||
className?: string;
|
||||
title?: string;
|
||||
}
|
||||
|
||||
interface TooltipState {
|
||||
visible: boolean;
|
||||
x: number;
|
||||
y: number;
|
||||
label: string;
|
||||
value: number;
|
||||
dataset: string;
|
||||
}
|
||||
|
||||
export const GesprekBarChart: React.FC<GesprekBarChartProps> = ({
|
||||
data,
|
||||
width = 500,
|
||||
height = 300,
|
||||
orientation = 'vertical',
|
||||
showGrid = true,
|
||||
showValues = true,
|
||||
animate = true,
|
||||
onBarClick,
|
||||
language = 'nl',
|
||||
className,
|
||||
title,
|
||||
}) => {
|
||||
const svgRef = useRef<SVGSVGElement>(null);
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const [tooltip, setTooltip] = useState<TooltipState>({
|
||||
visible: false,
|
||||
x: 0,
|
||||
y: 0,
|
||||
label: '',
|
||||
value: 0,
|
||||
dataset: '',
|
||||
});
|
||||
|
||||
// Margins for axes
|
||||
const margin = useMemo(() => ({
|
||||
top: title ? 40 : 20,
|
||||
right: 20,
|
||||
bottom: 60,
|
||||
left: orientation === 'horizontal' ? 120 : 50,
|
||||
}), [orientation, title]);
|
||||
|
||||
const innerWidth = width - margin.left - margin.right;
|
||||
const innerHeight = height - margin.top - margin.bottom;
|
||||
|
||||
// Main D3 visualization
|
||||
useEffect(() => {
|
||||
if (!svgRef.current || !data.labels.length || !data.datasets.length) return;
|
||||
|
||||
// Clear previous content
|
||||
d3.select(svgRef.current).selectAll('*').remove();
|
||||
|
||||
const svg = d3.select(svgRef.current)
|
||||
.attr('width', width)
|
||||
.attr('height', height)
|
||||
.attr('viewBox', [0, 0, width, height]);
|
||||
|
||||
// Create main group with margins
|
||||
const g = svg.append('g')
|
||||
.attr('transform', `translate(${margin.left},${margin.top})`);
|
||||
|
||||
// Add title if provided
|
||||
if (title) {
|
||||
svg.append('text')
|
||||
.attr('x', width / 2)
|
||||
.attr('y', 20)
|
||||
.attr('text-anchor', 'middle')
|
||||
.attr('font-size', '14px')
|
||||
.attr('font-weight', '600')
|
||||
.attr('fill', COLORS.text)
|
||||
.text(title);
|
||||
}
|
||||
|
||||
// Calculate max value across all datasets
|
||||
const maxValue = d3.max(data.datasets.flatMap(d => d.data)) || 0;
|
||||
|
||||
// Create scales based on orientation
|
||||
const xScale = orientation === 'vertical'
|
||||
? d3.scaleBand()
|
||||
.domain(data.labels)
|
||||
.range([0, innerWidth])
|
||||
.padding(0.2)
|
||||
: d3.scaleLinear()
|
||||
.domain([0, maxValue * 1.1])
|
||||
.range([0, innerWidth])
|
||||
.nice();
|
||||
|
||||
const yScale = orientation === 'vertical'
|
||||
? d3.scaleLinear()
|
||||
.domain([0, maxValue * 1.1])
|
||||
.range([innerHeight, 0])
|
||||
.nice()
|
||||
: d3.scaleBand()
|
||||
.domain(data.labels)
|
||||
.range([0, innerHeight])
|
||||
.padding(0.2);
|
||||
|
||||
// Add grid lines
|
||||
if (showGrid) {
|
||||
const gridGroup = g.append('g').attr('class', 'grid');
|
||||
|
||||
if (orientation === 'vertical') {
|
||||
gridGroup.selectAll('.grid-line')
|
||||
.data((yScale as d3.ScaleLinear<number, number>).ticks(5))
|
||||
.join('line')
|
||||
.attr('class', 'grid-line')
|
||||
.attr('x1', 0)
|
||||
.attr('x2', innerWidth)
|
||||
.attr('y1', d => (yScale as d3.ScaleLinear<number, number>)(d))
|
||||
.attr('y2', d => (yScale as d3.ScaleLinear<number, number>)(d))
|
||||
.attr('stroke', COLORS.grid)
|
||||
.attr('stroke-dasharray', '3,3');
|
||||
} else {
|
||||
gridGroup.selectAll('.grid-line')
|
||||
.data((xScale as d3.ScaleLinear<number, number>).ticks(5))
|
||||
.join('line')
|
||||
.attr('class', 'grid-line')
|
||||
.attr('x1', d => (xScale as d3.ScaleLinear<number, number>)(d))
|
||||
.attr('x2', d => (xScale as d3.ScaleLinear<number, number>)(d))
|
||||
.attr('y1', 0)
|
||||
.attr('y2', innerHeight)
|
||||
.attr('stroke', COLORS.grid)
|
||||
.attr('stroke-dasharray', '3,3');
|
||||
}
|
||||
}
|
||||
|
||||
// Add axes
|
||||
const xAxis = orientation === 'vertical'
|
||||
? d3.axisBottom(xScale as d3.ScaleBand<string>)
|
||||
: d3.axisBottom(xScale as d3.ScaleLinear<number, number>).ticks(5);
|
||||
|
||||
const yAxis = orientation === 'vertical'
|
||||
? d3.axisLeft(yScale as d3.ScaleLinear<number, number>).ticks(5)
|
||||
: d3.axisLeft(yScale as d3.ScaleBand<string>);
|
||||
|
||||
g.append('g')
|
||||
.attr('class', 'x-axis')
|
||||
.attr('transform', `translate(0,${innerHeight})`)
|
||||
.call(xAxis)
|
||||
.selectAll('text')
|
||||
.attr('font-size', '11px')
|
||||
.attr('fill', COLORS.textLight)
|
||||
.attr('transform', orientation === 'vertical' ? 'rotate(-45)' : null)
|
||||
.attr('text-anchor', orientation === 'vertical' ? 'end' : 'middle')
|
||||
.attr('dx', orientation === 'vertical' ? '-0.5em' : null)
|
||||
.attr('dy', orientation === 'vertical' ? '0.5em' : '1em');
|
||||
|
||||
g.append('g')
|
||||
.attr('class', 'y-axis')
|
||||
.call(yAxis)
|
||||
.selectAll('text')
|
||||
.attr('font-size', '11px')
|
||||
.attr('fill', COLORS.textLight);
|
||||
|
||||
// Draw bars for each dataset
|
||||
const numDatasets = data.datasets.length;
|
||||
const bandWidth = orientation === 'vertical'
|
||||
? (xScale as d3.ScaleBand<string>).bandwidth()
|
||||
: (yScale as d3.ScaleBand<string>).bandwidth();
|
||||
const barWidth = bandWidth / numDatasets;
|
||||
|
||||
data.datasets.forEach((dataset, datasetIndex) => {
|
||||
const barGroup = g.append('g')
|
||||
.attr('class', `bars-${datasetIndex}`);
|
||||
|
||||
const color = Array.isArray(dataset.backgroundColor)
|
||||
? dataset.backgroundColor
|
||||
: dataset.backgroundColor || COLOR_PALETTE[datasetIndex % COLOR_PALETTE.length];
|
||||
|
||||
barGroup.selectAll('rect')
|
||||
.data(dataset.data.map((value, i) => ({
|
||||
value,
|
||||
label: data.labels[i],
|
||||
index: i,
|
||||
})))
|
||||
.join('rect')
|
||||
.attr('x', d => {
|
||||
if (orientation === 'vertical') {
|
||||
const bandX = (xScale as d3.ScaleBand<string>)(d.label) || 0;
|
||||
return bandX + barWidth * datasetIndex;
|
||||
}
|
||||
return 0;
|
||||
})
|
||||
.attr('y', d => {
|
||||
if (orientation === 'vertical') {
|
||||
return animate ? innerHeight : (yScale as d3.ScaleLinear<number, number>)(d.value);
|
||||
}
|
||||
const bandY = (yScale as d3.ScaleBand<string>)(d.label) || 0;
|
||||
return bandY + barWidth * datasetIndex;
|
||||
})
|
||||
.attr('width', orientation === 'vertical' ? barWidth - 2 : (animate ? 0 : (xScale as d3.ScaleLinear<number, number>)(0)))
|
||||
.attr('height', orientation === 'vertical'
|
||||
? (animate ? 0 : innerHeight - (yScale as d3.ScaleLinear<number, number>)(0))
|
||||
: barWidth - 2)
|
||||
.attr('fill', d => Array.isArray(color) ? color[d.index % color.length] : color)
|
||||
.attr('rx', 2)
|
||||
.attr('ry', 2)
|
||||
.style('cursor', 'pointer')
|
||||
.on('mouseenter', function(event, d) {
|
||||
d3.select(this)
|
||||
.transition()
|
||||
.duration(200)
|
||||
.attr('opacity', 0.8);
|
||||
|
||||
const [x, y] = d3.pointer(event, containerRef.current);
|
||||
setTooltip({
|
||||
visible: true,
|
||||
x: x + 10,
|
||||
y: y - 10,
|
||||
label: d.label,
|
||||
value: d.value,
|
||||
dataset: dataset.label,
|
||||
});
|
||||
})
|
||||
.on('mouseleave', function() {
|
||||
d3.select(this)
|
||||
.transition()
|
||||
.duration(200)
|
||||
.attr('opacity', 1);
|
||||
|
||||
setTooltip(prev => ({ ...prev, visible: false }));
|
||||
})
|
||||
.on('click', (_event, d) => {
|
||||
if (onBarClick) {
|
||||
onBarClick(d.label, d.value, datasetIndex);
|
||||
}
|
||||
});
|
||||
|
||||
// Animate bars
|
||||
if (animate) {
|
||||
barGroup.selectAll<SVGRectElement, { value: number; label: string; index: number }>('rect')
|
||||
.transition()
|
||||
.duration(800)
|
||||
.delay((_d, i) => i * 50)
|
||||
.ease(d3.easeCubicOut)
|
||||
.attr('y', d => {
|
||||
if (orientation === 'vertical') {
|
||||
return (yScale as d3.ScaleLinear<number, number>)(d.value);
|
||||
}
|
||||
const bandY = (yScale as d3.ScaleBand<string>)(d.label) || 0;
|
||||
return bandY + barWidth * datasetIndex;
|
||||
})
|
||||
.attr('width', d => {
|
||||
if (orientation === 'horizontal') {
|
||||
return (xScale as d3.ScaleLinear<number, number>)(d.value);
|
||||
}
|
||||
return barWidth - 2;
|
||||
})
|
||||
.attr('height', d => {
|
||||
if (orientation === 'vertical') {
|
||||
return innerHeight - (yScale as d3.ScaleLinear<number, number>)(d.value);
|
||||
}
|
||||
return barWidth - 2;
|
||||
});
|
||||
}
|
||||
|
||||
// Add value labels
|
||||
if (showValues) {
|
||||
barGroup.selectAll('.value-label')
|
||||
.data(dataset.data.map((value, i) => ({
|
||||
value,
|
||||
label: data.labels[i],
|
||||
index: i,
|
||||
})))
|
||||
.join('text')
|
||||
.attr('class', 'value-label')
|
||||
.attr('x', d => {
|
||||
if (orientation === 'vertical') {
|
||||
const bandX = (xScale as d3.ScaleBand<string>)(d.label) || 0;
|
||||
return bandX + barWidth * datasetIndex + (barWidth - 2) / 2;
|
||||
}
|
||||
return (xScale as d3.ScaleLinear<number, number>)(d.value) + 5;
|
||||
})
|
||||
.attr('y', d => {
|
||||
if (orientation === 'vertical') {
|
||||
return (yScale as d3.ScaleLinear<number, number>)(d.value) - 5;
|
||||
}
|
||||
const bandY = (yScale as d3.ScaleBand<string>)(d.label) || 0;
|
||||
return bandY + barWidth * datasetIndex + (barWidth - 2) / 2;
|
||||
})
|
||||
.attr('text-anchor', orientation === 'vertical' ? 'middle' : 'start')
|
||||
.attr('dominant-baseline', orientation === 'horizontal' ? 'middle' : 'auto')
|
||||
.attr('font-size', '10px')
|
||||
.attr('font-weight', '500')
|
||||
.attr('fill', COLORS.text)
|
||||
.attr('opacity', animate ? 0 : 1)
|
||||
.text(d => d.value.toLocaleString(language === 'nl' ? 'nl-NL' : 'en-US'));
|
||||
|
||||
// Animate value labels
|
||||
if (animate) {
|
||||
barGroup.selectAll('.value-label')
|
||||
.transition()
|
||||
.duration(800)
|
||||
.delay((_d, i) => i * 50 + 400)
|
||||
.attr('opacity', 1);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Add legend if multiple datasets
|
||||
if (numDatasets > 1) {
|
||||
const legend = svg.append('g')
|
||||
.attr('class', 'legend')
|
||||
.attr('transform', `translate(${margin.left}, ${height - 15})`);
|
||||
|
||||
data.datasets.forEach((dataset, i) => {
|
||||
const legendItem = legend.append('g')
|
||||
.attr('transform', `translate(${i * 100}, 0)`);
|
||||
|
||||
legendItem.append('rect')
|
||||
.attr('width', 12)
|
||||
.attr('height', 12)
|
||||
.attr('rx', 2)
|
||||
.attr('fill', Array.isArray(dataset.backgroundColor)
|
||||
? dataset.backgroundColor[0]
|
||||
: dataset.backgroundColor || COLOR_PALETTE[i % COLOR_PALETTE.length]);
|
||||
|
||||
legendItem.append('text')
|
||||
.attr('x', 16)
|
||||
.attr('y', 10)
|
||||
.attr('font-size', '11px')
|
||||
.attr('fill', COLORS.textLight)
|
||||
.text(dataset.label);
|
||||
});
|
||||
}
|
||||
|
||||
}, [data, width, height, orientation, showGrid, showValues, animate, innerWidth, innerHeight, margin, language, onBarClick]);
|
||||
|
||||
// Empty state
|
||||
if (!data.labels.length || !data.datasets.length) {
|
||||
return (
|
||||
<div className={`gesprek-bar-chart gesprek-bar-chart--empty ${className || ''}`}>
|
||||
<div className="gesprek-bar-chart__empty">
|
||||
<span>{language === 'nl' ? 'Geen gegevens beschikbaar' : 'No data available'}</span>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={containerRef}
|
||||
className={`gesprek-bar-chart ${className || ''}`}
|
||||
style={{ position: 'relative' }}
|
||||
>
|
||||
<svg ref={svgRef} />
|
||||
|
||||
{/* Tooltip */}
|
||||
{tooltip.visible && (
|
||||
<div
|
||||
className="gesprek-bar-chart__tooltip"
|
||||
style={{
|
||||
position: 'absolute',
|
||||
left: tooltip.x,
|
||||
top: tooltip.y,
|
||||
pointerEvents: 'none',
|
||||
zIndex: 100,
|
||||
backgroundColor: 'white',
|
||||
border: '1px solid #e2e8f0',
|
||||
borderRadius: '4px',
|
||||
padding: '8px 12px',
|
||||
boxShadow: '0 2px 8px rgba(0,0,0,0.1)',
|
||||
fontSize: '12px',
|
||||
}}
|
||||
>
|
||||
<div style={{ fontWeight: '600', color: COLORS.text }}>{tooltip.label}</div>
|
||||
{data.datasets.length > 1 && (
|
||||
<div style={{ color: COLORS.textLight, fontSize: '11px' }}>{tooltip.dataset}</div>
|
||||
)}
|
||||
<div style={{ color: COLORS.primary, fontWeight: '500', marginTop: '4px' }}>
|
||||
{tooltip.value.toLocaleString(language === 'nl' ? 'nl-NL' : 'en-US')}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default GesprekBarChart;
|
||||
433
frontend/src/components/gesprek/GesprekGeoMap.tsx
Normal file
433
frontend/src/components/gesprek/GesprekGeoMap.tsx
Normal file
|
|
@ -0,0 +1,433 @@
|
|||
/**
|
||||
* GesprekGeoMap.tsx - D3 Geographic Map Visualization for Gesprek Page
|
||||
*
|
||||
* Features:
|
||||
* - Netherlands province boundaries
|
||||
* - Bubble map with institution markers
|
||||
* - Clustering for dense areas
|
||||
* - Zoom and pan
|
||||
* - Tooltips with institution details
|
||||
*
|
||||
* Uses D3.js v7 with React 19
|
||||
*/
|
||||
|
||||
import React, { useRef, useEffect, useState, useCallback } from 'react';
|
||||
import * as d3 from 'd3';
|
||||
import type { GeoCoordinate, InstitutionData } from '../../hooks/useMultiDatabaseRAG';
|
||||
|
||||
// NDE House Style Colors
|
||||
const COLORS = {
|
||||
primary: '#154273',
|
||||
secondary: '#2E5A8B',
|
||||
accent: '#3B82F6',
|
||||
background: '#f8fafc',
|
||||
water: '#e0f2fe',
|
||||
land: '#f1f5f9',
|
||||
border: '#cbd5e1',
|
||||
marker: '#154273',
|
||||
markerHover: '#3B82F6',
|
||||
markerSelected: '#ef4444',
|
||||
text: '#1e293b',
|
||||
};
|
||||
|
||||
// Institution type to color mapping
|
||||
const TYPE_COLORS: Record<string, string> = {
|
||||
museum: '#ef4444',
|
||||
library: '#3b82f6',
|
||||
archive: '#10b981',
|
||||
gallery: '#f59e0b',
|
||||
university: '#8b5cf6',
|
||||
default: '#154273',
|
||||
};
|
||||
|
||||
export interface GesprekGeoMapProps {
|
||||
coordinates: GeoCoordinate[];
|
||||
width?: number;
|
||||
height?: number;
|
||||
onMarkerClick?: (data: InstitutionData) => void;
|
||||
onMarkerHover?: (data: InstitutionData | null) => void;
|
||||
selectedId?: string | null;
|
||||
language?: 'nl' | 'en';
|
||||
showClustering?: boolean;
|
||||
className?: string;
|
||||
}
|
||||
|
||||
interface TooltipState {
|
||||
visible: boolean;
|
||||
x: number;
|
||||
y: number;
|
||||
data: InstitutionData | null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get marker color based on institution type
|
||||
*/
|
||||
function getMarkerColor(type?: string): string {
|
||||
if (!type) return TYPE_COLORS.default;
|
||||
const normalizedType = type.toLowerCase();
|
||||
|
||||
if (normalizedType.includes('museum')) return TYPE_COLORS.museum;
|
||||
if (normalizedType.includes('bibliotheek') || normalizedType.includes('library')) return TYPE_COLORS.library;
|
||||
if (normalizedType.includes('archief') || normalizedType.includes('archive')) return TYPE_COLORS.archive;
|
||||
if (normalizedType.includes('galerie') || normalizedType.includes('gallery')) return TYPE_COLORS.gallery;
|
||||
if (normalizedType.includes('universiteit') || normalizedType.includes('university')) return TYPE_COLORS.university;
|
||||
|
||||
return TYPE_COLORS.default;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate marker radius based on data (e.g., rating or reviews)
|
||||
*/
|
||||
function getMarkerRadius(data?: InstitutionData): number {
|
||||
if (!data) return 6;
|
||||
|
||||
// Scale based on reviews if available
|
||||
if (data.reviews && data.reviews > 0) {
|
||||
return Math.max(4, Math.min(20, 4 + Math.log10(data.reviews + 1) * 5));
|
||||
}
|
||||
|
||||
// Scale based on rating if available
|
||||
if (data.rating && data.rating > 0) {
|
||||
return Math.max(4, Math.min(15, 4 + data.rating * 2));
|
||||
}
|
||||
|
||||
return 6;
|
||||
}
|
||||
|
||||
export const GesprekGeoMap: React.FC<GesprekGeoMapProps> = ({
|
||||
coordinates,
|
||||
width = 600,
|
||||
height = 500,
|
||||
onMarkerClick,
|
||||
onMarkerHover,
|
||||
selectedId,
|
||||
language = 'nl',
|
||||
showClustering = true,
|
||||
className,
|
||||
}) => {
|
||||
const svgRef = useRef<SVGSVGElement>(null);
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const [tooltip, setTooltip] = useState<TooltipState>({
|
||||
visible: false,
|
||||
x: 0,
|
||||
y: 0,
|
||||
data: null,
|
||||
});
|
||||
const [geoData, setGeoData] = useState<GeoJSON.FeatureCollection | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
// Load Netherlands GeoJSON
|
||||
useEffect(() => {
|
||||
const loadGeoJSON = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
const response = await fetch('/data/netherlands_provinces.geojson');
|
||||
if (!response.ok) throw new Error('Failed to load map data');
|
||||
const data = await response.json();
|
||||
setGeoData(data);
|
||||
setError(null);
|
||||
} catch (err) {
|
||||
console.error('Failed to load GeoJSON:', err);
|
||||
setError(language === 'nl' ? 'Kaartgegevens laden mislukt' : 'Failed to load map data');
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
loadGeoJSON();
|
||||
}, [language]);
|
||||
|
||||
// Fit to bounds function
|
||||
const fitToBounds = useCallback(() => {
|
||||
if (!svgRef.current) return;
|
||||
window.dispatchEvent(new CustomEvent('gesprek-map-fit'));
|
||||
}, []);
|
||||
|
||||
// Main D3 visualization
|
||||
useEffect(() => {
|
||||
if (!svgRef.current || !geoData || loading) return;
|
||||
|
||||
// Clear previous content
|
||||
d3.select(svgRef.current).selectAll('*').remove();
|
||||
|
||||
const svg = d3.select(svgRef.current)
|
||||
.attr('width', width)
|
||||
.attr('height', height)
|
||||
.attr('viewBox', [0, 0, width, height]);
|
||||
|
||||
// Create container group for zoom
|
||||
const g = svg.append('g');
|
||||
|
||||
// Setup projection centered on Netherlands
|
||||
const projection = d3.geoMercator()
|
||||
.center([5.5, 52.2]) // Center of Netherlands
|
||||
.scale(width * 15)
|
||||
.translate([width / 2, height / 2]);
|
||||
|
||||
const pathGenerator = d3.geoPath().projection(projection);
|
||||
|
||||
// Setup zoom behavior
|
||||
const zoom = d3.zoom<SVGSVGElement, unknown>()
|
||||
.scaleExtent([0.5, 20])
|
||||
.on('zoom', (event) => {
|
||||
g.attr('transform', event.transform);
|
||||
});
|
||||
|
||||
svg.call(zoom);
|
||||
|
||||
// Listen for fit-to-bounds event
|
||||
const handleFit = () => {
|
||||
if (coordinates.length === 0) return;
|
||||
|
||||
// Calculate bounds of all markers
|
||||
let minX = Infinity, minY = Infinity, maxX = -Infinity, maxY = -Infinity;
|
||||
coordinates.forEach(coord => {
|
||||
const [x, y] = projection([coord.lng, coord.lat]) || [0, 0];
|
||||
minX = Math.min(minX, x);
|
||||
minY = Math.min(minY, y);
|
||||
maxX = Math.max(maxX, x);
|
||||
maxY = Math.max(maxY, y);
|
||||
});
|
||||
|
||||
const padding = 50;
|
||||
const boundsWidth = maxX - minX + padding * 2;
|
||||
const boundsHeight = maxY - minY + padding * 2;
|
||||
const scale = Math.min(width / boundsWidth, height / boundsHeight, 4);
|
||||
const translateX = (width - boundsWidth * scale) / 2 - (minX - padding) * scale;
|
||||
const translateY = (height - boundsHeight * scale) / 2 - (minY - padding) * scale;
|
||||
|
||||
svg.transition()
|
||||
.duration(750)
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
.call(zoom.transform as any, d3.zoomIdentity.translate(translateX, translateY).scale(scale));
|
||||
};
|
||||
|
||||
window.addEventListener('gesprek-map-fit', handleFit);
|
||||
|
||||
// Draw water background
|
||||
svg.insert('rect', ':first-child')
|
||||
.attr('width', width)
|
||||
.attr('height', height)
|
||||
.attr('fill', COLORS.water);
|
||||
|
||||
// Draw province boundaries
|
||||
g.append('g')
|
||||
.attr('class', 'provinces')
|
||||
.selectAll('path')
|
||||
.data(geoData.features)
|
||||
.join('path')
|
||||
.attr('d', pathGenerator as never)
|
||||
.attr('fill', COLORS.land)
|
||||
.attr('stroke', COLORS.border)
|
||||
.attr('stroke-width', 1)
|
||||
.attr('stroke-linejoin', 'round');
|
||||
|
||||
// Draw markers
|
||||
const markersGroup = g.append('g').attr('class', 'markers');
|
||||
|
||||
// Filter out invalid coordinates
|
||||
const validCoords = coordinates.filter(c =>
|
||||
c.lat && c.lng &&
|
||||
!isNaN(c.lat) && !isNaN(c.lng) &&
|
||||
c.lat >= 50 && c.lat <= 54 && // Netherlands bounds
|
||||
c.lng >= 3 && c.lng <= 8
|
||||
);
|
||||
|
||||
// Simple clustering for dense areas (if enabled)
|
||||
let displayCoords = validCoords;
|
||||
if (showClustering && validCoords.length > 100) {
|
||||
// Use quadtree-based clustering
|
||||
const clustered: GeoCoordinate[] = [];
|
||||
const cellSize = 0.1; // degrees
|
||||
const cells = new Map<string, GeoCoordinate[]>();
|
||||
|
||||
validCoords.forEach(coord => {
|
||||
const key = `${Math.floor(coord.lat / cellSize)},${Math.floor(coord.lng / cellSize)}`;
|
||||
if (!cells.has(key)) cells.set(key, []);
|
||||
cells.get(key)!.push(coord);
|
||||
});
|
||||
|
||||
cells.forEach(cellCoords => {
|
||||
if (cellCoords.length === 1) {
|
||||
clustered.push(cellCoords[0]);
|
||||
} else {
|
||||
// Create cluster centroid
|
||||
const avgLat = cellCoords.reduce((s, c) => s + c.lat, 0) / cellCoords.length;
|
||||
const avgLng = cellCoords.reduce((s, c) => s + c.lng, 0) / cellCoords.length;
|
||||
clustered.push({
|
||||
lat: avgLat,
|
||||
lng: avgLng,
|
||||
label: `${cellCoords.length} ${language === 'nl' ? 'instellingen' : 'institutions'}`,
|
||||
data: {
|
||||
id: `cluster-${avgLat}-${avgLng}`,
|
||||
name: `${cellCoords.length} ${language === 'nl' ? 'instellingen' : 'institutions'}`,
|
||||
reviews: cellCoords.length * 10, // Use for sizing
|
||||
},
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
displayCoords = clustered;
|
||||
}
|
||||
|
||||
// Draw marker circles
|
||||
markersGroup.selectAll('circle')
|
||||
.data(displayCoords)
|
||||
.join('circle')
|
||||
.attr('cx', d => {
|
||||
const [x] = projection([d.lng, d.lat]) || [0, 0];
|
||||
return x;
|
||||
})
|
||||
.attr('cy', d => {
|
||||
const [, y] = projection([d.lng, d.lat]) || [0, 0];
|
||||
return y;
|
||||
})
|
||||
.attr('r', d => getMarkerRadius(d.data))
|
||||
.attr('fill', d => getMarkerColor(d.type))
|
||||
.attr('fill-opacity', 0.7)
|
||||
.attr('stroke', d => selectedId && d.data?.id === selectedId ? COLORS.markerSelected : '#fff')
|
||||
.attr('stroke-width', d => selectedId && d.data?.id === selectedId ? 3 : 1.5)
|
||||
.style('cursor', 'pointer')
|
||||
.on('mouseenter', function(event, d) {
|
||||
d3.select(this)
|
||||
.transition()
|
||||
.duration(200)
|
||||
.attr('r', getMarkerRadius(d.data) * 1.3)
|
||||
.attr('fill-opacity', 1);
|
||||
|
||||
const [x, y] = d3.pointer(event, containerRef.current);
|
||||
setTooltip({
|
||||
visible: true,
|
||||
x: x + 10,
|
||||
y: y - 10,
|
||||
data: d.data || null,
|
||||
});
|
||||
|
||||
if (onMarkerHover && d.data) {
|
||||
onMarkerHover(d.data);
|
||||
}
|
||||
})
|
||||
.on('mouseleave', function(_event, d) {
|
||||
d3.select(this)
|
||||
.transition()
|
||||
.duration(200)
|
||||
.attr('r', getMarkerRadius(d.data))
|
||||
.attr('fill-opacity', 0.7);
|
||||
|
||||
setTooltip(prev => ({ ...prev, visible: false }));
|
||||
|
||||
if (onMarkerHover) {
|
||||
onMarkerHover(null);
|
||||
}
|
||||
})
|
||||
.on('click', (_event, d) => {
|
||||
if (onMarkerClick && d.data) {
|
||||
onMarkerClick(d.data);
|
||||
}
|
||||
});
|
||||
|
||||
// Cleanup
|
||||
return () => {
|
||||
window.removeEventListener('gesprek-map-fit', handleFit);
|
||||
};
|
||||
}, [geoData, coordinates, width, height, loading, selectedId, showClustering, language, onMarkerClick, onMarkerHover]);
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<div className={`gesprek-geo-map gesprek-geo-map--loading ${className || ''}`}>
|
||||
<div className="gesprek-geo-map__loading">
|
||||
<div className="gesprek-geo-map__spinner" />
|
||||
<span>{language === 'nl' ? 'Kaart laden...' : 'Loading map...'}</span>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (error) {
|
||||
return (
|
||||
<div className={`gesprek-geo-map gesprek-geo-map--error ${className || ''}`}>
|
||||
<span>{error}</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={containerRef}
|
||||
className={`gesprek-geo-map ${className || ''}`}
|
||||
style={{ position: 'relative' }}
|
||||
>
|
||||
<svg ref={svgRef} />
|
||||
|
||||
{/* Tooltip */}
|
||||
{tooltip.visible && tooltip.data && (
|
||||
<div
|
||||
className="gesprek-geo-map__tooltip"
|
||||
style={{
|
||||
position: 'absolute',
|
||||
left: tooltip.x,
|
||||
top: tooltip.y,
|
||||
pointerEvents: 'none',
|
||||
zIndex: 100,
|
||||
}}
|
||||
>
|
||||
<div className="gesprek-geo-map__tooltip-name">{tooltip.data.name}</div>
|
||||
{tooltip.data.type && (
|
||||
<div className="gesprek-geo-map__tooltip-type">{tooltip.data.type}</div>
|
||||
)}
|
||||
{tooltip.data.city && (
|
||||
<div className="gesprek-geo-map__tooltip-city">{tooltip.data.city}</div>
|
||||
)}
|
||||
{tooltip.data.rating && tooltip.data.rating > 0 && (
|
||||
<div className="gesprek-geo-map__tooltip-rating">
|
||||
{'★'.repeat(Math.round(tooltip.data.rating))}
|
||||
{' '}
|
||||
{tooltip.data.rating.toFixed(1)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Map controls */}
|
||||
<div className="gesprek-geo-map__controls">
|
||||
<button
|
||||
className="gesprek-geo-map__control-btn"
|
||||
onClick={fitToBounds}
|
||||
title={language === 'nl' ? 'Zoom naar markers' : 'Fit to markers'}
|
||||
>
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
|
||||
<path d="M15 3h6v6M9 21H3v-6M21 3l-7 7M3 21l7-7" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Legend */}
|
||||
<div className="gesprek-geo-map__legend">
|
||||
<div className="gesprek-geo-map__legend-item">
|
||||
<span className="gesprek-geo-map__legend-dot" style={{ backgroundColor: TYPE_COLORS.museum }} />
|
||||
<span>Museum</span>
|
||||
</div>
|
||||
<div className="gesprek-geo-map__legend-item">
|
||||
<span className="gesprek-geo-map__legend-dot" style={{ backgroundColor: TYPE_COLORS.library }} />
|
||||
<span>{language === 'nl' ? 'Bibliotheek' : 'Library'}</span>
|
||||
</div>
|
||||
<div className="gesprek-geo-map__legend-item">
|
||||
<span className="gesprek-geo-map__legend-dot" style={{ backgroundColor: TYPE_COLORS.archive }} />
|
||||
<span>{language === 'nl' ? 'Archief' : 'Archive'}</span>
|
||||
</div>
|
||||
<div className="gesprek-geo-map__legend-item">
|
||||
<span className="gesprek-geo-map__legend-dot" style={{ backgroundColor: TYPE_COLORS.gallery }} />
|
||||
<span>{language === 'nl' ? 'Galerie' : 'Gallery'}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Stats */}
|
||||
<div className="gesprek-geo-map__stats">
|
||||
{coordinates.length} {language === 'nl' ? 'locaties' : 'locations'}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default GesprekGeoMap;
|
||||
549
frontend/src/components/gesprek/GesprekNetworkGraph.tsx
Normal file
549
frontend/src/components/gesprek/GesprekNetworkGraph.tsx
Normal file
|
|
@ -0,0 +1,549 @@
|
|||
/**
|
||||
* GesprekNetworkGraph.tsx - D3 Force-Directed Network Graph for Gesprek Page
|
||||
*
|
||||
* Features:
|
||||
* - Force-directed layout with collision detection
|
||||
* - Node dragging
|
||||
* - Zoom and pan
|
||||
* - Node and edge highlighting on hover
|
||||
* - Tooltips with entity details
|
||||
* - Edge labels
|
||||
*
|
||||
* Uses D3.js v7 with React 19
|
||||
*/
|
||||
|
||||
import React, { useRef, useEffect, useState, useMemo } from 'react';
|
||||
import * as d3 from 'd3';
|
||||
import type { GraphVisualizationData } from '../../hooks/useMultiDatabaseRAG';
|
||||
|
||||
// NDE House Style Colors
|
||||
const COLORS = {
|
||||
primary: '#154273',
|
||||
secondary: '#2E5A8B',
|
||||
accent: '#3B82F6',
|
||||
background: '#f8fafc',
|
||||
text: '#1e293b',
|
||||
textLight: '#64748b',
|
||||
link: '#94a3b8',
|
||||
linkHighlight: '#3B82F6',
|
||||
nodeStroke: '#fff',
|
||||
};
|
||||
|
||||
// Node type to color mapping
|
||||
const NODE_TYPE_COLORS: Record<string, string> = {
|
||||
institution: '#154273',
|
||||
museum: '#ef4444',
|
||||
library: '#3b82f6',
|
||||
archive: '#10b981',
|
||||
gallery: '#f59e0b',
|
||||
person: '#8b5cf6',
|
||||
collection: '#ec4899',
|
||||
event: '#06b6d4',
|
||||
place: '#84cc16',
|
||||
organization: '#154273',
|
||||
default: '#154273',
|
||||
};
|
||||
|
||||
export interface GesprekNetworkGraphProps {
|
||||
data: GraphVisualizationData;
|
||||
width?: number;
|
||||
height?: number;
|
||||
onNodeClick?: (nodeId: string, nodeData: GraphVisualizationData['nodes'][0]) => void;
|
||||
onNodeHover?: (nodeId: string | null, nodeData: GraphVisualizationData['nodes'][0] | null) => void;
|
||||
selectedNodeId?: string | null;
|
||||
language?: 'nl' | 'en';
|
||||
showLabels?: boolean;
|
||||
showEdgeLabels?: boolean;
|
||||
className?: string;
|
||||
}
|
||||
|
||||
interface SimulationNode extends d3.SimulationNodeDatum {
|
||||
id: string;
|
||||
label: string;
|
||||
type: string;
|
||||
attributes?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
interface SimulationLink extends d3.SimulationLinkDatum<SimulationNode> {
|
||||
id: string;
|
||||
label: string;
|
||||
type?: string;
|
||||
}
|
||||
|
||||
interface TooltipState {
|
||||
visible: boolean;
|
||||
x: number;
|
||||
y: number;
|
||||
node: GraphVisualizationData['nodes'][0] | null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get node color based on type
|
||||
*/
|
||||
function getNodeColor(type?: string): string {
|
||||
if (!type) return NODE_TYPE_COLORS.default;
|
||||
const normalizedType = type.toLowerCase();
|
||||
|
||||
for (const [key, color] of Object.entries(NODE_TYPE_COLORS)) {
|
||||
if (normalizedType.includes(key)) {
|
||||
return color;
|
||||
}
|
||||
}
|
||||
|
||||
return NODE_TYPE_COLORS.default;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get node radius based on connections
|
||||
*/
|
||||
function getNodeRadius(id: string, edges: GraphVisualizationData['edges']): number {
|
||||
const connections = edges.filter(e => e.source === id || e.target === id).length;
|
||||
return Math.max(8, Math.min(25, 8 + connections * 2));
|
||||
}
|
||||
|
||||
export const GesprekNetworkGraph: React.FC<GesprekNetworkGraphProps> = ({
|
||||
data,
|
||||
width = 600,
|
||||
height = 400,
|
||||
onNodeClick,
|
||||
onNodeHover,
|
||||
selectedNodeId,
|
||||
language = 'nl',
|
||||
showLabels = true,
|
||||
showEdgeLabels = false,
|
||||
className,
|
||||
}) => {
|
||||
const svgRef = useRef<SVGSVGElement>(null);
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const simulationRef = useRef<d3.Simulation<SimulationNode, SimulationLink> | null>(null);
|
||||
const [tooltip, setTooltip] = useState<TooltipState>({
|
||||
visible: false,
|
||||
x: 0,
|
||||
y: 0,
|
||||
node: null,
|
||||
});
|
||||
|
||||
// Process data for D3 simulation
|
||||
const { nodes, links } = useMemo(() => {
|
||||
const simNodes: SimulationNode[] = data.nodes.map(node => ({
|
||||
...node,
|
||||
x: undefined,
|
||||
y: undefined,
|
||||
}));
|
||||
|
||||
const simLinks: SimulationLink[] = data.edges.map(edge => ({
|
||||
...edge,
|
||||
source: edge.source,
|
||||
target: edge.target,
|
||||
}));
|
||||
|
||||
return { nodes: simNodes, links: simLinks };
|
||||
}, [data]);
|
||||
|
||||
// Main D3 visualization
|
||||
useEffect(() => {
|
||||
if (!svgRef.current || nodes.length === 0) return;
|
||||
|
||||
// Clear previous content
|
||||
d3.select(svgRef.current).selectAll('*').remove();
|
||||
|
||||
const svg = d3.select(svgRef.current)
|
||||
.attr('width', width)
|
||||
.attr('height', height)
|
||||
.attr('viewBox', [0, 0, width, height]);
|
||||
|
||||
// Create container group for zoom
|
||||
const g = svg.append('g');
|
||||
|
||||
// Create arrow marker for directed edges
|
||||
svg.append('defs')
|
||||
.append('marker')
|
||||
.attr('id', 'arrowhead')
|
||||
.attr('viewBox', '0 -5 10 10')
|
||||
.attr('refX', 20)
|
||||
.attr('refY', 0)
|
||||
.attr('markerWidth', 6)
|
||||
.attr('markerHeight', 6)
|
||||
.attr('orient', 'auto')
|
||||
.append('path')
|
||||
.attr('d', 'M0,-5L10,0L0,5')
|
||||
.attr('fill', COLORS.link);
|
||||
|
||||
// Setup zoom behavior
|
||||
const zoom = d3.zoom<SVGSVGElement, unknown>()
|
||||
.scaleExtent([0.2, 4])
|
||||
.on('zoom', (event) => {
|
||||
g.attr('transform', event.transform);
|
||||
});
|
||||
|
||||
svg.call(zoom);
|
||||
|
||||
// Create force simulation
|
||||
const simulation = d3.forceSimulation<SimulationNode>(nodes)
|
||||
.force('link', d3.forceLink<SimulationNode, SimulationLink>(links)
|
||||
.id(d => d.id)
|
||||
.distance(100)
|
||||
.strength(0.5))
|
||||
.force('charge', d3.forceManyBody().strength(-300))
|
||||
.force('center', d3.forceCenter(width / 2, height / 2))
|
||||
.force('collision', d3.forceCollide<SimulationNode>().radius(d => getNodeRadius(d.id, data.edges) + 10));
|
||||
|
||||
simulationRef.current = simulation;
|
||||
|
||||
// Draw links
|
||||
const linksGroup = g.append('g').attr('class', 'links');
|
||||
|
||||
const link = linksGroup.selectAll('.link')
|
||||
.data(links)
|
||||
.join('g')
|
||||
.attr('class', 'link');
|
||||
|
||||
const linkLine = link.append('line')
|
||||
.attr('stroke', COLORS.link)
|
||||
.attr('stroke-width', 1.5)
|
||||
.attr('stroke-opacity', 0.6)
|
||||
.attr('marker-end', 'url(#arrowhead)');
|
||||
|
||||
// Edge labels
|
||||
let linkLabels: d3.Selection<SVGTextElement, SimulationLink, SVGGElement, unknown> | null = null;
|
||||
if (showEdgeLabels) {
|
||||
linkLabels = link.append('text')
|
||||
.attr('class', 'link-label')
|
||||
.attr('font-size', '9px')
|
||||
.attr('fill', COLORS.textLight)
|
||||
.attr('text-anchor', 'middle')
|
||||
.attr('dy', '-5')
|
||||
.text(d => d.label || '');
|
||||
}
|
||||
|
||||
// Draw nodes
|
||||
const nodesGroup = g.append('g').attr('class', 'nodes');
|
||||
|
||||
const node = nodesGroup.selectAll('.node')
|
||||
.data(nodes)
|
||||
.join('g')
|
||||
.attr('class', 'node')
|
||||
.style('cursor', 'pointer');
|
||||
|
||||
// Node circles
|
||||
const nodeCircle = node.append('circle')
|
||||
.attr('r', d => getNodeRadius(d.id, data.edges))
|
||||
.attr('fill', d => getNodeColor(d.type))
|
||||
.attr('stroke', d => selectedNodeId === d.id ? COLORS.accent : COLORS.nodeStroke)
|
||||
.attr('stroke-width', d => selectedNodeId === d.id ? 3 : 2)
|
||||
.attr('stroke-opacity', 0.9);
|
||||
|
||||
// Node labels
|
||||
if (showLabels) {
|
||||
node.append('text')
|
||||
.attr('class', 'node-label')
|
||||
.attr('dy', d => getNodeRadius(d.id, data.edges) + 12)
|
||||
.attr('text-anchor', 'middle')
|
||||
.attr('font-size', '10px')
|
||||
.attr('font-weight', '500')
|
||||
.attr('fill', COLORS.text)
|
||||
.text(d => {
|
||||
const maxLength = 15;
|
||||
return d.label.length > maxLength
|
||||
? d.label.substring(0, maxLength) + '...'
|
||||
: d.label;
|
||||
});
|
||||
}
|
||||
|
||||
// Drag behavior
|
||||
const drag = d3.drag<SVGGElement, SimulationNode>()
|
||||
.on('start', (event, d) => {
|
||||
if (!event.active) simulation.alphaTarget(0.3).restart();
|
||||
d.fx = d.x;
|
||||
d.fy = d.y;
|
||||
})
|
||||
.on('drag', (event, d) => {
|
||||
d.fx = event.x;
|
||||
d.fy = event.y;
|
||||
})
|
||||
.on('end', (event, d) => {
|
||||
if (!event.active) simulation.alphaTarget(0);
|
||||
d.fx = null;
|
||||
d.fy = null;
|
||||
});
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
node.call(drag as any);
|
||||
|
||||
// Hover interactions
|
||||
node
|
||||
.on('mouseenter', function(event, d) {
|
||||
// Highlight node
|
||||
d3.select(this).select('circle')
|
||||
.transition()
|
||||
.duration(200)
|
||||
.attr('r', getNodeRadius(d.id, data.edges) * 1.2)
|
||||
.attr('stroke', COLORS.accent)
|
||||
.attr('stroke-width', 3);
|
||||
|
||||
// Highlight connected links
|
||||
linkLine
|
||||
.attr('stroke', l =>
|
||||
(l.source as SimulationNode).id === d.id || (l.target as SimulationNode).id === d.id
|
||||
? COLORS.linkHighlight
|
||||
: COLORS.link
|
||||
)
|
||||
.attr('stroke-width', l =>
|
||||
(l.source as SimulationNode).id === d.id || (l.target as SimulationNode).id === d.id
|
||||
? 2.5
|
||||
: 1.5
|
||||
)
|
||||
.attr('stroke-opacity', l =>
|
||||
(l.source as SimulationNode).id === d.id || (l.target as SimulationNode).id === d.id
|
||||
? 1
|
||||
: 0.3
|
||||
);
|
||||
|
||||
// Fade unconnected nodes
|
||||
nodeCircle
|
||||
.attr('opacity', n => {
|
||||
if (n.id === d.id) return 1;
|
||||
const connected = links.some(l =>
|
||||
((l.source as SimulationNode).id === d.id && (l.target as SimulationNode).id === n.id) ||
|
||||
((l.target as SimulationNode).id === d.id && (l.source as SimulationNode).id === n.id)
|
||||
);
|
||||
return connected ? 1 : 0.3;
|
||||
});
|
||||
|
||||
// Show tooltip
|
||||
const [x, y] = d3.pointer(event, containerRef.current);
|
||||
setTooltip({
|
||||
visible: true,
|
||||
x: x + 10,
|
||||
y: y - 10,
|
||||
node: data.nodes.find(n => n.id === d.id) || null,
|
||||
});
|
||||
|
||||
if (onNodeHover) {
|
||||
onNodeHover(d.id, data.nodes.find(n => n.id === d.id) || null);
|
||||
}
|
||||
})
|
||||
.on('mouseleave', function(_event, d) {
|
||||
// Reset node
|
||||
d3.select(this).select('circle')
|
||||
.transition()
|
||||
.duration(200)
|
||||
.attr('r', getNodeRadius(d.id, data.edges))
|
||||
.attr('stroke', selectedNodeId === d.id ? COLORS.accent : COLORS.nodeStroke)
|
||||
.attr('stroke-width', selectedNodeId === d.id ? 3 : 2);
|
||||
|
||||
// Reset links
|
||||
linkLine
|
||||
.attr('stroke', COLORS.link)
|
||||
.attr('stroke-width', 1.5)
|
||||
.attr('stroke-opacity', 0.6);
|
||||
|
||||
// Reset nodes
|
||||
nodeCircle.attr('opacity', 1);
|
||||
|
||||
setTooltip(prev => ({ ...prev, visible: false }));
|
||||
|
||||
if (onNodeHover) {
|
||||
onNodeHover(null, null);
|
||||
}
|
||||
})
|
||||
.on('click', (_event, d) => {
|
||||
if (onNodeClick) {
|
||||
onNodeClick(d.id, data.nodes.find(n => n.id === d.id)!);
|
||||
}
|
||||
});
|
||||
|
||||
// Update positions on simulation tick
|
||||
simulation.on('tick', () => {
|
||||
linkLine
|
||||
.attr('x1', d => (d.source as SimulationNode).x || 0)
|
||||
.attr('y1', d => (d.source as SimulationNode).y || 0)
|
||||
.attr('x2', d => (d.target as SimulationNode).x || 0)
|
||||
.attr('y2', d => (d.target as SimulationNode).y || 0);
|
||||
|
||||
if (linkLabels) {
|
||||
linkLabels
|
||||
.attr('x', d => (((d.source as SimulationNode).x || 0) + ((d.target as SimulationNode).x || 0)) / 2)
|
||||
.attr('y', d => (((d.source as SimulationNode).y || 0) + ((d.target as SimulationNode).y || 0)) / 2);
|
||||
}
|
||||
|
||||
node.attr('transform', d => `translate(${d.x || 0},${d.y || 0})`);
|
||||
});
|
||||
|
||||
// Fit to bounds function
|
||||
const handleFit = () => {
|
||||
const bounds = g.node()?.getBBox();
|
||||
if (!bounds) return;
|
||||
|
||||
const padding = 50;
|
||||
const fullWidth = bounds.width + padding * 2;
|
||||
const fullHeight = bounds.height + padding * 2;
|
||||
const scale = Math.min(width / fullWidth, height / fullHeight, 1.5);
|
||||
const translateX = (width - bounds.width * scale) / 2 - bounds.x * scale;
|
||||
const translateY = (height - bounds.height * scale) / 2 - bounds.y * scale;
|
||||
|
||||
svg.transition()
|
||||
.duration(750)
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
.call(zoom.transform as any, d3.zoomIdentity.translate(translateX, translateY).scale(scale));
|
||||
};
|
||||
|
||||
window.addEventListener('gesprek-network-fit', handleFit);
|
||||
|
||||
// Initial fit after simulation stabilizes
|
||||
simulation.on('end', () => {
|
||||
setTimeout(handleFit, 100);
|
||||
});
|
||||
|
||||
// Cleanup
|
||||
return () => {
|
||||
simulation.stop();
|
||||
window.removeEventListener('gesprek-network-fit', handleFit);
|
||||
};
|
||||
}, [data, nodes, links, width, height, selectedNodeId, showLabels, showEdgeLabels, onNodeClick, onNodeHover]);
|
||||
|
||||
// Control handlers
|
||||
const handleFitToBounds = () => {
|
||||
window.dispatchEvent(new CustomEvent('gesprek-network-fit'));
|
||||
};
|
||||
|
||||
const handleRestartSimulation = () => {
|
||||
if (simulationRef.current) {
|
||||
simulationRef.current.alpha(1).restart();
|
||||
}
|
||||
};
|
||||
|
||||
// Empty state
|
||||
if (nodes.length === 0) {
|
||||
return (
|
||||
<div className={`gesprek-network-graph gesprek-network-graph--empty ${className || ''}`}>
|
||||
<div className="gesprek-network-graph__empty">
|
||||
<span>{language === 'nl' ? 'Geen netwerkgegevens beschikbaar' : 'No network data available'}</span>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={containerRef}
|
||||
className={`gesprek-network-graph ${className || ''}`}
|
||||
style={{ position: 'relative' }}
|
||||
>
|
||||
<svg ref={svgRef} />
|
||||
|
||||
{/* Tooltip */}
|
||||
{tooltip.visible && tooltip.node && (
|
||||
<div
|
||||
className="gesprek-network-graph__tooltip"
|
||||
style={{
|
||||
position: 'absolute',
|
||||
left: Math.min(tooltip.x, width - 180),
|
||||
top: Math.max(tooltip.y, 10),
|
||||
pointerEvents: 'none',
|
||||
zIndex: 100,
|
||||
backgroundColor: 'white',
|
||||
border: '1px solid #e2e8f0',
|
||||
borderRadius: '4px',
|
||||
padding: '8px 12px',
|
||||
boxShadow: '0 2px 8px rgba(0,0,0,0.1)',
|
||||
fontSize: '12px',
|
||||
maxWidth: '180px',
|
||||
}}
|
||||
>
|
||||
<div style={{ fontWeight: '600', color: COLORS.text }}>{tooltip.node.label}</div>
|
||||
{tooltip.node.type && (
|
||||
<div style={{
|
||||
display: 'inline-block',
|
||||
marginTop: '4px',
|
||||
padding: '2px 6px',
|
||||
fontSize: '10px',
|
||||
borderRadius: '3px',
|
||||
backgroundColor: getNodeColor(tooltip.node.type) + '20',
|
||||
color: getNodeColor(tooltip.node.type),
|
||||
}}>
|
||||
{tooltip.node.type}
|
||||
</div>
|
||||
)}
|
||||
{tooltip.node.attributes && Object.keys(tooltip.node.attributes).length > 0 && (
|
||||
<div style={{ marginTop: '6px', fontSize: '11px', color: COLORS.textLight }}>
|
||||
{Object.entries(tooltip.node.attributes).slice(0, 3).map(([key, value]) => (
|
||||
<div key={key}>
|
||||
<span style={{ fontWeight: '500' }}>{key}:</span> {String(value)}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Controls */}
|
||||
<div
|
||||
className="gesprek-network-graph__controls"
|
||||
style={{
|
||||
position: 'absolute',
|
||||
top: '5px',
|
||||
right: '5px',
|
||||
display: 'flex',
|
||||
gap: '4px',
|
||||
}}
|
||||
>
|
||||
<button
|
||||
onClick={handleFitToBounds}
|
||||
title={language === 'nl' ? 'Zoom aanpassen' : 'Fit to view'}
|
||||
style={{
|
||||
padding: '4px 8px',
|
||||
fontSize: '11px',
|
||||
backgroundColor: 'white',
|
||||
border: '1px solid #e2e8f0',
|
||||
borderRadius: '4px',
|
||||
cursor: 'pointer',
|
||||
color: COLORS.textLight,
|
||||
}}
|
||||
>
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
|
||||
<path d="M15 3h6v6M9 21H3v-6M21 3l-7 7M3 21l7-7" />
|
||||
</svg>
|
||||
</button>
|
||||
<button
|
||||
onClick={handleRestartSimulation}
|
||||
title={language === 'nl' ? 'Herstart simulatie' : 'Restart simulation'}
|
||||
style={{
|
||||
padding: '4px 8px',
|
||||
fontSize: '11px',
|
||||
backgroundColor: 'white',
|
||||
border: '1px solid #e2e8f0',
|
||||
borderRadius: '4px',
|
||||
cursor: 'pointer',
|
||||
color: COLORS.textLight,
|
||||
}}
|
||||
>
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
|
||||
<path d="M1 4v6h6M23 20v-6h-6" />
|
||||
<path d="M20.49 9A9 9 0 0 0 5.64 5.64L1 10m22 4l-4.64 4.36A9 9 0 0 1 3.51 15" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Legend */}
|
||||
<div
|
||||
className="gesprek-network-graph__legend"
|
||||
style={{
|
||||
position: 'absolute',
|
||||
bottom: '5px',
|
||||
left: '5px',
|
||||
display: 'flex',
|
||||
gap: '12px',
|
||||
fontSize: '10px',
|
||||
color: COLORS.textLight,
|
||||
}}
|
||||
>
|
||||
<span>{nodes.length} {language === 'nl' ? 'knopen' : 'nodes'}</span>
|
||||
<span>•</span>
|
||||
<span>{links.length} {language === 'nl' ? 'verbindingen' : 'edges'}</span>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default GesprekNetworkGraph;
|
||||
497
frontend/src/components/gesprek/GesprekTimeline.tsx
Normal file
497
frontend/src/components/gesprek/GesprekTimeline.tsx
Normal file
|
|
@ -0,0 +1,497 @@
|
|||
/**
|
||||
* GesprekTimeline.tsx - D3 Timeline Visualization for Gesprek Page
|
||||
*
|
||||
* Features:
|
||||
* - Horizontal timeline with event markers
|
||||
* - Zoom and pan support
|
||||
* - Event clustering for dense periods
|
||||
* - Tooltips with event details
|
||||
* - Animated transitions
|
||||
*
|
||||
* Uses D3.js v7 with React 19
|
||||
*/
|
||||
|
||||
import React, { useRef, useEffect, useState, useMemo } from 'react';
|
||||
import * as d3 from 'd3';
|
||||
import type { TimelineEvent } from '../../hooks/useMultiDatabaseRAG';
|
||||
|
||||
// NDE House Style Colors
|
||||
// NDE House Style Colors used throughout the timeline (markers, axis, text).
const COLORS = {
  primary: '#154273',
  secondary: '#2E5A8B',
  accent: '#3B82F6',
  background: '#f8fafc',
  text: '#1e293b',
  textLight: '#64748b',
  axis: '#94a3b8',
  axisLine: '#cbd5e1',
  marker: '#154273',
  markerHover: '#3B82F6',
};

// Event type to color mapping. Matching in getEventColor is substring-based,
// so these keys act as patterns, not exact identifiers.
const EVENT_TYPE_COLORS: Record<string, string> = {
  founding: '#10b981', // Green
  closure: '#ef4444', // Red
  merger: '#8b5cf6', // Purple
  relocation: '#f59e0b', // Amber
  name_change: '#06b6d4', // Cyan
  acquisition: '#ec4899', // Pink
  default: '#154273', // Primary blue — fallback for unrecognized types
};

// Public props for the GesprekTimeline component.
export interface GesprekTimelineProps {
  // Raw events; entries with unparseable dates are silently dropped.
  events: TimelineEvent[];
  width?: number;
  height?: number;
  // Fired when a marker is clicked.
  onEventClick?: (event: TimelineEvent) => void;
  // Fired with the hovered event, or null on mouse-leave.
  onEventHover?: (event: TimelineEvent | null) => void;
  // NOTE(review): markers are compared against this via `event.date`, so it
  // appears to hold an event's date string rather than a separate id — confirm.
  selectedEventId?: string | null;
  language?: 'nl' | 'en';
  showLabels?: boolean;
  className?: string;
}

// A TimelineEvent whose date string has been successfully parsed.
interface ParsedEvent extends TimelineEvent {
  parsedDate: Date;
}

// Local state for the hover tooltip (position is relative to the container).
interface TooltipState {
  visible: boolean;
  x: number;
  y: number;
  event: TimelineEvent | null;
}
|
||||
|
||||
/**
|
||||
* Parse various date formats to Date objects
|
||||
*/
|
||||
function parseDate(dateStr: string): Date | null {
|
||||
if (!dateStr) return null;
|
||||
|
||||
// Try ISO format first
|
||||
let date = new Date(dateStr);
|
||||
if (!isNaN(date.getTime())) return date;
|
||||
|
||||
// Try year-only format
|
||||
const yearMatch = dateStr.match(/^(\d{4})$/);
|
||||
if (yearMatch) {
|
||||
return new Date(parseInt(yearMatch[1]), 0, 1);
|
||||
}
|
||||
|
||||
// Try "Month YYYY" format
|
||||
const monthYearMatch = dateStr.match(/^(\w+)\s+(\d{4})$/);
|
||||
if (monthYearMatch) {
|
||||
date = new Date(`${monthYearMatch[1]} 1, ${monthYearMatch[2]}`);
|
||||
if (!isNaN(date.getTime())) return date;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get marker color based on event type
|
||||
*/
|
||||
function getEventColor(type?: string): string {
|
||||
if (!type) return EVENT_TYPE_COLORS.default;
|
||||
const normalizedType = type.toLowerCase().replace(/[_-]/g, '');
|
||||
|
||||
for (const [key, color] of Object.entries(EVENT_TYPE_COLORS)) {
|
||||
if (normalizedType.includes(key) || key.includes(normalizedType)) {
|
||||
return color;
|
||||
}
|
||||
}
|
||||
|
||||
return EVENT_TYPE_COLORS.default;
|
||||
}
|
||||
|
||||
/**
|
||||
* Format date for display
|
||||
*/
|
||||
function formatDate(date: Date, language: 'nl' | 'en'): string {
|
||||
return date.toLocaleDateString(language === 'nl' ? 'nl-NL' : 'en-US', {
|
||||
year: 'numeric',
|
||||
month: 'short',
|
||||
day: date.getDate() !== 1 ? 'numeric' : undefined,
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Horizontal D3 timeline of heritage-custodian events.
 *
 * Events with unparseable dates are dropped; the rest are sorted and drawn as
 * color-coded circular markers on a central axis, with optional alternating
 * labels above/below. Supports d3-zoom pan/zoom (axis rescales, markers
 * reposition), hover tooltips, and a window CustomEvent channel
 * ('gesprek-timeline-reset') used by the Reset button to restore the zoom.
 */
export const GesprekTimeline: React.FC<GesprekTimelineProps> = ({
  events,
  width = 800,
  height = 200,
  onEventClick,
  onEventHover,
  selectedEventId,
  language = 'nl',
  showLabels = true,
  className,
}) => {
  const svgRef = useRef<SVGSVGElement>(null);
  const containerRef = useRef<HTMLDivElement>(null);
  const [tooltip, setTooltip] = useState<TooltipState>({
    visible: false,
    x: 0,
    y: 0,
    event: null,
  });

  // Margins (memoized so the effect's dependency array stays stable).
  const margin = useMemo(() => ({
    top: 30,
    right: 30,
    bottom: 40,
    left: 30,
  }), []);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  // Parse, filter out unparseable dates, and sort events chronologically.
  const parsedEvents = useMemo<ParsedEvent[]>(() => {
    return events
      .map(event => ({
        ...event,
        parsedDate: parseDate(event.date),
      }))
      .filter((event): event is ParsedEvent => event.parsedDate !== null)
      .sort((a, b) => a.parsedDate.getTime() - b.parsedDate.getTime());
  }, [events]);

  // Main D3 visualization — imperatively rebuilds the SVG on every relevant change.
  useEffect(() => {
    if (!svgRef.current || parsedEvents.length === 0) return;

    // Clear previous content (full redraw rather than incremental update).
    d3.select(svgRef.current).selectAll('*').remove();

    const svg = d3.select(svgRef.current)
      .attr('width', width)
      .attr('height', height)
      .attr('viewBox', [0, 0, width, height]);

    // Clip path keeps panned/zoomed markers inside the plot area.
    svg.append('defs')
      .append('clipPath')
      .attr('id', 'timeline-clip')
      .append('rect')
      .attr('x', margin.left)
      .attr('y', margin.top)
      .attr('width', innerWidth)
      .attr('height', innerHeight);

    // Create main group with margins
    const g = svg.append('g')
      .attr('transform', `translate(${margin.left},${margin.top})`);

    // Calculate time extent with 5% padding on each side so end markers
    // are not flush against the plot edges.
    const timeExtent = d3.extent(parsedEvents, d => d.parsedDate) as [Date, Date];
    const timePadding = (timeExtent[1].getTime() - timeExtent[0].getTime()) * 0.05;

    const xScale = d3.scaleTime()
      .domain([
        new Date(timeExtent[0].getTime() - timePadding),
        new Date(timeExtent[1].getTime() + timePadding),
      ])
      .range([0, innerWidth]);

    // Keep an untouched copy: zoom rescales from the original, not cumulatively.
    const xScaleOriginal = xScale.copy();

    // Create clipped group for content
    const content = g.append('g')
      .attr('clip-path', 'url(#timeline-clip)');

    // Draw the horizontal timeline axis line through the vertical center.
    content.append('line')
      .attr('class', 'axis-line')
      .attr('x1', 0)
      .attr('x2', innerWidth)
      .attr('y1', innerHeight / 2)
      .attr('y2', innerHeight / 2)
      .attr('stroke', COLORS.axisLine)
      .attr('stroke-width', 2);

    // Draw the tick axis along the bottom edge.
    const xAxis = d3.axisBottom(xScale)
      .ticks(Math.min(parsedEvents.length, 10))
      .tickFormat((d) => formatDate(d as Date, language));

    const axisGroup = g.append('g')
      .attr('class', 'x-axis')
      .attr('transform', `translate(0,${innerHeight})`)
      .call(xAxis);

    // Rotate tick labels to avoid overlap on dense timelines.
    axisGroup.selectAll('text')
      .attr('font-size', '10px')
      .attr('fill', COLORS.textLight)
      .attr('transform', 'rotate(-30)')
      .attr('text-anchor', 'end')
      .attr('dx', '-0.5em')
      .attr('dy', '0.5em');

    axisGroup.selectAll('line')
      .attr('stroke', COLORS.axis);

    axisGroup.select('.domain')
      .attr('stroke', COLORS.axis);

    // Draw event markers
    const markersGroup = content.append('g').attr('class', 'markers');

    const markers = markersGroup.selectAll('.event-marker')
      .data(parsedEvents)
      .join('g')
      .attr('class', 'event-marker')
      .attr('transform', d => `translate(${xScale(d.parsedDate)},${innerHeight / 2})`)
      .style('cursor', 'pointer');

    // Marker circles, animated in with a 50ms stagger.
    // NOTE(review): the selected marker is identified by comparing
    // selectedEventId to d.date — assumes event dates are unique; confirm.
    markers.append('circle')
      .attr('r', 0)
      .attr('fill', d => getEventColor(d.type))
      .attr('stroke', '#fff')
      .attr('stroke-width', 2)
      .transition()
      .duration(500)
      .delay((_, i) => i * 50)
      .attr('r', d => selectedEventId === d.date ? 10 : 7);

    // Dashed vertical connectors linking markers to their labels; direction
    // alternates above/below by index parity.
    markers.append('line')
      .attr('class', 'connector')
      .attr('x1', 0)
      .attr('x2', 0)
      .attr('y1', 0)
      .attr('y2', 0)
      .attr('stroke', d => getEventColor(d.type))
      .attr('stroke-width', 1.5)
      .attr('stroke-dasharray', '3,3')
      .attr('opacity', 0)
      .transition()
      .duration(500)
      .delay((_, i) => i * 50 + 300)
      .attr('y2', (_, i) => (i % 2 === 0 ? -25 : 25))
      .attr('opacity', showLabels ? 0.7 : 0);

    // Event labels (alternating above/below, matching the connectors).
    if (showLabels) {
      markers.append('text')
        .attr('class', 'event-label')
        .attr('x', 0)
        .attr('y', (_, i) => (i % 2 === 0 ? -32 : 40))
        .attr('text-anchor', 'middle')
        .attr('font-size', '10px')
        .attr('fill', COLORS.text)
        .attr('opacity', 0)
        .text(d => {
          // Truncate long labels
          const maxLength = 20;
          return d.label.length > maxLength
            ? d.label.substring(0, maxLength) + '...'
            : d.label;
        })
        .transition()
        .duration(500)
        .delay((_, i) => i * 50 + 500)
        .attr('opacity', 1);
    }

    // Interaction handlers (classic function syntax so `this` is the marker <g>).
    markers
      .on('mouseenter', function(event, d) {
        d3.select(this).select('circle')
          .transition()
          .duration(200)
          .attr('r', 10);

        // Tooltip position is measured relative to the container div so the
        // absolutely-positioned tooltip lines up with the pointer.
        const [x, y] = d3.pointer(event, containerRef.current);
        setTooltip({
          visible: true,
          x: x + 10,
          y: y - 10,
          event: d,
        });

        if (onEventHover) {
          onEventHover(d);
        }
      })
      .on('mouseleave', function(_event, d) {
        // Shrink back unless this marker is the selected one.
        d3.select(this).select('circle')
          .transition()
          .duration(200)
          .attr('r', selectedEventId === d.date ? 10 : 7);

        setTooltip(prev => ({ ...prev, visible: false }));

        if (onEventHover) {
          onEventHover(null);
        }
      })
      .on('click', (_event, d) => {
        if (onEventClick) {
          onEventClick(d);
        }
      });

    // Zoom behavior: rescale the x-axis and reposition markers; y stays fixed.
    const zoom = d3.zoom<SVGSVGElement, unknown>()
      .scaleExtent([0.5, 10])
      .translateExtent([[-innerWidth, -innerHeight], [innerWidth * 2, innerHeight * 2]])
      .on('zoom', (event) => {
        // Rescale from the pristine copy so transforms do not compound.
        const newXScale = event.transform.rescaleX(xScaleOriginal);

        // Update axis
        axisGroup.call(xAxis.scale(newXScale));

        // Update markers
        markers.attr('transform', d =>
          `translate(${newXScale(d.parsedDate)},${innerHeight / 2})`
        );

        // Re-apply tick styling clobbered by the axis re-render.
        axisGroup.selectAll('text')
          .attr('font-size', '10px')
          .attr('fill', COLORS.textLight)
          .attr('transform', 'rotate(-30)')
          .attr('text-anchor', 'end');
      });

    svg.call(zoom);

    // Zoom reset, triggered via a window CustomEvent from the Reset button.
    const handleReset = () => {
      svg.transition()
        .duration(750)
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        .call(zoom.transform as any, d3.zoomIdentity);
    };

    window.addEventListener('gesprek-timeline-reset', handleReset);

    return () => {
      window.removeEventListener('gesprek-timeline-reset', handleReset);
    };
  }, [parsedEvents, width, height, innerWidth, innerHeight, margin, selectedEventId, showLabels, language, onEventClick, onEventHover]);

  // Dispatch the reset event; the D3 effect above owns the actual zoom state.
  const handleResetZoom = () => {
    window.dispatchEvent(new CustomEvent('gesprek-timeline-reset'));
  };

  // Empty state — shown when no event dates could be parsed.
  if (parsedEvents.length === 0) {
    return (
      <div className={`gesprek-timeline gesprek-timeline--empty ${className || ''}`}>
        <div className="gesprek-timeline__empty">
          <span>{language === 'nl' ? 'Geen tijdlijngegevens beschikbaar' : 'No timeline data available'}</span>
        </div>
      </div>
    );
  }

  return (
    <div
      ref={containerRef}
      className={`gesprek-timeline ${className || ''}`}
      style={{ position: 'relative' }}
    >
      <svg ref={svgRef} />

      {/* Tooltip — absolutely positioned over the SVG, clamped to the right edge */}
      {tooltip.visible && tooltip.event && (
        <div
          className="gesprek-timeline__tooltip"
          style={{
            position: 'absolute',
            left: Math.min(tooltip.x, width - 200),
            top: Math.max(tooltip.y, 10),
            pointerEvents: 'none',
            zIndex: 100,
            backgroundColor: 'white',
            border: '1px solid #e2e8f0',
            borderRadius: '4px',
            padding: '8px 12px',
            boxShadow: '0 2px 8px rgba(0,0,0,0.1)',
            fontSize: '12px',
            maxWidth: '200px',
          }}
        >
          <div style={{ fontWeight: '600', color: COLORS.text }}>{tooltip.event.label}</div>
          <div style={{ color: COLORS.primary, fontSize: '11px', marginTop: '2px' }}>
            {formatDate(parseDate(tooltip.event.date)!, language)}
          </div>
          {tooltip.event.type && (
            <div style={{
              display: 'inline-block',
              marginTop: '4px',
              padding: '2px 6px',
              fontSize: '10px',
              borderRadius: '3px',
              backgroundColor: getEventColor(tooltip.event.type) + '20',
              color: getEventColor(tooltip.event.type),
            }}>
              {tooltip.event.type}
            </div>
          )}
          {tooltip.event.description && (
            <div style={{ color: COLORS.textLight, fontSize: '11px', marginTop: '4px' }}>
              {tooltip.event.description}
            </div>
          )}
        </div>
      )}

      {/* Controls */}
      <div
        className="gesprek-timeline__controls"
        style={{
          position: 'absolute',
          top: '5px',
          right: '5px',
          display: 'flex',
          gap: '4px',
        }}
      >
        <button
          onClick={handleResetZoom}
          title={language === 'nl' ? 'Zoom resetten' : 'Reset zoom'}
          style={{
            padding: '4px 8px',
            fontSize: '11px',
            backgroundColor: 'white',
            border: '1px solid #e2e8f0',
            borderRadius: '4px',
            cursor: 'pointer',
            color: COLORS.textLight,
          }}
        >
          {language === 'nl' ? 'Reset' : 'Reset'}
        </button>
      </div>

      {/* Legend — event count and covered date range */}
      <div
        className="gesprek-timeline__legend"
        style={{
          position: 'absolute',
          bottom: '5px',
          left: '5px',
          display: 'flex',
          gap: '12px',
          fontSize: '10px',
          color: COLORS.textLight,
        }}
      >
        <span>{parsedEvents.length} {language === 'nl' ? 'gebeurtenissen' : 'events'}</span>
        <span>•</span>
        <span>
          {formatDate(parsedEvents[0].parsedDate, language)} — {formatDate(parsedEvents[parsedEvents.length - 1].parsedDate, language)}
        </span>
      </div>
    </div>
  );
};

export default GesprekTimeline;
|
||||
18
frontend/src/components/gesprek/index.ts
Normal file
18
frontend/src/components/gesprek/index.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
/**
 * Gesprek Components Index
 *
 * D3 visualization components for the Gesprek (Conversation) page.
 * All components follow NDE house style and support Dutch/English.
 * Each component is re-exported alongside its props interface so consumers
 * can import both from this single barrel module.
 */

// Choropleth/point map of custodian locations.
export { GesprekGeoMap } from './GesprekGeoMap';
export type { GesprekGeoMapProps } from './GesprekGeoMap';

// Categorical bar chart.
export { GesprekBarChart } from './GesprekBarChart';
export type { GesprekBarChartProps } from './GesprekBarChart';

// Horizontal zoomable event timeline.
export { GesprekTimeline } from './GesprekTimeline';
export type { GesprekTimelineProps } from './GesprekTimeline';

// Force-directed entity/relationship graph.
export { GesprekNetworkGraph } from './GesprekNetworkGraph';
export type { GesprekNetworkGraphProps } from './GesprekNetworkGraph';
|
||||
|
|
@ -178,6 +178,12 @@ export function Navigation() {
|
|||
>
|
||||
{t('overview')}
|
||||
</Link>
|
||||
<Link
|
||||
to="/gesprek"
|
||||
className={`nav-link ${isActive('/gesprek') ? 'active' : ''}`}
|
||||
>
|
||||
{t('gesprek')}
|
||||
</Link>
|
||||
<Link
|
||||
to="/settings"
|
||||
className={`nav-link ${isActive('/settings') ? 'active' : ''}`}
|
||||
|
|
@ -268,6 +274,9 @@ export function Navigation() {
|
|||
<Link to="/overview" className={`nav-mobile-link ${isActive('/overview') ? 'active' : ''}`}>
|
||||
{t('overview')}
|
||||
</Link>
|
||||
<Link to="/gesprek" className={`nav-mobile-link ${isActive('/gesprek') ? 'active' : ''}`}>
|
||||
{t('gesprek')}
|
||||
</Link>
|
||||
<Link to="/settings" className={`nav-mobile-link ${isActive('/settings') ? 'active' : ''}`}>
|
||||
{t('settings')}
|
||||
</Link>
|
||||
|
|
|
|||
|
|
@ -6,10 +6,26 @@
|
|||
*/
|
||||
|
||||
import React, { useEffect, useRef, useState } from 'react';
|
||||
import mermaid from 'mermaid';
|
||||
import type { SparqlClient } from '../../lib/sparql/client';
|
||||
import './OntologyVisualizer.css';
|
||||
|
||||
// Lazy load mermaid to avoid bundling issues
|
||||
let mermaidInstance: typeof import('mermaid').default | null = null;
|
||||
const getMermaid = async () => {
|
||||
if (!mermaidInstance) {
|
||||
const mod = await import('mermaid');
|
||||
mermaidInstance = mod.default;
|
||||
mermaidInstance.initialize({
|
||||
startOnLoad: false,
|
||||
theme: 'default',
|
||||
securityLevel: 'loose',
|
||||
fontFamily: 'Arial, sans-serif',
|
||||
logLevel: 'error',
|
||||
});
|
||||
}
|
||||
return mermaidInstance;
|
||||
};
|
||||
|
||||
export interface OntologyVisualizerProps {
|
||||
/** Pre-loaded Mermaid diagram source */
|
||||
mermaidSource?: string;
|
||||
|
|
@ -35,16 +51,11 @@ export const OntologyVisualizer: React.FC<OntologyVisualizerProps> = ({
|
|||
const [error, setError] = useState<string | null>(null);
|
||||
const [zoom, setZoom] = useState(1);
|
||||
const [generatedSource, setGeneratedSource] = useState<string | null>(null);
|
||||
const [mermaidReady, setMermaidReady] = useState(false);
|
||||
|
||||
// Initialize Mermaid
|
||||
// Initialize Mermaid (lazy loaded)
|
||||
useEffect(() => {
|
||||
mermaid.initialize({
|
||||
startOnLoad: true,
|
||||
theme: 'default',
|
||||
securityLevel: 'loose',
|
||||
fontFamily: 'Arial, sans-serif',
|
||||
logLevel: 'error',
|
||||
});
|
||||
getMermaid().then(() => setMermaidReady(true)).catch(console.error);
|
||||
}, []);
|
||||
|
||||
// Generate Mermaid diagram from RDF data
|
||||
|
|
@ -72,10 +83,11 @@ export const OntologyVisualizer: React.FC<OntologyVisualizerProps> = ({
|
|||
// Render Mermaid diagram
|
||||
useEffect(() => {
|
||||
const source = mermaidSource || generatedSource;
|
||||
if (!source || !containerRef.current) return;
|
||||
if (!source || !containerRef.current || !mermaidReady) return;
|
||||
|
||||
const renderDiagram = async () => {
|
||||
try {
|
||||
const mermaid = await getMermaid();
|
||||
const { svg } = await mermaid.render('mermaid-diagram', source);
|
||||
if (containerRef.current) {
|
||||
containerRef.current.innerHTML = svg;
|
||||
|
|
@ -87,7 +99,7 @@ export const OntologyVisualizer: React.FC<OntologyVisualizerProps> = ({
|
|||
};
|
||||
|
||||
renderDiagram();
|
||||
}, [mermaidSource, generatedSource]);
|
||||
}, [mermaidSource, generatedSource, mermaidReady]);
|
||||
|
||||
// Generate diagram when sparqlClient is provided
|
||||
useEffect(() => {
|
||||
|
|
|
|||
477
frontend/src/components/uml/CustodianTypeIndicator.tsx
Normal file
477
frontend/src/components/uml/CustodianTypeIndicator.tsx
Normal file
|
|
@ -0,0 +1,477 @@
|
|||
/**
|
||||
* CustodianTypeIndicator.tsx - Three.js 19-sided Polygon for Custodian Type Display
|
||||
*
|
||||
* Displays a 3D polygon badge showing which CustodianType(s) a schema element
|
||||
* relates to. Uses the GLAMORCUBESFIXPHDNT taxonomy with color-coded polygons.
|
||||
*
|
||||
* The polygon has 19 sides - one for each letter in GLAMORCUBESFIXPHDNT:
|
||||
* G-L-A-M-O-R-C-U-B-E-S-F-I-X-P-H-D-N-T
|
||||
*
|
||||
* Usage:
|
||||
* - Pass one or more custodian type codes (e.g., ['M', 'A'] for Museum + Archive)
|
||||
* - Component renders a rotating 3D 19-gon (enneadecagon) with the type letter(s)
|
||||
* - Colors match the centralized custodian-types.ts configuration
|
||||
*/
|
||||
|
||||
import React, { useEffect, useRef, useMemo } from 'react';
|
||||
import * as THREE from 'three';
|
||||
import {
|
||||
getCustodianTypeByCode,
|
||||
type CustodianTypeCode,
|
||||
} from '@/lib/custodian-types';
|
||||
import { useLanguage } from '@/contexts/LanguageContext';
|
||||
|
||||
// Total number of custodian types in GLAMORCUBESFIXPHDNT = 19
|
||||
const POLYGON_SIDES = 19;
|
||||
|
||||
export interface CustodianTypeIndicatorProps {
|
||||
/** Array of custodian type codes (single letters from GLAMORCUBESFIXPHDNT) */
|
||||
types: CustodianTypeCode[];
|
||||
/** Size of the indicator in pixels */
|
||||
size?: number;
|
||||
/** Whether to animate the polygon rotation */
|
||||
animate?: boolean;
|
||||
/** Show tooltip on hover */
|
||||
showTooltip?: boolean;
|
||||
/** Custom CSS class */
|
||||
className?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a regular polygon geometry with the specified number of sides
|
||||
*
|
||||
* For GLAMORCUBESFIXPHDNT taxonomy, this creates a 19-sided polygon (enneadecagon)
|
||||
* where each side represents one custodian type letter.
|
||||
*
|
||||
* @param sides - Number of sides (default: 19 for GLAMORCUBESFIXPHDNT)
|
||||
* @param radius - Radius of the polygon
|
||||
*/
|
||||
function createPolygonGeometry(sides: number = POLYGON_SIDES, radius: number = 1): THREE.BufferGeometry {
|
||||
console.log('[CustodianTypeIndicator] Creating polygon geometry with', sides, 'sides (one per GLAMORCUBESFIXPHDNT letter)');
|
||||
const shape = new THREE.Shape();
|
||||
|
||||
for (let i = 0; i <= sides; i++) {
|
||||
const angle = (i / sides) * Math.PI * 2 - Math.PI / 2; // Start from top
|
||||
const x = Math.cos(angle) * radius;
|
||||
const y = Math.sin(angle) * radius;
|
||||
|
||||
if (i === 0) {
|
||||
shape.moveTo(x, y);
|
||||
} else {
|
||||
shape.lineTo(x, y);
|
||||
}
|
||||
}
|
||||
|
||||
const extrudeSettings = {
|
||||
depth: 0.15,
|
||||
bevelEnabled: true,
|
||||
bevelThickness: 0.02,
|
||||
bevelSize: 0.02,
|
||||
bevelSegments: 2,
|
||||
};
|
||||
|
||||
return new THREE.ExtrudeGeometry(shape, extrudeSettings);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a canvas texture with the type letter
|
||||
*/
|
||||
function createLetterTexture(letter: string, color: string): THREE.CanvasTexture {
|
||||
const canvas = document.createElement('canvas');
|
||||
canvas.width = 128;
|
||||
canvas.height = 128;
|
||||
const ctx = canvas.getContext('2d')!;
|
||||
|
||||
// Transparent background
|
||||
ctx.clearRect(0, 0, 128, 128);
|
||||
|
||||
// Draw letter
|
||||
ctx.fillStyle = color;
|
||||
ctx.font = 'bold 80px system-ui, -apple-system, sans-serif';
|
||||
ctx.textAlign = 'center';
|
||||
ctx.textBaseline = 'middle';
|
||||
ctx.fillText(letter, 64, 68);
|
||||
|
||||
const texture = new THREE.CanvasTexture(canvas);
|
||||
texture.needsUpdate = true;
|
||||
return texture;
|
||||
}
|
||||
|
||||
/**
|
||||
* CustodianTypeIndicator Component
|
||||
*
|
||||
* Renders a 3D polygon with the custodian type letter(s)
|
||||
*/
|
||||
export const CustodianTypeIndicator: React.FC<CustodianTypeIndicatorProps> = ({
|
||||
types,
|
||||
size = 32,
|
||||
animate = false,
|
||||
showTooltip = true,
|
||||
className = '',
|
||||
}) => {
|
||||
const { language } = useLanguage();
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const rendererRef = useRef<THREE.WebGLRenderer | null>(null);
|
||||
const sceneRef = useRef<THREE.Scene | null>(null);
|
||||
const cameraRef = useRef<THREE.OrthographicCamera | null>(null);
|
||||
const meshRef = useRef<THREE.Mesh | null>(null);
|
||||
const animationFrameRef = useRef<number | null>(null);
|
||||
|
||||
// Get type configurations
|
||||
const typeConfigs = useMemo(() => {
|
||||
return types
|
||||
.map(code => getCustodianTypeByCode(code))
|
||||
.filter((config): config is NonNullable<typeof config> => config !== undefined);
|
||||
}, [types]);
|
||||
|
||||
// Primary type for color (first one if multiple)
|
||||
const primaryConfig = typeConfigs[0];
|
||||
|
||||
// Tooltip text
|
||||
const tooltipText = useMemo(() => {
|
||||
if (typeConfigs.length === 0) return '';
|
||||
return typeConfigs
|
||||
.map(config => config.label[language])
|
||||
.join(', ');
|
||||
}, [typeConfigs, language]);
|
||||
|
||||
// Letters to display
|
||||
const displayLetters = useMemo(() => {
|
||||
if (types.length === 0) return '?';
|
||||
if (types.length === 1) return types[0];
|
||||
if (types.length <= 3) return types.join('');
|
||||
return types.slice(0, 2).join('') + '+';
|
||||
}, [types]);
|
||||
|
||||
useEffect(() => {
|
||||
console.log('[CustodianTypeIndicator] useEffect triggered');
|
||||
console.log('[CustodianTypeIndicator] containerRef.current:', containerRef.current);
|
||||
console.log('[CustodianTypeIndicator] primaryConfig:', primaryConfig);
|
||||
console.log('[CustodianTypeIndicator] types:', types);
|
||||
console.log('[CustodianTypeIndicator] displayLetters:', displayLetters);
|
||||
|
||||
if (!containerRef.current) {
|
||||
console.warn('[CustodianTypeIndicator] No container ref - cannot render 3D polygon');
|
||||
return;
|
||||
}
|
||||
|
||||
if (!primaryConfig) {
|
||||
console.warn('[CustodianTypeIndicator] No primaryConfig - cannot determine color');
|
||||
return;
|
||||
}
|
||||
|
||||
const container = containerRef.current;
|
||||
console.log('[CustodianTypeIndicator] Container dimensions:', container.clientWidth, 'x', container.clientHeight);
|
||||
|
||||
// Initialize scene
|
||||
const scene = new THREE.Scene();
|
||||
sceneRef.current = scene;
|
||||
console.log('[CustodianTypeIndicator] Scene created');
|
||||
|
||||
// Orthographic camera for flat 2D-like appearance
|
||||
const aspect = 1;
|
||||
const frustumSize = 2.5;
|
||||
const camera = new THREE.OrthographicCamera(
|
||||
-frustumSize * aspect / 2,
|
||||
frustumSize * aspect / 2,
|
||||
frustumSize / 2,
|
||||
-frustumSize / 2,
|
||||
0.1,
|
||||
100
|
||||
);
|
||||
camera.position.z = 5;
|
||||
cameraRef.current = camera;
|
||||
console.log('[CustodianTypeIndicator] Camera created');
|
||||
|
||||
// Renderer with transparency
|
||||
let renderer: THREE.WebGLRenderer;
|
||||
try {
|
||||
renderer = new THREE.WebGLRenderer({
|
||||
antialias: true,
|
||||
alpha: true,
|
||||
});
|
||||
renderer.setSize(size, size);
|
||||
renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
|
||||
renderer.setClearColor(0x000000, 0);
|
||||
container.appendChild(renderer.domElement);
|
||||
rendererRef.current = renderer;
|
||||
console.log('[CustodianTypeIndicator] Renderer created and appended to container');
|
||||
} catch (err) {
|
||||
console.error('[CustodianTypeIndicator] Failed to create WebGL renderer:', err);
|
||||
return;
|
||||
}
|
||||
|
||||
// Create 19-sided polygon geometry (one side per GLAMORCUBESFIXPHDNT letter)
|
||||
console.log('[CustodianTypeIndicator] Creating 19-sided polygon (enneadecagon) for GLAMORCUBESFIXPHDNT');
|
||||
const geometry = createPolygonGeometry(POLYGON_SIDES, 1);
|
||||
console.log('[CustodianTypeIndicator] Geometry created:', geometry);
|
||||
|
||||
// Material with custodian type color
|
||||
const primaryColor = new THREE.Color(primaryConfig.color);
|
||||
console.log('[CustodianTypeIndicator] Using color:', primaryConfig.color);
|
||||
const material = new THREE.MeshStandardMaterial({
|
||||
color: primaryColor,
|
||||
metalness: 0.3,
|
||||
roughness: 0.4,
|
||||
side: THREE.DoubleSide,
|
||||
});
|
||||
|
||||
const mesh = new THREE.Mesh(geometry, material);
|
||||
mesh.rotation.x = 0.15; // Slight tilt for 3D effect
|
||||
scene.add(mesh);
|
||||
meshRef.current = mesh;
|
||||
console.log('[CustodianTypeIndicator] Mesh added to scene');
|
||||
|
||||
// Create text sprite for the letter
|
||||
const letterTexture = createLetterTexture(displayLetters, '#ffffff');
|
||||
const spriteMaterial = new THREE.SpriteMaterial({
|
||||
map: letterTexture,
|
||||
transparent: true,
|
||||
});
|
||||
const sprite = new THREE.Sprite(spriteMaterial);
|
||||
sprite.scale.set(1.4, 1.4, 1);
|
||||
sprite.position.z = 0.2;
|
||||
scene.add(sprite);
|
||||
console.log('[CustodianTypeIndicator] Letter sprite added:', displayLetters);
|
||||
|
||||
// Lighting
|
||||
const ambientLight = new THREE.AmbientLight(0xffffff, 0.8);
|
||||
scene.add(ambientLight);
|
||||
|
||||
const directionalLight = new THREE.DirectionalLight(0xffffff, 0.6);
|
||||
directionalLight.position.set(2, 2, 5);
|
||||
scene.add(directionalLight);
|
||||
console.log('[CustodianTypeIndicator] Lighting added');
|
||||
|
||||
// Animation loop
|
||||
let rotationAngle = 0;
|
||||
const animateScene = () => {
|
||||
animationFrameRef.current = requestAnimationFrame(animateScene);
|
||||
|
||||
if (animate && mesh) {
|
||||
rotationAngle += 0.01;
|
||||
mesh.rotation.y = Math.sin(rotationAngle) * 0.3;
|
||||
}
|
||||
|
||||
renderer.render(scene, camera);
|
||||
};
|
||||
animateScene();
|
||||
console.log('[CustodianTypeIndicator] Animation loop started');
|
||||
|
||||
// Initial render
|
||||
renderer.render(scene, camera);
|
||||
console.log('[CustodianTypeIndicator] Initial render complete');
|
||||
|
||||
// Cleanup
|
||||
return () => {
|
||||
console.log('[CustodianTypeIndicator] Cleanup triggered');
|
||||
if (animationFrameRef.current) {
|
||||
cancelAnimationFrame(animationFrameRef.current);
|
||||
}
|
||||
if (renderer) {
|
||||
container.removeChild(renderer.domElement);
|
||||
renderer.dispose();
|
||||
}
|
||||
geometry.dispose();
|
||||
material.dispose();
|
||||
letterTexture.dispose();
|
||||
spriteMaterial.dispose();
|
||||
};
|
||||
}, [primaryConfig, displayLetters, size, animate, types]);
|
||||
|
||||
if (!primaryConfig) {
|
||||
console.warn('[CustodianTypeIndicator] Rendering null - no primaryConfig for types:', types);
|
||||
return null;
|
||||
}
|
||||
|
||||
console.log('[CustodianTypeIndicator] Rendering container for types:', types, 'with size:', size);
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={containerRef}
|
||||
className={`custodian-type-indicator ${className}`}
|
||||
title={showTooltip ? tooltipText : undefined}
|
||||
style={{
|
||||
width: size,
|
||||
height: size,
|
||||
display: 'inline-flex',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
cursor: showTooltip ? 'help' : 'default',
|
||||
}}
|
||||
/>
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Simplified 2D Badge version (CSS-based, no Three.js)
 * For use in lists or where 3D is overkill
 */
export interface CustodianTypeBadgeProps {
  /** Array of custodian type codes; unknown codes are silently dropped. */
  types: CustodianTypeCode[];
  /** Size variant (default: 'medium'). */
  size?: 'small' | 'medium' | 'large';
  /** Show the localized label text next to the letters (default: false). */
  showLabel?: boolean;
  /** Custom CSS class appended to `custodian-type-badge`. */
  className?: string;
}
|
||||
|
||||
export const CustodianTypeBadge: React.FC<CustodianTypeBadgeProps> = ({
|
||||
types,
|
||||
size = 'medium',
|
||||
showLabel = false,
|
||||
className = '',
|
||||
}) => {
|
||||
const { language } = useLanguage();
|
||||
|
||||
const typeConfigs = useMemo(() => {
|
||||
return types
|
||||
.map(code => getCustodianTypeByCode(code))
|
||||
.filter((config): config is NonNullable<typeof config> => config !== undefined);
|
||||
}, [types]);
|
||||
|
||||
if (typeConfigs.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const primaryConfig = typeConfigs[0];
|
||||
|
||||
const sizeClasses = {
|
||||
small: { fontSize: '10px', padding: '2px 4px', minWidth: '16px' },
|
||||
medium: { fontSize: '12px', padding: '3px 6px', minWidth: '20px' },
|
||||
large: { fontSize: '14px', padding: '4px 8px', minWidth: '24px' },
|
||||
};
|
||||
|
||||
const displayLetters = types.length <= 3
|
||||
? types.join('')
|
||||
: types.slice(0, 2).join('') + '+';
|
||||
|
||||
return (
|
||||
<span
|
||||
className={`custodian-type-badge ${className}`}
|
||||
title={typeConfigs.map(c => c.label[language]).join(', ')}
|
||||
style={{
|
||||
display: 'inline-flex',
|
||||
alignItems: 'center',
|
||||
gap: '4px',
|
||||
backgroundColor: primaryConfig.bgColor,
|
||||
color: primaryConfig.textColor,
|
||||
border: `1px solid ${primaryConfig.borderColor}`,
|
||||
borderRadius: '4px',
|
||||
fontWeight: 600,
|
||||
fontFamily: 'system-ui, -apple-system, sans-serif',
|
||||
whiteSpace: 'nowrap',
|
||||
...sizeClasses[size],
|
||||
}}
|
||||
>
|
||||
<span
|
||||
style={{
|
||||
backgroundColor: primaryConfig.color,
|
||||
color: '#ffffff',
|
||||
borderRadius: '2px',
|
||||
padding: '1px 3px',
|
||||
fontSize: 'inherit',
|
||||
lineHeight: 1,
|
||||
}}
|
||||
>
|
||||
{displayLetters}
|
||||
</span>
|
||||
{showLabel && (
|
||||
<span style={{ color: primaryConfig.textColor }}>
|
||||
{primaryConfig.label[language]}
|
||||
</span>
|
||||
)}
|
||||
</span>
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Multi-type indicator showing all types in a row
 */
export interface CustodianTypeRowProps {
  /** Array of custodian type codes; unknown codes are silently dropped. */
  types: CustodianTypeCode[];
  /** Maximum types to show before collapsing into a "+N" chip (default: 5). */
  maxVisible?: number;
  /** Size variant (default: 'small'). */
  size?: 'small' | 'medium' | 'large';
  /** Custom CSS class appended to `custodian-type-row`. */
  className?: string;
}
|
||||
|
||||
export const CustodianTypeRow: React.FC<CustodianTypeRowProps> = ({
|
||||
types,
|
||||
maxVisible = 5,
|
||||
size = 'small',
|
||||
className = '',
|
||||
}) => {
|
||||
const { language } = useLanguage();
|
||||
|
||||
const visibleTypes = types.slice(0, maxVisible);
|
||||
const hiddenCount = types.length - maxVisible;
|
||||
|
||||
const sizeStyles = {
|
||||
small: { width: '16px', height: '16px', fontSize: '10px' },
|
||||
medium: { width: '20px', height: '20px', fontSize: '12px' },
|
||||
large: { width: '24px', height: '24px', fontSize: '14px' },
|
||||
};
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`custodian-type-row ${className}`}
|
||||
style={{
|
||||
display: 'inline-flex',
|
||||
alignItems: 'center',
|
||||
gap: '2px',
|
||||
}}
|
||||
>
|
||||
{visibleTypes.map(code => {
|
||||
const config = getCustodianTypeByCode(code);
|
||||
if (!config) return null;
|
||||
|
||||
return (
|
||||
<span
|
||||
key={code}
|
||||
title={config.label[language]}
|
||||
style={{
|
||||
display: 'inline-flex',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
backgroundColor: config.color,
|
||||
color: '#ffffff',
|
||||
borderRadius: '3px',
|
||||
fontWeight: 700,
|
||||
fontFamily: 'system-ui, -apple-system, sans-serif',
|
||||
...sizeStyles[size],
|
||||
}}
|
||||
>
|
||||
{code}
|
||||
</span>
|
||||
);
|
||||
})}
|
||||
{hiddenCount > 0 && (
|
||||
<span
|
||||
style={{
|
||||
display: 'inline-flex',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
backgroundColor: '#94a3b8',
|
||||
color: '#ffffff',
|
||||
borderRadius: '3px',
|
||||
fontWeight: 600,
|
||||
fontSize: sizeStyles[size].fontSize,
|
||||
padding: '0 3px',
|
||||
height: sizeStyles[size].height,
|
||||
}}
|
||||
title={`+${hiddenCount} more`}
|
||||
>
|
||||
+{hiddenCount}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default CustodianTypeIndicator;
|
||||
|
|
@ -75,6 +75,7 @@ export const translations = {
|
|||
map: { nl: 'Kaart', en: 'Map' },
|
||||
stats: { nl: 'Statistieken', en: 'Stats' },
|
||||
overview: { nl: 'Overzicht', en: 'Overview' },
|
||||
gesprek: { nl: 'Gesprek', en: 'Chat' },
|
||||
settings: { nl: 'Instellingen', en: 'Settings' },
|
||||
signOut: { nl: 'Uitloggen', en: 'Sign Out' },
|
||||
},
|
||||
|
|
|
|||
657
frontend/src/hooks/useMultiDatabaseRAG.ts
Normal file
657
frontend/src/hooks/useMultiDatabaseRAG.ts
Normal file
|
|
@ -0,0 +1,657 @@
|
|||
/**
|
||||
* useMultiDatabaseRAG.ts - Multi-Database RAG (Retrieval-Augmented Generation) Hook
|
||||
*
|
||||
* Orchestrates queries across multiple databases for conversational AI:
|
||||
* - Qdrant: Vector similarity search for semantic retrieval
|
||||
* - Oxigraph: SPARQL queries for structured RDF data
|
||||
* - TypeDB: TypeQL queries for knowledge graph traversal
|
||||
*
|
||||
* Based on DSPy RAG patterns for heritage institution conversations.
|
||||
* Self-hosted infrastructure - no external API keys required.
|
||||
*
|
||||
* @see https://dspy.ai/
|
||||
*/
|
||||
|
||||
import { useState, useCallback } from 'react';
|
||||
import type { QdrantSearchResult } from './useQdrant';
|
||||
|
||||
// Configuration - all services use Caddy proxy paths
// (relative URLs, so the same bundle works behind any host).
const API_BASE = ''; // Relative URLs via Caddy proxy
const QDRANT_URL = '/qdrant'; // Qdrant vector database (REST)
const SPARQL_URL = '/sparql'; // Oxigraph SPARQL endpoint
const TYPEDB_URL = '/api/typedb'; // TypeDB query bridge
const DSPY_URL = '/api/dspy'; // DSPy answer-generation service
|
||||
|
||||
// ============================================================================
// Types
// ============================================================================

/** Raw retrieval results gathered from all three databases for one query. */
export interface RAGContext {
  /** Vector / keyword hits from Qdrant. */
  qdrantResults: QdrantSearchResult[];
  /** SPARQL result bindings from Oxigraph. */
  sparqlResults: Record<string, unknown>[];
  /** TypeQL results from TypeDB. */
  typedbResults: Record<string, unknown>[];
  /** Sum of the three result-list lengths. */
  totalRetrieved: number;
}

/** Final answer produced by a RAG round-trip. */
export interface RAGResponse {
  /** Natural-language answer text. */
  answer: string;
  /** SPARQL query the backend generated, if any. */
  sparqlQuery?: string;
  /** TypeQL query the backend generated, if any. */
  typeqlQuery?: string;
  /** The retrieval context the answer was generated from. */
  context: RAGContext;
  /** Suggested visualization for the answer. */
  visualizationType?: VisualizationType;
  /** Data extracted for the suggested visualization. */
  visualizationData?: VisualizationData;
  /** Source attributions shown to the user. */
  sources: RAGSource[];
  /** Backend confidence score. */
  confidence: number;
}

/** One attributed source behind an answer. */
export interface RAGSource {
  database: 'qdrant' | 'oxigraph' | 'typedb';
  id: string;
  name?: string;
  score?: number;
  snippet?: string;
}

/** Visualization kinds the chat UI can render for an answer. */
export type VisualizationType =
  | 'none'
  | 'map' // Geographic visualization
  | 'timeline' // Temporal visualization
  | 'network' // Graph/relationship visualization
  | 'chart' // Bar/line charts
  | 'table' // Tabular data
  | 'card' // Institution cards
  | 'gallery'; // Image gallery

/** Data bundle for whichever visualization type was selected. */
export interface VisualizationData {
  type: VisualizationType;
  institutions?: InstitutionData[];
  coordinates?: GeoCoordinate[];
  timeline?: TimelineEvent[];
  graphData?: GraphVisualizationData;
  chartData?: ChartData;
}

/** Flattened institution record normalized from retrieval payloads. */
export interface InstitutionData {
  id: string;
  name: string;
  type?: string;
  city?: string;
  province?: string;
  country?: string;
  latitude?: number;
  longitude?: number;
  description?: string;
  website?: string;
  /** ISIL library/archive identifier, if known. */
  isil?: string;
  /** Wikidata Q-identifier, if known. */
  wikidata?: string;
  rating?: number;
  reviews?: number;
  photoCount?: number;
}

/** A single map marker. */
export interface GeoCoordinate {
  lat: number;
  lng: number;
  label: string;
  type?: string;
  /** Full institution record backing this marker, if available. */
  data?: InstitutionData;
}

/** A single timeline entry. */
export interface TimelineEvent {
  date: string;
  label: string;
  description?: string;
  type?: string;
}

/** Node/edge lists for network visualizations. */
export interface GraphVisualizationData {
  nodes: Array<{
    id: string;
    label: string;
    type: string;
    attributes?: Record<string, unknown>;
  }>;
  edges: Array<{
    id: string;
    source: string;
    target: string;
    label: string;
    type?: string;
  }>;
}

/** Chart.js-style dataset bundle for chart visualizations. */
export interface ChartData {
  labels: string[];
  datasets: Array<{
    label: string;
    data: number[];
    backgroundColor?: string | string[];
    borderColor?: string;
  }>;
}

/** One turn of the chat conversation. */
export interface ConversationMessage {
  id: string;
  role: 'user' | 'assistant' | 'system';
  content: string;
  timestamp: Date;
  /** Full RAG response attached to assistant messages. */
  response?: RAGResponse;
  isLoading?: boolean;
  error?: string;
}

/** Public API returned by the useMultiDatabaseRAG hook. */
export interface UseMultiDatabaseRAGReturn {
  // State
  isLoading: boolean;
  error: Error | null;
  lastContext: RAGContext | null;

  // Core RAG function
  queryRAG: (
    question: string,
    options?: RAGOptions
  ) => Promise<RAGResponse>;

  // Individual database queries (for debugging/advanced use)
  searchQdrant: (query: string, limit?: number) => Promise<QdrantSearchResult[]>;
  querySparql: (sparql: string) => Promise<Record<string, unknown>[]>;
  queryTypeDB: (typeql: string) => Promise<Record<string, unknown>[]>;

  // Utility functions
  clearContext: () => void;
  detectVisualizationType: (question: string, results: RAGContext) => VisualizationType;
}

/** Per-query tuning knobs for queryRAG. */
export interface RAGOptions {
  /** LLM model identifier forwarded to the DSPy backend. */
  model?: string;
  /** Answer language (default 'nl'). */
  language?: 'nl' | 'en';
  maxQdrantResults?: number;
  maxSparqlResults?: number;
  maxTypeDBResults?: number;
  /** Include the SPARQL retrieval leg (default true). */
  includeSparql?: boolean;
  /** Include the TypeDB retrieval leg (default false). */
  includeTypeDB?: boolean;
  /** Prior turns forwarded to the backend for context. */
  conversationHistory?: ConversationMessage[];
}
|
||||
|
||||
// ============================================================================
|
||||
// Helper Functions
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Generate text embedding using local embedding service or fallback
|
||||
* In production, this would use a local embedding model (e.g., sentence-transformers)
|
||||
* For now, we'll use keyword-based Qdrant filtering as a fallback
|
||||
*/
|
||||
async function generateEmbedding(text: string): Promise<number[] | null> {
|
||||
try {
|
||||
// Try local embedding service first
|
||||
const response = await fetch(`${API_BASE}/api/embed`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ text }),
|
||||
});
|
||||
|
||||
if (response.ok) {
|
||||
const data = await response.json();
|
||||
return data.embedding;
|
||||
}
|
||||
} catch {
|
||||
// Fallback: return null to use keyword search
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Search Qdrant using vector similarity or keyword filter.
 *
 * Two-stage strategy:
 *  1. If an embedding can be generated locally, run a vector similarity
 *     search against the `heritage_custodians` collection.
 *  2. Otherwise (or when the search request fails), scroll a batch of
 *     points and score them by naive keyword overlap with the query.
 *
 * @param query - Free-text user query.
 * @param limit - Maximum number of results to return (default 10).
 * @returns Scored Qdrant points; empty when nothing matches.
 * @throws Error when the fallback scroll request itself fails.
 */
async function qdrantSearch(
  query: string,
  limit: number = 10
): Promise<QdrantSearchResult[]> {
  const collectionName = 'heritage_custodians';

  // Try to get embedding for semantic search
  const embedding = await generateEmbedding(query);

  if (embedding) {
    // Vector similarity search
    const response = await fetch(`${QDRANT_URL}/collections/${collectionName}/points/search`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        vector: embedding,
        limit,
        with_payload: true,
      }),
    });

    if (response.ok) {
      const data = await response.json();
      return data.result || [];
    }
    // Non-OK search response: fall through to the keyword fallback below.
  }

  // Fallback: Scroll through points with keyword filter
  // Extract keywords from query for filtering (words longer than 2 chars).
  const keywords = query.toLowerCase().split(/\s+/).filter(w => w.length > 2);

  const response = await fetch(`${QDRANT_URL}/collections/${collectionName}/points/scroll`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      limit: limit * 2, // Get more to filter
      with_payload: true,
      with_vector: false,
    }),
  });

  if (!response.ok) {
    throw new Error(`Qdrant scroll failed: ${response.status}`);
  }

  const data = await response.json();
  const points = data.result?.points || [];

  // Simple keyword matching in payload: score = fraction of query
  // keywords found anywhere in the JSON-serialized payload.
  const scored = points.map((p: { id: string | number; payload: Record<string, unknown> }) => {
    const payload = p.payload || {};
    const text = JSON.stringify(payload).toLowerCase();
    const matches = keywords.filter(k => text.includes(k)).length;
    return {
      id: p.id,
      score: matches / Math.max(keywords.length, 1),
      payload,
    };
  });

  // Sort by score and return top results (zero-score points dropped).
  return scored
    .filter((p: { score: number }) => p.score > 0)
    .sort((a: { score: number }, b: { score: number }) => b.score - a.score)
    .slice(0, limit);
}
|
||||
|
||||
/**
|
||||
* Execute SPARQL query against Oxigraph
|
||||
*/
|
||||
async function sparqlQuery(query: string): Promise<Record<string, unknown>[]> {
|
||||
const response = await fetch(`${SPARQL_URL}/query`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/sparql-query',
|
||||
'Accept': 'application/sparql-results+json',
|
||||
},
|
||||
body: query,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.text();
|
||||
throw new Error(`SPARQL query failed: ${response.status} - ${error}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return data.results?.bindings || [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute TypeQL query against TypeDB
|
||||
*/
|
||||
async function typedbQuery(query: string): Promise<Record<string, unknown>[]> {
|
||||
const response = await fetch(`${TYPEDB_URL}/query`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ query, queryType: 'read' }),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.text();
|
||||
throw new Error(`TypeDB query failed: ${response.status} - ${error}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return data.results || [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Call DSPy backend to generate queries and response
|
||||
*/
|
||||
async function callDSPy(
|
||||
question: string,
|
||||
context: RAGContext,
|
||||
options: RAGOptions
|
||||
): Promise<{
|
||||
answer: string;
|
||||
sparqlQuery?: string;
|
||||
typeqlQuery?: string;
|
||||
visualizationType?: VisualizationType;
|
||||
confidence: number;
|
||||
}> {
|
||||
const response = await fetch(`${DSPY_URL}/rag-query`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
question,
|
||||
context: {
|
||||
qdrant_results: context.qdrantResults.slice(0, 5),
|
||||
sparql_results: context.sparqlResults.slice(0, 10),
|
||||
typedb_results: context.typedbResults.slice(0, 10),
|
||||
},
|
||||
language: options.language || 'nl',
|
||||
model: options.model || 'claude-sonnet-4-5-20250929',
|
||||
conversation_history: options.conversationHistory?.slice(-4).map(m => ({
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
})),
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
// Fallback response if DSPy service unavailable
|
||||
return {
|
||||
answer: generateFallbackAnswer(question, context, options.language || 'nl'),
|
||||
confidence: 0.5,
|
||||
};
|
||||
}
|
||||
|
||||
return response.json();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a fallback answer when DSPy service is unavailable
|
||||
*/
|
||||
function generateFallbackAnswer(
|
||||
_question: string,
|
||||
context: RAGContext,
|
||||
language: 'nl' | 'en'
|
||||
): string {
|
||||
const count = context.totalRetrieved;
|
||||
|
||||
if (count === 0) {
|
||||
return language === 'nl'
|
||||
? 'Geen resultaten gevonden voor uw vraag.'
|
||||
: 'No results found for your question.';
|
||||
}
|
||||
|
||||
const institutions = context.qdrantResults.slice(0, 5).map(r => {
|
||||
const name = r.payload?.name || r.payload?.custodian_name || 'Unknown';
|
||||
return name;
|
||||
});
|
||||
|
||||
if (language === 'nl') {
|
||||
return `Ik heb ${count} resultaten gevonden. Enkele relevante instellingen: ${institutions.join(', ')}.`;
|
||||
}
|
||||
return `I found ${count} results. Some relevant institutions: ${institutions.join(', ')}.`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect appropriate visualization type based on question and results
|
||||
*/
|
||||
function detectVisualizationType(
|
||||
question: string,
|
||||
context: RAGContext
|
||||
): VisualizationType {
|
||||
const q = question.toLowerCase();
|
||||
|
||||
// Map visualization keywords
|
||||
if (q.includes('kaart') || q.includes('map') || q.includes('waar') ||
|
||||
q.includes('where') || q.includes('locatie') || q.includes('location') ||
|
||||
q.includes('provincie') || q.includes('province') || q.includes('stad') ||
|
||||
q.includes('city') || q.includes('geografisch') || q.includes('geographic')) {
|
||||
return 'map';
|
||||
}
|
||||
|
||||
// Timeline keywords
|
||||
if (q.includes('wanneer') || q.includes('when') || q.includes('geschiedenis') ||
|
||||
q.includes('history') || q.includes('tijdlijn') || q.includes('timeline') ||
|
||||
q.includes('opgericht') || q.includes('founded') || q.includes('jaar') ||
|
||||
q.includes('year')) {
|
||||
return 'timeline';
|
||||
}
|
||||
|
||||
// Network/graph keywords
|
||||
if (q.includes('relatie') || q.includes('relationship') || q.includes('verbinding') ||
|
||||
q.includes('connection') || q.includes('netwerk') || q.includes('network') ||
|
||||
q.includes('samenwer') || q.includes('collaborat')) {
|
||||
return 'network';
|
||||
}
|
||||
|
||||
// Chart keywords
|
||||
if (q.includes('hoeveel') || q.includes('how many') || q.includes('aantal') ||
|
||||
q.includes('count') || q.includes('statistiek') || q.includes('statistic') ||
|
||||
q.includes('verdeling') || q.includes('distribution') || q.includes('vergelijk') ||
|
||||
q.includes('compare')) {
|
||||
return 'chart';
|
||||
}
|
||||
|
||||
// If we have location data, show map
|
||||
const hasCoordinates = context.qdrantResults.some(r =>
|
||||
r.payload?.latitude || r.payload?.coordinates
|
||||
);
|
||||
if (hasCoordinates && context.totalRetrieved > 0) {
|
||||
return 'map';
|
||||
}
|
||||
|
||||
// Default to cards for institution results
|
||||
if (context.qdrantResults.length > 0) {
|
||||
return 'card';
|
||||
}
|
||||
|
||||
return 'table';
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract visualization data from RAG context
|
||||
*/
|
||||
function extractVisualizationData(
|
||||
type: VisualizationType,
|
||||
context: RAGContext
|
||||
): VisualizationData {
|
||||
const data: VisualizationData = { type };
|
||||
|
||||
// Extract institution data from Qdrant results
|
||||
data.institutions = context.qdrantResults.map(r => {
|
||||
const p = (r.payload || {}) as Record<string, unknown>;
|
||||
const location = (p.location || {}) as Record<string, unknown>;
|
||||
const coordinates = (p.coordinates || {}) as Record<string, unknown>;
|
||||
return {
|
||||
id: String(r.id),
|
||||
name: String(p.name || p.custodian_name || p.institution_name || 'Unknown'),
|
||||
type: String(p.type || p.institution_type || ''),
|
||||
city: String(p.city || location.city || ''),
|
||||
province: String(p.province || p.region || ''),
|
||||
country: String(p.country || 'NL'),
|
||||
latitude: Number(p.latitude || coordinates.lat || location.latitude),
|
||||
longitude: Number(p.longitude || coordinates.lng || location.longitude),
|
||||
description: String(p.description || ''),
|
||||
website: String(p.website || p.url || ''),
|
||||
isil: String(p.isil || p.isil_code || ''),
|
||||
wikidata: String(p.wikidata || p.wikidata_id || ''),
|
||||
rating: Number(p.rating || p.google_rating || 0),
|
||||
reviews: Number(p.reviews || p.review_count || 0),
|
||||
photoCount: Number(p.photoCount || p.photo_count || 0),
|
||||
};
|
||||
});
|
||||
|
||||
// Extract coordinates for map
|
||||
if (type === 'map') {
|
||||
data.coordinates = data.institutions
|
||||
.filter(i => i.latitude && i.longitude && !isNaN(i.latitude) && !isNaN(i.longitude))
|
||||
.map(i => ({
|
||||
lat: i.latitude!,
|
||||
lng: i.longitude!,
|
||||
label: i.name,
|
||||
type: i.type,
|
||||
data: i,
|
||||
}));
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Hook Implementation
|
||||
// ============================================================================
|
||||
|
||||
export function useMultiDatabaseRAG(): UseMultiDatabaseRAGReturn {
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
const [error, setError] = useState<Error | null>(null);
|
||||
const [lastContext, setLastContext] = useState<RAGContext | null>(null);
|
||||
|
||||
/**
|
||||
* Main RAG query function - orchestrates multi-database retrieval
|
||||
*/
|
||||
const queryRAG = useCallback(async (
|
||||
question: string,
|
||||
options: RAGOptions = {}
|
||||
): Promise<RAGResponse> => {
|
||||
setIsLoading(true);
|
||||
setError(null);
|
||||
|
||||
const {
|
||||
maxQdrantResults = 20,
|
||||
maxSparqlResults = 50,
|
||||
maxTypeDBResults = 50,
|
||||
includeSparql = true,
|
||||
includeTypeDB = false, // Disabled by default (may not be running)
|
||||
} = options;
|
||||
|
||||
try {
|
||||
// Parallel retrieval from all databases
|
||||
const retrievalPromises: Promise<unknown>[] = [
|
||||
qdrantSearch(question, maxQdrantResults),
|
||||
];
|
||||
|
||||
// Add SPARQL if enabled (construct a basic query from keywords)
|
||||
if (includeSparql) {
|
||||
const keywords = question.split(/\s+/).filter(w => w.length > 2).slice(0, 3);
|
||||
const sparqlSearchQuery = `
|
||||
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
||||
PREFIX schema: <http://schema.org/>
|
||||
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
|
||||
|
||||
SELECT ?s ?label ?type WHERE {
|
||||
?s rdfs:label|schema:name|skos:prefLabel ?label .
|
||||
OPTIONAL { ?s a ?type }
|
||||
FILTER(CONTAINS(LCASE(STR(?label)), "${keywords[0]?.toLowerCase() || ''}"))
|
||||
}
|
||||
LIMIT ${maxSparqlResults}
|
||||
`;
|
||||
retrievalPromises.push(
|
||||
sparqlQuery(sparqlSearchQuery).catch(() => [])
|
||||
);
|
||||
}
|
||||
|
||||
// Add TypeDB if enabled
|
||||
if (includeTypeDB) {
|
||||
const typeqlSearchQuery = `match $x isa heritage_custodian, has name $n; get $x, $n; limit ${maxTypeDBResults};`;
|
||||
retrievalPromises.push(
|
||||
typedbQuery(typeqlSearchQuery).catch(() => [])
|
||||
);
|
||||
}
|
||||
|
||||
// Wait for all retrievals
|
||||
const results = await Promise.all(retrievalPromises);
|
||||
|
||||
const qdrantResults = results[0] as QdrantSearchResult[];
|
||||
const sparqlResults = (includeSparql ? results[1] : []) as Record<string, unknown>[];
|
||||
const typedbResults = (includeTypeDB ? results[2] || results[1] : []) as Record<string, unknown>[];
|
||||
|
||||
const context: RAGContext = {
|
||||
qdrantResults,
|
||||
sparqlResults,
|
||||
typedbResults,
|
||||
totalRetrieved: qdrantResults.length + sparqlResults.length + typedbResults.length,
|
||||
};
|
||||
|
||||
setLastContext(context);
|
||||
|
||||
// Call DSPy to generate response
|
||||
const dspyResponse = await callDSPy(question, context, options);
|
||||
|
||||
// Detect visualization type
|
||||
const vizType = dspyResponse.visualizationType || detectVisualizationType(question, context);
|
||||
|
||||
// Extract visualization data
|
||||
const vizData = extractVisualizationData(vizType, context);
|
||||
|
||||
// Build sources list
|
||||
const sources: RAGSource[] = [
|
||||
...qdrantResults.slice(0, 5).map(r => ({
|
||||
database: 'qdrant' as const,
|
||||
id: String(r.id),
|
||||
name: String(r.payload?.name || r.payload?.custodian_name || ''),
|
||||
score: r.score,
|
||||
snippet: String(r.payload?.description || '').slice(0, 200),
|
||||
})),
|
||||
];
|
||||
|
||||
return {
|
||||
answer: dspyResponse.answer,
|
||||
sparqlQuery: dspyResponse.sparqlQuery,
|
||||
typeqlQuery: dspyResponse.typeqlQuery,
|
||||
context,
|
||||
visualizationType: vizType,
|
||||
visualizationData: vizData,
|
||||
sources,
|
||||
confidence: dspyResponse.confidence,
|
||||
};
|
||||
|
||||
} catch (err) {
|
||||
const error = err instanceof Error ? err : new Error('RAG query failed');
|
||||
setError(error);
|
||||
throw error;
|
||||
} finally {
|
||||
setIsLoading(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Direct Qdrant search (for debugging/advanced use)
|
||||
*/
|
||||
const searchQdrant = useCallback(async (
|
||||
query: string,
|
||||
limit: number = 10
|
||||
): Promise<QdrantSearchResult[]> => {
|
||||
return qdrantSearch(query, limit);
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Direct SPARQL query (for debugging/advanced use)
|
||||
*/
|
||||
const querySparql = useCallback(async (
|
||||
sparql: string
|
||||
): Promise<Record<string, unknown>[]> => {
|
||||
return sparqlQuery(sparql);
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Direct TypeDB query (for debugging/advanced use)
|
||||
*/
|
||||
const queryTypeDB = useCallback(async (
|
||||
typeql: string
|
||||
): Promise<Record<string, unknown>[]> => {
|
||||
return typedbQuery(typeql);
|
||||
}, []);
|
||||
|
||||
/**
|
||||
* Clear cached context
|
||||
*/
|
||||
const clearContext = useCallback(() => {
|
||||
setLastContext(null);
|
||||
setError(null);
|
||||
}, []);
|
||||
|
||||
return {
|
||||
isLoading,
|
||||
error,
|
||||
lastContext,
|
||||
queryRAG,
|
||||
searchQdrant,
|
||||
querySparql,
|
||||
queryTypeDB,
|
||||
clearContext,
|
||||
detectVisualizationType,
|
||||
};
|
||||
}
|
||||
|
||||
export default useMultiDatabaseRAG;
|
||||
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
import { useState, useEffect, useCallback, useRef } from 'react';
|
||||
import maplibregl from 'maplibre-gl';
|
||||
import type { GeoJSONSource, LngLatLike } from 'maplibre-gl';
|
||||
import type {
|
||||
Archive,
|
||||
WerkgebiedMapping,
|
||||
|
|
@ -375,7 +376,7 @@ export function useWerkgebiedMapLibre(map: maplibregl.Map | null): WerkgebiedHoo
|
|||
const hideWerkgebied = useCallback(() => {
|
||||
if (!map) return;
|
||||
|
||||
const source = map.getSource(WERKGEBIED_SOURCE_ID) as maplibregl.GeoJSONSource | undefined;
|
||||
const source = map.getSource(WERKGEBIED_SOURCE_ID) as GeoJSONSource | undefined;
|
||||
if (source) {
|
||||
source.setData({
|
||||
type: 'FeatureCollection',
|
||||
|
|
@ -486,7 +487,7 @@ export function useWerkgebiedMapLibre(map: maplibregl.Map | null): WerkgebiedHoo
|
|||
}
|
||||
|
||||
// Update source data
|
||||
const source = map.getSource(WERKGEBIED_SOURCE_ID) as maplibregl.GeoJSONSource | undefined;
|
||||
const source = map.getSource(WERKGEBIED_SOURCE_ID) as GeoJSONSource | undefined;
|
||||
if (source) {
|
||||
source.setData({
|
||||
type: 'FeatureCollection',
|
||||
|
|
@ -509,12 +510,12 @@ export function useWerkgebiedMapLibre(map: maplibregl.Map | null): WerkgebiedHoo
|
|||
const geometry = feature.geometry;
|
||||
if (geometry.type === 'Polygon') {
|
||||
geometry.coordinates[0].forEach((coord: number[]) => {
|
||||
bounds.extend([coord[0], coord[1]] as maplibregl.LngLatLike);
|
||||
bounds.extend([coord[0], coord[1]] as LngLatLike);
|
||||
});
|
||||
} else if (geometry.type === 'MultiPolygon') {
|
||||
geometry.coordinates.forEach((polygon: number[][][]) => {
|
||||
polygon[0].forEach((coord: number[]) => {
|
||||
bounds.extend([coord[0], coord[1]] as maplibregl.LngLatLike);
|
||||
bounds.extend([coord[0], coord[1]] as LngLatLike);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
|
@ -843,7 +844,7 @@ export function useWerkgebiedMapLibre(map: maplibregl.Map | null): WerkgebiedHoo
|
|||
}
|
||||
|
||||
// Update source
|
||||
const source = map.getSource(WERKGEBIED_SOURCE_ID) as maplibregl.GeoJSONSource | undefined;
|
||||
const source = map.getSource(WERKGEBIED_SOURCE_ID) as GeoJSONSource | undefined;
|
||||
if (source) {
|
||||
source.setData({
|
||||
type: 'FeatureCollection',
|
||||
|
|
@ -864,12 +865,12 @@ export function useWerkgebiedMapLibre(map: maplibregl.Map | null): WerkgebiedHoo
|
|||
const geometry = feature.geometry;
|
||||
if (geometry.type === 'Polygon') {
|
||||
(geometry.coordinates[0] as number[][]).forEach((coord) => {
|
||||
bounds.extend([coord[0], coord[1]] as maplibregl.LngLatLike);
|
||||
bounds.extend([coord[0], coord[1]] as LngLatLike);
|
||||
});
|
||||
} else if (geometry.type === 'MultiPolygon') {
|
||||
(geometry.coordinates as number[][][][]).forEach((polygon) => {
|
||||
(polygon[0] as number[][]).forEach((coord) => {
|
||||
bounds.extend([coord[0], coord[1]] as maplibregl.LngLatLike);
|
||||
bounds.extend([coord[0], coord[1]] as LngLatLike);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
|
|
|||
386
frontend/src/lib/custodian-types.ts
Normal file
386
frontend/src/lib/custodian-types.ts
Normal file
|
|
@ -0,0 +1,386 @@
|
|||
/**
|
||||
* GLAMORCUBESFIXPHDNT Taxonomy - Heritage Custodian Type Configuration
|
||||
*
|
||||
* This module provides centralized color, label, and metadata configuration
|
||||
* for the 19-type GLAMORCUBESFIXPHDNT heritage custodian taxonomy.
|
||||
*
|
||||
* Mnemonic: Galleries, Libraries, Archives, Museums, Official institutions,
|
||||
* Research centers, Corporations, Unknown, Botanical gardens/zoos,
|
||||
* Education providers, Societies, Features, Intangible heritage groups,
|
||||
* miXed, Personal collections, Holy sites, Digital platforms, NGOs,
|
||||
* Taste/smell heritage
|
||||
*
|
||||
* @see AGENTS.md - Institution Type Taxonomy section
|
||||
* @see schemas/20251121/linkml/modules/enums/CustodianTypeEnum.yaml
|
||||
*/
|
||||
|
||||
/**
|
||||
* Single-letter codes for each custodian type (used in GHCID generation)
|
||||
*/
|
||||
export type CustodianTypeCode =
|
||||
| 'G' | 'L' | 'A' | 'M' | 'O' | 'R' | 'C' | 'U' | 'B' | 'E'
|
||||
| 'S' | 'F' | 'I' | 'X' | 'P' | 'H' | 'D' | 'N' | 'T';
|
||||
|
||||
/**
|
||||
* Full custodian type names (matches LinkML enum values)
|
||||
*/
|
||||
export type CustodianType =
|
||||
| 'GALLERY' | 'LIBRARY' | 'ARCHIVE' | 'MUSEUM' | 'OFFICIAL_INSTITUTION'
|
||||
| 'RESEARCH_CENTER' | 'CORPORATION' | 'UNKNOWN' | 'BOTANICAL_ZOO'
|
||||
| 'EDUCATION_PROVIDER' | 'COLLECTING_SOCIETY' | 'FEATURES'
|
||||
| 'INTANGIBLE_HERITAGE_GROUP' | 'MIXED' | 'PERSONAL_COLLECTION'
|
||||
| 'HOLY_SITES' | 'DIGITAL_PLATFORM' | 'NGO' | 'TASTE_SMELL';
|
||||
|
||||
/**
|
||||
* Bilingual labels for each custodian type
|
||||
*/
|
||||
export interface BilingualLabel {
|
||||
nl: string;
|
||||
en: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Complete configuration for a single custodian type
|
||||
*/
|
||||
export interface CustodianTypeConfig {
|
||||
/** Single-letter code for GHCID */
|
||||
code: CustodianTypeCode;
|
||||
/** Full enum name (LinkML) */
|
||||
name: CustodianType;
|
||||
/** Primary color (hex) - used for map markers, badges, etc. */
|
||||
color: string;
|
||||
/** Light background color (hex) - used for cards, highlights */
|
||||
bgColor: string;
|
||||
/** Border/accent color (hex) - used for outlines */
|
||||
borderColor: string;
|
||||
/** Text color for high contrast on bgColor */
|
||||
textColor: string;
|
||||
/** Bilingual display labels */
|
||||
label: BilingualLabel;
|
||||
/** Short description */
|
||||
description: BilingualLabel;
|
||||
/** Icon name (Lucide React) */
|
||||
icon: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Complete GLAMORCUBESFIXPHDNT taxonomy configuration
|
||||
*
|
||||
* Colors are designed to be:
|
||||
* - Distinguishable from each other
|
||||
* - Colorblind-friendly where possible
|
||||
* - Consistent with the existing map page colors
|
||||
* - Suitable for both light and dark modes (primary colors work on both)
|
||||
*/
|
||||
export const CUSTODIAN_TYPES: Record<CustodianTypeCode, CustodianTypeConfig> = {
|
||||
G: {
|
||||
code: 'G',
|
||||
name: 'GALLERY',
|
||||
color: '#00bcd4', // Cyan
|
||||
bgColor: '#e0f7fa',
|
||||
borderColor: '#0097a7',
|
||||
textColor: '#006064',
|
||||
label: { nl: 'Galerie', en: 'Gallery' },
|
||||
description: { nl: 'Kunstgalerij of tentoonstellingsruimte', en: 'Art gallery or exhibition space' },
|
||||
icon: 'Frame',
|
||||
},
|
||||
L: {
|
||||
code: 'L',
|
||||
name: 'LIBRARY',
|
||||
color: '#2ecc71', // Green
|
||||
bgColor: '#e8f5e9',
|
||||
borderColor: '#27ae60',
|
||||
textColor: '#1b5e20',
|
||||
label: { nl: 'Bibliotheek', en: 'Library' },
|
||||
description: { nl: 'Openbare, academische of gespecialiseerde bibliotheek', en: 'Public, academic, or specialized library' },
|
||||
icon: 'BookOpen',
|
||||
},
|
||||
A: {
|
||||
code: 'A',
|
||||
name: 'ARCHIVE',
|
||||
color: '#3498db', // Blue
|
||||
bgColor: '#e3f2fd',
|
||||
borderColor: '#2980b9',
|
||||
textColor: '#0d47a1',
|
||||
label: { nl: 'Archief', en: 'Archive' },
|
||||
description: { nl: 'Overheids-, bedrijfs- of persoonlijk archief', en: 'Government, corporate, or personal archive' },
|
||||
icon: 'Archive',
|
||||
},
|
||||
M: {
|
||||
code: 'M',
|
||||
name: 'MUSEUM',
|
||||
color: '#e74c3c', // Red
|
||||
bgColor: '#ffebee',
|
||||
borderColor: '#c0392b',
|
||||
textColor: '#b71c1c',
|
||||
label: { nl: 'Museum', en: 'Museum' },
|
||||
description: { nl: 'Kunst-, geschiedenis- of wetenschapsmuseum', en: 'Art, history, or science museum' },
|
||||
icon: 'Building2',
|
||||
},
|
||||
O: {
|
||||
code: 'O',
|
||||
name: 'OFFICIAL_INSTITUTION',
|
||||
color: '#f39c12', // Orange
|
||||
bgColor: '#fff8e1',
|
||||
borderColor: '#e67e22',
|
||||
textColor: '#e65100',
|
||||
label: { nl: 'Officieel', en: 'Official' },
|
||||
description: { nl: 'Overheidserfgoedinstantie of -platform', en: 'Government heritage agency or platform' },
|
||||
icon: 'Landmark',
|
||||
},
|
||||
R: {
|
||||
code: 'R',
|
||||
name: 'RESEARCH_CENTER',
|
||||
color: '#1abc9c', // Teal
|
||||
bgColor: '#e0f2f1',
|
||||
borderColor: '#16a085',
|
||||
textColor: '#004d40',
|
||||
label: { nl: 'Onderzoek', en: 'Research' },
|
||||
description: { nl: 'Onderzoeksinstituut of documentatiecentrum', en: 'Research institute or documentation center' },
|
||||
icon: 'Search',
|
||||
},
|
||||
C: {
|
||||
code: 'C',
|
||||
name: 'CORPORATION',
|
||||
color: '#795548', // Brown
|
||||
bgColor: '#efebe9',
|
||||
borderColor: '#5d4037',
|
||||
textColor: '#3e2723',
|
||||
label: { nl: 'Bedrijf', en: 'Corporation' },
|
||||
description: { nl: 'Bedrijfserfgoedcollectie', en: 'Corporate heritage collection' },
|
||||
icon: 'Building',
|
||||
},
|
||||
U: {
|
||||
code: 'U',
|
||||
name: 'UNKNOWN',
|
||||
color: '#9e9e9e', // Gray
|
||||
bgColor: '#f5f5f5',
|
||||
borderColor: '#757575',
|
||||
textColor: '#424242',
|
||||
label: { nl: 'Onbekend', en: 'Unknown' },
|
||||
description: { nl: 'Type kan niet worden bepaald', en: 'Type cannot be determined' },
|
||||
icon: 'HelpCircle',
|
||||
},
|
||||
B: {
|
||||
code: 'B',
|
||||
name: 'BOTANICAL_ZOO',
|
||||
color: '#4caf50', // Green (different shade)
|
||||
bgColor: '#e8f5e9',
|
||||
borderColor: '#388e3c',
|
||||
textColor: '#1b5e20',
|
||||
label: { nl: 'Botanisch', en: 'Botanical' },
|
||||
description: { nl: 'Botanische tuin of dierentuin', en: 'Botanical garden or zoo' },
|
||||
icon: 'Leaf',
|
||||
},
|
||||
E: {
|
||||
code: 'E',
|
||||
name: 'EDUCATION_PROVIDER',
|
||||
color: '#ff9800', // Amber
|
||||
bgColor: '#fff3e0',
|
||||
borderColor: '#f57c00',
|
||||
textColor: '#e65100',
|
||||
label: { nl: 'Onderwijs', en: 'Education' },
|
||||
description: { nl: 'Onderwijsinstelling met collecties', en: 'Educational institution with collections' },
|
||||
icon: 'GraduationCap',
|
||||
},
|
||||
S: {
|
||||
code: 'S',
|
||||
name: 'COLLECTING_SOCIETY',
|
||||
color: '#9b59b6', // Purple
|
||||
bgColor: '#f3e5f5',
|
||||
borderColor: '#8e24aa',
|
||||
textColor: '#4a148c',
|
||||
label: { nl: 'Vereniging', en: 'Society' },
|
||||
description: { nl: 'Vereniging die gespecialiseerde materialen verzamelt', en: 'Society collecting specialized materials' },
|
||||
icon: 'Users',
|
||||
},
|
||||
F: {
|
||||
code: 'F',
|
||||
name: 'FEATURES',
|
||||
color: '#95a5a6', // Gray-green
|
||||
bgColor: '#eceff1',
|
||||
borderColor: '#78909c',
|
||||
textColor: '#37474f',
|
||||
label: { nl: 'Monumenten', en: 'Features' },
|
||||
description: { nl: 'Fysieke landschapskenmerken met erfgoedwaarde', en: 'Physical landscape features with heritage significance' },
|
||||
icon: 'Map',
|
||||
},
|
||||
I: {
|
||||
code: 'I',
|
||||
name: 'INTANGIBLE_HERITAGE_GROUP',
|
||||
color: '#673ab7', // Deep purple
|
||||
bgColor: '#ede7f6',
|
||||
borderColor: '#5e35b1',
|
||||
textColor: '#311b92',
|
||||
label: { nl: 'Immaterieel', en: 'Intangible' },
|
||||
description: { nl: 'Organisatie die immaterieel erfgoed bewaart', en: 'Organization preserving intangible heritage' },
|
||||
icon: 'Music',
|
||||
},
|
||||
X: {
|
||||
code: 'X',
|
||||
name: 'MIXED',
|
||||
color: '#607d8b', // Blue-gray
|
||||
bgColor: '#eceff1',
|
||||
borderColor: '#546e7a',
|
||||
textColor: '#263238',
|
||||
label: { nl: 'Gemengd', en: 'Mixed' },
|
||||
description: { nl: 'Meerdere types (gecombineerde faciliteit)', en: 'Multiple types (combined facility)' },
|
||||
icon: 'Layers',
|
||||
},
|
||||
P: {
|
||||
code: 'P',
|
||||
name: 'PERSONAL_COLLECTION',
|
||||
color: '#8bc34a', // Light green
|
||||
bgColor: '#f1f8e9',
|
||||
borderColor: '#689f38',
|
||||
textColor: '#33691e',
|
||||
label: { nl: 'Persoonlijk', en: 'Personal' },
|
||||
description: { nl: 'Privé persoonlijke collectie', en: 'Private personal collection' },
|
||||
icon: 'User',
|
||||
},
|
||||
H: {
|
||||
code: 'H',
|
||||
name: 'HOLY_SITES',
|
||||
color: '#607d8b', // Blue-gray (same as Mixed - consider changing)
|
||||
bgColor: '#fce4ec',
|
||||
borderColor: '#c2185b',
|
||||
textColor: '#880e4f',
|
||||
label: { nl: 'Heilige plaatsen', en: 'Holy sites' },
|
||||
description: { nl: 'Religieuze erfgoedlocaties en -instellingen', en: 'Religious heritage sites and institutions' },
|
||||
icon: 'Church',
|
||||
},
|
||||
D: {
|
||||
code: 'D',
|
||||
name: 'DIGITAL_PLATFORM',
|
||||
color: '#34495e', // Dark gray-blue
|
||||
bgColor: '#e8eaf6',
|
||||
borderColor: '#3949ab',
|
||||
textColor: '#1a237e',
|
||||
label: { nl: 'Digitaal', en: 'Digital' },
|
||||
description: { nl: 'Digitale erfgoedplatforms en repositories', en: 'Digital heritage platforms and repositories' },
|
||||
icon: 'Monitor',
|
||||
},
|
||||
N: {
|
||||
code: 'N',
|
||||
name: 'NGO',
|
||||
color: '#e91e63', // Pink
|
||||
bgColor: '#fce4ec',
|
||||
borderColor: '#c2185b',
|
||||
textColor: '#880e4f',
|
||||
label: { nl: 'NGO', en: 'NGO' },
|
||||
description: { nl: 'Niet-gouvernementele erfgoedorganisatie', en: 'Non-governmental heritage organization' },
|
||||
icon: 'Heart',
|
||||
},
|
||||
T: {
|
||||
code: 'T',
|
||||
name: 'TASTE_SMELL',
|
||||
color: '#ff5722', // Deep orange
|
||||
bgColor: '#fbe9e7',
|
||||
borderColor: '#e64a19',
|
||||
textColor: '#bf360c',
|
||||
label: { nl: 'Smaak/geur', en: 'Taste/smell' },
|
||||
description: { nl: 'Culinair en olfactorisch erfgoedinstelling', en: 'Culinary and olfactory heritage institution' },
|
||||
icon: 'ChefHat',
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Get custodian type configuration by single-letter code
|
||||
* Returns undefined if code is not valid
|
||||
*/
|
||||
export function getCustodianTypeByCode(code: CustodianTypeCode | string): CustodianTypeConfig | undefined {
|
||||
if (!code || typeof code !== 'string') return undefined;
|
||||
const upperCode = code.toUpperCase();
|
||||
if (upperCode in CUSTODIAN_TYPES) {
|
||||
return CUSTODIAN_TYPES[upperCode as CustodianTypeCode];
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get custodian type configuration by single-letter code, with fallback to UNKNOWN
|
||||
* Always returns a valid config (never undefined)
|
||||
*/
|
||||
export function getCustodianTypeByCodeSafe(code: CustodianTypeCode | string): CustodianTypeConfig {
|
||||
return getCustodianTypeByCode(code) ?? CUSTODIAN_TYPES.U;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get custodian type configuration by full name
|
||||
*/
|
||||
export function getCustodianTypeByName(name: CustodianType): CustodianTypeConfig | undefined {
|
||||
return Object.values(CUSTODIAN_TYPES).find(t => t.name === name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all custodian type codes as array (in GLAMORCUBESFIXPHDNT order)
|
||||
*/
|
||||
export const CUSTODIAN_TYPE_CODES: CustodianTypeCode[] = [
|
||||
'G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E',
|
||||
'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'
|
||||
];
|
||||
|
||||
/**
|
||||
* Color-only map for backwards compatibility with existing code
|
||||
* (Same as TYPE_COLORS in NDEMapPageMapLibre.tsx)
|
||||
*/
|
||||
export const CUSTODIAN_TYPE_COLORS: Record<CustodianTypeCode, string> = Object.fromEntries(
|
||||
CUSTODIAN_TYPE_CODES.map(code => [code, CUSTODIAN_TYPES[code].color])
|
||||
) as Record<CustodianTypeCode, string>;
|
||||
|
||||
/**
|
||||
* Label-only map for backwards compatibility
|
||||
* (Same as TYPE_NAMES in NDEMapPageMapLibre.tsx)
|
||||
*/
|
||||
export const CUSTODIAN_TYPE_LABELS: Record<CustodianTypeCode, BilingualLabel> = Object.fromEntries(
|
||||
CUSTODIAN_TYPE_CODES.map(code => [code, CUSTODIAN_TYPES[code].label])
|
||||
) as Record<CustodianTypeCode, BilingualLabel>;
|
||||
|
||||
/**
|
||||
* Full name to code mapping
|
||||
*/
|
||||
export const NAME_TO_CODE: Record<CustodianType, CustodianTypeCode> = Object.fromEntries(
|
||||
Object.entries(CUSTODIAN_TYPES).map(([code, config]) => [config.name, code as CustodianTypeCode])
|
||||
) as Record<CustodianType, CustodianTypeCode>;
|
||||
|
||||
/**
|
||||
* Code to full name mapping
|
||||
*/
|
||||
export const CODE_TO_NAME: Record<CustodianTypeCode, CustodianType> = Object.fromEntries(
|
||||
CUSTODIAN_TYPE_CODES.map(code => [code, CUSTODIAN_TYPES[code].name])
|
||||
) as Record<CustodianTypeCode, CustodianType>;
|
||||
|
||||
/**
|
||||
* Parse a custodian type string (code or full name) to a code
|
||||
* Returns undefined if not recognized
|
||||
*/
|
||||
export function parseCustodianType(input: string): CustodianTypeCode | undefined {
|
||||
// Check if it's already a single-letter code
|
||||
if (input.length === 1 && CUSTODIAN_TYPE_CODES.includes(input.toUpperCase() as CustodianTypeCode)) {
|
||||
return input.toUpperCase() as CustodianTypeCode;
|
||||
}
|
||||
|
||||
// Check if it's a full name
|
||||
const upperInput = input.toUpperCase().replace(/[-\s]/g, '_');
|
||||
if (upperInput in NAME_TO_CODE) {
|
||||
return NAME_TO_CODE[upperInput as CustodianType];
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get display color for a custodian type (by code or name)
|
||||
*/
|
||||
export function getCustodianTypeColor(input: string): string {
|
||||
const code = parseCustodianType(input);
|
||||
return code ? CUSTODIAN_TYPES[code].color : CUSTODIAN_TYPES.U.color;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get display label for a custodian type in specified language
|
||||
*/
|
||||
export function getCustodianTypeLabel(input: string, lang: 'nl' | 'en' = 'en'): string {
|
||||
const code = parseCustodianType(input);
|
||||
return code ? CUSTODIAN_TYPES[code].label[lang] : CUSTODIAN_TYPES.U.label[lang];
|
||||
}
|
||||
237
frontend/src/lib/schema-custodian-mapping.ts
Normal file
237
frontend/src/lib/schema-custodian-mapping.ts
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
/**
|
||||
* schema-custodian-mapping.ts - Maps LinkML schema elements to CustodianTypes
|
||||
*
|
||||
* This module provides mappings between schema elements (classes, slots, enums)
|
||||
* and the GLAMORCUBESFIXPHDNT custodian types they primarily relate to.
|
||||
*
|
||||
* Used by the CustodianTypeIndicator component to show which type(s) a schema
|
||||
* element is most relevant to.
|
||||
*/
|
||||
|
||||
import type { CustodianTypeCode } from './custodian-types';
|
||||
|
||||
/**
|
||||
* Mapping of schema class names to relevant custodian types
|
||||
*
|
||||
* Key: Class name (as it appears in LinkML schema)
|
||||
* Value: Array of CustodianTypeCode(s) the class relates to
|
||||
*
|
||||
* Most classes relate to ALL types (universal), but some are type-specific.
|
||||
*/
|
||||
export const CLASS_TO_CUSTODIAN_TYPE: Record<string, CustodianTypeCode[]> = {
|
||||
// Universal classes (apply to all custodian types)
|
||||
'CustodianObservation': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'CustodianName': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'CustodianReconstruction': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'Location': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'GHCID': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'Provenance': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
|
||||
// Place-related classes
|
||||
'FeaturePlace': ['F'], // Features (monuments, statues)
|
||||
'FeaturePlaceClass': ['F'],
|
||||
|
||||
// Collection-related classes
|
||||
'Collection': ['G', 'L', 'A', 'M', 'B', 'H'], // Galleries, Libraries, Archives, Museums, Botanical, Holy sites
|
||||
'CollectionItem': ['G', 'L', 'A', 'M', 'B', 'H'],
|
||||
|
||||
// Digital platform classes
|
||||
'DigitalPlatform': ['D'], // Digital platforms
|
||||
'DigitalPlatformClass': ['D'],
|
||||
'WebObservation': ['D'],
|
||||
'WebClaim': ['D'],
|
||||
|
||||
// Archive-specific
|
||||
'ArchivalFonds': ['A'], // Archives
|
||||
'ArchivalSeries': ['A'],
|
||||
'ArchivalRecord': ['A'],
|
||||
|
||||
// Library-specific
|
||||
'BibliographicRecord': ['L'], // Libraries
|
||||
'Catalog': ['L'],
|
||||
|
||||
// Museum-specific
|
||||
'Exhibition': ['M', 'G'], // Museums, Galleries
|
||||
'MuseumObject': ['M'],
|
||||
|
||||
// Research-related
|
||||
'ResearchProject': ['R'], // Research centers
|
||||
'Publication': ['R', 'L'], // Research centers, Libraries
|
||||
|
||||
// Education-related
|
||||
'Course': ['E'], // Education providers
|
||||
'LearningResource': ['E', 'D'], // Education, Digital platforms
|
||||
|
||||
// Religious heritage
|
||||
'ReligiousCollection': ['H'], // Holy sites
|
||||
'LiturgicalObject': ['H'],
|
||||
|
||||
// Botanical/Zoo
|
||||
'LivingCollection': ['B'], // Botanical gardens/zoos
|
||||
'Specimen': ['B'],
|
||||
|
||||
// Intangible heritage
|
||||
'IntangibleHeritage': ['I'], // Intangible heritage groups
|
||||
'Performance': ['I'],
|
||||
'Tradition': ['I'],
|
||||
|
||||
// Organizational
|
||||
'StaffRole': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'E', 'S', 'H', 'N'],
|
||||
'OrganizationalChange': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
|
||||
// Personal collections
|
||||
'PersonalCollection': ['P'], // Personal collections
|
||||
'PrivateArchive': ['P'],
|
||||
|
||||
// Corporate
|
||||
'CorporateCollection': ['C'], // Corporations
|
||||
'CorporateArchive': ['C'],
|
||||
|
||||
// Society-related
|
||||
'SocietyMembership': ['S'], // Collecting societies
|
||||
'HeemkundigeKring': ['S'],
|
||||
|
||||
// Taste/Smell heritage
|
||||
'CulinaryHeritage': ['T'], // Taste/smell heritage
|
||||
'Recipe': ['T'],
|
||||
'Formulation': ['T'],
|
||||
|
||||
// NGO-specific
|
||||
'AdvocacyOrganization': ['N'], // NGOs
|
||||
'HeritageInitiative': ['N'],
|
||||
};
|
||||
|
||||
/**
|
||||
* Mapping of schema slot names to relevant custodian types
|
||||
*/
|
||||
export const SLOT_TO_CUSTODIAN_TYPE: Record<string, CustodianTypeCode[]> = {
|
||||
// Universal slots
|
||||
'custodian_name': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'location': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'ghcid': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'provenance': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
|
||||
// Archive-specific slots
|
||||
'fonds': ['A'],
|
||||
'finding_aid': ['A'],
|
||||
'archival_hierarchy': ['A'],
|
||||
|
||||
// Library-specific slots
|
||||
'call_number': ['L'],
|
||||
'bibliographic_record': ['L'],
|
||||
'catalog_entry': ['L'],
|
||||
|
||||
// Museum-specific slots
|
||||
'accession_number': ['M'],
|
||||
'exhibition_history': ['M', 'G'],
|
||||
'conservation_status': ['M', 'G'],
|
||||
|
||||
// Digital platform slots
|
||||
'platform_url': ['D'],
|
||||
'api_endpoint': ['D'],
|
||||
'metadata_format': ['D', 'L', 'A', 'M'],
|
||||
|
||||
// Religious heritage slots
|
||||
'denomination': ['H'],
|
||||
'consecration_date': ['H'],
|
||||
'liturgical_calendar': ['H'],
|
||||
|
||||
// Botanical/Zoo slots
|
||||
'species': ['B'],
|
||||
'habitat': ['B'],
|
||||
'conservation_program': ['B'],
|
||||
|
||||
// Intangible heritage slots
|
||||
'tradition_type': ['I'],
|
||||
'transmission_method': ['I'],
|
||||
'practitioners': ['I'],
|
||||
|
||||
// Taste/Smell slots
|
||||
'recipe_origin': ['T'],
|
||||
'ingredients': ['T'],
|
||||
'preparation_method': ['T'],
|
||||
};
|
||||
|
||||
/**
|
||||
* Mapping of enum names to relevant custodian types
|
||||
*/
|
||||
export const ENUM_TO_CUSTODIAN_TYPE: Record<string, CustodianTypeCode[]> = {
|
||||
// Universal enums
|
||||
'CustodianTypeEnum': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'DataTierEnum': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'DataSourceEnum': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'CountryCodeEnum': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
'LanguageCodeEnum': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'],
|
||||
|
||||
// Type-specific enums
|
||||
'ArchivalLevelEnum': ['A'],
|
||||
'BibliographicFormatEnum': ['L'],
|
||||
'ExhibitionTypeEnum': ['M', 'G'],
|
||||
'DigitalPlatformTypeEnum': ['D'],
|
||||
'ReligiousDenominationEnum': ['H'],
|
||||
'SpeciesClassificationEnum': ['B'],
|
||||
'IntangibleHeritageTypeEnum': ['I'],
|
||||
'CulinaryHeritageTypeEnum': ['T'],
|
||||
'StaffRoleTypeEnum': ['G', 'L', 'A', 'M', 'O', 'R', 'C', 'E', 'S', 'H', 'N'],
|
||||
};
|
||||
|
||||
/**
|
||||
* Default types for elements not explicitly mapped (universal)
|
||||
*/
|
||||
export const DEFAULT_CUSTODIAN_TYPES: CustodianTypeCode[] = [
|
||||
'G', 'L', 'A', 'M', 'O', 'R', 'C', 'U', 'B', 'E', 'S', 'F', 'I', 'X', 'P', 'H', 'D', 'N', 'T'
|
||||
];
|
||||
|
||||
/**
|
||||
* Get custodian types for a schema class
|
||||
*/
|
||||
export function getCustodianTypesForClass(className: string): CustodianTypeCode[] {
|
||||
return CLASS_TO_CUSTODIAN_TYPE[className] || DEFAULT_CUSTODIAN_TYPES;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get custodian types for a schema slot
|
||||
*/
|
||||
export function getCustodianTypesForSlot(slotName: string): CustodianTypeCode[] {
|
||||
return SLOT_TO_CUSTODIAN_TYPE[slotName] || DEFAULT_CUSTODIAN_TYPES;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get custodian types for a schema enum
|
||||
*/
|
||||
export function getCustodianTypesForEnum(enumName: string): CustodianTypeCode[] {
|
||||
return ENUM_TO_CUSTODIAN_TYPE[enumName] || DEFAULT_CUSTODIAN_TYPES;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a schema element is universal (applies to all types)
|
||||
*/
|
||||
export function isUniversalElement(types: CustodianTypeCode[]): boolean {
|
||||
return types.length >= 19; // All 19 types
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the primary custodian type for a schema element
|
||||
* Returns the first type, or 'U' (Unknown) if empty
|
||||
*/
|
||||
export function getPrimaryCustodianType(types: CustodianTypeCode[]): CustodianTypeCode {
|
||||
if (types.length === 0) return 'U';
|
||||
// For universal elements, return the most common types first
|
||||
if (isUniversalElement(types)) {
|
||||
return 'M'; // Museum as default primary for universal
|
||||
}
|
||||
return types[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a compact representation of custodian types
|
||||
* For universal: returns "ALL"
|
||||
* For few types: returns joined string (e.g., "MAL")
|
||||
* For many types: returns abbreviated (e.g., "MA+5")
|
||||
*/
|
||||
export function getCompactTypeRepresentation(types: CustodianTypeCode[]): string {
|
||||
if (types.length === 0) return '?';
|
||||
if (isUniversalElement(types)) return 'ALL';
|
||||
if (types.length <= 4) return types.join('');
|
||||
return types.slice(0, 2).join('') + `+${types.length - 2}`;
|
||||
}
|
||||
|
|
@ -3662,3 +3662,730 @@ body.resizing-row * {
|
|||
color: #888;
|
||||
}
|
||||
}
|
||||
|
||||
/* ============================================
|
||||
EMBEDDING PROJECTOR STYLES
|
||||
============================================ */
|
||||
|
||||
.embedding-projector {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
height: 100%;
|
||||
background: #fafafa;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.projector-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 1rem 1.25rem;
|
||||
background: #fff;
|
||||
border-bottom: 1px solid #e0e0e0;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.projector-header h3 {
|
||||
margin: 0;
|
||||
font-size: 1.1rem;
|
||||
font-weight: 600;
|
||||
color: #333;
|
||||
}
|
||||
|
||||
.projector-stats {
|
||||
display: flex;
|
||||
gap: 1rem;
|
||||
font-size: 0.85rem;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.projector-stats span {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
}
|
||||
|
||||
.projector-controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 1.5rem;
|
||||
padding: 0.75rem 1.25rem;
|
||||
background: #fff;
|
||||
border-bottom: 1px solid #e0e0e0;
|
||||
flex-shrink: 0;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.control-section {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.control-section label {
|
||||
font-size: 0.8rem;
|
||||
font-weight: 500;
|
||||
color: #555;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.control-group {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.control-group select,
|
||||
.control-group input[type="number"] {
|
||||
padding: 0.4rem 0.6rem;
|
||||
font-size: 0.85rem;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 4px;
|
||||
background: #fff;
|
||||
color: #333;
|
||||
min-width: 80px;
|
||||
}
|
||||
|
||||
.control-group select:focus,
|
||||
.control-group input[type="number"]:focus {
|
||||
outline: none;
|
||||
border-color: #FFC107;
|
||||
box-shadow: 0 0 0 2px rgba(255, 193, 7, 0.2);
|
||||
}
|
||||
|
||||
.control-group input[type="number"] {
|
||||
width: 70px;
|
||||
}
|
||||
|
||||
.button-group {
|
||||
display: flex;
|
||||
gap: 0.25rem;
|
||||
}
|
||||
|
||||
.button-group button {
|
||||
padding: 0.4rem 0.75rem;
|
||||
font-size: 0.8rem;
|
||||
border: 1px solid #ddd;
|
||||
background: #fff;
|
||||
color: #555;
|
||||
cursor: pointer;
|
||||
transition: all 0.15s;
|
||||
}
|
||||
|
||||
.button-group button:first-child {
|
||||
border-radius: 4px 0 0 4px;
|
||||
}
|
||||
|
||||
.button-group button:last-child {
|
||||
border-radius: 0 4px 4px 0;
|
||||
}
|
||||
|
||||
.button-group button:not(:last-child) {
|
||||
border-right: none;
|
||||
}
|
||||
|
||||
.button-group button:hover {
|
||||
background: #f5f5f5;
|
||||
}
|
||||
|
||||
.button-group button.active {
|
||||
background: #FFC107;
|
||||
border-color: #FFC107;
|
||||
color: #000;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.compute-btn {
|
||||
padding: 0.5rem 1rem;
|
||||
font-size: 0.85rem;
|
||||
font-weight: 500;
|
||||
background: #FFC107;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
color: #000;
|
||||
cursor: pointer;
|
||||
transition: all 0.15s;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.4rem;
|
||||
}
|
||||
|
||||
.compute-btn:hover:not(:disabled) {
|
||||
background: #ffca2c;
|
||||
transform: translateY(-1px);
|
||||
}
|
||||
|
||||
.compute-btn:disabled {
|
||||
opacity: 0.6;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.projector-body {
|
||||
display: flex;
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.projector-main {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
overflow: hidden;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.projector-canvas {
|
||||
flex: 1;
|
||||
position: relative;
|
||||
background: #fff;
|
||||
border-right: 1px solid #e0e0e0;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.projector-canvas svg {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.projector-canvas .three-container {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
min-height: 500px;
|
||||
}
|
||||
|
||||
.projector-canvas .three-container canvas {
|
||||
width: 100% !important;
|
||||
height: 100% !important;
|
||||
}
|
||||
|
||||
.projector-canvas .point {
|
||||
cursor: pointer;
|
||||
transition: r 0.15s, opacity 0.15s;
|
||||
}
|
||||
|
||||
.projector-canvas .point:hover {
|
||||
r: 6;
|
||||
}
|
||||
|
||||
.projector-canvas .point.selected {
|
||||
stroke: #000;
|
||||
stroke-width: 2;
|
||||
}
|
||||
|
||||
.projector-canvas .point.neighbor {
|
||||
stroke: #FFC107;
|
||||
stroke-width: 2;
|
||||
}
|
||||
|
||||
.projector-canvas .point.dimmed {
|
||||
opacity: 0.15;
|
||||
}
|
||||
|
||||
.projector-sidebar {
|
||||
width: 280px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
background: #fff;
|
||||
border-left: 1px solid #e0e0e0;
|
||||
overflow: hidden;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.projector-search {
|
||||
padding: 0.75rem;
|
||||
border-bottom: 1px solid #e0e0e0;
|
||||
}
|
||||
|
||||
.projector-search input {
|
||||
width: 100%;
|
||||
padding: 0.5rem 0.75rem;
|
||||
font-size: 0.85rem;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 4px;
|
||||
background: #f8f8f8;
|
||||
}
|
||||
|
||||
.projector-search input:focus {
|
||||
outline: none;
|
||||
border-color: #FFC107;
|
||||
background: #fff;
|
||||
}
|
||||
|
||||
.projector-legend {
|
||||
padding: 0.75rem;
|
||||
border-bottom: 1px solid #e0e0e0;
|
||||
max-height: 200px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.projector-legend h4 {
|
||||
margin: 0 0 0.5rem 0;
|
||||
font-size: 0.8rem;
|
||||
font-weight: 600;
|
||||
color: #555;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.5px;
|
||||
}
|
||||
|
||||
.legend-items {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
}
|
||||
|
||||
.legend-item {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.8rem;
|
||||
color: #444;
|
||||
cursor: pointer;
|
||||
padding: 0.25rem;
|
||||
border-radius: 3px;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
|
||||
.legend-item:hover {
|
||||
background: #f5f5f5;
|
||||
}
|
||||
|
||||
.legend-color {
|
||||
width: 12px;
|
||||
height: 12px;
|
||||
border-radius: 2px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.legend-label {
|
||||
flex: 1;
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
|
||||
.legend-count {
|
||||
font-size: 0.75rem;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
.projector-details {
|
||||
flex: 1;
|
||||
padding: 0.75rem;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.projector-details h4 {
|
||||
margin: 0 0 0.75rem 0;
|
||||
font-size: 0.8rem;
|
||||
font-weight: 600;
|
||||
color: #555;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.5px;
|
||||
}
|
||||
|
||||
.detail-section {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.detail-section:last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
.detail-label {
|
||||
font-size: 0.75rem;
|
||||
color: #888;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
|
||||
.detail-value {
|
||||
font-size: 0.85rem;
|
||||
color: #333;
|
||||
word-break: break-all;
|
||||
}
|
||||
|
||||
.detail-value.id {
|
||||
font-family: 'SF Mono', 'Monaco', 'Inconsolata', monospace;
|
||||
font-size: 0.8rem;
|
||||
background: #f5f5f5;
|
||||
padding: 0.25rem 0.5rem;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
.nearest-neighbors {
|
||||
margin-top: 1rem;
|
||||
padding-top: 1rem;
|
||||
border-top: 1px solid #e0e0e0;
|
||||
}
|
||||
|
||||
.nearest-neighbors h5 {
|
||||
margin: 0 0 0.5rem 0;
|
||||
font-size: 0.8rem;
|
||||
font-weight: 600;
|
||||
color: #555;
|
||||
}
|
||||
|
||||
.neighbor-list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
}
|
||||
|
||||
.neighbor-item {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 0.35rem 0.5rem;
|
||||
background: #f8f8f8;
|
||||
border-radius: 3px;
|
||||
font-size: 0.8rem;
|
||||
cursor: pointer;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
|
||||
.neighbor-item:hover {
|
||||
background: #FFC107;
|
||||
}
|
||||
|
||||
.neighbor-id {
|
||||
font-family: 'SF Mono', 'Monaco', 'Inconsolata', monospace;
|
||||
font-size: 0.75rem;
|
||||
color: #555;
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
max-width: 140px;
|
||||
}
|
||||
|
||||
.neighbor-distance {
|
||||
font-size: 0.75rem;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
.no-selection {
|
||||
text-align: center;
|
||||
padding: 2rem 1rem;
|
||||
color: #888;
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
|
||||
.viz-placeholder {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
height: 100%;
|
||||
padding: 2rem;
|
||||
text-align: center;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.viz-placeholder svg {
|
||||
width: 64px;
|
||||
height: 64px;
|
||||
margin-bottom: 1rem;
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
.viz-placeholder p {
|
||||
margin: 0;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.viz-placeholder p:first-of-type {
|
||||
font-weight: 500;
|
||||
color: #444;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.viz-collection-selector {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 1rem;
|
||||
margin-top: 1.5rem;
|
||||
}
|
||||
|
||||
.viz-collection-selector select {
|
||||
padding: 0.5rem 1rem;
|
||||
font-size: 0.9rem;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 4px;
|
||||
background: #fff;
|
||||
min-width: 200px;
|
||||
}
|
||||
|
||||
.viz-collection-selector button {
|
||||
padding: 0.5rem 1.5rem;
|
||||
font-size: 0.9rem;
|
||||
font-weight: 500;
|
||||
background: #FFC107;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
color: #000;
|
||||
cursor: pointer;
|
||||
transition: all 0.15s;
|
||||
}
|
||||
|
||||
.viz-collection-selector button:hover:not(:disabled) {
|
||||
background: #ffca2c;
|
||||
}
|
||||
|
||||
.viz-collection-selector button:disabled {
|
||||
opacity: 0.6;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.variance-info {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.8rem;
|
||||
color: #666;
|
||||
padding: 0.5rem 1rem;
|
||||
background: #f8f8f8;
|
||||
border-radius: 4px;
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
.variance-info strong {
|
||||
color: #333;
|
||||
}
|
||||
|
||||
.computing-overlay {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
bottom: 0;
|
||||
background: rgba(255, 255, 255, 0.9);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
z-index: 10;
|
||||
}
|
||||
|
||||
.computing-spinner {
|
||||
width: 40px;
|
||||
height: 40px;
|
||||
border: 3px solid #e0e0e0;
|
||||
border-top-color: #FFC107;
|
||||
border-radius: 50%;
|
||||
animation: spin 1s linear infinite;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
to { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
.computing-overlay p {
|
||||
font-size: 0.9rem;
|
||||
color: #666;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.tooltip {
|
||||
position: absolute;
|
||||
pointer-events: none;
|
||||
background: rgba(0, 0, 0, 0.85);
|
||||
color: #fff;
|
||||
padding: 0.5rem 0.75rem;
|
||||
border-radius: 4px;
|
||||
font-size: 0.8rem;
|
||||
max-width: 250px;
|
||||
z-index: 100;
|
||||
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
|
||||
.tooltip-id {
|
||||
font-family: 'SF Mono', 'Monaco', 'Inconsolata', monospace;
|
||||
font-size: 0.75rem;
|
||||
opacity: 0.8;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
|
||||
.tooltip-payload {
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
/* Dark mode for Embedding Projector */
|
||||
[data-theme="dark"] .embedding-projector {
|
||||
background: #16161e;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-header {
|
||||
background: #1a1a2e;
|
||||
border-color: #333;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-header h3 {
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-stats {
|
||||
color: #888;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-controls {
|
||||
background: #1a1a2e;
|
||||
border-color: #333;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .control-section label {
|
||||
color: #aaa;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .control-group select,
|
||||
[data-theme="dark"] .control-group input[type="number"] {
|
||||
background: #252538;
|
||||
border-color: #404050;
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .button-group button {
|
||||
background: #252538;
|
||||
border-color: #404050;
|
||||
color: #aaa;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .button-group button:hover {
|
||||
background: #3a3a4e;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .button-group button.active {
|
||||
background: #FFC107;
|
||||
border-color: #FFC107;
|
||||
color: #000;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-canvas {
|
||||
background: #1a1a2e;
|
||||
border-color: #333;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-canvas .three-container {
|
||||
background: #1a1a2e;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-sidebar {
|
||||
background: #1a1a2e;
|
||||
border-color: #333;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-search input {
|
||||
background: #252538;
|
||||
border-color: #404050;
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-search input:focus {
|
||||
background: #2a2a3e;
|
||||
border-color: #FFC107;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-legend {
|
||||
border-color: #333;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-legend h4 {
|
||||
color: #aaa;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .legend-item {
|
||||
color: #ccc;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .legend-item:hover {
|
||||
background: #252538;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .legend-count {
|
||||
color: #666;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .projector-details h4 {
|
||||
color: #aaa;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .detail-label {
|
||||
color: #666;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .detail-value {
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .detail-value.id {
|
||||
background: #252538;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .nearest-neighbors {
|
||||
border-color: #333;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .nearest-neighbors h5 {
|
||||
color: #aaa;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .neighbor-item {
|
||||
background: #252538;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .neighbor-item:hover {
|
||||
background: #FFC107;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .neighbor-item:hover .neighbor-id,
|
||||
[data-theme="dark"] .neighbor-item:hover .neighbor-distance {
|
||||
color: #000;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .neighbor-id {
|
||||
color: #aaa;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .neighbor-distance {
|
||||
color: #666;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .no-selection {
|
||||
color: #666;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .viz-placeholder {
|
||||
color: #888;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .viz-placeholder p:first-of-type {
|
||||
color: #aaa;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .viz-collection-selector select {
|
||||
background: #252538;
|
||||
border-color: #404050;
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .variance-info {
|
||||
background: #252538;
|
||||
color: #888;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .variance-info strong {
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .computing-overlay {
|
||||
background: rgba(22, 22, 30, 0.95);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .computing-spinner {
|
||||
border-color: #333;
|
||||
border-top-color: #FFC107;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .computing-overlay p {
|
||||
color: #888;
|
||||
}
|
||||
|
|
|
|||
1370
frontend/src/pages/GesprekPage.css
Normal file
1370
frontend/src/pages/GesprekPage.css
Normal file
File diff suppressed because it is too large
Load diff
1075
frontend/src/pages/GesprekPage.tsx
Normal file
1075
frontend/src/pages/GesprekPage.tsx
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -318,6 +318,28 @@
|
|||
border-color: var(--primary-color, #1976d2);
|
||||
}
|
||||
|
||||
/* Tab separator for 2D/3D toggle */
|
||||
.linkml-viewer-page__tab-separator {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
color: var(--border-color, #e0e0e0);
|
||||
padding: 0 0.25rem;
|
||||
font-weight: 300;
|
||||
}
|
||||
|
||||
/* 3D/2D indicator toggle button */
|
||||
.linkml-viewer-page__tab--indicator {
|
||||
font-size: 0.8125rem;
|
||||
min-width: auto;
|
||||
padding: 0.375rem 0.75rem;
|
||||
}
|
||||
|
||||
/* 3D Custodian Type Indicator container */
|
||||
.linkml-viewer__custodian-indicator {
|
||||
margin-left: 0.5rem;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
/* Content Area */
|
||||
.linkml-viewer-page__content {
|
||||
flex: 1;
|
||||
|
|
@ -476,6 +498,20 @@
|
|||
color: var(--warning-color, #f57c00);
|
||||
}
|
||||
|
||||
/* Custodian Type Badge - shows which GLAMORCUBESFIXPHDNT types apply */
|
||||
.linkml-viewer__custodian-badge {
|
||||
margin-left: 0.5rem;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
/* Item header with badges - flex layout for proper alignment */
|
||||
.linkml-viewer__item-name {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
align-items: center;
|
||||
gap: 0.375rem;
|
||||
}
|
||||
|
||||
/* URI and Range */
|
||||
.linkml-viewer__uri,
|
||||
.linkml-viewer__range,
|
||||
|
|
|
|||
|
|
@ -29,6 +29,13 @@ import {
|
|||
import { useLanguage } from '../contexts/LanguageContext';
|
||||
import { useCollapsibleHeader } from '../hooks/useCollapsibleHeader';
|
||||
import { ChevronUp, ChevronDown } from 'lucide-react';
|
||||
import { CustodianTypeBadge, CustodianTypeIndicator } from '../components/uml/CustodianTypeIndicator';
|
||||
import {
|
||||
getCustodianTypesForClass,
|
||||
getCustodianTypesForSlot,
|
||||
getCustodianTypesForEnum,
|
||||
isUniversalElement,
|
||||
} from '../lib/schema-custodian-mapping';
|
||||
import './LinkMLViewerPage.css';
|
||||
import '../styles/collapsible.css';
|
||||
|
||||
|
|
@ -160,6 +167,8 @@ const TEXT = {
|
|||
noMatchingSchemas: { nl: 'Geen overeenkomende schema\'s', en: 'No matching schemas' },
|
||||
copyToClipboard: { nl: 'Kopieer naar klembord', en: 'Copy to clipboard' },
|
||||
copied: { nl: 'Gekopieerd!', en: 'Copied!' },
|
||||
use3DPolygon: { nl: '3D-polygoon', en: '3D Polygon' },
|
||||
use2DBadge: { nl: '2D-badge', en: '2D Badge' },
|
||||
};
|
||||
|
||||
// Dynamically discover schema files from the modules directory
|
||||
|
|
@ -203,6 +212,10 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
// State for copy to clipboard feedback
|
||||
const [copyFeedback, setCopyFeedback] = useState(false);
|
||||
|
||||
// State for 3D polygon indicator toggle (future feature)
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
||||
const [use3DIndicator, setUse3DIndicator] = useState(false);
|
||||
|
||||
// Collapsible header
|
||||
const mainContentRef = useRef<HTMLElement>(null);
|
||||
const { isCollapsed: isHeaderCollapsed, setIsCollapsed: setIsHeaderCollapsed } = useCollapsibleHeader(mainContentRef);
|
||||
|
|
@ -490,6 +503,8 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
|
||||
const renderClassDetails = (cls: LinkMLClass) => {
|
||||
const isHighlighted = highlightedClass === cls.name;
|
||||
const custodianTypes = getCustodianTypesForClass(cls.name);
|
||||
const isUniversal = isUniversalElement(custodianTypes);
|
||||
|
||||
return (
|
||||
<div
|
||||
|
|
@ -500,6 +515,23 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
<h4 className="linkml-viewer__item-name">
|
||||
{cls.name}
|
||||
{cls.abstract && <span className="linkml-viewer__badge linkml-viewer__badge--abstract">{t('abstract')}</span>}
|
||||
{!isUniversal && (
|
||||
use3DIndicator ? (
|
||||
<CustodianTypeIndicator
|
||||
types={custodianTypes}
|
||||
size={28}
|
||||
animate={true}
|
||||
showTooltip={true}
|
||||
className="linkml-viewer__custodian-indicator"
|
||||
/>
|
||||
) : (
|
||||
<CustodianTypeBadge
|
||||
types={custodianTypes}
|
||||
size="small"
|
||||
className="linkml-viewer__custodian-badge"
|
||||
/>
|
||||
)
|
||||
)}
|
||||
</h4>
|
||||
{cls.class_uri && (
|
||||
<div className="linkml-viewer__uri">
|
||||
|
|
@ -550,6 +582,8 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
const rangeIsEnum = slot.range && isEnumRange(slot.range);
|
||||
const enumKey = slot.range ? `${slot.name}:${slot.range}` : '';
|
||||
const isExpanded = expandedEnumRanges.has(enumKey);
|
||||
const custodianTypes = getCustodianTypesForSlot(slot.name);
|
||||
const isUniversal = isUniversalElement(custodianTypes);
|
||||
|
||||
return (
|
||||
<div key={slot.name} className="linkml-viewer__item">
|
||||
|
|
@ -557,6 +591,23 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
{slot.name}
|
||||
{slot.required && <span className="linkml-viewer__badge linkml-viewer__badge--required">{t('required')}</span>}
|
||||
{slot.multivalued && <span className="linkml-viewer__badge linkml-viewer__badge--multi">{t('multivalued')}</span>}
|
||||
{!isUniversal && (
|
||||
use3DIndicator ? (
|
||||
<CustodianTypeIndicator
|
||||
types={custodianTypes}
|
||||
size={28}
|
||||
animate={true}
|
||||
showTooltip={true}
|
||||
className="linkml-viewer__custodian-indicator"
|
||||
/>
|
||||
) : (
|
||||
<CustodianTypeBadge
|
||||
types={custodianTypes}
|
||||
size="small"
|
||||
className="linkml-viewer__custodian-badge"
|
||||
/>
|
||||
)
|
||||
)}
|
||||
</h4>
|
||||
{slot.slot_uri && (
|
||||
<div className="linkml-viewer__uri">
|
||||
|
|
@ -604,6 +655,8 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
const searchFilter = enumSearchFilters[enumName] || '';
|
||||
const showAll = enumShowAll[enumName] || false;
|
||||
const displayCount = 20;
|
||||
const custodianTypes = getCustodianTypesForEnum(enumDef.name);
|
||||
const isUniversal = isUniversalElement(custodianTypes);
|
||||
|
||||
// Filter values based on search
|
||||
const filteredValues = searchFilter
|
||||
|
|
@ -622,7 +675,26 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
|
||||
return (
|
||||
<div key={enumDef.name} className="linkml-viewer__item">
|
||||
<h4 className="linkml-viewer__item-name">{enumDef.name}</h4>
|
||||
<h4 className="linkml-viewer__item-name">
|
||||
{enumDef.name}
|
||||
{!isUniversal && (
|
||||
use3DIndicator ? (
|
||||
<CustodianTypeIndicator
|
||||
types={custodianTypes}
|
||||
size={28}
|
||||
animate={true}
|
||||
showTooltip={true}
|
||||
className="linkml-viewer__custodian-indicator"
|
||||
/>
|
||||
) : (
|
||||
<CustodianTypeBadge
|
||||
types={custodianTypes}
|
||||
size="small"
|
||||
className="linkml-viewer__custodian-badge"
|
||||
/>
|
||||
)
|
||||
)}
|
||||
</h4>
|
||||
{enumDef.description && (
|
||||
<div className="linkml-viewer__description linkml-viewer__markdown">
|
||||
<ReactMarkdown remarkPlugins={[remarkGfm]} rehypePlugins={[rehypeRaw]}>{transformContent(enumDef.description)}</ReactMarkdown>
|
||||
|
|
@ -1035,6 +1107,14 @@ const LinkMLViewerPage: React.FC = () => {
|
|||
>
|
||||
{t('rawYaml')}
|
||||
</button>
|
||||
<span className="linkml-viewer-page__tab-separator">|</span>
|
||||
<button
|
||||
className={`linkml-viewer-page__tab linkml-viewer-page__tab--indicator ${use3DIndicator ? 'linkml-viewer-page__tab--active' : ''}`}
|
||||
onClick={() => setUse3DIndicator(!use3DIndicator)}
|
||||
title={use3DIndicator ? t('use2DBadge') : t('use3DPolygon')}
|
||||
>
|
||||
{use3DIndicator ? '🔷 3D' : '🏷️ 2D'}
|
||||
</button>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
import { useEffect, useRef, useState, useMemo, useCallback } from 'react';
|
||||
import { useSearchParams } from 'react-router-dom';
|
||||
import maplibregl from 'maplibre-gl';
|
||||
import type { StyleSpecification, MapLayerMouseEvent, GeoJSONSource } from 'maplibre-gl';
|
||||
import 'maplibre-gl/dist/maplibre-gl.css';
|
||||
import { useLanguage } from '../contexts/LanguageContext';
|
||||
import { useUIState } from '../contexts/UIStateContext';
|
||||
|
|
@ -89,7 +90,7 @@ const TYPE_NAMES: Record<string, { nl: string; en: string }> = {
|
|||
};
|
||||
|
||||
// Map tile styles for light and dark modes
|
||||
const getMapStyle = (isDarkMode: boolean): maplibregl.StyleSpecification => {
|
||||
const getMapStyle = (isDarkMode: boolean): StyleSpecification => {
|
||||
if (isDarkMode) {
|
||||
// CartoDB Dark Matter - dark mode tiles
|
||||
return {
|
||||
|
|
@ -824,7 +825,8 @@ export default function NDEMapPage() {
|
|||
zoom: 7,
|
||||
});
|
||||
|
||||
map.addControl(new maplibregl.NavigationControl(), 'top-right');
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
map.addControl(new maplibregl.NavigationControl() as any, 'top-right');
|
||||
|
||||
map.on('load', () => {
|
||||
mapInstanceRef.current = map;
|
||||
|
|
@ -947,9 +949,9 @@ export default function NDEMapPage() {
|
|||
}
|
||||
|
||||
// Check if source already exists (with safety check)
|
||||
let existingSource: maplibregl.GeoJSONSource | undefined;
|
||||
let existingSource: GeoJSONSource | undefined;
|
||||
try {
|
||||
existingSource = map.getSource('institutions') as maplibregl.GeoJSONSource | undefined;
|
||||
existingSource = map.getSource('institutions') as GeoJSONSource | undefined;
|
||||
} catch {
|
||||
console.log('[Markers] Error getting source, map may be destroyed');
|
||||
return;
|
||||
|
|
@ -1056,12 +1058,12 @@ export default function NDEMapPage() {
|
|||
const map = mapInstanceRef.current;
|
||||
|
||||
// Click handler using ref to always get current filtered data
|
||||
const handleClick = (e: maplibregl.MapLayerMouseEvent) => {
|
||||
const handleClick = (e: MapLayerMouseEvent) => {
|
||||
if (!e.features || e.features.length === 0) return;
|
||||
|
||||
const feature = e.features[0];
|
||||
const index = feature.properties?.index;
|
||||
if (index === undefined) return;
|
||||
const feature = e.features[0] as GeoJSON.Feature;
|
||||
const index = feature.properties?.index as number | undefined;
|
||||
if (index === undefined || typeof index !== 'number') return;
|
||||
|
||||
// Use ref to get current filtered institutions, not stale closure
|
||||
const inst = filteredInstitutionsRef.current[index];
|
||||
|
|
|
|||
|
|
@ -765,7 +765,7 @@ export default function ProjectPlanPage() {
|
|||
<Paper sx={{ mb: 4 }}>
|
||||
<Tabs
|
||||
value={activeTab}
|
||||
onChange={(_, newValue) => setActiveTab(newValue)}
|
||||
onChange={(_: React.SyntheticEvent, newValue: number) => setActiveTab(newValue)}
|
||||
variant="scrollable"
|
||||
scrollButtons="auto"
|
||||
>
|
||||
|
|
|
|||
560
frontend/src/vite-env.d.ts
vendored
Normal file
560
frontend/src/vite-env.d.ts
vendored
Normal file
|
|
@ -0,0 +1,560 @@
|
|||
/// <reference types="vite/client" />
|
||||
|
||||
// MUI Icons Material module declaration
|
||||
// The package exports individual icon components but lacks a proper barrel index.d.ts
|
||||
declare module '@mui/icons-material' {
|
||||
import type { SvgIconComponent } from '@mui/icons-material/esm';
|
||||
export const ExpandMore: SvgIconComponent;
|
||||
export const CheckCircle: SvgIconComponent;
|
||||
export const RadioButtonUnchecked: SvgIconComponent;
|
||||
export const Schedule: SvgIconComponent;
|
||||
export const Assignment: SvgIconComponent;
|
||||
export const AccountTree: SvgIconComponent;
|
||||
export const Timeline: SvgIconComponent;
|
||||
export const Block: SvgIconComponent;
|
||||
export const Link: SvgIconComponent;
|
||||
}
|
||||
|
||||
// For default icon component type
|
||||
declare module '@mui/icons-material/esm' {
|
||||
import type { SvgIconProps } from '@mui/material/SvgIcon';
|
||||
import type React from 'react';
|
||||
export type SvgIconComponent = React.FC<SvgIconProps>;
|
||||
}
|
||||
|
||||
// Module declarations for packages without type definitions
|
||||
declare module 'lucide-react' {
|
||||
import type { FC, SVGAttributes } from 'react';
|
||||
|
||||
interface LucideIconProps extends SVGAttributes<SVGElement> {
|
||||
size?: number | string;
|
||||
color?: string;
|
||||
strokeWidth?: number | string;
|
||||
absoluteStrokeWidth?: boolean;
|
||||
}
|
||||
|
||||
type LucideIcon = FC<LucideIconProps>;
|
||||
|
||||
export const ChevronUp: LucideIcon;
|
||||
export const ChevronDown: LucideIcon;
|
||||
export const ChevronRight: LucideIcon;
|
||||
export const ChevronLeft: LucideIcon;
|
||||
export const X: LucideIcon;
|
||||
export const Search: LucideIcon;
|
||||
export const Filter: LucideIcon;
|
||||
export const RefreshCw: LucideIcon;
|
||||
export const Download: LucideIcon;
|
||||
export const Upload: LucideIcon;
|
||||
export const Settings: LucideIcon;
|
||||
export const Info: LucideIcon;
|
||||
export const AlertTriangle: LucideIcon;
|
||||
export const AlertCircle: LucideIcon;
|
||||
export const CheckCircle: LucideIcon;
|
||||
export const XCircle: LucideIcon;
|
||||
export const HelpCircle: LucideIcon;
|
||||
export const Loader2: LucideIcon;
|
||||
export const Send: LucideIcon;
|
||||
export const MessageSquare: LucideIcon;
|
||||
export const Bot: LucideIcon;
|
||||
export const User: LucideIcon;
|
||||
export const Copy: LucideIcon;
|
||||
export const Check: LucideIcon;
|
||||
export const Play: LucideIcon;
|
||||
export const Pause: LucideIcon;
|
||||
export const RotateCcw: LucideIcon;
|
||||
export const ExternalLink: LucideIcon;
|
||||
export const Sparkles: LucideIcon;
|
||||
export const MapPin: LucideIcon;
|
||||
export const Map: LucideIcon;
|
||||
export const Layers: LucideIcon;
|
||||
export const Globe: LucideIcon;
|
||||
export const Building: LucideIcon;
|
||||
export const Building2: LucideIcon;
|
||||
export const Library: LucideIcon;
|
||||
export const Archive: LucideIcon;
|
||||
export const Landmark: LucideIcon;
|
||||
export const Database: LucideIcon;
|
||||
export const Network: LucideIcon;
|
||||
export const Share2: LucideIcon;
|
||||
export const ZoomIn: LucideIcon;
|
||||
export const ZoomOut: LucideIcon;
|
||||
export const Maximize: LucideIcon;
|
||||
export const Maximize2: LucideIcon;
|
||||
export const Minimize: LucideIcon;
|
||||
export const Minimize2: LucideIcon;
|
||||
export const Palette: LucideIcon;
|
||||
export const Image: LucideIcon;
|
||||
export const ImageIcon: LucideIcon;
|
||||
export const FileText: LucideIcon;
|
||||
export const Eye: LucideIcon;
|
||||
export const EyeOff: LucideIcon;
|
||||
export const Sun: LucideIcon;
|
||||
export const Moon: LucideIcon;
|
||||
export const Menu: LucideIcon;
|
||||
export const ArrowRight: LucideIcon;
|
||||
export const ArrowLeft: LucideIcon;
|
||||
export const ArrowUp: LucideIcon;
|
||||
export const ArrowDown: LucideIcon;
|
||||
export const Plus: LucideIcon;
|
||||
export const Minus: LucideIcon;
|
||||
export const Trash2: LucideIcon;
|
||||
export const Edit: LucideIcon;
|
||||
export const Save: LucideIcon;
|
||||
export const Clock: LucideIcon;
|
||||
export const Calendar: LucideIcon;
|
||||
export const Star: LucideIcon;
|
||||
export const Heart: LucideIcon;
|
||||
export const ThumbsUp: LucideIcon;
|
||||
export const ThumbsDown: LucideIcon;
|
||||
export const Flag: LucideIcon;
|
||||
export const Bookmark: LucideIcon;
|
||||
export const Tag: LucideIcon;
|
||||
export const Hash: LucideIcon;
|
||||
export const AtSign: LucideIcon;
|
||||
export const Link2: LucideIcon;
|
||||
export const Unlink: LucideIcon;
|
||||
export const Lock: LucideIcon;
|
||||
export const Unlock: LucideIcon;
|
||||
export const Key: LucideIcon;
|
||||
export const Shield: LucideIcon;
|
||||
export const Bell: LucideIcon;
|
||||
export const BellOff: LucideIcon;
|
||||
export const Volume2: LucideIcon;
|
||||
export const VolumeX: LucideIcon;
|
||||
export const Mic: LucideIcon;
|
||||
export const MicOff: LucideIcon;
|
||||
export const Camera: LucideIcon;
|
||||
export const Video: LucideIcon;
|
||||
export const Printer: LucideIcon;
|
||||
export const Mail: LucideIcon;
|
||||
export const Phone: LucideIcon;
|
||||
export const Home: LucideIcon;
|
||||
export const List: LucideIcon;
|
||||
export const Grid: LucideIcon;
|
||||
export const LayoutGrid: LucideIcon;
|
||||
export const LayoutList: LucideIcon;
|
||||
export const Columns: LucideIcon;
|
||||
export const Rows: LucideIcon;
|
||||
export const SlidersHorizontal: LucideIcon;
|
||||
export const History: LucideIcon;
|
||||
export const Languages: LucideIcon;
|
||||
export const BarChart3: LucideIcon;
|
||||
export const BarChart: LucideIcon;
|
||||
export const PieChart: LucideIcon;
|
||||
export const LineChart: LucideIcon;
|
||||
export const TrendingUp: LucideIcon;
|
||||
export const TrendingDown: LucideIcon;
|
||||
export const Activity: LucideIcon;
|
||||
export const Zap: LucideIcon;
|
||||
export const Terminal: LucideIcon;
|
||||
export const Code: LucideIcon;
|
||||
export const Code2: LucideIcon;
|
||||
export const FileCode: LucideIcon;
|
||||
export const Folder: LucideIcon;
|
||||
export const FolderOpen: LucideIcon;
|
||||
export const File: LucideIcon;
|
||||
export const Files: LucideIcon;
|
||||
export const MoreHorizontal: LucideIcon;
|
||||
export const MoreVertical: LucideIcon;
|
||||
export const Grip: LucideIcon;
|
||||
export const GripVertical: LucideIcon;
|
||||
export const Move: LucideIcon;
|
||||
export const Crosshair: LucideIcon;
|
||||
export const Target: LucideIcon;
|
||||
export const Compass: LucideIcon;
|
||||
export const Navigation: LucideIcon;
|
||||
export const Focus: LucideIcon;
|
||||
export const Scan: LucideIcon;
|
||||
export const QrCode: LucideIcon;
|
||||
export const Table2: LucideIcon;
|
||||
export const Table: LucideIcon;
|
||||
export const CreditCard: LucideIcon;
|
||||
export const LayoutGrid: LucideIcon;
|
||||
export const IdCard: LucideIcon;
|
||||
}
|
||||
|
||||
declare module 'mermaid' {
|
||||
interface MermaidConfig {
|
||||
startOnLoad?: boolean;
|
||||
theme?: string;
|
||||
securityLevel?: string;
|
||||
fontFamily?: string;
|
||||
logLevel?: string;
|
||||
flowchart?: Record<string, unknown>;
|
||||
sequence?: Record<string, unknown>;
|
||||
gantt?: Record<string, unknown>;
|
||||
class?: Record<string, unknown>;
|
||||
}
|
||||
interface MermaidAPI {
|
||||
initialize: (config: MermaidConfig) => void;
|
||||
render: (id: string, text: string, svgContainingElement?: Element) => Promise<{ svg: string }>;
|
||||
parse: (text: string) => Promise<boolean>;
|
||||
}
|
||||
const mermaid: MermaidAPI;
|
||||
export default mermaid;
|
||||
}
|
||||
|
||||
declare module 'maplibre-gl' {
|
||||
export interface IControl {
|
||||
onAdd(map: Map): HTMLElement;
|
||||
onRemove(map: Map): void;
|
||||
getDefaultPosition?: () => string;
|
||||
}
|
||||
|
||||
export class Map {
|
||||
constructor(options: MapOptions);
|
||||
on(type: string, listener: (e: MapLayerMouseEvent) => void): this;
|
||||
on(type: string, layerId: string, listener: (e: MapLayerMouseEvent) => void): this;
|
||||
off(type: string, listener: (e: MapLayerMouseEvent) => void): this;
|
||||
off(type: string, layerId: string, listener: (e: MapLayerMouseEvent) => void): this;
|
||||
once(type: string, listener: () => void): this;
|
||||
remove(): void;
|
||||
getSource(id: string): GeoJSONSource | undefined;
|
||||
getLayer(id: string): unknown;
|
||||
addSource(id: string, source: unknown): this;
|
||||
addLayer(layer: unknown, before?: string): this;
|
||||
removeLayer(id: string): this;
|
||||
removeSource(id: string): this;
|
||||
moveLayer(id: string, beforeId?: string): this;
|
||||
setFilter(layerId: string, filter: unknown): this;
|
||||
setPaintProperty(layerId: string, property: string, value: unknown): this;
|
||||
setLayoutProperty(layerId: string, property: string, value: unknown): this;
|
||||
fitBounds(bounds: LngLatBoundsLike, options?: FitBoundsOptions): this;
|
||||
flyTo(options: FlyToOptions): this;
|
||||
getCenter(): LngLat;
|
||||
getZoom(): number;
|
||||
setCenter(center: LngLatLike): this;
|
||||
setZoom(zoom: number): this;
|
||||
resize(): this;
|
||||
getBounds(): LngLatBounds;
|
||||
getCanvas(): HTMLCanvasElement;
|
||||
getContainer(): HTMLElement;
|
||||
queryRenderedFeatures(point?: PointLike, options?: unknown): MapGeoJSONFeature[];
|
||||
project(lngLat: LngLatLike): Point;
|
||||
unproject(point: PointLike): LngLat;
|
||||
loaded(): boolean;
|
||||
isStyleLoaded(): boolean;
|
||||
isMoving(): boolean;
|
||||
isZooming(): boolean;
|
||||
isRotating(): boolean;
|
||||
triggerRepaint(): void;
|
||||
easeTo(options: unknown): this;
|
||||
jumpTo(options: unknown): this;
|
||||
panTo(lngLat: LngLatLike, options?: unknown): this;
|
||||
zoomTo(zoom: number, options?: unknown): this;
|
||||
addControl(control: IControl | NavigationControl | ScaleControl, position?: string): this;
|
||||
removeControl(control: IControl): this;
|
||||
setStyle(style: StyleSpecification | string, options?: { diff?: boolean }): this;
|
||||
getStyle(): StyleSpecification;
|
||||
}
|
||||
|
||||
export interface MapGeoJSONFeature {
|
||||
type: 'Feature';
|
||||
geometry: GeoJSON.Geometry;
|
||||
properties: Record<string, unknown>;
|
||||
id?: string | number;
|
||||
layer?: unknown;
|
||||
source?: string;
|
||||
sourceLayer?: string;
|
||||
state?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
export class GeoJSONSource {
|
||||
setData(data: GeoJSON.GeoJSON): this;
|
||||
}
|
||||
|
||||
export class Popup {
|
||||
constructor(options?: PopupOptions);
|
||||
setLngLat(lngLat: LngLatLike): this;
|
||||
setHTML(html: string): this;
|
||||
setText(text: string): this;
|
||||
addTo(map: Map): this;
|
||||
remove(): this;
|
||||
isOpen(): boolean;
|
||||
}
|
||||
|
||||
export class Marker {
|
||||
constructor(options?: MarkerOptions);
|
||||
setLngLat(lngLat: LngLatLike): this;
|
||||
addTo(map: Map): this;
|
||||
remove(): this;
|
||||
getElement(): HTMLElement;
|
||||
setPopup(popup: Popup): this;
|
||||
getPopup(): Popup;
|
||||
}
|
||||
|
||||
export class NavigationControl {
|
||||
constructor(options?: NavigationControlOptions);
|
||||
}
|
||||
|
||||
export class ScaleControl {
|
||||
constructor(options?: ScaleControlOptions);
|
||||
}
|
||||
|
||||
export class LngLat {
|
||||
constructor(lng: number, lat: number);
|
||||
lng: number;
|
||||
lat: number;
|
||||
wrap(): LngLat;
|
||||
toArray(): [number, number];
|
||||
toString(): string;
|
||||
distanceTo(lngLat: LngLat): number;
|
||||
}
|
||||
|
||||
export class LngLatBounds {
|
||||
constructor(sw?: LngLatLike, ne?: LngLatLike);
|
||||
extend(obj: LngLatLike | LngLatBoundsLike): this;
|
||||
getCenter(): LngLat;
|
||||
getSouthWest(): LngLat;
|
||||
getNorthEast(): LngLat;
|
||||
getNorthWest(): LngLat;
|
||||
getSouthEast(): LngLat;
|
||||
getWest(): number;
|
||||
getSouth(): number;
|
||||
getEast(): number;
|
||||
getNorth(): number;
|
||||
toArray(): [[number, number], [number, number]];
|
||||
toString(): string;
|
||||
isEmpty(): boolean;
|
||||
contains(lngLat: LngLatLike): boolean;
|
||||
}
|
||||
|
||||
export class Point {
|
||||
constructor(x: number, y: number);
|
||||
x: number;
|
||||
y: number;
|
||||
}
|
||||
|
||||
export type LngLatLike = LngLat | [number, number] | { lng: number; lat: number } | { lon: number; lat: number };
|
||||
export type LngLatBoundsLike = LngLatBounds | [LngLatLike, LngLatLike] | [number, number, number, number];
|
||||
export type PointLike = Point | [number, number];
|
||||
|
||||
export interface MapOptions {
|
||||
container: HTMLElement | string;
|
||||
style: StyleSpecification | string;
|
||||
center?: LngLatLike;
|
||||
zoom?: number;
|
||||
bearing?: number;
|
||||
pitch?: number;
|
||||
bounds?: LngLatBoundsLike;
|
||||
fitBoundsOptions?: FitBoundsOptions;
|
||||
attributionControl?: boolean;
|
||||
customAttribution?: string | string[];
|
||||
interactive?: boolean;
|
||||
hash?: boolean | string;
|
||||
maxBounds?: LngLatBoundsLike;
|
||||
maxZoom?: number;
|
||||
minZoom?: number;
|
||||
maxPitch?: number;
|
||||
minPitch?: number;
|
||||
scrollZoom?: boolean;
|
||||
boxZoom?: boolean;
|
||||
dragRotate?: boolean;
|
||||
dragPan?: boolean;
|
||||
keyboard?: boolean;
|
||||
doubleClickZoom?: boolean;
|
||||
touchZoomRotate?: boolean;
|
||||
touchPitch?: boolean;
|
||||
cooperativeGestures?: boolean;
|
||||
trackResize?: boolean;
|
||||
locale?: Record<string, string>;
|
||||
fadeDuration?: number;
|
||||
crossSourceCollisions?: boolean;
|
||||
collectResourceTiming?: boolean;
|
||||
clickTolerance?: number;
|
||||
preserveDrawingBuffer?: boolean;
|
||||
antialias?: boolean;
|
||||
refreshExpiredTiles?: boolean;
|
||||
maxTileCacheSize?: number;
|
||||
transformRequest?: (url: string, resourceType: string) => unknown;
|
||||
localIdeographFontFamily?: string;
|
||||
pitchWithRotate?: boolean;
|
||||
pixelRatio?: number;
|
||||
validateStyle?: boolean;
|
||||
}
|
||||
|
||||
export interface StyleSpecification {
|
||||
version: number;
|
||||
name?: string;
|
||||
metadata?: unknown;
|
||||
center?: [number, number];
|
||||
zoom?: number;
|
||||
bearing?: number;
|
||||
pitch?: number;
|
||||
light?: unknown;
|
||||
sources: Record<string, unknown>;
|
||||
sprite?: string;
|
||||
glyphs?: string;
|
||||
layers: unknown[];
|
||||
terrain?: unknown;
|
||||
fog?: unknown;
|
||||
transition?: unknown;
|
||||
}
|
||||
|
||||
export interface PopupOptions {
|
||||
closeButton?: boolean;
|
||||
closeOnClick?: boolean;
|
||||
closeOnMove?: boolean;
|
||||
focusAfterOpen?: boolean;
|
||||
anchor?: string;
|
||||
offset?: number | PointLike | Record<string, PointLike>;
|
||||
className?: string;
|
||||
maxWidth?: string;
|
||||
}
|
||||
|
||||
export interface MarkerOptions {
|
||||
element?: HTMLElement;
|
||||
anchor?: string;
|
||||
offset?: PointLike;
|
||||
color?: string;
|
||||
scale?: number;
|
||||
draggable?: boolean;
|
||||
clickTolerance?: number;
|
||||
rotation?: number;
|
||||
rotationAlignment?: string;
|
||||
pitchAlignment?: string;
|
||||
}
|
||||
|
||||
export interface NavigationControlOptions {
|
||||
showCompass?: boolean;
|
||||
showZoom?: boolean;
|
||||
visualizePitch?: boolean;
|
||||
}
|
||||
|
||||
export interface ScaleControlOptions {
|
||||
maxWidth?: number;
|
||||
unit?: string;
|
||||
}
|
||||
|
||||
export interface FitBoundsOptions {
|
||||
padding?: number | { top?: number; bottom?: number; left?: number; right?: number };
|
||||
offset?: PointLike;
|
||||
maxZoom?: number;
|
||||
maxDuration?: number;
|
||||
linear?: boolean;
|
||||
easing?: (t: number) => number;
|
||||
essential?: boolean;
|
||||
}
|
||||
|
||||
export interface FlyToOptions {
|
||||
center?: LngLatLike;
|
||||
zoom?: number;
|
||||
bearing?: number;
|
||||
pitch?: number;
|
||||
duration?: number;
|
||||
easing?: (t: number) => number;
|
||||
offset?: PointLike;
|
||||
animate?: boolean;
|
||||
essential?: boolean;
|
||||
padding?: number | { top?: number; bottom?: number; left?: number; right?: number };
|
||||
}
|
||||
|
||||
export interface MapLayerMouseEvent {
|
||||
type: string;
|
||||
target: Map;
|
||||
originalEvent: MouseEvent;
|
||||
point: Point;
|
||||
lngLat: LngLat;
|
||||
preventDefault(): void;
|
||||
defaultPrevented: boolean;
|
||||
features?: MapGeoJSONFeature[];
|
||||
}
|
||||
}
|
||||
|
||||
declare module '@mui/material' {
|
||||
export const Box: React.FC<Record<string, unknown>>;
|
||||
export const Container: React.FC<Record<string, unknown>>;
|
||||
export const Typography: React.FC<Record<string, unknown>>;
|
||||
export const Paper: React.FC<Record<string, unknown>>;
|
||||
export const Grid: React.FC<Record<string, unknown>>;
|
||||
export const Card: React.FC<Record<string, unknown>>;
|
||||
export const CardContent: React.FC<Record<string, unknown>>;
|
||||
export const CardHeader: React.FC<Record<string, unknown>>;
|
||||
export const CardActions: React.FC<Record<string, unknown>>;
|
||||
export const Button: React.FC<Record<string, unknown>>;
|
||||
export const IconButton: React.FC<Record<string, unknown>>;
|
||||
export const TextField: React.FC<Record<string, unknown>>;
|
||||
export const Select: React.FC<Record<string, unknown>>;
|
||||
export const MenuItem: React.FC<Record<string, unknown>>;
|
||||
export const FormControl: React.FC<Record<string, unknown>>;
|
||||
export const FormLabel: React.FC<Record<string, unknown>>;
|
||||
export const FormHelperText: React.FC<Record<string, unknown>>;
|
||||
export const InputLabel: React.FC<Record<string, unknown>>;
|
||||
export const Input: React.FC<Record<string, unknown>>;
|
||||
export const Checkbox: React.FC<Record<string, unknown>>;
|
||||
export const Radio: React.FC<Record<string, unknown>>;
|
||||
export const RadioGroup: React.FC<Record<string, unknown>>;
|
||||
export const Switch: React.FC<Record<string, unknown>>;
|
||||
export const Slider: React.FC<Record<string, unknown>>;
|
||||
export const Tabs: React.FC<Record<string, unknown>>;
|
||||
export const Tab: React.FC<Record<string, unknown>>;
|
||||
export const TabPanel: React.FC<Record<string, unknown>>;
|
||||
export const AppBar: React.FC<Record<string, unknown>>;
|
||||
export const Toolbar: React.FC<Record<string, unknown>>;
|
||||
export const Drawer: React.FC<Record<string, unknown>>;
|
||||
export const Dialog: React.FC<Record<string, unknown>>;
|
||||
export const DialogTitle: React.FC<Record<string, unknown>>;
|
||||
export const DialogContent: React.FC<Record<string, unknown>>;
|
||||
export const DialogActions: React.FC<Record<string, unknown>>;
|
||||
export const Modal: React.FC<Record<string, unknown>>;
|
||||
export const Tooltip: React.FC<Record<string, unknown>>;
|
||||
export const Popover: React.FC<Record<string, unknown>>;
|
||||
export const Menu: React.FC<Record<string, unknown>>;
|
||||
export const List: React.FC<Record<string, unknown>>;
|
||||
export const ListItem: React.FC<Record<string, unknown>>;
|
||||
export const ListItemText: React.FC<Record<string, unknown>>;
|
||||
export const ListItemIcon: React.FC<Record<string, unknown>>;
|
||||
export const ListItemButton: React.FC<Record<string, unknown>>;
|
||||
export const Divider: React.FC<Record<string, unknown>>;
|
||||
export const Avatar: React.FC<Record<string, unknown>>;
|
||||
export const Badge: React.FC<Record<string, unknown>>;
|
||||
export const Chip: React.FC<Record<string, unknown>>;
|
||||
export const Alert: React.FC<Record<string, unknown>>;
|
||||
export const AlertTitle: React.FC<Record<string, unknown>>;
|
||||
export const Snackbar: React.FC<Record<string, unknown>>;
|
||||
export const CircularProgress: React.FC<Record<string, unknown>>;
|
||||
export const LinearProgress: React.FC<Record<string, unknown>>;
|
||||
export const Skeleton: React.FC<Record<string, unknown>>;
|
||||
export const Table: React.FC<Record<string, unknown>>;
|
||||
export const TableBody: React.FC<Record<string, unknown>>;
|
||||
export const TableCell: React.FC<Record<string, unknown>>;
|
||||
export const TableContainer: React.FC<Record<string, unknown>>;
|
||||
export const TableHead: React.FC<Record<string, unknown>>;
|
||||
export const TableRow: React.FC<Record<string, unknown>>;
|
||||
export const TablePagination: React.FC<Record<string, unknown>>;
|
||||
export const TableSortLabel: React.FC<Record<string, unknown>>;
|
||||
export const Accordion: React.FC<Record<string, unknown>>;
|
||||
export const AccordionSummary: React.FC<Record<string, unknown>>;
|
||||
export const AccordionDetails: React.FC<Record<string, unknown>>;
|
||||
export const Breadcrumbs: React.FC<Record<string, unknown>>;
|
||||
export const Link: React.FC<Record<string, unknown>>;
|
||||
export const Pagination: React.FC<Record<string, unknown>>;
|
||||
export const Rating: React.FC<Record<string, unknown>>;
|
||||
export const Stepper: React.FC<Record<string, unknown>>;
|
||||
export const Step: React.FC<Record<string, unknown>>;
|
||||
export const StepLabel: React.FC<Record<string, unknown>>;
|
||||
export const StepContent: React.FC<Record<string, unknown>>;
|
||||
export const SpeedDial: React.FC<Record<string, unknown>>;
|
||||
export const SpeedDialAction: React.FC<Record<string, unknown>>;
|
||||
export const SpeedDialIcon: React.FC<Record<string, unknown>>;
|
||||
export const ToggleButton: React.FC<Record<string, unknown>>;
|
||||
export const ToggleButtonGroup: React.FC<Record<string, unknown>>;
|
||||
export const Fab: React.FC<Record<string, unknown>>;
|
||||
export const FormGroup: React.FC<Record<string, unknown>>;
|
||||
export const FormControlLabel: React.FC<Record<string, unknown>>;
|
||||
export const InputAdornment: React.FC<Record<string, unknown>>;
|
||||
export const Autocomplete: React.FC<Record<string, unknown>>;
|
||||
export const Stack: React.FC<Record<string, unknown>>;
|
||||
export const Collapse: React.FC<Record<string, unknown>>;
|
||||
export const Fade: React.FC<Record<string, unknown>>;
|
||||
export const Grow: React.FC<Record<string, unknown>>;
|
||||
export const Slide: React.FC<Record<string, unknown>>;
|
||||
export const Zoom: React.FC<Record<string, unknown>>;
|
||||
export const useTheme: () => unknown;
|
||||
export const useMediaQuery: (query: string) => boolean;
|
||||
export const createTheme: (options: unknown) => unknown;
|
||||
export const ThemeProvider: React.FC<Record<string, unknown>>;
|
||||
export const CssBaseline: React.FC;
|
||||
export const GlobalStyles: React.FC<Record<string, unknown>>;
|
||||
export const styled: (component: unknown, options?: unknown) => unknown;
|
||||
}
|
||||
|
|
@ -4,12 +4,30 @@ import path from 'path'
|
|||
|
||||
// https://vite.dev/config/
|
||||
export default defineConfig({
|
||||
logLevel: 'info',
|
||||
plugins: [react()],
|
||||
resolve: {
|
||||
alias: {
|
||||
'@': path.resolve(__dirname, './src'),
|
||||
},
|
||||
},
|
||||
build: {
|
||||
// Increase chunk size warning limit (mermaid is large)
|
||||
chunkSizeWarningLimit: 2000,
|
||||
rollupOptions: {
|
||||
output: {
|
||||
// Manual chunks to separate large dependencies
|
||||
manualChunks: {
|
||||
maplibre: ['maplibre-gl'],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
optimizeDeps: {
|
||||
include: ['maplibre-gl'],
|
||||
// Exclude mermaid from pre-bundling - it's dynamically imported
|
||||
exclude: ['mermaid'],
|
||||
},
|
||||
server: {
|
||||
port: 5173,
|
||||
proxy: {
|
||||
|
|
|
|||
|
|
@ -185,7 +185,7 @@ imports:
|
|||
- modules/enums/ReconstructionActivityTypeEnum
|
||||
- modules/enums/SourceDocumentTypeEnum
|
||||
# StaffRoleTypeEnum REMOVED - replaced by StaffRole class hierarchy
|
||||
# See: .opencode/ENUM_TO_CLASS_PRINCIPLE.md for rationale
|
||||
# See: rules/ENUM_TO_CLASS_PRINCIPLE.md for rationale
|
||||
- modules/enums/CallForApplicationStatusEnum
|
||||
- modules/enums/FundingRequirementTypeEnum
|
||||
|
||||
|
|
@ -242,7 +242,7 @@ imports:
|
|||
- modules/classes/PersonObservation
|
||||
|
||||
# Staff role class hierarchy (replaces StaffRoleTypeEnum - Single Source of Truth)
|
||||
# See: .opencode/ENUM_TO_CLASS_PRINCIPLE.md
|
||||
# See: rules/ENUM_TO_CLASS_PRINCIPLE.md
|
||||
- modules/classes/StaffRole
|
||||
- modules/classes/StaffRoles
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,9 @@ id: https://nde.nl/ontology/hc/class/Conservatoria
|
|||
name: Conservatoria
|
||||
title: Conservatória Type (Lusophone)
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
|
||||
imports:
|
||||
- linkml:types
|
||||
- ./ArchiveOrganizationType
|
||||
|
|
@ -16,7 +19,8 @@ classes:
|
|||
|
||||
**Wikidata**: Q9854379
|
||||
|
||||
**Geographic Restriction**: Portugal, Brazil, and other Lusophone countries
|
||||
**Geographic Restriction**: Lusophone countries (PT, BR, AO, MZ, CV, GW, ST, TL)
|
||||
This constraint is enforced via LinkML `rules` with `postconditions`.
|
||||
|
||||
**CUSTODIAN-ONLY**: This type does NOT have a corresponding rico:RecordSetType
|
||||
class. Conservatórias are administrative offices with registration functions,
|
||||
|
|
@ -59,6 +63,7 @@ classes:
|
|||
|
||||
**Multilingual Labels**:
|
||||
- pt: Conservatória
|
||||
- pt-BR: Cartório de Registro
|
||||
|
||||
slot_usage:
|
||||
primary_type:
|
||||
|
|
@ -70,10 +75,49 @@ classes:
|
|||
|
||||
wikidata_entity:
|
||||
description: |
|
||||
Should be Q9854379 for Conservatórias.
|
||||
MUST be Q9854379 for Conservatórias.
|
||||
Lusophone civil/property registration offices.
|
||||
pattern: "^Q[0-9]+$"
|
||||
equals_string: "Q9854379"
|
||||
|
||||
applicable_countries:
|
||||
description: |
|
||||
**Geographic Restriction**: Lusophone countries only.
|
||||
|
||||
Conservatórias exist in Portuguese-speaking countries:
|
||||
- PT (Portugal) - Conservatórias do Registo
|
||||
- BR (Brazil) - Cartórios de Registro
|
||||
- AO (Angola) - Conservatórias
|
||||
- MZ (Mozambique) - Conservatórias
|
||||
- CV (Cape Verde) - Conservatórias
|
||||
- GW (Guinea-Bissau) - Conservatórias
|
||||
- ST (São Tomé and Príncipe) - Conservatórias
|
||||
- TL (Timor-Leste) - Conservatórias (Portuguese legal heritage)
|
||||
|
||||
The `rules` section below enforces this constraint during validation.
|
||||
multivalued: true
|
||||
required: true
|
||||
minimum_cardinality: 1
|
||||
|
||||
# LinkML rules for geographic constraint validation
|
||||
rules:
|
||||
- description: >-
|
||||
Conservatoria MUST have applicable_countries containing at least one
|
||||
Lusophone country (PT, BR, AO, MZ, CV, GW, ST, TL).
|
||||
This is a mandatory geographic restriction for Portuguese-speaking
|
||||
civil registry and notarial archive offices.
|
||||
postconditions:
|
||||
slot_conditions:
|
||||
applicable_countries:
|
||||
any_of:
|
||||
- equals_string: "PT"
|
||||
- equals_string: "BR"
|
||||
- equals_string: "AO"
|
||||
- equals_string: "MZ"
|
||||
- equals_string: "CV"
|
||||
- equals_string: "GW"
|
||||
- equals_string: "ST"
|
||||
- equals_string: "TL"
|
||||
|
||||
exact_mappings:
|
||||
- skos:Concept
|
||||
|
|
@ -82,8 +126,10 @@ classes:
|
|||
- rico:CorporateBody
|
||||
|
||||
comments:
|
||||
- "Conservatória (pt)"
|
||||
- "Cartório de Registro (pt-BR)"
|
||||
- "CUSTODIAN-ONLY type: No corresponding rico:RecordSetType class"
|
||||
- "Geographic restriction: Lusophone countries (Portugal, Brazil, etc.)"
|
||||
- "Geographic restriction enforced via LinkML rules: Lusophone countries only"
|
||||
- "Government registration office, not traditional archive"
|
||||
- "Essential for genealogical and legal research"
|
||||
|
||||
|
|
|
|||
|
|
@ -2,21 +2,27 @@ id: https://nde.nl/ontology/hc/class/CountyRecordOffice
|
|||
name: CountyRecordOffice
|
||||
title: County Record Office Type
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
org: http://www.w3.org/ns/org#
|
||||
|
||||
imports:
|
||||
- linkml:types
|
||||
- ./ArchiveOrganizationType
|
||||
- ./OrganizationBranch
|
||||
|
||||
classes:
|
||||
CountyRecordOffice:
|
||||
is_a: ArchiveOrganizationType
|
||||
class_uri: skos:Concept
|
||||
description: |
|
||||
Local authority repository in the United Kingdom and similar jurisdictions,
|
||||
preserving historical records of the county and its communities.
|
||||
Local authority repository in the United Kingdom, preserving historical
|
||||
records of the county and its communities.
|
||||
|
||||
**Wikidata**: Q5177943
|
||||
|
||||
**Geographic Context**: Primarily United Kingdom
|
||||
**Geographic Restriction**: United Kingdom (GB) only.
|
||||
This constraint is enforced via LinkML `rules` with `postconditions`.
|
||||
|
||||
**CUSTODIAN-ONLY**: This type does NOT have a corresponding rico:RecordSetType
|
||||
class. County Record Offices are institutional types, not collection
|
||||
|
|
@ -40,16 +46,25 @@ classes:
|
|||
- Often designated as place of deposit for public records
|
||||
- Increasingly rebranded as "Archives and Local Studies"
|
||||
|
||||
In Scotland:
|
||||
- Similar functions performed by local authority archives
|
||||
- National Records of Scotland at national level
|
||||
|
||||
In Northern Ireland:
|
||||
- Public Record Office of Northern Ireland (PRONI)
|
||||
- Local council archives
|
||||
|
||||
**Related Types**:
|
||||
- LocalGovernmentArchive (Q118281267) - Local authority records
|
||||
- MunicipalArchive (Q604177) - City/town archives
|
||||
- LocalHistoryArchive (Q12324798) - Local history focus
|
||||
|
||||
**Notable Examples**:
|
||||
- The National Archives (Kew) - National level
|
||||
- London Metropolitan Archives
|
||||
- Oxfordshire History Centre
|
||||
- Lancashire Archives
|
||||
- West Yorkshire Archive Service
|
||||
- Surrey History Centre
|
||||
|
||||
**Ontological Alignment**:
|
||||
- **SKOS**: skos:Concept with skos:broader Q166118 (archive)
|
||||
|
|
@ -57,6 +72,8 @@ classes:
|
|||
- **RiC-O**: rico:CorporateBody (as agent)
|
||||
|
||||
**Multilingual Labels**:
|
||||
- en: County Record Office
|
||||
- en-GB: County Record Office
|
||||
- it: archivio pubblico territoriale
|
||||
|
||||
slot_usage:
|
||||
|
|
@ -67,7 +84,7 @@ classes:
|
|||
|
||||
wikidata_entity:
|
||||
description: |
|
||||
Should be Q5177943 for county record offices.
|
||||
MUST be Q5177943 for county record offices.
|
||||
UK local authority archive type.
|
||||
pattern: "^Q[0-9]+$"
|
||||
equals_string: "Q5177943"
|
||||
|
|
@ -76,6 +93,66 @@ classes:
|
|||
description: |
|
||||
Typically 'county' or 'local' for this archive type.
|
||||
Corresponds to UK county administrative level.
|
||||
|
||||
is_branch_of_authority:
|
||||
description: |
|
||||
**Organizational Relationship**: County Record Offices may be branches
|
||||
of larger local authority structures.
|
||||
|
||||
**Common Parent Organizations**:
|
||||
- County Councils (e.g., Oxfordshire County Council)
|
||||
- Unitary Authorities (e.g., Bristol City Council)
|
||||
- Combined Authorities (e.g., Greater Manchester)
|
||||
- Joint Archive Services (e.g., East Sussex / Brighton & Hove)
|
||||
|
||||
**Legal Context**:
|
||||
County Record Offices are typically:
|
||||
- Designated "place of deposit" under Public Records Act 1958
|
||||
- Part of local authority heritage/cultural services
|
||||
- May share governance with local studies libraries
|
||||
|
||||
**Use org:unitOf pattern** from OrganizationBranch to link to parent
|
||||
authority when modeled as formal organizational unit.
|
||||
|
||||
**Examples**:
|
||||
- Oxfordshire History Centre → part of Oxfordshire County Council
|
||||
- London Metropolitan Archives → part of City of London Corporation
|
||||
- West Yorkshire Archive Service → joint service of five councils
|
||||
range: uriorcurie
|
||||
multivalued: false
|
||||
required: false
|
||||
examples:
|
||||
- value: "https://nde.nl/ontology/hc/uk/oxfordshire-county-council"
|
||||
description: "Parent local authority"
|
||||
|
||||
applicable_countries:
|
||||
description: |
|
||||
**Geographic Restriction**: United Kingdom (GB) only.
|
||||
|
||||
County Record Offices are a UK-specific institution type within
|
||||
the local authority structure of England, Wales, Scotland, and
|
||||
Northern Ireland.
|
||||
|
||||
Note: Uses ISO 3166-1 alpha-2 code "GB" for United Kingdom
|
||||
(not "UK" which is not a valid ISO code).
|
||||
|
||||
The `rules` section below enforces this constraint during validation.
|
||||
ifabsent: "string(GB)"
|
||||
required: true
|
||||
minimum_cardinality: 1
|
||||
maximum_cardinality: 1
|
||||
|
||||
# LinkML rules for geographic constraint validation
|
||||
rules:
|
||||
- description: >-
|
||||
CountyRecordOffice MUST have applicable_countries containing "GB"
|
||||
(United Kingdom). This is a mandatory geographic restriction for
|
||||
UK county record offices and local authority archives.
|
||||
postconditions:
|
||||
slot_conditions:
|
||||
applicable_countries:
|
||||
any_of:
|
||||
- equals_string: "GB"
|
||||
|
||||
exact_mappings:
|
||||
- skos:Concept
|
||||
|
|
@ -84,7 +161,9 @@ classes:
|
|||
- rico:CorporateBody
|
||||
|
||||
comments:
|
||||
- "County Record Office (en-GB)"
|
||||
- "CUSTODIAN-ONLY type: No corresponding rico:RecordSetType class"
|
||||
- "Geographic restriction enforced via LinkML rules: United Kingdom (GB) only"
|
||||
- "UK local authority archive institution type"
|
||||
- "Often designated place of deposit for public records"
|
||||
- "Key resource for local and family history research"
|
||||
|
|
@ -93,3 +172,12 @@ classes:
|
|||
- LocalGovernmentArchive
|
||||
- MunicipalArchive
|
||||
- LocalHistoryArchive
|
||||
- OrganizationBranch
|
||||
|
||||
slots:
|
||||
is_branch_of_authority:
|
||||
slot_uri: org:unitOf
|
||||
description: |
|
||||
Parent local authority or governing body for this County Record Office.
|
||||
Uses W3C Org ontology org:unitOf relationship.
|
||||
range: uriorcurie
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ imports:
|
|||
- linkml:types
|
||||
- ./ArchiveOrganizationType
|
||||
- ./CustodianAdministration
|
||||
- ./CustodianArchive
|
||||
|
||||
classes:
|
||||
CurrentArchive:
|
||||
|
|
@ -63,6 +64,24 @@ classes:
|
|||
- HistoricalArchive (Q3621673) - non-current permanent records
|
||||
- RecordsCenter - semi-current storage facility
|
||||
|
||||
**RELATIONSHIP TO CustodianArchive**:
|
||||
|
||||
CurrentArchive (this class) is a TYPE classification (skos:Concept) for
|
||||
archives managing records in the active/current phase of the lifecycle.
|
||||
|
||||
CustodianArchive is an INSTANCE class (rico:RecordSet) representing the
|
||||
actual operational archives of a heritage custodian awaiting processing.
|
||||
|
||||
**Semantic Relationship**:
|
||||
- CurrentArchive is a HYPERNYM (broader type) for the concept of active records
|
||||
- CustodianArchive records MAY be typed as CurrentArchive when in active use
|
||||
- When CustodianArchive.processing_status = "UNPROCESSED", records may still
|
||||
be in the current/active phase conceptually
|
||||
|
||||
**SKOS Alignment**:
|
||||
- skos:broader: CurrentArchive → DepositArchive (lifecycle progression)
|
||||
- skos:narrower: CurrentArchive ← specific current archive types
|
||||
|
||||
**ONTOLOGICAL ALIGNMENT**:
|
||||
- **SKOS**: skos:Concept (type classification)
|
||||
- **RiC-O**: rico:RecordSet for active record groups
|
||||
|
|
@ -74,6 +93,7 @@ classes:
|
|||
- retention_schedule
|
||||
- creating_organization
|
||||
- transfer_policy
|
||||
- has_narrower_instance
|
||||
|
||||
slot_usage:
|
||||
wikidata_entity:
|
||||
|
|
@ -101,6 +121,25 @@ classes:
|
|||
Policy for transferring records to intermediate or permanent archives.
|
||||
Describes triggers, timelines, and procedures for transfer.
|
||||
range: string
|
||||
|
||||
has_narrower_instance:
|
||||
slot_uri: skos:narrowerTransitive
|
||||
description: |
|
||||
Links this archive TYPE to specific CustodianArchive INSTANCES
|
||||
that are classified under this lifecycle phase.
|
||||
|
||||
**SKOS**: skos:narrowerTransitive for type-instance relationship.
|
||||
|
||||
**Usage**:
|
||||
When a CustodianArchive contains records in the "current/active" phase,
|
||||
it can be linked from CurrentArchive via this property.
|
||||
|
||||
**Example**:
|
||||
- CurrentArchive (type) → has_narrower_instance →
|
||||
CustodianArchive "Director's Active Files 2020-2024" (instance)
|
||||
range: CustodianArchive
|
||||
multivalued: true
|
||||
required: false
|
||||
|
||||
exact_mappings:
|
||||
- wikidata:Q3621648
|
||||
|
|
@ -145,3 +184,11 @@ slots:
|
|||
transfer_policy:
|
||||
description: Policy for transferring to permanent archive
|
||||
range: string
|
||||
|
||||
has_narrower_instance:
|
||||
slot_uri: skos:narrowerTransitive
|
||||
description: |
|
||||
Links archive TYPE to specific CustodianArchive INSTANCES.
|
||||
SKOS narrowerTransitive for type-to-instance relationship.
|
||||
range: CustodianArchive
|
||||
multivalued: true
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ imports:
|
|||
- ../slots/access_restrictions
|
||||
- ../slots/storage_location
|
||||
- ./ReconstructedEntity
|
||||
- ./CurrentArchive
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
|
|
@ -31,6 +32,8 @@ prefixes:
|
|||
time: http://www.w3.org/2006/time#
|
||||
org: http://www.w3.org/ns/org#
|
||||
premis: http://www.loc.gov/premis/rdf/v3/
|
||||
skos: http://www.w3.org/2004/02/skos/core#
|
||||
wikidata: http://www.wikidata.org/entity/
|
||||
|
||||
classes:
|
||||
CustodianArchive:
|
||||
|
|
@ -122,6 +125,18 @@ classes:
|
|||
- **Storage**: Physical location of unprocessed archives
|
||||
- **OrganizationalStructure**: Unit responsible for processing
|
||||
|
||||
**RELATIONSHIP TO LIFECYCLE TYPE CLASSES**:
|
||||
|
||||
CustodianArchive (this class) is an INSTANCE class representing actual
|
||||
operational archives. It can be TYPED using lifecycle phase classifications:
|
||||
|
||||
- **CurrentArchive** (Q3621648): Active records in daily use
|
||||
- skos:broaderTransitive links CustodianArchive → CurrentArchive type
|
||||
- **DepositArchive** (Q244904): Intermediate/semi-current records
|
||||
- **HistoricalArchive** (Q3621673): Permanent archival records
|
||||
|
||||
Use `lifecycle_phase_type` slot to classify by lifecycle position.
|
||||
|
||||
exact_mappings:
|
||||
- rico:RecordSet
|
||||
|
||||
|
|
@ -162,6 +177,7 @@ classes:
|
|||
- was_generated_by
|
||||
- valid_from
|
||||
- valid_to
|
||||
- lifecycle_phase_type
|
||||
|
||||
slot_usage:
|
||||
id:
|
||||
|
|
@ -591,6 +607,33 @@ classes:
|
|||
required: false
|
||||
description: |
|
||||
End of validity period (typically = transfer_to_collection_date).
|
||||
|
||||
lifecycle_phase_type:
|
||||
slot_uri: skos:broaderTransitive
|
||||
range: uriorcurie
|
||||
required: false
|
||||
description: |
|
||||
Links this CustodianArchive INSTANCE to its lifecycle phase TYPE.
|
||||
|
||||
**SKOS**: skos:broaderTransitive for instance-to-type relationship.
|
||||
|
||||
**Archive Lifecycle Types (Wikidata)**:
|
||||
- Q3621648 (CurrentArchive) - Active records phase
|
||||
- Q244904 (DepositArchive) - Intermediate/semi-current phase
|
||||
- Q3621673 (HistoricalArchive) - Archival/permanent phase
|
||||
|
||||
**Usage**:
|
||||
Classify this operational archive by its position in the records lifecycle.
|
||||
Most CustodianArchive records are in the intermediate phase (awaiting processing).
|
||||
|
||||
**Example**:
|
||||
- CustodianArchive "Ministry Records 2010-2020" → lifecycle_phase_type →
|
||||
DepositArchive (Q244904) - semi-current, awaiting processing
|
||||
examples:
|
||||
- value: "wikidata:Q244904"
|
||||
description: "Deposit archive / semi-current records"
|
||||
- value: "wikidata:Q3621648"
|
||||
description: "Current archive / active records"
|
||||
|
||||
comments:
|
||||
- "Represents operational archives BEFORE integration into CustodianCollection"
|
||||
|
|
@ -719,3 +762,12 @@ slots:
|
|||
arrangement_notes:
|
||||
description: Notes from arrangement process
|
||||
range: string
|
||||
|
||||
lifecycle_phase_type:
|
||||
slot_uri: skos:broaderTransitive
|
||||
description: |
|
||||
Links CustodianArchive INSTANCE to lifecycle phase TYPE.
|
||||
SKOS broaderTransitive for instance-to-type relationship.
|
||||
Values: CurrentArchive (Q3621648), DepositArchive (Q244904),
|
||||
HistoricalArchive (Q3621673).
|
||||
range: uriorcurie
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ classes:
|
|||
- Portuguese: Fundação, Associação, Ltda., S.A.
|
||||
- Italian: Fondazione, Associazione, S.p.A., S.r.l.
|
||||
|
||||
See: .opencode/LEGAL_FORM_FILTERING_RULE.md for comprehensive global list
|
||||
See: rules/LEGAL_FORM_FILTERING_RULE.md for comprehensive global list
|
||||
|
||||
===========================================================================
|
||||
MANDATORY RULE: Special Characters MUST Be Excluded from Abbreviations
|
||||
|
|
@ -112,7 +112,7 @@ classes:
|
|||
- "Heritage@Digital" → "HD" (not "H@D")
|
||||
- "Archives (Historical)" → "AH" (not "A(H)")
|
||||
|
||||
See: .opencode/ABBREVIATION_SPECIAL_CHAR_RULE.md for complete documentation
|
||||
See: rules/ABBREVIATION_SPECIAL_CHAR_RULE.md for complete documentation
|
||||
|
||||
===========================================================================
|
||||
MANDATORY RULE: Diacritics MUST Be Normalized to ASCII in Abbreviations
|
||||
|
|
@ -152,7 +152,7 @@ classes:
|
|||
ascii_text = ''.join(c for c in normalized if unicodedata.category(c) != 'Mn')
|
||||
```
|
||||
|
||||
See: .opencode/ABBREVIATION_SPECIAL_CHAR_RULE.md for complete documentation
|
||||
See: rules/ABBREVIATION_SPECIAL_CHAR_RULE.md for complete documentation
|
||||
|
||||
Can be generated by:
|
||||
1. ReconstructionActivity (formal entity resolution) - was_generated_by link
|
||||
|
|
|
|||
|
|
@ -470,7 +470,7 @@ classes:
|
|||
- "Follows 4-stage GLAM-NER pipeline: recognition → layout → resolution → linking"
|
||||
|
||||
see_also:
|
||||
- ".opencode/WEB_OBSERVATION_PROVENANCE_RULES.md"
|
||||
- "rules/WEB_OBSERVATION_PROVENANCE_RULES.md"
|
||||
- "scripts/fetch_website_playwright.py"
|
||||
- "scripts/add_xpath_provenance.py"
|
||||
- "docs/convention/schema/20251202/entity_annotation_rules_v1.6.0_unified.yaml"
|
||||
|
|
|
|||
303
schemas/20251121/linkml/rules/ABBREVIATION_RULES.md
Normal file
303
schemas/20251121/linkml/rules/ABBREVIATION_RULES.md
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
# Abbreviation Character Filtering Rules
|
||||
|
||||
**Rule ID**: ABBREV-CHAR-FILTER
|
||||
**Status**: MANDATORY
|
||||
**Applies To**: GHCID abbreviation component generation
|
||||
**Created**: 2025-12-07
|
||||
**Updated**: 2025-12-08 (added diacritics rule)
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
**When generating abbreviations for GHCID, ONLY ASCII uppercase letters (A-Z) are permitted. Both special characters AND diacritics MUST be removed/normalized.**
|
||||
|
||||
This is a **MANDATORY** rule. Abbreviations containing special characters or diacritics are INVALID and must be regenerated.
|
||||
|
||||
### Two Mandatory Sub-Rules:
|
||||
|
||||
1. **ABBREV-SPECIAL-CHAR**: Remove all special characters and symbols
|
||||
2. **ABBREV-DIACRITICS**: Normalize all diacritics to ASCII equivalents
|
||||
|
||||
---
|
||||
|
||||
## Rule 1: Diacritics MUST Be Normalized to ASCII (ABBREV-DIACRITICS)
|
||||
|
||||
**Diacritics (accented characters) MUST be normalized to their ASCII base letter equivalents.**
|
||||
|
||||
### Example (Real Case)
|
||||
|
||||
```
|
||||
❌ WRONG: CZ-VY-TEL-L-VHSPAOČRZS (contains Č)
|
||||
✅ CORRECT: CZ-VY-TEL-L-VHSPAOCRZS (ASCII only)
|
||||
```
|
||||
|
||||
### Diacritics Normalization Table
|
||||
|
||||
| Diacritic | ASCII | Example |
|
||||
|-----------|-------|---------|
|
||||
| Á, À, Â, Ã, Ä, Å, Ā | A | "Ålborg" → A |
|
||||
| Č, Ć, Ç | C | "Český" → C |
|
||||
| Ď | D | "Ďáblice" → D |
|
||||
| É, È, Ê, Ë, Ě, Ē | E | "Éire" → E |
|
||||
| Í, Ì, Î, Ï, Ī | I | "Ísland" → I |
|
||||
| Ñ, Ń, Ň | N | "España" → N |
|
||||
| Ó, Ò, Ô, Õ, Ö, Ø, Ō | O | "Österreich" → O |
|
||||
| Ř | R | "Říčany" → R |
|
||||
| Š, Ś, Ş | S | "Šumperk" → S |
|
||||
| Ť | T | "Ťažký" → T |
|
||||
| Ú, Ù, Û, Ü, Ů, Ū | U | "Ústí" → U |
|
||||
| Ý, Ÿ | Y | "Ýmir" → Y |
|
||||
| Ž, Ź, Ż | Z | "Žilina" → Z |
|
||||
| Ł | L | "Łódź" → L |
|
||||
| Æ | AE | "Ærø" → AE |
|
||||
| Œ | OE | "Œuvre" → OE |
|
||||
| ß | SS | "Straße" → SS |
|
||||
|
||||
### Implementation
|
||||
|
||||
```python
|
||||
import unicodedata
|
||||
|
||||
# Letters that have NO NFD decomposition but still need ASCII equivalents
# (per the normalization table above: Æ→AE, Œ→OE, ß→SS, Ł→L, ...).
# NFD only splits base-letter + combining-mark characters (Č → C + háček);
# it leaves Æ, Œ, ß, Ø, Ł, Đ untouched, so they must be mapped explicitly.
_NO_DECOMPOSITION = str.maketrans({
    'Æ': 'AE', 'æ': 'ae',
    'Œ': 'OE', 'œ': 'oe',
    'ß': 'ss',
    'Ø': 'O', 'ø': 'o',
    'Ł': 'L', 'ł': 'l',
    'Đ': 'D', 'đ': 'd',
})


def normalize_diacritics(text: str) -> str:
    """
    Normalize diacritics to ASCII equivalents.

    Examples:
        "Č" → "C"
        "Ř" → "R"
        "Ö" → "O"
        "ñ" → "n"
        "Ł" → "L"
        "ß" → "ss"
    """
    # Map the non-decomposable letters first; NFD alone cannot handle them.
    text = text.translate(_NO_DECOMPOSITION)
    # NFD decomposition separates base characters from combining marks
    normalized = unicodedata.normalize('NFD', text)
    # Remove combining marks (category 'Mn' = Mark, Nonspacing)
    ascii_text = ''.join(c for c in normalized if unicodedata.category(c) != 'Mn')
    return ascii_text
|
||||
|
||||
# Example
|
||||
normalize_diacritics("VHSPAOČRZS") # Returns "VHSPAOCRZS"
|
||||
```
|
||||
|
||||
### Languages Commonly Affected
|
||||
|
||||
| Language | Common Diacritics | Example Institution |
|
||||
|----------|-------------------|---------------------|
|
||||
| **Czech** | Č, Ř, Š, Ž, Ě, Ů | Vlastivědné muzeum → VM (diacritics in the name are normalized before abbreviation) |
|
||||
| **Polish** | Ł, Ń, Ó, Ś, Ź, Ż, Ą, Ę | Biblioteka Łódzka → BL |
|
||||
| **German** | Ä, Ö, Ü, ß | Österreichische Nationalbibliothek → ON |
|
||||
| **French** | É, È, Ê, Ç, Ô | Bibliothèque nationale → BN |
|
||||
| **Spanish** | Ñ, Á, É, Í, Ó, Ú | Museo Nacional → MN |
|
||||
| **Portuguese** | Ã, Õ, Ç, Á, É | Biblioteca Nacional → BN |
|
||||
| **Nordic** | Å, Ä, Ö, Ø, Æ | Nationalmuseet → N |
|
||||
| **Turkish** | Ç, Ğ, İ, Ö, Ş, Ü | İstanbul Üniversitesi → IU |
|
||||
| **Hungarian** | Á, É, Í, Ó, Ö, Ő, Ú, Ü, Ű | Országos Levéltár → OL |
|
||||
| **Romanian** | Ă, Â, Î, Ș, Ț | Biblioteca Națională → BN |
|
||||
|
||||
---
|
||||
|
||||
## Rule 2: Special Characters MUST Be Removed (ABBREV-SPECIAL-CHAR)
|
||||
|
||||
---
|
||||
|
||||
## Rationale
|
||||
|
||||
### 1. URL/URI Safety
|
||||
Special characters require percent-encoding in URIs. For example:
|
||||
- `&` becomes `%26`
|
||||
- `+` becomes `%2B`
|
||||
|
||||
This makes identifiers harder to share, copy, and verify.
|
||||
|
||||
### 2. Filename Safety
|
||||
Many special characters are invalid in filenames across operating systems:
|
||||
- Windows: `\ / : * ? " < > |`
|
||||
- macOS/Linux: `/` and null bytes
|
||||
|
||||
Files like `SX-XX-PHI-O-DR&IMSM.yaml` may cause issues on some systems.
|
||||
|
||||
### 3. Parsing Consistency
|
||||
Special characters can conflict with delimiters in data pipelines:
|
||||
- `&` is used in query strings
|
||||
- `:` is used in YAML, JSON
|
||||
- `/` is a path separator
|
||||
- `|` is a common CSV delimiter alternative
|
||||
|
||||
### 4. Cross-System Compatibility
|
||||
Identifiers should work across all systems:
|
||||
- Databases (SQL, TypeDB, Neo4j)
|
||||
- RDF/SPARQL endpoints
|
||||
- REST APIs
|
||||
- Command-line tools
|
||||
- Spreadsheets
|
||||
|
||||
### 5. Human Readability
|
||||
Clean identifiers are easier to:
|
||||
- Communicate verbally
|
||||
- Type correctly
|
||||
- Proofread
|
||||
- Remember
|
||||
|
||||
---
|
||||
|
||||
## Characters to Remove
|
||||
|
||||
The following characters MUST be completely removed (not replaced) when generating abbreviations:
|
||||
|
||||
| Character | Name | Example Issue |
|
||||
|-----------|------|---------------|
|
||||
| `&` | Ampersand | "R&A" in URLs, HTML entities |
|
||||
| `/` | Slash | Path separator confusion |
|
||||
| `\` | Backslash | Escape sequence issues |
|
||||
| `+` | Plus | URL encoding (`+` = space) |
|
||||
| `@` | At sign | Email/handle confusion |
|
||||
| `#` | Hash/Pound | Fragment identifier in URLs |
|
||||
| `%` | Percent | URL encoding prefix |
|
||||
| `$` | Dollar | Variable prefix in shells |
|
||||
| `*` | Asterisk | Glob/wildcard character |
|
||||
| `(` `)` | Parentheses | Grouping in regex, code |
|
||||
| `[` `]` | Square brackets | Array notation |
|
||||
| `{` `}` | Curly braces | Object notation |
|
||||
| `\|` | Pipe | Command chaining, OR operator |
|
||||
| `:` | Colon | YAML key-value, namespace separator |
|
||||
| `;` | Semicolon | Statement terminator |
|
||||
| `"` `'` `` ` `` | Quotes | String delimiters |
|
||||
| `,` | Comma | List separator |
|
||||
| `.` | Period | File extension, namespace |
|
||||
| `-` | Hyphen | Already used as GHCID component separator |
|
||||
| `_` | Underscore | Reserved for name suffix in collisions |
|
||||
| `=` | Equals | Assignment operator |
|
||||
| `?` | Question mark | Query string indicator |
|
||||
| `!` | Exclamation | Negation, shell history |
|
||||
| `~` | Tilde | Home directory, bitwise NOT |
|
||||
| `^` | Caret | Regex anchor, power operator |
|
||||
| `<` `>` | Angle brackets | HTML tags, redirects |
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
### Algorithm
|
||||
|
||||
When extracting abbreviation from institution name:
|
||||
|
||||
```python
|
||||
import re
|
||||
import unicodedata
|
||||
|
||||
# Letters with no NFD decomposition that need explicit ASCII mappings
# (Æ→AE, Œ→OE, ß→ss, Ø→O, Ł→L, Đ→D per the diacritics table).
# Without this, "Łódzka" would lose its leading letter entirely: Ł survives
# NFD unchanged, then the [^a-zA-Z] filter deletes it, so "Biblioteka
# Łódzka" would abbreviate to "BO" instead of the documented "BL".
_NON_DECOMPOSABLE = str.maketrans({
    'Æ': 'AE', 'æ': 'ae',
    'Œ': 'OE', 'œ': 'oe',
    'ß': 'ss',
    'Ø': 'O', 'ø': 'o',
    'Ł': 'L', 'ł': 'l',
    'Đ': 'D', 'đ': 'd',
})


def extract_abbreviation_from_name(name: str, skip_words: set) -> str:
    """
    Extract abbreviation from institution name.

    Args:
        name: Full institution name (emic)
        skip_words: Set of lowercase prepositions/articles to skip

    Returns:
        Uppercase abbreviation with only A-Z characters (max 10)
    """
    # Step 0: Map letters NFD cannot decompose (Æ, Œ, ß, Ø, Ł, Đ)
    name = name.translate(_NON_DECOMPOSABLE)

    # Step 1: Normalize unicode (remove diacritics)
    normalized = unicodedata.normalize('NFD', name)
    ascii_name = ''.join(c for c in normalized if unicodedata.category(c) != 'Mn')

    # Step 2: Replace special characters with spaces (to split words)
    # This handles cases like "Records&Information" -> "Records Information"
    clean_name = re.sub(r'[^a-zA-Z\s]', ' ', ascii_name)

    # Step 3: Split into words
    words = clean_name.split()

    # Step 4: Filter out skip words (prepositions, articles)
    significant_words = [w for w in words if w.lower() not in skip_words]

    # Step 5: Take first letter of each significant word
    abbreviation = ''.join(w[0].upper() for w in significant_words if w)

    # Step 6: Limit to 10 characters
    return abbreviation[:10]
|
||||
```
|
||||
|
||||
### Handling Special Cases
|
||||
|
||||
**Case 1: "Records & Information Management"**
|
||||
1. Input: `"Records & Information Management"`
|
||||
2. After special char removal: `"Records Information Management"`
|
||||
3. After split: `["Records", "Information", "Management"]`
|
||||
4. Abbreviation: `RIM`
|
||||
|
||||
**Case 2: "Art/Design Museum"**
|
||||
1. Input: `"Art/Design Museum"`
|
||||
2. After special char removal: `"Art Design Museum"`
|
||||
3. After split: `["Art", "Design", "Museum"]`
|
||||
4. Abbreviation: `ADM`
|
||||
|
||||
**Case 3: "Culture+"**
|
||||
1. Input: `"Culture+"`
|
||||
2. After special char removal: `"Culture"`
|
||||
3. After split: `["Culture"]`
|
||||
4. Abbreviation: `C`
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
| Institution Name | Correct | Incorrect |
|
||||
|------------------|---------|-----------|
|
||||
| Department of Records & Information Management | DRIM | DR&IM |
|
||||
| Art + Culture Center | ACC | A+CC |
|
||||
| Museum/Gallery Amsterdam | MGA | M/GA |
|
||||
| Heritage@Digital | HD | H@D |
|
||||
| Archives (Historical) | AH | A(H) |
|
||||
| Research & Development Institute | RDI | R&DI |
|
||||
| Sint Maarten Records & Information | SMRI | SMR&I |
|
||||
|
||||
---
|
||||
|
||||
## Validation
|
||||
|
||||
### Check for Invalid Abbreviations
|
||||
|
||||
```bash
|
||||
# Find GHCID files with special characters in abbreviation
|
||||
find data/custodian -name "*.yaml" | xargs grep -l '[&+@#%$*|:;?!=~^<>]' | head -20
|
||||
|
||||
# Specifically check for & in filenames
|
||||
find data/custodian -name "*&*.yaml"
|
||||
```
|
||||
|
||||
### Programmatic Validation
|
||||
|
||||
```python
|
||||
import re
|
||||
|
||||
def validate_abbreviation(abbrev: str) -> bool:
    """
    Validate that abbreviation contains only A-Z.

    Uses re.fullmatch rather than re.match with a '$' anchor: with '$',
    a trailing newline would slip through (re.match accepts "ABC" plus
    a final newline), violating the A-Z-only rule.

    Returns True if valid, False if empty or contains special characters.
    """
    return re.fullmatch(r'[A-Z]+', abbrev) is not None
|
||||
|
||||
# Examples
|
||||
validate_abbreviation("DRIMSM") # True - valid
|
||||
validate_abbreviation("DR&IMSM") # False - contains &
|
||||
validate_abbreviation("A+CC") # False - contains +
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- `AGENTS.md` - Section "INSTITUTION ABBREVIATION: EMIC NAME FIRST-LETTER PROTOCOL"
|
||||
- `schemas/20251121/linkml/modules/classes/CustodianName.yaml` - Schema description
|
||||
- `rules/LEGAL_FORM_FILTERING_RULE.md` - Related filtering rule for legal forms
|
||||
- `docs/PERSISTENT_IDENTIFIERS.md` - GHCID specification
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Date | Change |
|
||||
|------|--------|
|
||||
| 2025-12-07 | Initial rule created after discovery of `&` in GHCID |
|
||||
| 2025-12-08 | Added diacritics normalization rule |
|
||||
237
schemas/20251121/linkml/rules/ENUM_TO_CLASS.md
Normal file
237
schemas/20251121/linkml/rules/ENUM_TO_CLASS.md
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
# Enum-to-Class Principle: Single Source of Truth
|
||||
|
||||
**Rule ID**: ENUM-TO-CLASS
|
||||
**Status**: ACTIVE
|
||||
**Applies To**: Schema evolution decisions
|
||||
**Version**: 1.0
|
||||
**Last Updated**: 2025-12-06
|
||||
|
||||
---
|
||||
|
||||
## Core Principle
|
||||
|
||||
**Enums are TEMPORARY scaffolding. Once an enum is promoted to a class hierarchy, the enum MUST be deleted to maintain a Single Source of Truth.**
|
||||
|
||||
---
|
||||
|
||||
## Rationale
|
||||
|
||||
### The Problem: Dual Representation
|
||||
|
||||
When both an enum AND a class hierarchy exist for the same concept:
|
||||
- **Data sync issues**: Enum values and class names can drift apart
|
||||
- **Maintenance burden**: Changes must be made in two places
|
||||
- **Developer confusion**: Which one should I use?
|
||||
- **Validation conflicts**: Enum constraints vs class ranges may diverge
|
||||
|
||||
### The Solution: Single Source of Truth
|
||||
|
||||
- **Enums**: Use for simple, fixed value constraints (e.g., `DataTierEnum: TIER_1, TIER_2, TIER_3, TIER_4`)
|
||||
- **Classes**: Use when the concept needs properties, relationships, or rich documentation
|
||||
- **NEVER BOTH**: Once promoted to classes, DELETE the enum
|
||||
|
||||
---
|
||||
|
||||
## When to Promote Enum to Classes
|
||||
|
||||
**Promote when the concept needs**:
|
||||
|
||||
| Need | Enum Can Do? | Class Required? |
|
||||
|------|-------------|-----------------|
|
||||
| Fixed value constraint | Yes | Yes |
|
||||
| Properties (e.g., `role_category`, `typical_domains`) | No | Yes |
|
||||
| Rich description per value | Limited | Yes |
|
||||
| Relationships to other entities | No | Yes |
|
||||
| Inheritance hierarchy | No | Yes |
|
||||
| Independent identity (URI) | Limited | Yes |
|
||||
| Ontology class mapping (`class_uri`) | Via `meaning` | Native |
|
||||
|
||||
**Rule of thumb**: If you're adding detailed documentation to each enum value, or want to attach properties, it's time to promote to classes.
|
||||
|
||||
---
|
||||
|
||||
## Promotion Workflow
|
||||
|
||||
### Step 1: Create Class Hierarchy
|
||||
|
||||
```yaml
|
||||
# modules/classes/StaffRole.yaml (base class)
|
||||
StaffRole:
|
||||
abstract: true
|
||||
description: Base class for staff role categories
|
||||
slots:
|
||||
- role_id
|
||||
- role_name
|
||||
- role_category
|
||||
- typical_domains
|
||||
|
||||
# modules/classes/StaffRoles.yaml (subclasses)
|
||||
Curator:
|
||||
is_a: StaffRole
|
||||
description: Museum curator specializing in collection research...
|
||||
|
||||
Conservator:
|
||||
is_a: StaffRole
|
||||
description: Conservator specializing in preservation...
|
||||
```
|
||||
|
||||
### Step 2: Update Slot Ranges
|
||||
|
||||
```yaml
|
||||
# BEFORE (enum)
|
||||
staff_role:
|
||||
range: StaffRoleTypeEnum
|
||||
|
||||
# AFTER (class)
|
||||
staff_role:
|
||||
range: StaffRole
|
||||
```
|
||||
|
||||
### Step 3: Update Modular Schema Imports
|
||||
|
||||
```yaml
|
||||
# REMOVE enum import
|
||||
# - modules/enums/StaffRoleTypeEnum # DELETED
|
||||
|
||||
# ADD class imports
|
||||
- modules/classes/StaffRole
|
||||
- modules/classes/StaffRoles
|
||||
```
|
||||
|
||||
### Step 4: Archive the Enum
|
||||
|
||||
```bash
|
||||
mkdir -p schemas/.../archive/enums
|
||||
mv modules/enums/OldEnum.yaml archive/enums/OldEnum.yaml.archived_$(date +%Y%m%d)
|
||||
```
|
||||
|
||||
### Step 5: Document the Change
|
||||
|
||||
- Update `archive/enums/README.md` with migration entry
|
||||
- Add comment in modular schema explaining removal
|
||||
- Update any documentation referencing the old enum
|
||||
|
||||
---
|
||||
|
||||
## Example: StaffRoleTypeEnum to StaffRole
|
||||
|
||||
**Before** (2025-12-05):
|
||||
```yaml
|
||||
# StaffRoleTypeEnum.yaml
|
||||
StaffRoleTypeEnum:
|
||||
permissible_values:
|
||||
CURATOR:
|
||||
description: Museum curator
|
||||
CONSERVATOR:
|
||||
description: Conservator
|
||||
# ... 51 values with limited documentation
|
||||
```
|
||||
|
||||
**After** (2025-12-06):
|
||||
```yaml
|
||||
# StaffRole.yaml (abstract base)
|
||||
StaffRole:
|
||||
abstract: true
|
||||
slots:
|
||||
- role_id
|
||||
- role_name
|
||||
- role_category
|
||||
- typical_domains
|
||||
- typical_responsibilities
|
||||
- requires_qualification
|
||||
|
||||
# StaffRoles.yaml (51 subclasses)
|
||||
Curator:
|
||||
is_a: StaffRole
|
||||
class_uri: schema:curator
|
||||
description: |
|
||||
Museum curator specializing in collection research...
|
||||
|
||||
**IMPORTANT - FORMAL TITLE vs DE FACTO WORK**:
|
||||
This is the OFFICIAL job appellation/title. Actual work may differ.
|
||||
slot_usage:
|
||||
role_category:
|
||||
equals_string: CURATORIAL
|
||||
typical_domains:
|
||||
equals_expression: "[Museums, Galleries]"
|
||||
```
|
||||
|
||||
**Why the promotion?**
|
||||
1. Need to distinguish FORMAL TITLE from DE FACTO WORK
|
||||
2. Each role has `role_category`, `common_variants`, `typical_domains`, `typical_responsibilities`
|
||||
3. Roles benefit from inheritance (`Curator is_a StaffRole`)
|
||||
4. Richer documentation per role
|
||||
|
||||
---
|
||||
|
||||
## Enums That Should REMAIN Enums
|
||||
|
||||
Some enums are appropriate as permanent fixtures:
|
||||
|
||||
| Enum | Why Keep as Enum |
|
||||
|------|------------------|
|
||||
| `DataTierEnum` | Simple 4-value tier (TIER_1 through TIER_4), no properties needed |
|
||||
| `DataSourceEnum` | Fixed source types, simple strings |
|
||||
| `CountryCodeEnum` | ISO 3166-1 standard, no custom properties |
|
||||
| `LanguageCodeEnum` | ISO 639 standard, no custom properties |
|
||||
|
||||
**Characteristics of "permanent" enums**:
|
||||
- Based on external standards (ISO, etc.)
|
||||
- Simple values with no need for properties
|
||||
- Unlikely to require rich per-value documentation
|
||||
- Used purely for validation/constraint
|
||||
|
||||
---
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
### WRONG: Keep Both Enum and Classes
|
||||
|
||||
```yaml
|
||||
# modules/enums/StaffRoleTypeEnum.yaml # Still exists!
|
||||
# modules/classes/StaffRole.yaml # Also exists!
|
||||
# Which one is authoritative? CONFUSION!
|
||||
```
|
||||
|
||||
### WRONG: Create Classes but Keep Enum "for backwards compatibility"
|
||||
|
||||
```yaml
|
||||
# "Let's keep the enum for old code"
|
||||
# Result: Two sources of truth, guaranteed drift
|
||||
```
|
||||
|
||||
### CORRECT: Delete Enum After Creating Classes
|
||||
|
||||
```yaml
|
||||
# modules/enums/StaffRoleTypeEnum.yaml # ARCHIVED
|
||||
# modules/classes/StaffRole.yaml # Single source of truth
|
||||
# modules/classes/StaffRoles.yaml # All 51 role subclasses
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Verification Checklist
|
||||
|
||||
After promoting an enum to classes:
|
||||
|
||||
- [ ] Old enum file moved to `archive/enums/`
|
||||
- [ ] Modular schema import removed for enum
|
||||
- [ ] Modular schema import added for new class(es)
|
||||
- [ ] All slot ranges updated from enum to class
|
||||
- [ ] No grep results for old enum name in active schema files
|
||||
- [ ] `archive/enums/README.md` updated with migration entry
|
||||
- [ ] Comment added in modular schema explaining removal
|
||||
|
||||
```bash
|
||||
# Verify enum is fully removed (should return only archive hits)
|
||||
grep -r "StaffRoleTypeEnum" schemas/20251121/linkml/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- `docs/ENUM_CLASS_SINGLE_SOURCE.md` - Extended documentation
|
||||
- `schemas/20251121/linkml/archive/enums/README.md` - Archive directory
|
||||
- LinkML documentation on enums: https://linkml.io/linkml/schemas/enums.html
|
||||
- LinkML documentation on classes: https://linkml.io/linkml/schemas/models.html
|
||||
436
schemas/20251121/linkml/rules/GEONAMES_SETTLEMENT.md
Normal file
436
schemas/20251121/linkml/rules/GEONAMES_SETTLEMENT.md
Normal file
|
|
@ -0,0 +1,436 @@
|
|||
# GeoNames Settlement Standardization Rules
|
||||
|
||||
**Rule ID**: GEONAMES-SETTLEMENT
|
||||
**Status**: MANDATORY
|
||||
**Applies To**: GHCID settlement component generation
|
||||
**Version**: 1.1.0
|
||||
**Effective Date**: 2025-12-01
|
||||
**Last Updated**: 2025-12-01
|
||||
|
||||
---
|
||||
|
||||
## Purpose
|
||||
|
||||
This document defines the rules for standardizing settlement names in GHCID (Global Heritage Custodian Identifier) generation using the GeoNames geographical database.
|
||||
|
||||
## Core Principle
|
||||
|
||||
**ALL settlement names in GHCID must be derived from GeoNames standardized names, not from source data.**
|
||||
|
||||
The GeoNames database serves as the **single source of truth** for:
|
||||
- Settlement names (cities, towns, villages)
|
||||
- Settlement abbreviations/codes
|
||||
- Administrative region codes (admin1)
|
||||
- Geographic coordinates validation
|
||||
|
||||
## Why GeoNames Standardization?
|
||||
|
||||
1. **Consistency**: Same settlement = same GHCID component, regardless of source data variations
|
||||
2. **Disambiguation**: Handles duplicate city names across regions
|
||||
3. **Internationalization**: Provides ASCII-safe names for identifiers
|
||||
4. **Authority**: GeoNames is a well-maintained, CC-licensed geographic database
|
||||
5. **Persistence**: Settlement names don't change frequently, ensuring GHCID stability
|
||||
|
||||
---
|
||||
|
||||
## CRITICAL: Feature Code Filtering
|
||||
|
||||
**NEVER use neighborhoods or districts (PPLX) for GHCID generation. ONLY use proper settlements (cities, towns, villages).**
|
||||
|
||||
GeoNames classifies populated places with feature codes. When reverse geocoding coordinates to find a settlement, you MUST filter by feature code.
|
||||
|
||||
### ALLOWED Feature Codes
|
||||
|
||||
| Code | Description | Example |
|
||||
|------|-------------|---------|
|
||||
| **PPL** | Populated place (city/town/village) | Apeldoorn, Hamont, Lelystad |
|
||||
| **PPLA** | Seat of first-order admin division | Provincial capitals |
|
||||
| **PPLA2** | Seat of second-order admin division | Municipal seats |
|
||||
| **PPLA3** | Seat of third-order admin division | District seats |
|
||||
| **PPLA4** | Seat of fourth-order admin division | Sub-district seats |
|
||||
| **PPLC** | Capital of a political entity | Amsterdam, Brussels |
|
||||
| **PPLS** | Populated places (multiple) | Settlement clusters |
|
||||
| **PPLG** | Seat of government | The Hague |
|
||||
|
||||
### EXCLUDED Feature Codes
|
||||
|
||||
| Code | Description | Why Excluded |
|
||||
|------|-------------|--------------|
|
||||
| **PPLX** | Section of populated place | Neighborhoods, districts, quarters (e.g., "Binnenstad", "Amsterdam Binnenstad") |
|
||||
|
||||
### Implementation
|
||||
|
||||
```python
|
||||
VALID_FEATURE_CODES = ('PPL', 'PPLA', 'PPLA2', 'PPLA3', 'PPLA4', 'PPLC', 'PPLS', 'PPLG')
|
||||
|
||||
query = """
|
||||
SELECT name, feature_code, geonames_id, ...
|
||||
FROM cities
|
||||
WHERE country_code = ?
|
||||
AND feature_code IN (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ORDER BY distance_sq
|
||||
LIMIT 1
|
||||
"""
|
||||
cursor.execute(query, (country_code, *VALID_FEATURE_CODES))
|
||||
```
|
||||
|
||||
### Verification
|
||||
|
||||
Always check `feature_code` in location_resolution metadata:
|
||||
|
||||
```yaml
|
||||
location_resolution:
|
||||
geonames_name: Apeldoorn
|
||||
feature_code: PPL # ← MUST be PPL, PPLA*, PPLC, PPLS, or PPLG
|
||||
```
|
||||
|
||||
**If you see `feature_code: PPLX`**, the GHCID is WRONG and must be regenerated.
|
||||
|
||||
---
|
||||
|
||||
## CRITICAL: Country Code Detection
|
||||
|
||||
**Determine country code from entry data BEFORE calling GeoNames reverse geocoding.**
|
||||
|
||||
GeoNames queries are country-specific. Using the wrong country code will return incorrect results.
|
||||
|
||||
### Country Code Resolution Priority
|
||||
|
||||
1. `zcbs_enrichment.country` - Most explicit source
|
||||
2. `location.country` - Direct location field
|
||||
3. `locations[].country` - Array location field
|
||||
4. `original_entry.country` - CSV source field
|
||||
5. `google_maps_enrichment.address` - Parse from address string
|
||||
6. `wikidata_enrichment.located_in.label` - Infer from Wikidata
|
||||
7. Default: `"NL"` (Netherlands) - Only if no other source
|
||||
|
||||
### Example
|
||||
|
||||
```python
|
||||
# Determine country code FIRST
|
||||
country_code = "NL" # Default
|
||||
|
||||
if entry.get('zcbs_enrichment', {}).get('country'):
|
||||
country_code = entry['zcbs_enrichment']['country']
|
||||
elif entry.get('google_maps_enrichment', {}).get('address', ''):
|
||||
address = entry['google_maps_enrichment']['address']
|
||||
if ', Belgium' in address:
|
||||
country_code = "BE"
|
||||
elif ', Germany' in address:
|
||||
country_code = "DE"
|
||||
|
||||
# THEN call reverse geocoding
|
||||
result = reverse_geocode_to_city(latitude, longitude, country_code)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Settlement Resolution Process
|
||||
|
||||
### Step 1: Coordinate-Based Resolution (Preferred)
|
||||
|
||||
When coordinates are available, use reverse geocoding to find the nearest GeoNames settlement:
|
||||
|
||||
```python
|
||||
def resolve_settlement_from_coordinates(latitude: float, longitude: float, country_code: str = "NL") -> dict:
|
||||
"""
|
||||
Find the GeoNames settlement nearest to given coordinates.
|
||||
|
||||
Returns:
|
||||
{
|
||||
'settlement_name': 'Lelystad', # GeoNames standardized name
|
||||
'settlement_code': 'LEL', # 3-letter abbreviation
|
||||
'admin1_code': '16', # GeoNames admin1 code
|
||||
'region_code': 'FL', # ISO 3166-2 region code
|
||||
'geonames_id': 2751792, # GeoNames ID for provenance
|
||||
'distance_km': 0.5 # Distance from coords to settlement center
|
||||
}
|
||||
"""
|
||||
```
|
||||
|
||||
### Step 2: Name-Based Resolution (Fallback)
|
||||
|
||||
When only a settlement name is available (no coordinates), look up in GeoNames:
|
||||
|
||||
```python
|
||||
def resolve_settlement_from_name(name: str, country_code: str = "NL") -> dict:
|
||||
"""
|
||||
Find the GeoNames settlement matching the given name.
|
||||
|
||||
Uses fuzzy matching and disambiguation when multiple matches exist.
|
||||
"""
|
||||
```
|
||||
|
||||
### Step 3: Manual Resolution (Last Resort)
|
||||
|
||||
If GeoNames lookup fails, flag the entry for manual review with:
|
||||
- `settlement_source: MANUAL`
|
||||
- `settlement_needs_review: true`
|
||||
|
||||
---
|
||||
|
||||
## GHCID Settlement Component Rules
|
||||
|
||||
### Format
|
||||
|
||||
The settlement component in GHCID uses a **3-letter uppercase code**:
|
||||
|
||||
```
|
||||
NL-{REGION}-{SETTLEMENT}-{TYPE}-{ABBREV}
|
||||
^^^^^^^^^^^
|
||||
3-letter code from GeoNames
|
||||
```
|
||||
|
||||
### Code Generation Rules
|
||||
|
||||
1. **Single-word settlements**: First 3 letters uppercase
|
||||
- `Amsterdam` → `AMS`
|
||||
- `Rotterdam` → `ROT`
|
||||
- `Lelystad` → `LEL`
|
||||
|
||||
2. **Settlements with Dutch articles** (`de`, `het`, `den`, `'s`):
|
||||
- First letter of article + first 2 letters of main word
|
||||
- `Den Haag` → `DHA`
|
||||
- `'s-Hertogenbosch` → `SHE`
|
||||
- `De Bilt` → `DBI`
|
||||
|
||||
3. **Multi-word settlements** (no article):
|
||||
   - First letter of the first word + first 2 letters of the final word (up to 3 letters total)
|
||||
- `Nieuw Amsterdam` → `NAM`
|
||||
- `Oud Beijerland` → `OBE`
|
||||
|
||||
4. **GeoNames Disambiguation Database**:
|
||||
- For known problematic settlements, use pre-defined codes from disambiguation table
|
||||
- Example: Both `Zwolle` (OV) and `Zwolle` (LI) exist - use `ZWO` with region for uniqueness
|
||||
|
||||
### Measurement Point for Historical Custodians
|
||||
|
||||
**Rule**: For heritage custodians that no longer exist or have historical coordinates, the **modern-day settlement** (as of 2025-12-01) is used.
|
||||
|
||||
Rationale:
|
||||
- GHCIDs should be stable over time
|
||||
- Historical place names may have changed
|
||||
- Modern settlements are easier to verify and look up
|
||||
- GeoNames reflects current geographic reality
|
||||
|
||||
Example:
|
||||
- A museum that operated 1900-1950 in what was then "Nieuw Land" (before Flevoland province existed)
|
||||
- Modern coordinates fall within Lelystad municipality
|
||||
- GHCID uses `LEL` (Lelystad) as settlement code, not historical name
|
||||
|
||||
---
|
||||
|
||||
## GeoNames Database Integration
|
||||
|
||||
### Database Location
|
||||
|
||||
```
|
||||
/data/reference/geonames.db
|
||||
```
|
||||
|
||||
### Required Tables
|
||||
|
||||
```sql
|
||||
-- Cities/settlements table
|
||||
CREATE TABLE cities (
|
||||
geonames_id INTEGER PRIMARY KEY,
|
||||
name TEXT, -- Local name (may have diacritics)
|
||||
ascii_name TEXT, -- ASCII-safe name for identifiers
|
||||
country_code TEXT, -- ISO 3166-1 alpha-2
|
||||
admin1_code TEXT, -- First-level administrative division
|
||||
admin1_name TEXT, -- Region/province name
|
||||
latitude REAL,
|
||||
longitude REAL,
|
||||
population INTEGER,
|
||||
feature_code TEXT -- PPL, PPLA, PPLC, etc.
|
||||
);
|
||||
|
||||
-- Disambiguation table for problematic settlements
|
||||
CREATE TABLE settlement_codes (
|
||||
geonames_id INTEGER PRIMARY KEY,
|
||||
country_code TEXT,
|
||||
settlement_code TEXT, -- 3-letter code
|
||||
is_primary BOOLEAN, -- Primary code for this settlement
|
||||
notes TEXT
|
||||
);
|
||||
```
|
||||
|
||||
### Admin1 Code Mapping (Netherlands)
|
||||
|
||||
**IMPORTANT**: GeoNames admin1 codes differ from historical numbering. Use this mapping:
|
||||
|
||||
| GeoNames admin1 | Province | ISO 3166-2 |
|
||||
|-----------------|----------|------------|
|
||||
| 01 | Drenthe | NL-DR |
|
||||
| 02 | Friesland | NL-FR |
|
||||
| 03 | Gelderland | NL-GE |
|
||||
| 04 | Groningen | NL-GR |
|
||||
| 05 | Limburg | NL-LI |
|
||||
| 06 | Noord-Brabant | NL-NB |
|
||||
| 07 | Noord-Holland | NL-NH |
|
||||
| 09 | Utrecht | NL-UT |
|
||||
| 10 | Zeeland | NL-ZE |
|
||||
| 11 | Zuid-Holland | NL-ZH |
|
||||
| 15 | Overijssel | NL-OV |
|
||||
| 16 | Flevoland | NL-FL |
|
||||
|
||||
**Note**: Code 08 is not used in Netherlands (was assigned to former region).
|
||||
|
||||
---
|
||||
|
||||
## Validation Requirements
|
||||
|
||||
### Before GHCID Generation
|
||||
|
||||
Every entry MUST have:
|
||||
- [ ] Settlement name resolved via GeoNames
|
||||
- [ ] `geonames_id` recorded in entry metadata
|
||||
- [ ] Settlement code (3-letter) generated consistently
|
||||
- [ ] Admin1/region code mapped correctly
|
||||
|
||||
### Provenance Tracking
|
||||
|
||||
Record GeoNames resolution in entry metadata:
|
||||
|
||||
```yaml
|
||||
location_resolution:
|
||||
method: REVERSE_GEOCODE # or NAME_LOOKUP or MANUAL
|
||||
geonames_id: 2751792
|
||||
geonames_name: Lelystad
|
||||
settlement_code: LEL
|
||||
admin1_code: "16"
|
||||
region_code: FL
|
||||
resolution_date: "2025-12-01T00:00:00Z"
|
||||
source_coordinates:
|
||||
latitude: 52.52111
|
||||
longitude: 5.43722
|
||||
distance_to_settlement_km: 0.5
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CRITICAL: XXX Placeholders Are TEMPORARY - Research Required
|
||||
|
||||
**XXX placeholders for region/settlement codes are NEVER acceptable as a final state.**
|
||||
|
||||
When an entry has `XX` (unknown region) or `XXX` (unknown settlement), the agent MUST conduct research to resolve the location.
|
||||
|
||||
### Resolution Strategy by Institution Type
|
||||
|
||||
| Institution Type | Location Resolution Method |
|
||||
|------------------|---------------------------|
|
||||
| **Destroyed institution** | Use last known physical location before destruction |
|
||||
| **Historical (closed)** | Use last operating location |
|
||||
| **Refugee/diaspora org** | Use current headquarters OR original founding location |
|
||||
| **Digital-only platform** | Use parent/founding organization's headquarters |
|
||||
| **Decentralized initiative** | Use founding location or primary organizer location |
|
||||
| **Unknown city, known country** | Research via Wikidata, Google Maps, official website |
|
||||
|
||||
### Research Sources (Priority Order)
|
||||
|
||||
1. **Wikidata** - P131 (located in), P159 (headquarters location), P625 (coordinates)
|
||||
2. **Google Maps** - Search institution name
|
||||
3. **Official Website** - Contact page, about page
|
||||
4. **Web Archive** - archive.org for destroyed/closed institutions
|
||||
5. **Academic Sources** - Papers, reports
|
||||
6. **News Articles** - Particularly for destroyed heritage sites
|
||||
|
||||
### Location Resolution Metadata
|
||||
|
||||
When resolving XXX placeholders, update `location_resolution`:
|
||||
|
||||
```yaml
|
||||
location_resolution:
|
||||
method: MANUAL_RESEARCH # Previously was NAME_LOOKUP with XXX
|
||||
country_code: PS
|
||||
region_code: GZ
|
||||
region_name: Gaza Strip
|
||||
city_code: GAZ
|
||||
city_name: Gaza City
|
||||
geonames_id: 281133
|
||||
research_date: "2025-12-06T00:00:00Z"
|
||||
research_sources:
|
||||
- type: wikidata
|
||||
id: Q123456
|
||||
claim: P131
|
||||
- type: web_archive
|
||||
url: https://web.archive.org/web/20231001/https://institution-website.org/contact
|
||||
notes: "Located in Gaza City prior to destruction in 2024"
|
||||
```
|
||||
|
||||
### File Renaming After Resolution
|
||||
|
||||
When GHCID changes due to XXX resolution, the file MUST be renamed:
|
||||
|
||||
```bash
|
||||
# Before
|
||||
data/custodian/PS-XX-XXX-A-NAPR.yaml
|
||||
|
||||
# After
|
||||
data/custodian/PS-GZ-GAZ-A-NAPR.yaml
|
||||
```
|
||||
|
||||
### Prohibited Practices
|
||||
|
||||
- ❌ Leaving XXX placeholders in production data
|
||||
- ❌ Using "Online" or country name as location
|
||||
- ❌ Skipping research because it's difficult
|
||||
- ❌ Using XX/XXX for diaspora organizations
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
### No GeoNames Match
|
||||
|
||||
If a settlement cannot be resolved via automated lookup:
|
||||
1. Log warning with entry details
|
||||
2. Set `settlement_code: XXX` (temporary placeholder)
|
||||
3. Set `settlement_needs_review: true`
|
||||
4. Do NOT skip the entry - generate GHCID with XXX placeholder
|
||||
5. **IMMEDIATELY** begin manual research to resolve
|
||||
|
||||
### Multiple GeoNames Matches
|
||||
|
||||
When multiple settlements match a name:
|
||||
1. Use coordinates to disambiguate (if available)
|
||||
2. Use admin1/region context (if available)
|
||||
3. Use population as tiebreaker (prefer larger settlement)
|
||||
4. Flag for manual review if still ambiguous
|
||||
|
||||
### Coordinates Outside Country
|
||||
|
||||
If coordinates fall outside the expected country:
|
||||
1. Log warning
|
||||
2. Use nearest settlement within country
|
||||
3. Flag for manual review
|
||||
|
||||
---
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- `AGENTS.md` - Section on GHCID generation
|
||||
- `docs/PERSISTENT_IDENTIFIERS.md` - Complete GHCID specification
|
||||
- `docs/GHCID_PID_SCHEME.md` - PID scheme details
|
||||
- `scripts/enrich_nde_entries_ghcid.py` - Implementation
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
### v1.1.0 (2025-12-01)
|
||||
- **CRITICAL**: Added feature code filtering rules
|
||||
- MUST filter for PPL, PPLA, PPLA2, PPLA3, PPLA4, PPLC, PPLS, PPLG
|
||||
- MUST exclude PPLX (neighborhoods/districts)
|
||||
- Example: Apeldoorn (PPL) not "Binnenstad" (PPLX)
|
||||
- **CRITICAL**: Added country code detection rules
|
||||
- Must determine country from entry data BEFORE reverse geocoding
|
||||
- Priority: zcbs_enrichment.country > location.country > address parsing
|
||||
- Example: Belgian institutions use BE, not NL
|
||||
- Added Belgium admin1 code mapping (BRU, VLG, WAL)
|
||||
|
||||
### v1.0.0 (2025-12-01)
|
||||
- Initial version
|
||||
- Established GeoNames as authoritative source for settlement standardization
|
||||
- Defined measurement point rule for historical custodians
|
||||
- Documented admin1 code mapping for Netherlands
|
||||
346
schemas/20251121/linkml/rules/LEGAL_FORM_FILTER.md
Normal file
346
schemas/20251121/linkml/rules/LEGAL_FORM_FILTER.md
Normal file
|
|
@ -0,0 +1,346 @@
|
|||
# Legal Form Filtering Rule for CustodianName
|
||||
|
||||
**Rule ID**: LEGAL-FORM-FILTER
|
||||
**Status**: MANDATORY
|
||||
**Applies To**: CustodianName standardization
|
||||
**Created**: 2025-12-02
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
**CRITICAL RULE**: Legal form designations MUST ALWAYS be filtered from `CustodianName`, even when the custodian self-identifies with them.
|
||||
|
||||
This is the **ONE EXCEPTION** to the emic (insider name) principle in the Heritage Custodian Ontology.
|
||||
|
||||
## Rationale
|
||||
|
||||
### Why Legal Forms Are NOT Part of Identity
|
||||
|
||||
1. **Legal Form ≠ Identity**: The legal structure is administrative metadata, not the custodian's core identity
|
||||
- "Stichting Rijksmuseum" → Identity is "Rijksmuseum", legal form is "Stichting"
|
||||
|
||||
2. **Legal Forms Change Over Time**: Organizations transform while identity persists
|
||||
- Association → Foundation → Corporation (same museum, different legal structures)
|
||||
|
||||
3. **Cross-Jurisdictional Consistency**: Same organization may have different legal forms in different countries
|
||||
- "Getty Foundation" (US) = "Stichting Getty" (NL) = same identity
|
||||
|
||||
4. **Deduplication**: Prevents false duplicates
|
||||
- "Museum X" and "Stichting Museum X" should NOT be separate entities
|
||||
|
||||
5. **ISO 20275 Alignment**: The Entity Legal Forms (ELF) standard — used by the Legal Entity Identifier (LEI) system — explicitly separates legal form from entity name
|
||||
|
||||
### Where Legal Form IS Stored
|
||||
|
||||
Legal form information is NOT discarded - it is stored in appropriate metadata fields:
|
||||
|
||||
| Field | Location | Purpose |
|
||||
|-------|----------|---------|
|
||||
| `legal_form` | `CustodianLegalStatus` | ISO 20275 legal form code |
|
||||
| `legal_name` | `CustodianLegalStatus` | Full registered name including legal form |
|
||||
| `observed_name` | `CustodianObservation` | Original name as observed in source (may include legal form) |
|
||||
|
||||
## Examples
|
||||
|
||||
### Dutch Examples
|
||||
|
||||
| Source Name | CustodianName | Legal Form | Notes |
|
||||
|-------------|---------------|------------|-------|
|
||||
| Stichting Rijksmuseum | Rijksmuseum | Stichting | Prefix removal |
|
||||
| Hidde Nijland Stichting | Hidde Nijland | Stichting | Suffix removal |
|
||||
| Stichting Het Loo | Het Loo | Stichting | Preserve article "Het" |
|
||||
| Coöperatie Erfgoed | Erfgoed | Coöperatie | |
|
||||
| Vereniging Ons Huis | Ons Huis | Vereniging | |
|
||||
| Museum B.V. | Museum | B.V. | |
|
||||
|
||||
### International Examples
|
||||
|
||||
| Source Name | CustodianName | Legal Form | Language |
|
||||
|-------------|---------------|------------|----------|
|
||||
| The Getty Foundation | The Getty | Foundation | English |
|
||||
| British Museum Trust Ltd | British Museum | Trust Ltd | English |
|
||||
| Smithsonian Institution Inc. | Smithsonian Institution | Inc. | English |
|
||||
| Fundação Biblioteca Nacional | Biblioteca Nacional | Fundação | Portuguese |
|
||||
| Verein Deutsches Museum | Deutsches Museum | Verein | German |
|
||||
| Association des Amis du Louvre | Amis du Louvre | Association | French |
|
||||
| Fondazione Musei Civici | Musei Civici | Fondazione | Italian |
|
||||
| Fundación Museo del Prado | Museo del Prado | Fundación | Spanish |
|
||||
|
||||
---
|
||||
|
||||
## Global Legal Form Terms Reference
|
||||
|
||||
### Dutch (Netherlands, Belgium-Flanders)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Stichting (foundation)
|
||||
- Vereniging (association)
|
||||
- Coöperatie, Coöperatieve (cooperative)
|
||||
|
||||
**Business Entities:**
|
||||
- B.V., BV (besloten vennootschap - private limited company)
|
||||
- N.V., NV (naamloze vennootschap - public limited company)
|
||||
- V.O.F., VOF (vennootschap onder firma - general partnership)
|
||||
- C.V., CV (commanditaire vennootschap - limited partnership)
|
||||
- Maatschap (partnership)
|
||||
- Eenmanszaak (sole proprietorship)
|
||||
|
||||
### English (UK, US, Ireland, Australia, etc.)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Foundation
|
||||
- Trust
|
||||
- Association
|
||||
- Society
|
||||
- Institute
|
||||
- Institution (when followed by Inc./Ltd.)
|
||||
- Charity
|
||||
- Fund
|
||||
|
||||
**Business Entities:**
|
||||
- Inc., Incorporated
|
||||
- Ltd., Limited
|
||||
- LLC, L.L.C. (limited liability company)
|
||||
- LLP, L.L.P. (limited liability partnership)
|
||||
- Corp., Corporation
|
||||
- Co., Company
|
||||
- PLC, plc (public limited company - UK)
|
||||
- Pty Ltd (proprietary limited - Australia)
|
||||
|
||||
### German (Germany, Austria, Switzerland)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Stiftung (foundation)
|
||||
- Verein (association)
|
||||
- e.V., eingetragener Verein (registered association)
|
||||
- gGmbH (gemeinnützige GmbH - charitable limited company)
|
||||
|
||||
**Business Entities:**
|
||||
- GmbH (Gesellschaft mit beschränkter Haftung - limited liability company)
|
||||
- AG (Aktiengesellschaft - stock corporation)
|
||||
- KG (Kommanditgesellschaft - limited partnership)
|
||||
- OHG (offene Handelsgesellschaft - general partnership)
|
||||
- GmbH & Co. KG
|
||||
- UG (Unternehmergesellschaft - mini-GmbH)
|
||||
|
||||
### French (France, Belgium-Wallonia, Switzerland, Canada-Quebec)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Fondation (foundation)
|
||||
- Association (association)
|
||||
- Fonds (fund)
|
||||
|
||||
**Business Entities:**
|
||||
- S.A., SA (société anonyme - public limited company)
|
||||
- S.A.R.L., SARL (société à responsabilité limitée - private limited company)
|
||||
- S.A.S., SAS (société par actions simplifiée)
|
||||
- S.C.I., SCI (société civile immobilière)
|
||||
- S.N.C., SNC (société en nom collectif - general partnership)
|
||||
- S.C.S., SCS (société en commandite simple)
|
||||
- EURL (entreprise unipersonnelle à responsabilité limitée)
|
||||
|
||||
### Spanish (Spain, Latin America)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Fundación (foundation)
|
||||
- Asociación (association)
|
||||
- Sociedad (society) - when not followed by commercial designator
|
||||
|
||||
**Business Entities:**
|
||||
- S.A., SA (sociedad anónima - public limited company)
|
||||
- S.L., SL (sociedad limitada - private limited company)
|
||||
- S.L.L., SLL (sociedad limitada laboral)
|
||||
- S.Coop. (sociedad cooperativa)
|
||||
- S.C., SC (sociedad colectiva - general partnership)
|
||||
- S.Com., S. en C. (sociedad en comandita)
|
||||
|
||||
### Portuguese (Portugal, Brazil)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Fundação (foundation)
|
||||
- Associação (association)
|
||||
- Instituto (institute)
|
||||
|
||||
**Business Entities:**
|
||||
- Ltda., Limitada (limited liability company)
|
||||
- S.A., SA (sociedade anônima - corporation)
|
||||
- S/A
|
||||
- Cia., Companhia (company)
|
||||
- ME (microempresa)
|
||||
- EPP (empresa de pequeno porte)
|
||||
|
||||
### Italian (Italy, Switzerland-Ticino)
|
||||
|
||||
**Foundations and Non-Profits:**
|
||||
- Fondazione (foundation)
|
||||
- Associazione (association)
|
||||
- Ente (entity/institution)
|
||||
- Onlus (non-profit organization)
|
||||
|
||||
**Business Entities:**
|
||||
- S.p.A., SpA (società per azioni - joint-stock company)
|
||||
- S.r.l., Srl (società a responsabilità limitata - limited liability company)
|
||||
- S.a.s., Sas (società in accomandita semplice)
|
||||
- S.n.c., Snc (società in nome collettivo)
|
||||
- S.c.a.r.l. (società cooperativa a responsabilità limitata)
|
||||
|
||||
### Scandinavian Languages
|
||||
|
||||
**Danish:**
|
||||
- Fond (foundation)
|
||||
- Forening (association)
|
||||
- A/S (aktieselskab - public limited company)
|
||||
- ApS (anpartsselskab - private limited company)
|
||||
|
||||
**Swedish:**
|
||||
- Stiftelse (foundation)
|
||||
- Förening (association)
|
||||
- AB (aktiebolag - limited company)
|
||||
|
||||
**Norwegian:**
|
||||
- Stiftelse (foundation)
|
||||
- Forening (association)
|
||||
- AS (aksjeselskap - limited company)
|
||||
- ASA (allmennaksjeselskap - public limited company)
|
||||
|
||||
### Other European Languages
|
||||
|
||||
**Polish:**
|
||||
- Fundacja (foundation)
|
||||
- Stowarzyszenie (association)
|
||||
- Sp. z o.o. (limited liability company)
|
||||
- S.A. (joint-stock company)
|
||||
|
||||
**Czech:**
|
||||
- Nadace (foundation)
|
||||
- Spolek (association)
|
||||
- s.r.o. (limited liability company)
|
||||
- a.s. (joint-stock company)
|
||||
|
||||
**Hungarian:**
|
||||
- Alapítvány (foundation)
|
||||
- Egyesület (association)
|
||||
- Kft. (limited liability company)
|
||||
- Zrt. (private limited company)
|
||||
- Nyrt. (public limited company)
|
||||
|
||||
**Greek:**
|
||||
- Ίδρυμα (Idryma - foundation)
|
||||
- Σύλλογος (Syllogos - association)
|
||||
- Α.Ε., ΑΕ (Ανώνυμη Εταιρεία - corporation)
|
||||
- Ε.Π.Ε., ΕΠΕ (limited liability company)
|
||||
|
||||
**Finnish:**
|
||||
- Säätiö (foundation)
|
||||
- Yhdistys (association)
|
||||
- Oy (osakeyhtiö - limited company)
|
||||
- Oyj (public limited company)
|
||||
|
||||
### Asian Languages
|
||||
|
||||
**Japanese:**
|
||||
- 財団法人 (zaidan hōjin - incorporated foundation)
|
||||
- 社団法人 (shadan hōjin - incorporated association)
|
||||
- 株式会社, K.K. (kabushiki kaisha - corporation)
|
||||
- 合同会社, G.K. (gōdō kaisha - LLC)
|
||||
- 有限会社, Y.K. (yūgen kaisha - limited company)
|
||||
|
||||
**Chinese:**
|
||||
- 基金会 (jījīn huì - foundation)
|
||||
- 协会 (xiéhuì - association)
|
||||
- 有限公司 (yǒuxiàn gōngsī - limited company)
|
||||
- 股份有限公司 (gǔfèn yǒuxiàn gōngsī - joint-stock company)
|
||||
|
||||
**Korean:**
|
||||
- 재단법인 (jaedan beobin - incorporated foundation)
|
||||
- 사단법인 (sadan beobin - incorporated association)
|
||||
- 주식회사 (jusik hoesa - corporation)
|
||||
- 유한회사 (yuhan hoesa - limited company)
|
||||
|
||||
### Middle Eastern Languages
|
||||
|
||||
**Arabic:**
|
||||
- مؤسسة (mu'assasa - foundation/institution)
|
||||
- جمعية (jam'iyya - association)
|
||||
- شركة (sharika - company)
|
||||
- ش.م.م (limited liability company)
|
||||
- ش.م.ع (public joint-stock company)
|
||||
|
||||
**Hebrew:**
|
||||
- עמותה (amuta - non-profit association)
|
||||
- חל"צ (company for public benefit)
|
||||
- בע"מ (limited company)
|
||||
|
||||
**Turkish:**
|
||||
- Vakıf (foundation)
|
||||
- Dernek (association)
|
||||
- A.Ş. (anonim şirket - joint-stock company)
|
||||
- Ltd. Şti. (limited şirket - limited company)
|
||||
|
||||
### Latin American Specific
|
||||
|
||||
**Brazilian Portuguese:**
|
||||
- OSCIP (organização da sociedade civil de interesse público)
|
||||
- ONG (organização não governamental)
|
||||
- EIRELI (empresa individual de responsabilidade limitada)
|
||||
|
||||
**Mexican Spanish:**
|
||||
- A.C. (asociación civil - civil association)
|
||||
- S.C. (sociedad civil)
|
||||
- S. de R.L. (sociedad de responsabilidad limitada)
|
||||
|
||||
---
|
||||
|
||||
## Implementation Guidelines
|
||||
|
||||
### Filtering Algorithm
|
||||
|
||||
```python
|
||||
def filter_legal_form(name: str, language: str | None = None) -> tuple[str, str | None]:
|
||||
"""
|
||||
Remove legal form terms from custodian name.
|
||||
|
||||
Returns:
|
||||
tuple: (filtered_name, legal_form_found)
|
||||
"""
|
||||
# Apply language-specific patterns first if language known
|
||||
# Then apply universal patterns
|
||||
# Handle both prefix and suffix positions
|
||||
# Preserve articles (the, het, de, la, le, etc.)
|
||||
pass
|
||||
```
|
||||
|
||||
### Position Handling
|
||||
|
||||
Legal forms can appear as:
|
||||
|
||||
1. **Prefix**: "Stichting Rijksmuseum" → Remove "Stichting "
|
||||
2. **Suffix**: "British Museum Trust Ltd" → Remove " Trust Ltd"
|
||||
3. **Infix** (rare): Handle case-by-case
|
||||
|
||||
### Edge Cases
|
||||
|
||||
1. **Multiple legal forms**: "Foundation Trust Ltd" → Remove all
|
||||
2. **Abbreviation variations**: "Inc." = "Inc" = "Incorporated"
|
||||
3. **Case insensitivity**: "STICHTING" = "Stichting" = "stichting"
|
||||
4. **With punctuation**: "B.V." = "BV" = "B.V"
|
||||
5. **Compound terms**: "GmbH & Co. KG" → Remove entire compound
|
||||
|
||||
### Validation Script
|
||||
|
||||
Use `scripts/validate_organization_names.py` to detect names that still contain legal form terms after filtering.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- ISO 20275:2017 - Financial services — Entity legal forms (ELF)
|
||||
- GLEIF Legal Entity Identifier documentation
|
||||
- LinkML Schema: `schemas/20251121/linkml/modules/classes/CustodianName.yaml`
|
||||
- AGENTS.md: Rule 8 (Legal Form Filtering)
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: 2025-12-02
|
||||
**Maintained By**: GLAM Heritage Custodian Ontology Project
|
||||
156
schemas/20251121/linkml/rules/README.md
Normal file
156
schemas/20251121/linkml/rules/README.md
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
# Value Standardization Rules
|
||||
|
||||
**Location**: `schemas/20251121/linkml/rules/`
|
||||
**Purpose**: Data transformation and processing rules for achieving standardized values required by Heritage Custodian (HC) classes.
|
||||
|
||||
---
|
||||
|
||||
## About These Rules
|
||||
|
||||
These rules are **formally outside the LinkML schema convention** but document HOW data values are:
|
||||
- Transformed
|
||||
- Converted
|
||||
- Processed
|
||||
- Normalized
|
||||
|
||||
to achieve the standardized values required by particular HC classes.
|
||||
|
||||
**IMPORTANT**: These are NOT LinkML validation rules. They are **processing instructions** for data pipelines and extraction agents.
|
||||
|
||||
---
|
||||
|
||||
## Rule Categories
|
||||
|
||||
### 1. Name Standardization Rules
|
||||
|
||||
| Rule ID | File | Applies To | Summary |
|
||||
|---------|------|------------|---------|
|
||||
| **LEGAL-FORM-FILTER** | [`LEGAL_FORM_FILTER.md`](LEGAL_FORM_FILTER.md) | `CustodianName` | Remove legal form terms (Stichting, Foundation, Inc.) from emic names |
|
||||
| **ABBREV-CHAR-FILTER** | [`ABBREVIATION_RULES.md`](ABBREVIATION_RULES.md) | GHCID abbreviation | Remove special characters (&, /, +, @) and normalize diacritics to ASCII |
|
||||
| **TRANSLIT-ISO** | [`TRANSLITERATION.md`](TRANSLITERATION.md) | GHCID abbreviation | Transliterate non-Latin scripts (Cyrillic, CJK, Arabic) using ISO standards |
|
||||
|
||||
### 2. Geographic Standardization Rules
|
||||
|
||||
| Rule ID | File | Applies To | Summary |
|
||||
|---------|------|------------|---------|
|
||||
| **GEONAMES-SETTLEMENT** | [`GEONAMES_SETTLEMENT.md`](GEONAMES_SETTLEMENT.md) | Settlement codes | Use GeoNames as single source for settlement names |
|
||||
| **FEATURE-CODE-FILTER** | [`GEONAMES_SETTLEMENT.md`](GEONAMES_SETTLEMENT.md) | Reverse geocoding | Only use PPL* feature codes, never PPLX (neighborhoods) |
|
||||
|
||||
### 3. Web Observation Rules
|
||||
|
||||
| Rule ID | File | Applies To | Summary |
|
||||
|---------|------|------------|---------|
|
||||
| **XPATH-PROVENANCE** | [`XPATH_PROVENANCE.md`](XPATH_PROVENANCE.md) | `WebClaim` | Every web claim MUST have XPath pointer to archived HTML |
|
||||
|
||||
### 4. Schema Evolution Rules
|
||||
|
||||
| Rule ID | File | Applies To | Summary |
|
||||
|---------|------|------------|---------|
|
||||
| **ENUM-TO-CLASS** | [`ENUM_TO_CLASS.md`](ENUM_TO_CLASS.md) | Enums/Classes | When enum promoted to class hierarchy, delete original enum |
|
||||
|
||||
---
|
||||
|
||||
## GLAMORCUBESFIXPHDNT Taxonomy Applicability
|
||||
|
||||
Each rule primarily applies to certain custodian types:
|
||||
|
||||
| Rule | Primary Types | All Types |
|
||||
|------|--------------|-----------|
|
||||
| LEGAL-FORM-FILTER | All | ✅ |
|
||||
| ABBREV-CHAR-FILTER (special characters) | All | ✅ |
|
||||
| ABBREV-CHAR-FILTER (diacritics) | All | ✅ |
|
||||
| TRANSLIT-ISO | International (non-Latin script countries) | Partial |
|
||||
| GEONAMES-SETTLEMENT | All | ✅ |
|
||||
| XPATH-PROVENANCE | D (Digital platforms) | Partial |
|
||||
|
||||
---
|
||||
|
||||
## Integration with bronhouder.nl
|
||||
|
||||
These rules are displayed under a separate "Regels" (Rules) category on the bronhouder.nl LinkML visualization page, distinct from:
|
||||
- Classes
|
||||
- Slots
|
||||
- Enums
|
||||
- Instances
|
||||
|
||||
Each rule includes:
|
||||
- Rule ID (short identifier)
|
||||
- Applicable class(es)
|
||||
- GLAMORCUBESFIXPHDNT type indicator
|
||||
- Transformation examples
|
||||
- Implementation code (Python)
|
||||
|
||||
---
|
||||
|
||||
## Rule Template
|
||||
|
||||
New rules should follow this template:
|
||||
|
||||
```markdown
|
||||
# Rule Title
|
||||
|
||||
**Rule ID**: SHORT-ID
|
||||
**Status**: MANDATORY | RECOMMENDED | OPTIONAL
|
||||
**Applies To**: Class or slot name
|
||||
**Created**: YYYY-MM-DD
|
||||
**Updated**: YYYY-MM-DD
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
One-paragraph summary of what this rule does.
|
||||
|
||||
---
|
||||
|
||||
## Rationale
|
||||
|
||||
Why this rule exists (numbered list of reasons).
|
||||
|
||||
---
|
||||
|
||||
## Specification
|
||||
|
||||
Detailed specification with examples.
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
Python code showing how to implement this rule.
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
| Input | Output | Explanation |
|
||||
|-------|--------|-------------|
|
||||
|
||||
---
|
||||
|
||||
## Related Rules
|
||||
|
||||
- Other related rules
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Date | Change |
|
||||
|------|--------|
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File List
|
||||
|
||||
```
|
||||
rules/
|
||||
├── README.md # This file (rule index)
|
||||
├── ABBREVIATION_RULES.md # ABBREV-CHAR-FILTER: Special char + diacritics normalization
|
||||
├── LEGAL_FORM_FILTER.md # LEGAL-FORM-FILTER: Legal form removal from emic names
|
||||
├── GEONAMES_SETTLEMENT.md # GEONAMES-SETTLEMENT: Geographic standardization via GeoNames
|
||||
├── XPATH_PROVENANCE.md # XPATH-PROVENANCE: WebClaim XPath requirements
|
||||
├── TRANSLITERATION.md # TRANSLIT-ISO: Non-Latin script transliteration
|
||||
└── ENUM_TO_CLASS.md # ENUM-TO-CLASS: Schema evolution pattern
|
||||
```
|
||||
337
schemas/20251121/linkml/rules/TRANSLITERATION.md
Normal file
337
schemas/20251121/linkml/rules/TRANSLITERATION.md
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
# Transliteration Standards for Non-Latin Scripts
|
||||
|
||||
**Rule ID**: TRANSLIT-ISO
|
||||
**Status**: MANDATORY
|
||||
**Applies To**: GHCID abbreviation generation from emic names in non-Latin scripts
|
||||
**Created**: 2025-12-08
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
**When generating GHCID abbreviations from institution names written in non-Latin scripts, the emic name MUST first be transliterated to Latin characters using the designated ISO or recognized standard for that script.**
|
||||
|
||||
This rule affects **170 institutions** across **21 languages** with non-Latin writing systems.
|
||||
|
||||
### Key Principles
|
||||
|
||||
1. **Emic name is preserved** - The original script is stored in `custodian_name.emic_name`
|
||||
2. **Transliteration is for processing only** - Used to generate abbreviations
|
||||
3. **ISO/recognized standards required** - No ad-hoc romanization
|
||||
4. **Deterministic output** - Same input always produces same Latin output
|
||||
5. **Existing GHCIDs grandfathered** - Only applies to NEW custodians
|
||||
|
||||
---
|
||||
|
||||
## Transliteration Standards by Script/Language
|
||||
|
||||
### Cyrillic Scripts
|
||||
|
||||
| Language | ISO Code | Standard | Library/Tool | Notes |
|
||||
|----------|----------|----------|--------------|-------|
|
||||
| **Russian** | ru | ISO 9:1995 | `transliterate` | Scientific transliteration |
|
||||
| **Ukrainian** | uk | ISO 9:1995 | `transliterate` | Includes Ukrainian-specific letters |
|
||||
| **Bulgarian** | bg | ISO 9:1995 | `transliterate` | Uses same Cyrillic base |
|
||||
| **Serbian** | sr | ISO 9:1995 | `transliterate` | Serbian Cyrillic variant |
|
||||
| **Kazakh** | kk | ISO 9:1995 | `transliterate` | Cyrillic-based (pre-2023) |
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: Институт восточных рукописей РАН
|
||||
ISO 9: Institut vostocnyh rukopisej RAN
|
||||
Abbrev: IVRRAN (after diacritic normalization)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### CJK Scripts
|
||||
|
||||
#### Chinese (Hanzi)
|
||||
|
||||
| Variant | Standard | Library/Tool | Notes |
|
||||
|---------|----------|--------------|-------|
|
||||
| Simplified | Hanyu Pinyin (ISO 7098) | `pypinyin` | Standard PRC romanization |
|
||||
| Traditional | Hanyu Pinyin | `pypinyin` | Same standard applies |
|
||||
|
||||
**Pinyin Rules**:
|
||||
- Tone marks are OMITTED for abbreviation (diacritics removed anyway)
|
||||
- Word boundaries follow natural spacing
|
||||
- Proper nouns capitalized
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: 东巴文化博物院
|
||||
Pinyin: Dongba Wenhua Bowuyuan
|
||||
ASCII: Dongba Wenhua Bowuyuan
|
||||
Abbrev: DWB
|
||||
```
|
||||
|
||||
#### Japanese (Kanji/Kana)
|
||||
|
||||
| Standard | Library/Tool | Notes |
|
||||
|----------|--------------|-------|
|
||||
| Modified Hepburn | `pykakasi`, `romkan` | Most widely used internationally |
|
||||
|
||||
**Hepburn Rules**:
|
||||
- Long vowels: ō, ū (normalized to o, u for abbreviation)
|
||||
- Particles: ha (wa), wo (o), he (e)
|
||||
- Syllabic n: n = n (before vowels: n')
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: 国立中央博物館
|
||||
Romaji: Kokuritsu Chuo Hakubutsukan
|
||||
ASCII: Kokuritsu Chuo Hakubutsukan
|
||||
Abbrev: KCH
|
||||
```
|
||||
|
||||
#### Korean (Hangul)
|
||||
|
||||
| Standard | Library/Tool | Notes |
|
||||
|----------|--------------|-------|
|
||||
| Revised Romanization (RR) | `korean-romanizer`, `hangul-romanize` | Official South Korean standard (2000) |
|
||||
|
||||
**RR Rules**:
|
||||
- No diacritics (unlike McCune-Reischauer)
|
||||
- Consonant assimilation reflected in spelling
|
||||
- Word boundaries at natural breaks
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: 독립기념관
|
||||
RR: Dongnip Ginyeomgwan
|
||||
Abbrev: DG
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Arabic Script
|
||||
|
||||
| Language | ISO Code | Standard | Library/Tool | Notes |
|
||||
|----------|----------|----------|--------------|-------|
|
||||
| **Arabic** | ar | ISO 233-2:1993 | `arabic-transliteration` | Simplified standard |
|
||||
| **Persian/Farsi** | fa | ISO 233-3:1999 | `persian-transliteration` | Persian extensions |
|
||||
| **Urdu** | ur | ISO 233-3 + Urdu extensions | `urdu-transliteration` | Additional characters |
|
||||
|
||||
**Example (Arabic)**:
|
||||
```
|
||||
Input: المكتبة الوطنية للمملكة المغربية
|
||||
ISO: al-Maktaba al-Wataniya lil-Mamlaka al-Maghribiya
|
||||
ASCII: al-Maktaba al-Wataniya lil-Mamlaka al-Maghribiya
|
||||
Abbrev: MWMM (skip "al-" articles)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Hebrew Script
|
||||
|
||||
| Standard | Library/Tool | Notes |
|
||||
|----------|--------------|-------|
|
||||
| ISO 259-3:1999 | `hebrew-transliteration` | Simplified romanization |
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: ארכיון הסיפור העממי בישראל
|
||||
ISO: Arkhiyon ha-Sipur ha-Amami be-Yisrael
|
||||
ASCII: Arkhiyon ha-Sipur ha-Amami be-Yisrael
|
||||
Abbrev: ASAY (skip "ha-" and "be-" articles)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Greek Script
|
||||
|
||||
| Standard | Library/Tool | Notes |
|
||||
|----------|--------------|-------|
|
||||
| ISO 843:1997 | `greek-transliteration` | Romanization of Greek |
|
||||
|
||||
**Example**:
|
||||
```
|
||||
Input: Αρχαιολογικό Μουσείο Θεσσαλονίκης
|
||||
ISO: Archaiologiko Mouseio Thessalonikis
|
||||
ASCII: Archaiologiko Mouseio Thessalonikis
|
||||
Abbrev: AMT
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Indic Scripts
|
||||
|
||||
| Language | Script | Standard | Library/Tool |
|
||||
|----------|--------|----------|--------------|
|
||||
| **Hindi** | Devanagari | ISO 15919 | `indic-transliteration` |
|
||||
| **Bengali** | Bengali | ISO 15919 | `indic-transliteration` |
|
||||
| **Nepali** | Devanagari | ISO 15919 | `indic-transliteration` |
|
||||
| **Sinhala** | Sinhala | ISO 15919 | `indic-transliteration` |
|
||||
|
||||
**Example (Hindi)**:
|
||||
```
|
||||
Input: राजस्थान प्राच्यविद्या प्रतिष्ठान
|
||||
ISO: Rajasthana Pracyavidya Pratishthana
|
||||
ASCII: Rajasthana Pracyavidya Pratishthana
|
||||
Abbrev: RPP
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Southeast Asian Scripts
|
||||
|
||||
| Language | Script | Standard | Library/Tool |
|
||||
|----------|--------|----------|--------------|
|
||||
| **Thai** | Thai | ISO 11940-2 | `thai-romanization` |
|
||||
| **Khmer** | Khmer | ALA-LC | `khmer-romanization` |
|
||||
|
||||
**Thai Example**:
|
||||
```
|
||||
Input: สำนักหอจดหมายเหตุแห่งชาติ
|
||||
ISO: Samnak Ho Chotmaihet Haeng Chat
|
||||
Abbrev: SHCHC
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Other Scripts
|
||||
|
||||
| Language | Script | Standard | Library/Tool |
|
||||
|----------|--------|----------|--------------|
|
||||
| **Armenian** | Armenian | ISO 9985 | `armenian-transliteration` |
|
||||
| **Georgian** | Georgian | ISO 9984 | `georgian-transliteration` |
|
||||
|
||||
**Georgian Example**:
|
||||
```
|
||||
Input: ხელნაწერთა ეროვნული ცენტრი
|
||||
ISO: Khelnawerti Erovnuli Centri
|
||||
ASCII: Khelnawerti Erovnuli Centri
|
||||
Abbrev: KEC
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
### Python Transliteration Utility
|
||||
|
||||
```python
|
||||
import unicodedata
|
||||
from typing import Optional
|
||||
|
||||
def detect_script(text: str) -> str:
    """
    Detect the primary script of the input text.

    Scans characters left to right and returns the script of the first
    character that falls inside a known non-Latin Unicode block; text with
    no such character is reported as Latin.

    Returns one of: 'latin', 'cyrillic', 'chinese', 'japanese',
    'korean', 'arabic', 'hebrew', 'greek', 'devanagari', 'thai'.

    NOTE: mixed Japanese text that *begins* with kanji will still be
    reported as 'chinese' (kanji share the CJK ideograph block); only
    kana characters are unambiguously 'japanese'.
    """
    # Each script maps to one or more (start, end) codepoint ranges,
    # inclusive on both ends. Ranges do not overlap between scripts.
    script_ranges: dict[str, list[tuple[int, int]]] = {
        'cyrillic': [(0x0400, 0x04FF)],
        'arabic': [(0x0600, 0x06FF)],
        'hebrew': [(0x0590, 0x05FF)],
        'devanagari': [(0x0900, 0x097F)],
        'thai': [(0x0E00, 0x0E7F)],
        'greek': [(0x0370, 0x03FF)],
        'korean': [(0xAC00, 0xD7AF)],
        # Hiragana + Katakana: listed before the CJK ideograph block so
        # pure-kana names resolve to 'japanese' rather than falling
        # through to 'latin' (they previously matched no range at all).
        'japanese': [(0x3040, 0x309F), (0x30A0, 0x30FF)],
        'chinese': [(0x4E00, 0x9FFF)],
    }

    for char in text:
        code = ord(char)
        for script, ranges in script_ranges.items():
            if any(start <= code <= end for start, end in ranges):
                return script

    return 'latin'
|
||||
|
||||
|
||||
def transliterate_for_abbreviation(emic_name: str, lang: str) -> str:
    """
    Produce an ASCII-only Latin version of an emic name for GHCID
    abbreviation generation.

    Args:
        emic_name: Institution name in its original script
        lang: ISO 639-1 language code

    Returns:
        A string containing only ASCII letters and single spaces,
        ready for abbreviation extraction.
    """
    import re

    # Convert the original script to Latin characters first;
    # the script-specific logic lives in transliterate().
    romanized = transliterate(emic_name, lang)

    # Strip diacritics: canonically decompose, then drop combining
    # marks (Unicode category 'Mn').
    decomposed = unicodedata.normalize('NFD', romanized)
    stripped = ''.join(ch for ch in decomposed if unicodedata.category(ch) != 'Mn')

    # Replace anything that is not an ASCII letter or whitespace with a
    # space, then collapse whitespace runs into single spaces.
    letters_only = re.sub(r'[^a-zA-Z\s]', ' ', stripped)
    return ' '.join(letters_only.split())
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Skip Words by Language
|
||||
|
||||
When extracting abbreviations from transliterated text, skip these articles/prepositions:
|
||||
|
||||
### Arabic
|
||||
- `al-` (the definite article)
|
||||
- `bi-`, `li-`, `fi-` (prepositions)
|
||||
|
||||
### Hebrew
|
||||
- `ha-` (the)
|
||||
- `ve-` (and)
|
||||
- `be-`, `le-`, `me-` (prepositions)
|
||||
|
||||
### Persian
|
||||
- `-e`, `-ye` (ezafe connector)
|
||||
- `va` (and)
|
||||
|
||||
### CJK Languages
|
||||
- No skip words (particles are integral to meaning)
|
||||
|
||||
### Indic Languages
|
||||
- `ka`, `ki`, `ke` (Hindi: of)
|
||||
- `aur` (Hindi: and)
|
||||
|
||||
---
|
||||
|
||||
## Validation
|
||||
|
||||
### Check Transliteration Output
|
||||
|
||||
```python
|
||||
def validate_transliteration(result: str) -> bool:
    """
    Check that a transliteration result is clean ASCII.

    Returns True only when *result* is non-empty and consists solely of
    ASCII letters and whitespace characters.
    """
    import re

    return re.fullmatch(r'[a-zA-Z\s]+', result) is not None
|
||||
```
|
||||
|
||||
### Manual Review Queue
|
||||
|
||||
Non-Latin institutions should be flagged for manual review if:
|
||||
1. Transliteration library not available for that script
|
||||
2. Confidence in transliteration is low
|
||||
3. Institution has multiple official romanizations
|
||||
|
||||
---
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- `AGENTS.md` - Rule 12: Transliteration Standards
|
||||
- `rules/ABBREVIATION_RULES.md` - Character filtering after transliteration
|
||||
- `docs/TRANSLITERATION_CONVENTIONS.md` - Extended examples and edge cases
|
||||
- `scripts/transliterate_emic_names.py` - Production transliteration script
|
||||
|
||||
---
|
||||
|
||||
## Changelog
|
||||
|
||||
| Date | Change |
|
||||
|------|--------|
|
||||
| 2025-12-08 | Initial standards document created |
|
||||
210
schemas/20251121/linkml/rules/XPATH_PROVENANCE.md
Normal file
210
schemas/20251121/linkml/rules/XPATH_PROVENANCE.md
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
# WebObservation XPath Provenance Rules
|
||||
|
||||
**Rule ID**: XPATH-PROVENANCE
|
||||
**Status**: MANDATORY
|
||||
**Applies To**: WebClaim extraction from websites
|
||||
**Created**: 2025-11-29
|
||||
|
||||
---
|
||||
|
||||
## Core Principle: Every Claim MUST Have Verifiable Provenance
|
||||
|
||||
**If a claim allegedly came from a webpage, it MUST have an XPath pointer to the exact location in the archived HTML where that value appears. Claims without XPath provenance are considered FABRICATED and must be removed.**
|
||||
|
||||
This is not about "confidence" or "uncertainty" - it's about **verifiability**. Either the claim value exists in the HTML at a specific XPath, or it was hallucinated/fabricated by an LLM.
|
||||
|
||||
---
|
||||
|
||||
## Required Fields for WebObservation Claims
|
||||
|
||||
Every claim in `web_enrichment.claims` MUST have:
|
||||
|
||||
| Field | Required | Description |
|
||||
|-------|----------|-------------|
|
||||
| `claim_type` | YES | Type of claim (full_name, description, email, etc.) |
|
||||
| `claim_value` | YES | The extracted value |
|
||||
| `source_url` | YES | URL the claim was extracted from |
|
||||
| `retrieved_on` | YES | ISO 8601 timestamp when page was archived |
|
||||
| `xpath` | YES | XPath to the element containing this value |
|
||||
| `html_file` | YES | Relative path to archived HTML file |
|
||||
| `xpath_match_score` | YES | 1.0 for exact match, <1.0 for fuzzy match |
|
||||
|
||||
### Example - CORRECT (Verifiable)
|
||||
|
||||
```yaml
|
||||
web_enrichment:
|
||||
claims:
|
||||
- claim_type: full_name
|
||||
claim_value: Historische Vereniging Nijeveen
|
||||
source_url: https://historischeverenigingnijeveen.nl/
|
||||
retrieved_on: "2025-11-29T12:28:00Z"
|
||||
xpath: /[document][1]/html[1]/body[1]/div[6]/div[1]/table[3]/tbody[1]/tr[1]/td[1]/p[6]
|
||||
html_file: web/0021/historischeverenigingnijeveen.nl/rendered.html
|
||||
xpath_match_score: 1.0
|
||||
```
|
||||
|
||||
### Example - WRONG (Fabricated - Must Be Removed)
|
||||
|
||||
```yaml
|
||||
web_enrichment:
|
||||
claims:
|
||||
- claim_type: full_name
|
||||
claim_value: Historische Vereniging Nijeveen
|
||||
confidence: 0.95 # ← NO! This is meaningless without XPath
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Forbidden: Confidence Scores Without XPath
|
||||
|
||||
**NEVER use arbitrary confidence scores for web-extracted claims.**
|
||||
|
||||
Confidence scores like `0.95`, `0.90`, `0.85` are meaningless because:
|
||||
1. There is NO methodology defining what these numbers mean
|
||||
2. They cannot be verified or reproduced
|
||||
3. They give false impression of rigor
|
||||
4. They mask the fact that claims may be fabricated
|
||||
|
||||
If a value appears in the HTML → `xpath_match_score: 1.0`
|
||||
If a value does NOT appear in the HTML → **REMOVE THE CLAIM**
|
||||
|
||||
---
|
||||
|
||||
## Website Archiving Workflow
|
||||
|
||||
### Step 1: Archive the Website
|
||||
|
||||
Use Playwright to archive websites with JavaScript rendering:
|
||||
|
||||
```bash
|
||||
python scripts/fetch_website_playwright.py <entry_number> <url>
|
||||
|
||||
# Example:
|
||||
python scripts/fetch_website_playwright.py 0021 https://historischeverenigingnijeveen.nl/
|
||||
```
|
||||
|
||||
This creates:
|
||||
```
|
||||
data/nde/enriched/entries/web/{entry_number}/{domain}/
|
||||
├── index.html # Raw HTML as received
|
||||
├── rendered.html # HTML after JS execution
|
||||
├── content.md # Markdown conversion
|
||||
└── metadata.yaml # XPath extractions for provenance
|
||||
```
|
||||
|
||||
### Step 2: Add XPath Provenance to Claims
|
||||
|
||||
Run the XPath migration script:
|
||||
|
||||
```bash
|
||||
python scripts/add_xpath_provenance.py
|
||||
|
||||
# Or for specific entries:
|
||||
python scripts/add_xpath_provenance.py --entries 0021,0022,0023
|
||||
```
|
||||
|
||||
This script:
|
||||
1. Reads each entry's `web_enrichment.claims`
|
||||
2. Searches archived HTML for each claim value
|
||||
3. Adds `xpath` + `html_file` if found
|
||||
4. **REMOVES claims that cannot be verified** (stores in `removed_unverified_claims`)
|
||||
|
||||
### Step 3: Audit Removed Claims
|
||||
|
||||
Check `removed_unverified_claims` in each entry file:
|
||||
|
||||
```yaml
|
||||
removed_unverified_claims:
|
||||
- claim_type: phone
|
||||
claim_value: "+31 6 12345678"
|
||||
reason: "Value not found in archived HTML - likely fabricated"
|
||||
removed_on: "2025-11-29T14:30:00Z"
|
||||
```
|
||||
|
||||
These claims were NOT in the HTML and should NOT be restored without proper sourcing.
|
||||
|
||||
---
|
||||
|
||||
## Claim Types and Expected Sources
|
||||
|
||||
| Claim Type | Expected Source | Notes |
|
||||
|------------|-----------------|-------|
|
||||
| `full_name` | Page title, heading, logo text | Usually in `<h1>`, `<title>`, or prominent `<div>` |
|
||||
| `description` | Meta description, about text | Check `<meta name="description">` first |
|
||||
| `email` | Contact page, footer | Often in `<a href="mailto:...">` |
|
||||
| `phone` | Contact page, footer | May need normalization |
|
||||
| `address` | Contact page, footer | Check for structured data too |
|
||||
| `social_media` | Footer, contact page | Links to social platforms |
|
||||
| `opening_hours` | Contact/visit page | May be in structured data |
|
||||
|
||||
---
|
||||
|
||||
## XPath Matching Strategy
|
||||
|
||||
The `add_xpath_provenance.py` script uses this matching strategy:
|
||||
|
||||
1. **Exact match**: Claim value appears exactly in element text
|
||||
2. **Normalized match**: After whitespace normalization
|
||||
3. **Substring match**: Claim value is substring of element text (score < 1.0)
|
||||
|
||||
Priority order for matching:
|
||||
1. `rendered.html` (after JS execution) - preferred
|
||||
2. `index.html` (raw HTML) - fallback
|
||||
|
||||
---
|
||||
|
||||
## Integration with LinkML Schema
|
||||
|
||||
The `WebClaim` class in the LinkML schema requires:
|
||||
|
||||
```yaml
|
||||
# schemas/20251121/linkml/modules/classes/WebClaim.yaml
|
||||
WebClaim:
|
||||
slots:
|
||||
- source_url # Required
|
||||
- retrieved_on # Required (timestamp)
|
||||
- xpath # Required for claims
|
||||
- html_archive_path # Path to archived HTML
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rules for AI Agents
|
||||
|
||||
### When Extracting Claims from Websites
|
||||
|
||||
1. **ALWAYS archive the website first** using Playwright
|
||||
2. **ALWAYS extract claims with XPath provenance** using the archived HTML
|
||||
3. **NEVER invent or infer claims** not present in the HTML
|
||||
4. **NEVER use confidence scores** without XPath backing
|
||||
|
||||
### When Processing Existing Claims
|
||||
|
||||
1. **Verify each claim** against archived HTML
|
||||
2. **Add XPath provenance** to verified claims
|
||||
3. **REMOVE fabricated claims** that cannot be verified
|
||||
4. **Document removed claims** in `removed_unverified_claims`
|
||||
|
||||
### When Reviewing Data Quality
|
||||
|
||||
1. Claims with `xpath` + `html_file` = **VERIFIED**
|
||||
2. Claims with only `confidence` = **SUSPECT** (migrate or remove)
|
||||
3. Claims in `removed_unverified_claims` = **FABRICATED** (do not restore)
|
||||
|
||||
---
|
||||
|
||||
## Scripts Reference
|
||||
|
||||
| Script | Purpose |
|
||||
|--------|---------|
|
||||
| `scripts/fetch_website_playwright.py` | Archive website with Playwright |
|
||||
| `scripts/add_xpath_provenance.py` | Add XPath to claims, remove fabricated |
|
||||
| `scripts/batch_fetch_websites.py` | Batch archive multiple entries |
|
||||
|
||||
---
|
||||
|
||||
## Version History
|
||||
|
||||
- **2025-11-29**: Initial version - established XPath provenance requirement
|
||||
- Replaced confidence scores with verifiable XPath pointers
|
||||
- Established policy of removing fabricated claims
|
||||
|
|
@ -0,0 +1,124 @@
|
|||
```mermaid
|
||||
%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#e3f2fd', 'primaryTextColor': '#1565c0', 'primaryBorderColor': '#1565c0', 'lineColor': '#424242', 'secondaryColor': '#fff3e0', 'tertiaryColor': '#e8f5e9'}}}%%
|
||||
graph TB
|
||||
%% Heritage Custodian Records Lifecycle
|
||||
%% Generated: 2025-12-09 13:12:05
|
||||
%% Three-tier model: Administration → Archive → Collection
|
||||
%% For bronhouder.nl visual representation
|
||||
|
||||
subgraph "PHASE 1: ACTIVE RECORDS"
|
||||
direction TB
|
||||
ADMIN["<b>CustodianAdministration</b><br/><i>rico:RecordResource</i><br/>━━━━━━━━━━━━━━━━━<br/>ACTIVE records in daily use<br/>• Current correspondence<br/>• Personnel files<br/>• Financial records<br/>• Digital files on shared drives<br/>• Email, databases<br/>━━━━━━━━━━━━━━━━━<br/>Managed by: Business units<br/>Retention: Per schedule"]
|
||||
|
||||
style ADMIN fill:#c8e6c9,stroke:#2e7d32,stroke-width:3px
|
||||
end
|
||||
|
||||
subgraph "PHASE 2: INACTIVE ARCHIVES"
|
||||
direction TB
|
||||
ARCHIVE["<b>CustodianArchive</b><br/><i>rico:RecordSet</i><br/>━━━━━━━━━━━━━━━━━<br/>INACTIVE records awaiting processing<br/>• Transferred from administration<br/>• In BACKLOG (may wait DECADES)<br/>• Basic accession-level description<br/>• NOT searchable by researchers<br/>• Tracked in CMS for inventory<br/>━━━━━━━━━━━━━━━━━<br/>Managed by: Archives staff<br/>Status: ArchiveProcessingStatusEnum"]
|
||||
|
||||
style ARCHIVE fill:#fff9c4,stroke:#f9a825,stroke-width:3px
|
||||
end
|
||||
|
||||
subgraph "PHASE 3: HERITAGE COLLECTION"
|
||||
direction TB
|
||||
COLLECTION["<b>CustodianCollection</b><br/><i>crm:E78_Curated_Holding</i><br/>━━━━━━━━━━━━━━━━━<br/>PROCESSED heritage collection<br/>• Full finding aid available<br/>• Searchable by researchers<br/>• Arranged per archival standards<br/>• Integrated into public collection<br/>• Managed as cultural heritage<br/>━━━━━━━━━━━━━━━━━<br/>Managed by: Curators<br/>Access: Public/Restricted"]
|
||||
|
||||
style COLLECTION fill:#bbdefb,stroke:#1565c0,stroke-width:3px
|
||||
end
|
||||
|
||||
%% Transitions between phases
|
||||
ADMIN -->|"<b>TRANSFER</b><br/>Retention period ends<br/>Records closed<br/>prov:wasGeneratedBy"| ARCHIVE
|
||||
ARCHIVE -->|"<b>PROCESSING</b><br/>Appraisal complete<br/>Finding aid created<br/>prov:hadDerivation"| COLLECTION
|
||||
|
||||
%% Lifecycle Type Classifications (SKOS)
|
||||
subgraph "Archive Lifecycle Types (Wikidata)"
|
||||
direction LR
|
||||
TYPE_CURRENT["<b>CurrentArchive</b><br/>Q3621648<br/><i>Active phase</i>"]
|
||||
TYPE_DEPOSIT["<b>DepositArchive</b><br/>Q244904<br/><i>Semi-current phase</i>"]
|
||||
TYPE_HISTORICAL["<b>HistoricalArchive</b><br/>Q3621673<br/><i>Archival phase</i>"]
|
||||
|
||||
style TYPE_CURRENT fill:#c8e6c9,stroke:#2e7d32,stroke-width:2px,stroke-dasharray: 5 5
|
||||
style TYPE_DEPOSIT fill:#fff9c4,stroke:#f9a825,stroke-width:2px,stroke-dasharray: 5 5
|
||||
style TYPE_HISTORICAL fill:#bbdefb,stroke:#1565c0,stroke-width:2px,stroke-dasharray: 5 5
|
||||
end
|
||||
|
||||
%% Type classifications link to phases
|
||||
TYPE_CURRENT -.->|skos:narrower| ADMIN
|
||||
TYPE_DEPOSIT -.->|skos:narrower| ARCHIVE
|
||||
TYPE_HISTORICAL -.->|skos:narrower| COLLECTION
|
||||
|
||||
%% Timeline example
|
||||
subgraph "Example: Ministry Records"
|
||||
direction TB
|
||||
EX_TIMELINE["<b>Temporal Reality</b><br/>━━━━━━━━━━━━━━━━━━━━━━━━<br/>2010-2020: Created (Administration)<br/>2021: Transferred to Archives<br/>2021-2024: In processing backlog<br/>2024: Archivist assigned<br/>2025: Finding aid complete<br/>2025: Available to researchers<br/>━━━━━━━━━━━━━━━━━━━━━━━━<br/><i>Total processing time: 4 years</i><br/><i>(Large archives: 30-50 years)</i>"]
|
||||
|
||||
style EX_TIMELINE fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
||||
end
|
||||
|
||||
%% Processing Status Enum connection
|
||||
subgraph "Processing Status"
|
||||
direction LR
|
||||
STATUS["<b>ArchiveProcessingStatusEnum</b><br/>UNPROCESSED → IN_APPRAISAL →<br/>IN_ARRANGEMENT → IN_DESCRIPTION →<br/>PROCESSED_PENDING_TRANSFER →<br/>TRANSFERRED_TO_COLLECTION"]
|
||||
|
||||
style STATUS fill:#e0e0e0,stroke:#616161,stroke-width:1px
|
||||
end
|
||||
|
||||
ARCHIVE -.->|processing_status| STATUS
|
||||
|
||||
%% Custodian Hub connection
|
||||
subgraph "Central Entity"
|
||||
HUB["<b>Custodian</b><br/>(Hub Entity)<br/>All records belong to<br/>one heritage institution"]
|
||||
|
||||
style HUB fill:#ffeb3b,stroke:#f57f17,stroke-width:4px
|
||||
end
|
||||
|
||||
ADMIN -.->|"refers_to_custodian<br/>crm:P46i"| HUB
|
||||
ARCHIVE -.->|"refers_to_custodian<br/>crm:P46i"| HUB
|
||||
COLLECTION -.->|"refers_to_custodian<br/>crm:P46i"| HUB
|
||||
|
||||
%% Legend
|
||||
subgraph "Legend"
|
||||
direction LR
|
||||
L1["Solid arrow = Data flow/transition"]
|
||||
L2["Dashed arrow = Reference/classification"]
|
||||
L3["Green = Active | Yellow = Processing | Blue = Archived"]
|
||||
end
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Records Lifecycle Model
|
||||
|
||||
This diagram shows the three-tier model for heritage custodian records:
|
||||
|
||||
### Phase 1: CustodianAdministration (Active Records)
|
||||
- **Ontology**: `rico:RecordResource`
|
||||
- **Status**: In daily operational use
|
||||
- **Managed by**: Business units (not archives staff)
|
||||
- **Examples**: Current correspondence, personnel files, financial records
|
||||
|
||||
### Phase 2: CustodianArchive (Inactive Archives)
|
||||
- **Ontology**: `rico:RecordSet`
|
||||
- **Status**: Awaiting archival processing (often DECADES)
|
||||
- **Managed by**: Archives staff
|
||||
- **Tracking**: `ArchiveProcessingStatusEnum`
|
||||
- **Key insight**: NOT yet searchable by researchers
|
||||
|
||||
### Phase 3: CustodianCollection (Heritage Collection)
|
||||
- **Ontology**: `crm:E78_Curated_Holding`
|
||||
- **Status**: Fully processed, public/restricted access
|
||||
- **Managed by**: Curators
|
||||
- **Features**: Full finding aid, integrated into heritage collection
|
||||
|
||||
### Key Relationships
|
||||
- `prov:wasGeneratedBy`: Links archive to transfer activity
|
||||
- `prov:hadDerivation`: Links archive to resulting collection
|
||||
- `crm:P46i_forms_part_of`: All phases belong to same Custodian hub
|
||||
|
||||
### Lifecycle Type Classifications (SKOS/Wikidata)
|
||||
- **CurrentArchive** (Q3621648): Active records phase TYPE
|
||||
- **DepositArchive** (Q244904): Semi-current/intermediate phase TYPE
|
||||
- **HistoricalArchive** (Q3621673): Permanent archival phase TYPE
|
||||
|
||||
These are TYPE classifications (skos:Concept) that can be applied to INSTANCE records via `lifecycle_phase_type` slot using `skos:broaderTransitive`.
|
||||
388
scripts/geocode_missing_from_geonames.py
Normal file
388
scripts/geocode_missing_from_geonames.py
Normal file
|
|
@ -0,0 +1,388 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Geocode Missing Coordinates from GeoNames Database
|
||||
|
||||
This script geocodes custodian files that are missing coordinates using the local
|
||||
GeoNames database. It's much faster than API-based geocoding (no rate limits).
|
||||
|
||||
Features:
|
||||
- Uses local GeoNames SQLite database for instant lookups
|
||||
- Fuzzy matching for city names
|
||||
- Updates files in-place preserving YAML structure
|
||||
- Batch processing with progress tracking
|
||||
- Safe updates (additive only, preserves existing data)
|
||||
|
||||
Usage:
|
||||
python scripts/geocode_missing_from_geonames.py --dry-run
|
||||
python scripts/geocode_missing_from_geonames.py --country JP --limit 100
|
||||
python scripts/geocode_missing_from_geonames.py --all
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sqlite3
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import unicodedata
|
||||
|
||||
from ruamel.yaml import YAML
|
||||
|
||||
# Setup ruamel.yaml for round-trip preservation
|
||||
yaml = YAML()
|
||||
yaml.preserve_quotes = True
|
||||
yaml.width = 120
|
||||
|
||||
# Configuration
|
||||
CUSTODIAN_DIR = Path("/Users/kempersc/apps/glam/data/custodian")
|
||||
GEONAMES_DB = Path("/Users/kempersc/apps/glam/data/reference/geonames.db")
|
||||
|
||||
|
||||
def normalize_city_name(name: Optional[str]) -> str:
    """Lower-case, accent-strip, and de-suffix a city name for matching."""
    if not name:
        return ""
    # Decompose accented characters and drop the combining marks so that
    # e.g. "São Paulo" and "Sao Paulo" compare equal.
    decomposed = unicodedata.normalize('NFD', name)
    without_marks = ''.join(ch for ch in decomposed if unicodedata.category(ch) != 'Mn')
    cleaned = without_marks.lower().strip()

    # Strip romanized Japanese administrative suffixes — these are the
    # Latin forms of 市 (shi/city), 区 (ku/ward), 町 (machi/town), etc. —
    # so that e.g. "Kyoto-shi" matches the GeoNames entry "Kyoto".
    # Only the first matching suffix is removed.
    jp_suffixes = (' shi', '-shi', ' ku', '-ku', ' machi', '-machi', ' cho', '-cho',
                   ' ken', '-ken', ' gun', '-gun', ' son', '-son', ' mura', '-mura')
    for suffix in jp_suffixes:
        if cleaned.endswith(suffix):
            cleaned = cleaned[:-len(suffix)]
            break

    return cleaned
|
||||
|
||||
|
||||
class GeoNamesLookup:
    """Fast city coordinate lookup from a local GeoNames SQLite database."""

    # Columns returned by every lookup query (kept in one place so the
    # three matching strategies cannot drift apart).
    _COLUMNS = ("geonames_id, name, ascii_name, latitude, longitude, "
                "admin1_code, admin1_name, feature_code, population")

    def __init__(self, db_path: Path):
        """Open the GeoNames database; rows are accessed by column name."""
        self.conn = sqlite3.connect(db_path)
        self.conn.row_factory = sqlite3.Row

    def lookup_city(self, city: str, country_code: str, region: Optional[str] = None) -> Optional[dict]:
        """
        Look up city coordinates in the GeoNames database.

        Matching strategy (most to least precise), each scoped to the
        given country and breaking ties by population (largest first):
          1. case-insensitive exact match on the normalized name
          2. exact match on the original (possibly non-ASCII) name
          3. substring match on the normalized name

        Args:
            city: City name as found in the custodian record.
            country_code: ISO 3166-1 alpha-2 country code.
            region: Currently unused; reserved for admin1 disambiguation.

        Returns:
            dict with latitude, longitude, geonames_id, etc., or None if not found.
        """
        if not city or not country_code:
            return None

        city_norm = normalize_city_name(city)
        country_code = country_code.upper()

        attempts = [
            # 1. Exact match (case-insensitive) on the normalized name.
            ("LOWER(name) = ? OR LOWER(ascii_name) = ?",
             (country_code, city_norm or "", city_norm or "")),
            # 2. Exact match on the original name (handles non-ASCII scripts).
            ("name = ? OR ascii_name = ?",
             (country_code, city, city)),
            # 3. Partial (substring) match on the normalized name — last resort.
            ("LOWER(name) LIKE ? OR LOWER(ascii_name) LIKE ?",
             (country_code, f"%{city_norm}%", f"%{city_norm}%")),
        ]

        for condition, params in attempts:
            row = self._query_one(condition, params)
            if row:
                return self._row_to_dict(row)

        return None

    def _query_one(self, name_condition: str, params: tuple):
        """Run one country-scoped lookup; return the most populous match or None."""
        # name_condition is one of the fixed strings above, never user input;
        # all values are bound via placeholders.
        cursor = self.conn.execute(f"""
            SELECT {self._COLUMNS}
            FROM cities
            WHERE country_code = ?
              AND ({name_condition})
            ORDER BY population DESC
            LIMIT 1
        """, params)
        return cursor.fetchone()

    def _row_to_dict(self, row) -> dict:
        """Convert a database row to a plain dictionary."""
        return {
            'geonames_id': row['geonames_id'],
            'geonames_name': row['name'],
            'latitude': row['latitude'],
            'longitude': row['longitude'],
            'admin1_code': row['admin1_code'],
            'admin1_name': row['admin1_name'],
            'feature_code': row['feature_code'],
            'population': row['population']
        }

    def close(self):
        """Close the underlying database connection."""
        self.conn.close()
|
||||
|
||||
|
||||
def extract_city_country(data: dict) -> tuple[Optional[str], Optional[str]]:
    """
    Pull (city, country) out of a custodian record.

    Sources are consulted in priority order: the top-level `location`
    block, then `ghcid.location_resolution`, then the first entry of
    `original_entry.locations`. As a last resort the country is inferred
    from the first two characters of the current GHCID.

    Returns:
        (city, country) tuple; either element may be None.
    """
    city: Optional[str] = None
    country: Optional[str] = None

    # Preferred source: the canonical location block.
    location = data.get('location', {})
    if location:
        city = location.get('city')
        country = location.get('country')

    # Fallback: GHCID location-resolution metadata.
    if not city:
        resolution = data.get('ghcid', {}).get('location_resolution', {})
        if resolution:
            city = (resolution.get('city_name')
                    or resolution.get('city_label')
                    or resolution.get('geonames_name')
                    or resolution.get('google_maps_locality'))
            if not country:
                country = resolution.get('country_code')

    # Fallback: first location recorded on the original directory entry.
    if not city:
        original_locations = data.get('original_entry', {}).get('locations', [])
        if original_locations and len(original_locations) > 0:
            city = original_locations[0].get('city')
            country = original_locations[0].get('country')

    # Last resort: GHCIDs begin with the ISO country code.
    if not country:
        ghcid_current = data.get('ghcid', {}).get('ghcid_current', '')
        if ghcid_current and len(ghcid_current) >= 2:
            country = ghcid_current[:2]

    return city, country
|
||||
|
||||
|
||||
def geocode_file(filepath: Path, geonames: GeoNamesLookup, dry_run: bool = False) -> dict:
    """
    Geocode a single custodian file using GeoNames.

    Loads the YAML file, derives a (city, country) pair, looks it up in the
    local GeoNames database and — unless dry_run is set — writes coordinates
    plus provenance back to the file.

    Returns:
        Dictionary with results:
        - success: bool (False only on unexpected failures)
        - geocoded: bool (True if coordinates were added)
        - already_has_coords: bool
        - city / country: values used for the lookup (or None)
        - error: str or None
    """
    outcome = {
        'success': False,
        'geocoded': False,
        'already_has_coords': False,
        'city': None,
        'country': None,
        'error': None
    }

    try:
        with open(filepath, 'r', encoding='utf-8') as fh:
            payload = yaml.load(fh)

        if not isinstance(payload, dict):
            outcome['error'] = "Invalid YAML structure"
            return outcome

        # Nothing to do when both coordinates are already present.
        existing = payload.get('location', {})
        if existing.get('latitude') is not None and existing.get('longitude') is not None:
            outcome['success'] = True
            outcome['already_has_coords'] = True
            return outcome

        city, country = extract_city_country(payload)
        outcome['city'] = city
        outcome['country'] = country

        if not city or not country:
            outcome['error'] = f"Missing city ({city}) or country ({country})"
            outcome['success'] = True  # Not an error, just no data to geocode
            return outcome

        hit = geonames.lookup_city(city, country)
        if not hit:
            outcome['error'] = f"City not found in GeoNames: {city}, {country}"
            outcome['success'] = True  # Not a fatal error
            return outcome

        # Write coordinates plus provenance into the location block.
        location = payload.setdefault('location', {})
        location['latitude'] = hit['latitude']
        location['longitude'] = hit['longitude']
        location['coordinate_provenance'] = {
            'source_type': 'GEONAMES_LOCAL',
            'source_path': 'data/reference/geonames.db',
            'entity_id': hit['geonames_id'],
            'original_timestamp': datetime.now(timezone.utc).isoformat()
        }

        # Fill in GeoNames reference fields only where absent/empty.
        for field in ('geonames_id', 'geonames_name', 'feature_code'):
            if not location.get(field):
                location[field] = hit[field]

        location['normalization_timestamp'] = datetime.now(timezone.utc).isoformat()

        if not dry_run:
            with open(filepath, 'w', encoding='utf-8') as fh:
                yaml.dump(payload, fh)

        outcome['success'] = True
        outcome['geocoded'] = True
        return outcome

    except Exception as exc:
        # Best-effort pipeline: report the failure in the result rather
        # than aborting the whole batch.
        outcome['error'] = str(exc)
        return outcome
|
||||
|
||||
|
||||
def main():
    """CLI entry point: geocode custodian YAML files lacking coordinates.

    Walks the custodian directory (optionally filtered to one country code),
    geocodes each file via the local GeoNames database, prints a summary,
    and returns a process exit code (0 on success, 1 when the GeoNames
    database is missing).
    """
    parser = argparse.ArgumentParser(
        description="Geocode missing coordinates using GeoNames database"
    )
    parser.add_argument('--dry-run', action='store_true', help="Preview without writing")
    parser.add_argument('--country', type=str, help="Only process specific country code (e.g., JP)")
    parser.add_argument('--limit', type=int, default=0, help="Limit number of files to process")
    parser.add_argument('--all', action='store_true', help="Process all files (no limit)")
    parser.add_argument('--verbose', action='store_true', help="Show detailed output")

    args = parser.parse_args()

    if args.dry_run:
        print("DRY RUN - No files will be modified\n")

    # Initialize GeoNames lookup
    if not GEONAMES_DB.exists():
        print(f"Error: GeoNames database not found at {GEONAMES_DB}")
        return 1

    geonames = GeoNamesLookup(GEONAMES_DB)

    # Get list of files to process
    if args.country:
        pattern = f"{args.country.upper()}-*.yaml"
        files = sorted(CUSTODIAN_DIR.glob(pattern))
        print(f"Processing {args.country.upper()} files: {len(files)} found")
    else:
        files = sorted(CUSTODIAN_DIR.glob("*.yaml"))
        print(f"Processing all files: {len(files)} found")

    if args.limit and not args.all:
        files = files[:args.limit]
        print(f"Limited to first {args.limit} files")

    # Statistics
    stats = {
        'total': len(files),
        'geocoded': 0,
        'already_has_coords': 0,
        'no_city_data': 0,
        'not_found': 0,
        'errors': 0,
        'by_country': {}
    }

    errors = []       # capped at 20 entries
    not_found = []    # capped at 100 entries

    for i, filepath in enumerate(files):
        result = geocode_file(filepath, geonames, dry_run=args.dry_run)

        # Country code is the first two characters of the filename.
        country = filepath.name[:2]
        if country not in stats['by_country']:
            stats['by_country'][country] = {'geocoded': 0, 'not_found': 0}

        if result['geocoded']:
            stats['geocoded'] += 1
            stats['by_country'][country]['geocoded'] += 1
        elif result['already_has_coords']:
            stats['already_has_coords'] += 1
        elif result['error'] and 'Missing city' in result['error']:
            stats['no_city_data'] += 1
        elif result['error'] and 'not found in GeoNames' in result['error']:
            stats['not_found'] += 1
            stats['by_country'][country]['not_found'] += 1
            if len(not_found) < 100:
                not_found.append((filepath.name, result['city'], result['country']))
        elif result['error']:
            stats['errors'] += 1
            if len(errors) < 20:
                errors.append((filepath.name, result['error']))

        if args.verbose:
            status = "GEOCODED" if result['geocoded'] else "SKIP" if result['already_has_coords'] else "FAIL"
            print(f"[{i+1}/{len(files)}] {filepath.name}: {status}")
        elif (i + 1) % 1000 == 0:
            # Progress heartbeat for long runs when not verbose.
            print(f"Processed {i+1}/{len(files)} files... (geocoded: {stats['geocoded']})")

    # Print summary
    print("\n" + "=" * 60)
    print("GEOCODING SUMMARY")
    print("=" * 60)
    print(f"Total files processed: {stats['total']}")
    print(f"Already had coordinates: {stats['already_has_coords']}")
    print(f"Successfully geocoded: {stats['geocoded']}")
    print(f"No city data available: {stats['no_city_data']}")
    print(f"City not found in GeoNames: {stats['not_found']}")
    print(f"Errors: {stats['errors']}")

    if stats['by_country']:
        print("\nResults by country:")
        for country, data in sorted(stats['by_country'].items(), key=lambda x: -x[1]['geocoded']):
            if data['geocoded'] > 0 or data['not_found'] > 0:
                print(f"  {country}: geocoded={data['geocoded']}, not_found={data['not_found']}")

    if not_found:
        # BUG FIX: the report previously printed the literal "(unknown)"
        # instead of the unpacked filename, and the header counted the full
        # (100-capped) list while only 20 rows were shown.
        shown = not_found[:20]
        print(f"\nFirst {len(shown)} cities not found:")
        for filename, city, country in shown:
            print(f"  {filename}: {city}, {country}")

    if errors:
        # BUG FIX: same "(unknown)" placeholder replaced with the filename.
        print(f"\nFirst {len(errors)} errors:")
        for filename, error in errors:
            print(f"  {filename}: {error}")

    if args.dry_run:
        print("\n(DRY RUN - No files were modified)")

    geonames.close()
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # raise SystemExit propagates the return code without relying on the
    # site-module exit() helper, which is absent under `python -S` and in
    # some frozen/embedded interpreters.
    raise SystemExit(main())
|
||||
|
|
@ -399,9 +399,26 @@ def extract_top_level_fields(data: dict) -> dict:
|
|||
# Extract wikidata inception/dissolution
|
||||
wd = data.get("wikidata_enrichment", {})
|
||||
if wd:
|
||||
record["wikidata_inception"] = wd.get("wikidata_inception", "") or wd.get("wikidata_founded", "")
|
||||
if wd.get("wikidata_dissolution") and not record["dissolution_date"]:
|
||||
record["dissolution_date"] = wd.get("wikidata_dissolution", "") or wd.get("wikidata_dissolved", "")
|
||||
# Try multiple paths for inception date
|
||||
wikidata_inception = (
|
||||
wd.get("wikidata_inception", "") or
|
||||
wd.get("wikidata_founded", "") or
|
||||
wd.get("wikidata_temporal", {}).get("inception", "")
|
||||
)
|
||||
record["wikidata_inception"] = wikidata_inception
|
||||
|
||||
# Use wikidata_inception as founding_date fallback
|
||||
if wikidata_inception and not record["founding_date"]:
|
||||
record["founding_date"] = wikidata_inception
|
||||
|
||||
# Try multiple paths for dissolution date
|
||||
wikidata_dissolution = (
|
||||
wd.get("wikidata_dissolution", "") or
|
||||
wd.get("wikidata_dissolved", "")
|
||||
)
|
||||
if wikidata_dissolution and not record["dissolution_date"]:
|
||||
record["dissolution_date"] = wikidata_dissolution
|
||||
|
||||
record["wikidata_enrichment_json"] = json.dumps(wd, ensure_ascii=False, default=str)
|
||||
|
||||
# Extract service_area
|
||||
|
|
|
|||
Loading…
Reference in a new issue