Add script to enrich NDE Register NL entries with Wikidata data
- Implemented a Python script that fetches and enriches entries from the NDE Register using data from Wikidata.
- Utilized the Wikibase REST API and SPARQL endpoints for data retrieval.
- Added logging for tracking progress and errors during the enrichment process.
- Configured rate limiting based on authentication status for API requests.
- Created a structured output in YAML format, including detailed enrichment data.
- Generated a log file summarizing the enrichment process and results.
This commit is contained in:
parent
cd0ff5b9c7
commit
5ef8ccac51
19 changed files with 30132 additions and 79 deletions
|
|
@ -14,7 +14,7 @@
|
|||
datasetregister: dataset ontbreekt
|
||||
versnellen_project: Upgrade? Aanschaf?
|
||||
opmerkingen: Wat is de type organisatie?
|
||||
wikidata_id:
|
||||
wikidata_id: Q22246632
|
||||
type:
|
||||
- M
|
||||
- plaatsnaam_bezoekadres: Borger
|
||||
11505
data/nde/nde_register_nl.yaml
Normal file
11505
data/nde/nde_register_nl.yaml
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -150,7 +150,7 @@
|
|||
.link-bidirectional {
|
||||
stroke: #6366f1 !important; /* Indigo for bidirectional */
|
||||
stroke-dasharray: 5, 3;
|
||||
cursor: context-menu;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.link-bidirectional:hover {
|
||||
|
|
@ -162,9 +162,9 @@
|
|||
fill: #6366f1 !important;
|
||||
}
|
||||
|
||||
/* Hint for right-click action */
|
||||
/* Hint for click action */
|
||||
.link-group:has(.link-bidirectional):hover::after {
|
||||
content: "Right-click to reverse";
|
||||
content: "Click to reverse";
|
||||
position: absolute;
|
||||
background: rgba(0, 0, 0, 0.8);
|
||||
color: white;
|
||||
|
|
|
|||
|
|
@ -65,15 +65,24 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
const [selectedNode, setSelectedNode] = useState<UMLNode | null>(null);
|
||||
const [zoom, setZoom] = useState(1);
|
||||
const zoomTransformRef = useRef<d3.ZoomTransform>(d3.zoomIdentity);
|
||||
const previousDiagramRef = useRef<string | null>(null); // Track diagram changes for auto-fit
|
||||
|
||||
useEffect(() => {
|
||||
if (!svgRef.current || !diagram) return;
|
||||
|
||||
// Store current zoom transform before clearing
|
||||
// Determine if this is a new diagram (should auto-fit) or same diagram with layout change (preserve zoom)
|
||||
const diagramId = diagram.title || JSON.stringify(diagram.nodes.map(n => n.id).sort());
|
||||
const isNewDiagram = previousDiagramRef.current !== diagramId;
|
||||
previousDiagramRef.current = diagramId;
|
||||
|
||||
// Store current zoom transform before clearing (only if same diagram)
|
||||
const currentSvg = d3.select(svgRef.current);
|
||||
const currentTransform = d3.zoomTransform(currentSvg.node() as Element);
|
||||
if (currentTransform && (currentTransform.k !== 1 || currentTransform.x !== 0 || currentTransform.y !== 0)) {
|
||||
if (!isNewDiagram && currentTransform && (currentTransform.k !== 1 || currentTransform.x !== 0 || currentTransform.y !== 0)) {
|
||||
zoomTransformRef.current = currentTransform;
|
||||
} else if (isNewDiagram) {
|
||||
// Reset zoom ref for new diagrams
|
||||
zoomTransformRef.current = d3.zoomIdentity;
|
||||
}
|
||||
|
||||
// Clear previous content
|
||||
|
|
@ -400,7 +409,17 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
// Default node width for fallback (used where node.width is not yet set)
|
||||
const defaultNodeWidth = minNodeWidth;
|
||||
|
||||
diagram.nodes.forEach(node => {
|
||||
// IMPORTANT: Work on per-element shallow copies of the nodes and links to avoid mutating the original diagram prop (nested arrays such as attributes/methods are still shared)
|
||||
// This prevents issues when switching layouts (force simulation mutates source/target to objects)
|
||||
const workingNodes = diagram.nodes.map(node => ({ ...node }));
|
||||
const workingLinks = diagram.links.map(link => ({
|
||||
...link,
|
||||
// Always ensure source/target are strings (force simulation may have converted them to objects)
|
||||
source: typeof link.source === 'string' ? link.source : (link.source as any).id,
|
||||
target: typeof link.target === 'string' ? link.target : (link.target as any).id
|
||||
}));
|
||||
|
||||
workingNodes.forEach(node => {
|
||||
const attributeCount = node.attributes?.length || 0;
|
||||
const methodCount = node.methods?.length || 0;
|
||||
|
||||
|
|
@ -435,26 +454,23 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
g.setDefaultEdgeLabel(() => ({}));
|
||||
|
||||
// Add nodes to dagre graph
|
||||
diagram.nodes.forEach(node => {
|
||||
workingNodes.forEach(node => {
|
||||
g.setNode(node.id, {
|
||||
width: node.width || defaultNodeWidth,
|
||||
height: node.height || nodeHeaderHeight
|
||||
});
|
||||
});
|
||||
|
||||
// Add edges to dagre graph
|
||||
diagram.links.forEach(link => {
|
||||
g.setEdge(
|
||||
typeof link.source === 'string' ? link.source : (link.source as any).id,
|
||||
typeof link.target === 'string' ? link.target : (link.target as any).id
|
||||
);
|
||||
// Add edges to dagre graph (source/target are always strings now)
|
||||
workingLinks.forEach(link => {
|
||||
g.setEdge(link.source as string, link.target as string);
|
||||
});
|
||||
|
||||
// Run dagre layout
|
||||
dagre.layout(g);
|
||||
|
||||
// Apply computed positions to nodes
|
||||
diagram.nodes.forEach(node => {
|
||||
workingNodes.forEach(node => {
|
||||
const dagreNode = g.node(node.id);
|
||||
if (dagreNode) {
|
||||
node.x = dagreNode.x;
|
||||
|
|
@ -470,8 +486,8 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
|
||||
} else {
|
||||
// Force simulation for layout (original scattered physics-based layout)
|
||||
simulation = d3.forceSimulation(diagram.nodes as any)
|
||||
.force('link', d3.forceLink(diagram.links)
|
||||
simulation = d3.forceSimulation(workingNodes as any)
|
||||
.force('link', d3.forceLink(workingLinks)
|
||||
.id((d: any) => d.id)
|
||||
.distance(250)
|
||||
.strength(0.5))
|
||||
|
|
@ -529,22 +545,26 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
}
|
||||
});
|
||||
|
||||
// Initialize link state (parser now handles bidirectional detection)
|
||||
diagram.links.forEach(link => {
|
||||
// Initialize link state on working copies (parser now handles bidirectional detection)
|
||||
workingLinks.forEach(link => {
|
||||
// Keep bidirectional and isReversed from parser, with defaults
|
||||
link.bidirectional = link.bidirectional || false;
|
||||
link.isReversed = link.isReversed || false;
|
||||
});
|
||||
|
||||
// Debug: Log link counts
|
||||
const bidirectionalCount = diagram.links.filter(l => l.bidirectional).length;
|
||||
console.log(`[UMLVisualization] Total links: ${diagram.links.length}, Bidirectional: ${bidirectionalCount}`);
|
||||
// Debug: Log node and link counts
|
||||
const bidirectionalCount = workingLinks.filter(l => l.bidirectional).length;
|
||||
console.log(`[UMLVisualization] Nodes: ${workingNodes.length}, Total links: ${workingLinks.length}, Bidirectional: ${bidirectionalCount}`);
|
||||
|
||||
// Debug: Check if nodes have positions
|
||||
const nodesWithPositions = workingNodes.filter(n => n.x !== undefined && n.y !== undefined).length;
|
||||
console.log(`[UMLVisualization] Nodes with positions: ${nodesWithPositions}/${workingNodes.length}`);
|
||||
|
||||
// Draw links first (edges between nodes)
|
||||
const links = g.append('g')
|
||||
.attr('class', 'links')
|
||||
.selectAll('g')
|
||||
.data(diagram.links)
|
||||
.data(workingLinks)
|
||||
.join('g')
|
||||
.attr('class', 'link-group');
|
||||
|
||||
|
|
@ -560,13 +580,12 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
? 'association' : d.type;
|
||||
return `url(#arrow-${arrowType})`;
|
||||
})
|
||||
.style('cursor', (d) => d.bidirectional ? 'context-menu' : 'default')
|
||||
.on('contextmenu', function(event, d: any) {
|
||||
// Toggle direction for bidirectional edges on right-click
|
||||
.style('cursor', (d) => d.bidirectional ? 'pointer' : 'default')
|
||||
.on('click', function(event, d: any) {
|
||||
// Toggle direction for bidirectional edges on left-click
|
||||
if (!d.bidirectional) return;
|
||||
|
||||
event.preventDefault(); // Prevent browser context menu
|
||||
event.stopPropagation();
|
||||
event.stopPropagation(); // Prevent triggering svg click (deselect)
|
||||
|
||||
// Toggle reversed state
|
||||
d.isReversed = !d.isReversed;
|
||||
|
|
@ -577,7 +596,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
d.target = temp;
|
||||
|
||||
// Update the label text to show the correct direction
|
||||
const linkIndex = diagram.links.indexOf(d);
|
||||
const linkIndex = workingLinks.indexOf(d);
|
||||
const labelSelection = linkLabels.filter((_: any, i: number) => i === linkIndex);
|
||||
|
||||
// Get the appropriate label based on current direction
|
||||
|
|
@ -606,7 +625,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
.attr('stroke', '#6366f1') // Back to indigo
|
||||
.attr('marker-end', `url(#arrow-${arrowType})`);
|
||||
|
||||
// Flash the label
|
||||
// Flash the label then hide it again
|
||||
labelSelection
|
||||
.transition()
|
||||
.duration(200)
|
||||
|
|
@ -616,7 +635,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
.transition()
|
||||
.delay(500)
|
||||
.duration(300)
|
||||
.style('opacity', 0.8)
|
||||
.style('opacity', 0) // Hide label again after flash
|
||||
.attr('font-weight', 'normal')
|
||||
.attr('fill', '#172a59');
|
||||
|
||||
|
|
@ -642,7 +661,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
.attr('marker-end', `url(#arrow-${arrowType}-highlight)`);
|
||||
|
||||
// Show label more prominently
|
||||
const linkIndex = diagram.links.indexOf(d);
|
||||
const linkIndex = workingLinks.indexOf(d);
|
||||
linkLabels.filter((_: any, i: number) => i === linkIndex)
|
||||
.transition()
|
||||
.duration(200)
|
||||
|
|
@ -653,7 +672,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
if (d.bidirectional && this.parentNode) {
|
||||
d3.select(this.parentNode as Element)
|
||||
.append('title')
|
||||
.text('Right-click to reverse direction');
|
||||
.text('Click to reverse direction');
|
||||
}
|
||||
})
|
||||
.on('mouseleave', function(_event, d: any) {
|
||||
|
|
@ -669,12 +688,12 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
.attr('stroke-width', 2)
|
||||
.attr('marker-end', `url(#arrow-${arrowType})`);
|
||||
|
||||
// Reset label
|
||||
const linkIndex = diagram.links.indexOf(d);
|
||||
// Reset label - hide it again
|
||||
const linkIndex = workingLinks.indexOf(d);
|
||||
linkLabels.filter((_: any, i: number) => i === linkIndex)
|
||||
.transition()
|
||||
.duration(200)
|
||||
.style('opacity', 0.8)
|
||||
.style('opacity', 0) // Hide label again
|
||||
.attr('font-weight', 'normal');
|
||||
|
||||
// Remove tooltip
|
||||
|
|
@ -683,14 +702,27 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
}
|
||||
});
|
||||
|
||||
// Add link labels with enhanced visibility
|
||||
// Add link labels with background for readability
|
||||
// First add background rects (will be positioned in updateLinkPositions)
|
||||
const linkLabelBackgrounds = links.append('rect')
|
||||
.attr('class', 'link-label-bg')
|
||||
.attr('rx', 3)
|
||||
.attr('ry', 3)
|
||||
.attr('fill', 'white')
|
||||
.attr('stroke', '#e5e7eb')
|
||||
.attr('stroke-width', 1)
|
||||
.style('opacity', 0) // HIDDEN by default
|
||||
.style('pointer-events', 'none');
|
||||
|
||||
// Add link labels - HIDDEN by default (opacity: 0), shown on hover to prevent overlap
|
||||
const linkLabels = links.append('text')
|
||||
.attr('class', (d) => `link-label${d.bidirectional ? ' link-label-bidirectional' : ''}`)
|
||||
.attr('text-anchor', 'middle')
|
||||
.attr('dy', -5)
|
||||
.attr('fill', '#172a59')
|
||||
.attr('font-size', '11px')
|
||||
.style('opacity', 0.8)
|
||||
.attr('font-weight', '500')
|
||||
.style('opacity', 0) // HIDDEN by default - prevents label overlap
|
||||
.style('pointer-events', 'none') // Don't interfere with click events
|
||||
.text((d) => {
|
||||
// Show label based on current direction (isReversed state)
|
||||
|
|
@ -707,7 +739,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
const nodes = g.append('g')
|
||||
.attr('class', 'nodes')
|
||||
.selectAll('g')
|
||||
.data(diagram.nodes)
|
||||
.data(workingNodes)
|
||||
.join('g')
|
||||
.attr('class', (d) => `node node-${d.type}`)
|
||||
.call(d3.drag<any, any>()
|
||||
|
|
@ -830,7 +862,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
const updateLinkPositions = () => {
|
||||
// Build node lookup map for O(1) access
|
||||
const nodeMap = new Map<string, UMLNode>();
|
||||
diagram.nodes.forEach(n => nodeMap.set(n.id, n));
|
||||
workingNodes.forEach(n => nodeMap.set(n.id, n));
|
||||
|
||||
links.each(function(linkData: any) {
|
||||
// Get source and target nodes - handle both string IDs and object references
|
||||
|
|
@ -870,9 +902,25 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
.attr('y2', targetIntersection.y);
|
||||
|
||||
// Update label position at midpoint
|
||||
linkGroup.select('text')
|
||||
.attr('x', (source.x + target.x) / 2)
|
||||
.attr('y', (source.y! + target.y!) / 2);
|
||||
const midX = (source.x + target.x) / 2;
|
||||
const midY = (source.y! + target.y!) / 2;
|
||||
|
||||
const textElement = linkGroup.select('text');
|
||||
textElement
|
||||
.attr('x', midX)
|
||||
.attr('y', midY);
|
||||
|
||||
// Update background rect position and size based on text bounds
|
||||
const textNode = textElement.node() as SVGTextElement | null;
|
||||
if (textNode) {
|
||||
const bbox = textNode.getBBox();
|
||||
const padding = 4;
|
||||
linkGroup.select('.link-label-bg')
|
||||
.attr('x', bbox.x - padding)
|
||||
.attr('y', bbox.y - padding)
|
||||
.attr('width', bbox.width + padding * 2)
|
||||
.attr('height', bbox.height + padding * 2);
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
|
|
@ -889,10 +937,23 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
|
|||
updateNodePositions();
|
||||
updateLinkPositions();
|
||||
});
|
||||
|
||||
// Auto-fit after simulation settles (for new diagrams only)
|
||||
if (isNewDiagram) {
|
||||
simulation.on('end', () => {
|
||||
// Delay slightly to ensure DOM is fully updated
|
||||
setTimeout(() => handleFitToScreen(), 100);
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// Dagre layout - positions are already computed, update immediately
|
||||
updateNodePositions();
|
||||
updateLinkPositions();
|
||||
|
||||
// Auto-fit to screen for new diagrams (delay to ensure DOM is ready)
|
||||
if (isNewDiagram) {
|
||||
setTimeout(() => handleFitToScreen(), 100);
|
||||
}
|
||||
}
|
||||
|
||||
// Drag functions
|
||||
|
|
|
|||
338
schemas/20251121/examples/registration_examples.yaml
Normal file
338
schemas/20251121/examples/registration_examples.yaml
Normal file
|
|
@ -0,0 +1,338 @@
|
|||
# Registration and Trade Register Examples
|
||||
# Examples of registration authorities, trade registers, and jurisdictions
|
||||
# from multiple countries aligned with GLEIF Registration Authorities List (RAL)
|
||||
|
||||
# ============================================================================
|
||||
# JURISDICTIONS
|
||||
# ============================================================================
|
||||
|
||||
jurisdictions:
|
||||
# National Jurisdictions
|
||||
- jurisdiction_id: "NL"
|
||||
jurisdiction_type: NATIONAL
|
||||
country:
|
||||
alpha_2: "NL"
|
||||
alpha_3: "NLD"
|
||||
legal_system_type: CIVIL_LAW
|
||||
gleif_jurisdiction_code: "NL"
|
||||
description: "Netherlands national jurisdiction - civil law system based on Napoleonic code"
|
||||
|
||||
- jurisdiction_id: "GB"
|
||||
jurisdiction_type: NATIONAL
|
||||
country:
|
||||
alpha_2: "GB"
|
||||
alpha_3: "GBR"
|
||||
legal_system_type: COMMON_LAW
|
||||
gleif_jurisdiction_code: "GB"
|
||||
description: "United Kingdom national jurisdiction - common law system"
|
||||
|
||||
- jurisdiction_id: "JP"
|
||||
jurisdiction_type: NATIONAL
|
||||
country:
|
||||
alpha_2: "JP"
|
||||
alpha_3: "JPN"
|
||||
legal_system_type: CIVIL_LAW
|
||||
gleif_jurisdiction_code: "JP"
|
||||
description: "Japan national jurisdiction - civil law system with German influence"
|
||||
|
||||
# Subnational Jurisdictions (e.g., German federal states with regional courts, US states)
|
||||
- jurisdiction_id: "DE-BY"
|
||||
jurisdiction_type: SUBNATIONAL
|
||||
country:
|
||||
alpha_2: "DE"
|
||||
alpha_3: "DEU"
|
||||
subregion:
|
||||
iso_3166_2_code: "DE-BY"
|
||||
subdivision_name: "Bavaria"
|
||||
legal_system_type: CIVIL_LAW
|
||||
gleif_jurisdiction_code: "DE"
|
||||
description: "Bavaria (Bayern) subnational jurisdiction - German federal state"
|
||||
|
||||
- jurisdiction_id: "US-DE"
|
||||
jurisdiction_type: SUBNATIONAL
|
||||
country:
|
||||
alpha_2: "US"
|
||||
alpha_3: "USA"
|
||||
subregion:
|
||||
iso_3166_2_code: "US-DE"
|
||||
subdivision_name: "Delaware"
|
||||
legal_system_type: COMMON_LAW
|
||||
gleif_jurisdiction_code: "US-DE"
|
||||
description: "Delaware subnational jurisdiction - popular for US corporate registrations"
|
||||
|
||||
# Supranational Jurisdiction
|
||||
- jurisdiction_id: "EU"
|
||||
jurisdiction_type: SUPRANATIONAL
|
||||
supranational_code: "EU"
|
||||
legal_system_type: CIVIL_LAW
|
||||
gleif_jurisdiction_code: "EU"
|
||||
description: "European Union supranational jurisdiction - for SE (Societas Europaea) and EEIG"
|
||||
|
||||
# ============================================================================
|
||||
# REGISTRATION AUTHORITIES
|
||||
# ============================================================================
|
||||
|
||||
registration_authorities:
|
||||
# Netherlands
|
||||
- id: "https://w3id.org/heritage/ra/nl-kvk"
|
||||
name: "Chamber of Commerce"
|
||||
name_local: "Kamer van Koophandel"
|
||||
abbreviation: "KvK"
|
||||
gleif_ra_code: "RA000439"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "NL"
|
||||
jurisdiction_type: NATIONAL
|
||||
website: "https://www.kvk.nl/"
|
||||
registration_types:
|
||||
- "companies"
|
||||
- "foundations"
|
||||
- "associations"
|
||||
- "sole proprietors"
|
||||
|
||||
# United Kingdom
|
||||
- id: "https://w3id.org/heritage/ra/gb-ch"
|
||||
name: "Companies House"
|
||||
name_local: "Companies House"
|
||||
abbreviation: "CH"
|
||||
gleif_ra_code: "RA000585"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "GB"
|
||||
jurisdiction_type: NATIONAL
|
||||
website: "https://www.gov.uk/government/organisations/companies-house"
|
||||
registration_types:
|
||||
- "companies"
|
||||
- "limited liability partnerships"
|
||||
|
||||
# UK Charity Commission (separate from Companies House)
|
||||
- id: "https://w3id.org/heritage/ra/gb-cc"
|
||||
name: "Charity Commission for England and Wales"
|
||||
name_local: "Charity Commission"
|
||||
abbreviation: "CC"
|
||||
gleif_ra_code: "RA000586"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "GB"
|
||||
jurisdiction_type: NATIONAL
|
||||
website: "https://www.gov.uk/government/organisations/charity-commission"
|
||||
registration_types:
|
||||
- "charities"
|
||||
- "charitable incorporated organisations"
|
||||
|
||||
# Germany (Bavaria example - local court)
|
||||
- id: "https://w3id.org/heritage/ra/de-by-muc"
|
||||
name: "Local Court Munich"
|
||||
name_local: "Amtsgericht München"
|
||||
abbreviation: "AG München"
|
||||
gleif_ra_code: "RA000385"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "DE-BY"
|
||||
jurisdiction_type: SUBNATIONAL
|
||||
website: "https://www.justiz.bayern.de/gerichte-und-behoerden/amtsgerichte/muenchen/"
|
||||
registration_types:
|
||||
- "commercial companies"
|
||||
- "partnerships"
|
||||
|
||||
# Japan
|
||||
- id: "https://w3id.org/heritage/ra/jp-lab"
|
||||
name: "Legal Affairs Bureau"
|
||||
name_local: "法務局"
|
||||
abbreviation: "法務局"
|
||||
gleif_ra_code: "RA000429"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "JP"
|
||||
jurisdiction_type: NATIONAL
|
||||
website: "https://www.moj.go.jp/MINJI/minji06_00076.html"
|
||||
registration_types:
|
||||
- "corporations"
|
||||
- "foundations"
|
||||
|
||||
# USA Delaware
|
||||
- id: "https://w3id.org/heritage/ra/us-de-doc"
|
||||
name: "Delaware Division of Corporations"
|
||||
name_local: "Division of Corporations"
|
||||
abbreviation: "DE DOC"
|
||||
gleif_ra_code: "RA000598"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "US-DE"
|
||||
jurisdiction_type: SUBNATIONAL
|
||||
website: "https://corp.delaware.gov/"
|
||||
registration_types:
|
||||
- "corporations"
|
||||
- "limited liability companies"
|
||||
|
||||
# ============================================================================
|
||||
# TRADE REGISTERS
|
||||
# ============================================================================
|
||||
|
||||
trade_registers:
|
||||
# Netherlands Commercial Register (Handelsregister)
|
||||
- register_id: "NL-HR"
|
||||
register_name: "Commercial Register"
|
||||
register_name_local: "Handelsregister"
|
||||
register_abbreviation: "HR"
|
||||
register_type: MIXED
|
||||
gleif_ra_code: "RA000439"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "NL"
|
||||
jurisdiction_type: NATIONAL
|
||||
maintained_by:
|
||||
id: "https://w3id.org/heritage/ra/nl-kvk"
|
||||
name: "Chamber of Commerce"
|
||||
abbreviation: "KvK"
|
||||
website: "https://www.kvk.nl/zoeken/"
|
||||
api_endpoint: "https://api.kvk.nl/"
|
||||
identifier_format: "[0-9]{8}"
|
||||
description: "Netherlands commercial register maintained by KvK, covers all legal entities"
|
||||
|
||||
# UK Companies Register
|
||||
- register_id: "GB-CH"
|
||||
register_name: "Companies Register"
|
||||
register_name_local: "Companies Register"
|
||||
register_abbreviation: "CH"
|
||||
register_type: COMMERCIAL
|
||||
gleif_ra_code: "RA000585"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "GB"
|
||||
jurisdiction_type: NATIONAL
|
||||
maintained_by:
|
||||
id: "https://w3id.org/heritage/ra/gb-ch"
|
||||
name: "Companies House"
|
||||
website: "https://find-and-update.company-information.service.gov.uk/"
|
||||
api_endpoint: "https://api.company-information.service.gov.uk/"
|
||||
identifier_format: "[A-Z]{2}[0-9]{6}|[0-9]{8}"
|
||||
description: "UK companies register - Scotland prefix SC, Northern Ireland prefix NI"
|
||||
|
||||
# UK Charity Register (separate from Companies House)
|
||||
- register_id: "GB-CC"
|
||||
register_name: "Charity Register"
|
||||
register_name_local: "Register of Charities"
|
||||
register_abbreviation: "CC"
|
||||
register_type: CHARITY
|
||||
gleif_ra_code: "RA000586"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "GB"
|
||||
jurisdiction_type: NATIONAL
|
||||
maintained_by:
|
||||
id: "https://w3id.org/heritage/ra/gb-cc"
|
||||
name: "Charity Commission"
|
||||
website: "https://register-of-charities.charitycommission.gov.uk/"
|
||||
identifier_format: "[0-9]{6,7}"
|
||||
description: "England and Wales charity register"
|
||||
|
||||
# German Commercial Register (Bavaria - Munich court)
|
||||
- register_id: "DE-HRB-MUC"
|
||||
register_name: "Commercial Register"
|
||||
register_name_local: "Handelsregister"
|
||||
register_abbreviation: "HRB"
|
||||
register_type: COMMERCIAL
|
||||
gleif_ra_code: "RA000385"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "DE-BY"
|
||||
jurisdiction_type: SUBNATIONAL
|
||||
maintained_by:
|
||||
id: "https://w3id.org/heritage/ra/de-by-muc"
|
||||
name: "Local Court Munich"
|
||||
name_local: "Amtsgericht München"
|
||||
website: "https://www.handelsregister.de/"
|
||||
identifier_format: "HRB [0-9]+ B"
|
||||
description: "Munich commercial register (HRB for GmbH/AG, HRA for partnerships)"
|
||||
|
||||
# Japan Commercial Registration
|
||||
- register_id: "JP-CR"
|
||||
register_name: "Commercial Registration"
|
||||
register_name_local: "商業登記簿"
|
||||
register_abbreviation: "CR"
|
||||
register_type: COMMERCIAL
|
||||
gleif_ra_code: "RA000429"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "JP"
|
||||
jurisdiction_type: NATIONAL
|
||||
maintained_by:
|
||||
id: "https://w3id.org/heritage/ra/jp-lab"
|
||||
name: "Legal Affairs Bureau"
|
||||
name_local: "法務局"
|
||||
website: "https://www.touki-kyoutaku-online.moj.go.jp/"
|
||||
identifier_format: "[0-9]{4}-[0-9]{2}-[0-9]{6}"
|
||||
description: "Japan commercial registration system"
|
||||
|
||||
# ============================================================================
|
||||
# EXAMPLE REGISTRATION NUMBERS
|
||||
# ============================================================================
|
||||
|
||||
registration_numbers:
|
||||
# Rijksmuseum (Netherlands)
|
||||
- id: "https://w3id.org/heritage/reg/nl/rijksmuseum-kvk"
|
||||
number: "41215422"
|
||||
type: "KvK"
|
||||
trade_register:
|
||||
register_id: "NL-HR"
|
||||
register_name: "Commercial Register"
|
||||
temporal_validity:
|
||||
begin_of_the_begin: "1885-07-01"
|
||||
# Still active - no end date
|
||||
|
||||
# British Museum (UK) - Companies House
|
||||
- id: "https://w3id.org/heritage/reg/gb/british-museum-ch"
|
||||
number: "RC000024"
|
||||
type: "CRN"
|
||||
trade_register:
|
||||
register_id: "GB-CH"
|
||||
register_name: "Companies Register"
|
||||
temporal_validity:
|
||||
begin_of_the_begin: "1963-01-01"
|
||||
|
||||
# British Museum (UK) - Charity Commission
|
||||
- id: "https://w3id.org/heritage/reg/gb/british-museum-cc"
|
||||
number: "1126962"
|
||||
type: "CC"
|
||||
trade_register:
|
||||
register_id: "GB-CC"
|
||||
register_name: "Charity Register"
|
||||
temporal_validity:
|
||||
begin_of_the_begin: "2008-12-15"
|
||||
|
||||
# Deutsches Museum (Germany - Munich)
|
||||
- id: "https://w3id.org/heritage/reg/de/deutsches-museum-hrb"
|
||||
number: "HRB 6532 B"
|
||||
type: "HRB"
|
||||
trade_register:
|
||||
register_id: "DE-HRB-MUC"
|
||||
register_name: "Commercial Register"
|
||||
temporal_validity:
|
||||
begin_of_the_begin: "1903-06-28"
|
||||
|
||||
# Tokyo National Museum (Japan)
|
||||
- id: "https://w3id.org/heritage/reg/jp/tokyo-national-museum"
|
||||
number: "0100-05-123456"
|
||||
type: "CR"
|
||||
trade_register:
|
||||
register_id: "JP-CR"
|
||||
register_name: "Commercial Registration"
|
||||
temporal_validity:
|
||||
begin_of_the_begin: "1872-03-10"
|
||||
|
||||
# ============================================================================
|
||||
# NOTES
|
||||
# ============================================================================
|
||||
#
|
||||
# GLEIF Registration Authorities List (RAL):
|
||||
# - Contains 1,050+ registration authorities across 232 jurisdictions
|
||||
# - Each authority has a unique RA code (format: RA followed by 6 digits)
|
||||
# - Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
|
||||
#
|
||||
# Common GLEIF RA Codes for Heritage Institutions:
|
||||
# - RA000439: Netherlands KvK (Kamer van Koophandel)
|
||||
# - RA000585: UK Companies House
|
||||
# - RA000586: UK Charity Commission
|
||||
# - RA000385: Germany Amtsgericht München
|
||||
# - RA000429: Japan Legal Affairs Bureau
|
||||
# - RA000598: USA Delaware Division of Corporations
|
||||
# - RA000421: Ireland Companies Registration Office
|
||||
# - RA000287: France Registre du commerce et des sociétés
|
||||
#
|
||||
# Ontology Alignment:
|
||||
# - gleif-ra:BusinessRegistry → TradeRegister
|
||||
# - gleif-base:RegistrationAuthority → RegistrationAuthority
|
||||
# - gleif-base:RegistryIdentifier → RegistrationNumber
|
||||
# - gleif-base:hasLegalJurisdiction → legal_jurisdiction
|
||||
# - gleif-base:isRegisteredIn → primary_register
|
||||
# - gleif-base:isManagedBy → maintained_by
|
||||
|
|
@ -22,7 +22,7 @@ description: >-
|
|||
|
||||
Inspired by PiCo (Persons in Context) ontology pattern for distinguishing observations from entities.
|
||||
|
||||
version: 0.8.9
|
||||
version: 0.9.0
|
||||
license: https://creativecommons.org/licenses/by-sa/4.0/
|
||||
|
||||
prefixes:
|
||||
|
|
@ -238,6 +238,18 @@ imports:
|
|||
- modules/classes/FeaturePlace
|
||||
- modules/classes/DigitalPlatform
|
||||
- modules/classes/CollectionManagementSystem
|
||||
|
||||
# Registration and Jurisdiction classes (NEW - GLEIF alignment)
|
||||
- modules/classes/Jurisdiction
|
||||
- modules/classes/TradeRegister
|
||||
|
||||
# New slots for registration info
|
||||
- modules/slots/country
|
||||
- modules/slots/description
|
||||
- modules/slots/website
|
||||
- modules/slots/jurisdiction
|
||||
- modules/slots/primary_register
|
||||
- modules/slots/legal_jurisdiction
|
||||
|
||||
comments:
|
||||
- "HYPER-MODULAR STRUCTURE: Direct imports of all component files"
|
||||
|
|
@ -264,6 +276,9 @@ comments:
|
|||
- "Geographic classes (3): Country (ISO 3166-1), Subregion (ISO 3166-2), Settlement (GeoNames)"
|
||||
- "Geographic slots (2): subregion, settlement (added to CustodianPlace alongside existing country slot)"
|
||||
- "Geographic validation: FeatureTypeEnum has dcterms:spatial annotations for 72 country-restricted feature types"
|
||||
- "Registration modeling (v0.9.0): Jurisdiction (lcc-cr:GeographicRegion), TradeRegister (gleif-ra:BusinessRegistry), enhanced RegistrationInfo with GLEIF alignment"
|
||||
- "GLEIF integration: Registration Authority List (RAL) codes (RA000439 for KvK, etc.), gleif-base:RegistryIdentifier, gleif-base:RegistrationAuthority"
|
||||
- "New slots (v0.9.0): country, description, website, jurisdiction for registration and jurisdiction support"
|
||||
|
||||
see_also:
|
||||
- "https://github.com/FICLIT/PiCo"
|
||||
|
|
|
|||
|
|
@ -18,6 +18,10 @@ imports:
|
|||
- ./LegalForm
|
||||
- ./LegalName
|
||||
- ./RegistrationInfo
|
||||
- ./TradeRegister
|
||||
- ./Jurisdiction
|
||||
- ../slots/primary_register
|
||||
- ../slots/legal_jurisdiction
|
||||
|
||||
classes:
|
||||
|
||||
|
|
@ -88,6 +92,8 @@ classes:
|
|||
- legal_form
|
||||
- registration_numbers
|
||||
- registration_authority
|
||||
- primary_register
|
||||
- legal_jurisdiction
|
||||
- dissolution_date
|
||||
- temporal_extent
|
||||
- parent_custodian
|
||||
|
|
@ -199,20 +205,62 @@ classes:
|
|||
Temporal validity is now captured in RegistrationNumber class.
|
||||
range: date
|
||||
registration_authority:
|
||||
slot_uri: rov:hasRegisteredOrganization
|
||||
slot_uri: gleif-base:isManagedBy
|
||||
description: >-
|
||||
Primary registration authority for this entity.
|
||||
Links to RegistrationAuthority class.
|
||||
|
||||
gleif-base:RegistrationAuthority - "An organization that is responsible for
|
||||
maintaining a registry and provides registration services."
|
||||
|
||||
Examples: Chamber of Commerce, Companies House, Charity Commission.
|
||||
ROV: hasRegisteredOrganization for registering authority.
|
||||
range: RegistrationAuthority
|
||||
examples:
|
||||
- value:
|
||||
name: "Kamer van Koophandel"
|
||||
abbreviation: "KvK"
|
||||
jurisdiction: "NL"
|
||||
gleif_ra_code: "RA000439"
|
||||
description: "Dutch Chamber of Commerce"
|
||||
primary_register:
|
||||
slot_uri: gleif-base:isRegisteredIn
|
||||
description: >-
|
||||
Primary trade register where this entity is registered.
|
||||
Links to TradeRegister class.
|
||||
|
||||
gleif-base:isRegisteredIn - "indicates the registry that something is registered in"
|
||||
gleif-ra:BusinessRegistry - "a registry for registering and maintaining
|
||||
information about business entities"
|
||||
|
||||
Examples: Netherlands Handelsregister, UK Companies Register, German HRB.
|
||||
range: TradeRegister
|
||||
examples:
|
||||
- value:
|
||||
register_name: "Commercial Register"
|
||||
register_name_local: "Handelsregister"
|
||||
register_abbreviation: "HR"
|
||||
gleif_ra_code: "RA000439"
|
||||
description: "Netherlands commercial register"
|
||||
legal_jurisdiction:
|
||||
slot_uri: gleif-base:hasLegalJurisdiction
|
||||
description: >-
|
||||
Jurisdiction of legal formation and registration.
|
||||
Links to Jurisdiction class.
|
||||
|
||||
gleif-base:hasLegalJurisdiction - "The jurisdiction of legal formation
|
||||
and registration of the entity"
|
||||
|
||||
For most entities, this is the country. For federal systems (USA, Germany),
|
||||
this may be a state/region.
|
||||
range: Jurisdiction
|
||||
examples:
|
||||
- value:
|
||||
jurisdiction_id: "NL"
|
||||
jurisdiction_type: "NATIONAL"
|
||||
country:
|
||||
alpha_2: "NL"
|
||||
alpha_3: "NLD"
|
||||
description: "Netherlands national jurisdiction"
|
||||
dissolution_date:
|
||||
slot_uri: schema:dissolutionDate
|
||||
description: >-
|
||||
|
|
@ -337,12 +385,33 @@ classes:
|
|||
registration_numbers:
|
||||
- number: "41215422"
|
||||
type: "KvK"
|
||||
trade_register:
|
||||
register_id: "NL-HR"
|
||||
register_name: "Commercial Register"
|
||||
register_name_local: "Handelsregister"
|
||||
temporal_validity:
|
||||
begin_of_the_begin: "1885-07-01"
|
||||
registration_authority:
|
||||
name: "Kamer van Koophandel"
|
||||
name: "Chamber of Commerce"
|
||||
name_local: "Kamer van Koophandel"
|
||||
abbreviation: "KvK"
|
||||
jurisdiction: "NL"
|
||||
gleif_ra_code: "RA000439"
|
||||
jurisdiction:
|
||||
jurisdiction_id: "NL"
|
||||
jurisdiction_type: "NATIONAL"
|
||||
primary_register:
|
||||
register_id: "NL-HR"
|
||||
register_name: "Commercial Register"
|
||||
register_name_local: "Handelsregister"
|
||||
gleif_ra_code: "RA000439"
|
||||
register_type: "COMMERCIAL"
|
||||
legal_jurisdiction:
|
||||
jurisdiction_id: "NL"
|
||||
jurisdiction_type: "NATIONAL"
|
||||
country:
|
||||
alpha_2: "NL"
|
||||
alpha_3: "NLD"
|
||||
legal_system_type: "CIVIL_LAW"
|
||||
legal_status:
|
||||
status_code: "ACTIVE"
|
||||
status_name: "Active"
|
||||
|
|
|
|||
248
schemas/20251121/linkml/modules/classes/Jurisdiction.yaml
Normal file
248
schemas/20251121/linkml/modules/classes/Jurisdiction.yaml
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
# Jurisdiction Class - Legal/Administrative Jurisdiction
|
||||
# Represents the geographic area subject to a specific set of laws and governance
|
||||
#
|
||||
# GLEIF alignment: gleif-base:hasLegalJurisdiction, gleif-base:hasCoverageArea
|
||||
# Uses lcc-cr:GeographicRegion (OMG Languages Countries and Codes) from GLEIF
|
||||
#
|
||||
# Used for:
|
||||
# - TradeRegister.jurisdiction: Geographic scope of business register
|
||||
# - RegistrationAuthority.jurisdiction: Territory where authority operates
|
||||
# - LegalForm.jurisdiction: Where specific legal forms are valid
|
||||
#
|
||||
# Design principle: Jurisdictions are LEGAL boundaries, not just geographic
|
||||
# A jurisdiction can be a country, subdivision, or supranational region (e.g., EU)
|
||||
|
||||
id: https://nde.nl/ontology/hc/class/jurisdiction
|
||||
name: jurisdiction
|
||||
title: Jurisdiction Class
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
gleif-base: https://www.gleif.org/ontology/Base/
|
||||
lcc-cr: https://www.omg.org/spec/LCC/Countries/CountryRepresentation/
|
||||
schema: http://schema.org/
|
||||
|
||||
imports:
|
||||
- linkml:types
|
||||
- ../metadata
|
||||
- ./Country
|
||||
- ./Subregion
|
||||
- ./Settlement
|
||||
- ../slots/country
|
||||
- ../slots/subregion
|
||||
- ../slots/settlement
|
||||
- ../slots/description
|
||||
|
||||
classes:
|
||||
Jurisdiction:
|
||||
class_uri: lcc-cr:GeographicRegion
|
||||
description: >-
|
||||
A distinct area subject to a government and set of laws.
|
||||
|
||||
Jurisdictions define the LEGAL scope of registration authorities and trade registers.
|
||||
Unlike pure geographic classes (Country, Subregion, Settlement), Jurisdiction captures
|
||||
the legal framework applicable to organizations.
|
||||
|
||||
**Ontology Alignment:**
|
||||
|
||||
- lcc-cr:GeographicRegion - OMG Languages Countries and Codes (used by GLEIF)
|
||||
- gleif-base:hasLegalJurisdiction - links entities to their jurisdiction of registration
|
||||
- gleif-base:hasCoverageArea - geographic scope of registration authorities
|
||||
|
||||
**Types of Jurisdiction:**
|
||||
|
||||
1. **National Jurisdiction**: Entire country (e.g., Netherlands, Japan)
|
||||
- Maps to Country class
|
||||
- Most common for trade registers
|
||||
|
||||
2. **Subnational Jurisdiction**: State, province, region (e.g., Bavaria, New York)
|
||||
- Maps to Subregion class
|
||||
- Common for federal systems (USA, Germany, Canada, Australia)
|
||||
|
||||
3. **Municipal Jurisdiction**: City-level (e.g., City of London, Hong Kong)
|
||||
- Maps to Settlement class
|
||||
- For city-states and special administrative regions
|
||||
|
||||
4. **Supranational Jurisdiction**: Multi-country (e.g., European Union, OHADA)
|
||||
- Represented via the supranational_code slot (e.g., "EU", "OHADA")
|
||||
- Relevant for EU-wide legal forms (SE, SCE, EEIG)
|
||||
|
||||
**GLEIF Integration:**
|
||||
|
||||
This class aligns with GLEIF ontology concepts:
|
||||
- gleif-base:hasLegalJurisdiction - jurisdiction where entity is registered
|
||||
- gleif-base:hasCoverageArea - geographic scope of registration authority
|
||||
|
||||
**Examples:**
|
||||
|
||||
- Netherlands (national): authority=KvK, covers all NL legal entities
|
||||
- Bavaria (subnational): Handelsregister München, covers Bavarian businesses
|
||||
- Hong Kong (special region): Companies Registry, covers HK companies
|
||||
- European Union (supranational): European Company (SE) registration
|
||||
|
||||
See also:
|
||||
- TradeRegister: Business registers operating within a jurisdiction
|
||||
- RegistrationAuthority: Organizations maintaining registers
|
||||
- LegalForm: Legal forms valid within specific jurisdictions
|
||||
|
||||
exact_mappings:
|
||||
- lcc-cr:GeographicRegion # OMG LCC geographic region
|
||||
close_mappings:
|
||||
- schema:AdministrativeArea # Schema.org administrative area
|
||||
related_mappings:
|
||||
- gleif-base:hasLegalJurisdiction # GLEIF property for jurisdiction
|
||||
- gleif-base:hasCoverageArea # GLEIF property for coverage area
|
||||
|
||||
slots:
|
||||
- jurisdiction_id
|
||||
- jurisdiction_type
|
||||
- country
|
||||
- subregion
|
||||
- settlement
|
||||
- supranational_code
|
||||
- gleif_jurisdiction_code
|
||||
- legal_system_type
|
||||
- description
|
||||
|
||||
slot_usage:
|
||||
jurisdiction_id:
|
||||
identifier: true
|
||||
required: true
|
||||
description: Unique identifier for this jurisdiction record
|
||||
jurisdiction_type:
|
||||
required: true
|
||||
description: Type of jurisdiction (national, subnational, municipal, supranational)
|
||||
country:
|
||||
required: false
|
||||
description: >-
|
||||
Country for national/subnational/municipal jurisdictions.
|
||||
Not applicable for supranational jurisdictions (e.g., EU).
|
||||
subregion:
|
||||
required: false
|
||||
description: >-
|
||||
Subdivision for subnational jurisdictions.
|
||||
Required when jurisdiction_type is SUBNATIONAL.
|
||||
settlement:
|
||||
required: false
|
||||
description: >-
|
||||
Settlement for municipal jurisdictions.
|
||||
Required when jurisdiction_type is MUNICIPAL.
|
||||
supranational_code:
|
||||
required: false
|
||||
description: >-
|
||||
Code for supranational entities (EU, OHADA, etc.).
|
||||
Required when jurisdiction_type is SUPRANATIONAL.
|
||||
|
||||
slots:
|
||||
jurisdiction_id:
|
||||
description: >-
|
||||
Unique identifier for this jurisdiction.
|
||||
|
||||
Recommended format: ISO-based hierarchical identifier
|
||||
|
||||
Examples:
|
||||
- "NL" (Netherlands national)
|
||||
- "DE-BY" (Bavaria subnational)
|
||||
- "HK" (Hong Kong municipal/special region)
|
||||
- "EU" (European Union supranational)
|
||||
range: string
|
||||
slot_uri: schema:identifier
|
||||
|
||||
jurisdiction_type:
|
||||
description: >-
|
||||
Classification of jurisdiction level.
|
||||
|
||||
Values:
|
||||
- NATIONAL: Country-level jurisdiction (most common)
|
||||
- SUBNATIONAL: State/province/region level
|
||||
- MUNICIPAL: City/municipality level
|
||||
- SUPRANATIONAL: Multi-country jurisdiction (EU, OHADA)
|
||||
|
||||
This determines which geographic slot is required:
|
||||
- NATIONAL: country required
|
||||
- SUBNATIONAL: country + subregion required
|
||||
- MUNICIPAL: country + settlement required (subregion optional)
|
||||
- SUPRANATIONAL: supranational_code required
|
||||
range: JurisdictionTypeEnum
|
||||
required: true
|
||||
slot_uri: schema:additionalType
|
||||
|
||||
supranational_code:
|
||||
description: >-
|
||||
Code for supranational jurisdiction.
|
||||
|
||||
Used for multi-country legal frameworks:
|
||||
- "EU": European Union (for SE, SCE, EEIG)
|
||||
- "OHADA": Organisation for the Harmonisation of Business Law in Africa
|
||||
- "BENELUX": Benelux (for certain cross-border structures)
|
||||
|
||||
Only applicable when jurisdiction_type is SUPRANATIONAL.
|
||||
range: string
|
||||
slot_uri: schema:identifier
|
||||
|
||||
gleif_jurisdiction_code:
|
||||
description: >-
|
||||
GLEIF Registration Authority List (RAL) jurisdiction code.
|
||||
|
||||
GLEIF maintains a list of 1,050+ registration authorities across 232 jurisdictions.
|
||||
The jurisdiction code is typically an ISO 3166-1 alpha-2 country code or a
|
||||
composite code for subnational registries.
|
||||
|
||||
Format: ISO 3166-1 alpha-2 country code, or an ISO 3166-2 subdivision code for subnational registries (e.g., "NL", "DE", "US-DE" for Delaware)
|
||||
|
||||
Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
|
||||
range: string
|
||||
slot_uri: gleif-base:hasCoverageArea
|
||||
|
||||
legal_system_type:
|
||||
description: >-
|
||||
Type of legal system in this jurisdiction.
|
||||
|
||||
Values:
|
||||
- CIVIL_LAW: Continental European tradition (Germany, Netherlands, Japan)
|
||||
- COMMON_LAW: English tradition (UK, USA, Australia)
|
||||
- MIXED: Combination (South Africa, Louisiana, Quebec)
|
||||
- RELIGIOUS: Based on religious law (some Middle Eastern jurisdictions)
|
||||
- CUSTOMARY: Based on local custom
|
||||
|
||||
This affects which legal forms are available and how entities are registered.
|
||||
range: LegalSystemTypeEnum
|
||||
required: false
|
||||
slot_uri: schema:category
|
||||
|
||||
enums:
|
||||
JurisdictionTypeEnum:
|
||||
description: Classification of jurisdiction levels
|
||||
permissible_values:
|
||||
NATIONAL:
|
||||
description: Country-level jurisdiction
|
||||
SUBNATIONAL:
|
||||
description: State, province, or region level jurisdiction
|
||||
MUNICIPAL:
|
||||
description: City or municipality level jurisdiction
|
||||
SUPRANATIONAL:
|
||||
description: Multi-country or international jurisdiction
|
||||
|
||||
LegalSystemTypeEnum:
|
||||
description: Classification of legal systems
|
||||
permissible_values:
|
||||
CIVIL_LAW:
|
||||
description: >-
|
||||
Continental European legal tradition. Codified law.
|
||||
Countries: Germany, France, Netherlands, Japan, most of Europe and Latin America.
|
||||
COMMON_LAW:
|
||||
description: >-
|
||||
English legal tradition. Case law and precedent.
|
||||
Countries: UK, USA, Canada (except Quebec), Australia, India.
|
||||
MIXED:
|
||||
description: >-
|
||||
Combination of civil and common law traditions.
|
||||
Examples: South Africa, Louisiana, Quebec, Scotland.
|
||||
RELIGIOUS:
|
||||
description: >-
|
||||
Based on religious law (Sharia, Canon Law, etc.).
|
||||
Examples: Saudi Arabia, Iran, Vatican.
|
||||
CUSTOMARY:
|
||||
description: >-
|
||||
Based on local customs and traditions.
|
||||
Examples: Many African and Pacific Island nations.
|
||||
|
|
@ -1,5 +1,14 @@
|
|||
# Registration Information Classes
|
||||
# Classes for organizational registration details
|
||||
#
|
||||
# GLEIF alignment:
|
||||
# - gleif-base:RegistrationAuthority - Organization maintaining a registry
|
||||
# - gleif-base:RegistryIdentifier - Identifier associated with a registry entry
|
||||
# - gleif-ra:RegistrationAuthorityCode - Reference code for registration authority
|
||||
#
|
||||
# ROV alignment:
|
||||
# - rov:registration - Relationship between legal entity and authority
|
||||
# - rov:RegisteredOrganization - Organization that is legally registered
|
||||
|
||||
id: https://nde.nl/ontology/hc/class/RegistrationInfo
|
||||
name: registration-info-classes
|
||||
|
|
@ -7,28 +16,65 @@ title: Registration Information Classes
|
|||
|
||||
description: >-
|
||||
Classes for capturing organizational registration details including
|
||||
registration numbers, dates, and authorities.
|
||||
registration numbers, dates, authorities, and trade registers.
|
||||
|
||||
This module provides the foundation for tracking legal entity registrations
|
||||
across multiple jurisdictions worldwide.
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
rov: http://www.w3.org/ns/regorg#
|
||||
gleif-base: https://www.gleif.org/ontology/Base/
|
||||
gleif-ra: https://www.gleif.org/ontology/RegistrationAuthority/
|
||||
org: http://www.w3.org/ns/org#
|
||||
schema: http://schema.org/
|
||||
|
||||
imports:
|
||||
- linkml:types
|
||||
- ../metadata
|
||||
- ./TimeSpan
|
||||
- ./Jurisdiction
|
||||
- ../slots/jurisdiction
|
||||
- ../slots/description
|
||||
- ../slots/website
|
||||
|
||||
classes:
|
||||
RegistrationNumber:
|
||||
class_uri: rov:registration
|
||||
class_uri: gleif-base:RegistryIdentifier
|
||||
description: >-
|
||||
Official registration number assigned by an authority.
|
||||
Examples: Chamber of Commerce number, charity registration number.
|
||||
Official registration number assigned by an authority and recorded in a trade register.
|
||||
|
||||
Maps to:
|
||||
- rov:registration (Registered Organizations Vocabulary)
|
||||
**Ontology Alignment:**
|
||||
|
||||
- gleif-base:RegistryIdentifier - "An identifier associated with an entry in a registry,
|
||||
i.e., one that provides an index to the registry for the recorded item."
|
||||
- rov:registration - "The registration is a fundamental relationship between a legal
|
||||
entity and the authority with which it is registered"
|
||||
|
||||
**Examples:**
|
||||
|
||||
- Dutch KvK number: "41215422" (Rijksmuseum)
|
||||
- UK Companies House: "RC000024" or "00000224"
|
||||
- German HRB: "HRB 123456 B" (Berlin)
|
||||
- Irish charity number: "CHY 4700"
|
||||
- US EIN: "12-3456789"
|
||||
|
||||
**Key Properties:**
|
||||
|
||||
- number: The actual registration string
|
||||
- type: Classification of the registration (KvK, EIN, CHY, etc.)
|
||||
- trade_register: The register where this number is recorded
|
||||
- temporal_validity: When this registration was/is valid
|
||||
|
||||
See also:
|
||||
- TradeRegister: The register that issued this number
|
||||
- RegistrationAuthority: The organization maintaining the register
|
||||
|
||||
exact_mappings:
|
||||
- gleif-base:RegistryIdentifier
|
||||
close_mappings:
|
||||
- rov:registration
|
||||
- schema:identifier
|
||||
- tooi:organisatieIdentificatie # TOOI for Dutch entities
|
||||
|
||||
attributes:
|
||||
id:
|
||||
|
|
@ -39,38 +85,100 @@ classes:
|
|||
required: true
|
||||
|
||||
number:
|
||||
slot_uri: rov:registration
|
||||
slot_uri: gleif-base:hasTag
|
||||
description: >-
|
||||
The actual registration number/code.
|
||||
Examples: "41215422" (KvK number), "CHY 4700" (Irish charity number)
|
||||
|
||||
gleif-base:hasTag - "Has a unique combination of alphanumeric characters
|
||||
corresponding to the identifier"
|
||||
|
||||
Examples:
|
||||
- "41215422" (KvK number)
|
||||
- "CHY 4700" (Irish charity number)
|
||||
- "HRB 123456 B" (German commercial register)
|
||||
range: string
|
||||
required: true
|
||||
|
||||
type:
|
||||
slot_uri: schema:additionalType
|
||||
description: >-
|
||||
Type of registration number.
|
||||
Examples: "KvK" (Dutch Chamber of Commerce), "EIN" (US Employer ID)
|
||||
Type/scheme of registration number.
|
||||
|
||||
Examples:
|
||||
- "KvK" (Dutch Chamber of Commerce)
|
||||
- "EIN" (US Employer ID)
|
||||
- "CRN" (UK Company Registration Number)
|
||||
- "CHY" (Irish Charity Number)
|
||||
- "HRB" (German Handelsregister B - for GmbH, AG)
|
||||
- "HRA" (German Handelsregister A - for partnerships)
|
||||
range: string
|
||||
required: true
|
||||
|
||||
trade_register:
|
||||
slot_uri: gleif-base:isRegisteredIn
|
||||
description: >-
|
||||
The trade register where this number is recorded.
|
||||
|
||||
gleif-base:isRegisteredIn - "indicates the registry that something is registered in"
|
||||
|
||||
Links to TradeRegister class.
|
||||
range: TradeRegister
|
||||
required: false
|
||||
|
||||
temporal_validity:
|
||||
slot_uri: schema:temporalCoverage
|
||||
description: >-
|
||||
Time period during which this registration number is/was valid.
|
||||
Captures registration date through deregistration (if applicable).
|
||||
|
||||
Uses TimeSpan class for fuzzy temporal boundaries.
|
||||
range: TimeSpan
|
||||
required: true
|
||||
|
||||
RegistrationAuthority:
|
||||
class_uri: rov:hasRegisteredOrganization
|
||||
class_uri: gleif-base:RegistrationAuthority
|
||||
description: >-
|
||||
Authority that maintains official registrations of organizations.
|
||||
Examples: Chamber of Commerce, Charity Commission, Companies House.
|
||||
|
||||
Maps to:
|
||||
- rov:RegisteredOrganization (the registering authority)
|
||||
**Ontology Alignment:**
|
||||
|
||||
- gleif-base:RegistrationAuthority - "An organization that is responsible for
|
||||
maintaining a registry and provides registration services."
|
||||
|
||||
A RegistrationAuthority is the **organization** that maintains one or more
|
||||
trade registers, distinct from the TradeRegister itself (the database/system).
|
||||
|
||||
**Key Distinction:**
|
||||
- RegistrationAuthority: The organization (e.g., "Kamer van Koophandel", "Companies House")
|
||||
- TradeRegister: The register/database (e.g., "Handelsregister", "Companies Register")
|
||||
|
||||
**Examples:**
|
||||
|
||||
- Netherlands: Kamer van Koophandel (KvK) - GLEIF RA000439
|
||||
- UK: Companies House - GLEIF RA000585
|
||||
- Germany: Amtsgericht München (local court) - GLEIF RA000385
|
||||
- Japan: Legal Affairs Bureau (法務局) - GLEIF RA000429
|
||||
- Ireland: Companies Registration Office (CRO) - GLEIF RA000421
|
||||
|
||||
**GLEIF Integration:**
|
||||
|
||||
GLEIF maintains the Registration Authorities List (RAL) with 1,050+ authorities.
|
||||
Each authority has a unique RA code (format: RA followed by 6 digits).
|
||||
|
||||
Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
|
||||
|
||||
See also:
|
||||
- TradeRegister: Registers maintained by this authority
|
||||
- Jurisdiction: Geographic/legal scope of the authority
|
||||
- RegistrationNumber: Numbers issued through this authority's registers
|
||||
|
||||
exact_mappings:
|
||||
- gleif-base:RegistrationAuthority
|
||||
close_mappings:
|
||||
- org:Organization
|
||||
- schema:GovernmentOrganization
|
||||
related_mappings:
|
||||
- rov:hasRegisteredOrganization
|
||||
|
||||
attributes:
|
||||
id:
|
||||
|
|
@ -81,31 +189,93 @@ classes:
|
|||
required: true
|
||||
|
||||
name:
|
||||
slot_uri: schema:name
|
||||
slot_uri: gleif-base:hasNameTranslatedEnglish
|
||||
description: >-
|
||||
Official name of the registration authority.
|
||||
Examples: "Kamer van Koophandel", "Companies House", "IRS"
|
||||
Official name of the registration authority in English.
|
||||
|
||||
gleif-base:hasNameTranslatedEnglish - "The name used to refer to a person
|
||||
or organization, translated into English."
|
||||
|
||||
Examples:
|
||||
- "Chamber of Commerce" (Netherlands)
|
||||
- "Companies House" (UK)
|
||||
- "Legal Affairs Bureau" (Japan)
|
||||
range: string
|
||||
required: true
|
||||
|
||||
name_local:
|
||||
slot_uri: gleif-base:hasNameLegalLocal
|
||||
description: >-
|
||||
Official name in local language.
|
||||
|
||||
gleif-base:hasNameLegalLocal - "The name used to refer to an person or
|
||||
organization in legal communications in local alphabet"
|
||||
|
||||
Examples:
|
||||
- "Kamer van Koophandel" (Dutch)
|
||||
- "法務局" (Japanese)
|
||||
- "Amtsgericht" (German)
|
||||
range: string
|
||||
|
||||
abbreviation:
|
||||
slot_uri: schema:alternateName
|
||||
slot_uri: gleif-base:hasAbbreviationLocal
|
||||
description: >-
|
||||
Common abbreviation.
|
||||
Examples: "KvK", "CH", "IRS"
|
||||
|
||||
gleif-base:hasAbbreviationLocal - "An abbreviation using a language local
|
||||
to the entity identified"
|
||||
|
||||
Examples: "KvK", "CH", "CRO"
|
||||
range: string
|
||||
|
||||
jurisdiction:
|
||||
slot_uri: schema:areaServed
|
||||
slot_uri: gleif-base:hasCoverageArea
|
||||
description: >-
|
||||
Geographic jurisdiction of the authority.
|
||||
Usually a country code, may be regional for federal systems.
|
||||
range: string
|
||||
Geographic/legal jurisdiction of the authority.
|
||||
|
||||
gleif-base:hasCoverageArea - "Indicates a geographic region in which some
|
||||
service is provided, or to which some policy applies"
|
||||
|
||||
Links to Jurisdiction class.
|
||||
range: Jurisdiction
|
||||
required: true
|
||||
|
||||
gleif_ra_code:
|
||||
slot_uri: schema:identifier
|
||||
description: >-
|
||||
GLEIF Registration Authority code.
|
||||
|
||||
Format: "RA" followed by 6 digits
|
||||
|
||||
Examples:
|
||||
- RA000439: Netherlands KvK
|
||||
- RA000585: UK Companies House
|
||||
- RA000385: Germany Amtsgericht München
|
||||
|
||||
Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
|
||||
range: string
|
||||
pattern: "^RA[0-9]{6}$"
|
||||
|
||||
registers:
|
||||
slot_uri: gleif-base:isManagedBy
|
||||
description: >-
|
||||
Trade registers maintained by this authority.
|
||||
|
||||
Inverse of TradeRegister.maintained_by.
|
||||
|
||||
Examples:
|
||||
- KvK maintains: Handelsregister
|
||||
- Companies House maintains: Companies Register, LLP Register
|
||||
range: TradeRegister
|
||||
multivalued: true
|
||||
inlined: false
|
||||
|
||||
website:
|
||||
slot_uri: schema:url
|
||||
description: Official website of the registration authority
|
||||
slot_uri: gleif-base:hasWebsite
|
||||
description: >-
|
||||
Official website of the registration authority.
|
||||
|
||||
gleif-base:hasWebsite - "A website associated with something"
|
||||
range: uri
|
||||
|
||||
registration_types:
|
||||
|
|
@ -163,11 +333,24 @@ classes:
|
|||
range: string
|
||||
|
||||
LegalStatus:
|
||||
class_uri: schema:status
|
||||
class_uri: gleif-base:RegistrationStatus
|
||||
description: >-
|
||||
Legal status of an organization (active, dissolved, suspended, etc.).
|
||||
|
||||
**Ontology Alignment:**
|
||||
|
||||
- gleif-base:RegistrationStatus - "A lifecycle stage indicating the status of a
|
||||
given registration of something, such as a business or legal entity."
|
||||
- gleif-base:EntityStatus - ACTIVE or INACTIVE (GLEIF enumeration)
|
||||
|
||||
Status definitions vary by jurisdiction and legal framework.
|
||||
|
||||
exact_mappings:
|
||||
- gleif-base:RegistrationStatus
|
||||
close_mappings:
|
||||
- gleif-base:EntityStatus
|
||||
- schema:status
|
||||
|
||||
attributes:
|
||||
id:
|
||||
identifier: true
|
||||
|
|
@ -177,10 +360,16 @@ classes:
|
|||
required: true
|
||||
|
||||
status_code:
|
||||
slot_uri: schema:codeValue
|
||||
slot_uri: gleif-base:hasTag
|
||||
description: >-
|
||||
Standardized status code.
|
||||
|
||||
gleif-base:hasTag - "Has a unique combination of alphanumeric characters
|
||||
corresponding to the identifier"
|
||||
|
||||
Examples: "ACTIVE", "DISSOLVED", "SUSPENDED", "MERGED"
|
||||
|
||||
GLEIF defines: ACTIVE, INACTIVE
|
||||
range: string
|
||||
required: true
|
||||
pattern: "^[A-Z_]+$"
|
||||
|
|
@ -203,6 +392,10 @@ classes:
|
|||
required: true
|
||||
|
||||
jurisdiction:
|
||||
slot_uri: schema:legislationJurisdiction
|
||||
description: Legal jurisdiction where this status is defined
|
||||
range: string
|
||||
slot_uri: gleif-base:hasLegalJurisdiction
|
||||
description: >-
|
||||
Legal jurisdiction where this status is defined.
|
||||
|
||||
gleif-base:hasLegalJurisdiction - "The jurisdiction of legal formation
|
||||
and registration of the entity"
|
||||
range: Jurisdiction
|
||||
|
|
|
|||
|
|
@ -16,6 +16,8 @@ imports:
|
|||
- linkml:types
|
||||
- Country
|
||||
- Subregion
|
||||
- ../slots/country
|
||||
- ../slots/subregion
|
||||
|
||||
classes:
|
||||
Settlement:
|
||||
|
|
|
|||
303
schemas/20251121/linkml/modules/classes/TradeRegister.yaml
Normal file
303
schemas/20251121/linkml/modules/classes/TradeRegister.yaml
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
# TradeRegister Class - Business/Commercial Register
|
||||
# Official register where organizations are formally registered
|
||||
#
|
||||
# GLEIF alignment: gleif-ra:BusinessRegistry, gleif-base:Registry
|
||||
#
|
||||
# Used for:
|
||||
# - RegistrationNumber.trade_register: Which register issued the number
|
||||
# - CustodianLegalStatus.primary_register: Where entity is registered
|
||||
#
|
||||
# GLEIF RAL (Registration Authorities List) contains 1,050+ registers in 232 jurisdictions
|
||||
# See: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
|
||||
|
||||
id: https://nde.nl/ontology/hc/class/trade-register
|
||||
name: trade-register
|
||||
title: TradeRegister Class
|
||||
|
||||
prefixes:
|
||||
linkml: https://w3id.org/linkml/
|
||||
gleif-ra: https://www.gleif.org/ontology/RegistrationAuthority/
|
||||
gleif-base: https://www.gleif.org/ontology/Base/
|
||||
schema: http://schema.org/
|
||||
|
||||
imports:
|
||||
- linkml:types
|
||||
- ../metadata
|
||||
- ./Jurisdiction
|
||||
- ../slots/jurisdiction
|
||||
- ../slots/description
|
||||
- ../slots/website
|
||||
|
||||
classes:
|
||||
TradeRegister:
|
||||
class_uri: gleif-ra:BusinessRegistry
|
||||
description: >-
|
||||
Official business or trade register where organizations are formally registered.
|
||||
|
||||
A TradeRegister is the **register itself** (the database/system), distinct from
|
||||
the RegistrationAuthority (the organization maintaining it). In many jurisdictions
|
||||
these are tightly coupled (KvK maintains Handelsregister), but conceptually separate.
|
||||
|
||||
**Ontology Alignment:**
|
||||
|
||||
- gleif-ra:BusinessRegistry - "a registry for registering and maintaining information
|
||||
about business entities" (subclass of gleif-base:Registry)
|
||||
- gleif-base:Registry - "A system, typically an information system, that records
|
||||
the registration of items"
|
||||
|
||||
**Key Distinction:**
|
||||
- TradeRegister: The register/database (e.g., "Handelsregister", "Companies Register")
|
||||
- RegistrationAuthority: The organization (e.g., "Kamer van Koophandel", "Companies House")
|
||||
|
||||
**GLEIF Integration:**
|
||||
|
||||
GLEIF maintains the Registration Authorities List (RAL) with 1,050+ business registers
|
||||
across 232 jurisdictions worldwide. Each register has a unique RA code.
|
||||
|
||||
Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
|
||||
|
||||
**Examples by Country:**
|
||||
|
||||
**Netherlands (NL)**:
|
||||
- Handelsregister (HR) - General commercial register
|
||||
- Maintained by: Kamer van Koophandel (KvK)
|
||||
- GLEIF RA Code: RA000439
|
||||
|
||||
**Germany (DE)**:
|
||||
- Handelsregister (HRB/HRA) - Commercial register
|
||||
- Maintained by: Local courts (Amtsgericht)
|
||||
- GLEIF RA Codes: RA000385 (Munich), RA000386 (Hamburg), etc.
|
||||
|
||||
**United Kingdom (GB)**:
|
||||
- Companies Register
|
||||
- Maintained by: Companies House
|
||||
- GLEIF RA Code: RA000585
|
||||
|
||||
**United States (US)**:
|
||||
- State-level registers (e.g., Delaware Division of Corporations)
|
||||
- GLEIF RA Codes: RA000598 (Delaware), RA000658 (New York), etc.
|
||||
|
||||
**Japan (JP)**:
|
||||
- 商業登記簿 (Commercial Registration Book)
|
||||
- Maintained by: Legal Affairs Bureau (法務局)
|
||||
- GLEIF RA Code: RA000429
|
||||
|
||||
**Register Types:**
|
||||
|
||||
1. **Commercial Register**: For-profit businesses (GmbH, BV, Ltd, Inc)
|
||||
2. **Foundation Register**: Non-profit foundations (Stichting, Stiftung)
|
||||
3. **Association Register**: Voluntary associations (Vereniging, Verein)
|
||||
4. **Charity Register**: Registered charities (UK Charity Commission)
|
||||
5. **Cultural Register**: Heritage institutions (some countries)
|
||||
|
||||
See also:
|
||||
- Jurisdiction: Geographic/legal scope of the register
|
||||
- RegistrationAuthority: Organization maintaining the register
|
||||
- RegistrationNumber: Individual registration issued by this register
|
||||
|
||||
exact_mappings:
|
||||
- gleif-ra:BusinessRegistry # GLEIF business registry class
|
||||
close_mappings:
|
||||
- gleif-base:Registry # GLEIF base registry class
|
||||
- schema:GovernmentService # Registers are government services
|
||||
related_mappings:
|
||||
- rov:RegisteredOrganization # Organizations registered in these registers
|
||||
|
||||
slots:
|
||||
- register_id
|
||||
- register_name
|
||||
- register_name_local
|
||||
- register_abbreviation
|
||||
- register_type
|
||||
- jurisdiction
|
||||
- maintained_by
|
||||
- gleif_ra_code
|
||||
- website
|
||||
- api_endpoint
|
||||
- identifier_format
|
||||
- description
|
||||
|
||||
slot_usage:
|
||||
register_id:
|
||||
identifier: true
|
||||
required: true
|
||||
description: Unique identifier for this register
|
||||
register_name:
|
||||
required: true
|
||||
description: Register name in English
|
||||
register_name_local:
|
||||
required: false
|
||||
description: Register name in local language (e.g., "Handelsregister")
|
||||
register_abbreviation:
|
||||
required: false
|
||||
description: Common abbreviation (e.g., "HR", "KvK", "CH")
|
||||
register_type:
|
||||
required: true
|
||||
description: Type of organizations this register handles
|
||||
jurisdiction:
|
||||
required: true
|
||||
description: Jurisdiction where this register operates
|
||||
maintained_by:
|
||||
required: true
|
||||
description: Registration authority that maintains this register
|
||||
gleif_ra_code:
|
||||
required: false
|
||||
description: >-
|
||||
GLEIF Registration Authority code (if in GLEIF RAL).
|
||||
Format: RA followed by 6 digits (e.g., RA000439)
|
||||
|
||||
slots:
|
||||
register_id:
|
||||
description: >-
|
||||
Unique identifier for this trade register.
|
||||
|
||||
Recommended format: {country_code}-{register_abbreviation}
|
||||
|
||||
Examples:
|
||||
- "NL-HR" (Netherlands Handelsregister)
|
||||
- "DE-HRB-MUC" (Germany HRB Munich)
|
||||
- "GB-CH" (UK Companies House)
|
||||
- "US-DE" (Delaware Division of Corporations)
|
||||
range: string
|
||||
slot_uri: schema:identifier
|
||||
|
||||
register_name:
|
||||
description: >-
|
||||
Official name of the trade register in English.
|
||||
|
||||
Examples:
|
||||
- "Commercial Register" (Netherlands, Germany)
|
||||
- "Companies Register" (UK)
|
||||
- "Division of Corporations" (Delaware)
|
||||
- "Legal Affairs Bureau Commercial Registration" (Japan)
|
||||
range: string
|
||||
required: true
|
||||
slot_uri: gleif-base:hasNameTranslatedEnglish
|
||||
|
||||
register_name_local:
|
||||
description: >-
|
||||
Official name in local language.
|
||||
|
||||
Examples:
|
||||
- "Handelsregister" (Netherlands, Germany)
|
||||
- "Registre du commerce et des sociétés" (France)
|
||||
- "商業登記簿" (Japan)
|
||||
- "Торговый реестр" (Russia)
|
||||
range: string
|
||||
slot_uri: gleif-base:hasNameLegalLocal
|
||||
|
||||
register_abbreviation:
|
||||
description: >-
|
||||
Common abbreviation for the register.
|
||||
|
||||
Examples:
|
||||
- "HR" (Handelsregister)
|
||||
- "KvK" (Kamer van Koophandel - technically the authority, but commonly used)
|
||||
- "CH" (Companies House)
|
||||
- "RCS" (Registre du commerce et des sociétés)
|
||||
range: string
|
||||
slot_uri: gleif-base:hasAbbreviationLocal
|
||||
|
||||
register_type:
|
||||
description: >-
|
||||
Type of organizations this register handles.
|
||||
|
||||
Many jurisdictions have separate registers for different entity types.
|
||||
|
||||
Values:
|
||||
- COMMERCIAL: For-profit businesses (default)
|
||||
- FOUNDATION: Non-profit foundations
|
||||
- ASSOCIATION: Voluntary associations
|
||||
- CHARITY: Registered charities
|
||||
- CULTURAL: Heritage/cultural institutions
|
||||
- MIXED: Multiple entity types in one register
|
||||
range: RegisterTypeEnum
|
||||
required: true
|
||||
slot_uri: schema:category
|
||||
|
||||
maintained_by:
|
||||
description: >-
|
||||
Registration authority that maintains this register.
|
||||
|
||||
Links to RegistrationAuthority class.
|
||||
|
||||
Examples:
|
||||
- KvK (Kamer van Koophandel) maintains NL Handelsregister
|
||||
- Companies House maintains UK Companies Register
|
||||
- Local courts (Amtsgericht) maintain German Handelsregister
|
||||
range: RegistrationAuthority
|
||||
required: true
|
||||
slot_uri: gleif-base:isManagedBy
|
||||
|
||||
gleif_ra_code:
|
||||
description: >-
|
||||
GLEIF Registration Authority List (RAL) code.
|
||||
|
||||
GLEIF maintains a comprehensive list of 1,050+ registration authorities
|
||||
and their registers worldwide. Each has a unique RA code.
|
||||
|
||||
Format: "RA" followed by 6 digits
|
||||
|
||||
Examples:
|
||||
- RA000439: Netherlands Kamer van Koophandel
|
||||
- RA000585: UK Companies House
|
||||
- RA000385: Germany Handelsregister München
|
||||
- RA000429: Japan Legal Affairs Bureau
|
||||
|
||||
Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
|
||||
range: string
|
||||
pattern: "^RA[0-9]{6}$"
|
||||
|
||||
identifier_format:
|
||||
description: >-
|
||||
Format specification for registration numbers issued by this register.
|
||||
|
||||
Use regex pattern or description.
|
||||
|
||||
Examples:
|
||||
- NL KvK: "[0-9]{8}" (8 digits)
|
||||
- UK Companies House: "[A-Z]{2}[0-9]{6}|[0-9]{8}" (2 letters + 6 digits OR 8 digits)
|
||||
- German HRB: "HRB [0-9]+" (HRB prefix + number)
|
||||
range: string
|
||||
slot_uri: schema:valuePattern
|
||||
|
||||
api_endpoint:
|
||||
description: >-
|
||||
URL of public API endpoint for querying this register.
|
||||
|
||||
Examples:
|
||||
- KvK API: https://api.kvk.nl/
|
||||
- Companies House API: https://api.company-information.service.gov.uk/
|
||||
|
||||
Many registers offer open data APIs for entity lookups.
|
||||
range: uri
|
||||
slot_uri: gleif-base:hasWebsite
|
||||
|
||||
enums:
|
||||
RegisterTypeEnum:
|
||||
description: Types of trade registers by entity category
|
||||
permissible_values:
|
||||
COMMERCIAL:
|
||||
description: >-
|
||||
Register for commercial/for-profit entities.
|
||||
Examples: GmbH (DE), BV (NL), Ltd (UK), Inc (US)
|
||||
FOUNDATION:
|
||||
description: >-
|
||||
Register for non-profit foundations.
|
||||
Examples: Stichting (NL), Stiftung (DE)
|
||||
ASSOCIATION:
|
||||
description: >-
|
||||
Register for voluntary associations.
|
||||
Examples: Vereniging (NL), Verein (DE), e.V.
|
||||
CHARITY:
|
||||
description: >-
|
||||
Register for charitable organizations.
|
||||
Examples: UK registered charities
|
||||
CULTURAL:
|
||||
description: >-
|
||||
Specialized register for cultural/heritage institutions.
|
||||
Some countries maintain separate heritage registries.
|
||||
MIXED:
|
||||
description: >-
|
||||
Single register handling multiple entity types.
|
||||
Common in smaller jurisdictions.
|
||||
37
schemas/20251121/linkml/modules/slots/country.yaml
Normal file
37
schemas/20251121/linkml/modules/slots/country.yaml
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# country slot - ISO 3166-1 country reference
|
||||
|
||||
id: https://nde.nl/ontology/hc/slot/country
|
||||
name: country
|
||||
title: Country Slot
|
||||
|
||||
description: >-
|
||||
Country where entity is located or operates.
|
||||
|
||||
Links to Country class with ISO 3166-1 alpha-2 codes.
|
||||
|
||||
Format: ISO 3166-1 alpha-2 code (e.g., "NL", "DE", "JP")
|
||||
|
||||
Use when:
|
||||
- Place is in a specific country
|
||||
- Legal form is jurisdiction-specific
|
||||
- Feature types are country-specific
|
||||
|
||||
Examples:
|
||||
- Netherlands museum → country.alpha_2 = "NL"
|
||||
- Japanese archive → country.alpha_2 = "JP"
|
||||
- German foundation → country.alpha_2 = "DE"
|
||||
|
||||
slots:
|
||||
country:
|
||||
slot_uri: schema:addressCountry
|
||||
range: Country
|
||||
required: false
|
||||
multivalued: false
|
||||
description: >-
|
||||
Country where entity is located or operates.
|
||||
Links to Country class with ISO 3166-1 alpha-2 codes.
|
||||
|
||||
comments:
|
||||
- "Uses Country class with ISO 3166-1 alpha-2/alpha-3 codes"
|
||||
- "ISO codes are authoritative, stable, and language-neutral"
|
||||
- "Country names should be resolved via external services (GeoNames, UN M49)"
|
||||
30
schemas/20251121/linkml/modules/slots/description.yaml
Normal file
30
schemas/20251121/linkml/modules/slots/description.yaml
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# description slot - General text description
|
||||
|
||||
id: https://nde.nl/ontology/hc/slot/description
|
||||
name: description
|
||||
title: Description Slot
|
||||
|
||||
description: >-
|
||||
General text description of an entity.
|
||||
|
||||
Used across multiple classes for providing human-readable descriptions
|
||||
of entities, their purpose, and characteristics.
|
||||
|
||||
slots:
|
||||
description:
|
||||
slot_uri: schema:description
|
||||
range: string
|
||||
required: false
|
||||
multivalued: false
|
||||
description: >-
|
||||
Human-readable description of the entity, its purpose, or characteristics.
|
||||
|
||||
Examples:
|
||||
- Jurisdiction: "Bavaria is a federal state in southern Germany with its own commercial register system"
|
||||
- Register: "The Handelsregister is the German commercial register maintained by local courts"
|
||||
- Authority: "The Kamer van Koophandel is the Dutch Chamber of Commerce"
|
||||
|
||||
comments:
|
||||
- "Maps to schema:description"
|
||||
- "Should be concise but informative"
|
||||
- "Language should match the context (typically English for international use)"
|
||||
45
schemas/20251121/linkml/modules/slots/jurisdiction.yaml
Normal file
45
schemas/20251121/linkml/modules/slots/jurisdiction.yaml
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
# jurisdiction slot - Legal jurisdiction reference
|
||||
|
||||
id: https://nde.nl/ontology/hc/slot/jurisdiction
|
||||
name: jurisdiction
|
||||
title: Jurisdiction Slot
|
||||
|
||||
prefixes:
|
||||
gleif-base: https://www.gleif.org/ontology/Base/
|
||||
lcc-cr: https://www.omg.org/spec/LCC/Countries/CountryRepresentation/
|
||||
schema: http://schema.org/
|
||||
|
||||
description: >-
|
||||
Legal/administrative jurisdiction where an entity operates or is registered.
|
||||
|
||||
Links to Jurisdiction class which captures:
|
||||
- National jurisdictions (e.g., Netherlands, Japan)
|
||||
- Subnational jurisdictions (e.g., Bavaria, Delaware)
|
||||
- Municipal jurisdictions (e.g., Hong Kong, City of London)
|
||||
- Supranational jurisdictions (e.g., European Union, OHADA)
|
||||
|
||||
GLEIF alignment:
|
||||
- gleif-base:hasLegalJurisdiction - jurisdiction of legal formation
|
||||
- gleif-base:hasCoverageArea - geographic scope of authority/register
|
||||
|
||||
slots:
|
||||
jurisdiction:
|
||||
slot_uri: gleif-base:hasCoverageArea
|
||||
range: Jurisdiction
|
||||
required: false
|
||||
multivalued: false
|
||||
description: >-
|
||||
Legal/administrative jurisdiction where entity operates or is registered.
|
||||
|
||||
gleif-base:hasCoverageArea - "Indicates a geographic region in which
|
||||
some service is provided, or to which some policy applies"
|
||||
|
||||
Examples:
|
||||
- Netherlands (national): KvK jurisdiction
|
||||
- Bavaria (subnational): Amtsgericht München jurisdiction
|
||||
- European Union (supranational): SE registration jurisdiction
|
||||
|
||||
comments:
|
||||
- "Maps to gleif-base:hasCoverageArea"
|
||||
- "Jurisdictions are LEGAL boundaries, not just geographic"
|
||||
- "Used for registration authorities, trade registers, and legal forms"
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
# legal_jurisdiction slot - Legal jurisdiction of formation/registration
|
||||
|
||||
id: https://nde.nl/ontology/hc/slot/legal_jurisdiction
|
||||
name: legal_jurisdiction
|
||||
title: Legal Jurisdiction Slot
|
||||
|
||||
prefixes:
|
||||
gleif-base: https://www.gleif.org/ontology/Base/
|
||||
schema: http://schema.org/
|
||||
|
||||
description: >-
|
||||
Jurisdiction of legal formation and registration.
|
||||
|
||||
Links to Jurisdiction class.
|
||||
|
||||
gleif-base:hasLegalJurisdiction - "The jurisdiction of legal formation
|
||||
and registration of the entity"
|
||||
|
||||
For most entities, this is the country. For federal systems (USA, Germany),
|
||||
this may be a state/region.
|
||||
|
||||
slots:
|
||||
legal_jurisdiction:
|
||||
slot_uri: gleif-base:hasLegalJurisdiction
|
||||
range: Jurisdiction
|
||||
required: false
|
||||
multivalued: false
|
||||
description: >-
|
||||
Jurisdiction of legal formation and registration.
|
||||
|
||||
gleif-base:hasLegalJurisdiction - "The jurisdiction of legal formation
|
||||
and registration of the entity"
|
||||
|
||||
Examples:
|
||||
- NL (Netherlands national)
|
||||
- DE-BY (Bavaria subnational for German entities)
|
||||
- US-DE (Delaware for US corporations)
|
||||
|
||||
comments:
|
||||
- "Maps to gleif-base:hasLegalJurisdiction"
|
||||
- "Jurisdictions are LEGAL boundaries, not just geographic"
|
||||
- "For federal systems, may be state/region level"
|
||||
44
schemas/20251121/linkml/modules/slots/primary_register.yaml
Normal file
44
schemas/20251121/linkml/modules/slots/primary_register.yaml
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
# primary_register slot - Primary trade register reference
|
||||
|
||||
id: https://nde.nl/ontology/hc/slot/primary_register
|
||||
name: primary_register
|
||||
title: Primary Register Slot
|
||||
|
||||
prefixes:
|
||||
gleif-base: https://www.gleif.org/ontology/Base/
|
||||
gleif-ra: https://www.gleif.org/ontology/RegistrationAuthority/
|
||||
schema: http://schema.org/
|
||||
|
||||
description: >-
|
||||
Primary trade register where an entity is registered.
|
||||
|
||||
Links to TradeRegister class.
|
||||
|
||||
gleif-base:isRegisteredIn - "indicates the registry that something is registered in"
|
||||
gleif-ra:BusinessRegistry - "a registry for registering and maintaining
|
||||
information about business entities"
|
||||
|
||||
Used for:
|
||||
- CustodianLegalStatus: Primary register where entity is registered
|
||||
- RegistrationNumber: Register that issued the number
|
||||
|
||||
slots:
|
||||
primary_register:
|
||||
slot_uri: gleif-base:isRegisteredIn
|
||||
range: TradeRegister
|
||||
required: false
|
||||
multivalued: false
|
||||
description: >-
|
||||
Primary trade register where entity is registered.
|
||||
|
||||
gleif-base:isRegisteredIn - "indicates the registry that something is registered in"
|
||||
|
||||
Examples:
|
||||
- Netherlands Handelsregister (HR)
|
||||
- UK Companies Register
|
||||
- German Handelsregister (HRB/HRA)
|
||||
|
||||
comments:
|
||||
- "Maps to gleif-base:isRegisteredIn"
|
||||
- "Links to TradeRegister class (the register/database)"
|
||||
- "Distinct from RegistrationAuthority (the organization maintaining the register)"
|
||||
38
schemas/20251121/linkml/modules/slots/website.yaml
Normal file
38
schemas/20251121/linkml/modules/slots/website.yaml
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# website slot - Official website URL
|
||||
|
||||
id: https://nde.nl/ontology/hc/slot/website
|
||||
name: website
|
||||
title: Website Slot
|
||||
|
||||
prefixes:
|
||||
gleif-base: https://www.gleif.org/ontology/Base/
|
||||
schema: http://schema.org/
|
||||
|
||||
description: >-
|
||||
Official website URL of an organization or entity.
|
||||
|
||||
Used for:
|
||||
- Trade registers (e.g., https://www.kvk.nl/)
|
||||
- Registration authorities (e.g., https://www.companieshouse.gov.uk/)
|
||||
- Heritage institutions
|
||||
|
||||
slots:
|
||||
website:
|
||||
slot_uri: gleif-base:hasWebsite
|
||||
range: uri
|
||||
required: false
|
||||
multivalued: false
|
||||
description: >-
|
||||
Official website URL of the organization or entity.
|
||||
|
||||
gleif-base:hasWebsite - "A website associated with something"
|
||||
|
||||
Examples:
|
||||
- https://www.kvk.nl/ (Dutch KvK)
|
||||
- https://www.companieshouse.gov.uk/ (UK Companies House)
|
||||
- https://www.rijksmuseum.nl/ (Rijksmuseum)
|
||||
|
||||
comments:
|
||||
- "Maps to gleif-base:hasWebsite and schema:url"
|
||||
- "Should be the official/canonical website URL"
|
||||
- "Use https:// when available"
|
||||
16590
schemas/20251121/rdf/01_custodian_name_20251127_132059.owl.ttl
Normal file
16590
schemas/20251121/rdf/01_custodian_name_20251127_132059.owl.ttl
Normal file
File diff suppressed because one or more lines are too long
493
scripts/enrich_nde_from_wikidata.py
Normal file
493
scripts/enrich_nde_from_wikidata.py
Normal file
|
|
@ -0,0 +1,493 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enrich NDE Register NL entries with Wikidata data.
|
||||
|
||||
This script reads the NDE Register YAML file, fetches comprehensive data from Wikidata
|
||||
for entries that have a 'wikidata_id' field, and creates an enriched YAML file with
|
||||
all available Wikidata properties.
|
||||
|
||||
The script retrieves each entity from the Wikibase REST API (no SPARQL queries are issued)
|
||||
while respecting rate limits.
|
||||
|
||||
Usage:
|
||||
python scripts/enrich_nde_from_wikidata.py
|
||||
|
||||
Environment Variables:
|
||||
WIKIDATA_API_TOKEN - Optional OAuth2 token for increased rate limits (5,000 req/hr)
|
||||
WIKIMEDIA_CONTACT_EMAIL - Contact email for User-Agent (required by Wikimedia policy)
|
||||
|
||||
Output:
|
||||
data/nde/nde_register_nl_enriched_{timestamp}.yaml
data/nde/enrichment_log_{timestamp}.json
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import yaml
|
||||
import httpx
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
from typing import Dict, List, Optional, Any
|
||||
from dataclasses import dataclass, field, asdict
|
||||
import logging
|
||||
|
||||
# Logging: timestamped INFO-level records for progress and error reporting.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Wikidata service endpoints.
WIKIDATA_REST_API = "https://www.wikidata.org/w/rest.php/wikibase/v1"
WIKIDATA_ACTION_API = "https://www.wikidata.org/w/api.php"
SPARQL_URL = "https://query.wikidata.org/sparql"

# Credentials and client identification, both read from the environment.
# The token is optional; the contact e-mail is embedded in the User-Agent.
WIKIDATA_API_TOKEN = os.getenv("WIKIDATA_API_TOKEN", "")
WIKIMEDIA_CONTACT_EMAIL = os.getenv("WIKIMEDIA_CONTACT_EMAIL", "glam-data@example.com")
USER_AGENT = f"GLAMDataExtractor/1.0 ({WIKIMEDIA_CONTACT_EMAIL})"

# Pause between requests, in seconds, chosen to stay under the hourly quota:
# 0.75 s is about 4,800 requests/hour (limit 5,000 with a token),
# 7.5 s is about 480 requests/hour (limit 500 when anonymous).
REQUEST_DELAY = 0.75 if WIKIDATA_API_TOKEN else 7.5
logger.info(
    "Using authenticated mode: 5,000 req/hr limit"
    if WIKIDATA_API_TOKEN
    else "Using anonymous mode: 500 req/hr limit"
)

# Headers sent with every request; the bearer token is attached only when set.
HEADERS = {"Accept": "application/json", "User-Agent": USER_AGENT}
if WIKIDATA_API_TOKEN:
    HEADERS["Authorization"] = f"Bearer {WIKIDATA_API_TOKEN}"
|
||||
|
||||
|
||||
@dataclass
class WikidataEnrichment:
    """Everything extracted from Wikidata for a single entity.

    Only ``entity_id`` is required. Every other field starts out empty
    (``{}``, ``[]`` or ``None``) and is filled in while the API response
    is parsed.
    """

    # Wikidata Q-number of the entity, e.g. "Q22246632".
    entity_id: str

    # Textual metadata copied from the API response.
    labels: Dict[str, str] = field(default_factory=dict)
    descriptions: Dict[str, str] = field(default_factory=dict)
    aliases: Dict[str, List[str]] = field(default_factory=dict)

    # Site key -> linked page title.
    sitelinks: Dict[str, str] = field(default_factory=dict)

    # Statements without a dedicated field, keyed by property name or ID.
    claims: Dict[str, Any] = field(default_factory=dict)

    # External identifiers (ISIL, VIAF, ...), keyed by short name.
    identifiers: Dict[str, str] = field(default_factory=dict)

    # Classification and location.
    instance_of: List[Dict[str, str]] = field(default_factory=list)
    country: Optional[Dict[str, str]] = None
    location: Optional[Dict[str, str]] = None
    coordinates: Optional[Dict[str, float]] = None

    # Life span of the organisation.
    inception: Optional[str] = None
    dissolution: Optional[str] = None

    # Web presence and media.
    official_website: Optional[str] = None
    image: Optional[str] = None
    logo: Optional[str] = None

    # ISO-8601 UTC timestamp taken when the instance is created.
    fetch_timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
|
||||
|
||||
|
||||
# Wikidata property ID -> short snake_case name used as the key in the
# enriched output. Properties not listed here are stored under their raw ID.
PROPERTY_LABELS = {
    # --- Core properties ---
    "P31": "instance_of",            # instance of (entity type)
    "P17": "country",                # country
    "P131": "located_in",            # located in the administrative territory
    "P625": "coordinates",           # coordinate location
    "P571": "inception",             # founding date
    "P576": "dissolution",           # dissolution date
    "P856": "official_website",      # official website
    "P18": "image",                  # image
    "P154": "logo",                  # logo

    # --- External identifiers ---
    "P791": "isil",                  # ISIL code
    "P214": "viaf",                  # VIAF ID
    "P227": "gnd",                   # GND ID
    "P244": "lcnaf",                 # Library of Congress ID
    "P268": "bnf",                   # BnF ID
    "P269": "idref",                 # IdRef ID
    "P213": "isni",                  # ISNI
    "P1566": "geonames",             # GeoNames ID
    "P2427": "grid",                 # GRID ID
    "P3500": "ringgold",             # Ringgold ID
    # NOTE(review): confirm this ID on Wikidata - Museofile may be P539.
    "P5785": "museofile",            # Museofile ID (France)
    "P8168": "factgrid",             # FactGrid ID

    # --- Cultural heritage / organisational relations ---
    "P361": "part_of",               # part of
    "P355": "subsidiaries",          # subsidiaries
    "P749": "parent_org",            # parent organization
    "P127": "owned_by",              # owned by
    "P1037": "director",             # director / manager
    "P159": "headquarters",          # headquarters location
    "P463": "member_of",             # member of
    "P1435": "heritage_status",      # heritage designation
    "P910": "topic_category",        # topic's main category
    "P373": "commons_category",      # Commons category

    # --- Additional metadata ---
    "P2044": "elevation",            # elevation
    "P6375": "street_address",       # street address
    "P281": "postal_code",           # postal code
    "P1329": "phone",                # phone number
    "P968": "email",                 # e-mail address
    "P973": "described_at_url",      # described at URL
    # NOTE(review): confirm this ID on Wikidata - the KvK company ID may be P3220.
    "P8402": "kvk_number",           # KvK number (Dutch Chamber of Commerce)
}
|
||||
|
||||
|
||||
def fetch_entity_data(entity_id: str, client: httpx.Client) -> Optional[Dict]:
    """
    Fetch full entity data from Wikibase REST API.

    The request is sent with the module-level HEADERS. If the server rejects
    it with 401 or 403 *and* an Authorization header was sent, the request is
    repeated once without that header so a bad or expired token does not
    block the run. When no token was sent the rejection is not retried,
    because the second request would be identical to the first.

    Args:
        entity_id: Wikidata Q-number (e.g., "Q22246632")
        client: HTTP client for making requests

    Returns:
        Full entity data as dictionary, or None on error (the error is logged)
    """
    url = f"{WIKIDATA_REST_API}/entities/items/{entity_id}"

    try:
        response = client.get(url, headers=HEADERS)

        # Handle OAuth errors (retry without auth), but only when auth was used.
        if response.status_code in (401, 403) and "Authorization" in HEADERS:
            logger.warning(
                f"Authenticated request for {entity_id} was rejected "
                f"({response.status_code}); retrying without token"
            )
            headers_no_auth = {k: v for k, v in HEADERS.items() if k != "Authorization"}
            response = client.get(url, headers=headers_no_auth)

        response.raise_for_status()
        return response.json()

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            logger.warning(f"Entity {entity_id} not found")
        else:
            logger.error(f"HTTP error fetching {entity_id}: {e}")
        return None
    except Exception as e:
        # Best effort: network failures, timeouts and invalid JSON end up here.
        logger.error(f"Error fetching {entity_id}: {e}")
        return None
|
||||
|
||||
|
||||
def extract_value_from_statement(statement: Dict) -> Any:
    """Extract the value from a Wikidata statement structure.

    The statement is expected to carry ``{"value": {"type": ..., "content": ...}}``.
    Dict-shaped content is reduced to its essential part:

    - entity reference (has "entity-type" or "id")  -> the entity ID
    - time value (has "time")                       -> the time string
    - coordinates (has "latitude" and "longitude")  -> dict with latitude,
      longitude and precision only
    - any other dict                                -> returned unchanged

    Non-dict content (strings, numbers) is returned as-is. A statement
    without content yields None.

    Args:
        statement: One statement object from the entity's "statements" map.

    Returns:
        The simplified value, or None when there is no value or the
        statement is malformed.
    """
    try:
        value_data = statement.get("value", {})
        content = value_data.get("content")

        # Inspect dict content regardless of the value "type" field. Returning
        # early on type == "value" would hand back the raw dict for times and
        # coordinates and make the branches below unreachable.
        if not isinstance(content, dict):
            # Simple string/number values, or None when no content is present
            return content

        if "entity-type" in content or "id" in content:
            # Entity reference
            return content.get("id", content)
        if "time" in content:
            # Time value
            return content.get("time")
        if "latitude" in content and "longitude" in content:
            # Coordinates
            return {
                "latitude": content.get("latitude"),
                "longitude": content.get("longitude"),
                "precision": content.get("precision")
            }
        return content
    except Exception:
        # Deliberate best effort: a malformed statement is treated as "no value".
        return None
|
||||
|
||||
|
||||
def parse_entity_data(entity_id: str, data: Dict) -> WikidataEnrichment:
    """
    Turn a raw entity response into a WikidataEnrichment.

    Labels, descriptions and aliases are copied over unchanged. Sitelinks are
    reduced to their page title. Each property's statements are converted
    with extract_value_from_statement and routed either to a dedicated field,
    to ``identifiers``, or to the generic ``claims`` map.

    Args:
        entity_id: The Wikidata entity ID
        data: Raw API response data

    Returns:
        WikidataEnrichment object with all extracted data
    """
    # Properties whose first value is wrapped as {"id": ...} on a named field.
    entity_ref_fields = {"P17": "country", "P131": "location"}
    # Properties whose first value is stored directly on a named field.
    scalar_fields = {
        "P571": "inception",
        "P576": "dissolution",
        "P856": "official_website",
        "P18": "image",
        "P154": "logo",
    }
    # Properties collected in the identifiers map.
    identifier_props = (
        "P791", "P214", "P227", "P244", "P268", "P269",
        "P213", "P1566", "P2427", "P3500", "P5785", "P8168", "P8402",
    )

    result = WikidataEnrichment(entity_id=entity_id)
    result.labels = data.get("labels", {})
    result.descriptions = data.get("descriptions", {})
    result.aliases = data.get("aliases", {})

    # Sitelinks: keep the title when the link is an object, else the raw value.
    for site, link in data.get("sitelinks", {}).items():
        result.sitelinks[site] = link.get("title", link) if isinstance(link, dict) else link

    for prop_id, prop_statements in data.get("statements", {}).items():
        if not prop_statements:
            continue

        extracted = [extract_value_from_statement(stmt) for stmt in prop_statements]
        values = [value for value in extracted if value is not None]
        if not values:
            continue

        first = values[0]
        label = PROPERTY_LABELS.get(prop_id, prop_id)

        if prop_id == "P31":
            # Instance of: keep every value, wrapping bare IDs as {"id": ...}
            result.instance_of = [{"id": v} if isinstance(v, str) else v for v in values]
        elif prop_id in entity_ref_fields:
            setattr(result, entity_ref_fields[prop_id], {"id": first})
        elif prop_id == "P625":
            # Coordinates are only accepted in dict form; anything else is dropped.
            if isinstance(first, dict):
                result.coordinates = first
        elif prop_id in scalar_fields:
            setattr(result, scalar_fields[prop_id], first)
        elif prop_id in identifier_props:
            result.identifiers[label] = first
        else:
            # Everything else: a single value stays scalar, several stay a list.
            result.claims[label] = first if len(values) == 1 else values

    return result
|
||||
|
||||
|
||||
def enrich_entity(entity_id: str, client: httpx.Client) -> Optional[WikidataEnrichment]:
    """
    Download one Wikidata entity and parse it into a WikidataEnrichment.

    An ID that does not start with "Q" gets the prefix added before the
    request is made.

    Args:
        entity_id: Wikidata Q-number (e.g., "Q22246632")
        client: HTTP client for requests

    Returns:
        WikidataEnrichment object or None on error
    """
    qid = entity_id if entity_id.startswith("Q") else f"Q{entity_id}"

    raw = fetch_entity_data(qid, client)
    return None if raw is None else parse_entity_data(qid, raw)
|
||||
|
||||
|
||||
def enrichment_to_dict(enrichment: WikidataEnrichment) -> Dict:
    """Flatten a WikidataEnrichment into a plain dict for YAML output.

    The entity ID and fetch timestamp are always present. Every other key is
    added only when the corresponding field is non-empty. Key order is fixed
    because the YAML writer keeps insertion order.
    """
    output: Dict = {
        "wikidata_entity_id": enrichment.entity_id,
        "wikidata_fetch_timestamp": enrichment.fetch_timestamp,
    }

    # Labels and descriptions: the full per-language map plus convenient
    # shortcuts for Dutch and English when available.
    localized = (
        ("wikidata_labels", enrichment.labels,
         "wikidata_label_nl", "wikidata_label_en"),
        ("wikidata_descriptions", enrichment.descriptions,
         "wikidata_description_nl", "wikidata_description_en"),
    )
    for all_key, by_lang, nl_key, en_key in localized:
        if not by_lang:
            continue
        output[all_key] = by_lang
        if "nl" in by_lang:
            output[nl_key] = by_lang["nl"]
        if "en" in by_lang:
            output[en_key] = by_lang["en"]

    if enrichment.aliases:
        output["wikidata_aliases"] = enrichment.aliases

    # Identifiers with an empty value are left out of the map.
    if enrichment.identifiers:
        output["wikidata_identifiers"] = {
            name: ident for name, ident in enrichment.identifiers.items() if ident
        }

    # Remaining fields are copied verbatim when they hold something.
    optional_fields = (
        ("wikidata_instance_of", enrichment.instance_of),
        ("wikidata_country", enrichment.country),
        ("wikidata_located_in", enrichment.location),
        ("wikidata_coordinates", enrichment.coordinates),
        ("wikidata_inception", enrichment.inception),
        ("wikidata_dissolution", enrichment.dissolution),
        ("wikidata_official_website", enrichment.official_website),
        ("wikidata_image", enrichment.image),
        ("wikidata_logo", enrichment.logo),
        ("wikidata_sitelinks", enrichment.sitelinks),
        ("wikidata_claims", enrichment.claims),
    )
    for key, value in optional_fields:
        if value:
            output[key] = value

    return output
|
||||
|
||||
|
||||
def main():
    """Main entry point.

    Reads data/nde/nde_register_nl.yaml (relative to the repository root
    derived from this script's location), enriches every entry that has a
    ``wikidata_id``, and writes two timestamped files next to the input:
    the enriched YAML and a JSON summary log.

    Entries without a ``wikidata_id`` are copied through unchanged. Entries
    whose enrichment fails are copied with a ``wikidata_enrichment_error``
    field added.

    Returns:
        0 on completion, 1 when the input file does not contain a list.
    """
    # Paths
    script_dir = Path(__file__).parent
    data_dir = script_dir.parent / "data" / "nde"
    input_file = data_dir / "nde_register_nl.yaml"

    # Generate timestamp for output file
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    output_file = data_dir / f"nde_register_nl_enriched_{timestamp}.yaml"

    logger.info(f"Input file: {input_file}")
    logger.info(f"Output file: {output_file}")

    # Load input YAML
    logger.info("Loading input YAML file...")
    with open(input_file, 'r', encoding='utf-8') as f:
        entries = yaml.safe_load(f)

    # An empty YAML document loads as None; treat it as "no entries".
    if entries is None:
        entries = []
    if not isinstance(entries, list):
        logger.error(
            f"Expected a list of entries in {input_file}, got {type(entries).__name__}"
        )
        return 1

    total_entries = len(entries)
    logger.info(f"Loaded {total_entries} entries")

    # Count entries with wikidata_id
    entries_with_wikidata = [
        e for e in entries if isinstance(e, dict) and e.get("wikidata_id")
    ]
    logger.info(f"Found {len(entries_with_wikidata)} entries with wikidata_id")

    # Process entries
    enriched_entries = []
    success_count = 0
    skip_count = 0
    error_count = 0

    with httpx.Client(timeout=30.0) as client:
        for i, entry in enumerate(entries):
            # Items that are not mappings cannot carry a wikidata_id.
            wikidata_id = entry.get("wikidata_id") if isinstance(entry, dict) else None

            if not wikidata_id:
                # Keep entry as-is, skip enrichment
                enriched_entries.append(entry)
                skip_count += 1
                continue

            org_name = entry.get("organisatie", "Unknown")

            # Log progress
            logger.info(f"[{i+1}/{total_entries}] Enriching: {org_name} ({wikidata_id})")

            # Work on a copy so the loaded input is never modified, on
            # success and on failure alike.
            enriched_entry = dict(entry)

            # Fetch and enrich
            try:
                enrichment = enrich_entity(str(wikidata_id), client)

                if enrichment:
                    # Merge enrichment data with original entry
                    enriched_entry["wikidata_enrichment"] = enrichment_to_dict(enrichment)
                    success_count += 1
                else:
                    # Keep original entry on error
                    enriched_entry["wikidata_enrichment_error"] = "Failed to fetch from Wikidata"
                    error_count += 1

            except Exception as e:
                logger.error(f"Error processing {org_name}: {e}")
                # Drop any partial result so the entry carries only the error.
                enriched_entry = dict(entry)
                enriched_entry["wikidata_enrichment_error"] = str(e)
                error_count += 1

            enriched_entries.append(enriched_entry)

            # Rate limiting
            time.sleep(REQUEST_DELAY)

    # Write output
    logger.info(f"Writing enriched data to {output_file}...")
    with open(output_file, 'w', encoding='utf-8') as f:
        yaml.dump(enriched_entries, f, allow_unicode=True, default_flow_style=False, sort_keys=False)

    # Summary
    logger.info("=" * 60)
    logger.info("ENRICHMENT COMPLETE")
    logger.info("=" * 60)
    logger.info(f"Total entries: {total_entries}")
    logger.info(f"Entries with wikidata_id: {len(entries_with_wikidata)}")
    logger.info(f"Successfully enriched: {success_count}")
    logger.info(f"Skipped (no wikidata_id): {skip_count}")
    logger.info(f"Errors: {error_count}")
    logger.info(f"Output file: {output_file}")

    # Create log file
    log_file = data_dir / f"enrichment_log_{timestamp}.json"
    log_data = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "input_file": str(input_file),
        "output_file": str(output_file),
        "total_entries": total_entries,
        "entries_with_wikidata_id": len(entries_with_wikidata),
        "successfully_enriched": success_count,
        "skipped_no_wikidata_id": skip_count,
        "errors": error_count,
        "authenticated": bool(WIKIDATA_API_TOKEN),
        "rate_limit_delay_seconds": REQUEST_DELAY,
    }
    with open(log_file, 'w', encoding='utf-8') as f:
        json.dump(log_data, f, indent=2)
    logger.info(f"Log file: {log_file}")

    return 0
|
||||
|
||||
|
||||
# Run only when executed as a script; main()'s return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Loading…
Reference in a new issue