Add script to enrich NDE Register NL entries with Wikidata data

- Implemented a Python script that fetches and enriches entries from the NDE Register using data from Wikidata.
- Utilized the Wikibase REST API and SPARQL endpoints for data retrieval.
- Added logging for tracking progress and errors during the enrichment process.
- Configured rate limiting based on authentication status for API requests.
- Created a structured output in YAML format, including detailed enrichment data.
- Generated a log file summarizing the enrichment process and results.
This commit is contained in:
kempersc 2025-11-27 13:30:00 +01:00
parent cd0ff5b9c7
commit 5ef8ccac51
19 changed files with 30132 additions and 79 deletions

View file

@ -14,7 +14,7 @@
datasetregister: dataset ontbreekt
versnellen_project: Upgrade? Aanschaf?
opmerkingen: Wat is de type organisatie?
wikidata_id:
wikidata_id: Q22246632
type:
- M
- plaatsnaam_bezoekadres: Borger

11505
data/nde/nde_register_nl.yaml Normal file

File diff suppressed because it is too large Load diff

View file

@ -150,7 +150,7 @@
.link-bidirectional {
stroke: #6366f1 !important; /* Indigo for bidirectional */
stroke-dasharray: 5, 3;
cursor: context-menu;
cursor: pointer;
}
.link-bidirectional:hover {
@ -162,9 +162,9 @@
fill: #6366f1 !important;
}
/* Hint for right-click action */
/* Hint for click action */
.link-group:has(.link-bidirectional):hover::after {
content: "Right-click to reverse";
content: "Click to reverse";
position: absolute;
background: rgba(0, 0, 0, 0.8);
color: white;

View file

@ -65,15 +65,24 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
const [selectedNode, setSelectedNode] = useState<UMLNode | null>(null);
const [zoom, setZoom] = useState(1);
const zoomTransformRef = useRef<d3.ZoomTransform>(d3.zoomIdentity);
const previousDiagramRef = useRef<string | null>(null); // Track diagram changes for auto-fit
useEffect(() => {
if (!svgRef.current || !diagram) return;
// Store current zoom transform before clearing
// Determine if this is a new diagram (should auto-fit) or same diagram with layout change (preserve zoom)
const diagramId = diagram.title || JSON.stringify(diagram.nodes.map(n => n.id).sort());
const isNewDiagram = previousDiagramRef.current !== diagramId;
previousDiagramRef.current = diagramId;
// Store current zoom transform before clearing (only if same diagram)
const currentSvg = d3.select(svgRef.current);
const currentTransform = d3.zoomTransform(currentSvg.node() as Element);
if (currentTransform && (currentTransform.k !== 1 || currentTransform.x !== 0 || currentTransform.y !== 0)) {
if (!isNewDiagram && currentTransform && (currentTransform.k !== 1 || currentTransform.x !== 0 || currentTransform.y !== 0)) {
zoomTransformRef.current = currentTransform;
} else if (isNewDiagram) {
// Reset zoom ref for new diagrams
zoomTransformRef.current = d3.zoomIdentity;
}
// Clear previous content
@ -400,7 +409,17 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
// Default node width for fallback (used where node.width is not yet set)
const defaultNodeWidth = minNodeWidth;
diagram.nodes.forEach(node => {
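// IMPORTANT: Work on shallow copies of the nodes and links to avoid mutating the original diagram prop
// (only the top-level objects are copied; nested arrays such as attributes/methods are still shared).
// This prevents issues when switching layouts (force simulation mutates source/target to objects)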
const workingNodes = diagram.nodes.map(node => ({ ...node }));
const workingLinks = diagram.links.map(link => ({
...link,
// Always ensure source/target are strings (force simulation may have converted them to objects)
source: typeof link.source === 'string' ? link.source : (link.source as any).id,
target: typeof link.target === 'string' ? link.target : (link.target as any).id
}));
workingNodes.forEach(node => {
const attributeCount = node.attributes?.length || 0;
const methodCount = node.methods?.length || 0;
@ -435,26 +454,23 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
g.setDefaultEdgeLabel(() => ({}));
// Add nodes to dagre graph
diagram.nodes.forEach(node => {
workingNodes.forEach(node => {
g.setNode(node.id, {
width: node.width || defaultNodeWidth,
height: node.height || nodeHeaderHeight
});
});
// Add edges to dagre graph
diagram.links.forEach(link => {
g.setEdge(
typeof link.source === 'string' ? link.source : (link.source as any).id,
typeof link.target === 'string' ? link.target : (link.target as any).id
);
// Add edges to dagre graph (source/target are always strings now)
workingLinks.forEach(link => {
g.setEdge(link.source as string, link.target as string);
});
// Run dagre layout
dagre.layout(g);
// Apply computed positions to nodes
diagram.nodes.forEach(node => {
workingNodes.forEach(node => {
const dagreNode = g.node(node.id);
if (dagreNode) {
node.x = dagreNode.x;
@ -470,8 +486,8 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
} else {
// Force simulation for layout (original scattered physics-based layout)
simulation = d3.forceSimulation(diagram.nodes as any)
.force('link', d3.forceLink(diagram.links)
simulation = d3.forceSimulation(workingNodes as any)
.force('link', d3.forceLink(workingLinks)
.id((d: any) => d.id)
.distance(250)
.strength(0.5))
@ -529,22 +545,26 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
}
});
// Initialize link state (parser now handles bidirectional detection)
diagram.links.forEach(link => {
// Initialize link state on working copies (parser now handles bidirectional detection)
workingLinks.forEach(link => {
// Keep bidirectional and isReversed from parser, with defaults
link.bidirectional = link.bidirectional || false;
link.isReversed = link.isReversed || false;
});
// Debug: Log link counts
const bidirectionalCount = diagram.links.filter(l => l.bidirectional).length;
console.log(`[UMLVisualization] Total links: ${diagram.links.length}, Bidirectional: ${bidirectionalCount}`);
// Debug: Log node and link counts
const bidirectionalCount = workingLinks.filter(l => l.bidirectional).length;
console.log(`[UMLVisualization] Nodes: ${workingNodes.length}, Total links: ${workingLinks.length}, Bidirectional: ${bidirectionalCount}`);
// Debug: Check if nodes have positions
const nodesWithPositions = workingNodes.filter(n => n.x !== undefined && n.y !== undefined).length;
console.log(`[UMLVisualization] Nodes with positions: ${nodesWithPositions}/${workingNodes.length}`);
// Draw links first (edges between nodes)
const links = g.append('g')
.attr('class', 'links')
.selectAll('g')
.data(diagram.links)
.data(workingLinks)
.join('g')
.attr('class', 'link-group');
@ -560,13 +580,12 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
? 'association' : d.type;
return `url(#arrow-${arrowType})`;
})
.style('cursor', (d) => d.bidirectional ? 'context-menu' : 'default')
.on('contextmenu', function(event, d: any) {
// Toggle direction for bidirectional edges on right-click
.style('cursor', (d) => d.bidirectional ? 'pointer' : 'default')
.on('click', function(event, d: any) {
// Toggle direction for bidirectional edges on left-click
if (!d.bidirectional) return;
event.preventDefault(); // Prevent browser context menu
event.stopPropagation();
event.stopPropagation(); // Prevent triggering svg click (deselect)
// Toggle reversed state
d.isReversed = !d.isReversed;
@ -577,7 +596,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
d.target = temp;
// Update the label text to show the correct direction
const linkIndex = diagram.links.indexOf(d);
const linkIndex = workingLinks.indexOf(d);
const labelSelection = linkLabels.filter((_: any, i: number) => i === linkIndex);
// Get the appropriate label based on current direction
@ -606,7 +625,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
.attr('stroke', '#6366f1') // Back to indigo
.attr('marker-end', `url(#arrow-${arrowType})`);
// Flash the label
// Flash the label then hide it again
labelSelection
.transition()
.duration(200)
@ -616,7 +635,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
.transition()
.delay(500)
.duration(300)
.style('opacity', 0.8)
.style('opacity', 0) // Hide label again after flash
.attr('font-weight', 'normal')
.attr('fill', '#172a59');
@ -642,7 +661,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
.attr('marker-end', `url(#arrow-${arrowType}-highlight)`);
// Show label more prominently
const linkIndex = diagram.links.indexOf(d);
const linkIndex = workingLinks.indexOf(d);
linkLabels.filter((_: any, i: number) => i === linkIndex)
.transition()
.duration(200)
@ -653,7 +672,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
if (d.bidirectional && this.parentNode) {
d3.select(this.parentNode as Element)
.append('title')
.text('Right-click to reverse direction');
.text('Click to reverse direction');
}
})
.on('mouseleave', function(_event, d: any) {
@ -669,12 +688,12 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
.attr('stroke-width', 2)
.attr('marker-end', `url(#arrow-${arrowType})`);
// Reset label
const linkIndex = diagram.links.indexOf(d);
// Reset label - hide it again
const linkIndex = workingLinks.indexOf(d);
linkLabels.filter((_: any, i: number) => i === linkIndex)
.transition()
.duration(200)
.style('opacity', 0.8)
.style('opacity', 0) // Hide label again
.attr('font-weight', 'normal');
// Remove tooltip
@ -683,14 +702,27 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
}
});
// Add link labels with enhanced visibility
// Add link labels with background for readability
// First add background rects (will be positioned in updateLinkPositions)
const linkLabelBackgrounds = links.append('rect')
.attr('class', 'link-label-bg')
.attr('rx', 3)
.attr('ry', 3)
.attr('fill', 'white')
.attr('stroke', '#e5e7eb')
.attr('stroke-width', 1)
.style('opacity', 0) // HIDDEN by default
.style('pointer-events', 'none');
// Add link labels - HIDDEN by default (opacity: 0), shown on hover to prevent overlap
const linkLabels = links.append('text')
.attr('class', (d) => `link-label${d.bidirectional ? ' link-label-bidirectional' : ''}`)
.attr('text-anchor', 'middle')
.attr('dy', -5)
.attr('fill', '#172a59')
.attr('font-size', '11px')
.style('opacity', 0.8)
.attr('font-weight', '500')
.style('opacity', 0) // HIDDEN by default - prevents label overlap
.style('pointer-events', 'none') // Don't interfere with click events
.text((d) => {
// Show label based on current direction (isReversed state)
@ -707,7 +739,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
const nodes = g.append('g')
.attr('class', 'nodes')
.selectAll('g')
.data(diagram.nodes)
.data(workingNodes)
.join('g')
.attr('class', (d) => `node node-${d.type}`)
.call(d3.drag<any, any>()
@ -830,7 +862,7 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
const updateLinkPositions = () => {
// Build node lookup map for O(1) access
const nodeMap = new Map<string, UMLNode>();
diagram.nodes.forEach(n => nodeMap.set(n.id, n));
workingNodes.forEach(n => nodeMap.set(n.id, n));
links.each(function(linkData: any) {
// Get source and target nodes - handle both string IDs and object references
@ -870,9 +902,25 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
.attr('y2', targetIntersection.y);
// Update label position at midpoint
linkGroup.select('text')
.attr('x', (source.x + target.x) / 2)
.attr('y', (source.y! + target.y!) / 2);
const midX = (source.x + target.x) / 2;
const midY = (source.y! + target.y!) / 2;
const textElement = linkGroup.select('text');
textElement
.attr('x', midX)
.attr('y', midY);
// Update background rect position and size based on text bounds
const textNode = textElement.node() as SVGTextElement | null;
if (textNode) {
const bbox = textNode.getBBox();
const padding = 4;
linkGroup.select('.link-label-bg')
.attr('x', bbox.x - padding)
.attr('y', bbox.y - padding)
.attr('width', bbox.width + padding * 2)
.attr('height', bbox.height + padding * 2);
}
});
};
@ -889,10 +937,23 @@ export const UMLVisualization: React.FC<UMLVisualizationProps> = ({
updateNodePositions();
updateLinkPositions();
});
// Auto-fit after simulation settles (for new diagrams only)
if (isNewDiagram) {
simulation.on('end', () => {
// Delay slightly to ensure DOM is fully updated
setTimeout(() => handleFitToScreen(), 100);
});
}
} else {
// Dagre layout - positions are already computed, update immediately
updateNodePositions();
updateLinkPositions();
// Auto-fit to screen for new diagrams (delay to ensure DOM is ready)
if (isNewDiagram) {
setTimeout(() => handleFitToScreen(), 100);
}
}
// Drag functions

View file

@ -0,0 +1,338 @@
# Registration and Trade Register Examples
# Examples of registration authorities, trade registers, and jurisdictions
# from multiple countries aligned with GLEIF Registration Authorities List (RAL)
# ============================================================================
# JURISDICTIONS
# ============================================================================
jurisdictions:
# National Jurisdictions
- jurisdiction_id: "NL"
jurisdiction_type: NATIONAL
country:
alpha_2: "NL"
alpha_3: "NLD"
legal_system_type: CIVIL_LAW
gleif_jurisdiction_code: "NL"
description: "Netherlands national jurisdiction - civil law system based on Napoleonic code"
- jurisdiction_id: "GB"
jurisdiction_type: NATIONAL
country:
alpha_2: "GB"
alpha_3: "GBR"
legal_system_type: COMMON_LAW
gleif_jurisdiction_code: "GB"
description: "United Kingdom national jurisdiction - common law system"
- jurisdiction_id: "JP"
jurisdiction_type: NATIONAL
country:
alpha_2: "JP"
alpha_3: "JPN"
legal_system_type: CIVIL_LAW
gleif_jurisdiction_code: "JP"
description: "Japan national jurisdiction - civil law system with German influence"
# Subnational Jurisdictions (federal systems: e.g. German states, where local/regional courts keep the registers, and US states)
- jurisdiction_id: "DE-BY"
jurisdiction_type: SUBNATIONAL
country:
alpha_2: "DE"
alpha_3: "DEU"
subregion:
iso_3166_2_code: "DE-BY"
subdivision_name: "Bavaria"
legal_system_type: CIVIL_LAW
gleif_jurisdiction_code: "DE"
description: "Bavaria (Bayern) subnational jurisdiction - German federal state"
- jurisdiction_id: "US-DE"
jurisdiction_type: SUBNATIONAL
country:
alpha_2: "US"
alpha_3: "USA"
subregion:
iso_3166_2_code: "US-DE"
subdivision_name: "Delaware"
legal_system_type: COMMON_LAW
gleif_jurisdiction_code: "US-DE"
description: "Delaware subnational jurisdiction - popular for US corporate registrations"
# Supranational Jurisdiction
- jurisdiction_id: "EU"
jurisdiction_type: SUPRANATIONAL
supranational_code: "EU"
legal_system_type: CIVIL_LAW
gleif_jurisdiction_code: "EU"
description: "European Union supranational jurisdiction - for SE (Societas Europaea) and EEIG"
# ============================================================================
# REGISTRATION AUTHORITIES
# ============================================================================
registration_authorities:
# Netherlands
- id: "https://w3id.org/heritage/ra/nl-kvk"
name: "Chamber of Commerce"
name_local: "Kamer van Koophandel"
abbreviation: "KvK"
gleif_ra_code: "RA000439"
jurisdiction:
jurisdiction_id: "NL"
jurisdiction_type: NATIONAL
website: "https://www.kvk.nl/"
registration_types:
- "companies"
- "foundations"
- "associations"
- "sole proprietors"
# United Kingdom
- id: "https://w3id.org/heritage/ra/gb-ch"
name: "Companies House"
name_local: "Companies House"
abbreviation: "CH"
gleif_ra_code: "RA000585"
jurisdiction:
jurisdiction_id: "GB"
jurisdiction_type: NATIONAL
website: "https://www.gov.uk/government/organisations/companies-house"
registration_types:
- "companies"
- "limited liability partnerships"
# UK Charity Commission (separate from Companies House)
- id: "https://w3id.org/heritage/ra/gb-cc"
name: "Charity Commission for England and Wales"
name_local: "Charity Commission"
abbreviation: "CC"
gleif_ra_code: "RA000586"
jurisdiction:
jurisdiction_id: "GB"
jurisdiction_type: NATIONAL
website: "https://www.gov.uk/government/organisations/charity-commission"
registration_types:
- "charities"
- "charitable incorporated organisations"
# Germany (Bavaria example - local court)
- id: "https://w3id.org/heritage/ra/de-by-muc"
name: "Local Court Munich"
name_local: "Amtsgericht München"
abbreviation: "AG München"
gleif_ra_code: "RA000385"
jurisdiction:
jurisdiction_id: "DE-BY"
jurisdiction_type: SUBNATIONAL
website: "https://www.justiz.bayern.de/gerichte-und-behoerden/amtsgerichte/muenchen/"
registration_types:
- "commercial companies"
- "partnerships"
# Japan
- id: "https://w3id.org/heritage/ra/jp-lab"
name: "Legal Affairs Bureau"
name_local: "法務局"
abbreviation: "法務局"
gleif_ra_code: "RA000429"
jurisdiction:
jurisdiction_id: "JP"
jurisdiction_type: NATIONAL
website: "https://www.moj.go.jp/MINJI/minji06_00076.html"
registration_types:
- "corporations"
- "foundations"
# USA Delaware
- id: "https://w3id.org/heritage/ra/us-de-doc"
name: "Delaware Division of Corporations"
name_local: "Division of Corporations"
abbreviation: "DE DOC"
gleif_ra_code: "RA000598"
jurisdiction:
jurisdiction_id: "US-DE"
jurisdiction_type: SUBNATIONAL
website: "https://corp.delaware.gov/"
registration_types:
- "corporations"
- "limited liability companies"
# ============================================================================
# TRADE REGISTERS
# ============================================================================
trade_registers:
# Netherlands Commercial Register (Handelsregister)
- register_id: "NL-HR"
register_name: "Commercial Register"
register_name_local: "Handelsregister"
register_abbreviation: "HR"
register_type: MIXED
gleif_ra_code: "RA000439"
jurisdiction:
jurisdiction_id: "NL"
jurisdiction_type: NATIONAL
maintained_by:
id: "https://w3id.org/heritage/ra/nl-kvk"
name: "Chamber of Commerce"
abbreviation: "KvK"
website: "https://www.kvk.nl/zoeken/"
api_endpoint: "https://api.kvk.nl/"
identifier_format: "[0-9]{8}"
description: "Netherlands commercial register maintained by KvK, covers all legal entities"
# UK Companies Register
- register_id: "GB-CH"
register_name: "Companies Register"
register_name_local: "Companies Register"
register_abbreviation: "CH"
register_type: COMMERCIAL
gleif_ra_code: "RA000585"
jurisdiction:
jurisdiction_id: "GB"
jurisdiction_type: NATIONAL
maintained_by:
id: "https://w3id.org/heritage/ra/gb-ch"
name: "Companies House"
website: "https://find-and-update.company-information.service.gov.uk/"
api_endpoint: "https://api.company-information.service.gov.uk/"
identifier_format: "[A-Z]{2}[0-9]{6}|[0-9]{8}"
description: "UK companies register - Scotland prefix SC, Northern Ireland prefix NI"
# UK Charity Register (separate from Companies House)
- register_id: "GB-CC"
register_name: "Charity Register"
register_name_local: "Register of Charities"
register_abbreviation: "CC"
register_type: CHARITY
gleif_ra_code: "RA000586"
jurisdiction:
jurisdiction_id: "GB"
jurisdiction_type: NATIONAL
maintained_by:
id: "https://w3id.org/heritage/ra/gb-cc"
name: "Charity Commission"
website: "https://register-of-charities.charitycommission.gov.uk/"
identifier_format: "[0-9]{6,7}"
description: "England and Wales charity register"
# German Commercial Register (Bavaria - Munich court)
- register_id: "DE-HRB-MUC"
register_name: "Commercial Register"
register_name_local: "Handelsregister"
register_abbreviation: "HRB"
register_type: COMMERCIAL
gleif_ra_code: "RA000385"
jurisdiction:
jurisdiction_id: "DE-BY"
jurisdiction_type: SUBNATIONAL
maintained_by:
id: "https://w3id.org/heritage/ra/de-by-muc"
name: "Local Court Munich"
name_local: "Amtsgericht München"
website: "https://www.handelsregister.de/"
identifier_format: "HRB [0-9]+ B"
description: "Munich commercial register (HRB for GmbH/AG, HRA for partnerships)"
# Japan Commercial Registration
- register_id: "JP-CR"
register_name: "Commercial Registration"
register_name_local: "商業登記簿"
register_abbreviation: "CR"
register_type: COMMERCIAL
gleif_ra_code: "RA000429"
jurisdiction:
jurisdiction_id: "JP"
jurisdiction_type: NATIONAL
maintained_by:
id: "https://w3id.org/heritage/ra/jp-lab"
name: "Legal Affairs Bureau"
name_local: "法務局"
website: "https://www.touki-kyoutaku-online.moj.go.jp/"
identifier_format: "[0-9]{4}-[0-9]{2}-[0-9]{6}"
description: "Japan commercial registration system"
# ============================================================================
# EXAMPLE REGISTRATION NUMBERS
# ============================================================================
registration_numbers:
# Rijksmuseum (Netherlands)
- id: "https://w3id.org/heritage/reg/nl/rijksmuseum-kvk"
number: "41215422"
type: "KvK"
trade_register:
register_id: "NL-HR"
register_name: "Commercial Register"
temporal_validity:
begin_of_the_begin: "1885-07-01"
# Still active - no end date
# British Museum (UK) - Companies House
- id: "https://w3id.org/heritage/reg/gb/british-museum-ch"
number: "RC000024"
type: "CRN"
trade_register:
register_id: "GB-CH"
register_name: "Companies Register"
temporal_validity:
begin_of_the_begin: "1963-01-01"
# British Museum (UK) - Charity Commission
- id: "https://w3id.org/heritage/reg/gb/british-museum-cc"
number: "1126962"
type: "CC"
trade_register:
register_id: "GB-CC"
register_name: "Charity Register"
temporal_validity:
begin_of_the_begin: "2008-12-15"
# Deutsches Museum (Germany - Munich)
- id: "https://w3id.org/heritage/reg/de/deutsches-museum-hrb"
number: "HRB 6532 B"
type: "HRB"
trade_register:
register_id: "DE-HRB-MUC"
register_name: "Commercial Register"
temporal_validity:
begin_of_the_begin: "1903-06-28"
# Tokyo National Museum (Japan)
- id: "https://w3id.org/heritage/reg/jp/tokyo-national-museum"
number: "0100-05-123456"
type: "CR"
trade_register:
register_id: "JP-CR"
register_name: "Commercial Registration"
temporal_validity:
begin_of_the_begin: "1872-03-10"
# ============================================================================
# NOTES
# ============================================================================
#
# GLEIF Registration Authorities List (RAL):
# - Contains 1,050+ registration authorities across 232 jurisdictions
# - Each authority has a unique RA code (format: RA followed by 6 digits)
# - Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
#
# Common GLEIF RA Codes for Heritage Institutions:
# - RA000439: Netherlands KvK (Kamer van Koophandel)
# - RA000585: UK Companies House
# - RA000586: UK Charity Commission
# - RA000385: Germany Amtsgericht München
# - RA000429: Japan Legal Affairs Bureau
# - RA000598: USA Delaware Division of Corporations
# - RA000421: Ireland Companies Registration Office
# - RA000287: France Registre du commerce et des sociétés
#
# Ontology Alignment:
# - gleif-ra:BusinessRegistry → TradeRegister
# - gleif-base:RegistrationAuthority → RegistrationAuthority
# - gleif-base:RegistryIdentifier → RegistrationNumber
# - gleif-base:hasLegalJurisdiction → legal_jurisdiction
# - gleif-base:isRegisteredIn → primary_register
# - gleif-base:isManagedBy → maintained_by

View file

@ -22,7 +22,7 @@ description: >-
Inspired by PiCo (Persons in Context) ontology pattern for distinguishing observations from entities.
version: 0.8.9
version: 0.9.0
license: https://creativecommons.org/licenses/by-sa/4.0/
prefixes:
@ -238,6 +238,18 @@ imports:
- modules/classes/FeaturePlace
- modules/classes/DigitalPlatform
- modules/classes/CollectionManagementSystem
# Registration and Jurisdiction classes (NEW - GLEIF alignment)
- modules/classes/Jurisdiction
- modules/classes/TradeRegister
# New slots for registration info
- modules/slots/country
- modules/slots/description
- modules/slots/website
- modules/slots/jurisdiction
- modules/slots/primary_register
- modules/slots/legal_jurisdiction
comments:
- "HYPER-MODULAR STRUCTURE: Direct imports of all component files"
@ -264,6 +276,9 @@ comments:
- "Geographic classes (3): Country (ISO 3166-1), Subregion (ISO 3166-2), Settlement (GeoNames)"
- "Geographic slots (2): subregion, settlement (added to CustodianPlace alongside existing country slot)"
- "Geographic validation: FeatureTypeEnum has dcterms:spatial annotations for 72 country-restricted feature types"
- "Registration modeling (v0.9.0): Jurisdiction (lcc-cr:GeographicRegion), TradeRegister (gleif-ra:BusinessRegistry), enhanced RegistrationInfo with GLEIF alignment"
- "GLEIF integration: Registration Authorities List (RAL) codes (RA000439 for KvK, etc.), gleif-base:RegistryIdentifier, gleif-base:RegistrationAuthority"
- "New slots (v0.9.0): country, description, website, jurisdiction, primary_register, legal_jurisdiction for registration and jurisdiction support"
see_also:
- "https://github.com/FICLIT/PiCo"

View file

@ -18,6 +18,10 @@ imports:
- ./LegalForm
- ./LegalName
- ./RegistrationInfo
- ./TradeRegister
- ./Jurisdiction
- ../slots/primary_register
- ../slots/legal_jurisdiction
classes:
@ -88,6 +92,8 @@ classes:
- legal_form
- registration_numbers
- registration_authority
- primary_register
- legal_jurisdiction
- dissolution_date
- temporal_extent
- parent_custodian
@ -199,20 +205,62 @@ classes:
Temporal validity is now captured in RegistrationNumber class.
range: date
registration_authority:
slot_uri: rov:hasRegisteredOrganization
slot_uri: gleif-base:isManagedBy
description: >-
Primary registration authority for this entity.
Links to RegistrationAuthority class.
gleif-base:RegistrationAuthority - "An organization that is responsible for
maintaining a registry and provides registration services."
Examples: Chamber of Commerce, Companies House, Charity Commission.
ROV: hasRegisteredOrganization for registering authority.
range: RegistrationAuthority
examples:
- value:
name: "Kamer van Koophandel"
abbreviation: "KvK"
jurisdiction: "NL"
gleif_ra_code: "RA000439"
description: "Dutch Chamber of Commerce"
primary_register:
slot_uri: gleif-base:isRegisteredIn
description: >-
Primary trade register where this entity is registered.
Links to TradeRegister class.
gleif-base:isRegisteredIn - "indicates the registry that something is registered in"
gleif-ra:BusinessRegistry - "a registry for registering and maintaining
information about business entities"
Examples: Netherlands Handelsregister, UK Companies Register, German HRB.
range: TradeRegister
examples:
- value:
register_name: "Commercial Register"
register_name_local: "Handelsregister"
register_abbreviation: "HR"
gleif_ra_code: "RA000439"
description: "Netherlands commercial register"
legal_jurisdiction:
slot_uri: gleif-base:hasLegalJurisdiction
description: >-
Jurisdiction of legal formation and registration.
Links to Jurisdiction class.
gleif-base:hasLegalJurisdiction - "The jurisdiction of legal formation
and registration of the entity"
For most entities, this is the country. For federal systems (USA, Germany),
this may be a state/region.
range: Jurisdiction
examples:
- value:
jurisdiction_id: "NL"
jurisdiction_type: "NATIONAL"
country:
alpha_2: "NL"
alpha_3: "NLD"
description: "Netherlands national jurisdiction"
dissolution_date:
slot_uri: schema:dissolutionDate
description: >-
@ -337,12 +385,33 @@ classes:
registration_numbers:
- number: "41215422"
type: "KvK"
trade_register:
register_id: "NL-HR"
register_name: "Commercial Register"
register_name_local: "Handelsregister"
temporal_validity:
begin_of_the_begin: "1885-07-01"
registration_authority:
name: "Kamer van Koophandel"
name: "Chamber of Commerce"
name_local: "Kamer van Koophandel"
abbreviation: "KvK"
jurisdiction: "NL"
gleif_ra_code: "RA000439"
jurisdiction:
jurisdiction_id: "NL"
jurisdiction_type: "NATIONAL"
primary_register:
register_id: "NL-HR"
register_name: "Commercial Register"
register_name_local: "Handelsregister"
gleif_ra_code: "RA000439"
register_type: "COMMERCIAL"
legal_jurisdiction:
jurisdiction_id: "NL"
jurisdiction_type: "NATIONAL"
country:
alpha_2: "NL"
alpha_3: "NLD"
legal_system_type: "CIVIL_LAW"
legal_status:
status_code: "ACTIVE"
status_name: "Active"

View file

@ -0,0 +1,248 @@
# Jurisdiction Class - Legal/Administrative Jurisdiction
# Represents the geographic area subject to a specific set of laws and governance
#
# GLEIF alignment: gleif-base:hasLegalJurisdiction, gleif-base:hasCoverageArea
# Uses lcc-cr:GeographicRegion (OMG Languages Countries and Codes), the vocabulary used by GLEIF
#
# Used for:
# - TradeRegister.jurisdiction: Geographic scope of business register
# - RegistrationAuthority.jurisdiction: Territory where authority operates
# - LegalForm.jurisdiction: Where specific legal forms are valid
#
# Design principle: Jurisdictions are LEGAL boundaries, not just geographic
# A jurisdiction can be a country, subdivision, or supranational region (e.g., EU)
id: https://nde.nl/ontology/hc/class/jurisdiction
name: jurisdiction
title: Jurisdiction Class
prefixes:
linkml: https://w3id.org/linkml/
gleif-base: https://www.gleif.org/ontology/Base/
lcc-cr: https://www.omg.org/spec/LCC/Countries/CountryRepresentation/
schema: http://schema.org/
imports:
- linkml:types
- ../metadata
- ./Country
- ./Subregion
- ./Settlement
- ../slots/country
- ../slots/subregion
- ../slots/settlement
- ../slots/description
classes:
Jurisdiction:
class_uri: lcc-cr:GeographicRegion
description: >-
A distinct area subject to a government and set of laws.
Jurisdictions define the LEGAL scope of registration authorities and trade registers.
Unlike pure geographic classes (Country, Subregion, Settlement), Jurisdiction captures
the legal framework applicable to organizations.
**Ontology Alignment:**
- lcc-cr:GeographicRegion - OMG Languages Countries and Codes (used by GLEIF)
- gleif-base:hasLegalJurisdiction - links entities to their jurisdiction of registration
- gleif-base:hasCoverageArea - geographic scope of registration authorities
**Types of Jurisdiction:**
1. **National Jurisdiction**: Entire country (e.g., Netherlands, Japan)
- Maps to Country class
- Most common for trade registers
2. **Subnational Jurisdiction**: State, province, region (e.g., Bavaria, New York)
- Maps to Subregion class
- Common for federal systems (USA, Germany, Canada, Australia)
3. **Municipal Jurisdiction**: City-level (e.g., City of London, Hong Kong)
- Maps to Settlement class
- For city-states and special administrative regions
4. **Supranational Jurisdiction**: Multi-country (e.g., European Union, OHADA)
- Represented via supranational identifier
- Relevant for EU-wide legal forms (SE, SCE, EEIG)
**GLEIF Integration:**
This class aligns with GLEIF ontology concepts:
- gleif-base:hasLegalJurisdiction - jurisdiction where entity is registered
- gleif-base:hasCoverageArea - geographic scope of registration authority
**Examples:**
- Netherlands (national): authority=KvK, covers all NL legal entities
- Bavaria (subnational): Handelsregister München, covers Bavarian businesses
- Hong Kong (special region): Companies Registry, covers HK companies
- European Union (supranational): European Company (SE) registration
See also:
- TradeRegister: Business registers operating within a jurisdiction
- RegistrationAuthority: Organizations maintaining registers
- LegalForm: Legal forms valid within specific jurisdictions
exact_mappings:
- lcc-cr:GeographicRegion # OMG LCC geographic region
close_mappings:
- schema:AdministrativeArea # Schema.org administrative area
related_mappings:
- gleif-base:hasLegalJurisdiction # GLEIF property for jurisdiction
- gleif-base:hasCoverageArea # GLEIF property for coverage area
slots:
- jurisdiction_id
- jurisdiction_type
- country
- subregion
- settlement
- supranational_code
- gleif_jurisdiction_code
- legal_system_type
- description
slot_usage:
jurisdiction_id:
identifier: true
required: true
description: Unique identifier for this jurisdiction record
jurisdiction_type:
required: true
description: Type of jurisdiction (national, subnational, municipal, supranational)
country:
required: false
description: >-
Country for national/subnational/municipal jurisdictions.
Not applicable for supranational jurisdictions (e.g., EU).
subregion:
required: false
description: >-
Subdivision for subnational jurisdictions.
Required when jurisdiction_type is SUBNATIONAL.
settlement:
required: false
description: >-
Settlement for municipal jurisdictions.
Required when jurisdiction_type is MUNICIPAL.
supranational_code:
required: false
description: >-
Code for supranational entities (EU, OHADA, etc.).
Required when jurisdiction_type is SUPRANATIONAL.
slots:
jurisdiction_id:
description: >-
Unique identifier for this jurisdiction.
Recommended format: ISO-based hierarchical identifier
Examples:
- "NL" (Netherlands national)
- "DE-BY" (Bavaria subnational)
- "HK" (Hong Kong municipal/special region)
- "EU" (European Union supranational)
range: string
slot_uri: schema:identifier
jurisdiction_type:
description: >-
Classification of jurisdiction level.
Values:
- NATIONAL: Country-level jurisdiction (most common)
- SUBNATIONAL: State/province/region level
- MUNICIPAL: City/municipality level
- SUPRANATIONAL: Multi-country jurisdiction (EU, OHADA)
This determines which geographic slot is required:
- NATIONAL: country required
- SUBNATIONAL: country + subregion required
- MUNICIPAL: country + settlement required (subregion optional)
- SUPRANATIONAL: supranational_code required
range: JurisdictionTypeEnum
required: true
slot_uri: schema:additionalType
supranational_code:
description: >-
Code for supranational jurisdiction.
Used for multi-country legal frameworks:
- "EU": European Union (for SE, SCE, EEIG)
- "OHADA": Organisation for the Harmonisation of Business Law in Africa
- "BENELUX": Benelux (for certain cross-border structures)
Only applicable when jurisdiction_type is SUPRANATIONAL.
range: string
slot_uri: schema:identifier
gleif_jurisdiction_code:
description: >-
GLEIF Registration Authorities List (RAL) jurisdiction code.
GLEIF maintains a list of 1,050+ registration authorities across 232 jurisdictions.
The jurisdiction code is typically an ISO 3166-1 alpha-2 country code or a
composite code for subnational registries.
Format: ISO 3166-1 alpha-2 or composite (e.g., "NL", "DE", "US-DE" for Delaware)
Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
range: string
slot_uri: gleif-base:hasCoverageArea
legal_system_type:
description: >-
Type of legal system in this jurisdiction.
Values:
- CIVIL_LAW: Continental European tradition (Germany, Netherlands, Japan)
- COMMON_LAW: English tradition (UK, USA, Australia)
- MIXED: Combination (South Africa, Louisiana, Quebec)
- RELIGIOUS: Based on religious law (some Middle Eastern jurisdictions)
- CUSTOMARY: Based on local custom
This affects which legal forms are available and how entities are registered.
range: LegalSystemTypeEnum
required: false
slot_uri: schema:category
enums:
JurisdictionTypeEnum:
description: Classification of jurisdiction levels
permissible_values:
NATIONAL:
description: Country-level jurisdiction
SUBNATIONAL:
description: State, province, or region level jurisdiction
MUNICIPAL:
description: City or municipality level jurisdiction
SUPRANATIONAL:
description: Multi-country or international jurisdiction
LegalSystemTypeEnum:
description: Classification of legal systems
permissible_values:
CIVIL_LAW:
description: >-
Continental European legal tradition. Codified law.
Countries: Germany, France, Netherlands, Japan, most of Europe and Latin America.
COMMON_LAW:
description: >-
English legal tradition. Case law and precedent.
Countries: UK, USA, Canada (except Quebec), Australia, India.
MIXED:
description: >-
Combination of civil and common law traditions.
Examples: South Africa, Louisiana, Quebec, Scotland.
RELIGIOUS:
description: >-
Based on religious law (Sharia, Canon Law, etc.).
Examples: Saudi Arabia, Iran, Vatican.
CUSTOMARY:
description: >-
Based on local customs and traditions.
Examples: Many African and Pacific Island nations.

View file

@ -1,5 +1,14 @@
# Registration Information Classes
# Classes for organizational registration details
#
# GLEIF alignment:
# - gleif-base:RegistrationAuthority - Organization maintaining a registry
# - gleif-base:RegistryIdentifier - Identifier associated with a registry entry
# - gleif-ra:RegistrationAuthorityCode - Reference code for registration authority
#
# ROV alignment:
# - rov:registration - Relationship between legal entity and authority
# - rov:RegisteredOrganization - Organization that is legally registered
id: https://nde.nl/ontology/hc/class/RegistrationInfo
name: registration-info-classes
@ -7,28 +16,65 @@ title: Registration Information Classes
description: >-
Classes for capturing organizational registration details including
registration numbers, dates, and authorities.
registration numbers, dates, authorities, and trade registers.
This module provides the foundation for tracking legal entity registrations
across multiple jurisdictions worldwide.
prefixes:
linkml: https://w3id.org/linkml/
rov: http://www.w3.org/ns/regorg#
gleif-base: https://www.gleif.org/ontology/Base/
gleif-ra: https://www.gleif.org/ontology/RegistrationAuthority/
org: http://www.w3.org/ns/org#
schema: http://schema.org/
imports:
- linkml:types
- ../metadata
- ./TimeSpan
- ./Jurisdiction
- ../slots/jurisdiction
- ../slots/description
- ../slots/website
classes:
RegistrationNumber:
class_uri: rov:registration
class_uri: gleif-base:RegistryIdentifier
description: >-
Official registration number assigned by an authority.
Examples: Chamber of Commerce number, charity registration number.
Official registration number assigned by an authority and recorded in a trade register.
Maps to:
- rov:registration (Registered Organizations Vocabulary)
**Ontology Alignment:**
- gleif-base:RegistryIdentifier - "An identifier associated with an entry in a registry,
i.e., one that provides an index to the registry for the recorded item."
- rov:registration - "The registration is a fundamental relationship between a legal
entity and the authority with which it is registered"
**Examples:**
- Dutch KvK number: "41215422" (Rijksmuseum)
- UK Companies House: "RC000024" or "00000224"
- German HRB: "HRB 123456 B" (Berlin)
- Irish charity number: "CHY 4700"
- US EIN: "12-3456789"
**Key Properties:**
- number: The actual registration string
- type: Classification of the registration (KvK, EIN, CHY, etc.)
- trade_register: The register where this number is recorded
- temporal_validity: When this registration was/is valid
See also:
- TradeRegister: The register that issued this number
- RegistrationAuthority: The organization maintaining the register
exact_mappings:
- gleif-base:RegistryIdentifier
close_mappings:
- rov:registration
- schema:identifier
- tooi:organisatieIdentificatie  # TOOI for Dutch entities
attributes:
id:
@ -39,38 +85,100 @@ classes:
required: true
number:
slot_uri: rov:registration
slot_uri: gleif-base:hasTag
description: >-
The actual registration number/code.
Examples: "41215422" (KvK number), "CHY 4700" (Irish charity number)
gleif-base:hasTag - "Has a unique combination of alphanumeric characters
corresponding to the identifier"
Examples:
- "41215422" (KvK number)
- "CHY 4700" (Irish charity number)
- "HRB 123456 B" (German commercial register)
range: string
required: true
type:
slot_uri: schema:additionalType
description: >-
Type of registration number.
Examples: "KvK" (Dutch Chamber of Commerce), "EIN" (US Employer ID)
Type/scheme of registration number.
Examples:
- "KvK" (Dutch Chamber of Commerce)
- "EIN" (US Employer ID)
- "CRN" (UK Company Registration Number)
- "CHY" (Irish Charity Number)
- "HRB" (German Handelsregister B - for GmbH, AG)
- "HRA" (German Handelsregister A - for partnerships)
range: string
required: true
trade_register:
slot_uri: gleif-base:isRegisteredIn
description: >-
The trade register where this number is recorded.
gleif-base:isRegisteredIn - "indicates the registry that something is registered in"
Links to TradeRegister class.
range: TradeRegister
required: false
temporal_validity:
slot_uri: schema:temporalCoverage
description: >-
Time period during which this registration number is/was valid.
Captures registration date through deregistration (if applicable).
Uses TimeSpan class for fuzzy temporal boundaries.
range: TimeSpan
required: true
RegistrationAuthority:
class_uri: rov:hasRegisteredOrganization
class_uri: gleif-base:RegistrationAuthority
description: >-
Authority that maintains official registrations of organizations.
Examples: Chamber of Commerce, Charity Commission, Companies House.
Maps to:
- rov:RegisteredOrganization (the registering authority)
**Ontology Alignment:**
- gleif-base:RegistrationAuthority - "An organization that is responsible for
maintaining a registry and provides registration services."
A RegistrationAuthority is the **organization** that maintains one or more
trade registers, distinct from the TradeRegister itself (the database/system).
**Key Distinction:**
- RegistrationAuthority: The organization (e.g., "Kamer van Koophandel", "Companies House")
- TradeRegister: The register/database (e.g., "Handelsregister", "Companies Register")
**Examples:**
- Netherlands: Kamer van Koophandel (KvK) - GLEIF RA000439
- UK: Companies House - GLEIF RA000585
- Germany: Amtsgericht München (local court) - GLEIF RA000385
- Japan: Legal Affairs Bureau (法務局) - GLEIF RA000429
- Ireland: Companies Registration Office (CRO) - GLEIF RA000421
**GLEIF Integration:**
GLEIF maintains the Registration Authorities List (RAL) with 1,050+ authorities.
Each authority has a unique RA code (format: RA followed by 6 digits).
Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
See also:
- TradeRegister: Registers maintained by this authority
- Jurisdiction: Geographic/legal scope of the authority
- RegistrationNumber: Numbers issued through this authority's registers
exact_mappings:
- gleif-base:RegistrationAuthority
close_mappings:
- org:Organization
- schema:GovernmentOrganization
related_mappings:
- rov:hasRegisteredOrganization
attributes:
id:
@ -81,31 +189,93 @@ classes:
required: true
name:
slot_uri: schema:name
slot_uri: gleif-base:hasNameTranslatedEnglish
description: >-
Official name of the registration authority.
Examples: "Kamer van Koophandel", "Companies House", "IRS"
Official name of the registration authority in English.
gleif-base:hasNameTranslatedEnglish - "The name used to refer to a person
or organization, translated into English."
Examples:
- "Chamber of Commerce" (Netherlands)
- "Companies House" (UK)
- "Legal Affairs Bureau" (Japan)
range: string
required: true
name_local:
slot_uri: gleif-base:hasNameLegalLocal
description: >-
Official name in local language.
gleif-base:hasNameLegalLocal - "The name used to refer to an person or
organization in legal communications in local alphabet"
Examples:
- "Kamer van Koophandel" (Dutch)
- "法務局" (Japanese)
- "Amtsgericht" (German)
range: string
abbreviation:
slot_uri: schema:alternateName
slot_uri: gleif-base:hasAbbreviationLocal
description: >-
Common abbreviation.
Examples: "KvK", "CH", "IRS"
gleif-base:hasAbbreviationLocal - "An abbreviation using a language local
to the entity identified"
Examples: "KvK", "CH", "CRO"
range: string
jurisdiction:
slot_uri: schema:areaServed
slot_uri: gleif-base:hasCoverageArea
description: >-
Geographic jurisdiction of the authority.
Usually a country code, may be regional for federal systems.
range: string
Geographic/legal jurisdiction of the authority.
gleif-base:hasCoverageArea - "Indicates a geographic region in which some
service is provided, or to which some policy applies"
Links to Jurisdiction class.
range: Jurisdiction
required: true
gleif_ra_code:
slot_uri: schema:identifier
description: >-
GLEIF Registration Authority code.
Format: "RA" followed by 6 digits
Examples:
- RA000439: Netherlands KvK
- RA000585: UK Companies House
- RA000385: Germany Amtsgericht München
Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
range: string
pattern: "^RA[0-9]{6}$"
registers:
slot_uri: gleif-base:isManagedBy
description: >-
Trade registers maintained by this authority.
Inverse of TradeRegister.maintained_by.
Examples:
- KvK maintains: Handelsregister
- Companies House maintains: Companies Register, LLP Register
range: TradeRegister
multivalued: true
inlined: false
website:
slot_uri: schema:url
description: Official website of the registration authority
slot_uri: gleif-base:hasWebsite
description: >-
Official website of the registration authority.
gleif-base:hasWebsite - "A website associated with something"
range: uri
registration_types:
@ -163,11 +333,24 @@ classes:
range: string
LegalStatus:
class_uri: schema:status
class_uri: gleif-base:RegistrationStatus
description: >-
Legal status of an organization (active, dissolved, suspended, etc.).
**Ontology Alignment:**
- gleif-base:RegistrationStatus - "A lifecycle stage indicating the status of a
given registration of something, such as a business or legal entity."
- gleif-base:EntityStatus - ACTIVE or INACTIVE (GLEIF enumeration)
Status definitions vary by jurisdiction and legal framework.
exact_mappings:
- gleif-base:RegistrationStatus
close_mappings:
- gleif-base:EntityStatus
- schema:status
attributes:
id:
identifier: true
@ -177,10 +360,16 @@ classes:
required: true
status_code:
slot_uri: schema:codeValue
slot_uri: gleif-base:hasTag
description: >-
Standardized status code.
gleif-base:hasTag - "Has a unique combination of alphanumeric characters
corresponding to the identifier"
Examples: "ACTIVE", "DISSOLVED", "SUSPENDED", "MERGED"
GLEIF defines: ACTIVE, INACTIVE
range: string
required: true
pattern: "^[A-Z_]+$"
@ -203,6 +392,10 @@ classes:
required: true
jurisdiction:
slot_uri: schema:legislationJurisdiction
description: Legal jurisdiction where this status is defined
range: string
slot_uri: gleif-base:hasLegalJurisdiction
description: >-
Legal jurisdiction where this status is defined.
gleif-base:hasLegalJurisdiction - "The jurisdiction of legal formation
and registration of the entity"
range: Jurisdiction

View file

@ -16,6 +16,8 @@ imports:
- linkml:types
- Country
- Subregion
- ../slots/country
- ../slots/subregion
classes:
Settlement:

View file

@ -0,0 +1,303 @@
# TradeRegister Class - Business/Commercial Register
# Official register where organizations are formally registered
#
# GLEIF alignment: gleif-ra:BusinessRegistry, gleif-base:Registry
#
# Used for:
# - RegistrationNumber.trade_register: Which register issued the number
# - CustodianLegalStatus.primary_register: Where entity is registered
#
# GLEIF RAL (Registration Authorities List) contains 1,050+ registers in 232 jurisdictions
# See: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
id: https://nde.nl/ontology/hc/class/trade-register
name: trade-register
title: TradeRegister Class
prefixes:
  linkml: https://w3id.org/linkml/
  gleif-ra: https://www.gleif.org/ontology/RegistrationAuthority/
  gleif-base: https://www.gleif.org/ontology/Base/
  # rov is required by rov:RegisteredOrganization in TradeRegister.related_mappings
  rov: http://www.w3.org/ns/regorg#
  schema: http://schema.org/
imports:
- linkml:types
- ../metadata
- ./Jurisdiction
- ../slots/jurisdiction
- ../slots/description
- ../slots/website
classes:
TradeRegister:
class_uri: gleif-ra:BusinessRegistry
description: >-
Official business or trade register where organizations are formally registered.
A TradeRegister is the **register itself** (the database/system), distinct from
the RegistrationAuthority (the organization maintaining it). In many jurisdictions
these are tightly coupled (KvK maintains Handelsregister), but conceptually separate.
**Ontology Alignment:**
- gleif-ra:BusinessRegistry - "a registry for registering and maintaining information
about business entities" (subclass of gleif-base:Registry)
- gleif-base:Registry - "A system, typically an information system, that records
the registration of items"
**Key Distinction:**
- TradeRegister: The register/database (e.g., "Handelsregister", "Companies Register")
- RegistrationAuthority: The organization (e.g., "Kamer van Koophandel", "Companies House")
**GLEIF Integration:**
GLEIF maintains the Registration Authorities List (RAL) with 1,050+ business registers
across 232 jurisdictions worldwide. Each register has a unique RA code.
Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
**Examples by Country:**
**Netherlands (NL)**:
- Handelsregister (HR) - General commercial register
- Maintained by: Kamer van Koophandel (KvK)
- GLEIF RA Code: RA000439
**Germany (DE)**:
- Handelsregister (HRB/HRA) - Commercial register
- Maintained by: Local courts (Amtsgericht)
- GLEIF RA Codes: RA000385 (Munich), RA000386 (Hamburg), etc.
**United Kingdom (GB)**:
- Companies Register
- Maintained by: Companies House
- GLEIF RA Code: RA000585
**United States (US)**:
- State-level registers (e.g., Delaware Division of Corporations)
- GLEIF RA Codes: RA000598 (Delaware), RA000658 (New York), etc.
**Japan (JP)**:
- 商業登記簿 (Commercial Registration Book)
- Maintained by: Legal Affairs Bureau (法務局)
- GLEIF RA Code: RA000429
**Register Types:**
1. **Commercial Register**: For-profit businesses (GmbH, BV, Ltd, Inc)
2. **Foundation Register**: Non-profit foundations (Stichting, Stiftung)
3. **Association Register**: Voluntary associations (Vereniging, Verein)
4. **Charity Register**: Registered charities (UK Charity Commission)
5. **Cultural Register**: Heritage institutions (some countries)
See also:
- Jurisdiction: Geographic/legal scope of the register
- RegistrationAuthority: Organization maintaining the register
- RegistrationNumber: Individual registration issued by this register
exact_mappings:
- gleif-ra:BusinessRegistry # GLEIF business registry class
close_mappings:
- gleif-base:Registry # GLEIF base registry class
- schema:GovernmentService # Registers are government services
related_mappings:
- rov:RegisteredOrganization # Organizations registered in these registers
slots:
- register_id
- register_name
- register_name_local
- register_abbreviation
- register_type
- jurisdiction
- maintained_by
- gleif_ra_code
- website
- api_endpoint
- identifier_format
- description
slot_usage:
register_id:
identifier: true
required: true
description: Unique identifier for this register
register_name:
required: true
description: Register name in English
register_name_local:
required: false
description: Register name in local language (e.g., "Handelsregister")
register_abbreviation:
required: false
description: Common abbreviation (e.g., "HR", "KvK", "CH")
register_type:
required: true
description: Type of organizations this register handles
jurisdiction:
required: true
description: Jurisdiction where this register operates
maintained_by:
required: true
description: Registration authority that maintains this register
gleif_ra_code:
required: false
description: >-
GLEIF Registration Authority code (if in GLEIF RAL).
Format: RA followed by 6 digits (e.g., RA000439)
slots:
register_id:
description: >-
Unique identifier for this trade register.
Recommended format: {country_code}-{register_abbreviation}
Examples:
- "NL-HR" (Netherlands Handelsregister)
- "DE-HRB-MUC" (Germany HRB Munich)
- "GB-CH" (UK Companies House)
- "US-DE" (Delaware Division of Corporations)
range: string
slot_uri: schema:identifier
register_name:
description: >-
Official name of the trade register in English.
Examples:
- "Commercial Register" (Netherlands, Germany)
- "Companies Register" (UK)
- "Division of Corporations" (Delaware)
- "Legal Affairs Bureau Commercial Registration" (Japan)
range: string
required: true
slot_uri: gleif-base:hasNameTranslatedEnglish
register_name_local:
description: >-
Official name in local language.
Examples:
- "Handelsregister" (Netherlands, Germany)
- "Registre du commerce et des sociétés" (France)
- "商業登記簿" (Japan)
- "Торговый реестр" (Russia)
range: string
slot_uri: gleif-base:hasNameLegalLocal
register_abbreviation:
description: >-
Common abbreviation for the register.
Examples:
- "HR" (Handelsregister)
- "KvK" (Kamer van Koophandel - technically the authority, but commonly used)
- "CH" (Companies House)
- "RCS" (Registre du commerce et des sociétés)
range: string
slot_uri: gleif-base:hasAbbreviationLocal
register_type:
description: >-
Type of organizations this register handles.
Many jurisdictions have separate registers for different entity types.
Values:
- COMMERCIAL: For-profit businesses (default)
- FOUNDATION: Non-profit foundations
- ASSOCIATION: Voluntary associations
- CHARITY: Registered charities
- CULTURAL: Heritage/cultural institutions
- MIXED: Multiple entity types in one register
range: RegisterTypeEnum
required: true
slot_uri: schema:category
maintained_by:
description: >-
Registration authority that maintains this register.
Links to RegistrationAuthority class.
Examples:
- KvK (Kamer van Koophandel) maintains NL Handelsregister
- Companies House maintains UK Companies Register
- Local courts (Amtsgericht) maintain German Handelsregister
range: RegistrationAuthority
required: true
slot_uri: gleif-base:isManagedBy
gleif_ra_code:
description: >-
GLEIF Registration Authority List (RAL) code.
GLEIF maintains a comprehensive list of 1,050+ registration authorities
and their registers worldwide. Each has a unique RA code.
Format: "RA" followed by 6 digits
Examples:
- RA000439: Netherlands Kamer van Koophandel
- RA000585: UK Companies House
- RA000385: Germany Handelsregister München
- RA000429: Japan Legal Affairs Bureau
Reference: https://www.gleif.org/en/about-lei/code-lists/registration-authorities-list
range: string
pattern: "^RA[0-9]{6}$"
identifier_format:
description: >-
Format specification for registration numbers issued by this register.
Use regex pattern or description.
Examples:
- NL KvK: "[0-9]{8}" (8 digits)
- UK Companies House: "[A-Z]{2}[0-9]{6}|[0-9]{8}" (2 letters + 6 digits OR 8 digits)
- German HRB: "HRB [0-9]+" (HRB prefix + number)
range: string
slot_uri: schema:valuePattern
api_endpoint:
description: >-
URL of public API endpoint for querying this register.
Examples:
- KvK API: https://api.kvk.nl/
- Companies House API: https://api.company-information.service.gov.uk/
Many registers offer open data APIs for entity lookups.
range: uri
slot_uri: gleif-base:hasWebsite
enums:
RegisterTypeEnum:
description: Types of trade registers by entity category
permissible_values:
COMMERCIAL:
description: >-
Register for commercial/for-profit entities.
Examples: GmbH (DE), BV (NL), Ltd (UK), Inc (US)
FOUNDATION:
description: >-
Register for non-profit foundations.
Examples: Stichting (NL), Stiftung (DE)
ASSOCIATION:
description: >-
Register for voluntary associations.
Examples: Vereniging (NL), Verein (DE), e.V.
CHARITY:
description: >-
Register for charitable organizations.
Examples: UK registered charities
CULTURAL:
description: >-
Specialized register for cultural/heritage institutions.
Some countries maintain separate heritage registries.
MIXED:
description: >-
Single register handling multiple entity types.
Common in smaller jurisdictions.

View file

@ -0,0 +1,37 @@
# country slot - ISO 3166-1 country reference
id: https://nde.nl/ontology/hc/slot/country
name: country
title: Country Slot
# Declare the prefix used by slot_uri (schema:addressCountry) so this module
# resolves its CURIEs on its own, as the other slot modules do.
prefixes:
  schema: http://schema.org/
description: >-
Country where entity is located or operates.
Links to Country class with ISO 3166-1 alpha-2 codes.
Format: ISO 3166-1 alpha-2 code (e.g., "NL", "DE", "JP")
Use when:
- Place is in a specific country
- Legal form is jurisdiction-specific
- Feature types are country-specific
Examples:
- Netherlands museum → country.alpha_2 = "NL"
- Japanese archive → country.alpha_2 = "JP"
- German foundation → country.alpha_2 = "DE"
slots:
country:
slot_uri: schema:addressCountry
range: Country
required: false
multivalued: false
description: >-
Country where entity is located or operates.
Links to Country class with ISO 3166-1 alpha-2 codes.
comments:
- "Uses Country class with ISO 3166-1 alpha-2/alpha-3 codes"
- "ISO codes are authoritative, stable, and language-neutral"
- "Country names should be resolved via external services (GeoNames, UN M49)"

View file

@ -0,0 +1,30 @@
# description slot - General text description
id: https://nde.nl/ontology/hc/slot/description
name: description
title: Description Slot
# Declare the prefix used by slot_uri (schema:description) so this module
# resolves its CURIEs on its own, as the other slot modules do.
prefixes:
  schema: http://schema.org/
description: >-
General text description of an entity.
Used across multiple classes for providing human-readable descriptions
of entities, their purpose, and characteristics.
slots:
description:
slot_uri: schema:description
range: string
required: false
multivalued: false
description: >-
Human-readable description of the entity, its purpose, or characteristics.
Examples:
- Jurisdiction: "Bavaria is a federal state in southern Germany with its own commercial register system"
- Register: "The Handelsregister is the German commercial register maintained by local courts"
- Authority: "The Kamer van Koophandel is the Dutch Chamber of Commerce"
comments:
- "Maps to schema:description"
- "Should be concise but informative"
- "Language should match the context (typically English for international use)"

View file

@ -0,0 +1,45 @@
# jurisdiction slot - Legal jurisdiction reference
id: https://nde.nl/ontology/hc/slot/jurisdiction
name: jurisdiction
title: Jurisdiction Slot
prefixes:
gleif-base: https://www.gleif.org/ontology/Base/
lcc-cr: https://www.omg.org/spec/LCC/Countries/CountryRepresentation/
schema: http://schema.org/
description: >-
Legal/administrative jurisdiction where an entity operates or is registered.
Links to Jurisdiction class which captures:
- National jurisdictions (e.g., Netherlands, Japan)
- Subnational jurisdictions (e.g., Bavaria, Delaware)
- Municipal jurisdictions (e.g., Hong Kong, City of London)
- Supranational jurisdictions (e.g., European Union, OHADA)
GLEIF alignment:
- gleif-base:hasLegalJurisdiction - jurisdiction of legal formation
- gleif-base:hasCoverageArea - geographic scope of authority/register
slots:
jurisdiction:
slot_uri: gleif-base:hasCoverageArea
range: Jurisdiction
required: false
multivalued: false
description: >-
Legal/administrative jurisdiction where entity operates or is registered.
gleif-base:hasCoverageArea - "Indicates a geographic region in which
some service is provided, or to which some policy applies"
Examples:
- Netherlands (national): KvK jurisdiction
- Bavaria (subnational): Amtsgericht München jurisdiction
- European Union (supranational): SE registration jurisdiction
comments:
- "Maps to gleif-base:hasCoverageArea"
- "Jurisdictions are LEGAL boundaries, not just geographic"
- "Used for registration authorities, trade registers, and legal forms"

View file

@ -0,0 +1,42 @@
# legal_jurisdiction slot - Legal jurisdiction of formation/registration
id: https://nde.nl/ontology/hc/slot/legal_jurisdiction
name: legal_jurisdiction
title: Legal Jurisdiction Slot
prefixes:
gleif-base: https://www.gleif.org/ontology/Base/
schema: http://schema.org/
description: >-
Jurisdiction of legal formation and registration.
Links to Jurisdiction class.
gleif-base:hasLegalJurisdiction - "The jurisdiction of legal formation
and registration of the entity"
For most entities, this is the country. For federal systems (USA, Germany),
this may be a state/region.
slots:
legal_jurisdiction:
slot_uri: gleif-base:hasLegalJurisdiction
range: Jurisdiction
required: false
multivalued: false
description: >-
Jurisdiction of legal formation and registration.
gleif-base:hasLegalJurisdiction - "The jurisdiction of legal formation
and registration of the entity"
Examples:
- NL (Netherlands national)
- DE-BY (Bavaria subnational for German entities)
- US-DE (Delaware for US corporations)
comments:
- "Maps to gleif-base:hasLegalJurisdiction"
- "Jurisdictions are LEGAL boundaries, not just geographic"
- "For federal systems, may be state/region level"

View file

@ -0,0 +1,44 @@
# primary_register slot - Primary trade register reference
id: https://nde.nl/ontology/hc/slot/primary_register
name: primary_register
title: Primary Register Slot
prefixes:
gleif-base: https://www.gleif.org/ontology/Base/
gleif-ra: https://www.gleif.org/ontology/RegistrationAuthority/
schema: http://schema.org/
description: >-
Primary trade register where an entity is registered.
Links to TradeRegister class.
gleif-base:isRegisteredIn - "indicates the registry that something is registered in"
gleif-ra:BusinessRegistry - "a registry for registering and maintaining
information about business entities"
Used for:
- CustodianLegalStatus: Primary register where entity is registered
- RegistrationNumber: Register that issued the number
slots:
primary_register:
slot_uri: gleif-base:isRegisteredIn
range: TradeRegister
required: false
multivalued: false
description: >-
Primary trade register where entity is registered.
gleif-base:isRegisteredIn - "indicates the registry that something is registered in"
Examples:
- Netherlands Handelsregister (HR)
- UK Companies Register
- German Handelsregister (HRB/HRA)
comments:
- "Maps to gleif-base:isRegisteredIn"
- "Links to TradeRegister class (the register/database)"
- "Distinct from RegistrationAuthority (the organization maintaining the register)"

View file

@ -0,0 +1,38 @@
# website slot - Official website URL
id: https://nde.nl/ontology/hc/slot/website
name: website
title: Website Slot
prefixes:
gleif-base: https://www.gleif.org/ontology/Base/
schema: http://schema.org/
description: >-
Official website URL of an organization or entity.
Used for:
- Trade registers (e.g., https://www.kvk.nl/)
- Registration authorities (e.g., https://www.companieshouse.gov.uk/)
- Heritage institutions
slots:
website:
slot_uri: gleif-base:hasWebsite
range: uri
required: false
multivalued: false
description: >-
Official website URL of the organization or entity.
gleif-base:hasWebsite - "A website associated with something"
Examples:
- https://www.kvk.nl/ (Dutch KvK)
- https://www.companieshouse.gov.uk/ (UK Companies House)
- https://www.rijksmuseum.nl/ (Rijksmuseum)
comments:
- "Maps to gleif-base:hasWebsite and schema:url"
- "Should be the official/canonical website URL"
- "Use https:// when available"

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,493 @@
#!/usr/bin/env python3
"""
Enrich NDE Register NL entries with Wikidata data.
This script reads the NDE Register YAML file, fetches comprehensive data from Wikidata
for entries that have a 'wikidata_id' field, and creates an enriched YAML file with
all available Wikidata properties.
The script uses the Wikibase REST API and SPARQL endpoints to maximize data retrieval
while respecting rate limits.
Usage:
python scripts/enrich_nde_from_wikidata.py
Environment Variables:
WIKIDATA_API_TOKEN - Optional OAuth2 token for increased rate limits (5,000 req/hr)
WIKIMEDIA_CONTACT_EMAIL - Contact email for User-Agent (required by Wikimedia policy)
Output:
data/nde/nde_register_nl_enriched_{timestamp}.yaml
"""
import os
import sys
import time
import json
import yaml
import httpx
from pathlib import Path
from datetime import datetime, timezone
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, field, asdict
import logging
# Logging: timestamped, INFO-level records for progress and error reporting.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Wikidata endpoints.
WIKIDATA_REST_API = "https://www.wikidata.org/w/rest.php/wikibase/v1"
WIKIDATA_ACTION_API = "https://www.wikidata.org/w/api.php"
SPARQL_URL = "https://query.wikidata.org/sparql"

# Credentials and contact details are taken from the environment.
# An empty token means the script runs anonymously.
WIKIDATA_API_TOKEN = os.getenv("WIKIDATA_API_TOKEN", "")
WIKIMEDIA_CONTACT_EMAIL = os.getenv("WIKIMEDIA_CONTACT_EMAIL", "glam-data@example.com")
USER_AGENT = f"GLAMDataExtractor/1.0 ({WIKIMEDIA_CONTACT_EMAIL})"

# Seconds to wait between requests, chosen to stay just under the hourly quota:
# 0.75 s -> ~4800 req/hr (limit 5,000 with a token),
# 7.5 s  -> ~480 req/hr  (limit 500 when anonymous).
REQUEST_DELAY = 0.75 if WIKIDATA_API_TOKEN else 7.5
logger.info(
    "Using authenticated mode: 5,000 req/hr limit"
    if WIKIDATA_API_TOKEN
    else "Using anonymous mode: 500 req/hr limit"
)

# Default request headers; the bearer token is attached only when one is configured.
HEADERS = {
    "Accept": "application/json",
    "User-Agent": USER_AGENT,
    **({"Authorization": f"Bearer {WIKIDATA_API_TOKEN}"} if WIKIDATA_API_TOKEN else {}),
}
def _utc_timestamp() -> str:
    """Return the current UTC time as an ISO 8601 string."""
    return datetime.now(timezone.utc).isoformat()


@dataclass
class WikidataEnrichment:
    """Everything extracted from Wikidata for a single entity.

    Only ``entity_id`` is required; every other field starts empty (or
    ``None``) and is filled in while the API response is parsed.
    """

    # Wikidata Q-number, e.g. "Q22246632"
    entity_id: str

    # Terms and links, copied from the corresponding sections of the API response
    labels: Dict[str, str] = field(default_factory=dict)
    descriptions: Dict[str, str] = field(default_factory=dict)
    aliases: Dict[str, List[str]] = field(default_factory=dict)
    sitelinks: Dict[str, str] = field(default_factory=dict)

    # Statement data
    claims: Dict[str, Any] = field(default_factory=dict)
    identifiers: Dict[str, str] = field(default_factory=dict)
    instance_of: List[Dict[str, str]] = field(default_factory=list)

    # Location
    country: Optional[Dict[str, str]] = None
    location: Optional[Dict[str, str]] = None
    coordinates: Optional[Dict[str, float]] = None

    # Lifespan
    inception: Optional[str] = None
    dissolution: Optional[str] = None

    # Web presence and media
    official_website: Optional[str] = None
    image: Optional[str] = None
    logo: Optional[str] = None

    # When this record was created (UTC, ISO 8601)
    fetch_timestamp: str = field(default_factory=_utc_timestamp)
# Wikidata property IDs relevant to heritage institutions, mapped to the
# human-readable key used in the enriched output. Properties not listed here
# are looked up with their raw P-number as the fallback name.
PROPERTY_LABELS = {
    # Core properties
    "P31": "instance_of",           # Instance of (type)
    "P17": "country",               # Country
    "P131": "located_in",           # Located in administrative territory
    "P625": "coordinates",          # Coordinate location
    "P571": "inception",            # Date founded
    "P576": "dissolution",          # Date dissolved
    "P856": "official_website",     # Official website
    "P18": "image",                 # Image
    "P154": "logo",                 # Logo
    # Identifiers
    "P791": "isil",                 # ISIL code
    "P214": "viaf",                 # VIAF ID
    "P227": "gnd",                  # GND ID
    "P244": "lcnaf",                # Library of Congress ID
    "P268": "bnf",                  # BnF ID
    "P269": "idref",                # IdRef ID
    "P213": "isni",                 # ISNI
    "P1566": "geonames",            # GeoNames ID
    "P2427": "grid",                # GRID ID
    "P3500": "ringgold",            # Ringgold ID
    # Museofile is P539; P5785 (used previously) is the EU Research participant ID.
    "P539": "museofile",            # Museofile ID (France)
    "P8168": "factgrid",            # FactGrid ID
    # Cultural heritage specific
    "P361": "part_of",              # Part of
    "P355": "subsidiaries",         # Subsidiaries
    "P749": "parent_org",           # Parent organization
    "P127": "owned_by",             # Owned by
    "P1037": "director",            # Director/manager
    "P159": "headquarters",         # Headquarters location
    "P463": "member_of",            # Member of
    "P1435": "heritage_status",     # Heritage designation
    "P910": "topic_category",       # Topic's main category
    "P373": "commons_category",     # Commons category
    # Additional metadata
    "P2044": "elevation",           # Elevation
    "P6375": "street_address",      # Street address
    "P281": "postal_code",          # Postal code
    "P1329": "phone",               # Phone number
    "P968": "email",                # Email
    "P973": "described_at_url",     # Described at URL
    # KvK company ID is P3220; P8402 (used previously) is "open data portal".
    "P3220": "kvk_number",          # KvK number (Dutch Chamber of Commerce)
}
def fetch_entity_data(entity_id: str, client: httpx.Client) -> Optional[Dict]:
    """
    Retrieve one item from the Wikibase REST API.

    A 403 response is treated as a rejected token: the request is repeated
    once with the Authorization header removed.

    Args:
        entity_id: Wikidata Q-number (e.g., "Q22246632")
        client: HTTP client used to issue the request(s)

    Returns:
        The decoded JSON body of the item, or None if the request failed
        (a missing item is logged as a warning, anything else as an error).
    """
    item_url = f"{WIKIDATA_REST_API}/entities/items/{entity_id}"

    try:
        reply = client.get(item_url, headers=HEADERS)

        if reply.status_code == 403:
            # Retry anonymously: same headers minus the bearer token.
            anonymous_headers = dict(HEADERS)
            anonymous_headers.pop("Authorization", None)
            reply = client.get(item_url, headers=anonymous_headers)

        reply.raise_for_status()
        return reply.json()
    except httpx.HTTPStatusError as e:
        missing = e.response.status_code == 404
        if missing:
            logger.warning(f"Entity {entity_id} not found")
        else:
            logger.error(f"HTTP error fetching {entity_id}: {e}")
    except Exception as e:
        # Best effort: network/decoding problems must not abort the whole run.
        logger.error(f"Error fetching {entity_id}: {e}")

    return None
def extract_value_from_statement(statement: Dict) -> Any:
    """
    Extract a simplified value from a Wikibase REST API statement.

    The statement is expected to carry its value as
    ``{"value": {"type": ..., "content": ...}}``. Structured (dict)
    content is reduced to the part the rest of the script uses:

    * entity references (``"entity-type"`` or ``"id"`` key) -> the ``id``
    * time values (``"time"`` key) -> the time string
    * coordinates (``"latitude"`` and ``"longitude"`` keys) -> a dict with
      ``latitude``, ``longitude`` and ``precision`` only
    * any other dict (e.g. quantities, monolingual text) -> returned as-is

    Non-dict content (strings, numbers) is returned unchanged. Statements
    without content (such as "somevalue"/"novalue") yield None.

    Args:
        statement: A single statement object from the entity's
            ``statements`` mapping.

    Returns:
        The simplified value, or None when the statement has no usable
        value or is not shaped like a statement at all.
    """
    try:
        content = statement.get("value", {}).get("content")
    except AttributeError:
        # statement (or its "value") is not a mapping: nothing to extract
        return None

    # NOTE: the value "type" is deliberately not used to short-circuit here.
    # Regular values all have type "value", so returning early on it would
    # skip the structured-content handling below for every real statement.
    if not isinstance(content, dict):
        # Plain string/number content, or None when no content is present
        return content

    if "entity-type" in content or "id" in content:
        # Entity reference
        return content.get("id", content)

    if "time" in content:
        # Time value
        return content.get("time")

    if "latitude" in content and "longitude" in content:
        # Coordinates
        return {
            "latitude": content.get("latitude"),
            "longitude": content.get("longitude"),
            "precision": content.get("precision"),
        }

    return content
def parse_entity_data(entity_id: str, data: Dict) -> WikidataEnrichment:
    """
    Build a WikidataEnrichment from a raw entity payload.

    Labels, descriptions and aliases are copied over unchanged. Sitelinks
    are reduced to their ``title`` when the entry is a dict. Every property
    in ``statements`` is then routed to a dedicated attribute, to the
    ``identifiers`` mapping, or to the catch-all ``claims`` mapping.

    Args:
        entity_id: The Wikidata entity ID
        data: Raw API response data

    Returns:
        WikidataEnrichment object with all extracted data
    """
    # Properties stored as {"id": <first value>} on the named attribute
    reference_attrs = {
        "P17": "country",      # Country
        "P131": "location",    # Located in
    }
    # Properties whose first value is stored directly on the named attribute
    first_value_attrs = {
        "P571": "inception",          # Inception
        "P576": "dissolution",        # Dissolution
        "P856": "official_website",   # Official website
        "P18": "image",               # Image
        "P154": "logo",               # Logo
    }
    # Properties collected under enrichment.identifiers
    identifier_props = (
        "P791", "P214", "P227", "P244", "P268", "P269",
        "P213", "P1566", "P2427", "P3500", "P5785", "P8168", "P8402",
    )

    enrichment = WikidataEnrichment(entity_id=entity_id)

    # Terms are taken over as delivered
    enrichment.labels = data.get("labels", {})
    enrichment.descriptions = data.get("descriptions", {})
    enrichment.aliases = data.get("aliases", {})

    # Sitelinks: keep only the page title when the entry is structured
    for site, link_data in data.get("sitelinks", {}).items():
        if isinstance(link_data, dict):
            link_data = link_data.get("title", link_data)
        enrichment.sitelinks[site] = link_data

    # Statements/claims
    for prop_id, prop_statements in data.get("statements", {}).items():
        if not prop_statements:
            continue

        # Keep every statement that yields a usable value
        values = [
            extracted
            for extracted in map(extract_value_from_statement, prop_statements)
            if extracted is not None
        ]
        if not values:
            continue

        first = values[0]

        if prop_id == "P31":  # Instance of: keep all values, wrap bare ids
            enrichment.instance_of = [
                {"id": item} if isinstance(item, str) else item for item in values
            ]
        elif prop_id in reference_attrs:
            setattr(enrichment, reference_attrs[prop_id], {"id": first})
        elif prop_id == "P625":  # Coordinates: only accepted when structured
            if isinstance(first, dict):
                enrichment.coordinates = first
        elif prop_id in first_value_attrs:
            setattr(enrichment, first_value_attrs[prop_id], first)
        elif prop_id in identifier_props:
            enrichment.identifiers[PROPERTY_LABELS.get(prop_id, prop_id)] = first
        else:
            # Everything else: scalar for a single value, list otherwise
            label = PROPERTY_LABELS.get(prop_id, prop_id)
            enrichment.claims[label] = first if len(values) == 1 else values

    return enrichment
def enrich_entity(entity_id: str, client: httpx.Client) -> Optional[WikidataEnrichment]:
    """
    Fetch one Wikidata entity and parse it into an enrichment record.

    An id that does not already start with "Q" gets the prefix added
    before the lookup.

    Args:
        entity_id: Wikidata Q-number (e.g., "Q22246632"), with or without
            the leading "Q"
        client: HTTP client for requests

    Returns:
        WikidataEnrichment object, or None when the fetch returned no data
    """
    # Normalise to Q-number form
    qid = entity_id if entity_id.startswith("Q") else f"Q{entity_id}"

    raw = fetch_entity_data(qid, client)
    return None if raw is None else parse_entity_data(qid, raw)
def enrichment_to_dict(enrichment: WikidataEnrichment) -> Dict:
    """
    Flatten a WikidataEnrichment into a plain dict for YAML output.

    The entity id and fetch timestamp are always present. Every other
    field is only emitted when it is truthy, so empty mappings and missing
    values leave no key behind. Keys are inserted in a fixed order, which
    is the order they appear in when dumped without key sorting.

    Args:
        enrichment: The enrichment record to convert

    Returns:
        Dictionary with ``wikidata_``-prefixed keys
    """
    out = {
        "wikidata_entity_id": enrichment.entity_id,
        "wikidata_fetch_timestamp": enrichment.fetch_timestamp,
    }

    # Labels and descriptions: full mapping plus nl/en convenience keys
    term_fields = (
        ("labels", "wikidata_labels", "wikidata_label_"),
        ("descriptions", "wikidata_descriptions", "wikidata_description_"),
    )
    for attr, mapping_key, per_language_prefix in term_fields:
        by_language = getattr(enrichment, attr)
        if not by_language:
            continue
        out[mapping_key] = by_language
        for language in ("nl", "en"):
            if language in by_language:
                out[per_language_prefix + language] = by_language[language]

    if enrichment.aliases:
        out["wikidata_aliases"] = enrichment.aliases

    # Identifiers: drop entries without a value
    if enrichment.identifiers:
        out["wikidata_identifiers"] = {
            name: ident for name, ident in enrichment.identifiers.items() if ident
        }

    # Remaining fields are copied verbatim when set
    passthrough_fields = (
        ("instance_of", "wikidata_instance_of"),
        ("country", "wikidata_country"),
        ("location", "wikidata_located_in"),
        ("coordinates", "wikidata_coordinates"),
        ("inception", "wikidata_inception"),
        ("dissolution", "wikidata_dissolution"),
        ("official_website", "wikidata_official_website"),
        ("image", "wikidata_image"),
        ("logo", "wikidata_logo"),
        ("sitelinks", "wikidata_sitelinks"),
        ("claims", "wikidata_claims"),
    )
    for attr, key in passthrough_fields:
        value = getattr(enrichment, attr)
        if value:
            out[key] = value

    return out
def main():
    """
    Run the enrichment over the NDE register and write the results.

    Steps:
      1. Load ``data/nde/nde_register_nl.yaml`` (relative to the parent of
         this script's directory).
      2. For every entry that has a ``wikidata_id``, fetch the Wikidata
         enrichment and attach it under ``wikidata_enrichment``; on
         failure, record the reason under ``wikidata_enrichment_error``.
         Entries without a ``wikidata_id`` are passed through untouched.
      3. Write all entries to a timestamped YAML file and a run summary to
         a timestamped JSON log file in the same directory.

    Returns:
        0, which the caller uses as the process exit code.
    """
    # Paths
    data_dir = Path(__file__).parent.parent / "data" / "nde"
    input_file = data_dir / "nde_register_nl.yaml"

    # One UTC timestamp names both the output file and the log file
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    output_file = data_dir / f"nde_register_nl_enriched_{timestamp}.yaml"
    log_file = data_dir / f"enrichment_log_{timestamp}.json"

    logger.info(f"Input file: {input_file}")
    logger.info(f"Output file: {output_file}")

    # Load input YAML
    logger.info("Loading input YAML file...")
    with open(input_file, 'r', encoding='utf-8') as f:
        entries = yaml.safe_load(f)

    total_entries = len(entries)
    logger.info(f"Loaded {total_entries} entries")

    # Entries that are candidates for enrichment
    entries_with_wikidata = [e for e in entries if e.get("wikidata_id")]
    logger.info(f"Found {len(entries_with_wikidata)} entries with wikidata_id")

    enriched_entries = []
    success_count = 0
    skip_count = 0
    error_count = 0

    with httpx.Client(timeout=30.0) as client:
        for i, entry in enumerate(entries):
            wikidata_id = entry.get("wikidata_id")
            org_name = entry.get("organisatie", "Unknown")

            if not wikidata_id:
                # Nothing to look up: pass the entry through unchanged
                enriched_entries.append(entry)
                skip_count += 1
                continue

            logger.info(f"[{i+1}/{total_entries}] Enriching: {org_name} ({wikidata_id})")

            # Outcome of this lookup: either an enriched copy or a failure reason
            enriched_entry = None
            failure = None
            try:
                enrichment = enrich_entity(str(wikidata_id), client)
                if enrichment:
                    # Merge enrichment data into a copy of the original entry
                    enriched_entry = dict(entry)
                    enriched_entry["wikidata_enrichment"] = enrichment_to_dict(enrichment)
                else:
                    failure = "Failed to fetch from Wikidata"
            except Exception as e:
                logger.error(f"Error processing {org_name}: {e}")
                failure = str(e)

            if failure is None:
                enriched_entries.append(enriched_entry)
                success_count += 1
            else:
                # Keep the original entry, annotated with what went wrong
                entry["wikidata_enrichment_error"] = failure
                enriched_entries.append(entry)
                error_count += 1

            # Rate limiting between lookups
            time.sleep(REQUEST_DELAY)

    # Write output
    logger.info(f"Writing enriched data to {output_file}...")
    with open(output_file, 'w', encoding='utf-8') as f:
        yaml.dump(enriched_entries, f, allow_unicode=True, default_flow_style=False, sort_keys=False)

    # Summary
    summary_lines = (
        "=" * 60,
        "ENRICHMENT COMPLETE",
        "=" * 60,
        f"Total entries: {total_entries}",
        f"Entries with wikidata_id: {len(entries_with_wikidata)}",
        f"Successfully enriched: {success_count}",
        f"Skipped (no wikidata_id): {skip_count}",
        f"Errors: {error_count}",
        f"Output file: {output_file}",
    )
    for line in summary_lines:
        logger.info(line)

    # Machine-readable record of this run
    log_data = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "input_file": str(input_file),
        "output_file": str(output_file),
        "total_entries": total_entries,
        "entries_with_wikidata_id": len(entries_with_wikidata),
        "successfully_enriched": success_count,
        "skipped_no_wikidata_id": skip_count,
        "errors": error_count,
        "authenticated": bool(WIKIDATA_API_TOKEN),
        "rate_limit_delay_seconds": REQUEST_DELAY,
    }
    with open(log_file, 'w', encoding='utf-8') as f:
        json.dump(log_data, f, indent=2)
    logger.info(f"Log file: {log_file}")

    return 0
# Script entry point: run the enrichment and exit with main()'s return value
if __name__ == "__main__":
    raise SystemExit(main())