- Introduced custodian_hub_v3.mmd, custodian_hub_v4_final.mmd, and custodian_hub_v5_FINAL.mmd for the Mermaid representation.
- Created custodian_hub_FINAL.puml and custodian_hub_v3.puml for the PlantUML representation.
- Defined entities such as CustodianReconstruction, Identifier, TimeSpan, Agent, CustodianName, CustodianObservation, ReconstructionActivity, Appellation, ConfidenceMeasure, Custodian, LanguageCode, and SourceDocument.
- Established relationships and associations between entities, including temporal extents, observations, and reconstruction activities.
- Incorporated enumerations for the various types, statuses, and classifications relevant to custodians and their activities.
159 lines
6.2 KiB
Python
Executable file
159 lines
6.2 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Parse ISO 20275 Entity Legal Form codes from CSV and generate LinkML mappings.
|
|
"""
|
|
|
|
import csv
|
|
import yaml
|
|
from pathlib import Path
|
|
from typing import Dict, List, Any
|
|
|
|
def _first_value(row: Dict[str, Any], *keys: str, default: str = '') -> str:
    """Return the first non-empty value of ``row`` among ``keys``, stripped.

    Column headers differ between releases of the code list, so each logical
    field is looked up under several aliases. ``default`` (also stripped) is
    returned when every alias is missing or empty.
    """
    for key in keys:
        value = row.get(key)
        if value:
            return value.strip()
    return default.strip()


def parse_iso20275_csv(csv_path: Path) -> List[Dict[str, Any]]:
    """
    Parse the ISO 20275 ELF code CSV file.

    The delimiter is sniffed from the first 1024 characters of the file
    (restricted to comma, semicolon, tab and pipe); when it cannot be
    determined, e.g. for an empty or single-column file, a comma is assumed.

    Columns are looked up under these header aliases (first non-empty wins):
    - ELF code: 'ELF Code', 'Entity Legal Form Code', 'Code'
    - Country code: 'Country Code', 'Country', 'ISO Country Code'
    - Local name: 'Entity Legal Form name Local', 'Legal Form Name',
      'Name Local'
    - Transliterated name: 'Entity Legal Form name Transliterated',
      'Name Transliterated'
    - Abbreviation: 'Abbreviation Local', 'Abbreviation'
    - Status: 'ELF Status', 'Status' (treated as active when absent)

    Args:
        csv_path: Location of the CSV file (UTF-8, optional BOM).

    Returns:
        One dict per active row whose ELF code is exactly 4 characters, with
        keys 'elf_code' (upper-cased), 'country_code' (upper-cased, '' when
        missing), 'local_name', 'transliterated_name' (None when empty or
        identical to the local name) and 'abbreviation' (None when empty).
    """
    # Accept the long spelling as well as the short 'ACTV' status code.
    active_statuses = {'ACTIVE', 'ACTV'}
    legal_forms = []

    # newline='' lets the csv module handle line endings itself, so quoted
    # fields that contain embedded newlines are parsed correctly.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
        sample = f.read(1024)
        f.seek(0)
        try:
            # Restricting the candidates keeps the sniffer from picking a
            # letter, a space or '\r' as the delimiter.
            delimiter = csv.Sniffer().sniff(sample, delimiters=',;\t|').delimiter
        except csv.Error:
            # Empty or single-column input: no delimiter to detect.
            delimiter = ','

        reader = csv.DictReader(f, delimiter=delimiter)

        for row in reader:
            elf_code = _first_value(
                row, 'ELF Code', 'Entity Legal Form Code', 'Code')
            status = _first_value(
                row, 'ELF Status', 'Status', default='Active')

            if len(elf_code) != 4 or status.upper() not in active_statuses:
                continue

            country_code = _first_value(
                row, 'Country Code', 'Country', 'ISO Country Code')
            local_name = _first_value(
                row, 'Entity Legal Form name Local', 'Legal Form Name',
                'Name Local')
            transliterated = _first_value(
                row, 'Entity Legal Form name Transliterated',
                'Name Transliterated')
            abbreviation = _first_value(
                row, 'Abbreviation Local', 'Abbreviation')

            # A transliteration that is empty or merely repeats the local
            # name carries no information, so it is reported as None.
            has_transliteration = bool(transliterated) and transliterated != local_name

            legal_forms.append({
                'elf_code': elf_code.upper(),
                'country_code': country_code.upper(),
                'local_name': local_name,
                'transliterated_name': transliterated if has_transliteration else None,
                'abbreviation': abbreviation or None,
            })

    return legal_forms
|
|
|
|
def generate_common_mappings(legal_forms: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Build the "common heritage institution" mapping document.

    Every entry of ``legal_forms`` whose 'elf_code' appears in the built-in
    table below yields one mapping carrying the code, country code, local
    name, ontology classes and a short type label. 'transliterated_name' and
    'abbreviation' are copied over only when they are truthy. Entries with
    other codes are ignored; input order is preserved.

    Returns a dict with the keys 'id', 'name', 'title', 'description' and
    'mappings' (the list described above).
    """
    # (ELF code, schema.org class, short type label), grouped by country.
    known_forms = (
        # Netherlands
        ('8888', 'schema:NonProfitOrganization', 'foundation'),
        ('54M6', 'schema:Corporation', 'private_company'),
        # Germany
        ('QS1L', 'schema:NonProfitOrganization', 'foundation'),
        ('HRA1', 'schema:Corporation', 'gmbh'),
        # France
        ('L6L1', 'schema:NonProfitOrganization', 'association'),
        # UK
        ('PRIV', 'schema:Corporation', 'private_limited'),
        ('CHAR', 'schema:NonProfitOrganization', 'charity'),
        # US
        ('501C', 'schema:NonProfitOrganization', 'nonprofit'),
    )
    # Each code gets its own list object, so separate codes never share one.
    profiles = {
        code: {'ontology': ['org:FormalOrganization', schema_class], 'type': label}
        for code, schema_class, label in known_forms
    }

    selected = []
    for entry in legal_forms:
        code = entry['elf_code']
        profile = profiles.get(code)
        if profile is None:
            continue

        record = {
            'elf_code': code,
            'country_code': entry['country_code'],
            'legal_form_name': entry['local_name'],
            'ontology_mappings': profile['ontology'],
            'common_type': profile['type'],
        }
        # Optional fields are emitted only when they carry a value.
        for optional_key in ('transliterated_name', 'abbreviation'):
            if entry[optional_key]:
                record[optional_key] = entry[optional_key]

        selected.append(record)

    document = {
        'id': 'https://nde.nl/ontology/hc/mapping/ISO20275_common',
        'name': 'ISO20275_common_mappings',
        'title': 'Common ISO 20275 Legal Forms for Heritage Institutions',
        'description': 'Frequently used legal forms for museums, archives, and libraries',
        'mappings': selected,
    }
    return document
|
|
|
|
def main():
    """Parse ISO 20275 codes and generate mappings.

    Reads the ELF code list from a fixed path relative to the current
    working directory, prints the number of active codes and the ten
    countries with the most legal forms, then writes the common heritage
    mappings as YAML (creating the output directory if needed).

    Prints an error and returns without writing anything when the CSV file
    does not exist.
    """
    csv_path = Path('data/ontology/2023-09-28-elf-code-list-v1.5.csv')

    if not csv_path.exists():
        print(f"Error: CSV file not found at {csv_path}")
        return

    print(f"Parsing ISO 20275 codes from {csv_path}")
    legal_forms = parse_iso20275_csv(csv_path)
    print(f"Found {len(legal_forms)} active legal form codes")

    # Count by country (rows without a country code are not counted)
    by_country = {}
    for form in legal_forms:
        country = form['country_code']
        if country:
            by_country[country] = by_country.get(country, 0) + 1

    print("\nTop 10 countries by number of legal forms:")
    for country, count in sorted(by_country.items(), key=lambda x: x[1], reverse=True)[:10]:
        print(f"  {country}: {count} forms")

    # Generate common mappings
    mappings = generate_common_mappings(legal_forms)

    output_path = Path('schemas/20251121/linkml/modules/mappings/ISO20275_common.yaml')
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # allow_unicode=True makes PyYAML emit non-ASCII characters verbatim, so
    # the file must be opened as UTF-8 explicitly; the platform default
    # encoding may be unable to encode local-language legal form names.
    with open(output_path, 'w', encoding='utf-8') as f:
        yaml.dump(mappings, f, default_flow_style=False, allow_unicode=True)

    print(f"\nGenerated common mappings: {output_path}")
    print(f"Mapped {len(mappings['mappings'])} common heritage institution legal forms")
|
|
|
|
# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|