#!/usr/bin/env python3
"""
Parse ISO 20275 Entity Legal Form codes from CSV and generate LinkML mappings.
"""

import csv
from collections import Counter
from pathlib import Path
from typing import Dict, List, Any

try:
    import yaml
except ImportError:
    # PyYAML is only needed by main() to write the output file; keeping the
    # import optional lets the parsing helpers be imported without it.
    yaml = None


# ELF codes treated as "common heritage institution legal forms", with the
# ontology classes and the coarse type label emitted for each of them.
# NOTE(review): these codes are hard-coded here and are not validated against
# the published ELF code list - confirm that they exist in the input CSV.
_HERITAGE_FORMS: Dict[str, Dict[str, Any]] = {
    # Netherlands
    '8888': {'ontology': ['org:FormalOrganization', 'schema:NonProfitOrganization'], 'type': 'foundation'},
    '54M6': {'ontology': ['org:FormalOrganization', 'schema:Corporation'], 'type': 'private_company'},
    # Germany
    'QS1L': {'ontology': ['org:FormalOrganization', 'schema:NonProfitOrganization'], 'type': 'foundation'},
    'HRA1': {'ontology': ['org:FormalOrganization', 'schema:Corporation'], 'type': 'gmbh'},
    # France
    'L6L1': {'ontology': ['org:FormalOrganization', 'schema:NonProfitOrganization'], 'type': 'association'},
    # UK
    'PRIV': {'ontology': ['org:FormalOrganization', 'schema:Corporation'], 'type': 'private_limited'},
    'CHAR': {'ontology': ['org:FormalOrganization', 'schema:NonProfitOrganization'], 'type': 'charity'},
    # US
    '501C': {'ontology': ['org:FormalOrganization', 'schema:NonProfitOrganization'], 'type': 'nonprofit'},
}


def _first_value(row: Dict[str, Any], names: List[str], default: str = '') -> str:
    """Return the stripped value of the first column in *names* that is non-empty in *row*."""
    for name in names:
        value = row.get(name)
        if value:
            return value.strip()
    return default.strip()


def parse_iso20275_csv(csv_path: Path) -> List[Dict[str, Any]]:
    """
    Parse the ISO 20275 ELF code CSV file.

    Expected columns based on the GLEIF standard:
    - ELF Code (4 characters)
    - Country Code (ISO 3166-1)
    - Jurisdiction
    - Country/Subdivision
    - Entity Legal Form name (local language)
    - Entity Legal Form name (transliterated)
    - Abbreviation (local language)
    - Abbreviation (transliterated)
    - Date added
    - ELF Status

    The delimiter is sniffed from the start of the file; a comma is used when
    it cannot be determined (for example for an empty file).

    Args:
        csv_path: Path of the CSV file, read as UTF-8 (a leading BOM is skipped).

    Returns:
        One dict per kept row with the keys ``elf_code`` and ``country_code``
        (both upper-cased), ``local_name``, ``transliterated_name`` (``None``
        when empty or identical to the local name) and ``abbreviation``
        (``None`` when empty). A row is kept only when its ELF code is exactly
        4 characters long and its status is ``'Active'``; a missing status
        column counts as ``'Active'``.
    """
    # NOTE(review): the column names and the 'Active' literal below are
    # matched exactly - confirm them against the headers and status values of
    # the CSV export that is actually used.
    legal_forms = []

    # newline='' is what the csv module asks for, so that it can do its own
    # line-ending handling.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
        # Try to detect delimiter. The trailing readline() completes the last
        # sampled line: a line cut off in the middle has fewer delimiters than
        # the others and can make the sniffer give up.
        sample = f.read(1024) + f.readline()
        f.seek(0)
        # With newline='' the sample still contains '\r'; normalise it so a
        # carriage return is never considered as a delimiter candidate.
        sample = sample.replace('\r\n', '\n').replace('\r', '\n')
        try:
            delimiter = csv.Sniffer().sniff(sample).delimiter
        except csv.Error:
            # Raised when no delimiter can be determined.
            delimiter = ','

        reader = csv.DictReader(f, delimiter=delimiter)

        for row in reader:
            # Map column names (may vary)
            elf_code = _first_value(row, ['ELF Code', 'Entity Legal Form Code', 'Code'])
            country_code = _first_value(row, ['Country Code', 'Country', 'ISO Country Code'])
            local_name = _first_value(
                row, ['Entity Legal Form name Local', 'Legal Form Name', 'Name Local'])
            transliterated = _first_value(
                row, ['Entity Legal Form name Transliterated', 'Name Transliterated'])
            abbreviation = _first_value(row, ['Abbreviation Local', 'Abbreviation'])
            status = _first_value(row, ['ELF Status', 'Status'], default='Active')

            if len(elf_code) != 4 or status != 'Active':
                continue

            # A transliteration is only worth keeping when it is present and
            # adds something over the local name.
            has_transliteration = bool(transliterated) and transliterated != local_name
            legal_forms.append({
                'elf_code': elf_code.upper(),
                'country_code': country_code.upper(),
                'local_name': local_name,
                'transliterated_name': transliterated if has_transliteration else None,
                'abbreviation': abbreviation or None,
            })

    return legal_forms


def generate_common_mappings(legal_forms: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Generate mappings for common heritage institution legal forms.

    Focus on forms commonly used by museums, archives, and libraries.

    Args:
        legal_forms: Records shaped like the ones returned by
            ``parse_iso20275_csv``.

    Returns:
        A dict with the mapping-set metadata (``id``, ``name``, ``title``,
        ``description``) and a ``mappings`` list holding one entry, in input
        order, for every record whose ELF code is in the built-in table.
        ``transliterated_name`` and ``abbreviation`` are only included in an
        entry when the record has a non-empty value for them.
    """
    mappings = []

    for form in legal_forms:
        known = _HERITAGE_FORMS.get(form['elf_code'])
        if known is None:
            continue

        mapping = {
            'elf_code': form['elf_code'],
            'country_code': form['country_code'],
            'legal_form_name': form['local_name'],
            # Copy the list: when several rows carry the same ELF code, a
            # shared list object would be serialised by yaml.dump as an
            # anchor/alias pair instead of a plain list.
            'ontology_mappings': list(known['ontology']),
            'common_type': known['type'],
        }

        if form['transliterated_name']:
            mapping['transliterated_name'] = form['transliterated_name']

        if form['abbreviation']:
            mapping['abbreviation'] = form['abbreviation']

        mappings.append(mapping)

    return {
        'id': 'https://nde.nl/ontology/hc/mapping/ISO20275_common',
        'name': 'ISO20275_common_mappings',
        'title': 'Common ISO 20275 Legal Forms for Heritage Institutions',
        'description': 'Frequently used legal forms for museums, archives, and libraries',
        'mappings': mappings
    }


def main():
    """Parse ISO 20275 codes and generate mappings.

    Reads the ELF code list from a fixed path below ``data/ontology``, prints
    a per-country summary and writes the common mappings as YAML below
    ``schemas/``. Prints an error and returns when the CSV file is missing.

    Raises:
        ImportError: If PyYAML is not installed.
    """
    if yaml is None:
        raise ImportError("PyYAML is required to write the mappings file")

    csv_path = Path('data/ontology/2023-09-28-elf-code-list-v1.5.csv')

    if not csv_path.exists():
        print(f"Error: CSV file not found at {csv_path}")
        return

    print(f"Parsing ISO 20275 codes from {csv_path}")
    legal_forms = parse_iso20275_csv(csv_path)

    print(f"Found {len(legal_forms)} active legal form codes")

    # Count by country (records without a country code are left out)
    by_country = Counter(
        form['country_code'] for form in legal_forms if form['country_code']
    )

    print("\nTop 10 countries by number of legal forms:")
    for country, count in by_country.most_common(10):
        print(f" {country}: {count} forms")

    # Generate common mappings
    mappings = generate_common_mappings(legal_forms)

    output_path = Path('schemas/20251121/linkml/modules/mappings/ISO20275_common.yaml')
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # allow_unicode=True writes non-ASCII characters verbatim, so the file
    # must be opened as UTF-8 instead of the platform default encoding.
    with open(output_path, 'w', encoding='utf-8') as f:
        yaml.dump(mappings, f, default_flow_style=False, allow_unicode=True)

    print(f"\nGenerated common mappings: {output_path}")
    print(f"Mapped {len(mappings['mappings'])} common heritage institution legal forms")


if __name__ == '__main__':
    main()