#!/usr/bin/env python3
"""
Convert Canadian ISIL records to LinkML format.

Processes all Canadian library records and exports to YAML/JSON.
"""

import sys
import json
from collections import Counter
from pathlib import Path
from datetime import datetime

# Add src to Python path so the project package resolves when run as a script
sys.path.insert(0, str(Path(__file__).parent / "src"))

from glam_extractor.parsers.canadian_isil import CanadianISILParser

# Number of leading records written to the sample YAML file.
SAMPLE_SIZE = 100


def _print_statistics(custodians):
    """Print status, institution-type and province tallies for *custodians*.

    Only records whose ``organization_status`` is exactly ``"ACTIVE"`` or
    ``"INACTIVE"`` are counted in the status lines. The province tally uses
    the region of each record's first location and skips records that have
    no locations.
    """
    active = sum(1 for c in custodians if c.organization_status == "ACTIVE")
    inactive = sum(1 for c in custodians if c.organization_status == "INACTIVE")

    type_counts = Counter(
        str(c.institution_type) if c.institution_type else "UNKNOWN"
        for c in custodians
    )
    province_counts = Counter(
        c.locations[0].region for c in custodians if c.locations
    )

    print("\n📊 Statistics:")
    print(f" Active institutions: {active}")
    print(f" Inactive/Closed: {inactive}")

    # most_common() orders by descending count and keeps first-seen order
    # for ties, matching a stable sort on the negated count.
    print("\n By Institution Type:")
    for inst_type, count in type_counts.most_common():
        print(f" {inst_type}: {count}")

    print("\n By Province (Top 5):")
    for province, count in province_counts.most_common(5):
        print(f" {province}: {count}")


def _export_json(records, json_output):
    """Write *records* to *json_output* as indented UTF-8 JSON."""
    with open(json_output, "w", encoding="utf-8") as f:
        # default=str: values json cannot encode natively are written as str(value).
        json.dump(records, f, indent=2, ensure_ascii=False, default=str)


def _export_yaml_sample(records, yaml_output):
    """Write *records* to *yaml_output* as YAML if PyYAML is available.

    Returns:
        True when the file was written, False when PyYAML is not installed
        and the export was skipped.
    """
    # PyYAML is optional; only the import itself is guarded so that genuine
    # errors raised while writing are not mistaken for a missing dependency.
    try:
        import yaml
    except ImportError:
        print("⚠️ PyYAML not installed, skipping YAML export")
        return False

    with open(yaml_output, "w", encoding="utf-8") as f:
        yaml.dump(
            records,
            f,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=False,
        )
    print("✅ Exported sample to YAML")
    return True


def main():
    """Convert Canadian ISIL records to LinkML format.

    Parses the Canadian ISIL JSON dump, prints summary statistics, writes
    every record to a JSON file and the first ``SAMPLE_SIZE`` records to a
    YAML sample file (the latter only when PyYAML is installed). Input and
    output paths are relative to the current working directory.
    """
    # Input/output paths
    input_file = Path("data/isil/canada/canadian_libraries_all.json")
    output_dir = Path("data/instances/canada")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Parse all records
    print(f"Parsing {input_file}...")
    parser = CanadianISILParser()
    custodians = list(parser.parse_file(input_file))
    print(f"\n✅ Successfully parsed {len(custodians)} institutions")

    _print_statistics(custodians)

    # Export to JSON
    json_output = output_dir / "canadian_heritage_custodians.json"
    print(f"\n💾 Exporting to {json_output}...")
    # Serialize each custodian once; the YAML sample reuses the same objects.
    records = [custodian._as_json_obj() for custodian in custodians]
    _export_json(records, json_output)
    print(f"✅ Exported {len(records)} records to JSON")

    # Export sample YAML (first SAMPLE_SIZE records)
    yaml_output = output_dir / "canadian_heritage_custodians_sample.yaml"
    print(f"\n💾 Exporting sample ({SAMPLE_SIZE} records) to {yaml_output}...")
    yaml_written = _export_yaml_sample(records[:SAMPLE_SIZE], yaml_output)

    print("\n✨ Conversion complete!")
    print("\n📁 Output files:")
    print(f" {json_output}")
    # Report the YAML file only if this run wrote it; a leftover file from a
    # previous run must not be listed when the export was skipped.
    if yaml_written:
        print(f" {yaml_output}")


if __name__ == "__main__":
    main()