glam/test_real_isil.py
2025-11-19 23:25:22 +01:00

56 lines
2.1 KiB
Python

#!/usr/bin/env python3
"""
Quick smoke-test script: parses the real ISIL registry CSV, converts the
records to HeritageCustodian models, and prints a short summary.
"""
from glam_extractor.parsers.isil_registry import ISILRegistryParser
def main(
    csv_path: str = "data/ISIL-codes_2025-08-01.csv",
    *,
    parser=None,
) -> None:
    """Parse an ISIL registry CSV and print a human-readable summary.

    The path is handed to the parser twice: once to ``parse_file`` to show
    the first raw records, and once to ``parse_and_convert`` to show the
    first HeritageCustodian models, followed by a few aggregate counts.
    Nothing is asserted; the run "passes" if no exception is raised.

    Args:
        csv_path: Path of the ISIL CSV to process. Defaults to the
            2025-08-01 export this script was written against.
        parser: Object providing ``parse_file(path)`` and
            ``parse_and_convert(path)``. A fresh ``ISILRegistryParser`` is
            created when omitted.
    """
    if parser is None:
        parser = ISILRegistryParser()

    # Parse the real ISIL CSV
    print("Parsing ISIL registry CSV...")
    records = parser.parse_file(csv_path)
    print(f"\n✓ Successfully parsed {len(records)} ISIL records")

    # Show first 5 records
    print("\nFirst 5 records:")
    for i, record in enumerate(records[:5], 1):
        print(f"\n{i}. {record.instelling}")
        print(f" ISIL: {record.isil_code}")
        print(f" City: {record.plaats}")
        print(f" Assigned: {record.toegekend_op}")
        if record.opmerking:
            print(f" Remark: {record.opmerking}")

    # Convert to HeritageCustodian models
    print("\n\nConverting to HeritageCustodian models...")
    custodians = parser.parse_and_convert(csv_path)
    print(f"✓ Successfully converted {len(custodians)} records")

    # Show first 3 custodians
    print("\nFirst 3 HeritageCustodian models:")
    for i, custodian in enumerate(custodians[:3], 1):
        print(f"\n{i}. {custodian.name}")
        print(f" ID: {custodian.id}")
        print(f" Type: {custodian.institution_type}")

        # The statistics below already tolerate custodians without a
        # location, so the preview must not crash on an empty list either.
        if custodian.locations:
            location = custodian.locations[0]
            print(f" Location: {location.city}, {location.country}")
        else:
            print(" Location: (none recorded)")

        if custodian.identifiers:
            print(f" ISIL: {custodian.identifiers[0].identifier_value}")
        else:
            print(" ISIL: (none recorded)")

        print(f" Data Tier: {custodian.provenance.data_tier}")
        print(f" Confidence: {custodian.provenance.confidence_score}")
        if custodian.description:
            print(f" Description: {custodian.description}")

    # Statistics
    print("\n\nStatistics:")
    with_remarks = sum(1 for c in custodians if c.description)
    # Only the first location of each custodian counts towards the city set.
    cities = {c.locations[0].city for c in custodians if c.locations}
    print(f" Total institutions: {len(custodians)}")
    print(f" With remarks/descriptions: {with_remarks}")
    print(f" Cities represented: {len(cities)}")
    print("\n✓ All tests passed!")
# Run the smoke check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()