56 lines
2.1 KiB
Python
56 lines
2.1 KiB
Python
#!/usr/bin/env python3
"""
Quick test script to parse the real ISIL CSV file.
"""
|
|
|
|
from glam_extractor.parsers.isil_registry import ISILRegistryParser
|
|
|
|
def main(csv_path: str = "data/ISIL-codes_2025-08-01.csv") -> None:
    """Parse the ISIL registry CSV and print a human-readable summary.

    The file is handed to ``ISILRegistryParser`` twice: once through
    ``parse_file`` to get the raw records and once through
    ``parse_and_convert`` to get the HeritageCustodian models. A sample of
    each result and a few aggregate counts are printed to stdout.

    Args:
        csv_path: Path of the CSV file passed to both parser calls. Defaults
            to the registry export this script was originally written for.
    """
    parser = ISILRegistryParser()

    # Parse the real ISIL CSV
    print("Parsing ISIL registry CSV...")
    records = parser.parse_file(csv_path)

    print(f"\n✓ Successfully parsed {len(records)} ISIL records")

    # Show first 5 records
    print("\nFirst 5 records:")
    for i, record in enumerate(records[:5], 1):
        print(f"\n{i}. {record.instelling}")
        print(f" ISIL: {record.isil_code}")
        print(f" City: {record.plaats}")
        print(f" Assigned: {record.toegekend_op}")
        if record.opmerking:
            print(f" Remark: {record.opmerking}")

    # Convert to HeritageCustodian models
    # NOTE(review): the path is passed again rather than reusing `records`,
    # so presumably the file is read a second time -- confirm against the
    # parser if that cost matters.
    print("\n\nConverting to HeritageCustodian models...")
    custodians = parser.parse_and_convert(csv_path)

    print(f"✓ Successfully converted {len(custodians)} records")

    # Show first 3 custodians
    print("\nFirst 3 HeritageCustodian models:")
    for i, custodian in enumerate(custodians[:3], 1):
        print(f"\n{i}. {custodian.name}")
        print(f" ID: {custodian.id}")
        print(f" Type: {custodian.institution_type}")
        # The statistics section below already tolerates custodians without
        # locations; apply the same guard here (and to identifiers) so a
        # sparse record prints a placeholder instead of raising IndexError.
        if custodian.locations:
            location = custodian.locations[0]
            print(f" Location: {location.city}, {location.country}")
        else:
            print(" Location: (none)")
        if custodian.identifiers:
            print(f" ISIL: {custodian.identifiers[0].identifier_value}")
        else:
            print(" ISIL: (none)")
        print(f" Data Tier: {custodian.provenance.data_tier}")
        print(f" Confidence: {custodian.provenance.confidence_score}")
        if custodian.description:
            print(f" Description: {custodian.description}")

    # Statistics
    print("\n\nStatistics:")
    with_remarks = sum(1 for c in custodians if c.description)
    # Only the first location of each custodian contributes a city.
    cities = {c.locations[0].city for c in custodians if c.locations}
    print(f" Total institutions: {len(custodians)}")
    print(f" With remarks/descriptions: {with_remarks}")
    print(f" Cities represented: {len(cities)}")

    print("\n✓ All tests passed!")
|
|
|
|
# Run the smoke test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|