#!/usr/bin/env python3
"""
Validate YAML instance files against the Pydantic models.

Usage:
    python scripts/validate_yaml_instance.py <yaml_file>
"""

import sys
from datetime import date, datetime
from pathlib import Path

import yaml

# Add src to path so that glam_extractor can be imported when this script is
# run directly from a checkout (must happen before the import below).
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from glam_extractor.models import (
    HeritageCustodian,
    Location,
    Identifier,
    ChangeEvent,
    Provenance,
    DigitalPlatform,
    InstitutionType,
    OrganizationStatus,
    DataSource,
    DataTier,
    ChangeType,
)


def _coerce_temporal_fields(instance_data: dict) -> None:
    """
    Convert ISO-formatted date strings in *instance_data* to date objects, in place.

    Handles ``change_history[*].event_date`` (-> ``date``) and
    ``provenance.extraction_date`` (-> ``datetime``). Values that are not
    strings are left untouched.

    Raises:
        ValueError: if a string value is not a valid ISO date/datetime.
    """
    for event in instance_data.get("change_history") or []:
        # Skip malformed (non-mapping) events here; model validation reports them.
        if isinstance(event, dict) and isinstance(event.get("event_date"), str):
            event["event_date"] = date.fromisoformat(event["event_date"])

    prov = instance_data.get("provenance")
    if isinstance(prov, dict) and isinstance(prov.get("extraction_date"), str):
        # Rewrite a trailing "Z" as an explicit UTC offset before parsing.
        prov["extraction_date"] = datetime.fromisoformat(
            prov["extraction_date"].replace("Z", "+00:00")
        )


def validate_yaml_file(yaml_path: Path) -> bool:
    """
    Validate a YAML instance file against the Pydantic models.

    Each top-level list entry is passed as keyword arguments to
    HeritageCustodian; a per-instance report is printed to stdout.

    Args:
        yaml_path: Path to YAML file containing HeritageCustodian instances

    Returns:
        True if all instances are valid, False otherwise (including when the
        document's top level is not a list).
    """
    print(f"📄 Validating: {yaml_path.name}")
    print()

    # Load YAML
    with open(yaml_path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    if not isinstance(data, list):
        print("❌ Error: YAML must contain a list of HeritageCustodian instances")
        return False

    total = len(data)
    print(f"Found {total} institution(s) to validate")
    print()

    all_valid = True

    for i, instance_data in enumerate(data, 1):
        is_mapping = isinstance(instance_data, dict)
        display_name = instance_data.get("name", "Unknown") if is_mapping else "Unknown"
        print(f"Validating institution {i}/{total}: {display_name}")

        # Any failure for one instance is reported and must not stop the
        # remaining instances from being checked.
        try:
            if not is_mapping:
                raise TypeError(
                    f"instance must be a mapping, got {type(instance_data).__name__}"
                )

            _coerce_temporal_fields(instance_data)

            # Validate with Pydantic
            custodian = HeritageCustodian(**instance_data)

            # NOTE(review): provenance is treated as possibly absent during
            # coercion, so guard the dereference here as well — confirm
            # against the model whether it is actually optional.
            provenance = custodian.provenance
            confidence = provenance.confidence_score if provenance is not None else None

            print(f" ✅ Valid: {custodian.name}")
            print(f" - Type: {custodian.institution_type}")
            print(f" - Locations: {len(custodian.locations or [])}")
            print(f" - Identifiers: {len(custodian.identifiers or [])}")
            print(f" - Events: {len(custodian.change_history or [])}")
            print(f" - Confidence: {confidence}")
            print()

        except Exception as e:
            print(" ❌ Validation Error:")
            print(f" {type(e).__name__}: {e}")
            print()
            all_valid = False

    return all_valid


def main() -> None:
    """
    Command-line entry point.

    Expects the YAML file path as the first argument. Exits with status 0
    when every instance validates, and 1 on a missing argument, a missing
    file, any invalid instance, or an unexpected error.
    """
    if len(sys.argv) < 2:
        print("Usage: python scripts/validate_yaml_instance.py <yaml_file>")
        print()
        print("Example:")
        print(" python scripts/validate_yaml_instance.py data/instances/test_outputs/test_brazilian_institutions.yaml")
        sys.exit(1)

    yaml_path = Path(sys.argv[1])

    if not yaml_path.exists():
        print(f"❌ Error: File not found: {yaml_path}")
        sys.exit(1)

    print("=" * 80)
    print("YAML INSTANCE VALIDATION")
    print("=" * 80)
    print()

    # sys.exit raises SystemExit, which is not an Exception subclass, so the
    # exits inside the try block are not swallowed by the handler below.
    try:
        valid = validate_yaml_file(yaml_path)

        print("=" * 80)
        if valid:
            print("✅ All instances are valid!")
            print("=" * 80)
            sys.exit(0)
        else:
            print("❌ Some instances failed validation")
            print("=" * 80)
            sys.exit(1)

    except Exception as e:
        print(f"❌ Fatal Error: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()