- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive). - Added tests for extracted entities and result handling to validate the extraction process. - Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format. - Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns. - Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
133 lines
4 KiB
Python
Executable file
133 lines
4 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
Validate YAML instance files against the Pydantic models.
|
|
|
|
Usage:
|
|
python scripts/validate_yaml_instance.py <yaml_file>
|
|
"""
|
|
|
|
import sys
|
|
import yaml
|
|
from pathlib import Path
|
|
from datetime import date, datetime
|
|
|
|
# Add src to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
|
|
|
from glam_extractor.models import (
|
|
HeritageCustodian,
|
|
Location,
|
|
Identifier,
|
|
ChangeEvent,
|
|
Provenance,
|
|
DigitalPlatform,
|
|
InstitutionType,
|
|
OrganizationStatus,
|
|
DataSource,
|
|
DataTier,
|
|
ChangeType,
|
|
)
|
|
|
|
|
|
def _coerce_temporal_fields(instance_data: dict) -> None:
    """
    Convert ISO-8601 strings in one raw instance mapping to date/datetime objects.

    Handles ``change_history[*].event_date`` (to ``date``) and
    ``provenance.extraction_date`` (to ``datetime``; a trailing ``Z`` is
    rewritten to ``+00:00`` before parsing). The mapping is modified in place.
    Entries that do not have the expected shape are left untouched so that the
    model validation that follows can report them.

    Raises:
        ValueError: If a string value is not a valid ISO-8601 date/datetime.
    """
    for event in instance_data.get('change_history') or []:
        if isinstance(event, dict) and isinstance(event.get('event_date'), str):
            event['event_date'] = date.fromisoformat(event['event_date'])

    prov = instance_data.get('provenance')
    if isinstance(prov, dict) and isinstance(prov.get('extraction_date'), str):
        prov['extraction_date'] = datetime.fromisoformat(
            prov['extraction_date'].replace('Z', '+00:00')
        )


def validate_yaml_file(yaml_path: Path) -> bool:
    """
    Validate a YAML instance file against the Pydantic models.

    The file must contain a top-level list; each entry is passed as keyword
    arguments to ``HeritageCustodian``. A per-instance report is printed to
    stdout. A failure in one instance does not stop the remaining instances
    from being checked.

    Args:
        yaml_path: Path to YAML file containing HeritageCustodian instances

    Returns:
        True if all instances are valid, False otherwise (including when the
        top-level YAML value is not a list). An empty list yields True.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not well-formed YAML.
    """
    print(f"📄 Validating: {yaml_path.name}")
    print()

    # Load YAML
    with open(yaml_path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)

    if not isinstance(data, list):
        print("❌ Error: YAML must contain a list of HeritageCustodian instances")
        return False

    total = len(data)
    print(f"Found {total} institution(s) to validate")
    print()

    all_valid = True
    for i, instance_data in enumerate(data, 1):
        try:
            # Report a wrongly-shaped entry with a clear message instead of the
            # AttributeError that calling .get() on it would produce.
            if not isinstance(instance_data, dict):
                raise TypeError(
                    f"expected a mapping, got {type(instance_data).__name__}"
                )

            print(f"Validating institution {i}/{total}: {instance_data.get('name', 'Unknown')}")

            # Convert date / datetime strings before handing the data to the model
            _coerce_temporal_fields(instance_data)

            # Validate with Pydantic
            custodian = HeritageCustodian(**instance_data)

            # Provenance is treated as possibly absent during pre-processing, so
            # do not let its absence turn a successful validation into a failure.
            provenance = custodian.provenance
            confidence = provenance.confidence_score if provenance is not None else None

            print(f" ✅ Valid: {custodian.name}")
            print(f" - Type: {custodian.institution_type}")
            print(f" - Locations: {len(custodian.locations or [])}")
            print(f" - Identifiers: {len(custodian.identifiers or [])}")
            print(f" - Events: {len(custodian.change_history or [])}")
            print(f" - Confidence: {confidence}")
            print()

        except Exception as e:
            # Per-instance boundary: record the failure and keep going.
            print(" ❌ Validation Error:")
            print(f" {type(e).__name__}: {e}")
            print()
            all_valid = False

    return all_valid
|
|
|
|
|
|
def main():
    """
    Command-line entry point.

    Reads the YAML file path from the first command-line argument, prints a
    banner, runs ``validate_yaml_file`` on it and terminates the process.

    Exit status is 0 when every instance validated and 1 otherwise: missing
    argument, file not found, at least one invalid instance, or an unexpected
    exception (whose traceback is printed).
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        for usage_line in (
            "Usage: python scripts/validate_yaml_instance.py <yaml_file>",
            "",
            "Example:",
            " python scripts/validate_yaml_instance.py data/instances/test_outputs/test_brazilian_institutions.yaml",
        ):
            print(usage_line)
        sys.exit(1)

    target = Path(cli_args[0])
    if not target.exists():
        print(f"❌ Error: File not found: {target}")
        sys.exit(1)

    rule = "=" * 80
    print(rule)
    print("YAML INSTANCE VALIDATION")
    print(rule)
    print()

    try:
        ok = validate_yaml_file(target)

        print(rule)
        print("✅ All instances are valid!" if ok else "❌ Some instances failed validation")
        print(rule)
        # SystemExit does not derive from Exception, so it passes through the
        # handler below untouched.
        sys.exit(0 if ok else 1)

    except Exception as err:
        print(f"❌ Fatal Error: {err}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
|
|
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|