glam/scripts/test_webclaim_validator.py
2025-12-05 15:30:23 +01:00

265 lines
8.4 KiB
Python

#!/usr/bin/env python3
"""
Test script for WebClaim Validator.
Tests the validator against real NDE entry files with web_claims.
"""
import sys
from pathlib import Path
# Put the repository's ``src`` directory (two levels up from this script, then
# ``src``) at the front of sys.path. This must run before the
# ``glam_extractor`` import below so that import sees the added directory.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root / "src"))
import yaml
from glam_extractor.annotators.webclaim_validator import (
WebClaimValidator,
WebClaim,
ClaimType,
ValidationStatus,
ValidationResult,
batch_validate_entry_files,
)
def test_single_claim_validation():
    """Validate the first web claim of a real NDE entry file.

    Loads the ``1667_historische_kring_wierden.yaml`` entry, builds a
    ``WebClaim`` from the first item under ``web_claims.claims`` and passes it
    to ``WebClaimValidator.validate_claim``, printing the claim and the
    validation outcome.

    Returns:
        bool: ``True`` when the validator reports ``ValidationStatus.VALID``;
        ``False`` when the entry contains no web claims or the claim is not
        valid.

    Exceptions raised while opening or parsing the entry file are not caught
    here and propagate to the caller.
    """
    print("=" * 60)
    print("TEST: Single Claim Validation")
    print("=" * 60)

    entry_path = (
        project_root / "data/nde/enriched/entries/1667_historische_kring_wierden.yaml"
    )
    # Handed to the validator as ``html_base_path``.
    web_base = project_root / "data/nde/enriched/entries"

    with open(entry_path, 'r', encoding='utf-8') as f:
        entry = yaml.safe_load(f)

    # An empty YAML document loads as None, and a bare ``web_claims:`` or
    # ``claims:`` key maps to None; normalise those so the "no claims" branch
    # below is taken instead of an AttributeError/TypeError.
    web_claims = (entry or {}).get('web_claims') or {}
    claims_data = web_claims.get('claims') or []
    if not claims_data:
        print("No web_claims found in entry!")
        return False

    # Only the first claim is exercised by this test.
    claim_data = claims_data[0]
    print("\nTesting claim:")
    print(f" Type: {claim_data.get('claim_type')}")
    print(f" Value: {claim_data.get('claim_value')}")
    print(f" XPath: {claim_data.get('xpath')}")
    print(f" HTML File: {claim_data.get('html_file')}")

    # Entry files may carry claim types that are not ClaimType members; those
    # are validated as FULL_NAME instead of aborting the test.
    claim_type_str = claim_data.get('claim_type', 'full_name')
    try:
        claim_type = ClaimType(claim_type_str)
    except ValueError:
        print(f" (Custom claim type '{claim_type_str}', using FULL_NAME for validation)")
        claim_type = ClaimType.FULL_NAME

    claim = WebClaim(
        claim_type=claim_type,
        claim_value=claim_data.get('claim_value', ''),
        source_url=claim_data.get('source_url', ''),
        retrieved_on=claim_data.get('retrieved_on', ''),
        xpath=claim_data.get('xpath', ''),
        html_file=claim_data.get('html_file', ''),
        xpath_match_score=claim_data.get('xpath_match_score', 0.0),
    )

    validator = WebClaimValidator(html_base_path=web_base)
    result = validator.validate_claim(claim)

    print("\nValidation Result:")
    print(f" Status: {result.status.value}")
    print(f" Computed Match Score: {result.computed_match_score:.2f}")
    if result.matched_text:
        print(f" Matched Text: {result.matched_text[:100]}...")
    if result.error_message:
        print(f" Error: {result.error_message}")

    return result.status == ValidationStatus.VALID
def test_all_claims_in_entry():
    """Validate every web claim of a real NDE entry file and summarise.

    Loads the ``1667_historische_kring_wierden.yaml`` entry, converts each item
    under ``web_claims.claims`` into a ``WebClaim``, runs them through
    ``WebClaimValidator.validate_claims`` and prints a per-status count plus
    details for up to five claims that are not VALID.

    Returns:
        bool: ``True`` when at least one claim has status
        ``ValidationStatus.VALID``, otherwise ``False``.

    Exceptions raised while opening or parsing the entry file are not caught
    here and propagate to the caller.
    """
    print("\n" + "=" * 60)
    print("TEST: All Claims in Entry File")
    print("=" * 60)

    entry_path = (
        project_root / "data/nde/enriched/entries/1667_historische_kring_wierden.yaml"
    )
    # Handed to the validator as ``html_base_path``.
    web_base = project_root / "data/nde/enriched/entries"

    with open(entry_path, 'r', encoding='utf-8') as f:
        entry = yaml.safe_load(f)

    # An empty YAML document loads as None, and a bare ``web_claims:`` or
    # ``claims:`` key maps to None; treat all of those as "no claims".
    web_claims = (entry or {}).get('web_claims') or {}
    claims_data = web_claims.get('claims') or []
    print(f"\nTotal claims in entry: {len(claims_data)}")

    claims = []
    for cd in claims_data:
        claim_type_str = cd.get('claim_type', 'full_name')
        try:
            claim_type = ClaimType(claim_type_str)
        except ValueError:
            # Claim types that are not ClaimType members are validated as FULL_NAME.
            claim_type = ClaimType.FULL_NAME
        claims.append(WebClaim(
            claim_type=claim_type,
            claim_value=cd.get('claim_value', ''),
            source_url=cd.get('source_url', ''),
            retrieved_on=cd.get('retrieved_on', ''),
            xpath=cd.get('xpath', ''),
            html_file=cd.get('html_file', ''),
            xpath_match_score=cd.get('xpath_match_score', 0.0),
        ))

    validator = WebClaimValidator(html_base_path=web_base)
    results = validator.validate_claims(claims)

    # Seed the four statuses this test reports on so they are printed even at
    # zero; any other status the validator returns is counted as well rather
    # than raising KeyError.
    summary = {
        ValidationStatus.VALID: 0,
        ValidationStatus.INVALID: 0,
        ValidationStatus.FABRICATED: 0,
        ValidationStatus.NEEDS_REVIEW: 0,
    }
    for result in results:
        summary[result.status] = summary.get(result.status, 0) + 1

    print("\nValidation Summary:")
    for status, count in summary.items():
        print(f" {status.value}: {count}")

    non_valid = [r for r in results if r.status != ValidationStatus.VALID]
    if non_valid:
        print(f"\nNon-valid claims ({len(non_valid)}):")
        for r in non_valid[:5]:  # Keep the report short: first five only
            # str() guards against YAML scalars (e.g. a year parsed as int)
            # that cannot be sliced.
            print(f" - {r.claim.claim_type.value}: '{str(r.claim.claim_value)[:30]}...'")
            print(f" Status: {r.status.value}, Score: {r.computed_match_score:.2f}")
            if r.error_message:
                print(f" Error: {r.error_message}")

    return summary[ValidationStatus.VALID] > 0
def test_fabricated_claim_detection():
    """Check that a claim with an empty XPath is reported as FABRICATED.

    Builds a synthetic ``WebClaim`` whose ``xpath`` is the empty string,
    validates it with a default-constructed ``WebClaimValidator`` and asserts
    that the resulting status is ``ValidationStatus.FABRICATED``.

    Returns:
        bool: Always ``True``; an unexpected status raises ``AssertionError``.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("TEST: Fabricated Claim Detection")
    print(banner)

    claim_without_xpath = WebClaim(
        claim_type=ClaimType.FULL_NAME,
        claim_value="Test Institution",
        source_url="https://example.com",
        retrieved_on="2025-12-01T00:00:00Z",
        xpath="",  # the missing provenance this test is about
        html_file="test.html",
        xpath_match_score=0.95,  # a high self-reported score is set on purpose
    )
    outcome = WebClaimValidator().validate_claim(claim_without_xpath)

    print("\nClaim with empty XPath:")
    print(f" Status: {outcome.status.value}")
    print(f" Error: {outcome.error_message}")

    assert outcome.status == ValidationStatus.FABRICATED
    print(" ✓ Correctly identified as FABRICATED")
    return True
def test_xpath_not_found():
    """Check that a claim whose XPath matches nothing is reported as INVALID.

    Builds a synthetic ``WebClaim`` with the XPath
    ``/html/body/div[999]/nonexistent`` against the archived
    ``rendered.html`` of entry 1667, validates it and asserts that the
    resulting status is ``ValidationStatus.INVALID``.

    Returns:
        bool: Always ``True``; an unexpected status raises ``AssertionError``.
    """
    print("\n" + "=" * 60)
    print("TEST: XPath Not Found in HTML")
    print("=" * 60)

    # Handed to the validator as ``html_base_path``; the claim's ``html_file``
    # below is given relative to it.
    web_base = project_root / "data/nde/enriched/entries"

    bad_xpath_claim = WebClaim(
        claim_type=ClaimType.FULL_NAME,
        claim_value="Test Value",
        source_url="https://historischekringwierden.nl/",
        retrieved_on="2025-12-01T00:00:00Z",
        xpath="/html/body/div[999]/nonexistent",  # deliberately unmatched XPath
        html_file="web/1667/historischekringwierden.nl/rendered.html",
        xpath_match_score=1.0,
    )

    validator = WebClaimValidator(html_base_path=web_base)
    result = validator.validate_claim(bad_xpath_claim)

    print("\nClaim with invalid XPath:")
    print(f" XPath: {bad_xpath_claim.xpath}")
    print(f" Status: {result.status.value}")
    print(f" Error: {result.error_message}")

    assert result.status == ValidationStatus.INVALID
    print(" ✓ Correctly identified as INVALID")
    return True
def main():
    """Run each validator test in turn and print a pass/fail summary.

    A test counts as passed when its function returns a truthy value. Any
    exception a test raises is reported with its traceback and recorded as a
    failure, so the remaining tests still run.

    Returns:
        bool: ``True`` when every test passed, otherwise ``False``.
    """
    import traceback

    divider = "=" * 60
    print("\n" + divider)
    print("WebClaim Validator Test Suite")
    print(divider)

    suite = [
        ("Single Claim Validation", test_single_claim_validation),
        ("All Claims in Entry", test_all_claims_in_entry),
        ("Fabricated Claim Detection", test_fabricated_claim_detection),
        ("XPath Not Found", test_xpath_not_found),
    ]

    outcomes = []
    for label, run_test in suite:
        try:
            ok = run_test()
        except Exception as exc:
            # One crashing test must not stop the rest of the suite.
            print(f"\n❌ Test '{label}' FAILED with exception: {exc}")
            traceback.print_exc()
            ok = False
        outcomes.append((label, ok))

    print("\n" + divider)
    print("TEST SUMMARY")
    print(divider)
    for label, ok in outcomes:
        verdict = "✓ PASSED" if ok else "❌ FAILED"
        print(f" {verdict}: {label}")

    passed_count = len([ok for _, ok in outcomes if ok])
    total_count = len(outcomes)
    print(f"\nTotal: {passed_count}/{total_count} tests passed")
    return passed_count == total_count
if __name__ == "__main__":
    # Exit status 0 only when every test passed; 1 otherwise.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)