# glam/scripts/test_live_annotation.py

#!/usr/bin/env python3
"""
Test script for live LLM annotation with domain/range validation.

Tests the full annotation pipeline with real NDE web archives.
"""

import asyncio
import json
import os
import sys
import time
import traceback
from collections import Counter
from datetime import datetime
from pathlib import Path

# Put the ``src`` directory that sits one level above this script's directory
# at the front of sys.path. This must run before the ``glam_extractor`` import
# below so that import can resolve from a source checkout.
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

# Load environment via python-dotenv before any test reads it with os.getenv.
# NOTE(review): presumably this is how ZAI_API_TOKEN is supplied from a local
# .env file — confirm.
from dotenv import load_dotenv
load_dotenv()

from glam_extractor.annotators.llm_annotator import (
    LLMAnnotator,
    LLMAnnotatorConfig,
    LLMProvider,
    RetryConfig,
)


def _print_entity_claims(claims, limit=10):
    """Print the number of entity claims and details for the first *limit*."""
    print(f"\n  ENTITY CLAIMS: {len(claims)}")
    for index, claim in enumerate(claims[:limit], start=1):
        # Prefer the specific hyponym; fall back to the hypernym's value.
        label = claim.hyponym or (claim.hypernym.value if claim.hypernym else "?")
        print(f"    [{index}] {label}: {claim.claim_value}")
        if claim.class_uri:
            print(f"        class_uri: {claim.class_uri}")
        if claim.wikidata_id:
            print(f"        wikidata: {claim.wikidata_id}")

    if len(claims) > limit:
        print(f"    ... and {len(claims) - limit} more entities")


def _print_relationship_claims(claims, limit=10):
    """Print the number of relationship claims and the first *limit* of them."""
    print(f"\n  RELATIONSHIP CLAIMS: {len(claims)}")
    for index, claim in enumerate(claims[:limit], start=1):
        rel_type = claim.relationship_hyponym or "?"
        subject = claim.subject.span_text if claim.subject else "?"
        obj = claim.object.span_text if claim.object else "?"
        print(f"    [{index}] {rel_type}: {subject} -> {obj}")
        if claim.predicate_uris:
            # Only the first two predicate URIs are shown to keep lines short.
            print(f"        predicates: {claim.predicate_uris[:2]}")
        if claim.temporal_scope and claim.temporal_scope.start_date:
            print(f"        temporal: {claim.temporal_scope.start_date}")

    if len(claims) > limit:
        print(f"    ... and {len(claims) - limit} more relationships")


def _print_validation_messages(errors, limit=10, max_chars=100):
    """Print the first *limit* validation messages, each capped at *max_chars*."""
    if not errors:
        print("\n  VALIDATION: No domain/range violations detected")
        return

    print(f"\n  VALIDATION WARNINGS/ERRORS: {len(errors)}")
    for error in errors[:limit]:
        # str() keeps the report working if an entry is not a plain string;
        # the ellipsis is appended only when text was actually cut off.
        text = str(error)
        if len(text) > max_chars:
            text = text[:max_chars] + "..."
        print(f"    - {text}")
    if len(errors) > limit:
        print(f"    ... and {len(errors) - limit} more")


async def test_luther_museum_annotation():
    """Test annotation of Luther Museum website with relationship validation.

    The test is skipped when ``ZAI_API_TOKEN`` is not set or when the
    rendered HTML file does not exist (the path is relative to the current
    working directory). Otherwise the page is annotated through the Z.AI
    provider, entity claims, relationship claims and validation messages are
    printed (at most 10 of each), and the session is written as
    ``annotation_session.json`` next to the HTML file.

    Returns:
        The session returned by ``LLMAnnotator.annotate``, or ``None`` when
        the test was skipped or any step raised an exception.
    """

    print("\n" + "="*70)
    print("LIVE LLM ANNOTATION TEST - Luther Museum")
    print("="*70)

    # Check for API token
    api_token = os.getenv("ZAI_API_TOKEN")
    if not api_token:
        print("  [SKIP] ZAI_API_TOKEN not set in environment")
        return None

    # Path to Luther Museum HTML
    html_path = Path("data/nde/enriched/entries/web/1600/luthermuseum.nl/rendered.html")
    if not html_path.exists():
        print(f"  [SKIP] HTML file not found: {html_path}")
        return None

    print(f"\n  Source: {html_path}")
    print("  Provider: Z.AI (GLM-4-Flash)")

    # Configure annotator
    config = LLMAnnotatorConfig(
        provider=LLMProvider.ZAI,
        model="glm-4-flash",
        api_key=api_token,
        context_convention="GLAM-NER v1.7.0-unified",
        retry=RetryConfig(
            max_retries=3,
            base_delay=2.0,
            max_delay=30.0,
        ),
    )

    annotator = LLMAnnotator(config)

    print("\n  Starting annotation...")
    # Monotonic clock: the measured duration is immune to wall-clock changes.
    started = time.perf_counter()

    try:
        session = await annotator.annotate(
            html_path,
            source_url="https://luthermuseum.nl/nl",
        )

        elapsed = time.perf_counter() - started
        print(f"  Completed in {elapsed:.2f} seconds")

        # Report results
        print("\n" + "-"*70)
        print("ANNOTATION RESULTS")
        print("-"*70)

        print(f"\n  Session ID: {session.session_id}")
        print(f"  Agent: {session.agent_name} / {session.model_id}")

        _print_entity_claims(session.entity_claims)
        _print_relationship_claims(session.relationship_claims)
        _print_validation_messages(session.errors)

        # Export to JSON, alongside the source HTML.
        output_path = html_path.with_name("annotation_session.json")
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(session.to_dict(), f, indent=2, ensure_ascii=False)
        print(f"\n  Exported to: {output_path}")

        return session

    except Exception as e:
        # Top-level boundary of a diagnostic script: report everything
        # (including export failures) and signal failure with None.
        print(f"\n  [ERROR] Annotation failed: {e}")
        traceback.print_exc()
        return None


async def test_boijmans_annotation():
    """Test annotation of Boijmans Museum website.

    The test is skipped when ``ZAI_API_TOKEN`` is not set or when the
    rendered HTML file does not exist (the path is relative to the current
    working directory). Otherwise the page is annotated through the Z.AI
    provider and a short summary is printed: claim counts, number of
    validation issues, and the 10 most frequent entity types.

    Returns:
        The session returned by ``LLMAnnotator.annotate``, or ``None`` when
        the test was skipped or annotation raised an exception.
    """

    print("\n" + "="*70)
    print("LIVE LLM ANNOTATION TEST - Boijmans Museum")
    print("="*70)

    api_token = os.getenv("ZAI_API_TOKEN")
    if not api_token:
        print("  [SKIP] ZAI_API_TOKEN not set")
        return None

    html_path = Path("data/nde/enriched/entries/web/1606/boijmans.nl/rendered.html")
    if not html_path.exists():
        print(f"  [SKIP] HTML file not found: {html_path}")
        return None

    print(f"\n  Source: {html_path}")

    config = LLMAnnotatorConfig(
        provider=LLMProvider.ZAI,
        model="glm-4-flash",
        api_key=api_token,
        context_convention="GLAM-NER v1.7.0-unified",
    )

    annotator = LLMAnnotator(config)

    print("\n  Starting annotation...")
    # Monotonic clock: the measured duration is immune to wall-clock changes.
    started = time.perf_counter()

    try:
        session = await annotator.annotate(
            html_path,
            source_url="https://boijmans.nl",
        )

        elapsed = time.perf_counter() - started
        print(f"  Completed in {elapsed:.2f} seconds")

        print(f"\n  Entity claims: {len(session.entity_claims)}")
        print(f"  Relationship claims: {len(session.relationship_claims)}")
        print(f"  Validation issues: {len(session.errors)}")

        # Show entity type distribution: hyponym if present, otherwise the
        # hypernym's value, otherwise "UNK".
        type_counts = Counter(
            claim.hyponym or (claim.hypernym.value if claim.hypernym else "UNK")
            for claim in session.entity_claims
        )

        print("\n  Entity type distribution:")
        # most_common orders by descending count; ties keep first-seen order.
        for entity_type, count in type_counts.most_common(10):
            print(f"    {entity_type}: {count}")

        return session

    except Exception as e:
        # Top-level boundary of a diagnostic script: report the failure with
        # its traceback (as the Luther Museum test does) and return None.
        print(f"\n  [ERROR] {e}")
        traceback.print_exc()
        return None


async def main():
    """Print the suite banner, run the enabled live tests, print the footer."""
    rule = "=" * 70

    # Opening banner followed by the start timestamp.
    print("\n" + rule, "LIVE ANNOTATION TEST SUITE", rule, sep="\n")
    print(f"\nTimestamp: {datetime.now().isoformat()}")

    # Test 1: Luther Museum (the returned session is not used here).
    await test_luther_museum_annotation()

    # Test 2: Boijmans (optional - left disabled to save API calls;
    # uncomment the next line to run it).
    # await test_boijmans_annotation()

    # Closing banner.
    print("\n" + rule, "TEST SUITE COMPLETE", rule + "\n", sep="\n")


# Script entry point: run the async suite on a fresh event loop, but only
# when this file is executed directly rather than imported.
if __name__ == "__main__":
    asyncio.run(main())