#!/usr/bin/env python3
"""Nationaal Archief Data Service Endpoint Testing Script.

Tests all documented API endpoints for the Nationaal Archief (NL-ZH-DHA-A-NA):

1. OAI-PMH endpoint (EAD/XML harvesting)
2. METS API (metadata files)
3. File API (scan downloads)
4. SPARQL webservice (photo collection LOD)
5. Ontology endpoint (NAO)
6. Photo API (currently offline)

Usage:
    python scripts/test_nationaalarchief_endpoints.py [--verbose] [--output-dir DIR]

References:
    - Data file: data/custodian/NL-ZH-DHA-A-NA.yaml (lines 573-750)
    - Documentation: https://www.nationaalarchief.nl/onderzoeken/open-data
"""

import argparse
import json
import sys
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from xml.etree import ElementTree as ET

import requests
# ============================================================================
# Configuration
# ============================================================================

# Endpoint catalogue keyed by short name. "expected_status" is the documented
# status of the service; the test functions compare it against what they
# actually observe at run time.
ENDPOINTS = {
    "oai_pmh": {
        "id": "nationaalarchief-oai-pmh-ead",
        "name": "OAI-PMH Endpoint (EAD/XML)",
        "base_url": "https://service.archief.nl/gaf/oai/!open_oai.OAIHandler",
        "protocol": "OAI-PMH 2.0",
        "expected_status": "ACTIVE",
    },
    "mets_api": {
        "id": "nationaalarchief-mets-api",
        "name": "METS API",
        "base_url": "https://service.archief.nl/gaf/api/mets/v1",
        "protocol": "REST",
        "expected_status": "ACTIVE",
        # Known-good METS record UUID used for the retrieval test.
        "sample_uuid": "48f1f22f-1228-4b00-9720-5816a07b4003",
    },
    "file_api": {
        "id": "nationaalarchief-file-api",
        "name": "File Download API",
        "base_url": "https://service.archief.nl/gaf/api/file/v1",
        "protocol": "REST",
        "expected_status": "ACTIVE",
        # Known-good file UUIDs, one per download variant (full scan / thumbnail).
        "sample_uuids": {
            "default": "835776c2-fb57-47eb-b537-b82758b6558a",
            "thumb": "834a6c29-61f1-4926-94bd-674132d25fd5",
        },
    },
    "sparql": {
        "id": "nationaalarchief-sparql-webservice",
        "name": "SPARQL Webservice",
        "base_url": "https://www.nationaalarchief.nl/onderzoeken/sparql",
        "protocol": "SPARQL",
        "expected_status": "ACTIVE",
    },
    "ontology": {
        "id": "nationaalarchief-ontology",
        "name": "Nationaal Archief Ontologie (NAO)",
        "base_url": "https://raw.githubusercontent.com/NationaalArchief/LOD/master/archief.nl-def-ontologie.json",
        "deprecated_url": "https://archief.nl/def/ontologie/",  # SSL/403 issues
        "protocol": "HTTP",
        "expected_status": "ACTIVE",
    },
    "photo_api": {
        "id": "nationaalarchief-photo-api",
        "name": "Photo API",
        "base_url": None,  # Currently offline
        "protocol": "REST",
        "expected_status": "OFFLINE",
    },
}

# OAI-PMH verbs to test, paired with the extra request parameters each needs.
OAI_PMH_VERBS = [
    ("Identify", {}),
    ("ListMetadataFormats", {}),
    ("ListSets", {}),
    ("ListIdentifiers", {"metadataPrefix": "oai_ead", "set": "2.21.205.69"}),
    ("GetRecord", {"metadataPrefix": "oai_ead", "identifier": "2.21.205.69"}),
]

# Sample SPARQL query for photo collection
SPARQL_QUERY = """
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX edm: <http://www.europeana.eu/schemas/edm/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?photo ?title ?creator ?date
WHERE {
    ?photo a edm:ProvidedCHO .
    OPTIONAL { ?photo dc:title ?title }
    OPTIONAL { ?photo dc:creator ?creator }
    OPTIONAL { ?photo dc:date ?date }
}
LIMIT 5
"""
# ============================================================================
# Data Classes
# ============================================================================


@dataclass
class TestResult:
    """Result of a single endpoint test."""

    # Identifier of the endpoint under test (matches ENDPOINTS[...]["id"]).
    endpoint_id: str
    # Human-readable name of the individual test (e.g. "OAI-PMH Identify").
    test_name: str
    # Overall pass/fail verdict for this test.
    success: bool
    # HTTP status code, if a response was received at all.
    status_code: int | None = None
    # Round-trip time of the request in milliseconds.
    response_time_ms: float | None = None
    # Content-Type response header, if present.
    content_type: str | None = None
    # Populated on failure (network error, parse error, or OAI-level error).
    error_message: str | None = None
    # Free-form test-specific extras (counts, sample IDs, notes, ...).
    details: dict[str, Any] = field(default_factory=dict)
|
@dataclass
class EndpointReport:
    """Aggregated report for an endpoint."""

    # Stable endpoint identifier (matches ENDPOINTS[...]["id"]).
    endpoint_id: str
    # Display name of the endpoint.
    endpoint_name: str
    # Protocol label, e.g. "OAI-PMH 2.0", "REST", "SPARQL", "HTTP".
    protocol: str
    # Documented status ("ACTIVE" / "OFFLINE").
    expected_status: str
    # Observed status ("ACTIVE" / "DEGRADED" / "OFFLINE" / "UNKNOWN").
    actual_status: str
    # Running tallies of individual test outcomes.
    tests_passed: int = 0
    tests_failed: int = 0
    # The individual TestResult entries backing the tallies.
    tests: list[TestResult] = field(default_factory=list)
|
# ============================================================================
|
|
# Test Functions
|
|
# ============================================================================
|
|
|
|
|
|
def test_oai_pmh(verbose: bool = False) -> EndpointReport:
    """Test the OAI-PMH endpoint with every verb in OAI_PMH_VERBS.

    Args:
        verbose: When True, print a per-verb result line to stdout.

    Returns:
        EndpointReport with one TestResult per verb. actual_status is
        ACTIVE (all verbs passed), DEGRADED (some), or OFFLINE (none).
    """
    config = ENDPOINTS["oai_pmh"]
    report = EndpointReport(
        endpoint_id=config["id"],
        endpoint_name=config["name"],
        protocol=config["protocol"],
        expected_status=config["expected_status"],
        actual_status="UNKNOWN",
    )

    base_url = config["base_url"]

    for verb, params in OAI_PMH_VERBS:
        test_name = f"OAI-PMH {verb}"
        params_with_verb = {"verb": verb, **params}

        try:
            # Monotonic clock for elapsed-time measurement; time.time() is
            # wall-clock and can jump (NTP adjustments, DST changes).
            start = time.perf_counter()
            response = requests.get(base_url, params=params_with_verb, timeout=30)
            elapsed_ms = (time.perf_counter() - start) * 1000

            result = TestResult(
                endpoint_id=config["id"],
                test_name=test_name,
                success=response.status_code == 200,
                status_code=response.status_code,
                response_time_ms=round(elapsed_ms, 2),
                content_type=response.headers.get("Content-Type"),
            )

            # Parse the XML response; OAI-PMH reports protocol-level errors
            # inside a 200 body, so a 200 alone is not enough.
            if response.status_code == 200:
                try:
                    root = ET.fromstring(response.content)
                    ns = {"oai": "http://www.openarchives.org/OAI/2.0/"}
                    error = root.find(".//oai:error", ns)
                    if error is not None:
                        result.success = False
                        result.error_message = f"OAI error: {error.get('code')} - {error.text}"
                    else:
                        # Extract some verb-specific details for the report.
                        if verb == "Identify":
                            repo_name = root.find(".//oai:repositoryName", ns)
                            if repo_name is not None:
                                result.details["repository_name"] = repo_name.text
                        elif verb == "ListSets":
                            sets = root.findall(".//oai:set", ns)
                            result.details["set_count"] = len(sets)
                        elif verb == "GetRecord":
                            record = root.find(".//oai:record", ns)
                            result.details["record_found"] = record is not None
                except ET.ParseError as e:
                    result.success = False
                    result.error_message = f"XML parse error: {e}"

            if verbose:
                print(f"  [{'+' if result.success else '-'}] {test_name}: "
                      f"{result.status_code} ({result.response_time_ms:.0f}ms)")
                if result.details:
                    print(f"      Details: {result.details}")
                if result.error_message:
                    print(f"      Error: {result.error_message}")

        except requests.RequestException as e:
            # Network-level failure: no response object, record the error only.
            result = TestResult(
                endpoint_id=config["id"],
                test_name=test_name,
                success=False,
                error_message=str(e),
            )
            if verbose:
                print(f"  [-] {test_name}: ERROR - {e}")

        report.tests.append(result)
        if result.success:
            report.tests_passed += 1
        else:
            report.tests_failed += 1

    # Map pass counts onto an overall endpoint status.
    if report.tests_passed == len(OAI_PMH_VERBS):
        report.actual_status = "ACTIVE"
    elif report.tests_passed > 0:
        report.actual_status = "DEGRADED"
    else:
        report.actual_status = "OFFLINE"

    return report
|
|
|
|
|
def test_mets_api(verbose: bool = False) -> EndpointReport:
    """Test the METS API by retrieving a known-good METS record.

    Args:
        verbose: When True, print a result line to stdout.

    Returns:
        EndpointReport with a single TestResult; actual_status is ACTIVE on
        success, OFFLINE otherwise.
    """
    config = ENDPOINTS["mets_api"]
    report = EndpointReport(
        endpoint_id=config["id"],
        endpoint_name=config["name"],
        protocol=config["protocol"],
        expected_status=config["expected_status"],
        actual_status="UNKNOWN",
    )

    uuid = config["sample_uuid"]
    url = f"{config['base_url']}/{uuid}"
    test_name = "METS file retrieval"

    try:
        # Monotonic clock: immune to wall-clock jumps during the request.
        start = time.perf_counter()
        response = requests.get(url, timeout=30)
        elapsed_ms = (time.perf_counter() - start) * 1000

        result = TestResult(
            endpoint_id=config["id"],
            test_name=test_name,
            success=response.status_code == 200,
            status_code=response.status_code,
            response_time_ms=round(elapsed_ms, 2),
            content_type=response.headers.get("Content-Type"),
        )

        if response.status_code == 200:
            try:
                root = ET.fromstring(response.content)
                # Count file entries in the METS document.
                ns = {"mets": "http://www.loc.gov/METS/"}
                files = root.findall(".//mets:file", ns)
                result.details["file_count"] = len(files)
                result.details["mets_uuid"] = uuid

                # Record the first few file IDs for reference.
                file_uuids = [f.get("ID", "") for f in files[:3]]
                if file_uuids:
                    result.details["sample_file_ids"] = file_uuids

            except ET.ParseError as e:
                result.success = False
                result.error_message = f"XML parse error: {e}"

        if verbose:
            print(f"  [{'+' if result.success else '-'}] {test_name}: "
                  f"{result.status_code} ({result.response_time_ms:.0f}ms)")
            if result.details:
                print(f"      Details: {result.details}")
            if result.error_message:
                print(f"      Error: {result.error_message}")

    except requests.RequestException as e:
        # Network-level failure: no response object available.
        result = TestResult(
            endpoint_id=config["id"],
            test_name=test_name,
            success=False,
            error_message=str(e),
        )
        if verbose:
            print(f"  [-] {test_name}: ERROR - {e}")

    report.tests.append(result)
    if result.success:
        report.tests_passed += 1
        report.actual_status = "ACTIVE"
    else:
        report.tests_failed += 1
        report.actual_status = "OFFLINE"

    return report
|
|
|
|
|
|
def test_file_api(verbose: bool = False) -> EndpointReport:
    """Test the File Download API for each documented file variant.

    Uses HEAD requests so the (potentially large) scan files are never
    actually downloaded.

    Args:
        verbose: When True, print a per-variant result line to stdout.

    Returns:
        EndpointReport with one TestResult per sample UUID; actual_status is
        ACTIVE (all passed), DEGRADED (some), or OFFLINE (none).
    """
    config = ENDPOINTS["file_api"]
    report = EndpointReport(
        endpoint_id=config["id"],
        endpoint_name=config["name"],
        protocol=config["protocol"],
        expected_status=config["expected_status"],
        actual_status="UNKNOWN",
    )

    for file_type, uuid in config["sample_uuids"].items():
        url = f"{config['base_url']}/{file_type}/{uuid}"
        test_name = f"File download ({file_type})"

        try:
            # Monotonic clock for timing; HEAD avoids transferring the body.
            start = time.perf_counter()
            response = requests.head(url, timeout=30, allow_redirects=True)
            elapsed_ms = (time.perf_counter() - start) * 1000

            result = TestResult(
                endpoint_id=config["id"],
                test_name=test_name,
                success=response.status_code == 200,
                status_code=response.status_code,
                response_time_ms=round(elapsed_ms, 2),
                content_type=response.headers.get("Content-Type"),
            )

            if response.status_code == 200:
                # Content-Length lets us report the file size without a GET.
                content_length = response.headers.get("Content-Length")
                if content_length:
                    result.details["content_length_bytes"] = int(content_length)
                result.details["file_type"] = file_type
                result.details["uuid"] = uuid

            if verbose:
                print(f"  [{'+' if result.success else '-'}] {test_name}: "
                      f"{result.status_code} ({result.response_time_ms:.0f}ms)")
                if result.details:
                    print(f"      Details: {result.details}")

        except requests.RequestException as e:
            # Network-level failure: no response object available.
            result = TestResult(
                endpoint_id=config["id"],
                test_name=test_name,
                success=False,
                error_message=str(e),
            )
            if verbose:
                print(f"  [-] {test_name}: ERROR - {e}")

        report.tests.append(result)
        if result.success:
            report.tests_passed += 1
        else:
            report.tests_failed += 1

    # Map pass counts onto an overall endpoint status.
    if report.tests_passed == len(config["sample_uuids"]):
        report.actual_status = "ACTIVE"
    elif report.tests_passed > 0:
        report.actual_status = "DEGRADED"
    else:
        report.actual_status = "OFFLINE"

    return report
|
|
|
|
|
|
def test_sparql(verbose: bool = False) -> EndpointReport:
    """Test the SPARQL webservice: availability, then a sample query.

    Args:
        verbose: When True, print a per-test result line to stdout.

    Returns:
        EndpointReport with two TestResults; actual_status is ACTIVE (both
        passed), DEGRADED (one), or OFFLINE (neither).
    """
    config = ENDPOINTS["sparql"]
    report = EndpointReport(
        endpoint_id=config["id"],
        endpoint_name=config["name"],
        protocol=config["protocol"],
        expected_status=config["expected_status"],
        actual_status="UNKNOWN",
    )

    # Test 1: Check endpoint availability with a plain GET.
    test_name = "SPARQL endpoint availability"
    try:
        # Monotonic clock for elapsed-time measurement.
        start = time.perf_counter()
        response = requests.get(config["base_url"], timeout=30)
        elapsed_ms = (time.perf_counter() - start) * 1000

        result = TestResult(
            endpoint_id=config["id"],
            test_name=test_name,
            success=response.status_code == 200,
            status_code=response.status_code,
            response_time_ms=round(elapsed_ms, 2),
            content_type=response.headers.get("Content-Type"),
        )

        if verbose:
            print(f"  [{'+' if result.success else '-'}] {test_name}: "
                  f"{result.status_code} ({result.response_time_ms:.0f}ms)")

    except requests.RequestException as e:
        result = TestResult(
            endpoint_id=config["id"],
            test_name=test_name,
            success=False,
            error_message=str(e),
        )
        if verbose:
            print(f"  [-] {test_name}: ERROR - {e}")

    report.tests.append(result)
    if result.success:
        report.tests_passed += 1
    else:
        report.tests_failed += 1

    # Test 2: Execute the sample query via POST; longer timeout since the
    # triple store may be slow on a cold query.
    test_name = "SPARQL query execution"
    try:
        start = time.perf_counter()
        response = requests.post(
            config["base_url"],
            data={"query": SPARQL_QUERY},
            headers={"Accept": "application/sparql-results+json"},
            timeout=60,
        )
        elapsed_ms = (time.perf_counter() - start) * 1000

        result = TestResult(
            endpoint_id=config["id"],
            test_name=test_name,
            success=response.status_code == 200,
            status_code=response.status_code,
            response_time_ms=round(elapsed_ms, 2),
            content_type=response.headers.get("Content-Type"),
        )

        if response.status_code == 200:
            try:
                data = response.json()
                # SPARQL JSON results: {"results": {"bindings": [...]}}.
                bindings = data.get("results", {}).get("bindings", [])
                result.details["result_count"] = len(bindings)
                if bindings:
                    result.details["sample_result"] = {
                        k: v.get("value") for k, v in bindings[0].items()
                    }
            except json.JSONDecodeError:
                # Endpoint may answer with its HTML query form instead of
                # JSON; record the observation but keep the HTTP verdict.
                result.details["note"] = "Response not JSON, may be HTML form"

        if verbose:
            print(f"  [{'+' if result.success else '-'}] {test_name}: "
                  f"{result.status_code} ({result.response_time_ms:.0f}ms)")
            if result.details:
                print(f"      Details: {result.details}")

    except requests.RequestException as e:
        result = TestResult(
            endpoint_id=config["id"],
            test_name=test_name,
            success=False,
            error_message=str(e),
        )
        if verbose:
            print(f"  [-] {test_name}: ERROR - {e}")

    report.tests.append(result)
    if result.success:
        report.tests_passed += 1
    else:
        report.tests_failed += 1

    # Map pass counts onto an overall endpoint status (two tests total).
    if report.tests_passed == 2:
        report.actual_status = "ACTIVE"
    elif report.tests_passed > 0:
        report.actual_status = "DEGRADED"
    else:
        report.actual_status = "OFFLINE"

    return report
|
|
|
|
|
|
def test_ontology(verbose: bool = False) -> EndpointReport:
    """Test the NAO ontology endpoint (JSON-LD hosted on GitHub).

    Note: The original archief.nl/def/ontologie/ URL has SSL/403 issues.
    The ontology is now hosted on GitHub as JSON-LD only.

    Args:
        verbose: When True, print a result line to stdout.

    Returns:
        EndpointReport with a single TestResult; actual_status is ACTIVE on
        success, OFFLINE otherwise.
    """
    config = ENDPOINTS["ontology"]
    report = EndpointReport(
        endpoint_id=config["id"],
        endpoint_name=config["name"],
        protocol=config["protocol"],
        expected_status=config["expected_status"],
        actual_status="UNKNOWN",
    )

    # Test 1: Fetch JSON-LD ontology from GitHub
    test_name = "Ontology retrieval (JSON-LD from GitHub)"
    try:
        # Monotonic clock for elapsed-time measurement.
        start = time.perf_counter()
        response = requests.get(
            config["base_url"],
            timeout=30,
            allow_redirects=True,
        )
        elapsed_ms = (time.perf_counter() - start) * 1000

        result = TestResult(
            endpoint_id=config["id"],
            test_name=test_name,
            success=response.status_code == 200,
            status_code=response.status_code,
            response_time_ms=round(elapsed_ms, 2),
            content_type=response.headers.get("Content-Type"),
        )

        if response.status_code == 200:
            result.details["response_size_bytes"] = len(response.content)
            result.details["source"] = "GitHub (NationaalArchief/LOD)"

            # Try to parse the JSON-LD and extract some metadata; a parse
            # failure is recorded but does not flip the HTTP verdict.
            try:
                data = response.json()
                if isinstance(data, list):
                    # Expanded JSON-LD: a flat array of entities.
                    result.details["entity_count"] = len(data)
                    # Tally the distinct @type values seen.
                    types = {}
                    for item in data:
                        if isinstance(item, dict) and "@type" in item:
                            t = item["@type"]
                            if isinstance(t, list):
                                # @type may be a list; use its first entry.
                                t = t[0] if t else "unknown"
                            types[t] = types.get(t, 0) + 1
                    if types:
                        result.details["types_found"] = dict(list(types.items())[:5])
                elif isinstance(data, dict):
                    # Compacted JSON-LD with an @graph container.
                    graph = data.get("@graph", [])
                    result.details["graph_size"] = len(graph)
            except json.JSONDecodeError:
                result.details["parse_note"] = "Content is not valid JSON"

        if verbose:
            print(f"  [{'+' if result.success else '-'}] {test_name}: "
                  f"{result.status_code} ({result.response_time_ms:.0f}ms)")
            if result.details:
                print(f"      Details: {result.details}")

    except requests.RequestException as e:
        result = TestResult(
            endpoint_id=config["id"],
            test_name=test_name,
            success=False,
            error_message=str(e),
        )
        if verbose:
            print(f"  [-] {test_name}: ERROR - {e}")

    report.tests.append(result)
    if result.success:
        report.tests_passed += 1
    else:
        report.tests_failed += 1

    # Single test: either the ontology was retrieved or it was not.
    if report.tests_passed == 1:
        report.actual_status = "ACTIVE"
    else:
        report.actual_status = "OFFLINE"

    return report
|
|
|
|
|
|
def test_photo_api(verbose: bool = False) -> EndpointReport:
    """Test Photo API (expected to be offline).

    There is no URL to probe — the API is documented as being under
    development — so the report is assembled directly with a single
    informational result marked as a pass.
    """
    cfg = ENDPOINTS["photo_api"]

    offline_check = TestResult(
        endpoint_id=cfg["id"],
        test_name="Photo API status check",
        success=True,  # correctly identified as offline counts as a pass
        details={"status": "OFFLINE", "note": "API under development, no URL available"},
    )

    if verbose:
        print("  [~] Photo API: OFFLINE (expected - under development)")

    return EndpointReport(
        endpoint_id=cfg["id"],
        endpoint_name=cfg["name"],
        protocol=cfg["protocol"],
        expected_status=cfg["expected_status"],
        actual_status="OFFLINE",
        tests_passed=1,
        tests=[offline_check],
    )
|
|
|
|
|
|
# ============================================================================
|
|
# Report Generation
|
|
# ============================================================================
|
|
|
|
|
|
def generate_report(reports: list[EndpointReport], output_dir: Path | None = None) -> dict:
    """Generate a comprehensive, JSON-serializable test report.

    Args:
        reports: Per-endpoint reports to aggregate.
        output_dir: If given, the report is also written to a timestamped
            JSON file inside this directory (created if necessary).

    Returns:
        Dict with a "summary" section and a per-endpoint "endpoints" list.
    """
    # Single timestamp source, timezone-aware UTC. The same instant is used
    # for the file name, so report content and file name cannot disagree
    # (previously the file name used naive local time).
    now_utc = datetime.now(timezone.utc)
    timestamp = now_utc.isoformat()

    summary = {
        "report_timestamp": timestamp,
        "custodian_id": "NL-ZH-DHA-A-NA",
        "custodian_name": "Nationaal Archief",
        "total_endpoints": len(reports),
        "endpoints_active": sum(1 for r in reports if r.actual_status == "ACTIVE"),
        "endpoints_degraded": sum(1 for r in reports if r.actual_status == "DEGRADED"),
        "endpoints_offline": sum(1 for r in reports if r.actual_status == "OFFLINE"),
        "total_tests": sum(r.tests_passed + r.tests_failed for r in reports),
        "tests_passed": sum(r.tests_passed for r in reports),
        "tests_failed": sum(r.tests_failed for r in reports),
    }

    # Flatten each EndpointReport (and its TestResults) into plain dicts.
    endpoint_results = []
    for report in reports:
        endpoint_results.append({
            "endpoint_id": report.endpoint_id,
            "endpoint_name": report.endpoint_name,
            "protocol": report.protocol,
            "expected_status": report.expected_status,
            "actual_status": report.actual_status,
            "status_match": report.expected_status == report.actual_status,
            "tests_passed": report.tests_passed,
            "tests_failed": report.tests_failed,
            "tests": [
                {
                    "name": t.test_name,
                    "success": t.success,
                    "status_code": t.status_code,
                    "response_time_ms": t.response_time_ms,
                    "content_type": t.content_type,
                    "error": t.error_message,
                    "details": t.details,
                }
                for t in report.tests
            ],
        })

    full_report = {
        "summary": summary,
        "endpoints": endpoint_results,
    }

    # Save to file if an output directory was specified.
    if output_dir:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        report_file = output_dir / f"nationaalarchief_endpoint_report_{now_utc.strftime('%Y%m%d_%H%M%S')}.json"
        # Explicit encoding avoids platform-dependent default encodings;
        # ensure_ascii=False keeps non-ASCII (e.g. Dutch names) readable.
        with open(report_file, "w", encoding="utf-8") as f:
            json.dump(full_report, f, indent=2, ensure_ascii=False)
        print(f"\nReport saved to: {report_file}")

    return full_report
|
|
|
|
|
|
def print_summary(reports: list[EndpointReport]) -> None:
    """Print a human-readable summary table of all endpoint reports.

    Args:
        reports: Aggregated per-endpoint test reports.
    """
    print("\n" + "=" * 70)
    print("ENDPOINT TEST SUMMARY - Nationaal Archief (NL-ZH-DHA-A-NA)")
    print("=" * 70)

    total_passed = 0
    total_failed = 0

    for report in reports:
        # Map the observed status to a display icon.
        status_icon = {
            "ACTIVE": "\u2705",  # Green checkmark
            "DEGRADED": "\u26A0\uFE0F",  # Warning
            "OFFLINE": "\u274C",  # Red X
            "UNKNOWN": "\u2753",  # Question mark
        }.get(report.actual_status, "?")

        # Fix: match_icon was computed but never printed; it now flags
        # whether the observed status matches the documented one.
        match_icon = "\u2705" if report.expected_status == report.actual_status else "\u26A0\uFE0F"

        print(f"\n{report.endpoint_name}")
        print(f"  Protocol: {report.protocol}")
        print(f"  Expected: {report.expected_status} | Actual: {report.actual_status} {status_icon} (match {match_icon})")
        print(f"  Tests: {report.tests_passed} passed, {report.tests_failed} failed")

        total_passed += report.tests_passed
        total_failed += report.tests_failed

    print("\n" + "-" * 70)
    print(f"TOTAL: {total_passed} tests passed, {total_failed} tests failed")
    print("=" * 70)
|
|
|
|
|
|
# ============================================================================
|
|
# Main
|
|
# ============================================================================
|
|
|
|
|
|
def main():
    """CLI entry point: parse arguments, run the selected tests, report.

    Exits with status 0 when every test passed, 1 otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Test Nationaal Archief data service endpoints"
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Show detailed test output"
    )
    parser.add_argument(
        "-o", "--output-dir",
        type=str,
        help="Directory to save JSON report"
    )
    parser.add_argument(
        "--endpoint",
        type=str,
        choices=list(ENDPOINTS.keys()),
        help="Test only a specific endpoint"
    )

    args = parser.parse_args()

    print("Nationaal Archief Endpoint Testing")
    print("=" * 40)
    # Timezone-aware start time, consistent with generate_report()'s
    # UTC report_timestamp (was a naive local datetime).
    print(f"Started: {datetime.now(timezone.utc).isoformat()}")

    reports = []

    # Map endpoint keys to their test functions.
    test_functions = {
        "oai_pmh": test_oai_pmh,
        "mets_api": test_mets_api,
        "file_api": test_file_api,
        "sparql": test_sparql,
        "ontology": test_ontology,
        "photo_api": test_photo_api,
    }

    # Run either the single requested endpoint or all of them.
    endpoints_to_test = [args.endpoint] if args.endpoint else list(test_functions.keys())

    for endpoint_key in endpoints_to_test:
        config = ENDPOINTS[endpoint_key]
        print(f"\n[{endpoint_key.upper()}] Testing {config['name']}...")

        report = test_functions[endpoint_key](verbose=args.verbose)
        reports.append(report)

    # Generate the JSON report (default location when no --output-dir given).
    output_path = Path(args.output_dir) if args.output_dir else Path("reports/endpoint_tests")
    full_report = generate_report(reports, output_dir=output_path)

    print_summary(reports)

    # Non-zero exit code signals CI / callers that at least one test failed.
    sys.exit(1 if full_report["summary"]["tests_failed"] > 0 else 0)
|
|
|
|
|
|
# Script entry point: only run the tests when executed directly.
if __name__ == "__main__":
    main()
|