76 lines
2.3 KiB
Python
76 lines
2.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Sachsen-Anhalt GLAM Institutions - DDB API Harvest
|
|
Extracts museums, libraries, and archives via DDB Search API
|
|
"""
|
|
|
|
import requests
|
|
import json
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any
|
|
import time
|
|
|
|
def query_ddb_search_api(
    query: str,
    rows: int = 100,
    *,
    offset: int = 0,
    api_key: str = "YOUR_API_KEY",
) -> Dict[str, Any]:
    """Query the DDB Search API and return the decoded JSON response.

    Args:
        query: Search expression sent as the ``query`` parameter.
        rows: Maximum number of results requested in this call.
        offset: Index of the first result to return; raise it to page
            through result sets larger than ``rows``.
        api_key: Key sent as ``oauth_consumer_key``. The default is only a
            placeholder; the DDB requires registration to obtain a real key.

    Returns:
        The decoded JSON body of the response (presumably a JSON object for
        DDB search responses - confirm against the API documentation), or an
        empty dict if the request fails or the body is not valid JSON.
    """
    # DDB Search API endpoint
    base_url = "https://api.deutsche-digitale-bibliothek.de/search"

    params = {
        'query': query,
        'rows': rows,
        'offset': offset,
        # Ask for the sector facet (archives, libraries, museums).
        'facet': 'sector_fct',
        'oauth_consumer_key': api_key,
    }

    print(f"Querying DDB Search API: {query}")
    print(f"URL: {base_url}")

    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # JSONDecodeError does not derive from RequestException.
        print(f"❌ API request failed: {e}")
        return {}
|
|
|
|
def scrape_ddb_web_interface() -> List[Dict[str, Any]]:
    """
    Placeholder for a DDB web-interface scraper for Sachsen-Anhalt institutions.

    No scraping is performed: the function prints a notice that the DDB API
    requires authentication and that Archivportal-D is used instead, followed
    by a blank line, and returns an empty list.
    """
    institutions: List[Dict[str, Any]] = []

    # Archivportal-D has better coverage and no API key requirement, so
    # nothing is collected here. The extra newline reproduces the blank
    # separator line after the notice.
    print(
        "DDB API requires authentication. Switching to Archivportal-D approach...",
        end="\n\n",
    )

    return institutions
|
|
|
|
def main():
    """Print the status report for the DDB harvest and the planned fallback.

    The report states that the DDB SPARQL endpoint is unavailable and that
    the Search API needs an authentication key, then lists the alternative
    sources to harvest. Nothing is fetched; the function only writes to
    standard output.
    """
    rule = "=" * 80

    # One entry per output line; empty strings are blank separator lines.
    report = (
        rule,
        "Sachsen-Anhalt GLAM Institutions - DDB API Harvest",
        rule,
        "",
        "⚠️ DDB SPARQL endpoint is unavailable (404 Not Found)",
        "⚠️ DDB Search API requires authentication key",
        "",
        "Alternative approach: Use Archivportal-D for archive coverage",
        " + Direct website scraping for museums/libraries",
        "",
        "Next steps:",
        " 1. Harvest from Archivportal-D (Sachsen-Anhalt filter)",
        " 2. Scrape Museumsverband Sachsen-Anhalt website",
        " 3. Scrape regional library networks",
        "",
        rule,
    )

    for line in report:
        print(line)
|
|
|
|
# Run the status report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|