#!/usr/bin/env python3
"""
Quick script to scrape pages 14-20 of Austrian ISIL database using Playwright.
This continues from where page 13 left off.

The script itself performs no scraping: it only computes, for each result
page, the search URL, the result offset and the JSON file the scraped data
should be written to, and prints a summary.
"""

import json
import time
from pathlib import Path

# Note: This script expects to be run via OpenCode's Playwright browser tools
# It generates the URLs and offsets for pages 14-20

OUTPUT_DIR = Path("/Users/kempersc/apps/glam/data/isil/austria")


def generate_page_info(first_page=14, last_page=20, page_size=10):
    """Generate page numbers, offsets, URLs and output paths for a page range.

    Called without arguments this covers pages 14 through 20 with ten
    results per page.

    Args:
        first_page: First (1-based) page number to include.
        last_page: Last page number to include (inclusive). If it is smaller
            than ``first_page`` the result is an empty list.
        page_size: Number of results per page, used to turn a page number
            into the ``offset`` query parameter.

    Returns:
        A list of dicts, one per page in ascending order, with the keys
        ``"page"`` (int), ``"offset"`` (int), ``"url"`` (str) and
        ``"output_file"`` (``pathlib.Path`` inside ``OUTPUT_DIR``).

    Raises:
        ValueError: If ``first_page`` or ``page_size`` is smaller than 1,
            which would produce a negative or meaningless offset.
    """
    if first_page < 1:
        raise ValueError(f"first_page must be >= 1, got {first_page}")
    if page_size < 1:
        raise ValueError(f"page_size must be >= 1, got {page_size}")

    pages = []
    for page_num in range(first_page, last_page + 1):  # last_page is inclusive
        # Page 1 starts at offset 0, so page N starts at (N - 1) * page_size.
        offset = (page_num - 1) * page_size
        url = f"https://www.isil.at/primo-explore/search?query=any,contains,AT-&tab=default_tab&search_scope=default_scope&vid=AIS&offset={offset}"
        pages.append({
            "page": page_num,
            "offset": offset,
            "url": url,
            "output_file": OUTPUT_DIR / f"page_{page_num:03d}_data.json",
        })
    return pages


def main():
    """Print the pages to scrape together with their offsets and file names."""
    pages = generate_page_info()
    print("Pages to scrape:")
    for p in pages:
        print(f" Page {p['page']}: offset={p['offset']}, file={p['output_file'].name}")
    print(f"\nTotal: {len(pages)} pages")


if __name__ == "__main__":
    main()