#!/usr/bin/env python3
"""Test script for debugging link extraction.

Fetches one page through ``LinkupWebReader``, prints a preview of the
returned content, lists every link extracted from it, and then shows which
of those links survive ``filter_mission_links``.

Usage:
    python <this script> [URL]

When no URL is given, ``DEFAULT_URL`` is used.
"""

import asyncio
import os
import sys
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse

# Add project root to path and run from there, so the project-local import
# below resolves regardless of where the script was launched from.
PROJECT_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
os.chdir(PROJECT_ROOT)

from batch_extract_mission_statements import (  # noqa: E402  (needs sys.path set up first)
    get_api_tokens,
    LinkupWebReader,
    extract_links_from_markdown,
    filter_mission_links,
    MISSION_LINK_KEYWORDS,
)

# Page fetched when no URL is supplied by the caller or on the command line.
DEFAULT_URL = 'https://grachten.museum/'


async def test(url: str = DEFAULT_URL, domain: Optional[str] = None) -> None:
    """Fetch *url* and print each stage of the link-extraction pipeline.

    Args:
        url: Page to fetch and use as the base URL for link extraction.
        domain: Value passed as the second argument of
            ``filter_mission_links``. Defaults to the network location of
            *url* (``'grachten.museum'`` for the default URL).
            NOTE(review): whether ``filter_mission_links`` expects a bare
            domain without ``www.`` or a port is not visible from this
            script; pass *domain* explicitly if the derived value is wrong.
    """
    if domain is None:
        domain = urlparse(url).netloc

    tokens = get_api_tokens()
    reader = LinkupWebReader(tokens['linkup'])

    print("Fetching homepage...")
    result = await reader.read_webpage(url)

    # ``.get('content', '')`` would still yield None when the key is present
    # with a None value; coerce to an empty string so the prints below work.
    content = result.get('content') or ''
    print(f"Content length: {len(content)}")
    print(f"\n--- Content preview ---\n{content[:1000]}\n---\n")

    links = extract_links_from_markdown(content, url)
    print(f"\nFound {len(links)} links:")
    for link in links:
        parsed = urlparse(link)
        print(f" {link}")
        # The lower-cased path is shown next to each link for comparison
        # with the keywords printed below.
        print(f" path: {parsed.path.lower()}")

    print(f"\nMission link keywords: {MISSION_LINK_KEYWORDS}")

    mission_links = filter_mission_links(links, domain)
    print(f"\nFiltered to {len(mission_links)} mission links:")
    for link in mission_links:
        print(f" {link}")


if __name__ == "__main__":
    # An optional first command-line argument overrides the default URL.
    target_url = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_URL
    asyncio.run(test(target_url))