#!/usr/bin/env python3
"""
Monitor progress of the Wikidata enrichment script.

Shows number of entities cached and estimated completion.
"""

import json
import os
import subprocess
from datetime import datetime, timezone
from itertools import islice
from pathlib import Path
from typing import Optional

REGISTER_FILE = Path("data/wikidata/GLAMORCUBEPSXHFN/.fetch_register.json")
# From count: hypernym(1662) + entity(214) + entity_list(35) + collection(59)
TOTAL_ENTITIES = 1970

# Substring looked for in running command lines to detect the enrichment job.
PROCESS_PATTERN = "enrich_hyponyms_with_wikidata"
# Requests per second used for the time estimate (configured in the
# enrichment script).
REQUESTS_PER_SECOND = 10
# Width of the textual progress bar, in character cells.
BAR_WIDTH = 50
# Number of cached entities listed as a sample.
SAMPLE_SIZE = 5
NO_LABEL = "No English label"


def _is_enrichment_running() -> bool:
    """Return True when ``pgrep -f`` finds a process matching PROCESS_PATTERN.

    pgrep is invoked directly rather than through a shell: with ``sh -c`` the
    shell's own command line contains the pattern and can be matched by
    ``pgrep -f``, which makes the job look alive when it is not.
    """
    try:
        result = subprocess.run(
            ["pgrep", "-f", PROCESS_PATTERN],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError:
        # pgrep is missing or cannot be executed; report "not running",
        # which is what a failing shell command produced before.
        return False
    return result.returncode == 0


def _load_register(register_file: Path) -> Optional[dict]:
    """Load the register JSON; print a diagnostic and return None on failure."""
    try:
        with open(register_file, "r", encoding="utf-8") as handle:
            register = json.load(handle)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError; the
        # file may be caught half-written if the enrichment job is saving it.
        print(f"❌ Register file could not be read: {exc}")
        return None
    if not isinstance(register, dict):
        print("❌ Register file has an unexpected format")
        return None
    return register


def _progress_bar(progress_pct: float, width: int = BAR_WIDTH) -> str:
    """Render a bar of exactly ``width`` cells for a percentage."""
    filled = max(0, min(width, int(width * progress_pct / 100)))
    return "█" * filled + "░" * (width - filled)


def _english_label(entry: object) -> str:
    """Return entry['data']['labels']['en']['value'], or NO_LABEL if absent."""
    node = entry
    for key in ("data", "labels", "en"):
        if not isinstance(node, dict):
            return NO_LABEL
        node = node.get(key)
    if not isinstance(node, dict):
        return NO_LABEL
    return node.get("value", NO_LABEL)


def monitor(
    register_file: Optional[Path] = None,
    total_entities: Optional[int] = None,
) -> None:
    """Print a progress report for the Wikidata enrichment run.

    Args:
        register_file: Path of the fetch register JSON. Defaults to the
            module-level REGISTER_FILE, looked up at call time.
        total_entities: Number of entities expected in total. Defaults to
            the module-level TOTAL_ENTITIES, looked up at call time.

    The report is written to stdout; nothing is returned. A missing or
    unreadable register produces a one-line message instead of a traceback.
    """
    if register_file is None:
        register_file = REGISTER_FILE
    if total_entities is None:
        total_entities = TOTAL_ENTITIES
    register_file = Path(register_file)

    rule = "=" * 70
    print(rule)
    print("📊 WIKIDATA ENRICHMENT PROGRESS MONITOR")
    print(rule)

    if not register_file.exists():
        print("❌ Register file not found")
        return

    register = _load_register(register_file)
    if register is None:
        return

    entities = register.get('entities', {})
    if not isinstance(entities, dict):
        entities = {}
    entities_cached = len(entities)
    total_fetches = register.get('total_fetches', 0)
    last_updated = register.get('last_updated', 'Unknown')

    # Clamp both figures: the register can hold more entities than the
    # expected total, which would otherwise give a negative remainder, a
    # negative time estimate and an over-long progress bar.
    if total_entities > 0:
        progress_pct = min(100.0, entities_cached / total_entities * 100)
    else:
        progress_pct = 0
    remaining = max(0, total_entities - entities_cached)

    status = 'Running' if _is_enrichment_running() else 'Completed/Stopped'
    print(f"\n{'Status':<25s}: {status}")
    print(f"{'Last Updated':<25s}: {last_updated}")
    print(f"\n{'Total Entities to Process':<25s}: {total_entities:,}")
    print(f"{'Entities Cached':<25s}: {entities_cached:,}")
    print(f"{'Total API Fetches':<25s}: {total_fetches:,}")
    print(f"{'Progress':<25s}: {progress_pct:.1f}% ({entities_cached}/{total_entities})")
    print(f"{'Remaining':<25s}: {remaining:,} entities")

    # Estimate time from the fixed request rate, not from observed throughput.
    if entities_cached > 0:
        est_minutes = remaining / REQUESTS_PER_SECOND / 60
        print(f"{'Estimated Time Remaining':<25s}: {est_minutes:.1f} minutes")

    print(f"\n[{_progress_bar(progress_pct)}] {progress_pct:.1f}%")

    # Show the first few cached entities in register order.
    if entities_cached > 0:
        print(f"\n{'Sample Cached Entities':<25s}:")
        for qid, entry in islice(entities.items(), SAMPLE_SIZE):
            print(f"  • {qid}: {_english_label(entry)}")

    print("\n" + rule)
    print("💡 Tip: Run this script again to see updated progress")
    print(rule)


if __name__ == "__main__":
    monitor()