#!/bin/bash
#
# Monitor geocoding progress.
#
# Prints whether a geocode_global_institutions.py process is running, the
# last line of the geocoding log, and the progress / cache statistics that
# can be parsed out of that log.
#
# Usage: monitor_geocoding.sh [LOG_FILE]
#   LOG_FILE  optional path to the log; defaults to DEFAULT_LOG_FILE below.
#
# `set -e` / `pipefail` are intentionally not enabled: a grep that finds
# nothing is an expected outcome here (stats simply are not shown), not an
# error that should abort the monitor.

readonly DEFAULT_LOG_FILE="data/logs/geocoding_full_run_fixed.log"
readonly GEOCODER_PATTERN="geocode_global_institutions.py"

#######################################
# Report whether a geocoding process is currently running.
# Globals:   GEOCODER_PATTERN (read)
# Returns:   0 if at least one matching process exists, non-zero otherwise
#######################################
is_geocoder_running() {
  if command -v pgrep >/dev/null 2>&1; then
    pgrep -f -- "$GEOCODER_PATTERN" >/dev/null
  else
    # Fallback when pgrep is unavailable: bracket the first character so the
    # grep process does not match its own command line.
    ps aux | grep -q -- "[${GEOCODER_PATTERN:0:1}]${GEOCODER_PATTERN:1}"
  fi
}

#######################################
# Extract the number that directly follows the last occurrence of a label.
# Only the digits right after the label are taken, so timestamps or
# percentages elsewhere on the same line do not leak into the value.
# Arguments: $1 - literal label, e.g. "Cache hits:"
#            $2 - path to the log file
# Outputs:   the number with thousands separators removed, or nothing
#######################################
extract_stat() {
  local label=$1
  local file=$2
  grep -oE -- "${label}[[:space:]]*[0-9][0-9,]*" "$file" \
    | tail -n 1 \
    | grep -oE '[0-9][0-9,]*$' \
    | tr -d ','
}

#######################################
# Print processed/total counts, percentage and cache statistics.
# Prints nothing when the log holds no "Progress: N/M" entry.
# Arguments: $1 - path to the log file
# Outputs:   statistics lines on stdout
#######################################
print_progress_stats() {
  local file=$1
  local progress current total percent cache_hits api_calls

  progress=$(grep -oE 'Progress: [0-9]+/[0-9]+' -- "$file" | tail -n 1)
  [[ -n "$progress" ]] || return 0

  progress=${progress#Progress: }
  current=${progress%%/*}
  total=${progress##*/}

  # Values go in through -v (never spliced into the awk program), and a zero
  # total is reported as 0.0% instead of triggering a division-by-zero abort.
  percent=$(LC_ALL=C awk -v cur="$current" -v tot="$total" \
    'BEGIN { pct = (tot > 0) ? (cur / tot) * 100 : 0; printf "%.1f", pct }')
  printf '%s\n' "Institutions processed: $current / $total ($percent%)"

  # Show cache stats if available
  cache_hits=$(extract_stat "Cache hits:" "$file")
  api_calls=$(extract_stat "API calls:" "$file")
  if [[ -n "$cache_hits" ]]; then
    printf '%s\n' "Cache hits: $cache_hits"
  fi
  if [[ -n "$api_calls" ]]; then
    printf '%s\n' "API calls: $api_calls"
  fi
}

#######################################
# Entry point: print the full monitor report.
# Globals:   DEFAULT_LOG_FILE (read)
# Arguments: $1 - optional log file path
# Outputs:   the report on stdout
#######################################
main() {
  local log_file=${1:-$DEFAULT_LOG_FILE}
  local heavy_rule="==================================================================="
  local light_rule="-------------------------------------------------------------------"

  printf '%s\n' "$heavy_rule" "GEOCODING PROGRESS MONITOR" "$heavy_rule" ""

  # Check if process is running
  if is_geocoder_running; then
    printf '%s\n' "✅ Geocoding process is RUNNING" ""
  else
    printf '%s\n' "⚠️ No geocoding process found" ""
  fi

  # Show last progress line and statistics parsed from the log
  if [[ -f "$log_file" ]]; then
    printf '%s\n' "Latest progress:" "$light_rule"
    tail -n 1 -- "$log_file"
    printf '%s\n' "$light_rule" ""

    print_progress_stats "$log_file"

    printf '%s\n' "" "Log file: $log_file" "View full log: tail -f $log_file"
  else
    printf '%s\n' "⚠️ Log file not found: $log_file"
  fi

  printf '%s\n' "$heavy_rule"
}

main "$@"