glam/scripts/check_geocoding_progress.sh
kempersc e5a532a8bc Add comprehensive tests for NLP institution extraction and RDF partnership integration
- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive).
- Added tests for extracted entities and result handling to validate the extraction process.
- Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format.
- Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns.
- Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
2025-11-19 23:20:47 +01:00

57 lines
1.9 KiB
Bash
Executable file

#!/bin/bash
# Monitor geocoding progress.
#
# Reports whether a geocode_global_institutions.py process is running and
# summarises the latest progress counter and cache/API statistics found in
# the geocoding log.
#
# Usage: check_geocoding_progress.sh [LOG_FILE]
#   LOG_FILE  log to inspect; defaults to DEFAULT_LOG_FILE below, which is
#             resolved relative to the current working directory.
#
# No `set -e`: grep legitimately returns non-zero when the log has no matching
# line yet, and that must not abort the report.
set -u

readonly DEFAULT_LOG_FILE="data/logs/geocoding_full_run_fixed.log"
readonly HEAVY_RULE="==================================================================="
readonly LIGHT_RULE="-------------------------------------------------------------------"

# Succeeds when at least one process whose command line mentions the geocoding
# script exists. pgrep never reports itself, so no `grep -v grep` is needed.
geocoder_running() {
  pgrep -f 'geocode_global_institutions\.py' >/dev/null 2>&1
}

# Prints "CURRENT/TOTAL" taken from the last "Progress: N/M" entry in log $1.
# Prints nothing when the log contains no such entry. Both numbers must have
# at least one digit, so a malformed "Progress: /" entry is ignored.
latest_progress() {
  grep -o -e 'Progress: [0-9][0-9]*/[0-9][0-9]*' -- "$1" \
    | tail -n 1 \
    | cut -d' ' -f2
}

# Prints the number that directly follows label $1 (e.g. "Cache hits:") on the
# last line of log $2 containing that label, with thousands separators
# removed. Prints nothing when the label is absent or not followed by a number.
# Only the number after the label is taken, so timestamps or percentages
# elsewhere on the line do not leak into the result.
latest_stat() {
  local label=$1 log=$2
  LC_ALL=C awk -v label="$label" '
    {
      pos = index($0, label)
      if (pos == 0) next
      rest = substr($0, pos + length(label))
      value = ""
      if (match(rest, /^[ \t]*[0-9][0-9,]*/)) {
        value = substr(rest, RSTART, RLENGTH)
        gsub(/[^0-9]/, "", value)
      }
    }
    END { if (value != "") print value }
  ' "$log"
}

# Prints $1/$2 as a percentage with one decimal place. Returns non-zero and
# prints nothing when $2 is zero, instead of letting awk abort on a division
# by zero. Values are passed with -v so log content is never parsed as awk
# source.
percent_of() {
  LC_ALL=C awk -v cur="$1" -v total="$2" 'BEGIN {
    if (total + 0 == 0) exit 1
    printf "%.1f", (cur / total) * 100
  }'
}

# Prints the statistics section for log $1: processed/total with percentage,
# then cache hits and API calls when the log reports them. Prints nothing when
# the log has no progress entry yet.
print_statistics() {
  local log=$1
  local progress current total percent cache_hits api_calls

  progress=$(latest_progress "$log")
  current=${progress%%/*}
  total=${progress##*/}
  if [[ -z "$current" || -z "$total" ]]; then
    return 0
  fi

  if percent=$(percent_of "$current" "$total"); then
    printf 'Institutions processed: %s / %s (%s%%)\n' "$current" "$total" "$percent"
  else
    # A total of zero has no meaningful percentage.
    printf 'Institutions processed: %s / %s (n/a)\n' "$current" "$total"
  fi

  # Show cache stats if available
  cache_hits=$(latest_stat "Cache hits:" "$log")
  api_calls=$(latest_stat "API calls:" "$log")
  if [[ -n "$cache_hits" ]]; then
    printf 'Cache hits: %s\n' "$cache_hits"
  fi
  if [[ -n "$api_calls" ]]; then
    printf 'API calls: %s\n' "$api_calls"
  fi
}

# Entry point. $1 (optional) overrides the log file to inspect.
main() {
  local log_file=${1:-$DEFAULT_LOG_FILE}

  echo "$HEAVY_RULE"
  echo "GEOCODING PROGRESS MONITOR"
  echo "$HEAVY_RULE"
  echo ""

  # Check if process is running
  if geocoder_running; then
    echo "✅ Geocoding process is RUNNING"
  else
    echo "⚠️ No geocoding process found"
  fi
  echo ""

  # Show last progress line
  if [[ -f "$log_file" ]]; then
    echo "Latest progress:"
    echo "$LIGHT_RULE"
    tail -n 1 -- "$log_file"
    echo "$LIGHT_RULE"
    echo ""

    # Extract statistics from log
    print_statistics "$log_file"

    echo ""
    printf 'Log file: %s\n' "$log_file"
    printf 'View full log: tail -f %s\n' "$log_file"
  else
    printf '⚠️ Log file not found: %s\n' "$log_file"
  fi

  echo "$HEAVY_RULE"
}

main "$@"