- Introduced `test_nlp_extractor.py` with unit tests for the InstitutionExtractor, covering various extraction patterns (ISIL, Wikidata, VIAF, city names) and ensuring proper classification of institutions (museum, library, archive). - Added tests for extracted entities and result handling to validate the extraction process. - Created `test_partnership_rdf_integration.py` to validate the end-to-end process of extracting partnerships from a conversation and exporting them to RDF format. - Implemented tests for temporal properties in partnerships and ensured compliance with W3C Organization Ontology patterns. - Verified that extracted partnerships are correctly linked with PROV-O provenance metadata.
57 lines
1.9 KiB
Bash
Executable file
57 lines
1.9 KiB
Bash
Executable file
#!/bin/bash
#
# Monitor geocoding progress.
#
# Reports whether a geocode_global_institutions.py process is running and
# summarizes the newest progress / cache / API figures found in its log.
#
# Usage: <this script> [LOG_FILE]
#   LOG_FILE  optional path to the log; defaults to the full-run log below.
#
# Note: `set -e` / `pipefail` are deliberately not enabled. This is a
# best-effort status display, and a grep that finds nothing is a normal
# outcome, not an error.

LOG_FILE="${1:-data/logs/geocoding_full_run_fixed.log}"

readonly RULE="==================================================================="
readonly THIN_RULE="-------------------------------------------------------------------"

# Succeeds when at least one process whose command line mentions the
# geocoding script exists. pgrep never reports itself, so no
# `grep -v grep` filtering is required.
geocoder_running() {
  pgrep -f "geocode_global_institutions.py" >/dev/null 2>&1
}

# Prints the newest "CURRENT/TOTAL" pair recorded in the log ($1).
# Prints nothing when the log has no "Progress: N/M" entry.
latest_progress() {
  grep -o "Progress: [0-9]*/[0-9]*" -- "$1" | tail -n 1 | cut -d' ' -f2
}

# Prints CURRENT ($1) over TOTAL ($2) as a percentage with one decimal.
# Returns non-zero and prints nothing unless both are whole numbers and
# TOTAL is greater than zero (avoids awk's fatal division by zero).
percent_done() {
  local current=$1 total=$2
  [[ "$current" =~ ^[0-9]+$ && "$total" =~ ^[0-9]+$ ]] || return 1
  # Values are handed to awk with -v rather than spliced into the program
  # text; LC_ALL=C keeps the decimal separator a dot.
  LC_ALL=C awk -v c="$current" -v t="$total" \
    'BEGIN { if (t + 0 == 0) exit 1; printf "%.1f", (c / t) * 100 }'
}

# Prints the first number that follows LABEL ($1) on the last line of the
# log ($2) containing LABEL, with thousands separators removed.
# Prints nothing when the label or a number after it is absent.
stat_value() {
  local label=$1 log=$2 line rest
  line=$(grep -F -- "$label" "$log" | tail -n 1)
  [[ -n "$line" ]] || return 0
  # Drop everything up to and including the label so digits that appear
  # earlier on the line (e.g. a timestamp) are never picked up.
  rest=${line##*"$label"}
  if [[ "$rest" =~ ([0-9][0-9,]*) ]]; then
    printf '%s\n' "${BASH_REMATCH[1]//,/}"
  fi
}

# Prints the summary section for an existing log file ($1): its last line,
# the processed/total counts, and cache / API counters when present.
report_log() {
  local log=$1 progress current total percent cache_hits api_calls

  echo "Latest progress:"
  echo "$THIN_RULE"
  tail -n 1 -- "$log"
  echo "$THIN_RULE"
  echo ""

  # Extract statistics from log
  progress=$(latest_progress "$log")
  current=${progress%%/*}
  total=${progress#*/}

  if [[ -n "$current" && -n "$total" ]]; then
    if percent=$(percent_done "$current" "$total"); then
      echo "Institutions processed: $current / $total ($percent%)"
    else
      # Total is zero or malformed: show the raw counts without a ratio.
      echo "Institutions processed: $current / $total"
    fi

    # Show cache stats if available
    cache_hits=$(stat_value "Cache hits:" "$log")
    api_calls=$(stat_value "API calls:" "$log")

    if [[ -n "$cache_hits" ]]; then
      echo "Cache hits: $cache_hits"
    fi
    if [[ -n "$api_calls" ]]; then
      echo "API calls: $api_calls"
    fi
  fi

  echo ""
  echo "Log file: $log"
  echo "View full log: tail -f $log"
}

# Entry point: banner, process status, then the log summary.
main() {
  echo "$RULE"
  echo "GEOCODING PROGRESS MONITOR"
  echo "$RULE"
  echo ""

  # Check if process is running
  if geocoder_running; then
    echo "✅ Geocoding process is RUNNING"
  else
    echo "⚠️ No geocoding process found"
  fi
  echo ""

  # Show last progress line
  if [[ -f "$LOG_FILE" ]]; then
    report_log "$LOG_FILE"
  else
    echo "⚠️ Log file not found: $LOG_FILE"
  fi

  echo "$RULE"
}

main "$@"