hvac-kia-content/monitor_backlog_progress.sh
Ben Reed 0a795437a7 Optimize Instagram scraper and increase capture targets to 1000
- Increased Instagram rate limit from 100 to 200 posts/hour
- Reduced delays: 10-20s (was 15-30s), extended breaks 30-60s (was 60-120s)
- Extended break interval: every 10 requests (was 5)
- Updated capture targets: 1000 posts for Instagram, 1000 videos for TikTok
- Added production deployment and monitoring scripts
- Created environment configuration template

This provides ~40-50% speed improvement for Instagram scraping and
captures 5x more Instagram content and 3.3x more TikTok content.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 22:59:11 -03:00

149 lines
No EOL
4.6 KiB
Bash
Executable file

#!/bin/bash
#
# Backlog Capture Progress Monitor
#
# Reports the progress of the production backlog capture by reading its log.
# This opening section defines the shared constants, prints the banner, and
# stops with exit status 1 when the log file is not present.
#

# Path of the capture log, resolved relative to the current directory.
LOG_FILE="backlog_capture.log"

# ANSI colour sequences, stored as literal backslash text; the `echo -e`
# calls further down turn them into real escape codes.
NC='\033[0m'
RED='\033[0;31m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'

printf '%s\n' \
  "📊 HVAC Know It All - Backlog Capture Progress Monitor" \
  "=================================================="

# Guard clause: without a log there is nothing to report.
[[ -f "$LOG_FILE" ]] || {
  printf '%s\n' "❌ Log file not found: $LOG_FILE"
  exit 1
}

printf '%s\n' \
  "🔍 Monitoring: $LOG_FILE" \
  "⏰ Started: $(date)" \
  ""
#######################################
# Parse the capture log and print a progress report.
#
# Globals:
#   LOG_FILE                    (read) path of the log to parse
#   GREEN, YELLOW, BLUE, RED, NC (read) colour sequences for `echo -e`
# Arguments:
#   None
# Outputs:
#   Writes the report to stdout: completed sources with running totals,
#   the source currently being processed plus recent activity, and either
#   the final summary (when complete) or the error count and last error.
# Returns:
#   0 when the log contains "AUTOMATED BACKLOG CAPTURE COMPLETE",
#   1 otherwise.
#######################################
extract_progress() {
  local completed_sources=()
  local current_source=""
  local total_items=0
  local total_media=0
  local line source_info name items media
  local timestamp message
  local final_items final_media duration
  local error_count

  # Collect every "✅ <source>: N items, M media files" line.
  # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline yet,
  # which happens while the log is still being written.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ $line =~ ✅\ ([^:]+):[[:space:]]*([0-9]+)\ items,[[:space:]]*([0-9]+)\ media\ files ]]; then
      name="${BASH_REMATCH[1]}"
      items="${BASH_REMATCH[2]}"
      media="${BASH_REMATCH[3]}"
      completed_sources+=("$name:$items:$media")
      # Force base 10 so a count such as "08" is not rejected as invalid octal.
      total_items=$((total_items + 10#$items))
      total_media=$((total_media + 10#$media))
    fi
  done < "$LOG_FILE"

  # The most recent "PROCESSING:" line names the source in flight.
  current_source=$(grep "PROCESSING:" "$LOG_FILE" | tail -1 | sed 's/.*PROCESSING: //' | tr -d '\r')

  echo -e "${BLUE}📈 PROGRESS SUMMARY${NC}"
  echo "==================="

  if [[ ${#completed_sources[@]} -gt 0 ]]; then
    echo -e "${GREEN}✅ Completed Sources:${NC}"
    for source_info in "${completed_sources[@]}"; do
      # Safe to split on ':' because the regex above forbids ':' in the name.
      IFS=':' read -r name items media <<< "$source_info"
      printf " %-12s: %4s items, %3s media files\n" "$name" "$items" "$media"
    done
    echo
    echo -e "${GREEN}📊 Totals so far: $total_items items, $total_media media files${NC}"
  else
    echo -e "${YELLOW}⏳ No sources completed yet${NC}"
  fi

  if [[ -n "$current_source" ]]; then
    echo
    echo -e "${BLUE}🔄 Currently Processing: ${YELLOW}$current_source${NC}"
    echo -e "${BLUE}Recent activity:${NC}"
    # Last three activity lines, shown as "<first two fields>: <message>".
    # NOTE(review): the sed pattern below cannot match when the leading
    # timestamp itself contains '-' (e.g. an ISO date), in which case the
    # whole line is shown as the message — confirm against the real log format.
    grep -E "(Starting|Fetching|Downloaded|Processing)" "$LOG_FILE" | tail -3 | while read -r line; do
      timestamp=$(printf '%s\n' "$line" | cut -d' ' -f1-2)
      message=$(printf '%s\n' "$line" | sed 's/^[^-]*- [^-]* - [^-]* - //')
      echo " $timestamp: $message"
    done
  fi

  # Completed run: print the final summary (if logged) and report success.
  if grep -q "AUTOMATED BACKLOG CAPTURE COMPLETE" "$LOG_FILE"; then
    echo
    echo -e "${GREEN}🎉 BACKLOG CAPTURE COMPLETE!${NC}"
    if grep -q "Total items captured:" "$LOG_FILE"; then
      # 's/,//g' removes every thousands separator, not just the first one.
      final_items=$(grep "Total items captured:" "$LOG_FILE" | tail -1 | sed 's/.*Total items captured: //' | sed 's/,//g')
      final_media=$(grep "Total media files:" "$LOG_FILE" | tail -1 | sed 's/.*Total media files: //' | sed 's/,//g')
      duration=$(grep "Duration:" "$LOG_FILE" | tail -1 | sed 's/.*Duration: //')
      echo -e "${GREEN}📊 Final Results:${NC}"
      echo " Total items: $final_items"
      echo " Total media: $final_media"
      echo " Duration: $duration"
    fi
    return 0
  fi

  # Count lines that look like errors. `grep -c` already prints "0" (with exit
  # status 1) when nothing matches, so no `|| echo 0` fallback is used: that
  # would yield "0<newline>0" and break the numeric comparison. The default
  # below only covers grep printing nothing at all (e.g. unreadable file).
  error_count=$(grep -cE "❌|ERROR|failed" "$LOG_FILE" 2>/dev/null)
  error_count=${error_count:-0}
  if ((error_count > 0)); then
    echo
    echo -e "${RED}⚠️ Errors detected: $error_count${NC}"
    echo " Last error:"
    # NOTE(review): same caveat as above — this prefix strip is a no-op when
    # the timestamp contains '-'; confirm against the real log format.
    grep -E "❌|ERROR|failed" "$LOG_FILE" | tail -1 | sed 's/^[^-]*- / /'
  fi
  return 1
}
# Print one report immediately; the function's exit status (0 = capture
# finished) is kept in capture_complete and drives the live loop below.
extract_progress
capture_complete=$?

printf '\n%s\n' "=================================================="

case "$1" in
  --live)
    # Live mode: redraw the report every 10 seconds until the capture finishes.
    printf '%s\n\n' "📡 Starting live monitoring (Ctrl+C to stop)..."

    until ((capture_complete == 0)); do
      sleep 10
      clear
      printf '%s\n' \
        "📊 HVAC Know It All - Live Progress Monitor" \
        "==================================================" \
        "🔍 Monitoring: $LOG_FILE" \
        "⏰ Updated: $(date)" \
        ""

      extract_progress
      capture_complete=$?

      # Leave before printing the refresh hint once the capture is done.
      ((capture_complete == 0)) && break

      printf '\n%s\n' "🔄 Refreshing in 10 seconds... (Ctrl+C to stop)"
    done

    printf '\n%s\n' "🎉 Monitoring complete!"
    ;;
  *)
    # One-shot mode: just point the user at the live option.
    printf '%s\n' "💡 Tip: Use '$0 --live' for real-time monitoring"
    ;;
esac