- Increased Instagram rate limit from 100 to 200 posts/hour
- Reduced delays: 10-20s (was 15-30s), extended breaks 30-60s (was 60-120s)
- Extended break interval: every 10 requests (was 5)
- Updated capture targets: 1000 posts for Instagram, 1000 videos for TikTok
- Added production deployment and monitoring scripts
- Created environment configuration template

This provides ~40-50% speed improvement for Instagram scraping and captures 5x more Instagram content and 3.3x more TikTok content.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
149 lines
No EOL
4.6 KiB
Bash
Executable file
149 lines
No EOL
4.6 KiB
Bash
Executable file
#!/bin/bash
#
# Backlog Capture Progress Monitor
# Shows real-time progress of the production backlog capture
#
# Usage:
#   <script>          print a one-off progress summary
#   <script> --live   keep refreshing the summary until the capture completes
#
# Environment:
#   LOG_FILE  path of the capture log to inspect
#             (default: backlog_capture.log in the current directory)
#
# NOTE: strict mode (set -e / set -u) is intentionally not enabled here.
# extract_progress reports "capture still running" through a non-zero return
# status that the script reads via $?, and $1 is read even when no argument
# was given.

# Colors (ANSI escape sequences, interpreted by `echo -e`)
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly RED='\033[0;31m'
readonly NC='\033[0m'

# Log file written by the capture run; overridable through the environment.
LOG_FILE="${LOG_FILE:-backlog_capture.log}"
readonly LOG_FILE

echo "📊 HVAC Know It All - Backlog Capture Progress Monitor"
echo "=================================================="

# Nothing to report without a log; diagnostics belong on stderr.
if [[ ! -f "$LOG_FILE" ]]; then
    echo "❌ Log file not found: $LOG_FILE" >&2
    exit 1
fi

echo "🔍 Monitoring: $LOG_FILE"
echo "⏰ Started: $(date)"
echo

#######################################
# Extract progress information from the capture log and print a report.
#
# Globals:
#   LOG_FILE                    (read) path of the log file to parse
#   GREEN, YELLOW, BLUE, RED, NC (read) colour escape sequences for `echo -e`
# Arguments:
#   None
# Outputs:
#   Writes to stdout: the completed sources with running totals, the source
#   currently being processed plus the last three activity lines, and then
#   either the final summary (capture complete) or the number of error
#   lines together with the most recent one.
# Returns:
#   0 if the log contains "AUTOMATED BACKLOG CAPTURE COMPLETE", 1 otherwise.
#######################################
extract_progress() {
    local completed_sources=()
    local current_source=""
    local total_items=0
    local total_media=0
    # Declared up front so nothing leaks into the caller's scope and so that
    # command-substitution exit codes are not masked by `local`.
    local line source_info name items media
    local final_items final_media duration error_count
    # Kept in a variable so [[ =~ ]] treats it as a regex without escaping.
    local -r completed_re='✅ ([^:]+):[[:space:]]*([0-9]+) items,[[:space:]]*([0-9]+) media files'

    # Parse completed sources. The `|| [[ -n ... ]]` clause keeps a final
    # line that has no trailing newline (the log may still be being written).
    while IFS= read -r line || [[ -n "$line" ]]; do
        if [[ $line =~ $completed_re ]]; then
            name="${BASH_REMATCH[1]}"
            items="${BASH_REMATCH[2]}"
            media="${BASH_REMATCH[3]}"

            completed_sources+=("$name:$items:$media")
            # 10# forces base 10 so zero-padded counts such as "08" are not
            # rejected as invalid octal numbers.
            total_items=$((total_items + 10#$items))
            total_media=$((total_media + 10#$media))
        fi
    done < "$LOG_FILE"

    # Find current source: text after the last "PROCESSING: " marker.
    current_source=$(grep "PROCESSING:" "$LOG_FILE" | tail -1 | sed 's/.*PROCESSING: //' | tr -d '\r')

    # Display progress
    echo -e "${BLUE}📈 PROGRESS SUMMARY${NC}"
    echo "==================="

    if [[ ${#completed_sources[@]} -gt 0 ]]; then
        echo -e "${GREEN}✅ Completed Sources:${NC}"
        for source_info in "${completed_sources[@]}"; do
            # The name cannot contain ':' (the regex excludes it), so the
            # record splits cleanly into its three fields.
            IFS=':' read -r name items media <<< "$source_info"
            printf " %-12s: %4s items, %3s media files\n" "$name" "$items" "$media"
        done
        echo
        echo -e "${GREEN}📊 Totals so far: $total_items items, $total_media media files${NC}"
    else
        echo -e "${YELLOW}⏳ No sources completed yet${NC}"
    fi

    if [[ -n "$current_source" ]]; then
        echo
        echo -e "${BLUE}🔄 Currently Processing: ${YELLOW}$current_source${NC}"

        # Show last few progress lines for current source
        echo -e "${BLUE}Recent activity:${NC}"
        # The loop runs in a pipeline subshell, so timestamp/message never
        # reach the function's scope.
        # NOTE(review): the sed expression only strips the prefix when the
        # text before the first "- " contains no hyphen; with hyphenated
        # dates (YYYY-MM-DD) the whole line is presumably kept - confirm
        # against the real log format.
        grep -E "(Starting|Fetching|Downloaded|Processing)" "$LOG_FILE" | tail -3 | while read -r line; do
            timestamp=$(printf '%s\n' "$line" | cut -d' ' -f1-2)
            message=$(printf '%s\n' "$line" | sed 's/^[^-]*- [^-]* - [^-]* - //')
            echo " $timestamp: $message"
        done
    fi

    # Check if complete
    if grep -q "AUTOMATED BACKLOG CAPTURE COMPLETE" "$LOG_FILE"; then
        echo
        echo -e "${GREEN}🎉 BACKLOG CAPTURE COMPLETE!${NC}"

        # Extract final summary
        if grep -q "Total items captured:" "$LOG_FILE"; then
            # tr -d removes every thousands separator, not just the first.
            final_items=$(grep "Total items captured:" "$LOG_FILE" | tail -1 | sed 's/.*Total items captured: //' | tr -d ',')
            final_media=$(grep "Total media files:" "$LOG_FILE" | tail -1 | sed 's/.*Total media files: //' | tr -d ',')
            duration=$(grep "Duration:" "$LOG_FILE" | tail -1 | sed 's/.*Duration: //')

            echo -e "${GREEN}📊 Final Results:${NC}"
            echo " Total items: $final_items"
            echo " Total media: $final_media"
            echo " Duration: $duration"
        fi

        return 0
    fi

    # Check for errors. `grep -c` already prints "0" (and exits 1) when
    # nothing matches, so no `|| echo 0` fallback is appended: that would
    # produce the two-line value "0\n0" and break the numeric test below.
    # The default only applies if grep printed nothing (unreadable file).
    error_count=$(grep -cE "❌|ERROR|failed" "$LOG_FILE" 2>/dev/null)
    error_count=${error_count:-0}
    if (( error_count > 0 )); then
        echo
        echo -e "${RED}⚠️ Errors detected: $error_count${NC}"
        echo " Last error:"
        grep -E "❌|ERROR|failed" "$LOG_FILE" | tail -1 | sed 's/^[^-]*- / /'
    fi

    return 1
}

# Show current progress. extract_progress returns 0 once the log contains
# the completion marker and non-zero while the capture is still running.
extract_progress
capture_complete=$?

echo
echo "=================================================="

# Seconds between refreshes in live mode; used both for the sleep and for
# the message shown to the user so the two cannot drift apart.
refresh_seconds=10

# Live monitoring option (${1:-} keeps this safe when no argument is given)
if [[ "${1:-}" == "--live" ]]; then
    echo "📡 Starting live monitoring (Ctrl+C to stop)..."
    echo

    # Monitor in real-time: redraw the report until the capture completes.
    while (( capture_complete != 0 )); do
        sleep "$refresh_seconds"
        clear
        echo "📊 HVAC Know It All - Live Progress Monitor"
        echo "=================================================="
        echo "🔍 Monitoring: $LOG_FILE"
        echo "⏰ Updated: $(date)"
        echo

        extract_progress
        capture_complete=$?

        # Leave before printing the refresh hint once the capture is done.
        if (( capture_complete == 0 )); then
            break
        fi

        echo
        echo "🔄 Refreshing in ${refresh_seconds} seconds... (Ctrl+C to stop)"
    done

    echo
    echo "🎉 Monitoring complete!"
else
    echo "💡 Tip: Use '$0 --live' for real-time monitoring"
fi