#!/usr/bin/env python3
"""
Detailed monitoring of backlog processing progress.
Tracks actual item counts and progress indicators.
"""

import os
import re
import time
from collections import deque
from datetime import datetime
from pathlib import Path

# Matches the header line that starts each item in a generated markdown file.
ITEM_HEADER_RE = re.compile(r'^# ID:', re.MULTILINE)

# Substrings (matched case-insensitively) that mark a log line as a progress
# indicator worth surfacing.
PROGRESS_KEYWORDS = ('total', 'fetched', 'found', 'page', 'completed')

# How many trailing log lines are inspected, and how many matching lines are
# reported back to the caller.
TAIL_LINES = 10
MAX_KEY_STATS = 3

# Display name -> file stem used for the log and output file names.
SOURCES = {
    "WordPress": "wordpress",
    "Instagram": "instagram",
    "MailChimp": "mailchimp",
    "Podcast": "podcast",
    "YouTube": "youtube",
    "TikTok": "tiktok",
}

ITEMS_PER_SOURCE_TARGET = 1000
CHECK_INTERVAL_SECONDS = 60


def count_items_in_markdown(file_path):
    """Count individual items in a markdown file.

    An item is any line that begins with ``# ID:``.

    Args:
        file_path: Path of the markdown file to scan.

    Returns:
        The number of item headers found, or 0 when the file does not exist
        or cannot be read (a read failure is also reported on stdout).
    """
    try:
        # errors='replace': the file may still be being written, so a
        # multi-byte character cut off at the end must not discard the
        # whole count.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()
    except FileNotFoundError:
        return 0
    except OSError as e:
        print(f"Error reading {file_path}: {e}")
        return 0

    return len(ITEM_HEADER_RE.findall(content))


def get_log_stats(log_file):
    """Extract key statistics from log file.

    Args:
        log_file: ``pathlib.Path`` of the log file to inspect.

    Returns:
        A dict with three keys:

        * ``size_mb`` - file size in MiB (0 when missing or unreadable).
        * ``last_activity`` - the stripped final line of the log, or a
          placeholder ("No log file", "No activity", "Error: ...").
        * ``key_stats`` - up to the last three ``"timestamp: message"``
          strings built from the progress lines found among the final ten
          lines of the log.
    """
    try:
        size_mb = log_file.stat().st_size / (1024 * 1024)
        # A bounded deque keeps only the tail in memory instead of loading
        # the entire log; errors='replace' tolerates undecodable bytes in a
        # log that is being appended to while we read it.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            recent_lines = deque(f, maxlen=TAIL_LINES)
    except FileNotFoundError:
        return {"size_mb": 0, "last_activity": "No log file", "key_stats": []}
    except OSError as e:
        return {"size_mb": 0, "last_activity": f"Error: {e}", "key_stats": []}

    key_stats = []
    for line in recent_lines:
        lowered = line.lower()
        if not any(keyword in lowered for keyword in PROGRESS_KEYWORDS):
            continue
        # Fields are separated by ' - ': the first field is reported as the
        # timestamp and the last as the message. A line without a separator
        # has no timestamp and is used whole.
        parts = line.split(' - ')
        if len(parts) > 1:
            timestamp, message = parts[0], parts[-1].strip()
        else:
            timestamp, message = '', line.strip()
        key_stats.append(f"{timestamp}: {message}")

    last_activity = recent_lines[-1].strip() if recent_lines else "No activity"

    return {
        "size_mb": size_mb,
        "last_activity": last_activity,
        "key_stats": key_stats[-MAX_KEY_STATS:],
    }


def detailed_progress_check():
    """Comprehensive progress check.

    Prints a per-source report (log size, recent progress lines, output item
    count and size) followed by a summary against the overall target.

    Returns:
        The total number of items counted across all output files.
    """
    print(f"\n{'='*80}")
    print(f"COMPREHENSIVE BACKLOG PROGRESS - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*80}")

    log_dir = Path("test_logs/backlog")
    data_dir = Path("test_data/backlog")

    total_items = 0

    for display_name, file_name in SOURCES.items():
        print(f"\n📊 {display_name.upper()}:")
        print("-" * 50)

        # Each source's log is looked up in a directory named after its
        # display name.
        log_file = log_dir / display_name / f"{file_name}.log"
        log_stats = get_log_stats(log_file)

        print(f" Log Size: {log_stats['size_mb']:.2f} MB")

        if log_stats['key_stats']:
            print(" Recent Progress:")
            for stat in log_stats['key_stats']:
                print(f" {stat}")
        else:
            print(f" Status: {log_stats['last_activity']}")

        markdown_file = data_dir / f"{file_name}_backlog_test.md"
        item_count = count_items_in_markdown(markdown_file)

        try:
            file_size_kb = markdown_file.stat().st_size / 1024
        except OSError:
            # Missing (or vanished between the count and the stat call).
            print(" Output: No file generated yet")
        else:
            print(f" Output: {item_count} items, {file_size_kb:.1f} KB")
            total_items += item_count

    # Derive the overall target from the source table so the two cannot
    # drift apart when a source is added or removed.
    target_total = ITEMS_PER_SOURCE_TARGET * len(SOURCES)

    print("\n🎯 SUMMARY:")
    print(f" Total Items Processed: {total_items}")
    print(f" Target Goal: {ITEMS_PER_SOURCE_TARGET} items per source ({target_total} total)")
    print(f" Progress: {(total_items / target_total) * 100:.1f}% of target")

    return total_items


def main():
    """Run the progress check in a loop until interrupted with Ctrl+C."""
    try:
        while True:
            detailed_progress_check()
            print(f"\n⏱️ Next check in {CHECK_INTERVAL_SECONDS} seconds... (Ctrl+C to stop)")
            print(f"{'='*80}")
            time.sleep(CHECK_INTERVAL_SECONDS)
    except KeyboardInterrupt:
        print("\n\n👋 Monitoring stopped.")
        # One last report so the final totals are visible after stopping.
        final_items = detailed_progress_check()
        print(f"\n🏁 Final Status: {final_items} total items processed")


if __name__ == "__main__":
    main()