Production Readiness Improvements: - Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM) - Enabled NAS synchronization in production runner with error handling - Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md) - Made systemd services portable (removed hardcoded user/paths) - Added environment variable validation on startup - Moved DISPLAY/XAUTHORITY to .env configuration Systemd Improvements: - Created template service file (@.service) for any user - Changed all paths to /opt/hvac-kia-content - Updated installation script for portable deployment - Fixed service dependencies and resource limits Documentation: - Created comprehensive PRODUCTION_TODO.md with 25 tasks - Added PRODUCTION_GUIDE.md with deployment instructions - Documented spec compliance gaps (65% complete) Remaining work includes retry logic, connection pooling, media downloads, and pytest test suite as documented in PRODUCTION_TODO.md 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
125 lines
No EOL
4.2 KiB
Python
125 lines
No EOL
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Detailed monitoring of backlog processing progress.
|
|
Tracks actual item counts and progress indicators.
|
|
"""
|
|
|
|
import time
|
|
import os
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
import re
|
|
|
|
def count_items_in_markdown(file_path):
    """Return the number of items in a markdown export file.

    Items are counted by their '# ID:' heading lines (one per item).
    Returns 0 for a missing or unreadable file.
    """
    if not file_path.exists():
        return 0

    try:
        content = file_path.read_text(encoding='utf-8')
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return 0

    # Each exported item starts with a '# ID:' header at column 0.
    return sum(1 for _ in re.finditer(r'^# ID:', content, re.MULTILINE))
|
def get_log_stats(log_file):
    """Extract key statistics from a log file.

    Args:
        log_file: Path to the log file to inspect.

    Returns:
        dict with:
            size_mb (float): file size in megabytes (0 when missing/unreadable).
            last_activity (str): last log line, or a status/error message.
            key_stats (list[str]): up to 3 recent "timestamp: message" entries
                whose lines contain progress keywords.
    """
    if not log_file.exists():
        return {"size_mb": 0, "last_activity": "No log file", "key_stats": []}

    try:
        size_mb = log_file.stat().st_size / (1024 * 1024)

        with open(log_file, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        # lines[-10:] already yields the whole list when it is shorter than
        # 10 entries, so no length check is needed.
        recent_lines = lines[-10:]

        # Look for key progress indicators (counts, pages, completions).
        key_stats = []
        for line in recent_lines:
            if any(keyword in line.lower() for keyword in ['total', 'fetched', 'found', 'page', 'completed']):
                # Log lines use " - " separated fields: timestamp first,
                # message last; fall back to the raw line otherwise.
                timestamp = line.split(' - ')[0] if ' - ' in line else ''
                message = line.split(' - ')[-1].strip() if ' - ' in line else line.strip()
                key_stats.append(f"{timestamp}: {message}")

        last_activity = recent_lines[-1].strip() if recent_lines else "No activity"

        return {
            "size_mb": size_mb,
            "last_activity": last_activity,
            "key_stats": key_stats[-3:]  # Last 3 important stats
        }
    except Exception as e:
        return {"size_mb": 0, "last_activity": f"Error: {e}", "key_stats": []}
|
def detailed_progress_check(log_dir="test_logs/backlog", data_dir="test_data/backlog", target_per_source=1000):
    """Print a comprehensive progress report for all backlog sources.

    Args:
        log_dir: Directory containing per-source log subdirectories
            (defaults preserve the original hard-coded test paths).
        data_dir: Directory containing generated markdown output files.
        target_per_source: Item goal per source, used for the summary line.

    Returns:
        int: Total number of items counted across all source output files.
    """
    print(f"\n{'='*80}")
    print(f"COMPREHENSIVE BACKLOG PROGRESS - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*80}")

    log_dir = Path(log_dir)
    data_dir = Path(data_dir)

    # Display name -> file-name stem used for both logs and markdown output.
    sources = {
        "WordPress": "wordpress",
        "Instagram": "instagram",
        "MailChimp": "mailchimp",
        "Podcast": "podcast",
        "YouTube": "youtube",
        "TikTok": "tiktok"
    }

    total_items = 0

    for display_name, file_name in sources.items():
        print(f"\n📊 {display_name.upper()}:")
        print("-" * 50)

        # Check log progress — logs live under the display-name subdirectory.
        log_file = log_dir / display_name / f"{file_name}.log"
        log_stats = get_log_stats(log_file)

        print(f" Log Size: {log_stats['size_mb']:.2f} MB")

        if log_stats['key_stats']:
            print(" Recent Progress:")
            for stat in log_stats['key_stats']:
                print(f" {stat}")
        else:
            print(f" Status: {log_stats['last_activity']}")

        # Check output file
        markdown_file = data_dir / f"{file_name}_backlog_test.md"
        item_count = count_items_in_markdown(markdown_file)

        if markdown_file.exists():
            file_size_kb = markdown_file.stat().st_size / 1024
            print(f" Output: {item_count} items, {file_size_kb:.1f} KB")
            total_items += item_count
        else:
            print(" Output: No file generated yet")

    # With default arguments this reproduces the original "1000 per source
    # (6000 total)" summary exactly.
    target_total = target_per_source * len(sources)
    print(f"\n🎯 SUMMARY:")
    print(f" Total Items Processed: {total_items}")
    print(f" Target Goal: {target_per_source} items per source ({target_total} total)")
    print(f" Progress: {(total_items/target_total)*100:.1f}% of target")

    return total_items
|
if __name__ == "__main__":
    # Re-run the progress report once per minute until Ctrl+C, then print
    # one final snapshot before exiting.
    monitoring = True
    while monitoring:
        try:
            detailed_progress_check()
            print(f"\n⏱️ Next check in 60 seconds... (Ctrl+C to stop)")
            print(f"{'='*80}")
            time.sleep(60)
        except KeyboardInterrupt:
            print("\n\n👋 Monitoring stopped.")
            print(f"\n🏁 Final Status: {detailed_progress_check()} total items processed")
            monitoring = False