hvac-kia-content/detailed_monitor.py
Ben Reed 05218a873b Fix critical production issues and improve spec compliance
Production Readiness Improvements:
- Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM)
- Enabled NAS synchronization in production runner with error handling
- Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md)
- Made systemd services portable (removed hardcoded user/paths)
- Added environment variable validation on startup
- Moved DISPLAY/XAUTHORITY to .env configuration

Systemd Improvements:
- Created template service file (@.service) for any user
- Changed all paths to /opt/hvac-kia-content
- Updated installation script for portable deployment
- Fixed service dependencies and resource limits

Documentation:
- Created comprehensive PRODUCTION_TODO.md with 25 tasks
- Added PRODUCTION_GUIDE.md with deployment instructions
- Documented spec compliance gaps (65% complete)

Remaining work includes retry logic, connection pooling, media downloads,
and pytest test suite as documented in PRODUCTION_TODO.md

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 20:07:55 -03:00

125 lines
No EOL
4.2 KiB
Python

#!/usr/bin/env python3
"""
Detailed monitoring of backlog processing progress.
Tracks actual item counts and progress indicators.
"""
import os
import re
import time
from collections import deque
from datetime import datetime
from pathlib import Path
def count_items_in_markdown(file_path):
    """Count individual items in a markdown file.

    An item is any line that starts with the literal header ``# ID:``.

    Args:
        file_path: Location of the markdown file, given as a ``str`` or any
            path-like object (e.g. ``pathlib.Path``).

    Returns:
        The number of ``# ID:`` header lines. Returns 0 when the file does
        not exist, cannot be read, or is not valid UTF-8 (the failure is
        printed in the latter two cases).
    """
    # Coerce so callers may pass plain strings as well as Path objects.
    file_path = Path(file_path)
    if not file_path.exists():
        return 0

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Stream the file line by line instead of loading it whole; text
            # mode normalises line endings, so this matches r'^# ID:' applied
            # to the full content in MULTILINE mode.
            return sum(1 for line in f if line.startswith('# ID:'))
    except (OSError, UnicodeDecodeError) as e:
        # Best-effort monitor: report the problem and treat the file as empty.
        print(f"Error reading {file_path}: {e}")
        return 0
def get_log_stats(log_file):
    """Extract key statistics from log file.

    Only the final 10 lines of the log are inspected. A line counts as a
    progress indicator when it contains (case-insensitively) one of the
    words ``total``, ``fetched``, ``found``, ``page`` or ``completed``.

    Args:
        log_file: Location of the log, given as a ``str`` or any path-like
            object (e.g. ``pathlib.Path``).

    Returns:
        A dict with three keys:

        * ``size_mb`` - file size in MiB (0 when the file is missing or
          could not be read).
        * ``last_activity`` - the stripped final line of the log,
          ``"No activity"`` for an empty log, ``"No log file"`` when the
          file is missing, or ``"Error: ..."`` on an I/O failure.
        * ``key_stats`` - at most three ``"timestamp: message"`` strings
          built from the most recent progress-indicator lines.
    """
    # Coerce so callers may pass plain strings as well as Path objects.
    log_file = Path(log_file)
    if not log_file.exists():
        return {"size_mb": 0, "last_activity": "No log file", "key_stats": []}

    keywords = ('total', 'fetched', 'found', 'page', 'completed')

    try:
        size_mb = log_file.stat().st_size / (1024 * 1024)
        # errors='replace' keeps one stray non-UTF-8 byte from hiding all
        # progress for this source.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as f:
            # A bounded deque retains just the tail, so a multi-megabyte log
            # is never held in memory in full.
            recent_lines = deque(f, maxlen=10)
    except OSError as e:
        return {"size_mb": 0, "last_activity": f"Error: {e}", "key_stats": []}

    # Look for key progress indicators (total counts, page numbers, etc.).
    key_stats = []
    for line in recent_lines:
        lowered = line.lower()
        if not any(keyword in lowered for keyword in keywords):
            continue
        if ' - ' in line:
            # The first ' - ' separated field is used as the timestamp and
            # the last one as the message.
            fields = line.split(' - ')
            timestamp, message = fields[0], fields[-1].strip()
        else:
            timestamp, message = '', line.strip()
        key_stats.append(f"{timestamp}: {message}")

    last_activity = recent_lines[-1].strip() if recent_lines else "No activity"

    return {
        "size_mb": size_mb,
        "last_activity": last_activity,
        "key_stats": key_stats[-3:],  # Last 3 important stats
    }
def detailed_progress_check(log_dir="test_logs/backlog",
                            data_dir="test_data/backlog",
                            target_per_source=1000):
    """Comprehensive progress check.

    Prints a per-source report (log size, recent progress lines, output item
    count) followed by a summary, and returns the combined item count.

    Args:
        log_dir: Directory holding one sub-directory of logs per source.
            Defaults to the backlog test log location.
        data_dir: Directory holding the ``<source>_backlog_test.md`` output
            files. Defaults to the backlog test data location.
        target_per_source: Number of items each source is expected to
            reach; used only for the summary percentage.

    Returns:
        The total number of items counted across every source whose output
        file exists.
    """
    print(f"\n{'='*80}")
    print(f"COMPREHENSIVE BACKLOG PROGRESS - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*80}")

    log_dir = Path(log_dir)
    data_dir = Path(data_dir)

    # Display name -> file stem used for the log and markdown output.
    sources = {
        "WordPress": "wordpress",
        "Instagram": "instagram",
        "MailChimp": "mailchimp",
        "Podcast": "podcast",
        "YouTube": "youtube",
        "TikTok": "tiktok",
    }

    total_items = 0

    for display_name, file_name in sources.items():
        print(f"\n📊 {display_name.upper()}:")
        print("-" * 50)

        # Check log progress.
        # NOTE(review): the log sub-directory is the capitalised display name
        # (e.g. "WordPress/wordpress.log") - confirm this matches the layout
        # the scrapers actually write.
        log_file = log_dir / display_name / f"{file_name}.log"
        log_stats = get_log_stats(log_file)

        print(f" Log Size: {log_stats['size_mb']:.2f} MB")

        if log_stats['key_stats']:
            print(" Recent Progress:")
            for stat in log_stats['key_stats']:
                print(f" {stat}")
        else:
            # No progress lines found: fall back to the raw last line/status.
            print(f" Status: {log_stats['last_activity']}")

        # Check output file.
        markdown_file = data_dir / f"{file_name}_backlog_test.md"
        item_count = count_items_in_markdown(markdown_file)

        if markdown_file.exists():
            file_size_kb = markdown_file.stat().st_size / 1024
            print(f" Output: {item_count} items, {file_size_kb:.1f} KB")
            total_items += item_count
        else:
            print(" Output: No file generated yet")

    # Derive the overall goal from the source list so the two cannot drift.
    target_total = target_per_source * len(sources)
    # Avoid ZeroDivisionError when a caller passes a zero target.
    progress_pct = (total_items / target_total) * 100 if target_total else 0.0

    print("\n🎯 SUMMARY:")
    print(f" Total Items Processed: {total_items}")
    print(f" Target Goal: {target_per_source} items per source ({target_total} total)")
    print(f" Progress: {progress_pct:.1f}% of target")

    return total_items
if __name__ == "__main__":
    # Script entry point: report progress once a minute until the user
    # interrupts with Ctrl+C, then print one last report and the final total.
    separator = "=" * 80
    try:
        while True:
            detailed_progress_check()
            print("\n⏱️ Next check in 60 seconds... (Ctrl+C to stop)")
            print(separator)
            time.sleep(60)
    except KeyboardInterrupt:
        print("\n\n👋 Monitoring stopped.")
        closing_total = detailed_progress_check()
        print(f"\n🏁 Final Status: {closing_total} total items processed")