Production Readiness Improvements: - Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM) - Enabled NAS synchronization in production runner with error handling - Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md) - Made systemd services portable (removed hardcoded user/paths) - Added environment variable validation on startup - Moved DISPLAY/XAUTHORITY to .env configuration Systemd Improvements: - Created template service file (@.service) for any user - Changed all paths to /opt/hvac-kia-content - Updated installation script for portable deployment - Fixed service dependencies and resource limits Documentation: - Created comprehensive PRODUCTION_TODO.md with 25 tasks - Added PRODUCTION_GUIDE.md with deployment instructions - Documented spec compliance gaps (65% complete) Remaining work includes retry logic, connection pooling, media downloads, and pytest test suite as documented in PRODUCTION_TODO.md 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
125 lines
No EOL
4.2 KiB
Python
125 lines
No EOL
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Detailed monitoring of backlog processing progress.
|
|
Tracks actual item counts and progress indicators.
|
|
"""
|
|
|
|
import time
|
|
import os
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
import re
|
|
|
|
def count_items_in_markdown(file_path):
    """Return the number of items in a markdown export file.

    Items are counted by their '# ID:' heading lines (one per item).
    Returns 0 for a missing or unreadable file.
    """
    if not file_path.exists():
        return 0

    try:
        content = file_path.read_text(encoding='utf-8')
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return 0

    # Each exported item starts with a '# ID:' header at column 0.
    return sum(1 for _ in re.finditer(r'^# ID:', content, re.MULTILINE))
|
def get_log_stats(log_file):
    """Extract key statistics from a log file.

    Args:
        log_file: Path to the log file to inspect.

    Returns:
        dict with:
            size_mb (float): file size in megabytes (0 when missing/unreadable).
            last_activity (str): last log line, or a status/error message.
            key_stats (list[str]): up to 3 recent "timestamp: message" entries
                whose lines contain progress keywords.
    """
    if not log_file.exists():
        return {"size_mb": 0, "last_activity": "No log file", "key_stats": []}

    try:
        size_mb = log_file.stat().st_size / (1024 * 1024)

        with open(log_file, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        # lines[-10:] already yields the whole list when it is shorter than
        # 10 entries, so no length check is needed.
        recent_lines = lines[-10:]

        # Look for key progress indicators (counts, pages, completions).
        key_stats = []
        for line in recent_lines:
            if any(keyword in line.lower() for keyword in ['total', 'fetched', 'found', 'page', 'completed']):
                # Log lines use " - " separated fields: timestamp first,
                # message last; fall back to the raw line otherwise.
                timestamp = line.split(' - ')[0] if ' - ' in line else ''
                message = line.split(' - ')[-1].strip() if ' - ' in line else line.strip()
                key_stats.append(f"{timestamp}: {message}")

        last_activity = recent_lines[-1].strip() if recent_lines else "No activity"

        return {
            "size_mb": size_mb,
            "last_activity": last_activity,
            "key_stats": key_stats[-3:]  # Last 3 important stats
        }
    except Exception as e:
        return {"size_mb": 0, "last_activity": f"Error: {e}", "key_stats": []}
|
def detailed_progress_check(log_dir="test_logs/backlog", data_dir="test_data/backlog", target_per_source=1000):
    """Print a comprehensive progress report for all backlog sources.

    Args:
        log_dir: Directory containing per-source log subdirectories
            (defaults preserve the original hard-coded test paths).
        data_dir: Directory containing generated markdown output files.
        target_per_source: Item goal per source, used for the summary line.

    Returns:
        int: Total number of items counted across all source output files.
    """
    print(f"\n{'='*80}")
    print(f"COMPREHENSIVE BACKLOG PROGRESS - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*80}")

    log_dir = Path(log_dir)
    data_dir = Path(data_dir)

    # Display name -> file-name stem used for both logs and markdown output.
    sources = {
        "WordPress": "wordpress",
        "Instagram": "instagram",
        "MailChimp": "mailchimp",
        "Podcast": "podcast",
        "YouTube": "youtube",
        "TikTok": "tiktok"
    }

    total_items = 0

    for display_name, file_name in sources.items():
        print(f"\n📊 {display_name.upper()}:")
        print("-" * 50)

        # Check log progress — logs live under the display-name subdirectory.
        log_file = log_dir / display_name / f"{file_name}.log"
        log_stats = get_log_stats(log_file)

        print(f" Log Size: {log_stats['size_mb']:.2f} MB")

        if log_stats['key_stats']:
            print(" Recent Progress:")
            for stat in log_stats['key_stats']:
                print(f" {stat}")
        else:
            print(f" Status: {log_stats['last_activity']}")

        # Check output file
        markdown_file = data_dir / f"{file_name}_backlog_test.md"
        item_count = count_items_in_markdown(markdown_file)

        if markdown_file.exists():
            file_size_kb = markdown_file.stat().st_size / 1024
            print(f" Output: {item_count} items, {file_size_kb:.1f} KB")
            total_items += item_count
        else:
            print(" Output: No file generated yet")

    # With default arguments this reproduces the original "1000 per source
    # (6000 total)" summary exactly.
    target_total = target_per_source * len(sources)
    print(f"\n🎯 SUMMARY:")
    print(f" Total Items Processed: {total_items}")
    print(f" Target Goal: {target_per_source} items per source ({target_total} total)")
    print(f" Progress: {(total_items/target_total)*100:.1f}% of target")

    return total_items
|
if __name__ == "__main__":
    # Re-run the progress report once per minute until Ctrl+C, then print
    # one final snapshot before exiting.
    monitoring = True
    while monitoring:
        try:
            detailed_progress_check()
            print(f"\n⏱️ Next check in 60 seconds... (Ctrl+C to stop)")
            print(f"{'='*80}")
            time.sleep(60)
        except KeyboardInterrupt:
            print("\n\n👋 Monitoring stopped.")
            print(f"\n🏁 Final Status: {detailed_progress_check()} total items processed")
            monitoring = False