hvac-kia-content/monitoring/dashboard_generator.py
Ben Reed dc57ce80d5 Add comprehensive monitoring and alerting system
- Created SystemMonitor class for health check monitoring
- Implemented system metrics collection (CPU, memory, disk, network)
- Added application metrics monitoring (scrapers, logs, data sizes)
- Built alert system with configurable thresholds
- Developed HTML dashboard generator with real-time charts
- Added systemd services for automated monitoring (15-min intervals)
- Created responsive web dashboard with Bootstrap and Chart.js
- Implemented automatic cleanup of old metric files
- Added comprehensive documentation and troubleshooting guide

Features:
- Real-time system resource monitoring
- Scraper performance tracking and alerts
- Interactive dashboard with trend charts
- Email-ready alert notifications
- Systemd integration for production deployment
- Security hardening with minimal privileges
- Auto-refresh dashboard every 5 minutes
- 7-day metric retention with automatic cleanup

Alert conditions:
- Critical: CPU >80%, Memory >85%, Disk >90%
- Warning: Scraper inactive >24h, Log files >100MB
- Error: Monitoring failures, configuration issues

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 21:35:28 -03:00

566 lines
No EOL
20 KiB
Python
Executable file

#!/usr/bin/env python3
"""
HTML Dashboard Generator for HVAC Know It All Content Aggregation System
Generates a web-based dashboard showing:
- System health overview
- Scraper performance metrics
- Resource usage trends
- Alert history
- Data collection statistics
"""
import html
import json
import logging
import os
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)


class DashboardGenerator:
    """Generate a static HTML dashboard from monitoring data stored on disk.

    Metric files (``<type>_*.json``) and alert files (``alerts_*.json``) are
    read from the monitoring directory and rendered into one Bootstrap /
    Chart.js page. Every value that originates from those files is
    HTML-escaped before it is placed in markup and JSON-encoded before it is
    placed in the inline script, so file contents cannot break the page.
    """

    DEFAULT_MONITORING_DIR = Path("/opt/hvac-kia-content/monitoring")

    # Line colours cycled through for the per-scraper series.
    _SCRAPER_COLORS = (
        'rgb(255, 99, 132)', 'rgb(54, 162, 235)', 'rgb(255, 205, 86)',
        'rgb(75, 192, 192)', 'rgb(153, 102, 255)', 'rgb(255, 159, 64)',
    )

    def __init__(self, monitoring_dir: Optional[Path] = None):
        """Set up directory paths and make sure the dashboard directory exists.

        Args:
            monitoring_dir: Root directory holding ``metrics/``, ``alerts/``
                and ``dashboard/``. Falls back to ``DEFAULT_MONITORING_DIR``
                when omitted. A plain string path is accepted as well.
        """
        self.monitoring_dir = (
            Path(monitoring_dir) if monitoring_dir else self.DEFAULT_MONITORING_DIR
        )
        self.metrics_dir = self.monitoring_dir / "metrics"
        self.alerts_dir = self.monitoring_dir / "alerts"
        self.dashboard_dir = self.monitoring_dir / "dashboard"
        # Only the output directory is created; inputs are read if present.
        self.dashboard_dir.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Small formatting / encoding helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _esc(value: Any) -> str:
        """Return *value* as text that is safe inside HTML markup."""
        return html.escape(str(value), quote=True)

    @staticmethod
    def _js_literal(value: Any) -> str:
        """Return *value* as a JavaScript literal safe inside ``<script>``.

        JSON is a subset of JavaScript literals, so ``None`` becomes ``null``
        and strings are quoted correctly. ``<``, ``>`` and ``&`` can only
        occur inside JSON strings, so replacing them with unicode escapes
        keeps the value identical while preventing ``</script>`` break-outs.
        """
        encoded = json.dumps(value)
        encoded = encoded.replace("<", "\\u003c")
        encoded = encoded.replace(">", "\\u003e")
        return encoded.replace("&", "\\u0026")

    @staticmethod
    def _is_number(value: Any) -> bool:
        """Tell whether *value* is a real int/float (booleans excluded)."""
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    @classmethod
    def _format_number(cls, value: Any, suffix: str) -> str:
        """Format *value* with one decimal plus *suffix*, or ``N/A``."""
        if not cls._is_number(value):
            return "N/A"
        return f"{value:.1f}{suffix}"

    @classmethod
    def _describe_age(cls, minutes: Any):
        """Map minutes-since-update to ``(label, bootstrap colour name)``."""
        if minutes is None:
            return "Never", "secondary"
        if not cls._is_number(minutes):
            return "Unknown", "secondary"
        if minutes < 60:
            return f"{minutes:.0f}m ago", "success"
        if minutes < 1440:  # 24 hours
            return f"{minutes / 60:.1f}h ago", "warning"
        return f"{minutes / 1440:.1f}d ago", "danger"

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------
    def load_recent_metrics(self, metric_type: str, hours: int = 24) -> List[Dict[str, Any]]:
        """Load metric files of *metric_type* modified within *hours*.

        Files matching ``{metric_type}_*.json`` in the metrics directory are
        visited in filename order. Each loaded dict gains a
        ``file_timestamp`` key holding the file's modification time (ISO
        format). Unreadable, malformed or non-object files are logged and
        skipped.

        Returns:
            The loaded metric dicts, in filename order.
        """
        cutoff_time = datetime.now() - timedelta(hours=hours)
        metrics: List[Dict[str, Any]] = []
        for metrics_file in sorted(self.metrics_dir.glob(f"{metric_type}_*.json")):
            try:
                file_time = datetime.fromtimestamp(metrics_file.stat().st_mtime)
                if file_time < cutoff_time:
                    continue
                with open(metrics_file, encoding="utf-8") as f:
                    data = json.load(f)
                if not isinstance(data, dict):
                    raise TypeError("top-level JSON value is not an object")
                data['file_timestamp'] = file_time.isoformat()
                metrics.append(data)
            except (OSError, ValueError, TypeError) as e:
                logger.warning("Error loading %s: %s", metrics_file, e)
        return metrics

    def load_recent_alerts(self, hours: int = 72) -> List[Dict[str, Any]]:
        """Load alerts from ``alerts_*.json`` files modified within *hours*.

        A file may hold a single alert object or a list of them. Entries that
        are not JSON objects are dropped so later ``.get`` calls cannot fail.

        Returns:
            Alert dicts sorted by their ``timestamp`` string, newest first.
        """
        cutoff_time = datetime.now() - timedelta(hours=hours)
        all_alerts: List[Dict[str, Any]] = []
        for alerts_file in sorted(self.alerts_dir.glob("alerts_*.json")):
            try:
                file_time = datetime.fromtimestamp(alerts_file.stat().st_mtime)
                if file_time < cutoff_time:
                    continue
                with open(alerts_file, encoding="utf-8") as f:
                    loaded = json.load(f)
            except (OSError, ValueError) as e:
                logger.warning("Error loading %s: %s", alerts_file, e)
                continue
            entries = loaded if isinstance(loaded, list) else [loaded]
            all_alerts.extend(entry for entry in entries if isinstance(entry, dict))
        # str(... or '') keeps the sort total even when a timestamp is
        # missing, null or not a string.
        all_alerts.sort(key=lambda alert: str(alert.get('timestamp') or ''), reverse=True)
        return all_alerts

    # ------------------------------------------------------------------
    # Chart JavaScript
    # ------------------------------------------------------------------
    @staticmethod
    def _percent_chart_js(prefix: str, title: str, label: str, rgb: str) -> str:
        """Return the Chart.js snippet for one 0-100 % line chart.

        The snippet reads the ``<prefix>Data`` and ``systemTimestamps``
        constants and draws on the ``<prefix>Chart`` canvas. All arguments
        are internal constants, never file data.
        """
        return f"""
        // {title} Chart
        const {prefix}Ctx = document.getElementById('{prefix}Chart').getContext('2d');
        new Chart({prefix}Ctx, {{
            type: 'line',
            data: {{
                labels: systemTimestamps,
                datasets: [{{
                    label: '{label}',
                    data: {prefix}Data,
                    borderColor: 'rgb({rgb})',
                    backgroundColor: 'rgba({rgb}, 0.2)',
                    tension: 0.1
                }}]
            }},
            options: {{
                responsive: true,
                scales: {{
                    y: {{
                        beginAtZero: true,
                        max: 100
                    }}
                }}
            }}
        }});
"""

    def generate_system_charts_js(self, system_metrics: List[Dict[str, Any]]) -> str:
        """Generate JavaScript for the CPU, memory and disk trend charts.

        Uses the last 50 entries that carry both a ``system`` object and a
        ``timestamp``. Labels are the timestamp truncated to minute
        precision. A missing percentage key is plotted as 0; an explicit
        null is emitted as ``null`` (a gap in the line).

        Returns:
            The script text, or ``""`` when *system_metrics* is empty.
        """
        if not system_metrics:
            return ""

        labels: List[str] = []
        cpu_data: List[Any] = []
        memory_data: List[Any] = []
        disk_data: List[Any] = []
        for metric in system_metrics[-50:]:  # Last 50 data points
            if not isinstance(metric, dict):
                continue
            sys_data = metric.get('system')
            stamp = metric.get('timestamp')
            if not isinstance(sys_data, dict) or stamp is None:
                continue
            labels.append(str(stamp)[:16])  # YYYY-MM-DDTHH:MM
            cpu_data.append(sys_data.get('cpu_percent', 0))
            memory_data.append(sys_data.get('memory_percent', 0))
            disk_data.append(sys_data.get('disk_percent', 0))

        charts = "".join((
            self._percent_chart_js('cpu', 'CPU', 'CPU Usage (%)', '255, 99, 132'),
            self._percent_chart_js('memory', 'Memory', 'Memory Usage (%)', '54, 162, 235'),
            self._percent_chart_js('disk', 'Disk', 'Disk Usage (%)', '255, 205, 86'),
        ))
        labels_js = self._js_literal(labels)
        cpu_js = self._js_literal(cpu_data)
        memory_js = self._js_literal(memory_data)
        disk_js = self._js_literal(disk_data)
        return f"""
        // System Resource Charts
        const systemTimestamps = {labels_js};
        const cpuData = {cpu_js};
        const memoryData = {memory_js};
        const diskData = {disk_js};
{charts}"""

    def generate_scraper_charts_js(self, app_metrics: List[Dict[str, Any]]) -> str:
        """Generate JavaScript for the per-scraper item-count trend chart.

        Uses the last 20 entries that carry both ``scrapers`` and
        ``timestamp``. Samples are bucketed by minute-precision timestamp
        (the later sample wins within one minute), and every series is
        aligned to the label list: a scraper absent from a bucket gets
        ``null`` there so its points never shift onto the wrong label.

        Returns:
            The script text, or ``""`` when *app_metrics* is empty.
        """
        if not app_metrics:
            return ""

        # label -> {scraper name -> item count}; dict order keeps first-seen
        # order for both labels and scraper names.
        buckets: Dict[str, Dict[str, Any]] = {}
        scraper_names: Dict[str, None] = {}
        for metric in app_metrics[-20:]:  # Last 20 data points
            if not isinstance(metric, dict):
                continue
            scrapers = metric.get('scrapers')
            stamp = metric.get('timestamp')
            if not isinstance(scrapers, dict) or stamp is None:
                continue
            bucket = buckets.setdefault(str(stamp)[:16], {})  # YYYY-MM-DDTHH:MM
            for scraper_name, scraper_info in scrapers.items():
                scraper_names.setdefault(scraper_name, None)
                if isinstance(scraper_info, dict):
                    bucket[scraper_name] = scraper_info.get('last_item_count', 0)
                else:
                    bucket[scraper_name] = 0

        labels = list(buckets)
        datasets = []
        for index, scraper_name in enumerate(scraper_names):
            color = self._SCRAPER_COLORS[index % len(self._SCRAPER_COLORS)]
            datasets.append({
                "label": scraper_name,
                "data": [buckets[label].get(scraper_name) for label in labels],
                "borderColor": color,
                "backgroundColor": color.replace("rgb", "rgba").replace(")", ", 0.2)"),
                "tension": 0.1,
            })

        labels_js = self._js_literal(labels)
        datasets_js = self._js_literal(datasets)
        return f"""
        // Scraper Performance Chart
        const scraperTimestamps = {labels_js};
        const scraperCtx = document.getElementById('scraperChart').getContext('2d');
        new Chart(scraperCtx, {{
            type: 'line',
            data: {{
                labels: scraperTimestamps,
                datasets: {datasets_js}
            }},
            options: {{
                responsive: true,
                scales: {{
                    y: {{
                        beginAtZero: true
                    }}
                }}
            }}
        }});
"""

    # ------------------------------------------------------------------
    # HTML fragments
    # ------------------------------------------------------------------
    def _render_system_cards(self, sys_data: Dict[str, Any]) -> str:
        """Render the four headline cards (CPU, memory, disk, uptime)."""
        cards = (
            ("CPU Usage", "text-primary",
             self._format_number(sys_data.get('cpu_percent'), '%')),
            ("Memory Usage", "text-info",
             self._format_number(sys_data.get('memory_percent'), '%')),
            ("Disk Usage", "text-warning",
             self._format_number(sys_data.get('disk_percent'), '%')),
            # A missing uptime is shown as 0.0h rather than N/A.
            ("Uptime", "text-success",
             self._format_number(sys_data.get('uptime_hours', 0), 'h')),
        )
        return "".join(f"""
            <div class="col-md-3">
                <div class="card">
                    <div class="card-body">
                        <h5 class="card-title">{title}</h5>
                        <h2 class="{css_class}">{value}</h2>
                    </div>
                </div>
            </div>
""" for title, css_class, value in cards)

    def _render_scraper_rows(self, scrapers: Dict[str, Any]) -> str:
        """Render one table row per scraper in the latest application metric."""
        rows = []
        for name, info in scrapers.items():
            if not isinstance(info, dict):
                info = {}
            time_str, status_color = self._describe_age(info.get('minutes_since_update'))
            name_html = self._esc(str(name).title())
            count_html = self._esc(info.get('last_item_count', 0))
            last_id_html = self._esc(info.get('last_id', 'N/A'))
            rows.append(f"""
                <tr>
                    <td>{name_html}</td>
                    <td>{count_html}</td>
                    <td><span class="badge bg-{status_color}">{time_str}</span></td>
                    <td>{last_id_html}</td>
                </tr>
""")
        return "".join(rows)

    def _render_alert_rows(self, alerts: List[Dict[str, Any]]) -> str:
        """Render table rows for the first ten alerts of *alerts*."""
        rows = []
        for alert in alerts[:10]:  # Show last 10 alerts
            alert_type = alert.get('type', 'INFO')
            if alert_type == 'CRITICAL':
                badge_class = "bg-danger"
            elif alert_type == 'WARNING':
                badge_class = "bg-warning"
            else:
                badge_class = "bg-info"
            stamp = str(alert.get('timestamp') or '')[:19].replace('T', ' ')
            stamp_html = self._esc(stamp)
            type_html = self._esc(alert_type)
            component_html = self._esc(alert.get('component', 'N/A'))
            message_html = self._esc(alert.get('message', 'N/A'))
            rows.append(f"""
                <tr>
                    <td>{stamp_html}</td>
                    <td><span class="badge {badge_class}">{type_html}</span></td>
                    <td>{component_html}</td>
                    <td>{message_html}</td>
                </tr>
""")
        return "".join(rows)

    def generate_html_dashboard(self, system_metrics: List[Dict[str, Any]],
                                app_metrics: List[Dict[str, Any]],
                                alerts: List[Dict[str, Any]]) -> str:
        """Generate the complete HTML dashboard page.

        Args:
            system_metrics: System metric dicts, oldest first; the last entry
                supplies the headline cards.
            app_metrics: Application metric dicts, oldest first; the last
                entry supplies the scraper status table.
            alerts: Alert dicts, newest first; the first ten are listed.

        Returns:
            The page as a string. Overall health is CRITICAL if any alert has
            type ``CRITICAL``, WARNING if any has type ``WARNING``, otherwise
            HEALTHY.
        """
        alerts = [alert for alert in alerts if isinstance(alert, dict)]

        # Latest samples drive the "current status" widgets.
        latest_system = system_metrics[-1] if system_metrics else {}
        latest_app = app_metrics[-1] if app_metrics else {}
        if not isinstance(latest_system, dict):
            latest_system = {}
        if not isinstance(latest_app, dict):
            latest_app = {}

        critical_alerts = [a for a in alerts if a.get('type') == 'CRITICAL']
        warning_alerts = [a for a in alerts if a.get('type') == 'WARNING']
        if critical_alerts:
            health_status, health_color, alert_class = "CRITICAL", "#dc3545", "danger"
        elif warning_alerts:
            health_status, health_color, alert_class = "WARNING", "#ffc107", "warning"
        else:
            health_status, health_color, alert_class = "HEALTHY", "#28a745", "success"

        sys_data = latest_system.get('system')
        system_cards = self._render_system_cards(sys_data) if isinstance(sys_data, dict) else ""

        scrapers = latest_app.get('scrapers')
        scraper_rows = self._render_scraper_rows(scrapers) if isinstance(scrapers, dict) else ""

        alert_rows = self._render_alert_rows(alerts)

        system_charts_js = self.generate_system_charts_js(system_metrics)
        scraper_charts_js = self.generate_scraper_charts_js(app_metrics)

        generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        total_count = len(alerts)
        critical_count = len(critical_alerts)
        warning_count = len(warning_alerts)

        page = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>HVAC Know It All - System Dashboard</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <style>
        .status-indicator {{
            width: 20px;
            height: 20px;
            border-radius: 50%;
            display: inline-block;
            margin-right: 10px;
        }}
        .chart-container {{
            position: relative;
            height: 300px;
            margin-bottom: 20px;
        }}
        .refresh-time {{
            font-size: 0.8em;
            color: #6c757d;
        }}
    </style>
</head>
<body>
    <div class="container-fluid">
        <div class="row">
            <div class="col-12">
                <nav class="navbar navbar-dark bg-dark">
                    <div class="container-fluid">
                        <span class="navbar-brand mb-0 h1">
                            <span class="status-indicator" style="background-color: {health_color};"></span>
                            HVAC Know It All - System Dashboard
                        </span>
                        <span class="navbar-text refresh-time">
                            Last Updated: {generated_at}
                        </span>
                    </div>
                </nav>
            </div>
        </div>
        <!-- Health Status -->
        <div class="row mt-3">
            <div class="col-12">
                <div class="alert alert-{alert_class}" role="alert">
                    <h4 class="alert-heading">System Status: {health_status}</h4>
                    <p>Total Alerts: {total_count} | Critical: {critical_count} | Warnings: {warning_count}</p>
                </div>
            </div>
        </div>
        <!-- System Metrics -->
        <div class="row mt-3">
            <div class="col-12">
                <h3>System Resources</h3>
            </div>
            {system_cards}
        </div>
        <!-- Charts -->
        <div class="row mt-4">
            <div class="col-md-4">
                <h5>CPU Usage Trend</h5>
                <div class="chart-container">
                    <canvas id="cpuChart"></canvas>
                </div>
            </div>
            <div class="col-md-4">
                <h5>Memory Usage Trend</h5>
                <div class="chart-container">
                    <canvas id="memoryChart"></canvas>
                </div>
            </div>
            <div class="col-md-4">
                <h5>Disk Usage Trend</h5>
                <div class="chart-container">
                    <canvas id="diskChart"></canvas>
                </div>
            </div>
        </div>
        <!-- Scraper Performance -->
        <div class="row mt-4">
            <div class="col-md-8">
                <h5>Scraper Item Collection Trend</h5>
                <div class="chart-container">
                    <canvas id="scraperChart"></canvas>
                </div>
            </div>
            <div class="col-md-4">
                <h5>Scraper Status</h5>
                <div class="table-responsive">
                    <table class="table table-sm table-striped">
                        <thead>
                            <tr>
                                <th>Scraper</th>
                                <th>Last Items</th>
                                <th>Last Update</th>
                                <th>Last ID</th>
                            </tr>
                        </thead>
                        <tbody>
                            {scraper_rows}
                        </tbody>
                    </table>
                </div>
            </div>
        </div>
        <!-- Recent Alerts -->
        <div class="row mt-4">
            <div class="col-12">
                <h5>Recent Alerts</h5>
                <div class="table-responsive">
                    <table class="table table-sm table-striped">
                        <thead>
                            <tr>
                                <th>Timestamp</th>
                                <th>Type</th>
                                <th>Component</th>
                                <th>Message</th>
                            </tr>
                        </thead>
                        <tbody>
                            {alert_rows}
                        </tbody>
                    </table>
                </div>
            </div>
        </div>
        <div class="row mt-4 mb-3">
            <div class="col-12">
                <p class="text-muted text-center">
                    Dashboard auto-refreshes every 5 minutes.
                    <a href="javascript:location.reload()">Refresh Now</a>
                </p>
            </div>
        </div>
    </div>
    <script>
        // Auto-refresh every 5 minutes. Registered before the chart code so
        // a chart error (e.g. Chart.js failed to load) cannot disable it.
        setTimeout(function() {{
            location.reload();
        }}, 300000);
        {system_charts_js}
        {scraper_charts_js}
    </script>
</body>
</html>
"""
        return page

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------
    def generate_dashboard(self) -> Optional[Path]:
        """Generate the dashboard and save it under the dashboard directory.

        Loads 24 hours of ``system`` and ``application`` metrics and 72 hours
        of alerts, then writes the page to ``index.html`` and to a
        timestamped ``dashboard_<YYYYmmdd_HHMMSS>.html`` copy. Both files are
        written as UTF-8 to match the page's declared charset.

        Returns:
            Path of ``index.html`` on success, ``None`` if either write fails.
        """
        logger.info("Generating HTML dashboard...")
        system_metrics = self.load_recent_metrics('system', 24)
        app_metrics = self.load_recent_metrics('application', 24)
        alerts = self.load_recent_alerts(72)

        html_content = self.generate_html_dashboard(system_metrics, app_metrics, alerts)

        dashboard_file = self.dashboard_dir / "index.html"
        try:
            dashboard_file.write_text(html_content, encoding="utf-8")
            logger.info("Dashboard saved to %s", dashboard_file)
            # NOTE(review): timestamped snapshots are never pruned here —
            # confirm that cleanup happens elsewhere.
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            backup_file = self.dashboard_dir / f"dashboard_{timestamp}.html"
            backup_file.write_text(html_content, encoding="utf-8")
            return dashboard_file
        except (OSError, UnicodeError) as e:
            logger.error("Error saving dashboard: %s", e)
            return None
def main():
    """Build the dashboard with default settings and report the result.

    Prints the outcome to stdout and returns True when the dashboard file
    was written, False otherwise.
    """
    dashboard_file = DashboardGenerator().generate_dashboard()
    if not dashboard_file:
        print("Failed to generate dashboard")
        return False
    print(f"Dashboard generated: {dashboard_file}")
    print(f"View at: file://{dashboard_file.absolute()}")
    return True
if __name__ == '__main__':
    # Logging is configured only when run as a script, so importers keep
    # control of their own logging setup.
    logging.basicConfig(level=logging.INFO)
    # `exit` is injected by the `site` module for interactive sessions and is
    # missing under `python -S`; raising SystemExit always works.
    # Exit status: 0 when the dashboard was generated, 1 otherwise.
    raise SystemExit(0 if main() else 1)