- Created SystemMonitor class for health check monitoring - Implemented system metrics collection (CPU, memory, disk, network) - Added application metrics monitoring (scrapers, logs, data sizes) - Built alert system with configurable thresholds - Developed HTML dashboard generator with real-time charts - Added systemd services for automated monitoring (15-min intervals) - Created responsive web dashboard with Bootstrap and Chart.js - Implemented automatic cleanup of old metric files - Added comprehensive documentation and troubleshooting guide Features: - Real-time system resource monitoring - Scraper performance tracking and alerts - Interactive dashboard with trend charts - Email-ready alert notifications - Systemd integration for production deployment - Security hardening with minimal privileges - Auto-refresh dashboard every 5 minutes - 7-day metric retention with automatic cleanup Alert conditions: - Critical: CPU >80%, Memory >85%, Disk >90% - Warning: Scraper inactive >24h, Log files >100MB - Error: Monitoring failures, configuration issues 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
566 lines
No EOL
20 KiB
Python
Executable file
566 lines
No EOL
20 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
HTML Dashboard Generator for HVAC Know It All Content Aggregation System
|
|
|
|
Generates a web-based dashboard showing:
|
|
- System health overview
|
|
- Scraper performance metrics
|
|
- Resource usage trends
|
|
- Alert history
|
|
- Data collection statistics
|
|
"""
|
|
|
|
import html
import json
import logging
import os
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class DashboardGenerator:
    """Generate a static HTML dashboard from monitoring data stored on disk.

    JSON metric snapshots are read from ``<monitoring_dir>/metrics``, alert
    files from ``<monitoring_dir>/alerts``, and the rendered page is written
    to ``<monitoring_dir>/dashboard``.
    """

    # Line colours cycled through when plotting one dataset per scraper.
    _SCRAPER_COLORS = (
        'rgb(255, 99, 132)', 'rgb(54, 162, 235)', 'rgb(255, 205, 86)',
        'rgb(75, 192, 192)', 'rgb(153, 102, 255)', 'rgb(255, 159, 64)',
    )

    def __init__(self, monitoring_dir: Optional[Path] = None):
        """Resolve the working directories and make sure the output one exists.

        Args:
            monitoring_dir: Root of the monitoring tree. Falls back to
                ``/opt/hvac-kia-content/monitoring`` when omitted. A plain
                string is accepted as well as a ``Path``.
        """
        self.monitoring_dir = (
            Path(monitoring_dir) if monitoring_dir
            else Path("/opt/hvac-kia-content/monitoring")
        )
        self.metrics_dir = self.monitoring_dir / "metrics"
        self.alerts_dir = self.monitoring_dir / "alerts"
        self.dashboard_dir = self.monitoring_dir / "dashboard"

        # Create dashboard directory
        self.dashboard_dir.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------

    def load_recent_metrics(self, metric_type: str, hours: int = 24) -> List[Dict[str, Any]]:
        """Load metric snapshots of one type modified within the last *hours*.

        Files matching ``<metric_type>_*.json`` in the metrics directory are
        visited in file-name order. Each returned dict gets an extra
        ``file_timestamp`` key holding the file's modification time (ISO 8601).
        Unreadable or malformed files are logged and skipped.

        Args:
            metric_type: File-name prefix, e.g. ``'system'``.
            hours: Size of the look-back window, based on file mtime.

        Returns:
            The parsed snapshots, in file-name order.
        """
        cutoff_time = datetime.now() - timedelta(hours=hours)
        metrics = []

        for metrics_file in sorted(self.metrics_dir.glob(f"{metric_type}_*.json")):
            try:
                file_time = datetime.fromtimestamp(metrics_file.stat().st_mtime)
                if file_time < cutoff_time:
                    continue
                with open(metrics_file, encoding="utf-8") as f:
                    data = json.load(f)
            except Exception as e:
                # Best effort: one bad file must not take the dashboard down.
                logger.warning(f"Error loading {metrics_file}: {e}")
                continue

            if not isinstance(data, dict):
                # Later rendering code indexes snapshots by key.
                logger.warning(f"Error loading {metrics_file}: expected a JSON object")
                continue

            data['file_timestamp'] = file_time.isoformat()
            metrics.append(data)

        return metrics

    def load_recent_alerts(self, hours: int = 72) -> List[Dict[str, Any]]:
        """Load alerts from ``alerts_*.json`` files modified within *hours*.

        A file may hold either a single alert object or a list of them.
        Entries that are not JSON objects are logged and dropped.

        Args:
            hours: Size of the look-back window, based on file mtime.

        Returns:
            All alerts, sorted by their ``timestamp`` value, newest first.
            Alerts without a timestamp sort last.
        """
        cutoff_time = datetime.now() - timedelta(hours=hours)
        all_alerts = []

        for alerts_file in sorted(self.alerts_dir.glob("alerts_*.json")):
            try:
                file_time = datetime.fromtimestamp(alerts_file.stat().st_mtime)
                if file_time < cutoff_time:
                    continue
                with open(alerts_file, encoding="utf-8") as f:
                    loaded = json.load(f)
            except Exception as e:
                # Best effort: one bad file must not take the dashboard down.
                logger.warning(f"Error loading {alerts_file}: {e}")
                continue

            entries = loaded if isinstance(loaded, list) else [loaded]
            for entry in entries:
                if isinstance(entry, dict):
                    all_alerts.append(entry)
                else:
                    logger.warning(f"Skipping malformed alert entry in {alerts_file}")

        # Sort by timestamp; coerce to str so a null/missing value cannot
        # raise a TypeError during comparison.
        all_alerts.sort(key=lambda a: str(a.get('timestamp') or ''), reverse=True)
        return all_alerts

    # ------------------------------------------------------------------
    # JavaScript generation
    # ------------------------------------------------------------------

    @staticmethod
    def _to_js(value: Any) -> str:
        """Serialise *value* as a JavaScript literal for an inline script.

        JSON is used instead of Python ``repr`` so that ``None`` becomes
        ``null`` and strings are quoted/escaped correctly. ``</`` is broken
        up so that data can never terminate the surrounding script element.
        """
        return json.dumps(value).replace("</", "<\\/")

    @staticmethod
    def _line_chart_js(name: str, title: str, rgb: str) -> str:
        """Return Chart.js code plotting ``<name>Data`` on canvas ``<name>Chart``."""
        return f"""
        // {title} Chart
        const {name}Ctx = document.getElementById('{name}Chart').getContext('2d');
        new Chart({name}Ctx, {{
            type: 'line',
            data: {{
                labels: systemTimestamps,
                datasets: [{{
                    label: '{title} Usage (%)',
                    data: {name}Data,
                    borderColor: 'rgb({rgb})',
                    backgroundColor: 'rgba({rgb}, 0.2)',
                    tension: 0.1
                }}]
            }},
            options: {{
                responsive: true,
                scales: {{
                    y: {{
                        beginAtZero: true,
                        max: 100
                    }}
                }}
            }}
        }});
"""

    def generate_system_charts_js(self, system_metrics: List[Dict[str, Any]]) -> str:
        """Generate JavaScript for the CPU, memory and disk trend charts.

        Only the last 50 snapshots are plotted, and only those carrying both
        a ``system`` and a ``timestamp`` key.

        Args:
            system_metrics: Snapshots in chronological order.

        Returns:
            JavaScript source, or ``""`` when *system_metrics* is empty.
        """
        if not system_metrics:
            return ""

        points = [
            m for m in system_metrics[-50:]  # Last 50 data points
            if 'system' in m and 'timestamp' in m
        ]
        labels = [str(m['timestamp'])[:16] for m in points]  # YYYY-MM-DDTHH:MM
        cpu_data = [m['system'].get('cpu_percent', 0) for m in points]
        memory_data = [m['system'].get('memory_percent', 0) for m in points]
        disk_data = [m['system'].get('disk_percent', 0) for m in points]

        charts = "".join((
            self._line_chart_js('cpu', 'CPU', '255, 99, 132'),
            self._line_chart_js('memory', 'Memory', '54, 162, 235'),
            self._line_chart_js('disk', 'Disk', '255, 205, 86'),
        ))

        return f"""
        // System Resource Charts
        const systemTimestamps = {self._to_js(labels)};
        const cpuData = {self._to_js(cpu_data)};
        const memoryData = {self._to_js(memory_data)};
        const diskData = {self._to_js(disk_data)};
{charts}"""

    def generate_scraper_charts_js(self, app_metrics: List[Dict[str, Any]]) -> str:
        """Generate JavaScript for the per-scraper item-count chart.

        Only the last 20 snapshots are plotted, and only those carrying both
        a ``scrapers`` and a ``timestamp`` key. Every snapshot contributes
        exactly one label and one slot per scraper, so labels and data stay
        aligned; a scraper absent from a snapshot gets ``null`` there.

        Args:
            app_metrics: Snapshots in chronological order.

        Returns:
            JavaScript source, or ``""`` when *app_metrics* is empty.
        """
        if not app_metrics:
            return ""

        points = [
            m for m in app_metrics[-20:]  # Last 20 data points
            if 'scrapers' in m and 'timestamp' in m
        ]
        labels = [str(m['timestamp'])[:16] for m in points]  # YYYY-MM-DDTHH:MM

        # scraper name -> one value per label (None where the scraper is absent)
        series: Dict[str, List[Any]] = {}
        for index, metric in enumerate(points):
            for scraper_name, scraper_info in metric['scrapers'].items():
                counts = series.setdefault(scraper_name, [None] * len(points))
                counts[index] = scraper_info.get('last_item_count', 0)

        datasets = []
        for i, (scraper_name, counts) in enumerate(series.items()):
            color = self._SCRAPER_COLORS[i % len(self._SCRAPER_COLORS)]
            fill = color.replace("rgb", "rgba").replace(")", ", 0.2)")
            datasets.append(f"""{{
                    label: {self._to_js(scraper_name)},
                    data: {self._to_js(counts)},
                    borderColor: '{color}',
                    backgroundColor: '{fill}',
                    tension: 0.1
                }}""")

        return f"""
        // Scraper Performance Chart
        const scraperTimestamps = {self._to_js(labels)};
        const scraperCtx = document.getElementById('scraperChart').getContext('2d');
        new Chart(scraperCtx, {{
            type: 'line',
            data: {{
                labels: scraperTimestamps,
                datasets: [{', '.join(datasets)}]
            }},
            options: {{
                responsive: true,
                scales: {{
                    y: {{
                        beginAtZero: true
                    }}
                }}
            }}
        }});
"""

    # ------------------------------------------------------------------
    # HTML fragments
    # ------------------------------------------------------------------

    @staticmethod
    def _format_metric(value: Any, suffix: str) -> str:
        """Format a numeric reading to one decimal, or ``N/A`` if not numeric."""
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return "N/A"
        return f"{value:.1f}{suffix}"

    def _render_system_cards(self, latest_system: Dict[str, Any]) -> str:
        """Render the four status cards from the newest system snapshot."""
        sys_data = latest_system.get('system')
        if not isinstance(sys_data, dict):
            return ""

        cards = (
            ("CPU Usage", "text-primary",
             self._format_metric(sys_data.get('cpu_percent'), "%")),
            ("Memory Usage", "text-info",
             self._format_metric(sys_data.get('memory_percent'), "%")),
            ("Disk Usage", "text-warning",
             self._format_metric(sys_data.get('disk_percent'), "%")),
            ("Uptime", "text-success",
             self._format_metric(sys_data.get('uptime_hours', 0), "h")),
        )
        return "".join(
            f"""
            <div class="col-md-3">
                <div class="card">
                    <div class="card-body">
                        <h5 class="card-title">{title}</h5>
                        <h2 class="{css}">{value}</h2>
                    </div>
                </div>
            </div>
"""
            for title, css, value in cards
        )

    @staticmethod
    def _render_scraper_rows(latest_app: Dict[str, Any]) -> str:
        """Render one table row per scraper from the newest app snapshot."""
        rows = []
        for name, data in latest_app.get('scrapers', {}).items():
            minutes_since = data.get('minutes_since_update')

            if minutes_since is None:
                time_str, status_color = "Never", "secondary"
            elif minutes_since < 60:
                time_str, status_color = f"{minutes_since:.0f}m ago", "success"
            elif minutes_since < 1440:  # 24 hours
                time_str, status_color = f"{minutes_since / 60:.1f}h ago", "warning"
            else:
                time_str, status_color = f"{minutes_since / 1440:.1f}d ago", "danger"

            # Values come from data files, so escape before inlining in HTML.
            scraper = html.escape(str(name).title())
            last_count = html.escape(str(data.get('last_item_count', 0)))
            last_id = html.escape(str(data.get('last_id', 'N/A')))

            rows.append(f"""
                <tr>
                    <td>{scraper}</td>
                    <td>{last_count}</td>
                    <td><span class="badge bg-{status_color}">{time_str}</span></td>
                    <td>{last_id}</td>
                </tr>
""")
        return "".join(rows)

    @staticmethod
    def _render_alert_rows(alerts: List[Dict[str, Any]]) -> str:
        """Render table rows for the first ten entries of *alerts*."""
        badge_classes = {'CRITICAL': "bg-danger", 'WARNING': "bg-warning"}
        rows = []
        for alert in alerts[:10]:  # Show last 10 alerts
            alert_type = alert.get('type', 'INFO')
            badge_class = badge_classes.get(alert_type, "bg-info")
            timestamp = str(alert.get('timestamp') or '')[:19].replace('T', ' ')

            # Values come from data files, so escape before inlining in HTML.
            safe_time = html.escape(timestamp)
            safe_type = html.escape(str(alert_type))
            component = html.escape(str(alert.get('component', 'N/A')))
            message = html.escape(str(alert.get('message', 'N/A')))

            rows.append(f"""
                <tr>
                    <td>{safe_time}</td>
                    <td><span class="badge {badge_class}">{safe_type}</span></td>
                    <td>{component}</td>
                    <td>{message}</td>
                </tr>
""")
        return "".join(rows)

    def generate_html_dashboard(self, system_metrics: List[Dict[str, Any]],
                                app_metrics: List[Dict[str, Any]],
                                alerts: List[Dict[str, Any]]) -> str:
        """Generate the complete HTML dashboard.

        The last element of *system_metrics* / *app_metrics* is treated as
        the current state. Overall health is CRITICAL if any alert has type
        ``CRITICAL``, otherwise WARNING if any has type ``WARNING``,
        otherwise HEALTHY.

        Args:
            system_metrics: System snapshots in chronological order.
            app_metrics: Application snapshots in chronological order.
            alerts: Alerts, newest first (the first ten are listed).

        Returns:
            The full HTML document as a string.
        """
        # Get latest metrics for current status
        latest_system = system_metrics[-1] if system_metrics else {}
        latest_app = app_metrics[-1] if app_metrics else {}

        # Calculate health status
        critical_alerts = [a for a in alerts if a.get('type') == 'CRITICAL']
        warning_alerts = [a for a in alerts if a.get('type') == 'WARNING']

        if critical_alerts:
            health_status, health_color, alert_class = "CRITICAL", "#dc3545", "danger"
        elif warning_alerts:
            health_status, health_color, alert_class = "WARNING", "#ffc107", "warning"
        else:
            health_status, health_color, alert_class = "HEALTHY", "#28a745", "success"

        system_cards = self._render_system_cards(latest_system)
        scraper_rows = self._render_scraper_rows(latest_app)
        alert_rows = self._render_alert_rows(alerts)

        # Generate JavaScript for charts
        system_charts_js = self.generate_system_charts_js(system_metrics)
        scraper_charts_js = self.generate_scraper_charts_js(app_metrics)

        generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        page = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>HVAC Know It All - System Dashboard</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <style>
        .status-indicator {{
            width: 20px;
            height: 20px;
            border-radius: 50%;
            display: inline-block;
            margin-right: 10px;
        }}
        .chart-container {{
            position: relative;
            height: 300px;
            margin-bottom: 20px;
        }}
        .refresh-time {{
            font-size: 0.8em;
            color: #6c757d;
        }}
    </style>
</head>
<body>
    <div class="container-fluid">
        <div class="row">
            <div class="col-12">
                <nav class="navbar navbar-dark bg-dark">
                    <div class="container-fluid">
                        <span class="navbar-brand mb-0 h1">
                            <span class="status-indicator" style="background-color: {health_color};"></span>
                            HVAC Know It All - System Dashboard
                        </span>
                        <span class="navbar-text refresh-time">
                            Last Updated: {generated_at}
                        </span>
                    </div>
                </nav>
            </div>
        </div>

        <!-- Health Status -->
        <div class="row mt-3">
            <div class="col-12">
                <div class="alert alert-{alert_class}" role="alert">
                    <h4 class="alert-heading">System Status: {health_status}</h4>
                    <p>Total Alerts: {len(alerts)} | Critical: {len(critical_alerts)} | Warnings: {len(warning_alerts)}</p>
                </div>
            </div>
        </div>

        <!-- System Metrics -->
        <div class="row mt-3">
            <div class="col-12">
                <h3>System Resources</h3>
            </div>
            {system_cards}
        </div>

        <!-- Charts -->
        <div class="row mt-4">
            <div class="col-md-4">
                <h5>CPU Usage Trend</h5>
                <div class="chart-container">
                    <canvas id="cpuChart"></canvas>
                </div>
            </div>
            <div class="col-md-4">
                <h5>Memory Usage Trend</h5>
                <div class="chart-container">
                    <canvas id="memoryChart"></canvas>
                </div>
            </div>
            <div class="col-md-4">
                <h5>Disk Usage Trend</h5>
                <div class="chart-container">
                    <canvas id="diskChart"></canvas>
                </div>
            </div>
        </div>

        <!-- Scraper Performance -->
        <div class="row mt-4">
            <div class="col-md-8">
                <h5>Scraper Item Collection Trend</h5>
                <div class="chart-container">
                    <canvas id="scraperChart"></canvas>
                </div>
            </div>
            <div class="col-md-4">
                <h5>Scraper Status</h5>
                <div class="table-responsive">
                    <table class="table table-sm table-striped">
                        <thead>
                            <tr>
                                <th>Scraper</th>
                                <th>Last Items</th>
                                <th>Last Update</th>
                                <th>Last ID</th>
                            </tr>
                        </thead>
                        <tbody>
                            {scraper_rows}
                        </tbody>
                    </table>
                </div>
            </div>
        </div>

        <!-- Recent Alerts -->
        <div class="row mt-4">
            <div class="col-12">
                <h5>Recent Alerts</h5>
                <div class="table-responsive">
                    <table class="table table-sm table-striped">
                        <thead>
                            <tr>
                                <th>Timestamp</th>
                                <th>Type</th>
                                <th>Component</th>
                                <th>Message</th>
                            </tr>
                        </thead>
                        <tbody>
                            {alert_rows}
                        </tbody>
                    </table>
                </div>
            </div>
        </div>

        <div class="row mt-4 mb-3">
            <div class="col-12">
                <p class="text-muted text-center">
                    Dashboard auto-refreshes every 5 minutes.
                    <a href="javascript:location.reload()">Refresh Now</a>
                </p>
            </div>
        </div>
    </div>

    <script>
        {system_charts_js}
        {scraper_charts_js}

        // Auto-refresh every 5 minutes
        setTimeout(function() {{
            location.reload();
        }}, 300000);
    </script>
</body>
</html>
"""

        return page

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------

    def generate_dashboard(self) -> Optional[Path]:
        """Generate the dashboard and save it to the dashboard directory.

        Loads 24 hours of ``system`` and ``application`` metrics and 72 hours
        of alerts, renders the page, then writes it to ``index.html`` and to
        a ``dashboard_<YYYYmmdd_HHMMSS>.html`` copy.

        Returns:
            Path of ``index.html`` on success, ``None`` if writing failed.
        """
        logger.info("Generating HTML dashboard...")

        # Load recent metrics and alerts
        system_metrics = self.load_recent_metrics('system', 24)
        app_metrics = self.load_recent_metrics('application', 24)
        alerts = self.load_recent_alerts(72)

        # Generate HTML
        html_content = self.generate_html_dashboard(system_metrics, app_metrics, alerts)

        # Save dashboard
        dashboard_file = self.dashboard_dir / "index.html"
        try:
            # The page declares <meta charset="UTF-8">, so write it as UTF-8
            # instead of whatever the locale default happens to be.
            dashboard_file.write_text(html_content, encoding="utf-8")
            logger.info(f"Dashboard saved to {dashboard_file}")

            # Also create a timestamped version.
            # NOTE(review): nothing in this class prunes these copies, so one
            # more file is left behind on every run - confirm cleanup exists.
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            backup_file = self.dashboard_dir / f"dashboard_{timestamp}.html"
            backup_file.write_text(html_content, encoding="utf-8")

            return dashboard_file

        except (OSError, UnicodeError) as e:
            logger.error(f"Error saving dashboard: {e}")
            return None
|
|
|
|
|
|
def main():
    """Build the dashboard with default settings and report the outcome.

    Prints the location of the generated file on success, or a failure
    message otherwise.

    Returns:
        True when the dashboard file was written, False otherwise.
    """
    output_path = DashboardGenerator().generate_dashboard()

    # Guard clause: nothing more to report when generation failed.
    if not output_path:
        print("Failed to generate dashboard")
        return False

    print(f"Dashboard generated: {output_path}")
    print(f"View at: file://{output_path.absolute()}")
    return True
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: configure logging, run main(), and map its boolean
    # result onto the process exit status (0 = success, 1 = failure).
    logging.basicConfig(level=logging.INFO)
    success = main()
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive use and is missing under `python -S`.
    raise SystemExit(0 if success else 1)