Add complete enterprise-level reliability, security, and performance systems:

## Core Monitoring Systems
- **Health Monitor**: 8 automated health checks with email alerts and REST API
- **Error Recovery**: 4 recovery strategies (retry, fallback, circuit breaker, graceful failure)
- **Security Monitor**: Real-time threat detection with automatic IP blocking
- **Performance Monitor**: Performance tracking with automated benchmarks and alerts

## Data Protection & Optimization
- **Backup Manager**: Automated backups with encryption, compression, and disaster recovery
- **Cache Optimizer**: Intelligent caching with 3 strategies and 5 specialized cache groups

## Enterprise Features
- Automated scheduling with WordPress cron integration
- Admin dashboards for all systems under Tools menu
- REST API endpoints for external monitoring
- WP-CLI commands for automation and CI/CD
- Comprehensive documentation (docs/MONITORING-SYSTEMS.md)
- Emergency response systems with immediate email alerts
- Circuit breaker pattern for external service failures
- Smart cache warming and invalidation
- Database query caching and optimization
- File integrity monitoring
- Performance degradation detection

## Integration
- Plugin architecture updated with proper initialization
- Singleton pattern for all monitoring classes
- WordPress hooks and filters integration
- Background job processing system
- Comprehensive error handling and logging

Systems provide enterprise-grade reliability with automated threat response, proactive performance monitoring, and complete disaster recovery capabilities.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
589 lines
No EOL
20 KiB
PHP
589 lines
No EOL
20 KiB
PHP
<?php
|
|
/**
|
|
* HVAC Error Recovery System
|
|
*
|
|
* Provides automatic error recovery and graceful degradation for plugin functionality
|
|
*
|
|
* @package HVAC_Community_Events
|
|
* @since 1.0.8
|
|
*/
|
|
|
|
// Refuse direct access: stop immediately unless WordPress has defined ABSPATH.
defined('ABSPATH') || exit;
|
|
|
|
/**
 * HVAC_Error_Recovery class
 *
 * Static service that runs plugin operations under a recovery strategy
 * (retry, fallback, circuit breaker or graceful failure), records failures
 * in the `hvac_error_counts` option, keeps circuit-breaker expiry times in
 * the `hvac_circuit_breakers` option, and switches the plugin into
 * "emergency mode" (`hvac_emergency_mode` option) after a fatal error that
 * originated in a plugin file.
 */
class HVAC_Error_Recovery {

    /**
     * Recovery strategies
     */
    const STRATEGY_RETRY = 'retry';
    const STRATEGY_FALLBACK = 'fallback';
    const STRATEGY_GRACEFUL_FAIL = 'graceful_fail';
    const STRATEGY_CIRCUIT_BREAKER = 'circuit_breaker';

    /**
     * Per-operation failure counters, mirrored in the `hvac_error_counts` option.
     *
     * @var array<string,int>
     */
    private static $error_counts = [];

    /**
     * Whether $error_counts has been hydrated from the option in this request.
     *
     * @var bool
     */
    private static $error_counts_loaded = false;

    /**
     * Circuit-breaker expiry timestamps keyed by operation type, mirrored in
     * the `hvac_circuit_breakers` option.
     *
     * @var array<string,int>
     */
    private static $circuit_breakers = [];

    /**
     * Whether $circuit_breakers has been hydrated from the option in this request.
     *
     * @var bool
     */
    private static $circuit_breakers_loaded = false;

    /**
     * Error handler that was installed before ours, if any. Errors that do not
     * come from plugin files are delegated to it.
     *
     * @var callable|null
     */
    private static $previous_error_handler = null;

    /**
     * Recovery configuration
     *
     * Keys per operation type:
     *  - strategy:           one of the STRATEGY_* constants
     *  - max_attempts:       how many times the callback is tried (defaults to 1)
     *  - backoff_multiplier: seconds multiplied by the attempt number between retries
     *  - max_failures:       failure count at which a circuit breaker opens
     *  - timeout:            seconds a circuit stays open
     *  - fallback_callback:  method name on this class, or any callable
     */
    private static $recovery_config = [
        'database_query' => [
            'strategy' => self::STRATEGY_RETRY,
            'max_attempts' => 3,
            'backoff_multiplier' => 2,
            'fallback_callback' => null
        ],
        'cache_operation' => [
            'strategy' => self::STRATEGY_FALLBACK,
            'max_attempts' => 2,
            'fallback_callback' => 'skip_cache'
        ],
        'external_api' => [
            'strategy' => self::STRATEGY_CIRCUIT_BREAKER,
            'max_failures' => 5,
            'timeout' => 300, // 5 minutes
            'fallback_callback' => 'use_cached_data'
        ],
        'file_operation' => [
            'strategy' => self::STRATEGY_GRACEFUL_FAIL,
            'max_attempts' => 2,
            'fallback_callback' => 'log_and_continue'
        ]
    ];

    /**
     * Initialize error recovery system
     *
     * Registers every hook this class relies on. Call once during plugin bootstrap.
     */
    public static function init() {
        // Set custom error handler for plugin operations
        add_action('init', [__CLASS__, 'setup_error_handling']);

        // 'wp_die_handler' is a filter: the callback receives the current
        // handler and must return a callable.
        add_filter('wp_die_handler', [__CLASS__, 'handle_wp_die'], 10, 1);

        // Monitor and recover from specific plugin errors
        add_action('hvac_operation_failed', [__CLASS__, 'handle_operation_failure'], 10, 3);

        // Admin interface for error recovery stats
        if (is_admin()) {
            add_action('admin_menu', [__CLASS__, 'add_admin_menu']);

            // Endpoint used by the "Disable Emergency Mode" button on the admin page.
            add_action('wp_ajax_hvac_disable_emergency_mode', [__CLASS__, 'ajax_disable_emergency_mode']);
        }

        // Cleanup old error data
        add_action('wp_scheduled_delete', [__CLASS__, 'cleanup_old_errors']);
    }

    /**
     * Filter callback for 'wp_die_handler'.
     *
     * Returns the handler it was given unchanged, so wp_die() keeps its
     * existing behaviour while the hook registered in init() has a valid target.
     *
     * @param callable $handler Handler currently selected by WordPress.
     * @return callable The same handler.
     */
    public static function handle_wp_die($handler) {
        return $handler;
    }

    /**
     * AJAX endpoint that switches emergency mode off.
     *
     * Verifies the `hvac_emergency_mode` nonce sent in the `nonce` field and
     * requires the `manage_options` capability before acting.
     */
    public static function ajax_disable_emergency_mode() {
        check_ajax_referer('hvac_emergency_mode', 'nonce');

        if (!current_user_can('manage_options')) {
            wp_send_json_error(['message' => 'Insufficient permissions'], 403);
        }

        self::disable_emergency_mode();
        wp_send_json_success();
    }

    /**
     * Setup error handling
     *
     * Installs the PHP error handler and the shutdown (fatal error) handler.
     * The handler that was active before is remembered so that errors from
     * non-plugin files can still reach it.
     */
    public static function setup_error_handling() {
        // NOTE(review): the backtrace inspected by is_plugin_context() includes
        // this file itself, so the check passes whenever the plugin lives in a
        // path containing 'hvac-community-events'. Confirm that is intended.
        if (self::is_plugin_context()) {
            self::$previous_error_handler = set_error_handler([__CLASS__, 'handle_php_error'], E_ALL);
            register_shutdown_function([__CLASS__, 'handle_fatal_error']);
        }
    }

    /**
     * Check if we're in plugin context
     *
     * @return bool True when one of the 10 nearest stack frames refers to a
     *              file whose path contains 'hvac-community-events'.
     */
    private static function is_plugin_context() {
        $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 10);

        foreach ($backtrace as $trace) {
            $file = $trace['file'] ?? '';
            if (strpos($file, 'hvac-community-events') !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Execute operation with error recovery
     *
     * Unknown operation types use the 'file_operation' configuration.
     *
     * @param string   $operation_type Type of operation
     * @param callable $callback       Operation callback
     * @param array    $args           Operation arguments
     * @return mixed Operation result or fallback
     * @throws Exception The last exception raised by the callback when the
     *                   retry strategy runs out of attempts.
     */
    public static function execute_with_recovery($operation_type, $callback, $args = []) {
        $config = self::$recovery_config[$operation_type] ?? self::$recovery_config['file_operation'];
        $strategy = $config['strategy'];
        $uses_breaker = ($strategy === self::STRATEGY_CIRCUIT_BREAKER);

        // Always try at least once so a failure path has a real exception to report.
        $max_attempts = max(1, (int) ($config['max_attempts'] ?? 1));
        $last_error = null;

        // Short-circuit while the breaker is open.
        if ($uses_breaker && self::is_circuit_open($operation_type)) {
            return self::execute_fallback($operation_type, $config, $args);
        }

        for ($attempt = 1; $attempt <= $max_attempts; $attempt++) {
            try {
                $result = call_user_func_array($callback, $args);

                // Reset error count on success
                self::reset_error_count($operation_type);

                return $result;
            } catch (Exception $e) {
                $last_error = $e;

                self::increment_error_count($operation_type);

                HVAC_Logger::warning(
                    "Operation failed (attempt $attempt): {$e->getMessage()}",
                    'Error Recovery'
                );

                // Evaluate the breaker after every failure, including the last
                // attempt; otherwise a single-attempt operation could never
                // trip it.
                if ($uses_breaker && self::should_open_circuit($operation_type)) {
                    self::open_circuit($operation_type, $config['timeout'] ?? 300);
                    return self::execute_fallback($operation_type, $config, $args);
                }

                // Back off only when another attempt will follow.
                if ($strategy === self::STRATEGY_RETRY && $attempt < $max_attempts) {
                    $delay = ($config['backoff_multiplier'] ?? 1) * $attempt;
                    sleep($delay);
                }
            }
        }

        // All attempts failed - execute fallback or fail gracefully
        return self::handle_final_failure($operation_type, $config, $last_error, $args);
    }

    /**
     * Handle final operation failure
     *
     * @param string         $operation_type Type of operation
     * @param array          $config         Recovery configuration in effect
     * @param Exception|null $error          Last exception raised, if any
     * @param array          $args           Operation arguments
     * @return mixed Fallback result or safe default
     * @throws Exception For the retry strategy (or an unknown strategy).
     */
    private static function handle_final_failure($operation_type, $config, $error, $args) {
        $message = ($error instanceof Exception) ? $error->getMessage() : 'unknown error';

        HVAC_Logger::error(
            "Operation $operation_type failed after all attempts: " . $message,
            'Error Recovery'
        );

        switch ($config['strategy']) {
            case self::STRATEGY_FALLBACK:
            case self::STRATEGY_CIRCUIT_BREAKER:
                return self::execute_fallback($operation_type, $config, $args);

            case self::STRATEGY_GRACEFUL_FAIL:
                // Return safe default value
                return self::get_safe_default($operation_type);

            default:
                // Re-throw exception for retry strategy
                if (!($error instanceof Exception)) {
                    $error = new RuntimeException("Operation $operation_type failed");
                }
                throw $error;
        }
    }

    /**
     * Execute fallback operation
     *
     * A string naming a method of this class is called with
     * ($operation_type, $args); any other callable is called with $args
     * spread as its arguments. If no usable fallback exists, or the fallback
     * itself throws, the safe default for the operation type is returned.
     *
     * @param string $operation_type Type of operation
     * @param array  $config         Recovery configuration in effect
     * @param array  $args           Operation arguments
     * @return mixed
     */
    private static function execute_fallback($operation_type, $config, $args) {
        $fallback = $config['fallback_callback'] ?? null;

        if (!$fallback) {
            return self::get_safe_default($operation_type);
        }

        try {
            if (is_string($fallback) && method_exists(__CLASS__, $fallback)) {
                return call_user_func([__CLASS__, $fallback], $operation_type, $args);
            } elseif (is_callable($fallback)) {
                return call_user_func_array($fallback, $args);
            }

            HVAC_Logger::warning(
                "Configured fallback for $operation_type is not callable",
                'Error Recovery'
            );
        } catch (Exception $e) {
            HVAC_Logger::error(
                "Fallback also failed for $operation_type: " . $e->getMessage(),
                'Error Recovery'
            );
        }

        return self::get_safe_default($operation_type);
    }

    /**
     * Get safe default value for operation type
     *
     * @param string $operation_type Type of operation
     * @return mixed Null for operation types without a configured default.
     */
    private static function get_safe_default($operation_type) {
        $defaults = [
            'database_query' => [],
            'cache_operation' => null,
            'external_api' => ['error' => 'Service temporarily unavailable'],
            'file_operation' => false
        ];

        return $defaults[$operation_type] ?? null;
    }

    /**
     * Hydrate the in-memory breaker map from the stored option, once per request.
     */
    private static function load_circuit_breakers() {
        if (self::$circuit_breakers_loaded) {
            return;
        }

        $stored = get_option('hvac_circuit_breakers', []);
        self::$circuit_breakers = is_array($stored) ? $stored : [];
        self::$circuit_breakers_loaded = true;
    }

    /**
     * Circuit breaker management
     *
     * @param string $operation_type Type of operation
     * @return bool True while the stored expiry time lies in the future.
     */
    private static function is_circuit_open($operation_type) {
        self::load_circuit_breakers();

        return isset(self::$circuit_breakers[$operation_type]) &&
               self::$circuit_breakers[$operation_type] > time();
    }

    /**
     * Decide whether the failure count has reached the configured threshold.
     *
     * @param string $operation_type Type of operation
     * @return bool
     */
    private static function should_open_circuit($operation_type) {
        $error_count = self::get_error_count($operation_type);
        $config = self::$recovery_config[$operation_type] ?? [];

        return $error_count >= ($config['max_failures'] ?? 5);
    }

    /**
     * Open the circuit for an operation type and persist its expiry time.
     *
     * @param string $operation_type Type of operation
     * @param int    $timeout        Seconds the circuit stays open
     */
    private static function open_circuit($operation_type, $timeout) {
        // Load first so breakers stored by earlier requests are not overwritten.
        self::load_circuit_breakers();

        self::$circuit_breakers[$operation_type] = time() + $timeout;
        update_option('hvac_circuit_breakers', self::$circuit_breakers);

        HVAC_Logger::warning(
            "Circuit breaker opened for $operation_type (timeout: {$timeout}s)",
            'Error Recovery'
        );
    }

    /**
     * Hydrate the in-memory counters from the stored option, once per request.
     */
    private static function load_error_counts() {
        if (self::$error_counts_loaded) {
            return;
        }

        $stored = get_option('hvac_error_counts', []);
        self::$error_counts = is_array($stored) ? $stored : [];
        self::$error_counts_loaded = true;
    }

    /**
     * Error counting
     *
     * Adds one failure to the persisted counter of an operation type.
     *
     * @param string $operation_type Type of operation
     */
    private static function increment_error_count($operation_type) {
        // Load first so the count continues from earlier requests and other
        // operation types keep their stored values.
        self::load_error_counts();

        self::$error_counts[$operation_type] = (int) (self::$error_counts[$operation_type] ?? 0) + 1;
        update_option('hvac_error_counts', self::$error_counts);
    }

    /**
     * Read the failure counter of an operation type.
     *
     * @param string $operation_type Type of operation
     * @return int
     */
    private static function get_error_count($operation_type) {
        self::load_error_counts();

        return (int) (self::$error_counts[$operation_type] ?? 0);
    }

    /**
     * Reset the failure counter of an operation type to zero.
     *
     * @param string $operation_type Type of operation
     */
    private static function reset_error_count($operation_type) {
        self::load_error_counts();

        // Nothing recorded: skip the option write on the success path.
        if (empty(self::$error_counts[$operation_type])) {
            return;
        }

        self::$error_counts[$operation_type] = 0;
        update_option('hvac_error_counts', self::$error_counts);
    }

    /**
     * Fallback implementations
     *
     * Fallback for cache operations: logs and returns null.
     *
     * @param string $operation_type Type of operation
     * @param array  $args           Operation arguments (unused)
     * @return null
     */
    public static function skip_cache($operation_type, $args) {
        HVAC_Logger::info("Cache operation skipped due to errors", 'Error Recovery');
        return null;
    }

    /**
     * Fallback for external APIs: returns the transient stored under
     * 'hvac_fallback_' . md5($operation_type . serialize($args)) when one
     * exists, otherwise the safe default.
     *
     * NOTE(review): nothing in this class writes those transients; presumably
     * callers populate them - confirm.
     *
     * @param string $operation_type Type of operation
     * @param array  $args           Operation arguments
     * @return mixed
     */
    public static function use_cached_data($operation_type, $args) {
        // Try to get stale cached data
        $cache_key = 'hvac_fallback_' . md5($operation_type . serialize($args));
        $cached_data = get_transient($cache_key);

        if ($cached_data !== false) {
            HVAC_Logger::info("Using stale cached data for $operation_type", 'Error Recovery');
            return $cached_data;
        }

        return self::get_safe_default($operation_type);
    }

    /**
     * Fallback that only logs the failure.
     *
     * @param string $operation_type Type of operation
     * @param array  $args           Operation arguments (unused)
     * @return bool Always true.
     */
    public static function log_and_continue($operation_type, $args) {
        HVAC_Logger::info("Continuing after failed $operation_type", 'Error Recovery');
        return true;
    }

    /**
     * Handle PHP errors
     *
     * Logs errors raised in plugin files. Errors from other files are passed
     * to the handler that was installed before this one (or to PHP's standard
     * handler when there was none).
     *
     * @param int    $severity Error level
     * @param string $message  Error message
     * @param string $file     File the error was raised in
     * @param int    $line     Line the error was raised at
     * @return bool True when the error was handled here; false hands it to
     *              PHP's standard handler.
     */
    public static function handle_php_error($severity, $message, $file, $line) {
        // Only handle errors from plugin files
        if (strpos($file, 'hvac-community-events') === false) {
            if (is_callable(self::$previous_error_handler)) {
                return (bool) call_user_func(self::$previous_error_handler, $severity, $message, $file, $line);
            }
            return false;
        }

        // Respect the active error_reporting level, which also covers
        // @-suppressed expressions.
        if (!(error_reporting() & $severity)) {
            return false;
        }

        $error_types = [
            E_ERROR => 'Error',
            E_WARNING => 'Warning',
            E_NOTICE => 'Notice',
            E_DEPRECATED => 'Deprecated',
            E_RECOVERABLE_ERROR => 'Recoverable Error',
            E_USER_ERROR => 'User Error',
            E_USER_WARNING => 'User Warning',
            E_USER_NOTICE => 'User Notice',
            E_USER_DEPRECATED => 'User Deprecated'
        ];

        $error_type = $error_types[$severity] ?? 'Unknown';

        HVAC_Logger::error(
            "PHP $error_type: $message in $file:$line",
            'Error Recovery'
        );

        // Don't execute PHP internal error handler
        return true;
    }

    /**
     * Handle fatal errors
     *
     * Shutdown callback: when the last error was fatal and came from a plugin
     * file, logs it and activates emergency mode.
     */
    public static function handle_fatal_error() {
        $error = error_get_last();

        if (!$error || !in_array($error['type'], [E_ERROR, E_CORE_ERROR, E_COMPILE_ERROR, E_PARSE], true)) {
            return;
        }

        // Only handle fatal errors from plugin files
        if (strpos($error['file'], 'hvac-community-events') === false) {
            return;
        }

        HVAC_Logger::error(
            "Fatal Error: {$error['message']} in {$error['file']}:{$error['line']}",
            'Error Recovery'
        );

        // Attempt to recover by disabling problematic functionality
        self::emergency_recovery($error);
    }

    /**
     * Emergency recovery for fatal errors
     *
     * Stores the emergency flag and emails the site administrator. While
     * emergency mode stays active the email is sent at most once per hour, so
     * a fatal error hit on every request does not flood the inbox.
     *
     * @param array $error Result of error_get_last()
     */
    private static function emergency_recovery($error) {
        $now = time();
        $previous = get_option('hvac_emergency_mode', false);

        $last_notified = 0;
        if (is_array($previous) && !empty($previous['enabled'])) {
            $last_notified = (int) ($previous['last_notified'] ?? 0);
        }

        $should_notify = function_exists('wp_mail') && ($now - $last_notified) >= 3600;

        // Create emergency flag to disable problematic functionality
        update_option('hvac_emergency_mode', [
            'enabled' => true,
            'error' => $error,
            'timestamp' => $now,
            'last_notified' => $should_notify ? $now : $last_notified
        ]);

        if (!$should_notify) {
            return;
        }

        // Send emergency notification
        $admin_email = get_option('admin_email');
        $site_name = get_bloginfo('name');

        wp_mail(
            $admin_email,
            "[$site_name] HVAC Plugin Emergency Mode Activated",
            "A fatal error occurred in the HVAC plugin and emergency mode has been activated.\n\n" .
            "Error: {$error['message']}\n" .
            "File: {$error['file']}:{$error['line']}\n\n" .
            "Please check the plugin status and contact support if needed."
        );
    }

    /**
     * Check if emergency mode is active
     *
     * Emergency mode expires 24 hours after its stored timestamp; an expired
     * flag is deleted on the way out.
     *
     * @return bool
     */
    public static function is_emergency_mode() {
        $emergency = get_option('hvac_emergency_mode', false);

        // Tolerate a missing or malformed option instead of indexing into it.
        if (!is_array($emergency) || empty($emergency['enabled'])) {
            return false;
        }

        // Auto-disable after 24 hours
        if (time() - (int) ($emergency['timestamp'] ?? 0) > 86400) {
            delete_option('hvac_emergency_mode');
            return false;
        }

        return true;
    }

    /**
     * Disable emergency mode
     */
    public static function disable_emergency_mode() {
        delete_option('hvac_emergency_mode');
        HVAC_Logger::info('Emergency mode disabled', 'Error Recovery');
    }

    /**
     * Handle operation failure action
     *
     * Callback for the 'hvac_operation_failed' action, which other parts of
     * the plugin can fire to report a failure.
     *
     * @param string $operation_type Type of operation
     * @param string $error_message  Description of the failure
     * @param array  $context        Extra context (unused)
     */
    public static function handle_operation_failure($operation_type, $error_message, $context = []) {
        HVAC_Logger::warning(
            "Operation failure reported: $operation_type - $error_message",
            'Error Recovery'
        );

        self::increment_error_count($operation_type);

        // Check if circuit breaker should be triggered. Unknown operation
        // types have no configuration, hence no breaker.
        $config = self::$recovery_config[$operation_type] ?? [];
        if (($config['strategy'] ?? null) !== self::STRATEGY_CIRCUIT_BREAKER) {
            return;
        }

        if (self::should_open_circuit($operation_type)) {
            self::open_circuit($operation_type, $config['timeout'] ?? 300);
        }
    }

    /**
     * Get error recovery statistics
     *
     * @return array Stored values under the keys 'error_counts',
     *               'circuit_breakers' and 'emergency_mode'.
     */
    public static function get_recovery_stats() {
        return [
            'error_counts' => get_option('hvac_error_counts', []),
            'circuit_breakers' => get_option('hvac_circuit_breakers', []),
            'emergency_mode' => get_option('hvac_emergency_mode', false)
        ];
    }

    /**
     * Cleanup old error data
     *
     * Clears all error counters once 24 hours have passed since the last
     * reset and removes circuit breakers whose expiry time has passed.
     */
    public static function cleanup_old_errors() {
        // Reset error counts daily
        $last_reset = get_option('hvac_error_reset_time', 0);
        if (time() - $last_reset > 86400) { // 24 hours
            update_option('hvac_error_counts', []);
            update_option('hvac_error_reset_time', time());

            // Keep the in-memory copy in step with the option.
            self::$error_counts = [];
            self::$error_counts_loaded = true;
        }

        // Clean up expired circuit breakers
        $circuit_breakers = get_option('hvac_circuit_breakers', []);
        if (!is_array($circuit_breakers)) {
            $circuit_breakers = [];
        }

        $current_time = time();
        $updated = false;

        foreach ($circuit_breakers as $operation => $expiry) {
            if ($expiry < $current_time) {
                unset($circuit_breakers[$operation]);
                $updated = true;
            }
        }

        if ($updated) {
            update_option('hvac_circuit_breakers', $circuit_breakers);

            // Keep the in-memory copy in step with the option.
            self::$circuit_breakers = $circuit_breakers;
            self::$circuit_breakers_loaded = true;
        }
    }

    /**
     * Add admin menu
     *
     * Registers the "HVAC Error Recovery" page under Tools for users with the
     * `manage_options` capability.
     */
    public static function add_admin_menu() {
        if (current_user_can('manage_options')) {
            add_submenu_page(
                'tools.php',
                'HVAC Error Recovery',
                'HVAC Error Recovery',
                'manage_options',
                'hvac-error-recovery',
                [__CLASS__, 'admin_page']
            );
        }
    }

    /**
     * Admin page
     *
     * Renders the emergency-mode notice, one row per operation type that has
     * an error count or a stored circuit breaker, and a summary of the
     * configured strategies.
     */
    public static function admin_page() {
        $stats = self::get_recovery_stats();
        $emergency_mode = self::is_emergency_mode();

        $error_counts = is_array($stats['error_counts']) ? $stats['error_counts'] : [];
        $circuit_breakers = is_array($stats['circuit_breakers']) ? $stats['circuit_breakers'] : [];

        // Include operations that only have a breaker entry so an open
        // circuit is always visible.
        $operations = array_unique(array_merge(array_keys($error_counts), array_keys($circuit_breakers)));
        $now = time();

        ?>
        <div class="wrap">
            <h1>HVAC Error Recovery System</h1>

            <?php if ($emergency_mode): ?>
            <div class="notice notice-error">
                <p><strong>Emergency Mode Active</strong> - Some plugin functionality may be disabled due to critical errors.</p>
                <p>
                    <button type="button" class="button button-primary" onclick="disableEmergencyMode()">
                        Disable Emergency Mode
                    </button>
                </p>
            </div>
            <?php endif; ?>

            <div class="card">
                <h2>Error Statistics</h2>
                <table class="wp-list-table widefat fixed striped">
                    <thead>
                        <tr>
                            <th>Operation Type</th>
                            <th>Error Count</th>
                            <th>Circuit Breaker</th>
                        </tr>
                    </thead>
                    <tbody>
                        <?php if (empty($operations)): ?>
                        <tr>
                            <td colspan="3">No errors recorded</td>
                        </tr>
                        <?php else: ?>
                        <?php foreach ($operations as $operation): ?>
                        <?php
                        $breaker_expiry = (int) ($circuit_breakers[$operation] ?? 0);
                        if ($breaker_expiry > $now) {
                            // wp_date() formats in the site timezone; fall back
                            // to date() on WordPress versions that lack it.
                            $expires_at = function_exists('wp_date')
                                ? wp_date('H:i:s', $breaker_expiry)
                                : date('H:i:s', $breaker_expiry);
                            $breaker_label = 'OPEN (expires: ' . $expires_at . ')';
                        } else {
                            $breaker_label = 'CLOSED';
                        }
                        ?>
                        <tr>
                            <td><?php echo esc_html($operation); ?></td>
                            <td><?php echo esc_html((string) ($error_counts[$operation] ?? 0)); ?></td>
                            <td><?php echo esc_html($breaker_label); ?></td>
                        </tr>
                        <?php endforeach; ?>
                        <?php endif; ?>
                    </tbody>
                </table>
            </div>

            <div class="card">
                <h2>Recovery Configuration</h2>
                <p>The error recovery system is configured with the following strategies:</p>
                <ul>
                    <li><strong>Database Queries:</strong> Retry with linear backoff (3 attempts)</li>
                    <li><strong>Cache Operations:</strong> Skip and continue without caching</li>
                    <li><strong>External APIs:</strong> Circuit breaker with 5-minute timeout</li>
                    <li><strong>File Operations:</strong> Graceful failure with safe defaults</li>
                </ul>
            </div>
        </div>

        <script>
        function disableEmergencyMode() {
            if (confirm('Are you sure you want to disable emergency mode?')) {
                fetch(ajaxurl, {
                    method: 'POST',
                    credentials: 'same-origin',
                    body: new URLSearchParams({
                        action: 'hvac_disable_emergency_mode',
                        nonce: '<?php echo esc_js(wp_create_nonce('hvac_emergency_mode')); ?>'
                    })
                })
                .then(response => response.json())
                .then(data => {
                    if (data.success) {
                        location.reload();
                    } else {
                        alert('Failed to disable emergency mode');
                    }
                })
                .catch(() => {
                    alert('Failed to disable emergency mode');
                });
            }
        }
        </script>
        <?php
    }
}
|