upskill-event-manager/includes/class-hvac-error-recovery.php
bengizmo afc221a98a feat: Implement comprehensive enterprise monitoring and optimization infrastructure
Add complete enterprise-level reliability, security, and performance systems:

## Core Monitoring Systems
- **Health Monitor**: 8 automated health checks with email alerts and REST API
- **Error Recovery**: 4 recovery strategies (retry, fallback, circuit breaker, graceful failure)
- **Security Monitor**: Real-time threat detection with automatic IP blocking
- **Performance Monitor**: Performance tracking with automated benchmarks and alerts

## Data Protection & Optimization
- **Backup Manager**: Automated backups with encryption, compression, and disaster recovery
- **Cache Optimizer**: Intelligent caching with 3 strategies and 5 specialized cache groups

## Enterprise Features
- Automated scheduling with WordPress cron integration
- Admin dashboards for all systems under Tools menu
- REST API endpoints for external monitoring
- WP-CLI commands for automation and CI/CD
- Comprehensive documentation (docs/MONITORING-SYSTEMS.md)
- Emergency response systems with immediate email alerts
- Circuit breaker pattern for external service failures
- Smart cache warming and invalidation
- Database query caching and optimization
- File integrity monitoring
- Performance degradation detection

## Integration
- Plugin architecture updated with proper initialization
- Singleton pattern for all monitoring classes
- WordPress hooks and filters integration
- Background job processing system
- Comprehensive error handling and logging

Systems provide enterprise-grade reliability with automated threat response,
proactive performance monitoring, and complete disaster recovery capabilities.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-07 04:08:52 -03:00

589 lines
No EOL
20 KiB
PHP

<?php
/**
* HVAC Error Recovery System
*
* Provides automatic error recovery and graceful degradation for plugin functionality
*
* @package HVAC_Community_Events
* @since 1.0.8
*/
if (!defined('ABSPATH')) {
exit;
}
/**
* HVAC_Error_Recovery class
*/
class HVAC_Error_Recovery {
/**
* Recovery strategies
*
* Identifiers stored under the 'strategy' key of each $recovery_config entry.
* execute_with_recovery() and handle_final_failure() branch on these values.
*/
const STRATEGY_RETRY = 'retry';
const STRATEGY_FALLBACK = 'fallback';
const STRATEGY_GRACEFUL_FAIL = 'graceful_fail';
const STRATEGY_CIRCUIT_BREAKER = 'circuit_breaker';
/**
* Error tracking
*
* $error_counts:     operation type => number of recorded failures; written to the
*                    'hvac_error_counts' option whenever it is changed.
* $circuit_breakers: operation type => Unix timestamp until which the circuit is
*                    considered open; written to the 'hvac_circuit_breakers' option
*                    by open_circuit().
*/
private static $error_counts = [];
private static $circuit_breakers = [];
/**
* Recovery configuration
*
* Keyed by operation type. Keys read by this class:
* - 'strategy'           one of the STRATEGY_* constants.
* - 'max_attempts'       how many times execute_with_recovery() calls the callback
*                        (treated as 1 when absent).
* - 'backoff_multiplier' multiplied by the attempt number to get the number of
*                        seconds slept between retries (treated as 1 when absent).
* - 'max_failures'       failure count at which should_open_circuit() reports true
*                        (treated as 5 when absent).
* - 'timeout'            seconds a circuit stays open (treated as 300 when absent).
* - 'fallback_callback'  name of a static method on this class, or any callable,
*                        invoked by execute_fallback(); null means "use the safe default".
*/
private static $recovery_config = [
'database_query' => [
'strategy' => self::STRATEGY_RETRY,
'max_attempts' => 3,
'backoff_multiplier' => 2,
'fallback_callback' => null
],
'cache_operation' => [
'strategy' => self::STRATEGY_FALLBACK,
'max_attempts' => 2,
'fallback_callback' => 'skip_cache'
],
// No 'max_attempts' here, so execute_with_recovery() calls the callback once per invocation.
'external_api' => [
'strategy' => self::STRATEGY_CIRCUIT_BREAKER,
'max_failures' => 5,
'timeout' => 300, // 5 minutes
'fallback_callback' => 'use_cached_data'
],
// Also the entry execute_with_recovery() falls back to for unknown operation types.
// For STRATEGY_GRACEFUL_FAIL, handle_final_failure() returns the safe default
// without invoking 'fallback_callback'.
'file_operation' => [
'strategy' => self::STRATEGY_GRACEFUL_FAIL,
'max_attempts' => 2,
'fallback_callback' => 'log_and_continue'
]
];
/**
 * Initialize error recovery system
 *
 * Registers the WordPress hooks used by this class:
 * - 'init'                  installs the PHP error and shutdown handlers.
 * - 'wp_die_handler'        only when a handle_wp_die() method is actually defined.
 * - 'hvac_operation_failed' lets other code report failures (3 arguments).
 * - 'admin_menu'            adds the Tools submenu page (admin requests only).
 * - 'wp_scheduled_delete'   prunes stale error data.
 *
 * @return void
 */
public static function init() {
    // Set custom error handler for plugin operations
    add_action('init', [__CLASS__, 'setup_error_handling']);

    // Hook into WordPress error handling.
    // 'wp_die_handler' is a filter, and registering a callback that does not exist
    // makes the filter call fail as soon as wp_die() runs. Only register the hook
    // when the handler method is really present on this class.
    if (method_exists(__CLASS__, 'handle_wp_die')) {
        add_filter('wp_die_handler', [__CLASS__, 'handle_wp_die'], 10, 1);
    }

    // Monitor and recover from specific plugin errors
    add_action('hvac_operation_failed', [__CLASS__, 'handle_operation_failure'], 10, 3);

    // Admin interface for error recovery stats
    if (is_admin()) {
        add_action('admin_menu', [__CLASS__, 'add_admin_menu']);
    }

    // Cleanup old error data
    add_action('wp_scheduled_delete', [__CLASS__, 'cleanup_old_errors']);
}
/**
 * Install the PHP error handler and the shutdown (fatal error) handler.
 *
 * Nothing is installed unless is_plugin_context() reports that the current
 * call stack involves a plugin file.
 *
 * @return void
 */
public static function setup_error_handling() {
    if (!self::is_plugin_context()) {
        return;
    }

    $owner = __CLASS__;

    // Receive every error level; handle_php_error() decides what to act on.
    set_error_handler([$owner, 'handle_php_error'], E_ALL);

    // Fatal errors bypass the error handler, so inspect them at shutdown.
    register_shutdown_function([$owner, 'handle_fatal_error']);
}
/**
 * Tell whether the current call stack passes through a plugin file.
 *
 * Inspects at most the 10 innermost stack frames and looks for the
 * substring 'hvac-community-events' in each frame's file path.
 *
 * @return bool True when any inspected frame's file path contains the marker.
 */
private static function is_plugin_context() {
    $frames = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 10);

    foreach ($frames as $frame) {
        // Frames for internal calls may carry no 'file' entry.
        $path = isset($frame['file']) ? $frame['file'] : '';

        if (strpos($path, 'hvac-community-events') === false) {
            continue;
        }

        return true;
    }

    return false;
}
/**
 * Execute operation with error recovery
 *
 * Looks up the recovery configuration for $operation_type (unknown types use the
 * 'file_operation' entry), then calls $callback up to 'max_attempts' times.
 *
 * - A successful call resets the error count for the operation type and returns
 *   the callback's result.
 * - Each Exception increments the error count and is logged as a warning.
 * - Circuit breaker strategy: if the circuit is already open the fallback is
 *   returned without calling $callback; after any failure that reaches the
 *   failure threshold the circuit is opened and the fallback is returned.
 * - Retry strategy: sleeps backoff_multiplier * attempt seconds between attempts
 *   (this blocks the current request).
 * - When every attempt failed, handle_final_failure() decides the outcome.
 *
 * Only Exception is caught; PHP Error throwables propagate to the caller.
 *
 * @param string   $operation_type Type of operation
 * @param callable $callback       Operation callback
 * @param array    $args           Operation arguments
 * @return mixed Operation result or fallback
 * @throws Exception The last exception, when the strategy re-throws after all attempts.
 */
public static function execute_with_recovery($operation_type, $callback, $args = []) {
    $config = self::$recovery_config[$operation_type] ?? self::$recovery_config['file_operation'];
    $strategy = $config['strategy'];
    $uses_breaker = ($strategy === self::STRATEGY_CIRCUIT_BREAKER);

    // Always attempt at least once so a failure always has an exception to report.
    $max_attempts = max(1, (int) ($config['max_attempts'] ?? 1));
    $last_error = null;

    // Check circuit breaker
    if ($uses_breaker && self::is_circuit_open($operation_type)) {
        return self::execute_fallback($operation_type, $config, $args);
    }

    for ($attempt = 1; $attempt <= $max_attempts; $attempt++) {
        try {
            // Execute operation
            $result = call_user_func_array($callback, $args);

            // Reset error count on success
            self::reset_error_count($operation_type);

            return $result;
        } catch (Exception $e) {
            $last_error = $e;

            // Increment error count
            self::increment_error_count($operation_type);

            // Log error
            HVAC_Logger::warning(
                "Operation failed (attempt $attempt): {$e->getMessage()}",
                'Error Recovery'
            );

            // Evaluate the breaker after every failure, including the last attempt.
            // Checking only while attempts remain means a configuration with a
            // single attempt could never open its circuit.
            if ($uses_breaker && self::should_open_circuit($operation_type)) {
                self::open_circuit($operation_type, $config['timeout'] ?? 300);
                return self::execute_fallback($operation_type, $config, $args);
            }

            // Back off before the next attempt (retry strategy only).
            if ($strategy === self::STRATEGY_RETRY && $attempt < $max_attempts) {
                // sleep() needs a non-negative integer number of seconds.
                $delay = max(0, (int) (($config['backoff_multiplier'] ?? 1) * $attempt));
                sleep($delay);
            }
        }
    }

    // All attempts failed - execute fallback or fail gracefully
    return self::handle_final_failure($operation_type, $config, $last_error, $args);
}
/**
 * Handle final operation failure
 *
 * Logs the failure and resolves it according to the configured strategy:
 * - fallback / circuit breaker: returns the result of execute_fallback().
 * - graceful fail: returns the safe default for the operation type.
 * - anything else (retry): re-throws the last error.
 *
 * @param string         $operation_type Type of operation that failed.
 * @param array          $config         Recovery configuration entry in effect.
 * @param Throwable|null $error          Last error raised by the operation, if any.
 * @param array          $args           Arguments the operation was called with.
 * @return mixed Fallback result or safe default.
 * @throws Throwable The last error, or a RuntimeException when none was captured.
 */
private static function handle_final_failure($operation_type, $config, $error, $args) {
    // $error can be null when no attempt was made; never dereference it blindly.
    $has_error = ($error instanceof Throwable);
    $reason = $has_error ? $error->getMessage() : 'no exception was captured';

    HVAC_Logger::error(
        "Operation $operation_type failed after all attempts: " . $reason,
        'Error Recovery'
    );

    switch ($config['strategy'] ?? null) {
        case self::STRATEGY_FALLBACK:
        case self::STRATEGY_CIRCUIT_BREAKER:
            return self::execute_fallback($operation_type, $config, $args);

        case self::STRATEGY_GRACEFUL_FAIL:
            // Return safe default value
            return self::get_safe_default($operation_type);

        default:
            // Re-throw exception for retry strategy
            if ($has_error) {
                throw $error;
            }
            throw new RuntimeException(
                "Operation $operation_type failed after all attempts: " . $reason
            );
    }
}
/**
 * Run the configured fallback for an operation type.
 *
 * Resolution order for $config['fallback_callback']:
 * 1. empty or missing: return the safe default;
 * 2. a string naming a method of this class: call it with ($operation_type, $args);
 * 3. any other callable: call it with $args spread as its arguments;
 * 4. anything else: return the safe default.
 *
 * An Exception thrown by the fallback is logged and replaced by the safe default.
 *
 * @param string $operation_type Type of operation that failed.
 * @param array  $config         Recovery configuration entry in effect.
 * @param array  $args           Arguments the operation was called with.
 * @return mixed Fallback result or safe default.
 */
private static function execute_fallback($operation_type, $config, $args) {
    $handler = isset($config['fallback_callback']) ? $config['fallback_callback'] : null;

    if ($handler) {
        try {
            $is_own_method = is_string($handler) && method_exists(__CLASS__, $handler);

            if ($is_own_method) {
                return call_user_func([__CLASS__, $handler], $operation_type, $args);
            }

            if (is_callable($handler)) {
                return call_user_func_array($handler, $args);
            }
        } catch (Exception $failure) {
            HVAC_Logger::error(
                "Fallback also failed for $operation_type: " . $failure->getMessage(),
                'Error Recovery'
            );
        }
    }

    return self::get_safe_default($operation_type);
}
/**
 * Look up the neutral value returned when an operation cannot be recovered.
 *
 * @param string $operation_type Type of operation.
 * @return mixed Empty array for 'database_query', an ['error' => ...] array for
 *               'external_api', false for 'file_operation', and null for
 *               'cache_operation' or any type not listed.
 */
private static function get_safe_default($operation_type) {
    static $safe_values = [
        'database_query' => [],
        'cache_operation' => null,
        'external_api' => ['error' => 'Service temporarily unavailable'],
        'file_operation' => false
    ];

    // isset() is false for the null entry as well, which yields null either way.
    return isset($safe_values[$operation_type]) ? $safe_values[$operation_type] : null;
}
/**
 * Circuit breaker management
 *
 * Breaker state is kept in self::$circuit_breakers (operation type => Unix
 * timestamp until which the circuit is open) and persisted in the
 * 'hvac_circuit_breakers' option so that an open circuit is honoured by
 * later requests as well.
 */

/**
 * Merge the persisted breaker state into memory, once per request.
 *
 * Entries already set in memory win over persisted ones.
 *
 * @return void
 */
private static function load_circuit_breakers() {
    static $loaded = false;

    if ($loaded) {
        return;
    }
    $loaded = true;

    $persisted = get_option('hvac_circuit_breakers', []);
    if (is_array($persisted)) {
        self::$circuit_breakers = self::$circuit_breakers + $persisted;
    }
}

/**
 * Tell whether the circuit for an operation type is currently open.
 *
 * @param string $operation_type Type of operation.
 * @return bool True while the stored expiry timestamp lies in the future.
 */
private static function is_circuit_open($operation_type) {
    self::load_circuit_breakers();

    return isset(self::$circuit_breakers[$operation_type]) &&
        self::$circuit_breakers[$operation_type] > time();
}

/**
 * Tell whether the failure count has reached the configured threshold.
 *
 * @param string $operation_type Type of operation.
 * @return bool True when the error count is at least 'max_failures' (5 when unset).
 */
private static function should_open_circuit($operation_type) {
    $error_count = self::get_error_count($operation_type);
    $config = self::$recovery_config[$operation_type] ?? [];

    return $error_count >= ($config['max_failures'] ?? 5);
}

/**
 * Open the circuit for an operation type and persist the new state.
 *
 * @param string $operation_type Type of operation.
 * @param int    $timeout        Seconds the circuit stays open.
 * @return void
 */
private static function open_circuit($operation_type, $timeout) {
    // Load first so breakers persisted by other requests are not overwritten.
    self::load_circuit_breakers();

    self::$circuit_breakers[$operation_type] = time() + $timeout;
    update_option('hvac_circuit_breakers', self::$circuit_breakers);

    HVAC_Logger::warning(
        "Circuit breaker opened for $operation_type (timeout: {$timeout}s)",
        'Error Recovery'
    );
}
/**
 * Error counting
 *
 * Counts are kept in self::$error_counts (operation type => failures) and
 * persisted in the 'hvac_error_counts' option. The persisted values are loaded
 * before any read or write, so counts accumulate across requests instead of
 * being overwritten by whatever the current request happened to record.
 */

/**
 * Merge the persisted error counts into memory, once per request.
 *
 * Entries already set in memory win over persisted ones.
 *
 * @return void
 */
private static function load_error_counts() {
    static $loaded = false;

    if ($loaded) {
        return;
    }
    $loaded = true;

    $persisted = get_option('hvac_error_counts', []);
    if (is_array($persisted)) {
        self::$error_counts = self::$error_counts + $persisted;
    }
}

/**
 * Record one more failure for an operation type and persist the counts.
 *
 * @param string $operation_type Type of operation.
 * @return void
 */
private static function increment_error_count($operation_type) {
    self::load_error_counts();

    if (!isset(self::$error_counts[$operation_type])) {
        self::$error_counts[$operation_type] = 0;
    }
    self::$error_counts[$operation_type]++;

    update_option('hvac_error_counts', self::$error_counts);
}

/**
 * Read the recorded failure count for an operation type.
 *
 * @param string $operation_type Type of operation.
 * @return int Number of recorded failures, 0 when none are recorded.
 */
private static function get_error_count($operation_type) {
    self::load_error_counts();

    return self::$error_counts[$operation_type] ?? 0;
}

/**
 * Set the failure count for an operation type back to zero and persist the counts.
 *
 * @param string $operation_type Type of operation.
 * @return void
 */
private static function reset_error_count($operation_type) {
    // Load first so counts for other operation types are not wiped from the option.
    self::load_error_counts();

    self::$error_counts[$operation_type] = 0;
    update_option('hvac_error_counts', self::$error_counts);
}
/**
* Fallback implementations
*/
/**
 * Fallback that gives up on the cache operation.
 *
 * Writes an info log entry and produces no value. Neither parameter is read.
 *
 * @param string $operation_type Type of operation that failed.
 * @param array  $args           Arguments the operation was called with.
 * @return null
 */
public static function skip_cache($operation_type, $args) {
    $note = "Cache operation skipped due to errors";
    HVAC_Logger::info($note, 'Error Recovery');

    return null;
}
/**
 * Fallback that serves a previously stored transient, if one exists.
 *
 * The transient name is 'hvac_fallback_' followed by the MD5 of the operation
 * type concatenated with the serialized arguments. When no transient is found
 * the safe default for the operation type is returned instead.
 *
 * @param string $operation_type Type of operation that failed.
 * @param array  $args           Arguments the operation was called with.
 * @return mixed Stored transient value, or the safe default.
 */
public static function use_cached_data($operation_type, $args) {
    $transient_name = 'hvac_fallback_' . md5($operation_type . serialize($args));
    $stale = get_transient($transient_name);

    // get_transient() signals "nothing stored" with boolean false.
    if ($stale === false) {
        return self::get_safe_default($operation_type);
    }

    HVAC_Logger::info("Using stale cached data for $operation_type", 'Error Recovery');

    return $stale;
}
/**
 * Fallback that records the failure and reports success to the caller.
 *
 * $args is accepted to match the fallback signature but is not read.
 *
 * @param string $operation_type Type of operation that failed.
 * @param array  $args           Arguments the operation was called with.
 * @return bool Always true.
 */
public static function log_and_continue($operation_type, $args) {
    $note = "Continuing after failed $operation_type";
    HVAC_Logger::info($note, 'Error Recovery');

    return true;
}
/**
 * Handle PHP errors
 *
 * Error handler callback installed by setup_error_handling(). Errors raised
 * outside plugin files, and errors excluded by the current error_reporting()
 * level (for example calls silenced with the @ operator), are passed on to
 * PHP's standard handler. Everything else is logged and marked as handled.
 *
 * @param int    $severity Error level (one of the E_* constants).
 * @param string $message  Error message.
 * @param string $file     File in which the error was raised.
 * @param int    $line     Line on which the error was raised.
 * @return bool False to let PHP's standard handler run, true when handled here.
 */
public static function handle_php_error($severity, $message, $file, $line) {
    // Only handle errors from plugin files
    if (strpos($file, 'hvac-community-events') === false) {
        return false;
    }

    // A custom handler is invoked regardless of error_reporting(), so the
    // configured level (and @-suppression) has to be honoured explicitly.
    if (!(error_reporting() & $severity)) {
        return false;
    }

    $error_types = [
        E_ERROR => 'Error',
        E_WARNING => 'Warning',
        E_NOTICE => 'Notice',
        E_DEPRECATED => 'Deprecated',
        E_RECOVERABLE_ERROR => 'Recoverable Error',
        E_USER_ERROR => 'User Error',
        E_USER_WARNING => 'User Warning',
        E_USER_NOTICE => 'User Notice',
        E_USER_DEPRECATED => 'User Deprecated'
    ];
    $error_type = $error_types[$severity] ?? 'Unknown';

    HVAC_Logger::error(
        "PHP $error_type: $message in $file:$line",
        'Error Recovery'
    );

    // Don't execute PHP internal error handler
    return true;
}
/**
 * Shutdown callback that reacts to fatal errors raised in plugin files.
 *
 * Reads the last PHP error; when it is one of E_ERROR, E_CORE_ERROR,
 * E_COMPILE_ERROR or E_PARSE and its file path contains
 * 'hvac-community-events', the error is logged and emergency recovery starts.
 *
 * @return void
 */
public static function handle_fatal_error() {
    $last = error_get_last();
    if (!$last) {
        return;
    }

    $fatal_levels = [E_ERROR, E_CORE_ERROR, E_COMPILE_ERROR, E_PARSE];
    if (!in_array($last['type'], $fatal_levels)) {
        return;
    }

    // Fatal errors from other code are not this plugin's concern.
    if (strpos($last['file'], 'hvac-community-events') === false) {
        return;
    }

    HVAC_Logger::error(
        "Fatal Error: {$last['message']} in {$last['file']}:{$last['line']}",
        'Error Recovery'
    );

    // Flag emergency mode and notify the administrator.
    self::emergency_recovery($last);
}
/**
 * Emergency recovery for fatal errors
 *
 * Stores the 'hvac_emergency_mode' option (enabled flag, the error, and the
 * current timestamp) and e-mails the site administrator.
 *
 * The notification is sent only when emergency mode was not already active:
 * a fatal error that repeats on every request would otherwise send one e-mail
 * per request. "Active" uses the same 24-hour window as is_emergency_mode().
 *
 * @param array $error Error array as returned by error_get_last()
 *                     (reads 'message', 'file' and 'line').
 * @return void
 */
private static function emergency_recovery($error) {
    $now = time();

    // Inspect the previous state before overwriting it.
    $previous = get_option('hvac_emergency_mode', false);
    $already_active = is_array($previous)
        && !empty($previous['enabled'])
        && isset($previous['timestamp'])
        && ($now - (int) $previous['timestamp']) <= 86400;

    // Create emergency flag to disable problematic functionality
    update_option('hvac_emergency_mode', [
        'enabled' => true,
        'error' => $error,
        'timestamp' => $now
    ]);

    if ($already_active) {
        return;
    }

    // This runs during shutdown after a fatal error; calling an unavailable
    // function here would raise a second fatal error.
    if (!function_exists('wp_mail')) {
        return;
    }

    // Send emergency notification
    $admin_email = get_option('admin_email');
    $site_name = get_bloginfo('name');

    wp_mail(
        $admin_email,
        "[$site_name] HVAC Plugin Emergency Mode Activated",
        "A fatal error occurred in the HVAC plugin and emergency mode has been activated.\n\n" .
        "Error: {$error['message']}\n" .
        "File: {$error['file']}:{$error['line']}\n\n" .
        "Please check the plugin status and contact support if needed."
    );
}
/**
 * Check if emergency mode is active
 *
 * Emergency mode is active when the 'hvac_emergency_mode' option holds an array
 * with a truthy 'enabled' flag that was stamped within the last 24 hours.
 * An expired flag (or one without a timestamp) is deleted as a side effect.
 *
 * @return bool True while emergency mode is active.
 */
public static function is_emergency_mode() {
    $emergency = get_option('hvac_emergency_mode', false);

    // Tolerate a missing, malformed or partially populated option value.
    if (!is_array($emergency) || empty($emergency['enabled'])) {
        return false;
    }

    $activated_at = isset($emergency['timestamp']) ? (int) $emergency['timestamp'] : 0;

    // Auto-disable after 24 hours
    if (time() - $activated_at > 86400) {
        delete_option('hvac_emergency_mode');
        return false;
    }

    return true;
}
/**
 * Turn emergency mode off.
 *
 * Removes the 'hvac_emergency_mode' option and writes an info log entry.
 *
 * @return void
 */
public static function disable_emergency_mode() {
    $flag_option = 'hvac_emergency_mode';
    delete_option($flag_option);

    $note = 'Emergency mode disabled';
    HVAC_Logger::info($note, 'Error Recovery');
}
/**
 * Handle operation failure action
 *
 * Callback for the 'hvac_operation_failed' action, which other parts of the
 * plugin can fire to report a failure. Logs the report, increments the error
 * count, and opens the circuit when the operation type uses the circuit
 * breaker strategy and its failure threshold has been reached.
 *
 * @param string $operation_type Type of operation that failed.
 * @param string $error_message  Description of the failure.
 * @param array  $context        Extra context from the reporter (not read here).
 * @return void
 */
public static function handle_operation_failure($operation_type, $error_message, $context = []) {
    // This can be triggered by other parts of the plugin
    HVAC_Logger::warning(
        "Operation failure reported: $operation_type - $error_message",
        'Error Recovery'
    );

    self::increment_error_count($operation_type);

    // Check if circuit breaker should be triggered.
    // Unknown operation types have no configuration entry, so 'strategy' must be
    // read defensively instead of indexing an empty array.
    $config = self::$recovery_config[$operation_type] ?? [];
    $strategy = $config['strategy'] ?? null;

    if ($strategy === self::STRATEGY_CIRCUIT_BREAKER && self::should_open_circuit($operation_type)) {
        self::open_circuit($operation_type, $config['timeout'] ?? 300);
    }
}
/**
 * Collect the persisted recovery state for display.
 *
 * @return array Array with keys 'error_counts' and 'circuit_breakers' (each the
 *               stored option value, or an empty array when unset) and
 *               'emergency_mode' (the stored option value, or false when unset).
 */
public static function get_recovery_stats() {
    $stats = [];

    $stats['error_counts'] = get_option('hvac_error_counts', []);
    $stats['circuit_breakers'] = get_option('hvac_circuit_breakers', []);
    $stats['emergency_mode'] = get_option('hvac_emergency_mode', false);

    return $stats;
}
/**
 * Prune stale error-tracking data.
 *
 * Clears the persisted error counts when more than 24 hours have passed since
 * the last recorded reset, then drops every persisted circuit breaker whose
 * expiry timestamp lies in the past. The breaker option is rewritten only when
 * at least one entry was dropped.
 *
 * @return void
 */
public static function cleanup_old_errors() {
    // Daily reset of the error counters.
    $previous_reset = get_option('hvac_error_reset_time', 0);
    $elapsed = time() - $previous_reset;

    if ($elapsed > 86400) { // 24 hours
        update_option('hvac_error_counts', []);
        update_option('hvac_error_reset_time', time());
    }

    // Keep only the breakers that have not expired yet.
    $stored = get_option('hvac_circuit_breakers', []);
    $now = time();
    $kept = [];
    $dropped = 0;

    foreach ($stored as $operation => $expires_at) {
        if ($expires_at < $now) {
            $dropped++;
            continue;
        }
        $kept[$operation] = $expires_at;
    }

    if ($dropped > 0) {
        update_option('hvac_circuit_breakers', $kept);
    }
}
/**
 * Register the "HVAC Error Recovery" page under the Tools menu.
 *
 * Does nothing for users without the 'manage_options' capability.
 *
 * @return void
 */
public static function add_admin_menu() {
    if (!current_user_can('manage_options')) {
        return;
    }

    $title = 'HVAC Error Recovery';
    $renderer = [__CLASS__, 'admin_page'];

    add_submenu_page('tools.php', $title, $title, 'manage_options', 'hvac-error-recovery', $renderer);
}
/**
 * Admin page
 *
 * Renders the Tools > HVAC Error Recovery screen: an emergency-mode notice with
 * a "disable" button (when emergency mode is active), a table of error counts
 * with the circuit breaker state per operation type, and a static summary of
 * the configured strategies.
 *
 * The button posts the 'hvac_disable_emergency_mode' AJAX action with a nonce
 * created for 'hvac_emergency_mode'. NOTE(review): the handler for that action
 * is not registered in this class - confirm it is registered elsewhere.
 *
 * All dynamic values are escaped for their output context, and the breaker
 * expiry is formatted in the site timezone.
 *
 * @return void
 */
public static function admin_page() {
    $stats = self::get_recovery_stats();
    $emergency_mode = self::is_emergency_mode();
    ?>
    <div class="wrap">
        <h1>HVAC Error Recovery System</h1>
        <?php if ($emergency_mode): ?>
        <div class="notice notice-error">
            <p><strong>Emergency Mode Active</strong> - Some plugin functionality may be disabled due to critical errors.</p>
            <p>
                <button type="button" class="button button-primary" onclick="disableEmergencyMode()">
                    Disable Emergency Mode
                </button>
            </p>
        </div>
        <?php endif; ?>
        <div class="card">
            <h2>Error Statistics</h2>
            <table class="wp-list-table widefat fixed striped">
                <thead>
                    <tr>
                        <th>Operation Type</th>
                        <th>Error Count</th>
                        <th>Circuit Breaker</th>
                    </tr>
                </thead>
                <tbody>
                    <?php if (empty($stats['error_counts'])): ?>
                    <tr>
                        <td colspan="3">No errors recorded</td>
                    </tr>
                    <?php else: ?>
                    <?php foreach ($stats['error_counts'] as $operation => $count): ?>
                    <tr>
                        <td><?php echo esc_html($operation); ?></td>
                        <td><?php echo esc_html($count); ?></td>
                        <td>
                            <?php
                            $breaker_expiry = $stats['circuit_breakers'][$operation] ?? 0;
                            if ($breaker_expiry > time()) {
                                // wp_date() formats in the site timezone; date() would print UTC here.
                                $expires_label = wp_date('H:i:s', (int) $breaker_expiry);
                                echo esc_html('OPEN (expires: ' . $expires_label . ')');
                            } else {
                                echo esc_html('CLOSED');
                            }
                            ?>
                        </td>
                    </tr>
                    <?php endforeach; ?>
                    <?php endif; ?>
                </tbody>
            </table>
        </div>
        <div class="card">
            <h2>Recovery Configuration</h2>
            <p>The error recovery system is configured with the following strategies:</p>
            <ul>
                <li><strong>Database Queries:</strong> Retry with exponential backoff (3 attempts)</li>
                <li><strong>Cache Operations:</strong> Skip and continue without caching</li>
                <li><strong>External APIs:</strong> Circuit breaker with 5-minute timeout</li>
                <li><strong>File Operations:</strong> Graceful failure with safe defaults</li>
            </ul>
        </div>
    </div>
    <script>
    function disableEmergencyMode() {
        if (confirm('Are you sure you want to disable emergency mode?')) {
            fetch(ajaxurl, {
                method: 'POST',
                body: new URLSearchParams({
                    action: 'hvac_disable_emergency_mode',
                    nonce: '<?php echo esc_js(wp_create_nonce('hvac_emergency_mode')); ?>'
                })
            })
            .then(response => response.json())
            .then(data => {
                if (data.success) {
                    location.reload();
                } else {
                    alert('Failed to disable emergency mode');
                }
            });
        }
    }
    </script>
    <?php
}
}