'event_title', 'description' => 'event_description', 'start_date' => 'event_start_datetime', 'end_date' => 'event_end_datetime', 'venue' => 'venue_data', 'organizer' => 'organizer_data', 'cost' => 'event_cost', 'capacity' => 'event_capacity', 'url' => 'event_url' ];

    /**
     * Constructor.
     *
     * Only logs a warning when the ANTHROPIC_API_KEY constant is missing or
     * empty; construction itself never fails. The same check is repeated in
     * make_api_request(), which is where a missing key becomes an error.
     */
    private function __construct() {
        if (!defined('ANTHROPIC_API_KEY') || empty(ANTHROPIC_API_KEY)) {
            error_log('HVAC AI Event Populator: ANTHROPIC_API_KEY not defined in wp-config.php');
        }
    }

    /**
     * Main method to populate event data from input.
     *
     * Pipeline: validate -> detect type -> cache lookup -> build prompt ->
     * call the API -> parse -> post-process -> cache.
     *
     * @param string $input      User input (URL, text, or description)
     * @param string $input_type Type of input: 'url', 'text', 'description', or 'auto' to detect it
     * @return array|WP_Error Parsed event data or error
     */
    public function populate_from_input(string $input, string $input_type = 'auto'): array|WP_Error {
        // Normalize once so that validation, type detection, URL fetching and
        // the cache key all see the same string. The validators only trim a
        // local copy, so without this a URL with surrounding whitespace passes
        // validation but fails the URL check in build_prompt() and is never fetched.
        $input = trim($input);

        $validation = $this->validate_input($input, $input_type);
        if (is_wp_error($validation)) {
            return $validation;
        }

        // Auto-detect input type if not specified
        if ($input_type === 'auto') {
            $input_type = $this->detect_input_type($input);
        }

        // Check cache first
        $cache_key = $this->generate_cache_key($input);
        $cached_response = $this->get_cached_response($cache_key);
        if ($cached_response !== false) {
            error_log('HVAC AI: Using cached response for input');
            return $cached_response;
        }

        // Build the prompt from the input plus site context (existing venues/organizers)
        $context = $this->build_context();
        $prompt = $this->build_prompt($input, $input_type, $context);

        $api_response = $this->make_api_request($prompt);
        if (is_wp_error($api_response)) {
            return $api_response;
        }

        $parsed_data = $this->parse_api_response($api_response);
        if (is_wp_error($parsed_data)) {
            return $parsed_data;
        }

        // Post-process data (venue/organizer matching, datetime assembly, sanitizing)
        $processed_data = $this->post_process_data($parsed_data);

        // Only successful results are cached
        $this->cache_response($cache_key, $processed_data);

        return $processed_data;
    }

    /**
     * Validate user input.
     *
     * @param string $input      User input
     * @param string $input_type Input type; URL syntax is only enforced for 'url'
     * @return true|WP_Error True when valid, otherwise an error with HTTP status 400
     */
    private function validate_input(string $input, string $input_type): bool|WP_Error {
        $input = trim($input);

        // Check minimum length (strlen counts bytes, not characters)
        if (strlen($input) < 10) {
            return new WP_Error(
                'input_too_short',
                'Input must be at least 10 characters long.',
                ['status' => 400]
            );
        }

        // Check maximum length (prevent token overflow)
        if (strlen($input) > 50000) {
            return new WP_Error(
                'input_too_long',
                'Input is too large. Please provide a shorter description or URL.',
                ['status' => 400]
            );
        }

        // URL-specific validation
        if ($input_type === 'url' && !filter_var($input, FILTER_VALIDATE_URL)) {
            return new WP_Error(
                'invalid_url',
                'Please provide a valid URL.',
                ['status' => 400]
            );
        }

        return true;
    }

    /**
     * Auto-detect input type.
     *
     * @param string $input User input
     * @return string Detected type: 'url', 'text', or 'description'
     */
    private function detect_input_type(string $input): string {
        $input = trim($input);

        if (filter_var($input, FILTER_VALIDATE_URL)) {
            return 'url';
        }

        // "text" = looks like an email/document: header-style keywords,
        // at least three line breaks, or simply long (> 500 bytes).
        $has_header_keywords = preg_match('/\b(from|to|subject|date):\s/i', $input);
        $has_several_lines = preg_match('/\n.*\n.*\n/s', $input);
        if ($has_header_keywords || $has_several_lines || strlen($input) > 500) {
            return 'text';
        }

        // Default to description for short, unstructured input
        return 'description';
    }

    /**
     * Build context for the AI prompt.
     *
     * @return array Keys: current_date, current_datetime, venues, organizers
     */
    private function build_context(): array {
        return [
            'current_date' => current_time('Y-m-d'),
            'current_datetime' => current_time('c'),
            'venues' => $this->get_existing_venues(),
            'organizers' => $this->get_existing_organizers(),
        ];
    }

    /**
     * Get existing venues for context.
     *
     * Reads up to 50 published 'tribe_venue' posts ordered by title.
     *
     * @return array List of ['name', 'address', 'id'] entries
     */
    private function get_existing_venues(): array {
        $venues = get_posts([
            'post_type' => 'tribe_venue',
            'posts_per_page' => 50,
            'post_status' => 'publish',
            'orderby' => 'post_title',
            'order' => 'ASC'
        ]);

        $venue_list = [];
        foreach ($venues as $venue) {
            $address = get_post_meta($venue->ID, '_VenueAddress', true);
            $city = get_post_meta($venue->ID, '_VenueCity', true);

            $venue_list[] = [
                'name' => $venue->post_title,
                // Trimming ", " drops the separator when either part is empty
                'address' => trim($address . ', ' . $city, ', '),
                'id' => $venue->ID
            ];
        }

        return $venue_list;
    }

    /**
     * Get existing organizers for context.
     *
     * Reads up to 50 published 'tribe_organizer' posts ordered by title.
     *
     * @return array List of ['name', 'email', 'phone', 'id'] entries
     */
    private function get_existing_organizers(): array {
        $organizers = get_posts([
            'post_type' => 'tribe_organizer',
            'posts_per_page' => 50,
            'post_status' => 'publish',
            'orderby' => 'post_title',
            'order' => 'ASC'
        ]);

        $organizer_list = [];
        foreach ($organizers as $organizer) {
            $organizer_list[] = [
                'name' => $organizer->post_title,
                'email' => get_post_meta($organizer->ID, '_OrganizerEmail', true),
                'phone' => get_post_meta($organizer->ID, '_OrganizerPhone', true),
                'id' => $organizer->ID
            ];
        }

        return $organizer_list;
    }

    /**
     * Build structured prompt for Claude API.
     *
     * For 'url' input the page content is fetched through fetch_url_with_jina();
     * when that fails the URL itself is sent along with a note about the failure.
     *
     * @param string $input      User input
     * @param string $input_type Type of input
     * @param array  $context    Context data from build_context()
     * @return string Formatted prompt
     */
    private function build_prompt(string $input, string $input_type, array $context): string {
        // Only the first 20 names of each list are sent, to keep the prompt small
        $venue_context = '';
        if (!empty($context['venues'])) {
            $venue_names = array_slice(array_column($context['venues'], 'name'), 0, 20);
            $venue_context = "Existing venues: " . implode(', ', $venue_names);
        }

        $organizer_context = '';
        if (!empty($context['organizers'])) {
            $organizer_names = array_slice(array_column($context['organizers'], 'name'), 0, 20);
            $organizer_context = "Existing organizers: " . implode(', ', $organizer_names);
        }

        // For URLs, fetch content using Jina.ai reader
        $actual_content = $input;
        $source_note = '';

        if ($input_type === 'url' && filter_var($input, FILTER_VALIDATE_URL)) {
            $fetched_content = $this->fetch_url_with_jina($input);

            if (!is_wp_error($fetched_content)) {
                $actual_content = $fetched_content;
                $source_note = "\n\nSOURCE: Content extracted from {$input}";
            } else {
                $source_note = "\n\nNOTE: Could not fetch URL content ({$fetched_content->get_error_message()}). Please extract what you can from the URL itself.";
            }
        }

        $input_instruction = match($input_type) {
            'url' => "Please extract event information from this webpage content:",
            'text' => "Please extract event information from this text content (likely from an email or document):",
            'description' => "Please extract event information from this brief description:",
            default => "Please extract event information from the following content:"
        };

        // NOTE(review): in the reviewed source the start of this heredoc was
        // garbled ("return << 80% 8. Convert ..."): the opener, the interpolation
        // of the variables prepared above, and instructions 1-7 were missing, so
        // the method did not parse. Everything from item 8 onward is the surviving
        // text. The preamble and item 7 below are a minimal reconstruction that
        // restores valid syntax and actually sends the prepared content/context
        // to the model -- restore the original wording from version control.
        return <<<PROMPT
{$input_instruction}

{$actual_content}{$source_note}

CONTEXT:
Today's date: {$context['current_date']}
{$venue_context}
{$organizer_context}

INSTRUCTIONS:
7. Only reuse an existing venue or organizer listed above when the match confidence is > 80%
8. Convert relative dates to absolute dates (e.g., "next Tuesday" to actual date)
9. Handle both in-person and virtual events appropriately
10. For event_image_url: Only include images that are at least 200x200 pixels - ignore favicons, icons, and small logos
11. If multiple events are found, extract only the first/primary one
12. CRITICAL: For virtual/online events (webinars, online training, virtual conferences), set ALL venue fields to null - do not use "Virtual", "Online", or any venue name for virtual events
13. Set confidence scores based on how explicitly the information is stated:
    - 1.0 = Explicitly stated with exact details
    - 0.8 = Clearly stated but some interpretation needed
    - 0.6 = Somewhat implied or requires inference
    - 0.4 = Vague reference that might be correct
    - 0.2 = Highly uncertain, mostly guessing
    - 0.0 = Information not present

OUTPUT FORMAT:
Return ONLY a valid JSON object with this exact structure (use null for missing fields):

{
    "title": "string or null",
    "description": "string (NEVER null - always generate professional training description)",
    "start_date": "YYYY-MM-DD or null",
    "start_time": "HH:MM or null",
    "end_date": "YYYY-MM-DD or null",
    "end_time": "HH:MM or null",
    "venue_name": "string or null",
    "venue_address": "string or null",
    "venue_city": "string or null",
    "venue_state": "string or null",
    "venue_zip": "string or null",
    "organizer_name": "string or null",
    "organizer_email": "string or null",
    "organizer_phone": "string or null",
    "website": "string or null",
    "cost": "number or null",
    "capacity": "number or null",
    "event_url": "string or null",
    "event_image_url": "string or null",
    "price": "number or null",
    "confidence": {
        "overall": 0.0-1.0,
        "per_field": {
            "title": 0.0-1.0,
            "dates": 0.0-1.0,
            "venue": 0.0-1.0,
            "organizer": 0.0-1.0,
            "cost": 0.0-1.0
        }
    }
}

IMPORTANT: Return ONLY the JSON object, no explanatory text before or after.
PROMPT;
    }

    /**
     * Fetch URL content using Jina.ai reader.
     *
     * @param string $url URL to fetch
     * @return string|WP_Error Raw response body from the reader service, or an error
     */
    private function fetch_url_with_jina(string $url): string|WP_Error {
        $jina_url = "https://r.jina.ai/";

        // NOTE(review): a bearer token was committed in source. The JINA_API_KEY
        // constant (e.g. defined in wp-config.php next to ANTHROPIC_API_KEY) now
        // takes precedence; the embedded value is kept only so existing installs
        // keep working and should be rotated and removed.
        $token = (defined('JINA_API_KEY') && !empty(JINA_API_KEY))
            ? (string) JINA_API_KEY
            : "jina_73c8ff38ef724602829cf3ff8b2dc5b5jkzgvbaEZhFKXzyXgQ1_o1U9oE2b";

        // The injected script strips page chrome and ads before extraction
        $data = wp_json_encode([
            'url' => $url,
            'injectPageScript' => [
                "// Remove headers, footers, navigation elements\ndocument.querySelectorAll('header, footer, nav, .header, .footer, .navigation, .sidebar').forEach(el => el.remove());\n\n// Remove ads and promotional content\ndocument.querySelectorAll('.ad, .ads, .advertisement, .promo, .banner').forEach(el => el.remove());"
            ]
        ]);

        $args = [
            'timeout' => 45, // Jina can take 5-40 seconds
            'headers' => [
                'Accept' => 'application/json',
                'Authorization' => 'Bearer ' . $token,
                'Content-Type' => 'application/json'
            ],
            'body' => $data,
            'method' => 'POST'
        ];

        $response = wp_remote_post($jina_url, $args);

        if (is_wp_error($response)) {
            error_log('HVAC AI: Jina.ai request failed: ' . $response->get_error_message());
            return new WP_Error(
                'jina_request_failed',
                'Failed to fetch webpage content: ' . $response->get_error_message(),
                ['status' => 500]
            );
        }

        $response_code = wp_remote_retrieve_response_code($response);
        if ($response_code !== 200) {
            error_log("HVAC AI: Jina.ai returned HTTP {$response_code}");
            return new WP_Error(
                'jina_http_error',
                "Webpage content service returned error: HTTP {$response_code}",
                ['status' => $response_code]
            );
        }

        $response_body = wp_remote_retrieve_body($response);
        if (empty($response_body)) {
            return new WP_Error(
                'jina_empty_response',
                'No content received from webpage',
                ['status' => 500]
            );
        }

        // The body is forwarded to the prompt verbatim.
        // NOTE(review): with "Accept: application/json" this is presumably a JSON
        // envelope rather than plain text -- confirm against the reader API.
        error_log('HVAC AI: Jina.ai extracted content (' . strlen($response_body) . ' characters)');

        return $response_body;
    }

    /**
     * Make API request to Claude.
     *
     * @param string $prompt Structured prompt
     * @return array|WP_Error Decoded API response or error
     */
    private function make_api_request(string $prompt): array|WP_Error {
        if (!defined('ANTHROPIC_API_KEY') || empty(ANTHROPIC_API_KEY)) {
            return new WP_Error(
                'api_key_missing',
                'Anthropic API key not configured.',
                ['status' => 500]
            );
        }

        $headers = [
            'Content-Type' => 'application/json',
            'x-api-key' => ANTHROPIC_API_KEY,
            'anthropic-version' => '2023-06-01'
        ];

        $body = [
            'model' => self::API_MODEL,
            'max_tokens' => 4000,
            'temperature' => 0.4,
            'messages' => [
                [
                    'role' => 'user',
                    'content' => $prompt
                ]
            ]
        ];

        $args = [
            'timeout' => self::REQUEST_TIMEOUT,
            'headers' => $headers,
            'body' => wp_json_encode($body),
            'method' => 'POST',
            'sslverify' => true
        ];

        $start_time = microtime(true);
        error_log('HVAC AI: Making API request to Claude (timeout: ' . self::REQUEST_TIMEOUT . 's)');

        $response = wp_remote_request(self::API_ENDPOINT, $args);

        $duration = round(microtime(true) - $start_time, 2);
        error_log("HVAC AI: Claude API request completed in {$duration}s");

        if (is_wp_error($response)) {
            error_log('HVAC AI: API request failed: ' . $response->get_error_message());
            return $response;
        }

        $response_code = wp_remote_retrieve_response_code($response);
        $response_body = wp_remote_retrieve_body($response);

        if ($response_code !== 200) {
            // Details go to the log; the caller gets a generic message
            error_log("HVAC AI: API returned error code {$response_code}: {$response_body}");
            return new WP_Error(
                'api_request_failed',
                'AI service temporarily unavailable. Please try again later.',
                ['status' => $response_code]
            );
        }

        $decoded_response = json_decode($response_body, true);
        // A body such as "null" or "123" decodes without a JSON error but is not
        // an array; returning it would violate the declared return type.
        if (json_last_error() !== JSON_ERROR_NONE || !is_array($decoded_response)) {
            error_log('HVAC AI: Failed to decode API response JSON');
            return new WP_Error(
                'api_response_invalid',
                'Invalid response from AI service.',
                ['status' => 500]
            );
        }

        return $decoded_response;
    }

    /**
     * Parse API response and extract event data.
     *
     * @param array $api_response Raw API response
     * @return array|WP_Error Parsed event data or error
     */
    private function parse_api_response(array $api_response): array|WP_Error {
        // Extract content from Claude's response structure
        if (!isset($api_response['content'][0]['text'])) {
            error_log('HVAC AI: Unexpected API response structure');
            return new WP_Error(
                'api_response_structure',
                'Unexpected response structure from AI service.',
                ['status' => 500]
            );
        }

        $content = trim($api_response['content'][0]['text']);

        // Debug: log at most the first 1000 bytes of the raw response
        error_log('HVAC AI: Raw Claude response: ' . substr($content, 0, 1000) . (strlen($content) > 1000 ? '...' : ''));

        // Greedy match from the first "{" to the last "}" drops any text the
        // model wrapped around the JSON object
        $json_match = [];
        if (preg_match('/\{.*\}/s', $content, $json_match)) {
            $content = $json_match[0];
        }

        $event_data = json_decode($content, true);
        // Valid JSON that is not an object/array (e.g. a bare number) is unusable too
        if (json_last_error() !== JSON_ERROR_NONE || !is_array($event_data)) {
            error_log('HVAC AI: Failed to parse event data JSON: ' . json_last_error_msg());
            return new WP_Error(
                'event_data_invalid',
                'AI service returned invalid event data format.',
                ['status' => 500]
            );
        }

        // Debug: Log the parsed event data structure
        error_log('HVAC AI: Parsed event data: ' . json_encode($event_data, JSON_PRETTY_PRINT));

        // Validate required fields (missing, null and empty values all fail)
        $required_fields = ['title', 'description', 'confidence'];
        foreach ($required_fields as $field) {
            if (empty($event_data[$field])) {
                error_log("HVAC AI: Missing required field: {$field}");
                return new WP_Error(
                    'missing_required_field',
                    "Missing required event information: {$field}",
                    ['status' => 422]
                );
            }
        }

        return $event_data;
    }

    /**
     * Post-process extracted data (venue/organizer matching, etc.).
     *
     * Adds venue_matched_id / venue_is_existing and organizer_matched_id /
     * organizer_is_existing when a match is found, adds start_datetime /
     * end_datetime when both date and time are present, then sanitizes.
     *
     * @param array $event_data Raw event data
     * @return array Processed event data
     */
    private function post_process_data(array $event_data): array {
        // Process venue matching (handle both flat and nested structures)
        $venue_name = $event_data['venue_name'] ?? $event_data['venue']['name'] ?? null;
        if (!empty($venue_name)) {
            $venue_data = [
                'name' => $venue_name,
                'address' => $event_data['venue_address'] ?? $event_data['venue']['address'] ?? null,
                'city' => $event_data['venue_city'] ?? $event_data['venue']['city'] ?? null,
                'state' => $event_data['venue_state'] ?? $event_data['venue']['state'] ?? null,
                'zip' => $event_data['venue_zip'] ?? $event_data['venue']['zip'] ?? null
            ];

            $matched_venue = $this->find_matching_venue($venue_data);
            if ($matched_venue) {
                $event_data['venue_matched_id'] = $matched_venue['id'];
                $event_data['venue_is_existing'] = true;
            }
        }

        // Process organizer matching (handle both flat and nested structures)
        $organizer_name = $event_data['organizer_name'] ?? $event_data['organizer']['name'] ?? null;
        if (!empty($organizer_name)) {
            $organizer_data = [
                'name' => $organizer_name,
                'email' => $event_data['organizer_email'] ?? $event_data['organizer']['email'] ?? null,
                'phone' => $event_data['organizer_phone'] ?? $event_data['organizer']['phone'] ?? null
            ];

            $matched_organizer = $this->find_matching_organizer($organizer_data);
            if ($matched_organizer) {
                $event_data['organizer_matched_id'] = $matched_organizer['id'];
                $event_data['organizer_is_existing'] = true;
            }
        }

        // Combine date and time fields
        if (!empty($event_data['start_date']) && !empty($event_data['start_time'])) {
            $event_data['start_datetime'] = $event_data['start_date'] . 'T' . $event_data['start_time'];
        }

        if (!empty($event_data['end_date']) && !empty($event_data['end_time'])) {
            $event_data['end_datetime'] = $event_data['end_date'] . 'T' . $event_data['end_time'];
        }

        return $this->sanitize_event_data($event_data);
    }

    /**
     * Find matching venue from existing venues.
     *
     * Returns the existing venue whose lower-cased name is most similar to the
     * extracted one, provided the similarity is at least 80%. (Previously the
     * first venue reaching 80% won even if a later one matched better.)
     *
     * @param array $extracted_venue Venue data from AI
     * @return array|null Matched venue or null
     */
    private function find_matching_venue(array $extracted_venue): ?array {
        $venue_name = strtolower((string) ($extracted_venue['name'] ?? ''));
        if ($venue_name === '') {
            return null;
        }

        $best_match = null;
        $best_percent = 0.0;

        foreach ($this->get_existing_venues() as $venue) {
            $percent = 0.0;
            similar_text($venue_name, strtolower($venue['name']), $percent);

            // Strict ">" keeps the earliest candidate on ties
            if ($percent >= 80 && $percent > $best_percent) {
                $best_match = $venue;
                $best_percent = $percent;
            }
        }

        return $best_match;
    }

    /**
     * Find matching organizer from existing organizers.
     *
     * An exact (case-insensitive) email match wins outright; otherwise the
     * organizer with the most similar name is returned when the similarity is
     * at least 80%. (Previously an earlier fuzzy name match could shadow a
     * later exact email match.)
     *
     * @param array $extracted_organizer Organizer data from AI
     * @return array|null Matched organizer or null
     */
    private function find_matching_organizer(array $extracted_organizer): ?array {
        $organizer_name = strtolower((string) ($extracted_organizer['name'] ?? ''));
        $organizer_email = is_string($extracted_organizer['email'] ?? null)
            ? strtolower($extracted_organizer['email'])
            : '';

        $best_match = null;
        $best_percent = 0.0;

        foreach ($this->get_existing_organizers() as $organizer) {
            if ($organizer_email !== '' && !empty($organizer['email'])
                && $organizer_email === strtolower($organizer['email'])) {
                return $organizer;
            }

            $percent = 0.0;
            similar_text($organizer_name, strtolower($organizer['name']), $percent);

            if ($percent >= 80 && $percent > $best_percent) {
                $best_match = $organizer;
                $best_percent = $percent;
            }
        }

        return $best_match;
    }

    /**
     * Sanitize event data for security.
     *
     * Covers the flat keys requested in the prompt's output format as well as
     * the nested 'venue' / 'organizer' arrays. Null or absent fields are left
     * untouched. (Previously only title, description, url and the nested
     * arrays were sanitized, so the flat fields passed through raw.)
     *
     * @param array $event_data Raw event data
     * @return array Sanitized event data
     */
    private function sanitize_event_data(array $event_data): array {
        // Multi-line text fields (line breaks preserved)
        foreach (['title', 'description'] as $field) {
            if (isset($event_data[$field])) {
                $event_data[$field] = sanitize_textarea_field($event_data[$field]);
            }
        }

        // Single-line text fields
        $line_fields = [
            'start_date', 'start_time', 'end_date', 'end_time',
            'start_datetime', 'end_datetime',
            'venue_name', 'venue_address', 'venue_city', 'venue_state', 'venue_zip',
            'organizer_name', 'organizer_phone',
        ];
        foreach ($line_fields as $field) {
            if (isset($event_data[$field])) {
                $event_data[$field] = sanitize_text_field($event_data[$field]);
            }
        }

        // URL fields; non-string values cannot be valid URLs and are blanked
        foreach (['url', 'event_url', 'website', 'event_image_url'] as $field) {
            if (isset($event_data[$field])) {
                $event_data[$field] = is_string($event_data[$field])
                    ? esc_url_raw($event_data[$field])
                    : '';
            }
        }

        if (isset($event_data['organizer_email'])) {
            $event_data['organizer_email'] = is_string($event_data['organizer_email'])
                ? sanitize_email($event_data['organizer_email'])
                : '';
        }

        // Nested venue data
        if (isset($event_data['venue']) && is_array($event_data['venue'])) {
            foreach ($event_data['venue'] as $key => $value) {
                if (is_string($value)) {
                    $event_data['venue'][$key] = sanitize_text_field($value);
                }
            }
        }

        // Nested organizer data
        if (isset($event_data['organizer']) && is_array($event_data['organizer'])) {
            foreach ($event_data['organizer'] as $key => $value) {
                if (!is_string($value)) {
                    continue;
                }
                $event_data['organizer'][$key] = $key === 'email'
                    ? sanitize_email($value)
                    : sanitize_text_field($value);
            }
        }

        // Numeric fields; arrays/objects cannot be cast meaningfully and become null
        foreach (['cost', 'price'] as $field) {
            if (isset($event_data[$field])) {
                $event_data[$field] = is_scalar($event_data[$field]) ? (float) $event_data[$field] : null;
            }
        }

        if (isset($event_data['capacity'])) {
            $event_data['capacity'] = is_scalar($event_data['capacity']) ? (int) $event_data['capacity'] : null;
        }

        return $event_data;
    }

    /**
     * Generate cache key for input.
     *
     * @param string $input User input
     * @return string Cache key: CACHE_PREFIX followed by the MD5 of the input
     */
    private function generate_cache_key(string $input): string {
        return self::CACHE_PREFIX . md5($input);
    }

    /**
     * Get cached response.
     *
     * @param string $cache_key Cache key
     * @return array|false Cached data or false
     */
    private function get_cached_response(string $cache_key): array|false {
        $cached = get_transient($cache_key);

        // Anything that is not a non-empty array counts as a miss; this also
        // keeps a corrupted (non-array) transient from violating the return type.
        return (is_array($cached) && $cached !== []) ? $cached : false;
    }

    /**
     * Cache API response.
     *
     * @param string $cache_key Cache key
     * @param array  $data      Data to cache
     * @return bool Success
     */
    private function cache_response(string $cache_key, array $data): bool {
        return set_transient($cache_key, $data, self::CACHE_TTL);
    }

    /**
     * Clear all cached responses (for admin use).
     *
     * Deletes the transient value rows and their timeout rows from the options
     * table. NOTE(review): transients held in an external object cache are
     * presumably not affected by this query -- confirm for the target install.
     *
     * @return void
     */
    public function clear_cache(): void {
        global $wpdb;

        foreach (['_transient_', '_transient_timeout_'] as $option_prefix) {
            // esc_like() makes "_" and "%" in the prefix literal; unescaped,
            // every "_" is a single-character wildcard in LIKE.
            $like = $wpdb->esc_like($option_prefix . self::CACHE_PREFIX) . '%';

            $wpdb->query($wpdb->prepare(
                "DELETE FROM {$wpdb->options} WHERE option_name LIKE %s",
                $like
            ));
        }

        error_log('HVAC AI: Cache cleared');
    }
}