#!/usr/bin/env python3
"""
Comprehensive Unit Tests for Claude Haiku Analyzer

Tests Claude API integration, content classification, batch processing,
and error handling.
"""

import sys
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock

import pytest

# Add src to path for imports
if str(Path(__file__).parent.parent) not in sys.path:
    sys.path.insert(0, str(Path(__file__).parent.parent))

from src.content_analysis.claude_analyzer import ClaudeHaikuAnalyzer

# Fields every analysis-result dict returned by the analyzer must contain.
EXPECTED_ANALYSIS_KEYS = (
    'topics', 'products', 'difficulty', 'content_type',
    'sentiment', 'hvac_relevance', 'keywords',
)


def _assert_analysis_structure(result):
    """Assert that *result* exposes every expected analysis field."""
    for key in EXPECTED_ANALYSIS_KEYS:
        assert key in result


def _assert_fallback_values(result):
    """Assert that *result* equals the analyzer's fallback analysis."""
    assert result['topics'] == []
    assert result['products'] == []
    assert result['difficulty'] == 'unknown'
    assert result['content_type'] == 'unknown'
    assert result['sentiment'] == 0
    assert result['hvac_relevance'] == 0
    assert result['keywords'] == []


class TestClaudeHaikuAnalyzer:
    """Test suite for ClaudeHaikuAnalyzer"""

    @pytest.fixture
    def mock_claude_client(self):
        """Create mock Claude client"""
        mock_client = Mock()
        mock_response = Mock()
        mock_response.content = [Mock()]
        mock_response.content[0].text = """[
{
"topics": ["hvac_systems", "installation"],
"products": ["heat_pump"],
"difficulty": "intermediate",
"content_type": "tutorial",
"sentiment": 0.7,
"hvac_relevance": 0.9,
"keywords": ["heat pump", "installation", "efficiency"]
}
]"""
        mock_client.messages.create.return_value = mock_response
        return mock_client

    @pytest.fixture
    def analyzer_with_mock_client(self, mock_claude_client):
        """Create analyzer with mocked Claude client"""
        with patch('src.content_analysis.claude_analyzer.anthropic.Anthropic') as mock_anthropic:
            mock_anthropic.return_value = mock_claude_client
            analyzer = ClaudeHaikuAnalyzer("test-api-key")
            # Belt and braces: ensure the analyzer talks to the mock even if
            # it cached a client reference during construction.
            analyzer.client = mock_claude_client
            return analyzer

    @pytest.fixture
    def sample_content_items(self):
        """Sample content items for testing"""
        return [
            {
                'id': 'item1',
                'title': 'Heat Pump Installation Guide',
                'content': 'Complete guide to installing high-efficiency heat pumps for residential applications.',
                'source': 'youtube'
            },
            {
                'id': 'item2',
                'title': 'AC Troubleshooting',
                'content': 'Common air conditioning problems and how to diagnose compressor issues.',
                'source': 'blog'
            },
            {
                'id': 'item3',
                'title': 'Thermostat Wiring',
                'content': 'Step-by-step wiring instructions for smart thermostats and HVAC controls.',
                'source': 'instagram'
            }
        ]

    def test_initialization_with_api_key(self):
        """Test analyzer initialization with API key"""
        with patch('src.content_analysis.claude_analyzer.anthropic.Anthropic') as mock_anthropic:
            analyzer = ClaudeHaikuAnalyzer("test-api-key")
            assert analyzer.api_key == "test-api-key"
            assert analyzer.model_name == "claude-3-haiku-20240307"
            assert analyzer.max_tokens == 4000
            assert analyzer.temperature == 0.1
            mock_anthropic.assert_called_once_with(api_key="test-api-key")

    def test_initialization_without_api_key(self):
        """Test analyzer initialization without API key raises error"""
        with pytest.raises(ValueError, match="ANTHROPIC_API_KEY is required"):
            ClaudeHaikuAnalyzer(None)

    def test_analyze_single_content(self, analyzer_with_mock_client, sample_content_items):
        """Test single content item analysis"""
        item = sample_content_items[0]
        result = analyzer_with_mock_client.analyze_content(item)

        # Verify API call structure
        analyzer_with_mock_client.client.messages.create.assert_called_once()
        call_args = analyzer_with_mock_client.client.messages.create.call_args
        assert call_args[1]['model'] == "claude-3-haiku-20240307"
        assert call_args[1]['max_tokens'] == 4000
        assert call_args[1]['temperature'] == 0.1

        # Verify result structure
        _assert_analysis_structure(result)

    def test_analyze_content_batch(self, analyzer_with_mock_client, sample_content_items):
        """Test batch content analysis"""
        # Mock batch response with one analysis object per input item.
        batch_response = Mock()
        batch_response.content = [Mock()]
        batch_response.content[0].text = """[
{
"topics": ["hvac_systems"],
"products": ["heat_pump"],
"difficulty": "intermediate",
"content_type": "tutorial",
"sentiment": 0.7,
"hvac_relevance": 0.9,
"keywords": ["heat pump"]
},
{
"topics": ["troubleshooting"],
"products": ["air_conditioning"],
"difficulty": "advanced",
"content_type": "diagnostic",
"sentiment": 0.5,
"hvac_relevance": 0.8,
"keywords": ["ac repair"]
},
{
"topics": ["controls"],
"products": ["thermostat"],
"difficulty": "beginner",
"content_type": "tutorial",
"sentiment": 0.6,
"hvac_relevance": 0.7,
"keywords": ["thermostat wiring"]
}
]"""
        analyzer_with_mock_client.client.messages.create.return_value = batch_response

        results = analyzer_with_mock_client.analyze_content_batch(sample_content_items)

        assert len(results) == 3
        # Verify each result structure
        for result in results:
            _assert_analysis_structure(result)

    def test_batch_processing_chunking(self, analyzer_with_mock_client):
        """Test batch processing with chunking for large item lists"""
        # Create more items than batch_size (10) so two API calls are needed.
        large_content_list = [
            {
                'id': f'item{i}',
                'title': f'HVAC Item {i}',
                'content': f'Content for item {i}',
                'source': 'test'
            }
            for i in range(15)
        ]

        # Mock responses for multiple batches: 10 results, then 5.
        response1 = Mock()
        response1.content = [Mock()]
        response1.content[0].text = '[' + ','.join([
            '{"topics": ["hvac_systems"], "products": [], "difficulty": "intermediate", "content_type": "tutorial", "sentiment": 0.5, "hvac_relevance": 0.8, "keywords": []}'
        ] * 10) + ']'

        response2 = Mock()
        response2.content = [Mock()]
        response2.content[0].text = '[' + ','.join([
            '{"topics": ["maintenance"], "products": [], "difficulty": "beginner", "content_type": "guide", "sentiment": 0.6, "hvac_relevance": 0.7, "keywords": []}'
        ] * 5) + ']'

        analyzer_with_mock_client.client.messages.create.side_effect = [response1, response2]

        results = analyzer_with_mock_client.analyze_content_batch(large_content_list)

        assert len(results) == 15
        assert analyzer_with_mock_client.client.messages.create.call_count == 2

    def test_create_analysis_prompt_single(self, analyzer_with_mock_client, sample_content_items):
        """Test analysis prompt creation for single item"""
        item = sample_content_items[0]
        prompt = analyzer_with_mock_client._create_analysis_prompt([item])

        # Verify prompt contains expected elements
        assert 'Heat Pump Installation Guide' in prompt
        assert 'Complete guide to installing' in prompt
        assert 'HVAC Content Analysis' in prompt
        assert 'topics' in prompt
        assert 'products' in prompt
        assert 'difficulty' in prompt

    def test_create_analysis_prompt_batch(self, analyzer_with_mock_client, sample_content_items):
        """Test analysis prompt creation for batch"""
        prompt = analyzer_with_mock_client._create_analysis_prompt(sample_content_items)

        # Should contain all items
        assert 'Heat Pump Installation Guide' in prompt
        assert 'AC Troubleshooting' in prompt
        assert 'Thermostat Wiring' in prompt

        # Should be structured as JSON array request
        assert 'JSON array' in prompt

    def test_parse_claude_response_valid_json(self, analyzer_with_mock_client):
        """Test parsing valid Claude JSON response"""
        response_text = """[
{
"topics": ["hvac_systems"],
"products": ["heat_pump"],
"difficulty": "intermediate",
"content_type": "tutorial",
"sentiment": 0.7,
"hvac_relevance": 0.9,
"keywords": ["heat pump", "installation"]
}
]"""
        results = analyzer_with_mock_client._parse_claude_response(response_text, 1)

        assert len(results) == 1
        assert results[0]['topics'] == ["hvac_systems"]
        assert results[0]['products'] == ["heat_pump"]
        assert results[0]['sentiment'] == 0.7

    def test_parse_claude_response_invalid_json(self, analyzer_with_mock_client):
        """Test parsing invalid Claude JSON response"""
        invalid_json = "This is not valid JSON"
        results = analyzer_with_mock_client._parse_claude_response(invalid_json, 2)

        # Should return fallback results, one per expected item.
        assert len(results) == 2
        for result in results:
            _assert_fallback_values(result)

    def test_parse_claude_response_partial_json(self, analyzer_with_mock_client):
        """Test parsing partially valid JSON response"""
        # Deliberately malformed: "//" comments are not legal JSON.
        partial_json = """[
{
"topics": ["hvac_systems"],
"products": ["heat_pump"],
"difficulty": "intermediate"
// Missing some fields
}
]"""
        results = analyzer_with_mock_client._parse_claude_response(partial_json, 1)

        # Should still get fallback for malformed JSON
        assert len(results) == 1
        assert results[0]['topics'] == []

    def test_create_fallback_analysis(self, analyzer_with_mock_client):
        """Test fallback analysis creation"""
        fallback = analyzer_with_mock_client._create_fallback_analysis()
        _assert_fallback_values(fallback)

    def test_api_error_handling(self, analyzer_with_mock_client):
        """Test API error handling"""
        # Mock API error
        analyzer_with_mock_client.client.messages.create.side_effect = Exception("API Error")

        item = {'id': 'test', 'title': 'Test', 'content': 'Test content', 'source': 'test'}
        result = analyzer_with_mock_client.analyze_content(item)

        # Should return fallback analysis
        assert result['topics'] == []
        assert result['difficulty'] == 'unknown'

    def test_rate_limiting_backoff(self, analyzer_with_mock_client):
        """Test rate limiting and backoff behavior"""
        # Mock rate limiting error followed by success
        rate_limit_error = Exception("Rate limit exceeded")
        success_response = Mock()
        success_response.content = [Mock()]
        success_response.content[0].text = '[{"topics": [], "products": [], "difficulty": "unknown", "content_type": "unknown", "sentiment": 0, "hvac_relevance": 0, "keywords": []}]'

        analyzer_with_mock_client.client.messages.create.side_effect = [rate_limit_error, success_response]

        with patch('time.sleep') as mock_sleep:
            item = {'id': 'test', 'title': 'Test', 'content': 'Test content', 'source': 'test'}
            result = analyzer_with_mock_client.analyze_content(item)

            # Should have retried and succeeded
            assert analyzer_with_mock_client.client.messages.create.call_count == 2
            mock_sleep.assert_called_once()

    def test_empty_content_handling(self, analyzer_with_mock_client):
        """Test handling of empty or minimal content"""
        empty_items = [
            {'id': 'empty1', 'title': '', 'content': '', 'source': 'test'},
            {'id': 'empty2', 'title': 'Title Only', 'source': 'test'}  # Missing content
        ]

        results = analyzer_with_mock_client.analyze_content_batch(empty_items)

        # Should still process and return results
        assert len(results) == 2

    def test_content_length_limits(self, analyzer_with_mock_client):
        """Test handling of very long content"""
        long_content = {
            'id': 'long1',
            'title': 'Long Content Test',
            'content': 'A' * 10000,  # Very long content
            'source': 'test'
        }

        # Should not crash with long content
        result = analyzer_with_mock_client.analyze_content(long_content)
        assert 'topics' in result

    def test_special_characters_handling(self, analyzer_with_mock_client):
        """Test handling of special characters and encoding"""
        special_content = {
            'id': 'special1',
            'title': 'Special Characters: "Quotes" & Symbols ®™',
            'content': 'Content with émojis 🔧 and speciál çharaçters',
            'source': 'test'
        }

        # Should handle special characters without errors
        result = analyzer_with_mock_client.analyze_content(special_content)
        assert 'topics' in result

    def test_taxonomy_validation(self, analyzer_with_mock_client):
        """Test HVAC taxonomy validation in prompts"""
        item = {'id': 'test', 'title': 'Test', 'content': 'Test', 'source': 'test'}
        prompt = analyzer_with_mock_client._create_analysis_prompt([item])

        # Should include HVAC topic categories
        hvac_topics = ['hvac_systems', 'heat_pumps', 'air_conditioning', 'refrigeration',
                       'maintenance', 'installation', 'troubleshooting', 'controls']
        for topic in hvac_topics:
            assert topic in prompt

        # Should include product categories
        hvac_products = ['heat_pump', 'air_conditioner', 'furnace', 'boiler',
                         'thermostat', 'compressor', 'evaporator', 'condenser']
        for product in hvac_products:
            assert product in prompt

    def test_model_configuration_validation(self, analyzer_with_mock_client):
        """Test model configuration parameters"""
        assert analyzer_with_mock_client.model_name == "claude-3-haiku-20240307"
        assert analyzer_with_mock_client.max_tokens == 4000
        assert analyzer_with_mock_client.temperature == 0.1
        assert analyzer_with_mock_client.batch_size == 10

    @patch('src.content_analysis.claude_analyzer.logging')
    def test_logging_functionality(self, mock_logging, analyzer_with_mock_client):
        """Test logging of analysis operations"""
        item = {'id': 'test', 'title': 'Test', 'content': 'Test', 'source': 'test'}
        analyzer_with_mock_client.analyze_content(item)

        # Should have logged the operation
        # NOTE(review): this only passes if the analyzer calls
        # logging.getLogger at analyze time (not just at import) — confirm.
        assert mock_logging.getLogger.called

    def test_response_format_validation(self, analyzer_with_mock_client):
        """Test validation of response format from Claude"""
        # Test with correctly formatted response
        good_response = '''[{
"topics": ["hvac_systems"],
"products": ["heat_pump"],
"difficulty": "intermediate",
"content_type": "tutorial",
"sentiment": 0.7,
"hvac_relevance": 0.9,
"keywords": ["heat pump"]
}]'''
        result = analyzer_with_mock_client._parse_claude_response(good_response, 1)
        assert len(result) == 1
        assert result[0]['topics'] == ["hvac_systems"]

        # Test with missing required fields
        incomplete_response = '''[{
"topics": ["hvac_systems"]
}]'''
        result = analyzer_with_mock_client._parse_claude_response(incomplete_response, 1)
        # Should fall back to default structure
        assert len(result) == 1


if __name__ == "__main__":
    pytest.main([__file__, "-v",
                 "--cov=src.content_analysis.claude_analyzer",
                 "--cov-report=term-missing"])