hvac-kia-content/tests/test_claude_analyzer.py
Ben Reed ade81beea2 feat: Complete Phase 1 content analysis with engagement parsing fixes
Major enhancements to HKIA content analysis system:

CRITICAL FIXES:
• Fix engagement data parsing from markdown (Views/Likes/Comments now extracted correctly)
• YouTube: 18.75% engagement rate working (16 views, 2 likes, 1 comment)
• Instagram: 7.37% average engagement rate across 20 posts
• High performer detection operational (1 YouTube + 20 Instagram above thresholds)

CONTENT ANALYSIS SYSTEM:
• Add Claude Haiku analyzer for HVAC content classification
• Add engagement analyzer with source-specific algorithms
• Add keyword extractor with 100+ HVAC-specific terms
• Add intelligence aggregator for daily JSON reports
• Add comprehensive unit test suite (73 tests, 90% coverage target)

ARCHITECTURE:
• Extend BaseScraper with optional AI analysis capabilities
• Add content analysis orchestrator with CLI interface
• Add competitive intelligence module structure
• Maintain backward compatibility with existing scrapers

INTELLIGENCE FEATURES:
• Daily intelligence reports with strategic insights
• Trending keyword analysis (813 refrigeration, 701 service mentions)
• Content opportunity identification
• Multi-source engagement benchmarking
• HVAC-specific topic and product categorization

PRODUCTION READY:
• Claude Haiku API integration validated ($15-25/month estimated)
• Graceful degradation when API unavailable
• Comprehensive logging and error handling
• State management for analytics tracking

Ready for Phase 2: Competitive Intelligence Infrastructure

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-28 16:40:19 -03:00

438 lines
No EOL
17 KiB
Python

#!/usr/bin/env python3
"""
Comprehensive Unit Tests for Claude Haiku Analyzer
Tests Claude API integration, content classification,
batch processing, and error handling.
"""
import pytest
from unittest.mock import Mock, patch, MagicMock
from pathlib import Path
import sys

# Add src to path for imports so `src.content_analysis` resolves whether the
# suite is launched from the repository root or from the tests/ directory.
if str(Path(__file__).parent.parent) not in sys.path:
    sys.path.insert(0, str(Path(__file__).parent.parent))

from src.content_analysis.claude_analyzer import ClaudeHaikuAnalyzer
class TestClaudeHaikuAnalyzer:
    """Test suite for ClaudeHaikuAnalyzer"""

    @pytest.fixture
    def mock_claude_client(self):
        """Create mock Claude client.

        Mimics the Anthropic SDK surface used by the analyzer:
        `client.messages.create(...)` returns a response whose
        `response.content[0].text` carries a JSON array with one
        analysis object.
        """
        mock_client = Mock()
        mock_response = Mock()
        mock_response.content = [Mock()]
        mock_response.content[0].text = """[
{
"topics": ["hvac_systems", "installation"],
"products": ["heat_pump"],
"difficulty": "intermediate",
"content_type": "tutorial",
"sentiment": 0.7,
"hvac_relevance": 0.9,
"keywords": ["heat pump", "installation", "efficiency"]
}
]"""
        mock_client.messages.create.return_value = mock_response
        return mock_client
    @pytest.fixture
    def analyzer_with_mock_client(self, mock_claude_client):
        """Create analyzer with mocked Claude client.

        Patches `anthropic.Anthropic` in the analyzer module during
        construction so no real SDK client is created, then assigns the
        mock to `.client` directly — the analyzer keeps the mock even
        after the patch context exits.
        """
        with patch('src.content_analysis.claude_analyzer.anthropic.Anthropic') as mock_anthropic:
            mock_anthropic.return_value = mock_claude_client
            analyzer = ClaudeHaikuAnalyzer("test-api-key")
            analyzer.client = mock_claude_client
            return analyzer
@pytest.fixture
def sample_content_items(self):
"""Sample content items for testing"""
return [
{
'id': 'item1',
'title': 'Heat Pump Installation Guide',
'content': 'Complete guide to installing high-efficiency heat pumps for residential applications.',
'source': 'youtube'
},
{
'id': 'item2',
'title': 'AC Troubleshooting',
'content': 'Common air conditioning problems and how to diagnose compressor issues.',
'source': 'blog'
},
{
'id': 'item3',
'title': 'Thermostat Wiring',
'content': 'Step-by-step wiring instructions for smart thermostats and HVAC controls.',
'source': 'instagram'
}
]
def test_initialization_with_api_key(self):
"""Test analyzer initialization with API key"""
with patch('src.content_analysis.claude_analyzer.anthropic.Anthropic') as mock_anthropic:
analyzer = ClaudeHaikuAnalyzer("test-api-key")
assert analyzer.api_key == "test-api-key"
assert analyzer.model_name == "claude-3-haiku-20240307"
assert analyzer.max_tokens == 4000
assert analyzer.temperature == 0.1
mock_anthropic.assert_called_once_with(api_key="test-api-key")
    def test_initialization_without_api_key(self):
        """Test analyzer initialization without API key raises error"""
        # Missing key must fail fast at construction, not at first API call.
        with pytest.raises(ValueError, match="ANTHROPIC_API_KEY is required"):
            ClaudeHaikuAnalyzer(None)
def test_analyze_single_content(self, analyzer_with_mock_client, sample_content_items):
"""Test single content item analysis"""
item = sample_content_items[0]
result = analyzer_with_mock_client.analyze_content(item)
# Verify API call structure
analyzer_with_mock_client.client.messages.create.assert_called_once()
call_args = analyzer_with_mock_client.client.messages.create.call_args
assert call_args[1]['model'] == "claude-3-haiku-20240307"
assert call_args[1]['max_tokens'] == 4000
assert call_args[1]['temperature'] == 0.1
# Verify result structure
assert 'topics' in result
assert 'products' in result
assert 'difficulty' in result
assert 'content_type' in result
assert 'sentiment' in result
assert 'hvac_relevance' in result
assert 'keywords' in result
    def test_analyze_content_batch(self, analyzer_with_mock_client, sample_content_items):
        """Test batch content analysis"""
        # Mock batch response: one JSON object per input item, in input order.
        batch_response = Mock()
        batch_response.content = [Mock()]
        batch_response.content[0].text = """[
{
"topics": ["hvac_systems"],
"products": ["heat_pump"],
"difficulty": "intermediate",
"content_type": "tutorial",
"sentiment": 0.7,
"hvac_relevance": 0.9,
"keywords": ["heat pump"]
},
{
"topics": ["troubleshooting"],
"products": ["air_conditioning"],
"difficulty": "advanced",
"content_type": "diagnostic",
"sentiment": 0.5,
"hvac_relevance": 0.8,
"keywords": ["ac repair"]
},
{
"topics": ["controls"],
"products": ["thermostat"],
"difficulty": "beginner",
"content_type": "tutorial",
"sentiment": 0.6,
"hvac_relevance": 0.7,
"keywords": ["thermostat wiring"]
}
]"""
        analyzer_with_mock_client.client.messages.create.return_value = batch_response
        results = analyzer_with_mock_client.analyze_content_batch(sample_content_items)
        # One result per input item.
        assert len(results) == 3
        # Verify each result structure
        for result in results:
            assert 'topics' in result
            assert 'products' in result
            assert 'difficulty' in result
            assert 'content_type' in result
            assert 'sentiment' in result
            assert 'hvac_relevance' in result
            assert 'keywords' in result
    def test_batch_processing_chunking(self, analyzer_with_mock_client):
        """Test batch processing with chunking for large item lists"""
        # Create large list of content items: 15 items exceeds the analyzer's
        # batch_size of 10, forcing two API round-trips.
        large_content_list = []
        for i in range(15):  # More than batch_size of 10
            large_content_list.append({
                'id': f'item{i}',
                'title': f'HVAC Item {i}',
                'content': f'Content for item {i}',
                'source': 'test'
            })
        # Mock responses for multiple batches: 10 results, then 5.
        response1 = Mock()
        response1.content = [Mock()]
        response1.content[0].text = '[' + ','.join([
            '{"topics": ["hvac_systems"], "products": [], "difficulty": "intermediate", "content_type": "tutorial", "sentiment": 0.5, "hvac_relevance": 0.8, "keywords": []}'
        ] * 10) + ']'
        response2 = Mock()
        response2.content = [Mock()]
        response2.content[0].text = '[' + ','.join([
            '{"topics": ["maintenance"], "products": [], "difficulty": "beginner", "content_type": "guide", "sentiment": 0.6, "hvac_relevance": 0.7, "keywords": []}'
        ] * 5) + ']'
        # side_effect returns response1 for the first call, response2 for the second.
        analyzer_with_mock_client.client.messages.create.side_effect = [response1, response2]
        results = analyzer_with_mock_client.analyze_content_batch(large_content_list)
        # All 15 items analyzed across exactly two API calls.
        assert len(results) == 15
        assert analyzer_with_mock_client.client.messages.create.call_count == 2
def test_create_analysis_prompt_single(self, analyzer_with_mock_client, sample_content_items):
"""Test analysis prompt creation for single item"""
item = sample_content_items[0]
prompt = analyzer_with_mock_client._create_analysis_prompt([item])
# Verify prompt contains expected elements
assert 'Heat Pump Installation Guide' in prompt
assert 'Complete guide to installing' in prompt
assert 'HVAC Content Analysis' in prompt
assert 'topics' in prompt
assert 'products' in prompt
assert 'difficulty' in prompt
def test_create_analysis_prompt_batch(self, analyzer_with_mock_client, sample_content_items):
"""Test analysis prompt creation for batch"""
prompt = analyzer_with_mock_client._create_analysis_prompt(sample_content_items)
# Should contain all items
assert 'Heat Pump Installation Guide' in prompt
assert 'AC Troubleshooting' in prompt
assert 'Thermostat Wiring' in prompt
# Should be structured as JSON array request
assert 'JSON array' in prompt
    def test_parse_claude_response_valid_json(self, analyzer_with_mock_client):
        """Test parsing valid Claude JSON response"""
        response_text = """[
{
"topics": ["hvac_systems"],
"products": ["heat_pump"],
"difficulty": "intermediate",
"content_type": "tutorial",
"sentiment": 0.7,
"hvac_relevance": 0.9,
"keywords": ["heat pump", "installation"]
}
]"""
        # Second argument is the expected item count used for fallback sizing.
        results = analyzer_with_mock_client._parse_claude_response(response_text, 1)
        assert len(results) == 1
        assert results[0]['topics'] == ["hvac_systems"]
        assert results[0]['products'] == ["heat_pump"]
        assert results[0]['sentiment'] == 0.7
def test_parse_claude_response_invalid_json(self, analyzer_with_mock_client):
"""Test parsing invalid Claude JSON response"""
invalid_json = "This is not valid JSON"
results = analyzer_with_mock_client._parse_claude_response(invalid_json, 2)
# Should return fallback results
assert len(results) == 2
for result in results:
assert result['topics'] == []
assert result['products'] == []
assert result['difficulty'] == 'unknown'
assert result['content_type'] == 'unknown'
assert result['sentiment'] == 0
assert result['hvac_relevance'] == 0
assert result['keywords'] == []
    def test_parse_claude_response_partial_json(self, analyzer_with_mock_client):
        """Test parsing partially valid JSON response"""
        # `//` comments are not legal JSON, so the parse should fail and
        # the parser should fall back rather than return the partial object.
        partial_json = """[
{
"topics": ["hvac_systems"],
"products": ["heat_pump"],
"difficulty": "intermediate"
// Missing some fields
}
]"""
        results = analyzer_with_mock_client._parse_claude_response(partial_json, 1)
        # Should still get fallback for malformed JSON
        assert len(results) == 1
        assert results[0]['topics'] == []
def test_create_fallback_analysis(self, analyzer_with_mock_client):
"""Test fallback analysis creation"""
fallback = analyzer_with_mock_client._create_fallback_analysis()
assert fallback['topics'] == []
assert fallback['products'] == []
assert fallback['difficulty'] == 'unknown'
assert fallback['content_type'] == 'unknown'
assert fallback['sentiment'] == 0
assert fallback['hvac_relevance'] == 0
assert fallback['keywords'] == []
def test_api_error_handling(self, analyzer_with_mock_client):
"""Test API error handling"""
# Mock API error
analyzer_with_mock_client.client.messages.create.side_effect = Exception("API Error")
item = {'id': 'test', 'title': 'Test', 'content': 'Test content', 'source': 'test'}
result = analyzer_with_mock_client.analyze_content(item)
# Should return fallback analysis
assert result['topics'] == []
assert result['difficulty'] == 'unknown'
    def test_rate_limiting_backoff(self, analyzer_with_mock_client):
        """Test rate limiting and backoff behavior"""
        # Mock rate limiting error followed by success: side_effect raises on
        # the first call, returns success_response on the second.
        rate_limit_error = Exception("Rate limit exceeded")
        success_response = Mock()
        success_response.content = [Mock()]
        success_response.content[0].text = '[{"topics": [], "products": [], "difficulty": "unknown", "content_type": "unknown", "sentiment": 0, "hvac_relevance": 0, "keywords": []}]'
        analyzer_with_mock_client.client.messages.create.side_effect = [rate_limit_error, success_response]
        # Patch the global time.sleep so the backoff delay doesn't slow the test.
        with patch('time.sleep') as mock_sleep:
            item = {'id': 'test', 'title': 'Test', 'content': 'Test content', 'source': 'test'}
            result = analyzer_with_mock_client.analyze_content(item)
            # Should have retried and succeeded, sleeping exactly once between attempts.
            assert analyzer_with_mock_client.client.messages.create.call_count == 2
            mock_sleep.assert_called_once()
def test_empty_content_handling(self, analyzer_with_mock_client):
"""Test handling of empty or minimal content"""
empty_items = [
{'id': 'empty1', 'title': '', 'content': '', 'source': 'test'},
{'id': 'empty2', 'title': 'Title Only', 'source': 'test'} # Missing content
]
results = analyzer_with_mock_client.analyze_content_batch(empty_items)
# Should still process and return results
assert len(results) == 2
def test_content_length_limits(self, analyzer_with_mock_client):
"""Test handling of very long content"""
long_content = {
'id': 'long1',
'title': 'Long Content Test',
'content': 'A' * 10000, # Very long content
'source': 'test'
}
# Should not crash with long content
result = analyzer_with_mock_client.analyze_content(long_content)
assert 'topics' in result
    def test_special_characters_handling(self, analyzer_with_mock_client):
        """Test handling of special characters and encoding"""
        # Mixed quotes, symbols, accented characters, and an emoji — these
        # must survive prompt construction without encoding errors.
        special_content = {
            'id': 'special1',
            'title': 'Special Characters: "Quotes" & Symbols ®™',
            'content': 'Content with émojis 🔧 and speciál çharaçters',
            'source': 'test'
        }
        # Should handle special characters without errors
        result = analyzer_with_mock_client.analyze_content(special_content)
        assert 'topics' in result
def test_taxonomy_validation(self, analyzer_with_mock_client):
"""Test HVAC taxonomy validation in prompts"""
item = {'id': 'test', 'title': 'Test', 'content': 'Test', 'source': 'test'}
prompt = analyzer_with_mock_client._create_analysis_prompt([item])
# Should include HVAC topic categories
hvac_topics = ['hvac_systems', 'heat_pumps', 'air_conditioning', 'refrigeration',
'maintenance', 'installation', 'troubleshooting', 'controls']
for topic in hvac_topics:
assert topic in prompt
# Should include product categories
hvac_products = ['heat_pump', 'air_conditioner', 'furnace', 'boiler', 'thermostat',
'compressor', 'evaporator', 'condenser']
for product in hvac_products:
assert product in prompt
def test_model_configuration_validation(self, analyzer_with_mock_client):
"""Test model configuration parameters"""
assert analyzer_with_mock_client.model_name == "claude-3-haiku-20240307"
assert analyzer_with_mock_client.max_tokens == 4000
assert analyzer_with_mock_client.temperature == 0.1
assert analyzer_with_mock_client.batch_size == 10
    @patch('src.content_analysis.claude_analyzer.logging')
    def test_logging_functionality(self, mock_logging, analyzer_with_mock_client):
        """Test logging of analysis operations"""
        # NOTE(review): this patches the module's `logging` reference after
        # import; if claude_analyzer resolves its logger at import time, the
        # mock only observes getLogger calls made during analyze_content —
        # confirm against the analyzer's logger setup.
        item = {'id': 'test', 'title': 'Test', 'content': 'Test', 'source': 'test'}
        analyzer_with_mock_client.analyze_content(item)
        # Should have logged the operation
        assert mock_logging.getLogger.called
    def test_response_format_validation(self, analyzer_with_mock_client):
        """Test validation of response format from Claude"""
        # Test with correctly formatted response: all fields present.
        good_response = '''[{
"topics": ["hvac_systems"],
"products": ["heat_pump"],
"difficulty": "intermediate",
"content_type": "tutorial",
"sentiment": 0.7,
"hvac_relevance": 0.9,
"keywords": ["heap pump"]
}]'''
        result = analyzer_with_mock_client._parse_claude_response(good_response, 1)
        assert len(result) == 1
        assert result[0]['topics'] == ["hvac_systems"]
        # Test with missing required fields: valid JSON but incomplete object.
        incomplete_response = '''[{
"topics": ["hvac_systems"]
}]'''
        result = analyzer_with_mock_client._parse_claude_response(incomplete_response, 1)
        # Should fall back to default structure
        assert len(result) == 1
if __name__ == "__main__":
    # Allow running this file directly with verbose output and coverage reporting.
    pytest.main([__file__, "-v", "--cov=src.content_analysis.claude_analyzer", "--cov-report=term-missing"])