#!/usr/bin/env python3 """ Comprehensive Unit Tests for Engagement Analyzer Tests engagement metrics calculation, trending content identification, virality scoring, and source-specific analysis. """ import pytest from unittest.mock import Mock, patch from datetime import datetime, timedelta from pathlib import Path import sys # Add src to path for imports if str(Path(__file__).parent.parent) not in sys.path: sys.path.insert(0, str(Path(__file__).parent.parent)) from src.content_analysis.engagement_analyzer import ( EngagementAnalyzer, EngagementMetrics, TrendingContent ) class TestEngagementAnalyzer: """Test suite for EngagementAnalyzer""" @pytest.fixture def analyzer(self): """Create engagement analyzer instance""" return EngagementAnalyzer() @pytest.fixture def sample_youtube_items(self): """Sample YouTube content items with engagement data""" return [ { 'id': 'video1', 'title': 'HVAC Troubleshooting Guide', 'source': 'youtube', 'views': 10000, 'likes': 500, 'comments': 50, 'upload_date': '2025-08-27' }, { 'id': 'video2', 'title': 'Heat Pump Installation', 'source': 'youtube', 'views': 5000, 'likes': 200, 'comments': 20, 'upload_date': '2025-08-26' }, { 'id': 'video3', 'title': 'AC Repair Tips', 'source': 'youtube', 'views': 1000, 'likes': 30, 'comments': 5, 'upload_date': '2025-08-25' } ] @pytest.fixture def sample_instagram_items(self): """Sample Instagram content items""" return [ { 'id': 'post1', 'title': 'HVAC tools showcase', 'source': 'instagram', 'likes': 150, 'comments': 25, 'upload_date': '2025-08-27' }, { 'id': 'post2', 'title': 'Before and after AC install', 'source': 'instagram', 'likes': 80, 'comments': 10, 'upload_date': '2025-08-26' } ] def test_calculate_engagement_rate_youtube(self, analyzer): """Test engagement rate calculation for YouTube content""" # Test normal case item = {'views': 1000, 'likes': 50, 'comments': 10} rate = analyzer._calculate_engagement_rate(item, 'youtube') assert rate == 0.06 # (50 + 10) / 1000 # Test zero views item = {'views': 0, 'likes': 50, 'comments': 10} rate = analyzer._calculate_engagement_rate(item, 'youtube') assert rate == 0 # Test missing engagement data item = {'views': 1000} rate = analyzer._calculate_engagement_rate(item, 'youtube') assert rate == 0 def test_calculate_engagement_rate_instagram(self, analyzer): """Test engagement rate calculation for Instagram content""" # Test with views, likes and comments (preferred method) item = {'views': 1000, 'likes': 100, 'comments': 20} rate = analyzer._calculate_engagement_rate(item, 'instagram') # Should use (likes + comments) / views: (100 + 20) / 1000 = 0.12 assert rate == 0.12 # Test with likes and comments but no views (fallback) item = {'likes': 100, 'comments': 20} rate = analyzer._calculate_engagement_rate(item, 'instagram') # Should use comments/likes fallback: 20/100 = 0.2 assert rate == 0.2 # Test with only comments (no likes, no views) item = {'comments': 10} rate = analyzer._calculate_engagement_rate(item, 'instagram') # Should return 0 as there are no likes to calculate fallback assert rate == 0.0 def test_get_total_engagement(self, analyzer): """Test total engagement calculation""" # Test YouTube (likes + comments) item = {'likes': 50, 'comments': 10} total = analyzer._get_total_engagement(item, 'youtube') assert total == 60 # Test Instagram (likes + comments) item = {'likes': 100, 'comments': 25} total = analyzer._get_total_engagement(item, 'instagram') assert total == 125 # Test missing data item = {} total = analyzer._get_total_engagement(item, 'youtube') assert total == 0 def test_analyze_source_engagement_youtube(self, analyzer, sample_youtube_items): """Test source engagement analysis for YouTube""" result = analyzer.analyze_source_engagement(sample_youtube_items, 'youtube') # Verify structure assert 'total_items' in result assert 'avg_engagement_rate' in result assert 'median_engagement_rate' in result assert 'total_engagement' in result assert 'trending_count' in result assert 'high_performers' in result assert 'trending_content' in result # Verify calculations assert result['total_items'] == 3 assert result['total_engagement'] == 805 # 550 + 220 + 35 # Check engagement rates are calculated correctly # video1: (500+50)/10000 = 0.055, video2: (200+20)/5000 = 0.044, video3: (30+5)/1000 = 0.035 expected_avg = (0.055 + 0.044 + 0.035) / 3 assert abs(result['avg_engagement_rate'] - expected_avg) < 0.001 # Check high performers (threshold 0.05 for YouTube) assert result['high_performers'] == 1 # Only video1 above 0.05 def test_analyze_source_engagement_instagram(self, analyzer, sample_instagram_items): """Test source engagement analysis for Instagram""" result = analyzer.analyze_source_engagement(sample_instagram_items, 'instagram') assert result['total_items'] == 2 assert result['total_engagement'] == 265 # 175 + 90 # Instagram uses comments/likes: post1: 25/150=0.167, post2: 10/80=0.125 expected_avg = (0.167 + 0.125) / 2 assert abs(result['avg_engagement_rate'] - expected_avg) < 0.001 def test_identify_trending_content(self, analyzer, sample_youtube_items): """Test trending content identification""" trending = analyzer.identify_trending_content(sample_youtube_items, 'youtube') # Should identify high-engagement content assert len(trending) > 0 # Check trending content structure if trending: item = trending[0] assert 'content_id' in item assert 'source' in item assert 'title' in item assert 'engagement_score' in item assert 'trend_type' in item def test_calculate_virality_score(self, analyzer): """Test virality score calculation""" # High engagement, recent content item = { 'views': 10000, 'likes': 800, 'comments': 200, 'upload_date': '2025-08-27' } score = analyzer._calculate_virality_score(item, 'youtube') assert score > 0 # Low engagement content item = { 'views': 100, 'likes': 5, 'comments': 1, 'upload_date': '2025-08-27' } score = analyzer._calculate_virality_score(item, 'youtube') assert score >= 0 def test_get_engagement_velocity(self, analyzer): """Test engagement velocity calculation""" # Recent high-engagement content item = { 'views': 5000, 'upload_date': '2025-08-27' } with patch('src.content_analysis.engagement_analyzer.datetime') as mock_datetime: mock_datetime.now.return_value = datetime(2025, 8, 28) mock_datetime.strptime = datetime.strptime velocity = analyzer._get_engagement_velocity(item) assert velocity == 5000 # 5000 views / 1 day # Older content item = { 'views': 1000, 'upload_date': '2025-08-25' } with patch('src.content_analysis.engagement_analyzer.datetime') as mock_datetime: mock_datetime.now.return_value = datetime(2025, 8, 28) mock_datetime.strptime = datetime.strptime velocity = analyzer._get_engagement_velocity(item) assert velocity == 333.33 # 1000 views / 3 days (rounded) def test_empty_content_list(self, analyzer): """Test handling of empty content lists""" result = analyzer.analyze_source_engagement([], 'youtube') assert result['total_items'] == 0 assert result['avg_engagement_rate'] == 0 assert result['median_engagement_rate'] == 0 assert result['total_engagement'] == 0 assert result['trending_count'] == 0 assert result['high_performers'] == 0 assert result['trending_content'] == [] def test_missing_engagement_data(self, analyzer): """Test handling of content with missing engagement data""" items = [ {'id': 'test1', 'title': 'Test', 'source': 'youtube'}, # No engagement data {'id': 'test2', 'title': 'Test 2', 'source': 'youtube', 'views': 0} # Zero views ] result = analyzer.analyze_source_engagement(items, 'youtube') assert result['total_items'] == 2 assert result['avg_engagement_rate'] == 0 assert result['total_engagement'] == 0 def test_engagement_thresholds_configuration(self, analyzer): """Test engagement threshold configuration for different sources""" # Check YouTube thresholds youtube_thresholds = analyzer.engagement_thresholds['youtube'] assert 'high_engagement_rate' in youtube_thresholds assert 'viral_threshold' in youtube_thresholds assert 'view_velocity_threshold' in youtube_thresholds # Check Instagram thresholds instagram_thresholds = analyzer.engagement_thresholds['instagram'] assert 'high_engagement_rate' in instagram_thresholds assert 'viral_threshold' in instagram_thresholds def test_wordpress_engagement_analysis(self, analyzer): """Test WordPress content engagement analysis""" items = [ { 'id': 'post1', 'title': 'HVAC Blog Post', 'source': 'wordpress', 'comments': 15, 'upload_date': '2025-08-27' } ] result = analyzer.analyze_source_engagement(items, 'wordpress') assert result['total_items'] == 1 # WordPress uses estimated views from comments assert result['total_engagement'] == 15 def test_podcast_engagement_analysis(self, analyzer): """Test podcast content engagement analysis""" items = [ { 'id': 'episode1', 'title': 'HVAC Podcast Episode', 'source': 'podcast', 'upload_date': '2025-08-27' } ] result = analyzer.analyze_source_engagement(items, 'podcast') assert result['total_items'] == 1 # Podcast typically has minimal engagement data assert result['total_engagement'] == 0 def test_edge_case_numeric_conversions(self, analyzer): """Test edge cases in numeric field handling""" # Test string numeric values item = {'views': '1,000', 'likes': '50', 'comments': '10'} rate = analyzer._calculate_engagement_rate(item, 'youtube') # Should handle string conversion: (50+10)/1000 = 0.06 assert rate == 0.06 # Test None values item = {'views': None, 'likes': None, 'comments': None} rate = analyzer._calculate_engagement_rate(item, 'youtube') assert rate == 0 def test_trending_content_types(self, analyzer): """Test different types of trending content classification""" # High engagement, recent = viral viral_item = { 'id': 'viral1', 'title': 'Viral HVAC Video', 'views': 100000, 'likes': 5000, 'comments': 500, 'upload_date': '2025-08-27' } # Steady growth steady_item = { 'id': 'steady1', 'title': 'Steady HVAC Content', 'views': 10000, 'likes': 300, 'comments': 30, 'upload_date': '2025-08-25' } items = [viral_item, steady_item] trending = analyzer.identify_trending_content(items, 'youtube') # Should identify trending content with proper classification assert len(trending) > 0 # Check for viral classification viral_found = any(item.get('trend_type') == 'viral' for item in trending) # Note: This might not always trigger depending on thresholds, so we test structure for item in trending: assert item['trend_type'] in ['viral', 'steady_growth', 'spike'] if __name__ == "__main__": pytest.main([__file__, "-v", "--cov=src.content_analysis.engagement_analyzer", "--cov-report=term-missing"])