#!/usr/bin/env python3
"""
Comprehensive Unit Tests for Intelligence Aggregator

Tests intelligence report generation, markdown parsing, content analysis
coordination, and strategic insights.
"""

import json
import shutil
import sys
from datetime import datetime, timedelta
from pathlib import Path
from unittest.mock import Mock, patch, mock_open

import pytest

# Add src to path for imports
if str(Path(__file__).parent.parent) not in sys.path:
    sys.path.insert(0, str(Path(__file__).parent.parent))

from src.content_analysis.intelligence_aggregator import IntelligenceAggregator


class TestIntelligenceAggregator:
    """Test suite for IntelligenceAggregator.

    Every test works against a throw-away data directory built under pytest's
    ``tmp_path`` so nothing is written outside the temporary tree.
    """

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def temp_data_dir(self, tmp_path):
        """Create temporary data directory structure"""
        data_dir = tmp_path / "data"
        data_dir.mkdir()

        # Create required subdirectories
        (data_dir / "intelligence" / "daily").mkdir(parents=True)
        (data_dir / "intelligence" / "weekly").mkdir(parents=True)
        (data_dir / "intelligence" / "monthly").mkdir(parents=True)
        (data_dir / "markdown_current").mkdir()

        return data_dir

    @pytest.fixture
    def aggregator(self, temp_data_dir):
        """Create intelligence aggregator instance with temp directory"""
        return IntelligenceAggregator(temp_data_dir)

    @pytest.fixture
    def sample_markdown_content(self):
        """Sample markdown content (two items) for testing parsing.

        Fields are separated by a blank line, the same layout as the inline
        example used in ``test_load_hkia_content_with_files``.
        """
        return """# ID: video1

## Title: HVAC Installation Guide

## Type: video

## Author: HVAC Know It All

## Link: https://www.youtube.com/watch?v=video1

## Upload Date: 2025-08-27

## Views: 5000

## Likes: 250

## Comments: 30

## Engagement Rate: 5.6%

## Description:
Learn professional HVAC installation techniques in this comprehensive guide.

# ID: video2

## Title: Heat Pump Maintenance

## Type: video

## Views: 3000

## Likes: 150

## Comments: 20

## Description:
Essential heat pump maintenance procedures for optimal performance.
"""

    @pytest.fixture
    def sample_content_items(self):
        """Sample content items (one per source) for testing analysis"""
        return [
            {
                'id': 'item1',
                'title': 'HVAC Installation Guide',
                'source': 'youtube',
                'views': 5000,
                'likes': 250,
                'comments': 30,
                'content': 'Professional HVAC installation techniques, heat pump setup, refrigeration cycle',
                'upload_date': '2025-08-27'
            },
            {
                'id': 'item2',
                'title': 'AC Troubleshooting',
                'source': 'wordpress',
                'likes': 45,
                'comments': 8,
                'content': 'Air conditioning repair, compressor issues, refrigerant leaks',
                'upload_date': '2025-08-26'
            },
            {
                'id': 'item3',
                'title': 'Smart Thermostat Install',
                'source': 'instagram',
                'likes': 120,
                'comments': 15,
                'content': 'Smart thermostat wiring, HVAC controls, energy efficiency',
                'upload_date': '2025-08-25'
            }
        ]

    # ------------------------------------------------------------------
    # Initialization and parsing
    # ------------------------------------------------------------------

    def test_initialization(self, temp_data_dir):
        """Test aggregator initialization and directory creation"""
        aggregator = IntelligenceAggregator(temp_data_dir)

        assert aggregator.data_dir == temp_data_dir
        assert aggregator.intelligence_dir == temp_data_dir / "intelligence"
        assert aggregator.intelligence_dir.exists()
        assert (aggregator.intelligence_dir / "daily").exists()
        assert (aggregator.intelligence_dir / "weekly").exists()
        assert (aggregator.intelligence_dir / "monthly").exists()

    def test_parse_markdown_file(self, aggregator, temp_data_dir, sample_markdown_content):
        """Test markdown file parsing"""
        # Create test markdown file
        md_file = temp_data_dir / "markdown_current" / "hkia_youtube_test.md"
        md_file.write_text(sample_markdown_content, encoding='utf-8')

        items = aggregator._parse_markdown_file(md_file)

        assert len(items) == 2

        # Check first item
        item1 = items[0]
        assert item1['id'] == 'video1'
        assert item1['title'] == 'HVAC Installation Guide'
        assert item1['source'] == 'youtube'
        assert item1['views'] == 5000
        assert item1['likes'] == 250
        assert item1['comments'] == 30

        # Check second item
        item2 = items[1]
        assert item2['id'] == 'video2'
        assert item2['title'] == 'Heat Pump Maintenance'
        assert item2['views'] == 3000

    def test_parse_content_item(self, aggregator):
        """Test individual content item parsing"""
        item_content = """video1

## Title: Test Video

## Views: 1,500

## Likes: 75

## Comments: 10

## Description:
Test video description here.
"""

        item = aggregator._parse_content_item(item_content, "youtube_test")

        assert item['id'] == 'video1'
        assert item['title'] == 'Test Video'
        assert item['views'] == 1500  # Comma should be removed
        assert item['likes'] == 75
        assert item['comments'] == 10
        assert item['source'] == 'youtube'

    def test_extract_numeric_fields(self, aggregator):
        """Test numeric field extraction and conversion"""
        item = {
            'views': '10,000',
            'likes': '500',
            'comments': '50',
            'invalid_number': 'abc'
        }

        aggregator._extract_numeric_fields(item)

        assert item['views'] == 10000
        assert item['likes'] == 500
        assert item['comments'] == 50
        # 'invalid_number' is not in the numeric_fields list, so it must be
        # left untouched rather than coerced to 0.
        assert item['invalid_number'] == 'abc'

    def test_extract_source_from_filename(self, aggregator):
        """Test source extraction from filenames"""
        assert aggregator._extract_source_from_filename("hkia_youtube_20250827") == "youtube"
        assert aggregator._extract_source_from_filename("hkia_instagram_test") == "instagram"
        assert aggregator._extract_source_from_filename("hkia_wordpress_latest") == "wordpress"
        assert aggregator._extract_source_from_filename("hkia_mailchimp_feed") == "mailchimp"
        assert aggregator._extract_source_from_filename("hkia_podcast_episode") == "podcast"
        assert aggregator._extract_source_from_filename("hkia_hvacrschool_article") == "hvacrschool"
        assert aggregator._extract_source_from_filename("unknown_source") == "unknown"

    # ------------------------------------------------------------------
    # Daily report generation and content loading
    # ------------------------------------------------------------------

    # Patch decorators are applied bottom-up, so the innermost patch
    # (_analyze_hkia_content) is the first mock argument.
    @patch('src.content_analysis.intelligence_aggregator.IntelligenceAggregator._load_hkia_content')
    @patch('src.content_analysis.intelligence_aggregator.IntelligenceAggregator._analyze_hkia_content')
    def test_generate_daily_intelligence(self, mock_analyze, mock_load, aggregator, sample_content_items):
        """Test daily intelligence report generation"""
        # Mock content loading
        mock_load.return_value = sample_content_items

        # Mock analysis results
        mock_analyze.return_value = {
            'content_classified': 3,
            'topic_distribution': {'hvac_systems': {'count': 2}, 'maintenance': {'count': 1}},
            'engagement_summary': {'youtube': {'total_items': 1}},
            'trending_keywords': [{'keyword': 'hvac', 'frequency': 3}],
            'content_gaps': [],
            'sentiment_overview': {'avg_sentiment': 0.5}
        }

        # Generate report
        test_date = datetime(2025, 8, 28)
        report = aggregator.generate_daily_intelligence(test_date)

        # Verify report structure
        assert 'report_date' in report
        assert 'generated_at' in report
        assert 'hkia_analysis' in report
        assert 'competitor_analysis' in report
        assert 'strategic_insights' in report
        assert 'meta' in report

        assert report['report_date'] == '2025-08-28'
        assert report['meta']['total_hkia_items'] == 3

    def test_load_hkia_content_no_files(self, aggregator, temp_data_dir):
        """Test content loading when no markdown files exist"""
        test_date = datetime(2025, 8, 28)
        content = aggregator._load_hkia_content(test_date)

        assert content == []

    def test_load_hkia_content_with_files(self, aggregator, temp_data_dir, sample_markdown_content):
        """Test content loading with markdown files"""
        # Create test files (explicit encoding keeps this platform-independent)
        md_dir = temp_data_dir / "markdown_current"
        (md_dir / "hkia_youtube_20250827.md").write_text(sample_markdown_content, encoding='utf-8')
        (md_dir / "hkia_instagram_20250827.md").write_text(
            "# ID: post1\n\n## Title: Test Post", encoding='utf-8'
        )

        test_date = datetime(2025, 8, 28)
        content = aggregator._load_hkia_content(test_date)

        assert len(content) >= 2  # Should load from both files

    # ------------------------------------------------------------------
    # Content analysis
    # ------------------------------------------------------------------

    @patch('src.content_analysis.intelligence_aggregator.ClaudeHaikuAnalyzer')
    def test_analyze_hkia_content_with_claude(self, mock_claude_class, aggregator, sample_content_items):
        """Test HKIA content analysis with Claude analyzer"""
        # Mock Claude analyzer
        mock_analyzer = Mock()
        mock_analyzer.analyze_content_batch.return_value = [
            {'topics': ['hvac_systems'], 'sentiment': 0.7, 'difficulty': 'intermediate'},
            {'topics': ['maintenance'], 'sentiment': 0.5, 'difficulty': 'beginner'},
            {'topics': ['controls'], 'sentiment': 0.6, 'difficulty': 'advanced'}
        ]
        mock_claude_class.return_value = mock_analyzer

        # The `aggregator` fixture is built before this patch becomes active,
        # so inject the mock analyzer directly instead of re-constructing.
        aggregator.claude_analyzer = mock_analyzer

        result = aggregator._analyze_hkia_content(sample_content_items)

        assert result['content_classified'] == 3
        assert 'topic_distribution' in result
        assert 'engagement_summary' in result
        assert 'trending_keywords' in result

    def test_analyze_hkia_content_without_claude(self, aggregator, sample_content_items):
        """Test HKIA content analysis without Claude analyzer (fallback mode)"""
        # Ensure no Claude analyzer
        aggregator.claude_analyzer = None

        result = aggregator._analyze_hkia_content(sample_content_items)

        assert result['content_classified'] == 0
        assert 'topic_distribution' in result
        assert 'engagement_summary' in result
        assert 'trending_keywords' in result

        # Should still have engagement analysis and keyword extraction
        assert len(result['engagement_summary']) > 0

    def test_calculate_topic_distribution(self, aggregator):
        """Test topic distribution calculation"""
        analyses = [
            {'topics': ['hvac_systems'], 'sentiment': 0.7},
            {'topics': ['hvac_systems', 'maintenance'], 'sentiment': 0.5},
            {'topics': ['maintenance'], 'sentiment': 0.6}
        ]

        distribution = aggregator._calculate_topic_distribution(analyses)

        assert 'hvac_systems' in distribution
        assert 'maintenance' in distribution
        assert distribution['hvac_systems']['count'] == 2
        assert distribution['maintenance']['count'] == 2
        # Mean of 0.7 and 0.5
        assert abs(distribution['hvac_systems']['avg_sentiment'] - 0.6) < 0.1

    def test_calculate_sentiment_overview(self, aggregator):
        """Test sentiment overview calculation"""
        analyses = [
            {'sentiment': 0.7},
            {'sentiment': 0.5},
            {'sentiment': 0.6}
        ]

        overview = aggregator._calculate_sentiment_overview(analyses)

        assert 'avg_sentiment' in overview
        assert 'sentiment_distribution' in overview
        assert abs(overview['avg_sentiment'] - 0.6) < 0.1

    def test_identify_content_gaps(self, aggregator):
        """Test content gap identification"""
        topic_distribution = {
            'hvac_systems': {'count': 10},
            'maintenance': {'count': 1},  # Low coverage
            'installation': {'count': 8},
            'troubleshooting': {'count': 1}  # Low coverage
        }

        gaps = aggregator._identify_content_gaps(topic_distribution)

        assert len(gaps) > 0
        assert any('maintenance' in gap for gap in gaps)
        assert any('troubleshooting' in gap for gap in gaps)

    def test_generate_strategic_insights(self, aggregator):
        """Test strategic insights generation"""
        hkia_analysis = {
            'topic_distribution': {
                'maintenance': {'count': 1},
                'installation': {'count': 8}
            },
            'trending_keywords': [{'keyword': 'heat pump', 'frequency': 20}],
            'engagement_summary': {
                'youtube': {'avg_engagement_rate': 0.02}
            },
            'sentiment_overview': {'avg_sentiment': 0.3}
        }
        competitor_analysis = {}

        insights = aggregator._generate_strategic_insights(hkia_analysis, competitor_analysis)

        assert 'content_opportunities' in insights
        assert 'performance_insights' in insights
        assert 'competitive_advantages' in insights
        assert 'areas_for_improvement' in insights

        # Should identify content opportunities based on trending keywords
        assert len(insights['content_opportunities']) > 0

    # ------------------------------------------------------------------
    # Report persistence and weekly roll-up
    # ------------------------------------------------------------------

    def test_save_intelligence_report(self, aggregator, temp_data_dir):
        """Test intelligence report saving"""
        report = {
            'report_date': '2025-08-28',
            'test_data': 'sample'
        }

        test_date = datetime(2025, 8, 28)
        saved_file = aggregator._save_intelligence_report(report, test_date, 'daily')

        assert saved_file.exists()
        assert 'hkia_intelligence_2025-08-28.json' in saved_file.name

        # Verify content
        with open(saved_file, 'r', encoding='utf-8') as f:
            saved_report = json.load(f)

        assert saved_report['report_date'] == '2025-08-28'

    def test_generate_weekly_intelligence(self, aggregator, temp_data_dir):
        """Test weekly intelligence generation"""
        # Create sample daily reports for 2025-08-21 .. 2025-08-27
        daily_dir = temp_data_dir / "intelligence" / "daily"

        for i in range(7):
            date = datetime(2025, 8, 21) + timedelta(days=i)
            date_str = date.strftime('%Y-%m-%d')

            report = {
                'report_date': date_str,
                'hkia_analysis': {
                    'content_classified': 10,
                    'trending_keywords': [{'keyword': 'hvac', 'frequency': 5}]
                },
                'meta': {'total_hkia_items': 100}
            }

            report_file = daily_dir / f"hkia_intelligence_{date_str}.json"
            with open(report_file, 'w', encoding='utf-8') as f:
                json.dump(report, f)

        # Generate weekly report
        end_date = datetime(2025, 8, 28)
        weekly_report = aggregator.generate_weekly_intelligence(end_date)

        assert 'period_start' in weekly_report
        assert 'period_end' in weekly_report
        assert 'summary' in weekly_report
        assert 'daily_reports_included' in weekly_report

    # ------------------------------------------------------------------
    # Error handling and edge cases
    # ------------------------------------------------------------------

    def test_error_handling_file_operations(self, aggregator):
        """Test error handling in file operations"""
        # Test parsing non-existent file
        fake_file = Path("/nonexistent/file.md")
        items = aggregator._parse_markdown_file(fake_file)
        assert items == []

        # Test parsing malformed content
        malformed_content = "This is not properly formatted markdown"
        item = aggregator._parse_content_item(malformed_content, "test")
        assert item is None

    def test_empty_content_analysis(self, aggregator):
        """Test analysis with empty content list"""
        result = aggregator._analyze_hkia_content([])

        assert result['content_classified'] == 0
        assert result['topic_distribution'] == {}
        assert result['trending_keywords'] == []
        assert result['content_gaps'] == []

    @patch('builtins.open', side_effect=IOError("File access error"))
    def test_file_access_error_handling(self, mock_file_open, aggregator, temp_data_dir):
        """Test handling of file access errors"""
        # The injected patch argument is deliberately not called `mock_open`,
        # which would shadow the helper imported from unittest.mock.
        # NOTE(review): the temp markdown directory is empty here, so this may
        # pass without the loader ever calling open() - confirm the error path
        # is actually exercised.
        test_date = datetime(2025, 8, 28)

        # Should handle file access errors gracefully
        content = aggregator._load_hkia_content(test_date)
        assert content == []

    def test_numeric_field_edge_cases(self, aggregator):
        """Test numeric field extraction edge cases"""
        item = {
            'views': '',  # Empty string
            'likes': 'N/A',  # Non-numeric string
            'comments': None,  # None value
            'view_count': '1.5K'  # Non-standard format
        }

        aggregator._extract_numeric_fields(item)

        # All should convert to 0 for invalid formats
        assert item['views'] == 0
        assert item['likes'] == 0
        assert item['comments'] == 0
        assert item['view_count'] == 0

    def test_intelligence_directory_permissions(self, aggregator, temp_data_dir):
        """Test intelligence directory creation with proper permissions"""
        # Remove intelligence directory to test recreation
        intelligence_dir = temp_data_dir / "intelligence"
        if intelligence_dir.exists():
            shutil.rmtree(intelligence_dir)

        # Re-initialize aggregator
        new_aggregator = IntelligenceAggregator(temp_data_dir)

        assert new_aggregator.intelligence_dir.exists()
        assert (new_aggregator.intelligence_dir / "daily").exists()


if __name__ == "__main__":
    # Propagate pytest's exit code so running this file as a script fails
    # visibly (non-zero status) when any test fails.
    sys.exit(pytest.main([
        __file__,
        "-v",
        "--cov=src.content_analysis.intelligence_aggregator",
        "--cov-report=term-missing",
    ]))