#!/usr/bin/env python3
"""
Comprehensive test suite for MailChimp API scraper
Following TDD principles for robust implementation validation
"""

import json
import os
import sys
import time
from datetime import datetime
from pathlib import Path
from unittest.mock import MagicMock, Mock, patch

import pytest
import pytz

# Put the parent of this file's directory on sys.path so the ``src`` package
# imports below resolve regardless of the current working directory.
sys.path.insert(0, str(Path(__file__).parent.parent))

from src.mailchimp_api_scraper import MailChimpAPIScraper  # noqa: E402
from src.base_scraper import ScraperConfig  # noqa: E402


class TestMailChimpAPIScraper:
    """Test suite for MailChimp API scraper.

    Every HTTP interaction is replaced by patching ``requests.get``; the
    fixtures below supply canned payloads for the folder, campaign, content
    and report responses used by the tests.
    """

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def config(self, tmp_path):
        """Create test configuration rooted in pytest's per-test tmp dir."""
        return ScraperConfig(
            source_name='mailchimp',
            brand_name='test_brand',
            data_dir=tmp_path / 'data',
            logs_dir=tmp_path / 'logs',
            timezone='America/Halifax',
        )

    @pytest.fixture
    def mock_env_vars(self, monkeypatch):
        """Mock the environment variables read by the scraper."""
        monkeypatch.setenv('MAILCHIMP_API_KEY', 'test-api-key-us10')
        monkeypatch.setenv('MAILCHIMP_SERVER_PREFIX', 'us10')

    @pytest.fixture
    def scraper(self, config, mock_env_vars):
        """Create scraper instance with mocked environment."""
        return MailChimpAPIScraper(config)

    @pytest.fixture
    def sample_folder_response(self):
        """Sample folder list response."""
        return {
            'folders': [
                {'id': 'folder1', 'name': 'General'},
                {'id': 'folder2', 'name': 'Bi-Weekly Newsletter'},
                {'id': 'folder3', 'name': 'Special Announcements'},
            ],
            'total_items': 3,
        }

    @pytest.fixture
    def sample_campaigns_response(self):
        """Sample campaigns list response (two sent campaigns in folder2)."""
        return {
            'campaigns': [
                {
                    'id': 'camp1',
                    'type': 'regular',
                    'status': 'sent',
                    'send_time': '2025-08-15T10:00:00+00:00',
                    'archive_url': 'https://archive.url/camp1',
                    'long_archive_url': 'https://long.archive.url/camp1',
                    'settings': {
                        'subject_line': 'August Newsletter - HVAC Tips',
                        'preview_text': 'This month: AC maintenance tips',
                        'from_name': 'HVAC Know It All',
                        'reply_to': 'info@hvacknowitall.com',
                        'folder_id': 'folder2',
                    },
                },
                {
                    # Deliberately has no archive URLs, unlike camp1.
                    'id': 'camp2',
                    'type': 'regular',
                    'status': 'sent',
                    'send_time': '2025-08-01T10:00:00+00:00',
                    'settings': {
                        'subject_line': 'July Newsletter - Heat Pump Guide',
                        'preview_text': 'Everything about heat pumps',
                        'from_name': 'HVAC Know It All',
                        'reply_to': 'info@hvacknowitall.com',
                        'folder_id': 'folder2',
                    },
                },
            ],
            'total_items': 2,
        }

    @pytest.fixture
    def sample_content_response(self):
        """Sample campaign content response."""
        return {
            'plain_text': (
                'Welcome to our August newsletter!\n\n'
                'This month we cover AC maintenance...'
            ),
            # Minimal markup on a single line; the tests in this class only
            # assert that the 'html' key is present, never its exact value.
            'html': '<html><body><p>Welcome to our August newsletter!</p></body></html>',
        }

    @pytest.fixture
    def sample_report_response(self):
        """Sample campaign report response."""
        return {
            'emails_sent': 1500,
            'opens': {
                'unique_opens': 850,
                'open_rate': 0.567,
                'opens_total': 1200,
            },
            'clicks': {
                'unique_clicks': 125,
                'click_rate': 0.083,
                'clicks_total': 180,
            },
            'unsubscribed': 3,
            'bounces': {
                'hard_bounces': 2,
                'soft_bounces': 5,
                'syntax_errors': 0,
            },
            'abuse_reports': 0,
            'forwards': {
                'forwards_count': 10,
                'forwards_opens': 15,
            },
        }

    # ------------------------------------------------------------------
    # Initialization
    # ------------------------------------------------------------------

    def test_initialization(self, scraper):
        """Test scraper initialization."""
        assert scraper.api_key == 'test-api-key-us10'
        assert scraper.server_prefix == 'us10'
        assert scraper.base_url == 'https://us10.api.mailchimp.com/3.0'
        assert scraper.target_folder_name == 'Bi-Weekly Newsletter'

    def test_missing_api_key(self, config, monkeypatch):
        """Test initialization fails without API key."""
        # raising=False: the variable may already be absent in this environment.
        monkeypatch.delenv('MAILCHIMP_API_KEY', raising=False)

        with pytest.raises(ValueError, match="MAILCHIMP_API_KEY not found"):
            MailChimpAPIScraper(config)

    # ------------------------------------------------------------------
    # Connection
    # ------------------------------------------------------------------

    @patch('requests.get')
    def test_connection_success(self, mock_get, scraper):
        """Test successful API connection."""
        mock_get.return_value.status_code = 200

        result = scraper._test_connection()

        assert result is True
        mock_get.assert_called_once_with(
            'https://us10.api.mailchimp.com/3.0/ping',
            headers=scraper.headers,
        )

    @patch('requests.get')
    def test_connection_failure(self, mock_get, scraper):
        """Test failed API connection."""
        mock_get.return_value.status_code = 401

        result = scraper._test_connection()

        assert result is False

    # ------------------------------------------------------------------
    # Folder lookup
    # ------------------------------------------------------------------

    @patch('requests.get')
    def test_get_folder_id(self, mock_get, scraper, sample_folder_response):
        """Test finding the target folder ID."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = sample_folder_response

        folder_id = scraper._get_folder_id()

        assert folder_id == 'folder2'
        assert scraper.target_folder_id == 'folder2'

    @patch('requests.get')
    def test_get_folder_id_not_found(self, mock_get, scraper):
        """Test when target folder doesn't exist."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = {
            'folders': [{'id': 'other', 'name': 'Other Folder'}],
            'total_items': 1,
        }

        folder_id = scraper._get_folder_id()

        assert folder_id is None

    # ------------------------------------------------------------------
    # Campaign content and reports
    # ------------------------------------------------------------------

    @patch('requests.get')
    def test_fetch_campaign_content(self, mock_get, scraper, sample_content_response):
        """Test fetching campaign content."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = sample_content_response

        content = scraper._fetch_campaign_content('camp1')

        assert content is not None
        assert 'plain_text' in content
        assert 'html' in content

    @patch('requests.get')
    def test_fetch_campaign_report(self, mock_get, scraper, sample_report_response):
        """Test fetching campaign metrics."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = sample_report_response

        report = scraper._fetch_campaign_report('camp1')

        assert report is not None
        assert report['emails_sent'] == 1500
        assert report['opens']['unique_opens'] == 850
        assert report['clicks']['unique_clicks'] == 125

    @patch('requests.get')
    def test_fetch_content_full_flow(
        self,
        mock_get,
        scraper,
        sample_folder_response,
        sample_campaigns_response,
        sample_content_response,
        sample_report_response,
    ):
        """Test complete content fetching flow."""
        # Setup mock responses in the order this test expects requests.get to
        # be called; each call consumes the next entry.
        mock_responses = [
            Mock(status_code=200,
                 json=Mock(return_value={'health_status': "Everything's Chimpy!"})),  # ping
            Mock(status_code=200, json=Mock(return_value=sample_folder_response)),  # folders
            Mock(status_code=200, json=Mock(return_value=sample_campaigns_response)),  # campaigns
            Mock(status_code=200, json=Mock(return_value=sample_content_response)),  # content camp1
            Mock(status_code=200, json=Mock(return_value=sample_report_response)),  # report camp1
            Mock(status_code=200, json=Mock(return_value=sample_content_response)),  # content camp2
            Mock(status_code=200, json=Mock(return_value=sample_report_response)),  # report camp2
        ]
        mock_get.side_effect = mock_responses

        campaigns = scraper.fetch_content(max_items=10)

        assert len(campaigns) == 2
        assert campaigns[0]['id'] == 'camp1'
        assert campaigns[0]['title'] == 'August Newsletter - HVAC Tips'
        assert campaigns[0]['metrics']['emails_sent'] == 1500
        assert campaigns[0]['plain_text'] == sample_content_response['plain_text']

    # ------------------------------------------------------------------
    # Markdown formatting
    # ------------------------------------------------------------------

    def test_format_markdown(self, scraper):
        """Test markdown formatting."""
        campaigns = [
            {
                'id': 'camp1',
                'title': 'Test Newsletter',
                'send_time': '2025-08-15T10:00:00+00:00',
                'from_name': 'Test Sender',
                'reply_to': 'test@example.com',
                'long_archive_url': 'https://archive.url',
                'preview_text': 'Preview text here',
                'plain_text': 'Newsletter content here',
                'metrics': {
                    'emails_sent': 1000,
                    'unique_opens': 500,
                    'open_rate': 0.5,
                    'unique_clicks': 100,
                    'click_rate': 0.1,
                    'unsubscribed': 2,
                    'bounces': {'hard': 1, 'soft': 3},
                    'abuse_reports': 0,
                    'forwards': {'count': 5},
                },
            }
        ]

        markdown = scraper.format_markdown(campaigns)

        assert '# ID: camp1' in markdown
        assert '## Title: Test Newsletter' in markdown
        assert '## Type: email_campaign' in markdown
        assert '## Send Date: 2025-08-15T10:00:00+00:00' in markdown
        assert '### Emails Sent: 1000' in markdown
        assert '### Opens: 500 unique (50.0%)' in markdown
        assert '### Clicks: 100 unique (10.0%)' in markdown
        assert '## Content:' in markdown
        assert 'Newsletter content here' in markdown

    # ------------------------------------------------------------------
    # Incremental sync state
    # ------------------------------------------------------------------

    def test_get_incremental_items_no_state(self, scraper):
        """Test incremental items with no previous state."""
        items = [
            {'id': 'camp1', 'send_time': '2025-08-15'},
            {'id': 'camp2', 'send_time': '2025-08-01'},
        ]

        new_items = scraper.get_incremental_items(items, {})

        assert new_items == items

    def test_get_incremental_items_with_state(self, scraper):
        """Test incremental items with existing state."""
        items = [
            {'id': 'camp3', 'send_time': '2025-08-20'},
            {'id': 'camp2', 'send_time': '2025-08-15'},  # Last synced
            {'id': 'camp1', 'send_time': '2025-08-01'},
        ]
        state = {
            'last_campaign_id': 'camp2',
            'last_send_time': '2025-08-15',
        }

        new_items = scraper.get_incremental_items(items, state)

        # Only the campaign listed ahead of the last-synced one is new.
        assert len(new_items) == 1
        assert new_items[0]['id'] == 'camp3'

    def test_update_state(self, scraper):
        """Test state update with new campaigns."""
        items = [
            {'id': 'camp3', 'title': 'Latest Campaign', 'send_time': '2025-08-20'},
            {'id': 'camp2', 'title': 'Previous Campaign', 'send_time': '2025-08-15'},
        ]
        state = {}

        new_state = scraper.update_state(state, items)

        assert new_state['last_campaign_id'] == 'camp3'
        assert new_state['last_send_time'] == '2025-08-20'
        assert new_state['last_campaign_title'] == 'Latest Campaign'
        assert new_state['campaign_count'] == 2
        assert 'last_sync' in new_state

    # ------------------------------------------------------------------
    # Timing and error handling
    # ------------------------------------------------------------------

    @patch('requests.get')
    def test_quota_management(self, mock_get, scraper):
        """Test that a single content request completes in under a second."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = {'plain_text': 'content'}

        # Monotonic clock: immune to wall-clock adjustments during the test.
        start_time = time.monotonic()
        scraper._fetch_campaign_content('camp1')
        elapsed = time.monotonic() - start_time

        assert elapsed < 1.0  # Should be fast for single request

    @patch('requests.get')
    def test_error_handling(self, mock_get, scraper):
        """Test error handling in various scenarios."""
        # Test network error
        mock_get.side_effect = Exception("Network error")
        result = scraper._test_connection()
        assert result is False

        # Test campaign content fetch error
        # Clear side_effect so the mock returns a response object again.
        mock_get.side_effect = None
        mock_get.return_value.status_code = 404
        content = scraper._fetch_campaign_content('nonexistent')
        assert content is None

        # Test report fetch error
        report = scraper._fetch_campaign_report('nonexistent')
        assert report is None


if __name__ == "__main__":
    # Propagate pytest's exit status so a direct run fails when tests fail.
    sys.exit(pytest.main([__file__, "-v"]))