#!/usr/bin/env python3
"""
Comprehensive test suite for MailChimp API scraper
Following TDD principles for robust implementation validation
"""
import pytest
import json
import os
from unittest.mock import Mock, patch, MagicMock
from datetime import datetime
import pytz
from pathlib import Path
# Import the scraper
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.mailchimp_api_scraper import MailChimpAPIScraper
from src.base_scraper import ScraperConfig
class TestMailChimpAPIScraper:
    """Unit tests for ``MailChimpAPIScraper``.

    Every HTTP interaction is replaced by patching ``requests.get``, and the
    API credentials are injected through environment variables, so the suite
    never touches the network.
    """

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def config(self, tmp_path):
        """Return a ``ScraperConfig`` whose data/log directories live under ``tmp_path``."""
        return ScraperConfig(
            source_name='mailchimp',
            brand_name='test_brand',
            data_dir=tmp_path / 'data',
            logs_dir=tmp_path / 'logs',
            timezone='America/Halifax',
        )

    @pytest.fixture
    def mock_env_vars(self, monkeypatch):
        """Set the MailChimp credential environment variables for one test."""
        monkeypatch.setenv('MAILCHIMP_API_KEY', 'test-api-key-us10')
        monkeypatch.setenv('MAILCHIMP_SERVER_PREFIX', 'us10')

    @pytest.fixture
    def scraper(self, config, mock_env_vars):
        """Return a scraper built from ``config`` with the credentials in place."""
        return MailChimpAPIScraper(config)

    @pytest.fixture
    def sample_folder_response(self):
        """Return a folder-list payload containing three folders."""
        return {
            'folders': [
                {'id': 'folder1', 'name': 'General'},
                {'id': 'folder2', 'name': 'Bi-Weekly Newsletter'},
                {'id': 'folder3', 'name': 'Special Announcements'},
            ],
            'total_items': 3,
        }

    @pytest.fixture
    def sample_campaigns_response(self):
        """Return a campaign-list payload with two sent campaigns in ``folder2``.

        Only the first campaign carries archive URLs; the second omits them.
        """
        return {
            'campaigns': [
                {
                    'id': 'camp1',
                    'type': 'regular',
                    'status': 'sent',
                    'send_time': '2025-08-15T10:00:00+00:00',
                    'archive_url': 'https://archive.url/camp1',
                    'long_archive_url': 'https://long.archive.url/camp1',
                    'settings': {
                        'subject_line': 'August Newsletter - HVAC Tips',
                        'preview_text': 'This month: AC maintenance tips',
                        'from_name': 'HVAC Know It All',
                        'reply_to': 'info@hvacknowitall.com',
                        'folder_id': 'folder2',
                    },
                },
                {
                    'id': 'camp2',
                    'type': 'regular',
                    'status': 'sent',
                    'send_time': '2025-08-01T10:00:00+00:00',
                    'settings': {
                        'subject_line': 'July Newsletter - Heat Pump Guide',
                        'preview_text': 'Everything about heat pumps',
                        'from_name': 'HVAC Know It All',
                        'reply_to': 'info@hvacknowitall.com',
                        'folder_id': 'folder2',
                    },
                },
            ],
            'total_items': 2,
        }

    @pytest.fixture
    def sample_content_response(self):
        """Return a campaign-content payload with plain-text and HTML bodies."""
        return {
            'plain_text': 'Welcome to our August newsletter!\n\nThis month we cover AC maintenance...',
            # Minimal single-line markup; the tests in this class only check
            # that the 'html' key is present, never its exact value.
            'html': '<html><body><p>Welcome to our August newsletter!</p></body></html>',
        }

    @pytest.fixture
    def sample_report_response(self):
        """Return a campaign-report payload with open/click/bounce metrics."""
        return {
            'emails_sent': 1500,
            'opens': {
                'unique_opens': 850,
                'open_rate': 0.567,
                'opens_total': 1200,
            },
            'clicks': {
                'unique_clicks': 125,
                'click_rate': 0.083,
                'clicks_total': 180,
            },
            'unsubscribed': 3,
            'bounces': {
                'hard_bounces': 2,
                'soft_bounces': 5,
                'syntax_errors': 0,
            },
            'abuse_reports': 0,
            'forwards': {
                'forwards_count': 10,
                'forwards_opens': 15,
            },
        }

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    def test_initialization(self, scraper):
        """The scraper picks up credentials and derives its base URL from them."""
        assert scraper.api_key == 'test-api-key-us10'
        assert scraper.server_prefix == 'us10'
        assert scraper.base_url == 'https://us10.api.mailchimp.com/3.0'
        assert scraper.target_folder_name == 'Bi-Weekly Newsletter'

    def test_missing_api_key(self, config, monkeypatch):
        """Constructing the scraper without an API key raises ``ValueError``."""
        monkeypatch.delenv('MAILCHIMP_API_KEY', raising=False)
        with pytest.raises(ValueError, match="MAILCHIMP_API_KEY not found"):
            MailChimpAPIScraper(config)

    # ------------------------------------------------------------------
    # Individual API calls
    # ------------------------------------------------------------------

    @patch('requests.get')
    def test_connection_success(self, mock_get, scraper):
        """A 200 from the ping endpoint makes ``_test_connection`` return True."""
        mock_get.return_value.status_code = 200

        result = scraper._test_connection()

        assert result is True
        mock_get.assert_called_once_with(
            'https://us10.api.mailchimp.com/3.0/ping',
            headers=scraper.headers,
        )

    @patch('requests.get')
    def test_connection_failure(self, mock_get, scraper):
        """A 401 from the API makes ``_test_connection`` return False."""
        mock_get.return_value.status_code = 401

        result = scraper._test_connection()

        assert result is False

    @patch('requests.get')
    def test_get_folder_id(self, mock_get, scraper, sample_folder_response):
        """The target folder is located by name and its ID is cached on the scraper."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = sample_folder_response

        folder_id = scraper._get_folder_id()

        assert folder_id == 'folder2'
        assert scraper.target_folder_id == 'folder2'

    @patch('requests.get')
    def test_get_folder_id_not_found(self, mock_get, scraper):
        """``_get_folder_id`` returns None when no folder has the target name."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = {
            'folders': [{'id': 'other', 'name': 'Other Folder'}],
            'total_items': 1,
        }

        folder_id = scraper._get_folder_id()

        assert folder_id is None

    @patch('requests.get')
    def test_fetch_campaign_content(self, mock_get, scraper, sample_content_response):
        """Fetched campaign content exposes both ``plain_text`` and ``html``."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = sample_content_response

        content = scraper._fetch_campaign_content('camp1')

        assert content is not None
        assert 'plain_text' in content
        assert 'html' in content

    @patch('requests.get')
    def test_fetch_campaign_report(self, mock_get, scraper, sample_report_response):
        """Fetched campaign reports expose the send, open and click metrics."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = sample_report_response

        report = scraper._fetch_campaign_report('camp1')

        assert report is not None
        assert report['emails_sent'] == 1500
        assert report['opens']['unique_opens'] == 850
        assert report['clicks']['unique_clicks'] == 125

    # ------------------------------------------------------------------
    # End-to-end fetch
    # ------------------------------------------------------------------

    @patch('requests.get')
    def test_fetch_content_full_flow(self, mock_get, scraper,
                                     sample_folder_response,
                                     sample_campaigns_response,
                                     sample_content_response,
                                     sample_report_response):
        """``fetch_content`` combines campaign, content and report data."""
        # side_effect hands out exactly one response per requests.get call,
        # in this order, so the list encodes the request sequence the test
        # expects from the scraper.
        payloads = [
            {'health_status': "Everything's Chimpy!"},  # ping
            sample_folder_response,                      # folders
            sample_campaigns_response,                   # campaigns
            sample_content_response,                     # content camp1
            sample_report_response,                      # report camp1
            sample_content_response,                     # content camp2
            sample_report_response,                      # report camp2
        ]
        mock_get.side_effect = [
            Mock(status_code=200, json=Mock(return_value=payload))
            for payload in payloads
        ]

        campaigns = scraper.fetch_content(max_items=10)

        assert len(campaigns) == 2
        assert campaigns[0]['id'] == 'camp1'
        assert campaigns[0]['title'] == 'August Newsletter - HVAC Tips'
        assert campaigns[0]['metrics']['emails_sent'] == 1500
        assert campaigns[0]['plain_text'] == sample_content_response['plain_text']

    # ------------------------------------------------------------------
    # Formatting
    # ------------------------------------------------------------------

    def test_format_markdown(self, scraper):
        """``format_markdown`` renders IDs, metadata, metrics and body text."""
        campaigns = [
            {
                'id': 'camp1',
                'title': 'Test Newsletter',
                'send_time': '2025-08-15T10:00:00+00:00',
                'from_name': 'Test Sender',
                'reply_to': 'test@example.com',
                'long_archive_url': 'https://archive.url',
                'preview_text': 'Preview text here',
                'plain_text': 'Newsletter content here',
                'metrics': {
                    'emails_sent': 1000,
                    'unique_opens': 500,
                    'open_rate': 0.5,
                    'unique_clicks': 100,
                    'click_rate': 0.1,
                    'unsubscribed': 2,
                    'bounces': {'hard': 1, 'soft': 3},
                    'abuse_reports': 0,
                    'forwards': {'count': 5},
                },
            }
        ]

        markdown = scraper.format_markdown(campaigns)

        assert '# ID: camp1' in markdown
        assert '## Title: Test Newsletter' in markdown
        assert '## Type: email_campaign' in markdown
        assert '## Send Date: 2025-08-15T10:00:00+00:00' in markdown
        assert '### Emails Sent: 1000' in markdown
        assert '### Opens: 500 unique (50.0%)' in markdown
        assert '### Clicks: 100 unique (10.0%)' in markdown
        assert '## Content:' in markdown
        assert 'Newsletter content here' in markdown

    # ------------------------------------------------------------------
    # Incremental sync state
    # ------------------------------------------------------------------

    def test_get_incremental_items_no_state(self, scraper):
        """With an empty state every item is treated as new."""
        items = [
            {'id': 'camp1', 'send_time': '2025-08-15'},
            {'id': 'camp2', 'send_time': '2025-08-01'},
        ]

        new_items = scraper.get_incremental_items(items, {})

        assert new_items == items

    def test_get_incremental_items_with_state(self, scraper):
        """Only items listed before the last synced campaign are returned."""
        items = [
            {'id': 'camp3', 'send_time': '2025-08-20'},
            {'id': 'camp2', 'send_time': '2025-08-15'},  # Last synced
            {'id': 'camp1', 'send_time': '2025-08-01'},
        ]
        state = {
            'last_campaign_id': 'camp2',
            'last_send_time': '2025-08-15',
        }

        new_items = scraper.get_incremental_items(items, state)

        assert len(new_items) == 1
        assert new_items[0]['id'] == 'camp3'

    def test_update_state(self, scraper):
        """``update_state`` records the first item as the latest campaign."""
        items = [
            {'id': 'camp3', 'title': 'Latest Campaign', 'send_time': '2025-08-20'},
            {'id': 'camp2', 'title': 'Previous Campaign', 'send_time': '2025-08-15'},
        ]
        state = {}

        new_state = scraper.update_state(state, items)

        assert new_state['last_campaign_id'] == 'camp3'
        assert new_state['last_send_time'] == '2025-08-20'
        assert new_state['last_campaign_title'] == 'Latest Campaign'
        assert new_state['campaign_count'] == 2
        assert 'last_sync' in new_state

    # ------------------------------------------------------------------
    # Timing and error paths
    # ------------------------------------------------------------------

    @patch('requests.get')
    def test_quota_management(self, mock_get, scraper):
        """A single mocked content fetch completes in well under a second.

        NOTE(review): this only guards against an unexpected delay on a lone
        request; it does not exercise throttling across several requests.
        """
        import time

        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = {'plain_text': 'content'}

        # Monotonic clock: the measured interval cannot be skewed by a
        # wall-clock adjustment happening while the test runs.
        started = time.monotonic()
        scraper._fetch_campaign_content('camp1')
        elapsed = time.monotonic() - started

        assert elapsed < 1.0  # Should be fast for single request

    @patch('requests.get')
    def test_error_handling(self, mock_get, scraper):
        """Failures surface as ``False``/``None`` rather than as exceptions."""
        # An exception raised by requests.get is reported as a failed connection.
        mock_get.side_effect = Exception("Network error")
        result = scraper._test_connection()
        assert result is False

        # Clear the side effect so the mock returns a plain 404 response again.
        mock_get.side_effect = None
        mock_get.return_value.status_code = 404

        content = scraper._fetch_campaign_content('nonexistent')
        assert content is None

        report = scraper._fetch_campaign_report('nonexistent')
        assert report is None
if __name__ == "__main__":
    # Run this file's tests verbosely and hand pytest's exit code to the
    # shell, so a direct `python <file>` run fails when any test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))