Major Changes: - Updated all code references from hvacknowitall/hvacnkowitall to hkia - Renamed all existing markdown files to use hkia_ prefix - Updated configuration files, scrapers, and production scripts - Modified systemd service descriptions to use HKIA - Changed NAS sync path to /mnt/nas/hkia Files Updated: - 20+ source files updated with new naming convention - 34 markdown files renamed to hkia_* format - All ScraperConfig brand_name parameters now use 'hkia' - Documentation updated to reflect new naming Rationale: - Shorter, cleaner filenames - Consistent branding across all outputs - Easier to type and reference - Maintains same functionality with improved naming Next Steps: - Deploy updated services to production - Update any external references to old naming - Monitor scrapers to ensure proper operation 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
364 lines
No EOL
13 KiB
Python
364 lines
No EOL
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Comprehensive test suite for MailChimp API scraper
|
|
Following TDD principles for robust implementation validation
|
|
"""
|
|
|
|
import pytest
|
|
import json
|
|
import os
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
from datetime import datetime
|
|
import pytz
|
|
from pathlib import Path
|
|
|
|
# Import the scraper
|
|
import sys
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
from src.mailchimp_api_scraper import MailChimpAPIScraper
|
|
from src.base_scraper import ScraperConfig
|
|
|
|
|
|
class TestMailChimpAPIScraper:
    """Test suite for MailChimp API scraper.

    Covers initialization, API connectivity, folder/campaign discovery,
    content and report fetching, markdown formatting, incremental-sync
    state handling, rate limiting, and error paths. All HTTP traffic is
    mocked via ``patch('requests.get')`` — no network access occurs.
    Credentials come from monkeypatched environment variables, so no
    real API key is ever required.
    """

    @pytest.fixture
    def config(self, tmp_path):
        """Create test configuration rooted in pytest's per-test tmp_path."""
        return ScraperConfig(
            source_name='mailchimp',
            brand_name='test_brand',
            data_dir=tmp_path / 'data',
            logs_dir=tmp_path / 'logs',
            timezone='America/Halifax'
        )

    @pytest.fixture
    def mock_env_vars(self, monkeypatch):
        """Mock environment variables the scraper reads at construction time."""
        # The 'us10' suffix mirrors MailChimp's real key format, where the
        # datacenter prefix is appended to the key after a dash.
        monkeypatch.setenv('MAILCHIMP_API_KEY', 'test-api-key-us10')
        monkeypatch.setenv('MAILCHIMP_SERVER_PREFIX', 'us10')

    @pytest.fixture
    def scraper(self, config, mock_env_vars):
        """Create scraper instance with mocked environment.

        Depends on mock_env_vars so the env is patched before __init__ runs.
        """
        return MailChimpAPIScraper(config)

    @pytest.fixture
    def sample_folder_response(self):
        """Sample folder list response.

        'folder2' is the target 'Bi-Weekly Newsletter' folder the scraper
        is expected to select.
        """
        return {
            'folders': [
                {'id': 'folder1', 'name': 'General'},
                {'id': 'folder2', 'name': 'Bi-Weekly Newsletter'},
                {'id': 'folder3', 'name': 'Special Announcements'}
            ],
            'total_items': 3
        }

    @pytest.fixture
    def sample_campaigns_response(self):
        """Sample campaigns list response.

        Two sent campaigns in the target folder; camp1 is the newer one and
        additionally carries archive URLs (camp2 deliberately omits them to
        exercise optional-field handling).
        """
        return {
            'campaigns': [
                {
                    'id': 'camp1',
                    'type': 'regular',
                    'status': 'sent',
                    'send_time': '2025-08-15T10:00:00+00:00',
                    'archive_url': 'https://archive.url/camp1',
                    'long_archive_url': 'https://long.archive.url/camp1',
                    'settings': {
                        'subject_line': 'August Newsletter - HVAC Tips',
                        'preview_text': 'This month: AC maintenance tips',
                        'from_name': 'HVAC Know It All',
                        'reply_to': 'info@hvacknowitall.com',
                        'folder_id': 'folder2'
                    }
                },
                {
                    'id': 'camp2',
                    'type': 'regular',
                    'status': 'sent',
                    'send_time': '2025-08-01T10:00:00+00:00',
                    'settings': {
                        'subject_line': 'July Newsletter - Heat Pump Guide',
                        'preview_text': 'Everything about heat pumps',
                        'from_name': 'HVAC Know It All',
                        'reply_to': 'info@hvacknowitall.com',
                        'folder_id': 'folder2'
                    }
                }
            ],
            'total_items': 2
        }

    @pytest.fixture
    def sample_content_response(self):
        """Sample campaign content response (plain-text and HTML bodies)."""
        return {
            'plain_text': 'Welcome to our August newsletter!\n\nThis month we cover AC maintenance...',
            'html': '<html><body><h1>Welcome to our August newsletter!</h1></body></html>'
        }

    @pytest.fixture
    def sample_report_response(self):
        """Sample campaign report response (engagement metrics)."""
        return {
            'emails_sent': 1500,
            'opens': {
                'unique_opens': 850,
                'open_rate': 0.567,
                'opens_total': 1200
            },
            'clicks': {
                'unique_clicks': 125,
                'click_rate': 0.083,
                'clicks_total': 180
            },
            'unsubscribed': 3,
            'bounces': {
                'hard_bounces': 2,
                'soft_bounces': 5,
                'syntax_errors': 0
            },
            'abuse_reports': 0,
            'forwards': {
                'forwards_count': 10,
                'forwards_opens': 15
            }
        }

    def test_initialization(self, scraper):
        """Test scraper initialization derives credentials and URLs from env."""
        assert scraper.api_key == 'test-api-key-us10'
        assert scraper.server_prefix == 'us10'
        # Base URL must embed the datacenter prefix from the environment.
        assert scraper.base_url == 'https://us10.api.mailchimp.com/3.0'
        assert scraper.target_folder_name == 'Bi-Weekly Newsletter'

    def test_missing_api_key(self, config, monkeypatch):
        """Test initialization fails without API key.

        Uses raw `config` (not the `scraper` fixture) so the env var can be
        removed before construction.
        """
        monkeypatch.delenv('MAILCHIMP_API_KEY', raising=False)
        with pytest.raises(ValueError, match="MAILCHIMP_API_KEY not found"):
            MailChimpAPIScraper(config)

    @patch('requests.get')
    def test_connection_success(self, mock_get, scraper):
        """Test successful API connection via the /ping endpoint."""
        mock_get.return_value.status_code = 200

        result = scraper._test_connection()

        assert result is True
        # The health check must hit exactly the ping endpoint with auth headers.
        mock_get.assert_called_once_with(
            'https://us10.api.mailchimp.com/3.0/ping',
            headers=scraper.headers
        )

    @patch('requests.get')
    def test_connection_failure(self, mock_get, scraper):
        """Test failed API connection (HTTP 401 -> False, no exception)."""
        mock_get.return_value.status_code = 401

        result = scraper._test_connection()

        assert result is False

    @patch('requests.get')
    def test_get_folder_id(self, mock_get, scraper, sample_folder_response):
        """Test finding the target folder ID by name match."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = sample_folder_response

        folder_id = scraper._get_folder_id()

        assert folder_id == 'folder2'
        # The resolved ID is also cached on the instance for later requests.
        assert scraper.target_folder_id == 'folder2'

    @patch('requests.get')
    def test_get_folder_id_not_found(self, mock_get, scraper):
        """Test when target folder doesn't exist: returns None, no raise."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = {
            'folders': [{'id': 'other', 'name': 'Other Folder'}],
            'total_items': 1
        }

        folder_id = scraper._get_folder_id()

        assert folder_id is None

    @patch('requests.get')
    def test_fetch_campaign_content(self, mock_get, scraper, sample_content_response):
        """Test fetching campaign content (plain text + HTML)."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = sample_content_response

        content = scraper._fetch_campaign_content('camp1')

        assert content is not None
        assert 'plain_text' in content
        assert 'html' in content

    @patch('requests.get')
    def test_fetch_campaign_report(self, mock_get, scraper, sample_report_response):
        """Test fetching campaign metrics passes the report through intact."""
        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = sample_report_response

        report = scraper._fetch_campaign_report('camp1')

        assert report is not None
        assert report['emails_sent'] == 1500
        assert report['opens']['unique_opens'] == 850
        assert report['clicks']['unique_clicks'] == 125

    @patch('requests.get')
    def test_fetch_content_full_flow(self, mock_get, scraper,
                                     sample_folder_response,
                                     sample_campaigns_response,
                                     sample_content_response,
                                     sample_report_response):
        """Test complete content fetching flow.

        The side_effect list encodes the exact HTTP call order fetch_content
        is expected to make: ping, folders, campaigns, then content+report
        for each of the two campaigns. Changing the scraper's call order
        will (intentionally) break this test.
        """
        # Setup mock responses in order
        mock_responses = [
            Mock(status_code=200, json=Mock(return_value={'health_status': 'Everything\'s Chimpy!'})),  # ping
            Mock(status_code=200, json=Mock(return_value=sample_folder_response)),  # folders
            Mock(status_code=200, json=Mock(return_value=sample_campaigns_response)),  # campaigns
            Mock(status_code=200, json=Mock(return_value=sample_content_response)),  # content camp1
            Mock(status_code=200, json=Mock(return_value=sample_report_response)),  # report camp1
            Mock(status_code=200, json=Mock(return_value=sample_content_response)),  # content camp2
            Mock(status_code=200, json=Mock(return_value=sample_report_response))  # report camp2
        ]
        mock_get.side_effect = mock_responses

        campaigns = scraper.fetch_content(max_items=10)

        assert len(campaigns) == 2
        assert campaigns[0]['id'] == 'camp1'
        # Title is lifted from the campaign's settings.subject_line.
        assert campaigns[0]['title'] == 'August Newsletter - HVAC Tips'
        assert campaigns[0]['metrics']['emails_sent'] == 1500
        assert campaigns[0]['plain_text'] == sample_content_response['plain_text']

    def test_format_markdown(self, scraper):
        """Test markdown formatting of a fully-populated campaign record.

        NOTE(review): the metrics sub-dict here uses a flattened shape
        ('bounces': {'hard': ...}) that differs from the raw API report
        fixture — presumably fetch_content normalizes it; confirm against
        the scraper implementation.
        """
        campaigns = [
            {
                'id': 'camp1',
                'title': 'Test Newsletter',
                'send_time': '2025-08-15T10:00:00+00:00',
                'from_name': 'Test Sender',
                'reply_to': 'test@example.com',
                'long_archive_url': 'https://archive.url',
                'preview_text': 'Preview text here',
                'plain_text': 'Newsletter content here',
                'metrics': {
                    'emails_sent': 1000,
                    'unique_opens': 500,
                    'open_rate': 0.5,
                    'unique_clicks': 100,
                    'click_rate': 0.1,
                    'unsubscribed': 2,
                    'bounces': {'hard': 1, 'soft': 3},
                    'abuse_reports': 0,
                    'forwards': {'count': 5}
                }
            }
        ]

        markdown = scraper.format_markdown(campaigns)

        assert '# ID: camp1' in markdown
        assert '## Title: Test Newsletter' in markdown
        assert '## Type: email_campaign' in markdown
        assert '## Send Date: 2025-08-15T10:00:00+00:00' in markdown
        assert '### Emails Sent: 1000' in markdown
        # Rates are rendered as percentages with one decimal place.
        assert '### Opens: 500 unique (50.0%)' in markdown
        assert '### Clicks: 100 unique (10.0%)' in markdown
        assert '## Content:' in markdown
        assert 'Newsletter content here' in markdown

    def test_get_incremental_items_no_state(self, scraper):
        """Test incremental items with no previous state: everything is new."""
        items = [
            {'id': 'camp1', 'send_time': '2025-08-15'},
            {'id': 'camp2', 'send_time': '2025-08-01'}
        ]

        new_items = scraper.get_incremental_items(items, {})

        assert new_items == items

    def test_get_incremental_items_with_state(self, scraper):
        """Test incremental items with existing state.

        Items are newest-first; only those strictly newer than the last
        synced campaign should be returned.
        """
        items = [
            {'id': 'camp3', 'send_time': '2025-08-20'},
            {'id': 'camp2', 'send_time': '2025-08-15'},  # Last synced
            {'id': 'camp1', 'send_time': '2025-08-01'}
        ]
        state = {
            'last_campaign_id': 'camp2',
            'last_send_time': '2025-08-15'
        }

        new_items = scraper.get_incremental_items(items, state)

        assert len(new_items) == 1
        assert new_items[0]['id'] == 'camp3'

    def test_update_state(self, scraper):
        """Test state update records the newest campaign (first item) plus counts."""
        items = [
            {'id': 'camp3', 'title': 'Latest Campaign', 'send_time': '2025-08-20'},
            {'id': 'camp2', 'title': 'Previous Campaign', 'send_time': '2025-08-15'}
        ]
        state = {}

        new_state = scraper.update_state(state, items)

        assert new_state['last_campaign_id'] == 'camp3'
        assert new_state['last_send_time'] == '2025-08-20'
        assert new_state['last_campaign_title'] == 'Latest Campaign'
        assert new_state['campaign_count'] == 2
        assert 'last_sync' in new_state

    @patch('requests.get')
    def test_quota_management(self, mock_get, scraper):
        """Test that scraper respects rate limits.

        NOTE(review): this only asserts a single request completes in under
        a second — it does not actually verify throttling between multiple
        requests; consider strengthening.
        """
        # Mock slow responses to test delay
        import time
        start_time = time.time()

        mock_get.return_value.status_code = 200
        mock_get.return_value.json.return_value = {'plain_text': 'content'}

        # Fetch content should add delays
        scraper._fetch_campaign_content('camp1')

        # No significant delay for single request
        elapsed = time.time() - start_time
        assert elapsed < 1.0  # Should be fast for single request

    @patch('requests.get')
    def test_error_handling(self, mock_get, scraper):
        """Test error handling in various scenarios.

        All failures must degrade to False/None return values rather than
        propagating exceptions to the caller.
        """
        # Test network error
        mock_get.side_effect = Exception("Network error")

        result = scraper._test_connection()
        assert result is False

        # Test campaign content fetch error
        mock_get.side_effect = None  # clear the raising side effect first
        mock_get.return_value.status_code = 404

        content = scraper._fetch_campaign_content('nonexistent')
        assert content is None

        # Test report fetch error
        report = scraper._fetch_campaign_report('nonexistent')
        assert report is None
|
|
|
|
|
if __name__ == "__main__":
    # Allow running this suite directly as a script, in verbose mode.
    cli_args = [__file__, "-v"]
    pytest.main(cli_args)