hvac-kia-content/tests/test_tiktok_scraper.py
Ben Reed 05218a873b Fix critical production issues and improve spec compliance
Production Readiness Improvements:
- Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM)
- Enabled NAS synchronization in production runner with error handling
- Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md)
- Made systemd services portable (removed hardcoded user/paths)
- Added environment variable validation on startup
- Moved DISPLAY/XAUTHORITY to .env configuration

Systemd Improvements:
- Created template service file (@.service) for any user
- Changed all paths to /opt/hvac-kia-content
- Updated installation script for portable deployment
- Fixed service dependencies and resource limits

Documentation:
- Created comprehensive PRODUCTION_TODO.md with 25 tasks
- Added PRODUCTION_GUIDE.md with deployment instructions
- Documented spec compliance gaps (65% complete)

Remaining work includes retry logic, connection pooling, media downloads,
and pytest test suite as documented in PRODUCTION_TODO.md

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 20:07:55 -03:00

217 lines
No EOL
8 KiB
Python

import pytest
from unittest.mock import Mock, patch, MagicMock, AsyncMock
from datetime import datetime
from pathlib import Path
import asyncio
from src.tiktok_scraper import TikTokScraper
from src.base_scraper import ScraperConfig
class TestTikTokScraper:
    """Unit tests for ``TikTokScraper``.

    Every test patches ``TikTokScraper._setup_api`` and supplies credentials
    through the ``mock_env`` fixture before constructing a scraper.
    """

    @pytest.fixture
    def config(self):
        """Return the ``ScraperConfig`` shared by all tests in this class."""
        return ScraperConfig(
            source_name="tiktok",
            brand_name="hvacknowitall",
            data_dir=Path("data"),
            logs_dir=Path("logs"),
            timezone="America/Halifax",
        )

    @pytest.fixture
    def mock_env(self):
        """Patch ``os.environ`` with TikTok credentials for the test's duration."""
        with patch.dict('os.environ', {
            'TIKTOK_USERNAME': 'test@example.com',
            'TIKTOK_PASSWORD': 'testpass',
            'TIKTOK_TARGET': 'hvacknowitall',
        }):
            yield

    @pytest.fixture
    def sample_video(self):
        """Return a ``MagicMock`` carrying the attributes of one TikTok video."""
        mock_video = MagicMock()
        mock_video.id = '7234567890123456789'
        mock_video.author.username = 'hvacknowitall'
        mock_video.author.nickname = 'HVAC Know It All'
        mock_video.desc = 'Check out this HVAC tip! #hvac #maintenance'
        # Epoch seconds for 2024-01-01 12:00:00 UTC. The previous value
        # (1704134400) was actually 18:40:00 UTC and contradicted its comment.
        mock_video.create_time = 1704110400
        mock_video.stats.play_count = 15000
        mock_video.stats.comment_count = 250
        mock_video.stats.share_count = 50
        mock_video.stats.collect_count = 100  # Likes/favorites
        mock_video.music.title = 'Original sound'
        mock_video.duration = 30
        mock_video.hashtags = ['hvac', 'maintenance']
        return mock_video

    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    def test_initialization(self, mock_setup, config, mock_env):
        """The scraper keeps its config and reads credentials from the environment."""
        mock_setup.return_value = AsyncMock()
        scraper = TikTokScraper(config)

        assert scraper.config == config
        assert scraper.username == 'test@example.com'
        assert scraper.password == 'testpass'
        assert scraper.target_account == 'hvacknowitall'

    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    def test_humanized_delay(self, mock_setup, config, mock_env):
        """``_humanized_delay`` sleeps for the value drawn from ``random.uniform``."""
        mock_setup.return_value = AsyncMock()
        scraper = TikTokScraper(config)

        # Pin the random draw so the sleep argument is deterministic.
        with patch('time.sleep') as mock_sleep:
            with patch('random.uniform', return_value=3.5):
                scraper._humanized_delay()
                mock_sleep.assert_called_with(3.5)

    @pytest.mark.asyncio
    @patch('src.tiktok_scraper.TikTokApi')
    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    async def test_fetch_user_videos(self, mock_setup, mock_tiktokapi_class,
                                     config, mock_env, sample_video):
        """``fetch_user_videos`` converts each yielded video into a dict."""
        # The API object itself is a plain MagicMock; only the hooks that get
        # awaited (context-manager entry/exit, session creation) are AsyncMock.
        mock_api = MagicMock()
        mock_setup.return_value = mock_api

        # Setup async context manager
        mock_api.__aenter__ = AsyncMock(return_value=mock_api)
        mock_api.__aexit__ = AsyncMock(return_value=None)
        mock_api.create_sessions = AsyncMock(return_value=None)

        # Mock user
        mock_user = MagicMock()
        mock_api.user.return_value = mock_user

        # Async generator standing in for ``user.videos(...)``; yields one video.
        async def video_generator(count=None):
            yield sample_video

        mock_user.videos = video_generator

        scraper = TikTokScraper(config)
        scraper.api = mock_api

        videos = await scraper.fetch_user_videos(max_videos=10)

        assert len(videos) == 1
        assert videos[0]['id'] == '7234567890123456789'
        assert videos[0]['author'] == 'hvacknowitall'
        assert videos[0]['description'] == 'Check out this HVAC tip! #hvac #maintenance'

    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    def test_format_markdown(self, mock_setup, config, mock_env):
        """``format_markdown`` renders every video field under its own heading."""
        mock_setup.return_value = AsyncMock()
        scraper = TikTokScraper(config)

        videos = [
            {
                'id': '7234567890123456789',
                'author': 'hvacknowitall',
                'nickname': 'HVAC Know It All',
                'description': 'HVAC maintenance tips',
                'publish_date': '2024-01-01T12:00:00',
                'link': 'https://www.tiktok.com/@hvacknowitall/video/7234567890123456789',
                'views': 15000,
                'likes': 100,
                'comments': 250,
                'shares': 50,
                'duration': 30,
                'music': 'Original sound',
                'hashtags': ['hvac', 'maintenance'],
            }
        ]

        markdown = scraper.format_markdown(videos)

        assert '# ID: 7234567890123456789' in markdown
        assert '## Author: hvacknowitall' in markdown
        assert '## Nickname: HVAC Know It All' in markdown
        assert '## Description:' in markdown
        assert 'HVAC maintenance tips' in markdown
        assert '## Views: 15000' in markdown
        assert '## Likes: 100' in markdown
        assert '## Comments: 250' in markdown
        assert '## Shares: 50' in markdown
        assert '## Duration: 30 seconds' in markdown
        assert '## Music: Original sound' in markdown
        assert '## Hashtags: hvac, maintenance' in markdown

    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    def test_get_incremental_items(self, mock_setup, config, mock_env):
        """Only videos listed ahead of ``last_video_id`` are reported as new."""
        mock_setup.return_value = AsyncMock()
        scraper = TikTokScraper(config)

        # Newest first, matching the order used in the assertions below.
        videos = [
            {'id': 'video3', 'publish_date': '2024-01-03T12:00:00'},
            {'id': 'video2', 'publish_date': '2024-01-02T12:00:00'},
            {'id': 'video1', 'publish_date': '2024-01-01T12:00:00'},
        ]

        # Test with no previous state
        state = {}
        new_videos = scraper.get_incremental_items(videos, state)
        assert len(new_videos) == 3

        # Test with existing state
        state = {'last_video_id': 'video2'}
        new_videos = scraper.get_incremental_items(videos, state)
        assert len(new_videos) == 1
        assert new_videos[0]['id'] == 'video3'

    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    def test_update_state(self, mock_setup, config, mock_env):
        """``update_state`` records the first video's id/date and the video count."""
        mock_setup.return_value = AsyncMock()
        scraper = TikTokScraper(config)

        state = {}
        videos = [
            {'id': 'video2', 'publish_date': '2024-01-02T12:00:00'},
            {'id': 'video1', 'publish_date': '2024-01-01T12:00:00'},
        ]

        updated_state = scraper.update_state(state, videos)

        assert updated_state['last_video_id'] == 'video2'
        assert updated_state['last_video_date'] == '2024-01-02T12:00:00'
        assert updated_state['video_count'] == 2

    @pytest.mark.asyncio
    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    async def test_error_handling(self, mock_setup, config, mock_env):
        """An exception raised on entering the API context yields an empty list."""
        mock_api = MagicMock()
        mock_setup.return_value = mock_api

        # Setup async context manager that raises error
        mock_api.__aenter__ = AsyncMock(side_effect=Exception("API Error"))
        mock_api.__aexit__ = AsyncMock(return_value=None)

        scraper = TikTokScraper(config)
        scraper.api = mock_api

        videos = await scraper.fetch_user_videos()

        assert videos == []

    @pytest.mark.asyncio
    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    async def test_fetch_content_wrapper(self, mock_setup, config, mock_env):
        """The synchronous ``fetch_content`` returns what ``fetch_user_videos`` yields."""
        mock_setup.return_value = MagicMock()
        scraper = TikTokScraper(config)

        # Stub for fetch_user_videos. It accepts and ignores any arguments so
        # the test does not depend on how fetch_content chooses to call it.
        async def mock_fetch(*args, **kwargs):
            return [
                {
                    'id': '7234567890123456789',
                    'author': 'hvacknowitall',
                    'description': 'Test video',
                }
            ]

        scraper.fetch_user_videos = mock_fetch

        # Run the synchronous wrapper on an executor thread rather than calling
        # it directly from this coroutine (presumably fetch_content drives its
        # own event loop -- confirm against the scraper). get_running_loop() is
        # the supported way to obtain the loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        videos = await loop.run_in_executor(None, scraper.fetch_content)

        assert len(videos) == 1
        assert videos[0]['id'] == '7234567890123456789'