Production Readiness Improvements: - Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM) - Enabled NAS synchronization in production runner with error handling - Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md) - Made systemd services portable (removed hardcoded user/paths) - Added environment variable validation on startup - Moved DISPLAY/XAUTHORITY to .env configuration Systemd Improvements: - Created template service file (@.service) for any user - Changed all paths to /opt/hvac-kia-content - Updated installation script for portable deployment - Fixed service dependencies and resource limits Documentation: - Created comprehensive PRODUCTION_TODO.md with 25 tasks - Added PRODUCTION_GUIDE.md with deployment instructions - Documented spec compliance gaps (65% complete) Remaining work includes retry logic, connection pooling, media downloads, and pytest test suite as documented in PRODUCTION_TODO.md 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
217 lines
No EOL
8 KiB
Python
217 lines
No EOL
8 KiB
Python
import pytest
|
|
from unittest.mock import Mock, patch, MagicMock, AsyncMock
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
import asyncio
|
|
from src.tiktok_scraper import TikTokScraper
|
|
from src.base_scraper import ScraperConfig
|
|
|
|
|
|
class TestTikTokScraper:
    """Unit tests for ``TikTokScraper`` with the TikTok API layer mocked out.

    Each test patches ``TikTokScraper._setup_api`` so the real method is never
    run, and requests the ``mock_env`` fixture so the TikTok-related
    environment variables hold known test values.
    """

    @pytest.fixture
    def config(self):
        """Return the ``ScraperConfig`` shared by every test in this class."""
        return ScraperConfig(
            source_name="tiktok",
            brand_name="hvacknowitall",
            data_dir=Path("data"),
            logs_dir=Path("logs"),
            timezone="America/Halifax",
        )

    @pytest.fixture
    def mock_env(self):
        """Patch ``os.environ`` with test TikTok settings while a test runs."""
        test_environment = {
            'TIKTOK_USERNAME': 'test@example.com',
            'TIKTOK_PASSWORD': 'testpass',
            'TIKTOK_TARGET': 'hvacknowitall',
        }
        with patch.dict('os.environ', test_environment):
            yield

    @pytest.fixture
    def sample_video(self):
        """Return a ``MagicMock`` standing in for a single TikTok video object."""
        mock_video = MagicMock()
        mock_video.id = '7234567890123456789'
        mock_video.author.username = 'hvacknowitall'
        mock_video.author.nickname = 'HVAC Know It All'
        mock_video.desc = 'Check out this HVAC tip! #hvac #maintenance'
        # Epoch seconds for 2024-01-01 12:00:00 UTC. The previous value
        # (1704134400) was actually 18:40:00 UTC and contradicted its comment.
        mock_video.create_time = 1704110400
        mock_video.stats.play_count = 15000
        mock_video.stats.comment_count = 250
        mock_video.stats.share_count = 50
        mock_video.stats.collect_count = 100  # Likes/favorites
        mock_video.music.title = 'Original sound'
        mock_video.duration = 30
        mock_video.hashtags = ['hvac', 'maintenance']
        return mock_video

    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    def test_initialization(self, mock_setup, config, mock_env):
        """The scraper keeps its config and exposes the patched env values."""
        mock_setup.return_value = AsyncMock()
        scraper = TikTokScraper(config)

        assert scraper.config == config
        assert scraper.username == 'test@example.com'
        assert scraper.password == 'testpass'
        assert scraper.target_account == 'hvacknowitall'

    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    def test_humanized_delay(self, mock_setup, config, mock_env):
        """``_humanized_delay`` sleeps for the value returned by ``random.uniform``."""
        mock_setup.return_value = AsyncMock()
        scraper = TikTokScraper(config)

        # Patch both the randomness and the sleep so the test is instant and
        # the expected sleep duration is deterministic.
        with patch('time.sleep') as mock_sleep, \
                patch('random.uniform', return_value=3.5):
            scraper._humanized_delay()
            mock_sleep.assert_called_with(3.5)

    @pytest.mark.asyncio
    @patch('src.tiktok_scraper.TikTokApi')
    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    async def test_fetch_user_videos(self, mock_setup, mock_tiktokapi_class, config, mock_env, sample_video):
        """``fetch_user_videos`` maps one mocked video to a dict with the expected fields."""
        # Plain MagicMock for the API object; only the entry points that get
        # awaited are replaced with AsyncMock below.
        mock_api = MagicMock()
        mock_setup.return_value = mock_api

        # Setup async context manager
        mock_api.__aenter__ = AsyncMock(return_value=mock_api)
        mock_api.__aexit__ = AsyncMock(return_value=None)
        mock_api.create_sessions = AsyncMock(return_value=None)

        # Mock user
        mock_user = MagicMock()
        mock_api.user.return_value = mock_user

        # Create async generator for videos
        async def video_generator(count=None):
            yield sample_video

        mock_user.videos = video_generator

        scraper = TikTokScraper(config)
        scraper.api = mock_api

        videos = await scraper.fetch_user_videos(max_videos=10)

        assert len(videos) == 1
        assert videos[0]['id'] == '7234567890123456789'
        assert videos[0]['author'] == 'hvacknowitall'
        assert videos[0]['description'] == 'Check out this HVAC tip! #hvac #maintenance'

    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    def test_format_markdown(self, mock_setup, config, mock_env):
        """``format_markdown`` emits one heading line per video field."""
        mock_setup.return_value = AsyncMock()
        scraper = TikTokScraper(config)

        videos = [
            {
                'id': '7234567890123456789',
                'author': 'hvacknowitall',
                'nickname': 'HVAC Know It All',
                'description': 'HVAC maintenance tips',
                'publish_date': '2024-01-01T12:00:00',
                'link': 'https://www.tiktok.com/@hvacknowitall/video/7234567890123456789',
                'views': 15000,
                'likes': 100,
                'comments': 250,
                'shares': 50,
                'duration': 30,
                'music': 'Original sound',
                'hashtags': ['hvac', 'maintenance'],
            }
        ]

        markdown = scraper.format_markdown(videos)

        assert '# ID: 7234567890123456789' in markdown
        assert '## Author: hvacknowitall' in markdown
        assert '## Nickname: HVAC Know It All' in markdown
        assert '## Description:' in markdown
        assert 'HVAC maintenance tips' in markdown
        assert '## Views: 15000' in markdown
        assert '## Likes: 100' in markdown
        assert '## Comments: 250' in markdown
        assert '## Shares: 50' in markdown
        assert '## Duration: 30 seconds' in markdown
        assert '## Music: Original sound' in markdown
        assert '## Hashtags: hvac, maintenance' in markdown

    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    def test_get_incremental_items(self, mock_setup, config, mock_env):
        """``get_incremental_items`` returns only videos listed before ``last_video_id``."""
        mock_setup.return_value = AsyncMock()
        scraper = TikTokScraper(config)

        # Newest first, matching the order the assertions below rely on.
        videos = [
            {'id': 'video3', 'publish_date': '2024-01-03T12:00:00'},
            {'id': 'video2', 'publish_date': '2024-01-02T12:00:00'},
            {'id': 'video1', 'publish_date': '2024-01-01T12:00:00'},
        ]

        # Test with no previous state
        state = {}
        new_videos = scraper.get_incremental_items(videos, state)
        assert len(new_videos) == 3

        # Test with existing state
        state = {'last_video_id': 'video2'}
        new_videos = scraper.get_incremental_items(videos, state)
        assert len(new_videos) == 1
        assert new_videos[0]['id'] == 'video3'

    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    def test_update_state(self, mock_setup, config, mock_env):
        """``update_state`` records the first video's id/date and the video count."""
        mock_setup.return_value = AsyncMock()
        scraper = TikTokScraper(config)

        state = {}
        videos = [
            {'id': 'video2', 'publish_date': '2024-01-02T12:00:00'},
            {'id': 'video1', 'publish_date': '2024-01-01T12:00:00'},
        ]

        updated_state = scraper.update_state(state, videos)

        assert updated_state['last_video_id'] == 'video2'
        assert updated_state['last_video_date'] == '2024-01-02T12:00:00'
        assert updated_state['video_count'] == 2

    @pytest.mark.asyncio
    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    async def test_error_handling(self, mock_setup, config, mock_env):
        """``fetch_user_videos`` returns an empty list when entering the API context fails."""
        mock_api = MagicMock()
        mock_setup.return_value = mock_api

        # Setup async context manager that raises error
        mock_api.__aenter__ = AsyncMock(side_effect=Exception("API Error"))
        mock_api.__aexit__ = AsyncMock(return_value=None)

        scraper = TikTokScraper(config)
        scraper.api = mock_api

        videos = await scraper.fetch_user_videos()
        assert videos == []

    @pytest.mark.asyncio
    @patch('src.tiktok_scraper.TikTokScraper._setup_api')
    async def test_fetch_content_wrapper(self, mock_setup, config, mock_env):
        """``fetch_content`` returns whatever the stubbed ``fetch_user_videos`` yields."""
        mock_setup.return_value = MagicMock()

        scraper = TikTokScraper(config)

        # Mock the fetch_user_videos to return sample data
        async def mock_fetch():
            return [
                {
                    'id': '7234567890123456789',
                    'author': 'hvacknowitall',
                    'description': 'Test video',
                }
            ]

        scraper.fetch_user_videos = mock_fetch

        # Exercise the synchronous wrapper from a worker thread so it does not
        # run on the thread that owns this test's event loop.
        # get_running_loop() is the recommended call inside a coroutine.
        loop = asyncio.get_running_loop()
        videos = await loop.run_in_executor(None, scraper.fetch_content)

        assert len(videos) == 1
        assert videos[0]['id'] == '7234567890123456789'