hvac-kia-content/test_tiktok_scrapling.py
Ben Reed 05218a873b Fix critical production issues and improve spec compliance
Production Readiness Improvements:
- Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM)
- Enabled NAS synchronization in production runner with error handling
- Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md)
- Made systemd services portable (removed hardcoded user/paths)
- Added environment variable validation on startup
- Moved DISPLAY/XAUTHORITY to .env configuration

Systemd Improvements:
- Created template service file (@.service) for any user
- Changed all paths to /opt/hvac-kia-content
- Updated installation script for portable deployment
- Fixed service dependencies and resource limits

Documentation:
- Created comprehensive PRODUCTION_TODO.md with 25 tasks
- Added PRODUCTION_GUIDE.md with deployment instructions
- Documented spec compliance gaps (65% complete)

Remaining work includes retry logic, connection pooling, media downloads,
and pytest test suite as documented in PRODUCTION_TODO.md

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 20:07:55 -03:00

81 lines
No EOL
2.5 KiB
Python

#!/usr/bin/env python3
"""Test TikTok scraper with Scrapling/Camoufox.

This is a manual, live check rather than an isolated unit test: running the
file invokes the real scraper and exits with status 0 when at least one post
was fetched, or 1 otherwise.
"""
import sys
from pathlib import Path
from dotenv import load_dotenv
from src.tiktok_scraper_scrapling import TikTokScraperScrapling
from src.base_scraper import ScraperConfig
# Load environment variables from a .env file at import time, before any
# scraper is constructed.
# NOTE(review): presumably the scraper reads its settings from the
# environment - confirm against src/tiktok_scraper_scrapling.py.
load_dotenv()
def test_tiktok_scraper():
    """Run a live end-to-end check of the Scrapling-based TikTok scraper.

    Builds a ``ScraperConfig`` for the ``hvacknowitall`` brand, fetches up to
    three posts, prints the details of the first one, renders all fetched
    posts to markdown and writes the result to ``test_data/tiktok_test.md``.

    Returns:
        bool: ``True`` when at least one post was fetched and the markdown
        file was written; ``False`` when nothing was fetched or any step
        inside the fetch/format/save sequence raised an exception.
    """
    print("\n" + "="*60)
    print("Testing TikTok Scraper with Scrapling/Camofaux")
    print("="*60)

    # Configure scraper
    config = ScraperConfig(
        source_name="tiktok",
        brand_name="hvacknowitall",
        data_dir=Path("test_data"),
        logs_dir=Path("logs"),
        timezone="America/Halifax"
    )

    # Create scraper instance
    scraper = TikTokScraperScrapling(config)

    try:
        # Fetch posts
        print(f"\nFetching posts from @{scraper.target_username}...")
        posts = scraper.fetch_posts(max_posts=3)

        # Guard clause: an empty list and None are both treated as "nothing
        # fetched", so the function never calls len() on a None result.
        if not posts:
            print("\n✗ No posts fetched - possible bot detection or rate limiting")
            return False

        print(f"\n✓ Successfully fetched {len(posts)} posts")

        # Display first post
        first_post = posts[0]
        print("\nFirst post details:")
        print(f" ID: {first_post.get('id')}")
        print(f" Link: {first_post.get('link')}")
        # The "," format spec is only valid for numbers. The scraper's field
        # types are not visible from this file, so fall back to str() rather
        # than abort the whole run over a preview line.
        views = first_post.get('views', 0)
        views_text = f"{views:,}" if isinstance(views, (int, float)) else str(views)
        print(f" Views: {views_text}")
        caption = first_post.get('caption', '')
        if caption:
            print(f" Caption: {caption[:100]}...")

        # Generate markdown
        markdown = scraper.format_markdown(posts)

        # Save to file. The encoding is explicit so that non-ASCII captions
        # (emoji, accented text) do not depend on the platform's locale.
        output_file = config.data_dir / "tiktok_test.md"
        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(markdown, encoding="utf-8")
        print(f"\n✓ Markdown saved to: {output_file}")

        # Show snippet of markdown (first 20 lines)
        print("\nMarkdown preview:")
        print("-" * 40)
        for line in markdown.split('\n')[:20]:
            print(line)
        print("-" * 40)
    except Exception as e:
        # Top-level boundary of a manual test script: report the failure
        # with a full traceback and signal it through the return value.
        print(f"\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        return False

    return True
if __name__ == "__main__":
    # Map the boolean test outcome onto a process exit status:
    # 0 when the scraper test succeeded, 1 otherwise.
    exit_code = 0 if test_tiktok_scraper() else 1
    sys.exit(exit_code)