Production Readiness Improvements:
- Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM)
- Enabled NAS synchronization in production runner with error handling
- Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md)
- Made systemd services portable (removed hardcoded user/paths)
- Added environment variable validation on startup
- Moved DISPLAY/XAUTHORITY to .env configuration

Systemd Improvements:
- Created template service file (@.service) for any user
- Changed all paths to /opt/hvac-kia-content
- Updated installation script for portable deployment
- Fixed service dependencies and resource limits

Documentation:
- Created comprehensive PRODUCTION_TODO.md with 25 tasks
- Added PRODUCTION_GUIDE.md with deployment instructions
- Documented spec compliance gaps (65% complete)

Remaining work includes retry logic, connection pooling, media downloads, and a pytest test suite, as documented in PRODUCTION_TODO.md.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
90 lines
No EOL
2.9 KiB
Python
90 lines
No EOL
2.9 KiB
Python
#!/usr/bin/env python3
|
|
"""Test advanced TikTok scraper with headed browser and enhanced stealth."""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
from dotenv import load_dotenv
|
|
from src.tiktok_scraper_advanced import TikTokScraperAdvanced
|
|
from src.base_scraper import ScraperConfig
|
|
|
|
# Load environment variables via python-dotenv at import time, before the
# scraper below is configured (presumably from a local .env file - confirm).
|
|
load_dotenv()
|
|
|
|
def test_tiktok_scraper():
    """Run the advanced TikTok scraper once against live data and report the result.

    Builds a ``ScraperConfig`` for the ``tiktok`` source, asks
    ``TikTokScraperAdvanced`` for posts (``max_posts=3``), prints details of
    the first post, renders the fetched posts to markdown, writes that
    markdown to ``<data_dir>/tiktok_advanced_test.md`` and prints its first
    20 lines as a preview.

    Returns:
        bool: ``True`` when at least one post was fetched and the markdown
        file was written; ``False`` when nothing was fetched or when any
        exception was raised while fetching/formatting/saving (the traceback
        is printed rather than propagated).
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Testing Advanced TikTok Scraper with Headed Browser")
    print(banner)
    print("Note: This will open a browser window - watch for CAPTCHA prompts")
    print(banner)

    # Configure scraper
    config = ScraperConfig(
        source_name="tiktok",
        brand_name="hvacknowitall",
        data_dir=Path("test_data"),
        logs_dir=Path("logs"),
        timezone="America/Halifax"
    )

    # Create scraper instance (construction errors propagate to the caller,
    # exactly as before; only the scraping run itself is guarded below).
    scraper = TikTokScraperAdvanced(config)

    try:
        # Fetch posts
        print(f"\nFetching posts from @{scraper.target_username}...")
        print("Browser window will open - manually solve any CAPTCHAs if prompted")

        posts = scraper.fetch_posts(max_posts=3)

        # Guard on truthiness so both an empty list and a None result are
        # reported as a failed run. The previous `len(posts) > 0` after the
        # try block raised an uncaught TypeError when fetch_posts gave None.
        if not posts:
            print("\n✗ No posts fetched")
            print("Possible issues:")
            print("  - Geographic restrictions")
            print("  - Need to solve CAPTCHA manually")
            print("  - TikTok has updated their selectors")
            print("  - Rate limiting or bot detection")
            return False

        print(f"\n✓ Successfully fetched {len(posts)} posts")

        # Display first post
        first_post = posts[0]
        print("\nFirst post details:")
        print(f"  ID: {first_post.get('id')}")
        print(f"  Link: {first_post.get('link')}")
        print(f"  Views: {first_post.get('views', 0):,}")
        caption = first_post.get('caption', '')
        if caption:
            print(f"  Caption: {caption[:100]}...")

        # Generate markdown
        markdown = scraper.format_markdown(posts)

        # Save to file. Captions may contain emoji or other non-ASCII text,
        # so write UTF-8 explicitly instead of relying on the platform's
        # default encoding.
        output_file = config.data_dir / "tiktok_advanced_test.md"
        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(markdown, encoding="utf-8")

        print(f"\n✓ Markdown saved to: {output_file}")

        # Show snippet of markdown (first 20 lines only)
        print("\nMarkdown preview:")
        print("-" * 40)
        for line in markdown.split('\n')[:20]:
            print(line)
        print("-" * 40)

    except Exception as e:
        # Script-level boundary: report the failure with a traceback and
        # signal it through the return value instead of crashing.
        print(f"\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        return False

    return True
|
|
|
|
if __name__ == "__main__":
    # Script entry point: map the boolean outcome of the scraper run onto a
    # process exit status (0 = success, 1 = failure).
    exit_status = 0 if test_tiktok_scraper() else 1
    sys.exit(exit_status)