hvac-kia-content/test_sources_simple.py
Ben Reed 05218a873b Fix critical production issues and improve spec compliance
Production Readiness Improvements:
- Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM)
- Enabled NAS synchronization in production runner with error handling
- Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md)
- Made systemd services portable (removed hardcoded user/paths)
- Added environment variable validation on startup
- Moved DISPLAY/XAUTHORITY to .env configuration

Systemd Improvements:
- Created template service file (@.service) for any user
- Changed all paths to /opt/hvac-kia-content
- Updated installation script for portable deployment
- Fixed service dependencies and resource limits

Documentation:
- Created comprehensive PRODUCTION_TODO.md with 25 tasks
- Added PRODUCTION_GUIDE.md with deployment instructions
- Documented spec compliance gaps (65% complete)

Remaining work includes retry logic, connection pooling, media downloads,
and pytest test suite as documented in PRODUCTION_TODO.md

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 20:07:55 -03:00

128 lines
No EOL
3.9 KiB
Python

#!/usr/bin/env python3
"""
Simple test to check if each source can connect and fetch data.
"""
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
# Add this script's directory (the project root) to sys.path so the `src` package is importable
sys.path.insert(0, str(Path(__file__).parent))
from src.base_scraper import ScraperConfig
from src.wordpress_scraper import WordPressScraper
from src.rss_scraper import RSSScraperMailChimp, RSSScraperPodcast
from src.youtube_scraper import YouTubeScraper
from src.instagram_scraper import InstagramScraper
from src.tiktok_scraper import TikTokScraper
def test_source(scraper_class, name, limit=3):
    """Check whether a single source can connect and fetch data.

    Builds a ScraperConfig for the source, instantiates ``scraper_class``
    with it, fetches items using the method that matches ``name`` and prints
    a short preview of the first item.

    Args:
        scraper_class: Scraper class to instantiate; it is called with the
            ScraperConfig built here.
        name: Display name of the source. "YouTube" and "Instagram" select
            source-specific fetch methods; every other name goes through
            ``fetch_content()``. The lower-cased name becomes the config's
            ``source_name``.
        limit: Maximum number of videos/posts to request, or number of items
            to keep from ``fetch_content()``. Instagram may yield one extra
            item because a single story is appended to the posts.

    Returns:
        True if at least one item was fetched; False if nothing was fetched
        or any exception was raised while scraping.
    """
    print(f"\n{'='*50}")
    print(f"Testing {name}")
    print('='*50)

    config = ScraperConfig(
        source_name=name.lower(),
        brand_name="hvacknowitall",
        data_dir=Path("test_data"),
        logs_dir=Path("test_logs"),
        timezone="America/Halifax"
    )

    try:
        scraper = scraper_class(config)

        # Fetch with the method appropriate for the source.
        if name == "YouTube":
            items = scraper.fetch_channel_videos(max_videos=limit)
        elif name == "Instagram":
            # Guard against empty/None results the same way the generic
            # branch does, so a missing result is reported as "no items"
            # instead of crashing on the concatenation or the slice.
            posts = scraper.fetch_posts(max_posts=limit) or []
            stories = (scraper.fetch_stories() or [])[:1]  # Just try 1 story
            items = posts + stories
        else:
            # WordPress, RSS and TikTok all go through fetch_content()
            # (for TikTok it wraps the async implementation).
            items = (scraper.fetch_content() or [])[:limit]

        if not items:
            print("❌ FAILED: No items fetched")
            return False

        print(f"✅ SUCCESS: Fetched {len(items)} items")

        # Show the key fields of the first item, truncated to 100 characters.
        first = items[0]
        print("\nFirst item preview:")
        for key in ('title', 'description', 'caption', 'author', 'channel',
                    'date', 'publish_date', 'link', 'url'):
            if key in first:
                value = str(first[key])[:100]
                if value:
                    print(f" {key}: {value}")
        return True

    except Exception as e:
        # Top-level boundary of a smoke test: report the failure with a full
        # traceback and keep going so the remaining sources still get tested.
        print(f"❌ ERROR: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Run the connection test for every source configured in the environment.

    Loads variables from ``.env`` via ``load_dotenv()``, runs ``test_source``
    for each source whose enabling environment variable is set, then prints a
    per-source pass/fail summary and the overall count.
    """
    # Load environment
    load_dotenv()

    print("\n" + "#"*50)
    print("# TESTING ALL SOURCES - Simple Connection Test")
    print("#"*50)

    # (enabling env var, display name, scraper class, extra test_source kwargs)
    sources = [
        ('WORDPRESS_API_URL', "WordPress", WordPressScraper, {}),
        ('MAILCHIMP_RSS_URL', "MailChimp", RSSScraperMailChimp, {}),
        ('PODCAST_RSS_URL', "Podcast", RSSScraperPodcast, {}),
        ('YOUTUBE_CHANNEL_URL', "YouTube", YouTubeScraper, {}),
        ('INSTAGRAM_USERNAME', "Instagram", InstagramScraper, {}),
        ('TIKTOK_USERNAME', "TikTok", TikTokScraper, {'limit': 2}),
    ]

    results = {}
    for env_var, name, scraper_class, extra in sources:
        # Only test sources that are actually configured.
        if not os.getenv(env_var):
            continue
        if name == "TikTok":
            print("\n⚠️ TikTok requires Playwright browser automation")
            print(" This may take longer and could be blocked")
        results[name] = test_source(scraper_class, name, **extra)

    # Summary
    print("\n" + "="*50)
    print("SUMMARY")
    print("="*50)

    if not results:
        # Without this, the summary would only show a bare "0/0".
        print("No sources configured - set the source variables in .env")

    for source, success in results.items():
        # Same markers test_source prints, so pass and fail are distinguishable.
        status = "✅" if success else "❌"
        print(f"{status} {source}")

    total = len(results)
    passed = sum(1 for ok in results.values() if ok)
    print(f"\nTotal: {passed}/{total} sources working")


# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()