Production Readiness Improvements: - Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM) - Enabled NAS synchronization in production runner with error handling - Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md) - Made systemd services portable (removed hardcoded user/paths) - Added environment variable validation on startup - Moved DISPLAY/XAUTHORITY to .env configuration Systemd Improvements: - Created template service file (@.service) for any user - Changed all paths to /opt/hvac-kia-content - Updated installation script for portable deployment - Fixed service dependencies and resource limits Documentation: - Created comprehensive PRODUCTION_TODO.md with 25 tasks - Added PRODUCTION_GUIDE.md with deployment instructions - Documented spec compliance gaps (65% complete) Remaining work includes retry logic, connection pooling, media downloads, and pytest test suite as documented in PRODUCTION_TODO.md 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
128 lines
No EOL
3.9 KiB
Python
128 lines
No EOL
3.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Simple test to check if each source can connect and fetch data.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
from dotenv import load_dotenv
|
|
|
|
# Add src to path
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from src.base_scraper import ScraperConfig
|
|
from src.wordpress_scraper import WordPressScraper
|
|
from src.rss_scraper import RSSScraperMailChimp, RSSScraperPodcast
|
|
from src.youtube_scraper import YouTubeScraper
|
|
from src.instagram_scraper import InstagramScraper
|
|
from src.tiktok_scraper import TikTokScraper
|
|
|
|
|
|
def test_source(scraper_class, name: str, limit: int = 3) -> bool:
    """Check whether one content source can be instantiated and fetch data.

    Builds a ``ScraperConfig`` that points at the ``test_data`` and
    ``test_logs`` directories, instantiates ``scraper_class`` with it,
    fetches a few items and prints a short preview of the first one.

    Args:
        scraper_class: Scraper class to instantiate; it is called with the
            config as its only argument.
        name: Display name of the source. It selects the fetch method
            ("YouTube" and "Instagram" use dedicated calls, every other
            name uses ``fetch_content()``) and, lower-cased, is passed as
            the config's ``source_name``.
        limit: Maximum number of items to request or keep.

    Returns:
        True if at least one item was fetched. False if nothing came back
        or if instantiating/running the scraper raised an exception.

    Note:
        NOTE(review): because of the ``test_`` prefix, pytest would try to
        collect this helper as a test if this file lives in a collected
        path -- confirm against the project's pytest configuration.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(f"Testing {name}")
    print(banner)

    config = ScraperConfig(
        source_name=name.lower(),
        brand_name="hvacknowitall",
        data_dir=Path("test_data"),
        logs_dir=Path("test_logs"),
        timezone="America/Halifax",
    )

    try:
        scraper = scraper_class(config)

        # Every branch normalises a falsy result (None, empty) to an empty
        # list so that "nothing fetched" is reported as FAILED instead of
        # surfacing as a TypeError from slicing or concatenating None.
        if name == "YouTube":
            items = scraper.fetch_channel_videos(max_videos=limit) or []
        elif name == "Instagram":
            posts = scraper.fetch_posts(max_posts=limit) or []
            # One story is enough to prove the stories endpoint works.
            stories = (scraper.fetch_stories() or [])[:1]
            items = list(posts) + list(stories)
        else:
            # WordPress, RSS and TikTok all expose fetch_content().
            items = (scraper.fetch_content() or [])[:limit]

        if not items:
            print("❌ FAILED: No items fetched")
            return False

        print(f"✅ SUCCESS: Fetched {len(items)} items")

        # Preview whichever of the common fields the first item carries.
        first = items[0]
        print("\nFirst item preview:")
        preview_keys = (
            'title', 'description', 'caption', 'author', 'channel',
            'date', 'publish_date', 'link', 'url',
        )
        for key in preview_keys:
            if key in first:
                value = str(first[key])[:100]
                if value:
                    print(f" {key}: {value}")

        return True

    except Exception as e:
        # Deliberately broad: this is a smoke test, so any failure of one
        # source is reported and must not stop the remaining sources.
        print(f"❌ ERROR: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
|
|
def main() -> int:
    """Run the connection smoke test for every configured source.

    Loads environment variables with ``load_dotenv()``, calls
    ``test_source`` for each source whose enabling environment variable is
    set, and prints a pass/fail summary.

    Returns:
        0 if every tested source succeeded (including the case where no
        source is configured), 1 if at least one tested source failed.
    """
    # Load environment
    load_dotenv()

    print("\n" + "#" * 50)
    print("# TESTING ALL SOURCES - Simple Connection Test")
    print("#" * 50)

    # (enabling env var, display name, scraper class, extra test_source kwargs)
    # Built inside the function so the scraper names are resolved at call time.
    sources = [
        ('WORDPRESS_API_URL', "WordPress", WordPressScraper, {}),
        ('MAILCHIMP_RSS_URL', "MailChimp", RSSScraperMailChimp, {}),
        ('PODCAST_RSS_URL', "Podcast", RSSScraperPodcast, {}),
        ('YOUTUBE_CHANNEL_URL', "YouTube", YouTubeScraper, {}),
        ('INSTAGRAM_USERNAME', "Instagram", InstagramScraper, {}),
        ('TIKTOK_USERNAME', "TikTok", TikTokScraper, {"limit": 2}),
    ]

    results = {}

    # Test each source that is enabled through its environment variable.
    for env_var, label, scraper_class, extra in sources:
        if not os.getenv(env_var):
            continue
        if label == "TikTok":
            print("\n⚠️ TikTok requires Playwright browser automation")
            print(" This may take longer and could be blocked")
        results[label] = test_source(scraper_class, label, **extra)

    # Summary
    print("\n" + "=" * 50)
    print("SUMMARY")
    print("=" * 50)

    for source, success in results.items():
        status = "✅" if success else "❌"
        print(f"{status} {source}")

    total = len(results)
    passed = sum(1 for ok in results.values() if ok)
    print(f"\nTotal: {passed}/{total} sources working")

    # A non-zero status lets cron/systemd/CI notice that a source is broken.
    return 0 if passed == total else 1


if __name__ == "__main__":
    # Propagate the result as the process exit status.
    sys.exit(main())