#!/usr/bin/env python3
"""
Test full backlog capture with new API scrapers.

This will fetch all YouTube videos and MailChimp campaigns using APIs.
Running the script invokes both scrapers' ``fetch_content`` and writes the
formatted markdown under ``data_api_test/``.

NOTE(review): the entry points are named ``test_*`` and return their results,
so pytest will collect them if this file is on its search path - confirm that
is intended.
"""

import sys
import time
from pathlib import Path

# Make the sibling ``src`` package importable when this file is run directly.
sys.path.insert(0, str(Path(__file__).parent))

from src.youtube_api_scraper import YouTubeAPIScraper
from src.mailchimp_api_scraper import MailChimpAPIScraper
from src.base_scraper import ScraperConfig


def _num(record, key):
    """Return ``record[key]``, treating a missing or ``None`` value as 0."""
    return record.get(key) or 0


def _text(record, key, limit):
    """Return at most ``limit`` characters of ``record[key]``.

    Falls back to ``'Unknown'`` when the value is missing, ``None`` or empty,
    so slicing never fails on a ``None`` field.
    """
    return str(record.get(key) or 'Unknown')[:limit]


def _metrics(campaign):
    """Return the campaign's ``metrics`` mapping, or ``{}`` if missing/None."""
    return campaign.get('metrics') or {}


def _save_markdown(markdown, output_file):
    """Write ``markdown`` to ``output_file`` (UTF-8), creating parent dirs."""
    output_file.parent.mkdir(parents=True, exist_ok=True)
    output_file.write_text(markdown, encoding='utf-8')
    print(f"\nMarkdown saved to: {output_file}")


def test_youtube_api_full():
    """Test YouTube API scraper with full channel fetch.

    Fetches every video with ``fetch_transcripts=True``, prints aggregate
    statistics and the five most-viewed videos, and saves the scraper's
    markdown rendering to ``data_api_test/youtube/youtube_api_full.md``.

    Returns:
        The list of video records returned by ``scraper.fetch_content``.
    """
    print("=" * 60)
    print("TESTING YOUTUBE API SCRAPER - FULL CHANNEL")
    print("=" * 60)

    data_dir = Path('data_api_test/youtube')
    config = ScraperConfig(
        source_name='youtube_api',
        brand_name='hvacknowitall',
        data_dir=data_dir,
        logs_dir=Path('logs_api_test/youtube'),
        timezone='America/Halifax',
    )
    scraper = YouTubeAPIScraper(config)

    print("Fetching all videos from channel...")
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long fetch.
    start = time.perf_counter()

    # Fetch all videos (should be ~370)
    # With transcripts for top 50 by views
    videos = scraper.fetch_content(fetch_transcripts=True)

    elapsed = time.perf_counter() - start
    print(f"\n✅ Fetched {len(videos)} videos in {elapsed:.1f} seconds")

    # Show statistics
    total_views = sum(_num(v, 'view_count') for v in videos)
    total_likes = sum(_num(v, 'like_count') for v in videos)
    with_transcripts = sum(1 for v in videos if v.get('transcript'))

    print("\nStatistics:")
    print(f" Total videos: {len(videos)}")
    print(f" Total views: {total_views:,}")
    print(f" Total likes: {total_likes:,}")
    print(f" Videos with transcripts: {with_transcripts}")
    print(f" Quota used: {scraper.quota_used}/{scraper.daily_quota_limit} units")

    # Show top 5 videos by views
    print("\nTop 5 videos by views:")
    top_videos = sorted(
        videos, key=lambda v: _num(v, 'view_count'), reverse=True
    )[:5]
    for i, video in enumerate(top_videos, 1):
        views = _num(video, 'view_count')
        title = _text(video, 'title', 60)
        has_transcript = '✓' if video.get('transcript') else '✗'
        print(f" {i}. {views:,} views | {title}... | Transcript: {has_transcript}")

    # Save markdown
    _save_markdown(scraper.format_markdown(videos), data_dir / 'youtube_api_full.md')

    return videos


def test_mailchimp_api_full():
    """Test MailChimp API scraper with full campaign fetch.

    Fetches up to 100 campaigns, prints totals, average open/click rates and
    the first five campaigns of the returned list, and saves the scraper's
    markdown rendering to ``data_api_test/mailchimp/mailchimp_api_full.md``.
    When nothing is returned, only a warning is printed and no file is written.

    Returns:
        The list of campaign records returned by ``scraper.fetch_content``.
    """
    print("\n" + "=" * 60)
    print("TESTING MAILCHIMP API SCRAPER - ALL CAMPAIGNS")
    print("=" * 60)

    data_dir = Path('data_api_test/mailchimp')
    config = ScraperConfig(
        source_name='mailchimp_api',
        brand_name='hvacknowitall',
        data_dir=data_dir,
        logs_dir=Path('logs_api_test/mailchimp'),
        timezone='America/Halifax',
    )
    scraper = MailChimpAPIScraper(config)

    print("Fetching all campaigns from 'Bi-Weekly Newsletter' folder...")
    start = time.perf_counter()

    # Fetch all campaigns (up to 100)
    campaigns = scraper.fetch_content(max_items=100)

    elapsed = time.perf_counter() - start
    print(f"\n✅ Fetched {len(campaigns)} campaigns in {elapsed:.1f} seconds")

    if not campaigns:
        print("\n⚠️ No campaigns found!")
        return campaigns

    # Show statistics
    all_metrics = [_metrics(c) for c in campaigns]
    total_sent = sum(_num(m, 'emails_sent') for m in all_metrics)
    total_opens = sum(_num(m, 'unique_opens') for m in all_metrics)
    total_clicks = sum(_num(m, 'unique_clicks') for m in all_metrics)

    print("\nStatistics:")
    print(f" Total campaigns: {len(campaigns)}")
    print(f" Total emails sent: {total_sent:,}")
    print(f" Total unique opens: {total_opens:,}")
    print(f" Total unique clicks: {total_clicks:,}")

    # Calculate average rates (campaigns is non-empty here, so no zero division)
    avg_open_rate = sum(_num(m, 'open_rate') for m in all_metrics) / len(campaigns)
    avg_click_rate = sum(_num(m, 'click_rate') for m in all_metrics) / len(campaigns)
    print(f" Average open rate: {avg_open_rate*100:.1f}%")
    print(f" Average click rate: {avg_click_rate*100:.1f}%")

    # Show recent campaigns
    # NOTE(review): relies on fetch_content returning newest first - confirm.
    print("\n5 Most Recent Campaigns:")
    for i, campaign in enumerate(campaigns[:5], 1):
        title = _text(campaign, 'title', 50)
        send_time = _text(campaign, 'send_time', 10)
        metrics = _metrics(campaign)
        opens = _num(metrics, 'unique_opens')
        open_rate = _num(metrics, 'open_rate') * 100
        print(f" {i}. {send_time} | {title}... | Opens: {opens} ({open_rate:.1f}%)")

    # Save markdown
    _save_markdown(
        scraper.format_markdown(campaigns), data_dir / 'mailchimp_api_full.md'
    )

    return campaigns


def main():
    """Run full API scraper tests.

    Runs the YouTube test and then the MailChimp test, and prints a summary.
    Success is only reported when both scrapers returned at least one item.

    Returns:
        0 when both scrapers returned content, 1 otherwise (used as the
        process exit status when run as a script).
    """
    print("HVAC Know It All - API Scraper Full Test")
    print("This will fetch all content using the new API scrapers")
    print("-" * 60)

    # Test YouTube API
    youtube_videos = test_youtube_api_full()

    # Test MailChimp API
    mailchimp_campaigns = test_mailchimp_api_full()

    # Summary
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    youtube_mark = '✅' if youtube_videos else '❌'
    mailchimp_mark = '✅' if mailchimp_campaigns else '❌'
    print(f"{youtube_mark} YouTube API: {len(youtube_videos)} videos fetched")
    print(f"{mailchimp_mark} MailChimp API: {len(mailchimp_campaigns)} campaigns fetched")

    if youtube_videos and mailchimp_campaigns:
        print("\nAPI scrapers are working successfully!")
        print("Ready for production deployment.")
        return 0

    # An empty result from either source must not be reported as a success.
    print("\n⚠️ At least one scraper returned no content; not ready for production deployment.")
    return 1


if __name__ == "__main__":
    sys.exit(main())