#!/usr/bin/env python3
"""
Test the slow delay system with 5 videos including transcripts.

Manual timing script: it drives ``YouTubeScraper`` in two ways and prints
how long each run took and how many transcripts were retrieved.
"""

import sys
import time
from pathlib import Path

# Make the sibling ``src`` package importable when this file is run directly.
sys.path.insert(0, str(Path(__file__).parent))

from src.base_scraper import ScraperConfig  # noqa: E402
from src.youtube_scraper import YouTubeScraper  # noqa: E402


def _per_video_label(duration: float, count: int) -> str:
    """Return the average time per video as text, or ``"n/a"`` for no videos."""
    if count <= 0:
        # Nothing was fetched; an average is undefined, so avoid dividing by 0.
        return "n/a"
    return f"{duration / count:.1f}s"


def test_slow_delays() -> bool:
    """Time a 5-video fetch and a 2-video backlog-style fetch, then report.

    Steps, in order:

    1. Call ``fetch_content(max_posts=5, fetch_transcripts=True)`` on a
       scraper configured as ``youtube_slow_test`` and time it.
    2. Sleep for 120 seconds.
    3. On a second scraper (``youtube_backlog_test``) list up to 2 channel
       videos, call ``_backlog_delay(transcript_mode=True)`` before every
       video except the first, and fetch each video's details.
    4. Print durations, per-video averages and transcript counts.

    Returns:
        Always ``True``; problems surface as exceptions from the scraper.
    """
    print("🧪 Testing slow delay system with 5 videos + transcripts")
    print("This should take 5-10 minutes with extended delays")
    print("=" * 60)

    config = ScraperConfig(
        source_name="youtube_slow_test",
        brand_name="hvacknowitall",
        data_dir=Path("test_data/slow_delays"),
        logs_dir=Path("test_logs/slow_delays"),
        timezone="America/Halifax",
    )
    scraper = YouTubeScraper(config)

    # A monotonic clock keeps the measured durations immune to wall-clock
    # adjustments during this multi-minute run.
    start_time = time.monotonic()

    # Per the original author's note, passing max_posts makes the scraper use
    # its normal (non-backlog) delays.
    print("Testing normal delays (max_posts=5)...")
    # NOTE(review): the return contract of fetch_content is not visible here;
    # ``or []`` keeps the report working if it yields None.
    videos_normal = scraper.fetch_content(max_posts=5, fetch_transcripts=True) or []
    normal_duration = time.monotonic() - start_time

    print(f"Normal mode: {len(videos_normal)} videos in {normal_duration:.1f} seconds")

    # Pause between the two phases before exercising the backlog delays.
    print("\nWaiting 2 minutes before testing backlog delays...")
    time.sleep(120)

    # Use a fresh scraper instance with its own data/log directories.
    config2 = ScraperConfig(
        source_name="youtube_backlog_test",
        brand_name="hvacknowitall",
        data_dir=Path("test_data/backlog_delays"),
        logs_dir=Path("test_logs/backlog_delays"),
        timezone="America/Halifax",
    )
    scraper2 = YouTubeScraper(config2)

    # Manually process just 2 videos, invoking the backlog delay ourselves.
    print("Testing backlog delays (simulating full backlog mode)...")
    start_backlog = time.monotonic()

    # Materialise the listing so the real total can be shown in the progress
    # line even if fewer than 2 videos come back.
    video_list = list(scraper2.fetch_channel_videos(max_videos=2) or [])
    total = len(video_list)
    backlog_videos = []

    for i, video in enumerate(video_list):
        video_id = video.get('id')
        print(f"Processing video {i+1}/{total}: {video_id}")

        if i > 0:
            # The delay separates consecutive videos, so skip it for the first.
            scraper2._backlog_delay(transcript_mode=True)

        detailed_info = scraper2.fetch_video_details(video_id, fetch_transcript=True)
        if detailed_info:
            backlog_videos.append(detailed_info)

    backlog_duration = time.monotonic() - start_backlog

    normal_count = len(videos_normal)
    backlog_count = len(backlog_videos)

    print("\nResults:")
    print(
        f"Normal mode ({normal_count} videos): {normal_duration:.1f} seconds "
        f"({_per_video_label(normal_duration, normal_count)} per video)"
    )
    print(
        f"Backlog mode ({backlog_count} videos): {backlog_duration:.1f} seconds "
        f"({_per_video_label(backlog_duration, backlog_count)} per video)"
    )

    # Count the videos that came back with a non-empty transcript.
    normal_transcripts = sum(1 for v in videos_normal if v.get('transcript'))
    backlog_transcripts = sum(1 for v in backlog_videos if v.get('transcript'))

    print("Transcripts:")
    print(f" Normal mode: {normal_transcripts}/{normal_count}")
    print(f" Backlog mode: {backlog_transcripts}/{backlog_count}")

    return True


if __name__ == "__main__":
    test_slow_delays()