hvac-kia-content/test_slow_delays.py
Ben Reed daab901e35 refactor: Update naming convention from hvacknowitall to hkia
Major Changes:
- Updated all code references from hvacknowitall/hvacnkowitall to hkia
- Renamed all existing markdown files to use hkia_ prefix
- Updated configuration files, scrapers, and production scripts
- Modified systemd service descriptions to use HKIA
- Changed NAS sync path to /mnt/nas/hkia

Files Updated:
- 20+ source files updated with new naming convention
- 34 markdown files renamed to hkia_* format
- All ScraperConfig brand_name parameters now use 'hkia'
- Documentation updated to reflect new naming

Rationale:
- Shorter, cleaner filenames
- Consistent branding across all outputs
- Easier to type and reference
- Maintains same functionality with improved naming

Next Steps:
- Deploy updated services to production
- Update any external references to old naming
- Monitor scrapers to ensure proper operation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-19 13:35:23 -03:00

91 lines
No EOL
3.1 KiB
Python

#!/usr/bin/env python3
"""
Test the slow delay system with 5 videos including transcripts
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from src.base_scraper import ScraperConfig
from src.youtube_scraper import YouTubeScraper
import time
def test_slow_delays():
    """Exercise both delay modes of YouTubeScraper end to end.

    Phase 1 fetches 5 videos with transcripts using normal delays
    (triggered because ``max_posts`` is specified).  After a 2-minute
    cool-down, phase 2 fetches 2 videos while manually invoking the
    backlog delay between them, then prints timing and transcript
    statistics for both phases.

    Returns:
        True once both phases complete (statistics are informational).
    """
    print("🧪 Testing slow delay system with 5 videos + transcripts")
    print("This should take 5-10 minutes with extended delays")
    print("=" * 60)

    config = ScraperConfig(
        source_name="youtube_slow_test",
        # 'hkia' per the repo-wide rename (was 'hvacknowitall').
        brand_name="hkia",
        data_dir=Path("test_data/slow_delays"),
        logs_dir=Path("test_logs/slow_delays"),
        timezone="America/Halifax",
    )
    scraper = YouTubeScraper(config)

    # Phase 1: normal delays — max_posts being specified skips backlog mode.
    print("Testing normal delays (max_posts=5)...")
    start_time = time.time()
    videos_normal = scraper.fetch_content(max_posts=5, fetch_transcripts=True)
    normal_duration = time.time() - start_time
    print(f"Normal mode: {len(videos_normal)} videos in {normal_duration:.1f} seconds")

    # Cool down between phases so phase 1 traffic doesn't skew phase 2.
    print("\nWaiting 2 minutes before testing backlog delays...")
    time.sleep(120)

    # Phase 2: fresh scraper instance so no state leaks from phase 1.
    config2 = ScraperConfig(
        source_name="youtube_backlog_test",
        # 'hkia' per the repo-wide rename (was 'hvacknowitall').
        brand_name="hkia",
        data_dir=Path("test_data/backlog_delays"),
        logs_dir=Path("test_logs/backlog_delays"),
        timezone="America/Halifax",
    )
    scraper2 = YouTubeScraper(config2)

    # Manually drive just 2 videos through the backlog-mode delay path.
    print("Testing backlog delays (simulating full backlog mode)...")
    start_backlog = time.time()
    video_list = scraper2.fetch_channel_videos(max_videos=2)
    backlog_videos = []
    for i, video in enumerate(video_list):
        video_id = video.get('id')
        print(f"Processing video {i+1}/2: {video_id}")
        if i > 0:
            # Insert the extended backlog delay between videos, exactly as
            # full backlog mode would.
            scraper2._backlog_delay(transcript_mode=True)
        detailed_info = scraper2.fetch_video_details(video_id, fetch_transcript=True)
        if detailed_info:
            backlog_videos.append(detailed_info)
    backlog_duration = time.time() - start_backlog

    # Guard per-video averages: either fetch may legitimately return
    # nothing, which previously raised ZeroDivisionError.
    normal_avg = normal_duration / len(videos_normal) if videos_normal else 0.0
    backlog_avg = backlog_duration / len(backlog_videos) if backlog_videos else 0.0
    print("\nResults:")
    print(f"Normal mode (5 videos): {normal_duration:.1f} seconds ({normal_avg:.1f}s per video)")
    print(f"Backlog mode (2 videos): {backlog_duration:.1f} seconds ({backlog_avg:.1f}s per video)")

    # Count how many fetched videos actually carry a transcript.
    normal_transcripts = sum(1 for v in videos_normal if v.get('transcript'))
    backlog_transcripts = sum(1 for v in backlog_videos if v.get('transcript'))
    print("Transcripts:")
    print(f" Normal mode: {normal_transcripts}/{len(videos_normal)}")
    print(f" Backlog mode: {backlog_transcripts}/{len(backlog_videos)}")
    return True
# Script entry point: run the delay test when executed directly.
if __name__ == "__main__":
    test_slow_delays()