#!/usr/bin/env python3
"""
Test YouTube authentication with various methods
"""

import json
from pathlib import Path

import yt_dlp

# Video used by every check in this script.
TEST_VIDEO_URL = "https://www.youtube.com/watch?v=TpdYT_itu9U"

# Cookie file consulted by the cookie-based check (relative to the CWD).
COOKIE_FILE = Path("data_production_backlog/.cookies/youtube_cookies.txt")

# Visual separator printed between sections of the report.
SEPARATOR = "=" * 60


def test_direct_extraction():
    """Try direct extraction without cookies first.

    Extracts metadata for ``TEST_VIDEO_URL`` without downloading media and
    prints the title, duration, and which subtitles / auto-captions are
    listed in the result.

    Returns:
        bool: True when yt-dlp returned video info; False when extraction
        raised or returned nothing.
    """
    print("Testing direct YouTube access...")
    print(SEPARATOR)

    # Basic options without authentication
    ydl_opts = {
        'quiet': False,
        'no_warnings': False,
        'extract_flat': False,
        'skip_download': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitleslangs': ['en'],
        # Custom request headers. The Python API reads these from
        # 'http_headers'; top-level 'user_agent' / 'referer' keys are
        # command-line options and are not picked up by YoutubeDL.
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Referer': 'https://www.youtube.com/',
        },
        # Try age gate bypass
        'age_limit': None,
        # Format selection - try to avoid age-gated formats
        'format': 'best[height<=720]',
    }

    # The broad catch is deliberate: this is a diagnostic script, and any
    # failure (network, extractor, unexpected result shape) is reported as
    # "no access" rather than crashing.
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print("Extracting video info...")
            info = ydl.extract_info(TEST_VIDEO_URL, download=False)

            if not info:
                return False

            print("✅ Successfully extracted video info!")
            print(f"Title: {info.get('title', 'Unknown')}")
            print(f"Duration: {info.get('duration', 0)} seconds")

            # Check for transcripts. `or {}` covers keys that are present
            # but set to None.
            subtitles = info.get('subtitles') or {}
            auto_captions = info.get('automatic_captions') or {}

            print("\nTranscript availability:")
            if subtitles:
                print(f" Manual subtitles: {list(subtitles.keys())}")
            if auto_captions:
                # Show first 5
                print(f" Auto-captions: {list(auto_captions.keys())[:5]}...")

                if 'en' in auto_captions:
                    print("\n ✅ English auto-captions available!")
                    caption_urls = auto_captions['en']
                    for cap in caption_urls[:2]:  # Show first 2 formats
                        print(f" - {cap.get('ext', 'unknown')}: {cap.get('url', '')[:80]}...")

            return True

    except Exception as e:
        print(f"❌ Error: {e}")
        return False


def test_with_cookie_file():
    """Test with existing cookie file.

    Runs the same metadata extraction as the direct check, but passes
    ``COOKIE_FILE`` to yt-dlp via the 'cookiefile' option.

    Returns:
        bool: True when yt-dlp returned video info; False when the cookie
        file does not exist, extraction raised, or nothing was returned.
    """
    cookie_file = COOKIE_FILE

    if not cookie_file.exists():
        print(f"Cookie file not found: {cookie_file}")
        return False

    print(f"\nTesting with cookie file: {cookie_file}")
    print(SEPARATOR)

    ydl_opts = {
        'cookiefile': str(cookie_file),
        'quiet': False,
        'no_warnings': False,
        'skip_download': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitleslangs': ['en'],
    }

    # Same best-effort policy as the direct check: report, don't crash.
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print("Extracting with cookies...")
            info = ydl.extract_info(TEST_VIDEO_URL, download=False)

            if not info:
                return False

            print("✅ Success with cookies!")

            # Check transcripts; a missing/None caption map must not turn
            # a successful extraction into a reported failure.
            auto_captions = info.get('automatic_captions') or {}
            if 'en' in auto_captions:
                print("✅ Transcripts available with cookies!")

            return True

    except Exception as e:
        print(f"❌ Error with cookies: {e}")
        return False


def main():
    """Run the direct check, fall back to cookies, and print a verdict."""
    # Try direct first
    success = test_direct_extraction()

    if not success:
        print("\n" + SEPARATOR)
        print("Direct extraction failed. Trying with cookies...")
        success = test_with_cookie_file()

    if success:
        print("\n✅ YouTube access working!")
        print("Transcripts can be fetched.")
    else:
        print("\n❌ YouTube access blocked")
        print("\nYouTube is blocking automated access.")
        print("This is a known issue with YouTube's anti-bot measures.")
        print("\nPossible solutions:")
        print("1. Use a proxy/VPN to change IP")
        print("2. Wait and retry later")
        print("3. Use authenticated browser session")
        print("4. Use YouTube API with API key")


if __name__ == "__main__":
    main()