#!/usr/bin/env python3
"""Authenticate with YouTube through yt-dlp and report transcript availability.

Credentials are read from the ``YOUTUBE_USERNAME`` and ``YOUTUBE_PASSWORD``
environment variables; nothing secret is stored in this file.  Session
cookies are written to ``COOKIE_FILE`` so later runs can reuse the session.

SECURITY NOTE: an earlier revision of this script embedded a real account
name and password as fallback defaults.  Treat that password as leaked and
rotate it.
"""

import os
from pathlib import Path

# Where yt-dlp persists the session cookies (Netscape cookies.txt format).
COOKIE_FILE = Path("data_production_backlog/.cookies/youtube_cookies_auth.txt")

# Video used to probe whether extraction works with the supplied credentials.
TEST_VIDEO_URL = "https://www.youtube.com/watch?v=TpdYT_itu9U"

# A cookie file with more lines than this is reported as a "full" session.
FULL_SESSION_MIN_LINES = 20


def _report_transcripts(info):
    """Print which kind of English transcript the extracted video offers."""
    subtitles = info.get('subtitles', {})
    auto_captions = info.get('automatic_captions', {})

    print("\nTranscript availability:")
    if 'en' in subtitles:
        print(" ✅ Manual English subtitles available")
    elif 'en' in auto_captions:
        print(" ✅ Auto-generated English captions available")
    else:
        print(" ❌ No English transcripts found")


def _report_cookie_file(cookie_file):
    """Print path, size and line count of *cookie_file* if it exists."""
    if not cookie_file.exists():
        return

    cookie_size = cookie_file.stat().st_size
    cookie_lines = len(cookie_file.read_text(encoding="utf-8").splitlines())

    print("\n📄 Cookie file saved:")
    print(f" Path: {cookie_file}")
    print(f" Size: {cookie_size} bytes")
    print(f" Lines: {cookie_lines}")

    if cookie_lines > FULL_SESSION_MIN_LINES:
        print(f" ✅ Full session cookies saved ({cookie_lines} lines)")
    else:
        print(f" ⚠️ Limited cookies ({cookie_lines} lines)")


def _print_browser_cookie_help():
    """Print the manual fallback: exporting cookies from a logged-in browser."""
    print("\n🔄 Alternative: Export cookies from browser")
    print("1. Install browser extension: 'Get cookies.txt LOCALLY'")
    print("2. Log into YouTube in your browser")
    print("3. Export cookies while on youtube.com")
    print("4. Save as: data_production_backlog/.cookies/youtube_cookies_browser.txt")


def authenticate_youtube() -> bool:
    """Log in to YouTube with yt-dlp and probe a test video.

    Reads ``YOUTUBE_USERNAME`` / ``YOUTUBE_PASSWORD`` from the environment,
    extracts metadata for ``TEST_VIDEO_URL`` with those credentials, reports
    whether English subtitles or automatic captions exist, and summarizes
    the cookie file written to ``COOKIE_FILE``.

    Returns:
        True if metadata extraction succeeded; False if credentials are
        missing, extraction returned nothing, or any error occurred.
        Errors are printed rather than raised (apart from ``ImportError``
        when yt-dlp is not installed).
    """
    print("🔐 Authenticating with YouTube...")

    # No in-source fallbacks: a missing variable is a configuration error,
    # reported before touching the filesystem or the network.
    username = os.getenv('YOUTUBE_USERNAME')
    password = os.getenv('YOUTUBE_PASSWORD')
    if not username or not password:
        print("❌ Missing credentials: set the YOUTUBE_USERNAME and "
              "YOUTUBE_PASSWORD environment variables")
        return False

    # Show the account actually in use, not a hard-coded address.
    print(f"Using account: {username}")
    print("=" * 60)

    # Imported lazily so this module can be imported (and the credential
    # check above exercised) without the third-party dependency installed.
    import yt_dlp

    cookie_file = COOKIE_FILE
    cookie_file.parent.mkdir(parents=True, exist_ok=True)

    # TLS certificate verification is deliberately left enabled: the former
    # 'nocheckcertificate': True would have sent the password over a
    # connection whose peer was never verified.
    ydl_opts = {
        'username': username,
        'password': password,
        'cookiefile': str(cookie_file),  # yt-dlp saves session cookies here
        'quiet': False,
        'no_warnings': False,
        'extract_flat': False,
        'skip_download': True,
        'geo_bypass': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitleslangs': ['en'],
    }

    # Broad catch is intentional: this is the script's top-level boundary
    # and every failure is reported to the user as "authentication failed".
    try:
        print("Testing authentication with a video...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(TEST_VIDEO_URL, download=False)
        # yt-dlp writes the cookie jar when the YoutubeDL object is closed,
        # so the cookie file is only inspected after the ``with`` has exited.

        if not info:
            print("❌ Failed to authenticate")
            return False

        # NOTE(review): a successful metadata fetch of a public video does
        # not by itself prove the login was accepted - confirm via cookies.
        print("✅ Successfully authenticated!")
        print(f"Video title: {info.get('title', 'Unknown')}")

        _report_transcripts(info)
        _report_cookie_file(cookie_file)
        return True
    except Exception as e:
        print(f"❌ Authentication error: {e}")
        _print_browser_cookie_help()
        return False


def main():
    """Run the authentication check and print a human-readable verdict."""
    if authenticate_youtube():
        print("\n✅ Authentication successful!")
        print("You can now fetch transcripts with the authenticated session.")
    else:
        print("\n❌ Authentication failed.")
        print("YouTube may require browser-based authentication.")
        print("\nManual steps:")
        print("1. Use browser to log into YouTube")
        print("2. Export cookies using browser extension")
        print("3. Save cookies file and update scraper to use it")


if __name__ == "__main__":
    main()