Major Changes: - Updated all code references from hvacknowitall/hvacnkowitall to hkia - Renamed all existing markdown files to use hkia_ prefix - Updated configuration files, scrapers, and production scripts - Modified systemd service descriptions to use HKIA - Changed NAS sync path to /mnt/nas/hkia Files Updated: - 20+ source files updated with new naming convention - 34 markdown files renamed to hkia_* format - All ScraperConfig brand_name parameters now use 'hkia' - Documentation updated to reflect new naming Rationale: - Shorter, cleaner filenames - Consistent branding across all outputs - Easier to type and reference - Maintains same functionality with improved naming Next Steps: - Deploy updated services to production - Update any external references to old naming - Monitor scrapers to ensure proper operation 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
		
			
				
	
	
		
			109 lines
		
	
	
		
			No EOL
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			109 lines
		
	
	
		
			No EOL
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| """
 | |
| Authenticate with YouTube and fetch transcripts
 | |
| """
 | |
| 
 | |
| import yt_dlp
 | |
| import os
 | |
| from pathlib import Path
 | |
| 
 | |
def _report_transcripts(info):
    """Print which kind of English transcript, if any, *info* advertises."""
    # `or {}` guards against the key being present but set to None.
    subtitles = info.get('subtitles') or {}
    auto_captions = info.get('automatic_captions') or {}

    print("\nTranscript availability:")
    if 'en' in subtitles:
        print("  ✅ Manual English subtitles available")
    elif 'en' in auto_captions:
        print("  ✅ Auto-generated English captions available")
    else:
        print("  ❌ No English transcripts found")


def _report_cookie_file(cookie_file):
    """Print the path, size and line count of *cookie_file* if it exists."""
    if not cookie_file.exists():
        return

    cookie_size = cookie_file.stat().st_size
    # Explicit encoding + errors="replace": we only count lines, so an odd
    # byte in a cookie value must not abort the report.
    cookie_text = cookie_file.read_text(encoding="utf-8", errors="replace")
    cookie_lines = len(cookie_text.splitlines())

    print("\n📄 Cookie file saved:")
    print(f"  Path: {cookie_file}")
    print(f"  Size: {cookie_size} bytes")
    print(f"  Lines: {cookie_lines}")

    # Heuristic: more than 20 lines is treated as a full session export.
    if cookie_lines > 20:
        print(f"  ✅ Full session cookies saved ({cookie_lines} lines)")
    else:
        print(f"  ⚠️ Limited cookies ({cookie_lines} lines)")


def authenticate_youtube() -> bool:
    """Try a credentialed yt-dlp metadata fetch and report the outcome.

    Credentials are read from the ``YOUTUBE_USERNAME`` and
    ``YOUTUBE_PASSWORD`` environment variables. There are deliberately no
    in-code fallbacks: secrets must never live in source control.

    The function extracts metadata (no download) for a fixed test video,
    prints whether English subtitles or automatic captions are listed, and
    reports on the cookie file at
    ``data_production_backlog/.cookies/youtube_cookies_auth.txt``.

    Returns:
        True if metadata for the test video was extracted; False if the
        credentials are missing, extraction returned nothing, or any
        exception was raised along the way.
    """
    print("🔐 Authenticating with YouTube...")

    # Get credentials from environment only; bail out before touching the
    # filesystem or network if they are not configured.
    username = os.getenv('YOUTUBE_USERNAME')
    password = os.getenv('YOUTUBE_PASSWORD')
    if not username or not password:
        print("❌ Authentication error: YOUTUBE_USERNAME and "
              "YOUTUBE_PASSWORD must be set in the environment")
        return False

    print(f"Using account: {username}")
    print("=" * 60)

    # Cookie file path
    cookie_file = Path("data_production_backlog/.cookies/youtube_cookies_auth.txt")
    cookie_file.parent.mkdir(parents=True, exist_ok=True)

    # yt-dlp options with authentication.
    # TLS certificate verification is left at its default (enabled): the
    # session carries a password, so it must not be exposed to interception.
    ydl_opts = {
        'username': username,
        'password': password,
        'cookiefile': str(cookie_file),  # Save cookies here
        'quiet': False,
        'no_warnings': False,
        'extract_flat': False,
        'skip_download': True,
        'geo_bypass': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitleslangs': ['en'],
    }

    # Test authentication with a video
    test_video = "https://www.youtube.com/watch?v=TpdYT_itu9U"

    try:
        print("Testing authentication with a video...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(test_video, download=False)

        # Everything below runs after the YoutubeDL context has closed, so
        # the cookie jar has had the chance to be flushed to disk before we
        # inspect the file.
        if not info:
            print("❌ Failed to authenticate")
            return False

        # NOTE(review): a public video can return metadata without a
        # logged-in session, so this message may overstate what was proven.
        print("✅ Successfully authenticated!")
        print(f"Video title: {info.get('title', 'Unknown')}")

        _report_transcripts(info)
        _report_cookie_file(cookie_file)
        return True

    except Exception as e:  # script-level boundary: report and signal failure
        print(f"❌ Authentication error: {e}")

        # Try alternative: cookies from browser
        print("\n🔄 Alternative: Export cookies from browser")
        print("1. Install browser extension: 'Get cookies.txt LOCALLY'")
        print("2. Log into YouTube in your browser")
        print("3. Export cookies while on youtube.com")
        print("4. Save as: data_production_backlog/.cookies/youtube_cookies_browser.txt")

        return False
 | |
| 
 | |
if __name__ == "__main__":
    # Run the authentication check once, then pick the closing message
    # that matches its outcome and print it line by line.
    if authenticate_youtube():
        closing_lines = (
            "\n✅ Authentication successful!",
            "You can now fetch transcripts with the authenticated session.",
        )
    else:
        closing_lines = (
            "\n❌ Authentication failed.",
            "YouTube may require browser-based authentication.",
            "\nManual steps:",
            "1. Use browser to log into YouTube",
            "2. Export cookies using browser extension",
            "3. Save cookies file and update scraper to use it",
        )

    for closing_line in closing_lines:
        print(closing_line)