hvac-kia-content/youtube_auth.py
Ben Reed daab901e35 refactor: Update naming convention from hvacknowitall to hkia
Major Changes:
- Updated all code references from hvacknowitall/hvacnkowitall to hkia
- Renamed all existing markdown files to use hkia_ prefix
- Updated configuration files, scrapers, and production scripts
- Modified systemd service descriptions to use HKIA
- Changed NAS sync path to /mnt/nas/hkia

Files Updated:
- 20+ source files updated with new naming convention
- 34 markdown files renamed to hkia_* format
- All ScraperConfig brand_name parameters now use 'hkia'
- Documentation updated to reflect new naming

Rationale:
- Shorter, cleaner filenames
- Consistent branding across all outputs
- Easier to type and reference
- Maintains same functionality with improved naming

Next Steps:
- Deploy updated services to production
- Update any external references to old naming
- Monitor scrapers to ensure proper operation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-19 13:35:23 -03:00

109 lines
No EOL
4.1 KiB
Python

#!/usr/bin/env python3
"""
Authenticate with YouTube, save session cookies, and check transcript availability.
"""
import yt_dlp
import os
from pathlib import Path
def _report_transcripts(info):
    """Print whether English subtitles or auto-captions are listed in *info*."""
    # `or {}` guards against the key being present with a None value.
    subtitles = info.get('subtitles') or {}
    auto_captions = info.get('automatic_captions') or {}

    print("\nTranscript availability:")
    if 'en' in subtitles:
        print(" ✅ Manual English subtitles available")
    elif 'en' in auto_captions:
        print(" ✅ Auto-generated English captions available")
    else:
        print(" ❌ No English transcripts found")


def _report_cookie_file(cookie_file):
    """Print path, size and line count of *cookie_file* if it exists."""
    if not cookie_file.exists():
        return

    try:
        cookie_size = cookie_file.stat().st_size
        # errors="replace": this is only a diagnostic line count, so an odd
        # byte in the file must not abort the whole run.
        cookie_text = cookie_file.read_text(encoding="utf-8", errors="replace")
    except OSError as e:
        # A failed diagnostic read should not be reported as an auth failure.
        print(f"\n⚠️ Could not read cookie file {cookie_file}: {e}")
        return

    cookie_lines = len(cookie_text.splitlines())
    print("\n📄 Cookie file saved:")
    print(f" Path: {cookie_file}")
    print(f" Size: {cookie_size} bytes")
    print(f" Lines: {cookie_lines}")

    # Heuristic from the original script: more than 20 lines is treated as a
    # full logged-in session.
    if cookie_lines > 20:
        print(f" ✅ Full session cookies saved ({cookie_lines} lines)")
    else:
        print(f" ⚠️ Limited cookies ({cookie_lines} lines)")


def authenticate_youtube():
    """Try to log in to YouTube through yt-dlp and report the outcome.

    Credentials are read from the ``YOUTUBE_USERNAME`` and
    ``YOUTUBE_PASSWORD`` environment variables. Neither has a fallback:
    secrets must not live in source control. The function extracts metadata
    for a fixed test video (nothing is downloaded), prints whether English
    transcripts are listed, and prints statistics about the cookie file that
    yt-dlp was configured to write.

    Returns:
        True if metadata for the test video was extracted, False if
        credentials are missing, extraction returned nothing, or any
        exception was raised along the way.
    """
    print("🔐 Authenticating with YouTube...")

    # Get credentials from environment only; bail out before touching the
    # filesystem or the network if either is missing or empty.
    username = os.getenv('YOUTUBE_USERNAME')
    password = os.getenv('YOUTUBE_PASSWORD')
    if not username or not password:
        print("❌ Missing credentials: set YOUTUBE_USERNAME and "
              "YOUTUBE_PASSWORD in the environment")
        return False

    print(f"Using account: {username}")
    print("=" * 60)

    # Cookie file path
    cookie_file = Path("data_production_backlog/.cookies/youtube_cookies_auth.txt")
    cookie_file.parent.mkdir(parents=True, exist_ok=True)

    # yt-dlp options with authentication
    ydl_opts = {
        'username': username,
        'password': password,
        'cookiefile': str(cookie_file),  # Save cookies here
        'quiet': False,
        'no_warnings': False,
        'extract_flat': False,
        'skip_download': True,
        # NOTE(review): nocheckcertificate disables TLS certificate
        # verification on the same session that carries the credentials;
        # confirm it is really required before keeping it.
        'nocheckcertificate': True,
        'geo_bypass': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitleslangs': ['en'],
    }

    # Broad catch is deliberate: this is the script's top-level boundary and
    # callers rely on a boolean result rather than an exception.
    try:
        # Test authentication with a video
        test_video = "https://www.youtube.com/watch?v=TpdYT_itu9U"
        print("Testing authentication with a video...")

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(test_video, download=False)

        if not info:
            print("❌ Failed to authenticate")
            return False

        # NOTE(review): successful extraction is used as the success signal;
        # presumably a public video would also extract without a valid login,
        # so the cookie report below is the better indicator — confirm.
        print("✅ Successfully authenticated!")
        print(f"Video title: {info.get('title', 'Unknown')}")

        _report_transcripts(info)
        # Inspected after the with-block so the YoutubeDL object has been
        # closed (and has had the chance to flush its cookie jar) first.
        _report_cookie_file(cookie_file)
        return True

    except Exception as e:
        print(f"❌ Authentication error: {e}")

        # Try alternative: cookies from browser
        print("\n🔄 Alternative: Export cookies from browser")
        print("1. Install browser extension: 'Get cookies.txt LOCALLY'")
        print("2. Log into YouTube in your browser")
        print("3. Export cookies while on youtube.com")
        print("4. Save as: data_production_backlog/.cookies/youtube_cookies_browser.txt")
        return False
# Script entry point: run the authentication check and print a summary.
if __name__ == "__main__":
    if authenticate_youtube():
        outcome_lines = (
            "\n✅ Authentication successful!",
            "You can now fetch transcripts with the authenticated session.",
        )
    else:
        # On failure, walk the user through the manual cookie-export route.
        outcome_lines = (
            "\n❌ Authentication failed.",
            "YouTube may require browser-based authentication.",
            "\nManual steps:",
            "1. Use browser to log into YouTube",
            "2. Export cookies using browser extension",
            "3. Save cookies file and update scraper to use it",
        )

    for line in outcome_lines:
        print(line)