Major Changes: - Updated all code references from hvacknowitall/hvacnkowitall to hkia - Renamed all existing markdown files to use hkia_ prefix - Updated configuration files, scrapers, and production scripts - Modified systemd service descriptions to use HKIA - Changed NAS sync path to /mnt/nas/hkia Files Updated: - 20+ source files updated with new naming convention - 34 markdown files renamed to hkia_* format - All ScraperConfig brand_name parameters now use 'hkia' - Documentation updated to reflect new naming Rationale: - Shorter, cleaner filenames - Consistent branding across all outputs - Easier to type and reference - Maintains same functionality with improved naming Next Steps: - Deploy updated services to production - Update any external references to old naming - Monitor scrapers to ensure proper operation 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
109 lines
No EOL
4.1 KiB
Python
109 lines
No EOL
4.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Authenticate with YouTube and fetch transcripts
|
|
"""
|
|
|
|
import yt_dlp
|
|
import os
|
|
from pathlib import Path
|
|
|
|
def authenticate_youtube():
    """Probe YouTube through yt-dlp with account credentials and report status.

    Credentials come exclusively from the ``YOUTUBE_USERNAME`` and
    ``YOUTUBE_PASSWORD`` environment variables; no fallback secret is kept in
    the source. One fixed video is queried with ``download=False``, its
    English subtitle/caption availability is printed, and the cookie file
    configured for yt-dlp is inspected afterwards.

    Returns:
        bool: ``True`` when yt-dlp returned metadata for the probe video;
        ``False`` when a credential is missing, no metadata came back, or
        any exception was raised during the probe.
    """
    print("🔐 Authenticating with YouTube...")

    # Secrets must be supplied by the environment. Refuse to continue rather
    # than silently falling back to a credential committed to the repository.
    username = os.getenv('YOUTUBE_USERNAME')
    password = os.getenv('YOUTUBE_PASSWORD')
    if not username or not password:
        print("❌ Authentication error: YOUTUBE_USERNAME and YOUTUBE_PASSWORD "
              "must be set in the environment")
        _print_browser_cookie_help()
        return False

    print(f"Using account: {username}")
    print("=" * 60)

    # Cookie file path
    cookie_file = Path("data_production_backlog/.cookies/youtube_cookies_auth.txt")
    cookie_file.parent.mkdir(parents=True, exist_ok=True)

    # yt-dlp options with authentication.
    # 'nocheckcertificate' is intentionally NOT set: disabling TLS certificate
    # verification while transmitting a password exposes it to interception.
    ydl_opts = {
        'username': username,
        'password': password,
        'cookiefile': str(cookie_file),  # Save cookies here
        'quiet': False,
        'no_warnings': False,
        'extract_flat': False,
        'skip_download': True,
        'geo_bypass': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitleslangs': ['en'],
    }

    # Test authentication with a video
    test_video = "https://www.youtube.com/watch?v=TpdYT_itu9U"

    try:
        print("Testing authentication with a video...")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(test_video, download=False)

        if not info:
            print("❌ Failed to authenticate")
            return False

        # NOTE(review): receiving metadata is the only success criterion used
        # here; if the probe video is publicly viewable this does not by
        # itself prove the login was accepted — confirm.
        print("✅ Successfully authenticated!")
        print(f"Video title: {info.get('title', 'Unknown')}")

        _report_transcript_availability(info)
        _report_cookie_file(cookie_file)
        return True

    except Exception as e:
        # Top-level boundary for this script: report the failure and fall
        # back to manual instructions instead of crashing with a traceback.
        print(f"❌ Authentication error: {e}")
        _print_browser_cookie_help()
        return False


def _report_transcript_availability(info):
    """Print whether English manual subtitles or auto captions are listed in *info*."""
    # `or {}` guards against the keys being present but set to None.
    subtitles = info.get('subtitles') or {}
    auto_captions = info.get('automatic_captions') or {}

    print("\nTranscript availability:")
    if 'en' in subtitles:
        print("  ✅ Manual English subtitles available")
    elif 'en' in auto_captions:
        print("  ✅ Auto-generated English captions available")
    else:
        print("  ❌ No English transcripts found")


def _report_cookie_file(cookie_file):
    """Print path, byte size and line count of *cookie_file* if it exists."""
    if not cookie_file.exists():
        return

    cookie_size = cookie_file.stat().st_size
    # Explicit encoding with replacement so counting lines never fails on a
    # locale whose default codec cannot decode the file.
    cookie_text = cookie_file.read_text(encoding="utf-8", errors="replace")
    cookie_lines = len(cookie_text.splitlines())

    print("\n📄 Cookie file saved:")
    print(f"  Path: {cookie_file}")
    print(f"  Size: {cookie_size} bytes")
    print(f"  Lines: {cookie_lines}")

    # More than 20 lines is the script's heuristic for a full session.
    if cookie_lines > 20:
        print(f"  ✅ Full session cookies saved ({cookie_lines} lines)")
    else:
        print(f"  ⚠️ Limited cookies ({cookie_lines} lines)")


def _print_browser_cookie_help():
    """Print the manual browser-cookie export instructions used as a fallback."""
    # Try alternative: cookies from browser
    print("\n🔄 Alternative: Export cookies from browser")
    print("1. Install browser extension: 'Get cookies.txt LOCALLY'")
    print("2. Log into YouTube in your browser")
    print("3. Export cookies while on youtube.com")
    print("4. Save as: data_production_backlog/.cookies/youtube_cookies_browser.txt")
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the authentication probe, print a summary, and
    # report the outcome to the calling process through the exit status.
    success = authenticate_youtube()

    if success:
        print("\n✅ Authentication successful!")
        print("You can now fetch transcripts with the authenticated session.")
    else:
        print("\n❌ Authentication failed.")
        print("YouTube may require browser-based authentication.")
        print("\nManual steps:")
        print("1. Use browser to log into YouTube")
        print("2. Export cookies using browser extension")
        print("3. Save cookies file and update scraper to use it")

    # Exit non-zero on failure so shells, cron jobs and service managers can
    # detect that authentication did not succeed.
    raise SystemExit(0 if success else 1)