hvac-kia-content/test_youtube_auth.py
Ben Reed daab901e35 refactor: Update naming convention from hvacknowitall to hkia
Major Changes:
- Updated all code references from hvacknowitall/hvacnkowitall to hkia
- Renamed all existing markdown files to use hkia_ prefix
- Updated configuration files, scrapers, and production scripts
- Modified systemd service descriptions to use HKIA
- Changed NAS sync path to /mnt/nas/hkia

Files Updated:
- 20+ source files updated with new naming convention
- 34 markdown files renamed to hkia_* format
- All ScraperConfig brand_name parameters now use 'hkia'
- Documentation updated to reflect new naming

Rationale:
- Shorter, cleaner filenames
- Consistent branding across all outputs
- Easier to type and reference
- Maintains same functionality with improved naming

Next Steps:
- Deploy updated services to production
- Update any external references to old naming
- Monitor scrapers to ensure proper operation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-19 13:35:23 -03:00

131 lines
No EOL
4.4 KiB
Python

#!/usr/bin/env python3
"""
Test YouTube authentication with various methods
"""
import yt_dlp
from pathlib import Path
import json
def test_direct_extraction() -> bool:
    """Try direct extraction without cookies first.

    Runs a metadata-only yt-dlp extraction (``skip_download`` in the options
    and ``download=False`` on the call) against one fixed video, then prints
    the title, the duration and whichever subtitle / auto-caption tracks the
    returned info dict lists.

    Returns:
        True when yt-dlp returned a non-empty info dict. False when it
        returned nothing or when any exception was raised; the exception is
        printed, never propagated.
    """
    print("Testing direct YouTube access...")
    print("=" * 60)

    test_video = "https://www.youtube.com/watch?v=TpdYT_itu9U"

    # Basic options without authentication
    ydl_opts = {
        'quiet': False,
        'no_warnings': False,
        'extract_flat': False,
        'skip_download': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitleslangs': ['en'],
        # Add user agent and headers
        # NOTE(review): confirm the installed yt-dlp still honors these two
        # top-level keys; custom headers may need to go under 'http_headers'.
        'user_agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'referer': 'https://www.youtube.com/',
        # Try age gate bypass
        'age_limit': None,
        # Format selection - try to avoid age-gated formats
        'format': 'best[height<=720]',
    }

    # The broad catch is deliberate: this is a diagnostic probe, so every
    # failure is printed and reported to the caller as False.
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print("Extracting video info...")
            info = ydl.extract_info(test_video, download=False)

            if not info:
                # Report an empty result as an explicit failure instead of
                # falling off the end of the function and returning None.
                return False

            print("✅ Successfully extracted video info!")
            print(f"Title: {info.get('title', 'Unknown')}")
            print(f"Duration: {info.get('duration', 0)} seconds")

            # Check for transcripts. `or {}` also covers keys that are
            # present but set to None, which .get()'s default does not.
            subtitles = info.get('subtitles') or {}
            auto_captions = info.get('automatic_captions') or {}

            print("\nTranscript availability:")
            if subtitles:
                print(f" Manual subtitles: {list(subtitles.keys())}")
            if auto_captions:
                print(f" Auto-captions: {list(auto_captions.keys())[:5]}...")  # Show first 5

                if 'en' in auto_captions:
                    print("\n ✅ English auto-captions available!")
                    caption_urls = auto_captions['en'] or []
                    for cap in caption_urls[:2]:  # Show first 2 formats
                        # A missing or None URL must not turn a successful
                        # extraction into a reported failure.
                        print(f" - {cap.get('ext', 'unknown')}: {(cap.get('url') or '')[:80]}...")

            return True

    except Exception as e:
        print(f"❌ Error: {e}")
        return False
def test_with_cookie_file() -> bool:
    """Test with existing cookie file.

    Looks for ``data_production_backlog/.cookies/youtube_cookies.txt``
    relative to the current working directory and, when it exists, passes it
    to yt-dlp as ``cookiefile`` for a metadata-only extraction of one fixed
    video.

    Returns:
        True when yt-dlp returned a non-empty info dict. False when the
        cookie file is missing, yt-dlp returned nothing, or any exception
        was raised; the exception is printed, never propagated.
    """
    cookie_file = Path("data_production_backlog/.cookies/youtube_cookies.txt")

    if not cookie_file.exists():
        print(f"Cookie file not found: {cookie_file}")
        return False

    print(f"\nTesting with cookie file: {cookie_file}")
    print("=" * 60)

    test_video = "https://www.youtube.com/watch?v=TpdYT_itu9U"

    ydl_opts = {
        'cookiefile': str(cookie_file),
        'quiet': False,
        'no_warnings': False,
        'skip_download': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitleslangs': ['en'],
    }

    # The broad catch is deliberate: this is a diagnostic probe, so every
    # failure is printed and reported to the caller as False.
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print("Extracting with cookies...")
            info = ydl.extract_info(test_video, download=False)

            if not info:
                # Report an empty result as an explicit failure instead of
                # falling off the end of the function and returning None.
                return False

            print("✅ Success with cookies!")

            # Check transcripts. `or {}` guards against the key being
            # present but None, which would make the `in` test raise and
            # mislabel a successful extraction as a cookie error.
            auto_captions = info.get('automatic_captions') or {}
            if 'en' in auto_captions:
                print("✅ Transcripts available with cookies!")

            return True

    except Exception as e:
        print(f"❌ Error with cookies: {e}")
        return False
if __name__ == "__main__":
    # Unauthenticated access is attempted first; the cookie file is only
    # consulted as a fallback when that attempt reports failure.
    access_ok = test_direct_extraction()

    if not access_ok:
        print("\n" + "=" * 60)
        print("Direct extraction failed. Trying with cookies...")
        access_ok = test_with_cookie_file()

    if not access_ok:
        # Neither attempt worked: explain the situation and list workarounds.
        failure_report = (
            "\n❌ YouTube access blocked",
            "\nYouTube is blocking automated access.",
            "This is a known issue with YouTube's anti-bot measures.",
            "\nPossible solutions:",
            "1. Use a proxy/VPN to change IP",
            "2. Wait and retry later",
            "3. Use authenticated browser session",
            "4. Use YouTube API with API key",
        )
        for message in failure_report:
            print(message)
    else:
        print("\n✅ YouTube access working!")
        print("Transcripts can be fetched.")