hvac-kia-content/test_youtube_api.py
Ben Reed daab901e35 refactor: Update naming convention from hvacknowitall to hkia
Major Changes:
- Updated all code references from hvacknowitall/hvacnkowitall to hkia
- Renamed all existing markdown files to use hkia_ prefix
- Updated configuration files, scrapers, and production scripts
- Modified systemd service descriptions to use HKIA
- Changed NAS sync path to /mnt/nas/hkia

Files Updated:
- 20+ source files updated with new naming convention
- 34 markdown files renamed to hkia_* format
- All ScraperConfig brand_name parameters now use 'hkia'
- Documentation updated to reflect new naming

Rationale:
- Shorter, cleaner filenames
- Consistent branding across all outputs
- Easier to type and reference
- Maintains same functionality with improved naming

Next Steps:
- Deploy updated services to production
- Update any external references to old naming
- Monitor scrapers to ensure proper operation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-19 13:35:23 -03:00

177 lines
No EOL
6.7 KiB
Python

#!/usr/bin/env python3
"""
Proof of concept for YouTube Data API v3 integration
Fetches video details, statistics, and transcripts
"""
import os
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi
from dotenv import load_dotenv
import json
# Load environment variables (expected to come from a local .env file) at
# import time, before test_youtube_api() reads them with os.getenv().
load_dotenv()
def _transcript_preview(segments, max_segments=10, max_chars=150):
    """Build a short text preview from the first few transcript segments.

    Args:
        segments: Iterable of transcript segments. Each segment is either a
            mapping with a ``'text'`` key or an object exposing a ``text``
            attribute.
        max_segments: Number of leading segments to include.
        max_chars: Maximum length of the returned preview.

    Returns:
        The segment texts joined by single spaces, truncated to ``max_chars``.
    """
    texts = []
    for segment in list(segments)[:max_segments]:
        # The instance-based fetch() API yields snippet objects rather than
        # dicts, so subscripting them raises TypeError; support both shapes.
        texts.append(segment['text'] if isinstance(segment, dict) else segment.text)
    return ' '.join(texts)[:max_chars]


def test_youtube_api() -> None:
    """Test the YouTube API connection and print channel and video details.

    Reads ``YOUTUBE_API_KEY`` (required) and ``YOUTUBE_CHANNEL_URL``
    (optional, defaults to the HVAC Know It All channel) from the
    environment. Looks the channel up by handle, falling back to a name
    search, then prints channel statistics, details for up to three of the
    five most recent uploads, and a transcript preview for each.

    All results are printed; nothing is returned. API and unexpected errors
    are caught and reported on stdout instead of being raised.
    """
    api_key = os.getenv('YOUTUBE_API_KEY')
    channel_url = os.getenv('YOUTUBE_CHANNEL_URL', 'https://www.youtube.com/@HVACKnowItAll')

    if not api_key:
        print("❌ No YouTube API key found in .env")
        return

    print("🔍 Testing YouTube Data API v3...")
    print(f"Channel: {channel_url}")
    print("-" * 60)

    try:
        # Build YouTube API client
        youtube = build('youtube', 'v3', developerKey=api_key)

        # Extract the channel handle from the URL, dropping any trailing path
        # or query string (e.g. ".../@Handle/videos?view=0" -> "Handle").
        channel_handle = channel_url.split('@')[-1].split('/')[0].split('?')[0]
        print(f"Channel handle: @{channel_handle}")

        # Step 1: Get channel ID from handle or search by name
        print("\n📡 Fetching channel information...")

        # Try the direct handle lookup first.
        channel_response = youtube.channels().list(
            part='snippet,statistics,contentDetails',
            forHandle=channel_handle,
        ).execute()

        if not channel_response.get('items'):
            # Fallback: search for the channel by name, then fetch its details.
            search_response = youtube.search().list(
                part='snippet',
                q="HVAC Know It All",
                type='channel',
                maxResults=1,
            ).execute()

            if not search_response.get('items'):
                print("❌ Channel not found")
                return

            channel_id = search_response['items'][0]['snippet']['channelId']

            # Get full channel details
            channel_response = youtube.channels().list(
                part='snippet,statistics,contentDetails',
                id=channel_id,
            ).execute()

        if not channel_response.get('items'):
            print("❌ Channel not found")
            return

        channel_data = channel_response['items'][0]
        channel_id = channel_data['id']
        channel_title = channel_data['snippet']['title']

        print(f"✅ Found channel: {channel_title}")
        print(f" Channel ID: {channel_id}")

        # Step 2: Get channel statistics
        channel_stats = channel_data['statistics']
        print("\n📊 Channel Statistics:")
        print(f" - Subscribers: {int(channel_stats.get('subscriberCount', 0)):,}")
        print(f" - Total Views: {int(channel_stats.get('viewCount', 0)):,}")
        print(f" - Video Count: {int(channel_stats.get('videoCount', 0)):,}")

        # Get uploads playlist ID
        uploads_id = channel_data['contentDetails']['relatedPlaylists']['uploads']

        # Step 3: Fetch recent videos
        print("\n🎥 Fetching recent videos...")
        videos_response = youtube.playlistItems().list(
            part='snippet,contentDetails',
            playlistId=uploads_id,
            maxResults=5,
        ).execute()

        video_ids = [
            item['contentDetails']['videoId']
            for item in videos_response.get('items', [])
        ]

        # Step 4: Get detailed video information
        if video_ids:
            videos_detail = youtube.videos().list(
                part='snippet,statistics,contentDetails',
                id=','.join(video_ids),
            ).execute()
            video_items = videos_detail.get('items', [])

            print(f"Found {len(video_items)} videos")
            print("-" * 60)

            # Only the first three videos are printed in detail.
            for i, video in enumerate(video_items[:3], 1):
                video_id = video['id']
                snippet = video['snippet']
                video_stats = video['statistics']

                print(f"\n📹 Video {i}: {snippet['title']}")
                print(f" ID: {video_id}")
                print(f" Published: {snippet['publishedAt']}")
                print(f" Duration: {video['contentDetails']['duration']}")

                # Full description (untruncated); only the preview is cut.
                full_description = snippet.get('description', '')
                print(f" Description Length: {len(full_description)} chars")
                print(f" Description Preview: {full_description[:200]}...")

                # Statistics
                print(" 📈 Stats:")
                print(f" - Views: {int(video_stats.get('viewCount', 0)):,}")
                print(f" - Likes: {int(video_stats.get('likeCount', 0)):,}")
                print(f" - Comments: {int(video_stats.get('commentCount', 0)):,}")

                # Tags
                tags = snippet.get('tags', [])
                if tags:
                    print(f" 🏷️ Tags: {', '.join(tags[:5])}")

                # Try to get transcript; a failure here must not abort the
                # remaining videos, so it is reported and swallowed.
                print(" 📝 Transcript: ", end="")
                try:
                    api = YouTubeTranscriptApi()
                    segments = api.fetch(video_id)

                    if segments:
                        print(f"Available ({len(segments)} segments)")
                        # Show up to 150 chars built from the first 10 segments.
                        preview = _transcript_preview(segments)
                        print(f" Preview: {preview}...")
                    else:
                        print("No transcript available")
                except Exception as e:
                    print(f"Error fetching transcript: {e}")

        # Step 5: Check API quota usage
        print("\n" + "=" * 60)
        print("📊 API Usage Notes:")
        print(" - Search: 100 quota units")
        print(" - Channel details: 1 quota unit")
        print(" - Playlist items: 1 quota unit")
        print(" - Video details: 1 quota unit")
        print(" - Total used in this test: ~104 units")
        print(" - Daily quota: 10,000 units")
        print(" - Can fetch ~2,500 videos per day with full details")

    except HttpError as e:
        print(f"❌ YouTube API error: {e}")
        # The error body is not guaranteed to be JSON of the expected shape;
        # fall back to a generic message rather than raising from the handler.
        try:
            error_detail = json.loads(e.content)
        except (TypeError, ValueError):
            error_detail = None
        error_info = error_detail.get('error') if isinstance(error_detail, dict) else None
        if isinstance(error_info, dict):
            message = error_info.get('message', 'Unknown error')
        else:
            message = 'Unknown error'
        print(f" Error details: {message}")
    except Exception as e:
        print(f"❌ Error: {e}")

    print("\n" + "=" * 60)
    print("YouTube API test complete!")
if __name__ == "__main__":
    # Run the proof of concept only when executed as a script, not on import.
    test_youtube_api()