hvac-kia-content/test_youtube_api.py

#!/usr/bin/env python3
"""
Proof of concept for YouTube Data API v3 integration
Fetches video details, statistics, and transcripts
"""

import os
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi
from dotenv import load_dotenv
import json

# Load environment variables via python-dotenv at import time so that
# YOUTUBE_API_KEY and YOUTUBE_CHANNEL_URL (read with os.getenv in
# test_youtube_api) can be supplied from a .env file.
load_dotenv()

# Search queries for the channel-search fallback, keyed by lower-cased handle.
# Handles contain no spaces, so the default channel keeps its spaced name as
# the query; any other handle is searched for verbatim.
_SEARCH_NAMES_BY_HANDLE = {'hvacknowitall': 'HVAC Know It All'}

# Quota costs, matching the usage notes printed at the end of the test.
_QUOTA_COST_LIST = 1
_QUOTA_COST_SEARCH = 100


def _extract_channel_handle(channel_url):
    """Return the bare channel handle (no '@', path, query or fragment)."""
    url = channel_url.strip()
    if '@' in url:
        candidate = url.rsplit('@', 1)[-1]
    else:
        # No handle marker in the URL: fall back to the last path segment.
        candidate = url.rstrip('/').rsplit('/', 1)[-1]
    # Drop anything after the handle, e.g. "/videos", "?si=..." or "#frag".
    for delimiter in ('/', '?', '#'):
        candidate = candidate.split(delimiter, 1)[0]
    return candidate


def _http_error_message(error):
    """Return the message from an HttpError body, or 'Unknown error'."""
    try:
        payload = json.loads(error.content)
    except (AttributeError, TypeError, ValueError):
        # Body missing or not JSON (json.JSONDecodeError is a ValueError).
        return 'Unknown error'
    detail = payload.get('error') if isinstance(payload, dict) else None
    if isinstance(detail, dict):
        return detail.get('message', 'Unknown error')
    return str(detail) if detail else 'Unknown error'


def _segment_text(segment):
    """Return a transcript segment's text for dict or object segments."""
    # NOTE(review): youtube-transcript-api releases that expose the instance
    # method fetch() appear to yield snippet objects with a .text attribute
    # rather than dicts; both shapes are accepted here.
    if isinstance(segment, dict):
        return segment.get('text', '')
    return getattr(segment, 'text', '')


def _print_transcript_preview(video_id):
    """Print transcript availability for *video_id* plus a short preview."""
    print("   📝 Transcript: ", end="")
    try:
        segments = YouTubeTranscriptApi().fetch(video_id)

        if segments:
            print(f"Available ({len(segments)} segments)")
            # list() avoids relying on slice support of the returned object.
            leading = list(segments)[:10]
            full_text = ' '.join(_segment_text(seg) for seg in leading)
            print(f"      Preview: {full_text[:150]}...")
        else:
            print("No transcript available")

    except Exception as e:
        # Best-effort: a missing/disabled transcript must not abort the test.
        print(f"Error fetching transcript: {e}")


def _print_video_details(index, video):
    """Print metadata, statistics, tags and transcript info for one video."""
    video_id = video['id']
    snippet = video['snippet']
    video_stats = video['statistics']

    print(f"\n📹 Video {index}: {snippet['title']}")
    print(f"   ID: {video_id}")
    print(f"   Published: {snippet['publishedAt']}")
    print(f"   Duration: {video['contentDetails']['duration']}")

    # Full description (untruncated)
    full_description = snippet.get('description', '')
    print(f"   Description Length: {len(full_description)} chars")
    print(f"   Description Preview: {full_description[:200]}...")

    # Statistics
    print("   📈 Stats:")
    print(f"      - Views: {int(video_stats.get('viewCount', 0)):,}")
    print(f"      - Likes: {int(video_stats.get('likeCount', 0)):,}")
    print(f"      - Comments: {int(video_stats.get('commentCount', 0)):,}")

    # Tags
    tags = snippet.get('tags', [])
    if tags:
        print(f"   🏷️ Tags: {', '.join(tags[:5])}")

    _print_transcript_preview(video_id)


def test_youtube_api():
    """Exercise the YouTube Data API v3 and print what it returns.

    Reads ``YOUTUBE_API_KEY`` (required) and ``YOUTUBE_CHANNEL_URL``
    (optional; defaults to the HVAC Know It All channel) from the
    environment. Looks the channel up by handle, falls back to a channel
    search, then prints channel statistics, details for up to three of the
    five most recent uploads, a transcript preview per video, and quota
    usage notes.

    Returns:
        None. All results are printed. ``HttpError`` and any other exception
        raised while talking to the API are caught and reported on stdout.
    """

    api_key = os.getenv('YOUTUBE_API_KEY')
    # `or` (not a getenv default) so an empty variable also uses the default.
    channel_url = (os.getenv('YOUTUBE_CHANNEL_URL')
                   or 'https://www.youtube.com/@HVACKnowItAll')

    if not api_key:
        print("❌ No YouTube API key found in .env")
        return

    print("🔍 Testing YouTube Data API v3...")
    print(f"Channel: {channel_url}")
    print("-" * 60)

    # Running total of quota units spent by the calls made below.
    quota_used = 0

    try:
        # Build YouTube API client
        youtube = build('youtube', 'v3', developerKey=api_key)

        # Extract channel handle from URL
        channel_handle = _extract_channel_handle(channel_url)
        print(f"Channel handle: @{channel_handle}")

        # Step 1: Get channel ID from handle or search by name
        print("\n📡 Fetching channel information...")

        # Try direct channel lookup first
        channel_response = youtube.channels().list(
            part='snippet,statistics,contentDetails',
            forHandle=channel_handle
        ).execute()
        quota_used += _QUOTA_COST_LIST

        if not channel_response.get('items'):
            # Fallback to search, querying for the configured channel rather
            # than always searching for the default one.
            search_query = _SEARCH_NAMES_BY_HANDLE.get(
                channel_handle.lower(), channel_handle
            )
            search_response = youtube.search().list(
                part='snippet',
                q=search_query,
                type='channel',
                maxResults=1
            ).execute()
            quota_used += _QUOTA_COST_SEARCH

            if not search_response.get('items'):
                print("❌ Channel not found")
                return

            channel_id = search_response['items'][0]['snippet']['channelId']

            # Get full channel details
            channel_response = youtube.channels().list(
                part='snippet,statistics,contentDetails',
                id=channel_id
            ).execute()
            quota_used += _QUOTA_COST_LIST

        if not channel_response.get('items'):
            print("❌ Channel not found")
            return

        channel_data = channel_response['items'][0]
        channel_id = channel_data['id']
        channel_title = channel_data['snippet']['title']
        print(f"✅ Found channel: {channel_title}")
        print(f"   Channel ID: {channel_id}")

        # Step 2: Get channel statistics
        channel_stats = channel_data['statistics']
        print("\n📊 Channel Statistics:")
        print(f"   - Subscribers: {int(channel_stats.get('subscriberCount', 0)):,}")
        print(f"   - Total Views: {int(channel_stats.get('viewCount', 0)):,}")
        print(f"   - Video Count: {int(channel_stats.get('videoCount', 0)):,}")

        # Get uploads playlist ID
        uploads_id = channel_data['contentDetails']['relatedPlaylists']['uploads']

        # Step 3: Fetch recent videos
        print("\n🎥 Fetching recent videos...")
        videos_response = youtube.playlistItems().list(
            part='snippet,contentDetails',
            playlistId=uploads_id,
            maxResults=5
        ).execute()
        quota_used += _QUOTA_COST_LIST

        video_ids = [
            item['contentDetails']['videoId']
            for item in videos_response.get('items', [])
        ]

        # Step 4: Get detailed video information
        if video_ids:
            videos_detail = youtube.videos().list(
                part='snippet,statistics,contentDetails',
                id=','.join(video_ids)
            ).execute()
            quota_used += _QUOTA_COST_LIST

            detailed_videos = videos_detail.get('items', [])
            print(f"Found {len(detailed_videos)} videos")
            print("-" * 60)

            # Only the first three videos are printed in full.
            for i, video in enumerate(detailed_videos[:3], 1):
                _print_video_details(i, video)

        # Step 5: Check API quota usage
        print("\n" + "=" * 60)
        print("📊 API Usage Notes:")
        print("   - Search: 100 quota units")
        print("   - Channel details: 1 quota unit")
        print("   - Playlist items: 1 quota unit")
        print("   - Video details: 1 quota unit")
        # Report what this run actually spent; search is only used as fallback.
        print(f"   - Total used in this test: ~{quota_used} units")
        print("   - Daily quota: 10,000 units")
        print("   - Can fetch ~2,500 videos per day with full details")

    except HttpError as e:
        print(f"❌ YouTube API error: {e}")
        print(f"   Error details: {_http_error_message(e)}")
    except Exception as e:
        print(f"❌ Error: {e}")

    print("\n" + "=" * 60)
    print("YouTube API test complete!")

# Run the proof-of-concept only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_youtube_api()