Major Changes: - Updated all code references from hvacknowitall/hvacnkowitall to hkia - Renamed all existing markdown files to use hkia_ prefix - Updated configuration files, scrapers, and production scripts - Modified systemd service descriptions to use HKIA - Changed NAS sync path to /mnt/nas/hkia Files Updated: - 20+ source files updated with new naming convention - 34 markdown files renamed to hkia_* format - All ScraperConfig brand_name parameters now use 'hkia' - Documentation updated to reflect new naming Rationale: - Shorter, cleaner filenames - Consistent branding across all outputs - Easier to type and reference - Maintains same functionality with improved naming Next Steps: - Deploy updated services to production - Update any external references to old naming - Monitor scrapers to ensure proper operation 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
177 lines
No EOL
6.7 KiB
Python
177 lines
No EOL
6.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Proof of concept for YouTube Data API v3 integration
|
|
Fetches video details, statistics, and transcripts
|
|
"""
|
|
|
|
import os
|
|
from googleapiclient.discovery import build
|
|
from googleapiclient.errors import HttpError
|
|
from youtube_transcript_api import YouTubeTranscriptApi
|
|
from dotenv import load_dotenv
|
|
import json
|
|
|
|
# Load environment variables
|
|
load_dotenv()
|
|
|
|
def _extract_channel_handle(channel_url):
    """Return the bare channel handle (no '@', path, query or fragment) from a URL."""
    _, marker, tail = channel_url.rpartition('@')
    if not marker:
        # No '@' present: hand the value back untouched, as the lookup did before.
        return channel_url.strip()
    # Anything after the handle (e.g. "/videos", "?view=0") is not part of it.
    for separator in ('/', '?', '#'):
        tail = tail.split(separator, 1)[0]
    return tail.strip()


def _segment_text(segment):
    """Return the caption text of one transcript segment.

    Accepts both mapping-style segments (``{'text': ...}``) and snippet
    objects that expose the caption as a ``.text`` attribute.
    """
    if isinstance(segment, dict):
        return segment.get('text', '')
    return getattr(segment, 'text', '')


def _http_error_message(error):
    """Best-effort extraction of the API's error message from an HttpError body."""
    try:
        detail = json.loads(getattr(error, 'content', None))
    except (TypeError, ValueError):
        # Body missing or not JSON (e.g. an HTML error page from a proxy).
        return 'Unknown error'
    error_info = detail.get('error') if isinstance(detail, dict) else None
    if isinstance(error_info, dict):
        return error_info.get('message', 'Unknown error')
    return 'Unknown error'


def test_youtube_api():
    """Test YouTube API connection and fetch video details.

    Reads ``YOUTUBE_API_KEY`` and ``YOUTUBE_CHANNEL_URL`` from the environment,
    resolves the channel (by handle first, falling back to a channel search),
    prints channel statistics, then prints details and a transcript preview
    for up to three of the five most recent uploads.

    Everything is reported via ``print``; the function always returns ``None``.
    API failures are caught and printed rather than raised. The closing
    banner is skipped on the early-return paths (missing key, channel not found).
    """
    api_key = os.getenv('YOUTUBE_API_KEY')
    channel_url = os.getenv('YOUTUBE_CHANNEL_URL', 'https://www.youtube.com/@HVACKnowItAll')

    if not api_key:
        print("❌ No YouTube API key found in .env")
        return

    print("🔍 Testing YouTube Data API v3...")
    print(f"Channel: {channel_url}")
    print("-" * 60)

    try:
        # Build YouTube API client
        youtube = build('youtube', 'v3', developerKey=api_key)

        # Extract channel handle from URL (tolerates trailing path/query parts)
        channel_handle = _extract_channel_handle(channel_url)
        print(f"Channel handle: @{channel_handle}")

        # Step 1: Get channel ID from handle or search by name
        print("\n📡 Fetching channel information...")

        # Try direct channel lookup first
        channel_response = youtube.channels().list(
            part='snippet,statistics,contentDetails',
            forHandle=channel_handle
        ).execute()

        if not channel_response.get('items'):
            # Fallback to search
            search_response = youtube.search().list(
                part='snippet',
                q="HVAC Know It All",
                type='channel',
                maxResults=1
            ).execute()

            if not search_response.get('items'):
                print("❌ Channel not found")
                return

            channel_id = search_response['items'][0]['snippet']['channelId']

            # Get full channel details
            channel_response = youtube.channels().list(
                part='snippet,statistics,contentDetails',
                id=channel_id
            ).execute()

        if not channel_response.get('items'):
            print("❌ Channel not found")
            return

        channel_data = channel_response['items'][0]
        channel_id = channel_data['id']
        channel_title = channel_data['snippet']['title']
        print(f"✅ Found channel: {channel_title}")
        print(f" Channel ID: {channel_id}")

        # Step 2: Get channel statistics
        channel_stats = channel_data['statistics']
        print("\n📊 Channel Statistics:")
        print(f" - Subscribers: {int(channel_stats.get('subscriberCount', 0)):,}")
        print(f" - Total Views: {int(channel_stats.get('viewCount', 0)):,}")
        print(f" - Video Count: {int(channel_stats.get('videoCount', 0)):,}")

        # Get uploads playlist ID
        uploads_id = channel_data['contentDetails']['relatedPlaylists']['uploads']

        # Step 3: Fetch recent videos
        print("\n🎥 Fetching recent videos...")
        videos_response = youtube.playlistItems().list(
            part='snippet,contentDetails',
            playlistId=uploads_id,
            maxResults=5
        ).execute()

        video_ids = [
            item['contentDetails']['videoId']
            for item in videos_response.get('items', [])
        ]

        # Step 4: Get detailed video information
        if video_ids:
            videos_detail = youtube.videos().list(
                part='snippet,statistics,contentDetails',
                id=','.join(video_ids)
            ).execute()
            detailed_videos = videos_detail.get('items', [])

            print(f"Found {len(detailed_videos)} videos")
            print("-" * 60)

            for i, video in enumerate(detailed_videos[:3], 1):
                video_id = video['id']
                snippet = video['snippet']
                # Separate name so the channel-level statistics are not shadowed.
                video_stats = video['statistics']

                print(f"\n📹 Video {i}: {snippet['title']}")
                print(f" ID: {video_id}")
                print(f" Published: {snippet['publishedAt']}")
                print(f" Duration: {video['contentDetails']['duration']}")

                # Full description (untruncated)
                full_description = snippet.get('description', '')
                print(f" Description Length: {len(full_description)} chars")
                print(f" Description Preview: {full_description[:200]}...")

                # Statistics
                print(" 📈 Stats:")
                print(f" - Views: {int(video_stats.get('viewCount', 0)):,}")
                print(f" - Likes: {int(video_stats.get('likeCount', 0)):,}")
                print(f" - Comments: {int(video_stats.get('commentCount', 0)):,}")

                # Tags
                tags = snippet.get('tags', [])
                if tags:
                    print(f" 🏷️ Tags: {', '.join(tags[:5])}")

                # Try to get transcript
                print(" 📝 Transcript: ", end="")
                try:
                    # Create API instance and fetch transcript
                    api = YouTubeTranscriptApi()
                    segments = api.fetch(video_id)

                    if segments:
                        print(f"Available ({len(segments)} segments)")
                        # Show the first 150 chars of the first 10 segments.
                        # Segments may be snippet objects rather than dicts, so
                        # read the text through the helper instead of seg['text'].
                        full_text = ' '.join(_segment_text(seg) for seg in segments[:10])
                        print(f" Preview: {full_text[:150]}...")
                    else:
                        print("No transcript available")

                except Exception as e:
                    # Best effort: a missing/disabled transcript must not abort the run.
                    print(f"Error fetching transcript: {e}")

        # Step 5: Check API quota usage
        print("\n" + "=" * 60)
        print("📊 API Usage Notes:")
        print(" - Search: 100 quota units")
        print(" - Channel details: 1 quota unit")
        print(" - Playlist items: 1 quota unit")
        print(" - Video details: 1 quota unit")
        print(" - Total used in this test: ~104 units")
        print(" - Daily quota: 10,000 units")
        print(" - Can fetch ~2,500 videos per day with full details")

    except HttpError as e:
        print(f"❌ YouTube API error: {e}")
        # The body is not guaranteed to be JSON; never let parsing it raise
        # a second exception out of this handler.
        print(f" Error details: {_http_error_message(e)}")
    except Exception as e:
        print(f"❌ Error: {e}")

    print("\n" + "=" * 60)
    print("YouTube API test complete!")
|
|
|
|
if __name__ == "__main__":
    # Run the proof of concept only when executed as a script, not on import.
    test_youtube_api()