hvac-kia-content/test_image_downloads.py
Ben Reed daab901e35 refactor: Update naming convention from hvacknowitall to hkia
Major Changes:
- Updated all code references from hvacknowitall/hvacnkowitall to hkia
- Renamed all existing markdown files to use hkia_ prefix
- Updated configuration files, scrapers, and production scripts
- Modified systemd service descriptions to use HKIA
- Changed NAS sync path to /mnt/nas/hkia

Files Updated:
- 20+ source files updated with new naming convention
- 34 markdown files renamed to hkia_* format
- All ScraperConfig brand_name parameters now use 'hkia'
- Documentation updated to reflect new naming

Rationale:
- Shorter, cleaner filenames
- Consistent branding across all outputs
- Easier to type and reference
- Maintains same functionality with improved naming

Next Steps:
- Deploy updated services to production
- Update any external references to old naming
- Monitor scrapers to ensure proper operation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-19 13:35:23 -03:00

280 lines
No EOL
9.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Test script to verify image downloading functionality.
Tests each scraper with a small number of items.
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from src.youtube_api_scraper_with_thumbnails import YouTubeAPIScraperWithThumbnails
from src.instagram_scraper_with_images import InstagramScraperWithImages
from src.rss_scraper_with_images import RSSScraperPodcastWithImages
from src.base_scraper import ScraperConfig
from datetime import datetime
import pytz
import os
from dotenv import load_dotenv
# Load environment
load_dotenv()
def test_youtube_thumbnails():
    """Test YouTube thumbnail downloads.

    Fetches up to three videos through ``YouTubeAPIScraperWithThumbnails``,
    reports for each one whether the file named by its ``local_thumbnail``
    entry exists on disk, and writes the scraper's markdown rendering to
    ``test_data/images/youtube_test.md``.

    A missing thumbnail is only reported; it does not change the result.

    Returns:
        bool: True when at least one video was fetched, False when nothing
        was fetched or an exception was raised while scraping/reporting.
    """
    print("\n" + "=" * 60)
    print("TESTING YOUTUBE THUMBNAIL DOWNLOADS")
    print("=" * 60)

    config = ScraperConfig(
        source_name='YouTube_Test',
        # Brand identifier follows the project-wide 'hkia' naming convention
        # (previously the misspelled legacy value 'hvacnkowitall').
        brand_name='hkia',
        data_dir=Path('test_data/images'),
        logs_dir=Path('test_logs'),
        timezone='America/Halifax',
    )

    try:
        scraper = YouTubeAPIScraperWithThumbnails(config)
        print("Fetching 3 YouTube videos with thumbnails...")
        videos = scraper.fetch_content(max_posts=3)

        if not videos:
            print("❌ No videos fetched")
            return False

        print(f"✅ Fetched {len(videos)} videos")

        # Check thumbnails
        for video in videos:
            # Tolerate a missing title so one incomplete record does not abort
            # the thumbnail check (same fallback the podcast check uses).
            title = video.get('title', 'Unknown')[:50]
            local_thumbnail = video.get('local_thumbnail')

            if not local_thumbnail:
                print(f"{title}... - no thumbnail downloaded")
                continue

            thumb_path = Path(local_thumbnail)
            if thumb_path.exists():
                size_kb = thumb_path.stat().st_size / 1024
                print(f"{title}...")
                print(f" Thumbnail: {thumb_path.name} ({size_kb:.1f} KB)")
            else:
                print(f"{title}... - thumbnail file missing")

        # Save sample markdown
        markdown = scraper.format_markdown(videos)
        output_file = Path('test_data/images/youtube_test.md')
        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(markdown, encoding='utf-8')
        print(f"\nMarkdown saved to: {output_file}")
        return True

    except Exception as e:
        # Top-level boundary of a manual test script: report the failure with
        # a full traceback and let the remaining checks run.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_instagram_images():
    """Test Instagram image downloads.

    Skipped (returning False) when the ``INSTAGRAM_USERNAME`` environment
    variable is not set. Otherwise fetches up to three posts through
    ``InstagramScraperWithImages``, lists the files named in each post's
    ``local_images`` entry that exist on disk, and writes the scraper's
    markdown rendering to ``test_data/images/instagram_test.md``.

    Returns:
        bool: True when at least one post was fetched, False when the check
        was skipped, nothing was fetched, or an exception was raised while
        scraping/reporting.
    """
    print("\n" + "=" * 60)
    print("TESTING INSTAGRAM IMAGE DOWNLOADS")
    print("=" * 60)

    if not os.getenv('INSTAGRAM_USERNAME'):
        print("⚠️ Instagram not configured - skipping")
        return False

    config = ScraperConfig(
        source_name='Instagram_Test',
        # Brand identifier follows the project-wide 'hkia' naming convention
        # (previously the misspelled legacy value 'hvacnkowitall').
        brand_name='hkia',
        data_dir=Path('test_data/images'),
        logs_dir=Path('test_logs'),
        timezone='America/Halifax',
    )

    try:
        scraper = InstagramScraperWithImages(config)
        print("Fetching 3 Instagram posts with images...")
        items = scraper.fetch_content(max_posts=3)

        if not items:
            print("❌ No posts fetched")
            return False

        print(f"✅ Fetched {len(items)} posts")

        # Check images
        total_images = 0
        for item in items:
            # Treat an explicit None the same as a missing key so len() and
            # iteration below cannot fail on it.
            images = item.get('local_images') or []
            total_images += len(images)

            if images:
                print(f" ✓ Post {item['id']}: {len(images)} image(s)")
                for img_path in images:
                    path = Path(img_path)
                    # Only files that are actually on disk are listed.
                    if path.exists():
                        size_kb = path.stat().st_size / 1024
                        print(f" - {path.name} ({size_kb:.1f} KB)")
            elif item.get('is_video'):
                print(f" Post {item['id']}: Video post (thumbnail only)")
            else:
                print(f" ✗ Post {item['id']}: No images downloaded")

        print(f"\nTotal images downloaded: {total_images}")

        # Save sample markdown
        markdown = scraper.format_markdown(items)
        output_file = Path('test_data/images/instagram_test.md')
        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(markdown, encoding='utf-8')
        print(f"Markdown saved to: {output_file}")
        return True

    except Exception as e:
        # Top-level boundary of a manual test script: report the failure with
        # a full traceback and let the remaining checks run.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_podcast_thumbnails():
    """Test Podcast thumbnail downloads.

    Skipped (returning False) when the ``PODCAST_RSS_URL`` environment
    variable is not set. Otherwise fetches up to three episodes through
    ``RSSScraperPodcastWithImages``, reports for each one whether the file
    named by its ``local_thumbnail`` entry exists on disk, and writes the
    scraper's markdown rendering to ``test_data/images/podcast_test.md``.

    A missing thumbnail is only reported; it does not change the result.

    Returns:
        bool: True when at least one episode was fetched, False when the
        check was skipped, nothing was fetched, or an exception was raised
        while scraping/reporting.
    """
    print("\n" + "=" * 60)
    print("TESTING PODCAST THUMBNAIL DOWNLOADS")
    print("=" * 60)

    if not os.getenv('PODCAST_RSS_URL'):
        print("⚠️ Podcast not configured - skipping")
        return False

    config = ScraperConfig(
        source_name='Podcast_Test',
        # Brand identifier follows the project-wide 'hkia' naming convention
        # (previously the misspelled legacy value 'hvacnkowitall').
        brand_name='hkia',
        data_dir=Path('test_data/images'),
        logs_dir=Path('test_logs'),
        timezone='America/Halifax',
    )

    try:
        scraper = RSSScraperPodcastWithImages(config)
        print("Fetching 3 podcast episodes with thumbnails...")
        items = scraper.fetch_content(max_items=3)

        if not items:
            print("❌ No episodes fetched")
            return False

        print(f"✅ Fetched {len(items)} episodes")

        # Check thumbnails
        for item in items:
            title = item.get('title', 'Unknown')[:50]
            local_thumbnail = item.get('local_thumbnail')

            if not local_thumbnail:
                print(f"{title}... - no thumbnail downloaded")
                continue

            thumb_path = Path(local_thumbnail)
            if thumb_path.exists():
                size_kb = thumb_path.stat().st_size / 1024
                print(f"{title}...")
                print(f" Thumbnail: {thumb_path.name} ({size_kb:.1f} KB)")
            else:
                print(f"{title}... - thumbnail file missing")

        # Save sample markdown
        markdown = scraper.format_markdown(items)
        output_file = Path('test_data/images/podcast_test.md')
        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(markdown, encoding='utf-8')
        print(f"\nMarkdown saved to: {output_file}")
        return True

    except Exception as e:
        # Top-level boundary of a manual test script: report the failure with
        # a full traceback and let the remaining checks run.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return False
def check_media_directories():
    """Print an inventory of the images under ``test_data/images/media``.

    Every sub-directory that holds at least one ``.jpg``, ``.jpeg``, ``.png``
    or ``.gif`` file gets a summary line (file count and combined size in MB)
    followed by up to three sample files with their size in KB. Directories
    without matching files are not mentioned. Returns nothing.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("MEDIA DIRECTORY STRUCTURE")
    print(rule)

    media_root = Path('test_data/images/media')
    if not media_root.exists():
        print("No test media directory found")
        return

    print(f"Media directory: {media_root}")

    patterns = ('*.jpg', '*.jpeg', '*.png', '*.gif')
    sample_limit = 3

    for entry in sorted(media_root.glob('*')):
        if not entry.is_dir():
            continue

        # Collect matches pattern by pattern so the listing stays grouped by
        # extension in the order given above.
        found = [match for pattern in patterns for match in entry.glob(pattern)]
        if not found:
            continue

        size_mb = sum(match.stat().st_size for match in found) / (1024 * 1024)
        print(f" {entry.name}/: {len(found)} images ({size_mb:.1f} MB)")

        # Show only the first few files, then summarise the remainder.
        for sample in found[:sample_limit]:
            sample_kb = sample.stat().st_size / 1024
            print(f" - {sample.name} ({sample_kb:.1f} KB)")

        if len(found) > sample_limit:
            print(f" ... and {len(found) - 3} more")
def main():
    """Run every image-download check and print a pass/fail summary.

    The YouTube, Instagram and podcast checks run in that order, then the
    media directory inventory is printed, and finally one summary line per
    source plus an overall pass count. Returns nothing.
    """
    banner = "=" * 70
    print(banner)
    print("TESTING IMAGE DOWNLOAD FUNCTIONALITY")
    print(banner)
    print("This will test downloading thumbnails and images from all sources")
    print("(YouTube thumbnails, Instagram images, Podcast thumbnails)")
    print()

    # Label -> check function; the tuple order is the execution order.
    checks = (
        ('YouTube', test_youtube_thumbnails),
        ('Instagram', test_instagram_images),
        ('Podcast', test_podcast_thumbnails),
    )
    results = {label: run_check() for label, run_check in checks}

    # Inventory of whatever the checks above left on disk.
    check_media_directories()

    # Summary
    rule = "=" * 60
    print("\n" + rule)
    print("TEST SUMMARY")
    print(rule)

    for source, success in results.items():
        if success:
            status = "✅ PASSED"
        else:
            status = "❌ FAILED"
        print(f"{source:15} {status}")

    total = len(results)
    passed = sum(bool(outcome) for outcome in results.values())
    print(f"\nTotal: {passed}/{total} passed")

    if passed != total:
        print("\n⚠️ Some tests failed. Check the errors above.")
    else:
        print("\n✅ All tests passed! Ready for production.")


# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()