Major Changes: - Updated all code references from hvacknowitall/hvacnkowitall to hkia - Renamed all existing markdown files to use hkia_ prefix - Updated configuration files, scrapers, and production scripts - Modified systemd service descriptions to use HKIA - Changed NAS sync path to /mnt/nas/hkia Files Updated: - 20+ source files updated with new naming convention - 34 markdown files renamed to hkia_* format - All ScraperConfig brand_name parameters now use 'hkia' - Documentation updated to reflect new naming Rationale: - Shorter, cleaner filenames - Consistent branding across all outputs - Easier to type and reference - Maintains same functionality with improved naming Next Steps: - Deploy updated services to production - Update any external references to old naming - Monitor scrapers to ensure proper operation 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
91 lines
No EOL
3.1 KiB
Python
91 lines
No EOL
3.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test the slow delay system with 5 videos including transcripts
|
|
"""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from src.base_scraper import ScraperConfig
|
|
from src.youtube_scraper import YouTubeScraper
|
|
import time
|
|
|
|
def test_slow_delays():
|
|
"""Test slow delays with 5 videos"""
|
|
print("🧪 Testing slow delay system with 5 videos + transcripts")
|
|
print("This should take 5-10 minutes with extended delays")
|
|
print("=" * 60)
|
|
|
|
config = ScraperConfig(
|
|
source_name="youtube_slow_test",
|
|
brand_name="hvacknowitall",
|
|
data_dir=Path("test_data/slow_delays"),
|
|
logs_dir=Path("test_logs/slow_delays"),
|
|
timezone="America/Halifax"
|
|
)
|
|
|
|
scraper = YouTubeScraper(config)
|
|
|
|
start_time = time.time()
|
|
|
|
# Fetch 5 videos with transcripts (this will use normal delays since max_posts is specified)
|
|
print("Testing normal delays (max_posts=5)...")
|
|
videos_normal = scraper.fetch_content(max_posts=5, fetch_transcripts=True)
|
|
|
|
normal_duration = time.time() - start_time
|
|
print(f"Normal mode: {len(videos_normal)} videos in {normal_duration:.1f} seconds")
|
|
|
|
# Now test without max_posts to trigger backlog mode delays
|
|
print(f"\nWaiting 2 minutes before testing backlog delays...")
|
|
time.sleep(120)
|
|
|
|
# Create new scraper instance for backlog test
|
|
config2 = ScraperConfig(
|
|
source_name="youtube_backlog_test",
|
|
brand_name="hvacknowitall",
|
|
data_dir=Path("test_data/backlog_delays"),
|
|
logs_dir=Path("test_logs/backlog_delays"),
|
|
timezone="America/Halifax"
|
|
)
|
|
|
|
scraper2 = YouTubeScraper(config2)
|
|
|
|
# Manually test just 2 videos in backlog mode
|
|
print("Testing backlog delays (simulating full backlog mode)...")
|
|
start_backlog = time.time()
|
|
|
|
# Get video list first
|
|
video_list = scraper2.fetch_channel_videos(max_videos=2)
|
|
backlog_videos = []
|
|
|
|
for i, video in enumerate(video_list):
|
|
video_id = video.get('id')
|
|
print(f"Processing video {i+1}/2: {video_id}")
|
|
|
|
if i > 0:
|
|
# Test the backlog delay
|
|
scraper2._backlog_delay(transcript_mode=True)
|
|
|
|
detailed_info = scraper2.fetch_video_details(video_id, fetch_transcript=True)
|
|
if detailed_info:
|
|
backlog_videos.append(detailed_info)
|
|
|
|
backlog_duration = time.time() - start_backlog
|
|
|
|
print(f"\nResults:")
|
|
print(f"Normal mode (5 videos): {normal_duration:.1f} seconds ({normal_duration/len(videos_normal):.1f}s per video)")
|
|
print(f"Backlog mode (2 videos): {backlog_duration:.1f} seconds ({backlog_duration/len(backlog_videos):.1f}s per video)")
|
|
|
|
# Count transcripts
|
|
normal_transcripts = sum(1 for v in videos_normal if v.get('transcript'))
|
|
backlog_transcripts = sum(1 for v in backlog_videos if v.get('transcript'))
|
|
|
|
print(f"Transcripts:")
|
|
print(f" Normal mode: {normal_transcripts}/{len(videos_normal)}")
|
|
print(f" Backlog mode: {backlog_transcripts}/{len(backlog_videos)}")
|
|
|
|
return True
|
|
|
|
if __name__ == "__main__":
|
|
test_slow_delays() |