hvac-kia-content/test_slow_delays.py
Ben Reed daab901e35 refactor: Update naming convention from hvacknowitall to hkia
Major Changes:
- Updated all code references from hvacknowitall/hvacnkowitall to hkia
- Renamed all existing markdown files to use hkia_ prefix
- Updated configuration files, scrapers, and production scripts
- Modified systemd service descriptions to use HKIA
- Changed NAS sync path to /mnt/nas/hkia

Files Updated:
- 20+ source files updated with new naming convention
- 34 markdown files renamed to hkia_* format
- All ScraperConfig brand_name parameters now use 'hkia'
- Documentation updated to reflect new naming

Rationale:
- Shorter, cleaner filenames
- Consistent branding across all outputs
- Easier to type and reference
- Maintains same functionality with improved naming

Next Steps:
- Deploy updated services to production
- Update any external references to old naming
- Monitor scrapers to ensure proper operation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-19 13:35:23 -03:00

91 lines
No EOL
3.1 KiB
Python

#!/usr/bin/env python3
"""
Test the slow delay system with 5 videos including transcripts
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from src.base_scraper import ScraperConfig
from src.youtube_scraper import YouTubeScraper
import time
def test_slow_delays():
    """Exercise both delay modes of YouTubeScraper end to end.

    Phase 1 fetches 5 videos with transcripts using normal delays
    (triggered because ``max_posts`` is specified).  After a 2-minute
    cool-down, phase 2 fetches 2 videos while manually invoking the
    backlog delay between them, then prints timing and transcript
    statistics for both phases.

    Returns:
        True once both phases complete (statistics are informational).
    """
    print("🧪 Testing slow delay system with 5 videos + transcripts")
    print("This should take 5-10 minutes with extended delays")
    print("=" * 60)

    config = ScraperConfig(
        source_name="youtube_slow_test",
        # 'hkia' per the repo-wide rename (was 'hvacknowitall').
        brand_name="hkia",
        data_dir=Path("test_data/slow_delays"),
        logs_dir=Path("test_logs/slow_delays"),
        timezone="America/Halifax",
    )
    scraper = YouTubeScraper(config)

    # Phase 1: normal delays — max_posts being specified skips backlog mode.
    print("Testing normal delays (max_posts=5)...")
    start_time = time.time()
    videos_normal = scraper.fetch_content(max_posts=5, fetch_transcripts=True)
    normal_duration = time.time() - start_time
    print(f"Normal mode: {len(videos_normal)} videos in {normal_duration:.1f} seconds")

    # Cool down between phases so phase 1 traffic doesn't skew phase 2.
    print("\nWaiting 2 minutes before testing backlog delays...")
    time.sleep(120)

    # Phase 2: fresh scraper instance so no state leaks from phase 1.
    config2 = ScraperConfig(
        source_name="youtube_backlog_test",
        # 'hkia' per the repo-wide rename (was 'hvacknowitall').
        brand_name="hkia",
        data_dir=Path("test_data/backlog_delays"),
        logs_dir=Path("test_logs/backlog_delays"),
        timezone="America/Halifax",
    )
    scraper2 = YouTubeScraper(config2)

    # Manually drive just 2 videos through the backlog-mode delay path.
    print("Testing backlog delays (simulating full backlog mode)...")
    start_backlog = time.time()
    video_list = scraper2.fetch_channel_videos(max_videos=2)
    backlog_videos = []
    for i, video in enumerate(video_list):
        video_id = video.get('id')
        print(f"Processing video {i+1}/2: {video_id}")
        if i > 0:
            # Insert the extended backlog delay between videos, exactly as
            # full backlog mode would.
            scraper2._backlog_delay(transcript_mode=True)
        detailed_info = scraper2.fetch_video_details(video_id, fetch_transcript=True)
        if detailed_info:
            backlog_videos.append(detailed_info)
    backlog_duration = time.time() - start_backlog

    # Guard per-video averages: either fetch may legitimately return
    # nothing, which previously raised ZeroDivisionError.
    normal_avg = normal_duration / len(videos_normal) if videos_normal else 0.0
    backlog_avg = backlog_duration / len(backlog_videos) if backlog_videos else 0.0
    print("\nResults:")
    print(f"Normal mode (5 videos): {normal_duration:.1f} seconds ({normal_avg:.1f}s per video)")
    print(f"Backlog mode (2 videos): {backlog_duration:.1f} seconds ({backlog_avg:.1f}s per video)")

    # Count how many fetched videos actually carry a transcript.
    normal_transcripts = sum(1 for v in videos_normal if v.get('transcript'))
    backlog_transcripts = sum(1 for v in backlog_videos if v.get('transcript'))
    print("Transcripts:")
    print(f" Normal mode: {normal_transcripts}/{len(videos_normal)}")
    print(f" Backlog mode: {backlog_transcripts}/{len(backlog_videos)}")
    return True
# Script entry point: run the delay test when executed directly.
if __name__ == "__main__":
    test_slow_delays()