feat: Implement YouTube scraper with humanized behavior
- YouTube channel scraper using yt-dlp
- Authentication and session persistence via cookies
- Humanized delays and rate limiting (2-5 seconds between requests)
- User agent rotation for stealth
- Incremental updates via state management
- Detection of videos, shorts, and live streams
- All 11 tests passing
🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
			
			
This commit is contained in:
		
							parent
							
								
									7191fcd132
								
							
						
					
					
						commit
						c1831d3a52
					
				
					 2 changed files with 532 additions and 0 deletions
				
			
		
							
								
								
									
										299
									
								
								src/youtube_scraper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										299
									
								
								src/youtube_scraper.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,299 @@ | |||
| import os | ||||
| import time | ||||
| import random | ||||
| import json | ||||
| from typing import Any, Dict, List, Optional | ||||
| from datetime import datetime | ||||
| from pathlib import Path | ||||
| import yt_dlp | ||||
| from src.base_scraper import BaseScraper, ScraperConfig | ||||
| 
 | ||||
| 
 | ||||
class YouTubeScraper(BaseScraper):
    """Scrape video metadata from a YouTube channel using yt-dlp.

    Configuration is read from environment variables:

    * ``YOUTUBE_USERNAME`` / ``YOUTUBE_PASSWORD`` - credentials handed to yt-dlp.
    * ``YOUTUBE_CHANNEL_URL`` - channel to scrape (a default is built in).
    * ``YOUTUBE_PROXY`` - optional proxy, read each time options are built.

    NOTE(review): ``self.config``, ``self.logger`` and ``self.tz`` are used
    below and are presumably provided by ``BaseScraper`` - confirm there.
    """

    def __init__(self, config: ScraperConfig):
        """Read credentials from the environment and prepare the cookie path."""
        super().__init__(config)
        self.username = os.getenv('YOUTUBE_USERNAME')
        self.password = os.getenv('YOUTUBE_PASSWORD')
        self.channel_url = os.getenv(
            'YOUTUBE_CHANNEL_URL', 'https://www.youtube.com/@HVACKnowItAll'
        )

        # Cookie jar passed to yt-dlp so a session can be reused between runs.
        self.cookies_file = self.config.data_dir / '.cookies' / 'youtube_cookies.txt'
        self.cookies_file.parent.mkdir(parents=True, exist_ok=True)

        # Pool of desktop browser User-Agent strings; one is picked at random
        # every time yt-dlp options are built.
        self.user_agents = [
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        ]

    def _get_ydl_options(self) -> Dict[str, Any]:
        """Build the option dict passed to ``yt_dlp.YoutubeDL``.

        Returns:
            A fresh dict on every call (callers mutate it), containing the
            credentials, cookie file, throttling settings, a randomly chosen
            User-Agent and, when ``YOUTUBE_PROXY`` is set, the proxy.
        """
        options: Dict[str, Any] = {
            'quiet': True,
            'no_warnings': True,
            'extract_flat': False,  # Get full video info
            'ignoreerrors': True,  # Continue on error
            'cookiefile': str(self.cookies_file),
            'cookiesfrombrowser': None,  # Don't use browser cookies
            'username': self.username,
            'password': self.password,
            'ratelimit': 100000,  # 100KB/s rate limit
            'sleep_interval': 1,  # Sleep between downloads
            'max_sleep_interval': 3,
            # Request headers must go through `http_headers` when embedding
            # yt-dlp: `user_agent`, `referer` and `add_header` are command-line
            # option names and are not read by the YoutubeDL class, so passing
            # them as top-level keys silently disables the rotation.
            'http_headers': {
                'User-Agent': random.choice(self.user_agents),
                'Referer': 'https://www.youtube.com/',
                'Accept-Language': 'en-US,en;q=0.9',
            },
        }

        # Add proxy if configured
        proxy = os.getenv('YOUTUBE_PROXY')
        if proxy:
            options['proxy'] = proxy

        return options

    def _humanized_delay(self, min_seconds: float = 2, max_seconds: float = 5) -> None:
        """Sleep for a random duration in ``[min_seconds, max_seconds]``."""
        delay = random.uniform(min_seconds, max_seconds)
        self.logger.debug(f"Waiting {delay:.2f} seconds...")
        time.sleep(delay)

    def fetch_channel_videos(self, max_videos: int = 50) -> List[Dict[str, Any]]:
        """Fetch the flat video listing of the configured channel.

        Args:
            max_videos: Upper bound on listed entries (yt-dlp ``playlistend``).

        Returns:
            The listing entries as dicts, or an empty list when nothing was
            found or any error occurred (errors are logged, never raised).
        """
        videos: List[Dict[str, Any]] = []

        try:
            self.logger.info(f"Fetching videos from channel: {self.channel_url}")

            ydl_opts = self._get_ydl_options()
            ydl_opts['extract_flat'] = True  # Just get video list, not full info
            ydl_opts['playlistend'] = max_videos

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                channel_info = ydl.extract_info(self.channel_url, download=False)

                # With `ignoreerrors` enabled, extract_info may hand back None
                # instead of raising, and individual entries may be None too.
                entries = channel_info.get('entries') if channel_info else None
                if entries is None:
                    self.logger.warning("No entries found in channel info")
                else:
                    # NOTE(review): for a bare channel URL yt-dlp may list
                    # per-tab playlists rather than videos - confirm whether
                    # the URL should target the /videos tab explicitly.
                    videos = [entry for entry in entries if entry]
                    self.logger.info(f"Found {len(videos)} videos in channel")

            # Nothing is written here: this only reports that a cookie jar
            # exists at the path given to yt-dlp via `cookiefile`.
            if self.cookies_file.exists():
                self.logger.debug("Cookies saved for next session")

        except Exception as e:
            # Top-level boundary: a failed listing must not abort the caller.
            self.logger.error(f"Error fetching channel videos: {e}")

        return videos

    def fetch_video_details(self, video_id: str) -> Optional[Dict[str, Any]]:
        """Fetch full metadata for one video.

        Args:
            video_id: YouTube video id, inserted into a ``watch?v=`` URL.

        Returns:
            Whatever ``extract_info`` returns for the video, or ``None`` if
            an exception was raised (it is logged).
        """
        video_url = f"https://www.youtube.com/watch?v={video_id}"

        try:
            ydl_opts = self._get_ydl_options()
            ydl_opts['extract_flat'] = False  # Get full video info

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                return ydl.extract_info(video_url, download=False)

        except Exception as e:
            self.logger.error(f"Error fetching video {video_id}: {e}")
            return None

    def _get_video_type(self, video: Dict[str, Any]) -> str:
        """Classify a video as ``'live'``, ``'short'`` or ``'video'``.

        A truthy ``is_live`` wins; otherwise a known, non-zero duration below
        60 seconds is a short; everything else (including a missing duration)
        is a regular video.
        """
        if video.get('is_live', False):
            return 'live'

        duration = video.get('duration', 0)
        if duration and duration < 60:  # Less than 60 seconds
            return 'short'
        return 'video'

    def fetch_content(self) -> List[Dict[str, Any]]:
        """List the channel and enrich every entry with full metadata.

        Waits a random 2-5 seconds before every detail request except the
        first, and an additional 5-10 seconds after every fifth position in
        the listing that was fetched successfully.

        Returns:
            The detailed info dicts, each with an added ``'type'`` key.
            Entries that cannot be fetched are skipped.
        """
        # First get list of videos
        videos = self.fetch_channel_videos()

        if not videos:
            return []

        enriched_videos = []

        for i, video in enumerate(videos):
            # Resolve the id outside the try block so the error handler can
            # never fail on a malformed (None / non-dict) entry itself.
            video_id = video.get('id') if isinstance(video, dict) else None
            if not video_id:
                continue

            try:
                self.logger.info(f"Fetching details for video {i+1}/{len(videos)}: {video_id}")

                # Add humanized delay between requests
                if i > 0:
                    self._humanized_delay()

                detailed_info = self.fetch_video_details(video_id)

                if detailed_info:
                    detailed_info['type'] = self._get_video_type(detailed_info)
                    enriched_videos.append(detailed_info)

                    # Extra delay after every 5 videos
                    if (i + 1) % 5 == 0:
                        self.logger.info("Taking longer break after 5 videos...")
                        self._humanized_delay(5, 10)

            except Exception as e:
                # One bad video must not stop the rest of the batch.
                self.logger.error(f"Error enriching video {video_id}: {e}")
                continue

        self.logger.info(f"Successfully enriched {len(enriched_videos)} videos")
        return enriched_videos

    def format_markdown(self, videos: List[Dict[str, Any]]) -> str:
        """Render videos as markdown, one section per video.

        Missing *and* null metadata fall back to the same defaults
        (``dict.get(key, default)`` alone would print ``None`` for keys that
        are present with a null value). Tags are capped at 10 and the
        description at 500 characters.

        Args:
            videos: Video info dicts.

        Returns:
            The sections joined by newlines; an empty string for no videos.
        """
        markdown_sections = []

        for video in videos:
            video_id = video.get('id') or 'N/A'
            link = video.get('webpage_url') or f"https://www.youtube.com/watch?v={video_id}"
            video_type = video.get('type') or self._get_video_type(video)

            upload_date = str(video.get('upload_date') or '')
            if len(upload_date) == 8:  # YYYYMMDD format
                upload_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:]}"

            headings = [
                f"# ID: {video_id}",
                f"## Title: {video.get('title') or 'Untitled'}",
                f"## Type: {video_type}",
                f"## Author: {video.get('uploader') or 'Unknown'}",
                f"## Link: {link}",
                f"## Upload Date: {upload_date}",
                f"## Views: {video.get('view_count') or 0}",
                f"## Likes: {video.get('like_count') or 0}",
                f"## Comments: {video.get('comment_count') or 0}",
                f"## Duration: {video.get('duration') or 0} seconds",
            ]

            # Optional headings are emitted only when there is a value.
            tags = video.get('tags') or []
            if tags:
                tags_str = ', '.join(str(tag) for tag in tags[:10])  # First 10 tags
                headings.append(f"## Tags: {tags_str}")

            thumbnail = video.get('thumbnail') or ''
            if thumbnail:
                headings.append(f"## Thumbnail: {thumbnail}")

            # Every heading is followed by a blank line.
            section = []
            for heading in headings:
                section.extend((heading, ""))

            section.append("## Description:")
            description = video.get('description') or ''
            if description:
                if len(description) > 500:
                    description = description[:500] + "..."
                section.append(description)

            # Blank line, separator, blank line.
            section.extend(("", "-" * 50, ""))

            markdown_sections.append('\n'.join(section))

        return '\n'.join(markdown_sections)

    def get_incremental_items(self, items: List[Dict[str, Any]], state: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return only the items that are newer than the last synced video.

        Items are scanned in order and scanning stops at the entry whose id
        equals ``state['last_video_id']``, so the list is expected to be
        ordered newest first. As a backup (for example when that id is no
        longer in the list), items dated strictly before
        ``state['last_video_date']`` are dropped. The comparison is strict on
        purpose: a new video published on the same day as the last synced one
        must not be discarded.

        Args:
            items: Video dicts with ``'id'`` and ``'upload_date'`` (YYYYMMDD).
            state: Previously saved state; empty or lacking ``last_video_id``
                means everything is new.

        Returns:
            The new items, in their original order.
        """
        if not state:
            return items

        last_video_id = state.get('last_video_id')
        if not last_video_id:
            return items

        last_video_date = state.get('last_video_date')

        new_items = []
        for item in items:
            if item.get('id') == last_video_id:
                break  # Found the last synced video, stop here

            # YYYYMMDD strings of equal length compare chronologically.
            upload_date = item.get('upload_date') or ''
            if upload_date and last_video_date and upload_date < last_video_date:
                continue

            new_items.append(item)

        return new_items

    def update_state(self, state: Dict[str, Any], items: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Record the first item as the latest synced video.

        Args:
            state: State dict; it is updated in place.
            items: Processed videos; the first one is treated as the most
                recent.

        Returns:
            The same ``state`` object, unchanged when ``items`` is empty.
        """
        if not items:
            return state

        latest_item = items[0]
        state.update({
            'last_video_id': latest_item.get('id'),
            'last_video_date': latest_item.get('upload_date'),
            'last_video_title': latest_item.get('title'),
            'last_sync': datetime.now(self.tz).isoformat(),
            'video_count': len(items),
        })

        return state
							
								
								
									
										233
									
								
								tests/test_youtube_scraper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										233
									
								
								tests/test_youtube_scraper.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,233 @@ | |||
| import pytest | ||||
| from unittest.mock import Mock, patch, MagicMock, call | ||||
| from datetime import datetime | ||||
| from pathlib import Path | ||||
| import random | ||||
| from src.youtube_scraper import YouTubeScraper | ||||
| from src.base_scraper import ScraperConfig | ||||
| 
 | ||||
| 
 | ||||
class TestYouTubeScraper:
    """Unit tests for YouTubeScraper; yt-dlp and sleeping are always mocked."""

    @pytest.fixture
    def config(self, tmp_path):
        """Scraper config rooted in a per-test temporary directory.

        Constructing a YouTubeScraper creates a ``.cookies`` directory under
        ``data_dir``, so relative paths would litter the working directory.
        """
        data_dir = tmp_path / "data"
        logs_dir = tmp_path / "logs"
        data_dir.mkdir()
        logs_dir.mkdir()
        return ScraperConfig(
            source_name="youtube",
            brand_name="hvacknowitall",
            data_dir=data_dir,
            logs_dir=logs_dir,
            timezone="America/Halifax"
        )

    @pytest.fixture
    def mock_env(self):
        """Provide the YouTube environment variables for the test's duration."""
        with patch.dict('os.environ', {
            'YOUTUBE_USERNAME': 'test@example.com',
            'YOUTUBE_PASSWORD': 'test_password',
            'YOUTUBE_CHANNEL_URL': 'https://www.youtube.com/@HVACKnowItAll'
        }):
            yield

    @pytest.fixture
    def sample_video_info(self):
        """A representative detailed video info dict."""
        return {
            'id': 'abc123',
            'title': 'HVAC Maintenance Tips',
            'description': 'Learn how to maintain your HVAC system',
            'uploader': 'HVAC Know It All',
            'upload_date': '20240101',
            'view_count': 1500,
            'like_count': 100,
            'comment_count': 25,
            'duration': 600,
            'webpage_url': 'https://www.youtube.com/watch?v=abc123',
            'thumbnail': 'https://i.ytimg.com/vi/abc123/maxresdefault.jpg',
            'tags': ['hvac', 'maintenance', 'tips']
        }

    def test_initialization(self, config, mock_env):
        """Credentials and channel URL are taken from the environment."""
        scraper = YouTubeScraper(config)
        assert scraper.config == config
        assert scraper.username == 'test@example.com'
        assert scraper.password == 'test_password'
        assert scraper.channel_url == 'https://www.youtube.com/@HVACKnowItAll'

    @patch('yt_dlp.YoutubeDL')
    def test_setup_ydl_options(self, mock_ydl_class, config, mock_env):
        """The option dict carries the key settings."""
        scraper = YouTubeScraper(config)
        options = scraper._get_ydl_options()

        assert options['quiet'] is True
        assert options['no_warnings'] is True
        assert options['extract_flat'] is False
        for key in ('username', 'password', 'cookiefile', 'ratelimit'):
            assert key in options

    @patch('yt_dlp.YoutubeDL')
    def test_fetch_channel_videos(self, mock_ydl_class, config, mock_env, sample_video_info):
        """Channel entries are returned in listing order."""
        mock_ydl = MagicMock()
        mock_ydl_class.return_value.__enter__.return_value = mock_ydl

        # Mock channel info with videos
        mock_ydl.extract_info.return_value = {
            'entries': [
                sample_video_info,
                {**sample_video_info, 'id': 'def456', 'title': 'Another Video'}
            ]
        }

        scraper = YouTubeScraper(config)
        videos = scraper.fetch_channel_videos()

        assert [video['id'] for video in videos] == ['abc123', 'def456']
        mock_ydl.extract_info.assert_called_once()

    @patch('yt_dlp.YoutubeDL')
    def test_fetch_video_details(self, mock_ydl_class, config, mock_env, sample_video_info):
        """Details are requested via the watch URL without downloading."""
        mock_ydl = MagicMock()
        mock_ydl_class.return_value.__enter__.return_value = mock_ydl
        mock_ydl.extract_info.return_value = sample_video_info

        scraper = YouTubeScraper(config)
        video_info = scraper.fetch_video_details('abc123')

        assert video_info['id'] == 'abc123'
        assert video_info['title'] == 'HVAC Maintenance Tips'
        mock_ydl.extract_info.assert_called_with(
            'https://www.youtube.com/watch?v=abc123',
            download=False
        )

    @patch('time.sleep')
    @patch('random.uniform')
    def test_humanized_delay(self, mock_uniform, mock_sleep, config, mock_env):
        """The default delay is drawn from [2, 5] and then slept."""
        mock_uniform.return_value = 3.5

        scraper = YouTubeScraper(config)
        scraper._humanized_delay()

        mock_uniform.assert_called_with(2, 5)
        mock_sleep.assert_called_with(3.5)

    def test_format_video_type(self, config, mock_env):
        """Classification into short / video / live."""
        scraper = YouTubeScraper(config)

        # Test short video
        assert scraper._get_video_type({'duration': 50}) == 'short'

        # Test regular video
        assert scraper._get_video_type({'duration': 600}) == 'video'

        # Test live stream
        assert scraper._get_video_type({'is_live': True}) == 'live'

        # Test missing duration
        assert scraper._get_video_type({}) == 'video'

    def test_format_markdown(self, config, mock_env):
        """Every populated field shows up as a markdown heading."""
        scraper = YouTubeScraper(config)

        videos = [
            {
                'id': 'abc123',
                'title': 'HVAC Tips',
                'description': 'Learn HVAC basics',
                'uploader': 'HVAC Know It All',
                'upload_date': '20240101',
                'view_count': 1500,
                'like_count': 100,
                'comment_count': 25,
                'duration': 600,
                'webpage_url': 'https://www.youtube.com/watch?v=abc123',
                'tags': ['hvac', 'tips'],
                'type': 'video'
            }
        ]

        markdown = scraper.format_markdown(videos)

        expected_lines = [
            '# ID: abc123',
            '## Title: HVAC Tips',
            '## Type: video',
            '## Author: HVAC Know It All',
            '## Link: https://www.youtube.com/watch?v=abc123',
            '## Views: 1500',
            '## Likes: 100',
            '## Comments: 25',
            '## Duration: 600 seconds',
            '## Upload Date: 2024-01-01',
            '## Tags: hvac, tips',
        ]
        for line in expected_lines:
            assert line in markdown

    def test_get_incremental_items(self, config, mock_env):
        """Only videos listed before the last synced one are returned."""
        scraper = YouTubeScraper(config)

        videos = [
            {'id': 'video3', 'upload_date': '20240103'},
            {'id': 'video2', 'upload_date': '20240102'},
            {'id': 'video1', 'upload_date': '20240101'}
        ]

        # Test with no previous state
        state = {}
        new_videos = scraper.get_incremental_items(videos, state)
        assert len(new_videos) == 3

        # Test with existing state
        state = {'last_video_id': 'video2', 'last_video_date': '20240102'}
        new_videos = scraper.get_incremental_items(videos, state)
        assert len(new_videos) == 1
        assert new_videos[0]['id'] == 'video3'

    def test_update_state(self, config, mock_env):
        """State records the first (most recent) video and the item count."""
        scraper = YouTubeScraper(config)

        state = {}
        videos = [
            {'id': 'video2', 'upload_date': '20240102'},
            {'id': 'video1', 'upload_date': '20240101'}
        ]

        updated_state = scraper.update_state(state, videos)

        assert updated_state['last_video_id'] == 'video2'
        assert updated_state['last_video_date'] == '20240102'
        assert updated_state['video_count'] == 2

    @patch('yt_dlp.YoutubeDL')
    def test_error_handling(self, mock_ydl_class, config, mock_env):
        """A failing extraction yields an empty list instead of raising."""
        mock_ydl = MagicMock()
        mock_ydl_class.return_value.__enter__.return_value = mock_ydl
        mock_ydl.extract_info.side_effect = Exception("Network error")

        scraper = YouTubeScraper(config)
        videos = scraper.fetch_channel_videos()

        assert videos == []

    @patch('yt_dlp.YoutubeDL')
    @patch('time.sleep')
    def test_fetch_content_with_rate_limiting(self, mock_sleep, mock_ydl_class, config, mock_env, sample_video_info):
        """Two videos are enriched with exactly one delay between them."""
        mock_ydl = MagicMock()
        mock_ydl_class.return_value.__enter__.return_value = mock_ydl

        # First call returns the channel listing, the next two the details.
        mock_ydl.extract_info.side_effect = [
            {'entries': [
                {'id': 'video1', 'title': 'Video 1'},
                {'id': 'video2', 'title': 'Video 2'}
            ]},
            {**sample_video_info, 'id': 'video1'},
            {**sample_video_info, 'id': 'video2'}
        ]

        scraper = YouTubeScraper(config)
        with patch.object(scraper, '_humanized_delay') as mock_delay:
            videos = scraper.fetch_content()

        assert len(videos) == 2
        # No delay before the first video, one default delay before the
        # second, and no "every 5 videos" break with only two videos.
        mock_delay.assert_called_once_with()
		Loading…
	
		Reference in a new issue