#!/usr/bin/env python3
"""
Enhanced Phase 2 Social Media Competitive Intelligence Test Script

Comprehensive testing for YouTube and Instagram competitive scrapers with Python best practices.

Features Tested:
- Enhanced error handling with custom exceptions
- Resource management with context managers
- Type safety validation
- Rate limiting and quota management
- Integration with competitive orchestrator
- Async patterns (future implementation)
"""

import argparse
import json
import logging
import sys
import time
from pathlib import Path
from typing import Dict, List, Optional, Union
from datetime import datetime
import contextlib

# Add src to path
sys.path.insert(0, str(Path(__file__).parent / "src"))

from competitive_intelligence.competitive_orchestrator import CompetitiveIntelligenceOrchestrator
from competitive_intelligence.youtube_competitive_scraper import (
    YouTubeCompetitiveScraper, YouTubeQuotaManager, create_youtube_competitive_scrapers
)
from competitive_intelligence.instagram_competitive_scraper import (
    InstagramCompetitiveScraper, InstagramScraperManager, create_instagram_competitive_scrapers
)
from competitive_intelligence.exceptions import (
    CompetitiveIntelligenceError, ConfigurationError, QuotaExceededError,
    YouTubeAPIError, InstagramError, RateLimitError
)
from competitive_intelligence.types import Platform, ContentItem


def _format_count(value) -> str:
    """Render a metric for display, adding thousands separators when numeric.

    The scrapers' result dicts are read with placeholder defaults such as
    'Unknown'. Applying the ',' format spec to a string raises ValueError, so
    anything that is not a real number is passed through ``str`` unchanged.
    """
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return f"{value:,}"
    return str(value)


def setup_logging(verbose: bool = False, log_file: Optional[str] = None) -> None:
    """Setup comprehensive logging for testing.

    Args:
        verbose: Log at DEBUG level when true, INFO otherwise.
        log_file: Optional path; when given, records are also written there.
    """
    level = logging.DEBUG if verbose else logging.INFO

    handlers: List[logging.Handler] = [logging.StreamHandler()]
    if log_file:
        # Explicit encoding so the log file does not depend on the platform
        # default code page.
        handlers.append(logging.FileHandler(log_file, encoding='utf-8'))

    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=handlers
    )

    # Set specific loggers to appropriate levels
    logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING)
    logging.getLogger('urllib3.connectionpool').setLevel(logging.WARNING)


def test_youtube_scraper_integration(data_dir: Path, logs_dir: Path, competitor_key: str, limit: int = 3) -> bool:
    """Test YouTube competitive scraper with enhanced error handling.

    Opens the scraper as a context manager, prints its quota status, discovers
    up to ``limit`` URLs, scrapes the first one and runs the competitor
    analysis.

    Returns:
        True when the run finished without raising, False when any of the
        handled exceptions was caught.
    """
    print(f"\n=== Testing Enhanced YouTube Scraper Integration ({competitor_key}) ===")

    try:
        # Test context manager pattern
        with YouTubeCompetitiveScraper(data_dir, logs_dir, competitor_key) as scraper:
            print(f"✅ Scraper initialized: {scraper.competitor_name}")
            print(f"📊 Base URL: {scraper.base_url}")
            # Only report presence of the key; never echo any part of it.
            print(f"🔑 API configured: {'Yes' if scraper.api_key else 'No'}")

            # Test quota manager
            quota_status = scraper.quota_manager.get_quota_status()
            print(f"📈 API Quota: {quota_status['quota_used']}/{quota_status['daily_limit']}")

            # Test URL discovery with error handling
            print(f"\n🔍 Discovering content URLs (limit: {limit})...")
            urls = scraper.discover_content_urls(limit)

            if urls:
                print(f"✅ Discovered {len(urls)} URLs")
                for i, url_data in enumerate(urls[:2], 1):  # Show first 2
                    print(f" {i}. {url_data['url']}")
                    print(f" 📅 Published: {url_data.get('publish_date', 'Unknown')}")
                    print(f" 🎯 Priority: {url_data.get('competitive_priority', 'medium')}")

                # Test content scraping with validation
                test_url = urls[0]['url']
                print(f"\n🔬 Testing content scraping: {test_url}")

                content = scraper.scrape_content_item(test_url)
                if content:
                    metrics = content.get('social_metrics', {})
                    print("✅ Content scraping successful:")
                    print(f" 📝 Title: {content.get('title', 'Unknown')[:80]}...")
                    # _format_count tolerates the 'Unknown' placeholder.
                    print(f" 👀 Views: {_format_count(metrics.get('views', 'Unknown'))}")
                    print(f" 👍 Likes: {_format_count(metrics.get('likes', 'Unknown'))}")
                    print(f" 💬 Comments: {_format_count(metrics.get('comments', 'Unknown'))}")
                    print(f" 📊 Word count: {content.get('word_count', 0)}")
                    print(f" 🏷️ Categories: {', '.join(content.get('categories', [])[:3])}")

                    # Test data validation
                    if scraper._validate_video_data({'id': content['id'], 'snippet': {}}):
                        print("✅ Data validation: Passed")
                    else:
                        print("⚠️ Data validation: Failed")

                else:
                    print("❌ Content scraping failed")

                # Test competitor analysis
                print("\n📊 Testing competitor analysis...")
                analysis = scraper.run_competitor_analysis()

                if 'error' not in analysis:
                    print("✅ Competitor analysis successful:")
                    print(f" 📈 Total videos analyzed: {analysis.get('sample_size', 0)}")

                    channel_meta = analysis.get('channel_metadata', {})
                    print(f" 👥 Subscribers: {_format_count(channel_meta.get('subscriber_count', 'Unknown'))}")
                    print(f" 🎥 Total videos: {_format_count(channel_meta.get('video_count', 'Unknown'))}")

                    pub_analysis = analysis.get('publishing_analysis', {})
                    print(f" 📅 Posts per day: {pub_analysis.get('average_frequency_per_day', 0):.2f}")

                else:
                    print(f"❌ Analysis failed: {analysis['error']}")

            else:
                print("⚠️ No URLs discovered")

    except ConfigurationError as e:
        print(f"❌ Configuration Error: {e.message}")
        if e.details:
            print(f" Details: {e.details}")
        return False

    except QuotaExceededError as e:
        print(f"❌ Quota Exceeded: {e.message}")
        print(f" Used: {e.quota_used}/{e.quota_limit}")
        print(f" Reset: {e.reset_time or 'Unknown'}")
        return False

    except YouTubeAPIError as e:
        print(f"❌ YouTube API Error: {e.message}")
        print(f" Error code: {e.error_code or 'Unknown'}")
        return False

    except CompetitiveIntelligenceError as e:
        print(f"❌ Competitive Intelligence Error: {e.message}")
        return False

    except Exception as e:
        print(f"❌ Unexpected Error: {e}")
        logging.exception("Unexpected error in YouTube testing")
        return False

    print("✅ YouTube scraper integration test completed successfully")
    return True


def test_instagram_scraper_integration(data_dir: Path, logs_dir: Path, competitor_key: str, limit: int = 3) -> bool:
    """Test Instagram competitive scraper with enhanced error handling.

    Obtains a scraper through ``InstagramScraperManager``, loads the target
    profile, then discovers posts, scrapes the first one and runs the
    competitor analysis.

    Returns:
        True when the run finished without raising (including the early exit
        for private accounts); False when the profile could not be loaded or
        a handled exception was caught.
    """
    print(f"\n=== Testing Enhanced Instagram Scraper Integration ({competitor_key}) ===")

    try:
        # Test scraper manager pattern
        with InstagramScraperManager(data_dir, logs_dir) as manager:
            with manager.scraper_context(competitor_key) as scraper:
                print(f"✅ Scraper initialized: {scraper.competitor_info['name']}")
                print(f"📱 Instagram URL: {scraper.competitor_info['url']}")
                print(f"👤 Target username: {scraper.target_username}")
                print(f"🔐 Auth configured: {bool(scraper.username and scraper.password)}")

                # Test profile loading
                print("\n👤 Loading competitor profile...")
                profile = scraper._get_target_profile()

                if profile:
                    meta = scraper.profile_metadata
                    print(f"✅ Profile loaded: {meta.get('full_name', 'Unknown')}")
                    print(f" 👥 Followers: {_format_count(meta.get('followers', 0))}")
                    print(f" 📸 Posts: {_format_count(meta.get('posts_count', 0))}")
                    print(f" 🔒 Private: {'Yes' if meta.get('is_private') else 'No'}")
                    print(f" ✅ Verified: {'Yes' if meta.get('is_verified') else 'No'}")

                    if meta.get('is_private'):
                        print("⚠️ Private account - limited access")
                        return True  # Early return for private accounts

                    # Test URL discovery
                    print(f"\n🔍 Discovering Instagram posts (limit: {limit})...")
                    posts = scraper.discover_content_urls(limit)

                    if posts:
                        print(f"✅ Discovered {len(posts)} posts")
                        for i, post_data in enumerate(posts[:2], 1):
                            print(f" {i}. {post_data['url']}")
                            # str() so a non-string date value can still be sliced.
                            print(f" 📅 Date: {str(post_data.get('date_utc', 'Unknown'))[:10]}")
                            print(f" 📱 Type: {post_data.get('typename', 'Unknown')}")
                            print(f" 🎥 Video: {'Yes' if post_data.get('is_video') else 'No'}")
                            print(f" 👍 Likes: {_format_count(post_data.get('likes', 0))}")

                        # Test content scraping
                        test_url = posts[0]['url']
                        print(f"\n🔬 Testing post scraping: {test_url}")

                        content = scraper.scrape_content_item(test_url)
                        if content:
                            metrics = content.get('social_metrics', {})
                            print("✅ Post scraping successful:")
                            print(f" 📝 Caption: {content.get('description', '')[:100]}...")
                            print(f" 👍 Likes: {_format_count(metrics.get('likes', 0))}")
                            print(f" 💬 Comments: {_format_count(metrics.get('comments', 0))}")
                            print(f" 🏷️ Hashtags: {len(content.get('hashtags', []))}")
                            print(f" 📊 Word count: {content.get('word_count', 0)}")

                            # Test data validation
                            test_data = {
                                'shortcode': content['id'],
                                'date_utc': content['publish_date'],
                                'owner_username': content['author']
                            }
                            if scraper._validate_post_data(test_data):
                                print("✅ Data validation: Passed")
                            else:
                                print("⚠️ Data validation: Failed")

                            # Test caption sanitization
                            sanitized = scraper._sanitize_caption(content.get('description', ''))
                            if sanitized != content.get('description', ''):
                                print("✅ Caption sanitization applied")

                        else:
                            print("❌ Post scraping failed")

                        # Test competitor analysis
                        print("\n📊 Testing Instagram competitor analysis...")
                        analysis = scraper.run_competitor_analysis()

                        if 'error' not in analysis:
                            print("✅ Analysis successful:")
                            print(f" 📈 Posts analyzed: {analysis.get('total_recent_posts', 0)}")

                            posting = analysis.get('posting_analysis', {})
                            print(f" 📅 Posts per day: {posting.get('average_posts_per_day', 0):.2f}")
                            print(f" 🎥 Video percentage: {posting.get('video_percentage', 0):.1f}%")

                            engagement = analysis.get('engagement_analysis', {})
                            print(f" 👍 Avg likes: {engagement.get('average_likes', 0):,.0f}")
                            print(f" 💬 Avg comments: {engagement.get('average_comments', 0):,.0f}")
                            print(f" 📈 Engagement rate: {engagement.get('average_engagement_rate', 0):.2f}%")

                        else:
                            error_type = analysis.get('error', 'unknown')
                            if error_type == 'private_account':
                                print("⚠️ Analysis limited: Private account")
                            else:
                                print(f"❌ Analysis failed: {analysis.get('message', 'Unknown error')}")

                    else:
                        print("⚠️ No posts discovered")

                else:
                    print("❌ Failed to load competitor profile")
                    return False

    except ConfigurationError as e:
        print(f"❌ Configuration Error: {e.message}")
        return False

    # NOTE(review): RateLimitError is handled before InstagramError so its
    # retry hint stays reachable if it is a subclass of InstagramError; the
    # order is irrelevant if the two are unrelated. Confirm the hierarchy in
    # competitive_intelligence.exceptions.
    except RateLimitError as e:
        print(f"❌ Rate Limit Error: {e.message}")
        print(f" Retry after: {e.retry_after or 'Unknown'} seconds")
        return False

    except InstagramError as e:
        print(f"❌ Instagram Error: {e.message}")
        return False

    except CompetitiveIntelligenceError as e:
        print(f"❌ Competitive Intelligence Error: {e.message}")
        return False

    except Exception as e:
        print(f"❌ Unexpected Error: {e}")
        logging.exception("Unexpected error in Instagram testing")
        return False

    print("✅ Instagram scraper integration test completed successfully")
    return True


def test_orchestrator_social_media_integration(data_dir: Path, logs_dir: Path, limit: int = 2) -> bool:
    """Test competitive orchestrator with social media scrapers.

    Prints the orchestrator's social media status and competitor listing, then
    runs the YouTube-only incremental sync and platform analysis.

    Args:
        data_dir: Directory handed to the orchestrator for data.
        logs_dir: Directory handed to the orchestrator for logs.
        limit: Only echoed in the progress message; it is not forwarded to
            any orchestrator call here.

    Returns:
        True when every step ran without raising, False otherwise.
    """
    print("\n=== Testing Competitive Orchestrator Social Media Integration ===")

    try:
        orchestrator = CompetitiveIntelligenceOrchestrator(data_dir, logs_dir)
        print(f"✅ Orchestrator initialized with {len(orchestrator.scrapers)} scrapers")

        # Test social media status
        print("\n📱 Testing social media status...")
        social_status = orchestrator.get_social_media_status()

        print(f" 📊 Total social scrapers: {social_status['total_social_media_scrapers']}")
        print(f" 🎥 YouTube scrapers: {social_status['youtube_scrapers']}")
        print(f" 📸 Instagram scrapers: {social_status['instagram_scrapers']}")

        # Test listing competitors
        print("\n📝 Listing available competitors...")
        competitors = orchestrator.list_available_competitors()

        for platform, scraper_list in competitors['by_platform'].items():
            if scraper_list:
                print(f" {platform.upper()}: {len(scraper_list)} scrapers")
                for scraper in scraper_list[:2]:  # Show first 2
                    print(f" • {scraper}")

        # Test social media incremental sync (limited)
        print(f"\n🔄 Testing social media incremental sync (YouTube only, limit {limit})...")

        # Test just YouTube to avoid Instagram rate limits
        sync_results = orchestrator.run_social_media_incremental(['youtube'])

        if sync_results.get('results'):
            for scraper_name, result in sync_results['results'].items():
                status = result.get('status', 'unknown')
                icon = '✅' if status == 'success' else '❌'
                message = result.get('message', result.get('error', 'Unknown'))
                print(f" {icon} {scraper_name}: {message}")

        # Test platform-specific analysis (YouTube only)
        print("\n📊 Testing YouTube platform analysis...")
        youtube_analysis = orchestrator.run_platform_analysis('youtube')

        if youtube_analysis.get('results'):
            print("✅ YouTube analysis completed:")
            for scraper_name, result in youtube_analysis['results'].items():
                if result.get('status') == 'success':
                    analysis = result.get('analysis', {})
                    competitor_name = analysis.get('competitor_name', scraper_name)
                    total_videos = analysis.get('total_recent_videos', 0)
                    print(f" 📈 {competitor_name}: {total_videos} videos analyzed")

                    # Show channel metadata if available
                    channel_meta = analysis.get('channel_metadata', {})
                    if 'subscriber_count' in channel_meta:
                        print(f" 👥 {_format_count(channel_meta['subscriber_count'])} subscribers")

        print("\n⏱️ Orchestrator integration test completed")
        return True

    except Exception as e:
        print(f"❌ Orchestrator integration error: {e}")
        logging.exception("Error in orchestrator integration testing")
        return False


def test_error_handling_scenarios(data_dir: Path, logs_dir: Path) -> bool:
    """Test various error handling scenarios.

    Runs three scenarios: an unknown competitor key, an invalid URL, and
    context-manager cleanup. A scenario that has to be skipped because no
    scraper is available counts as passed.

    Returns:
        True when every scenario passed.
    """
    print("\n=== Testing Error Handling Scenarios ===")

    scenarios_passed = 0
    total_scenarios = 0

    # Test 1: Invalid competitor key
    total_scenarios += 1
    print("\n🧪 Test 1: Invalid competitor configuration")
    try:
        YouTubeCompetitiveScraper(data_dir, logs_dir, "nonexistent_competitor")
        print("❌ Should have raised ConfigurationError")
    except ConfigurationError as e:
        print(f"✅ Correctly caught ConfigurationError: {e.message[:60]}...")
        scenarios_passed += 1
    except Exception as e:
        print(f"❌ Wrong exception type: {type(e).__name__}")

    # Test 2: Invalid URL format
    total_scenarios += 1
    print("\n🧪 Test 2: Invalid URL validation")
    try:
        youtube_scrapers = create_youtube_competitive_scrapers(data_dir, logs_dir)
        # next(..., None) instead of [0]: an empty result must reach the
        # "Skipped" branch rather than fail with IndexError.
        scraper = next(iter(youtube_scrapers.values()), None)
        if scraper:
            scraper.scrape_content_item("https://invalid-url.com/watch")
            print("❌ Should have raised DataValidationError")
        else:
            print("⚠️ Skipped - no YouTube scraper available")
            scenarios_passed += 1
    except Exception as e:
        # Accept any validation-related error
        reason = str(e).lower()
        if "validation" in reason or "invalid" in reason:
            print(f"✅ Correctly caught validation error: {type(e).__name__}")
            scenarios_passed += 1
        else:
            print(f"❌ Unexpected error: {e}")

    # Test 3: Resource cleanup
    total_scenarios += 1
    print("\n🧪 Test 3: Resource cleanup with context managers")
    try:
        instagram_scrapers = create_instagram_competitive_scrapers(data_dir, logs_dir)
        if instagram_scrapers:
            scraper_key = next(iter(instagram_scrapers))
            # Competitor keys contain underscores (e.g. 'ac_service_tech'), so
            # taking the last '_' segment would truncate them. Strip only the
            # platform prefix instead.
            # NOTE(review): assumes keys look like 'instagram_<competitor>';
            # keys without that prefix are used unchanged - confirm against
            # create_instagram_competitive_scrapers.
            platform_prefix = 'instagram_'
            if scraper_key.startswith(platform_prefix):
                competitor_key = scraper_key[len(platform_prefix):]
            else:
                competitor_key = scraper_key

            with InstagramScraperManager(data_dir, logs_dir) as manager:
                with manager.scraper_context(competitor_key) as scraper:
                    # Verify scraper is working; an explicit raise survives
                    # `python -O`, unlike assert.
                    if scraper is None:
                        raise RuntimeError("scraper_context yielded no scraper")
            # After context exit, resources should be cleaned up
            print("✅ Context manager cleanup completed successfully")
            scenarios_passed += 1
        else:
            print("⚠️ Skipped - no Instagram scraper available")
            scenarios_passed += 1
    except Exception as e:
        print(f"❌ Context manager error: {e}")

    print(f"\n📊 Error handling test results: {scenarios_passed}/{total_scenarios} scenarios passed")
    return scenarios_passed == total_scenarios


def main() -> None:
    """Main test runner for Phase 2 social media integration.

    Parses the command line, runs the selected test groups, prints a summary
    and exits with status 0 when every executed group passed, 1 when any
    failed (or none ran), and 130 on Ctrl-C.
    """
    parser = argparse.ArgumentParser(
        description='Enhanced Phase 2 Social Media Competitive Intelligence Test',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
 # Test all social media scrapers
 python test_phase2_social_media_integration.py

 # Test specific platforms
 python test_phase2_social_media_integration.py --platforms youtube
 python test_phase2_social_media_integration.py --platforms instagram

 # Test with specific competitors
 python test_phase2_social_media_integration.py --youtube-competitor ac_service_tech
 python test_phase2_social_media_integration.py --instagram-competitor love2hvac

 # Detailed testing with logging
 python test_phase2_social_media_integration.py --verbose --log-file test_results.log

 # Quick test with minimal content
 python test_phase2_social_media_integration.py --limit 1 --skip-orchestrator
 """
    )

    parser.add_argument(
        '--platforms',
        nargs='+',
        choices=['youtube', 'instagram'],
        default=['youtube', 'instagram'],
        help='Platforms to test (default: both)'
    )

    parser.add_argument(
        '--youtube-competitor',
        choices=['ac_service_tech', 'refrigeration_mentor', 'love2hvac', 'hvac_tv'],
        default='ac_service_tech',
        help='YouTube competitor to test'
    )

    parser.add_argument(
        '--instagram-competitor',
        choices=['ac_service_tech', 'love2hvac', 'hvac_learning_solutions'],
        default='ac_service_tech',
        help='Instagram competitor to test'
    )

    parser.add_argument(
        '--limit',
        type=int,
        default=3,
        help='Limit items per test (default: 3)'
    )

    parser.add_argument(
        '--data-dir',
        type=Path,
        default=Path('data'),
        help='Data directory (default: ./data)'
    )

    parser.add_argument(
        '--logs-dir',
        type=Path,
        default=Path('logs'),
        help='Logs directory (default: ./logs)'
    )

    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Enable verbose logging'
    )

    parser.add_argument(
        '--log-file',
        help='Log to file'
    )

    parser.add_argument(
        '--skip-orchestrator',
        action='store_true',
        help='Skip orchestrator integration tests'
    )

    parser.add_argument(
        '--skip-error-tests',
        action='store_true',
        help='Skip error handling tests'
    )

    args = parser.parse_args()

    # Setup logging
    setup_logging(args.verbose, args.log_file)

    # Ensure directories exist; parents=True so nested paths given on the
    # command line do not fail with FileNotFoundError.
    args.data_dir.mkdir(parents=True, exist_ok=True)
    args.logs_dir.mkdir(parents=True, exist_ok=True)

    print("🚀 Enhanced Phase 2 Social Media Competitive Intelligence Test")
    print("=" * 65)
    print(f"📅 Test started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"📁 Data directory: {args.data_dir}")
    print(f"📄 Logs directory: {args.logs_dir}")
    print(f"🎯 Platforms: {', '.join(args.platforms)}")
    print(f"📊 Content limit: {args.limit}")

    # Track test results; None means the group was skipped.
    results = {
        'youtube': None,
        'instagram': None,
        'orchestrator': None,
        'error_handling': None
    }

    start_time = time.time()

    try:
        # Test YouTube scraper
        if 'youtube' in args.platforms:
            results['youtube'] = test_youtube_scraper_integration(
                args.data_dir, args.logs_dir, args.youtube_competitor, args.limit
            )

        # Test Instagram scraper
        if 'instagram' in args.platforms:
            results['instagram'] = test_instagram_scraper_integration(
                args.data_dir, args.logs_dir, args.instagram_competitor, args.limit
            )

        # Test orchestrator integration
        if not args.skip_orchestrator:
            results['orchestrator'] = test_orchestrator_social_media_integration(
                args.data_dir, args.logs_dir, args.limit
            )

        # Test error handling
        if not args.skip_error_tests:
            results['error_handling'] = test_error_handling_scenarios(
                args.data_dir, args.logs_dir
            )

    except KeyboardInterrupt:
        print("\n⚠️ Test interrupted by user")
        sys.exit(130)

    except Exception as e:
        print(f"\n❌ Unexpected test error: {e}")
        logging.exception("Unexpected error in test runner")
        sys.exit(1)

    # Calculate results
    end_time = time.time()
    duration = end_time - start_time

    # Print summary
    print("\n" + "=" * 65)
    print("📋 Test Summary")
    print("=" * 65)

    passed = 0
    total = 0

    for test_name, result in results.items():
        if result is not None:
            total += 1
            if result:
                passed += 1
                print(f"✅ {test_name.title()}: PASSED")
            else:
                print(f"❌ {test_name.title()}: FAILED")
        else:
            print(f"⚪ {test_name.title()}: SKIPPED")

    print(f"\n⏱️ Total duration: {duration:.2f} seconds")
    print(f"📊 Overall result: {passed}/{total} tests passed")

    if passed == total and total > 0:
        print("\n🎉 All Phase 2 social media integration tests PASSED!")
        print("✨ The enhanced competitive intelligence system is ready for production.")
        sys.exit(0)
    else:
        print("\n⚠️ Some tests failed. Please review the output above.")
        sys.exit(1)


if __name__ == "__main__":
    main()