#!/usr/bin/env python3
"""
Test script for Social Media Competitive Intelligence
Tests YouTube and Instagram competitive scrapers

The script builds a CompetitiveIntelligenceOrchestrator, runs a series of
smoke tests against it and prints a summary.  The YouTube discovery test only
runs when YOUTUBE_API_KEY is set; the Instagram discovery test only runs when
both INSTAGRAM_USERNAME and INSTAGRAM_PASSWORD are set.  The process exits
with status 1 if initialization or any executed test fails, otherwise 0.
"""

import os
import sys
import logging
from pathlib import Path

# Add src to Python path
sys.path.insert(0, str(Path(__file__).parent / "src"))

from competitive_intelligence.competitive_orchestrator import CompetitiveIntelligenceOrchestrator


def setup_logging() -> None:
    """Setup logging for testing."""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )


def _format_count(value) -> str:
    """Render a count for display.

    Values that accept the ',' format option (numbers) are shown with
    thousands separators.  Anything else, such as the 'Unknown' placeholder
    used when metadata is missing, is returned as plain text: the ',' option
    is only valid for numeric types and raises for strings.
    """
    if value is None:
        return 'Unknown'
    try:
        return format(value, ',')
    except (TypeError, ValueError):
        return str(value)


def _first_scraper(orchestrator, prefix):
    """Return ``(name, scraper)`` for the first scraper key starting with *prefix*.

    Returns None when ``orchestrator.scrapers`` has no matching key.
    """
    for name, scraper in orchestrator.scrapers.items():
        if name.startswith(prefix):
            return name, scraper
    return None


def test_orchestrator_initialization() -> tuple:
    """Test that the orchestrator initializes with social media scrapers.

    Returns:
        ``(orchestrator, True)`` on success, ``(None, False)`` if construction
        or the scraper listing raised.
    """
    print("🧪 Testing Competitive Intelligence Orchestrator Initialization")
    print("=" * 60)

    data_dir = Path("data")
    logs_dir = Path("logs")

    try:
        orchestrator = CompetitiveIntelligenceOrchestrator(data_dir, logs_dir)
        print("✅ Orchestrator initialized successfully")
        print(f"📊 Total scrapers: {len(orchestrator.scrapers)}")

        # Check for social media scrapers
        names = list(orchestrator.scrapers)
        social_media_scrapers = [k for k in names if k.startswith(('youtube_', 'instagram_'))]
        youtube_scrapers = [k for k in names if k.startswith('youtube_')]
        instagram_scrapers = [k for k in names if k.startswith('instagram_')]

        print(f"📱 Social media scrapers: {len(social_media_scrapers)}")
        print(f"🎥 YouTube scrapers: {len(youtube_scrapers)}")
        print(f"📸 Instagram scrapers: {len(instagram_scrapers)}")

        print("\nAvailable scrapers:")
        for scraper_name in sorted(names):
            print(f" • {scraper_name}")

        return orchestrator, True

    except Exception as e:
        # Top-level test boundary: report the failure instead of crashing.
        print(f"❌ Failed to initialize orchestrator: {e}")
        return None, False


def test_list_competitors(orchestrator) -> bool:
    """Test listing competitors.

    Prints the total scraper count and, for each platform with at least one
    entry, the competitors reported by
    ``orchestrator.list_available_competitors()``.

    Returns:
        True if the listing was retrieved and printed, False on any exception.
    """
    print("\n🧪 Testing List Competitors")
    print("=" * 40)

    try:
        results = orchestrator.list_available_competitors()
        print("✅ Listed competitors successfully")
        print(f"📊 Total scrapers: {results['total_scrapers']}")

        for platform, competitors in results['by_platform'].items():
            if not competitors:
                continue
            print(f"\n{platform.upper()}: {len(competitors)} scrapers")
            for competitor in competitors:
                print(f" • {competitor}")

        return True

    except Exception as e:
        print(f"❌ Failed to list competitors: {e}")
        return False


def test_social_media_status(orchestrator) -> bool:
    """Test social media status.

    Prints the aggregate counts and a per-scraper summary taken from
    ``orchestrator.get_social_media_status()``.

    Returns:
        True if the status was retrieved and printed, False on any exception.
    """
    print("\n🧪 Testing Social Media Status")
    print("=" * 40)

    try:
        results = orchestrator.get_social_media_status()
        print("✅ Got social media status successfully")
        print(f"📱 Total social media scrapers: {results['total_social_media_scrapers']}")
        print(f"🎥 YouTube scrapers: {results['youtube_scrapers']}")
        print(f"📸 Instagram scrapers: {results['instagram_scrapers']}")

        # Show status of each scraper
        for scraper_name, status in results['scrapers'].items():
            scraper_type = status.get('scraper_type', 'unknown')
            configured = status.get('scraper_configured', False)
            emoji = '✅' if configured else '❌'
            print(f"\n{emoji} {scraper_name} ({scraper_type}):")

            if 'error' in status:
                print(f" ❌ Error: {status['error']}")
            elif scraper_type == 'youtube':
                # ``or {}`` also covers a key that is present but None.
                metadata = status.get('channel_metadata') or {}
                print(f" 🏷️ Channel: {metadata.get('title', 'Unknown')}")
                # A missing count must not abort the whole status test.
                subscribers = _format_count(metadata.get('subscriber_count', 'Unknown'))
                print(f" 👥 Subscribers: {subscribers}")
            elif scraper_type == 'instagram':
                metadata = status.get('profile_metadata') or {}
                print(f" 🏷️ Account: {metadata.get('full_name', 'Unknown')}")
                followers = _format_count(metadata.get('followers', 'Unknown'))
                print(f" 👥 Followers: {followers}")

        return True

    except Exception as e:
        print(f"❌ Failed to get social media status: {e}")
        return False


def test_competitive_setup(orchestrator) -> bool:
    """Test competitive setup.

    Prints the overall status and per-scraper results returned by
    ``orchestrator.test_competitive_setup()``.

    Returns:
        True only when the reported overall status is ``'operational'``;
        False otherwise or on any exception.
    """
    print("\n🧪 Testing Competitive Setup")
    print("=" * 40)

    try:
        results = orchestrator.test_competitive_setup()
        overall_status = results.get('overall_status', 'unknown')
        print(f"Overall Status: {'✅' if overall_status == 'operational' else '❌'} {overall_status}")

        # Show test results for each scraper
        for scraper_name, test_result in results.get('test_results', {}).items():
            status = test_result.get('status', 'unknown')
            emoji = '✅' if status == 'success' else '❌'
            print(f"\n{emoji} {scraper_name}:")

            if status == 'success':
                config = test_result.get('config', {})
                print(f" 🌐 Base URL: {config.get('base_url', 'Unknown')}")
                print(f" 🔒 Proxy: {'✅' if config.get('proxy_configured') else '❌'}")
                print(f" 🤖 Jina AI: {'✅' if config.get('jina_api_configured') else '❌'}")
                print(f" 📁 Directories: {'✅' if config.get('directories_exist') else '❌'}")
            else:
                print(f" ❌ Error: {test_result.get('error', 'Unknown')}")

        return overall_status == 'operational'

    except Exception as e:
        print(f"❌ Failed to test competitive setup: {e}")
        return False


def test_youtube_discovery(orchestrator) -> bool:
    """Test YouTube content discovery (dry run).

    Uses the first scraper whose key starts with ``'youtube_'`` and asks it
    for up to 3 content URLs via ``discover_content_urls``.

    Returns:
        True if discovery completed, False if no YouTube scraper exists or
        discovery raised.
    """
    print("\n🧪 Testing YouTube Content Discovery")
    print("=" * 40)

    selected = _first_scraper(orchestrator, 'youtube_')
    if selected is None:
        print("⚠️ No YouTube scrapers available")
        return False

    # Test one YouTube scraper
    scraper_name, scraper = selected

    try:
        print(f"🎥 Testing content discovery for {scraper_name}")

        # Discover a small number of URLs
        content_urls = scraper.discover_content_urls(3)
        print(f"✅ Discovered {len(content_urls)} content URLs")

        for i, url_data in enumerate(content_urls, 1):
            # Items may be dicts with 'url'/'title' keys or bare URL values.
            if isinstance(url_data, dict):
                url = url_data.get('url')
                # Fall back when the title is missing or None so slicing is safe.
                title = url_data.get('title') or 'Unknown'
            else:
                url = url_data
                title = 'Unknown'
            print(f" {i}. {str(title)[:50]}...")
            print(f" {url}")

        return True

    except Exception as e:
        print(f"❌ YouTube discovery test failed: {e}")
        return False


def test_instagram_discovery(orchestrator) -> bool:
    """Test Instagram content discovery (dry run).

    Uses the first scraper whose key starts with ``'instagram_'`` and asks it
    for up to 2 content URLs via ``discover_content_urls``.

    Returns:
        True if discovery completed, False if no Instagram scraper exists or
        discovery raised.
    """
    print("\n🧪 Testing Instagram Content Discovery")
    print("=" * 40)

    selected = _first_scraper(orchestrator, 'instagram_')
    if selected is None:
        print("⚠️ No Instagram scrapers available")
        return False

    # Test one Instagram scraper
    scraper_name, scraper = selected

    try:
        print(f"📸 Testing content discovery for {scraper_name}")

        # Discover a small number of URLs
        content_urls = scraper.discover_content_urls(2)  # Very small for Instagram
        print(f"✅ Discovered {len(content_urls)} content URLs")

        for i, url_data in enumerate(content_urls, 1):
            # Items may be dicts with 'url'/'caption' keys or bare URL values.
            if isinstance(url_data, dict):
                url = url_data.get('url')
                raw_caption = url_data.get('caption')
            else:
                url = url_data
                raw_caption = None
            caption = raw_caption[:30] + '...' if raw_caption else 'No caption'
            print(f" {i}. {caption}")
            print(f" {url}")

        return True

    except Exception as e:
        print(f"❌ Instagram discovery test failed: {e}")
        return False


def main() -> None:
    """Run all tests.

    Exits with status 1 if the orchestrator cannot be initialized or any
    executed test returns False; exits with status 0 otherwise.  Skipped
    tests (recorded as None) do not count as failures.
    """
    setup_logging()

    print("🧪 Social Media Competitive Intelligence Test Suite")
    print("=" * 60)
    print("This test suite validates the Phase 2 social media competitive scrapers")
    print()

    # Test 1: Orchestrator initialization
    orchestrator, init_success = test_orchestrator_initialization()
    if not init_success:
        print("❌ Critical failure: Could not initialize orchestrator")
        sys.exit(1)

    test_results = {'initialization': True}

    # Test 2: List competitors
    test_results['list_competitors'] = test_list_competitors(orchestrator)

    # Test 3: Social media status
    test_results['social_media_status'] = test_social_media_status(orchestrator)

    # Test 4: Competitive setup
    test_results['competitive_setup'] = test_competitive_setup(orchestrator)

    # Test 5: YouTube discovery (only if API key available)
    if os.getenv('YOUTUBE_API_KEY'):
        test_results['youtube_discovery'] = test_youtube_discovery(orchestrator)
    else:
        print("\n⚠️ Skipping YouTube discovery test (no API key)")
        test_results['youtube_discovery'] = None

    # Test 6: Instagram discovery (only if credentials available)
    if os.getenv('INSTAGRAM_USERNAME') and os.getenv('INSTAGRAM_PASSWORD'):
        test_results['instagram_discovery'] = test_instagram_discovery(orchestrator)
    else:
        print("\n⚠️ Skipping Instagram discovery test (no credentials)")
        test_results['instagram_discovery'] = None

    # Summary
    print("\n" + "=" * 60)
    print("📋 TEST SUMMARY")
    print("=" * 60)

    passed = sum(1 for result in test_results.values() if result is True)
    failed = sum(1 for result in test_results.values() if result is False)
    skipped = sum(1 for result in test_results.values() if result is None)

    print(f"✅ Tests Passed: {passed}")
    print(f"❌ Tests Failed: {failed}")
    print(f"⚠️ Tests Skipped: {skipped}")

    for test_name, result in test_results.items():
        if result is True:
            print(f" ✅ {test_name}")
        elif result is False:
            print(f" ❌ {test_name}")
        else:
            print(f" ⚠️ {test_name} (skipped)")

    if failed > 0:
        print("\n❌ Some tests failed. Check the logs above for details.")
        sys.exit(1)
    else:
        print("\n✅ All available tests passed! Social media competitive intelligence is ready.")
        print("\nNext steps:")
        print("1. Set up environment variables (YOUTUBE_API_KEY, INSTAGRAM_USERNAME, INSTAGRAM_PASSWORD)")
        print("2. Test backlog capture: python run_competitive_intelligence.py --operation social-backlog --limit 5")
        print("3. Test incremental sync: python run_competitive_intelligence.py --operation social-incremental")
        sys.exit(0)


if __name__ == "__main__":
    main()