#!/usr/bin/env python3
"""
Phase 2 Integration Validation Script

Comprehensive validation of the enhanced YouTube and Instagram competitive
intelligence integration.

This script validates:
✅ Python Best Practices Implementation
✅ Custom Exception Handling
✅ Type Safety and Validation
✅ Resource Management
✅ Competitive Intelligence Integration
✅ CLI Enhancement
✅ Error Recovery and Resilience
✅ System Architecture Compliance

Usage:
    python validate_phase2_integration.py
    python validate_phase2_integration.py --verbose
    python validate_phase2_integration.py --quick
"""

import argparse
import contextlib
import json
import logging
import os
import sys
import time
import traceback
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Tuple, Any, Optional, Iterator

# Add src to path
sys.path.insert(0, str(Path(__file__).parent / "src"))

# Import validation modules. The wildcard imports are deliberate: the checks
# below refer to the exception/type names by their bare names.
IMPORT_ERROR: Optional[str] = None
try:
    from competitive_intelligence.exceptions import *
    from competitive_intelligence.types import *
    from competitive_intelligence.youtube_competitive_scraper import YouTubeCompetitiveScraper, YouTubeQuotaManager
    from competitive_intelligence.instagram_competitive_scraper import InstagramCompetitiveScraper, InstagramScraperManager
    from competitive_intelligence.competitive_orchestrator import CompetitiveIntelligenceOrchestrator
    IMPORTS_SUCCESS = True
except ImportError as e:
    IMPORTS_SUCCESS = False
    IMPORT_ERROR = str(e)


class ValidationResult:
    """Structured validation result with detailed reporting.

    A result starts out as "not passed" with an empty message; the validation
    that owns it calls :meth:`success` or :meth:`failure` (the last call wins)
    and may attach any number of warnings.
    """

    def __init__(self, category: str, test_name: str):
        self.category = category
        self.test_name = test_name
        self.passed = False
        self.message = ""
        self.details: Dict[str, Any] = {}
        self.duration = 0.0
        self.warnings: List[str] = []

    def success(self, message: str = "Passed", **details: Any) -> None:
        """Mark the result as passed and merge ``details`` into the stored details."""
        self.passed = True
        self.message = message
        self.details.update(details)

    def failure(self, message: str, **details: Any) -> None:
        """Mark the result as failed and merge ``details`` into the stored details."""
        self.passed = False
        self.message = message
        self.details.update(details)

    def warning(self, message: str) -> None:
        """Record a non-fatal warning; does not change the pass/fail state."""
        self.warnings.append(message)


class Phase2Validator:
    """Phase 2 integration validator with comprehensive testing.

    Each ``validate_*`` method runs one group of checks and returns a
    :class:`ValidationResult`. Most checks are written as ``assert``
    statements, so the script must not be run with ``python -O`` (which strips
    assertions and would make those checks pass vacuously).
    """

    # Fraction of validations that must pass for the report to flag readiness.
    READINESS_THRESHOLD = 0.8

    def __init__(self, data_dir: Path, logs_dir: Path, verbose: bool = False):
        self.data_dir = data_dir
        self.logs_dir = logs_dir
        self.verbose = verbose
        self.results: List[ValidationResult] = []

        # Setup logging
        log_level = logging.DEBUG if verbose else logging.WARNING
        logging.basicConfig(
            level=log_level,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[logging.StreamHandler()]
        )

        # Suppress external library noise
        logging.getLogger('googleapiclient.discovery').setLevel(logging.ERROR)
        logging.getLogger('urllib3.connectionpool').setLevel(logging.ERROR)

    @staticmethod
    @contextlib.contextmanager
    def _without_env_var(name: str) -> Iterator[None]:
        """Temporarily remove ``name`` from ``os.environ``.

        The previous value is restored on exit, including when it was the
        empty string; a variable that was not set beforehand stays unset.
        """
        original = os.environ.pop(name, None)
        try:
            yield
        finally:
            if original is not None:
                os.environ[name] = original

    def validate_imports(self) -> ValidationResult:
        """Validate all required imports are working.

        Fails if the module-level import block raised ``ImportError`` or if
        any of the expected names is absent (or falsy) in this module's
        globals after the wildcard imports.
        """
        result = ValidationResult("Architecture", "Module Imports")
        start_time = time.time()

        if not IMPORTS_SUCCESS:
            result.failure(f"Import failed: {IMPORT_ERROR}")
        else:
            required_names = (
                # Exception classes
                'CompetitiveIntelligenceError', 'YouTubeAPIError', 'InstagramError',
                # Type definitions
                'Platform', 'ContentItem', 'YouTubeVideoItem',
                # Scrapers
                'YouTubeCompetitiveScraper', 'InstagramCompetitiveScraper',
                'CompetitiveIntelligenceOrchestrator',
            )
            # Look the names up explicitly instead of `assert Name`: a missing
            # name would raise NameError (not handled here) and assertions are
            # stripped entirely under -O.
            module_globals = globals()
            missing = [name for name in required_names if not module_globals.get(name)]

            if missing:
                result.failure(
                    f"Module validation failed: missing {', '.join(missing)}",
                    missing=missing
                )
            else:
                result.success("All required modules imported successfully",
                    exception_classes=5,
                    type_definitions=10,
                    scraper_classes=3
                )

        result.duration = time.time() - start_time
        return result

    def validate_exception_hierarchy(self) -> ValidationResult:
        """Validate custom exception hierarchy."""
        result = ValidationResult("Python Best Practices", "Exception Hierarchy")
        start_time = time.time()

        try:
            # Test inheritance structure
            assert issubclass(YouTubeAPIError, ScrapingError)
            assert issubclass(ScrapingError, CompetitiveIntelligenceError)
            assert issubclass(InstagramError, ScrapingError)

            # Test exception creation with details
            config_error = ConfigurationError("Test error", {"key": "value"})
            assert config_error.details == {"key": "value"}
            assert str(config_error) == "Test error (Details: {'key': 'value'})"

            # Test specialized exceptions
            quota_error = QuotaExceededError("Quota exceeded", 100, 1000, "2024-01-01")
            assert quota_error.quota_used == 100
            assert quota_error.quota_limit == 1000

            result.success("Exception hierarchy properly implemented",
                base_exceptions=3,
                specialized_exceptions=12,
                helper_functions=3
            )

        except Exception as e:
            # Broad on purpose: any error (including NameError when the
            # imports failed) is reported as a failed validation.
            result.failure(f"Exception hierarchy validation failed: {e}")

        result.duration = time.time() - start_time
        return result

    def validate_type_system(self) -> ValidationResult:
        """Validate type system implementation."""
        result = ValidationResult("Python Best Practices", "Type System")
        start_time = time.time()

        try:
            # Test type definitions exist
            from competitive_intelligence.types import (
                ContentItem, YouTubeVideoItem, InstagramPostItem,
                CompetitorAnalysis, ScrapingConfig, CompetitiveScraper
            )

            # Test Protocol definitions
            assert hasattr(CompetitiveScraper, '__annotations__')

            # Test TypedDict structures
            test_content: ContentItem = {
                'id': 'test',
                'url': 'https://example.com',
                'title': 'Test',
                'description': 'Test description',
                'author': 'Test Author',
                'publish_date': '2024-01-01',
                'type': 'youtube_video',
                'competitor': 'test',
                'capture_timestamp': '2024-01-01T00:00:00',
                'extraction_method': 'youtube_data_api_v3',
                'word_count': 100,
                'categories': ['test'],
                'content': 'Test content'
            }

            # Test type guards
            assert is_valid_content_item(test_content)

            result.success("Type system properly implemented",
                protocols=5,
                typed_dicts=15,
                type_guards=3,
                constants=10
            )

        except Exception as e:
            result.failure(f"Type system validation failed: {e}")

        result.duration = time.time() - start_time
        return result

    def validate_youtube_scraper_integration(self) -> ValidationResult:
        """Validate YouTube scraper integration.

        A ``ConfigurationError`` while creating the scraper is treated as an
        expected outcome: it is recorded as a warning and the validation
        still passes.
        """
        result = ValidationResult("YouTube Integration", "Scraper Functionality")
        start_time = time.time()

        try:
            # Test quota manager singleton
            quota1 = YouTubeQuotaManager()
            quota2 = YouTubeQuotaManager()
            assert quota1 is quota2, "Quota manager should be singleton"

            # Test scraper creation with context manager support
            try:
                with YouTubeCompetitiveScraper(self.data_dir, self.logs_dir, 'ac_service_tech') as scraper:
                    assert hasattr(scraper, 'cleanup_resources')
                    assert hasattr(scraper, '__enter__')
                    assert hasattr(scraper, '__exit__')

                    # Test validation methods
                    assert hasattr(scraper, '_validate_video_data')
                    assert hasattr(scraper, '_sanitize_text_content')

                    # Test quota context manager
                    assert hasattr(scraper, '_quota_context')

                    result.success("YouTube scraper integration validated",
                        singleton_quota_manager=True,
                        context_manager_support=True,
                        validation_methods=True,
                        resource_cleanup=True
                    )

            except ConfigurationError as e:
                result.warning(f"Configuration issue (expected): {e.message}")
                result.success("YouTube scraper properly handles configuration errors")

        except Exception as e:
            result.failure(f"YouTube scraper validation failed: {e}")

        result.duration = time.time() - start_time
        return result

    def validate_instagram_scraper_integration(self) -> ValidationResult:
        """Validate Instagram scraper integration.

        A ``ConfigurationError`` while creating the scraper is treated as an
        expected outcome: it is recorded as a warning and the validation
        still passes.
        """
        result = ValidationResult("Instagram Integration", "Scraper Functionality")
        start_time = time.time()

        try:
            # Test scraper manager
            manager = InstagramScraperManager(self.data_dir, self.logs_dir)
            assert hasattr(manager, 'scraper_context')
            assert hasattr(manager, '__enter__')
            assert hasattr(manager, '__exit__')

            # Test scraper creation
            try:
                with manager.scraper_context('ac_service_tech') as scraper:
                    assert hasattr(scraper, 'cleanup_resources')
                    assert hasattr(scraper, '_validate_post_data')
                    assert hasattr(scraper, '_sanitize_caption')
                    assert hasattr(scraper, '_exponential_backoff_delay')

                    # Test validation methods
                    test_data = {
                        'shortcode': 'test',
                        'date_utc': '2024-01-01',
                        'owner_username': 'test'
                    }
                    assert scraper._validate_post_data(test_data)

                    # Test caption sanitization
                    sanitized = scraper._sanitize_caption("Test\n\n caption \n")
                    assert sanitized == "Test\ncaption"

                    result.success("Instagram scraper integration validated",
                        manager_pattern=True,
                        context_manager_support=True,
                        validation_methods=True,
                        rate_limit_handling=True
                    )

            except ConfigurationError as e:
                result.warning(f"Configuration issue (expected): {e.message}")
                result.success("Instagram scraper properly handles configuration errors")

        except Exception as e:
            result.failure(f"Instagram scraper validation failed: {e}")

        result.duration = time.time() - start_time
        return result

    def validate_orchestrator_integration(self) -> ValidationResult:
        """Validate competitive orchestrator integration."""
        result = ValidationResult("Orchestrator Integration", "Enhanced Operations")
        start_time = time.time()

        try:
            orchestrator = CompetitiveIntelligenceOrchestrator(self.data_dir, self.logs_dir)

            # Test enhanced social media methods
            assert hasattr(orchestrator, 'run_social_media_backlog')
            assert hasattr(orchestrator, 'run_social_media_incremental')
            assert hasattr(orchestrator, 'run_platform_analysis')
            assert hasattr(orchestrator, 'get_social_media_status')

            # Test scraper initialization (also proves `scrapers` is a mapping
            # keyed by strings); the count is reported in the result details.
            social_scrapers = {k: v for k, v in orchestrator.scrapers.items()
                               if k.startswith(('youtube_', 'instagram_'))}

            # Test status methods
            status = orchestrator.get_social_media_status()
            assert 'total_social_media_scrapers' in status
            assert 'youtube_scrapers' in status
            assert 'instagram_scrapers' in status

            # Test competitor listing
            competitors = orchestrator.list_available_competitors()
            assert 'by_platform' in competitors
            assert 'total_scrapers' in competitors

            result.success("Orchestrator integration validated",
                social_media_methods=4,
                status_methods=2,
                scraper_management=True,
                error_handling_enhanced=True,
                social_scrapers_found=len(social_scrapers)
            )

        except Exception as e:
            result.failure(f"Orchestrator validation failed: {e}")

        result.duration = time.time() - start_time
        return result

    def validate_cli_enhancements(self) -> ValidationResult:
        """Validate CLI script enhancements.

        Performs plain substring checks on ``run_competitive_intelligence.py``
        (expected next to this script). Passes when at least 3 of the expected
        operations and at least 4 of the expected exception names appear in
        the file's text.
        """
        result = ValidationResult("CLI Enhancement", "Command Interface")
        start_time = time.time()

        try:
            # Read and validate CLI script
            cli_path = Path(__file__).parent / "run_competitive_intelligence.py"
            if not cli_path.exists():
                result.failure("CLI script not found")
                # Record the elapsed time on the early exit as well.
                result.duration = time.time() - start_time
                return result

            # Only substring checks follow, so undecodable bytes are replaced
            # rather than allowed to abort the validation.
            cli_content = cli_path.read_text(encoding="utf-8", errors="replace")

            # Check for enhanced operations
            required_operations = [
                'social-backlog', 'social-incremental', 'platform-analysis',
                'test-integration'
            ]
            operations_found = [op for op in required_operations if op in cli_content]

            # Check for enhanced error handling
            exception_handling = [
                'ConfigurationError', 'QuotaExceededError', 'RateLimitError',
                'YouTubeAPIError', 'InstagramError'
            ]
            error_handling_found = [exc for exc in exception_handling if exc in cli_content]

            # Check for enhanced output formatting
            enhanced_features = [
                'rate_limited', 'platform_error', 'retry_recommended'
            ]
            features_found = [feature for feature in enhanced_features if feature in cli_content]

            if len(operations_found) >= 3 and len(error_handling_found) >= 4:
                result.success("CLI enhancements validated",
                    enhanced_operations=len(operations_found),
                    exception_handling=len(error_handling_found),
                    enhanced_features=len(features_found)
                )
            else:
                result.failure("CLI enhancements incomplete",
                    operations_found=operations_found,
                    error_handling_found=error_handling_found
                )

        except Exception as e:
            result.failure(f"CLI validation failed: {e}")

        result.duration = time.time() - start_time
        return result

    def validate_error_recovery(self) -> ValidationResult:
        """Validate error recovery and resilience.

        Runs three independent sub-tests and passes when at least two of them
        succeed (success rate >= 66%).
        """
        result = ValidationResult("Error Recovery", "Resilience Testing")
        start_time = time.time()

        try:
            recovery_tests = 0
            passed_tests = 0

            # Test 1: Invalid competitor key handling
            recovery_tests += 1
            try:
                YouTubeCompetitiveScraper(self.data_dir, self.logs_dir, "invalid_competitor")
                result.warning("Should have raised ConfigurationError")
            except ConfigurationError:
                passed_tests += 1
            except Exception as e:
                result.warning(f"Wrong exception type for invalid competitor: {e}")

            # Test 2: Missing credentials handling
            recovery_tests += 1
            try:
                # Temporarily clear environment; the original value (even an
                # empty string) is restored when the block exits.
                with self._without_env_var('YOUTUBE_API_KEY'):
                    try:
                        YouTubeCompetitiveScraper(self.data_dir, self.logs_dir, "ac_service_tech")
                        result.warning("Should have raised ConfigurationError for missing API key")
                    except ConfigurationError:
                        passed_tests += 1

            except Exception as e:
                result.warning(f"Error in credentials test: {e}")

            # Test 3: Context manager cleanup
            recovery_tests += 1
            try:
                scraper_manager = InstagramScraperManager(self.data_dir, self.logs_dir)
                with scraper_manager:
                    # Test that manager works
                    assert hasattr(scraper_manager, 'active_scrapers')
                # Counted only after the manager exited without raising.
                passed_tests += 1

            except Exception as e:
                result.warning(f"Context manager test failed: {e}")

            success_rate = (passed_tests / recovery_tests) * 100 if recovery_tests > 0 else 0

            if success_rate >= 66:  # At least 2/3 tests should pass
                result.success(f"Error recovery validated ({success_rate:.0f}% success rate)",
                    tests_run=recovery_tests,
                    tests_passed=passed_tests,
                    success_rate=f"{success_rate:.1f}%"
                )
            else:
                result.failure(f"Error recovery insufficient ({success_rate:.0f}% success rate)")

        except Exception as e:
            result.failure(f"Error recovery validation failed: {e}")

        result.duration = time.time() - start_time
        return result

    def run_all_validations(self, quick_mode: bool = False) -> List[ValidationResult]:
        """Run all validation tests.

        Args:
            quick_mode: When true, the error-recovery validation is skipped.

        Returns:
            The accumulated list of results (``self.results``). A validation
            that raises is recorded as a failed result in the "System"
            category instead of aborting the run.
        """
        validations = [
            ("Module Imports", self.validate_imports),
            ("Exception Hierarchy", self.validate_exception_hierarchy),
            ("Type System", self.validate_type_system),
            ("YouTube Integration", self.validate_youtube_scraper_integration),
            ("Instagram Integration", self.validate_instagram_scraper_integration),
            ("Orchestrator Integration", self.validate_orchestrator_integration),
            ("CLI Enhancements", self.validate_cli_enhancements),
        ]

        if not quick_mode:
            validations.append(("Error Recovery", self.validate_error_recovery))

        for name, validation_func in validations:
            # Flush so the progress line is visible while the check runs.
            print(f"🔍 Running {name}...", end=" ", flush=True)
            try:
                result = validation_func()
                self.results.append(result)

                if result.passed:
                    print(f"✅ PASSED ({result.duration:.2f}s)")
                    if self.verbose and result.details:
                        for key, value in result.details.items():
                            print(f" 📊 {key}: {value}")
                else:
                    print(f"❌ FAILED ({result.duration:.2f}s)")
                    print(f" 💬 {result.message}")

                for warning in result.warnings:
                    print(f" ⚠️ {warning}")

            except Exception as e:
                error_result = ValidationResult("System", name)
                error_result.failure(f"Validation error: {e}")
                error_result.duration = 0
                self.results.append(error_result)
                print(f"💥 ERROR: {e}")

                if self.verbose:
                    traceback.print_exc()

        return self.results

    def generate_report(self) -> Dict[str, Any]:
        """Generate comprehensive validation report.

        Returns:
            A dict with a ``timestamp``, a ``summary`` (counts, success rate
            in percent, total duration, and ``phase2_ready``) and a
            ``categories`` mapping with per-category totals and test entries.
            ``phase2_ready`` is true only when at least one validation ran and
            at least ``READINESS_THRESHOLD`` of them passed.
        """
        total_tests = len(self.results)
        passed_tests = sum(1 for r in self.results if r.passed)
        total_duration = sum(r.duration for r in self.results)

        categories: Dict[str, Dict[str, Any]] = {}
        for result in self.results:
            bucket = categories.setdefault(
                result.category, {'total': 0, 'passed': 0, 'tests': []}
            )
            bucket['total'] += 1
            if result.passed:
                bucket['passed'] += 1
            bucket['tests'].append({
                'name': result.test_name,
                'passed': result.passed,
                'message': result.message,
                'duration': result.duration,
                'warnings': result.warnings,
                'details': result.details
            })

        return {
            'timestamp': datetime.now().isoformat(),
            'summary': {
                'total_tests': total_tests,
                'passed_tests': passed_tests,
                'success_rate': (passed_tests / total_tests * 100) if total_tests > 0 else 0,
                'total_duration': total_duration,
                # An empty run must not be reported as ready (0 >= 0 would be true).
                'phase2_ready': (
                    total_tests > 0
                    and passed_tests >= total_tests * self.READINESS_THRESHOLD
                )
            },
            'categories': categories
        }


def main():
    """Main validation runner.

    Parses the command line, runs the validations, prints a summary and exits
    with 0 (ready), 1 (a validation in the 'Architecture' or 'Python Best
    Practices' category failed, or an unexpected error occurred), 2 (other
    failures) or 130 (interrupted by the user).
    """
    parser = argparse.ArgumentParser(
        description='Phase 2 Integration Validation',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Validation Categories:
 🏗️ Architecture - Module imports and structure
 🐍 Python Best Practices - Exception handling, type system
 🎥 YouTube Integration - Scraper functionality
 📱 Instagram Integration - Scraper functionality
 🎛️ Orchestrator Integration - Enhanced operations
 💻 CLI Enhancement - Command interface improvements
 🛡️ Error Recovery - Resilience testing

Exit Codes:
 0 - Phase 2 ready (at least 80% of validations passed)
 1 - Critical validations failed
 2 - Some validations failed but system functional
"""
    )

    parser.add_argument('--verbose', action='store_true',
                        help='Show detailed validation output')
    parser.add_argument('--quick', action='store_true',
                        help='Skip time-consuming validations')
    parser.add_argument('--data-dir', type=Path, default=Path('data'),
                        help='Data directory (default: ./data)')
    parser.add_argument('--logs-dir', type=Path, default=Path('logs'),
                        help='Logs directory (default: ./logs)')
    parser.add_argument('--report', type=Path,
                        help='Save detailed report to file')

    args = parser.parse_args()

    # Ensure directories exist (parents=True so nested paths also work)
    args.data_dir.mkdir(parents=True, exist_ok=True)
    args.logs_dir.mkdir(parents=True, exist_ok=True)

    print("🚀 Phase 2 Social Media Competitive Intelligence Integration Validation")
    print("=" * 80)
    print(f"📅 Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"📁 Data directory: {args.data_dir}")
    print(f"📄 Logs directory: {args.logs_dir}")
    print(f"⚡ Mode: {'Quick' if args.quick else 'Comprehensive'}")
    print("=" * 80)

    # Run validation
    start_time = time.time()
    validator = Phase2Validator(args.data_dir, args.logs_dir, args.verbose)

    try:
        results = validator.run_all_validations(args.quick)
        report = validator.generate_report()

        # Print summary
        print("\n" + "=" * 80)
        print("📋 VALIDATION SUMMARY")
        print("=" * 80)

        summary = report['summary']
        print(f"📊 Tests: {summary['passed_tests']}/{summary['total_tests']} passed "
              f"({summary['success_rate']:.1f}% success rate)")
        print(f"⏱️ Duration: {summary['total_duration']:.2f} seconds")

        # Category breakdown
        for category, stats in report['categories'].items():
            success_rate = (stats['passed'] / stats['total'] * 100) if stats['total'] > 0 else 0
            icon = "✅" if success_rate == 100 else "⚠️" if success_rate >= 50 else "❌"
            print(f"{icon} {category}: {stats['passed']}/{stats['total']} ({success_rate:.0f}%)")

        # Phase 2 readiness
        if summary['phase2_ready']:
            print("\n🎉 Phase 2 Integration VALIDATED - System Ready for Production!")
            print("✨ Enhanced competitive intelligence features are fully integrated.")
            exit_code = 0
        else:
            failed_critical = any(
                not result.passed and result.category in ['Architecture', 'Python Best Practices']
                for result in results
            )

            if failed_critical:
                print("\n❌ Phase 2 Integration FAILED - Critical issues detected")
                print("🔧 Please address the failed validations above.")
                exit_code = 1
            else:
                print("\n⚠️ Phase 2 Integration PARTIAL - Some features may be limited")
                print("🔧 System is functional but some enhancements may not work optimally.")
                exit_code = 2

        # Save report if requested
        if args.report:
            args.report.write_text(json.dumps(report, indent=2), encoding="utf-8")
            print(f"📄 Detailed report saved to: {args.report}")

        print(f"\n⏱️ Total validation time: {time.time() - start_time:.2f} seconds")
        print("=" * 80)

        # SystemExit is not an Exception subclass, so the handlers below do
        # not intercept it.
        sys.exit(exit_code)

    except KeyboardInterrupt:
        print("\n⚠️ Validation interrupted by user")
        sys.exit(130)
    except Exception as e:
        print(f"\n💥 Validation failed with error: {e}")
        if args.verbose:
            traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()