""" Production configuration for HVAC Know It All Content Aggregator """ from pathlib import Path from datetime import datetime import os # Base directories BASE_DIR = Path("/opt/hvac-kia-content") DATA_DIR = BASE_DIR / "data" LOGS_DIR = BASE_DIR / "logs" STATE_DIR = BASE_DIR / "state" # Ensure directories exist for dir_path in [DATA_DIR, LOGS_DIR, STATE_DIR]: dir_path.mkdir(parents=True, exist_ok=True) # Scraper configurations SCRAPERS_CONFIG = { "youtube": { "enabled": True, "max_videos": 20, "incremental": True, "schedule": "0 8,12 * * *" # 8 AM and 12 PM daily (as per spec) }, "wordpress": { "enabled": True, "max_posts": 20, "incremental": True, "schedule": "0 6,18 * * *" }, "instagram": { "enabled": True, "max_posts": 10, # Limited due to rate limiting "incremental": True, "schedule": "0 9 * * *" # Once daily at 9 AM (after main run) }, "tiktok": { "enabled": True, "max_posts": 35, "fetch_captions": False, # Disabled by default for speed "max_caption_fetches": 5, # Only top 5 if enabled "incremental": True, "schedule": "0 6,18 * * *" }, "mailchimp": { "enabled": True, "max_items": None, # RSS feed limited to 10 anyway "incremental": True, "schedule": "0 6,18 * * *" }, "podcast": { "enabled": True, "max_items": 10, "incremental": True, "schedule": "0 6,18 * * *" } } # TikTok special configuration for overnight caption fetching TIKTOK_CAPTION_JOB = { "enabled": False, # Enable if captions are critical "schedule": "0 2 * * *", # 2 AM daily "max_posts": 20, "max_caption_fetches": 20, "timeout_minutes": 60 } # Performance settings PARALLEL_PROCESSING = { "enabled": True, "max_workers": 3, # Conservative to avoid overwhelming APIs "exclude": ["tiktok", "instagram"] # These require sequential processing } # Retry configuration RETRY_CONFIG = { "max_attempts": 3, "initial_delay": 5, "backoff_factor": 2, "max_delay": 60 } # Monitoring and alerting MONITORING = { "healthcheck_url": os.getenv("HEALTHCHECK_URL"), "alert_email": os.getenv("ALERT_EMAIL"), "metrics_enabled": True, "metrics_port": 9090 } # Output configuration OUTPUT_CONFIG = { "format": "markdown", "combine_sources": True, "output_file": DATA_DIR / f"combined_{datetime.now():%Y%m%d}.md", "archive_days": 30, # Keep 30 days of history "compress_archives": True } # Rate limiting (requests per hour) RATE_LIMITS = { "instagram": 20, # Very conservative "tiktok": 100, "youtube": 500, "wordpress": 200, "mailchimp": 100, "podcast": 100 } # Logging configuration LOGGING = { "level": "INFO", "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", "max_bytes": 10485760, # 10MB "backup_count": 5, "separate_errors": True }