hvac-kia-content/config/production.py
Ben Reed 05218a873b Fix critical production issues and improve spec compliance
Production Readiness Improvements:
- Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM)
- Enabled NAS synchronization in production runner with error handling
- Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md)
- Made systemd services portable (removed hardcoded user/paths)
- Added environment variable validation on startup
- Moved DISPLAY/XAUTHORITY to .env configuration

Systemd Improvements:
- Created template service file (@.service) for any user
- Changed all paths to /opt/hvac-kia-content
- Updated installation script for portable deployment
- Fixed service dependencies and resource limits

Documentation:
- Created comprehensive PRODUCTION_TODO.md with 25 tasks
- Added PRODUCTION_GUIDE.md with deployment instructions
- Documented spec compliance gaps (65% complete)

Remaining work includes retry logic, connection pooling, media downloads,
and pytest test suite as documented in PRODUCTION_TODO.md

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 20:07:55 -03:00

118 lines
No EOL
3 KiB
Python

"""
Production configuration for HVAC Know It All Content Aggregator
"""
from pathlib import Path
from datetime import datetime
import os
# Base directories
# Everything the aggregator writes lives under BASE_DIR.
BASE_DIR = Path("/opt/hvac-kia-content")
DATA_DIR = BASE_DIR / "data"
LOGS_DIR = BASE_DIR / "logs"
STATE_DIR = BASE_DIR / "state"

# Ensure directories exist.
# This runs as a side effect of importing the module: parents=True also
# creates BASE_DIR when it is missing, and exist_ok=True makes repeated
# imports harmless.
for dir_path in [DATA_DIR, LOGS_DIR, STATE_DIR]:
    dir_path.mkdir(parents=True, exist_ok=True)
# Scraper configurations
# One entry per content source. "schedule" is a five-field cron expression
# (minute hour day-of-month month day-of-week). The main run is 8 AM and
# 12 PM daily as per spec; every source except Instagram follows it.
SCRAPERS_CONFIG = {
    "youtube": {
        "enabled": True,
        "max_videos": 20,
        "incremental": True,
        "schedule": "0 8,12 * * *",  # 8 AM and 12 PM daily (as per spec)
    },
    "wordpress": {
        "enabled": True,
        "max_posts": 20,
        "incremental": True,
        "schedule": "0 8,12 * * *",  # 8 AM and 12 PM daily (as per spec)
    },
    "instagram": {
        "enabled": True,
        "max_posts": 10,  # Limited due to rate limiting
        "incremental": True,
        "schedule": "0 9 * * *",  # Once daily at 9 AM (after main run)
    },
    "tiktok": {
        "enabled": True,
        "max_posts": 35,
        "fetch_captions": False,  # Disabled by default for speed
        "max_caption_fetches": 5,  # Only top 5 if enabled
        "incremental": True,
        "schedule": "0 8,12 * * *",  # 8 AM and 12 PM daily (as per spec)
    },
    "mailchimp": {
        "enabled": True,
        "max_items": None,  # RSS feed limited to 10 anyway
        "incremental": True,
        "schedule": "0 8,12 * * *",  # 8 AM and 12 PM daily (as per spec)
    },
    "podcast": {
        "enabled": True,
        "max_items": 10,
        "incremental": True,
        "schedule": "0 8,12 * * *",  # 8 AM and 12 PM daily (as per spec)
    },
}
# Dedicated overnight TikTok job for fetching captions.
# Off by default; its limits are separate from the "tiktok" scraper entry.
TIKTOK_CAPTION_JOB = {
    "enabled": False,  # Turn on only when captions are critical
    "schedule": "0 2 * * *",  # Cron expression: daily at 2 AM
    "max_posts": 20,
    "max_caption_fetches": 20,
    "timeout_minutes": 60,
}
# Performance settings: how many scrapers may run side by side.
PARALLEL_PROCESSING = {
    "enabled": True,
    # Kept small on purpose so the upstream APIs are not overwhelmed.
    "max_workers": 3,
    # Sources listed here need sequential processing.
    "exclude": ["tiktok", "instagram"],
}
# Retry policy parameters (attempt limit plus backoff settings).
# NOTE(review): the delay values are presumably seconds; confirm against the
# code that consumes this dict.
RETRY_CONFIG = {
    "max_attempts": 3,
    "initial_delay": 5,
    "backoff_factor": 2,
    "max_delay": 60,
}
# Monitoring and alerting settings.
# The two endpoints are read from the environment when this module is
# imported and are None when the variable is not set.
MONITORING = {
    "healthcheck_url": os.environ.get("HEALTHCHECK_URL"),
    "alert_email": os.environ.get("ALERT_EMAIL"),
    "metrics_enabled": True,
    "metrics_port": 9090,
}
# Output configuration
OUTPUT_CONFIG = {
    "format": "markdown",
    "combine_sources": True,
    # Spec file naming: hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md
    # NOTE: the timestamp is computed once, when this module is imported,
    # from the local clock (datetime.now() with no timezone argument).
    "output_file": (
        DATA_DIR
        / f"hvacknowitall_combined_{datetime.now():%Y-%m-%d-T%H%M%S}.md"
    ),
    "archive_days": 30,  # Keep 30 days of history
    "compress_archives": True,
}
# Per-source rate limits, expressed as requests per hour.
RATE_LIMITS = {
    "instagram": 20,  # Deliberately very conservative
    "tiktok": 100,
    "youtube": 500,
    "wordpress": 200,
    "mailchimp": 100,
    "podcast": 100,
}
# Logging settings: level, record layout, and size/backup-count values.
LOGGING = {
    "level": "INFO",
    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    "max_bytes": 10 * 1024 * 1024,  # 10MB
    "backup_count": 5,
    "separate_errors": True,
}