Production Readiness Improvements: - Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM) - Enabled NAS synchronization in production runner with error handling - Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md) - Made systemd services portable (removed hardcoded user/paths) - Added environment variable validation on startup - Moved DISPLAY/XAUTHORITY to .env configuration Systemd Improvements: - Created template service file (@.service) for any user - Changed all paths to /opt/hvac-kia-content - Updated installation script for portable deployment - Fixed service dependencies and resource limits Documentation: - Created comprehensive PRODUCTION_TODO.md with 25 tasks - Added PRODUCTION_GUIDE.md with deployment instructions - Documented spec compliance gaps (65% complete) Remaining work includes retry logic, connection pooling, media downloads, and pytest test suite as documented in PRODUCTION_TODO.md 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
118 lines
No EOL
3 KiB
Python
118 lines
No EOL
3 KiB
Python
"""
|
|
Production configuration for HVAC Know It All Content Aggregator
|
|
"""
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
import os
|
|
|
|
# Installation root and the data/logs/state subdirectories beneath it.
BASE_DIR = Path("/opt/hvac-kia-content")
DATA_DIR = BASE_DIR.joinpath("data")
LOGS_DIR = BASE_DIR.joinpath("logs")
STATE_DIR = BASE_DIR.joinpath("state")
|
|
|
|
# Create each working directory (plus any missing parents) as soon as this
# module is imported; directories that already exist are left untouched.
for dir_path in (DATA_DIR, LOGS_DIR, STATE_DIR):
    dir_path.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Scraper configurations
#
# Each entry is keyed by source name and carries an on/off switch, a per-run
# item cap, an incremental flag and a five-field cron expression.
# NOTE(review): the cron hours are presumably interpreted in the host's local
# (Atlantic) time zone -- confirm against the scheduler that consumes them.

# Main run schedule required by the spec: 8 AM and 12 PM daily. Shared by
# every scraper that takes part in the main run so the entries cannot drift
# apart again (several were previously left on the old 6 AM / 6 PM schedule).
_MAIN_RUN_SCHEDULE = "0 8,12 * * *"

SCRAPERS_CONFIG = {
    "youtube": {
        "enabled": True,
        "max_videos": 20,
        "incremental": True,
        "schedule": _MAIN_RUN_SCHEDULE,  # 8 AM and 12 PM daily (as per spec)
    },
    "wordpress": {
        "enabled": True,
        "max_posts": 20,
        "incremental": True,
        "schedule": _MAIN_RUN_SCHEDULE,
    },
    "instagram": {
        "enabled": True,
        "max_posts": 10,  # Limited due to rate limiting
        "incremental": True,
        "schedule": "0 9 * * *",  # Once daily at 9 AM (after main run)
    },
    "tiktok": {
        "enabled": True,
        "max_posts": 35,
        "fetch_captions": False,  # Disabled by default for speed
        "max_caption_fetches": 5,  # Only top 5 if enabled
        "incremental": True,
        "schedule": _MAIN_RUN_SCHEDULE,
    },
    "mailchimp": {
        "enabled": True,
        "max_items": None,  # RSS feed limited to 10 anyway
        "incremental": True,
        "schedule": _MAIN_RUN_SCHEDULE,
    },
    "podcast": {
        "enabled": True,
        "max_items": 10,
        "incremental": True,
        "schedule": _MAIN_RUN_SCHEDULE,
    },
}
|
|
|
|
# Separate overnight TikTok job dedicated to caption fetching. It is switched
# off here; the main TikTok scraper entry has its own caption settings.
TIKTOK_CAPTION_JOB = {
    "enabled": False,  # Enable if captions are critical
    "schedule": "0 2 * * *",  # 2 AM daily
    "max_posts": 20,
    "max_caption_fetches": 20,
    "timeout_minutes": 60,
}
|
|
|
|
# Performance settings: whether scrapers run in parallel, how many workers
# to use, and which sources are kept out of the parallel pool.
PARALLEL_PROCESSING = {
    "enabled": True,
    "max_workers": 3,  # Conservative to avoid overwhelming APIs
    "exclude": [  # These require sequential processing
        "tiktok",
        "instagram",
    ],
}
|
|
|
|
# Retry policy parameters: attempt cap, first delay, multiplier and delay cap.
# NOTE(review): delays are presumably expressed in seconds -- confirm against
# the code that consumes this mapping.
RETRY_CONFIG = {
    "max_attempts": 3,
    "initial_delay": 5,
    "backoff_factor": 2,
    "max_delay": 60,
}
|
|
|
|
# Monitoring and alerting settings. The healthcheck URL and alert address are
# read from the environment when this module is imported and are None when
# the corresponding variable is not set.
MONITORING = {
    "healthcheck_url": os.getenv("HEALTHCHECK_URL"),
    "alert_email": os.getenv("ALERT_EMAIL"),
    "metrics_enabled": True,
    "metrics_port": 9090,
}
|
|
|
|
# Output configuration
OUTPUT_CONFIG = {
    "format": "markdown",
    "combine_sources": True,
    # File name follows the spec naming convention
    # hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md (previously the off-spec
    # combined_YYYYMMDD.md).
    # NOTE: the timestamp is taken once, from the naive local clock, when this
    # module is imported; a long-lived process keeps reusing the same path.
    "output_file": DATA_DIR / f"hvacknowitall_combined_{datetime.now():%Y-%m-%d-T%H%M%S}.md",
    "archive_days": 30,  # Keep 30 days of history
    "compress_archives": True,
}
|
|
|
|
# Per-source rate limits, expressed as requests per hour.
RATE_LIMITS = {
    "instagram": 20,  # Very conservative
    "tiktok": 100,
    "youtube": 500,
    "wordpress": 200,
    "mailchimp": 100,
    "podcast": 100,
}
|
|
|
|
# Logging settings: level name, record format string, and size/count limits.
# NOTE(review): max_bytes/backup_count look like rotating-file-handler
# parameters -- confirm against the logging setup code that reads them.
LOGGING = {
    "level": "INFO",
    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    "max_bytes": 10 * 1024 * 1024,  # 10MB
    "backup_count": 5,
    "separate_errors": True,
}