hvac-kia-content/resume_instagram_capture.py
Ben Reed 0a795437a7 Optimize Instagram scraper and increase capture targets to 1000
- Increased Instagram rate limit from 100 to 200 posts/hour
- Reduced delays: 10-20s (was 15-30s), extended breaks 30-60s (was 60-120s)
- Extended break interval: every 10 requests (was 5)
- Updated capture targets: 1000 posts for Instagram, 1000 videos for TikTok
- Added production deployment and monitoring scripts
- Created environment configuration template

This provides ~40-50% speed improvement for Instagram scraping and
captures 5x more Instagram content and 3.3x more TikTok content.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-18 22:59:11 -03:00

74 lines
No EOL
2.5 KiB
Python

#!/usr/bin/env python3
"""
Resume Instagram and TikTok capture with updated rate limits
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from production_backlog_capture import ProductionBacklogCapture
import logging
# Set up logging: every record is written both to 'instagram_resume.log'
# (relative to the current working directory) and to a StreamHandler, which
# defaults to stderr.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        # The messages logged by this script contain emoji, so pin the file
        # encoding instead of relying on the platform default, which may not
        # be able to encode them.
        logging.FileHandler('instagram_resume.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)
def main():
    """Resume the Instagram backlog capture, then continue with TikTok.

    Instagram is captured first (target: 1000 posts). TikTok (target: 1000
    videos) is attempted only when the Instagram capture reports success.
    If at least one capture succeeded, the data is synced to the NAS.
    A summary of the item counts is logged at the end.

    Returns:
        bool: True when at least one source was captured successfully,
        False when nothing was captured. The script entry point maps this
        value to the process exit status.
    """
    logger.info("🚀 Resuming Instagram capture with updated rate limits")
    logger.info("New settings: 200 posts/hour, 10-20 second delays")
    logger.info("=" * 60)

    # Initialize capture with existing data directory
    capture = ProductionBacklogCapture(Path("data_production_backlog"))

    # TikTok only runs after a successful Instagram capture, so give its
    # result a default. Without it, the NAS-sync check and the summary below
    # would read an unbound local whenever Instagram fails.
    tiktok_result = {"success": False, "items": 0}

    # Capture Instagram with updated settings (already has 40 posts fetched)
    logger.info("Starting Instagram capture - targeting 1000 posts...")
    instagram_result = capture.capture_source_backlog("instagram", 1000)
    instagram_ok = bool(instagram_result.get("success"))

    if instagram_ok:
        logger.info(f"✅ Instagram completed: {instagram_result['items']} items")

        # Continue with TikTok
        logger.info("\nStarting TikTok capture with captions - targeting 1000 videos...")
        tiktok_result = capture.capture_source_backlog("tiktok", 1000)

        if tiktok_result.get("success"):
            logger.info(f"✅ TikTok completed: {tiktok_result['items']} items")
        else:
            logger.error(f"❌ TikTok failed: {tiktok_result.get('error', 'Unknown error')}")
    else:
        logger.error(f"❌ Instagram failed: {instagram_result.get('error', 'Unknown error')}")

    # Sync to NAS if at least one capture produced data
    captured_any = instagram_ok or bool(tiktok_result.get("success"))
    if captured_any:
        logger.info("\nSyncing to NAS...")
        nas_success = capture.sync_to_nas()
        # Report the outcome with the same markers used for the captures.
        logger.info(f"NAS sync: {'✅' if nas_success else '❌'}")

    # Summary
    logger.info("\n" + "=" * 60)
    logger.info("📊 CAPTURE SUMMARY")
    logger.info(f"Instagram: {instagram_result.get('items', 0)} items")
    logger.info(f"TikTok: {tiktok_result.get('items', 0)} items")

    # Report failure when nothing was captured, so the exit status is meaningful.
    return captured_any
if __name__ == "__main__":
    # Script entry point. Exit status: 0 when main() returns a truthy value,
    # 1 when it returns a falsy value or the user interrupts the run,
    # 2 on any other exception.
    try:
        success = main()
        sys.exit(0 if success else 1)
    except KeyboardInterrupt:
        logger.info("\nCapture interrupted by user")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: keep the traceback in the log so that an
        # unexpected failure can be diagnosed, not just its message.
        logger.critical(f"Capture failed: {e}", exc_info=True)
        sys.exit(2)