- Increased Instagram rate limit from 100 to 200 posts/hour - Reduced delays: 10-20s (was 15-30s), extended breaks 30-60s (was 60-120s) - Extended break interval: every 10 requests (was 5) - Updated capture targets: 1000 posts for Instagram, 1000 videos for TikTok - Added production deployment and monitoring scripts - Created environment configuration template This provides ~40-50% speed improvement for Instagram scraping and captures 5x more Instagram content and 3.3x more TikTok content. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
74 lines
No EOL
2.5 KiB
Python
74 lines
No EOL
2.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Resume Instagram and TikTok capture with updated rate limits
|
|
"""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from production_backlog_capture import ProductionBacklogCapture
|
|
import logging
|
|
|
|
# Logging configuration: records at INFO and above are sent to two handlers,
# a file named 'instagram_resume.log' (relative to the current working
# directory) and a console StreamHandler using its default stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler('instagram_resume.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by main() and the script entry point.
logger = logging.getLogger(__name__)
|
|
|
|
def main():
    """Resume the Instagram capture and, if it succeeds, the TikTok capture.

    Runs the Instagram backlog capture (target: 1000 posts) against the
    existing ``data_production_backlog`` directory. The TikTok capture
    (target: 1000 videos) is attempted only after Instagram reports success.
    When at least one capture succeeded the data is synced to the NAS, and a
    summary is logged in every case.

    Each capture result is read as a dict with ``success``, ``items`` and
    (on failure) ``error`` keys; that shape is inferred from how the results
    are used here -- confirm against ``ProductionBacklogCapture``.

    Returns:
        bool: True when at least one capture succeeded, False otherwise.
    """
    logger.info("🚀 Resuming Instagram capture with updated rate limits")
    logger.info("New settings: 200 posts/hour, 10-20 second delays")
    logger.info("=" * 60)

    # Initialize capture with the existing data directory
    capture = ProductionBacklogCapture(Path("data_production_backlog"))

    # TikTok only runs after a successful Instagram capture. Bind a placeholder
    # result first so the NAS-sync check and the summary below never hit an
    # unbound variable when the TikTok step is skipped.
    tiktok_result = {"success": False, "items": 0}

    # Capture Instagram with updated settings
    logger.info("Starting Instagram capture - targeting 1000 posts...")
    instagram_result = capture.capture_source_backlog("instagram", 1000)

    if instagram_result.get("success"):
        logger.info(f"✅ Instagram completed: {instagram_result.get('items', 0)} items")

        # Continue with TikTok
        logger.info("\nStarting TikTok capture with captions - targeting 1000 videos...")
        tiktok_result = capture.capture_source_backlog("tiktok", 1000)

        if tiktok_result.get("success"):
            logger.info(f"✅ TikTok completed: {tiktok_result.get('items', 0)} items")
        else:
            logger.error(f"❌ TikTok failed: {tiktok_result.get('error', 'Unknown error')}")
    else:
        logger.error(f"❌ Instagram failed: {instagram_result.get('error', 'Unknown error')}")

    any_success = bool(
        instagram_result.get("success") or tiktok_result.get("success")
    )

    # Sync to NAS only if there is something new to sync
    if any_success:
        logger.info("\nSyncing to NAS...")
        nas_success = capture.sync_to_nas()
        logger.info(f"NAS sync: {'✅' if nas_success else '❌'}")

    # Summary
    logger.info("\n" + "=" * 60)
    logger.info("📊 CAPTURE SUMMARY")
    logger.info(f"Instagram: {instagram_result.get('items', 0)} items")
    logger.info(f"TikTok: {tiktok_result.get('items', 0)} items")

    # The caller maps this value to the process exit code, so report failure
    # when nothing was captured instead of always claiming success.
    return any_success
|
|
|
|
if __name__ == "__main__":
    # Exit codes: 0 = main() returned a truthy value, 1 = main() returned a
    # falsy value or the user interrupted the run, 2 = an unexpected
    # exception escaped main().
    try:
        success = main()
        # SystemExit is not an Exception subclass, so the handlers below do
        # not intercept this exit.
        sys.exit(0 if success else 1)
    except KeyboardInterrupt:
        logger.info("\nCapture interrupted by user")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: log the full traceback, not just the message,
        # so the failure can be diagnosed from the log file afterwards.
        logger.critical(f"Capture failed: {e}", exc_info=True)
        sys.exit(2)