hvac-kia-content/create_instagram_incremental.py
Ben Reed daab901e35 refactor: Update naming convention from hvacknowitall to hkia
Major Changes:
- Updated all code references from hvacknowitall/hvacnkowitall to hkia
- Renamed all existing markdown files to use hkia_ prefix
- Updated configuration files, scrapers, and production scripts
- Modified systemd service descriptions to use HKIA
- Changed NAS sync path to /mnt/nas/hkia

Files Updated:
- 20+ source files updated with new naming convention
- 34 markdown files renamed to hkia_* format
- All ScraperConfig brand_name parameters now use 'hkia'
- Documentation updated to reflect new naming

Rationale:
- Shorter, cleaner filenames
- Consistent branding across all outputs
- Easier to type and reference
- Maintains same functionality with improved naming

Next Steps:
- Deploy updated services to production
- Update any external references to old naming
- Monitor scrapers to ensure proper operation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-19 13:35:23 -03:00

122 lines
No EOL
4.3 KiB
Python

#!/usr/bin/env python3
"""
Create incremental Instagram markdown file from running process without losing progress.
This script safely generates output from whatever the running Instagram scraper has collected so far.
"""
import os
import sys
import time
from pathlib import Path
from datetime import datetime
import pytz
from dotenv import load_dotenv
# Add src to path
sys.path.insert(0, str(Path(__file__).parent / 'src'))
from base_scraper import ScraperConfig
from instagram_scraper import InstagramScraper
def _find_running_scraper(ps_output):
    """Return the first process line that looks like a running Instagram scraper.

    A line matches when it contains ``instagram_scraper`` (case-insensitive),
    contains ``python``, and does not contain ``grep``.

    Args:
        ps_output: Raw text output of ``ps aux``.

    Returns:
        The stripped matching line, or an empty string when nothing matches.
    """
    for line in ps_output.split('\n'):
        if 'instagram_scraper' in line.lower() and 'python' in line and 'grep' not in line:
            return line.strip()
    return ''


def create_incremental_output():
    """Create incremental output without interfering with running process.

    Steps:
      1. Load environment variables via ``load_dotenv()``.
      2. Scan ``ps aux`` for a running Instagram scraper; return early (after
         printing a warning) when none is found.
      3. Build a separate ``InstagramScraper`` with its own session file,
         fetch at most 20 recent posts and format them as markdown.
      4. Write the markdown to
         ``data/markdown_current/hkia_instagram_incremental_<timestamp>.md``,
         where the timestamp is taken in the America/Halifax timezone.

    Any exception raised while scraping or writing is caught and reported on
    stdout; it is not re-raised.

    Returns:
        None. All results are reported via ``print`` and the output file.
    """
    # Single source of truth for the brand prefix so the scraper config and
    # the output filename always agree with the repository-wide 'hkia' naming.
    brand_name = 'hkia'

    print("=== INSTAGRAM INCREMENTAL OUTPUT ===")
    print("Safely creating incremental markdown without stopping running process")
    print()

    # Load environment
    load_dotenv()

    # Check if Instagram scraper is running
    import subprocess
    result = subprocess.run(
        ["ps", "aux"],
        capture_output=True,
        text=True
    )
    scraper_process = _find_running_scraper(result.stdout)
    if not scraper_process:
        print("⚠️ No running Instagram scraper detected")
        print(" This script is designed to work with a running scraper process")
        return
    print(f"✓ Found running Instagram scraper: {scraper_process}")

    # Get Atlantic timezone timestamp
    tz = pytz.timezone('America/Halifax')
    now = datetime.now(tz)
    timestamp = now.strftime('%Y-%m-%dT%H%M%S')
    print(f"Creating incremental output at: {now.strftime('%Y-%m-%d %H:%M:%S %Z')}")
    print()

    # Setup config - use a distinct source name to avoid conflicts
    config = ScraperConfig(
        source_name='instagram_incremental',
        brand_name=brand_name,
        data_dir=Path('data'),
        logs_dir=Path('logs'),
        timezone='America/Halifax'
    )

    try:
        # Create a separate scraper instance with different session
        scraper = InstagramScraper(config)

        # Override session file to avoid conflicts with running process.
        # NOTE(review): this only helps if InstagramScraper reads
        # `session_file` after construction -- confirm against the scraper.
        scraper.session_file = scraper.session_file.parent / f'{scraper.username}_incremental.session'

        print("Initializing separate Instagram connection for incremental output...")

        # Keep the request small to avoid rate limiting conflicts
        print("Fetching recent posts for incremental output (max 20 to avoid conflicts)...")
        items = scraper.fetch_content(max_posts=20)

        if items:
            # Format as markdown
            markdown_content = scraper.format_markdown(items)

            # Save with incremental naming
            output_file = Path('data/markdown_current') / f'{brand_name}_instagram_incremental_{timestamp}.md'
            output_file.parent.mkdir(parents=True, exist_ok=True)
            output_file.write_text(markdown_content, encoding='utf-8')

            print()
            print("=" * 60)
            print("INSTAGRAM INCREMENTAL OUTPUT CREATED")
            print("=" * 60)
            print(f"Posts captured: {len(items)}")
            print(f"Output file: {output_file}")
            print("=" * 60)
            print()
            print("NOTE: This is a sample of recent posts.")
            print("The main backlog process is still running and will create")
            print("a complete file with all 1000 posts when finished.")
        else:
            print("❌ No Instagram posts captured for incremental output")
            print(" This may be due to rate limiting or session conflicts")
            print(" The main backlog process should continue normally")

    except Exception as e:
        # Deliberate best-effort: report the failure and leave the main
        # scraper process untouched rather than propagating the error.
        print(f"❌ Error creating incremental output: {e}")
        print()
        print("This is expected if the main Instagram process is using")
        print("all available API quota. The main process will continue")
        print("and create the complete output when finished.")
        print()
        print("To check progress of the main process:")
        print(" tail -f logs/instagram.log")
# Script entry point: produce the incremental output only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    create_incremental_output()