fix: Add missing update_cumulative_file method to CumulativeMarkdownManager

The method was being called by multiple scripts but didn't exist, causing Instagram
capture to fail at post 1200. Added a compatibility method that uses a basic
formatter to handle any source type with standard fields like ID, title, views,
likes, images, etc.

Tested successfully with the new test_cumulative_fix.py script.
This commit is contained in:
Ben Reed 2025-08-19 15:02:36 -03:00
parent 7e5377e7b1
commit 299eb35910
2 changed files with 169 additions and 1 deletions

View file

@ -271,3 +271,104 @@ class CumulativeMarkdownManager:
} }
return stats return stats
def update_cumulative_file(self, items: List[Dict[str, Any]], source_name: str) -> Path:
    """
    Update cumulative file for a source using a basic formatter.

    This is a compatibility method for scripts that expect this interface.
    It builds a generic markdown formatter and passes it, together with
    ``items``, to ``self.save_cumulative``.

    Args:
        items: Item dictionaries. The formatter reads the keys ``id``,
            ``title``/``caption``, ``type``, ``link``/``url``,
            ``author``/``channel``, ``publish_date``/``published``,
            ``views``, ``likes``, ``comments``, ``local_images``,
            ``local_thumbnail`` and ``description``/``caption``; every key
            is optional.
        source_name: Source label; its lower-cased form is used as the
            "Type" value for items that carry no ``type`` key.

    Returns:
        The value returned by ``self.save_cumulative``.
    """
    def format_count(value: Any) -> str:
        """Render a counter with thousands separators when it is numeric."""
        try:
            return f"{value:,}"
        except (TypeError, ValueError):
            # The ',' format spec is rejected for strings and other
            # non-numeric values; emit them verbatim instead of aborting
            # the whole update because of one oddly-typed counter.
            return str(value)

    def display_path(raw_path: Any) -> Path:
        """Return ``raw_path`` relative to the data dir when it lies inside it."""
        try:
            return Path(raw_path).relative_to(self.config.data_dir)
        except ValueError:
            # relative_to() raises for paths outside data_dir; fall back to
            # the path as given so the item is still written.
            return Path(raw_path)

    def add_field(section: List[str], label: str, value: Any) -> None:
        """Append a '## label: value' heading followed by a blank line."""
        section.append(f"## {label}: {value}")
        section.append("")

    def basic_formatter(items: List[Dict[str, Any]]) -> str:
        """Basic markdown formatter for any source."""
        sections = []
        for item in items:
            section = [f"# ID: {item.get('id', 'Unknown')}", ""]

            # Title (falls back to the caption, then to 'Untitled')
            title = item.get('title', item.get('caption', 'Untitled'))
            if title:
                title = str(title)
                # Truncate very long titles/captions
                if len(title) > 100:
                    title = title[:97] + "..."
                add_field(section, "Title", title)

            # Type (defaults to the lower-cased source name)
            add_field(section, "Type", item.get('type', source_name.lower()))

            # Optional text fields, each with an alternative key name
            for label, key, alt_key in (
                ("Link", 'link', 'url'),
                ("Author", 'author', 'channel'),
                ("Publish Date", 'publish_date', 'published'),
            ):
                value = item.get(key, item.get(alt_key, ''))
                if value:
                    add_field(section, label, value)

            # Counters: 0 is a legitimate value, so only None is skipped
            for label, key in (
                ("Views", 'views'),
                ("Likes", 'likes'),
                ("Comments", 'comments'),
            ):
                count = item.get(key)
                if count is not None:
                    add_field(section, label, format_count(count))

            # Local images
            local_images = item.get('local_images', [])
            if local_images:
                section.append(f"## Images Downloaded: {len(local_images)}")
                section.extend(
                    f"![Image {i}]({display_path(img_path)})"
                    for i, img_path in enumerate(local_images, 1)
                )
                section.append("")

            # Local thumbnail
            local_thumbnail = item.get('local_thumbnail')
            if local_thumbnail:
                section.append("## Thumbnail:")
                section.append(f"![Thumbnail]({display_path(local_thumbnail)})")
                section.append("")

            # Description/Caption
            description = item.get('description', item.get('caption', ''))
            if description:
                section.append("## Description:")
                # str() keeps the final join from failing on non-string values
                section.append(str(description))
                section.append("")

            # Separator
            section.append("-" * 50)
            section.append("")
            sections.append('\n'.join(section))
        return '\n'.join(sections)

    return self.save_cumulative(items, basic_formatter)

67
test_cumulative_fix.py Normal file
View file

@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""
Test the CumulativeMarkdownManager fix.
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from src.cumulative_markdown_manager import CumulativeMarkdownManager
from src.base_scraper import ScraperConfig
def test_cumulative_manager():
    """Test that the update_cumulative_file method works.

    Builds a ScraperConfig and a CumulativeMarkdownManager, writes one
    sample item through ``update_cumulative_file`` and inspects the file
    it reports.

    Returns:
        True when the reported file exists and contains the expected
        ``# ID: test123`` heading; False when the file is missing, the
        heading is absent, or any exception is raised along the way.
    """
    print("Testing CumulativeMarkdownManager fix...")

    # Create test config
    config = ScraperConfig(
        source_name='TestSource',
        brand_name='hkia',
        data_dir=Path('test_data'),
        logs_dir=Path('test_logs'),
        timezone='America/Halifax'
    )

    # Create manager
    manager = CumulativeMarkdownManager(config)

    # Test data
    test_items = [
        {
            'id': 'test123',
            'title': 'Test Post',
            'type': 'test',
            'link': 'https://example.com/test123',
            'author': 'test_user',
            'publish_date': '2025-08-19',
            'views': 1000,
            'likes': 50,
            'comments': 10,
            'local_images': ['test_data/media/test_image.jpg'],
            'description': 'This is a test post'
        }
    ]

    try:
        # This should work now
        output_file = manager.update_cumulative_file(test_items, 'TestSource')
        print(f"✅ Success! Created file: {output_file}")

        # Check that the file exists and has content
        if not output_file.exists():
            print("❌ File was not created")
            return False

        content = output_file.read_text()
        has_id_section = '# ID: test123' in content
        marker = "✅" if has_id_section else "❌"
        print(f"✅ File has {len(content)} characters")
        print(f"{marker} Contains ID section: {has_id_section}")
        # The content check decides the result; previously it was only
        # printed and the script exited 0 even when the heading was missing.
        return has_id_section
    except Exception as e:
        # Top-level boundary of a standalone check script: report and fail.
        print(f"❌ Error: {e}")
        return False
if __name__ == "__main__":
    # Map the boolean test outcome onto a process exit status
    # (0 on success, 1 on failure).
    exit_code = 0 if test_cumulative_manager() else 1
    sys.exit(exit_code)