fix: Add missing update_cumulative_file method to CumulativeMarkdownManager
The method was called by multiple scripts but did not exist, which caused the Instagram capture to fail at post 1200. This commit adds a compatibility method that uses a basic formatter to handle any source type with standard fields such as ID, title, views, likes, and images. Tested successfully with the test_cumulative_fix.py script.
This commit is contained in:
parent
7e5377e7b1
commit
299eb35910
2 changed files with 169 additions and 1 deletions
|
|
@ -270,4 +270,105 @@ class CumulativeMarkdownManager:
|
|||
'file_size_kb': file_path.stat().st_size // 1024
|
||||
}
|
||||
|
||||
return stats
|
||||
return stats
|
||||
|
||||
def update_cumulative_file(self, items: List[Dict[str, Any]], source_name: str) -> Path:
    """
    Update cumulative file for a source using a basic formatter.

    This is a compatibility method for scripts that expect this interface.
    It builds a generic markdown formatter and passes it, together with
    ``items``, to ``self.save_cumulative``.

    Args:
        items: Item dictionaries to render. Recognized keys are ``id``,
            ``title``/``caption``, ``type``, ``link``/``url``,
            ``author``/``channel``, ``publish_date``/``published``,
            ``views``, ``likes``, ``comments``, ``local_images``,
            ``local_thumbnail`` and ``description``/``caption``.
        source_name: Name of the source; its lowercase form is used as
            the ``Type`` of any item that has no ``type`` key.

    Returns:
        The value returned by ``self.save_cumulative``.
    """

    def relative_media_path(raw_path: Any) -> str:
        """Return a media path relative to the data dir for use in a link."""
        media_path = Path(raw_path)
        try:
            media_path = media_path.relative_to(self.config.data_dir)
        except ValueError:
            # The file lives outside data_dir; link to it as given rather
            # than aborting the whole cumulative update.
            pass
        # Forward slashes keep the markdown link valid on every platform.
        return media_path.as_posix()

    def format_count(value: Any) -> str:
        """Format a metric with thousands separators when it is numeric."""
        if isinstance(value, (int, float)):
            return f"{value:,}"
        # Scraped metrics may arrive as strings; emit them unchanged
        # instead of raising on the ',' format specifier.
        return str(value)

    def basic_formatter(items: List[Dict[str, Any]]) -> str:
        """Basic markdown formatter for any source."""
        sections = []

        for item in items:
            section = []

            # ID
            item_id = item.get('id', 'Unknown')
            section.append(f"# ID: {item_id}")
            section.append("")

            # Title (falls back to the caption when no title key exists)
            title = item.get('title', item.get('caption', 'Untitled'))
            if title:
                title = str(title)
                # Truncate very long titles/captions
                if len(title) > 100:
                    title = title[:97] + "..."
                section.append(f"## Title: {title}")
                section.append("")

            # Type
            item_type = item.get('type', source_name.lower())
            section.append(f"## Type: {item_type}")
            section.append("")

            # Optional single-line fields, emitted only when non-empty
            link = item.get('link', item.get('url', ''))
            if link:
                section.append(f"## Link: {link}")
                section.append("")

            author = item.get('author', item.get('channel', ''))
            if author:
                section.append(f"## Author: {author}")
                section.append("")

            pub_date = item.get('publish_date', item.get('published', ''))
            if pub_date:
                section.append(f"## Publish Date: {pub_date}")
                section.append("")

            # Metrics: a value of 0 is still reported, only None is skipped
            for label, key in (
                ("Views", 'views'),
                ("Likes", 'likes'),
                ("Comments", 'comments'),
            ):
                value = item.get(key)
                if value is not None:
                    section.append(f"## {label}: {format_count(value)}")
                    section.append("")

            # Local images
            local_images = item.get('local_images', [])
            if local_images:
                section.append(f"## Images Downloaded: {len(local_images)}")
                for i, img_path in enumerate(local_images, 1):
                    rel_path = relative_media_path(img_path)
                    section.append(f"![Image {i}]({rel_path})")
                section.append("")

            # Local thumbnail
            local_thumbnail = item.get('local_thumbnail')
            if local_thumbnail:
                section.append("## Thumbnail:")
                rel_path = relative_media_path(local_thumbnail)
                section.append(f"![Thumbnail]({rel_path})")
                section.append("")

            # Description/Caption
            description = item.get('description', item.get('caption', ''))
            if description:
                section.append("## Description:")
                section.append(str(description))
                section.append("")

            # Separator
            section.append("-" * 50)
            section.append("")

            sections.append('\n'.join(section))

        return '\n'.join(sections)

    return self.save_cumulative(items, basic_formatter)
|
||||
67
test_cumulative_fix.py
Normal file
67
test_cumulative_fix.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test the CumulativeMarkdownManager fix.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from src.cumulative_markdown_manager import CumulativeMarkdownManager
|
||||
from src.base_scraper import ScraperConfig
|
||||
|
||||
def test_cumulative_manager():
    """Test that the update_cumulative_file method works.

    Builds a CumulativeMarkdownManager from a throwaway ScraperConfig,
    writes one sample item through ``update_cumulative_file`` and inspects
    the file it reports.

    Returns:
        True when the output file exists and contains the expected ID
        heading; False when the file is missing, the heading is absent,
        or any exception is raised along the way.
    """
    print("Testing CumulativeMarkdownManager fix...")

    # Create test config
    config = ScraperConfig(
        source_name='TestSource',
        brand_name='hkia',
        data_dir=Path('test_data'),
        logs_dir=Path('test_logs'),
        timezone='America/Halifax'
    )

    # Create manager
    manager = CumulativeMarkdownManager(config)

    # Test data
    test_items = [
        {
            'id': 'test123',
            'title': 'Test Post',
            'type': 'test',
            'link': 'https://example.com/test123',
            'author': 'test_user',
            'publish_date': '2025-08-19',
            'views': 1000,
            'likes': 50,
            'comments': 10,
            'local_images': ['test_data/media/test_image.jpg'],
            'description': 'This is a test post'
        }
    ]

    try:
        # This should work now
        output_file = manager.update_cumulative_file(test_items, 'TestSource')
        print(f"✅ Success! Created file: {output_file}")

        # Check that the file exists and has content
        if not output_file.exists():
            print("❌ File was not created")
            return False

        content = output_file.read_text()
        print(f"✅ File has {len(content)} characters")

        # The content check decides the result; previously it was only
        # printed and the test passed even when the heading was missing.
        has_id_section = '# ID: test123' in content
        marker = "✅" if has_id_section else "❌"
        print(f"{marker} Contains ID section: {has_id_section}")
        return has_id_section

    except Exception as e:
        # Script-level boundary: report any failure as a failed test run.
        print(f"❌ Error: {e}")
        return False
|
||||
|
||||
if __name__ == "__main__":
    # Map the boolean test result onto a process exit status:
    # 0 when the test passed, 1 when it failed.
    exit_code = 0 if test_cumulative_manager() else 1
    sys.exit(exit_code)
|
||||
Loading…
Reference in a new issue