# Method of class CumulativeMarkdownManager (src/cumulative_markdown_manager.py).
# Indent one level when placing it back inside the class body.
def update_cumulative_file(self, items: List[Dict[str, Any]], source_name: str) -> Path:
    """
    Update cumulative file for a source using a basic formatter.

    This is a compatibility method for scripts that expect this interface.
    Every item is rendered as one markdown section and the formatter is
    handed to ``self.save_cumulative`` together with ``items``.

    Args:
        items: Item dictionaries. Keys read by the formatter: ``id``,
            ``title``/``caption``, ``type``, ``link``/``url``,
            ``author``/``channel``, ``publish_date``/``published``,
            ``views``, ``likes``, ``comments``, ``local_images``,
            ``local_thumbnail`` and ``description``/``caption``.
            All keys are optional.
        source_name: Name of the source; its lowercase form is used as the
            ``Type`` of items that carry no ``type`` of their own.

    Returns:
        The value returned by ``self.save_cumulative(items, formatter)``.
    """
    max_title_length = 100

    def first_value(item: Dict[str, Any], *keys: str, default: Any = '') -> Any:
        """Return the first truthy value found under ``keys``, else ``default``."""
        # dict.get(key, fallback) only falls back when the key is absent;
        # scraped items often carry the key with None or '' instead.
        for key in keys:
            value = item.get(key)
            if value:
                return value
        return default

    def format_count(value: Any) -> str:
        """Format a counter with thousands separators when it is numeric."""
        try:
            return f"{value:,}"
        except (TypeError, ValueError):
            # Counts scraped as text (e.g. "1,234") cannot take the ','
            # format option; emit them unchanged instead of failing.
            return str(value)

    def media_link(path: Any) -> str:
        """Return ``path`` relative to the data directory when possible."""
        try:
            return Path(path).relative_to(self.config.data_dir).as_posix()
        except ValueError:
            # Path lies outside data_dir: keep it as given rather than
            # aborting the whole cumulative update.
            return str(path)

    def basic_formatter(items: List[Dict[str, Any]]) -> str:
        """Basic markdown formatter for any source."""
        sections = []

        for item in items:
            section = []

            # ID
            item_id = item.get('id', 'Unknown')
            section.append(f"# ID: {item_id}")
            section.append("")

            # Title (captions may span several lines; a heading may not)
            raw_title = first_value(item, 'title', 'caption', default='Untitled')
            title = " ".join(str(raw_title).split()) or 'Untitled'
            if len(title) > max_title_length:
                # Truncate very long titles/captions
                title = title[:max_title_length - 3] + "..."
            section.append(f"## Title: {title}")
            section.append("")

            # Type
            item_type = item.get('type') or source_name.lower()
            section.append(f"## Type: {item_type}")
            section.append("")

            # Link, Author/Channel, Publish Date: emitted only when present
            optional_fields = (
                ("Link", ('link', 'url')),
                ("Author", ('author', 'channel')),
                ("Publish Date", ('publish_date', 'published')),
            )
            for label, keys in optional_fields:
                value = first_value(item, *keys)
                if value:
                    section.append(f"## {label}: {value}")
                    section.append("")

            # Views, Likes, Comments: a count of 0 is still reported
            for label, key in (("Views", 'views'), ("Likes", 'likes'), ("Comments", 'comments')):
                count = item.get(key)
                if count is not None:
                    section.append(f"## {label}: {format_count(count)}")
                    section.append("")

            # Local images
            local_images = item.get('local_images') or []
            if local_images:
                section.append(f"## Images Downloaded: {len(local_images)}")
                for i, img_path in enumerate(local_images, 1):
                    section.append(f"![Image {i}]({media_link(img_path)})")
                section.append("")

            # Local thumbnail
            local_thumbnail = item.get('local_thumbnail')
            if local_thumbnail:
                section.append("## Thumbnail:")
                section.append(f"![Thumbnail]({media_link(local_thumbnail)})")
                section.append("")

            # Description/Caption
            description = first_value(item, 'description', 'caption')
            if description:
                section.append("## Description:")
                section.append(str(description))
                section.append("")

            # Separator
            section.append("-" * 50)
            section.append("")

            sections.append('\n'.join(section))

        return '\n'.join(sections)

    return self.save_cumulative(items, basic_formatter)
#!/usr/bin/env python3
"""
Test the CumulativeMarkdownManager fix (test_cumulative_fix.py).

Runs under pytest or directly as a script; as a script the exit status
is 0 on success and 1 on failure.
"""

import sys
import tempfile
from pathlib import Path

# Make the repository root importable when run as a standalone script.
sys.path.insert(0, str(Path(__file__).parent))

from src.cumulative_markdown_manager import CumulativeMarkdownManager  # noqa: E402
from src.base_scraper import ScraperConfig  # noqa: E402


def test_cumulative_manager():
    """Test that the update_cumulative_file method works.

    Raises:
        AssertionError: If the cumulative file is missing or lacks the
            expected ID section. Other exceptions raised by the manager
            propagate, so a test runner reports them as failures instead
            of seeing them swallowed.
    """
    print("Testing CumulativeMarkdownManager fix...")

    # Work in a throwaway directory so repeated runs neither depend on
    # nor leave behind test_data/ and test_logs/ in the working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        data_dir = root / 'test_data'

        # Create test config
        config = ScraperConfig(
            source_name='TestSource',
            brand_name='hkia',
            data_dir=data_dir,
            logs_dir=root / 'test_logs',
            timezone='America/Halifax'
        )

        # Create manager
        manager = CumulativeMarkdownManager(config)

        # Test data
        test_items = [
            {
                'id': 'test123',
                'title': 'Test Post',
                'type': 'test',
                'link': 'https://example.com/test123',
                'author': 'test_user',
                'publish_date': '2025-08-19',
                'views': 1000,
                'likes': 50,
                'comments': 10,
                # Lives under data_dir so it can be made relative to it.
                'local_images': [str(data_dir / 'media' / 'test_image.jpg')],
                'description': 'This is a test post'
            }
        ]

        output_file = manager.update_cumulative_file(test_items, 'TestSource')
        print(f"✅ Success! Created file: {output_file}")

        # Check that the file exists and has content
        assert output_file.exists(), "❌ File was not created"
        content = output_file.read_text(encoding='utf-8')
        print(f"✅ File has {len(content)} characters")

        # Enforce the ID check rather than only printing its result.
        assert '# ID: test123' in content, "❌ ID section missing from output"
        print("✅ Contains ID section: True")


def main() -> int:
    """Run the check as a script and return a process exit status."""
    try:
        test_cumulative_manager()
    except Exception as e:  # script boundary: report, then signal failure
        print(f"❌ Error: {e}")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())