diff --git a/src/mailchimp_api_scraper_v2.py b/src/mailchimp_api_scraper_v2.py index 761b1de..6571e7d 100644 --- a/src/mailchimp_api_scraper_v2.py +++ b/src/mailchimp_api_scraper_v2.py @@ -234,16 +234,16 @@ class MailChimpAPIScraper(BaseScraper): content_data = self._fetch_campaign_content(campaign_id) if content_data: plain_text = content_data.get('plain_text', '') - # Clean the content - enriched_campaign['plain_text'] = self._clean_content(plain_text) - # If no plain text, convert HTML - if not enriched_campaign['plain_text'] and content_data.get('html'): - converted = self.convert_to_markdown( + # If no plain text, convert HTML first + if not plain_text and content_data.get('html'): + plain_text = self.convert_to_markdown( content_data['html'], content_type="text/html" ) - enriched_campaign['plain_text'] = self._clean_content(converted) + + # Clean the content (only once, after deciding on source) + enriched_campaign['plain_text'] = self._clean_content(plain_text) # Fetch metrics report_data = self._fetch_campaign_report(campaign_id)