# hvac-kia-content/final_verification.py

#!/usr/bin/env python3
"""
Final verification of the complete MailChimp processing flow
"""

import os
import requests
from dotenv import load_dotenv
import re
from markdownify import markdownify as md

# Load variables from a .env file (if one is found) into the process
# environment at import time, so the os.getenv() lookups in
# test_complete_flow() can see MAILCHIMP_API_KEY / MAILCHIMP_SERVER_PREFIX.
load_dotenv()

def clean_content(content):
    """Strip newsletter boilerplate from *content* and tidy its whitespace.

    Mirrors the ``_clean_content`` logic this script is verifying: each
    boilerplate pattern is removed in order (case-insensitively), then the
    result is stripped of leading/trailing whitespace.

    Falsy input (``None``, ``""``) is returned unchanged.

    NOTE(review): the final ``\\n{3,}`` entry in the removal list deletes
    runs of three or more newlines outright, so ``"A\\n\\n\\nB"`` becomes
    ``"AB"`` and the later collapse-to-two-newlines substitution never
    finds a match. Kept as-is because this function is documented as an
    exact replica; confirm against the real ``_clean_content`` before
    changing either side.
    """
    if not content:
        return content

    # Order matters: patterns are applied one after another, each on the
    # output of the previous substitution.
    boilerplate_patterns = (
        r'VIEW THIS EMAIL IN BROWSER[^\n]*\n?',
        r'\(\*\|ARCHIVE\|\*\)[^\n]*\n?',
        r'https://hvacknowitall\.com/?\n?',
        r'Newsletter produced by Teal Maker[^\n]*\n?',
        r'https://tealmaker\.com[^\n]*\n?',
        r'Copyright \(C\)[^\n]*\n?',
        r'\n{3,}',
    )
    regex_flags = re.MULTILINE | re.IGNORECASE

    text = content
    for expr in boilerplate_patterns:
        text = re.sub(expr, '', text, flags=regex_flags)

    # Collapse any remaining long blank-line runs, then trim the edges.
    return re.sub(r'\n{3,}', '\n\n', text).strip()

def test_complete_flow():
    """Run the fetch -> convert -> clean pipeline against two fixed campaigns.

    For each hard-coded campaign ID this requests
    ``/campaigns/{id}/content`` from the MailChimp API, converts the HTML
    body to Markdown when no plain text is returned, passes the text through
    ``clean_content`` and prints the length at each stage plus a short
    preview of the final text.

    The API key is read from the ``MAILCHIMP_API_KEY`` environment variable
    and the server prefix used in the API hostname from
    ``MAILCHIMP_SERVER_PREFIX`` (default ``us10``).

    Returns:
        None. All results are reported on stdout. A failed request, a
        non-200 status or an undecodable body is reported for that campaign
        and the loop moves on to the next one.
    """
    api_key = os.getenv('MAILCHIMP_API_KEY')
    if not api_key:
        # Without a key every request would be sent as "Bearer None" and be
        # rejected; stop early with an actionable message instead.
        print("MAILCHIMP_API_KEY is not set; cannot query the MailChimp API.")
        return

    server = os.getenv('MAILCHIMP_SERVER_PREFIX', 'us10')

    base_url = f"https://{server}.api.mailchimp.com/3.0"
    headers = {'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'}
    # requests waits indefinitely unless a timeout is given (seconds).
    request_timeout = 30

    # Test specific campaigns: one with content, one without
    test_campaigns = [
        {"id": "b2d24e152c", "name": "Has Content"},
        {"id": "00ffe573c4", "name": "No Content"}
    ]

    for campaign in test_campaigns:
        campaign_id = campaign["id"]
        campaign_name = campaign["name"]

        print(f"\n{'='*60}")
        print(f"TESTING CAMPAIGN: {campaign_name} ({campaign_id})")
        print(f"{'='*60}")

        # Step 1: Get content from API
        try:
            response = requests.get(
                f"{base_url}/campaigns/{campaign_id}/content",
                headers=headers,
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            # Connection errors / timeouts should not abort the other campaign.
            print(f"Request failed: {exc}")
            continue

        if response.status_code != 200:
            print(f"API Error: {response.status_code}")
            continue

        try:
            content_data = response.json()
        except ValueError as exc:
            # requests' JSON decode errors derive from ValueError.
            print(f"Invalid JSON in API response: {exc}")
            continue

        # "or ''" also covers keys that are present but null, which would
        # otherwise break the len() calls below.
        plain_text = content_data.get('plain_text') or ''
        html = content_data.get('html') or ''

        print("1. API Response:")
        print(f"   Plain Text Length: {len(plain_text)}")
        print(f"   HTML Length: {len(html)}")

        # Step 2: Apply our processing logic: convert HTML only when no plain
        # text is available. (The original note cites "lines 236-246",
        # presumably of the module being verified -- TODO confirm.)
        if not plain_text and html:
            print("2. Converting HTML to Markdown...")
            plain_text = md(html, heading_style="ATX", bullets="-")
            print(f"   Converted Length: {len(plain_text)}")
        else:
            print("2. Using Plain Text (no conversion needed)")

        # Step 3: Clean content
        cleaned_text = clean_content(plain_text)
        print("3. After Cleaning:")
        print(f"   Final Length: {len(cleaned_text)}")

        if cleaned_text:
            # Flatten newlines so the preview stays on a single output line.
            preview = cleaned_text[:200].replace('\n', ' ')
            print(f"   Preview: {preview}...")
        else:
            print("   Result: EMPTY (no content to display)")

# Run the verification only when executed as a script, not when imported.
if __name__ == "__main__":
    test_complete_flow()