Major Changes:
- Updated all code references from hvacknowitall/hvacnkowitall to hkia
- Renamed all existing markdown files to use hkia_ prefix
- Updated configuration files, scrapers, and production scripts
- Modified systemd service descriptions to use HKIA
- Changed NAS sync path to /mnt/nas/hkia

Files Updated:
- 20+ source files updated with new naming convention
- 34 markdown files renamed to hkia_* format
- All ScraperConfig brand_name parameters now use 'hkia'
- Documentation updated to reflect new naming

Rationale:
- Shorter, cleaner filenames
- Consistent branding across all outputs
- Easier to type and reference
- Maintains same functionality with improved naming

Next Steps:
- Deploy updated services to production
- Update any external references to old naming
- Monitor scrapers to ensure proper operation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
94 lines
No EOL
3 KiB
Python
94 lines
No EOL
3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Final verification of the complete MailChimp processing flow
|
|
"""
|
|
|
|
import os
|
|
import requests
|
|
from dotenv import load_dotenv
|
|
import re
|
|
from markdownify import markdownify as md
|
|
|
|
# Load settings from a .env file (when python-dotenv finds one) into the
# process environment so the os.getenv() lookups further down can see them.
load_dotenv()
|
|
|
|
def clean_content(content):
    """Strip newsletter boilerplate from campaign text.

    Stand-in for the scraper's ``_clean_content`` step (per the original
    note; the production method itself is not visible in this file).

    Args:
        content: Campaign text. Falsy values (``None``, ``''``) are returned
            unchanged.

    Returns:
        The text with every boilerplate pattern removed (case-insensitively)
        and surrounding whitespace stripped.
    """
    if not content:
        return content

    # Order matters: patterns are applied one after another.
    boilerplate = (
        r'VIEW THIS EMAIL IN BROWSER[^\n]*\n?',
        r'\(\*\|ARCHIVE\|\*\)[^\n]*\n?',
        r'https://hvacknowitall\.com/?\n?',
        r'Newsletter produced by Teal Maker[^\n]*\n?',
        r'https://tealmaker\.com[^\n]*\n?',
        r'Copyright \(C\)[^\n]*\n?',
        # NOTE(review): this deletes runs of 3+ newlines outright (joining the
        # surrounding text), so the collapse-to-two-newlines step below never
        # finds anything to do. Kept as-is to match the logic being mirrored.
        r'\n{3,}',
    )
    flags = re.IGNORECASE | re.MULTILINE

    text = content
    for regex in boilerplate:
        text = re.compile(regex, flags).sub('', text)

    return re.sub(r'\n{3,}', '\n\n', text).strip()
|
|
|
|
def test_complete_flow():
    """Run the live MailChimp content flow for two known campaigns.

    For each campaign this fetches ``/campaigns/{id}/content``, converts the
    HTML body to Markdown when no plain text is supplied, passes the result
    through ``clean_content`` and prints the length at every stage plus a
    200-character preview.

    Reads ``MAILCHIMP_API_KEY`` and ``MAILCHIMP_SERVER_PREFIX`` (default
    ``'us10'``) from the environment.

    Returns:
        None. All results are printed. A missing API key, a network failure,
        a non-200 response or an undecodable body is reported and skipped
        rather than raised.
    """
    api_key = os.getenv('MAILCHIMP_API_KEY')
    if not api_key:
        # Without a key every request would be sent as "Bearer None".
        print("MAILCHIMP_API_KEY is not set; skipping MailChimp flow check")
        return

    server = os.getenv('MAILCHIMP_SERVER_PREFIX', 'us10')

    base_url = f"https://{server}.api.mailchimp.com/3.0"
    headers = {'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'}
    # requests applies no timeout unless one is given, so a stalled
    # connection would otherwise hang the script.
    request_timeout = 30  # seconds

    # Test specific campaigns: one with content, one without
    test_campaigns = [
        {"id": "b2d24e152c", "name": "Has Content"},
        {"id": "00ffe573c4", "name": "No Content"},
    ]

    for campaign in test_campaigns:
        campaign_id = campaign["id"]
        campaign_name = campaign["name"]

        print(f"\n{'='*60}")
        print(f"TESTING CAMPAIGN: {campaign_name} ({campaign_id})")
        print(f"{'='*60}")

        # Step 1: Get content from API
        try:
            response = requests.get(
                f"{base_url}/campaigns/{campaign_id}/content",
                headers=headers,
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            # One unreachable campaign should not stop the remaining checks.
            print(f"Request Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"API Error: {response.status_code}")
            continue

        try:
            content_data = response.json()
        except ValueError as exc:
            print(f"Invalid JSON in response: {exc}")
            continue

        # "or ''" also covers keys that are present but null, which
        # .get(key, '') would hand back as None and break len() below.
        plain_text = content_data.get('plain_text') or ''
        html = content_data.get('html') or ''

        print("1. API Response:")
        print(f" Plain Text Length: {len(plain_text)}")
        print(f" HTML Length: {len(html)}")

        # Step 2: Apply our processing logic (the original note cites
        # lines 236-246, presumably of the production scraper).
        if not plain_text and html:
            print("2. Converting HTML to Markdown...")
            plain_text = md(html, heading_style="ATX", bullets="-")
            print(f" Converted Length: {len(plain_text)}")
        else:
            print("2. Using Plain Text (no conversion needed)")

        # Step 3: Clean content
        cleaned_text = clean_content(plain_text)
        print("3. After Cleaning:")
        print(f" Final Length: {len(cleaned_text)}")

        if cleaned_text:
            preview = cleaned_text[:200].replace('\n', ' ')
            print(f" Preview: {preview}...")
        else:
            print(" Result: EMPTY (no content to display)")
|
|
|
|
if __name__ == "__main__":
    # Run the live verification only when executed as a script, not on import.
    test_complete_flow()