hvac-kia-content/final_verification.py
Ben Reed daab901e35 refactor: Update naming convention from hvacknowitall to hkia
Major Changes:
- Updated all code references from hvacknowitall/hvacnkowitall to hkia
- Renamed all existing markdown files to use hkia_ prefix
- Updated configuration files, scrapers, and production scripts
- Modified systemd service descriptions to use HKIA
- Changed NAS sync path to /mnt/nas/hkia

Files Updated:
- 20+ source files updated with new naming convention
- 34 markdown files renamed to hkia_* format
- All ScraperConfig brand_name parameters now use 'hkia'
- Documentation updated to reflect new naming

Rationale:
- Shorter, cleaner filenames
- Consistent branding across all outputs
- Easier to type and reference
- Maintains same functionality with improved naming

Next Steps:
- Deploy updated services to production
- Update any external references to old naming
- Monitor scrapers to ensure proper operation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-19 13:35:23 -03:00

94 lines
No EOL
3 KiB
Python

#!/usr/bin/env python3
"""
Final verification of the complete MailChimp processing flow
"""
import os
import requests
from dotenv import load_dotenv
import re
from markdownify import markdownify as md
load_dotenv()
def clean_content(content):
    """Strip newsletter boilerplate from text and normalise blank lines.

    Written to replicate the ``_clean_content`` logic used elsewhere in the
    project.

    Args:
        content: Newsletter body text; may be empty or ``None``.

    Returns:
        The text with boilerplate fragments removed, runs of three or more
        newlines collapsed to exactly two, and surrounding whitespace
        stripped. Falsy input (``None`` or ``""``) is returned unchanged.
    """
    if not content:
        return content

    # Boilerplate fragments to delete (matched case-insensitively). Most of
    # them also consume the remainder of their line and its newline.
    #
    # r'\n{3,}' is intentionally NOT part of this list: deleting blank-line
    # runs outright glued neighbouring paragraphs together ("a\n\n\n\nb"
    # became "ab") and left the collapse step below with nothing to do.
    # NOTE(review): if the _clean_content implementation this mirrors also
    # lists r'\n{3,}' among its removals, it likely has the same issue -
    # confirm and fix both together.
    patterns_to_remove = [
        r'VIEW THIS EMAIL IN BROWSER[^\n]*\n?',
        r'\(\*\|ARCHIVE\|\*\)[^\n]*\n?',
        r'https://hvacknowitall\.com/?\n?',
        r'Newsletter produced by Teal Maker[^\n]*\n?',
        r'https://tealmaker\.com[^\n]*\n?',
        r'Copyright \(C\)[^\n]*\n?',
    ]

    cleaned = content
    for pattern in patterns_to_remove:
        cleaned = re.sub(pattern, '', cleaned, flags=re.MULTILINE | re.IGNORECASE)

    # Removing whole lines can leave long runs of blank lines behind; squeeze
    # them down to a single paragraph break.
    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
    return cleaned.strip()
def test_complete_flow():
    """Run the MailChimp content pipeline end-to-end for two known campaigns.

    For each hard-coded campaign ID this fetches
    ``/campaigns/{id}/content``, falls back to converting the HTML body to
    Markdown when no plain text is returned, passes the text through
    ``clean_content`` and prints the length at every stage plus a
    200-character preview of the result.

    Reads ``MAILCHIMP_API_KEY`` and ``MAILCHIMP_SERVER_PREFIX`` (default
    ``us10``) from the environment. Everything is reported via ``print``;
    nothing is returned. A failure on one campaign is reported and the next
    campaign is still processed.
    """
    api_key = os.getenv('MAILCHIMP_API_KEY')
    if not api_key:
        # Otherwise the header below would literally be sent as "Bearer None".
        print("MAILCHIMP_API_KEY is not set; cannot query the MailChimp API")
        return

    server = os.getenv('MAILCHIMP_SERVER_PREFIX', 'us10')
    base_url = f"https://{server}.api.mailchimp.com/3.0"
    headers = {'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'}

    # requests has no default timeout; without one a stalled connection would
    # hang this script forever.
    request_timeout = 30

    # Test specific campaigns: one with content, one without
    test_campaigns = [
        {"id": "b2d24e152c", "name": "Has Content"},
        {"id": "00ffe573c4", "name": "No Content"},
    ]

    for campaign in test_campaigns:
        campaign_id = campaign["id"]
        campaign_name = campaign["name"]

        print(f"\n{'='*60}")
        print(f"TESTING CAMPAIGN: {campaign_name} ({campaign_id})")
        print(f"{'='*60}")

        # Step 1: Get content from API
        try:
            response = requests.get(
                f"{base_url}/campaigns/{campaign_id}/content",
                headers=headers,
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            # Connection errors and timeouts should not abort the other campaign.
            print(f"Request failed: {exc}")
            continue

        if response.status_code != 200:
            print(f"API Error: {response.status_code}")
            continue

        try:
            content_data = response.json()
        except ValueError as exc:
            print(f"Invalid JSON in API response: {exc}")
            continue

        # `or ''` also covers a key that is present but null, which would
        # otherwise make len() below raise TypeError.
        plain_text = content_data.get('plain_text') or ''
        html = content_data.get('html') or ''

        print("1. API Response:")
        print(f"   Plain Text Length: {len(plain_text)}")
        print(f"   HTML Length: {len(html)}")

        # Step 2: Apply our processing logic (lines 236-246)
        if not plain_text and html:
            print("2. Converting HTML to Markdown...")
            plain_text = md(html, heading_style="ATX", bullets="-")
            print(f"   Converted Length: {len(plain_text)}")
        else:
            print("2. Using Plain Text (no conversion needed)")

        # Step 3: Clean content
        cleaned_text = clean_content(plain_text)

        print("3. After Cleaning:")
        print(f"   Final Length: {len(cleaned_text)}")

        if cleaned_text:
            # Flatten newlines so the preview stays on a single console line.
            preview = cleaned_text[:200].replace('\n', ' ')
            print(f"   Preview: {preview}...")
        else:
            print("   Result: EMPTY (no content to display)")
# Script entry point: run the end-to-end check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_complete_flow()