Production Readiness Improvements:
- Fixed scheduling to match spec (8 AM & 12 PM ADT instead of 6 AM/6 PM)
- Enabled NAS synchronization in production runner with error handling
- Fixed file naming convention to spec format (hvacknowitall_combined_YYYY-MM-DD-THHMMSS.md)
- Made systemd services portable (removed hardcoded user/paths)
- Added environment variable validation on startup
- Moved DISPLAY/XAUTHORITY to .env configuration

Systemd Improvements:
- Created template service file (@.service) for any user
- Changed all paths to /opt/hvac-kia-content
- Updated installation script for portable deployment
- Fixed service dependencies and resource limits

Documentation:
- Created comprehensive PRODUCTION_TODO.md with 25 tasks
- Added PRODUCTION_GUIDE.md with deployment instructions
- Documented spec compliance gaps (65% complete)

Remaining work includes retry logic, connection pooling, media downloads, and pytest test suite as documented in PRODUCTION_TODO.md

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
123 lines
No EOL
4.3 KiB
Python
123 lines
No EOL
4.3 KiB
Python
#!/usr/bin/env python3
"""
Debug WordPress raw content without conversion.

Fetches a single post from ``{WORDPRESS_API_URL}/posts``, saves the raw JSON
and the rendered HTML under ``test_data/``, reports the non-ASCII characters
found in the HTML, and then runs the HTML through MarkItDown so the raw input
and the converted markdown can be compared.

Configuration is read from the environment (a ``.env`` file is loaded first):
``WORDPRESS_API_URL``, ``WORDPRESS_USERNAME`` and ``WORDPRESS_API_KEY``.
"""

import io
import json
import os
import sys
import traceback

import requests
from dotenv import load_dotenv
from requests.auth import HTTPBasicAuth

OUTPUT_DIR = 'test_data'
RAW_POST_PATH = 'test_data/wordpress_post_raw.json'
RAW_HTML_PATH = 'test_data/wordpress_content.html'
MARKDOWN_PATH = 'test_data/wordpress_content.md'

# Seconds to wait for the server. requests applies no timeout by default, so
# without this a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 30


def find_non_ascii(text):
    """Return ``(index, char, "U+XXXX", utf8_bytes)`` for each non-ASCII char in *text*."""
    return [
        (pos, ch, f"U+{ord(ch):04X}", ch.encode('utf-8', errors='replace'))
        for pos, ch in enumerate(text)
        if ord(ch) > 127
    ]


def fetch_first_post(api_url, username, api_key):
    """Request one post from the API and return it as a dict.

    Returns ``None`` (after printing the reason) when the request fails, the
    status is not 200, the body is not JSON, or no post is returned.
    """
    # Tolerate a trailing slash in the configured URL so we never request "//posts".
    url = f"{api_url.rstrip('/')}/posts"
    params = {
        'per_page': 1,
        'page': 1,
        '_embed': True,
    }
    # Only authenticate when both halves of the credential are configured.
    auth = HTTPBasicAuth(username, api_key) if username and api_key else None

    print(f"\nFetching from: {url}")
    print(f"Params: {params}")

    try:
        response = requests.get(url, params=params, auth=auth, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return None

    print(f"Status: {response.status_code}")
    if response.status_code != 200:
        print(f"Failed to fetch posts: {response.status_code}")
        print(response.text)
        return None

    try:
        posts = response.json()
    except ValueError as exc:  # requests' JSON decode error subclasses ValueError
        print(f"Response body is not valid JSON: {exc}")
        return None

    if not isinstance(posts, list) or not posts:
        print("No posts returned by the API")
        return None

    return posts[0]


def report_content(content):
    """Print type, length, a 500-character preview and the non-ASCII characters of *content*."""
    print("\nContent details:")
    print(f"  Type: {type(content)}")
    print(f"  Length: {len(content)} characters")

    print("\nFirst 500 characters:")
    print("-" * 50)
    print(content[:500])
    print("-" * 50)

    print("\nChecking for special characters...")
    special_chars = find_non_ascii(content)
    if special_chars:
        print(f"Found {len(special_chars)} non-ASCII characters")
        print("First 10:")
        for pos, char, unicode_point, utf8_bytes in special_chars[:10]:
            print(f"  Pos {pos}: '{char}' ({unicode_point}) = {utf8_bytes}")


def convert_with_markitdown(content):
    """Convert *content* with MarkItDown and save the markdown; return True on success.

    Any exception raised during conversion or saving is printed together with
    its traceback instead of propagating, because surfacing that failure is
    the purpose of this script.
    """
    print("\nTesting MarkItDown conversion...")
    # Imported lazily so the fetch/dump steps still work when markitdown is
    # not installed.
    from markitdown import MarkItDown

    converter = MarkItDown()

    try:
        # MarkItDown is fed a byte stream, so encode the HTML explicitly as UTF-8.
        content_bytes = content.encode('utf-8')
        print(f"Encoded to UTF-8: {len(content_bytes)} bytes")

        stream = io.BytesIO(content_bytes)
        print("Created BytesIO stream")

        result = converter.convert_stream(stream)
        print(f"Conversion result type: {type(result)}")
        print(f"Has text_content: {hasattr(result, 'text_content')}")

        if hasattr(result, 'text_content'):
            md_content = result.text_content
            print(f"Markdown length: {len(md_content)} characters")

            with open(MARKDOWN_PATH, 'w', encoding='utf-8') as f:
                f.write(md_content)
            print("Saved markdown to test_data/wordpress_content.md")

            print("\nFirst 500 chars of markdown:")
            print("-" * 50)
            print(md_content[:500])
    except Exception as e:  # debug boundary: report everything, never re-raise
        print(f"❌ Conversion failed: {e}")
        traceback.print_exc()
        return False

    return True


def main():
    """Run the debug session; return 0 on success and 1 on any reported failure."""
    load_dotenv()

    # Get credentials
    api_url = os.getenv('WORDPRESS_API_URL')
    username = os.getenv('WORDPRESS_USERNAME')
    api_key = os.getenv('WORDPRESS_API_KEY')

    print(f"API URL: {api_url}")
    print(f"Username: {username}")
    print(f"API Key: {api_key[:10]}..." if api_key else "No API key")

    if not api_url:
        # Without this check the request would go to the literal URL "None/posts".
        print("WORDPRESS_API_URL is not set; nothing to fetch")
        return 1

    post = fetch_first_post(api_url, username, api_key)
    if post is None:
        return 1

    # The dump files live in test_data/, which may not exist on a fresh checkout.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Save full post data
    with open(RAW_POST_PATH, 'w', encoding='utf-8') as f:
        json.dump(post, f, indent=2, ensure_ascii=False)
    print("\nSaved full post to test_data/wordpress_post_raw.json")

    # Check the content field
    if 'content' not in post or 'rendered' not in post['content']:
        print("Post has no content.rendered field; nothing to inspect")
        return 1
    content = post['content']['rendered']

    report_content(content)

    # Save raw HTML content
    with open(RAW_HTML_PATH, 'w', encoding='utf-8') as f:
        f.write(content)
    print("\nSaved raw HTML to test_data/wordpress_content.html")

    return 0 if convert_with_markitdown(content) else 1


if __name__ == "__main__":
    sys.exit(main())