#!/usr/bin/env python3
"""
Debug WordPress raw content without conversion.
"""

import os
import requests
from requests.auth import HTTPBasicAuth
from dotenv import load_dotenv
import json

load_dotenv()

# Read the WordPress connection settings from the process environment.
# Any of these may be None when the corresponding variable is not set.
api_url = os.getenv('WORDPRESS_API_URL')
username = os.getenv('WORDPRESS_USERNAME')
api_key = os.getenv('WORDPRESS_API_KEY')

print(f"API URL: {api_url}")
print(f"Username: {username}")
# Echo only a short prefix plus the length: enough to confirm which key was
# picked up without writing a large part of the secret to the terminal/logs.
if api_key:
    print(f"API Key: {api_key[:4]}... ({len(api_key)} chars)")
else:
    print("No API key")

# Fetch just one post
# Without a base URL the request would go to the literal string "None/posts"
# and fail with an opaque schema error, so stop early with a clear message.
if not api_url:
    raise SystemExit("WORDPRESS_API_URL is not set; cannot build the request URL")

# Tolerate a trailing slash in the configured base URL (avoids ".../v2//posts").
url = f"{api_url.rstrip('/')}/posts"
params = {
    'per_page': 1,
    'page': 1,
    '_embed': True,
}

# Only send Basic auth when both credentials are present; otherwise the
# request is made anonymously.
auth = HTTPBasicAuth(username, api_key) if username and api_key else None

print(f"\nFetching from: {url}")
print(f"Params: {params}")

try:
    # requests has no default timeout; without one a stalled server would
    # hang this script forever.
    response = requests.get(url, params=params, auth=auth, timeout=30)
except requests.RequestException as e:
    # Covers DNS failures, refused connections, timeouts, invalid URLs, etc.
    raise SystemExit(f"Request failed: {e}") from e
print(f"Status: {response.status_code}")

# Inspect the response: dump the first post as JSON, analyse its rendered
# HTML for non-ASCII characters, save the HTML, then try converting it to
# Markdown with MarkItDown. Every step reports what it did on stdout.
if response.status_code == 200:
    posts = response.json()

    if posts:
        post = posts[0]

        # All artefacts are written under test_data/; create it up front so a
        # fresh checkout does not fail with FileNotFoundError on the first open().
        os.makedirs('test_data', exist_ok=True)

        # Save full post data
        with open('test_data/wordpress_post_raw.json', 'w', encoding='utf-8') as f:
            json.dump(post, f, indent=2, ensure_ascii=False)
        print("\nSaved full post to test_data/wordpress_post_raw.json")

        # Check the content field
        if 'content' in post and 'rendered' in post['content']:
            content = post['content']['rendered']

            print("\nContent details:")
            print(f"  Type: {type(content)}")
            print(f"  Length: {len(content)} characters")

            # Show first 500 chars
            print("\nFirst 500 characters:")
            print("-" * 50)
            print(content[:500])
            print("-" * 50)

            # Look for problematic characters: record position, character,
            # code point and UTF-8 byte form of everything outside ASCII.
            print("\nChecking for special characters...")
            special_chars = [
                (i, char, f"U+{ord(char):04X}", char.encode('utf-8', errors='replace'))
                for i, char in enumerate(content)
                if ord(char) > 127
            ]

            if special_chars:
                print(f"Found {len(special_chars)} non-ASCII characters")
                print("First 10:")
                for pos, char, unicode_point, utf8_bytes in special_chars[:10]:
                    print(f"  Pos {pos}: '{char}' ({unicode_point}) = {utf8_bytes}")

            # Save raw HTML content
            with open('test_data/wordpress_content.html', 'w', encoding='utf-8') as f:
                f.write(content)
            print("\nSaved raw HTML to test_data/wordpress_content.html")

            # Test MarkItDown directly
            print("\nTesting MarkItDown conversion...")
            # Imported here so the fetch/dump steps above still work when
            # markitdown is not installed.
            from markitdown import MarkItDown
            import io

            converter = MarkItDown()

            # Try conversion
            try:
                # Create BytesIO with UTF-8 encoding
                content_bytes = content.encode('utf-8')
                print(f"Encoded to UTF-8: {len(content_bytes)} bytes")

                stream = io.BytesIO(content_bytes)
                print("Created BytesIO stream")

                # A bare byte stream carries no filename, so tell MarkItDown
                # explicitly that this is HTML rather than relying on content
                # sniffing of a (possibly fragmentary) HTML body.
                result = converter.convert_stream(stream, file_extension=".html")
                print(f"Conversion result type: {type(result)}")
                print(f"Has text_content: {hasattr(result, 'text_content')}")

                if hasattr(result, 'text_content'):
                    md_content = result.text_content
                    print(f"Markdown length: {len(md_content)} characters")

                    # Save markdown
                    with open('test_data/wordpress_content.md', 'w', encoding='utf-8') as f:
                        f.write(md_content)
                    print("Saved markdown to test_data/wordpress_content.md")

                    # Show first 500 chars of markdown
                    print("\nFirst 500 chars of markdown:")
                    print("-" * 50)
                    print(md_content[:500])

            except Exception as e:
                # Deliberately broad: this is a debugging aid and the goal is
                # to surface whatever the converter raises, with a traceback.
                print(f"❌ Conversion failed: {e}")
                import traceback
                traceback.print_exc()
        else:
            # Make the missing field visible instead of exiting silently.
            print("\nPost has no content.rendered field; nothing to analyse")
    else:
        # A 200 with an empty list is a valid API answer (e.g. no published posts).
        print("\nNo posts returned by the API")

else:
    print(f"Failed to fetch posts: {response.status_code}")
    print(response.text)