#!/usr/bin/env python3
"""
Simple Twitter/X Radar Scraper
Lightweight version using only requests + BeautifulSoup
"""

import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime

def scrape_twitter_radar(url):
    """Simple scraper using requests"""

    print(f"🔍 Scraping: {url}")
    print("⚠️  Note: This simple version may not work on JavaScript-heavy pages")
    print("   For better results, use the full scraper with Selenium/Playwright")
    print("")

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
    }

    try:
        print("📡 Sending request...")
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        print(f"✅ Response received: {len(response.text)} bytes")
        print("🔍 Parsing HTML...")

        soup = BeautifulSoup(response.text, 'html.parser')

        # Save the raw HTML for offline inspection (unmodified server response)
        with open('twitter_page_raw.html', 'w', encoding='utf-8') as f:
            f.write(response.text)
        print("💾 Saved raw HTML to twitter_page_raw.html")

        # Try to find tweets
        articles = soup.find_all('article')
        print(f"📊 Found {len(articles)} <article> elements")

        # Try alternative selectors
        divs_with_dir = soup.find_all('div', attrs={'dir': 'auto'})
        print(f"📊 Found {len(divs_with_dir)} <div dir='auto'> elements")

        # Extract text content
        posts = []

        # Method 1: <article> elements (X typically wraps each tweet in one)
        for i, article in enumerate(articles[:5]):  # Limit to first 5
            text_content = article.get_text(strip=True)[:200]
            if text_content:
                posts.append({
                    'method': 'article',
                    'index': i,
                    'text': text_content,
                    'timestamp': datetime.now().isoformat()
                })

        # Method 2: divs with dir=auto (these can sit inside the articles
        # above, so some snippets may duplicate Method 1 text)
        for i, div in enumerate(divs_with_dir[:10]):  # Limit to first 10
            text_content = div.get_text(strip=True)
            if len(text_content) > 50:  # Only substantial text
                posts.append({
                    'method': 'div_dir_auto',
                    'index': i,
                    'text': text_content[:200],
                    'timestamp': datetime.now().isoformat()
                })

        # Save results
        output_file = f"twitter_simple_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(posts, f, indent=2, ensure_ascii=False)

        print(f"\n✅ Extracted {len(posts)} text snippets")
        print(f"💾 Saved to {output_file}")

        # Display sample
        if posts:
            print(f"\n📝 Sample content:")
            for i, post in enumerate(posts[:3], 1):
                print(f"\n{i}. [{post['method']}]")
                print(f"   {post['text'][:100]}...")

        return posts

    except requests.RequestException as e:
        print(f"❌ Request failed: {e}")
        return []
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return []
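
# The notes above steer users toward a Selenium/Playwright-based full scraper
# for JavaScript-heavy pages. The helper below is a minimal sketch of that
# approach, assuming Playwright is installed (`pip install playwright`, then
# `playwright install chromium`). It is an optional illustration only and is
# never called by this script; it is not the full scraper referenced above.
def fetch_rendered_html(url, wait_ms=5000):
    """Sketch: return fully rendered HTML using headless Chromium."""
    from playwright.sync_api import sync_playwright  # lazy import: optional dep

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        page.goto(url)
        page.wait_for_timeout(wait_ms)  # crude wait for client-side rendering
        html = page.content()
        browser.close()
    return html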

if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        print("Usage: python3 twitter_scraper_simple.py <URL>")
        print("\nExample:")
        print("  python3 twitter_scraper_simple.py 'https://x.com/i/radar/1983552532581327145'")
        sys.exit(1)

    url = sys.argv[1]

    print("╔══════════════════════════════════════════════════════════════╗")
    print("║         Twitter/X Simple Scraper (Requests Only)           ║")
    print("╚══════════════════════════════════════════════════════════════╝")
    print()

    scrape_twitter_radar(url)

    print("\n💡 For better results, use the full scraper:")
    print("   python3 twitter_radar_scraper.py '{}' ".format(url))
