#!/usr/bin/env python3
"""
Instagram DM Analyzer - Batch Analysis with Claude API
Analyzes all extracted DM conversations and generates lead scores.
"""

import csv
import json
import os
import time
import urllib.error
import urllib.request
from pathlib import Path

# Configuration
API_URL = "https://api.anthropic.com/v1/messages"  # Anthropic Messages API endpoint
MODEL = "claude-sonnet-4-20250514"  # model name sent in every request body
DM_FILE = Path.home() / "Documents" / "instagram_dm_analysis.json"  # input: extracted DM conversations
OUTPUT_FILE = Path.home() / "Documents" / "instagram_dm_leads_analyzed.json"  # output: conversations enriched with analysis

def load_api_key() -> str:
    """Return the Claude API key.

    Prefers the on-disk key at ~/.anthropic/api_key; falls back to the
    ANTHROPIC_API_KEY environment variable. Returns "" if neither is set.
    """
    key_path = Path.home() / ".anthropic" / "api_key"
    if not key_path.exists():
        return os.environ.get("ANTHROPIC_API_KEY", "")
    return key_path.read_text().strip()

def call_claude(prompt: str, api_key: str) -> dict:
    """Call the Claude Messages API with a single user message.

    Args:
        prompt: The user prompt to send.
        api_key: Anthropic API key placed in the ``x-api-key`` header.

    Returns:
        The model's text output parsed as JSON (a list or dict); if the
        text is not valid JSON, ``{"raw_response": text}``; on any API or
        network failure, ``{"error": message}``. Never raises.
    """
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01"
    }

    data = json.dumps({
        "model": MODEL,
        "max_tokens": 2048,
        "messages": [{"role": "user", "content": prompt}]
    }).encode("utf-8")

    req = urllib.request.Request(API_URL, data=data, headers=headers, method="POST")

    try:
        with urllib.request.urlopen(req, timeout=60) as response:
            result = json.loads(response.read().decode("utf-8"))
            # First content block is assumed to be the text answer
            # (true for plain text-only requests like this one).
            text = result["content"][0]["text"]
            return _parse_model_json(text)
    except urllib.error.HTTPError as e:
        return {"error": f"API Error {e.code}: {e.read().decode()}"}
    except Exception as e:
        # Catch-all boundary: URL/timeout/socket errors and malformed
        # API payloads (KeyError/IndexError above) all become an error dict.
        return {"error": str(e)}


def _parse_model_json(text: str):
    """Parse model output as JSON, stripping a markdown code fence if present.

    Returns the parsed JSON value, or ``{"raw_response": text}`` when the
    (de-fenced) text is not valid JSON.
    """
    clean_text = text.strip()
    if clean_text.startswith("```"):
        lines = clean_text.split("\n")
        # Remove opening ```json or ```
        if lines[0].startswith("```"):
            lines = lines[1:]
        # Remove closing ``` — guard against an empty list so a lone
        # fence line cannot raise IndexError (the old bare `except:`
        # silently masked that case).
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        clean_text = "\n".join(lines)
    try:
        return json.loads(clean_text)
    except json.JSONDecodeError:
        return {"raw_response": text}

def analyze_conversations(conversations: list, api_key: str) -> list:
    """Analyze all conversations with Claude.

    Builds one batch prompt summarizing every conversation, sends it to
    the API, and returns the parsed list of analyses (empty on failure).
    """
    # One numbered summary per conversation, concatenated into the prompt.
    summaries = []
    for idx, conv in enumerate(conversations, start=1):
        badge = "✓ VERIFICADO" if conv.get("isVerified") else ""
        preview = conv.get("lastMessagePreview", "")
        when = conv.get("lastMessageTime", "")
        handle = conv.get("participantUsername", "Unknown")

        summaries.append(f"""
{idx}. @{handle} {badge}
   Última mensagem: "{preview}"
   Tempo: {when}
""")

    prompt = f"""Analise estas {len(conversations)} conversas do Instagram Direct e classifique cada uma.

CONVERSAS:
{"".join(summaries)}

Para CADA conversa, retorne um JSON array com objetos contendo:
- username: string
- leadScore: 0-100 (baseado no preview e contexto)
- sentiment: "positive"|"neutral"|"negative"|"mixed"
- intent: "lead"|"support"|"collaboration"|"personal"|"business"|"spam"
- urgency: "low"|"medium"|"high"|"critical"
- summary: resumo curto em português (max 50 palavras)
- priority: "high"|"medium"|"low" (prioridade de resposta)

CRITÉRIOS DE LEAD SCORE:
- 80-100: Pedido explícito de orçamento, compra, contratação
- 60-79: Interesse claro em trabalho/serviços, perguntas sobre preço
- 40-59: Engajamento positivo, potencial interesse
- 20-39: Conversa casual mas engajada
- 0-19: Spam, sem contexto, ou conversa pessoal sem potencial comercial

CONTAS VERIFICADAS têm +10 no lead score base (mais credibilidade/influência).

Retorne APENAS o JSON array, sem markdown ou explicações."""

    print(f"Enviando {len(conversations)} conversas para Claude...")
    result = call_claude(prompt, api_key)

    # A JSON array is the expected, well-formed answer.
    if isinstance(result, list):
        return result

    if "error" in result:
        print(f"Erro: {result['error']}")
    else:
        print(f"Resposta inesperada: {result}")
    return []

def main():
    """Entry point: load extracted DMs, score them with Claude in batches,
    print a lead report, and write enriched JSON plus a CSV summary.
    """
    print("=" * 60)
    print("Instagram DM Analyzer - Batch Analysis")
    print("=" * 60)

    # Load API key (key file first, then environment variable).
    api_key = load_api_key()
    if not api_key:
        print("ERRO: API key não encontrada!")
        print("Configure em ~/.anthropic/api_key ou ANTHROPIC_API_KEY")
        return

    print(f"API Key: ...{api_key[-8:]}")

    # Load previously extracted conversations.
    if not DM_FILE.exists():
        print(f"ERRO: Arquivo não encontrado: {DM_FILE}")
        return

    with open(DM_FILE, encoding="utf-8") as f:
        data = json.load(f)

    conversations = data.get("conversations", [])
    print(f"Conversas carregadas: {len(conversations)}")

    # Count verified accounts (also reported in the output JSON).
    verified_count = sum(1 for c in conversations if c.get("isVerified"))
    print(f"Contas verificadas: {verified_count}")

    # Analyze in batches of 20 (to avoid token limits).
    all_analyses = []
    batch_size = 20

    for i in range(0, len(conversations), batch_size):
        batch = conversations[i:i+batch_size]
        print(f"\nAnalisando batch {i//batch_size + 1}/{(len(conversations)-1)//batch_size + 1}...")

        analyses = analyze_conversations(batch, api_key)

        if analyses:
            all_analyses.extend(analyses)
            print(f"  -> {len(analyses)} análises recebidas")

        # Rate limiting: pause between batches, but not after the last one.
        if i + batch_size < len(conversations):
            time.sleep(1)

    # Merge analyses back into conversations, matched by lowercased username.
    analysis_map = {a.get("username", "").lower(): a for a in all_analyses}

    for conv in conversations:
        username = conv.get("participantUsername", "").lower()
        if username in analysis_map:
            conv["analysis"] = analysis_map[username]

    # Highest lead score first; unanalyzed conversations score 0.
    conversations.sort(key=lambda c: c.get("analysis", {}).get("leadScore", 0), reverse=True)

    high_leads, medium_leads = _print_report(conversations)

    # Save enriched data. time.gmtime() makes the trailing "Z" (UTC
    # designator) truthful — previously local time was stamped as UTC.
    output = {
        "analyzedAt": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "totalConversations": len(conversations),
        "highLeads": len(high_leads),
        "mediumLeads": len(medium_leads),
        "verifiedAccounts": verified_count,
        "conversations": conversations
    }

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2, ensure_ascii=False)

    print(f"\nResultados salvos em: {OUTPUT_FILE}")

    # Quick CSV export for spreadsheet triage.
    csv_file = Path.home() / "Documents" / "instagram_dm_leads.csv"
    _export_csv(conversations, csv_file)

    print(f"CSV exportado: {csv_file}")
    print("\nAnálise concluída!")


def _print_report(conversations: list) -> tuple:
    """Print the lead report to stdout; return (high_leads, medium_leads)."""
    print("\n" + "=" * 60)
    print("RELATÓRIO DE LEADS")
    print("=" * 60)

    high_leads = [c for c in conversations if c.get("analysis", {}).get("leadScore", 0) >= 60]
    medium_leads = [c for c in conversations if 40 <= c.get("analysis", {}).get("leadScore", 0) < 60]

    print(f"\nLEADS QUENTES (score >= 60): {len(high_leads)}")
    for c in high_leads:
        a = c.get("analysis", {})
        verified = "✓" if c.get("isVerified") else ""
        print(f"  [{a.get('leadScore', 0):3d}] @{c['participantUsername']} {verified}")
        print(f"        {a.get('summary', 'N/A')[:60]}...")

    print(f"\nLEADS MORNOS (40-59): {len(medium_leads)}")
    for c in medium_leads:
        a = c.get("analysis", {})
        verified = "✓" if c.get("isVerified") else ""
        print(f"  [{a.get('leadScore', 0):3d}] @{c['participantUsername']} {verified}")

    return high_leads, medium_leads


def _export_csv(conversations: list, csv_file: Path) -> None:
    """Write a one-row-per-conversation CSV summary to *csv_file*.

    Uses csv.writer so any field containing commas, quotes, or newlines
    (usernames, summaries, message previews) is escaped correctly — the
    previous hand-rolled quoting protected only two of the ten columns.
    """
    with open(csv_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            "username", "verified", "lead_score", "sentiment", "intent",
            "urgency", "priority", "summary", "last_message", "time",
        ])
        for c in conversations:
            a = c.get("analysis", {})
            writer.writerow([
                c.get("participantUsername", ""),
                "yes" if c.get("isVerified") else "no",
                a.get("leadScore", 0),
                a.get("sentiment", ""),
                a.get("intent", ""),
                a.get("urgency", ""),
                a.get("priority", ""),
                a.get("summary", ""),
                c.get("lastMessagePreview", ""),
                c.get("lastMessageTime", ""),
            ])

# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
