#!/usr/bin/env python3
"""
Batch scraper for Brazilian banks and fintechs
"""
import sys
import os
import json
import time

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from src.scraper import SafariScraper

# Sites to scrape, grouped by category. The groups are concatenated below in
# priority order, so SITES is a flat, prioritized list.

# Digital banks (most relevant)
_DIGITAL_BANKS = ["inter.co", "c6bank.com.br", "pagbank.com.br", "neon.com.br"]

# Traditional banks
_TRADITIONAL_BANKS = ["bb.com.br", "santander.com.br", "caixa.gov.br"]

# Payment fintechs
_PAYMENT_FINTECHS = ["mercadopago.com.br", "picpay.com", "stone.com.br"]

# Investment platforms
_INVESTMENT_PLATFORMS = ["xpi.com.br", "rico.com.vc"]

SITES = (
    _DIGITAL_BANKS
    + _TRADITIONAL_BANKS
    + _PAYMENT_FINTECHS
    + _INVESTMENT_PLATFORMS
)

def main():
    """Scrape every site in SITES sequentially and write a JSON summary.

    For each site, ``SafariScraper.run`` is called. A result counts as a
    success when it has no ``"error"`` key and does have an ``"analysis"``
    key; token and cost figures are then read from
    ``result["analysis"]["metrics"]`` (missing values count as 0) and added
    to the running totals. Any other result, or any exception raised while
    processing the site, is recorded as a failure so that one bad site does
    not abort the rest of the batch.

    After the loop, a summary is printed and saved to
    ``output/batch_summary.json`` next to this script. The ``output``
    directory is created if it does not exist yet.
    """
    scraper = SafariScraper()
    results = []
    total_cost = 0
    total_tokens = 0
    site_count = len(SITES)

    print(f"\n{'#'*60}")
    print(f"# BATCH SCRAPER - {site_count} sites")
    print(f"{'#'*60}\n")

    for i, site in enumerate(SITES, 1):
        print(f"\n[{i}/{site_count}] Processing: {site}")
        print("-" * 40)

        # Deliberately broad: this is the per-site boundary of a best-effort
        # batch, so any failure is recorded and the loop moves on.
        try:
            result = scraper.run(site)

            if "error" not in result and "analysis" in result:
                metrics = result["analysis"].get("metrics", {})
                cost = metrics.get("total_cost_usd", 0)
                tokens = metrics.get("total_tokens", 0)

                total_cost += cost
                total_tokens += tokens

                results.append({
                    "url": site,
                    "success": True,
                    "tokens": tokens,
                    "cost": cost,
                })

                print(f"✅ Success: {tokens} tokens, ${cost:.4f}")
            else:
                results.append({
                    "url": site,
                    "success": False,
                    "error": result.get("error", "Unknown error"),
                })
                print(f"❌ Failed: {result.get('error', 'Unknown')}")

        except Exception as e:
            results.append({
                "url": site,
                "success": False,
                "error": str(e),
            })
            print(f"❌ Error: {e}")

        # Small delay between requests (skipped after the last site)
        if i < site_count:
            print("Waiting 3s before next...")
            time.sleep(3)

    # Summary
    print(f"\n{'='*60}")
    print("BATCH SUMMARY")
    print(f"{'='*60}")

    successful = sum(1 for r in results if r.get("success"))
    print(f"Successful: {successful}/{site_count}")
    print(f"Total Tokens: {total_tokens:,}")
    print(f"Total Cost: ${total_cost:.4f}")

    # Save summary. Resolve the script directory absolutely (same as the
    # sys.path setup at the top of the file) and make sure the output
    # directory exists, otherwise open() raises FileNotFoundError and the
    # results of the whole batch are lost.
    output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output")
    os.makedirs(output_dir, exist_ok=True)
    summary_path = os.path.join(output_dir, "batch_summary.json")
    with open(summary_path, "w", encoding="utf-8") as f:
        json.dump({
            "total_sites": site_count,
            "successful": successful,
            "total_tokens": total_tokens,
            "total_cost": total_cost,
            "results": results,
        }, f, indent=2)

    print(f"\nSummary saved to: {summary_path}")

# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
