#!/usr/bin/env python3
"""
YouTube Channel Video Extractor via Safari Developer Tools
Extrai todos os vídeos de um canal do YouTube usando Safari + AppleScript
Exporta em TXT, CSV e JSON
"""

import subprocess
import json
import csv
import time
import sys
import re
from pathlib import Path

def run_applescript(script: str) -> str:
    """Executa AppleScript e retorna o resultado."""
    result = subprocess.run(
        ['osascript', '-e', script],
        capture_output=True,
        text=True
    )
    if result.returncode != 0:
        raise Exception(f"AppleScript error: {result.stderr}")
    return result.stdout.strip()

def open_safari_url(url: str):
    """Abre URL no Safari."""
    script = f'''
    tell application "Safari"
        activate
        if (count of windows) = 0 then
            make new document
        end if
        set URL of document 1 to "{url}"
    end tell
    '''
    run_applescript(script)

def execute_js_in_safari(js_code: str) -> str:
    """Executa JavaScript no Safari e retorna resultado."""
    # Escape aspas para AppleScript
    js_escaped = js_code.replace('\\', '\\\\').replace('"', '\\"')
    script = f'''
    tell application "Safari"
        do JavaScript "{js_escaped}" in document 1
    end tell
    '''
    return run_applescript(script)
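
# A minimal sanity check, assuming a page is already open in Safari:
#   print(execute_js_in_safari("document.title"))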

def wait_for_page_load(timeout: int = 30):
    """Aguarda a página carregar."""
    print("⏳ Aguardando página carregar...")
    time.sleep(3)

    for _ in range(timeout):
        try:
            state = execute_js_in_safari("document.readyState")
            if state == "complete":
                return True
        except Exception:
            pass
        time.sleep(1)
    return False
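
# Note: readyState == "complete" only covers the initial document load;
# YouTube keeps rendering after that, so main() sleeps again before
# querying the DOM.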

def scroll_to_load_all_videos():
    """Faz scroll para carregar todos os vídeos (lazy loading)."""
    print("📜 Fazendo scroll para carregar todos os vídeos...")

    js_scroll = '''
    (function() {
        // Fire-and-forget: keep scrolling to the bottom until the page
        // height stops growing for 5 consecutive ticks. The result is
        // not awaited here; the Python side polls the video count below.
        let lastHeight = 0;
        let sameCount = 0;

        const interval = setInterval(() => {
            window.scrollTo(0, document.documentElement.scrollHeight);

            const currentHeight = document.documentElement.scrollHeight;
            if (currentHeight === lastHeight) {
                sameCount++;
                if (sameCount >= 5) {
                    clearInterval(interval);
                }
            } else {
                sameCount = 0;
                lastHeight = currentHeight;
            }
        }, 1000);
    })();
    'scrolling'
    '''

    execute_js_in_safari(js_scroll)

    # Poll until the rendered video count stops growing
    last_count = 0
    stable_count = 0

    while stable_count < 5:
        time.sleep(2)
        try:
            count = int(execute_js_in_safari(
                "document.querySelectorAll('ytd-rich-item-renderer, ytd-grid-video-renderer').length"
            ))
            print(f"   Vídeos carregados: {count}")

            if count == last_count:
                stable_count += 1
            else:
                stable_count = 0
                last_count = count
                # Keep scrolling
                execute_js_in_safari("window.scrollTo(0, document.documentElement.scrollHeight)")
        except Exception:
            stable_count += 1

    print(f"✅ Total de vídeos encontrados: {last_count}")

def extract_videos() -> list:
    """Extrai todos os vídeos da página."""
    print("🔍 Extraindo informações dos vídeos...")

    js_extract = '''
    (function() {
        const videos = [];

        // Try several selectors (YouTube's markup changes frequently)
        const selectors = [
            'ytd-rich-item-renderer #video-title-link',
            'ytd-grid-video-renderer #video-title',
            'ytd-rich-grid-media #video-title-link',
            '#contents ytd-rich-item-renderer a#video-title-link',
            '#contents a#video-title'
        ];

        let elements = [];
        for (const selector of selectors) {
            elements = document.querySelectorAll(selector);
            if (elements.length > 0) break;
        }

        elements.forEach((el, index) => {
            const title = el.textContent?.trim() || el.getAttribute('title') || '';
            let href = el.getAttribute('href') || '';

            if (href && !href.startsWith('http')) {
                href = 'https://www.youtube.com' + href;
            }

            if (title && href && href.includes('/watch')) {
                videos.push({
                    index: index + 1,
                    title: title,
                    url: href
                });
            }
        });

        return JSON.stringify(videos);
    })();
    '''

    result = execute_js_in_safari(js_extract)

    try:
        videos = json.loads(result)
        return videos
    except json.JSONDecodeError:
        print(f"⚠️  Erro ao parsear JSON: {result[:200]}")
        return []
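
# Shape of the list returned by extract_videos(), per the JS above:
#   [{"index": 1, "title": "...", "url": "https://www.youtube.com/watch?v=..."}, ...]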

def get_channel_name() -> str:
    """Obtém o nome do canal."""
    try:
        js = "document.querySelector('#channel-name, #text.ytd-channel-name, yt-formatted-string.ytd-channel-name')?.textContent?.trim() || 'unknown'"
        return execute_js_in_safari(js)
    except Exception:
        return "unknown"

def main():
    # Channel URL: first CLI argument, or the default below
    if len(sys.argv) > 1:
        channel_url = sys.argv[1]
    else:
        channel_url = "https://www.youtube.com/@gomesnzt/videos"

    # Make sure we land on the channel's Videos tab
    if not channel_url.endswith('/videos'):
        if channel_url.endswith('/'):
            channel_url += 'videos'
        else:
            channel_url += '/videos'

    print(f"🎬 YouTube Channel Video Extractor")
    print(f"{'='*50}")
    print(f"📺 Canal: {channel_url}\n")

    # Open Safari
    print("🌐 Opening Safari...")
    open_safari_url(channel_url)

    # Wait for the page to load
    if not wait_for_page_load():
        print("❌ Timed out while loading the page")
        return

    time.sleep(3)  # Wait for the initial render

    # Grab the channel name
    channel_name = get_channel_name()
    print(f"📺 Channel: {channel_name}\n")

    # Scroll to load every video
    scroll_to_load_all_videos()

    # Extract the videos
    videos = extract_videos()

    if not videos:
        print("❌ Nenhum vídeo encontrado. Verifique se a página carregou corretamente.")
        return

    # Reverse the order (oldest first; YouTube lists newest first)
    videos.reverse()

    # Print the results
    print(f"\n{'='*50}")
    print("📋 VIDEO LIST (oldest → newest)")
    print(f"{'='*50}\n")

    for i, video in enumerate(videos, 1):
        print(f"{i:3}. {video['title']}")
        print(f"     {video['url']}\n")

    # Extract the channel handle from the URL
    handle_match = re.search(r'@([\w-]+)', channel_url)
    handle = handle_match.group(1) if handle_match else channel_name.replace(' ', '_')

    # Write output to the current user's Downloads folder (avoids a hardcoded path)
    base_path = Path.home() / "Downloads" / handle

    # === TXT ===
    txt_file = f"{base_path}_videos.txt"
    with open(txt_file, 'w', encoding='utf-8') as f:
        f.write(f"Canal: {channel_name}\n")
        f.write(f"Handle: @{handle}\n")
        f.write(f"URL: {channel_url}\n")
        f.write(f"Total: {len(videos)} vídeos\n")
        f.write(f"Ordem: mais antigo → mais recente\n")
        f.write(f"{'='*50}\n\n")

        for i, video in enumerate(videos, 1):
            f.write(f"{i}. {video['title']}\n")
            f.write(f"   {video['url']}\n\n")

    # === CSV ===
    csv_file = f"{base_path}_videos.csv"
    with open(csv_file, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['#', 'Title', 'URL', 'Video ID'])
        for i, video in enumerate(videos, 1):
            # Extract the video ID from the URL
            video_id_match = re.search(r'v=([a-zA-Z0-9_-]+)', video['url'])
            video_id = video_id_match.group(1) if video_id_match else ''
            writer.writerow([i, video['title'], video['url'], video_id])

    # === JSON ===
    json_file = f"{base_path}_videos.json"
    json_data = {
        'channel': {
            'name': channel_name,
            'handle': f'@{handle}',
            'url': channel_url
        },
        'metadata': {
            'total_videos': len(videos),
            'order': 'oldest_to_newest',
            'extracted_at': time.strftime('%Y-%m-%d %H:%M:%S')
        },
        'videos': []
    }

    for i, video in enumerate(videos, 1):
        video_id_match = re.search(r'v=([a-zA-Z0-9_-]+)', video['url'])
        video_id = video_id_match.group(1) if video_id_match else ''
        json_data['videos'].append({
            'index': i,
            'title': video['title'],
            'url': video['url'],
            'video_id': video_id
        })

    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, ensure_ascii=False, indent=2)

    print(f"\n💾 Arquivos salvos:")
    print(f"   📄 TXT:  {txt_file}")
    print(f"   📊 CSV:  {csv_file}")
    print(f"   📋 JSON: {json_file}")
    print(f"\n📊 Total: {len(videos)} vídeos")

if __name__ == "__main__":
    main()
