#!/usr/bin/env python3
"""
FTXUI → translations translator, v2.9 (Full Rich Console Output).
- Diff-aware and cost-optimized.
- AGGRESSIVE Throttling for Free Tier (max ~5-6 RPM).
- Parses JSON streams (Robust) for accurate token usage tracking.
- DEBUG: Prints the full raw output stream as formatted, syntax-highlighted YAML using 'rich'.
- CONSOLE: All logging and output now uses rich markup and styling.
"""

from __future__ import annotations

import argparse
import json
import shutil
import subprocess
import time
import sys
from collections import deque
from pathlib import Path
from typing import Any, Dict, List, Optional
# If 'rich' or 'pyyaml' is not installed, install them via: pip install rich pyyaml
from rich.console import Console
from rich.syntax import Syntax
import yaml

# Initialize rich console once for colored printing
# Using style="dark_sea_green4" for a pleasant terminal default color
console = Console()

# ---------------------------------------------------------------------------
# Config & Constants
# ---------------------------------------------------------------------------
FTXUI_REPO_URL = "git@github.com:ArthurSonzogni/FTXUI.git"
TRANSLATIONS_REPO_URL = "git@github.com:ArthurSonzogni/ftxui-translations.git"
MODEL = "gemini-2.5-flash"

# --- FREE TIER LIMITS (Conservative) ---
LIMIT_RPM = 1_000           # Max requests per minute
LIMIT_TPM = 1_000_000   # Tokens per minute
POST_REQUEST_DELAY = 2 # Seconds to SLEEP after every request.

CPP_EXT = {".cppm", ".cpp", ".hpp", ".h", ".ipp"}
MD_EXT = {".md"}
TRANSLATABLE_EXT = CPP_EXT | MD_EXT

ALLOWED_TOOLS = ",".join([
    "list_directory",
    "read_file",
    "write_file",
    "glob",
    "search_file_content",
    "replace",
    "read_many_files",
])

LANG_NAMES = {
    "fr": "French",
    "it": "Italian",
    "zh-CN": "Simplified Chinese",
    "zh-TW": "Traditional Chinese",
    "zh-HK": "Hong Kong Chinese",
    "ja": "Japanese",
    "es": "Spanish",
    "pt": "Portuguese",
    "de": "German",
    "ru": "Russian",
    "ko": "Korean",
}

CACHE_FILE = "translation_cache.json"

# ---------------------------------------------------------------------------
# Rich Console Helpers
# ---------------------------------------------------------------------------

def print_step(msg: str):
    # Big, bold step indicator
    console.print(f"\n[bold deep_sky_blue1]==> {msg}[/]", highlight=False)

def print_info(msg: str):
    # Standard informational message
    console.print(f"[cyan]  -> {msg}[/cyan]", highlight=False)

def print_success(msg: str):
    # Successful outcome
    console.print(f"[green]  ✓ {msg}[/green]", highlight=False)

def print_warn(msg: str):
    # Warning message
    console.print(f"[yellow]  ! {msg}[/yellow]", highlight=False)

def print_err(msg: str):
    # Critical error message with reverse styling
    console.print(f"[bold white on red]  X {msg}[/]", highlight=False)

# ---------------------------------------------------------------------------
# Rate Limiter
# ---------------------------------------------------------------------------
class RateLimiter:
    def __init__(self, rpm_limit: int, tpm_limit: int):
        self.rpm_limit = rpm_limit
        self.tpm_limit = tpm_limit
        self.requests: deque[float] = deque()
        self.tokens: deque[tuple[float, int]] = deque()
        self.session_requests = 0

    def _cleanup(self, now: float):
        """Remove entries older than 60 seconds."""
        window_start = now - 60.0
        while self.requests and self.requests[0] < window_start:
            self.requests.popleft()
        while self.tokens and self.tokens[0][0] < window_start:
            self.tokens.popleft()

    def wait_for_capacity(self, estimated_tokens: int = 1000):
        while True:
            now = time.time()
            self._cleanup(now)
            
            if len(self.requests) >= self.rpm_limit:
                wait_time = 60.0 - (now - self.requests[0]) + 1.0
                print_warn(f"RPM limit reached ({self.rpm_limit}). Cooling down for {wait_time:.1f}s...")
                time.sleep(wait_time)
                continue

            current_tpm = sum(count for _, count in self.tokens)
            if current_tpm + estimated_tokens > self.tpm_limit:
                if self.tokens:
                    wait_time = 60.0 - (now - self.tokens[0][0]) + 1.0
                    print_warn(f"TPM limit saturation ({current_tpm}/{self.tpm_limit}). Cooling down for {wait_time:.1f}s...")
                    time.sleep(wait_time)
                    continue
            
            break

    def record_usage(self, input_tok: int, output_tok: int):
        now = time.time()
        total = input_tok + output_tok
        self.requests.append(now)
        self.tokens.append((now, total))
        print_info(f"Usage recorded: {total} tokens (In: {input_tok}, Out: {output_tok})")
    
    def increment_session_counter(self):
        self.session_requests += 1

limiter = RateLimiter(LIMIT_RPM, LIMIT_TPM)

# ---------------------------------------------------------------------------
# Prompts
# ---------------------------------------------------------------------------
AGENT_NEW_FILE_PROMPT = """\
You are an autonomous documentation translator. You are translating the FTXUI
C++ library from English into {lang_name} ("{lang_code}").

GOAL
- Translate a single, NEW file to {lang_name} ("{lang_code}").
- The file at {tx_root}/{rel_path} is currently a copy of the English source.
- Translate IN-PLACE.

WORKFLOW
1. Read {tx_root}/{rel_path}
2. Translate ONLY documentation:
   * C++ comments (//, /* ... */)
   * Doxygen comments (///, /** ... */).
   * Prose in Markdown.
3. DO NOT translate/modify:
   * C/C++ code, identifiers, includes, macros.
   * Doxygen commands/params.
   * Markdown code fences/URLs.
4. Overwrite {tx_root}/{rel_path} with the translation.

TOOLS: {allowed_tools}.
"""

AGENT_DIFF_FILE_PROMPT = """\
You are an autonomous documentation translator. You are translating the FTXUI
C++ library from English into {lang_name} ("{lang_code}").

GOAL
- Update existing translation: {tx_root}/{rel_path}
- Target: "{lang_name}" ("{lang_code}").

CONTEXT: SOURCE DIFF
The English source has changed. `git diff`:
```diff
{diff}
```

WORKFLOW
1. Read {tx_root}/{rel_path}
2. Analyze the `diff`.
3. For each changed English section:
   a. Find corresponding old translation.
   b. Generate new translation for the new English text.
   c. Update the file using `replace`.
4. ONLY update text where the source changed.
5. DO NOT translate code.

RULES:
1. Translate ONLY documentation:
   * C++ comments (//, /* ... */)
   * Doxygen comments (///, /** ... */).
   * Prose in Markdown.
2. DO NOT translate/modify:
   * C/C++ code, identifiers, includes, macros.
   * Doxygen commands/params.
   * Markdown code fences/URLs.

TOOLS: {allowed_tools}.
"""

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def run(cmd: List[str], cwd: Path | None = None, check: bool = True) -> str:
    proc = subprocess.run(cmd, cwd=cwd, text=True, capture_output=True)
    if check and proc.returncode != 0:
        raise RuntimeError(f"Command failed: {' '.join(cmd)}\nStderr: {proc.stderr}")
    return proc.stdout.strip()

def ensure_repo(path: Path, url: str) -> None:
    if path.exists() and (path / ".git").is_dir():
        run(["git", "fetch", "--all", "--prune"], cwd=path)
        return
    if path.exists():
        shutil.rmtree(path)
    run(["git", "clone", url, str(path)])

def update_to_head(path: Path) -> None:
    try:
        ref = run(["git", "symbolic-ref", "--short", "refs/remotes/origin/HEAD"], cwd=path)
        default_branch = ref.split("/")[-1]
    except Exception:
        default_branch = "main"
    run(["git", "checkout", default_branch], cwd=path)
    run(["git", "pull", "--ff-only"], cwd=path)

def checkout_or_create_branch(repo: Path, branch: str) -> None:
    if run(["git", "branch", "--list", branch], cwd=repo):
        run(["git", "checkout", branch], cwd=repo)
        run(["git", "pull", "--ff-only"], cwd=repo, check=False)
    elif run(["git", "ls-remote", "--heads", "origin", branch], cwd=repo):
        run(["git", "checkout", "-t", f"origin/{branch}"], cwd=repo)
        run(["git", "pull", "--ff-only"], cwd=repo)
    else:
        run(["git", "checkout", "-b", branch], cwd=repo)

def ensure_gemini() -> None:
    if not shutil.which("gemini"):
        print_err("gemini CLI not found. Install it and set GEMINI_API_KEY.")
        sys.exit(1)

def parse_and_accumulate_usage(line_str: str) -> bool:
    """
    Parses a JSON line from the gemini CLI stream.
    Updates the global limiter if usage data is found, checking for both
    'usageMetadata' (standard) and 'stats' (error summary) formats.
    """
    try:
        data = json.loads(line_str)
    except json.JSONDecodeError:
        return False

    # 1. Look for standard usage keys (camelCase or snake_case)
    usage = data.get("usageMetadata") or data.get("usage_metadata")

    # 2. Look for nested usage (e.g. inside 'result' or 'candidates')
    if not usage and "result" in data and isinstance(data["result"], dict):
        usage = data["result"].get("usageMetadata") or data["result"].get("usage_metadata")
    
    # 3. CRITICAL: Check for 'stats' key in error/result chunks (CLI-specific format)
    if not usage:
        usage = data.get("stats")

    # If found, record it
    if usage:
        # Prioritize standard keys, fall back to 'stats' snake_case keys if needed.
        prompt_tok = usage.get("promptTokenCount") or usage.get("prompt_token_count") or usage.get("input_tokens") or 0
        cand_tok = usage.get("candidatesTokenCount") or usage.get("candidates_token_count") or usage.get("output_tokens") or 0
        total_tok = usage.get("totalTokenCount") or usage.get("total_token_count") or usage.get("total_tokens") or 0
        
        if total_tok > 0:
            console.print() # Newline before recording usage for clean output
            limiter.record_usage(prompt_tok, cand_tok)
    
    if "error" in data:
        console.print()
        print_err(f"API Error: {data['error']}")
    
    return True

def run_gemini_agent(workdir: Path, prompt: str) -> None:
    limiter.wait_for_capacity(estimated_tokens=500)
    limiter.increment_session_counter()

    cmd = [
        "gemini",
        "--model", MODEL,
        f"--allowed-tools={ALLOWED_TOOLS}",
        "--output-format", "stream-json",
        "--approval-mode", "auto_edit",
        prompt,
    ]
    
    print_info(f"Invoking Agent ({MODEL})...")
    
    process = subprocess.Popen(
        cmd, 
        cwd=workdir, 
        stdout=subprocess.PIPE, 
        stderr=subprocess.PIPE, 
        text=True, 
        bufsize=1
    )

    if process.stdout:
        for line in process.stdout:
            line = line.strip()
            if not line:
                continue
            
            try:
                # Load the JSON data
                data = json.loads(line)
                
                # Convert Python object (from JSON) to YAML string for readability
                # Use safe_dump for security and sort_keys=False to maintain streaming order
                yaml_output = yaml.safe_dump(data, indent=2, sort_keys=False)
                
                console.print("--- Agent Stream Chunk (YAML) ---", style="bold magenta")
                # Use rich.syntax.Syntax for colored YAML output with monokai theme
                syntax = Syntax(yaml_output, "yaml", theme="monokai", word_wrap=True)
                console.print(syntax, justify="left")
                
                # Still run the usage tracking logic on the raw line string
                parse_and_accumulate_usage(line)

            except json.JSONDecodeError:
                # Handle non-JSON output (e.g., occasional non-stream lines from the CLI)
                print(f"  [Agent Non-JSON Output] {line}")
            
    _, stderr_str = process.communicate()
    
    # No final print() needed, as usage recording handles the newlines.

    if process.returncode != 0:
        print_err("Gemini Agent failed.")
        console.print(stderr_str)
        raise RuntimeError("Agent execution failed")
    
    print_success("Agent finished task.")

    if POST_REQUEST_DELAY > 0:
        print_info(f"Cooling down for {POST_REQUEST_DELAY}s ...")
        time.sleep(POST_REQUEST_DELAY)

# ---------------------------------------------------------------------------
# Filesystem
# ---------------------------------------------------------------------------

def is_hidden(rel: Path) -> bool:
    return any(p.startswith(".") for p in rel.parts)

def list_repo_files(root: Path) -> List[Path]:
    out: List[Path] = []
    for p in root.rglob("*"):
        if p.is_dir(): continue
        rel = p.relative_to(root)
        if is_hidden(rel): continue
        out.append(rel)
    return out

def load_cache(path: Path) -> Dict[str, Any]:
    if path.exists():
        try:
            with open(path, "r") as f: return json.load(f)
        except: pass
    return {}

def save_cache(path: Path, data: Dict[str, Any]) -> None:
    with open(path, "w") as f: json.dump(data, f, indent=2)

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--langs", nargs="+", required=True, help="Language codes")
    args = parser.parse_args()

    ensure_gemini()
    
    root_dir = Path.cwd()
    build_dir = root_dir / "build_translation"
    build_dir.mkdir(parents=True, exist_ok=True)
    
    ftxui_dir = build_dir / "ftxui"
    tx_dir = build_dir / "translations"

    print_step("Ensuring source repo (FTXUI)...")
    ensure_repo(ftxui_dir, FTXUI_REPO_URL)
    update_to_head(ftxui_dir)
    ftxui_head = run(["git", "rev-parse", "HEAD"], cwd=ftxui_dir)
    print_info(f"FTXUI HEAD: {ftxui_head[:8]}")

    print_step("Ensuring translations repo...")
    ensure_repo(tx_dir, TRANSLATIONS_REPO_URL)
    update_to_head(tx_dir)

    print_step("Scanning files...")
    all_files = list_repo_files(ftxui_dir)
    source_set = {str(p) for p in all_files}
    all_files.sort()

    for lang_code in args.langs:
        lang_name = LANG_NAMES.get(lang_code, "")
        print_step(f"Processing Language: {lang_name} ({lang_code})")

        if not lang_name:
            exit_msg = f"Unknown language code: {lang_code}. Please update LANG_NAMES dictionary."
            print_err(exit_msg)
            sys.exit(1)

        
        checkout_or_create_branch(tx_dir, lang_code)
        
        cache_path = tx_dir / CACHE_FILE
        cache = load_cache(cache_path)
        last_hash = cache.get("last_processed_ftxui_commit")

        if last_hash == ftxui_head:
            print_success("Up to date. Skipping.")
            continue

        changed_set = set()
        if last_hash:
            diff_files = run(["git", "diff", "--name-only", last_hash, ftxui_head], cwd=ftxui_dir, check=False)
            changed_set = set(diff_files.splitlines())
            print_info(f"Changes detected: {len(changed_set)} files changed since {last_hash[:8]}")
        else:
            print_info("No history found. Full scan.")

        for p in tx_dir.rglob("*"):
            if p.is_dir(): continue
            rel = p.relative_to(tx_dir)
            if is_hidden(rel) or str(rel) == CACHE_FILE: continue
            if str(rel) not in source_set:
                print_warn(f"Removing orphan: {rel}")
                p.unlink()

        processed_count = 0
        total_files = len(all_files)

        for idx, rel in enumerate(all_files, 1):
            src = ftxui_dir / rel
            dst = tx_dir / rel
            rel_s = str(rel)
            dst.parent.mkdir(parents=True, exist_ok=True)
            
            is_translatable = rel.suffix.lower() in TRANSLATABLE_EXT
            is_changed = rel_s in changed_set
            dst_exists = dst.exists()

            prefix = f"[{idx}/{total_files}] {rel_s}"

            if not is_translatable:
                if is_changed or not dst_exists:
                    print_info(f"{prefix} -> Copying (Asset)")
                    shutil.copy2(src, dst)
                    processed_count += 1
                continue

            if dst_exists and last_hash and not is_changed:
                continue

            console.print("-" * 60, style="dim")
            
            if not dst_exists or not last_hash:
                print_info(f"{prefix} -> New Translation")
                shutil.copy2(src, dst)
                prompt = AGENT_NEW_FILE_PROMPT.format(
                    tx_root="translations", rel_path=rel_s,
                    lang_code=lang_code, lang_name=lang_name,
                    allowed_tools=ALLOWED_TOOLS
                )
                run_gemini_agent(build_dir, prompt)
                processed_count += 1

            elif is_changed:
                print_info(f"{prefix} -> Updating (Diff-based)")
                diff_out = run(["git", "diff", last_hash, ftxui_head, "--", rel_s], cwd=ftxui_dir)
                
                if len(diff_out) > 20_000:
                    print_warn(f"Diff too large ({len(diff_out)} chars). Copying source for full re-translation.")
                    shutil.copy2(src, dst)
                    prompt = AGENT_NEW_FILE_PROMPT.format(
                        tx_root="translations", rel_path=rel_s,
                        lang_code=lang_code, lang_name=lang_name,
                        allowed_tools=ALLOWED_TOOLS
                    )
                else:
                    prompt = AGENT_DIFF_FILE_PROMPT.format(
                        tx_root="translations", rel_path=rel_s,
                        lang_code=lang_code, lang_name=lang_name,
                        allowed_tools=ALLOWED_TOOLS,
                        diff=diff_out
                    )
                
                run_gemini_agent(build_dir, prompt)
                processed_count += 1

        if processed_count > 0:
            cache["last_processed_ftxui_commit"] = ftxui_head
            save_cache(cache_path, cache)
            
            status = run(["git", "status", "--porcelain"], cwd=tx_dir, check=False)
            if status.strip():
                print_step("Committing changes...")
                run(["git", "add", "-A"], cwd=tx_dir)
                run(["git", "commit", "-m", f"{lang_name}: update translations to {ftxui_head[:8]}"], cwd=tx_dir, check=False)
                run(["git", "push", "--set-upstream", "origin", lang_code], cwd=tx_dir)
                print_success("Pushed.")
            else:
                print_success("No file changes detected after processing.")
        else:
            print_success("Nothing to process.")

    print_step("All Done.")

if __name__ == "__main__":
    main()