"""A/B test for TTS configuration.

Generates the same Korean sentence across multiple (model, text-format, settings)
configs so you can listen side-by-side and pick the best one before re-synthesizing
the full dataset.

Usage:
    export ELEVENLABS_API_KEY=...
    python -m scripts.compiler.tts_ab_test

Output: data/audio_ab/{emotion}__{config}.mp3
"""
from __future__ import annotations

import asyncio
import os
import subprocess
import tempfile
from pathlib import Path

# Two directory levels above this file's own directory; with the module path
# shown in the usage note (scripts.compiler.tts_ab_test) that is the repo root.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
# Directory that receives the generated A/B clips (created on demand by main()).
OUT_DIR = PROJECT_ROOT / "data" / "audio_ab"

# Test sentences — one per emotion, in Korean
# Each sentence is chosen to have clear emotional content
#
# Keys:
#   emotion  - label used as the output filename prefix
#   vad      - not read anywhere in this file; presumably a
#              valence/arousal/dominance triple (TODO confirm)
#   text     - the sentence to synthesize
#   voice_id - ElevenLabs voice used for every config of this case
#   v3_tag   - bracketed tag prepended to the text for the eleven_v3 configs
TEST_CASES = [
    {
        "emotion": "excitement",
        "vad": [0.8, 0.8, 0.5],
        "text": "오늘 진짜 너무 신나! 빨리 가자!",
        "voice_id": "TbMNBJ27fH2U0VgpSNko",  # joy/female
        "v3_tag": "[excited]",
    },
    {
        "emotion": "sadness",
        "vad": [-0.5, -0.3, -0.5],
        "text": "정말 너무 힘들어. 아무것도 하기 싫어.",
        "voice_id": "m3yAHyFEFKtbCIM5n7GF",  # sadness/female
        "v3_tag": "[sadly]",
    },
    {
        "emotion": "anger",
        "vad": [-0.5, 0.7, 0.5],
        "text": "도대체 뭐 하는 거야! 당장 그만해!",
        "voice_id": "FCdKzv68Ofr4VUDcZXIy",  # anger/female
        "v3_tag": "[angrily]",
    },
    {
        # Reuses the sadness voice; only the sentence and the v3 tag differ.
        "emotion": "crying",
        "vad": [-0.7, -0.5, -0.6],
        "text": "엄마 보고 싶어… 너무 슬퍼…",
        "voice_id": "m3yAHyFEFKtbCIM5n7GF",  # sadness/female
        "v3_tag": "[crying]",
    },
]

# Configs to compare
def build_configs(case: dict):
    """Return the list of synthesis configs to compare for one test case.

    ``case`` must provide ``"v3_tag"`` and ``"text"``. Each returned dict has
    the keys ``name`` (used in the output filename), ``model``, ``text`` (the
    exact prompt sent to the API) and ``settings`` (voice settings).

    The two ``eleven_v3`` variants prepend the emotion tag to the sentence;
    variant A additionally starts the prompt with ``". "``. The two
    ``eleven_multilingual_v2`` variants send the bare sentence and differ only
    in their stability/style values.
    """

    def _voice_settings(stability, style):
        # A fresh dict per config so no two configs share a mutable object.
        return {
            "stability": stability,
            "similarity_boost": 0.75,
            "style": style,
            "use_speaker_boost": True,
        }

    tag = case["v3_tag"]
    sentence = case["text"]
    tagged = f"{tag} {sentence}"

    # (name, model, prompt text, stability, style)
    # NOTE(review): "B_v3_notag_buffer" does carry the tag and has no leading
    # buffer; the name is kept verbatim because it ends up in filenames.
    variants = (
        ("A_v3_leadbuffer", "eleven_v3", f". {tagged}", 0.3, 0.8),
        ("B_v3_notag_buffer", "eleven_v3", tagged, 0.3, 0.8),
        ("C_v2_high_style", "eleven_multilingual_v2", sentence, 0.25, 0.85),
        ("D_v2_low_stability", "eleven_multilingual_v2", sentence, 0.15, 0.95),
    )

    return [
        {
            "name": name,
            "model": model,
            "text": prompt,
            "settings": _voice_settings(stability, style),
        }
        for name, model, prompt, stability, style in variants
    ]


async def synth(api_key: str, voice_id: str, text: str, model: str,
                settings: dict, out_path: Path) -> None:
    """Synthesize one clip with ElevenLabs and write it to ``out_path``.

    The blocking SDK call and the ffmpeg post-processing run in the event
    loop's default executor so several clips can be in flight at once.

    The downloaded MP3 is re-encoded through ffmpeg with 180 ms of leading
    delay and 0.18 s of trailing padding. If ffmpeg is missing or exits with
    an error, the unpadded download is kept as ``out_path`` instead (best
    effort) and a warning is printed. Errors from the ElevenLabs call itself
    propagate to the caller.

    Args:
        api_key: ElevenLabs API key.
        voice_id: Voice to synthesize with.
        text: Prompt text, sent exactly as given.
        model: Model id, forwarded as ``model_id``.
        settings: Voice settings, forwarded as ``voice_settings``.
        out_path: Destination path of the final MP3.
    """
    import shutil

    from elevenlabs.client import ElevenLabs

    # We are inside a coroutine, so a loop is guaranteed to be running;
    # get_event_loop() is the legacy spelling for this situation.
    loop = asyncio.get_running_loop()

    def _sync() -> None:
        client = ElevenLabs(api_key=api_key)
        audio = client.text_to_speech.convert(
            voice_id=voice_id,
            text=text,
            model_id=model,
            output_format="mp3_44100_128",
            voice_settings=settings,
        )
        # delete=False: ffmpeg must be able to open the file by name after
        # we close it; removal is handled by the finally below.
        tf = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
        raw_path = Path(tf.name)
        try:
            with tf:
                for chunk in audio:
                    tf.write(chunk)
            try:
                subprocess.run(
                    [
                        "ffmpeg", "-y", "-loglevel", "error",
                        "-i", str(raw_path),
                        "-af", "adelay=180|180,apad=pad_dur=0.18",
                        "-c:a", "libmp3lame", "-b:a", "128k",
                        str(out_path),
                    ],
                    check=True,
                )
            except (subprocess.SubprocessError, OSError) as exc:
                # Best-effort fallback: keep the raw download. shutil.move
                # (unlike Path.replace) also works when the temp directory
                # lives on a different filesystem than out_path.
                print(f"  ! ffmpeg failed for {out_path.name} ({exc}); keeping unpadded audio")
                shutil.move(str(raw_path), str(out_path))
        finally:
            # Never leave the temp file behind, even if streaming the audio
            # or the fallback move raised.
            raw_path.unlink(missing_ok=True)

    await loop.run_in_executor(None, _sync)


async def main():
    """Generate every (test case, config) clip into ``OUT_DIR``.

    Reads the API key from ``ELEVENLABS_API_KEY``, expands ``TEST_CASES``
    through ``build_configs`` and synthesizes the clips concurrently, at most
    four at a time. A clip that fails is reported and skipped so the remaining
    clips are still produced.

    Raises:
        SystemExit: if the API key is not set, or (after all clips were
            attempted) if at least one clip failed.
    """
    api_key = os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        raise SystemExit("ELEVENLABS_API_KEY not set")

    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # Expand the full matrix up front so the banner reports the real counts
    # instead of a hard-coded number of configs.
    jobs = [(case, cfg) for case in TEST_CASES for cfg in build_configs(case)]
    per_emotion = len(jobs) // len(TEST_CASES) if TEST_CASES else 0

    print(f"[ab] output: {OUT_DIR}")
    print(f"[ab] {len(TEST_CASES)} emotions × {per_emotion} configs = {len(jobs)} clips\n")

    # Account concurrency cap is 15; stay under that
    sem = asyncio.Semaphore(4)

    async def run_one(case, cfg) -> bool:
        """Synthesize one clip; return False (after reporting) on failure."""
        out_path = OUT_DIR / f"{case['emotion']}__{cfg['name']}.mp3"
        async with sem:
            # Printed once the slot is acquired, so the line marks the moment
            # this clip actually starts.
            print(f"  → {out_path.name}")
            try:
                await synth(
                    api_key=api_key,
                    voice_id=case["voice_id"],
                    text=cfg["text"],
                    model=cfg["model"],
                    settings=cfg["settings"],
                    out_path=out_path,
                )
            except Exception as exc:
                # Script-level boundary: one bad clip must not abort the
                # other requests that are already in flight.
                print(f"  ✗ {out_path.name}: {exc}")
                return False
        return True

    results = await asyncio.gather(*(run_one(case, cfg) for case, cfg in jobs))
    failed = results.count(False)

    print(f"\n[done] {len(results) - failed} clips in {OUT_DIR}")
    print("\nListen side by side — for each emotion, compare A vs B vs C vs D.")
    print("Goal: pick the config with the most obvious emotion.")

    if failed:
        raise SystemExit(f"[ab] {failed} clip(s) failed")


# Run the A/B sweep only when executed as a script (e.g. ``python -m ...``),
# not when the module is imported.
if __name__ == "__main__":
    asyncio.run(main())