o
    @i,                     @  s   d Z ddlmZ ddlZddlZddlZddlZddlmZ ddl	m
Z
 ddlmZ ee jd Zed	 d
 Zed ed ed gZed	 d Zd)ddZd*ddZd+ddZ	 	d,d-d$d%Zd&d' Zed(krqee  dS dS ).a,  Standalone audio generation for quality evaluation.

Generates ElevenLabs audio for a sample of seed scenario turns, so you can
listen and evaluate quality BEFORE running the full data pipeline.

Usage:
    # Set env vars first:
    export ELEVENLABS_API_KEY=...
    export ELEVENLABS_VOICE_ID=...    # optional
    export ELEVENLABS_MODEL=eleven_multilingual_v2   # or eleven_v3

    # Generate first 10 turns (in order):
    python -m scripts.compiler.generate_audio --limit 10

    # Generate 10 random turns:
    python -m scripts.compiler.generate_audio --limit 10 --random

    # One turn per emotion (16 total, good for quality matrix):
    python -m scripts.compiler.generate_audio --one-per-emotion

    # Specific scenarios:
    python -m scripts.compiler.generate_audio --scenarios long_125,daily_001
    )annotationsN)Path)List   )	synth_all   dataemotionzseed_train_final.jsonlzseed_val.jsonlzseed_test.jsonlaudio_previewsource_pathslistreturnc              	   C  s^   g }| D ](}|  sq| }|D ]
}|t| qW d   n1 s'w   Y  q|S )zKLoad all scenarios from multiple jsonl files, preserving order within each.N)existsopenappendjsonloads)r   	scenariospfline r   W/dataset/kemix-engine/package/face/animasync-face-v3/scripts/compiler/generate_audio.pyload_scenarios-   s   
r   out_dirr   scenario_idstr	num_turnsintboolc                 C  sN   t |D ] }dd | | d| dD }|r!|d  jdk r$ dS qdS )	u  Check if all turns of this scenario already have valid mp3 files.

    Filters out *.raw.mp3 orphans — these are written mid-synthesis and
    deleted on success; if a process is killed they remain on disk and
    would otherwise make a partially-failed scenario look complete.
    c                 S  s   g | ]
}|j d s|qS z.raw.mp3nameendswith).0mr   r   r   
<listcomp>A   s    
z)scenario_already_done.<locals>.<listcomp>_tz_*.mp3r     FT)rangeglobstatst_size)r   r   r   timatchesr   r   r   scenario_already_done9   s   r/   r   c                 C  sL   g }| D ]}|d }t |d D ]\}}|d  r"||||f qq|S )z4Return list of (scenario_id, turn_index, turn_dict).r   turnstext)	enumeratestripr   )r   itemsscensidr-   turnr   r   r   flatten_turns_for_scenariosJ   s   r8   F
batch_sizeone_per_emotionspecificc              	     s  |rt |  fdd| D S |rRt| }t  }g }|D ]}|d d }	|	|vr2||	 || qg }
|D ]\}}}|
| d| d|d  |gd q7|
S g }| D ])}tdd	 |d
 D }|dkrhqVt||d |rqqV|| t||kr |S qV|S )a
  Pick the next N scenarios to process, skipping already-done ones.

    - specific: list of scenario_ids to target (overrides batch logic)
    - one_per_emotion: diagnostic mode, one turn per emotion
    - default: next `batch_size` unfinished scenarios in order
    c                   s   g | ]
}|d   v r|qS )r   r   r$   s	requestedr   r   r&   `       z(pick_scenarios_batch.<locals>.<listcomp>r   r	   _singleTurn_)r   r0   c                 s       | ]}|d    rdV  qdS r1   r   Nr3   r$   tr   r   r   	<genexpr>w       z'pick_scenarios_batch.<locals>.<genexpr>r0   r   r   )setr8   addr   sumr/   len)r   r   r9   r:   r;   r4   seenpicked_itemsxemosynthr6   r-   r7   
unfinishedr5   r   r   r>   r   pick_scenarios_batchU   sB   	



rT   c            +        s  t  } | jddttdd | jdttd | jdtdd	d
 | jddddd | jddd d dd
 | jdtdd | jdd dd | jdddgdd | jddddd |    jj	d d d! t
 j}t|}td"d# |D }td$| d%| d& t fd'd#|D }td(| d)| d* t| j j j jd+}td,t| d- |std. d S d/d0lm}m} g g g g g f\}}	}
}}g g g }}}|D ]}|d1 }|d2p|d3}d }|r||d4 }|| }t|d4 D ]b\}}|d5  sq| d6| d7|d8  d9} j| }||d5  |	|d8  |
|d: || ||r7|nd  |d  || |||||d8 |d:|d5 ||d; qqi } jrf j|d<< td=d# |D rtd>d# |D }td?| d@ttdAd# |D  dB tdCd# |D }tdDd# |D }t|| }tdEd# t||D }tdF| dGt| dH tdI| dJ tdI| dK| dL|dM dN dOdP  jrtdQ d S tdRt| dS j  dT j!  t"||f j  j!|	|
|||dU|I d H } jdV } dWdX  j#dYD }!i }"| $ r_| j%dZd[/}#|#D ]$}$zt&'|$}%W n t(y>   Y q*w |%d\}&|&|!v rM|%|"|&< q*W d    n	1 sZw   Y  d]}'t||D ]\}(})|(d\ |"vru|'d/7 }'|)|(d^< |(|"|(d\ < qf| j%d_dZd[}#t)|"D ]}&|#*t&j+|"|& d`dadb  qW d    n	1 sw   Y  tdct|" dd|' det||'  df t|}*tdg|* d)t| dht| di tdj j  tdk|   tdl d S )mNz	--sources+z9Scenario jsonl files to combine (default: train+val+test))nargstypedefaulthelpz	--out_dir)rW   rX   z--batch_size
   z*Number of scenarios per batch. Default 10.)rW   rX   rY   z--one-per-emotion
store_truer:   z@Diagnostic mode: 16 turns (one per emotion) instead of scenarios)actiondestrY   z--scenariosc                 S  s   dd |  dD S )Nc                 S  s   g | ]
}|  r|  qS r   rE   )r$   rP   r   r   r   r&      r@   z*main.<locals>.<lambda>.<locals>.<listcomp>,)split)r=   r   r   r   <lambda>   s    zmain.<locals>.<lambda>z>Specific scenario IDs (comma-separated), overrides batch logicz--concurrency   z
--voice_idu%   Override auto emotion→voice mapping)rX   rY   z	--backend
elevenlabsedge)choicesrX   z	--dry-rundry_runz:Print char count + voice plan, then exit without API callsT)parentsexist_okc                 s  s&    | ]}t d d |d D V  qdS )c                 s  rC   rD   rE   rF   r   r   r   rH      rI   !main.<locals>.<genexpr>.<genexpr>r0   N)rL   r<   r   r   r   rH      s    
zmain.<locals>.<genexpr>z[pool] z scenarios, z turns totalc                 3  s8    | ]}t  j|d  tdd |d D rdV  qdS )r   c                 s  rC   rD   rE   rF   r   r   r   rH      rI   rh   r0   r   N)r/   r   rL   r<   argsr   r   rH      s    

z[progress] /z scenarios already have audio)r:   r;   z	[select] z scenarios this batchuC   [info] Nothing to do — all selected scenarios already have audio.r   )dominant_base_for_turnsFEMALE_BY_BASEr   long_solo_r0   r1   r'   rB   r	   z.mp3vad)filer   
turn_indexr	   rp   r1   	monologuevoice_idrt   c                 s  s    | ]}|d uV  qd S Nr   r$   vr   r   r   rH          c                 s  s    | ]	}|d urdV  qd S )Nr   r   rv   r   r   r   rH      s    z[voice] z) turns locked to dominant-emotion voice (c                 s  s    | ]}|r|V  qd S ru   r   rv   r   r   r   rH      rx   z unique voices)c                 s  s    | ]}t |V  qd S ru   )rM   rF   r   r   r   rH      rx   c                 s  s*    | ]}|  r| jd krdV  qdS )r(   r   N)r   r+   r,   r$   r   r   r   r   rH      s    c                 s  s2    | ]\}}|  r| jd kst|V  qdS )r(   N)r   r+   r,   rM   )r$   rG   r   r   r   r   rH      s    z[chars] z characters across z turnsz[plan]  z( turns will be skipped (already on disk)z turns will be synthesized (~z chars / ~$r(   g333333?z.2f)z#[dry-run] exiting without API callsz[tts] z turns, backend=z, concurrency=)backendconcurrencyemotionsvadsvoice_seedsvoice_pools	voice_idszmanifest.jsonlc                 S  s   h | ]}|j d s|j qS r    r!   ry   r   r   r   	<setcomp>  s    
zmain.<locals>.<setcomp>z*.mp3zutf-8)encodingrq   r   okwF)ensure_ascii
z[manifest] z rows total (+z new this batch, z	 updated)z
[done] z turns generated (z scenarios)z  audio:    z  manifest: uU   
[next batch] just re-run the same command — already-done scenarios will be skipped),argparseArgumentParseradd_argumentr   DEFAULT_SOURCES	AUDIO_OUTr   
parse_argsr   mkdirr   sourcesrM   rL   printrT   r9   r:   r   ttsrl   rm   
startswithr2   r3   r   getrt   anyrJ   zipre   r{   r|   r   r*   r   r   r   r   	Exceptionsortedwritedumps)+apr   total_scenariostotal_turns
done_countpicked_scenariosrl   rm   textsr}   r~   	out_pathsmanifestr   r   r   r5   r6   is_monologuescenario_voice_iddominant_baser-   r7   fnamepath
tts_kwargsn_domtotal_chars	will_skip
will_synthwill_synth_charsok_flagsmanifest_pathon_diskkeptr   r   rowfn	new_countr%   r   	successesr   ri   r   main   s,  














"	




 

$r   __main__)r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   )FN)
r   r   r   r   r9   r   r:   r   r;   r   )__doc__
__future__r   r   asyncior   randompathlibr   typingr   r   r   __file__resolverf   PROJECT_ROOTEMO_DIRr   r   r   r/   r8   rT   r   __name__runr   r   r   r   <module>   s6    


- %