o
    CjP                  
   @  sP  d Z ddlmZ ddlZddlZddlZddlZddlmZ ddl	Z	ddl
ZddlZddlmZmZmZmZ ddlmZ ddlmZmZmZ ddlmZ d	d
lmZmZmZmZ d	dlm Z m!Z!m"Z" ee#$ j%d Z&e&d d Z'e'j(ddd e&d Z)e&d Z*e&d d d d Z+e&d d d Z,e&d d d Z-ddddddddd d!	Z.e/ej01 rd"nd#Z/e2d$e/  e2d%e+  ee+e/\Z3Z4e2d&e,  e e,e-e/\Z5Z6e2d' ee7Z8e89d(d)d* Z:e89d+d,d- Z;e89d.d/d0 Z<e89d1d2d3 Z=e8j9d4d5gd6d7d8 Z>e7d9kr&e?e@d:d;ZAe8jBd<eAd=d=d> dS dS )?ud  Live demo server for the V3 face pipeline.

    user types text in browser
        ↓ POST /api/generate {text}
    KlueTeacher (text → emotion + VAD)
        ↓
    ElevenLabs TTS (text + emotion → mp3)
        ↓
    mel features  +  cond from teacher prediction
        ↓
    V3 face model  +  locked post-processing
        ↓
    blendshapes.json (+ audio.mp3) served back to browser
        ↓
    blendshape-player-live.html renders the avatar with audio

Usage:
    ELEVENLABS_API_KEY=sk-... PYTHONPATH=. python3 -m models.v3_face.serve_live

Then open http://localhost:8091/ in your browser.
    )annotationsN)Path)Flaskrequestjsonifysend_from_directory)ARKIT_52_NAMES)EMOTION_LABELSFPSmel_features)synth_one_elevenlabs   )crisp_mouthsmooth_browsinject_blinks
load_model)load_teacherteacher_predictbuild_cond_smoothed   dataviewer_liveT)exist_okparentstoolsavatarmodelsv3_facecheckpointszbest_expression.ptklue_teacher_clean_ctx2zbest.pt	tokenizerg333333?g      ?g?g       @g{Gz?g      @g      ?)	crisp_thresholdcrisp_scalecrisp_sigmacrisp_mouthclose_sigmabrow_min_cutoff	brow_betabrow_d_cutoffblink_intervalblink_expressive_capzcuda:0cpuz[live] device = z[live] loading V3 from z [live] loading KlueTeacher from u   [live] ready ✓/c                   C  s   t ttdS )Nzblendshape-player-live.htmlr   str	TOOLS_DIR r/   r/   Q/dataset/kemix-engine/package/face/animasync-face-v3/models/v3_face/serve_live.pyindexP      r1   z/tools/<path:filename>c                 C     t tt| S Nr,   filenamer/   r/   r0   
serve_toolU   r2   r7   z/live/<path:filename>c                 C  r3   r4   )r   r-   LIVE_DIRr5   r/   r/   r0   
serve_liveZ   r2   r9   z/avatar/<path:filename>c                 C  r3   r4   )r   r-   
AVATAR_DIRr5   r/   r/   r0   serve_avatar_   r2   r;   z/api/generatePOST)methodsc                    s&  t jddpi } | dpd }| dpd }|s"tddidfS t }d	tt d
  }t }tt	t
|gt\} }|d |d }}	t | }
t }t| d }ztt||||t|	|d W n ty } ztdd| idfW  Y d }~S d }~ww | stddidfS t | }t }tjt|ddd\}}t|dk rtddidfS t||td}|jd }t | }t|gtj|	dd|gdd}t }t|tj !d"t}t|tj !d"t}t#  t$||%d& ' tj }W d    n	1 sw   Y  t | }t }t(|t)d t)d t)d t)d d}t*|t)d t)d t)d  d!}t+||t)d" t)d# d$}t | } fd%d&t, d d'd  d d d( D }|d)t|jd t-d||	. ||d*gt/|d+. d,}t| d- }|0t1j2|d.d/ t | }t3d0| d1|d d) d2| d3|	/d4.  d5| d6|d7d8|
d7d9|d7d:|d;d<|d;d= t|d>| d-d>| d||	. |t|jd t|
d
 t|d
 t|d
 t|d
 t|d
 t|d
 d?d@S )ANT)silenttext voice_iderrorz
empty texti  live_i  r   z.mp3)r?   out_pathrA   emotionvad
voice_seedzTTS failed: i  zTTS returned no audioi>  )srmonog      @zaudio too short)rH   fps)axisg        )vad_smooth_sigmar!   r"   r#   r$   )	thresholdscalepre_smooth_sigmamouth_close_sigmar%   r&   r'   )
min_cutoffbetad_cutoffr(   r)   )scenario_idmean_interval_sexpressive_capc                   s&   g | ]}t | t d  | dqS )r   )labelprob)r	   float).0iprobsr/   r0   
<listcomp>   s    zgenerate.<locals>.<listcomp>   )turn_idxrE   rF   r?   top3_emotions   )rT   rJ   
num_framesnamesturnsblendshapesz.jsonF)ensure_asciiz[live] z text=z emo=z vad=r   z T=z total=z.2fzs (teacher=zs tts=zs v3=z.3fzs post=zs)z/live/)
teacher_mstts_msmel_msv3_mspost_mstotal_ms)rT   blendshapes_url	audio_urlrE   rF   top3re   timing)4r   get_jsongetstripr   timeperf_counterintr   teacherr    devicer8   asynciorunr   list	Exceptionexistslibrosaloadr-   lenr   r
   shaper   npexpand_dimstorch
from_numpyastypefloat32	unsqueezetono_gradmodelsqueezer*   numpyr   	POST_PROCr   r   argsortr   tolistround
write_textjsondumpsprint)r   r?   voice_overridet_totalsidt0emosvadsemorF   	t_teacher
audio_pathet_ttswavrH   melTt_melcondaudio_tcond_tpredt_v3t_postrr   blendshapes_json	json_pathr/   r\   r0   generated   s   "

"









r   __main__PORT8091z0.0.0.0F)hostportdebugthreaded)C__doc__
__future__r   r|   r   osrw   pathlibr   r   r   r   r   flaskr   r   r   r   scripts.compiler.constantsr   scripts.compiler.data_pipeliner	   r
   r   scripts.compiler.ttsr   inferr   r   r   r   	infer_e2er   r   r   __file__resolver   PROJECT_ROOTr8   mkdirr.   r:   CKPTTEACHER_CKPTTOKENIZER_DIRr   r{   cudais_availabler   r   cfgrz   r    __name__approuter1   r7   r9   r;   r   ry   getenvr   r}   r/   r/   r/   r0   <module>   s    





u