o
    :j                     @  s  d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	m
Z
 ddlZddlZddlZddlmZ ddlmZ dd	lmZ ejdd
 ddlmZ ddlmZ dZee jd Z e d d Z!e d d Z"e d d Z#d"ddZ$d#ddZ%dd  Z&e'd!kre&  dS dS )$u  Generate V2 blendshapes for the curated scenarios in viewer_e2e/.

For each scenario, for each turn:
    audio → V2 ONNX (LAM + E2F int8) with GT emotion → (T, 52) AnimaSync ordering

Concatenates per-turn outputs, writes `<sid>_v2_dataset.json` next to the
existing `_dataset` and `_pred_dataset` entries so the comparison viewer
can A/B V2 vs V3 on the same audio.

Usage:
    PYTHONPATH=. python3 -m models.v3_face.infer_v2_compare --all-viewer
    )annotationsN)Path)ListOptional)ARKIT_52_NAMES)lookup_audio_for_scenario   )find_scenarioz-/dataset/text-to-face-se/LAM_Audio2Expression)AudioFeatureExtractor)run_v2zD/dataset/mead-expression-training/e2f/distill/emotion_face_int8.onnx   data
viewer_e2eaudio_previewemotionsidstrreturnOptional[dict]c                 C  s$  t | |j}|d u rtd|  d d S t||j}g }g }tt|d |D ]E\}\}	}
|	dd }|r?|
d u s?|
	 s@q't
jt|
ddd\}}t|d	k rSq't||||	d
d}||tj ||	 q'|sytd|  d d S tj|dd}|  d}g }t|D ]#\}}|||d
dt|dg d|dd|ddd q|dt|jd t|t|d d}|j| d }|tj|dd |j|  d }|j| d }|	 s| r|  |	 r| |j! td|  d|jd  dt|  | |dS ) Nu     ✗ z: scenario not foundturnstext i>  T)srmonog      @r   neutralz: no valid turnsr   )axis_v2_datasetvad)r   r   r   speaker)turn_idxr   r   r   r         )scenario_idfps
num_framesnamesr   blendshapesz.jsonF)ensure_asciiz_dataset.mp3z.mp3u     ✓ z	  frames=z  turns=)r   new_base)"r	   emotion_dirprintr   	audio_dir	enumeratezipgetstripexistslibrosaloadr   lenr   appendastypenpfloat32concatenatelistintshaper   roundtolist
viewer_dir
write_textjsondumps
is_symlinkunlink
symlink_toname)r   sessfeatargsscenaudio_paths	bs_concatvalid_turnstiturnapr   wavr   v2_bsbsr(   
turns_metatviewer_jsonout_jsonteacher_mp3pred_mp3 rY   W/dataset/kemix-engine/package/face/animasync-face-v3/models/v3_face/infer_v2_compare.py
predict_v2.   s\   




$
r[   r>   r   predictions
List[dict]c                 C  s   | d }|  rt| ndg i}dd |d D }|D ]*}|d }||d  di }||g |dd	|d
g d|dd d||< qt| |d< |tj|ddd d S )Nzmanifest.json	scenariosc                 S  s   i | ]}| d |qS )base)r.   ).0srY   rY   rZ   
<dictcomp>j   s    z#update_manifest.<locals>.<dictcomp>r(   r   _datasetn_turnsr   emotionsz[V2] text_previewr   )r_   r"   variantsrd   re   rf   Fr   )r'   indent)	r0   r@   loads	read_textr.   r9   valuesr?   rA   )r>   r\   manifest_pathmanifestseenpr(   teacher_entryrY   rY   rZ   update_manifestf   s$   

rq   c            	        s  t  } | jdttd | jdttd | jdttd | jdddd d | jd	d
d | jdttdt d | 	 }|j
sG|jsG| d |jr}g }t|jdD ]}|j t fdddD reqT| d td   qTtdt| d n|j
}td|j  tj|jdgd}t }td g }|D ]}t||||}|r|| q|rt|j| tdt| d|j  d S )Nz--audio_dir)typedefaultz--viewer_dirz--emotion_dirz-sz--scenarios+)nargsrs   z--all-viewer
store_true)actionz--onnxzV2 ONNX path. Default: )rr   rs   helpz#provide --scenarios or --all-viewerz*_dataset.jsonc                 3  s    | ]}| v V  qd S )NrY   )r`   xstemrY   rZ   	<genexpr>   s    zmain.<locals>.<genexpr>)_pred_dataset_e2e_datasetr   rc   zall-viewer: z
 scenarioszLoading V2 ONNX: CPUExecutionProvider)	providersu   V2 ready ✓
z
Done. z V2 outputs written to )argparseArgumentParseradd_argumentr   DEFAULT_AUDIO_DIRDEFAULT_VIEWER_DIRDEFAULT_EMOTION_DIRr   DEFAULT_V2_ONNX
parse_argsr^   
all_viewererrorsortedr>   globr{   anyr4   r3   r*   onnxortInferenceSessionr
   r[   rq   )	rO   rH   sidsro   rF   rG   r\   r   rrY   rz   rZ   mainz   sD   


r   __main__)r   r   r   r   )r>   r   r\   r]   )(__doc__
__future__r   r   r@   syspathlibr   typingr   r   r1   numpyr6   onnxruntimer   scripts.compiler.constantsr   scripts.compiler.data_pipeliner   	infer_e2er	   pathinsertdistillation.student_modelr
   scripts.compiler.abc_experimentr   r   __file__resolveparentsPROJECT_ROOTr   r   r   r[   rq   r   __name__rY   rY   rY   rZ   <module>   s6    

8*
