
    ^jĘ                    >   d Z ddlmZ ddlZddlZddlZddlZddlmZ ddl	m
Z
 ddlZddlZddlZddlmZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlm Z m!Z! i ddddddddddddddddddddddddddddddddZ"g dZ#dZ$d Z% ej&        d!          Z' ee(          )                                j*        d"         Z+e+d#z  d$z  d%z  Z,e+d#z  d&z  Z-e+d#z  d'z  Z.d(Z/g d)Z0d*  e1e0          D             Z2	 ddd7Z3dd;Z4dd=Z5dd?Z6dddGZ7ddKZ8ddMZ9ddRZ:	 	 	 ddd[Z;	 	 ddd]Z<	 	 	 ddddZ=	 	 	 dddeZ>	 dddhZ?	 dddlZ@	 dddqZAdduZB	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dddZCd ZDeEdk    r ejF         eD                       dS dS )u  Training data generation pipeline for V3 lipsync model.

Produces .npz triples per scenario:
    - audio_features: (T, 141) [mel or wav2vec features — TBD, simple mel for now]
    - conditioning: (T, 19) [16 emotion one-hot + 3 VAD]
    - target: (T, 52) [LAM lipsync + compiler expression merged by channel rules]

Usage:
    python -m scripts.compiler.data_pipeline --limit 10   # test run
    python -m scripts.compiler.data_pipeline              # full run
    )annotationsN)Path)List   )LAM_WEIGHTS_SHAREDLIPSYNC_ONLYEXPRESSION_ONLYSHARED_CHANNELS)compile_expressive_batch)apply_eye_motion)
LAMWrapper)apply_tremorsilence_gate_from_wav)	synth_all)build_synthetic_presetsload_presets_from_jsonneutraljoylaughter
excitement	agreement	gratitudesadnesscryingsulkapologystruggleangerrefusalsurpriseflustershy)r   r            皙?皙?data_pipeliner#   dataemotionzseed_train_final.jsonlv3_trainingaudio_preview   )r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   c                    i | ]\  }}||	S  r/   ).0ies      V/dataset/kemix-engine/package/face/animasync-face-v3/scripts/compiler/data_pipeline.py
<dictcomp>r4   H   s    ===41a!Q===    
elevenlabsr%   turns
List[dict]out_dirr   backendstrconcurrencyintreturn
List[Path]c                4  K                        dd           d | D             }d | D             }d | D             }fdt          t          |                     D             }t          ||f||||d| d{V }	d	 t	          ||	          D             S )
zDTTS all turns via selected backend, passing emotion+VAD for prosody.Tparentsexist_okc                    g | ]
}|d          S )textr/   r0   ts     r3   
<listcomp>z%synth_turns_batch.<locals>.<listcomp>P   s    &&&1QvY&&&r5   c                8    g | ]}|                     d           S r*   getrF   s     r3   rH   z%synth_turns_batch.<locals>.<listcomp>Q   s$    000Qi  000r5   c                8    g | ]}|                     d           S vadrK   rF   s     r3   rH   z%synth_turns_batch.<locals>.<listcomp>R   s"    (((QAEE%LL(((r5   c                "    g | ]}|d dz  S )06d.mp3r/   )r0   r1   r9   s     r3   rH   z%synth_turns_batch.<locals>.<listcomp>S   s&    AAA!W!~~~~%AAAr5   )r:   r<   emotionsvadsNc                     g | ]\  }}|r|nd S Nr/   )r0   poks      r3   rH   z%synth_turns_batch.<locals>.<listcomp>\   s%    @@@%!RAA@@@r5   )mkdirrangelenr   zip)
r7   r9   r:   r<   
tts_kwargstextsrS   rT   pathsok_flagss
    `        r3   synth_turns_batchra   K   s       MM$M...&&&&&E00%000H((%(((DAAAAuSZZ/@/@AAAEu         H A@3uh+?+?@@@@r5   scendict	audio_dirc           	        |                      d| d                   }|                      d          }g }t          | d                   D ]\  }}|                     dd                                          s|                    d           D|||         n|}|                     dd	          }|| d
| d| dz  }	|	                                r3|	                                j        dk    r|                    |	           t          |                    | d
| d                    }
d |
D             }
|                    |
r|
d         nd           |S )uG  Find pre-generated audio files for a scenario's turns.

    Matches filename pattern: {scenario_id}_t{turn_idx}_{emotion}.mp3
    Returns list aligned to scen['turns']; None if a turn's audio is missing
    or the text is empty.

    For per-turn dialogue splits, audio is named after the *original* scenario
    and turn index, not the new pseudo-scenario id. expand_split_dialogues
    sets `_source_scenario_id` and `_source_turn_indices` so we can reroute
    lookup to the original (sid, ti) here.

    SAFE: never calls TTS, never writes to audio_dir — read-only lookup.
    _source_scenario_idscenario_id_source_turn_indicesr7   rE    Nr*   r   _t_rR     z_*.mp3c                L    g | ]!}|                                 j        d k    |"S )rl   )statst_size)r0   ms     r3   rH   z-lookup_audio_for_scenario.<locals>.<listcomp>}   s-    AAA)9D)@)@1)@)@)@r5   r   )	rL   	enumeratestripappendexistsrn   ro   listglob)rb   rd   sidsrc_tisr_   local_titurn	actual_tiemoexpectedmatchess              r3   lookup_audio_for_scenarior   _   st    ((($}*=
>
>Chh-..GE#DM22 6 6$xx##))++ 	LL)0)<GH%%(	hhy),,#>>>>S>>>>?? 	!84!?!?LL"""y~~&A&A	&A&A&ABBCCAAgAAA74WQZZ5555Lr5   variantc                    ddl }ddl}|j                            dd           ddlm} ddlm}m}m	}m
} t                              d|            |                    |dg          } |            }	| ||	|||d	S )
ul  Lazy-load V2 ONNX session + feature extractor and pull the V2-dynamics
    helpers out of abc_experiment.py. Lazy import because abc_experiment.py
    imports `merge_lam_compiler` / `speech_gate` from THIS module — a
    module-level import would cycle.

    Returns a dict with keys: variant, sess, feat, run_v2, apply_v2_dynamics,
    get_preset_envelope.
    r   Nz-/dataset/text-to-face-se/LAM_Audio2Expression)AudioFeatureExtractor)ONNX_V2run_v2apply_v2_dynamicsget_preset_envelopezLoading V2 ONNX: CPUExecutionProvider)	providers)r   sessfeatr   r   r   )sysonnxruntimepathinsertdistillation.student_modelr   scripts.compiler.abc_experimentr   r   r   r   LOGinfoInferenceSession)
r   _sysortr   r   r   r   r   r   r   s
             r3   _load_v2_helpersr      s     IQGHHH@@@@@@            HH***+++4J3KLLD  ""D.2  r5   	scenariosc           	     `   g }| D ]}|                     dd          }|                    d          s|                    |           Ct          |d                   D ]O\  }}|                     dd                                          s.|                    | d| ||g|gd           P|S )u  Expand each daily_* dialogue scenario into one pseudo-scenario per
    non-empty turn (short monologue per turn).

    Pseudo-scenario shape:
        scenario_id:          "{original_sid}_t{turn_idx}"  (drives .npz name
                                                             + tremor/eye seed)
        turns:                [original turn]                (single-turn —
                                                              no transitions)
        _source_scenario_id:  original sid                   (audio lookup)
        _source_turn_indices: [original turn_idx]            (audio lookup,
                                                              filenames carry
                                                              the original ti)

    Rationale: the blendshape model only consumes (audio, VAD) → face at the
    rendering stage, and contextual emotion learning already lives in
    MicroAlbert (text + previous-turn context). So we drop the dialogue
    structure for this dataset and emit each turn as a self-contained short
    monologue. Long_/solo_ scenarios pass through unchanged — their multi-
    turn structure is what teaches inter-emotion transitions.
    rg   ri   daily_r7   rE   rj   )rg   rf   rh   r7   )rL   
startswithrs   rq   rr   )r   outsrw   tirz   s         r3   expand_split_dialoguesr      s    * C  eeM2&&~~h'' 	JJqMMM!!G*-- 	 	HB88FB''--// JJ"%~~~~'*)+	     	 Jr5   >  P   wav
np.ndarraysrfpsn_melsc                    t          ||z            }t          j                            | |||d          }t          j        |          j        }|                    t          j                  S )zKExtract mel features aligned to fps.

    Returns (T, n_mels) float32.
    i   )yr   r   
hop_lengthn_fft)	r=   librosafeaturemelspectrogrampower_to_dbTastypenpfloat32)r   r   r   r   r   mellog_mels          r3   mel_featuresr      sc    
 R#XJ
/
(
(
"V
$ )  C !#&&(G>>"*%%%r5   rO   List[float]r   c                    t          j        |dft           j                  }t                              | d          }d|dd|f<   t          j        |t           j                  |ddddf<   |S )zD(T, 19) conditioning: 16-dim one-hot + 3 VAD, broadcast over frames.   dtyper         ?N   )r   zerosr   EMOTION_TO_IDXrL   asarray)r*   rO   r   condidxs        r3   build_conditioningr      sm    8QG2:...D


Wa
(
(CDCL:c444DBCCLKr5   lam_bsc                    d| dddf         z  d| dddf         z  z   d| dddf         z  z   d| dddf         z  z   }t          j        |dz  d	d                              t           j                  S )
u   Compute per-frame speech activity [0, 1] from LAM mouth activity.

    Per V3_IMPLEMENTATION_PLAN_v2 §3.4:
        activity = 1.2*jawOpen + 1.5*mouthClose + 1.0*mouthFunnel + 1.0*mouthPucker
    Normalized via sigmoid-ish.
    g333333?N         ?   r      %           )r   clipr   r   )r   activitys     r3   speech_gater      s     	fQQQUm
qqq"u
	
qqq"u
	 qqq"u
	  78c>3,,33BJ???r5   prev_vfloatnext_vrG   c                J   t           dz  }|d|z
  k     rBd|z
  dk    r|d|z
  z  nd}ddt          j        |t          j        z            z
  z  }d|z
  | z  S |d|z   k     rdS d|z
  }|dk    r|d|z   z
  |z  nd}ddt          j        |t          j        z            z
  z  }||z  S )u  Brow channel value over a crossfade routed through the neutral (0)
    pose. Used when |delta| > BROW_SWING_DELTA so a sad↔anger inversion
    doesn't slide linearly between extremes.

    Same profile as abc_experiment.py:
        [0, 0.5−PAUSE/2]:  prev → 0 (cosine ramp-down)
        [0.5−PAUSE/2, 0.5+PAUSE/2]:  hold at 0
        [0.5+PAUSE/2, 1]:  0 → next (cosine ramp-up)
    r#         ?r   r   r   )NEUTRAL_PAUSE_FRACTIONr   cospi)r   r   rG   
half_pauselocal_teaseddenoms          r3   _brow_pass_through_zeror      s     (!+J3-0:-=,B,B!sZ'((sRVGbeO4445ev%%	
S:		sz!6;aii1j()U22SsRVGbeO4445v~r5         >@r   r   r   signal
min_cutoffbetad_cutoffc                   d }t          |           }t          j        |t          j                  }| d         |d<   d}d|z  }	t	          d|          D ]v}
 ||	|          }| |
         ||
dz
           z
  |	z  }||z  d|z
  |z  z   }|}||t          |          z  z   } ||	|          }|| |
         z  d|z
  ||
dz
           z  z   ||
<   w|S )zOne-Euro adaptive low-pass. Peak-preserving smoother for expression
    channels (not lipsync-critical). Same impl as abc_experiment.py.c                <    dt           j        z  |z  | z  }||dz   z  S )N       @r   )r   r   )tecutoffrs      r3   sfz_one_euro_filter.<locals>.sf
  s%    "%K& 2%AG}r5   r   r   r   r   r   )r[   r   r   r   rZ   abs)r   r   r   r   r   r   r   r   dx_prevr   r1   a_ddxdx_hatr   as                   r3   _one_euro_filterr     s    
   	FA
(1BJ
'
'
'CAYCFG	sB1a[[ 8 8bXQi#a!e*$*rS3Y'11dS[[00Br6NNVAY#'SQZ!77AJr5   targetc                T   |                                  }t          t          t                    t          t                    dhz
  z            }|D ]%}t          |dd|f         ||          |dd|f<   &t          j        |dd                              t          j	                  S )zDApply One-Euro filter to expression channels (not lipsync-critical).r   Nr   r   r   r   )
copysortedsetr	   r
   r   r   r   r   r   )r   r   r   result	smooth_chchs         r3   smooth_expression_channelsr     s     [[]]Fs?++s?/C/Crd/JKLLI K K(24>TK K Kqqq"u763$$++BJ777r5   333333?皙?Q?
base_alphajitter_alphajitter_thresholdc           	        t          |           }|dk    r!|                     t          j        d          S t          j        |t          j                  }| d         |d<   t          d|          D ]g}t          t          | |                   t          | |dz
                     z
            }||k    r|n|}|| |         z  d|z
  ||dz
           z  z   ||<   h|S )a^  V2-style jitter-gate EMA. Small per-frame deltas get heavy smoothing
    (alpha=jitter_alpha); deltas above `jitter_threshold` pass through with
    light smoothing (alpha=base_alpha). Removes sub-threshold mouth jitter
    without flattening real phoneme transitions.

    Mirrors animasync-face-v2/pipeline_v2/smooth_v2.py::jitter_gate_smooth.
    r   T)r   r   r   r   )r[   r   r   r   r   rZ   r   r   )	r   r   r   r   r   r   rG   deltaalphas	            r3   _jitter_gate_smoothr   )  s     	FAAvv}}RZd}333
(1BJ
'
'
'CAYCF1a[[ @ @E&)$$uVAE]';';;<<#&666

L"cEkSQZ%??AJr5   c                ,   |                                  }t          t          t                    dhz            }|D ]&}t	          |dd|f         |||          |dd|f<   't          j        |dd                              t
          j                  S )ac  Apply V2 jitter-gate smoothing to LIPSYNC_ONLY + jawOpen (ch 24).

    The compiler+LAM teacher target carries sub-threshold high-frequency
    noise in the mouth/jaw channels that V3 then learns and amplifies.
    Smoothing the GT before training removes that noise floor while
    preserving real phoneme onsets (which exceed the jitter threshold).
    r   Nr   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )r   r   r   r   r   lip_chr   s          r3   smooth_lipsync_channelsr  @  s     [[]]FC%%,--F 
 
+111b5Mj%8H
 
 
qqq"u 763$$++BJ777r5   `   fade_framesc                   t          j        | d                              t           j                  }|dz  }d}t	          |dd                   D ]\  }}||z  }| |         d         | |dz            d         t          d||z
            }t          |j        d         ||z             }	|	|z
  }
|
dk    rffdt          D             }t          ||	          D ]l}||z
  |
dz
  z  }dd	t          j
        |t           j        z            z
  z  }d	|z
  z  |z  z   ||<   |D ]$}t          |         |         |          |||f<   %m|S )
zCosine-eased blend across turn boundaries, with brow pass-through-zero
    on inverting (large-delta) brow channels. Mirrors abc_experiment.py.r   axisr#   Nr   c           	         g | ]E}t          t          |                   t          |                   z
            t          k    C|FS r/   )r   r   BROW_SWING_DELTA)r0   r   	next_pose	prev_poses     r3   rH   z-crossfade_turn_boundaries.<locals>.<listcomp>e  sT     
 
 
52''%	"*>*>>??BRRR RRRr5   r   r   )r   concatenater   r   rq   maxminshapeBROW_CHANNELSrZ   r   r   r   )
comp_stackturn_lengthsr  concathalfcursorr1   Ti
fade_startfade_endLbrow_pass_channelsfrG   r   r   r  r  s                   @@r3   crossfade_turn_boundariesr  U  s    ^JQ///66rzBBF!DF<,--  2"qM"%	qsOA&	FTM**
v|A66z!66
 
 
 
 
&
 
 
 z8,, 	 	AZAE*A3BE	!2!223Eu	1EI4EEF1I(   7bM9R=!! !q"u		 Mr5   rT   presetssigmac                   t          j        d |                                D             t           j                  }t          j        d |                                D             t           j                  }| j        d         }t          j        |dft           j                  }dd|dz  z  z  }t          |          D ]c}t          j        || |         z
  dz  d	
          }	t          j        |	 |z            }
|
                                }|dk    r|
|z  }
|
|z  ||<   dt          j	        |dd          
                    t           j                  S )u   RBF over ALL preset anchors based on VAD distance — cross-emotion blend.
    Matches abc_experiment.py.cross_emotion_compile.c                    g | ]
}|d          S rN   r/   r0   rW   s     r3   rH   z)cross_emotion_compile.<locals>.<listcomp>x  s    AAA1ahAAAr5   r   c                    g | ]
}|d          S )bsr/   r$  s     r3   rH   z)cross_emotion_compile.<locals>.<listcomp>z  s    >>>AdG>>>r5   r   4   r   r   r#   r   r  g&.>r   )r   r   valuesr   r  r   rZ   sumexpr   r   )rT   r   r!  anchor_vads	anchor_bsr   r   	inv_2sig2rG   d2wr   s               r3   cross_emotion_compiler0  t  sF    *AA0@0@AAA#%:/ / /K
>>W^^-=-=>>>!#- - -I
1A
(Ar7"*
-
-
-CsUaZ'(I1XX  V[47*q0q999FB3?##EEGGt88FAYA73S!!((444r5   ?turn_emotions	List[str]fleeting_scalec                   t          |           }|dk    sdk    rdg|z  S d | D             }t          t          |                    dk    rdg|z  S ddz   z  dg|z  }d}||k     rZ|}||k     r/||         ||         k    r|dz  }||k     r||         ||         k    t          ||          D ]
}||z
  ||<   |}||k     Zfd|D             S )u  For each turn, compute the magnitude scale based on emotion-family
    persistence across adjacent turns. Single-turn scenarios bypass entirely
    (returned scales are all 1.0). Same rule as abc_experiment.py:
       persistence == 1 → fleeting_scale
       persistence == 2 → midpoint(fleeting_scale, 1.0)
       persistence >= 3 → 1.0
    All-same-base monologues also bypass (sustained = full strength).
    r   r   c                D    g | ]}t                               |d           S )r   )SUB_TO_BASErL   )r0   r2   s     r3   rH   z.compute_persistence_scales.<locals>.<listcomp>  s&    BBBq[__Q	**BBBr5   r   r   c                4    g | ]}|d k    rn	|dk    rndS )r   r#   r   r/   )r0   rW   r4  paireds     r3   rH   z.compute_persistence_scales.<locals>.<listcomp>  sA        q&&avvS  r5   )r[   r   rZ   )	r2  r4  nbasesrun_lenr1   jkr9  s	    `      @r3   compute_persistence_scalesr?    s<    	MAAvv3&&uqyBBMBBBE
3u::!uqyNS()FcAgG	A
a%%!eeaE!H,,FA !eeaE!H,,q! 	 	AQGAJJ a%%       r5   lamcompgatec                8   | j         d         }t          j        |           }t          D ]}| dd|f         |dd|f<   t          D ]}|dd|f         |dd|f<   t
          D ]}t          |         }|dk    rEd|dd|f         dz  d|z
  z  z   }| dd|f         |z  |dd|f         d|z
  z  dz  z   |dd|f<   Z|| dd|f         z  d|z
  |dd|f         z  z   }||z  d|z
  |dd|f         z  z   |dd|f<   |dddf         |ddd	f         z   dz  }	|ddd
fxx         d|	dz  z
  z  cc<   |dddfxx         d|	dz  z
  z  cc<   |dddfxx         d|	dz  z
  z  cc<   t          j        |dd                              t          j	                  S )z=Merge LAM lipsync + compiler expression per V3 channel rules.r   Nr   r   r   r   333333?+   ,   r   皙?   g?r-   r   )
r  r   
zeros_liker   r	   r
   r   r   r   r   )
r@  rA  rB  r   r   r   r/  emotion_gainblended_activesmiles
             r3   merge_lam_compilerrM    s*   	!A
-

C     BZAAArE

  ! !!!!R%[AAArE

  J Jr"88aaaes!2cDj!AALQQQUl2T!!!R%[AH5MPS5SSC2JJQQQU^q1uQQQU.CCN.!d(d111b5k1IIC2JJ BZ#aaae*$+E2JJJ1us{?#JJJ2JJJ1us{?#JJJ2JJJ1us{?#JJJ73S!!((444r5   rD  ffffff?      @y&1?Fscenarioaudio_pathsr   out_pathpersistence_dampingcross_emotion_weightcross_emotion_sigmavad_damp_gammavad_damp_betavad_smooth_sigmablink_interval_soption_e_intensity
tremor_amptremor_sigma
v2_helperslipsync_smoothboollipsync_smooth_alphalipsync_smooth_jitter_alphalipsync_smooth_thresholdc                |   g }t          | d                   D ]!\  }}|d                                         s!||         }||                                s@t          j        t          |          dd          \  }}t          |          dk     r{|                    |          }|j        d         }t          ||t          	          }|j        d         |k    r|d|         }nV|j        d         |k     rE||j        d         z
  }t          j        |t          j        |d
d         |df          gd          }t          |          }t          |||t                    } d}!| |d         |d         |d         ||d                   }!|!j        d         |k    r|!d|         }!nV|!j        d         |k     rE||!j        d         z
  }t          j        |!t          j        |!d
d         |df          gd          }!|                    ||d         t#          |d                   ||||| |!d	           #|sdS t%          d |D             |          }"|dk    rt          |          dk    rt'          |          }#t'          |	          }$t          j        |d         d         t          j                  }%|dd         D ][}&t          j        |&d         t          j                  }'|#|'z  d|#z
  |%z  z   }(|$|%z  d|$z
  |'z  z   }%|(                                |&d<   \t          j        d |D             d          })t/          d |D                       }*t          j        |*t          t2                    ft          j                  }+d},|D ]@}&t4                              |&d         d          }-d|+|,|,|&d         z   |-f<   |,|&d         z  },A|
dk    rqt          |          dk    r^ddlm}.  |.|)|
dd                              t          j                  }) |.|+|
dd                              t          j                  }+g }/g g }1}0d}2g }3t?          ||"          D ]\  }&}4|&d         }|&d         }5|)|2|2|z            }6|+|2|2|z            }7|2|z  }2tA          |5g|z  |6||          }8|d k    rLtC          |6||!          }9t'          |          }:d|:z
  |8z  |:|9z  z                       t          j                  }8|4dk     r"|8|4z                      t          j                  }8|L|&                    d"          7 |d#         |5|          \  };}< |d$         |8|&d"         |d%         |;|<&          }8|/                    |8           |3                    |           |0                    |&d'                    t          j        |7|6                    t          j                  gd
                              t          j                  }=|1                    |=           t          |3          d(k    }>|>r|ntE          |d)          }?t          |/          dk    rtG          |/|3|?*          }@n|/d         }@t          j        d+ |D             d          }At          j        d, |D             d          }BtI          |A|@|B          }C|>rd-nd.}DtK          |C|Dd/0          }C|rtM          |C|||1          }CtO          |C| d2         t          |3          }C|d k    r]|d k    rWt          j        d4 |D             d                              t          j                  }EtQ          |C|E| d2         ||5          }Ct          j)        |t          j        |0d          t          j        |1d          |C6           dS )7u5  Two-pass scenario → .npz, mirroring abc_experiment.py's variant-C
    pipeline (compiler + LAM, no V2 ONNX). Defaults match the canonical
    lock-in (damp 0.65, xemo 0.2, blink 3.5, fade 96, σ=30, γ=0.3) — running
    with no flag overrides produces training targets that match what the
    viewer shows for _d65x20 scenarios.

    Pass 1: collect per-turn audio + LAM + speech gate + raw VAD.
    Persistence rule: pose-level scale per turn (multi-turn only).
    Causal VAD damping: pull each turn's VAD toward running mean of past.
    Cross-turn VAD smoothing: σ-frame Gaussian over per-frame VAD trajectory.
    Pass 2: per-turn compile (within-emotion + cross-emotion blend, then
    persistence damp).
    Crossfade between turn boundaries.
    Merge LAM, smooth expression channels, apply eye_motion.
    r7   rE   Nr   T)r   monog      @r   )r   r   r
  r   r  )r   r   r   r   r*   rO   )	turn_idxr*   rO   r   r   r   rB  silence_gatev2_bsFc                    g | ]
}|d          S rJ   r/   r0   cs     r3   rH   z$process_scenario.<locals>.<listcomp>%  s    )))!9)))r5   )r4  r   r   c                    g | ]C}t          j        t          j        |d          t           j                  |d         df          DS )rO   r   r   r   )r   tiler   r   rj  s     r3   rH   z$process_scenario.<locals>.<listcomp>5  sQ        	
1U82:6663DD  r5   c              3  &   K   | ]}|d          V  dS )r   Nr/   rj  s     r3   	<genexpr>z#process_scenario.<locals>.<genexpr>=  s&      ,,Q!C&,,,,,,r5   r   )gaussian_filter1dnearest)r!  r	  mode)rS   rT   r   parametric_overlay_intensityr   )r!  rh  r   r   r   )envelope_loenvelope_hir   r$      )r  c                    g | ]
}|d          S )r   r/   rj  s     r3   rH   z$process_scenario.<locals>.<listcomp>  s    ===aak===r5   c                    g | ]
}|d          S )rB  r/   rj  s     r3   rH   z$process_scenario.<locals>.<listcomp>  s    <<<Qqy<<<r5   rG  r   r   r   r  rg   )seed_strr   rZ  c                    g | ]
}|d          S )rg  r/   rj  s     r3   rH   z$process_scenario.<locals>.<listcomp>  s    2221Q~222r5   )rg  rg   ampr!  )audior   r   )*rq   rr   rt   r   loadr;   r[   infer_audior  r   FPSr   r  rm  r   r   rs   ru   r?  r   arrayr   tolistr)  r   EMOTION_LABELSr   rL   scipy.ndimagerp  r   r\   r   r0  r  r  rM  r   r  r   r   savez_compressed)FrQ  rR  r@  r   rS  rT  rU  rV  rW  rX  rY  r  rZ  r[  r\  r]  r^  r_  ra  rb  rc  	collectedrf  rz   
audio_pathr   r   r   r   r   padrB  sgaterh  persist_scales   γ   βrunning_meanrk  rawdampedall_vadsn_totalall_emos_c_emor   rp  r  audio_featscondsr  r  psr|   	vad_slice	emo_slicecomp_bsxemor/  env_loenv_hicond_per_frameis_monologuefade_for_thiscomp_catlam_catgate_catr   r   	sgate_catsF                                                                         r3   process_scenarior    s   N I#HW$566 . .$F|!!## 	 *
Z%6%6%8%8,s:5tDDDRs88k!!,,LO323///9Q<!bqb'CCYq\Acil"C.#rws233x#q'B'B!C!LLLC6"" &c2qc::: !(Jx(6"Jv$6T)_ E {1~!!bqb	Q!##%+a.(BGE"##Ja99:   	 IU$$!

 

 
	 
	 
	 
	  u 0))y)))*  N c)nnq00>""=!!x	!U 32:FFF122 	' 	'A(1U82:666C#Xr\ 99F,bC/??L}}AeHH ~       H ,,),,,,,Gx#n"5"56bjIIIHF    9q11033',-!C&!I 2 2333333$$,19
 
 

&

 	 %$,19
 
 

&

 	
 JRKFLY// *% *%2cF	lVFQJ./	VFQJ./	!*UQY);	
 
 
  #%%(G0CE E ED*++Aa7*QX5==bjIIG88|++BJ77G !aeeGnn&@>Z(=>sGLLNFF5j!457Z	%:"  G
 	'"""A1U8$$$
 	((445B
 
 

&

 	 	^$$$$ |$$)L#/HKKSa5H5HM
:,Z:GI I I a= n==9===AFFFG~<<)<<<1EEEH8<<F %-#J':CPPPF
  
(+45	
 
 
 -()	  F CL3..N22	222
 
 

&

 	 " /
 
 
 n[q111^E***	    4r5   c            
       K   t          j                    } |                     dt          t                     |                     dt          t
                     |                     dt          t          d           |                     dt          dd	           |                     d
t          t          dz  d           |                     dd            |                     ddd           |                     ddd           |                     dt          dd           |                     dt          dd           |                     dt          dd           |                     d t          d!d"           |                     d#t          d$d%           |                     d&t          d'd(           |                     d)t          d*d+           |                     d,t          d-d.           |                     d/t          d0d1           |                     d2t          d3d4           |                     d5t          d6d7           |                     d8dd9           |                     d:dd;           |                     d<t          d=d>           |                     d?t          d@dA           |                     dBt          dCdD           |                     dEg dFdGdHI           | 
                                }|j        rdJ|_        t          j        t          j        dKL           |j                            dMdMN           |j                                        st)          dO|j                   t*                              dP|j                    |j        rP|j                                        r7t*                              dQ|j                    t1          |j                  }n(t*                              dR           t3                      }t*                              dSt5          |           dT           g }|j                                        5 }|D ])}|                    t=          j        |                     *	 d d d            n# 1 swxY w Y   t*                              dUt5          |           dV           |j         !                                "                                }|r|dWvrtG          dX |$                    dY          D                       |j%        r8tM          dZ D                       sd[z   t*                              d\           t5          |          }fd]|D             }t*                              d^ d_| d`t5          |           da           |j%        rht5          |          }tO          |          }tQ          db |D                       }t*                              dc| d`t5          |           dd| de           |j)        r:|d |j)                 }t*                              dft5          |           da           g }	d}
|D ]WtU          |j                  }|	                    |           |
tQ          fdgtW          |          D                       z  }
XtQ          dh |D                       }||
z
  }t*                              di| dj| dk|
 dl           |dk    rt)          dm          t*                              dn           tY          |j-        o          }d }|j.        dpk    r7t_          |j.                  }t*                              dq|j.                    nt*                              dr           ddsl0m0} d}tW           ||dtu                    D ]\  }|j        dv          dwz  }|                                r|dxz  }2tc          |	|         |||fi dy|j2        dz|j3        d{|j4        d||j5        d}|j6        d~|j7        d|j8        d|j9        d|j:        d|j        d|j;        d|d|j<        d|j=        d|j>        d|j?        }|r|dxz  }t*                              d| djt5          |           d           t*                              d|j                    d S )Nz--scenarios)typedefaultz--outputz--audio_dirz5Directory with pre-generated audio (read-only lookup))r  r  helpz--limitr   z0 = allz--presets_jsonzexpression_presets.jsonzUser-authored preset JSON. Defaults to <project_root>/expression_presets.json (the same file abc_experiment.py uses). Falls back to synthetic bootstrap if the file doesn't exist.z--device)r  z--filter-prefixzlong_,solo_zComma-separated scenario_id prefixes to keep. Default 'long_,solo_' (monologues + single-turn). Use 'all' or '' to disable filtering and process every scenario. When --split-dialogues is set, 'daily_' is auto-added if missing.)r  r  z--split-dialogues
store_trueu  Expand each daily_* dialogue into one .npz per turn (short monologue per turn). Audio lookup is rerouted to the original (sid, turn_idx). Dialogue context learning is left to MicroAlbert; this pipeline only consumes (audio, VAD) → face at the render stage.)actionr  z--persistence-dampingr1  ziPose-level scale for fleeting (multi-turn isolated) emotions. Single-turn scenarios bypass. Default 0.65.z--cross-emotion-weightr'   z_Weight for cross-emotion VAD-distance blend (0=pure within-emotion, 1=pure cross). Default 0.2.z--cross-emotion-sigmar&   u?   Gaussian σ for cross-emotion VAD-distance kernel. Default 0.4.z--vad-damp-gammarD  u#   Causal VAD damping γ. Default 0.3.z--vad-damp-betarN  u#   Causal VAD damping β. Default 0.7.z--vad-smooth-sigmar   u<   Cross-turn VAD trajectory Gaussian σ in frames. Default 30.z--fade-framesr  z>Crossfade duration at turn boundaries (monologue). Default 96.z--blink-intervalrO  z3Mean seconds between blinks (Poisson). Default 3.5.z--option-e-intensityr   uC   α scalar for Option E parametric mouth/cheek overlay. Default 1.0.z--tremor-amprP  zcBrow + eyeSquint tremor amplitude. Default 0.014 (matches viewer runtime tremor). Set 0 to disable.z--tremor-sigmar   uR   Gaussian σ for tremor noise smoothing (frames). Default 1.5 → ~2.2 Hz dominant.z--no-tremorz/Disable tremor baking entirely (clean targets).z--lipsync-smoothzApply V2-style jitter-gate EMA to LIPSYNC_ONLY + jawOpen channels of the teacher target before saving. Removes sub-threshold mouth noise V3 would otherwise learn. Mirrors animasync-face-v2/pipeline_v2/smooth_v2.py.z--lipsync-smooth-alphar   z8EMA alpha for above-threshold frame deltas. Default 0.6.z--lipsync-smooth-jitter-alphar   zWEMA alpha for sub-threshold (jitter) frame deltas. Lower = more smoothing. Default 0.1.z--lipsync-smooth-thresholdr   u]   Frame-delta cutoff: |Δ|>threshold → real motion, |Δ|<=threshold → jitter. Default 0.03.z	--variant)ABCr  uy  V2-dynamics teacher variant. C = compiler only (no V2). A = strict V2 mask (brows + cheek/nose squint + eyeSquint). B = tiered V2 mask: A's channels PLUS mouth smile/frown, eye wide, mouth dimple at α=0.25. Default B — V3 learns to reproduce V2's prosody-driven motion from (audio, VAD) alone. V2 ONNX is loaded only for A/B (data generation only; not used at V3 inference).)choicesr  r  r   z&%(asctime)s %(levelname)s: %(message)s)levelformatTrA   zAudio dir not found: zAudio source (read-only): zLoading user presets from z>Using synthetic preset bootstrap (parametric layer on anchors)u     → z presetszLoaded z scenarios (pre-filter))all*c              3  f   K   | ],}|                                 |                                 V  -d S rV   )rr   r$  s     r3   ro  zmain.<locals>.<genexpr>%  s7      OOqQWWYYOOOOOOOr5   ,c              3  @   K   | ]}|                     d           V  dS )dailyN)r   r$  s     r3   ro  zmain.<locals>.<genexpr>&  s.      +T+TaALL,A,A+T+T+T+T+T+Tr5   )r   z;--split-dialogues set: auto-added 'daily_' to filter prefixc                f    g | ]-}|                     d d                                        +|.S )rg   ri   )rL   r   )r0   r   prefixess     r3   rH   zmain.<locals>.<listcomp>*  sP     G G G1mR00;;HEEGQ G G Gr5   zFilter prefixes=z: u    → z
 scenariosc              3  D   K   | ]}|                     d           dV  dS )rf   r   NrK   r0   r   s     r3   ro  zmain.<locals>.<genexpr>6  s4      KKAaee4I.J.JKaKKKKKKr5   z--split-dialogues: z scenarios (z  per-turn splits from dialogues)z--limit applied: c              3     K   | ]?\  }}|d         |                              dd                                          ;dV  @d S )Nr7   rE   ri   r   rL   rr   )r0   r   rW   rb   s      r3   ro  zmain.<locals>.<genexpr>D  s^       
 
"ayT']2.2262>>DDFFy yyy
 
r5   c              3  T   K   | ]#}t          d  |d         D                       V  $dS )c              3  j   K   | ].}|                     d d                                          *dV  /dS )rE   ri   r   Nr  rF   s     r3   ro  z!main.<locals>.<genexpr>.<genexpr>I  sA      ??!QUU62%6%6%<%<%>%>?A??????r5   r7   N)r)  r  s     r3   ro  zmain.<locals>.<genexpr>H  sQ         	??qz?????     r5   zAudio lookup: /z turns have audio (u)    missing — those turns will be skipped)z3No audio found for any scenario. Check --audio_dir.zLoading LAM model...)devicer  u   V2 teacher ENABLED — variant=z6V2 teacher DISABLED (variant=C, compiler-only targets))tqdmzProcess scenarios)descrg   z.npzr   rT  rU  rV  rW  rX  rY  r  rZ  r[  r\  r]  r^  r_  ra  rb  rc  zDone. z" scenarios successfully processed.zOutput: )@argparseArgumentParseradd_argumentr   	SCENARIOS
OUTPUT_DIR	AUDIO_DIRr=   PROJECT_ROOTr   
parse_args	no_tremorr\  loggingbasicConfigINFOoutputrY   rd   rt   
SystemExitr   r   presets_jsonr   r   r[   r   openrs   jsonloadsfilter_prefixrr   lowertuplesplitsplit_dialoguesanyr   r)  limitr   rq   r   r  r   r   r  r  rT  rU  rV  rW  rX  rY  r  blink_intervalr[  r]  r_  ra  rb  rc  )apargsr   r   r  line
prefix_strbeforen_splitscenario_audio_pathsmissing_totalr_   total_turnsfound_turnsr@  r^  r  successsirS  rX   r  rb   s                        @@r3   mainr    su     		 	"	"BOOMiO@@@OOJT:O>>>OOMiP  R R ROOICOCCCOO$4(+DDJ  K K K OOJO---OO%}]  ^ ^ ^
 OO'O  P P P OO+%Q  R R R OO,5#O  P P P OO+%Z  \ \ \OO&UC>  @ @ @OO%E3>  @ @ @OO(udW  Y Y YOOO#rY  [ [ [OO&UCN  P P POO*^  ` ` `OONN  O O O OO$5#=  > > > OOM,J  L L LOO&|V  W W W
 OO,5#S  U U UOO3%@  A A A OO0udE  F F F OOK#U  V V V ==??D~ gl3[\\\\KdT222 >  "" CAAABBBHH:$.::;;;  ,T.5577 ,Ad.?AABBB():;;QRRR)++HH,c'll,,,--- I					 /! 	/ 	/DTZ--....	// / / / / / / / / / / / / / / HH>s9~~>>>??? #))++1133J Yj44OOJ,<,<S,A,AOOOOO 	T+T+T8+T+T+T(T(T 	T+-HHHRSSSYG G G G	 G G G	WHWWWWS^^WWWXXX  @Y*955	KKKKKKK ?v ? ?C	NN ? ?? ? ? 	@ 	@ 	@ z Al
l+	?S^^???@@@ M 
 
)$??##E*** 
 
 
 
$U++
 
 
 
 
 	
       K -KHH Jk J JK J JJ J J K K KaNOOO HH#$$$
DK
(
(
(C J|s%dl33
A4<AABBBBIJJJ Gdd93FGGGHH  D;D$7!=!=!==?? 	qLG&r*C(
 
 
 $ 8 8
 "&!:!:
 !% 8 8	

  ..
 ,,
 "22
 ((
 "00
  $66
 
 **
 "z
  ..
 "&!:!:
  )-(H(H!
" &*%B%B#
&  	qLGHHRgRRIRRRSSSHH%%%&&&&&s   .-R((R,/R,__main__)r6   r%   )
r7   r8   r9   r   r:   r;   r<   r=   r>   r?   )rb   rc   rd   r   r>   r?   )r   r;   )r   r8   r>   r8   )r   r-   r   )
r   r   r   r=   r   r=   r   r=   r>   r   )r*   r;   rO   r   r   r=   r>   r   )r   r   r>   r   )r   r   r   r   rG   r   r>   r   )r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r>   r   )r   r   )r   r   r   r   r   r   r>   r   )r   r   r   )
r   r   r   r   r   r   r   r   r>   r   )
r   r   r   r   r   r   r   r   r>   r   )r  )r  r=   r>   r   )r&   )rT   r   r   rc   r!  r   r>   r   )r1  )r2  r3  r4  r   r>   r   )r@  r   rA  r   rB  r   r>   r   )r1  r'   r&   rD  rN  r   r  rO  r   rP  r   NFr   r   r   ),rQ  rc   rR  r?   r@  r   r   rc   rS  r   rT  r   rU  r   rV  r   rW  r   rX  r   rY  r   r  r=   rZ  r   r[  r   r\  r   r]  r   r^  rc   r_  r`  ra  r   rb  r   rc  r   r>   r`  )G__doc__
__future__r   r  asyncior  r  pathlibr   typingr   r   numpyr   torch	constantsr   r   r	   r
   
expressiver   
eye_motionr   lam_wrapperr   tremorr   r   ttsr   utilsr   r   r7  r  r  r   	getLoggerr   __file__resolverB   r  r  r  r  r  r  rq   r   ra   r   r   r   r   r   r   r   r   r   r   r  r  r0  r?  rM  r  r  __name__runr/   r5   r3   <module>r     s  
 
 # " " " " "                                  1 0 0 0 0 0 ( ( ( ( ( ( # # # # # # 7 7 7 7 7 7 7 7       B B B B B B B By	5e%15  $U y	 #I	 06y	
 y
 %i W   
 &z 49*    g((tH~~%%''/26!I-0HH	F"]2
 6!O3	  
 >=99^#<#<=== NOA A A A A(       F   :$ $ $ $N
& 
& 
& 
& 
&   @ @ @ @"   0 7;<?'*    2 47-0	8 	8 	8 	8 	8 -0.126    0 14256:8 8 8 8 8, 24    @ *-5 5 5 5 5, 8<    @5 5 5 5L "&"%!$"! # "%),&*+n n n n nbD' D' D'N zGK r5   