
    +j5q                       d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	 ddl
ZddlZddlmc mZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZ d
dlmZ d
dlmZ d
dl m!Z! 	 ddl"Z"dZ#n# e$$ r dZ#Y nw xY w ee%          &                                j'        d         Z(e(dz  dz  Z)e(dz  dz  Z*e(dz  dz  dz  Z+d-dZ,d.d Z-d-d!Z.d/d#Z/	 d0d1d*Z0d+ Z1e2d,k    r e1             dS dS )2uB  V3 face training — distill teacher's .npz targets into a causal TCN.

Usage:
    PYTHONPATH=. python3 -m models.v3_face.train               # full train
    PYTHONPATH=. python3 -m models.v3_face.train --smoke       # 10 scenarios × 5 epochs
    PYTHONPATH=. python3 -m models.v3_face.train --device cuda:1 --epochs 80
    )annotationsN)Path)Dict)AdamW)LambdaLR)
DataLoaderWeightedRandomSampler)LIPSYNC_ONLYEXPRESSION_ONLYSHARED_CHANNELS   )V3FaceConfig)BlendshapeDataset)V3FaceModelTF   datav3_trainingemotionmodelsv3_facecheckpointscfgr   returntorch.Tensorc                    t          j        | j        f| j        t           j                  }t
          D ]}| j        ||<   t          D ]}d| j        | j        z   z  ||<   | j        |d<   | j        |d<   |S )zPer-channel L1 weight vector, shape (52,).

    Lipsync channels get cfg.lipsync_weight (audio-sync matters most).
    Eye-blink (ch 8, 9) get cfg.eye_blink_weight (sparse, hard to learn).
    Everything else gets cfg.expression_weight.
    dtype      ?   	   )	torchfull
output_dimexpression_weightfloat32r
   lipsync_weightr   eye_blink_weightr   wchs      L/dataset/kemix-engine/package/face/animasync-face-v3/models/v3_face/train.pymake_channel_weightsr,   +   s     	
CN$c&;5=QQQA # #"" C C s)C,AAB"AaDAaDH    predtargetvalid_length
ch_weightsc                   | j         \  }}}t          j        || j                                      d          }||                    d          k                                     }| |z
                                  |ddddf         z  }	|	                    d          }	|	|z  }	|                                	                    d          }
|	                                |
z  S )	u   L1 loss masked to valid frames, weighted per channel.

    pred, target:    (B, T, C)
    valid_length:    (B,) int  — number of valid frames per sample
    ch_weights:      (C,)
    devicer   r   Ndim      ?min)
shaper!   aranger4   	unsqueezefloatabsmeansumclamp)r.   r/   r0   r1   BTC	frame_idxmaskdiffdenoms              r+   	masked_l1rJ   ?   s     jGAq!Qt{333==a@@I..q11188::D6M  :dD!!!m#<<D999D$;DHHJJ%%E88::r-   c                8   t          j        | j        f| j        t           j                  }t
          D ]}| j        ||<   t          D ]}| j        ||<   | j	        |d<   | j	        |d<   | j
        | j
        |d<   | j
        |d<   | j        dD ]}| j        ||<   |S )a   Per-channel velocity penalty weights, shape (52,).

    Lipsync channels get heavy smoothing; brows + cheek get light smoothing
    (preserve V2 prosody motion); eye-blink channels get near-zero so the
    sharp 5-frame blink kernel survives training.
    r   r   r    N      )r   r   r         )r!   r"   r#   velocity_expression_weightr%   r
   velocity_lipsync_weightr   velocity_shared_weightvelocity_eye_blink_weightvelocity_eye_squint_weightvelocity_brow_weightr(   s      r+   make_velocity_weightsrV   R   s     	
CN$c&D-	) 	) 	)A , ,+" + +*"(AaD(AaD
 %1."." +! 	- 	-B,AbEEHr-   vel_weightsc                H   | j         \  }}}| ddddf         | ddddf         z
  }|ddddf         |ddddf         z
  }||z
                                  |ddddf         z  }	|	                    d          }	t          j        |dz
  | j                                      d          }
|
|dz
                      d          k                                     }|	|z  }	|                                	                    d          }|	                                |z  S )	zEPer-channel-weighted L1 on per-frame difference (smoothness penalty).Nr   r5   r6   r3   r   r8   r9   )
r;   r?   r@   r!   r<   r4   r=   r>   rA   rB   )r.   r/   r0   rW   rC   rD   _pred_vtarget_vrH   rF   rG   rI   s                r+   masked_velocityr\   r   s-    jGAq!!!!QRR%[43B3<'Faaae}vaaa"f~-HX""$${4qqq='AAD999DQU4;777AA!DDI)44Q777>>@@D$;DHHJJ%%E88::r-   r8   trainboolvelocity_scaler>   Dict[str, float]c
           	        |                      |           ddddd}
|rt          j                    nt          j                    }|5  |D ]}|d                             |d          }|d                             |d          }|d                             |d          }|d	                             |d          } | ||          }t          ||||          }t          |||||	z            }||z   }|r|                    d
           |                                 t          j	        j
                            |                                 |j                   |                                 |                                 |
dxx         t          |                                          z  cc<   |
dxx         t          |                                          z  cc<   |
dxx         |j        dk    r!t          |                                          ndz  cc<   |
dxx         dz  cc<   	 d d d            n# 1 swxY w Y   t%          d|
d                   }|
d         |z  |
d         |z  |
d         |z  dS )N        r   )lossl1velnaudioT)non_blockingcondr/   r0   )set_to_nonerc   rd   re   rf   r   )rc   rd   re   )r]   r!   enable_gradno_gradtorJ   r\   	zero_gradbackwardnnutilsclip_grad_norm_
parameters	grad_clipstepr>   itemvelocity_weightmax)modeldl	optimizer	schedulerr1   rW   r   r4   r]   r_   sumsgrad_ctxbatchrg   ri   r/   r0   r.   loss_l1loss_velrc   rf   s                         r+   	run_epochr      s   	KKs3Q77D&+@u """H	   	 	E'N%%f4%@@E=##F#>>D8_''T'BBF 033F3NNL5%%DflJGGG 'tV\(3n(DF FHX%D !###555..u/?/?/A/A3=QQQ      LLLE$))++...LLLJJJ%///JJJKKKS5H15L5L5111RUUKKKIIINIIII3	              8 	AtCyAL1$DJN4;QR?SSSs   G.II	Ic                 n  $%& t          j                    } |                     dt          t                     |                     dt          t
                     |                     dt          t                     |                     dt          d            |                     dt          d            |                     dt          d            |                     dd	
           |                     dt          d           |                     ddd           |                     dg ddd           |                     dt          d d           |                     ddd           |                     ddd           |                     ddd            |                     d!d d"            |                     d#d d$            |                     d%t          d&d'           |                     d(t          d&d)           |                     d*t          d d+           |                     d,t          d d-           |                     d.t          d d/           |                     d0t          d1d2           |                     d3t          d1d4           |                     d5t          d1d6           |                     d7t          d d8           |                     d9t          d d:           | 
                                }t                      $|j        r|j        $_        |j        r|j        $_        |j        r|j        $_        |j        $_        |j        $_        |j        $_        |j        $_        |j        $_        |j        $_        |j        $_        |j        $_        |j        $_        |j                            d;d;<           t9          j        |j                  }$j        d&k    s$j        d&k    s$j        ;$j        $j        n$j        }t=          d=$j        d>d?$j        d>d@|d>dA           $j        "t=          dB$j        d>dC$j        d>dD           $j        "t=          dE$j        d>dC$j        d>dD           $j        d1k    rt=          dF$j        d>dG           $j        d1k    r5t=          dH$j        d>dI$j        dJz  $j         z  dKdL$j          dM           $j        d1k    r5t=          dN$j        d>dI$j        dJz  $j         z  dKdL$j          dM           $j        "t=          dO$j        d>dP$j        d>dQ           tC          |j"        |j#        dRz  $j$        $j        $j        $j        $j        $j        $j        $j        S
  
        }tC          |j"        |j#        dTz  $j$        $j        $j        $j        $j        $j        $j        U	  	        }|j%        rg }dVD ]5\  &}&fdW|j&        D             d |         }|'                    |           6|s|j&        d dX         }||_&        |j&        d dY         |_&        dY$_        dY$_(        d$_        t=          dZtS          |           d[tS          |           d\$j         d]$j                    t=          d^tS          |           d_|*                                            t=          d`tS          |           d_|*                                            |+                    $j,                  }	t[          |	tS          |          d;a          }
t]          |$j        |
|j/        d;dbc          }t]          |$j        db|j/        d;d          }ta          $          1                    |          }t=          de|j2        dfz  d>dg|j3        dhdi|j3        dz  dhdj           |j4        |j4        5                                stm          dk|j4                   t9          j7        |j4        |dbl          }|8                    |dm                    t=          dn|j4         do|9                    dpdq           dr|9                    dst          dt                    dudD           |j:        rB|:                                }t=          dv|dfz  d>dw|j;        dfz  d>dx           dydzl<m=} |D ]}|j>        ot~          }|j>        rt~          st=          d{           |r.|j@        pJd|$jA         d}t          $d~          r$jC        ntS          $jD                   d$j        d|j%        rdndz   }t}          jE        |jF        |jG        |i $jH        |j2        |j3        |j3        dz  tS          |          tS          |          |*                                |*                                t          |          |j%        d	           t}          jI        |dd           t=          dt|          jJ        K                                            d |L                                D             }t          |$j        d$jN                  }t          dytS          |          $j        z            %d$%fd}t          ||          }t          $          1                    |          }t          $          1                    |          }dydl<m=}mS} |jT        dk    r0|D ]}d1||<   d1||<   t=          dtS          |           d           nM|jT        dk    s|j:        r;|D ]}d1||<   d1||<   t=          d|j:        rdnd dtS          |           d           t          d          }t          $j                  D ]}t          jV                    }$jW        dk    rt          d&|$jW        z            nd&}t          ||||||$|d;|
  
        }t          ||||||$|db|
  
        } t          jV                    |z
  }!t=          d|dd|d         dud|d         dud| d         dud| d         dud|Z                                d         dd|d>d|!dhd           | d         |k     }"|"r| d         }|[                                $jH        || d         | d         |d         |d         d}|jT        dk    rdn	d|jT         }#|j\        r|# d|j\         }#|"r8t9          j]        ||j        d|# dz             t=          d|# d|dudD           t9          j]        ||j        d|# dz             |r]t}          j^        ||d         |d         |d         | d         | d         | d         ||Z                                d         |!d
|           t=          d|du           t=          d|j                    |r$|t|          j_        d<   t}          j`                     d S d S )Nz	--npz_dir)typedefaultz--emotion_dirz	--out_dirz--epochsz--batch_sizez--lrz--devicezcuda:0)r   z--num_workersrO   z--smoke
store_truez)Smoke test: 10 train scenarios, 5 epochs.)actionhelpz--focus)alllipsync
expressionr   zChannel focus. 'all' = train all 52 channels (default). 'lipsync' = only LIPSYNC + SHARED channels have loss (expression branch sees zero gradient). 'expression' = only EXPRESSION_ONLY channels have loss (lipsync branch sees zero gradient).)choicesr   r   z--resumezvLoad model weights from a checkpoint .pt before training. Use with --freeze_lipsync for phase-2 expression retraining.)r   r   r   z--freeze_lipsynczFreeze shared backbone + lipsync branch + lipsync head. Only the expression branch + head will train. Lipsync output stays bit-for-bit identical to what was loaded via --resume.z--wandbz5Log to Weights & Biases. Requires `wandb login` once.z--wandb_projectzanimasync-v3-facezW&B project name.)r   r   z--wandb_run_namez)W&B run name. Defaults to auto-generated.z--wandb_entityz4W&B entity (team or user). Defaults to your default.z--lipsync-target-gainr8   zMultiply PURE_LIPSYNC target channels (jaw, mouth mechanics, tongue, cheekPuff) by this factor at load time, then clamp [0,1]. Default 1.0 = no change.z--expression-target-gainaR  Multiply EMOTIONAL target channels (brows incl. innerUp, cheekSquint, eyeSquint, eyeWide, mouth Dimple/Frown/Smile, noseSneer) by this factor at load time, then clamp [0,1]. Default 1.0 = no change. If --emotional-mouth-target-gain is also set, THIS knob covers only the pure-expression subset (brows + eyeSquint + eyeWide + cheekSquint).z--emotional-mouth-target-gainu  Optional separate gain for emotional-mouth target channels (mouthDimple, mouthFrown, mouthSmile, noseSneer). When set, decouples from --expression-target-gain so brows/eyes can go higher than mouth. Used by v18b to avoid pushing shared mouth channels past the point where crisp_mouth normalization destabilizes lipsync. None → same as --expression-target-gain (backward-compat with v14/v18).z--velocity-eye-squint-weightu   Per-channel velocity penalty for eyeSquint L/R (ch 18, 19). Higher than the default expression velocity weight suppresses jitter at high gain. None → use velocity_expression_weight (backward-compat). Suggested 0.8 for v18b.z--velocity-brow-weightu   Per-channel velocity penalty for the 5 brow channels (ch 0-4). Same mechanism as the eyeSquint weight — suppresses brow jitter at high expression gain. None → use velocity_expression_weight (backward-compat). Suggested 0.7 for v18c.z--plosive-damp-targetrb   a-  Bake the runtime plosive damper into training targets. When mouthClose > 0.4 on a frame, mouthPress/Roll/Shrug (ch 35,36,39,40,41,42) get multiplied by (1 - this_value * smoothstep). 0 = off (default). 0.30 = matches the production main-viewer setting that prevents 'lips swallowed' on m/b/p plosives.z--smooth-target-sigma-browu  Gaussian σ (frames @ 30 fps) for pre-smoothing the 5 brow target channels BEFORE the gain. The proper fix for brow flicker at high gain — smooths the input the model is asked to fit so jitter never enters the training signal. 0 = off (default). Suggested 2.0 (~67ms) for v18e.z --smooth-target-sigma-eye-squintu  Gaussian σ (frames @ 30 fps) for pre-smoothing the eyeSquint L/R target channels BEFORE the gain. Real orbicularis oculi is slow + sustained, so we can smooth heavier than brows without losing useful motion. 0 = off (default). Suggested 3.0 (~100ms) for v18e.z--brow-innerup-happy-gainuq  Override the gain on browInnerUp (ch 2) for happy emotions (joy / laughter / excitement / gratitude). Set to 1.0 with expression-target-gain=2.2 to keep browInnerUp un-amplified on happy frames so the avatar doesn't look concerned/apologetic when saying happy things, while leaving the 2.2× boost everywhere else. None → no override (backward compat with v14..v18f).z--variant-tagu   Optional suffix appended to checkpoint filenames after --focus, so different gain runs don't clobber each other. E.g. 'v14' → best_lipsync_v14.pt / best_expression_v14.pt.T)parentsexist_oku   [target-gain] lipsync×z.2fu     expression×u     emotional-mouth×z6  (applied per-channel in dataset, then clamped [0,1])u    [velocity] eyeSquint(ch 18,19)×z  (vs default expression )u   [velocity] brows(ch 0-4)×z[plosive-damper] target damp=z,  (applied in dataset when mouthClose > 0.4)u    [target-smooth] brow(ch 0-4) σ=z
 frames (~i  z.0fz ms @ z fps)u'   [target-smooth] eyeSquint(ch 18,19) σ=z:[per-emotion] browInnerUp(ch 2) on happy frames uses gain z (vs z elsewhere)zseed_train_final.jsonl)crop_frameslipsync_target_gainexpression_target_gainemotional_mouth_target_gainplosive_damp_targetsmooth_target_sigma_browsmooth_target_sigma_eye_squintbrow_innerup_happy_gainzseed_val.jsonl)r   r   r   r   r   r   r   ))long_rO   )solo_rN   )zdaily_-splitrN   c                ,    g | ]}|d          k    |S )r    ).0ewant_cats     r+   
<listcomp>zmain.<locals>.<listcomp>^  s'    EEE1AaDH4D4DQ4D4D4Dr-   
      z[smoke] train=z val=z epochs=z batch=ztrain: u    scenarios — counts: zval:   )num_samplesreplacementF)
batch_sizesamplernum_workers
pin_memory	drop_last)r   shuffler   r   zV3FaceModel (split-branch): g    .AzM params, ~z.1fz MB fp32 (~z	 MB int8)z--resume checkpoint not found: )map_locationweights_onlyry   z[resume] loaded weights from z  (prev epoch=epoch?z	, val_l1=val_l1nanz.4fz[freeze_lipsync] froze zGM params (shared backbone + lipsync branch + lipsync head). Trainable: zM (expression branch + head).r   )LIPSYNC_BRANCH_CHANNELSuY   [wandb] requested but `wandb` not installed — skipping. Install with: pip install wandbv3face_h_bn_blocks_lrz.0e_smoke )	n_paramssize_mb_fp32size_mb_int8train_scenariosval_scenariostrain_category_countsval_category_countsr4   smoke)projectentitynameconfig	gradientsd   )loglog_freqz[wandb] logging to c                     g | ]}|j         	|S r   )requires_grad)r   ps     r+   r   zmain.<locals>.<listcomp>  s    IIIaIIIIr-   )g?gffffff?)lrbetasweight_decayru   intr   r>   c           	         | j         k     r| t          dj                   z  S | j         z
  t          dj         z
            z  }ddt          j        t          j        t          d|          z            z   z  S )Nr   r   r8   )warmup_stepsrx   mathcospir:   )ru   progr   total_stepss     r+   	lr_lambdazmain.<locals>.lr_lambda  sv    #"""#a!12222s''3q+@P2P+Q+QQcDHTWs3~~%=>>>??r-   )r   EXPRESSION_BRANCH_CHANNELSr   z[focus=lipsync] masked z expression channelsr   z[focus=expressionz/freeze_lipsyncz	] masked z lipsync channelsinfr   )r]   r_   zepoch 3dz  train l1=rd   z vel=re   z	  val l1=z  lr=z.2ez  vw=z  s)ry   r   r   r   val_veltrain_l1	train_velrY   bestz.ptu     → saved bestz	 (val l1=latestrc   )
r   ztrain/l1ztrain/velocityz
train/losszval/l1zval/velocityzval/losszval/best_l1r   epoch_seconds)ru   z
Done. best val l1: zcheckpoints: best_val_l1)ru   r   r   r>   )aargparseArgumentParseradd_argumentr   DEFAULT_NPZ_DIRDEFAULT_EMOTION_DIRDEFAULT_OUT_DIRr   r>   str
parse_argsr   epochsn_epochsr   r   learning_rater   r   r   rT   rU   r   r   r   r   out_dirmkdirr!   r4   printrP   fpsr   npz_diremotion_dirr   r   entriesextendr   lencategory_countsget_sample_weightslong_oversample_weightr	   r   r   r   rm   r   size_mbresumeexists
SystemExitloadload_state_dictgetfreeze_lipsyncn_trainablery   r   wandb_WANDB_AVAILABLEwandb_run_name
hidden_dimhasattrr   	dilationsinitwandb_projectwandb_entity__dict__watchrunget_urlrs   r   r   rx   r   r,   rV   r   focusrangetimevelocity_warmup_epochsr:   r   get_last_lr
state_dictvariant_tagsaver   summaryfinish)'apargsr4   mouth_gtrain_dsval_dssmoke_picksrf   pickssample_weightsr   train_dlval_dlry   ckptfrozen_nr   r*   	use_wandbrun_nametrainable_paramsr{   r   r|   r1   rW   r   best_valr   t0r_   trvadtis_bestsuffixr   r   r   s'                                       @@@r+   mainr.     s   		 	"	"BOOKdOODDDOOO$8KOLLLOOKdOODDDOOJS$O777OONdO;;;OOFO555OOJO111OOO#qO999OOIlD  F F FOOI'G'G'G!E  F F F OOJT42  3 3 3 OO&|4  5 5 5
 OOIlP  R R ROO%/B,  . . .OO&D  F F FOO$dO  Q Q QOO+%#  $ $ $
 OO.UCO  P P P OO3%G  H H H OO2<  = = = OO,5$F  G G G OO+%.  / / / OO0uc;  < < < OO6UC.  / / / OO/eT3  4 4 4 OOO#tI  J J J
 ==??D
..C{.4;CL8w+DG!"6C!%!<C&*&FC#%)%DC"#8C"6C#'#@C )-)LC&"&">CLtd333\$+&&F3&&)S00.:5A 222 	 	 F(?G F F 7?F F")1F F F 	G 	G 	G %1 N1OW N N(+(FMN N N 	O 	O 	O
+ N3+CK N N(+(FMN N N 	O 	O 	O
$$ <c.EM < < < 	= 	= 	=
#c)) [1M[ [ [/$6@Z[ [LOG[ [ [ 	\ 	\ 	\
)C// a8Zh a a5<swF`a aRURYa a a 	b 	b 	b
". c,bc c7:7Qbc c c 	d 	d 	d !"&"25M"M.1o696M9<9S>A>]696M;>;WADAc:=:U	W 	W 	WH t| $ 03C C,/O474K7:7Q<?<[474K9<9U?B?ac c cF z ?L 	& 	&KHaEEEE 0EEEbqbIEu%%%% 	0"*3B3/K&+ >s8}} > >3v;; > >> >-0^> > 	? 	? 	? 

VCMM
V
V(:R:R:T:T
V
VWWW	
RCKK
R
R8N8N8P8P
R
RSSS 001KLLN#N14X157 7 7G (s~w&*&64$)+ + +H 3>5$($4G G GF ''E	 K);
K K Km
JK K,1M!O
JK K K L L L {{!!## 	NLt{LLMMMz$+FOOOd7m,,, @dk @ @!XXgs33@ @5<<88?@ @ @ 	A 	A 	A
  
'')) UC U U!-c1TU U U 	V 	V 	V 	322222) 	 	B 
//Iz 1* 1 0 	1 	1 	1 ;& 
*s~ * *j9Q9Q)iWZ[^[hWiWi * *#)* *:-xx2/ 	
 	
&$,!N % % 1#&x==!$V)1)A)A)C)C'-'='='?'?f++  		
 	
 	
 	
" 	E{S99999EI$5$5$7$799:::
 JI5#3#3#5#5III&*'#&#35 5 5I
 aX566K@ @ @ @ @ @ @ I..I%c**--f55J',,//77K KJJJJJJJzY, 	" 	"B JrN!KOO],F(G(G]]]^^^^	|	#	#t':	#) 	" 	"B JrN!KOO Ht7J"R"3"3PR H H344H H H 	I 	I 	I U||Hs|$$ < <Y[[ )A-- US7788836 	 uh	9!;V4&46 6 6 ufi!;V5&46 6 6 Y[[2 u#  T(> -/Y> 4< +-e9<  ))++A.6  #*	 
 l   	 	 	 T(X% 	 $xH%%''lh%y4E
 
  zU**0@DJ0@0@ 	433!133F 	GJtT\,>6,>,>,>>???EVEEhEEEEFFF
4(<(<(<(<<=== 	ItH"$U) jT( "5	vJ'++--a0!#      

0(
0
0
0111	
($,
(
())) '/m$ r-   __main__)r   r   r   r   )
r.   r   r/   r   r0   r   r1   r   r   r   )
r.   r   r/   r   r0   r   rW   r   r   r   )r8   )r]   r^   r_   r>   r   r`   )3__doc__
__future__r   r   r   r  pathlibr   typingr   numpynpr!   torch.nn.functionalrp   
functionalFtorch.optimr   torch.optim.lr_schedulerr   torch.utils.datar   r	   scripts.compiler.constantsr
   r   r   r   r   datasetr   ry   r   r  r  ImportError__file__resolver   PROJECT_ROOTr   r   r   r,   rJ   rV   r\   r   r.  __name__r   r-   r+   <module>rC     sb    # " " " " "                                    - - - - - - > > > > > > > > U U U U U U U U U U             & & & & & &      LLL    tH~~%%''/2'-7"V+i7 )I5E
   (   &   @   ( <?"T "T "T "T "TJu u up zDFFFFF s   #A* *A43A4