
    j#                    V   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	m
Z
mZ ddlmZ  G d	 d
ej                  Z G d dej                  Z G d dej                  Z e ee	           ee          z            Z e ee
                    Z G d dej                  ZdS )u  V3 face model — split-branch causal TCN with FiLM conditioning.

Architecture:

    audio (T, 80) ⊕ cond (T, 19)  →  Linear(99 → hidden)
                                      │
                                      ▼
            shared backbone: 6× DilatedCausalConv1d (d=1..32) + FiLM
                                      │
                       ┌──────────────┴──────────────┐
                       ▼                             ▼
       lipsync branch                       expression branch
       2× TCN (d=64,128)                    2× TCN (d=64,128)
                       ▼                             ▼
       Linear(hidden → 31)                  Linear(hidden → 21)
       sigmoid                              sigmoid
                       │                             │
                       └──────────┬──────────────────┘
                                  ▼
                       combined (T, 52) blendshape output
                       (lipsync values at LIPSYNC + SHARED indices,
                        expression values at EXPRESSION_ONLY indices)

Freezing:
    model.freeze_lipsync()  →  shared backbone + lipsync branch + lipsync
                                head are no_grad. Expression branch +
                                head remain trainable. Lipsync output
                                becomes bit-for-bit deterministic from
                                audio input.

~3.7 M params at hidden=192, ~1.2 ms/frame CPU.
Quantization-friendly (Conv1d + Linear + GELU + sigmoid, no attention).
    )annotationsN)nn)
functional)LIPSYNC_ONLYEXPRESSION_ONLYSHARED_CHANNELS   )V3FaceConfigc                  ,     e Zd ZdZd fdZdd
Z xZS )FiLMu   Per-frame Feature-wise Linear Modulation conditioning.

    Predicts (γ, β) from `cond` and applies `x * (1 + γ) + β`.
    cond_dimint
hidden_dimc                &   t                                                       t          j        |d|z            | _        t          j                            | j        j                   t          j                            | j        j                   d S )N   )	super__init__r   Linearprojinitzeros_weightbias)selfr   r   	__class__s      L/dataset/kemix-engine/package/face/animasync-face-v3/models/v3_face/model.pyr   zFiLM.__init__5   sg    IhJ77	
ty'(((
ty~&&&&&    xtorch.Tensorcondreturnc                v    |                      |          }|                    dd          \  }}|d|z   z  |z   S )Nr   dimg      ?)r   chunk)r   r   r    gbgammabetas         r   forwardzFiLM.forward<   s>    YYt__hhqbh))tC%K 4''r   )r   r   r   r   r   r   r    r   r!   r   __name__
__module____qualname____doc__r   r*   __classcell__r   s   @r   r   r   /   s[         
' ' ' ' ' '( ( ( ( ( ( ( (r   r   c                  ,     e Zd ZdZd fdZdd
Z xZS )CausalConv1dz6Left-padded 1D conv that never looks at future frames.channelsr   kernel_sizedilationc                    t                                                       |dz
  |z  | _        t          j        ||||d          | _        d S )Nr	   r   )r7   padding)r   r   left_padr   Conv1dconv)r   r5   r6   r7   r   s       r   r   zCausalConv1d.__init__E   sP    $qH4Ih+'/< < <			r   r   r   r!   c                d    t          j        || j        df          }|                     |          S )Nr   )Fpadr:   r<   )r   r   s     r   r*   zCausalConv1d.forwardK   s*    E!dmQ'((yy||r   )r5   r   r6   r   r7   r   )r   r   r!   r   r,   r2   s   @r   r4   r4   B   sW        @@< < < < < <       r   r4   c                  ,     e Zd ZdZd fd	ZddZ xZS )TCNBlocku9   Residual block: 2× CausalConv1d + FiLM + GELU + dropout.r   r   r6   r7   r   dropoutfloatc                `   t                                                       t          |||          | _        t          |||          | _        t          j        |          | _        t          j        |          | _        t          ||          | _
        t          j        |          | _        d S N)r   r   r4   conv1conv2r   	LayerNormnorm1norm2r   filmDropoutrB   )r   r   r6   r7   r   rB   r   s         r   r   zTCNBlock.__init__S   s    !*k8DD
!*k8DD
\*--
\*--
:..	z'**r   r   r   r    r!   c                8   |}|                     dd          }|                     |          }|                     dd          }|                     |          }t          j        |          }|                     |          }|                     dd          }|                     |          }|                     dd          }|                     |          }|                     ||          }t          j        |          }|                     |          }||z   S )Nr	   r   )		transposerF   rI   r>   gelurB   rG   rJ   rK   )r   r   r    residualhs        r   r*   zTCNBlock.forward]   s    KK1JJqMMKK1JJqMMF1IILLOOKK1JJqMMKK1JJqMMIIaF1IILLOO!|r   )
r   r   r6   r   r7   r   r   r   rB   rC   r+   r,   r2   s   @r   rA   rA   P   sW        CC+ + + + + +       r   rA   c                  |     e Zd ZdZd fdZdd	ZddZedd            Zedd            Z	edd            Z
 xZS )V3FaceModelu?   Split-branch causal TCN: (audio, cond) → (T, 52) blendshapes.cfgr
   c                   t                                                       | _        |                     dt	          j        t          t          j                             |                     dt	          j        t          t          j                             t          t                    }t          t                    }||z   j
        k    sJ d| d| dj
                     t          j        j        j        z   j                  | _        t          j        fdj        D                       | _        t          j        fdj        D                       | _        t          j        j        |          | _        t          j        fd	j        D                       | _        t          j        j        |          | _        d S )
Nlipsync_idx)dtypeexpression_idxzchannel split mismatch: z + z != c           	     ^    g | ])}t          j        j        |j        j                  *S  rA   r   r6   r   rB   .0drT   s     r   
<listcomp>z(V3FaceModel.__init__.<locals>.<listcomp>   sA     ,
 ,
 ,
 S^S_as{SS,
 ,
 ,
r   c           	     ^    g | ])}t          j        j        |j        j                  *S rZ   r[   r\   s     r   r_   z(V3FaceModel.__init__.<locals>.<listcomp>   sA     -
 -
 -
 S^S_as{SS-
 -
 -
r   c           	     ^    g | ])}t          j        j        |j        j                  *S rZ   r[   r\   s     r   r_   z(V3FaceModel.__init__.<locals>.<listcomp>   sA     0
 0
 0
 S^S_as{SS0
 0
 0
r   )r   r   rT   register_buffertorchtensorLIPSYNC_BRANCH_CHANNELSlongEXPRESSION_BRANCH_CHANNELSlen
output_dimr   r   	audio_dimr   r   
input_proj
ModuleListshared_dilationsshared_blocksbranch_dilationslipsync_blockslipsync_headexpression_blocksexpression_head)r   rT   	n_lipsyncn_expressionr   s    `  r   r   zV3FaceModel.__init__|   s    	]"\*ATTT	V 	V 	V-"\*DEJWWW	Y 	Y 	Y /00	566<'3>999WyWW\WWs~WW :99 )CMCL$@#.QQ] ,
 ,
 ,
 ,
),
 ,
 ,
   !m -
 -
 -
 -
)-
 -
 -
   Icni@@ "$ 0
 0
 0
 0
)0
 0
 0
 " "  "yFFr   audior   r    r!   c                F   t          j        ||gd          }|                     |          }| j        D ]} |||          }|}| j        D ]} |||          }t          j        |                     |                    }|}| j        D ]} |||          }t          j        |                     |                    }|j	        d         |j	        d         }
}	t          j
        |	|
| j        j        |j        |j                  }||d| j        f<   ||d| j        f<   |S )z
        Args:
            audio: (B, T, audio_dim)
            cond:  (B, T, cond_dim)
        Returns:
            blendshapes: (B, T, 52) in [0, 1]
        r#   r$   r   r	   )devicerW   .)rc   catrk   rn   rp   sigmoidrq   rr   rs   shapezerosrT   ri   rx   rW   rV   rX   )r   rv   r    r   blocklxlipsync_outexexpression_outBTouts               r   r*   zV3FaceModel.forward   sL    Iudm,,,OOA' 	 	EaAA ( 	! 	!Er4BBmD$5$5b$9$9:: + 	! 	!Er4BBt';';B'?'?@@ {1~u{1~1k!Q 3!&[5FH H H%0C!!"(6C$$%
r   r   c                   d}| j                                         D ] }d|_        ||                                z  }!| j        D ]7}|                                D ] }d|_        ||                                z  }!8| j        D ]7}|                                D ] }d|_        ||                                z  }!8| j                                        D ] }d|_        ||                                z  }!|S )u  Freeze shared backbone + lipsync branch + lipsync head.

        Sets `requires_grad=False` on every parameter that contributes to
        the lipsync output path. After this, lipsync output is bit-for-bit
        deterministic from audio + cond — the expression branch can be
        retrained without ANY drift in lipsync.

        Returns the number of frozen parameters.
        r   F)rk   
parametersrequires_gradnumelrn   rp   rq   )r   frozenpr}   s       r   freeze_lipsynczV3FaceModel.freeze_lipsync   s    ++-- 	  	 A#AOaggiiFF' 	$ 	$E%%'' $ $"'!''))#$ ( 	$ 	$E%%'' $ $"'!''))#$ "--// 	  	 A#AOaggiiFFr   c                X    t          d |                                 D                       S )Nc              3  >   K   | ]}|                                 V  d S rE   )r   r]   r   s     r   	<genexpr>z'V3FaceModel.n_params.<locals>.<genexpr>   s*      8817799888888r   sumr   r   s    r   n_paramszV3FaceModel.n_params   s)    88doo&7&7888888r   c                X    t          d |                                 D                       S )Nc              3  L   K   | ]}|j         	|                                V   d S rE   )r   r   r   s     r   r   z*V3FaceModel.n_trainable.<locals>.<genexpr>   s1      KK1?K17799KKKKKKr   r   r   s    r   n_trainablezV3FaceModel.n_trainable   s)    KKdoo&7&7KKKKKKr   rC   c                    | j         dz  dz  S )zFloat32 disk size in MB.   i   )r   r   s    r   size_mbzV3FaceModel.size_mb   s     }q K00r   )rT   r
   )rv   r   r    r   r!   r   )r!   r   )r!   rC   )r-   r.   r/   r0   r   r*   r   propertyr   r   r   r1   r2   s   @r   rS   rS   y   s        II"G "G "G "G "G "GH       D   6 9 9 9 X9 L L L XL 1 1 1 X1 1 1 1 1r   rS   )r0   
__future__r   rc   r   torch.nnr   r>   scripts.compiler.constantsr   r   r   configr
   Moduler   r4   rA   sortedsetre   rg   rS   rZ   r   r   <module>r      s     B # " " " " "        $ $ $ $ $ $          !          ( ( ( ( (29 ( ( (&    29       ry   J !&\!2!2SS5I5I!IJJ #VCC$8$899 o1 o1 o1 o1 o1") o1 o1 o1 o1 o1r   