o
    ia                     @  s   d dl mZ d dlZd dlmZ d dlmZ ddlmZ G dd dej	Z
G d	d
 d
ej	ZG dd dej	ZG dd dej	ZG dd dej	ZdS )    )annotationsN)nn)
functional   )MicroAlbertConfigc                      s(   e Zd Zd
 fddZddd	Z  ZS )MicroAlbertEmbeddingscfgr   c                   s   t    tj|j|j|jd| _t|j|j| _	tj
|j|jdd| _tj|j|jd| _t|j| _| jdt|jddd d S )N)padding_idxF)biasepsposition_idsr   )
persistent)super__init__r   	Embedding
vocab_sizeembedding_sizepad_token_id	token_embmax_seq_lenpos_embLinearhidden_sizeemb_proj	LayerNormlayer_norm_eps
layer_normDropoutdropoutregister_buffertorcharange	unsqueezeselfr   	__class__ P/dataset/kemix-engine/package/face/animasync-face-v3/models/microalbert/model.pyr      s   

zMicroAlbertEmbeddings.__init__	input_idstorch.Tensorreturnc                 C  sR   | d}| jd d d |f }| || | }| |}| |}| |S )Nr   )sizer   r   r   r   r   r   )r%   r*   Lposxr(   r(   r)   forward   s   



zMicroAlbertEmbeddings.forwardr   r   )r*   r+   r,   r+   __name__
__module____qualname__r   r1   __classcell__r(   r(   r&   r)   r   
   s    r   c                      *   e Zd Zd fddZddddZ  ZS )MicroAlbertAttentionr   r   c                   sp   t    |j|j dksJ |j| _|j|j | _|j| _t|jd|j | _t|j|j| _|j	| _	d S )Nr      )
r   r   r   	num_headshead_dimr   r   qkvout_projattention_dropoutr$   r&   r(   r)   r   $   s   
zMicroAlbertAttention.__init__Fhr+   attention_mask
return_qkvboolc                 C  s  |j \}}}| |}|j| jdd\}}	}
|||| j| jdd}|	||| j| jdd}|
||| j| jdd}|jt	j
t	jt	jt	jfv sOJ |
 d d d d d d f }| jrc| jnd}tj|||||d}|dd ||| j}| |}|r|||	|
fS |S )N)dimr              )	attn_mask	dropout_p)shaper=   splitr   viewr;   r<   	transposedtyper!   rC   longint32int64trainingr?   Fscaled_dot_product_attention
contiguousr>   )r%   r@   rA   rB   Br.   _r=   q_flatk_flatv_flatqkvmask4dropoutr(   r(   r)   r1   .   s   

zMicroAlbertAttention.forwardr2   Fr@   r+   rA   r+   rB   rC   r3   r(   r(   r&   r)   r9   #   s    
r9   c                      r8   )SharedTransformerBlockr   r   c                   sr   t    t|| _tj|j|jd| _t	|j|j
| _t	|j
|j| _tj|j|jd| _t|j| _d S )Nr   )r   r   r9   attnr   r   r   r   attn_lnr   ffn_sizeffn_upffn_downffn_lnr   r   r$   r&   r(   r)   r   B   s   

zSharedTransformerBlock.__init__Fr@   r+   rA   rB   rC   c           	      C  s|   |r| j ||dd\}}}}n|  ||}| || | }| t| |}| || | }|r<||||fS |S )NTrB   )rd   re   r   rh   rS   gelurg   ri   )	r%   r@   rA   rB   attn_outr[   r\   r]   ffnr(   r(   r)   r1   K   s   zSharedTransformerBlock.forwardr2   ra   rb   r3   r(   r(   r&   r)   rc   A   s    	rc   c                      s4   e Zd Zd fddZdd	d
ZddddZ  ZS )MicroAlbertBackboner   r   c                   s<   t    || _t|| _t|| _|j| _| | j	 d S N)
r   r   r   r   
embeddingsrc   shared_block
num_layersapply_init_weightsr$   r&   r(   r)   r   Y   s   


zMicroAlbertBackbone.__init__module	nn.Moduler,   Nonec                 C     | j j}t|tjr$tjj|jd|d |jd ur"tj	|j d S d S t|tj
rZtjj|jd|d |jd urXt  |j|j   W d    d S 1 sQw   Y  d S d S t|tjrptj|j tj	|j d S d S NrG   )meanstdr   initializer_range
isinstancer   r   initnormal_weightr
   zeros_r   r	   r!   no_gradzero_r   ones_r%   ru   r{   r(   r(   r)   rt   a   "   


"z!MicroAlbertBackbone._init_weightsFr*   r+   rA   return_last_qkvrC   c           	      C  sr   |  |}d  } }}t| jD ]}|r(|| jd kr(| j||dd\}}}}q| ||}q|r7||||fS |S )Nr   Trj   )rp   rangerr   rq   )	r%   r*   rA   r   r@   last_qlast_klast_vir(   r(   r)   r1   p   s   
zMicroAlbertBackbone.forwardr2   ru   rv   r,   rw   ra   )r*   r+   rA   r+   r   rC   )r4   r5   r6   r   rt   r1   r7   r(   r(   r&   r)   rn   X   s    
rn   c                      s<   e Zd Zd fddZdd	d
ZdddZdddZ  ZS )MicroAlbertForEmotionVADr   r   c              	     s   t    || _t|| _tt|j|jt	 | _
t|j|j| _tt|j|jt t|jt|j|j| _| | j d S ro   )r   r   r   rn   backboner   
Sequentialr   r   Tanhpoolernum_emotionsemotion_headvad_head_hiddenGELUr   r   vad_dimvad_headrs   rt   r$   r&   r(   r)   r   ~   s   


z!MicroAlbertForEmotionVAD.__init__ru   rv   r,   rw   c                 C  rx   ry   r|   r   r(   r(   r)   rt      r   z&MicroAlbertForEmotionVAD._init_weightsr*   r+   rA   dictc                 C  sD   |  ||}|d d dd d f }| |}| || ||dS )Nr   )emotion_logitsvadpooled)r   r   r   r   )r%   r*   rA   r@   clsr   r(   r(   r)   r1      s   
z MicroAlbertForEmotionVAD.forwardintc                 C  s   t dd |  D S )Nc                 s  s    | ]
}|j r| V  qd S ro   )requires_gradnumel).0pr(   r(   r)   	<genexpr>   s    z6MicroAlbertForEmotionVAD.num_params.<locals>.<genexpr>)sum
parameters)r%   r(   r(   r)   
num_params   s   z#MicroAlbertForEmotionVAD.num_paramsr2   r   )r*   r+   rA   r+   r,   r   )r,   r   )r4   r5   r6   r   rt   r1   r   r7   r(   r(   r&   r)   r   }   s
    


r   )
__future__r   r!   r   torch.nnr   rS   configr   Moduler   r9   rc   rn   r   r(   r(   r(   r)   <module>   s    %