
    *jI:                    L   d Z ddlmZ ddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlZddlZddlmZ ddlmZ  ej        d          Zd	Zd
ZdZdZ e eeez                        e ee                    k    s
J d            dZdZdZdZdZddZddZ  G d de          Z!dS )u  BlendshapeDataset — loads (audio, cond, target) from data_pipeline.py .npz.

Each .npz contains:
  audio:  (T, 80)  log-mel features at 30 fps
  cond:   (T, 19)  16-dim emotion one-hot + 3-dim VAD per frame
  target: (T, 52)  ARKit blendshape values

Split membership is derived from the JSONL files in data/emotion/
(seed_train_final.jsonl / seed_val.jsonl / seed_test.jsonl).

For daily-split scenarios (per-turn pseudo-scenarios like `daily_007_t2`),
parent membership is checked: if the parent dialogue is in the split's JSONL,
all its per-turn splits belong to that split.

Crops:
  - Sequences longer than `crop_frames`  → random window crop
  - Sequences shorter than `crop_frames` → edge-pad to crop_frames + return
    `valid_length` so the trainer can mask loss to real frames only.
    )annotationsN)Path)ListSetTuple)gaussian_filter1d)Datasetz^(daily_.+)_t(\d+)$)                         !   "   #   $   %   &   '   (   )   *   -   .   /   0   3   )r                                             +   ,   1   2   )r   r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   )r+   r,   r-   r.   r/   r0   r1   r2   zsplit must partition EMOTIONAL)r   r   r   r   r   r   g?r   )r!   r"   r#   r
   r"   sidstrreturnc                    |                      d          rdS |                      d          rdS |                      d          rd| v rdS dS )Nlong_solo_daily__tzdaily_-splitother)
startswith)r3   s    N/dataset/kemix-engine/package/face/animasync-face-v3/models/v3_face/dataset.py	_classifyr>   g   sZ    
~~g w
~~g w
~~h DCKK~7    c                h    t                               |           }|r|                    d          n| S )uD   Daily-split id → parent dialogue id. Pass through for long_/solo_.r!   )	_SPLIT_REmatchgroup)r3   ms     r=   _parent_sidrE   q   s,    A#1771:::#r?   c                  J    e Zd Z	 	 	 	 	 	 	 	 dddZd dZd!dZd"d#dZd$dZdS )%BlendshapeDataset         ?N        npz_dirr   split_jsonlcrop_framesintlipsync_target_gainfloatexpression_target_gainemotional_mouth_target_gainfloat | Noneplosive_damp_targetsmooth_target_sigma_browsmooth_target_sigma_eye_squintbrow_innerup_happy_gainc                   t          |          | _        t          |          | _        t	          |          | _        t	          |          | _        ||}t	          |          | _        t          j	        dt          j
                  | _        t          D ]}| j        | j        |<   t          D ]}| j        | j        |<   t          D ]}| j        | j        |<   | j        dk    p| j        dk    p
| j        dk    | _        t	          |          | _        t          j        dt$                    | _        t(          D ]}d| j        |<   t	          |          | _        t	          |	          | _        g | _        | j        dk    r!| j                            d| j        f           | j        dk    r!| j                            d| j        f           |
| _        t5                      }t          |                                          5 }|D ]1}t9          j        |          }|                    |d                    2	 d d d            n# 1 swxY w Y   g | _        tA          | j        !                    d	                    D ]C}|j"        }tG          |          |v r)| j                            |tI          |          f           Dd S )
N4   dtyperI   TrJ   )r   r!   r"   r#   r$   )r'   r(   scenario_idz*.npz)%r   rK   rN   rM   rP   rO   rQ   rR   nponesfloat32_target_gainPURE_LIPSYNC_CHANNELSEMOTIONAL_PURE_CHANNELSEMOTIONAL_MOUTH_CHANNELS_gain_activerT   zerosbool_plosive_damp_maskPLOSIVE_DAMP_CHANNELSrU   rV   _smooth_groupsappendrW   setopenjsonloadsaddentriessortedglobstemrE   r>   )selfrK   rL   rM   rO   rQ   rR   rT   rU   rV   rW   ch	split_idsflinerowpr3   s                     r=   __init__zBlendshapeDataset.__init__x   s!    G}}{++#()<#=#= &+,B&C&C# '.*@'+01L+M+M( GBbj999' 	= 	=B$($<Db!!) 	@ 	@B$($?Db!!* 	E 	EB$($DDb!!!5< I!%!<!CI!%!AS!H 	 $))<#=#=  #%(2T":":":' 	/ 	/B*.D#B'' )..F(G(G%.34R.S.S+ (3..&&9V'WXXX.44&&$2U'VWWW (?$ "ee	+##%% 	2 2 2j&&c-011112	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 /1))'2233 	; 	;A&C39,,##S)C..$9:::	; 	;s   5II
I
r5   c                *    t          | j                  S )N)lenrp   )rt   s    r=   __len__zBlendshapeDataset.__len__   s    4<   r?   idxc                   | j         |         \  }}t          j        | j        | dz            }|d                             t          j                  }|d                             t          j                  }|d                             t          j                  }| j        r5| j        D ]-\  }}	|D ]%}
t          |d d |
f         |	d          |d d |
f<   &.| j        r|| j	        d d d f         z  }| j
        | j        r| j        dk    r|d d t          t                    f                             d	          }t          j        |d
d          }| j        d
k    rt          || j        d          }| j        }d|z
  || j
        |z  z  z   }|d d t"          fxx         |z  cc<   | j        d
k    r}|d d t&          f         }t          j        |t(          z
  dt(          z
  z  d
d          }t          j        d
d| j        |z  z
            }|d d | j        fxx         |d d d f         z  cc<   | j        s| j        d
k    r3t          j        |d
d                              t          j                  }|j        d         }|| j        k    rvt3          t          j                            d|| j        z
  dz                       }|||| j        z            }|||| j        z            }|||| j        z            }| j        }n]| j        |z
  }t          j        |d|fdfd          }t          j        |d|fdfd          }t          j        |d|fdfd          }|}t;          j        |          t;          j        |          t;          j        |          t;          j        |t:          j                   ||dS )Nz.npzaudiocondtargetnearest)sigmamoderI   r!   )axisrJ   r   )r   r   edge)r   rZ   )r   r   r   valid_lengthr\   category)!rp   r]   loadrK   astyper_   ri   r   rd   r`   rW   rQ   listHAPPY_EMOTION_INDICESsumcliprU   BROW_INNERUP_CHrT   MOUTH_CLOSE_CHPLOSIVE_TRIGGERmaximumrg   shaperM   rN   randomrandintpadtorch
from_numpytensorlong)rt   r   r3   catdatar   r   r   chsr   ru   happy	full_gainadjustmctattenTstartr   r   s                        r=   __getitem__zBlendshapeDataset.__getitem__   s   <$Swt|lll233W$$RZ00F|""2:..h&&rz22  	G"1 G G
U G GB$5fQQQUm5<E%G %G %GF111b5MMG  	9d/aaa88F (4% 5/366D!67778<<!<DDEGE3,,E,s22)%151N09; ; ; 3IEkUd.JY.V%WWF111o%&&&&0&&& #c))>)*Bo-#2GH#sSSAJsC$*BQ*F$FGGE111d--...%4.@...  	B 83 > >WVS#..55bjAAFLO   	))!Q1A-AA-EFFGGE%%$*:"::;E(8 889DEED,<$<<=F+LL"Q&CF5As8V"46BBBE6$!S6 2@@@DVFaXv$6VDDDFL %e,,$T**&v..!LUZHHH
 
 	
r?         @long_weight
np.ndarrayc                    t          j        t          | j                  t           j                  }t          | j                  D ]\  }\  }}|dk    r|||<   |S )u  Sample weights for WeightedRandomSampler.

        Long_ scenarios get `long_weight`, everything else gets 1.0.
        Used to compensate for the 5715 daily-split + 253 solo_ vs only 363
        long_ files — without this, the model sees ~6% multi-emotion gradient.
        rZ   r7   )r]   r^   r}   rp   r_   	enumerate)rt   r   weightsi_r   s         r=   get_sample_weightsz$BlendshapeDataset.get_sample_weights"  s]     '#dl++2:>>>$T\22 	) 	)KAx3g~~(
r?   dictc                \    i }| j         D ]!\  }}|                    |d          dz   ||<   "|S )Nr   r!   )rp   get)rt   countsr   r   s       r=   category_countsz!BlendshapeDataset.category_counts/  s?    l 	1 	1FAs **S!,,q0F3KKr?   )rH   rI   rI   NrJ   rJ   rJ   N)rK   r   rL   r   rM   rN   rO   rP   rQ   rP   rR   rS   rT   rP   rU   rP   rV   rP   rW   rS   )r5   rN   )r   rN   )r   )r   rP   r5   r   )r5   r   )__name__
__module____qualname__r{   r~   r   r   r    r?   r=   rG   rG   w   s        
 %((+48%(*-0304M; M; M; M; M;^! ! ! !V
 V
 V
 V
p         r?   rG   )r3   r4   r5   r4   )"__doc__
__future__r   rm   repathlibr   typingr   r   r   numpyr]   r   scipy.ndimager   torch.utils.datar	   compilerA   ra   EMOTIONAL_CHANNELSrb   rc   tuplerq   rh   r   r   r   r   r>   rE   rG   r   r?   r=   <module>r      s   & # " " " " "  				       # # # # # # # # # #      + + + + + + $ $ $ $ $ $ BJ-..	
  	 "   uVV+.FFGGHHuVV&''(() ) )*J) ) ) 1  %    $ $ $ $| | | | | | | | | |r?   