o
    iJ                     @  s<  U d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlZd dlmZ ddlmZ ddlmZ i d	d
didd
didd
didd
didd
didddidddidddidddidddidddddddidddidddidi di Zd ed!< d3d(d)ZG d*d+ d+eZd4d1d2ZdS )5    )annotationsN)Path)DictListSetTuple)Dataset   )EMOTION_TO_ID)MicroTokenizerjoyV_minglaughterg        
excitement	agreement	gratitudesadnessV_maxcryingsulkg?apologystruggleanger)r   A_minrefusalsurpriser   flusterneutralshyzDict[str, Dict[str, float]]_POLARITY_RULESemotionstrvadTuple[float, float, float]returnboolc                 C  sb   t | i }|\}}}d|v r||d k rdS d|v r#||d kr#dS d|v r/||d k r/dS dS )Nr   Tr   r   F)r   get)r    r"   ruleVA_D r+   R/dataset/kemix-engine/package/face/animasync-face-v3/models/microalbert/dataset.py_polarity_violation    s   
r-   c                   @  sB   e Zd Z	ddd	d
ZdddZd ddZd!ddZd"ddZdS )#SeedEmotionDatasetr   
jsonl_pathr   	tokenizerr   max_seq_lenintcontext_windowc                 C  s8  || _ || _|| _g | _t | _g }t|jdd}|D ]}t	|}|
dp,|d }	| j|	 |d }
t|
D ]\}}|d  }|sHq;|d }|tvr\td|d	|d  |d
 }t|dksmJ d| t|d t|d t|d f}t||r||d ||f |
dd}g }|dkrtd|| }|
|| D ]}|d  }|r|||
ddf q| j|||t| |f q;qW d    n1 sw   Y  |rtdt| d| dtjd |d d D ]&\}}}td| d| d|d dd|d dd|d d
tjd qd S d S )Nzutf-8)encodingsource_scenario_idscenario_idturnstextr    zunknown emotion z at scenario r"      zvad must be length-3, got r   r	      speaker?z[dataset] WARNING: u&    emotion↔VAD polarity violations in z (first 5):)file   z  [z] z V=z+.2fz A=z D=)tokr1   r3   samplesset
source_idsr   openjsonloadsr&   add	enumeratestripr
   
ValueErrorlenfloatr-   appendmaxprintsysstderr)selfr/   r0   r1   r3   polarity_warningsflinerowsrcr7   itr8   emor"   	vad_tuplecurr_speakerprevstartptptxsidr+   r+   r,   __init__-   sl   
"
4zSeedEmotionDataset.__init__r$   c                 C  s
   t | jS )N)rJ   r@   rQ   r+   r+   r,   __len__d   s   
zSeedEmotionDataset.__len__r\   List[Tuple[str, str]]	curr_textr!   r[   c                 C  sl   | j dkr|S |sd| S g }|D ]\}}||krdnd}|| d|  q|d|  d|S )Nr   z[SELF] z[SELF]z[OTHER] z [SEP] )r3   rL   join)rQ   r\   re   r[   partsr_   pspkmarkerr+   r+   r,   _compose_textg   s   


z SeedEmotionDataset._compose_textidxr   c           	      C  sB   | j | \}}}}}| |||}| j|| j}||t|dS )N)	input_ids
emotion_idr"   )r@   rk   r?   encoder1   list)	rQ   rl   r\   re   r[   emo_idr"   r8   idsr+   r+   r,   __getitem__v   s   zSeedEmotionDataset.__getitem__	List[int]c                 C  s   dd | j D S )Nc                 S     g | ]}|d  qS )r9   r+   ).0sr+   r+   r,   
<listcomp>}       z2SeedEmotionDataset.emotion_ids.<locals>.<listcomp>)r@   rb   r+   r+   r,   emotion_ids|   s   zSeedEmotionDataset.emotion_idsN)r   )r/   r   r0   r   r1   r2   r3   r2   )r$   r2   )r\   rd   re   r!   r[   r!   r$   r!   )rl   r2   r$   r   )r$   rt   )__name__
__module____qualname__ra   rc   rk   rs   rz   r+   r+   r+   r,   r.   ,   s    
7

r.   batch
List[Dict]pad_idr2   Dict[str, torch.Tensor]c                 C  s   t dd | D }g }g }| D ]%}|d }|t| }|||g|   |dgt| dg|   qtj|tjdtj|tjdtjdd | D tjdtjd	d | D tjdd
S )Nc                 s  s    | ]	}t |d  V  qdS )rm   N)rJ   rv   br+   r+   r,   	<genexpr>   s    zcollate_fn.<locals>.<genexpr>rm   r	   r   )dtypec                 S  ru   )rn   r+   r   r+   r+   r,   rx      ry   zcollate_fn.<locals>.<listcomp>c                 S  ru   )r"   r+   r   r+   r+   r,   rx      ry   )rm   attention_maskrn   r"   )rM   rJ   rL   torchtensorlongfloat32)r~   r   max_lenrm   	attn_maskr   rr   pad_nr+   r+   r,   
collate_fn   s    r   )r    r!   r"   r#   r$   r%   )r~   r   r   r2   r$   r   )
__future__r   rD   rO   pathlibr   typingr   r   r   r   r   torch.utils.datar   configr
   r0   r   r   __annotations__r-   r.   r   r+   r+   r+   r,   <module>   sZ    
		
T