o
    di                     @   sT   d dl mZmZ d dlZd dlmZ d dlmZ d dlm	Z	 dZ
G dd deZdS )	    )OptionalTupleN)CrossEntropyLoss)PreTrainedModel)%add_start_docstrings_to_model_forwardao  
    Arguments:
        input_ids (`torch.LongTensor`):
            Indices of decoder input sequence tokens in the vocabulary of shape `(batch_size, decoder_sequence_length)`.
        encoder_hidden_states (`torch.FloatTensor`):
            The encoder `last_hidden_state` of shape `(batch_size, encoder_sequence_length, hidden_size)`.
        attention_mask (`torch.LongTensor`, *optional*):
            Mask to avoid performing attention on padding token indices of `input_ids`.
        encoder_attention_mask (`torch.LongTensor`, *optional*):
            Mask to avoid performing cross-attention on padding tokens indices of encoder `input_ids`.
        past_key_values (`tuple(tuple(torch.FloatTensor), *optional*)`
            Contains the precomputed key and value hidden states of the attention blocks used to speed up decoding.
            The tuple is of length `config.n_layers` with each tuple having 2 tensors of shape
            `(batch_size, num_heads, decoder_sequence_length, embed_size_per_head)` and 2 additional tensors of shape
            `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
c                       s~   e Zd ZdZdef fddZee				ddej	dej
deej	 d	eej	 d
eeeej
   deej	 fddZ  ZS )_DecoderWithLMheadz
    Decoder model with a language modeling head on top.
    Arguments:
        model (`transformers.PreTrainedModel`):
            The model from which to extract the decoder and the language modeling head.
    modelc                    s<   t  |j |j| _| | _| | _t|dd | _d S )Nfinal_logits_bias)	super__init__configget_decoderdecoderget_output_embeddingslm_headgetattrr	   )selfr   	__class__ k/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/optimum/onnx/modeling_seq2seq.pyr   1   s
   

z_DecoderWithLMhead.__init__N	input_idsencoder_hidden_statesattention_maskencoder_attention_maskpast_key_valueslabelsc              	   C   s   | j |||||ddd}|j}| jjdkr!| jjr!|| jjd  }| |}	| jd ur0|	| j7 }	|d u r9|	|jfS t	dd}
|
|	
d|	d|
d}||	|jfS )NT)r   r   r   r   r   return_dict	use_cachet5g      i)ignore_index)r   last_hidden_stater   
model_typetie_word_embeddingsd_modelr   r	   r   r   viewsize)r   r   r   r   r   r   r   decoder_outputsr"   	lm_logitsloss_fctlossr   r   r   forward8   s(   
	




z_DecoderWithLMhead.forward)NNNN)__name__
__module____qualname____doc__r   r   r   %DECODER_WITH_LM_HEAD_INPUTS_DOCSTRINGtorch
LongTensorFloatTensorr   r   r,   __classcell__r   r   r   r   r   )   s*    r   )typingr   r   r2   torch.nnr   transformersr   transformers.file_utilsr   r1   r   r   r   r   r   <module>   s   