
    vj                     >   d dl Z d dlmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d
dlmZ d
dlmZ  ej        e	j                   G d de                      Z ej        e	j                   G d de                      ZdS )    N)TupleUnion)nn)Trainersbuild_metric)Model
TorchModel)Preprocessor)Config)ModeKeys   )TRAINERS)EpochBasedTrainer)module_namec                   v     e Zd ZdZ fdZd ZdefdZdee	j
        ef         fdZdeeef         fdZ xZS )	NlpEpochBasedTrainera  Add code to adapt with nlp models.

    This trainer will accept the information of labels&text keys in the cfg, and then initialize
    the nlp models/preprocessors with this information.

    Labels&text key information may be carried in the cfg like this:

    >>> cfg = {
    >>>     ...
    >>>     "dataset": {
    >>>         "train": {
    >>>             "first_sequence": "text1",
    >>>             "second_sequence": "text2",
    >>>             "label": "label",
    >>>             "labels": [1, 2, 3, 4],
    >>>         },
    >>>         "val": {
    >>>             "first_sequence": "text3",
    >>>             "second_sequence": "text4",
    >>>             "label": "label2",
    >>>         },
    >>>     }
    >>> }

    To view some actual finetune examples, please check the test files listed below:
    tests/trainers/test_finetune_sequence_classification.py
    tests/trainers/test_finetune_token_classification.py
    c                     d | _         d | _        d | _        d | _        d | _         t                      j        |i | d S N)label2idid2label
num_labels
train_keys	eval_keyssuper__init__)selfargskwargs	__class__s      o/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/modelscope/trainers/nlp_trainer.pyr   zNlpEpochBasedTrainer.__init__2   sH    $)&)))))    c                    	 |j         j        j        }d t          |          D             | _        d t          |          D             | _        t          |          | _        n# t          $ r Y nw xY wd } ||	                    d                    | _
         ||	                    d                    | _        t          | j                  dk    r| j
        | _        d S d S )Nc                     i | ]\  }}||	S  r%   .0idxlabels      r!   
<dictcomp>z7NlpEpochBasedTrainer.prepare_labels.<locals>.<dictcomp>=   s    LLLJCUCLLLr"   c                     i | ]\  }}||	S r%   r%   r&   s      r!   r*   z7NlpEpochBasedTrainer.prepare_labels.<locals>.<dictcomp>>   s    LLLJCS%LLLr"   c                     | 4t          | dd           t          | dd           t          | dd           d}ni }d |                                D             S )Nfirst_sequencesecond_sequencer)   )r-   r.   r)   c                     i | ]
\  }}|||S r   r%   )r'   kvs      r!   r*   zSNlpEpochBasedTrainer.prepare_labels.<locals>.build_dataset_keys.<locals>.<dictcomp>M   s    IIITQ1=Aq===r"   )getattritems)cfg
input_keyss     r!   build_dataset_keysz?NlpEpochBasedTrainer.prepare_labels.<locals>.build_dataset_keysC   sk    &-c3CT&J&J'.s4Et'L'L$S'488 

  
IIZ%5%5%7%7IIIIr"   zdataset.trainzdataset.valr   )datasettrainlabels	enumerater   r   lenr   AttributeErrorsafe_getr   r   )r   r4   r9   r6   s       r!   prepare_labelsz#NlpEpochBasedTrainer.prepare_labels:   s    	[&-FLL)F:K:KLLLDMLL)F:K:KLLLDM!&kkDOO 	 	 	D	
	J 
	J 
	J -,S\\/-J-JKK++CLL,G,GHHt~!##!_DNNN $#s   A!A$ $
A10A1r4   c                    | j         |                      |          }|                     |           t          |j        d          sAt          |j        d          s,| j        | j        |j        d<   | j        | j        |j        d<   |S )Nr   r   )cfg_modify_fnr>   hasattrmodelr   r   )r   r4   s     r!   rebuild_configz#NlpEpochBasedTrainer.rebuild_configT   s    )$$S))CC   sy*-- 	6g	:7' 7' 	6}((,	*%}((,	*%
r"   returnc                     | j         i nd| j         i}t          j        | j        fd| j        i|}t          |t          j                  st          |d          r|j	        S t          |t          j                  r|S dS )z Instantiate a pytorch model and return.

        By default, we will create a model using config from configuration file. You can
        override this method in a subclass.

        Nr   cfg_dictrB   )
r   r	   from_pretrained	model_dirr4   
isinstancer   ModulerA   rB   )r   
model_argsrB   s      r!   build_modelz NlpEpochBasedTrainer.build_model`   s      ?2RR$/9

 %N= =%)X=1;= =%++ 	w0G0G 	;ry)) 	L	 	r"   c                 *   | j         i nd| j         i}t          j        | j        f| j        t
          j        d|| j        t
          j        dd}t          j        | j        f| j        t
          j        d|| j	        t
          j        dd}||fS )zBuild the preprocessor.

        User can override this method to implement custom logits.

        Returns: The preprocessor instance.

        Nr   )rF   preprocessor_modeT)modeuse_fast)
r   r   rG   rH   r4   r   TRAINr   EVALr   )r   
extra_argstrain_preprocessoreval_preprocessors       r!   build_preprocessorz'NlpEpochBasedTrainer.build_preprocessorq   s      =0RR7

 *9NX&n  	
 o     )8NX&m  	
 n     "#444r"   )__name__
__module____qualname____doc__r   r>   r   rC   r   r   rJ   r
   rL   r   r   rV   __classcell__)r    s   @r!   r   r      s         :* * * * *- - -4
& 
 
 
 
U29j#89    "5E,*D$E 5 5 5 5 5 5 5 5r"   r   c                       e Zd ZddZdS )VecoTrainerNc                 f   ddl m} |ddlm} |                    ||            | j                                         t          j        | _	        i }| j
        ,|                     | j        | j	        | j                  | _
        d}d}t          | j
        |          r3| j
                            |           t!          | j
        j                  }	  | j        | j
        fi | j        j                            di           | _        | j        | _        d	 | j        D             }|D ]	}| |_        
|                     | j        |           t5          |          D ]B\  }	}
d
| d|vr	i |d
| d<   |
                                |d
| d         | j        |	         <   C|dz  }||k     r| j
                            |           nn| j        D ]bfd|                                D             }|d                                         D ]%t=          j        fd|D                       |<   &c|S )z1Veco evaluates the datasets one by one.

        r   )VecoDatasetN)LoadCheckpointHook)	model_cfgrO   preprocessorr   T
dataloaderc                 ,    g | ]}t          |          S r%   r   )r'   metrics     r!   
<listcomp>z(VecoTrainer.evaluate.<locals>.<listcomp>   s     NNNvl622NNNr"   zeval_dataset[]c                      g | ]
}|         S r%   r%   )r'   mmetric_names     r!   rf   z(VecoTrainer.evaluate.<locals>.<listcomp>   s    JJJa1[>JJJr"   c                      g | ]
}|         S r%   r%   )r'   re   keys     r!   rf   z(VecoTrainer.evaluate.<locals>.<listcomp>   s    ;;;VVC[;;;r"   ) 1modelscope.msdatasets.dataset_cls.custom_datasetsr_   modelscope.trainers.hooksr`   load_checkpointrB   evalr   rR   _modeeval_datasetbuild_dataset_from_cfgr4   rU   rI   switch_datasetr;   datasets_build_dataloader_with_dataset
evaluationgeteval_dataloaderdata_loadermetricstrainerevaluation_loopr:   evaluatevalueskeysnpaverage)r   checkpoint_pathr_   r`   metric_valuesr(   dataset_cntmetric_classesri   m_idx
metric_clsall_metricsrl   rj   s               @@r!   r~   zVecoTrainer.evaluate   s    	RQQQQQ&DDDDDD..EEE
]
$ $ ; ;(Z!3 !< !5 !5D
 d'55 	:,,S111d/899K	#F4#F!$P $P%)X%8%<%<\2%N%N$P $PD #3DNNNNNN# ! ! 		  !5~FFF%.~%>%> A A!z)3)))>><>M"8#"8"8"89+5+>+>+@+@ 4c4445L') ) 1HC[  !005555)	,  < 	= 	=KJJJJ=3G3G3I3IJJJK"1~**,, = =%'Z;;;;{;;;&= &=c""= r"   r   )rW   rX   rY   r~   r%   r"   r!   r]   r]      s(        4 4 4 4 4 4r"   r]   )ostypingr   r   numpyr   torchr   modelscope.metainfor   modelscope.metrics.builderr   modelscope.models.baser	   r
   modelscope.preprocessorsr   modelscope.utils.configr   modelscope.utils.constantr   baser   r|   r   register_modulenlp_base_trainerr   nlp_veco_trainerr]   r%   r"   r!   <module>r      s   
			                   ( ( ( ( ( ( 3 3 3 3 3 3 4 4 4 4 4 4 4 4 1 1 1 1 1 1 * * * * * * . . . . . .       & & & & & & h&?@@@{5 {5 {5 {5 {5, {5 {5 A@{5| h&?@@@6 6 6 6 6& 6 6 A@6 6 6r"   