
    #j                     r   d Z ddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZmZmZ  e            r
ddlZddlmZ  ej        e          Zd Z e            r e            rdd	lmZ ndd
lmZ  G d de          Z G d de          Zdad Zd Zd Zd Zd Zd Z ddZ!d Z"ddZ#ddZ$ddZ%d Z&dS ) z
Integration with Deepspeed
    N)partialmethod   )dep_version_check)is_accelerate_availableis_torch_availablelogging)nnc                      t           j                            d          d u} | r=	 t           j                            d          }dS # t           j        j        $ r Y dS w xY wd S )N	deepspeedTF)	importlibutil	find_specmetadataPackageNotFoundError)package_exists_s     m/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/transformers/integrations/deepspeed.pyis_deepspeed_availabler   $   sx    ^--k::$FN  	"++K88A4!6 	 	 	55		 s   A AA)HfDeepSpeedConfig)objectc                   "     e Zd ZdZ fdZ xZS )r   aJ  
    This object contains a DeepSpeed configuration dictionary and can be quickly queried for things like zero stage.

    A `weakref` of this object is stored in the module's globals to be able to access the config from areas where
    things like the Trainer object is not available (e.g. `from_pretrained` and `_get_resized_embeddings`). Therefore
    it's important that this object remains alive while the program is still running.

    [`Trainer`] uses the `HfTrainerDeepSpeedConfig` subclass instead. That subclass has logic to sync the configuration
    with values of [`TrainingArguments`] by replacing special placeholder values: `"auto"`. Without this special logic
    the DeepSpeed configuration is not modified in any way.

    Args:
        config_file_or_dict (`Union[str, Dict]`): path to DeepSpeed config file or dict.

    c                     t          |            t          d           t          d           t                                          |           d S )N
accelerater   )set_hf_deepspeed_configr   super__init__selfconfig_file_or_dict	__class__s     r   r   zHfDeepSpeedConfig.__init__J   sL    %%%,'''+&&&,-----    )__name__
__module____qualname____doc__r   __classcell__r    s   @r   r   r   9   sB          . . . . . . . . .r!   r   c                   ^     e Zd ZdZ fdZd Zd ZddZ eed	          Z	dd
Z
d Z xZS )HfTrainerDeepSpeedConfigz
    The `HfTrainerDeepSpeedConfig` object is meant to be created during `TrainingArguments` object creation and has the
    same lifespan as the latter.
    c                 f    t                                          |           d | _        g | _        d S N)r   r   _dtype
mismatchesr   s     r   r   z!HfTrainerDeepSpeedConfig.__init__X   s.    ,---r!   c                 <    | j         t          d          | j         S )Nz8trainer_config_process() wasn't called yet to tell dtype)r,   
ValueError)r   s    r   dtypezHfTrainerDeepSpeedConfig.dtype]   s!    ;WXXX{r!   c                 @    |                      |          }|dS |dk    S )NFauto)	get_value)r   ds_key_longvals      r   is_autoz HfTrainerDeepSpeedConfig.is_autob   s'    nn[));5&= r!   NTc           
         |                      |          \  }}|dS |                    |          dk    r|||<   dS |sdS |                    |          }|.||k    r*| j                            d| d| d| d|            dS dS dS )a  
        A utility method that massages the config file and can optionally verify that the values match.

        1. Replace "auto" values with `TrainingArguments` value.

        2. If it wasn't "auto" and `must_match` is true, then check that DS config matches Trainer
        config values and if mismatched add the entry to `self.mismatched` - will assert during
        `trainer_config_finalize` for one or more mismatches.

        Nr2   z- ds =z vs hf )find_config_nodegetr-   append)r   r4   hf_valhf_key
must_matchconfigds_keyds_vals           r   
fill_matchz#HfTrainerDeepSpeedConfig.fill_matchi   s     ..{;;>F::f''#F6NF 	FF##&F"2"2O""#Y;#Y#Y#Y#Y#Y#YQW#Y#YZZZZZ "2"2r!   F)r>   c                 0   |j         |j        z  |j        z  }|                     d|j        d|            |                     d|j        d           |                     d|d|            |                     d|j        d           |                     d|j        d	           |                     d
|j        |j        gd           |                     d|j        d           |                     d|j	        d           | 
                    dd           |                     d|j        d	           |j        r8| j                            di           | j        d<   |j        | j        d         d<   |                     d|j        p|j        d           |                     d|j        p|j        d           |                     d          rt&          j        | _        dS |                     d          rt&          j        | _        dS t&          j        | _        dS )z
        Adjust the config with `TrainingArguments` values. This stage is run during `TrainingArguments` object
        creation.
        train_micro_batch_size_per_gpuper_device_train_batch_sizegradient_accumulation_stepstrain_batch_sizeztrain_batch_size (calculated)gradient_clippingmax_grad_normzoptimizer.params.lrlearning_ratezoptimizer.params.betaszadam_beta1+adam_beta2zoptimizer.params.epsadam_epsilonzoptimizer.params.weight_decayweight_decayzscheduler.params.warmup_min_lrr   zscheduler.params.warmup_max_lr
checkpointuse_node_local_storagezfp16.enabledzfp16|fp16_full_evalzbf16.enabledzbf16|bf16_full_evalN)
world_sizerE   rF   rB   rI   rJ   
adam_beta1
adam_beta2rK   rL   	fill_onlysave_on_each_noder?   r:   fp16fp16_full_evalbf16bf16_full_evalis_truetorchbfloat16r,   float16float32)r   argsauto_find_batch_sizerG   s       r   trainer_config_processz/HfTrainerDeepSpeedConfig.trainer_config_process   s'     ?T-MMPTPpp,,)$$		
 	
 	
 	),)	
 	
 	

 	+$$		
 	
 	
 	+T-?QQQ-t/A?SSS$_do.#	
 	
 	

 	.0A>RRR79JN[[[7;;;8$:Lo^^^ ! 	Y(,b(I(IDK%BFBXDK%&>? 	)Id6ILabbb)Id6ILabbb <<'' 	(.DKKK\\.)) 	(-DKKK-DKKKr!   c                 J    g d} fd|D             }t          |          dk    rud}t          |d          rt          |j        d          r|j        j        }nt          |j        d          rt	          |j        j                  }nt          |j        d          r,t          |j        j        d          r|j        j        j        }nMt          |j        d          r8t          |j        j        d          rt	          |j        j        j                  }|t          d	| d
                               d||z              	                                rB                     dt          d|z  |z                                            dd|z                                  d|d                                d|                    |          d           t           j                  dk    r-d                     j                  }t          d| d          dS )z
        This stage is run after we have the model and know num_training_steps.

        Now we can complete the configuration process.
        )$zero_optimization.reduce_bucket_size-zero_optimization.stage3_prefetch_bucket_size4zero_optimization.stage3_param_persistence_thresholdc                 >    g | ]}                     |          |S  )r6   ).0xr   s     r   
<listcomp>zDHfTrainerDeepSpeedConfig.trainer_config_finalize.<locals>.<listcomp>   s)     V V VqdllSToo V V V Vr!   r   Nr?   hidden_sizehidden_sizestext_configzThe model's config file has neither `hidden_size` nor `hidden_sizes` entry, therefore it's not possible to automatically fill out the following `auto` entries in the DeepSpeed config file: zb. You can fix that by replacing `auto` values for these keys with an integer value of your choice.ra   rb   g?rc   
   z scheduler.params.total_num_stepsznum_training_steps (calculated)z!scheduler.params.warmup_num_stepswarmup_steps
z]Please correct the following DeepSpeed config values that mismatch TrainingArguments values:
zF
The easiest method is to set these DeepSpeed config values to 'auto'.)lenhasattrr?   ri   maxrj   rk   r/   rR   is_zero3intrB   get_warmup_stepsr-   join)r   r]   modelnum_training_stepshidden_size_based_keyshidden_size_auto_keysri   r-   s   `       r   trainer_config_finalizez0HfTrainerDeepSpeedConfig.trainer_config_finalize   s   "
 "
 "

 !W V V V,B V V V$%%))Kuh'' 
M5<77 	M"',":KKU\>:: M"%el&?"@"@KKU\=99 MgelF^`m>n>n M"',":"FKKU\=99 MgelF^`n>o>o M"%el&>&K"L"LK" Y5JY Y Y   NNA;Q\C\]]]}} 	Ck)K788   J$   	.-	
 	
 	

 	/!!"455	
 	
 	
 t!##4?33Jq'q q q   $#r!   )NTF)r"   r#   r$   r%   r   r0   r6   rB   r   rR   r_   rz   r&   r'   s   @r   r)   r)   R   s         
    
  
! ! ![ [ [ [4 jU;;;I8( 8( 8( 8(tC C C C C C Cr!   r)   c                 .    t          j        |           ad S r+   )weakrefref_hf_deepspeed_config_weak_ref)hf_deepspeed_config_objs    r   r   r   	  s    
 %,K0G$H$H!!!r!   c                  
    d a d S r+   )r   re   r!   r   unset_hf_deepspeed_configr     s     %)!!!r!   c                  p    t           .t                       t                                                      S dS )NF)r   rr   re   r!   r   is_deepspeed_zero3_enabledr     s1    $05R5T5T5`,..77999ur!   c                  V    t           !t                      t                      j        S d S r+   )r   r?   re   r!   r   deepspeed_configr     s(    $05R5T5T5`,..55tr!   c                 $   ddl ddl}ddlm} ddlm |                                 fd |j                    5   |            5   | | j                   ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )aA  
    DeepSpeed ZeRO-3 variant of `PreTrainedModel.initialize_weights`. Mirrors the `smart_apply`
    dispatch logic but gathers each module's partitioned parameters before calling
    `_initialize_weights`, so initialization operates on full tensors instead of empty shards.
    Only rank 0 performs the actual init.
    r   Nr   )guard_torch_init_functions)PreTrainedModelc                    |                                  D ]0}t          |          r ||j                   $ ||           1t          |                     d                    }|r`j                            |d          5  j                                        dk    r ||            d d d            d S # 1 swxY w Y   d S  ||            d S )NF)recurser   modifier_rank)	children
isinstance_initialize_weightslist
parameterszeroGatheredParameterscommget_rank)model_or_modulefnchildparamsr   _apply_zero3r   is_remote_codes       r   r   z.initialize_weights_zero3.<locals>._apply_zero34  sG   $--// 	( 	(E%11 (UE$=>>>>UB''''o000??@@ 	02262KK 8 8>**,,11B7778 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 B/////s   *B??CC)	r   rY   initializationr   modeling_utilsr   r   no_gradr   )rv   rY   r   r   r   r   r   s      @@@@r   initialize_weights_zero3r   %  sg    LLL;;;;;;000000))++N0 0 0 0 0 0 0 0 
 ; ;'')) 	; 	;L 9:::	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	;; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;s6   BA-!B-A1	1B4A1	5BB	B	c                    t                      }||                    di                               dd          }|                    di           }t          |t                    r8t	          ||                    di                               dd                    }|dk    rt          d          dd	lmmm	m
} t          |d
d          }| j        }i }	|                                                                 D ])\  }
}t          j        |j        |j        d          |	|
<   *fd|D             }fd|D             }t'          |          dk    rBi }|                                D ] \  }} |||g ||	          \  }}||	v r|||<   !|||_        |S d |D             }i }i }t+          |                                fd          }|D ]}|                    |          } ||||||	          \  }}||	v r[|T||         } |j        |j        |j                  }|                    ||          }|                    ||||           |||<   |                                D ]\  }}	 |                    || | j                  }|                                D ])\  }}t          |t>                    r|d         n|}|||<   *b# t@          $ r}tC          d| d|           |d}~ww xY w|||_        |S )z
    Apply weight conversions (renaming and merging/splitting operations) to a state dict.
    This is a simplified version that handles the conversion without loading into the model.
    Ntensor_parallelautotp_size   	inferencetp_sizezWeight conversions (e.g., MoE expert fusion) with DeepSpeed Tensor Parallelism are not yet implemented but support is coming soon. Please disable tensor_parallel in your DeepSpeed config or convert your checkpoint to the expected format first.r   )WeightConverterWeightRenamingdot_natural_keyrename_source_key	_metadatameta)r0   devicec                 4    g | ]}t          |          |S re   r   )rf   entryr   s     r   rh   z;_apply_weight_conversions_to_state_dict.<locals>.<listcomp>k  s(    XXX5j6W6WXXXXr!   c                 4    g | ]}t          |          |S re   r   )rf   r   r   s     r   rh   z;_apply_weight_conversions_to_state_dict.<locals>.<listcomp>l  s(    ZZZEz%7Y7YZ%ZZZr!   r   c                 (    i | ]}|j         D ]}||S re   )source_patterns)rf   	converterks      r   
<dictcomp>z;_apply_weight_conversions_to_state_dict.<locals>.<dictcomp>{  s*    hhhYiNghhAyhhhhr!   c                      |           S r+   re   )r   r   s    r   <lambda>z9_apply_weight_conversions_to_state_dict.<locals>.<lambda>  s    //!:L:L r!   )key)r   target_patterns
operations)rv   r?   z'Failed to apply weight conversion for 'zb'. This likely means the checkpoint format is incompatible with the current model version. Error: )"r   r:   r   dictrq   NotImplementedErrorcore_model_loadingr   r   r   r   getattrbase_model_prefix
state_dictitemsrY   emptyshaper0   ro   r   sortedkeyspopr   r   r   
setdefault
add_tensorconvertr?   r   	ExceptionRuntimeError) rv   r   weight_mapping	ds_configr   inference_configr   r   prefixmodel_state_dictr   param	renamings
convertersnew_state_dictoriginal_keytensorrenamed_keyr   pattern_to_converterconversion_mappingsorted_keyssource_patternr   new_convertermappingrealized_valuetarget_nameer   r   r   s                                 @@@r   '_apply_weight_conversions_to_state_dictr   H  s    !""I-- 1266::=!LL$==b99&-- 	b'#3#7#78I2#N#N#R#RS\^_#`#`aaGQ;;%d   ihhhhhhhhhhh z;55H$F &&((..00 [ [
U %EKu{SY Z Z ZXXXXNXXXIZZZZ^ZZZJ :!$.$4$4$6$6 	5 	5 L&..|YFTdeeNK....4{+'/N$ ih*hhh
 N**0L0L0L0LMMMK# 5 5--&7&7iQ[]ceu&v&v#^ ***) 1@	 /$-$=$-$=(3! ! !
 -77]SS"";nfUUUU /5{+ !3 8 8 : :  W	$__| -  N
 '5&:&:&<&< 4 4"U$.ud$;$;Fa.3{++4  	 	 	+     		 #+ s   8AK
K9K44K9c                   	 t          |dd          |                                }|_        d}|t          |dd          }|+t          |          dk    rt	          | ||          }|| _        g |                                 t                                                    t          | dd          		fd|	                                D             }dd	t          j        ffd
 | |d           fS )a  
    Loads state dict into a model specifically for Zero3, since DeepSpeed does not support the `transformers`
    tensor parallelism API.

    Nearly identical code to PyTorch's `_load_from_state_dict`

    Args:
        model_to_load: The model to load weights into
        state_dict: The state dict containing the weights
        load_config: Optional LoadStateDictConfig containing weight_mapping and other loading options
    r   Nr   r   r   c                 ^    i | ])\  }}                      d |            d | n||*S ).)r:   )rf   r   vmeta_model_state_dictprefix_models      r   r   z5_load_state_dict_into_zero3_model.<locals>.<dictcomp>  se       Aq #8";";|<Q<Qa<Q<Q"R"R"^L		1			dehi  r!    Fmodulec                    i n                     |d d         i           }||d<   |||dg g f}t                      rdd l}t          |                     |d d         d                    }g }|D ]?}	|	|v r9||	         }
d|
_        |                    |
                               |	           @t          |          dk    r`|j	        
                    |d          5  t          j                                        dk    r
 | j        |  d d d            n# 1 swxY w Y   t          |                     |d d         d                    }|                                D ]m\  }	}|	|v rd|b                    |	           t          j                    5  |                    ||	                    d d d            n# 1 swxY w Y   d|_        n| j                                        D ]\  }}| ||||z   dz   |           d S )	Nassign_to_params_buffersTr   F)r   r   r   r   )r:   r   r   r   named_parameters_is_hf_initializedr;   discardro   r   r   rY   distributedr   _load_from_state_dictnamed_buffersr   r   copy__modules)r   r   r   r   local_metadatar]   r   r   params_to_gatherr   r   r   bufnamer   
error_msgsloadr   missing_keyss                  r   r   z/_load_state_dict_into_zero3_model.<locals>.load  s   '/X\\&"+r5R5R5M12FND"b*M &'' 	2  $F$;$;6#2#;X]$;$^$^__!% , ,
??,Q/E/3E,$++E222 ((+++#$$q(( ^667GWX6YY < <(1133q8844d;;< < < < < < < < < < < < < < <
 !!5!5VCRC[RW!5!X!XYYM'--// 2 23
??s ((+++ 1 1		*Q-0001 1 1 1 1 1 1 1 1 1 1 1 1 1 1-1C*!?0022 	W 	WKD% UJ(;=UVVV	W 	Ws$   $-DD!$D!GG	
G	)r   )r   F)r   copyr   ro   r   _weight_conversionsr   setr   r   r	   Module)
model_to_loadr   load_configr   r   r   r   r   r   r   s
       @@@@@@r   !_load_state_dict_into_zero3_modelr    sr    z;55H""J'
 N .>EE !c.&9&9A&=&=<]JXfgg
,:)J)4466,113344L=*=tDDL    $$&&  J)W )WRY )W )W )W )W )W )W )W )W )WV 	D
UCCCC|##r!   c                 J    ddl m}m} |j        }d}d|v r ||          }nG|                                rt
                              d                                            }d|d<   d}	d	|v r ||          }	n#t          ||          r fd
}
 |||
          }	||	fS )zY
    A convenience wrapper that deals with optimizer and lr scheduler configuration.
    r   )
DummyOptimDummySchedulerN	optimizer)r   zDetected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the custom optimizer has both CPU and GPU implementation (except LAMB)Tzero_allow_untested_optimizer	schedulerc                 l    t          j                   }d |_        |                    |           }|S )N)rw   r  )r   lr_schedulercreate_scheduler)r  trainer_copyr  rw   trainers      r   _lr_scheduler_callablez5deepspeed_optim_sched.<locals>._lr_scheduler_callable3  sC    #y11 -1)+<<'9Y  =     $#r!   )lr_scheduler_callable)	accelerate.utilsr  r  r?   
is_offloadloggerinfocreate_optimizerr   )r  hf_deepspeed_configr]   rw   model_parametersr  r  r?   r  r  r  s   `  `       r   deepspeed_optim_schedr    s    <;;;;;;; 'F IfJ&6777		))++ 	KKV   ,,..	26./Lf%~i00i,, 	c	$ 	$ 	$ 	$ 	$ 	$ *>)KabbbLl""r!   Fc                    ddl m} | j        }| j        }| j        j        j        j        }|                    |||           |	                    |
                                           |rU|                                st          d          |                    d           |                    d           d\  }}d}	nd| _        |j                            di                               d	d
          }
|
d
k    r4ddl}|                    ||
|                                |j                  }t)          t+          d |                                                    }	t/          | ||||	          \  }}||fS )a  
    Init DeepSpeed, after updating the DeepSpeed configuration with any relevant Trainer's args.

    If `resume_from_checkpoint` was passed then an attempt to resume from a previously saved checkpoint will be made.

    Args:
        trainer: Trainer object
        num_training_steps: per single gpu
        resume_from_checkpoint: path to a checkpoint if to resume from after normal DeepSpeedEngine load
        inference: launch in inference mode (no optimizer and no lr scheduler)
        auto_find_batch_size: whether to ignore the `train_micro_batch_size_per_gpu` argument as it's being
            set automatically by the auto batch size finder

    Returns: optimizer, lr_scheduler

    We may use `deepspeed_init` more than once during the life of Trainer, when we do - it's a temp hack based on:
    https://github.com/deepspeedai/DeepSpeed/issues/1394#issuecomment-937405374 until Deepspeed fixes a bug where it
    can't resume from a checkpoint after it did some stepping https://github.com/deepspeedai/DeepSpeed/issues/1612

    r   )r  zMZeRO inference only makes sense with ZeRO Stage 3 - please adjust your configr  r  )NNNr   r   r   )rv   r   r0   r?   c                     | j         S r+   )requires_grad)ps    r   r   z deepspeed_init.<locals>.<lambda>{  s     r!   )deepspeed.utilsr  rv   r]   acceleratorstatedeepspeed_pluginhf_ds_configrz   setLevelget_process_log_levelrr   r/   del_config_sub_treer  r?   r:   r   tp_model_initr0   r   filterr   r  )r  rw   r   	ds_loggerrv   r]   r  r  r  r  deepspeed_tp_sizer   s               r   deepspeed_initr)  C  s   * 433333ME<D!-3DQ //e=OPPP t1133444 
"++-- 	nlmmm 	//<<<//???",	< /6::;LbQQUUVcefggq  ++))//11*1	 ,  E  '@'@%BRBRBTBT U UVV"7($0BDT#
 #
	< l""r!   Tc                 <   dd l }t          |                     | d                    }t          |          dk    rOt                              d|            |                     ||dd          \  }}|t          d|           d S t          d|           )Nr   z/global_step*zAttempting to resume from T)load_module_strictload_optimizer_statesload_lr_scheduler_statesz-[deepspeed] failed to resume from checkpoint z!Can't find a valid checkpoint at )globr   ro   r  r  load_checkpointr/   )deepspeed_enginecheckpoint_pathr+  r.  deepspeed_checkpoint_dirs	load_pathr   s          r   deepspeed_load_checkpointr4    s    
 KKK &tyyO1R1R1R'S'S T T
$%%))BBBCCC'771"&%)	 8 
 
	1 ^_^^___  N_NNOOOr!   c                     | j         j        }t          |j        j                  |_        |j        j        |_        |j                            ||           dS )a  
    Sets values in the deepspeed plugin based on the TrainingArguments.

    Args:
        accelerator (`Accelerator`): The Accelerator object.
        args (`TrainingArguments`): The training arguments to propagate to DeepSpeed config.
        auto_find_batch_size (`bool`, *optional*, defaults to `False`):
            Whether batch size was auto-discovered by trying increasingly smaller sizes.
    N)r  r   r)   r!  r?   r   r_   )r  r]   r^   	ds_plugins       r   propagate_args_to_deepspeedr7    sS     !2I5i6L6STTI!*!7!>I11$8LMMMMMr!   c                    d|vrd|v r|d         |d<    |di |}|j         }|j        dk    r&|j        dk    rddlm} |                                }n6| j         | j        d                                         }nt          d	          |j        }	t          j
        j        j                            ||
          |d         dk                        d                                          }
t          j
        j        j                            |
|
          t          fdt!          |	          D                       }t                    }|t#          |d          z  }|r||fn|S )aq  
    Computes the loss under sequence parallelism with `sp_backend="deepspeed"` and `sp_size > 1`.

    Performs weighted loss aggregation across SP ranks, accounting for varying numbers of valid tokens per rank
    (e.g., when some ranks receive only padding or prompt tokens that are masked with -100).

    Args:
        accelerator (`Accelerator`): The accelerator instance with `torch_device_mesh` support.
        model (`torch.nn.Module`): The model to compute the loss for.
        inputs (`dict[str, torch.Tensor | Any]`): The input data for the model. Must include `"shift_labels"` key.
        return_outputs (`bool`): Whether to return the model outputs along with the loss.
        pc (`accelerate.parallelism_config.ParallelismConfig`): The parallelism configuration.

    Returns:
        The loss, or a tuple of `(loss, outputs)` if `return_outputs` is `True`.
    labelsshift_labelsr   r   r   )groupsNspzSequence parallelism is enabled but no SP process group is available. Ensure torch_device_mesh is initialized or sp_backend='deepspeed' with sp_size > 1.)groupir   c              3   R   K   | ]!}|         d k    |         |         z  V  "dS )r   Nre   )rf   rankgood_tokens_per_ranklosses_per_ranks     r   	<genexpr>z,deepspeed_sp_compute_loss.<locals>.<genexpr>  sO        %)) 	 4T ::)))) r!   re   )loss
sp_backendsp_sizer  r;  _get_sequence_parallel_grouptorch_device_mesh	get_groupr/   rY   r   r	   
functional
all_gatherviewsumrangerq   )r  rv   inputsreturn_outputspcoutputsrC  r;  sp_groupsp_world_sizegood_tokens
total_losstotal_good_tokensr@  rA  s                @@r   deepspeed_sp_compute_lossrW    s   , v.F":":!.1xeoofooG<D 
}##
Q******6688		&	206@@BBb
 
 	
 JM'*5@@X@VVO.)T177;;??AAK ,/:EEkYaEbb     -((    J
 011-q111D,6D'??$6r!   r+   r{   )T)'r%   r   importlib.metadatar   importlib.utilr}   	functoolsr   dependency_versions_checkr   utilsr   r   r   rY   r	   
get_loggerr"   r  r   accelerate.utils.deepspeedr   DeepSpeedConfigbuiltinsr   r)   r   r   r   r   r   r   r   r  r  r)  r4  r7  rW  re   r!   r   <module>ra     sn              # # # # # # 9 9 9 9 9 9 H H H H H H H H H H  LLL 
	H	%	%
 
 
  3!7!7!9!9 3OOOOOOO 322222. . . . . . . .2p p p p p0 p p ph !% I I I) ) )     ;  ;  ;Fh h hVW$ W$ W$ W$t3# 3# 3#l@# @# @# @#FP P P P0N N N N"77 77 77 77 77r!   