
    vj              
          d dl Z d dlZd dlmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ  e	            Zdddd	ddd
dd	d
ddZdZdad'dZdefdZdej        deeee j        f         deeee j        f         ddfdZdeeee j        f         ddfdZdeddfdZdej        deeee j        f         dedeeej        f         fdZdej        deeee j        f         dedeeej        f         fdZdeeej        f         deeee j        f         ddfdZdej        d ej        defd!Zdeeee j        f         d"edeeej        f         fd#Z d$ej        ded%edeej                 fd&Z!dS )(    N)DictListUnion)nn)
get_logger)	is_mastermoe   )version
world_sizev1i  )r   r   tensor_model_parallel_sizeseed)r   r   )zgpt-moeplugzmglm-text-summarizationzmp_rank_XX_model_states.ptFc                 N   ddl m} ddlm} | |
J d            | f ||          }	 |j        } nR# t
          $ rE 	 |j        j        }n# t
          $ r |j        j        }Y nw xY w|t          v rt          |         ni } Y nw xY w| 
                    |            ||            dadS )a  Initialize megatron_util environment for megatron_based model.

    If argument `megatron_cfg` is not specified, then the megatorn_cfg will be load
    from configuration.json file in the model_dir.

    Args:
        megatron_cfg (Dict, optional): Megatron Config will be send to megatron_util.
        model_dir (str, optional): The model path for configuration. Defaults to None.
    r   )read_config)initialize_megatronNzEcfg and model_dir cannot both be None when initializing megatron_utilT)modelscope.utils.hubr   megatron_utilr   megatronAttributeErrormodeltypepipeline_DEFAULT_CFG_WITH_MODEL_TYPEupdate_IS_MEGATRON_INITIALIZED)megatron_cfg	model_dirkwargsr   r   cfg
model_types          o/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/modelscope/utils/megatron_utils.pyinit_megatron_utilr$   $   s    100000111111$):):O *;):;k)$$		F<LL 	F 	F 	F/ Y^

! / / / \.


/ !=== 8
CCCE LL	F %%%#s2   1 
B A	B 	A"B !A""B ?B returnc                      t           S N)r        r#   is_megatron_initializedr*   E   s    ##r)   r   checkpoint_dir
target_dirc                 ,   dt           fd}t          j                            t          j                            |d                    r t          j                            |d          }t          t          j        |                    }t          t          j        d                    }t          |           t          |            |d| d|            ||k     rGt          j        |d           t          | |||z            }t          ||            |d	           dS ||k    rGt          j        |d           t          | |||z            }t          ||            |d
           dS t          j        ||            |d           dS )a-  Split or Merge checkpoint for megatron_based model.

    Args:
        model (nn.Module): Any megatron_based model.
        checkpoint_dir (Union[str, bytes, os.PathLike]): The save path of origin checkpoint.
        target_dir (Union[str, bytes, os.PathLike]): The target path of new checkpoint.
    informationc                 Z    t                      rt                              |            d S d S r'   )r   loggerinfo)r.   s    r#   
log_masterz/convert_megatron_checkpoint.<locals>.log_masterT   s0    ;; 	%KK$$$$$	% 	%r)   r   
WORLD_SIZEzorigin_num_partitions: z, target_num_partitions: T)exist_okzSplit checkpoints succeeded.zMerge checkpoints succeeded.zCopy checkpoints succeeded.N)strospathexistsjoinlenlistdirintgetenv_check_origin_dir_check_target_num_partitionsmakedirs_split_checkpoint_save_converted_checkpoint_merge_checkpointshutilcopytree)r   r+   r,   r2   origin_num_partitionstarget_num_partitions
state_dicts          r#   convert_megatron_checkpointrI   I   s   % % % % % 
w~~bgll>7;;<< ?ng>>
> : :;;	, 7 788n%%% !6777Ji"7iiRgii   444
J....&>!%::< <
 	#:z:::
122222	!6	6	6
J....&>!%::< <
 	#:z:::
122222
333
011111r)   
origin_dirc                 *   t          j        |           }t          |          t          |          dz
  z  dk    s
J d            t          t          |                    D ]1}t                              d|d          }||v sJ d| d            2d S )N   r   z)The number of files must be a power of 2!XX02dzCan not find z file!)r6   r;   r:   range_CHECKPOINT_FORMATreplace)rJ   	filenamesicheckpoint_names       r#   r>   r>   w   s    
:&&Iy>>I ! ! !"M! ! !3y>>"" 4 4,44Ta::FF)+++3O333 ,+++4 4r)   num_partitionsc                 2    | | dz
  z  dk    s
J d            d S )NrL   r   z5The number of target partitions must be a power of 2!r(   )rU   s    r#   r?   r?      s.    ^a/0A555? 65555r)   c                 h   t          t          j        d                    }||z  }t          ||          }i }|                                 D ]d\  }}t          |||                   }	|	dk    r||         ||<   -t          ||         ||	          }
|
||z                                           ||<   e|S )NRANK)r<   r6   r=   _load_by_ranknamed_parameters_get_diff_dim_split_tensorclone)r   r+   rU   target_rankorigin_rankrH   target_state_dictname	parameterdimpartitions_lists              r#   rA   rA      s     bi''((K/K~{;;J 1133 L LiIz$'788"99&0&6d#'
4(8.#NN"1+4B3C #DDIEGG 	$r)   c                   	 t          t          j        d                    		fdt                    D             }fd|D             }i }|                                 D ]p\  }t          ||d                            }|dk    r|d                  |<   9t          j        fd|D             |                                          |<   q|S )NrX   c                      g | ]
}z  |z   S r(   r(   ).0rS   rU   r_   s     r#   
<listcomp>z%_merge_checkpoint.<locals>.<listcomp>   s0       -.n$q(  r)   c                 0    g | ]}t          |          S r(   )rZ   )rh   rS   r+   s     r#   ri   z%_merge_checkpoint.<locals>.<listcomp>   s0       -.na((  r)   r   rY   c                      g | ]
}|         S r(   r(   )rh   rH   rb   s     r#   ri   z%_merge_checkpoint.<locals>.<listcomp>   s    @@@*Z@@@r)   rd   )	r<   r6   r=   rO   r[   r\   torchcatr^   )
r   r+   rU   origin_rank_liststate_dict_listra   rc   rd   rb   r_   s
    ``     @@r#   rC   rC      s*    bi''((K    272G2G     2B  O  1133  iIq'9$'?@@"99&5a&8&>d#"')@@@@@@@# # #UWW 	$ r)   rH   c                     t          t          j        d                    }t                              d|d          }t          j        | t          j                            ||                     d S )NrX   rM   rN   )	r<   r6   r=   rP   rQ   rm   saver7   r9   )rH   r,   r_   target_names       r#   rB   rB      s^     bi''((K$,,Tk3G3GHHK	Jz27<<
K@@AAAAAr)   tensor1tensor2c                 z    t          t          | j        |j                            D ]\  }\  }}||k    r|c S dS )NrY   )	enumeratezipshape)rt   ru   rS   s1s2s        r#   r\   r\      sK     W]GM!B!BCC  8B88HHH 2r)   rankc                     t                               d|d          }t          j        t          j                            | |          d d          }d|v r|d         n|S )NrM   rN   c                     | S r'   r(   )storagelocs     r#   <lambda>z_load_by_rank.<locals>.<lambda>   s    ' r)   T)map_locationweights_onlymodule)rP   rQ   rm   loadr6   r7   r9   )r+   r|   rT   rH   s       r#   rZ   rZ      si    (00$}}EEO
^_5511  J $,z#9#9:hzIr)   tensorpartition_dimc                     ddl m} |j                            |                     |          |          }t          j        | ||          }|S )Nr   )mpurl   )r   r   utilsdividesizerm   split)r   rU   r   r   per_partition_sizere   s         r#   r]   r]      sa    !!!!!!))M""N4 4k"7 7 7Or)   )NN)"r6   rD   typingr   r   r   rm   r   modelscope.utils.loggerr   modelscope.utils.torch_utilsr   r0   r   rP   r   r$   boolr*   Moduler5   bytesPathLikerI   r>   r<   r?   TensorrA   rC   rB   r\   rZ   r]   r(   r)   r#   <module>r      sf   				  $ $ $ $ $ $ $ $ $ $        . . . . . . 2 2 2 2 2 2	  
 &'	         " 2   $ $ $ $B$ $ $ $ $+2y+2*/UBK0G*H+2#ubk12+27;+2 +2 +2 +2\4%UBK(?"@ 4T 4 4 4 4@ @ @ @ @ @
RY c5>@k?J 9K &).233D.E   &RY c5>@k?J 9K &).233D.E   .Bel*+B#ubk12B7;B B B B5< %, 3    J%UBK(?"@ JJ $S%,%6 7J J J J%,  !$)-el);     r)   