o
    diCG                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZ d dlZd dlZd dlmZ d dlm  mZ d dlmZmZ d dl m!Z! d d	l"m#Z# d
dl$m%Z%m&Z&m'Z' de(de(ddfddZ)dede*fddZ+dede*fddZ,dede*fddZ-dede*fddZ.dede*fddZ/dede*fddZ0dede*fddZ1dede*fd d!Z2d"efd#d$Z3d%d& Z4eej5j6d'd( Z7eej8j6d)d* Z9G d+d, d,Z:d-ej;ddfd.d/Z<ej=d-ej;d0eej>e?f fd1d2Z@eA ZBdId3e?d4ee? fd5d6ZCG d7d8 d8e!ZD		9	9dJd3e?d4ee? d:ee? d;e*d<e*de?fd=d>ZEd?e?de?fd@dAZFd3e?d4ee? dBee? dCee?e?f fdDdEZGd3e?d4ee? dFe?dee?e?f fdGdHZHdS )K    N)defaultdictwraps)chain)Path)CallableDictListOptionalUnion)GraphNode)tqdm)http_user_agent   )HashableSliceParameterMetaParameterSlice	numeratordenominatorreturnc                 C   s$   | | dkrt |  d| dd S )Nr   z is not divisible by z}, check if the parallel dimension of weight parameters is divisible by parallelism level(world size of tensor parallel group))RuntimeError)r   r    r   n/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/optimum/fx/parallelization/utils.pyensure_divisibility(   s
   r   nodec                 C   s0   | j dkrdS | jj}t|| jdddS )Ncall_moduleF
__module__ ztorch.nn.modules.activation)opgraphowning_modulegetattrget_submoduletarget
startswithr   modr   r   r   is_activation0   s   
r(   c                 C   *   | j dkrdS | jj}t|| jtjS Nr   F)r   r    r!   
isinstancer#   r$   nnLinearr&   r   r   r   	is_linear8      
r.   c                 C   r)   r*   )r   r    r!   r+   r#   r$   r,   	Embeddingr&   r   r   r   is_embedding?   r/   r1   c                 C   s0   | j dkr
| jdv S | j dkr| jtjhv S dS )Ncall_method>   viewexpandresizereshaperesize_call_functionF)r   r$   torchr6   r   r   r   r   is_shape_consumerF   s
   


r;   c                 C   s
   | j dkS )Noutput)r   r:   r   r   r   	is_outputN   s   
r=   c                 C   s   | j dko	| jdkS )Nr2   size)r   r$   r:   r   r   r   is_shape_generatorR   s   r?   c                 C   s@   | j dkr| jtju S | j dkr| jj}t|| jtj	S dS )Nr8   r   F)
r   r$   Fcross_entropyr    r!   r+   r#   r,   CrossEntropyLossr&   r   r   r   is_cross_entropyV   s   

rC   c                 C   s   | j dkrV| jdd}| jdd}| jdd}t| jdkr*|du r*| jd }t| jd	kr:|dkr:| jd	 }t| jd
krJ|dkrJ| jd
 }|du oU|dkoU|dkS | j dkrz| jj| j}|j	|j
|j}}}|du oy|dkoy|dkS dS )zu
    For now `VocabParallelCrossEntropyLoss` does not support weighted mode, index ignoring and label smoothing.
    r8   weightNignore_indexilabel_smoothingg                 r   F)r   kwargsgetlenargsr    r!   r#   r$   rD   rF   rE   )r   rD   rE   rF   r'   r   r   r   $is_cross_entropy_parallel_compatible_   s    




rN   r    c                    s   dt jjdtt jjj fdd}tt| j}t	  t
t}d }|r]| } fdd||D }|r=||d  | n | |rN|j|urN|| |}|t||d |s"|sht t| jksjJ d S )	Nnr   c                 S   s    g }t j| j| jf|j |S N)r9   fxmap_argrM   rJ   append)rO   rM   r   r   r   _argsy   s   z&stable_topological_sort.<locals>._argsc                    s   g | ]}| vr|qS r   r   ).0xreadyr   r   
<listcomp>   s    z+stable_topological_sort.<locals>.<listcomp>r   )r9   rQ   r   r	   r   Argumentlistreversednodessetr   poprS   addnextextendrL   )r    rT   pendingwaitingcursorr   waiting_forr   rW   r   stable_topological_sortx   s"    

rh   c                    s   t   fdd}|S )Nc                     s$   | dtd|d<  | i |S )Ndevicemeta)r`   r9   ri   )rM   rJ   init_fnr   r   wrapper   s   zmeta_init.<locals>.wrapperr   )rl   rm   r   rk   r   	meta_init   s   rn   c                  O   sd   | d }| d }|j jtdkrt||j |jS |j}|d}t||j |j}tj||dS )Nr   r   rj   ri   )rD   ri   r9   r@   linearbiasto
empty_likerM   rJ   selfinputorig_devicemeta_outputr   r   r   meta_aware_linear_forward   s   
ry   c               	   O   s   | d }| d }|j jtdkr#tj||j |j|j|j|j|j	dS |j}|
d}tj||j |j|j|j|j|j	d}tj||dS )Nr   r   rj   )rv   rD   padding_idxmax_norm	norm_typescale_grad_by_freqsparsero   )rD   ri   r9   r@   	embeddingrz   r{   r|   r}   r~   rr   rs   rt   r   r   r   meta_aware_embedding_forward   s0   

	r   c                   @   s   e Zd ZU dZdeejjfdeejjfde	fde
fgZeeef ed< dd	d
ZdefddZdefddZdd Zdd Zdd Zdd Zdd Zdd ZdS )MetaAwareMethodsPatchera  
    A patcher class which patches `__init__` and `forward` methods on modules which will be put on meta
    devices for memory efficiency purposes during initialization.

    Note that for `__init__` method, it can be unpatched once we have finished the initialization of the
    model, however, for `forward`, we need it to constantly being patched during the whole process in case
    recompile happens and torch dynamo needs meta-aware `forward` to be able to re-capture the graph.
    ztorch.nn.Linear.__init__ztorch.nn.Embedding.__init__ztorch.nn.Linear.forwardztorch.nn.Embedding.forwardmethods_to_patchr   Nc           	      C   s   g | _ | jD ]V\}}|jddd\}}zt|}W n1 tyK } z%|jddd\}}t|}zt||}W n	 ty@   |w W Y d }~nd }~ww t||}| j ||||dg qd S )N.r   maxsplitF)	patching_specsr   rsplit	importlibimport_moduleModuleNotFoundErrorr"   AttributeErrorrS   )	ru   origpatch_fnmodule_qualified_nameattribute_namemoduleemodule_attribute_nameorig_fnr   r   r   __init__   s&   

z MetaAwareMethodsPatcher.__init__
identifierc                 C   sF   | j D ]}|d r
q||d v r t|d |d |d  d|d< qd S )NrZ   r   r      Tr   setattrru   r   specr   r   r   _patch      
zMetaAwareMethodsPatcher._patchc                 C   sF   | j D ]}|d s
q||d v r t|d |d |d  d|d< qd S )NrZ   r   r   rG   Fr   r   r   r   r   _unpatch  r   z MetaAwareMethodsPatcher._unpatchc                 C      |  d d S Ninitr   ru   r   r   r   patch_meta_init     z'MetaAwareMethodsPatcher.patch_meta_initc                 C   r   Nforwardr   r   r   r   r   patch_meta_forward  r   z*MetaAwareMethodsPatcher.patch_meta_forwardc                 C   r   r   r   r   r   r   r   unpatch_meta_init  r   z)MetaAwareMethodsPatcher.unpatch_meta_initc                 C   r   r   r   r   r   r   r   unpatch_meta_forward  r   z,MetaAwareMethodsPatcher.unpatch_meta_forwardc                 C   s   |    |   d S rP   )r   r   r   r   r   r   	__enter__  s   z!MetaAwareMethodsPatcher.__enter__c                 C   s   |    d S rP   )r   )ru   exc_type	exc_value	tracebackr   r   r   __exit__%  s   z MetaAwareMethodsPatcher.__exit__)r   N)__name__r   __qualname____doc__rn   r,   r-   r   r0   ry   r   r   r   strr   __annotations__r   r   r   r   r   r   r   r   r   r   r   r   r      s    
 

		r   modelc                 C   sr   t  }| jddD ]-\}}t|}||vr2t|dtdtd d d t|t|jdid |	| q	d|j
_q	d S )NF)remove_duplicaterj   r   )sourceshape)dimmappingT)r_   named_parametersidr   r   r   r   tupler   ra   rj   is_tied)r   parameter_idsnametensorkeyr   r   r   initialize_parameter_meta)  s   
r   ri   c           	      C   s   t |  |  D ]D\}}|jtdkrq	|jddd}t|dkr*| }|d }n|d }| |}|d }||}t	|t
jrGt
|}t||| q	dS )z
    Move everything except tensors on meta devices on current device
    this function should be called before `intialize_parameter_meta`
    rj   r   r   r   r   N)r   r   named_buffersri   r9   r   rL   r#   rr   r+   r,   	Parameterr   )	r   ri   r   r   splits
parent_mod	attr_namequalified_name
new_tensorr   r   r   move_model_to_device;  s   



r   model_name_or_path	cache_dirc                 C   sd   |pt }tjtj|dd | dd}t| 	 }|| d }t
jtj||dd}|S )NT)exist_ok/-z.locki  )mode)temp_dirosmakedirspathdirnamereplacehashlibsha256encode	hexdigestfilelockFileLockjoin)r   r   lock_dir
model_name	hash_namelock_file_namelockr   r   r   get_lockU  s   r   c                       s   e Zd Z fddZ  ZS )DisabledTqdmc                    s   t  j|i |ddi d S )NdisableT)superr   )ru   rM   rJ   	__class__r   r   r   b  s   zDisabledTqdm.__init__)r   r   r   r   __classcell__r   r   r   r   r   a  s    r   Frevisionlocal_files_onlyskip_download_weightsc              	   C   s
  ddl }ddlm}m} ddlm}m}	m}
 ddg}|sA|jj	sA| }|j
| d|d}|D ]}t||}t|dkr@|g} nq-|rG|g}n|d dkrT|||	g }n|||
g }t| | |t d	j| ||||jj	po|td
}W d   |S 1 s~w   Y  |S )aR  Download model weights, index and config files from Hugging Face Hub.

    Args:
        model_name_or_path (`str`): The model name or path.
        cache_dir (`Optional[str]`): The cache directory to store the model
            weights. If None, will use HF defaults.
        revision (`Optional[str]`, defaults to `None`): The revision of the model.
        local_files_only(`bool`): Should only use local files if True.
        skip_download_weights (`bool`, defaults to `False`): Whether to skip downloading weights to disk.

    Returns:
        str: The path to the downloaded files.
    r   N)HfApiHfFileSystem)CONFIG_NAMESAFE_WEIGHTS_INDEX_NAMEWEIGHTS_INDEX_NAME*.safetensors*.binF)detailr   )
user_agent)allow_patternsr   r   r   
tqdm_class)huggingface_hub.constantshuggingface_hubr   r   transformers.utilsr   r   r   	constantsHF_HUB_OFFLINElsfnmatchfilterrL   r   r   snapshot_downloadr   )r   r   r   r   r   r   r   r   r   r   r   r   fs	file_listpatternmatching	hf_folderr   r   r   download_model_from_hfg  s>   

		r  filenamec           
      C   s   ddl m} | jddd\}}d| }t|| }|s$td|  d|d}|d	ur/|nd
}|jddd\}}	| | d|	 S )zOTransforms the filename for any kind of checkpoint to a safetensors equivalent.r   )SAFE_WEIGHTS_NAMEr   r   r   z\w+(-[0-9]*-of-[0-9]*)?\.zCould not convert z to a safetensor filename.Nr   )r   r	  r   rematch
ValueErrorgroup)
r  r	  _	extensionr  match_group_1index_out_of_total_strsafetensor_filenamesafetensor_extensionr   r   r   *_original_filename_to_safetensors_filename  s   

r  weight_files
weight_mapc                    s  ddl m} t| |u |D ]i t }t|j}|r|n|j}tj	||}tj
|sctj tdd}	t }
|	 D ]\}}| |
v rN|  }| }||	|< |
|  q>||	|  fdd| D }|D ]}|||< qpqW d   dS 1 sw   Y  dS )z=Convert to pytorch bin files to their safetensors equivalent.r   )	save_filecpu)map_locationc                    s   g | ]
\}}| kr|qS r   r   )rU   r   valueweight_filer   r   rY     s    z.convert_bin_to_safetensors.<locals>.<listcomp>N)safetensors.torchr  r   r   r  r   parentr   r   r   isfiler9   loadri   r_   itemsdata_ptrdetachclone
contiguousra   )r   r   r  r  r  weight_file_pathsafetensors_filename
output_diroutput_file_path
checkpointdata_pointerskvkeysr   r   r  r   convert_bin_to_safetensors  s.   


"r0  folder_pathc              	      sd  ddl m}m} i }dddg}}|D ]}tttj |dkr)|dk} nqtj |r2|n|}	ttj |r?dnd}
tj|	rot	|	}t
|}W d   n1 s]w   Y   fdd	|d
  D }|sxt| ||
| |sddlm} ttj d}
|
D ]#}||dd}| D ]}|||< qW d   n1 sw   Y  q|S )z@Try collecting weight mapping information from the model folder.r   )r   r   Fz*safetensorsr   r   Nc                    s    i | ]\}}|t j |qS r   )r   r   r   )rU   r-  r.  r1  r   r   
<dictcomp>  s     z*try_collect_weight_map.<locals>.<dictcomp>r  )	safe_openpt)r  	framework)r   r   r   rL   globr   r   r   r   openjsonr!  r"  r0  safetensorsr4  r/  )r   r   r1  r   r   r  use_safetensorsweight_patternsr  
index_pathr  f
index_dictr4  r  r   r   r2  r   try_collect_weight_map  s8   

r@  rP   )NFF)Ir   r7  r   r   r9  r   r
  tempfilecollectionsr   	functoolsr   	itertoolsr   pathlibr   typingr   r   r	   r
   r   r   r9   torch.nnr,   torch.nn.functional
functionalr@   torch.fxr   r   	tqdm.autor   r   r   corer   r   r   intr   boolr(   r.   r1   r;   r=   r?   rC   rN   rh   rn   r-   r   ry   r0   r   r   Moduler   no_gradri   r   r   
gettempdirr   r   r   r  r  r0  r@  r   r   r   r   <module>   s   	)
	


S 	
=

*