o
    di                  
   @   s   d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZ ddlmZmZ ddlmZ dd	lmZmZmZmZmZ d
e	deej dedede	f
ddZdededefddZdS )    N)partial)CallableList)GraphModule)
AutoConfig   )ConfigParallelExecutionCtx)build_parallel_pass_pipeline)MetaAwareMethodsPatcherdownload_model_from_hfinitialize_parameter_metamove_model_to_devicetry_collect_weight_mapgraph_moduleexample_inputsctxconfigreturnc                 C   s2   ||_ t }|| ||d} | jd7  _| |_| S )N)r   r   r   r   )r   r
   compile_timeslast_optimized_graph_module)r   r   r   r   pass_pipeline r   l/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/optimum/fx/parallelization/api.pyparallelize_backend#   s   r   modelparallel_ctxc                 O   s  | dd}| dd}| dd}| dd}t }t| D ]\}	}
|	|jv r5t||	|
 | |	 q!tj| }|sHt	| ||||d}n| }t
j|f|d	d	d
|\}}|j}ttd|d }|sot| |||_| ddd}}|dur||}t  ||g|R i |} |   W d   n1 sw   Y  |durt| t| |jd t|  tt||d}tj| d	|d} | S )a  
    API for automatic model parallelism through Pytorch FX.

    Args:
        model (`str`):
            Model to parallelize, a model id on the Huggingface Hub or path to a local directory containing config and weights
            of the model.
        parallel_ctx (`ParallelExecutionCtx`):
            Parallel execution context containing process groups the current process belongs to.
        *model_args (`Any`):
            Additional postional arguments for intializing the model if a model id is passed.
        revision (`str`, defaults to `main`):
            Model revision for weights downloading if a model id is passed.
        cache_dir (`Optional[str]`, defaults to `None`):
            Cache directory to store downloaded weights. Defaults to None.
        local_files_only (`bool`, defaults to `False`):
            Whether to use local files only, will avoid downloading from remote if set to `True`.
        skip_load_weights (`bool`, defaults to `False`):
            Whether to skip loading weights from disk to model.
        **kwargs (`Dict[str, Any]`):
            Addtional keyword arguments for overriding fields in parallel config, model config and `Model.__init__`.
    revisionmain	cache_dirNlocal_files_onlyFskip_load_weights)model_name_or_pathr   r   r    skip_download_weightsT)r   r    return_unused_kwargstransformersr   torch_dtype)device)r   r   )	fullgraphbackend)popr   dictitems__dict__setattrospathisdirr   r   from_pretrainedarchitecturesgetattr	importlibimport_moduler   
weight_map_set_default_torch_dtyper   evaltorchset_default_dtyper   current_devicer   r   r   compile)r   r   
model_argskwargsr   r   r    r!   parallel_configkvis_local	hf_foldermodel_config
model_arch	model_clsr&   
dtype_origr)   r   r   r   parallelize_model.   sZ   





rI   )r5   r/   	functoolsr   typingr   r   r:   torch.fxr   r%   r   corer   r	   passesr
   utilsr   r   r   r   r   Tensorr   strrI   r   r   r   r   <module>   s8   	
