
    )j!}                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZmZmZmZmZmZmZmZ d dlmZ d dlmZ  ej        dd                                          dk    r	 d dlmZ n# e$ r  ed	          w xY wd dlmZ  ej         ej!        d
           d dl"m#Z#m$Z$m%Z%m&Z& ddl'm(Z( ddl'm)Z* dddddddddd	Z+dZ,dej-        dej-        fdZ.dee/ej-        f         dee/ef         deee/ej-        f         ee/ef         f         fdZ0de1fdZ2d  Z3d! Z4	 	 dUd"e/d#ee/         d$ee/         de	fd%Z5d& Z6d'e	de1fd(Z7d)d*de2fd'e	d+e8d,e8d-eee/ef                  d.ee1geeej9                 ef         f         deej9        e1f         fd/Z:d0ej9        d1e/dej9        fd2Z;dUd3Z<	 	 	 	 	 	 dVd"e/d4eee/ef                  d-eee/ef                  d1ee/         d+e8d5e8d#ee/         deeej9        e(f         eej9        e(ee/ef         f         f         fd6Z)	 	 	 dWd7eej=        j>                 d8eej=        j>                 d5e8fd9Z?dXd:Z@e,fde1d;eAdeBfd<ZCd=ee/e	f         d>ee/e	df         fd?ZDd=e/d@e/fdAZEd)dBdCee/e	f         d0ej9        dDe8ddfdEZF	 	 dYd0ej9        de1dGeeA         dHeeA         dIe/dJeee/ej9        gee8e1f         f                  deej9        e1f         fdKZGd0ej9        dej9        fdLZHde1dMee/e	f         ddfdNZI	 dZdOee/e	f         dPee/e	f         d0ej9        dQe(dee/ef         dDe8fdRZJdS ZKd0ej9        de8fdTZLdS )[    N)Path)dedent)AnyCallableDictListOptionalTupleTypeUnionMLXLM_USE_MODELSCOPEFalsetrue)snapshot_downloadz/Run `pip install modelscope` to use ModelScope.)i   i   )tree_flattentree_maptree_reducetree_unflatten   )TokenizerWrapper)loadllamamistral3phixtralmambadeepseek_v3qwen2_vlminimax)	mistralllavazphi-msftfalcon_mambajoyai_llm_flashkimi_k2
qwen2_5_vl
minimax_m2iquestcoder   qweightreturnc                     d}d|z  }| j         \  }}||z  }d|z  dz
  }t          j        g d          |z  }| d         |z	  |z  }|                    ||          S )N       r   )r   r+   r   r'               ).N)shapemxarrayreshape)	r(   bitspack_factorout_features	packed_inin_featuresmaskshiftsunpackeds	            V/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/mlx_lm/utils.py_unpack_awq_weightsr>   <   sx    D*K%mL)k)KI?DX...//$6F	"f,4HL+666    weightsquantization_configc                    |                     dd          }|dk    rt          d|d          |                     dd          }i }t          |                                           D ]                    d          rt          d d	                              d
          rd d         }| | d
         }| d}| d}| |         }	d|z  }
|j        \  }}||
z  }||z  }t          |          }|j        }||
z  }|                    |||
          }t          j
        |
          |z  }|                    t          j                  |z                      d                              t          j                  }t          j        |	j                  }	|| v rB| |         }t          |          }|j        }|                    t          j                   |	z  }n2d|dz
  z  }t          j        |	j        | t          j                  |	z  }||| d<   |	|| d<   |                    |	j                  || d<   |	j        }t%          fddD                       s|          |<   |                                D ]A\  }}t          j        |j        t          j                  r|                    |          ||<   B||d}||fS )Nr5   r+   z
Only bits=z& is supported for AutoAWQ/GPTQ models.
group_size   z.g_idxzFound z in weights. Models with non-contiguous group indices (g_idx) are not currently supported. Please use a model without g_idx or re-quantize the model using mlx_lm.convert..qweighti.scales.qzerosr,   )axisr   )dtypez.weightz.biasesc              3   B   K   | ]}                     |          V  d S N)endswith).0suffixkeys     r=   	<genexpr>z)_transform_awq_weights.<locals>.<genexpr>   s@       
 
%+CLL  
 
 
 
 
 
r?   )rE   rG   rF   )rC   r5   )get
ValueErrorlistkeysrM   r1   r>   Tr4   r2   arangeastypeuint32sum
contiguousfloat32fullrJ   anyitems
issubdtypefloating)r@   rA   r5   rC   new_weightsprefixr(   
scales_key
qzeros_keyscalesr6   r9   
packed_outr7   n_groupsunpacked_weightr8   repackedr;   weightqzerosunpacked_zerosbiases
zero_pointmodel_dtypekwmlx_quantizationrP   s                               @r=   _transform_awq_weightsrt   G   se    ""61--DqyyNNNNOOO$((s;;JKGLLNN## C, C,<<!! 	A A A A   <<
## ;	,"XF1112G"+++J"+++JZ(F *K&-m#K%3L"j0H 2'::O-/O ${2I&..|YTTHY{++d2F++v5:::CCJJ29UU  ]68,,F W$$ , "5V!<!<!/!1
 )//
;;;fD 4!8_

{"*MMMPVV.4K6***+.4K6***+.4mmFL.I.IK6***+ ,KK 
 
 
 
/Q
 
 
 
 
 	,  's|K!!## 3 31="+.. 	3XXk22KN ! 
 (((r?   configc                     | d         }t                               ||          }	 t          j        d|           }n## t          $ r d| d}t          |          w xY w|j        |j        fS )z
    Retrieve the model and model args classes based on the configuration.

    Args:
        config (dict): The model configuration.

    Returns:
        A tuple containing the Model class and the ModelArgs class.
    
model_typezmlx_lm.models.zModel type z not supported.)MODEL_REMAPPINGrR   	importlibimport_moduleImportErrorrS   Model	ModelArgs)ru   rw   archmsgs       r=   _get_classesr      s     %J $$Z<<J&'D
'D'DEE   7J777oo :t~%%s	   =  Ac                     t          |                                 d           }d t          fd|D                       S )Nc                 6    t          | t          j                  S rL   )
isinstancennModule)ms    r=   <lambda>z&get_total_parameters.<locals>.<lambda>   s    
1bi0H0H r?   is_leafc                    t          | d          r8t          | d          sdn| j        j        }|| j        j        dz  | j        z  z   S t          d t          |                                           D                       S )Nr5   biasr   r,   c              3   *   K   | ]\  }}|j         V  d S rL   )size)rN   _vs      r=   rQ   z8get_total_parameters.<locals>.nparams.<locals>.<genexpr>   s(      CCda16CCCCCCr?   )hasattrr   r   rk   r5   rZ   r   
parameters)r   ns     r=   nparamsz%get_total_parameters.<locals>.nparams   sx    1f 	4 F++<Aqx}r)QV333CCl1<<>>&B&BCCCCCCr?   c              3   4   K   | ]\  }} |          V  d S rL    )rN   r   r   r   s      r=   rQ   z'get_total_parameters.<locals>.<genexpr>   s/      33dawwqzz333333r?   )r   leaf_modulesrZ   )modelr   r   s     @r=   get_total_parametersr      sc    &H&H  LD D D 3333l333333r?   c                 T    t          d | d          }t          |           }|dz  |z  S )Nc                 N    t          |t          j                  r
| |j        z   n| S rL   )r   r2   r3   nbytes)accxs     r=   r   z)compute_bits_per_weight.<locals>.<lambda>   s!    Arx)@)@IsQX~~c r?   r      )r   r   )r   model_bytesmodel_paramss      r=   compute_bits_per_weightr      s:    II5RS K (..L?\))r?   path_or_hf_reporevisionallow_patternsc                     t          |           }|                                s%|pg d}t          t          | ||                    }|S )a  
    Ensures the model is available locally. If the path does not exist locally,
    it is downloaded from the Hugging Face Hub.

    Args:
        path_or_hf_repo (str): The local path or Hugging Face repository ID of the model.
        revision (str, optional): A revision id which can be a branch name, a tag, or a commit hash.

    Returns:
        Path: The local file path.
    )	*.jsonmodel*.safetensors*.pytokenizer.model
*.tiktokentiktoken.model*.txt*.jsonl*.jinja)r   r   )r   existsr   )r   r   r   
model_paths       r=   	_downloadr      sr      o&&J 
' 

 
,
 
,
 
,
 !-  
 

 r?   c                 >    t          t          | d                    S )NT)local_files_only)r   r   )hf_repos    r=   hf_repo_to_pathr      s    !'DAAABBBr?   r   c                    t          | dz  d          5 }t          j        |          }d d d            n# 1 swxY w Y   | dz  }|                                rri }	 t          |d          5 }t          j        |          }d d d            n# 1 swxY w Y   n# t          j        $ r Y nw xY w|                    dd          x}r||d<   |S )Nconfig.jsonrgeneration_config.jsoneos_token_idF)openjsonr   r   JSONDecodeErrorrR   )r   fru   generation_config_filegeneration_configr   s         r=   load_configr      sf   	j=(#	.	. !1               (*BB$$&& 	2	,c22 1a$(IaLL!1 1 1 1 1 1 1 1 1 1 1 1 1 1 1# 	 	 	D	 -00GGG< 	2%1F>"MsB   599B ,BB BB BB B+*B+FTlazystrictmodel_configget_model_classesc                 x   t          |           |                    |           t          j        t          | dz                      }|s|rt	          d|            i |D ])}                    t          j        |                     *                    d          x}kt          j	        
                    d| |z            }t          j	                            |          }	|j                            |	           |	j        |	j        }}
n |          \  }
}dvr%                    di           }d|v r|d         d<   |                              } |
|          t#          d	          r                              fd
}                    dd          x} ||           n                    dd          x}r|d         }|dk    rddlm}  ||          np|dk    rdddd}|d<   |d<    ||           nN|dk    rdddd}|d<   |d<    ||           n,|dv r(t+          |          \  }|d<   |d<    ||                               dd          rKd }t-          |                                t0          j        j                  }                    |                                                                t=                                                    |           |s&t          j                                                    fS )aB  
    Load and initialize the model from a given path.

    Args:
        model_path (Path): The path to load the model from.
        lazy (bool): If False eval the model parameters to make sure they are
            loaded in memory before returning, otherwise they will be loaded
            when needed. Default: ``False``
        strict (bool): Whether or not to raise an exception if weights don't
            match. Default: ``True``
        model_config (dict, optional): Optional configuration parameters for the
            model. Defaults to an empty dictionary.
        get_model_classes (Callable[[dict], Tuple[Type[nn.Module], Type]], optional):
            A function that returns the model class and model args class given a config.
            Defaults to the ``_get_classes`` function.

    Returns:
        Tuple[nn.Module, dict[str, Any]]: The loaded and initialized model and config.

    Raises:
        FileNotFoundError: If the weight files (.safetensors) are not found.
        ValueError: If the model class or args class are not found or cannot be instantiated.
    Nr   zNo safetensors found in 
model_filecustom_model)ru   rA   text_configsanitizec           	          fd}t          j        | d         | d         |                     dd          |           d S )Nc                 f    | d         v rd         |          S t          |d          sdS |  dv S )Nquantizationto_quantizedFrF   )r   )pr   ru   r@   s     r=   class_predicatez6load_model.<locals>._quantize.<locals>.class_predicateQ  sK    F>***n-a001n-- u===G++r?   rC   r5   modeaffine)rC   r5   r   r   )r   quantizerR   )r   r   ru   r   r@   s     r=   	_quantizezload_model.<locals>._quantizeP  so    	, 	, 	, 	, 	, 	, 	#L1f%!!&(33+	
 	
 	
 	
 	
 	
r?   r   Fquant_methodbitnetr   )bitnet_quantizemxfp4r,   r+   rC   r5   r   zcompressed-tensorsr   )awqgptqquantize_activationsc                 :   t          | t          j                  r| j        dvrt	          d          |                     dd          rt	          d          | j        j        \  }}|d| j        z  z  }t          j	        ||| j
        | j        | j                  S | S )N)nvfp4mxfp8z8Mode ({m.mode}) does not support activation quantizationr   Fz?Linear layer with bias does not support activation quantizationr,   )r   r   QuantizedLinearr   rS   rR   rk   r1   r5   QQLinearrC   )r   out_dimsin_dimss      r=   	_maybe_qqzload_model.<locals>._maybe_qq~  s    !R/00 6!333$R   55'' $Y   %&HN!'2<'{7HalAFAFSSSr?   r   )r   )!r   updateglobstrFileNotFoundErrorr2   r   rR   ry   utilspec_from_file_locationmodule_from_specloaderexec_moduler|   r}   	from_dictr   r   models.bitlinear_layersr   rt   r   r   r   r   	is_moduleupdate_modulesevalload_weightsrT   r_   r   )r   r   r   r   r   weight_fileswfr   specr~   model_classmodel_args_classr   
model_argsr   r   rA   r   r   r   leavesru   r   r@   s                        @@@r=   
load_modelr     s   < $$Fl###9S.B!BCCDDL IF I G: G GHHHG $ $rwr{{####jj...
;~55#
 
 ~..t44%%%(,
DN%(9(9(H(H(H%%F**jj33 K//,78M,NF()!++F33JK
##Euj!! *..))
 
 
 
 
 
 
" 

>4888E	, &

+@% H H	H	 $*>:8##@@@@@@#OE+>??EEW$$*,aIIL%1F>",8F()Il####111*,aJJL%1F>",8F()Il####_,,$:7DW$X$X!G\%1F>",8F()Il###zz(%00 %	 	 	  )U%7%7%9%929CVWWWV$$$	JJLLL	tGMMOO,,V<<< $
  ""###&=r?   r   adapter_pathc                 &    ddl m}  || |          S )Nr   )load_adapters)tuner.utilsr   )r   r   _load_adapterss      r=   r   r     s&    <<<<<<>%...r?   c                 L    t          | g d          } t          | ||          S )z`Load a huggingface tokenizer and try to infer the type of streaming
    detokenizer to use.
    r   r   r   r   r   r   r   r   r   eos_token_ids)r   _load_tokenizer)r   tokenizer_config_extrar  s      r=   load_tokenizerr    sL     	
 	
 	
  J #   r?   tokenizer_configreturn_configc                     t          | |          }t          |||          \  }}	|$t          ||          }|                                 t	          |||	                    dd                    }
|r||
|	fS ||
fS )aT  
    Load the model and tokenizer from a given path or a huggingface repository.

    Args:
        path_or_hf_repo (Path): The path or the huggingface repository to load the model from.
        tokenizer_config (dict, optional): Configuration parameters specifically for the tokenizer.
            Defaults to an empty dictionary.
        model_config(dict, optional): Configuration parameters specifically for the model.
            Defaults to an empty dictionary.
        adapter_path (str, optional): Path to the LoRA adapters. If provided, applies LoRA layers
            to the model. Default: ``None``.
        lazy (bool): If ``False`` eval the model parameters to make sure they are
            loaded in memory before returning, otherwise they will be loaded
            when needed. Default: ``False``
        return_config (bool: If ``True`` return the model config as the last item..
        revision (str, optional): A revision id which can be a branch name, a tag, or a commit hash.
    Returns:
        Union[Tuple[nn.Module, TokenizerWrapper], Tuple[nn.Module, TokenizerWrapper, Dict[str, Any]]]:
            A tuple containing the loaded model, tokenizer and, if requested, the model config.

    Raises:
        FileNotFoundError: If config file or safetensors are not found.
        ValueError: If model class or args class are not found.
    )r   )r   Nr   r  )r   r   r   r   r  rR   )r   r	  r   r   r   r
  r   r   r   ru   	tokenizers              r=   r   r     s    H ?X>>>Jz4lKKKME6e\22

$FJJ~t4T4T  I   i''ir?   pipeline_grouptensor_groupc                    t          | g d          }t          |dd          \  }}t          |d          ot          |j        d          }t          |d          }||st	          d	          ||st	          d
          |s|st	          d          ||cxu rDn nA|rt
          j                                        }n |rt
          j                                        }||j                            |           t          |dz  d          5 }	t          j        |	          d         }
d d d            n# 1 swxY w Y   t                      }t          |                                          D ]I\  }}|
                    |d           d u x}rt	          d          |                    |
|                    Jt          | |           nt          |            t#          |ddi|                    dd                     }t          |dd          \  }}||                    |           ||j                            |           t          j        |                                           t          j        t
          j                            t          j        d          t
          j                             |r|||fS ||fS )Nr  r  TF)r   r   r   pipelineshardzGThe model does not support pipelining but a pipeline_group was providedzMThe model does not support tensor parallelism but a tensor_group was providedz'The model does not support any shardingmodel.safetensors.index.jsonr   
weight_mapz<Pipeline loading is only supported for MLX converted models.trust_remote_coder   r  g      ?)stream)r   r   r   r   rS   r2   distributedinitr  r   r   r   setr   r   rR   addr  r  r   all_sumr3   cpu)repor  r  r
  r   r   ru   has_pipelininghas_tensor_parallelfidweight_indexlocal_filesrq   r   	file_namer  s                   r=   sharded_loadr#    s]    	
 	
 	
  J  zUCCCME6UG,,Qj1Q1QN!%11!.!U
 
 	
 (;[
 
 	
  D"5 DBCCC-------- 	3>..00LL 	3^0022N !^,,, *==sCC 	8s9S>>,7L	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 ee !1!1!3!344 	- 	-DAq(,,Q55==y  R   OOLO,,,, 	${33333$ 	d#jj66  I
 *4>>>HE1L!!!!^,,,GE GBN""28C=="@@AAA  i''is   D88D<?D<c                 ^    t          | t          j                                        d |          S rL   )r#  r2   r  r  )r  r
  s     r=   pipeline_loadr%  D  s$    bn1133T=IIIr?   max_file_size_gbc                     |dz  }g }i d}}|                                  D ];\  }}||j        z   |k    r|                    |           i d}}|||<   ||j        z  }<|                    |           |S )z
    Splits the weights into smaller shards.

    Args:
        weights (dict): Model weights.
        max_file_size_gb (int): Maximum size of each shard in gigabytes.

    Returns:
        list: List of weight shards.
       r   )r_   r   append)r@   r&  max_file_size_bytesshardsr  
shard_sizerq   r   s           r=   make_shardsr-  H  s     +b0FA:E  1 #666MM%    "A:Eaah


MM%Mr?   pathhf_pathc                    ddl m}m} | |                     |d                    }n|                    |          }d|j        _        d|j        _        |j        j        dg|j        _        n$d|j        j        vr|j        xj        dgz  c_        |t          |          |j        _
        d|_        |                    t          j                            | d	                     dS )
z
    Uploads the model to Hugging Face hub.

    Args:
        path (Union[str, Path]): Local path to the model.
        hf_path (Union[str, Path, None]): Path to the original Hugging Face model.
    r   )	ModelCardModelCardDataNen)languagemlxztext-generation 	README.md)huggingface_hubr1  r2  from_templater   datalibrary_namepipeline_tagtagsr   
base_modeltextsaveosr.  join)r.  r/  r1  r2  cards        r=   create_model_cardrD  `  s     98888888&&}}d'C'C'CDD~~g&&"DI.DIy~		din	$	$	5'!"7||	DIIIbgll4--.....r?   upload_repoc                    ddl m}m}m} ddlm} |                                 t          |           dz  }|                    |          }|j	        j
        }|d| d| d	| d| d
| d}	nd}	t          d| d|	 d| d          |_        |                    |            |            }
|
                    |d           |
                    | |d           t!          d| d           dS )z
    Uploads the model to Hugging Face hub.

    Args:
        path (str): Local path to the model.
        upload_repo (str): Name of the HF repo to upload to.
    r   )HfApir1  loggingr   )__version__r7  Nz
        This model [z](https://huggingface.co/z,) was
        converted to MLX format from [z!)
        using mlx-lm version **z**.
        r6  z
        # z	
        z
        ## Use with mlx

        ```bash
        pip install mlx-lm
        ```

        ```python
        from mlx_lm import load, generate

        model, tokenizer = load("av  ")

        prompt = "hello"

        if tokenizer.chat_template is not None:
            messages = [{"role": "user", "content": prompt}]
            prompt = tokenizer.apply_chat_template(
                messages, add_generation_prompt=True, return_dict=False,
            )

        response = generate(model, tokenizer, prompt=prompt, verbose=True)
        ```
        T)repo_idexist_okr   )folder_pathrJ  	repo_typez0Upload successful, go to https://huggingface.co/z for details.)r8  rG  r1  rH  r6  rI  set_verbosity_infor   r   r:  r>  r   r?  r@  create_repoupload_large_folderprint)r.  rE  rG  r1  rH  rI  	card_pathrC  r/  
provenanceapis              r=   upload_to_hubrU  z  s    :999999999   T

[(I>>)$$Di"G  ;F '. IP  !,  

 
		 			 	 #.	 	 	 DI6 	IIi
%''COOK$O777    
 

W[
W
W
WXXXXXr?   donate_model	save_pathrW  c                8   t          | t                    rt          |           } |                     dd           t	          t          |                                                    }t          |          }t          |          }|dk    rdnd}t          d |
                                D                       }|t          |          di d|r6|                    t          d	 |                                                     |                                 ~t          t          |                    D ]v}||         }	d
||<   |                    |dz   |          }
| |
z  }t#          j        t          |          |	ddi           |	                                D ]}|
d         |<   ~	wfdt)          d                   D             d<   t+          | dz  d          5 }t-          j        |d           d
d
d
           d
S # 1 swxY w Y   d
S )z?Save model weights and metadata index into specified directory.T)parentsrK  r   z"model-{:05d}-of-{:05d}.safetensorszmodel.safetensorsc              3   $   K   | ]}|j         V  d S rL   )r   )rN   r   s     r=   rQ   zsave_model.<locals>.<genexpr>  s$      88!QX888888r?   )
total_sizetotal_parameters)metadatar  c                 *    t          j        g           S rL   )r2   r3   )r   s    r=   r   zsave_model.<locals>.<lambda>  s     r?   Nformatr5  )r^  r  c                 .    i | ]}|d          |         S )r  r   )rN   rq   
index_datas     r=   
<dictcomp>zsave_model.<locals>.<dictcomp>  s2          +,:l#A&     r?   r  rr   r+   indent)r   r   r   mkdirdictr   r   r-  lenrZ   valuesr   r   r   clearranger`  r2   save_safetensorsrU   sortedr   r   dump)rX  r   rW  r@   r+  shards_countshard_file_formatr\  ir  
shard_name
shard_pathweight_namer   rb  s                 @r=   
save_modelru    s    )S!! $OO	OOD4O000< 0 0 2 23344G!!Fv;;L ! 	-,   88w~~'7'788888J % 4U ; ;
 
  J  KX44e6F6F6H6HIIJJJ MMOOO3v;; 
 
q	q	&--a!e\BB
+

C
OOUh=NOOOO ::<< 	? 	?K4>J|$[11E       06z,7O0P0P     J| 
i88#	>	> 
!		
 	
 	
 	

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   *HHHr   rC   r5   r   quant_predicatec                 P  	
 d }t          j        |          pt          | dd           |||          \  }||d
dv rd	nd	
d<   	
fd}t          j        | |||	           d         d
<   t          |           }t          d|dd           | fS )a  
    Applies quantization to the model weights.

    Args:
        model (nn.Module): The model to be quantized.
        config (dict): Model configuration.
        group_size (Optional[int]): Group size for quantization.
        bits (Optional[int]): Bits per weight for quantization.
        mode (str): The quantization mode.
        quant_predicate (Callable): A callable that decides how to quantize
          each layer based on the path. Accepts the layer `path` and the
          `module`. Returns either a bool to signify quantize/no quantize or
          a dict of quantization parameters to pass to `to_quantized`.

    Returns:
        Tuple: Tuple containing quantized model and config.
    c                 6    ddddd}||          \  }}|p||p|fS )N)@   r+   )r,   r+   )   r+   )r,   r   )r   r   r   r   r   )r   rC   r5   mode_defaultsdefault_group_sizedefault_bitss         r=   defaults_for_modez)quantize_model.<locals>.defaults_for_mode  sB    	
 
 ,9+>(L//1EEEr?   rv  Nr   r   TFc                     t          |d          sdS |j        j        d         z  dk    rdS d} | |          }t          |t                    r|d         | <   nr|rd         | <   |S )Nr   FrH   r   Tr   )r   rk   r1   r   rg  )r.  modulebool_or_paramsfine_grained_configrC   quant_paramsrv  quantized_configs      r=   wrapped_predicatez)quantize_model.<locals>.wrapped_predicate)  s    v~.. 	5=r"Z/1445&,_T6::Nnd++ 	B5C^,T22  	B^ 	B5A^,T2r?   )r   r   rA   z[INFO] Quantized model with z.3fz bits per weight.)copydeepcopygetattrr   r   r   rQ  )r   ru   rC   r5   r   rv  r~  r  bpwr  r  r  s     `  `   @@@r=   quantize_modelr    s2   4F F F }V,,%P8I4)P)PO((z4@@J",dDIIL))) ##+7(         K)    /?~.N*+
!%
(
(C	
C
C
C
C
CDDD"""r?   c           	         ddl m}m} g }|                                 D ]\  }}d|v }t	          |t
          j                  rt
          j        }d|i}nAt	          |t
          j                  ri }t
          j	        }nt	          ||          rd|i}|}nut          j        |j        |j        |j        |j        |j        |j                  }	|	j        ddd         }
 ||
i |}|r|j        |_        |	|_        |                    ||f           t+          |          dk    r"|                     t/          |                     | S )z
    Dequantize the quantized layers in the model.

    Args:
        model (nn.Module): The model with quantized layers.

    Returns:
        nn.Module: The model with dequantized layers.
    r   )QuantizedSwitchLinearSwitchLinearr   NrH   r   )models.switch_layersr  r  named_modulesr   r   r   LinearQuantizedEmbedding	Embeddingr2   
dequantizerk   rf   rn   rC   r5   r   r1   r   r)  rh  r   r   )r   r  r  dequantize_layersnamer  r   clskwargsrk   argsr   s               r=   dequantize_modelr  G  s}    JIIIIIII++-- , ,ffb011 
	)Cd^FF 566 	F,CC 566 	d^FCCMMMKK
 
 |DDbD!C    	![AF  $++++
!!^,=>>???Lr?   config_pathc                 X   |                      dd           |                      dd           d| v r| d         | d<   t          t          |                                                     } t	          |d          5 }t          j        | |d           ddd           dS # 1 swxY w Y   dS )	a  Save the model configuration to the ``config_path``.

    The final configuration will be sorted before saving for better readability.

    Args:
        config (dict): The model configuration.
        config_path (Union[str, Path]): Model configuration file path.
    _name_or_pathNvision_configr   rA   rr   r+   rd  )poprg  rm  r_   r   r   rn  )ru   r  r  s      r=   save_configr  u  s     JJ%%%
JJ%%%(.~(>$% &(())F 
k3		 )3	&#a(((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )s   :BB#&B#dst_pathsrc_path_or_repor  c                    t          |          }|                                s|}t          |          }nd }t          |           } t          | |d           t	          || dz             |                    |            dD ]>}t          j        t          ||z                      D ]}	t          j	        |	|            ?t          | |           d S )NTrV  r   )r  )r   r   )r   r   r   ru  r  save_pretrainedr   r   shutilr  rD  )
r  r  r   r  ru   rW  src_pathr   r   files
             r=   r@  r@    s     $%%H?? ""7++H~~HxT2222H}$<====h'''/ ( (Ic(Q,//00 	( 	(DKh''''	( h(((((r?   c                     t          t          |           t          |                    }t          |          D ]}| |         ||         k    r|c S |S )a$  
    Calculates the length of the common prefix of two lists.

    Args:
        list1: The first list of strings.
        list2: The second list of strings.

    Returns:
        The length of the common prefix. Returns 0 if lists are empty
        or do not match at the first element.
    )minrh  rk  )list1list2min_lenrq  s       r=   common_prefix_lenr    s^     #e**c%jj))G 7^^  8uQxHHH   Nr?   c                 x    	 t          j        | j                  }d|j        v S # t          t
          f$ r Y dS w xY w)z
    Check if the model supports input_embeddings in its call signature.
    Args:
        model (nn.Module): The model to check.
    Returns:
        bool: True if the model supports input_embeddings, False otherwise.
    input_embeddingsF)inspect	signature__call__r   rS   	TypeError)r   r  s     r=   #does_model_support_input_embeddingsr    sN    %en55	!Y%999	"   uus   !$ 99)NN)NNNFFN)NNF)F)r   N)T)Mr  r   ry   r  r   rA  resourcer  pathlibr   textwrapr   typingr   r   r   r   r	   r
   r   r   mlx.corecorer2   mlx.nnr   getenvlower
modelscoper   r{   r8  	setrlimitRLIMIT_NOFILE	mlx.utilsr   r   r   r   tokenizer_utilsr   r   r  rx   MAX_FILE_SIZE_GBr3   r>   r   rt   rg  r   r   r   r   r   r   boolr   r   r   r  r  Groupr#  r%  intrT   r-  rD  rU  ru  r  r  r  r@  r  r  r   r?   r=   <module>r     s           				              	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	            29#W--3355??M0000000 M M MkKLLLM 211111  8)< 8 8 8 I I I I I I I I I I I I . - - - - - 4 4 4 4 4 4 $
 
  7 7bh 7 7 7 7Y)#rx- Y)c3hY) 4RXS#X./Y) Y) Y) Y)x& & & & &*4 4 4* * * # $& &&sm& I& 
	& & & &RC C CD T    * -1HTJ JJ
J J 4S>*	J
  d29ot.C(D DEJ 29d?J J J JZ/ /# /") / / / /   4 26-1"&"1  1 1 tCH~.1  4S>*1  3-	1 
 1  1  sm1  	")%
%&	")%tCH~
5681  1  1  1 l 6:37	T  T R^12T  2>/0T  	T  T  T  T nJ J J J 8H    D    0/E#t), /uS$_7M / / / /4?Y ?Y# ?Y ?Y ?Y ?YL 	9
 9
 9
S$Y9
99
 	9

 
9
 9
 9
 9
B OSL# L#9L#L# L# 3-	L#
 L# hRY'7tTz9J'JKLL# 29d?L# L# L# L#^+BI +") + + + +\))sDy!) 
) ) ) )@ ) )CI)CI&) 9)  	)
 cN) ) ) ) )8  4ry T      s   5A< <B