o
    z3iK                     @   s,  d Z ddlZddlZddlmZ ddlmZmZ ddlZddl	m
Z
 ddlmZ G dd	 d	ejZd
d Zdd Zdd Zdd Zeje
dk rNdd Zne rUdd Zeedd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zeje
dk reed$d% ZdS e reed&d' ZdS dS )(zPExtends `dill` to support pickling more types and produce more consistent dumps.    N)BytesIO)CodeTypeFunctionType)version   )configc                       sF   e Zd Zejejj ZdZ	d	ddZ
 fddZdd Z  ZS )
PicklerFTc           	      C   s:  t |}|| jvrdtjv rdd l}||ju rt|t dtjv r2dd l}t	||j
r2t|t dtjv rFdd l}||ju rFt|t dtjv rsdd l}t	||jr[t|t ||ju rft|t t	||jjrst|d|}dtjv rdd l}t	||jrt|t |tu rt|d|}tjj| ||d	 d S )
Nregexr   spacytiktokentorch	_orig_modtransformers_torchdynamo_orig_callable)save_persistent_id)typedispatchsysmodulesr	   Patternpklregister_save_regexPatternr
   
issubclassLanguage_save_spacyLanguager   Encoding_save_tiktokenEncodingr   Tensor_save_torchTensor	Generator_save_torchGeneratornnModulegetattrr   PreTrainedTokenizerBase)_save_transformersPreTrainedTokenizerBaser   dillr   save)	selfobjr   obj_typer	   r
   r   r   r    r+   b/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/datasets/utils/_dill.pyr'      s:   








zPickler.savec                    sZ   zt |}W n ty   ddlm  t | fddd}Y nw t j|g|R i |S )Nr   Hasherc                    s     | d S )Nr   )hash)xr-   r+   r,   <lambda>P   s    z)Pickler._batch_setitems.<locals>.<lambda>key)sorted	Exceptiondatasets.fingerprintr.   super_batch_setitems)r(   itemsargskwargs	__class__r-   r,   r8   H   s   zPickler._batch_setitemsc                 C   s"   t |turtj| | d S d S N)r   strr&   r   memoize)r(   r)   r+   r+   r,   r@   S   s   zPickler.memoize)T)__name__
__module____qualname__r&   _dillMetaCatchingDictr   r   copy_legacy_no_dict_keys_sortingr'   r8   r@   __classcell__r+   r+   r<   r,   r      s    
)r   c                    s    fdd}|S )z'Register a custom reducer for the type.c                    s   | t j < | S r>   )r   r   )functr+   r,   proxy\   s   
zpklregister.<locals>.proxyr+   )rK   rL   r+   rJ   r,   r   Y   s   r   c                	   C   sP   t jjdd tdjtdjtdjtdjtdjtdjfv S )	z<Check if the current dill version is in the supported range.N   0.3.6z0.3.7z0.3.8z0.3.9z0.4.0z0.4.1)r   DILL_VERSIONreleaser   parser+   r+   r+   r,   _is_supported_dill_versionc   s   





rR   c                 C   s   t |dd|  dS )zPickle an object to a file.T)recurseN)r   dumpr)   filer+   r+   r,   rT   o   s   rT   c                 C   s   t  }t| | | S )zPickle an object to a string.)r   rT   getvaluerU   r+   r+   r,   dumpst   s   
rX   rN   c                 C   s   t jj| d S r>   )r&   rD   loginfopicklermsgr+   r+   r,   rY   }   s   rY   c                 C   s   t jj| | d S r>   )r&   rD   loggertracer[   r+   r+   r,   rY      s   c                 C   sl   t | d|  zt|f}W n ty&   ddlm} t||jdf}Y nw | jt||d t | d d S )NzSe: r   r-   r2   r)   z# Se)rY   r4   r5   r6   r.   r/   save_reduceset)r\   r)   r:   r.   r+   r+   r,   	_save_set   s   rc   c                 C   sD   dd l }t| d|  |j|jf}| j|j||d t| d d S )Nr   zRe: r`   z# Re)r	   rY   patternflagsra   compile)r\   r)   r	   r:   r+   r+   r,   r      s
   r   c                 C   sL   dd l }t| d|  |j|j|j|jf}| j|j||d t| d d S )Nr   zEnc: r`   z# Enc)r   rY   name_pat_str_mergeable_ranks_special_tokensra   r   )r\   r)   r   r:   r+   r+   r,   r      s
   r   c                    s   dd l  d fdd	}t| d|  |j jkr)|  j   jf}n	|   f}| j	|||d t| d d S )Nr   c                    s     | }|r||}|S r>   )
from_numpyr   )np_arraydtypetensorr   r+   r,   create_torchTensor   s   

z-_save_torchTensor.<locals>.create_torchTensorzTo: r`   z# Tor>   )
r   rY   rm   bfloat16detachtofloatcpunumpyra   )r\   r)   rp   r:   r+   ro   r,   r      s    r   c                    sL   dd l   fdd}t| d|  | f}| j|||d t| d d S )Nr   c                    s      }||  |S r>   )r   	set_state)state	generatorro   r+   r,   create_torchGenerator   s   
z3_save_torchGenerator.<locals>.create_torchGeneratorzGe: r`   z# Ge)r   rY   	get_statera   )r\   r)   rz   r:   r+   ro   r,   r       s   
r    c                    sP   dd l   fdd}t| d|  |j| f}| j|||d t| d d S )Nr   c                    s(    j | d d }|| }||S )Nnlplang)utilget_lang_classfrom_config
from_bytes)r   byteslang_cls	lang_instr
   r+   r,   create_spacyLanguage   s   

z1_save_spacyLanguage.<locals>.create_spacyLanguagezSp: r`   z# Sp)r
   rY   r   to_bytesra   )r\   r)   r   r:   r+   r   r,   r      s   r   c                 C   s  t | d|  |j }d|v rt|d tri |d< d|v r+t|d tr+i |d< |jd}d  }}|d urst|drst|drs|j}|j}z|d urWt|drW|	  |d urdt|drd|
  W n tyr   d  }}Y nw z>| jt|d	||d
 W z(|d ur|d urt|dr|jd	i | |d urt|dr|jd	i | W nA ty   Y n9w z.|d ur|d urt|dr|jd	i | |d urt|dr|jd	i | W w W w W w W w  ty   Y w w t | d d S )NzTok: cachedeprecation_warnings
_tokenizer
truncationpaddingno_truncation
no_paddingr+   )rx   r)   enable_truncationenable_paddingz# Tok)rY   __dict__rF   
isinstancedictgethasattrr   r   r   r   r5   ra   r   r   r   )r\   r)   rx   backend_tokenizerr   r   r+   r+   r,   r%      s`   
	r%   c                 C   sn  t jjd|  |jds.t|jtj	j
dkr)|jtj	j
d ds.|jdkr0dntj	|j}d}t jjrt|drk|j|j|j|j|j|j|j|j|j|j||j|tjd	krb|jn|j|j|jf}n;|j|j|j|j|j|j|j|j|j||j||j|j|jf}n|j|j|j|j|j|j|j|j||j||j|j|jf}| jt ||d
 t jjd dS )z
        From dill._dill.save_code
        This is a modified version that removes the origin (filename + line no.)
        of functions created in notebooks or shells for example.
        zCo: <   
ipykernel_<lambda> co_posonlyargcount)rM   
   r`   # CoN)!r&   rD   rY   rZ   co_filename
startswithlensplitospathsepco_namebasenamePY3r   co_argcountr   co_kwonlyargcount
co_nlocalsco_stacksizeco_flagsco_code	co_constsco_namesco_varnamesr   version_infoco_linetable	co_lnotabco_freevarsco_cellvarsra   r   r\   r)   r   co_firstlinenor:   r+   r+   r,   
_save_code  s   



r   c                 C   s(  t jj| d| |jds-t|jtj	j
dkr(|jtj	j
d ds-|jdkr/dntj	|j}d}t|drh|j|j|j|j|j|j|j|j|j|j|j||j|j||j|j|j|j|j|jf}nt|d	r|j|j|j|j|j|j|j|j|j|j|j||j|j||j|j|j|jf}nlt|d
r|j|j|j|j|j|j|j|j|j|j|j||j||j|j|jf}nDt|dr|j|j|j|j|j|j|j|j|j|j||j||j|j|jf}n|j|j|j|j|j|j|j|j|j||j||j|j|jf}| j t jj!||d t jj| d d S )NzCo: %sr   r   r   r   r   r   co_endlinetableco_exceptiontabler   r   r`   r   )"r&   rD   r^   r_   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   co_qualnamer   co_columntabler   r   r   r   ra   _create_coder   r+   r+   r,   	save_codeh  s   






r   )__doc__r   r   ior   typesr   r   r&   	packagingr   r   r   r   r   rR   rT   rX   rO   rQ   rY   rb   rc   r   r   r   r    r   r%   r   r   r+   r+   r+   r,   <module>   s@   >


		/^