
    vjl6                     ,   d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZmZmZ d dlmZ d dlmZ  e            Zd Zd Zd	efd
Zd	efdZd	efdZded	efdZded	efdZd ZddeddfdZdeeeeef         d	efdZ	 	 	 d$deeeeef         dee         dee         dee          d	e!f
dZ"	 	 	 d$deeef         dedee         dee          d	e!f
dZ#	 	 	 	 d%deeeeef         dee         d edee         dee          d	e!fd!Z$d"ed	e fd#Z%dS )&    N)Path)Errorcopy2copystat)BinaryIOOptionalUnion)urlparse)
get_loggerc                     t          j        |           }|j        }|j        }||dS |j        sg n|j        }|r|d         dv r|                    d          n| t          |          dk    r|d         dv rdS dS )zto decide if a func could receive dict inputs or not

    Args:
        func (class): the target function to be inspected

    Returns:
        bool: if func only has one arg ``input`` or ``inputs``, return True, else return False
    NFr   )selfcls   )inputinputsT)inspectgetfullargspecvarargsvarkwargspoplen)funcfull_args_specr   r   r   s        k/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/modelscope/utils/file_utils.pyfunc_receive_dict_inputsr      s     +D11N$G EOu#(A22n.ADBT!W77DHHQKKKd
4yyA~~$q'%888t5    c                      t           j                            t          j                                        ddd                    } | S )z1
    default base dir: '~/.cache/modelscope'
    z.cache
modelscopehub)ospath
expanduserr   homejoinpath)default_cache_dirs    r    get_default_modelscope_cache_dirr'   )   sA     **49;;+?+?,,' ,' ( (r   returnc                  ~    t           j                            t          j        dt	                                          S )zGet modelscope cache dir, default location or
       setting with MODELSCOPE_CACHE

    Returns:
        str: the modelscope cache root.
    MODELSCOPE_CACHE)r!   r"   r#   getenvr'    r   r   get_modelscope_cache_dirr-   2   s8     7
	$&F&H&HIIK K Kr   c                  Z    t           j                            t                      d          S )z[Get model cache root path.

    Returns:
        str: the modelscope model cache root.
    modelsr!   r"   joinr-   r,   r   r   get_model_cache_rootr2   =   s!     7<<022H===r   c                  Z    t           j                            t                      d          S )zGet dataset raw file cache root path.
    if `MODELSCOPE_CACHE` is set, return `MODELSCOPE_CACHE/datasets`,
    else return `~/.cache/modelscope/hub/datasets`

    Returns:
        str: the modelscope dataset raw file cache root.
    datasetsr0   r,   r   r   get_dataset_cache_rootr5   F   s!     7<<022J???r   
dataset_idc                 l    t                      }| |n"t          j                            || dz             S )zGet the dataset_id's path.
       dataset_cache_root/dataset_id.

    Args:
        dataset_id (str): The dataset id.

    Returns:
        str: The dataset_id's cache root path.
    N/)r5   r!   r"   r1   )r6   dataset_roots     r   get_dataset_cache_dirr:   Q   s:     *++L%-<<27<<j3&4( 4( (r   model_idc                 l    t                      }| |n"t          j                            || dz             S )zcache dir precedence:
        function parameter > environment > ~/.cache/modelscope/hub/model_id

    Args:
        model_id (str, optional): The model id.

    Returns:
        str: the model_id dir if model_id not None, otherwise cache root dir.
    Nr8   )r2   r!   r"   r1   )r;   	root_paths     r   get_model_cache_dirr>   `   s9     %&&I (99bgll8c>/# /# #r   c                 ~    t          | d          5 }|                                }d d d            n# 1 swxY w Y   |S )Nr)openread)r"   ftexts      r   	read_filerE   o   st    	dC Avvxx              Ks   266Fc           	         t          j        |           }| || |          }nt                      }t          j        ||           g }	|D ]}
|
|v rt           j                            | |
          }t           j                            ||
          }	 t           j                            |          rt          j        |          }|r)t          j        ||           t          |||            nt           j        
                    |          s|rt           j                            |          rt          ||||||           nN |||           nAt           j                            |          rt          ||||||           n |||           N# t          $ r+}|	                    |j        d                    Y d}~~d}~wt           $ r0}|	                    ||t%          |          f           Y d}~d}~ww xY w	 t          | |           nM# t           $ r@}t'          |dd          %|	                    | |t%          |          f           Y d}~nd}~ww xY w|	rt          |	          |S )z7copy from py37 shutil. add the parameter dirs_exist_ok.N)exist_ok)follow_symlinks)dirs_exist_okr   winerror)r!   listdirsetmakedirsr"   r1   islinkreadlinksymlinkr   existsisdircopytree_py37r   extendr   OSErrorappendstrgetattr)srcdstsymlinksignorecopy_functionignore_dangling_symlinksrI   namesignored_nameserrorsnamesrcnamedstnamelinktoerrwhys                   r   rS   rS   v   s    JsOOEsE**Km,,,,F -8 -8=  ',,sD))',,sD))(	8w~~g&& !0W-- 8 Jvw///Wg8|LLLLL 7>>&11 !6N ! w}}W-- 	8%##$")*79 9 9 9 9 &gw7777w'' 
0!"/1 1 1 1 1 gw///  	' 	' 	'MM#(1+&&&&&&&& 	8 	8 	8MM7GSXX677777777	80c 0 0 03
D))1MM3SXX.///0  FmmJsD   A?FBF
H$ G

H%HHH 
I&&6I!!I&file_path_or_objc                    t          | t          t          f          r(t          |           }|                                j        S t          | t
                    rt          |           S t          | t          j                  r_| 	                                }| 
                    dt          j                   | 	                                }| 
                    |           |S t          d          )Nr   zCUnsupported type: must be string, Path, bytes, or io.BufferedIOBase)
isinstancerW   r   statst_sizebytesr   ioBufferedIOBasetellseekr!   SEEK_END	TypeError)rh   	file_pathcurrent_positionsizes       r   get_file_sizerw      s    "S$K00 
)**	~~''	$e	,	, 
#$$$	$b&7	8	8 	
+0022a---$$&&.///Q
 
 	
r      [Calculating]Tbuffer_size_mb	tqdm_descdisable_tqdmc           	         ddl m} t          |           }|dk    rSd}d}t          | t          t
          f          r-t          | t
                    r| nt          |           }|j        }d| d}|dz  dz  }t          j                    }	 ||dd	d	d
||          }
t          | t          t
          f          rd}t          | d          5 }|
                    |          x}r`|	                    |           |t          |          z  }|
                    t          |                     |
                    |          x}`ddd           n# 1 swxY w Y   |	                                }	||k    r#t                              d| d| d|             |}nt          | t                     r\|	                    |            |	                                }	|
                    t          |                      t          |           }n6t          | t"          j                  rd}|                     dt(          j                   | 
                    |          x}r`|	                    |           |t          |          z  }|
                    t          |                     | 
                    |          x}`|	                                }	|                     dt(          j                   ||k    r#t                              d| d| d|             |}n#|
                                 t/          d          |
                                 | |	|dS )a  Compute SHA256 hash for a file path, bytes, or file-like object.

    Args:
        file_path_or_obj: File path, bytes, or file-like object.
        buffer_size_mb: Read buffer size in MB. Default 16MB.
        tqdm_desc: Progress bar description.
        disable_tqdm: Whether to disable progress bar.

    Returns:
        dict with keys: file_path_or_obj, file_hash, file_size.
    r   tqdmi   @Fz
Large Filez[Validating Hash for ]   TB)totalinitial
unit_scaledynamic_ncolsunitdescdisablerbN4File size changed during hash computation: declared  bytes, actually hashed % bytes. File may have been modified: z5Input must be str, Path, bytes or a io.BufferedIOBaserh   	file_hash	file_size)	tqdm.autor   rw   rj   rW   r   rb   hashlibsha256rA   rB   updater   	hexdigestloggerwarningrm   rn   ro   rq   r!   SEEK_SETclose
ValueError)rh   rz   r{   r|   r   declared_sizerb   r"   buffer_sizer   progressbytes_hashedrC   
byte_chunkr   s                  r   get_file_hashr      s1   " !"233M)))&d44 	'1 $(( (( D##-12B-C-C 9D3D333	 4'$.K  It  H "S$K00 (E"D)) 	1Q !{ 3 33* 1  ,,,J/J000 !"{ 3 33* 1	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1 	1
 ''))	=((NNC)C CCOC C0@C CD D D !			$e	,	, E)***''))	,--...())			$b&7	8	8 Ea---,11+>>>j 	-Z(((C
OO+LOOC
OO,,, -11+>>>j 	- ''))	a---=((NNC)C CCOC C0@C CD D D !		 	CE E 	E NN -  s   
A8EEErt   c           	      .    ddl }ddl}ddlm} t	                      t
          j                                       }|dz  dz  |                    d          dg fd}|	                    |d	          }	|	
                                 t          j                    }
d} ||||dd
dd          }	                                 }|nJ|
                    |           |t          |          z  }|                    t          |                     a|	                                 |                                 d         d         ||k    r#t$                              d| d| d              |
                                |dS )a  Compute SHA256 with async I/O double-buffering for high-latency storage.

    Uses a producer-consumer pattern: a background thread reads file chunks
    into a bounded queue while the main thread computes the hash. This
    overlaps I/O latency with CPU computation.
    r   Nr~   r      )maxsizec                  R   	 t          d          5 } 	 |                               }|sn                    |           .	 d d d            n# 1 swxY w Y   n# t          $ r}|d<   Y d }~nd }~ww xY w                    d            d S #                     d            w xY w)Nr   Tr   )rA   rB   put	Exception)rC   chunker   chunk_queuert   
read_errors      r   	_producerz'_get_file_hash_async.<locals>._producerF  s   
	"i&& +!+FF;//E  OOE***	+ 	+ + + + + + + + + + + + + + +  	 	 	JqMMMMMM	 OOD!!!!!KOOD!!!!sR   A 0AA AA AA B 
A5&A0+B 0A55B B&T)targetdaemonr   )r   r   r   r   r   r   unit_divisorr   r   r   r   )queue	threadingr   r   rW   r!   r"   getsizeQueueThreadstartr   r   getr   r   r1   r   r   r   r   )rt   rz   r{   r|   r   r   r   r   r   reader_threadr   r   r   r   r   r   r   s   `             @@@r   _get_file_hash_asyncr   .  s    LLLIIGOOI..M 4'$.K++a+((KJ" " " " " " " " $$Id$CCM  ILt  H$!!=E

"E

###$ NN!} m}$$8%8 8?K8 8,58 8	9 	9 	9 &((**!  r       async_threshold_mbc                     t          | t          t          f          rKt          j                            t          |                     }||dz  dz  k    rt          | |||          S t          | |||          S )a  Compute SHA256 hash with automatic optimization for large files.

    For file paths larger than async_threshold_mb, uses async double-buffered
    I/O to overlap disk reads with hash computation. For smaller files or
    non-path inputs (bytes, BinaryIO), falls back to synchronous hashing.

    Args:
        file_path_or_obj: File path, bytes, or file-like object.
        buffer_size_mb: Read buffer size in MB. Default 16MB for NAS optimization.
        async_threshold_mb: File size threshold for async mode. Default 32MB.
        tqdm_desc: Progress bar description.
        disable_tqdm: Whether to disable progress bar.

    Returns:
        dict with keys: file_path_or_obj, file_hash, file_size.
    r   )rz   r{   r|   )rj   rW   r   r!   r"   r   r   r   )rh   rz   r   r{   r|   r   s         r   compute_file_hashr   }  s    0 "S$K00 GOOC(8$9$9::	*T1D888' -#)	    %!	   r   url_or_filenamec                 r    t          |           j        dk    ot          j                            |            S )z5
    Check if a given string is a relative path.
     )r
   schemer!   r"   isabs)r   s    r   is_relative_pathr     s>      2& M.0gmmO.L.L*LMr   )rx   ry   T)rx   r   ry   T)&r   r   rn   r!   pathlibr   shutilr   r   r   typingr   r   r	   urllib.parser
   modelscope.utils.loggerr   r   r   r'   rW   r-   r2   r5   r:   r>   rE   rS   rm   intrw   booldictr   r   r   r   r,   r   r   <module>r      s_     				 				       ) ) ) ) ) ) ) ) ) ) , , , , , , , , , , ! ! ! ! ! ! . . . . . .	  0  K# K K K K>c > > > >@ @ @ @ @(c (c ( ( ( (## ## # # # #   ! %+0 %F F F FR
E#tUH*D$E 
# 
 
 
 
( %'.#'	Z ZCuh67ZSMZ }Z 4.	Z
 
Z Z Z Z~ .#'	L LS$YLL }L 4.	L
 
L L L Lb %' .#'( (Cuh67(SM( ( }	(
 4.( 
( ( ( (VMc Md M M M M M Mr   