
    ujɥ              '          d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZmZ d dlmZ d dlmZmZmZmZmZmZ d d	lmZ d d
lmZ d dl m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z-m.Z.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4m5Z5m6Z6m7Z7  e            Z8dZ9dddddddddddddeddddfde:dee:         dee:edf         deeee:f                  dee;         dee
         deee:ee:         f                  deee:ee:         f                  dee:         deeee:         e:f                  d eeee:         e:f                  d!ee<         d"e:d#ee:         d$ee;         d%eee&                  d&ee:         d'ee:         d(e:f&d)Z=eddddddddddd*ddfd+e:dee:         dee:edf         dee:         deeee:f                  dee;         dee
         deee:ee:         f                  deee:ee:         f                  deeee:         e:f                  d eeee:         e:f                  d$ee;         d!e<d&ee:         d'ee:         d(e:f d,Z>dedddddddddd*dddd-d"e:d#ee:         dee:         dee:edf         deeee:f                  dee;         dee
         deee:ee:         f                  deee:ee:         f                  dee:         deeee:         e:f                  d eeee:         e:f                  d!e<d%eee&                  d&ee:         d'ee:         f d.Z?dd/dddde9fd0Z@d1e:fd2ZAd3ee:ee:         f         fd4ZBd3ee:         fd5ZCddddd6d7eeD         deee:                  deee:                  deee:                  d eee:                  d(eeD         fd8ZEdd/dddde9fd9ZFe(ddfd:ZGdedddddd*dddfd7ee:         d;e1d<e:d"e:d=e#d>e:d?e:d#ee:         dee:         dee
         deee:ee:         f                  deee:ee:         f                  deeee:         e:f                  d eeee:         e:f                  d!e<d'ee:         d%eee&                  d@e;f$dAZHdS )B    N)ThreadPoolExecutor)nullcontext)	CookieJarPath)DictListOptionalTypeUnion)tqdm)DEFAULT_DATASET_REVISIONDEFAULT_MODEL_REVISIONINTRA_CLOUD_ACCELERATIONREPO_TYPE_DATASETREPO_TYPE_MODELREPO_TYPE_SUPPORT)get_modelscope_cache_dir)
get_logger)thread_executor   )HubApiModelScopeConfig)ProgressCallback)DEFAULT_MAX_WORKERS)FileDownloadErrorInvalidParameter)$create_temporary_directory_and_cachedownload_fileget_file_download_url)ModelFileSystemCache)extract_root_from_patternsget_model_masked_directorymodel_id_to_group_owner_name	strtoboolweak_file_lock   Fmodel_idrevision	cache_dir
user_agentlocal_files_onlycookiesignore_file_patternallow_file_pattern	local_dirallow_patternsignore_patternsmax_workersrepo_id	repo_typeenable_file_lockprogress_callbackstokenendpointreturnc                    |p| }|st          d          |t          vrt          d| dt                     |pt          }||t          k    rt          nt
          }|-t          t          j        	                    dd                    }|r||nt                      }t          j        t          j                            |d          d	           t          j                            |d|                    d
d                    }t          |          }nt!                      }|5  t#          |f||||||||||
|	||||dcddd           S # 1 swxY w Y   dS )a  Download all files of a repo.
    Downloads a whole snapshot of a repo's files at the specified revision. This
    is useful when you want all files from a repo, because you don't know which
    ones you will need a priori. All files are nested inside a folder in order
    to keep their actual filename relative to that folder.

    An alternative would be to just clone a repo but this would require that the
    user always has git and git-lfs installed, and properly configured.

    Args:
        repo_id (str): A user or an organization name and a repo name separated by a `/`.
        model_id (str): A user or an organization name and a model name separated by a `/`.
            if `repo_id` is provided, `model_id` will be ignored.
        repo_type (str, optional): The type of the repo, either 'model' or 'dataset'.
        revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a
            commit hash. NOTE: currently only branch and tag name is supported
        cache_dir (str, Path, optional): Path to the folder where cached files are stored, model will
            be save as cache_dir/model_id/THE_MODEL_FILES.
        user_agent (str, dict, optional): The user-agent info in the form of a dictionary or a string.
        local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        cookies (CookieJar, optional): The cookie of the request, default None.
        ignore_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be ignored in downloading, like exact file names or file extensions.
        allow_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be downloading, like exact file names or file extensions.
        local_dir (str, optional): Specific local directory path to which the file will be downloaded.
        allow_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, only files matching at least one pattern are downloaded, priority over allow_file_pattern.
            For hugging-face compatibility.
        ignore_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern.
            For hugging-face compatibility.
        max_workers (`int`): The maximum number of workers to download files, default 8.
        enable_file_lock (`bool`): Enable file lock, this is useful in multiprocessing downloading, default `True`.
            If you find something wrong with file lock and have a problem modifying your code,
            change `MODELSCOPE_HUB_FILE_LOCK` env to `false`.
        progress_callbacks (`List[Type[ProgressCallback]]`, **optional**, default to `None`):
            progress callbacks to track the download progress.
        token (str, optional): The user token.
    Raises:
        ValueError: the value details.

    Returns:
        str: Local folder path (string) of repo snapshot

    Note:
        Raises the following errors:
        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
        if `use_auth_token=True` and the token cannot be found.
        - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
        ETag cannot be determined.
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        if some parameter value is invalid
    z*Please provide a valid model_id or repo_idInvalid repo type: , only support: NMODELSCOPE_HUB_FILE_LOCKtrue.lockTexist_ok/___)r5   r)   r*   r+   r,   r-   r.   r/   r0   r2   r1   r3   r7   r8   r9   )
ValueErrorr   r   r   r   r   r%   osenvirongetr   makedirspathjoinreplacer&   r   _snapshot_download)r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   system_cache	lock_filecontexts                        p/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/modelscope/hub/snapshot_download.pysnapshot_downloadrR   )   s   X !G GEFFF)))P)PP=NPP
 
 	
 4!4K/8<M/M/M++Si$JNN5v>>@ @   $-$9yy?W @
 @

BGLLw77$GGGGGLLw!(e!<!<> >	 ++--	  !!- 31+)#1!                   s   )EEE   
dataset_idc                    |-t          t          j                            dd                    }|r||nt	                      }t          j        t          j                            |d          d           t          j                            |d|                     dd                    }t          |          }nt                      }|5  t          | t          |||||||||
|	|||	          cddd           S # 1 swxY w Y   dS )
a  Download raw files of a dataset.
    Downloads all files at the specified revision. This
    is useful when you want all files from a dataset, because you don't know which
    ones you will need a priori. All files are nested inside a folder in order
    to keep their actual filename relative to that folder.

    An alternative would be to just clone a dataset but this would require that the
    user always has git and git-lfs installed, and properly configured.

    Args:
        dataset_id (str): A user or an organization name and a dataset name separated by a `/`.
        revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a
            commit hash. NOTE: currently only branch and tag name is supported
        cache_dir (str, Path, optional): Path to the folder where cached files are stored, dataset will
            be save as cache_dir/dataset_id/THE_DATASET_FILES.
        local_dir (str, optional): Specific local directory path to which the file will be downloaded.
        user_agent (str, dict, optional): The user-agent info in the form of a dictionary or a string.
        local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        cookies (CookieJar, optional): The cookie of the request, default None.
        ignore_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be ignored in downloading, like exact file names or file extensions.
            Use regression is deprecated.
        allow_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be downloading, like exact file names or file extensions.
        allow_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, only files matching at least one pattern are downloaded, priority over allow_file_pattern.
            For hugging-face compatibility.
        ignore_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern.
            For hugging-face compatibility.
        enable_file_lock (`bool`): Enable file lock, this is useful in multiprocessing downloading, default `True`.
            If you find something wrong with file lock and have a problem modifying your code,
            change `MODELSCOPE_HUB_FILE_LOCK` env to `false`.
        max_workers (`int`): The maximum number of workers to download files, default 8.
        token (str, optional): The user token.
    Raises:
        ValueError: the value details.

    Returns:
        str: Local folder path (string) of repo snapshot

    Note:
        Raises the following errors:
        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
        if `use_auth_token=True` and the token cannot be found.
        - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
        ETag cannot be determined.
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        if some parameter value is invalid
    Nr>   r?   r@   TrA   rC   rD   )r5   r)   r*   r+   r,   r-   r.   r/   r0   r2   r1   r3   r8   r9   )r%   rF   rG   rH   r   rI   rJ   rK   rL   r&   r   rM   r   )rT   r)   r*   r0   r+   r,   r-   r.   r/   r1   r2   r6   r3   r8   r9   rN   rO   rP   s                     rQ   dataset_snapshot_downloadrV      sj   H $JNN5v>>@ @   $-$9yy?W @
 @

BGLLw77$GGGGGLLw!+!3!3C!?!?A A	 ++--	  !'!- 31+)#                   s   #C==DD)r5   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r7   r8   r9   c                   |st           }|t          vrt          d|dt                    t          | |	||          \  }}||nt	                      }|rXt          |j                  dk    rt          d          t          	                    d|z             |
                                S t          j        |          t          t          j                              d}t           d	k    rXt#          j        d
          pt'                                                      }|r"t                              d|             ||d<   t'          |          }||                    | ||          }||                                }|t           k    r|	r t"          j                            |	          }nX|r+t#          j        j        |g|                     d          R  }n+t#          j        j        |dg|                     d          R  }t9          d| d|            |                    | |||          }|d         }dt"          j        v r	i |ddi}ni |ddi}|j        
|j        |d<   tA          tC          |          tC          |
                    }|"                    | ||d|dn|||          }|s@|>t          	                    d| d           |"                    | |d d|dn|||          }tG          ||||
|           }tI          |||| |d d ||||d|||!           d"| v rtK          ||           }t"          j        &                    |          rt                              d#           nt                              d$| d%           	 t#          j'        t"          j                            |          |d&           n# tP          $ rB t          	                    d'| d(t"          j                            |           d"           Y n5w xY wn/|tR          k    r#|	r t"          j                            |	          }nX|r+t#          j        j        |g|                     d          R  }n+t#          j        j        |d)g|                     d          R  }t9          d*|            tU          |           \  }}|ptV          }tA          tC          |          tC          |
                    }|rd|z   nd}t9          d+| d,           tY          || |||||||
|-
  
        }t[          |||| |||||||||.           |.                    |/           |
                                } | S )0Nr<   r=   )r0   r*   r5   r   zCannot find the requested files in the cached path and outgoing traffic has been disabled. To enable look-ups and downloads online, set 'local_files_only' to False.z6We can not confirm the cached file is for revision: %s)r+   )z
user-agentzsnapshot-identifierr?   INTRA_CLOUD_ACCELERATION_REGIONz6Intra-cloud acceleration enabled for downloading from zx-aliyun-region-id)r8   )r4   r5   r8   rC   modelszDownloading Model from z to directory: )r)   r-   r9   RevisionCI_TESTzsnapshot-ci-testTrueSnapshotcached_model_revision)r/   r1   TF)r(   r)   root	recursiveuse_cookiesheadersr9   zroot='zF' returned no model files, falling back to root=None for full listing.r/   r.   r1   r2   )r5   r)   r-   pre_filteredr3   r9   r7   .z3Target directory already exists, skipping creation.zCreating symbolic link [z].)target_is_directoryzFailed to create symbolic link z for datasetsz"Downloading Dataset to directory: zFetching file list (root: z)...)r8   	root_pathr/   r.   r1   r2   )cachetemporary_cache_dirr4   apidataset_name	namespacerb   r)   r-   r3   r9   r7   )revision_info)/r   r   r   r   r   lencached_filesrE   loggerwarningget_root_locationr   get_user_agentstruuiduuid4r   rF   getenvr   !_get_internal_acceleration_domaininfoget_endpoint_for_readget_cookiesrJ   abspathrK   splitprintget_valid_revision_detailrG   r^   r"   _normalize_patternsget_model_filesfilter_files_by_patterns_download_file_listsr#   existssymlinkOSErrorr   r$   r   _iter_dataset_file_pages_pipeline_download_datasetsave_model_version)!r4   r5   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r7   r8   r9   rj   ri   rN   rb   	region_id_api	directoryrevision_detailsnapshot_headerextracted_root
repo_filesmasked_directorygroup_or_ownernamerh   file_page_itercache_root_paths!                                    rQ   rM   rM     sD   &  $#	))) )		+<+< > ? ? 	? "F9	Y"P "P "P ) 599;S < <L qu!""a''<= = = 	O!" 	# 	# 	#&& 
 
 	
 +zDDD#&tz||#4#4
 
 $v--	;<< @88==??   :VWVV   1:,-E"""119E 2 C CH?&&((G'' >GOOI66		 >GLKc8J8JKKK		GLx >*1--*<*<> > >	N(NN9NNP P P"<<(Gh = P PO&z2H BJ&&"MW"M1CV0L"M"EW"EV0D"E*6/4/J  +- 8#67I#J#J2>BBD D DN -- !#%,_EE''! . # #J  '."<C^ C C CD D D "11$%")0g+% 2 ' '
 2#5$7- /1 1 1J !##!!'!#5   " g~~#=w$( $( 7>>),, KKMO O O O KK H9 H H HIII
GOO,<==%046 6 6 6 6 #   rirrbgoo^nNoNorrr     $ +++ >GOOI66		 >GLKc8J8JKKK		GLz >*1--*<*<> > >	ByBBCCC#?#H#H ND&B*BO 8#67I#J#J2>BBD D DN 1?Gn,,CI>y>>>???5##5$7- /
1 
1 
1N '$7!(!'!#57 7 7 7 	   ???1133s   !4O AP#"P#rC   c                 t   	
 dvrt          d d                              dd          \  }}                     ||          \  }t          	g           	
fd} ||          }|s/|dk    r)t                              d| d            |d          }|S )	aw  Fetch and filter dataset repo files with pagination and server-side prefix filtering.

    Applies per-page pattern filtering to minimize memory usage.
    Falls back to root_path='/' if the extracted prefix yields no results.

    Args:
        _api: HubApi instance.
        repo_id: Dataset repo identifier (owner/name).
        revision: Git revision.
        endpoint: API endpoint URL.
        token: Authentication token.
        root_path: Server-side directory prefix filter.
        allow_file_pattern: Include patterns for client-side filtering.
        ignore_file_pattern: Exclude patterns for client-side filtering.
        allow_patterns: Additional include patterns (HF-compatible).
        ignore_patterns: Additional exclude patterns (HF-compatible).
        page_size: Number of files per API page request.

    Returns:
        List of filtered file entry dicts.
    rC   Invalid repo_id: '', expected format 'owner/name'r   rl   rm   r9   r8   c                    d}g }	 	                      | d|
	  	        }n7# t          $ r*}t                              d| d|            Y d}~npd}~ww xY w|sner*t	          |	          }|                    |           n|                    d |D                        t          |          k     rn|dz  }|S )	zFFetch all pages with the given root_path, applying per-page filtering.r   T	r4   r)   rh   r`   page_number	page_sizer9   r8   dataset_hub_id#Error fetching dataset files (page ): Nrc   c              3   L   K   | ]}|                     d           dk    |V   dS )r   treeNrH   .0fs     rQ   	<genexpr>zAfetch_repo_files.<locals>._paginate_and_filter.<locals>.<genexpr>.  sC       "I "If0G0GA0G0G0G0G"I "I    )get_dataset_files	Exceptionrq   errorr   extendro   )effective_root_pathr   r   dataset_filesepage_filteredr   _hub_idr/   r1   r9   has_patternsr.   r2   r   r4   r)   r8   s         rQ   _paginate_and_filterz.fetch_repo_files.<locals>._paginate_and_filter
  si   
%	 $ 6 6#%1" +'%#* !7 	!, 	!,    M+MM!MMO O O
 !   I 8!'9(;#1$3!5 !5 !5 !!-0000 !! "I "I,"I "I "I I I I =!!I--1KK%	N s   ' 
A AAzroot_path='F' returned no results, falling back to root_path='/' for full listing.)r   r~   get_dataset_id_and_typeanyrq   rr   )r   r4   r)   r9   r8   rh   r/   r.   r1   r2   r   _owner_dataset_name_r   r   r   r   s   ````` `````     @@rQ   fetch_repo_filesr     sb   D 'IIIIK K 	K#MM#q11FM--"	 .  JGQ /  L
, , , , , , , , , , , , , , , ,^ &%i00J  /)s** JY J J J 	K 	K 	K))#..
r   patternc                 R    	 t          j        |            dS # t          $ r Y dS w xY w)NTF)recompileBaseException)r   s    rQ   _is_valid_regexr   D  s>    

7t   uus    
&&patternsc                 R    t          | t                    r| g} | d | D             } | S )Nc                 F    g | ]}|                     d           s|n|dz   S )rC   *)endswith)r   items     rQ   
<listcomp>z'_normalize_patterns.<locals>.<listcomp>P  s>     
 
 
?Cc**:DDs

 
 
r   )
isinstanceru   )r   s    rQ   r   r   L  sF    (C   :
 
GO
 
 
 Or   c                 d    | -g }| D ]&}t          |          r|                    |           '|S d S N)r   append)r   regex_patternsr   s      rQ   _get_valid_regex_patternr   V  sL     	, 	,Dt$$ ,%%d+++tr   rc   r   c                   t          |          }t          |          }t          |          }t          |          }t          |          }g }| D ]d         dk    r	 |rt          fd|D                       r.|rt          fd|D                       rL|rt          fd|D                       rj|rt          fd|D                       s|rt          fd|D                       sn4# t          $ r'}t                              d|z             Y d	}~d	}~ww xY w|                               |S )
a  Filter repo file entries by include/exclude patterns.

    Skips 'tree' type entries. Applies fnmatch and regex pattern matching.
    Returns only file entries that pass all filter criteria.

    Args:
        repo_files: List of file entry dicts with 'Type', 'Path', 'Name' keys.
        allow_file_pattern: Include patterns (fnmatch). Files must match at least one.
        ignore_file_pattern: Exclude patterns (fnmatch). Matching files are skipped.
        allow_patterns: Additional include patterns (HF-compatible).
        ignore_patterns: Additional exclude patterns (HF-compatible).

    Returns:
        List of file entries that pass all filters.
    r   r   c              3   N   K   | ]}t          j         d          |          V   dS r   Nfnmatchr   p	repo_files     rQ   r   z+filter_files_by_patterns.<locals>.<genexpr>  sH       '. '. OIf$5q99'. '. '. '. '. '.r   c              3   N   K   | ]}t          j         d          |          V   dS r   r   r   s     rQ   r   z+filter_files_by_patterns.<locals>.<genexpr>  sH       +2 +2 OIf$5q99+2 +2 +2 +2 +2 +2r   c              3   R   K   | ]!}t          j        |d                    duV  "dS NameNr   searchr   s     rQ   r   z+filter_files_by_patterns.<locals>.<genexpr>  sM       ,3 ,3 Ia6!2334?,3 ,3 ,3 ,3 ,3 ,3r   c              3   N   K   | ]}t          j         d          |          V   dS r   r   r   s     rQ   r   z+filter_files_by_patterns.<locals>.<genexpr>  sH       *- *- OIf$5q99*- *- *- *- *- *-r   c              3   N   K   | ]}t          j         d          |          V   dS r   r   r   s     rQ   r   z+filter_files_by_patterns.<locals>.<genexpr>  sH       .1 .1 OIf$5q99.1 .1 .1 .1 .1 .1r   zInvalid file pattern: %sN)r   r   r   r   rq   rr   r   )	r   r/   r.   r1   r2   ignore_regex_patternfilteredr   r   s	           @rQ   r   r   a  s+   . */::O(88N-.ABB,-?@@34GHHH  #  #	V&&	 3 '. '. '. '.,'. '. '. $. $.  " s +2 +2 +2 +20+2 +2 +2 (2 (2  #  ,3 ,3 ,3 ,31,3 ,3 ,3 )3 )3   c *- *- *- *-+*- *- *- '- '-  ! # .1 .1 .1 .1/.1 .1 .1 +1 +1   	 	 	NN59:::HHHH	 		""""Os0   !C8?C8C8;C8C88
D)D$$D)c              #      	
K   dvrt          d d                              dd          \  }}                     ||          \  }t          	g           	
fd}	 d} ||          D ]}d}|V  	|s+|dk    r%t	          d	| d
            |d          D ]}|V  t	                       dS # t	                       w xY w)a  Generator that yields filtered file pages from a dataset repo.

    Each yield is a non-empty list of file-entry dicts for one API page.
    Applies per-page pattern filtering to minimize memory usage.
    Falls back to root_path='/' if the extracted prefix yields no results.

    Args:
        _api: HubApi instance.
        repo_id: Dataset repo identifier (owner/name).
        revision: Git revision.
        endpoint: API endpoint URL.
        token: Authentication token.
        root_path: Server-side directory prefix filter.
        allow_file_pattern: Include patterns (fnmatch).
        ignore_file_pattern: Exclude patterns (fnmatch).
        allow_patterns: Additional include patterns (HF-compatible).
        ignore_patterns: Additional exclude patterns (HF-compatible).
        page_size: Number of files per API page request.

    Yields:
        List[dict]: Non-empty list of filtered file entries per page.
    rC   r   r   r   r   c              3     K   d}d}	 	                      | d|
	  	        }n8# t          $ r+}t                              d| d|            Y d}~dS d}~ww xY w|sdS rt	          |	          }nd	 |D             }|t          |          z  }|r|V  t          d
| d| ddd           t          |          k     rdS |dz  })z2Yield filtered file pages for the given root_path.r   r   Tr   r   r   Nrc   c                 D    g | ]}|                     d           dk    |S )r   r   r   r   s     rQ   r   zE_iter_dataset_file_pages.<locals>._paginate_pages.<locals>.<listcomp>  s3     ! ! !f0G0GA0G0G0Gr   z  Fetched z matching files (z
 pages)... )endflush)r   r   rq   r   r   ro   r   )r   r   total_foundr   r   r   r   r   r/   r1   r9   r   r.   r2   r   r4   r)   r8   s         rQ   _paginate_pagesz1_iter_dataset_file_pages.<locals>._paginate_pages  s     /	 $ 6 6#%1" +'%#* !7 	!, 	!,    M+MM!MMO O O
 !    8!'9(;#1$3!5 !5 !5! !,! ! ! 3}---K $####,{ , ,, , ,	    =!!I--1K_/	s   ) 
A AAFTz
  root_path='r   N)r   r~   r   r   r   )r   r4   r)   r9   r8   rh   r/   r.   r1   r2   r   r   r   r   r   yielded_anypager   r   s   ````` `````      @@rQ   r   r     s     F 'IIIIK K 	K $MM#q11FM--"	 .  JGQ /  L
4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4n#OI.. 	 	DKJJJJ  	yC// EI E E E F F F',,  



 	s   ?AC C"c                    d}d}g t          j                    fdt          ddd          5 t          |
          5 }| D ]}|D ]}|dz  }|_                                         |                    |          r|dz  }                    d           R|                    |d         ||||	          }|	                    t          ||||||	d|
	  	        }|                    |ffd	           	 ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   rVd D             }t                              t                     dd                    d |D                       z              ||z
  t                    z
  }t!          d| d| d| drdt                     dndz   dz              r"t#          t                     d| d          dS )a_  Pipeline consumer: download dataset files as pages are yielded.

    Consumes the page iterator from _iter_dataset_file_pages, submitting
    each file to a thread pool for concurrent download. Uses tqdm for
    real-time progress and thread-safe error collection.

    Args:
        file_page_iter: Iterator yielding List[dict] file pages.
        cache: ModelFileSystemCache instance for dedup.
        temporary_cache_dir: Temp staging directory.
        repo_id: Dataset repo identifier.
        api: HubApi instance.
        dataset_name: Dataset name component.
        namespace: Owner/namespace component.
        headers: HTTP request headers.
        revision: Git revision.
        cookies: HTTP cookies.
        max_workers: Thread pool concurrency.
        endpoint: API endpoint URL.
        progress_callbacks: Optional progress callback list.
    r   c                    	 |                                   n|# t          $ ro}5                      ||f           ddd           n# 1 swxY w Y   t                              d|                    dd           d|            Y d}~nd}~ww xY w                    d           dS #                     d           w xY w)z8Done callback: update progress bar and collect failures.NzDownload failed for r   ?z: r   )resultr   r   rq   debugrH   update)futurer   excfailed_itemslockpbars      rQ   _on_donez,_pipeline_download_dataset.<locals>._on_doneH  s/   	MMOOOO 	L 	L 	L 6 6##Y$45556 6 6 6 6 6 6 6 6 6 6 6 6 6 6LLJy}}VS'A'AJJSJJL L L L L L L L	L KKNNNNNDKKNNNNsM    B+ 
BBABA	BA	7BB+ BB+ +Cz filesF)totalunitdisable)r3   r   r   	file_namerl   rm   r)   r9   disable_tqdmr7   c                      | |          S r    )r   rfr   s     rQ   <lambda>z,_pipeline_download_dataset.<locals>.<lambda>x  s    B r   Nc                     g | ]?\  }}t          |t                    r|                    d d          nt          |          @S )r   r   )r   dictrH   ru   r   r   r   s      rQ   r   z._pipeline_download_dataset.<locals>.<listcomp>~  sT     
 
 
a &0d%;%;JDHHVS!!!T
 
 
r    file(s) failed to download:

c              3       K   | ]	}d | V  
dS z  - Nr   r   r   s     rQ   r   z-_pipeline_download_dataset.<locals>.<genexpr>  s(       B B B B B B B Br   zDownload complete: z files found, z	 cached, z downloadedz, z failedr   re   # file(s) failed to download out of )	threadingLockr   r   r   refreshr   r   get_dataset_file_urlsubmitr   add_done_callbackrq   r   ro   rK   r   r   )r   ri   rj   r4   rk   rl   rm   rb   r)   r-   r3   r9   r7   r   total_cachedexecutor
page_filesr   urlr   failed_paths
downloadedr   r   r   r   s                         @@@@rQ   r   r     s   H KLL>D
 
 
 
 
 
 
 
AHe	4	4	4 #AK888 "	AH, !A !A
!+  A  AI1$K!,DJLLNNN ||I.. !$)A  22"+F"3%1"+!)!) 3 + +C &__%!+%*+= - 
 
F ,,%.?????A A A A? A!A"	A "	A "	A "	A "	A "	A "	A "	A "	A "	A "	A "	A "	A "	A "	A#A #A #A #A #A #A #A #A #A #A #A #A #A #A #AP  D
 
'
 
 
 	L))IIIyy B B\ B B BBBC 	D 	D 	D |+c,.?.??J	 < < << <$.< < <0<D,#l##,,,,"FHKL M M M  <        	 s6   D$B8DD$D	D$D	D$$D(+D(ri   rj   rk   r   r   rd   c                   	
 |rwg }| D ]p                               rDt          j                            d                   }t                              d| d           [|                               qnt          |          }t          |          }t          |          }t          |          }t          |          }g }| D ]Pd         dk    r	 |rt          fd|D                       r/|rt          fd|D                       rM|rt          fd|D                       rk||rt          fd	|D                       s||rt          fd
|D                       s                               rEt          j                            d                   }t                              d| d           	 |                               # t          $ r(}t                              d|z             Y d }~Jd }~ww xY wt          |dd          
	fd            }t          |          dk    rt                              dt          |           d            ||          }g }t          |t                     rpt          |          dk    r]|\  }}|rVd |D             }t                              t          |           dd                    d |D                       z              t                              dt          |           d d           |r1t'          t          |           dt          |           d          d S d S )Nr   zFile z8 already in cache with identical hash, skip downloading!r   r   c                 F    g | ]}t          j         d          |          S r   r   r   r   r   s     rQ   r   z(_download_file_lists.<locals>.<listcomp>  s:     , , ,#  	&(97CC, , ,r   c                 F    g | ]}t          j         d          |          S r   r   r  s     rQ   r   z(_download_file_lists.<locals>.<listcomp>  s:     0 0 0#  	&(97CC0 0 0r   c                 J    g | ]}t          j        |d                    du S r   r   r  s     rQ   r   z(_download_file_lists.<locals>.<listcomp>  s?     1 1 1# 	'9V+<==TI1 1 1r   c              3   N   K   | ]}t          j         d          |          V   dS r   r   r  s     rQ   r   z'_download_file_lists.<locals>.<genexpr>  sH       ; ; ' $OIf,=wGG; ; ; ; ; ;r   c              3   N   K   | ]}t          j         d          |          V   dS r   r   r  s     rQ   r   z'_download_file_lists.<locals>.<genexpr>  sH       ? ? ' $OIf,=wGG? ? ? ? ? ?r   z The file pattern is invalid : %sFT)r3   r   fault_tolerantc           
         t           k    rt          
| d                   }nFt          k    r!                    | d                   }nt	          d dt
                     t          || d	           d S )Nr   )r(   	file_pathr)   r9   r   r<   z, supported types: Fr   )r   r    r   r  r   r   r   )r   r  rk   ri   r-   r9   r   rb   r   r7   r4   r5   r)   rj   s     rQ   _download_single_filez3_download_file_lists.<locals>._download_single_file  s     ''' #F+!!	# # #CC
 +++**#F+!(!! + # #CC #WiWWDUWW   	1		
 		
 		
 		
 		
 		
r   r   zGot z files, start to download ...   c                 l    g | ]1\  }}t          |t                    r|d          nt          |          2S r   )r   r  ru   r  s      rQ   r   z(_download_file_lists.<locals>.<listcomp>  sI          a %/tT$:$:IDLLD		     r   r  r  c              3       K   | ]	}d | V  
dS r  r   r	  s     rQ   r   z'_download_file_lists.<locals>.<genexpr>  s(      AAq
q

AAAAAAr   zFinish downloading z files for repo ''r
  re   )r   rF   rJ   basenamerq   r   r   r   r   r   r   rr   r   ro   rz   r   tupler   rK   r   )r   ri   rj   r4   rk   r   r   rb   r5   r)   r-   r.   r/   r1   r2   r3   r9   r7   rd   filtered_repo_filesr   r   r   r!  download_resultr   r   r  r   s    ``````````     ``          @rQ   r   r     sK   *  A6 # 	2 	2I||I&& G,,Yv->??	_I___   &&y1111	2 .o>>,^<<12EFF01CDD78KLL # ,	6 ,	6I F**)6" s , , , ,'6, , , ( (  & 3 0 0 0 0':0 0 0 , ,  ' C 1 1 1 1';1 1 1 - -  !-.- ; ; ; ;+9; ; ; ; ; ! !%16H1 ? ? ? ?+=? ? ? ? ? ! !<<	**  " 0 061B C CILLc	ccc    $**95555  G G GAAEFFFFFFFFG
 eDJ J J
 
 
 
 
 
 
 
 
 
 
 
 
 
 
J J
< !##J3*++JJJ	L 	L 	L//0CDD ou-- 		C#o2F2F!2K2K-OA| C   #/      <((HHHiiAALAAAAABC C C 	W#&9":":WWWWWW	
 	
 	
  	0#|$$ / /*++/ / /0 0 0/ $#,	0 	0s7   'G3G3#G3G3!G3AG33
H%=H  H%)Ir   rF   r   r  rv   concurrent.futuresr   
contextlibr   http.cookiejarr   pathlibr   typingr   r	   r
   r   r   	tqdm.autor   modelscope.utils.constantr   r   r   r   r   r   modelscope.utils.file_utilsr   modelscope.utils.loggerr   modelscope.utils.thread_utilsr   rk   r   r   callbackr   	constantsr   errorsr   r   file_downloadr   r   r    utils.cachingr!   utils.utilsr"   r#   r$   r%   r&   rq   DEFAULT_DATASET_PAGE_SIZEru   boolintrR   rV   rM   r   r   r   r   r  r   r   r   r   r   r   rQ   <module>r=     s	    				 				      1 1 1 1 1 1 " " " " " " $ $ $ $ $ $       4 4 4 4 4 4 4 4 4 4 4 4 4 4      : : : : : : : : : : : : : : : :
 A @ @ @ @ @ . . . . . . 9 9 9 9 9 9 ) ) ) ) ) ) ) ) & & & & & & * * * * * * 7 7 7 7 7 7 7 7B B B B B B B B B B / / / / / /* * * * * * * * * * * * * *
 
  "(,-1',#';?:>#6:7;!%.'+7;"%x xxsmx S$_%x tSy)*	x
 tnx i x "%T#Y"78x !sDI~!67x }x U49c>23x eDIsN34x #x x }x tnx  T"234!x" C=#x$ sm%x& 	'x x x xz 7(,#-1',#';?:>6:7;'+"a aasma S$_%a }	a
 tSy)*a tna i a "%T#Y"78a !sDI~!67a U49c>23a eDIsN34a tna a C=a sma  	!a a a aN  $4(,-1',#';?:>#6:7;7;"#N N NN }N sm	N
 S$_%N tSy)*N tnN i N "%T#Y"78N !sDI~!67N }N U49c>23N eDIsN34N N T"234N  C=!N" sm#N N N Nl 'h h h hVS    %T#Y"7    tCy     /3/3*.+/@ @ @T
@ !c+@ "$s),	@
 T#Y'@ d3i(@ 
$Z@ @ @ @P 'x x x xL $o o o ov  $4#';?:>6:7;"7;'Q0 Q0S	Q0Q0 Q0 	Q0
 
Q0 Q0 Q0 }Q0 smQ0 i Q0 "%T#Y"78Q0 !sDI~!67Q0 U49c>23Q0 eDIsN34Q0  !Q0" sm#Q0$ T"234%Q0& 'Q0 Q0 Q0 Q0 Q0 Q0r   