
    ujJ                     0   d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	m
Z
mZmZmZ ddlmZmZ ddlmZ ddlmZmZmZ dd	lmZmZ dd
lmZ ddlmZ  e            ZddgZ ed           G d d                      Z  ed           G d d                      Z! ed           G d d                      Z" ed           G d d                      Z#	 d#deee$ef                  de#fdZ%dede&fdZ'd$dede$de&fd Z(d!ede$dee"         fd"Z)dS )%z<Contains utilities to manage the ModelScope cache directory.    N)	dataclass)Path)Dict	FrozenSetListLiteralOptionalSetUnion)CacheNotFoundCorruptedCacheException)ModelFileSystemCache)convert_readable_sizeformat_timesincetabulate)REPO_TYPE_DATASETREPO_TYPE_MODEL)get_modelscope_cache_dir)
get_loggerz	.DS_Storez
._____tempT)frozenc                       e Zd ZU dZeed<   eed<   eed<   eed<   eed<   eed<   eed<   e	d	efd
            Z
e	d	efd            Ze	d	efd            ZdS )CachedFileInfoa	  Frozen data structure holding information about a single cached file.

    Args:
        file_name (`str`):
            Name of the file. Example: `config.json`.
        file_path (`Path`):
            Path of the file in the `snapshots` directory. The file path is a symlink
            referring to a blob in the `blobs` folder.
        blob_path (`Path`):
            Path of the blob file. This is equivalent to `file_path.resolve()`.
        size_on_disk (`int`):
            Size of the blob file in bytes.
        blob_last_accessed (`float`):
            Timestamp of the last time the blob file has been accessed (from any
            revision).
        blob_last_modified (`float`):
            Timestamp of the last time the blob file has been modified/created.
    	file_name	file_pathfile_revision_hash	blob_pathsize_on_diskblob_last_accessedblob_last_modifiedreturnc                 *    t          | j                  S )z
        (property) Timestamp of the last time the blob file has been accessed (from any
        revision), returned as a human-readable string.

        Example: "2 weeks ago".
        )r   r   selfs    l/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/modelscope/hub/cache_manager.pyblob_last_accessed_strz%CachedFileInfo.blob_last_accessed_str4          7888    c                 *    t          | j                  S )z
        (property) Timestamp of the last time the blob file has been modified, returned
        as a human-readable string.

        Example: "2 weeks ago".
        )r   r   r"   s    r$   blob_last_modified_strz%CachedFileInfo.blob_last_modified_str>   r&   r'   c                 *    t          | j                  S )zi
        (property) Size of the blob file as a human-readable string.

        Example: "42.2K".
        r   r   r"   s    r$   size_on_disk_strzCachedFileInfo.size_on_disk_strH        %T%6777r'   N)__name__
__module____qualname____doc__str__annotations__r   intfloatpropertyr%   r)   r,    r'   r$   r   r      s          & NNNOOOOOO9 9 9 9 X9 9 9 9 9 X9 8# 8 8 8 X8 8 8r'   r   c                       e Zd ZU dZeed<   eed<   eed<   ee	         ed<   e
ed<   edefd            Zedefd	            Zedefd
            ZdS )CachedRevisionInfoa  Frozen data structure holding information about a revision.

    Args:
        commit_hash (`str`):
            Hash of the revision (unique).
            Example: `"9338f7b671827df886678df2bdd7cc7b4f36dffd"`.
        snapshot_path (`Path`):
            Path to the revision directory in the `snapshots` folder. It contains the
            exact tree structure as the repo on the Hub.
        files: (`FrozenSet[CachedFileInfo]`):
            Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
        size_on_disk (`int`):
            Sum of the blob file sizes that are symlink-ed by the revision.
        last_modified (`float`):
            Timestamp of the last time the revision has been created/modified.
    commit_hashsnapshot_pathr   fileslast_modifiedr    c                 *    t          | j                  S )z
        (property) Timestamp of the last time the revision has been modified, returned
        as a human-readable string.

        Example: "2 weeks ago".
        r   r=   r"   s    r$   last_modified_strz$CachedRevisionInfo.last_modified_strl          2333r'   c                 *    t          | j                  S zn
        (property) Sum of the blob file sizes as a human-readable string.

        Example: "42.2K".
        r+   r"   s    r$   r,   z#CachedRevisionInfo.size_on_disk_strv   r-   r'   c                 *    t          | j                  S )zC
        (property) Total number of files in the revision.
        )lenr<   r"   s    r$   nb_fileszCachedRevisionInfo.nb_files   s    
 4:r'   N)r.   r/   r0   r1   r2   r3   r   r4   r   r   r5   r6   r@   r,   rF   r7   r'   r$   r9   r9   R   s          " ^$$$$43 4 4 4 X4 8# 8 8 8 X8 #    X  r'   r9   c                       e Zd ZU dZeed<   eed<   eed<   eed<   eed<   ee	         ed<   e
ed<   e
ed	<   ed
efd            Zed
efd            Zed
efd            ZdS )CachedRepoInfoa  Frozen data structure holding information about a cached repository.

    Args:
        repo_id (`str`):
            Repo id of the repo on the Hub. Example: `"damo/bert-base-chinese"`.
        repo_type (`Literal["dataset", "model"]`):
            Type of the cached repo.
        repo_path (`Path`):
            Local path to the cached repo.
        size_on_disk (`int`):
            Sum of the blob file sizes in the cached repo.
        nb_files (`int`):
            Total number of blob files in the cached repo.
        revisions (`FrozenSet[CachedRevisionInfo]`):
            Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
        last_accessed (`float`):
            Timestamp of the last time a blob file of the repo has been accessed.
        last_modified (`float`):
            Timestamp of the last time a blob file of the repo has been modified/created.
    repo_id	repo_type	repo_pathr   rF   	revisionslast_accessedr=   r    c                 *    t          | j                  S )z
        (property) Last time a blob file of the repo has been accessed, returned as a
        human-readable string.

        Example: "2 weeks ago".
        )r   rM   r"   s    r$   last_accessed_strz CachedRepoInfo.last_accessed_str   rA   r'   c                 *    t          | j                  S )z
        (property) Last time a blob file of the repo has been modified, returned as a
        human-readable string.

        Example: "2 weeks ago".
        r?   r"   s    r$   r@   z CachedRepoInfo.last_modified_str   rA   r'   c                 *    t          | j                  S rC   r+   r"   s    r$   r,   zCachedRepoInfo.size_on_disk_str   r-   r'   N)r.   r/   r0   r1   r2   r3   r   r4   r   r9   r5   r6   rO   r@   r,   r7   r'   r$   rH   rH      s          * LLLNNNOOOMMM+,,,,43 4 4 4 X4 43 4 4 4 X4 8# 8 8 8 X8 8 8r'   rH   c                   r    e Zd ZU dZeed<   ee         ed<   ee	         ed<   e
defd            ZdefdZdS )	ModelScopeCacheInfoa  Frozen data structure holding information about the entire cache-system.

    This data structure is returned by [`scan_cache_dir`] and is immutable.

    Args:
        size_on_disk (`int`):
            Sum of all valid repo sizes in the cache-system.
        repos (`FrozenSet[CachedRepoInfo]`):
            Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
            cache-system while scanning.
        warnings (`List[CorruptedCacheException]`):
            List of [`~CorruptedCacheException`] that occurred while scanning the cache.
            Those exceptions are captured so that the scan can continue. Corrupted repos
            are skipped from the scan.
    r   reposwarningsr    c                 *    t          | j                  S )zp
        (property) Sum of all valid repo sizes in the cache-system as a human-readable
        string.
        r+   r"   s    r$   r,   z$ModelScopeCacheInfo.size_on_disk_str   s     %T%6777r'   c                     dt           dt          dt          t                   fdg d}fdt	          | j        d           D             }t          ||	          S )
a  Generate a detailed table from the [`ModelScopeCacheInfo`] object.

        Returns a table with a row per repo and revision (thus multiple rows can appear for a single repo), with columns
        "repo_id", "repo_type", "revision", "size_on_disk", "nb_files", "last_modified", "local_path".

        Example:
        ```py
        >>> from modelscope.hub.cache_manager import scan_cache_dir

        >>> ms_cache_info = scan_cache_dir()
        ModelScopeCacheInfo(...)

        >>> print(ms_cache_info.export_as_table())
        REPO ID                REPO TYPE REVISION   SIZE ON DISK NB FILES LAST_MODIFIED LOCAL PATH
        ---------------------- --------- ---------- ------------ -------- -------------  -------------------------------------------------------------
        damo/bert-base-chinese model     master             2.7M        5 1 week ago     ~/.cache/modelscope/hub/models--damo--bert-base-chinese/...
        damo/structured-bert   model     master             8.8K        1 1 week ago     ~/.cache/modelscope/hub/models--damo--structured-bert/...
        damo/t5-base           model     master           893.8M        4 7 months ago   ~/.cache/modelscope/hub/models--damo--t5-base/...
        ```

        Returns:
            `str`: The table as a string.
        reporevisionr    c           
          | j         | j        |j        d                    | j                  | j        | j        | j        t          | j	                  gS )zHFormat a single repo and revision into a list of strings for tabulation.z{:>12})
rI   rJ   r:   formatr,   rF   rO   r@   r2   rK   )rX   rY   s     r$   format_repo_revisionzAModelScopeCacheInfo.export_as_table.<locals>.format_repo_revision   sN     $ 566&&DN##	 	r'   )zREPO IDz	REPO TYPEREVISIONzSIZE ON DISKzNB FILESLAST_ACCESSEDLAST_MODIFIEDz
LOCAL PATHc                 \    g | ](}t          |j        d            D ]} ||          )S )c                     | j         S N)r:   )rY   s    r$   <lambda>z@ModelScopeCacheInfo.export_as_table.<locals>.<listcomp>.<lambda>  s	    X5I r'   key)sortedrL   ).0rX   rY   r\   s      r$   
<listcomp>z7ModelScopeCacheInfo.export_as_table.<locals>.<listcomp>  sj     
 
 
"$I$IK K K
 
  ! x00
 
 
 
r'   c                     | j         S rb   )rI   )rX   s    r$   rc   z5ModelScopeCacheInfo.export_as_table.<locals>.<lambda>  s    DL r'   rd   )rowsheaders)rH   r9   r   r2   rf   rT   r   )r#   column_headers
table_datar\   s      @r$   export_as_tablez#ModelScopeCacheInfo.export_as_table   s    2	~ 	+=	BFs)	 	 	 		
 	
 	

 
 
 
tz/H/HIII
 
 

 "
 
 
 	
r'   N)r.   r/   r0   r1   r4   r3   r   rH   r   r   r6   r2   r,   rn   r7   r'   r$   rS   rS      s            ^$$$$*++++8# 8 8 8 X8<
 <
 <
 <
 <
 <
 <
r'   rS   	cache_dirr    c                    | t                      } t          |                                                                           } |                                 st          d|  d|           |                                 rt          d|  d          t                      }g }| dz  }| dz  }|                                rW|	                                rCt          |t                    \  }}|                    |           |                    |           |                                rW|	                                rCt          |t                    \  }}|                    |           |                    |           t          | t          d	
          \  }	}
|                    |	           |                    |
           t          t!          |          t#          d |D                       |          S )a  Scan the entire ModelScope cache-system and return a [`ModelScopeCacheInfo`] structure.

    Use `scan_cache_dir` to programmatically scan your cache-system. The cache
    will be scanned repo by repo. If a repo is corrupted, a [`~CorruptedCacheException`]
    will be thrown internally but captured and returned in the [`~ModelScopeCacheInfo`]
    structure. Only valid repos get a proper report.

    ```py
    >>> from modelscope.hub.utils import scan_cache_dir

    >>> ms_cache_info = scan_cache_dir()
    ModelScopeCacheInfo(
        size_on_disk=3398085269,
        repos=frozenset({
            CachedRepoInfo(
                repo_id='damo/t5-small',
                repo_type='model',
                repo_path=PosixPath(...),
                size_on_disk=970726914,
                nb_files=11,
                revisions=frozenset({
                    CachedRevisionInfo(
                        commit_hash='master',
                        size_on_disk=970726339,
                        snapshot_path=PosixPath(...),
                        files=frozenset({
                            CachedFileInfo(
                                file_name='config.json',
                                size_on_disk=1197
                                file_path=PosixPath(...),
                                blob_path=PosixPath(...),
                            ),
                            CachedFileInfo(...),
                            ...
                        }),
                    ),
                    CachedRevisionInfo(...),
                    ...
                }),
            ),
            CachedRepoInfo(...),
            ...
        }),
        warnings=[
            CorruptedCacheException("Snapshots dir doesn't exist in cached repo: ..."),
            CorruptedCacheException(...),
            ...
        ],
    )
    ```

    Args:
        cache_dir (`str` or `Path`, `optional`):
            Cache directory to scan. Defaults to the default ModelScope cache directory.

    Raises:
        `CacheNotFound`: If the cache directory does not exist.
        `ValueError`: If the cache directory is a file, instead of a directory.

    Returns: a [`ModelScopeCacheInfo`] object.
    NzCache directory not found: zQ. Please use `cache_dir` argument or set `MODELSCOPE_CACHE` environment variable.)ro   z1Scan cache expects a directory but found a file: modelsdatasetsrJ   T)rJ   inplacec              3   $   K   | ]}|j         V  d S rb   )r   )rg   rX   s     r$   	<genexpr>z!scan_cache_dir.<locals>.<genexpr>  s%      ==t*======r'   )rT   r   rU   )r   r   
expanduserresolveexistsr   is_file
ValueErrorsetis_dir	_scan_dirr   updateextendr   rS   	frozensetsum)ro   rT   rU   	model_dirdataset_dirmodel_reposmodel_warningsdataset_reposdataset_warningsother_reposother_warningss              r$   scan_cache_dirr   #  sG   ~ ,..	Y**,,4466I 
 G)  G  G  G
 
 
 	

  
 ]	  ]  ]  ]
 
 	
 "%E.0H H$Ij(K  (i..00 (&/'2 '2 '2#^[!!!'''  * 2 2 4 4 **3#4+6 +6 +6'']###())) #,_d#< #< #<K	LLOON###==u=====   r'   dirc                     |                                  sdS |                                 sdS |                                 rdS | j        t          v rdS dS )z+Check if a directory is valid for scanning.FT)ry   r}   
is_symlinknameFILES_TO_IGNORE)r   s    r$   _is_valid_dirr     sZ    ::<< u::<< u
~~ u
x?""u4r'   FrJ   rt   c                    t                      }g }|                                 D ]}|r
|j        dv rt          |          s|                                D ]g}t          |          s	 t	          ||          }||                    |           <# t          $ r}|                    |           Y d}~`d}~ww xY w||fS )zWScan a directory for cached repos and return a set of [`~CachedRepoInfo`] and warnings.)rq   rr   hubrs   N)r|   iterdirr   r   _scan_cached_repoaddr   append)	r   rJ   rt   rT   rU   	owner_dirname_dirinfoes	            r$   r~   r~     s    EEEH[[]] # #	 	y~)FFFY'' 	!))++ 	# 	#H ** #(YGGG#IIdOOO* # # #""""""""#	# (?s   *(B
B<B77B<rK   c                 ^   |                                  st          d|            	 t          t          |                     }|j        }|j        }|                                                    dd          }|dk    rdS n$# t          $ r}t          d|           d}~ww xY wi t                      }|D ]}t          j                            | |d                   }	|                    dd	          }
t          j                            |	          s^t          |	          }|                                |<   |                    t%          t          j                            |d                   ||
|         j        ||         j        |         j        
                     d}|r8d|v r4|                    d          d                             d          d         }|rt1          fd|D                       }n|                                 j        }t3          |t5          |          t7          fd|D                       | |          }rWt1          d                                 D                       }t1          d                                 D                       }n"|                                 }|j        }|j        }t;          t=                    || |t5          |g          t7          d                                 D                       ||          S )zScan a single cache repo and return information about it.

    Any unexpected behavior will raise a [`~CorruptedCacheException`].
    zRepo path is not a directory: ___.unknownNz"Failed to load cache information: r   Revision )r   r   r   r   r   r   r   masterz	Revision:   ,r   c              3   <   K   | ]}|j                  j        V  d S rb   )r   st_mtimerg   file
blob_statss     r$   rv   z$_scan_cached_repo.<locals>.<genexpr>  sI       %D %D)- &0%?%H %D %D %D %D %D %Dr'   c              3   <   K   | ]}|j                  j        V  d S rb   )r   st_sizer   s     r$   rv   z$_scan_cached_repo.<locals>.<genexpr>  sA       8 8! $DN3; 8 8 8 8 8 8r'   )r:   r<   r   r;   r=   c              3   $   K   | ]}|j         V  d S rb   )st_atimerg   stats     r$   rv   z$_scan_cached_repo.<locals>.<genexpr>  $       O O4 O O O O O Or'   c              3   $   K   | ]}|j         V  d S rb   )r   r   s     r$   rv   z$_scan_cached_repo.<locals>.<genexpr>  r   r'   c              3   $   K   | ]}|j         V  d S rb   )r   r   s     r$   rv   z$_scan_cached_repo.<locals>.<genexpr>  s$      FF$FFFFFFr'   )rF   rI   rK   rJ   rL   r   rM   r=   )r}   r   r   r2   cached_filescached_model_revisionget_model_idreplace	Exceptionr|   ospathjoingetry   r   r   r   r   basenamer   r   r   splitmaxr9   r   r   valuesrH   rE   )rK   rJ   cacher   r   rI   r   cached_files_infocached_filer   r   r   revision_hashrevision_last_modifiedcached_revisionrepo_last_accessedrepo_last_modified
repo_statsr   s                     @r$   r   r     s     :%8Y88: : 	:	P$S^^44) % ;$$&&..uc::i4   P P P%&N1&N&NOOOP J $  GLLK,?@@	(__Z<<w~~i(( 	OO	 ) 0 0
9 	'**;v+>??##5'	2:##-i#8#A#-i#8#A  		 		 		 		 M ///177DDQGMM M  ;!$ %D %D %D %D1B%D %D %D "D "D "+!1!1!: )!)** 8 8 8 8%68 8 8 8 8,  O  1  O O:;L;L;N;N O O OOO  O O:;L;L;N;N O O OOO^^%%
'0'0 Z_-..FF*2C2C2E2EFFFFF((	 	 	 	s   AB 
B%B  B%rb   )F)*r1   r   dataclassesr   pathlibr   typingr   r   r   r   r	   r
   r   modelscope.hub.errorsr   r   modelscope.hub.utils.cachingr   modelscope.hub.utils.utilsr   r   r   modelscope.utils.constantr   r   modelscope.utils.file_utilsr   modelscope.utils.loggerr   loggerr   r   r9   rH   rS   r2   r   boolr   r~   r   r7   r'   r$   <module>r      s   B B 				 ! ! ! ! ! !       G G G G G G G G G G G G G G G G G G H H H H H H H H = = = = = =D D D D D D D D D D H H H H H H H H @ @ @ @ @ @ . . . . . .	 - $88 88 88 88 88 88 88 88v $1 1 1 1 1 1 1 1h $;8 ;8 ;8 ;8 ;8 ;8 ;8 ;8| $Y
 Y
 Y
 Y
 Y
 Y
 Y
 Y
z 15p pE#t),-p9Lp p p pf
t 
 
 
 
 
 4 C $    .\ \!$\)1.)A\ \ \ \ \ \r'   