
    uj6                        d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZmZmZmZ ddlZddlZddlmZ  e            ZdZdZ G d	 d
          Z G d dee          ZdedefdZ G d d          Z G d d          ZdS )zUnified file-level upload tracker.

Merges hash cache and upload progress into a single .ms_upload_cache file
with per-file status tracking, eliminating batch-granularity issues.
    N)Enum)Path)DictListOptionalTupleUnion)
get_loggerz.ms_upload_progress   c                       e Zd ZdZdZdZdZdS )
FileStatusz7Single-character status codes for compact JSON storage.ucfN)__name__
__module____qualname____doc__UPLOADED	COMMITTEDFAILED     m/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/modelscope/hub/upload_tracker.pyr   r      s#        AAHIFFFr   r   c                   J    e Zd ZdZdZdZdZdZdZdZ	dZ
ed	efd
            ZdS )ErrorCategoryz:Classification of upload/commit errors for retry strategy.transient_networktransient_server	throttledauth_failed	not_foundfile_invalidunknownreturnc                 L    | t           j        t           j        t           j        fvS N)r   AUTH_FAILED	NOT_FOUNDFILE_INVALIDselfs    r   is_retryablezErrorCategory.is_retryable.   s&    %#&
 
 	
r   N)r   r   r   r   TRANSIENT_NETWORKTRANSIENT_SERVER	THROTTLEDr'   r(   r)   UNKNOWNpropertyboolr,   r   r   r   r   r   $   sd        DD+)IKI!LG
d 
 
 
 X
 
 
r   r   errorr$   c                    t          |                                           }t          | t                    rt          j        S t          | t                    rt          j        S t          | t          t          f          rt          j	        S t          | t          j        j                  rlt          | dd          }|M|j        }|dk    rt          j        S |dv rt          j        S |dk    rt          j        S |dk    rt          j        S t          j        S t          | t(                    rad|v rt          j        S d|v sd	|v rt          j        S d
|v rt          j        S t+          j        d|          rt          j        S t          j        S t          | t.          t0          f          r4d|v sd|v rt          j        S d|v sd|v rt          j        S t          j	        S d|v sd|v rt          j	        S d|v rt          j	        S t          j        S )zClassify an exception into a retry category.

    Returns an ErrorCategory that indicates whether the error is transient
    (retryable) or permanent, and what kind of failure occurred.
    responseNi  )i  i  i  i  429401403404z#(?:http[/\s]*)?5\d{2}|server.*errorzsize changedzno such file
permissionzaccess deniedtimeoutz	timed out
connection)strlower
isinstanceFileNotFoundErrorr   r)   PermissionErrorConnectionErrorTimeoutErrorr-   requests
exceptions	HTTPErrorgetattrstatus_coder/   r'   r(   r.   r0   
ValueErrorresearchIOErrorOSError)r3   	error_strrespstatuss       r   classify_errorrQ   7   s    E

  ""I %*++ *))%)) *)) %/<899 /.. %,677 %uj$//%F}}$..##$00}}$..}}$55$$ %$$ 	%I **I)!3!3 ,,I **9;YGG 	2 11$$ %'7+,, /Y&&.I*E*E --9$$9(D(D --.. I	!9!9..y  ..  r   c            	       P   e Zd ZdZdeeef         defdZedede	de
defd	            Zdede	de
dee         fd
Zdede	de
defdZdede	de
defdZdede	de
dee         fdZdede	de
fdZdeeee	e
f                  fdZ	 ddede	de
defdZd Zd Zd ZdefdZd ZdS )UploadTrackera  Unified file-level upload tracker.

    Replaces both UploadHashCache (.ms_upload_cache) and
    UploadProgress (.ms_upload_progress) with a single file that tracks
    per-file hash and upload status.

    File format (version 3):
        {
            "version": 3,
            "repo_id": "user/repo",
            "files": {
                "path|mtime|size": {"hash": "...", "size": 123, "status": "c"},
                ...
            }
        }

    Status values:
        "c" = committed (blob uploaded AND committed to repo)
        "u" = uploaded (blob uploaded, NOT yet committed)
        "f" = failed
        (no status field) = hash cached only, upload not attempted

    Thread safety: all mutations are protected by a lock.
    Persistence: atomic write via temp file + rename.
    
cache_pathrepo_idc                     t          |          | _        || _        i | _        t	          j                    | _        d| _        |                                  d S NF)	r   _path_repo_id_files	threadingLock_lock_dirty_load)r+   rT   rU   s      r   __init__zUploadTracker.__init__   sF    *%%
')^%%


r   rel_pathmtimesizer$   c                     |  d| d| S )zKBuild cache key from file metadata (same format as legacy UploadHashCache).|r   )ra   rb   rc   s      r   	_make_keyzUploadTracker._make_key   s      ++U++T+++r   c                     |                      |||          }| j        5  | j                            |          }ddd           n# 1 swxY w Y   |d|vrdS ||d         |d         dS )zGet cached hash info for a file.

        Returns dict compatible with legacy UploadHashCache.get():
            {'file_path_or_obj': rel_path, 'file_hash': ..., 'file_size': ...}
        or None if not cached or file has changed.
        Nhashrc   )file_path_or_obj	file_hash	file_sizerf   r]   rZ   getr+   ra   rb   rc   keyentrys         r   get_hashzUploadTracker.get_hash   s     nnXud33Z 	) 	)KOOC((E	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	)=F%//4 (vv
 
 	
   AA
A
	hash_infoc                     |                      |||          }| j        5  | j                            |i           }|d         |d<   |d         |d<   || j        |<   d| _        ddd           dS # 1 swxY w Y   dS )zStore computed hash info for a file.

        Args:
            hash_info: dict with 'file_hash' and 'file_size' keys.
        rj   rh   rk   rc   TN)rf   r]   rZ   rm   r^   )r+   ra   rb   rc   rs   ro   rp   s          r   put_hashzUploadTracker.put_hash   s     nnXud33Z 	 	KOOC,,E%k2E&M%k2E&M$DKDK	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   AA//A36A3c                     |                      |||          }| j        5  | j                            |          }ddd           n# 1 swxY w Y   |duo"|                    d          t          j        k    S )z<Check if a file is committed (with matching mtime and size).NrP   )rf   r]   rZ   rm   r   r   rn   s         r   is_committedzUploadTracker.is_committed   s    nnXud33Z 	) 	)KOOC((E	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	)D  .UYY& &#-&. 	.rr   c                     |                      |||          }| j        5  | j                            |          }ddd           n# 1 swxY w Y   |r|                    d          ndS )z(Get file status, or None if not tracked.NrP   rl   rn   s         r   
get_statuszUploadTracker.get_status   s     nnXud33Z 	) 	)KOOC((E	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	)&+5uyy"""5rr   c                     |                      |||          }| j        5  || j        v r!t          j        | j        |         d<   d| _        ddd           dS # 1 swxY w Y   dS )z1Mark a file as blob-uploaded (not yet committed).rP   TN)rf   r]   rZ   r   r   r^   )r+   ra   rb   rc   ro   s        r   mark_uploadedzUploadTracker.mark_uploaded   s    nnXud33Z 	# 	#dk!!-7-@C *"	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	#s   +AAA	file_keysc                     | j         5  |D ]@\  }}}|                     |||          }|| j        v rt          j        | j        |         d<   Ad| _        ddd           dS # 1 swxY w Y   dS )zMark multiple files as committed after a successful commit.

        Args:
            file_keys: list of (rel_path, mtime, size) tuples.
        rP   TN)r]   rf   rZ   r   r   r^   )r+   r|   ra   rb   rc   ro   s         r   mark_committed_batchz"UploadTracker.mark_committed_batch   s     Z 	 	)2 F F%%nnXud;;$+%%1;1EDK$X.DK	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   AA  A$'A$ 
error_typec                 ,   |                      |||          }| j        5  || j        v r-t          j        | j        |         d<   |r|| j        |         d<   ndt          j        i}|r||d<   || j        |<   d| _        ddd           dS # 1 swxY w Y   dS )z9Mark a file as failed with optional error classification.rP   r   TN)rf   r]   rZ   r   r   r^   )r+   ra   rb   rc   r   ro   rp   s          r   mark_failedzUploadTracker.mark_failed   s     nnXud33Z 
	 
	dk!!-7->C * @5?DK$\2!:#45 5*4E,'#(C DK
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	s   AB		BBc                    | j         5  | j        s	 ddd           dS t          | j        d | j                                        D             d}d| _        ddd           n# 1 swxY w Y   	 | j        j                            dd           t          j
        t          | j        j                  d          \  }}	 t          j        |d	d
          5 }t          j        ||d           ddd           n# 1 swxY w Y   t          j        |t          | j                             dS # t"          $ r t          j        |            w xY w# t&          $ r(}t(                              d|            Y d}~dS d}~ww xY w)z&Atomically save tracker state to disk.Nc                 4    i | ]\  }}|t          |          S r   )dict).0kvs      r   
<dictcomp>z&UploadTracker.save.<locals>.<dictcomp>  s4     ; ; ;"a T!WW ; ; ;r   )versionrU   filesFT)parentsexist_okz.tmp)dirsuffixwzutf-8)encoding)ensure_asciizFailed to save upload tracker: )r]   r^   _TRACKER_VERSIONrY   rZ   itemsrX   parentmkdirtempfilemkstempr=   osfdopenjsondumpreplaceBaseExceptionunlink	Exceptionloggerwarning)r+   datafdtmp_pathr   es         r   savezUploadTracker.save   sS   Z 		  		 ; 		  		  		  		  		  		  		  		  ,=; ;&*k&7&7&9&9; ; ; D  DK		  		  		  		  		  		  		  		  		  		  		  		  		  		  		 	BJ##D4#@@@#+
)**6; ; ;LBYr3999 ;QIdAE::::; ; ; ; ; ; ; ; ; ; ; ; ; ; ;
8S__55555    	(###  	B 	B 	BNN@Q@@AAAAAAAAA	Bse   	A"8A""A&)A&.AE  D/ C;/D/ ;C??D/ C?*D/ / EE 
FE??Fc                 (   	 | j                             d           n4# t          $ r'}t                              d|            Y d}~nd}~ww xY w| j        5  | j                                         d| _        ddd           dS # 1 swxY w Y   dS )zDelete the tracker file.T)
missing_okzFailed to delete tracker file: NF)	rX   r   rM   r   r   r]   rZ   clearr^   )r+   r   s     r   r   zUploadTracker.clear  s    	BJ.... 	B 	B 	BNN@Q@@AAAAAAAA	BZ 	  	 KDK	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	  	 s&    
AA

A!BBBc                    | j                                         s|                                  dS 	 t          | j         d          5 }t	          j        |          }ddd           n# 1 swxY w Y   nA# t          j        t          f$ r(}t          	                    d|            Y d}~dS d}~ww xY w|
                    d          }||                     |           dS |t          k     r&t          	                    d| dt           d           |
                    dd	          }|r3|| j        k    r(t          	                    d
| d| j         d           dS |
                    di           | _        t          d | j                                        D                       }|dk    r3t                              dt%          | j                   d| d           |                                  dS )z8Load tracker state from disk, handling format migration.Nrz/Failed to load upload tracker, starting fresh: r   zUpload tracker version z is older than current z%. Data will be migrated on next save.rU   r   z"Tracker repo_id mismatch (cached: z, current: z), ignoring stale tracker.r   c              3   `   K   | ])}|                     d           t          j        k    %dV  *dS )rP      N)rm   r   r   )r   r   s     r   	<genexpr>z&UploadTracker._load.<locals>.<genexpr>D  sI       J JA!"xJ4H!H!H  !H!H!H!HJ Jr   r   zUpload tracker loaded: z
 entries, z committed.)rX   exists_check_legacy_progressopenr   loadJSONDecodeErrorrM   r   r   rm   _migrate_v1r   rY   rZ   sumvaluesinfolen)r+   r   r   r   r   stored_repocommitted_counts          r   r_   zUploadTracker._load"  s   z  "" 	'')))F	dj#&& $!y||$ $ $ $ $ $ $ $ $ $ $ $ $ $ $$g. 	 	 	NNE!EEG G GFFFFF	
 ((9%%?T"""F%%%NNK' K K#K K KL L L
 hhy"-- 	;$-77NNF[ F F MF F FG G G Fhhw++ J J););)=)= J J J J JQKK 8#dk2B2B 8 8*8 8 8 9 9 9 	##%%%%%s;   A3 A'A3 'A++A3 .A+/A3 3B1	B,,B1r   c                 .   i }|                                 D ]@\  }}t          |t                    r&d|v r"|d         |                    dd          d||<   A|| _        d| _        |r-t                              dt          |           d           dS dS )	ad  Migrate from legacy hash-only format (UploadHashCache v1).

        Old format: {"rel_path|mtime|size": {"file_hash": "...", "file_size": 123}}
        New format: {"rel_path|mtime|size": {"hash": "...", "size": 123}}

        Status is NOT set during migration -- cached hashes do not imply
        the file was committed (conservative approach).
        rj   rk   r   )rh   rc   Tz	Migrated z' entries from legacy hash cache format.N)	r   r?   r   rm   rZ   r^   r   r   r   )r+   r   migratedro   values        r   r   zUploadTracker._migrate_v1L  s     **,, 	 	JC%&& ;%+?+?!+.!IIk155! !  	KKRCMMRRR    	 	r   c                     | j         j        t          z  }|                                r t                              d| d           dS dS )z/Warn if legacy .ms_upload_progress file exists.z&Legacy upload progress file detected: z8. This file is no longer used. You may delete it safely.N)rX   r   _LEGACY_PROGRESS_FILEr   r   r   )r+   legacy_paths     r   r   z$UploadTracker._check_legacy_progressc  sr    j'*?? 	KNNJ J J JK K K K K	K 	Kr   Nr   )r   r   r   r   r	   r=   r   r`   staticmethodfloatintrf   r   r   rq   ru   r2   rw   ry   r{   r   r   r~   r   r   r   r_   r   r   r   r   r   rS   rS   u   sH        45d#3 c     ,C , ,S ,S , , , \,
 
U 

'~
 
 
 
& U #     $.S . .c .d . . . .63 6u 66!)#6 6 6 6#c #% #s # # # #d5eS3I.J    " ')	 !   !$	   *B B B4     (& (& (&T    .K K K K Kr   rS   c            	           e Zd ZdZdedededdfdZdedededefd	Z	dededede
fd
ZdededefdZdededefdZd Z	 ddedededefdZd Zd ZdS )NullTrackerzNo-op tracker for when caching is disabled.

    Implements the same interface as UploadTracker but does nothing,
    eliminating 'if tracker is not None' checks throughout api.py.
    ra   rb   rc   r$   Nc                     d S r&   r   r+   ra   rb   rc   s       r   rq   zNullTracker.get_hashs      tr   rs   c                     d S r&   r   )r+   ra   rb   rc   rs   s        r   ru   zNullTracker.put_hashv  s    r   c                     dS rW   r   r   s       r   rw   zNullTracker.is_committedz  s    ur   c                     d S r&   r   r   s       r   ry   zNullTracker.get_status}  r   r   c                     d S r&   r   r   s       r   r{   zNullTracker.mark_uploaded      r   c                     d S r&   r   )r+   r|   s     r   r~   z NullTracker.mark_committed_batch  r   r   r   r   c                     d S r&   r   )r+   ra   rb   rc   r   s        r   r   zNullTracker.mark_failed  s	    
 	r   c                     d S r&   r   r*   s    r   r   zNullTracker.save  r   r   c                     d S r&   r   r*   s    r   r   zNullTracker.clear  r   r   r   )r   r   r   r   r=   r   r   rq   r   ru   r2   rw   ry   r{   r~   r   r   r   r   r   r   r   r   l  sp         U # $     U #     S  c d    3 u C    c % s       ')	 !   !$	         r   r   )r   r   rJ   r   r[   enumr   pathlibr   typingr   r   r   r   r	   r   rD   modelscope.utils.loggerr
   r   r   r   r   r=   r   r   rQ   rS   r   r   r   r   <module>r      s   
 
			 				                  5 5 5 5 5 5 5 5 5 5 5 5 5 5   . . . . . .	 .          
 
 
 
 
C 
 
 
&;!) ;! ;! ;! ;! ;!|tK tK tK tK tK tK tK tKn% % % % % % % % % %r   