
    vj
                     l    d dl Z d dlZdZej        ZdefdZdefdZdedefdZ	d	edefd
Z
d Zd ZdS )    Nu   ＂＃＄％＆＇（）＊＋，－／：；＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､　、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·！？｡。decoded_strc                    |                      d          }g }d}t          |          D ]j\  }}t          |          r	|dk    r|}|dk    r2|                    d                    |||                              d}|                    |           k|dk    r0|                    d                    ||d                               d                    |                                          S )N  )split	enumerate_is_chinese_strappendjoinstrip)r   old_word_listnew_word_liststartiwords         n/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/modelscope/utils/chinese_utils.py"remove_space_between_chinese_charsr   
   s    %%c**MME]++ ' '44   	'{{{{$$RWW]57-C%D%DEEE  &&&&{{RWW]566%:;;<<<88M""((***    stringc                     d                     d                     d | D                                                                 S )Nr   r   c                 N    g | ]"}t          |          s	|t          v rd | d n|#S )r   )_is_chinese_charCHINESE_PUNCTUATION).0chars     r   
<listcomp>z'rebuild_chinese_str.<locals>.<listcomp>   sV         D!!	K%)-@%@%@ 	DFJ  r   )r   r   r   s    r   rebuild_chinese_strr      sQ    88BGG        uww	  r   returnc                 4    t          d | D                       S )Nc              3   X   K   | ]%}t          |          p|t          v p|t          v V  &d S )N)r   r   ENGLISH_PUNCTUATION)r   cps     r   	<genexpr>z"_is_chinese_str.<locals>.<genexpr>&   sY       7 7)+ 	 	%&9 9 	%$$7 7 7 7 7 7r   )allr   s    r   r
   r
   %   s0     7 7/57 7 7 7 7 7r   r$   c                     t          |           } | dk    r| dk    sT| dk    r| dk    sH| dk    r| dk    s<| dk    r| dk    s0| d	k    r| d
k    s$| dk    r| dk    s| dk    r| dk    s| dk    r| dk    rdS dS )z6Checks whether CP is the codepoint of a CJK character.i N  i  i 4  iM  i   iߦ i  i? i@ i i  i i   i  i  i TF)ord)r$   s    r   r   r   +   s    	RB	v",,B&LLR6\\g"--g"--g"--g"--fvg"--t5r   c                 b    ddl m} g d}d}| D ]}|dv r|d         }||z  } ||d          }|S )Nr   )convert)
u   零u   一u   二u   三u   四u   五u   六u   七u   八u   九r   
0123456789zzh-hans)zhconvr*   )textr*   chinese_numbernew_textxs        r   normalize_chinese_numberr1   :   si    [[[NH  q!AAwx++HOr   c                 "   |                                                      t          d                              t          d          } t	          j        dd|           } |                     d          } |                     d          d |         } | S )Nr   z\s{2,}
)lowerreplacer   r#   resubrstripr   )r-   	max_wordss     r   pre_chineser:   G   s    ::<< 3 #% %%,W-@#%F%F 	6 D
 ;;tD::c??:I:&DKr   )r6   r   r   punctuationr#   strr   r   boolr
   r   r1   r:    r   r   <module>r?      s    
			  N ( +C + + + +&    7C 7D 7 7 7 7     
 
 
    r   