
    j                     |    d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZmZ  G d d	ee          ZdS )
    N)AbbreviationReplacer)BetweenPunctuation)CommonStandard)replace_punctuation)Cleaner)TextRulec                   f    e Zd ZdZ G d de          Z G d de          Z G d de          ZdS )	Japanesejac                   ,     e Zd Zd fd	Zd Zd Z xZS )Japanese.CleanerNc                 L    t                                          ||           d S Nsuper__init__)selftextlangdoc_type	__class__s       ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/pysbd/lang/japanese.pyr   zJapanese.Cleaner.__init__   s#    GGT4(((((    c                 8    |                                   | j        S r   ) remove_newline_in_middle_of_wordr   r   s    r   cleanzJapanese.Cleaner.clean   s    113339r   c                 ~    t          dd          }t          | j                                      |          | _        d S )Nu   (?<=の)\n(?=\S) )r
   r	   r   apply)r   NewLineInMiddleOfWordRules     r   r   z1Japanese.Cleaner.remove_newline_in_middle_of_word   s5    (,-@"(E(E%TY--.GHHDIIIr   r   )__name__
__module____qualname__r   r   r   __classcell__r   s   @r   r   r      sb        	) 	) 	) 	) 	) 	)	 	 		I 	I 	I 	I 	I 	I 	Ir   r   c                       e Zd Zg ZdS )Japanese.AbbreviationReplacerN)r$   r%   r&   SENTENCE_STARTERS r   r   r   r*      s        r   r   c                   6     e Zd Z fdZd Zd Zd Zd Z xZS )Japanese.BetweenPunctuationc                 J    t                                          |           d S r   r   )r   r   r   s     r   r   z$Japanese.BetweenPunctuation.__init__    s!    GGT"""""r   c                 8    |                                   | j        S r   ))sub_punctuation_between_quotes_and_parensr   r   s    r   replacez#Japanese.BetweenPunctuation.replace#   s    ::<<<9r   c                 T    d}t          j        |t          | j                  | _        d S )Nu0   （(?=(?P<tmp>[^（）]+|\\{2}|\\.)*)(?P=tmp)）resubr   r   )r   BETWEEN_PARENS_JA_REGEXs     r   !sub_punctuation_between_parens_jaz=Japanese.BetweenPunctuation.sub_punctuation_between_parens_ja'   s*    &Y#68Ki! !DIIIr   c                 T    d}t          j        |t          | j                  | _        d S )Nu0   「(?=(?P<tmp>[^「」]+|\\{2}|\\.)*)(?P=tmp)」r4   )r   BETWEEN_QUOTE_JA_REGEXs     r   !sub_punctuation_between_quotes_jaz=Japanese.BetweenPunctuation.sub_punctuation_between_quotes_ja,   s*    %X"57Ji! !DIIIr   c                 V    |                                   |                                  d S r   )r8   r;   r   s    r   r1   zEJapanese.BetweenPunctuation.sub_punctuation_between_quotes_and_parens1   s,    224442244444r   )	r$   r%   r&   r   r2   r8   r;   r1   r'   r(   s   @r   r   r.      st        	# 	# 	# 	# 	#	 	 		! 	! 	!
	! 	! 	!
	5 	5 	5 	5 	5 	5 	5r   r   N)r$   r%   r&   iso_coder   r   r   r,   r   r   r   r   
   s        HI I I I I' I I I    3   5 5 5 5 5/ 5 5 5 5 5r   r   )r5   pysbd.abbreviation_replacerr   pysbd.between_punctuationr   pysbd.lang.commonr   r   pysbd.punctuation_replacerr   pysbd.cleanerr   pysbd.utilsr	   r
   r   r,   r   r   <module>rD      s    				 < < < < < < 8 8 8 8 8 8 . . . . . . . . : : : : : : ! ! ! ! ! ! " " " " " " " ")5 )5 )5 )5 )5vx )5 )5 )5 )5 )5r   