
    j                     B    d dl Z d dlmZ d Zd Z G d de          ZdS )    N)Textc                     d| z   } t          j        d                    |                                          d|           } | dd          } | S )N z(?<=\s{abbr})\.(?=(\s\d|\s+\())abbr   ∯   resubformatstriptxtr   s     e/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/pysbd/abbreviation_replacer.pyreplace_pre_number_abbrr      sK    
)C
&3::

:MMuVY
Z
ZC
abb'CJ    c                     d| z   } t          j        d                    |                                          d|           } | dd          } | S )Nr   z(?<=\s{abbr})\.(?=(\s|:\d+))r   r   r	   r
   r   s     r   replace_prepositive_abbrr      sK    
)C
&077TZZ\\7JJESV
W
WC
abb'CJr   c                   8    e Zd Zd Zd Zd Zd Zd Zd Zd Z	dS )	AbbreviationReplacerc                 "    || _         || _        d S )N)textlang)selfr   r   s      r   __init__zAbbreviationReplacer.__init__   s    				r   c                     t          | j                  j        | j        j        | j        j        g| j        j        j        R  | _        d}| j                            d          D ]}|| 	                    |          z  }|| _        | 
                                  t          | j                  j        | j        j        j         | _        |                                 | _        | j        S )N T)r   r   applyr   PossessiveAbbreviationRuleKommanditgesellschaftRuleSingleLetterAbbreviationRulesAll
splitlines"search_for_abbreviations_in_string"replace_multi_period_abbreviations	AmPmRules)replace_abbreviation_as_sentence_boundary)r   abbr_handled_textlines      r   replacezAbbreviationReplacer.replace   s    )DOO)I0I/
 Y48
 
 
	
 I((.. 	O 	OD!H!H!N!NN%	//111)DOO)49+>+BC	BBDD	yr   c                     d                     d | j        D                       }d                    |          }t          j        |d| j                  | _        | j        S )N|c              3   @   K   | ]}d                      |          V  dS )z
(?=\s{}\s)N)r   ).0words     r   	<genexpr>zQAbbreviationReplacer.replace_abbreviation_as_sentence_boundary.<locals>.<genexpr>-   s0      !`!`-"6"6t"<"<!`!`!`!`!`!`r   u@   (U∯S|U\.S|U∯K|E∯U|E\.U|U∯S∯A|U\.S\.A|I|i.v|I.V)∯({})z\1.)joinSENTENCE_STARTERSr   r   r   r   )r   sent_startersregexs      r   r(   z>AbbreviationReplacer.replace_abbreviation_as_sentence_boundary,   sX    !`!`I_!`!`!`aaSZZ[hiiF5&$)44	yr   c                 x    d }t          j        | j        j        || j        t           j                  | _        d S )Nc                 ~    |                                  } t          j        t          j        d          d|           } | S )N.r   )groupr   r   escape)matchs    r   mpa_replacezLAbbreviationReplacer.replace_multi_period_abbreviations.<locals>.mpa_replace3   s/    KKMMEF29T??E599ELr   flags)r   r   r   MULTI_PERIOD_ABBREVIATION_REGEXr   
IGNORECASE)r   r<   s     r   r&   z7AbbreviationReplacer.replace_multi_period_abbreviations2   sD    	 	 	
 FI5I-	
 
 
			r   c                     d|z   }t          j        d                    t          j        |                                                    d|          }|dd          }|S )Nr   zB(?<=\s{abbr})\.(?=((\.|\:|-|\?|,)|(\s([a-z]|I\s|I'm|I'll|\d|\())))r   r   r	   )r   r   r   r:   r   )r   r   r   s      r   replace_period_of_abbrz+AbbreviationReplacer.replace_period_of_abbr?   sf    CifQXXYtzz||,, Y   
 
 !""g
r   c                    |                                 }| j        j        j        D ]}|                                }||vrt          j        d                    |          |t
          j                  }|sRdt          t          j
        |                    z   dz   }t          j        ||          }t          |          D ]\  }}	|                     ||	||          }|S )Nz(?:^|\s|\r|\n){}r=   z(?<={z} ).{1})lowerr   AbbreviationABBREVIATIONSr   r   findallr   r@   strr:   	enumeratescan_for_replacements)
r   r   loweredr   strippedabbrev_matchnext_word_start
char_arrayindr;   s
             r   r%   z7AbbreviationReplacer.search_for_abbreviations_in_stringN   s    **,,I*8 	 	Dzz||Hw&&:#**844d"-  L   &RYx-@-@)A)AAIMOOT::J'55  
U11%j  r   c                 (   	 ||         }n# t           $ r d}Y nw xY w| j        j        j        }| j        j        j        }t          |                                          }|r(|                                                                |v r|                                                                |v rt          ||          }nO|                                                                |v rt          ||          }n|                     ||          }|S )Nr   )
IndexErrorr   rE   PREPOSITIVE_ABBREVIATIONSNUMBER_ABBREVIATIONSrH   isupperr   rD   r   r   rB   )	r   r   amrP   rO   charprepositivenumber_abbruppers	            r   rJ   z*AbbreviationReplacer.scan_for_replacementsa   s   	c?DD 	 	 	DDD	i,Fi,AD		!!## 	;

((**k99xxzz!![00.sB77!!##{22-c26611#r::
s    N)
__name__
__module____qualname__r   r+   r(   r&   rB   r%   rJ    r   r   r   r      s}              
 
 
    &    r   r   )r   pysbd.utilsr   r   r   objectr   r^   r   r   <module>ra      s|    				          X X X X X6 X X X X Xr   