
    j                     8    d dl mZ d dlmZ  G d d          ZdS )    )Rule)AbbreviationReplacerc                   j   e Zd Zg dZ edd          Z edd          Z edd          Z edd          Z ed	d
          Z	 edd          Z
 G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          Z G d de          ZdS )Standard)   。   ．.   ！!?   ？u   (?<=[a-zA-z]°)\.(?=\s*\d+)   ∯z(?<=\s)\.(?=(jpe?g|png|gif|tiff?|pdf|ps|docx?|xlsx?|svg|bmp|tga|exif|odt|html?|txt|rtf|bat|sxw|xml|zip|exe|msi|blend|wmv|mp[34]|pptx?|flac|rb|cpp|cs|js)\s)z\n   ȹz\?(?=(\'|\"))   &ᓷ&z\s{3,} u   &⎋&'c                   B    e Zd ZdZg dZg dZg dZ edd          ZdS )Standard.Abbreviationz:Defines the abbreviations for each language (if available))adjadmadvalalaaltaaprarcarizarkartassnasstattysaugavebartbldbldgblvdbrigbrosbtwcalcalifcaptclcmdrcocolcolocomdrconconncorpcplcresctzd.phildakdecdeldeptdetdistdrzdr.philz	dr.philosdrsze.gensespesqetcexpexpyextfebfedflaftfwyfygagengovhonhosphrhwayhwyzi.eiaididaillincindinginspisjanjrjuljunkankanskenkylaltltdmajmanmarmassmaymdmemedmessrsmexmfgmichminminnmissmllemmmmemomontmrmrsmsmsgrmssrsmtmtnnebnebrnevnonosnovnroctokoklaontopordoreppapdpdepennpennapfcphzph.dplplzppprofpvtquerdrsrefreprepsresrevrtsasksecsensenssepseptsfcsgtsrstsuptsurgtcetenntexunivusafazu.sutvavvervizvsvtwashwiswiscwywyoyukfig)!r   r"   r)   r.   r0   r2   r8   r?   rA   rQ   rR   r^   rj   rl   r   r   r   r   rt   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   rI   r   r   r   r   z ([a-zA-Z0-9_])(\.)([a-zA-Z0-9_])u   \1∮\3N)	__name__
__module____qualname____doc__ABBREVIATIONSPREPOSITIVE_ABBREVIATIONSNUMBER_ABBREVIATIONSr   WithMultiplePeriodsAndEmailRule     d/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/pysbd/lang/common/standard.pyAbbreviationr      sa        HH W  W  W %P  %P  %P!EEE
 +/$/RT_*`*`'''r   r   c                   ~    e Zd Z edd          Z edd          Z edd          Z edd          Zd	ZeeeegZ	d
S )Standard.DoublePunctuationRulesz\?!   ☉z!\?   ☈z\?\?   ☇!!   ☄z\?!|!\?|\?\?|!!N)
r   r   r   r   	FirstRule
SecondRule	ThirdRule	ForthRuleDoublePunctuationAllr   r   r   DoublePunctuationRulesr   #   sf        D''	T&%((
D%((	D&&	.*i;r   r   c                   `    e Zd Z edd          Z edd          Z edd          ZeeegZdS )Standard.ExclamationPointRulesz\!(?=(\'|\"))   &ᓴ&z\!(?=\,\s[a-z])z\!(?=\s[a-z])N)r   r   r   r   InQuotationRuleBeforeCommaMidSentenceRuleMidSentenceRuler   r   r   r   ExclamationPointRulesr   +   sV        $/99 &*T*<g%F%F" $/99 :OLr   r   c                      e Zd Z edd          Z edd          Z edd          Z edd          Z ed	d
          Z edd          Z	 edd          Z
 edd          Z edd          Z edd          Z edd          Z edd          Z edd          Z edd          Z edd          Z edd           Z ed!d"          Zeeeeee	e
eeeeeeeeeegZd#S )$Standard.SubSymbolsRulesr   r	   u   ♬u   ،u   ♭:u   &ᓰ&r   u   &ᓱ&r   u   &ᓳ&r
   r   r   r   r   u   &ᓸ&r   r   z?!r   z??r   z!?r   r   u   &✂&(u   &⌬&)u   ȸ r   
N)r   r   r   r   PeriodArabicComma	SemiColonFullWidthPeriodSpecialPeriodFullWidthExclamationExclamationPointQuestionMarkFullWidthQuestionMarkMixedDoubleQEMixedDoubleQQMixedDoubleEQMixedDoubleEE
LeftParensRightParensTemporaryEndingPunctutationNewliner   r   r   r   SubSymbolsRulesr   7   sa       fc""d64((D%%	$x//Xu--#tHe444#..tHc** $Xu 5 5VT**VT**VT**VT**T(C((
d8S))&*d5"oo#$ud##{I#%5|$m]Mj+7R	r   r  c                       e Zd Z edd          Z edd          Z edd          Z edd          Z ed	d          ZeeeeegZ	d
S )Standard.EllipsisRulesz\.\.\.(?=\s+[A-Z])u   ☏☏.z(?<=\S)\.{3}(?=\.\s[A-Z])   ƪƪƪz(\s\.){3}\s   ♟♟♟♟♟♟♟z(?<=[a-z])(\.\s){3}\.($|\\n)   ♝♝♝♝♝♝♝z\.\.\.N)
r   r   r   r   ThreeConsecutiveRuleFourConsecutiveRuleThreeSpaceRuleFourSpaceRuleOtherThreePeriodRuler   r   r   r   EllipsisRulesr  O   s        
  $t$99EE #d#?JJ n.EFF <>UVV#tIx88}.A#%9;r   r  c                       e Zd Z edd          Z edd          Z edd          Z edd          Z ed	d
          ZeeeeegZ	dS )Standard.ReinsertEllipsisRulesr  z...r  z . . . r  z. . . .u   ☏☏z..u   ∮r	   N)
r   r   r   r   SubThreeConsecutivePeriodSubThreeSpacePeriodSubFourSpacePeriodSubTwoConsecutivePeriodSubOnePeriodr   r   r   r   ReinsertEllipsisRulesr  d   s         %)DE$:$:!"d#;YGG!T":IFF"&$y$"7"7tFC(((*=?Q&6r   r  c                   8    e Zd Zd                    d          ZdS )Standard.AbbreviationReplacerzgA Being Did For He How However I In It Millions More She That The There They We What When Where Who Whyr   N)r   r   r   splitSENTENCE_STARTERSr   r   r   r   r  o   s&        FFKeCjj 	r   r   N)r   r   r   Punctuationsr   GeoLocationRuleFileFormatRuleSingleNewLineRuleQuestionMarkInQuotationRuleExtraWhiteSpaceRuleSubSingleQuoteRuleobjectr   r   r   r  r  r  r   r   r   r   r   r      s        ?>>L d95AAOT  y  {@  A  ANUD)) #'$'7"A"A$y#..h,,	a 	a 	a 	a 	av 	a 	a 	a< < < < < < < <
M 
M 
M 
M 
M 
M 
M 
M    &   0; ; ; ; ; ; ; ;*	6 	6 	6 	6 	6 	6 	6 	6Q Q Q Q Q3 Q Q Q Q Qr   r   N)pysbd.utilsr   pysbd.abbreviation_replacerr   r   r   r   r   <module>r%     so          < < < < < <lQ lQ lQ lQ lQ lQ lQ lQ lQ lQr   