
    j@                         d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ  G d	 d
ee          ZdS )    N)AbbreviationReplacer)BetweenPunctuation)CommonStandard)	Processor)Text)replace_punctuation)ListItemReplacerc                       e Zd ZdZ G d de          Z G d de          Z G d dej                  Z G d d	e          Z G d
 de	          Z	dS )Slovakskc                       e Zd Zd ZdS )Slovak.ListItemReplacerc                     |                                   |                                  |                                  | j        S N)format_roman_numeral_lists!format_numbered_list_with_periods format_numbered_list_with_parenstextselfs    [/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/pysbd/lang/slovak.pyadd_line_breakz&Slovak.ListItemReplacer.add_line_break   sB     ++---22444113339    N)__name__
__module____qualname__r    r   r   r
   r      s#        		 		 		 		 		r   r
   c                       e Zd Zg Zd ZdS )Slovak.AbbreviationReplacerc                 j    |                     dd          dz   }|                     |dz   |          }|S )N.   ∯)replace)r   txtabbrabbr_news       r   replace_period_of_abbrz2Slovak.AbbreviationReplacer.replace_period_of_abbr    s8     ||C//%7H++dSj(33CJr   N)r   r   r   SENTENCE_STARTERSr(   r   r   r   r   r       s)        	 	 	 	 	r   r   c                   &    e Zd Zg dZg dZg dZdS )Slovak.Abbreviation)   čnonrzs. r. oingpza. dzo. kzpol. prz
a. s. a. pzp. n. lredzo.kza.dzm.ozpol.prza.s.a.pzp.n.lppslcorpplgrtzrtgzo.c.pzo. c. pzc.kzc. kzn.azn. aza.mza. mvzzi.bzi. bu   ú.p.v.ou   ú. p. v. obrosrsdrdoctuodszn.w.azn. w. au   nárpedgpaeddrrndrnaprkza.g.pza. g. pprofprza.vza. vpormvdru   nešpzu.szu. sktvydze.tze. talzll.mzll. mzo.f.izo. f. imrapodu   súkrstredzs.e.gzs. e. gsrtvzindvaretcatdzn.ozn. ozs.azs. au   napřza.i.iza. i. iza.k.aza. k. akonkru   čsloddltdzt.zzt. zzo.zzo. zobvobrpoktelu   štskrphdrxxu   š.pu   š. pzph.dzph. dzm.n.mzm. n. mzzrozu   atď.evzv.spzv. spdrscmudru   t.ču   t. čeloscozr.ozr. ostrzp.azp. azdravotprekgenu   viďdrccazp.szp. su   zákslovarmincmaxzd.czk.oa. r. kzd. czk. orn   socbczsakadszpozntru   námkolcsculspzo.ijrzbsvtju   čstznu   prípivhlstpodvitisstorrozhmldu   atďmgrza.sza. sphdzz.zzz. zjudrr/   hodvsu   písmzs.r.ominmliiizt.jzt. jspolmiliinaprresptzv)r   r0   rh   r`   r   r/   r   rp   r_   r;   rB   )r,   r-   r.   N)r   r   r   ABBREVIATIONSPREPOSITIVE_ABBREVIATIONSNUMBER_ABBREVIATIONSr   r   r   Abbreviationr+   *   s<         t  t  t$p$p$p!111r   r   c                   "    e Zd ZdZdZd Zd ZdS )Slovak.BetweenPunctuationu   „(?>[^“\\]+|\\{2}|\\.)*“u1   \„(?=(?P<tmp>[^“\\]+|\\{2}|\\.)*)(?P=tmp)\“c                 B    t          j        | j        t          |          S r   )resub$BETWEEN_SLOVAK_DOUBLE_QUOTES_REGEX_2r	   r   r%   s     r   ,sub_punctuation_between_slovak_double_quoteszFSlovak.BetweenPunctuation.sub_punctuation_between_slovak_double_quotes4   s    6$CEXZ]^^^r   c                    |                      |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|S r   )	%sub_punctuation_between_single_quotes,sub_punctuation_between_single_quote_slanted%sub_punctuation_between_double_quotes'sub_punctuation_between_square_bracketssub_punctuation_between_parens$sub_punctuation_between_quotes_arrow!sub_punctuation_between_em_dashes&sub_punctuation_between_quotes_slantedr   r   s     r   )sub_punctuation_between_quotes_and_parenszCSlovak.BetweenPunctuation.sub_punctuation_between_quotes_and_parens7   s    <<SAACCCCHHC<<SAAC>>sCCC55c::C;;C@@C88==C==cBBCCCCHHCJr   N)r   r   r   "BETWEEN_SLOVAK_DOUBLE_QUOTES_REGEXr   r   r   r   r   r   r   r   /   sA        -N*/c,	_ 	_ 	_
	 
	 
	 
	 
	r   r   c                   >     e Zd Zd fd	Zd Zd Zd Zd Zd Z xZ	S )	Slovak.ProcessorFc                 N    t                                          |||           d S r   )super__init__)r   r   lang	char_span	__class__s       r   r   zSlovak.Processor.__init__E   s%    GGT433333r   c                 <   | j         s| j         S | j                             dd          | _         | j                            | j                   }|                                | _         |                                  |                                  |                                  |                                  t          | j                   
                    | j        j        j        | j        j        | j        j                  | _         |                                 }|S )N
)r   r$   r   r
   r   replace_abbreviationsreplace_numbersreplace_continuous_punctuation)replace_periods_before_numeric_referencesr   applyr   WithMultiplePeriodsAndEmailRuleGeoLocationRuleFileFormatRulesplit_into_segments)r   lipostprocessed_sentss      r   processzSlovak.Processor.processH   s    9 !y 	))$55DI ++DI66B))++DI&&(((  """//111::<<<TY--	&F	)49+CE EDI #'":":"<"<&&r   c                      t          | j                  j        | j        j        j         | _        |                                  |                                  |                                  | j        S r   )	r   r   r   r   NumbersAllreplace_period_in_slovak_dates"replace_period_in_ordinal_numerals replace_period_in_roman_numeralsr   s    r   r   z Slovak.Processor.replace_numbers[   sa    -TY-ty/@/DEDI//11133555113339r   c                 F    t          j        dd| j                  | _        d S )Nz(?<=\d)\.(?=\s*[a-z]+)r#   )r   r   r   r   s    r   r   z3Slovak.Processor.replace_period_in_ordinal_numeralsb   s    8%KKDIIIr   c                 \    t          j        dd| j        t           j                  | _        d S )Nz"((\s+[VXI]+)|(^[VXI]+))(\.)(?=\s+)u   \1∯)r   r   r   
IGNORECASEr   s    r   r   z1Slovak.Processor.replace_period_in_roman_numeralsf   s&    DhPTPY[][hiiDIIIr   c                     g d}|D ]6}t          j        d                    |          d| j                  | _        7d S )N)u   Januáru   FebruárMarecu   Aprílu   Máju   Júnu   JúlAugust	Septemberu   OktóberNovemberDecemberu   Januárau	   FebruáraMarcau   Aprílau   Májau   Júnau   JúlaAugusta	Septembrau   OktóbraNovembraDecembraz(?<=\d)\.(?=\s*{month}))monthr#   )r   r   formatr   )r   MONTHSr   s      r   r   z/Slovak.Processor.replace_period_in_slovak_datesj   sa    Z Z ZF e eF#=#D#D5#D#Q#QSXZ^Zcdd		e er   )F)
r   r   r   r   r   r   r   r   r   __classcell__)r   s   @r   r   r   C   s        	4 	4 	4 	4 	4 	4	' 	' 	'&	 	 		L 	L 	L	j 	j 	j	e 	e 	e 	e 	e 	e 	er   r   N)
r   r   r   iso_coder
   r   r   r   r   r   r   r   r   r   r      s        H    +       3   2 2 2 2 2x, 2 2 2
    /   (,e ,e ,e ,e ,eI ,e ,e ,e ,e ,er   r   )r   pysbd.abbreviation_replacerr   pysbd.between_punctuationr   pysbd.lang.commonr   r   pysbd.processorr   pysbd.utilsr   pysbd.punctuation_replacerr	   pysbd.lists_item_replacerr
   r   r   r   r   <module>r      s    				 < < < < < < 8 8 8 8 8 8 . . . . . . . . % % % % % %       : : : : : : 6 6 6 6 6 6ce ce ce ce ceVX ce ce ce ce cer   