
    jV	                         d dl Z d dlZ G d de          Z G d de          Z G d de          Z G d d	e          ZdS )
    Nc                       e Zd Zd Zd ZdS )Rulec                 "    || _         || _        d S N)patternreplacement)selfr   r   s      U/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/pysbd/utils.py__init__zRule.__init__   s    &    c                 X    d                     | j        j        | j        | j                  S )Nz&<{} pattern="{}" and replacement="{}">)format	__class____name__r   r   r	   s    r
   __repr__zRule.__repr__   s.    7>>N#T\43CE E 	Er   N)r   
__module____qualname__r   r    r   r
   r   r      s7        ' ' 'E E E E Er   r   c                       e Zd ZdZd ZdS )Textax  Extending str functionality to apply regex rules

    https://stackoverflow.com/questions/4698493/can-i-add-custom-methods-attributes-to-built-in-python-types

    Parameters
    ----------
    str : str
        string content

    Returns
    -------
    str
        input as it is if rule pattern doesnt match
        else replacing found pattern with replacement chars
    c                 P    |D ]"}t          j        |j        |j        |           } #| S r   )resubr   r   )r	   ruleseach_rs      r
   applyz
Text.apply!   s2     	D 	DF6&.&*<dCCDDr   N)r   r   r   __doc__r   r   r   r
   r   r      s-             r   r   c                        e Zd Zd Zd Zd ZdS )TextSpanc                 0    || _         || _        || _        dS )aV  
        Sentence text and its start & end character offsets within original text

        Parameters
        ----------
        sent : str
            Sentence text
        start : int
            start character offset of a sentence in original text
        end : int
            end character offset of a sentence in original text
        N)sentstartend)r	   r"   r#   r$   s       r
   r   zTextSpan.__init__)   s     	
r   c                 ~    d                     | j        j        t          | j                  | j        | j                  S )Nz!{0}(sent={1}, start={2}, end={3}))r   r   r   reprr"   r#   r$   r   s    r
   r   zTextSpan.__repr__:   s7    299N#T$)__dj$(L L 	Lr   c                     t          | |j                  r0| j        |j        k    o| j        |j        k    o| j        |j        k    S d S r   )
isinstancer   r"   r#   r$   )r	   others     r
   __eq__zTextSpan.__eq__>   sP    dEO,, 	c9
*btzU[/HbTXY^YbMbb	c 	cr   N)r   r   r   r   r   r*   r   r   r
   r    r    '   sI          "L L Lc c c c cr   r    c                        e Zd ZdZddZd ZdS )PySBDFactoryz.pysbd as a spacy component through entrypointsenc                 L    || _         t          j        |dd          | _        d S )NFT)languageclean	char_span)nlppysbd	Segmenterseg)r	   r2   r/   s      r
   r   zPySBDFactory.__init__F   s,    ?HE-13 3 3r   c                     | j                             |j                  }d |D             }|D ]}|j        |v rdnd|_        |S )Nc                     g | ]	}|j         
S r   )r#   ).0r"   s     r
   
<listcomp>z)PySBDFactory.__call__.<locals>.<listcomp>M   s    CCC$4:CCCr   TF)r5   segmenttext_with_wsidxis_sent_start)r	   docsents_char_spansstart_token_idstokens        r
   __call__zPySBDFactory.__call__K   si    8++C,<==CC2BCCC 	B 	BE+09&5,6 ,644;@ 
r   N)r-   )r   r   r   r   r   rB   r   r   r
   r,   r,   C   s=        883 3 3 3
    r   r,   )r   r3   objectr   strr   r    r,   r   r   r
   <module>rE      s    
			 E E E E E6 E E E    3   ,c c c c cv c c c8    6     r   