
    j                         d Z ddlmZmZ ddlmZ ddlmZmZ ddl	m
Z
 ddlmZ  G d de          Z G d	 d
e          ZdS )z
Tokenizer Interface
    )ABCabstractmethod)Iterator)ListTuple)
overridden)string_span_tokenizec                       e Zd ZdZededee         fd            Zdedee	e
e
f                  fdZdee         deee                  fdZdee         deee	e
e
f                           fdZd	S )

TokenizerIz
    A processing interface for tokenizing a string.
    Subclasses must define ``tokenize()`` or ``tokenize_sents()`` (or both).
    sreturnc                 f    t          | j                  r|                     |g          d         S dS )zL
        Return a tokenized copy of *s*.

        :rtype: List[str]
        r   N)r   tokenize_sentsselfr   s     [/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/nltk/tokenize/api.pytokenizezTokenizerI.tokenize   s<     d)** 	/&&s++A..	/ 	/    c                     t                      )z
        Identify the tokens using integer offsets ``(start_i, end_i)``,
        where ``s[start_i:end_i]`` is the corresponding token.

        :rtype: Iterator[Tuple[int, int]]
        NotImplementedErrorr   s     r   span_tokenizezTokenizerI.span_tokenize%   s     "###r   stringsc                        fd|D             S )z
        Apply ``self.tokenize()`` to each element of ``strings``.  I.e.:

            return [self.tokenize(s) for s in strings]

        :rtype: List[List[str]]
        c                 :    g | ]}                     |          S  )r   ).0r   r   s     r   
<listcomp>z-TokenizerI.tokenize_sents.<locals>.<listcomp>6   s%    222Qa  222r   r   )r   r   s   ` r   r   zTokenizerI.tokenize_sents.   s     3222'2222r   c              #   \   K   |D ]&}t          |                     |                    V  'dS )z
        Apply ``self.span_tokenize()`` to each element of ``strings``.  I.e.:

            return [self.span_tokenize(s) for s in strings]

        :yield: List[Tuple[int, int]]
        N)listr   )r   r   r   s      r   span_tokenize_sentszTokenizerI.span_tokenize_sents8   sF        	. 	.At))!,,------	. 	.r   N)__name__
__module____qualname____doc__r   strr    r   r   tupleintr   r   r!   r   r   r   r   r      s         
 /# /$s) / / / ^/$s $xc3h'@ $ $ $ $3d3i 3DcO 3 3 3 3.Cy.	$uS#X'	(. . . . . .r   r   c                   D    e Zd ZdZeed                         Zd Zd ZdS )StringTokenizerzxA tokenizer that divides a string into substrings by splitting
    on the specified string (defined in subclasses).
    c                     t           Nr   )r   s    r   _stringzStringTokenizer._stringK   s
     "!r   c                 6    |                     | j                  S r,   )splitr-   r   s     r   r   zStringTokenizer.tokenizeP   s    wwt|$$$r   c              #   @   K   t          || j                  E d {V  d S r,   )r	   r-   r   s     r   r   zStringTokenizer.span_tokenizeS   s0      '4<88888888888r   N)	r"   r#   r$   r%   propertyr   r-   r   r   r   r   r   r*   r*   F   sa          " " ^ X"% % %9 9 9 9 9r   r*   N)r%   abcr   r   collections.abcr   typingr   r   nltk.internalsr   nltk.tokenize.utilr	   r   r*   r   r   r   <module>r7      s     $ # # # # # # # $ $ $ $ $ $         % % % % % % 3 3 3 3 3 3.. .. .. .. .. .. .. ..b9 9 9 9 9j 9 9 9 9 9r   