
    #j                    t    d Z ddlmZ ddlmZ ddlmZ 	 ddlZdZn# e	$ r dZY nw xY w G d d	e          Z
dS )
zNLTK text splitter.    )annotations)Any)TextSplitterNTFc                  8     e Zd ZdZ	 	 dddd fdZddZ xZS )NLTKTextSplitterz"Splitting text using NLTK package.

englishF)use_span_tokenize	separatorstrlanguager
   boolkwargsr   returnNonec               b    t                      j        di | || _        || _        || _        | j        r| j        rd}t          |          t          sd}t          |          | j        r+t          j	        
                    | j                  | _        dS t          j	        j        | _        dS )zInitialize the NLTK splitter.z6When use_span_tokenize is True, separator should be ''zANLTK is not installed, please install it with `pip install nltk`.N )super__init__
_separator	_language_use_span_tokenize
ValueError	_HAS_NLTKImportErrornltktokenize_get_punkt_tokenizer
_tokenizersent_tokenize)selfr   r   r
   r   msg	__class__s         g/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/langchain_text_splitters/nltk.pyr   zNLTKTextSplitter.__init__   s     	""6"""#!"3" 	"t 	"JCS//! 	#UCc"""" 	:"m@@PPDOOO"m9DOOO    text	list[str]c                   | j         rt          | j                            |                    }g }t	          |          D ]T\  }\  }}|dk    r'||dz
           d         }|||         |||         z   }n
|||         }|                    |           Un|                     || j                  }|                     || j                  S )z&Split incoming text and return chunks.r      )r   )	r   listr   span_tokenize	enumerateappendr   _merge_splitsr   )	r!   r&   spanssplitsistartendprev_endsentences	            r$   
split_textzNLTKTextSplitter.split_text,   s     " 	D66t<<==EF#,U#3#3 ( (<E3q55$QU|AH#HUN3d59oEHH#E#IHh''''( __TDN_CCF!!&$/:::r%   )r   r	   )
r   r   r   r   r
   r   r   r   r   r   )r&   r   r   r'   )__name__
__module____qualname____doc__r   r6   __classcell__)r#   s   @r$   r   r      ss        ,,  !:
 #(: : : : : : : :0; ; ; ; ; ; ; ;r%   r   )r:   
__future__r   typingr   langchain_text_splitters.baser   r   r   r   r   r   r%   r$   <module>r?      s      " " " " " "       6 6 6 6 6 6KKKII   III*; *; *; *; *;| *; *; *; *; *;s    ''