
    j*                        U d dl Z d dlmZ d dlmZmZ e j        dk     rd dlmZ nd dlmZ d dl	Z	d dl	m
Z d dl	mZ d dl	mZ d	Zee         ed
<   dedefdZ ee          dedee         fd            Z ee          dedee         fd            Z ee          dedeeeef                  fd            Z
dS )    N)	lru_cache)ListTuple)      )Final)pos_tag)sent_tokenize)word_tokenize   CACHE_MAX_SIZEpackage_namepackage_categoryc                     	 t          j        | d|             dS # t          $ r t          j        |            Y dS w xY w)z8If the required nlt package is not present, download it./N)nltkfindLookupErrordownload)r   r   s     c/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/nlp/tokenize.py%_download_nltk_package_if_not_presentr      s`    $	%666677777 $ $ $l######$s    ??)maxsizetextreturnc                 B    t          dd           t          |           S )zFA wrapper around the NLTK sentence tokenizer with LRU caching enabled.
tokenizerspunktr   r   )r   _sent_tokenizer   s    r   r
   r
      &     *<V]^^^^$    c                 B    t          dd           t          |           S )zBA wrapper around the NLTK word tokenizer with LRU caching enabled.r   r   r   )r   _word_tokenizer    s    r   r   r   !   r!   r"   c                     t          dd           t          dd           t          |           }g }|D ]3}t          |          }|                    t	          |                     4|S )z>A wrapper around the NLTK POS tagger with LRU caching enabled.r   r   r   taggersaveraged_perceptron_tagger)r   r   r$   extend_pos_tag)r   	sentencesparts_of_speechsentencetokenss        r   r	   r	   (   s     *<V]^^^^)"1    t$$IO 1 1))x//0000r"   )sys	functoolsr   typingr   r   version_infotyping_extensionsr   r   r	   r)   r
   r   r   r$   r   int__annotations__strr    r"   r   <module>r7      s   




              f'''''''  $ $ $ $ $ $ 0 0 0 0 0 0 0 0 0 0 0 0 c
      $ $s $ $ $ $ >"""   S	       #"  >"""   S	       #"  >"""# $uS#X/    #"  r"   