
    js                         d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ d dlmZmZmZmZ d dlmZ dgZg d	Zdd
ede
e         defdZddede
e         defdZddede
e         defdZe G d d                      ZdS )    N)	dataclass)Path)ListOptional)LocalSourceConnectorSimpleLocalConfig)BaseConnectorConfigBaseSingleIngestDocProcessorConfig
ReadConfig)loggerz.zip)z.tarz.tar.gzz.tgzfilenamepathreturnc           	          |r$t          |                              dd           t           fdt          D                       rt	           |          S t           fdt
          D                       rt           |          S t          d                     d	                    t          t
          z                                 )	zC
    Takes in a compressed zip or tar file and uncompresses it
    T)parentsexist_okc              3   B   K   | ]}                     |          V  d S Nendswith.0extr   s     o/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/ingest/utils/compression.py	<genexpr>z"uncompress_file.<locals>.<genexpr>   s1      
:
:c8S!!
:
:
:
:
:
:    )zip_filenamer   c              3   B   K   | ]}                     |          V  d S r   r   r   s     r   r   z"uncompress_file.<locals>.<genexpr>    s1      <<Xs##<<<<<<r   )tar_filenamer   z5filename {} not a recognized compressed extension: {}z, )
r   mkdiranyZIP_FILE_EXTuncompress_zip_fileTAR_FILE_EXTuncompress_tar_file
ValueErrorformatjoin)r   r   s   ` r   uncompress_filer*      s    
  6T

555

:
:
:
:\
:
:
::: 

"tDDDD	<<<<|<<<	<	< 
"tDDDDCJJ		,566 
 
 	
r   r   c                    t           j                            |           \  }}t          D ]1}|                    |          r|d t          |                    } n2|r|n"t           j                            || d          }t          j        d|  d|            t          j
        |           5 }|                    |           d d d            n# 1 swxY w Y   |S )Nz-zip-uncompressedzextracting zip  -> r   )osr   splitr#   r   lenr)   r   infozipfileZipFile
extractall)r   r   headtailr   zfiles         r   r$   r$   +   s$   |,,JD$  == 	CHH+&DE	 K44RW\\$40J0J0JKKD
K:,::D::;;;		&	& $%d###$ $ $ $ $ $ $ $ $ $ $ $ $ $ $Ks   1CCCr    c                    t           j                            |           \  }}t          D ]1}|                    |          r|d t          |                    } n2|r|n"t           j                            || d          }t          j        d|  d|            t          j
        | d          5 }|                    |           d d d            n# 1 swxY w Y   |S )Nz-tar-uncompressedzextracting tar r,   zr:gzr-   )r.   r   r/   r%   r   r0   r)   r   r1   tarfileopenr4   )r    r   r5   r6   r   tfiles         r   r&   r&   8   s&   |,,JD$  == 	CHH+&DE	 K44RW\\$40J0J0JKKD
K:,::D::;;;	lF	+	+ $ud###$ $ $ $ $ $ $ $ $ $ $ $ $ $ $Ks   2CCCc                   J    e Zd ZU eed<   eed<   eed<   dedee         fdZ	dS )CompressionSourceConnectorMixinprocessor_configread_configconnector_configdocr   c                    |                                  t          t          |j                            }t	          j        | j                  }t	          j        | j                  }|                    | j        j        d          }| j        j	        
                    t          j                  r| j        j	         | |_	        n!| j        j	         t          j         | |_	        t          t          |d          ||          }t          j        d|                                            |                                 |                                S )z
        Utility function which helps process compressed files. Extracts the contents and returns
        generated ingest docs via local source connector
        )r    T)
input_path	recursive)r@   r?   r>   z Created local source connector: )get_filer*   strr   copyr?   r>   replacedownload_dir
output_dirr   r.   sepr   r   r   r1   to_json
initializeget_ingest_docs)selfrA   r   new_read_configsnew_process_configsrelative_pathlocal_connectors          r   process_compressed_docz6CompressionSourceConnectorMixin.process_compressed_docL   sL    	CL(9(9:::9T%566"i(=>>T%5%BBGG +44RV<< 	040E0P-aR_-a-a** (3LRVL]LL  * /.   )0
 
 
 	R7N7N7P7PRRSSS""$$$..000r   N)
__name__
__module____qualname__r   __annotations__r   r	   r
   r   rU    r   r   r=   r=   F   sc         %%%%))))1*= 1$GZB[ 1 1 1 1 1 1r   r=   r   )rH   r.   r9   r2   dataclassesr   pathlibr   typingr   r   #unstructured.ingest.connector.localr   r   unstructured.ingest.interfacesr	   r
   r   r   unstructured.ingest.loggerr   r#   r%   rG   r*   r$   r&   r=   rZ   r   r   <module>ra      s    				   ! ! ! ! ! !       ! ! ! ! ! ! ! ! W W W W W W W W            . - - - - -x***
 
c 
# 
# 
 
 
 
*
 
c 
# 
# 
 
 
 
 c # #     "1 "1 "1 "1 "1 "1 "1 "1 "1 "1r   