
    jj                         U d dl Z d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlmZ d dlmZ daej        e	         ed<   e G d d	e                      ZdS )
    N)	dataclass)create_ingest_doc_from_dict)BaseIngestDocBatchBaseSessionHandleBaseSingleIngestDocIngestDocSessionHandleMixin)logger)
SourceNodesession_handlec                       e Zd ZdededefdZdededej	        e         fdZ
dedej        ej        eej	        e         f                  fdZdS )	Readerdocingest_doc_dictreturnc                    | j         j        si|j                                        rP|j                                        j        r2t          j        d|j         d           |                                 n	 |	                    d          }t          j
        d| dt          j                                na# t          $ rT}t          j        d|           t          j
        d|j        j         dt          j                                Y d }~nd }~ww xY w| j        r|                     |j                   n|                                 |                                                                D ]
\  }}|||<   |j        S )NzFile exists: z, skipping downloadT)redact_sensitivez	Fetching z - PID: zfailed to print full doc: )read_configre_downloadfilenameis_filestatst_sizer	   infoupdate_source_metadatato_jsondebugosgetpid	Exceptionwarning	__class____name__retry_strategyget_fileto_dictitems)selfr   r   serialized_docekvs          m/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/ingest/pipeline/source.py
get_singlezReader.get_single   s    ,	$$&&	 !!##+	
 KIIIIJJJ&&((((X!$d!C!CNNNNNOOOO X X X;Q???V)?VVVVWWWWWWWWX " ##CL1111KKMM'')) 	# 	#DAq!"OA|s   7AB9 9
DA
DD	doc_batchc                     | j         r|                      |j                   n|                                 |                                                                D ]
\  }}|||<   d |j        D             S )Nc                     g | ]	}|j         
S  )r   ).0r   s     r,   
<listcomp>z$Reader.get_batch.<locals>.<listcomp>6   s    >>>>>>    )r#   	get_filesr%   r&   ingest_docs)r'   r.   r   r*   r+   s        r,   	get_batchzReader.get_batch/   s     	"	 34444!!!%%''--// 	# 	#DAq!"OA>>	(=>>>>r4   c                    	 t          |          }t          |t                    rt          |j        ant          |_        t          |t
                    r|                     ||          S t          |t                    r|                     ||          S t          dt          |           d          # t          $ r4}| j        j        r t          j        d| d| d           Y d }~d S d }~ww xY w)	N)r   r   )r.   r   ztype of doc (zF) is not a recognized type: BaseSingleIngestDoc or BaseSingleIngestDocz/failed to get data associated with source doc: z, T)exc_info)r   
isinstancer   r   _session_handler   r-   r   r7   
ValueErrortyper   pipeline_contextraise_on_errorr	   error)r'   r   r   r)   s       r,   runz
Reader.run8   s6   	-o>>C#:;; 9!) &)%7NN*8C'#233 3PPPC!344 ~~_~UUU BDII B B B    	 	 	$3 LX/XXUVXX    44444	s$   A*B9 -+B9  B9 9
C7)C22C7N)r"   
__module____qualname__r   dictstrr-   r   tListr7   OptionalUnionrA   r1   r4   r,   r   r      s        1 D S    4?#5 ? ?QRQWX[Q\ ? ? ? ?4 AJqwsAF3K?O7P,Q      r4   r   )r   typingrF   dataclassesr   &unstructured.ingest.connector.registryr   unstructured.ingest.interfacesr   r   r   r   unstructured.ingest.loggerr	   'unstructured.ingest.pipeline.interfacesr
   r   rH   __annotations__r   r1   r4   r,   <module>rQ      s    					     ! ! ! ! ! ! N N N N N N            . - - - - - > > > > > > 15
,- 4 4 4 ? ? ? ? ?Z ? ? ? ? ?r4   