
    j                        d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	m
Z
 d dlmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ  e             eej                   e
            ddddddddd ddgdfdee         deee                   dee!         de"dee         dee         de"dee         dee!         dee         deee                  de"dee         fd                                    Z#dgfdej$        dee         dee         deee                  def
dZ%	 	 	 	 d"dee         deee                   d ee         deej$                 deeeef                  f
d!Z&dS )#    N)IOCallableDictListOptional)add_chunking_strategy)ElementElementMetadataprocess_metadata)FileTypeadd_metadata_with_filetype)logger)exactly_one)convert_to_iso_8601)partition_html)apply_lang_metadata)partition_texti  TFautofilenamefilemax_partitioninclude_metadatametadata_filenamemetadata_last_modifiedprocess_attachmentsattachment_partitionermin_partitionchunking_strategy	languagesdetect_language_per_elementreturnc                 D   t          | |           | t          j        |           }nk|it          j        d          }|                    |                                           |                                 t          j        |j                  }|j	        
                    dd          }d|v }|j        }g }|rt          j        d           n6|n3d	|v sd
|v rt          |dgdd          }nt          |||dgdd          }t!          ||p| |          }|D ]	}||_        
|rt          j                    5 }t'          ||           t)          j        |          }|D ]}t(          j                            ||          }|t1          d           |||||          }|D ]=}||j        _        d|j        _        |p| |j        _        |                    |           >	 ddd           n# 1 swxY w Y   t;          t=          ||
|                    }|S )a!  Partitions a MSFT Outlook .msg file

    Parameters
    ----------
    filename
        A string defining the target filename path.
    file
        A file-like object using "rb" mode --> open(filename, "rb").
    max_partition
        The maximum number of characters to include in a partition. If None is passed,
        no maximum is applied. Only applies if processing text/plain content.
    metadata_filename
        The filename to use for the metadata.
    process_attachments
        If True, partition_email will process email attachments in addition to
        processing the content of the email itself.
    attachment_partitioner
        The partitioning function to use to process attachments.
    metadata_last_modified
        The last modified date for the document.
    min_partition
        The minimum number of characters to include in a partition. Only applies if
        processing text/plain content.
    languages
        User defined value for `metadata.languages` if provided. Otherwise language is detected
        using naive Bayesian filter via `langdetect`. Multiple languages indicates text could be
        in either language.
        Additional Parameters:
            detect_language_per_element
                Detect language per element instead of at the document level.
    )r   r   NFdeletezContent-Type 	encryptedzGEncrypted email detected. Partition function will return an empty list.z<html>z</div>msg)textr   r   detection_origin)r(   r   r   r   r   r)   )r   )msg_obj
output_dirz@Specify the attachment_partitioner kwarg to process attachments.)r   r   r   r   )elementsr   r    )r   
msg_parserMsOxMessagetempfileNamedTemporaryFilewritereadclosenameheader_dictgetbodyr   warningr   r   build_msg_metadatametadataTemporaryDirectoryextract_msg_attachment_infooslistdirpathjoin
ValueErrorr   file_directoryattached_to_filenameappendlistr   )r   r   r   r   r   r   r   r   r   r   r   r    kwargsr*   tmpcontent_typeis_encryptedr(   r,   r:   elementtmpdirattached_filesattached_fileattached_filenameattached_elementss                             d/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/partition/msg.pypartition_msgrQ      s   b ----(22		)777		$))++		(22
 &**>2>>L,.L<D H 
U	
 	
 	
 	
 
	T		X--!d""	
 
 
 "''d""
 
 
 "%X5  H
  $ $# -(** 	-f'FKKKKZ//N!/ - -$&GLL$G$G!)1$Z   %;$:.+A"/"/	% % %!  1 - -G0=G$-6:G$3<M<YQYG$9OOG,,,,	--	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	-* (C	
 	
 	
 H Os   >B+G66G:=G:r*   c           	          t          | dd          }|t          |          }t          | dd          }|d |D             }t          | dd          }|d |D             }t          ||t          | dd          |p|||          }d	|_        |S )
zKCreates an ElementMetadata object from the header information in the email.	sent_dateNsenderc                 ,    g | ]}t          |          S  str).0rT   s     rP   
<listcomp>z&build_msg_metadata.<locals>.<listcomp>   s    999VS[[999    
recipientsc                 ,    g | ]}t          |          S rV   rW   )rY   	recipients     rP   rZ   z&build_msg_metadata.<locals>.<listcomp>   s    ;;;i3y>>;;;r[   subject)sent_to	sent_fromr_   last_modifiedr   r   r'   )getattrr   r
   r)   )r*   r   r   r   
email_datera   r`   element_metadatas           rP   r9   r9      s     +t44J(44
400I99y999	g|T22G;;7;;;&D11,:
   ).%r[   r+   c                 r   t          | ||           | t          j        |           }np|jt          j        d          }|                    |                                           |                                 t          j        |j                  }n||}g }|j	        D ]}i }|j
        |d<   |j        |d<   |j        |d<   |j        |d<   |                    |           |P|d	z   |d         z   }t          |d
          5 }	|	                    |j                   ddd           n# 1 swxY w Y   |S )zExtracts information from email message attachments and returns a list of dictionaries.
    If 'output_dir' is provided, attachments are also saved to that directory.
    )r   r   r*   NFr#   r   	extension	file_sizepayload/wb)r   r-   r.   r/   r0   r1   r2   r3   r4   attachmentsAttachLongFilenameAttachExtensionAttachmentSizedatarD   open)
r   r   r+   r*   rG   list_attachments
attachmentattachment_infooutput_filenamefs
             rP   r<   r<      s    g>>>>(22		)777		$))++		(22		) ) )
&0&C
#'1'A$'1'@$%/_	"000!(3.1LLOot,, )
((() ) ) ) ) ) ) ) ) ) ) ) ) ) ) s   D++D/	2D/	)NNNN)'r=   r/   typingr   r   r   r   r   r-   unstructured.chunkingr   unstructured.documents.elementsr	   r
   r    unstructured.file_utils.filetyper   r   unstructured.loggerr   unstructured.partition.commonr   unstructured.partition.emailr   unstructured.partition.htmlr   unstructured.partition.langr   unstructured.partition.textr   MSGrX   bytesintboolrQ   r.   r9   r<   rV   r[   rP   <module>r      s.   				  5 5 5 5 5 5 5 5 5 5 5 5 5 5     7 7 7 7 7 7 V V V V V V V V V V Q Q Q Q Q Q Q Q & & & & & & 5 5 5 5 5 5 < < < < < < 6 6 6 6 6 6 ; ; ; ; ; ; 6 6 6 6 6 6 HL))" $#'!'+,0 %15#$'+&,X(-{ {sm{
2e9
{ C={ 	{
  }{ %SM{ { %X.{ C={  }{ S	"{ "&{ 
']{ { {  *) {D '-X	 #sm %SM S	"	
    @ # $ $04	& &sm&
2e9
& & j,-	&
 
$sCx.& & & & & &r[   