
    j                        d dl Z d dlZd dlmZmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ d dlmZmZmZmZ d dlmZ  e
             eej                   e            dddd	d	dddd
gdf
dee         deee                  dedededee         dee         dee         deee                  dedee	         fd                                    ZdS )    N)IOListOptional)add_chunking_strategy)Elementprocess_metadata)FileTypeadd_metadata_with_filetype)convert_office_docexactly_oneget_last_modified_date get_last_modified_date_from_file)partition_pptxFTautofilenamefileinclude_page_breaksinclude_metadatainfer_table_structuremetadata_filenamemetadata_last_modifiedchunking_strategy	languagesdetect_language_per_elementreturnc
           	      R   | d} t          | |           d}t          |           dk    rt          j                            t          j                            |                     \  }}t          j                            |          \  }}t          j                            |           st          d|  d          t          |           }n|t          |          }t          j        d          }|                    |                                           |                                 |j        } t          j                            t          j                            |j                            \  }}t          j                            |          \  }}t          j                    5 }t%          | |d	d
           t          j                            || d          }t)          ||||p|||	          }ddd           n# 1 swxY w Y   |r|D ]}||j        _        |S )a  Partitions Microsoft PowerPoint Documents in .ppt format into their document elements.

    Parameters
    ----------
    filename
        A string defining the target filename path.
    file
        A file-like object using "rb" mode --> open(filename, "rb").
    include_page_breaks
        If True, includes a PageBreak element between slides
    infer_table_structure
        If True, any Table elements that are extracted will also have a metadata field
        named "text_as_html" where the table's text content is rendered into an html string.
        I.e., rows and cells are preserved.
        Whether True or False, the "text" field is always present in any Table element
        and is the text content of the table (no structure).
    metadata_last_modified
        The last modified date for the document.
    languages
        User defined value for `metadata.languages` if provided. Otherwise language is detected
        using naive Bayesian filter via `langdetect`. Multiple languages indicates text could be
        in either language.
        Additional Parameters:
            detect_language_per_element
                Detect language per element instead of at the document level.
    N )r   r   r   z	The file z does not exist.F)deletepptxzImpress MS PowerPoint 2007 XML)target_formattarget_filterz.pptx)r   r   r   r   r   r   )r   lenospathsplitabspathsplitextexists
ValueErrorr   r   tempfileNamedTemporaryFilewritereadclosenameTemporaryDirectoryr   joinr   metadatar   )r   r   r   r   r   r   r   r   r   r   kwargslast_modification_date_filename_no_pathbase_filenametmptmpdirpptx_filenameelementselements                       d/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/partition/ppt.pypartition_pptr>      sS   V ----!
8}}q gmmBGOOH,E,EFF7++,<==qw~~h'' 	ECCCCDDD!7!A!A		!A$!G!G)777		$))++		8 gmmBGOOCH,E,EFFw''(899M1		$	&	& 
& :		
 	
 	
 	
 V-D-D-DEE!""7/#9#S=S(C
 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
$  : 	: 	:G(9G%%Os   /AH		HH)r#   r*   typingr   r   r   unstructured.chunkingr   unstructured.documents.elementsr   r    unstructured.file_utils.filetyper	   r
   unstructured.partition.commonr   r   r   r   unstructured.partition.pptxr   PPTstrbytesboolr>        r=   <module>rK      s   				  % % % % % % % % % % 7 7 7 7 7 7 E E E E E E E E Q Q Q Q Q Q Q Q            7 6 6 6 6 6 HL))" $ %!"&'+,0'+&,X(-T TsmT
2e9
T T 	T
  T  }T %SMT  }T S	"T "&T 
']T T T  *) T T TrJ   