
    j                        U d dl Z d dlZd dlmZmZ d dlmZmZmZm	Z	m
Z
 d dlmZ dZe	e         ed<   e G d d                      Z ed	          	 	 ddede
ee                  defd            Z ed          	 	 ddede
ee                  defd            Z ed          	 	 ddede
ee                  defd            Zdeeef         dede
e j                  fdZdS )    N)	dataclassfield)IOAnyDictFinalOptional)requires_dependenciesz%Y:%m:%d %H:%M:%SEXIF_DATETIME_FMTc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	e
ej                 ed<   dZeed<   dZeed	<   dZeed
<   dZeed<   dZe
ej                 ed<   dZe
ej                 ed<   dZe
e         ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<    ee          Zeeef         ed<   d ZdS )Metadata authorcategorycommentscontent_statusNcreated
identifierkeywordslanguagelast_modified_bylast_printedmodifiedr   revisionsubjecttitleversiondescription	namespace)default_factory	exif_datac                     | j         S )N)__dict__)selfs    j/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/file_utils/metadata.pyto_dictzMetadata.to_dict#   s
    }    )__name__
__module____qualname__r   str__annotations__r   r   r   r   r	   datetimer   r   r   r   r   r   r   intr   r   r   r   r   r   dictr!   r   r   r&    r'   r%   r   r      sv        FCHcHcNC+/GXh'(///JHcHcc04L(8,-444,0Hhx()000HhsmGSE3OOOGSKIs !&d ; ; ;ItCH~;;;    r'   r   docxr   filenamefilereturnc                     ddl }| r|                    |           }n'|r|                    |          }nt          d          t          t	          |j        dd          t	          |j        dd          t	          |j        dd          t	          |j        dd          t	          |j        d	d          t	          |j        d
d          t	          |j        dd          t	          |j        dd          t	          |j        dd          t	          |j        dd          t	          |j        dd          t	          |j        dd          t	          |j        dd          t	          |j        dd          t	          |j        dd                    }|S )z;Extracts document metadata from a Microsoft .docx document.r   N#No filename nor file were specifiedr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r1   DocumentFileNotFoundErrorr   getattrcore_properties)r2   r3   r1   docmetadatas        r%   get_docx_metadatar=   '   s    KKK GmmH%%	 GmmD!! EFFFs*Hb99,j"==,j"==s24DbII+Y==3.bAA,j"==,j"== !46H"MMS0.$GG,j$??,j$??+Y;;c)7B77+Y;;  H$ Or'   openpyxlc           
         ddl }| r|                    |           }n'|r|                    |          }nt          d          t          di dt	          |j        dd          dt	          |j        dd          dt	          |j        d	d          d
t	          |j        d
d          dt	          |j        dd          dt	          |j        dd          dt	          |j        dd          dt	          |j        dd          dt	          |j        dd          dt	          |j        dd          dt	          |j        dd          dt	          |j        dd          dt	          |j        dd          dt	          |j        dd          dt	          |j        dd          dt	          |j        dd          }|S )z;Extracts document metadata from a Microsoft .xlsx document.r   Nr6   r   creatorr   r   r   contentStatusr   r   r   r   r   r   lastModifiedByr   lastPrintedr   r   r   r   r   r   r0   )r>   load_workbookr8   r   r9   
properties)r2   r3   r>   workbookr<   s        r%   get_xlsx_metadatarG   K   s0    OOO G))(33	 G))$// EFFF   x*Ir:::,j"=== x2ORHHH +Y===	
 H/CCC 8.bAAA ,j"=== ,j"=== !!46FKKK X0-FFF ,j$??? (-{B??? ,j$??? +Y;;; h)7B777  +Y;;;!H& Or'   PILc           	      *   ddl m} ddlm} | r|                    |           }nK|r:|                    t          j        |                                                    }nt          d          |	                                }i }|D ]2}|
                    ||          }|
                    |          }	|	||<   3t          |
                    dd          |
                    dd          t          |d          t          |d	          |
          }
|
S )z<Extracts metadata from a JPG image, including EXIF metadata.r   )Image)TAGSr6   Artistr   UserCommentDateTimeOriginalDateTime)r   r   r   r   r!   )rH   rJ   PIL.ExifTagsrK   openioBytesIOreadr8   getexifgetr   _get_exif_datetime)r2   r3   rJ   rK   imager!   	exif_dicttag_idtagdatar<   s              r%   get_jpg_metadatar]   p   s.    !!!!!! G

8$$	 G

2:diikk2233 EFFFI "I  hhvv&&}}V$$	#}}Xr**}b11"9.@AA $Iz::  H Or'   rY   keyc                     |                      |          }|sdS 	 t          j                            |t                    S # t          $ r Y dS w xY w)zJConverts a datetime string from the EXIF data to a Python datetime object.N)rV   r-   strptimer   
ValueError)rY   r^   dates      r%   rW   rW      sc    ==D t ))$0ABBB    tts   $A   
AA)r   N)r-   rR   dataclassesr   r   typingr   r   r   r   r	   unstructured.utilsr
   r   r+   r,   r   bytesr=   rG   r]   rW   r0   r'   r%   <module>rg      s    				 ( ( ( ( ( ( ( ( 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 4 4 4 4 4 !4 5: 3 3 3        4 v $    
2e9
         F z"" $! !!
2e9
! ! ! ! #"!H u $    
2e9
         F$sCx. s xHY?Z      r'   