
    jׂ                    n   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZmZmZ d dlm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z& ee'e'f         Z(d	e)d
<   ee(df         Z*d	e)d<    G d dej+                  Z, G d dej+                  Z-ej.         G d d                      Z/ej.         G d d                      Z0 G d de          Z1 G d de          Z2 G d d          Z3 G d dej4                  Z5 ed          Z6dLd Z7i fdMd%Z8 G d& d'          Z9 G d( d)ej+                  Z: G d* d+e:          Z; G d, d-e:          Z< G d. d/e<          Z= G d0 d1e<          Z> G d2 d3e<          Z? G d4 d5e<          Z@ G d6 d7e<          ZA G d8 d9e<          ZB G d: d;e<          ZC G d< d=e<          ZD G d> d?e<          ZE G d@ dAe<          ZF G dB dCe<          ZG G dD dEeG          ZH G dF dGe<          ZI G dH dIe<          ZJi e9jK        eBe9jL        eBe9jM        eBe9jN        eBe9jO        eBe9jP        e<e9jQ        e<e9jR        e@e9jS        e@e9jT        e@e9jU        e@e9jV        e@e9jW        e@e9jX        e@e9jY        eAe9jZ        eAe9j[        eAe9j\        eIe9j]        eIe9j^        eJe9j_        eJe9j`        eJe9ja        e?e9jb        e?e9jc        eEe9jd        eEe9je        eEe9jf        eGe9jg        eCe9jh        eDe9ji        e=e9jj        eFiZkdJe)dK<   dS )N    )annotationsN)MappingProxyType)
AnyCallableDict	FrozenSetListOptionalSequenceTupleUnioncast)	ParamSpec	TypeAlias	TypedDict)TYPE_TO_COORDINATE_SYSTEM_MAPCoordinateSystemRelativeCoordinateSystem)#UNSTRUCTURED_INCLUDE_DEBUG_METADATA)lazypropertyr   Point.Pointsc                      e Zd ZdZdS )NoIDz4Class to indicate that an element do not have an ID.N__name__
__module____qualname____doc__     i/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/documents/elements.pyr   r      s        >>>>r!   r   c                      e Zd ZdZdS )UUIDz5Class to indicate that an element should have a UUID.Nr   r    r!   r"   r$   r$   #   s        ????r!   r$   c                      e Zd ZU dZdZded<   dZded<   dZded<   dZded<   dZ	ded	<   dZ
ded
<   dZded<   d Zedd            ZdS )DataSourceMetadataz@Metadata fields that pertain to the data source of the document.NOptional[str]urlversionzOptional[Dict[str, Any]]record_locatordate_createddate_modifieddate_processedzOptional[List[Dict[str, Any]]]permissions_datac                H    d | j                                         D             S )Nc                    i | ]
\  }}|||S Nr    ).0keyvalues      r"   
<dictcomp>z.DataSourceMetadata.to_dict.<locals>.<dictcomp>4   s#    XXXzsEeFWUFWFWFWr!   )__dict__itemsselfs    r"   to_dictzDataSourceMetadata.to_dict3   s$    XXT]-@-@-B-BXXXXr!   
input_dictDict[str, Any]c                    d t          j        |           D             fd|                                D             } | di |S )Nc                    g | ]	}|j         
S r    )name)r2   fs     r"   
<listcomp>z0DataSourceMetadata.from_dict.<locals>.<listcomp>9   s    ;;;qAF;;;r!   c                $    i | ]\  }}|v 	||S r    r    )r2   kvsupported_fieldss      r"   r5   z0DataSourceMetadata.from_dict.<locals>.<dictcomp>:   s*    MMMAq<L7L7L17L7L7Lr!   r    )dcfieldsr7   )clsr;   argsrE   s      @r"   	from_dictzDataSourceMetadata.from_dict6   sX     <;BIcNN;;;MMMM!1!1!3!3MMMs{{T{{r!   r;   r<   )r   r   r   r   r(   __annotations__r)   r*   r+   r,   r-   r.   r:   classmethodrJ   r    r!   r"   r&   r&   '   s         JJC!G!!!!/3N3333"&L&&&&#'M''''$(N((((7;;;;;Y Y Y    [  r!   r&   c                  V    e Zd ZU dZded<   ded<   ddZddZd Zedd            Z	dS )CoordinatesMetadataz?Metadata fields that pertain to the coordinates of the element.Optional[Points]pointsOptional[CoordinateSystem]systemc                P    ||||t          d          || _        || _        d S )NzNCoordinates points should not exist without coordinates system and vice versa.)
ValueErrorrQ   rS   )r9   rQ   rS   s      r"   __init__zCoordinatesMetadata.__init__F   s>    Nv1v7Ifn`   r!   otherr   returnboolc                    t          |t                    sdS t          | j        |j        k    | j        |j        k    g          S NF)
isinstancerO   allrQ   rS   r9   rW   s     r"   __eq__zCoordinatesMetadata.__eq__O   sI    %!455 	5,,
 
 	
r!   c                    | j         | j        d nt          | j        j        j                  | j        d n| j        j        | j        d n| j        j        dS )N)rQ   rS   layout_widthlayout_height)rQ   rS   str	__class__r   widthheightr8   s    r"   r:   zCoordinatesMetadata.to_dictY   sZ    k"k1dds4;;P;Y7Z7Z$(K$7DDT[=N%)[%8TTdk>P	
 
 	
r!   r;   r<   c                d   dd}|                     d          }| ||          nd }|                     d          }|                     d          }|                     d	          }|d n8|d
k    rt                      n$|!||t          v rt          |         ||          nd } | ||          S )Nsequence_of_sequencesSequence[Sequence[float]]rX   r   c           	     >   g }| D ]}t          |t                    r6|                    t          t          t          |                               Mt          |t
                    r(|                    t          t          |                     t          |          S r1   )r\   listappendr   r   tuple)rh   rQ   seqs      r"   convert_to_pointsz8CoordinatesMetadata.from_dict.<locals>.convert_to_pointsd   s    "$F, 4 4c4(( 4MM$ueCjj"9"9::::U++ 4MM$uc"2"2333== r!   rQ   rS   ra   rb   r   rQ   rS   )rh   ri   rX   r   )getr   r   )	rH   r;   ro   input_pointsrQ   system_namere   rf   rS   s	            r"   rJ   zCoordinatesMetadata.from_dicta   s    	! 	! 	! 	! "~~h//4@4L""<000RV !nnX..~..00 " D 888 *+++ !&#@@@	 /{;E6JJJ  	 s&0000r!   N)rQ   rP   rS   rR   )rW   r   rX   rY   rK   )
r   r   r   r   rL   rV   r_   r:   rM   rJ   r    r!   r"   rO   rO   ?   s         II&&&&   
 
 
 

 
 
 !1 !1 !1 [!1 !1 !1r!   rO   c                  2    e Zd ZU dZded<   ded<   ded<   dS )RegexMetadataz=Metadata that is extracted from a document element via regex.rc   textintstartendNr   r   r   r   rL   r    r!   r"   ru   ru      s1         GGIIIJJJHHHHHr!   ru   c                  2    e Zd ZU dZded<   ded<   ded<   dS )	Linkz#Metadata related to extracted linksr'   rv   rc   r(   rw   start_indexNrz   r    r!   r"   r|   r|      s9         --HHHr!   r|   c                  6    e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded <   d!ed"<   d#ed$<   ded%<   ded&<   ded'<   ded(<   ded)<   ded*<    edg          Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dEdFd/ZdGd3ZdHd6Z	dI fd:Z
edJd=            ZedKd?            ZedKd@            ZdLdAZdMdBZedNdD            Z xZS )OElementMetadataz>Fully-dynamic replacement for dataclass-based ElementMetadata.r'   attached_to_filenameOptional[int]category_depthOptional[CoordinatesMetadata]coordinatesOptional[DataSourceMetadata]data_sourceOptional[float]detection_class_probdetection_originOptional[List[str]]emphasized_text_contentsemphasized_text_tagsfile_directoryfilenamefiletype
image_pathimage_base64image_mime_typeheader_footer_typeOptional[bool]is_continuation	languageslast_modified
link_texts	link_urlsOptional[List[Link]]links	page_namepage_number'Optional[str | uuid.UUID | NoID | UUID]	parent_id(Optional[Dict[str, List[RegexMetadata]]]regex_metadatasection	sent_fromsent_tosubjecttext_as_htmlr(   NOptional[str | pathlib.Path]rX   Nonec                8   || _         || _        || _        || _        || _        || _        || _        t          |	t          j	                  rt          |	          n|	}	t          j                            |	pd          \  }}|p|pd | _        |pd | _        |
| _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _         || _!        || _"        d S )N )#r   r   r   r   r   r   r   r\   pathlibPathrc   ospathsplitr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   )r9   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   directory_path	file_names                                  r"   rV   zElementMetadata.__init__   s)   > %9!,&&$8!(@%$8! %/x$F$FT3x===H$&GMM(.b$A$A!	 -FF$!)T "4$."*$"
"&","(r!   rW   objectrY   c                P    t          |t                    sdS | j        |j        k    S )zImplments equivalence, like meta == other_meta.

        All fields at all levels must match. Unpopulated fields are not considered except when
        populated in one and not the other.
        F)r\   r   rG   r^   s     r"   r_   zElementMetadata.__eq__  s*     %11 	5{el**r!   	attr_namerc   c                >    || j         v rdS t          d| d          )z)Only called when attribute doesn't exist.Nz+'ElementMetadata' object has no attribute '')_known_field_namesAttributeError)r9   r   s     r"   __getattr__zElementMetadata.__getattr__"  s/    ///4W9WWWXXXr!   _ElementMetadata__name_ElementMetadata__valuer   c                    ||| j         v rt          | |           d S t          s|| j        v rd S t	                                          ||           d S r1   )r6   delattrr   DEBUG_FIELD_NAMESsuper__setattr__)r9   r   r   rd   s      r"   r   zElementMetadata.__setattr__(  sd    ?&&f%%%F2 	vAW7W7WFFG,,,,,r!   	meta_dictr<   c                8   t          j        |          }t                      }|                                D ]b\  }}|dk    r t                              |          |_        +|dk    r t                              |          |_        Qt          |||           c|S )zConstruct from a metadata-dict.

        This would generally be a dict formed using the `.to_dict()` method and stored as JSON
        before "rehydrating" it using this method.
        r   r   )
copydeepcopyr   r7   rO   rJ   r   r&   r   setattr)rH   r   r9   
field_namefield_values        r"   rJ   zElementMetadata.from_dict2  s     M),,	  '0'8'8 	7 	7#J]**#6#@#@#M#M  },,#5#?#?#L#L  j+6666r!   MappingProxyType[str, Any]c                h     t           fd j                                        D                       S )zPopulated metadata fields in this object as a read-only dict.

        Basically `self.__dict__` but it needs a little filtering to remove entries like
        "_known_field_names". Note this is a *snapshot* and will not reflect later changes.
        c                X    i | ]&\  }}|                     d           s|j        v#||'S )_)
startswithr   )r2   r   r   r9   s      r"   r5   z*ElementMetadata.fields.<locals>.<dictcomp>N  sP       +J!,,S11 7AH^6^6^ K6^6^6^r!   )r   r6   r7   r8   s   `r"   rG   zElementMetadata.fieldsF  sK         /3}/B/B/D/D  
 
 	
r!   c                z      j         t           fd j                                        D                       S )a,  Populated non-ad-hoc fields in this object as a read-only dict.

        Only fields declared at the top of this class are included. Ad-hoc fields added to this
        instance by assignment are not. Note this is a *snapshot* and will not reflect changes that
        occur after this call.
        c                6    i | ]\  }}|v 	|j         v||S r    )r   )r2   r   r   known_field_namesr9   s      r"   r5   z0ElementMetadata.known_fields.<locals>.<dictcomp>_  sB       +J"333
$J`8`8` K8`8`8`r!   )r   r   r6   r7   )r9   r   s   `@r"   known_fieldszElementMetadata.known_fieldsU  sZ     !3    /3}/B/B/D/D  
 
 	
r!   c                Z   t          j        t          | j                            }| j        D ]}|                    |d           d |                                D             }| j        | j                                        |d<   | j	        | j	                                        |d<   |S )zConvert this metadata to dict form, suitable for JSON serialization.

        The returned dict is "sparse" in that no key-value pair appears for a field with value
        `None`.
        Nc                2    i | ]\  }}|g k    |i k    ||S r    r    )r2   r   r4   s      r"   r5   z+ElementMetadata.to_dict.<locals>.<dictcomp>s  s7     %
 %
 %
!
E{{u{{ *{{r!   r   r   )
r   r   dictrG   r   popr7   r   r:   r   )r9   r   r   s      r"   r:   zElementMetadata.to_dictf  s     M$t{"3"344	 0 	, 	,JMM*d++++%
 %
%.__%6%6%
 %
 %
	 ''+'7'?'?'A'AIm$''+'7'?'?'A'AIm$r!   c                    t          |t                    st          d          |j                                        D ]\  }}t          | ||           dS )ak  Update self with all fields present in `other`.

        Semantics are like those of `dict.update()`.

        - fields present in both `self` and `other` will be updated to the value in `other`.
        - fields present in `other` but not `self` will be added to `self`.
        - fields present in `self` but not `other` are unchanged.
        - `other` is unchanged.
        - both ad-hoc and known fields participate in update with the same semantics.

        Note that fields listed in DEBUG_FIELD_NAMES are skipped in this process. Those can only be
        updated by direct assignment to the instance.
        z@argument to '.update()' must be an instance of 'ElementMetadata'N)r\   r   rU   rG   r7   r   )r9   rW   r   r   s       r"   updatezElementMetadata.update  sg     %11 	a_```',|'9'9';'; 	3 	3#JD*k2222	3 	3r!   FrozenSet[str]c                *    t          | j                  S )aK  field-names for non-user-defined fields, available on all ElementMetadata instances.

        Note that the first call to this lazyproperty adds a `"_known_field_names"` item to the
        `__dict__` of this instance, so this be called *before* iterating through `self.__dict__`
        to avoid a mid-iteration mutation.
        )	frozensetrL   r8   s    r"   r   z"ElementMetadata._known_field_names  s     -...r!   )NNNNNNNNNNNNNNNNNNNNNNNNNNNN):r   r'   r   r   r   r   r   r   r   r   r   r   r   r   r   r'   r   r   r   r'   r   r'   r   r'   r   r   r   r   r   r'   r   r   r   r   r   r   r   r'   r   r   r   r   r   r   r   r'   r   r   r   r   r   r'   r   r'   r(   r'   rX   r   rW   r   rX   rY   )r   rc   rX   r   )r   rc   r   r   rX   r   )r   r<   rX   r   )rX   r   rX   r<   )rW   r   rX   r   )rX   r   )r   r   r   r   rL   r   r   rV   r_   r   r   rM   rJ   propertyrG   r   r:   r   r   r   __classcell__rd   s   @r"   r   r      s        HH ('''!!!!....----))))####1111----!!!!""""%%%%####""""    ####""""6666<<<< #"""      
 "	#5"677 /3(,5948048<48(,15"&,0$(*.)-'+*.)-&*#'%)=ACG!%)-'+!%&*!;B B B B BH+ + + +Y Y Y Y- - - - - -    [& 
 
 
 X
 
 
 
 X
    63 3 3 3( 	/ 	/ 	/ \	/ 	/ 	/ 	/ 	/r!   r   c                  H    e Zd ZdZdZ	 dZ	 dZ	 dZ	 dZ	 e	dd	            Z
d
S )ConsolidationStrategya  Methods by which a metadata field can be consolidated across a collection of elements.

    These are assigned to `ElementMetadata` field-names immediately below. Metadata consolidation is
    part of the chunking process and may arise elsewhere as well.
    dropfirstLIST_CONCATENATElist_uniqueregexrX    Dict[str, ConsolidationStrategy]c                   i d| j         d| j        d| j        d| j         d| j        d| j        d| j        d| j        d	| j         d
| j         d| j         d| j        d| j        d| j        d| j        d| j        d| j        | j         | j        | j        | j        | j        | j         | j         | j        | j        | j         | j         | j         | j         | j        | j         dS )a7  Mapping from ElementMetadata field-name to its consolidation strategy.

        Note that only _TextSection objects ("pre-chunks" containing only `Text` elements that are
        not `Table`) have their metadata consolidated, so these strategies are only applicable for
        non-Table Text elements.
        r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   max_charactersr   r   r   r   r   r   r   r   r   r(   )FIRSTDROPr   LIST_UNIQUEREGEXrH   s    r"   field_consolidation_strategiesz4ConsolidationStrategy.field_consolidation_strategies  sW   !
"CI!
ch!
 38!
 39	!

 #CH!
 !
 '(<!
 #C$8!
 ci!
 	!
 	!
 !#(!
 #(!
 CH!
 sx!
  sx!!
" #!
$ !Y.-X!h9!iyyyH9A!
 !
 !
 !	
r!   N)rX   r   )r   r   r   r   r   r   r   r   r   rM   r   r    r!   r"   r   r     sg          DHEO)_K_Eb(
 (
 (
 [(
 (
 (
r!   r   _PrX   DCallable[[Callable[_P, List[Element]]], Callable[_P, List[Element]]]c                     dd} | S )a  Post-process element-metadata for this document.

    This decorator adds a post-processing step to a document partitioner. It adds documentation for
    `metadata_filename` and `include_metadata` parameters if not present. Also adds regex-metadata
    when `regex_metadata` keyword-argument is provided and changes the element-id to a UUID when
    `unique_element_ids` argument is provided and True.
    funcCallable[_P, List[Element]]rX   c                      j         rNd j        j        v rd j         vr xj         dz  c_         d j        j        v rd j         vr xj         dz  c_         t          j                   d fd            }|S )Nmetadata_filenamezT
Metadata Parameters:
	metadata_filename:
		The filename to use in element metadata.include_metadataz
	include_metadata:
		Determines whether or not metadata is included in the metadata
                    attribute on the elements in the output.rI   _P.argskwargs	_P.kwargsrX   List[Element]c            
         	| i |}t          j        	          }t          di t          t          |j        |                     |}|j                                        D ](}|j        |vr|j        |j        ur|j        ||j        <   )|	                    di           }|rt          ||          }|	                    dd          }|r|D ]}|                                 |S )Nr   unique_element_idsFr    )inspect	signaturer   zip
parametersvaluesr?   defaultemptyrq   _add_regex_metadata
id_to_uuid)
rI   r   elementssigparamsparamr   r   elementr   s
            r"   wrapperz4process_metadata.<locals>.decorator.<locals>.wrapper  s   tT,V,,H#D))C%)%V%VDS^T1J1J,K,K%Vv%V%VF..00 7 7:V++U[0P0P).F5:&17<Lb1Q1QN  I.xHH'-zz2F'N'N! )' ) )G&&((((Or!   )rI   r   r   r   rX   r   )r   __code__co_varnames	functoolswraps)r   r  s   ` r"   	decoratorz#process_metadata.<locals>.decorator  s    < 	#t}'@@@'t|;;G
 #dm&???&dl::@ 
			 	 	 	 	 
		* r!   )r   r   rX   r   r    )r  s    r"   process_metadatar    s    * * * *X r!   r  r   r   Dict[str, str]c           	     r   | D ]}t          |t                    ri }|                                D ]y\  }}g }t          j        ||j                  D ]?}|                                \  }}	|                    |j        ||	         ||	d           @t          |          dk    r|||<   z||j	        _
        | S )zAdds metadata based on a user provided regular expression.

    The additional metadata will be added to the regex_metadata attrbuted in the element metadata.
    )rv   rx   ry   r   )r\   Textr7   refinditerrv   spanrl   lenmetadatar   )
r  r   r
  _regex_metadatar   patternresultsresultrx   ry   s
             r"   r  r    s      > >gt$$ 	>@BO'5';';'='= : :#
G/1 k'7<@@  F!'JE3NN$+Ls$;%*#&     w<<!##29OJ/.=G+Or!   c                      e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZ dZ!d Z"d!Z#d"Z$d#Z%e&d$             Z'd%S )&ElementTypeTitler  UncategorizedTextNarrativeTextBulletedTextAbstract	ThreadingFormz
Field-NameValuer|   CompositeElementImagePictureFigureCaptionFigureCaptionr	   ListItemz	List-itemChecked	UncheckedAddressEmailAddress	PageBreakFormulaTableHeaderHeadlineSubheadlinezPage-headerzSection-headerFooterFootnotezPage-footerc                :      fdt                     D             S )z
        Convert class attributes to a dictionary.

        Returns:
            dict: A dictionary where keys are attribute names and values are attribute values.
        c                    i | ]E}t          t          |                    |                    d           4|t          |          FS )__)callablegetattrr   )r2   attrrH   s     r"   r5   z'ElementType.to_dict.<locals>.<dictcomp>j  sc     
 
 
GC..//
 9=8M8M
'#t$$
 
 
r!   )dirr   s   `r"   r:   zElementType.to_dictb  s4    
 
 
 
C
 
 
 	
r!   N)(r   r   r   TITLETEXTUNCATEGORIZED_TEXTNARRATIVE_TEXTBULLETED_TEXTABSTRACT	THREADINGFORM
FIELD_NAMEVALUELINKCOMPOSITE_ELEMENTIMAGEPICTUREFIGURE_CAPTIONFIGURECAPTIONLIST	LIST_ITEMLIST_ITEM_OTHERCHECKED	UNCHECKEDADDRESSEMAIL_ADDRESS
PAGE_BREAKFORMULATABLEHEADERHEADLINESUB_HEADLINEPAGE_HEADERSECTION_HEADERFOOTERFOOTNOTEPAGE_FOOTERrM   r:   r    r!   r"   r  r  =  s        ED,$N"MHIDJED*EG$NFGDI!OGIG"MJGEFH LK%NFHK
 
 [
 
 
r!   r  c                  \    e Zd ZU dZded<    e            ddddfddZd ZddZ	 dddZ	dS )Elementz2An element is a section of a page in the document.rc   rv   N
element_id!Union[str, uuid.UUID, NoID, UUID]r   )Optional[Tuple[Tuple[float, float], ...]]coordinate_systemrR   r  Optional[ElementMetadata]r   r'   c                    || _         |t                      n|| _        ||t          ||          | j        _        || j        _        t          | d          r| j        nd| _        d S )Nrp   rv   r   )idr   r  rO   r   r   hasattrrv   )r9   rg  r   rj  r  r   s         r"   rV   zElement.__init__v  s|     6@-5-=)))8"&7&C(;"+<) ) )DM% *:& ")v!6!6>DIIB			r!   c                P    t          t          j                              | _        d S r1   )rc   uuiduuid4rm  r8   s    r"   r  zElement.id_to_uuid  s    djll##r!   rX   r<   c                R    d | j         | j        | j                                        dS )N)typerg  rv   r  )rm  rv   r  r:   r8   s    r"   r:   zElement.to_dict  s/    'I--//	
 
 	
r!   T
new_systemr   in_placerY   rP   c                     j         j        " j         j        j         j         j        j        dS t	           fd j         j        j        D                       }|r"| j         j        _         j         j        _        |S )zConverts the element location coordinates to a new coordinate system.

        If inplace is true, changes the coordinates in place and updates the coordinate system.
        Nc              3  l   K   | ].\  }}j         j        j                            ||           V  /dS ))rt  xyN)r  r   rS   !convert_coordinates_to_new_system)r2   rx  ry  rt  r9   s      r"   	<genexpr>z<Element.convert_coordinates_to_new_system.<locals>.<genexpr>  sd        
  
 1 M%,NN% O   
  
  
  
  
  
r!   )r  r   rS   rQ   rm   )r9   rt  ru  new_coordinatess   ``  r"   rz  z)Element.convert_coordinates_to_new_system  s     M%-}(/7}(/74  
  
  
  
  
 18 
  
  
 
 
  	:/>DM%,/9DM%,r!   )
rg  rh  r   ri  rj  rR   r  rk  r   r'   r   )T)rt  r   ru  rY   rX   rP   )
r   r   r   r   rL   r   rV   r  r:   rz  r    r!   r"   rf  rf  q  s         <<III 9=AE8<.2*.? ? ? ? ?&$ $ $
 
 
 
 >B      r!   rf  c                  V     e Zd ZdZ e            dddddfd fdZddZd fdZ xZS )CheckBoxzyA checkbox with an attribute indicating whether its checked or not.

    Primarily used in documents that are forms.
    NFrg  rh  r   ri  rj  rR   checkedrY   r  rk  r   r'   c                    |r|nt                      }t                                          |||||           || _        d S )N)rg  r   rj  r  r   )r   r   rV   r  )r9   rg  r   rj  r  r  r   rd   s          r"   rV   zCheckBox.__init__  sV      (>88_->->!#/- 	 	
 	
 	
 %r!   rW   r   rX   c                    t          |t                    sdS t          | j        |j        k    | j        j        |j        j        k    f          S r[   )r\   r~  r]   r  r  r   r^   s     r"   r_   zCheckBox.__eq__  sN    %** 	5-)U^-GG
 
 	
r!   r<   c                z    t                                                      }d|d<   | j        |d<   | j        |d<   |S )-Serialize to JSON-compatible (str keys) dict.r~  rs  r  rg  )r   r:   r  rm  r9   outrd   s     r"   r:   zCheckBox.to_dict  s;    ggoo FI GL
r!   )rg  rh  r   ri  rj  rR   r  rY   r  rk  r   r'   r   r   )	r   r   r   r   r   rV   r_   r:   r   r   s   @r"   r~  r~    s          9=AE8<.2*.% % % % % % %&
 
 
 
         r!   r~  c                  h     e Zd ZdZdZ e            dddddfd fdZddZd Zd fdZ	d dZ
 xZS )!r  z:Base element for capturing free text from within document.r!  Nrv   rc   rg  rh  r   ri  rj  rR   r  rk  r   r'   
embeddingsOptional[List[float]]c                   |r|nt                      }|| _        || _        t          |t                    rAt          j        |                                                                          d d         }n5t          |t                    r t          t          j                              }t                                          |||||           d S )N    )rg  r  r   rj  r   )r   rv   r  r\   r   hashlibsha256encode	hexdigestr$   rc   rp  rq  r   rV   )	r9   rv   rg  r   rj  r  r   r  rd   s	           r"   rV   zText.__init__  s      (>88_->->	1;j$'' 	+ 66@@BB3B3GJJ
D)) 	+TZ\\**J!#/- 	 	
 	
 	
 	
 	
r!   rW   r   c                    t          |t                    sdS t          | j        |j        k    | j        j        |j        j        k    | j        |j        k    | j        |j        k    f          S r[   )r\   r  r]   rv   r  r   categoryr  r^   s     r"   r_   zText.__eq__   sg    %&& 	5	UZ')U^-GG/5#33	
 
 	
r!   c                    | j         S r1   )rv   r8   s    r"   __str__zText.__str__  s
    yr!   rX   r<   c                    t                                                      }| j        |d<   | j        |d<   | j        |d<   | j        r
| j        |d<   |S )r  rg  rs  rv   r  )r   r:   rm  r  rv   r  r  s     r"   r:   zText.to_dict  sS    ggoo GLmFiF? 	0 $C
r!   cleanersCallable[[str], str]c                    | j         }|D ]} ||          }t          |t                    st          d          || _         dS )zApplies a cleaning brick to the text element.

        The function that's passed in should take a string as input and produce a string as
        output.
        z%Cleaner produced a non-string output.N)rv   r\   rc   rU   )r9   r  cleaned_textcleaners       r"   applyz
Text.apply  sX     y 	1 	1G"7<00LL,,, 	FDEEE 			r!   )rv   rc   rg  rh  r   ri  rj  rR   r  rk  r   r'   r  r  )rW   r   r   )r  r  )r   r   r   r   r  r   rV   r_   r  r:   r  r   r   s   @r"   r  r    s        DD"H
 9=AE8<.2*.,0
 
 
 
 
 
 
:

 

 

 

       ! ! ! ! ! ! ! !r!   r  c                      e Zd ZdZd ZdS )r4  z,An element containing formulas in a documentNr   r   r   r   r  r    r!   r"   r4  r4  )  s        22HHHr!   r4  c                      e Zd ZdZd ZdS )r(  z:A section of text consisting of a combination of elements.Nr  r    r!   r"   r(  r(  /  s        DD!HHHr!   r(  c                      e Zd ZdZd ZdS )r+  z>An element for capturing text associated with figure captions.Nr  r    r!   r"   r+  r+  5  s        HHHHHr!   r+  c                      e Zd ZdZd ZdS )r"  zNarrativeText is an element consisting of multiple, well-formulated sentences. This
    excludes elements such titles, headers, footers, and captions.Nr  r    r!   r"   r"  r"  ;  s!        F F HHHr!   r"  c                      e Zd ZdZd ZdS )r.  z;ListItem is a NarrativeText element that is part of a list.Nr  r    r!   r"   r.  r.  B  s        EEHHHr!   r.  c                      e Zd ZdZd ZdS )r   z$A text element for capturing titles.Nr  r    r!   r"   r   r   H  s        ..HHHr!   r   c                      e Zd ZdZd ZdS )r1  z'A text element for capturing addresses.Nr  r    r!   r"   r1  r1  N  s        11HHHr!   r1  c                      e Zd ZdZd ZdS )r2  z&A text element for capturing addressesNr  r    r!   r"   r2  r2  T  s        00HHHr!   r2  c                       e Zd ZdZej        ZdS )r)  z,A text element for capturing image metadata.N)r   r   r   r   r  rN  r  r    r!   r"   r)  r)  Z  s        66 HHHr!   r)  c                      e Zd ZdZd ZdS )r3  z%An element for capturing page breaks.Nr  r    r!   r"   r3  r3  `  s        //HHHr!   r3  c                      e Zd ZdZd ZdS )r5  z An element for capturing tables.Nr  r    r!   r"   r5  r5  f  s        **HHHr!   r5  c                      e Zd ZdZdZdS )
TableChunkz*An element for capturing chunks of tables.r5  Nr  r    r!   r"   r  r  l  s        44HHHr!   r  c                      e Zd ZdZd ZdS )r6  z*An element for capturing document headers.Nr  r    r!   r"   r6  r6  r          44HHHr!   r6  c                      e Zd ZdZd ZdS )r9  z*An element for capturing document footers.Nr  r    r!   r"   r9  r9  x  r  r!   r9  r<   TYPE_TO_TEXT_ELEMENT_MAP)rX   r   )r  r   r   r  rX   r   )l
__future__r   abcr   dataclassesrF   enumr  r  r   r   r   r  rp  typesr   typingr   r   r   r   r	   r
   r   r   r   r   typing_extensionsr   r   r   "unstructured.documents.coordinatesr   r   r   &unstructured.partition.utils.constantsr   unstructured.utilsr   floatr   rL   r   ABCr   r$   	dataclassr&   rO   ru   r|   r   Enumr   r   r  r  r  rf  r~  r  r4  r(  r+  r"  r.  r   r1  r2  r)  r3  r5  r  r6  r9  rB  ra  r^  r_  rJ  rD  rM  rC  rE  rG  rH  rI  rK  rL  rT  rF  rU  r]  r`  rb  rd  rc  rP  rR  rN  rQ  rO  r\  rX  rY  r[  rZ  r  r    r!   r"   <module>r     s   " " " " " " " 



             				  				  " " " " " " _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ = = = = = = = = = =         
 W V V V V V + + + + + +& & & & &%*% % % % %? ? ? ? ?37 ? ? ?@ @ @ @ @37 @ @ @        . C1 C1 C1 C1 C1 C1 C1 C1L    I       9   I/ I/ I/ I/ I/ I/ I/ I/X?
 ?
 ?
 ?
 ?
DI ?
 ?
 ?
D Yt__5 5 5 5t &(    <1
 1
 1
 1
 1
 1
 1
 1
h> > > > >cg > > >B) ) ) ) )w ) ) )XH! H! H! H! H!7 H! H! H!V    d   " " " " "t " " "    D       D       t       D       d       4   ! ! ! ! !D ! ! !           D              T       T   ",u",", %", e	",
 E", "D", !4", m", ", -", =", m", }", m",  8!"," x#",$ %",& VV&uu|IC", ",  " " " " " "r!   