
    j>`                       U d dl mZ d dlZd dlmZmZmZmZmZm	Z	m
Z
mZ ej        dk     rd dlmZ nd dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZmZmZmZmZmZmZm Z  d d
l!m"Z"m#Z# d dl$m%Z% d dl&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- g dZ.de/d<   ddgZ0de/d<   g dZ1de/d<   g dZ2de/d<   g dZ3de/d<   dgZ4de/d<   dgZ5de/d<   e5e4z   Z6de/d<   dd gZ7de/d!<   d"d#gZ8de/d$<    G d% d&          Z9 G d' d(e9e          Z: G d) d*e9e          Z; G d+ d,e9e          Z< G d- d.e9e           Z= G d/ d0e9e          Z> G d1 d2e9e          Z? G d3 d4e9e          Z@dnd8ZAdnd9ZBdnd:ZC G d; d<e#          ZDdod@ZEdpdBZFdqdDZGdrdFZHdsdGZIg g fdtdQZJdudRZKdvdSZLdwdTZMdxdydWZNdudXZOdzdZZPd{d|d]ZQ	 d{d}d_ZRd~dcZSduddZTddgZUddiZVddkZWdxddmZXdS )    )annotationsN)AnyCallableDictListOptionalSequenceTuplecast)      )Final)etree)clean_bulletsreplace_unicode_quotes)Page)
AddressElementElementMetadataEmailAddressLinkListItemNarrativeTextTableTextTitle)VALID_PARSERSXMLDocument)logger)is_bulleted_textis_email_addressis_possible_narrative_textis_possible_titleis_us_city_state_zip)htmlify_matrix_of_cell_texts)patdspanfontzFinal[List[str]]	TEXT_TAGSliddLIST_ITEM_TAGS)uloldl	LIST_TAGS)h1h2h3h4h5h6HEADING_TAGS)tabletbodyr(   tr
TABLE_TAGSbrTEXTBREAK_TAGShrPAGEBREAK_TAGS
EMPTY_TAGSheaderfooterHEADER_OR_FOOTER_TAGSdivpreSECTION_TAGSc                  2     e Zd ZdZddg g ddd fdZ xZS )	TagsMixinz4Mixin that allows a class to retain tag information.N )tagancestortagslinksemphasized_textstext_as_htmlargsr   rL   Optional[str]rM   Sequence[str]rN   Sequence[Link]rO   Sequence[Dict[str, str]]rP   kwargsc                   |t          d          || _        || _        || _        || _        || _         t                      j        |i | d S )Nz(tag argument must be passed and not None)	TypeErrorrL   rM   rN   rO   rP   super__init__)	selfrL   rM   rN   rO   rP   rQ   rV   	__class__s	           e/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/documents/html.pyrZ   zTagsMixin.__init__:   sa     ;FGGGDH(
 0($)&)))))    )rQ   r   rL   rR   rM   rS   rN   rT   rO   rU   rP   rR   rV   r   )__name__
__module____qualname____doc__rZ   __classcell__r\   s   @r]   rJ   rJ   7   s^        >>
 "&( "57&** * * * * * * * * * * *r^   rJ   c                      e Zd ZdZdS )HTMLTextzText with tag information.Nr_   r`   ra   rb   rK   r^   r]   rf   rf   O   s        $$$$r^   rf   c                      e Zd ZdZdS )HTMLAddresszAddress with tag information.Nrg   rK   r^   r]   ri   ri   S   s        ''''r^   ri   c                      e Zd ZdZdS )HTMLEmailAddressz!EmailAddress with tag informationNrg   rK   r^   r]   rk   rk   W   s        ++++r^   rk   c                      e Zd ZdZdS )	HTMLTitlezTitle with tag information.Nrg   rK   r^   r]   rm   rm   [   s        %%%%r^   rm   c                      e Zd ZdZdS )HTMLNarrativeText#NarrativeText with tag information.Nrg   rK   r^   r]   ro   ro   _           ----r^   ro   c                      e Zd ZdZdS )HTMLListItemrp   Nrg   rK   r^   r]   rs   rs   c   rq   r^   rs   c                      e Zd ZdZdS )	HTMLTablez"NarrativeText with tag informationNrg   rK   r^   r]   ru   ru   g   s        ,,,,r^   ru   elementreturnboolc                >    t          d | j        D                       S )zChecks to see if an element has ancestors that are table elements. If so, we consider
    it to be a table element rather than a section of narrative text.c              3  (   K   | ]}|t           v V  d S N)r=   .0ancestors     r]   	<genexpr>z%has_table_ancestor.<locals>.<genexpr>n   s'      KK(x:%KKKKKKr^   anyrM   rv   s    r]   has_table_ancestorr   k   s$     KKg6JKKKKKKr^   c                F    t          d | j        D                       rdS dS )zIChecks to see if an element is contained within a header or a footer tag.c              3  (   K   | ]}|t           v V  d S r{   )rE   r|   s     r]   r   z&in_header_or_footer.<locals>.<genexpr>s   s(      
R
R8,,
R
R
R
R
R
Rr^   TFr   r   s    r]   in_header_or_footerr   q   s.    

R
RW=Q
R
R
RRR t5r^   c                    | j         t          v S )z&Checks to see if an element is a table)rL   r=   r   s    r]   is_tabler   x   s    ;*$$r^   c                  D     e Zd ZdZ	 	 	 dd fd
ZddZ	 	 	 dddZ xZS )HTMLDocumentzuClass for handling HTML documents. Uses rules based parsing to identify sections
    of interest within the document.NT
stylesheetrR   parserr   assemble_articlesrx   c                \    || _         t                                          ||           d S )N)r   r   )assembled_articlesrY   rZ   )r[   r   r   r   r\   s       r]   rZ   zHTMLDocument.__init__   s0     #4Jv>>>>>r^   rw   
List[Page]c           
        | j         r| j         S t          j        d           g }t          j        | j        ddgd           t          | j                  }t          || j                  }d}t          |          }|D ]}d	}|
                                D ]}||v rt          |          rt          |          r@t          |          }	|	D ]-}
t          |
          }||j                            |           .n+t          |          }||j                            |           t#          |                                          }t'          |          r_t)          |          }t+          |          }|j        J t/          |j        dd	d||          }||j                            |           !t1          |          rLt3          |          }|j                            |           t#          |                                          }|t7          |          r@t9          |          \  }}|*|j                            |           t;          ||          }|j        t>          v r[tA          |          }||j                            |           |s|j        dk    r!t#          |                                          }4|j        tB          v rBtE          |j                  dk    r*|                    |           |dz  }t          |          }tE          |j                  dk    r*|                    |           |dz  }t          |          }|S )a  Parse HTML elements into pages.

        A *page* is a subsequence of the document-elements parsed from the HTML document
        corresponding to a distinct topic. At present pagination is determined by `<article>`
        elements that surround something like a blog-post. Each article becomes its own page. If no
        article tags are present in the HTML the entire HTML document is a single page.
        zReading document ...scriptstyleF)	with_tail)r   r   numberrK   NrF   )depthrN   rO   r:      )#_pagesr   infor   strip_elementsdocument_tree
_find_main_find_articlesr   r   iter_is_text_tag_has_break_tags_unfurl_break_tags
_parse_tagelementsappendtupleiterdescendants_is_container_with_text_get_links_from_tag_get_emphasized_texts_from_tagtext_text_to_element_is_bulleted_table_bulleted_text_from_tableextendis_list_item_tag_process_list_item_get_bullet_descendantsrL   r=    _parse_HTMLTable_from_table_elemrA   len)r[   pagesrootarticlespage_numberpagearticledescendanttag_elemstag_elemflattened_elems	_tag_elemrv   rN   rO   bulleted_textnext_elements                   r]   _parse_pages_from_element_treez+HTMLDocument._parse_pages_from_element_tree   s    ; 	;*+++T/(G1DPUVVVV$,--!$$:QRRR;''' C	0 C	0G>@#LLNN <4 <4222 )) 64&x00 
:*<X*F*F)8 > >I&0&;&;G&2 $ 4 4W = = => #-X"6"6". M00999*/0H0H0J0J*K*K'',X66 (4/99E'Eh'O'O$#=444. #)9  G *,,W555'11 4$=h$G$GMM((777*/0H0H0J0J*K*K''%h// 4,>x,H,H)G\*,,W555.E$(/ /+
 \Z//>xHHG*,,W555 P(,'"9"9.3H4L4L4N4N.O.O+\^33DM8J8JQ8N8NLL&&&1$K{333D4=!!A%%T"""q ;///r^   Fskip_headers_and_footers
skip_tableinplacec           
        g }|r|                     t                     |r|                     t                     g }d}t          |          }| j        D ]}g }	|j        D ]t          t                    s't          d| j	         dt                               t          fd|D                       s|	                                |r!dt          j                  j        fz   v r n|	r1|	|_        |                     |           |dz  }t          |          }|r|| _        d| _        | S | j	                            |          }
t          |
t$                    st          d	| j	        j                   |
S )
a  Filters elements returning new instance based on the criteria specified.

        Note that the number of pages can change in the case that all elements on a page are
        filtered out.

        Parameters
        ----------
        skip_table:
            If True, skips table element
        skip_headers_and_footers:
            If True, ignores any content that is within <header> or <footer> tags
        inplace:
            If True, document is modified in place and returned.
        r   r   zelements of class zb should be of type HTMLTitle HTMLNarrativeText, or HTMLListItem but object has an element of type c              3  .   K   | ]} |          V  d S r{   rK   )r}   excluderels     r]   r   z2HTMLDocument.doc_after_cleaners.<locals>.<genexpr>  s+      BBH88B<<BBBBBBr^   rD   r   NzUnexpected class: )r   r   r   r   r   r   
isinstancerJ   
ValueErrorr\   typer   r   rM   rL   r   	_elements
from_pagesr   r_   )r[   r   r   r   	excludersr   r   new_pager   r   outr   s              @r]   doc_after_cleanerszHTMLDocument.doc_after_cleaners   s   ( 8:	# 	20111 	'X&&&{+++J 	4 	4D&(Hm 
 
!"i00 $DT^ D D9=bD D  
 BBBB	BBBBB (OOB'''+ E"/<R<RVXV\U^<^0^0^E 4$,!X&&&q {333 	DK!DNK.++E22Cc<00  !B)@BB   Jr^   )NNT)r   rR   r   r   r   rx   )rw   r   )FFF)r   rx   r   rx   r   rx   rw   r   )r_   r`   ra   rb   rZ   r   r   rc   rd   s   @r]   r   r      s        ( (
 %) $"&	? ? ? ? ? ? ?W W W Wv */ 	< < < < < < < < <r^   r   r   etree._Element
List[Link]c                   g }|                      d          }|r|                    | j        |dd           |                                 D ]7}|                     d          }|r|                    |j        |dd           8|S )z'Hyperlinks within and below `tag_elem`.href)r   urlstart_index)getr   r   r   )r   rN   r   rL   s       r]   r   r   %  s    E<<D NhmDLLMMM'')) M Mwwv 	MLL#(4KKLLLLr^   
table_elemc                    | j         dk    rdS |                     d          }d |D             }t          d |D                       rdS t          d |D                       rdS dS )zTrue when all text in `table_elem` is bulleted text.

    A table-row containing no text is not considered, but at least one bulleted-text item must be
    present. A table with no text in any row is not a bulleted table.
    r:   F.//trc                ,    g | ]}t          |          S rK   )_construct_textr}   r<   s     r]   
<listcomp>z&_is_bulleted_table.<locals>.<listcomp>=  s     222##222r^   c              3     K   | ]}| V  d S r{   rK   r}   r   s     r]   r   z%_is_bulleted_table.<locals>.<genexpr>@  s$      
)
)t8
)
)
)
)
)
)r^   c              3  :   K   | ]}|ot          |           V  d S r{   )r    r   s     r]   r   z%_is_bulleted_table.<locals>.<genexpr>D  s3      
D
D44.(...
D
D
D
D
D
Dr^   T)rL   findallallr   )r   trstr_textss      r]   r   r   3  s     ~  u


W
%
%C22c222H 
)
)
)
)
))) u 
D
D8
D
D
DDD u4r^   Optional[Element]c           
        | j         dk    rdS t          t          t          j                 |                     d                    }|sdS d |D             }t          |          }d                    d |D                                                       }|dk    rdS t          ||| j         t          d |                                 D                       ddd	         
          S )z)Form `HTMLTable` element from `tbl_elem`.r:   Nz+./tr | ./thead/tr | ./tbody/tr | ./tfoot/trc                J    g | ] }d  |                                 D             !S )c                ,    g | ]}t          |          S rK   strr   s     r]   r   z?_parse_HTMLTable_from_table_elem.<locals>.<listcomp>.<listcomp>Z  s    7773t99777r^   )itertextr   s     r]   r   z4_parse_HTMLTable_from_table_elem.<locals>.<listcomp>Z  s/    FFFB77777FFFr^    c              3  @   K   | ]}d                      |          V  dS )r   N)join)r}   rows     r]   r   z3_parse_HTMLTable_from_table_elem.<locals>.<genexpr>\  s,      >>C#((3-->>>>>>r^    c              3  $   K   | ]}|j         V  d S r{   rL   r}   r   s     r]   r   z3_parse_HTMLTable_from_table_elem.<locals>.<genexpr>e  s$      GGb26GGGGGGr^   r   )r   rP   rL   rM   )rL   r   r   r   _Elementxpathr%   r   stripru   r   iterancestors)r   r   
table_data
html_table
table_texts        r]   r   r   J  s    ~  t U^j../\]] C  tFF#FFFJ-j99J>>:>>>>>DDFFJRtNGGJ,D,D,F,FGGGGG"M	   r^   List[Dict[str, str]]c                    g }g d}| j         |v r/t          | d          }|r|                    || j         d            | j        | D ]1}t          |d          }|r|                    ||j         d           2|S )zEmphasized text within and below `tag_element`.

    Emphasis is indicated by `<strong>`, `<em>`, `<span>`, `<b>`, `<i>` tags.
    )strongemr)   biFr   rL   )rL   r   r   r   )r   rO   tags_to_trackr   descendant_tag_elems        r]   r   r   i  s    
 .0666M|}$$x// 	I##T(,$G$GHHH7x7G T T2E:: 	T##T:M:Q$R$RSSSr^   c                   t          d |                                 D                       ddd         }t          |           }t          |           }| j        t
          v rt          | j        d                   dz
  nB| j        t          t          z   v r+t          d |                                 D                       nd}| j        dk    rdS t          |           }|sdS t          || j        ||||          S )	a1  Parses `tag_elem` to a Text element if it contains qualifying text.

    Ancestor tags are kept so they can be used for filtering or classification without processing
    the document tree again. In the future we might want to keep descendants too, but we don't have
    a use for them at the moment.
    c              3  $   K   | ]}|j         V  d S r{   r   r   s     r]   r   z_parse_tag.<locals>.<genexpr>  s$      )T)TR"&)T)T)T)T)T)Tr^   Nr   r   c                >    g | ]}|j         t          t          z   v |S rK   rL   r2   r.   r   s     r]   r   z_parse_tag.<locals>.<listcomp>  s*    ```26YQ_E_;_;_";_;_;_r^   r   r   )rN   rO   r   )r   r   r   r   rL   r9   intr2   r.   r   r   r   )r   rM   rN   rO   r   r   s         r]   r   r   ~  s     %*)T)T8;Q;Q;S;S)T)T)T$T$TUYUYWYUY$ZL))E5h?? <<'' 	HLOq   <9~555 ``x5577```aaa 
 |xt8$$D t)   r^   r   r   rL   rM   Tuple[str, ...]r   r  rN   rO   c           
     H   t          |           rAt          |           sdS t          t          |           ||||t          |                    S t	          |           rt          | ||||          S t          |           rt          | |||          S t          |           dk     rdS t          | |          rt          | ||||          S t          |          st          |           r#t          | ||||t          |                    S t          | ||||          S )	zBProduce a document-element of the appropriate sub-type for `text`.Ncategory_depth)r   rL   rM   rN   rO   metadata)r   rL   rM   rN   rO   )r   rL   rN   rO      )rL   rM   rN   rO   )rL   rM   rN   rO   r  )r    r   rs   r   r$   ri   r!   rk   r   is_narrative_tagro   is_heading_tagr#   rm   rf   )r   rL   rM   r   rN   rO   s         r]   r   r     s     
T"" 	4t$$%-$E:::
 
 
 	
 
d	#	# 
%-
 
 
 	
 
$		 
-	
 
 
 	
 4yy1}}t	$	$	$ 
 %-
 
 
 	
 
		 
 1$ 7 7 
%-$E:::
 
 
 	
 %-
 
 
 	
r^   c                    | j         t          dgz   vst          |           dk    rdS | j        | j                                        dk    rdS dS )zChecks if a tag is a container that also happens to contain text.

    Example
    -------
    <div>Hi there,
        <div>This is my message.</div>
        <div>Please read my message!</div>
    </div>
    bodyr   FNr   T)rL   rH   r   r   r   r   s    r]   r   r     sW     |<6(222c(mmq6H6Hu} 3 3 5 5 ; ;u4r^   c                2    |t           vot          |           S )z8Uses tag information to infer whether text is narrative.)r9   r"   r  s     r]   r  r    s    l"G'A$'G'GGr^   c                    | t           v S )z8Uses tag information to infer whether text is a heading.)r9   r   s    r]   r  r    s    ,r^   Tinclude_tail_textc                    d                     d |                                 D                       }|r| j        r
|| j        z   }t          |          }|                                S )z&Extract "clean"" text from `tag_elem`.r   c              3  8   K   | ]}|t          |          V  d S r{   r   )r}   ts     r]   r   z"_construct_text.<locals>.<genexpr>  s-      <<a!<3q66<<<<<<r^   )r   r   tailr   r   )r   r  r   s      r]   r   r     sf    77<<8#4#4#6#6<<<<<D $X] $hm#!$''D::<<r^   c                X    t          d |                                 D                       S )z2True when `tab_elem` contains a `<br>` descendant.c              3  2   K   | ]}|j         t          v V  d S r{   )rL   r?   )r}   
descendants     r]   r   z"_has_break_tags.<locals>.<genexpr>  s)      ]]Jz~/]]]]]]r^   )r   r   r  s    r]   r   r     s+    ]](BZBZB\B\]]]]]]r^   List[etree._Element]c                   g }| j         r:t          j        | j                  }| j         |_         |                    |           |                                 }|D ]}t          |          s|                    |           '|j         r:t          j        |j                  }|j         |_         |                    |           |                    t          |                     |S )a  Sequence of `tag_elem` and its children with `<br>` elements removed.

    NOTE that these are "loose" `etree._Element` instances that are NOT linked to the original HTML
    element-tree, so methods like `.getchildren()`, `.find()` etc. will happily produce empty
    results.
    )	r   r   r   rL   r   getchildrenr   r   r   )r   unfurledr   childrenchilds        r]   r   r     s     &(H} #M(,//	!		"""##%%H 7 7u%% 	7OOE""""z +!M%)44	!&		***OO.u556666Or^      max_predecessor_lenc                f   t          d |                                 D                       }t          |           ||z   k    rdS | j        t          t          z   t
          z   v rdS |                                 }| j        t          dgz   v rt          |          dk    rdS t          | |          rdS dS )z<True when `tag_element` potentially contains narrative text.c                .    g | ]}|j         t          v |S rK   rL   rB   r   s     r]   r   z _is_text_tag.<locals>.<listcomp>3  s$    WWW""&JBVBV2BVBVBVr^   FTr  r   )r   r#  rL   r+   r9   r?   rH   _has_adjacent_bulleted_spans)r   r(  empty_elems_lenr%  s       r]   r   r   .  s    
 WW(<(<(>(>WWWXXO
8}}*_<<<u|y</.@@@t ##%%H||vh...3x==A3E3Et#Hh77 t5r^   2Tuple[Optional[Element], Optional[etree._Element]]c           	     v   | j         t          t          z   v rt          |           }t	          |           }t          |           }t          d |                                 D                       }t          || j         ||t          |                    | fS | j         t          v rt          |           }|                                 }|dS t          |          }t          d |                                 D                       }t          |           ||z   k    rdS |rt          ||j                   |fS dS )zProduces an `HTMLListItem` document element from `tag_elem`.

    When `tag_elem` contains bulleted text, the relevant bulleted text is extracted. Also returns
    the next html element so we can skip processing if bullets are found in a div element.
    c                >    g | ]}|j         t          t          z   v |S rK   r
  r   s     r]   r   z&_process_list_item.<locals>.<listcomp>T  s)    [[[Bbf	N@Z6Z6ZR6Z6Z6Zr^   r  )r   rL   rN   rO   r  N)NNc                .    g | ]}|j         t          v |S rK   r+  r   s     r]   r   z&_process_list_item.<locals>.<listcomp>j  s$    IIIBBFj4H4HR4H4H4Hr^   r  )rL   r2   r.   r   r   r   r   r   rs   r   rH   getnextr#  )	r   r(  r   rN   rO   r   r   	next_textr-  s	            r]   r   r   F  sc    |y>111x((#H--9(CC[[(0022[[[
 
 L!1(>>>   	
 		
 
	%	%x(('')):#L11	 II(..00III
 
 x==.@@@: 	TYL4DEEE|SS:r^   Optional[etree._Element]r   Tuple[etree._Element, ...]c                P    | |dn t          |                                          S )zsHelper for list-item processing.

    Gathers the descendants of `next_element` so they can be marked visited.
    NrK   )r   r   )rv   r   s     r]   r   r   t  s+     L$822eLD`D`DbDb>c>ccr^   c                r    | j         t          v p)| j         t          v ot          t	          |                     S )z,True when `tag_elem` contains bulleted text.)rL   r.   rH   r    r   r  s    r]   r   r   ~  s6    <>) $T)9/(:S:S)T)Tr^   r:   List[Element]c                    g }|                      d          }|D ]V}t          |          }t          |          r6|                    t	          t          |          |j                             W|S )zExtracts bulletized narrative text from the `<table>` element in `table`.

    NOTE: if a table has mixed bullets and non-bullets, only bullets are extracted. I.e., _read()
    will drop non-bullet narrative text in the table.
    r   r  )r   r   r    r   rs   r   rL   )r:   r   rowsr   r   s        r]   r   r     s{     $&M==!!D V Vs##D!! 	V  =3F3FCG!T!T!TUUUr^   r%  c                    | j         t          v rHt          d |D                       }|d         j        duot	          |d         j                  }|r|rdS dS )zTrue when `tag_elem` is a <div> or <pre> containing two or more adjacent bulleted spans.

    A bulleted span is one beginning with a bullet. If there are two or more adjacent to each other
    they are treated as a single bulleted text element.
    c              3  ,   K   | ]}|j         d k    V  dS )r)   Nr   )r}   r&  s     r]   r   z/_has_adjacent_bulleted_spans.<locals>.<genexpr>  s)      BB	V+BBBBBBr^   r   NTF)rL   rH   r   r   r    )r   r%  	all_spans_is_bulleteds       r]   r,  r,    sm     ||##BBBBBBB	{'t3Z8HRSIY8Z8Z 	 	45r^   r   c                8    |                      d          }||n| S )z@The first <main> tag under `root` if it exists, othewise `root`.z.//main)find)r   main_tag_elems     r]   r   r     s"    IIi((M)5==4?r^   r   c                    |du r| gS |                      d          }t          |          dk    r|                      d          }t          |          dk    r| gn|S )zParse articles from `root` of an HTML document.

    Each `<article>` element in the HTML becomes its own "sub-document" (article). If no article
    elements are present, the entire document (`root`) is returned as the single document article.
    Fz
.//articler   z.//div[@itemprop='articleBody'])r   r   )r   r   r   s      r]   r   r     sf     E!!v||L))H
8}}<< ABB]]a''D66X5r^   )rv   rJ   rw   rx   )r   r   rw   r   )r   r   rw   rx   )r   r   rw   r   )r   r   rw   r   )r   r   rw   r   )r   r   rL   r   rM   r  r   r  rN   r   rO   r   rw   r   )r   r   rw   rx   )r   r   rL   r   rw   rx   )rL   r   rw   rx   )T)r   r   r  rx   rw   r   )r   r   rw   r!  )r'  )r   r   r(  r  rw   rx   )r   r   r(  r  rw   r.  )rv   r4  r   r4  rw   r5  )r:   r   rw   r8  )r   r   r%  r!  rw   rx   )r   r   rw   r   )r   r   r   rx   rw   r!  )Y
__future__r   systypingr   r   r   r   r   r	   r
   r   version_infotyping_extensionsr   lxmlr   unstructured.cleaners.corer   r   unstructured.documents.baser   unstructured.documents.elementsr   r   r   r   r   r   r   r   r   r   unstructured.documents.xmlr   r   unstructured.loggerr    unstructured.partition.text_typer    r!   r"   r#   r$   unstructured.utilsr%   r+   __annotations__r.   r2   r9   r=   r?   rA   rB   rE   rH   rJ   rf   ri   rk   rm   ro   rs   ru   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r   r   r   r   r   r   r   r,  r   r   rK   r^   r]   <module>rQ     sL   # " " " " " " 



 M M M M M M M M M M M M M M M M M M M Mf'''''''       L L L L L L L L , , , , , ,                        B A A A A A A A & & & & & &              < ; ; ; ; ;>>>	 > > > >$($< / / / /000	 0 0 0 0!E!E!E E E E E===
 = = = =$(6 ) ) ) )$(6 ) ) ) )->
 > > > >+3X*>  > > > >"' / / / /* * * * * * * *0% % % % %y$ % % %( ( ( ( ()W ( ( (, , , , ,y, , , ,& & & & &	5 & & &. . . . .	= . . .. . . . .9h . . .- - - - -	5 - - -L L L L   % % % %b b b b b; b b bJ      .   >   *# # # #V -/>
 >
 >
 >
 >
B   &H H H H
   
    ^ ^ ^ ^
   8    4  !+ + + + +\d d d d         @ @ @ @6 6 6 6 6 6 6r^   