
    j+                     P   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
  ej                    d             Z ej                    d             Z ej                    d             Zd Zd	 Zd
 Zd Zd Zd Zd Zej                            dd dgd ed          d gd ed          g          d             Zej                            dg dg          d             Zej                            dd gd ed          g          d             Zej                            dd dgd ed          g          d             Zej                            dg dg          d             Zej                            d e
dd edd           ed                      e	d!d" edd           ed                     gd#d$d%d&d'gd(d)dd*d+d+d,d-d.gff e
dd edd           ed                      e	d!d" edd           ed                     gdd$d%d/d0gd1d2d3d*d+d+d,d-d.gff e
dd edd           ed                      e	dd" edd           ed                     gdd$d%d/d0gd4d2dd*d+d+d,d-d.gff e
dd edd           ed                      e	d5d" edd           ed                     gdd$d%d/d0gd6d2d d*d+d+d,d-d.gff e
dd7 edd           ed                      e	d!d8 edd           ed                     gdd$d%d/d0gd9d:dd;d<d=d>d-d.gff e
dd? edd           ed                      e	d!d@ edd           ed                     gdd$dAg ffg          dB             ZdS )C    N)utils)
PixelSpace)ElementMetadataNarrativeTextTitlec                      ddidddidgS )NtextzThis is a sentence.zThis is another sentence.scoreg?)r	   meta r       f/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/test_unstructured/test_utils.py
input_datar      s&     
&',wnEE r   c                 B    t           j                            | d          S )Nzoutput.jsonl)ospathjoin)tmp_paths    r   output_jsonl_filer      s    7<<.111r   c                     t           j                            | d          }t          |d          5 }|                    d |D                        d d d            n# 1 swxY w Y   |S )Nzinput.jsonlzw+c                 <    g | ]}t          j        |          d z   S )
)jsondumps).0objs     r   
<listcomp>z$input_jsonl_file.<locals>.<listcomp>   s%    LLL#tz#5LLLr   )r   r   r   open
writelines)r   r   	file_path
input_files       r   input_jsonl_filer"      s    X}55I	i		 N*LLLLLMMMN N N N N N N N N N N N N N Ns    AA!$A!c                     t          j        | |           t          |          5 }d |D             }d d d            n# 1 swxY w Y   || k    sJ d S )Nc                 6    g | ]}t          j        |          S r   )r   loads)r   lines     r   r   z&test_save_as_jsonl.<locals>.<listcomp>#   s"    >>>$TZ%%>>>r   )r   save_as_jsonlr   )r   r   output_file	file_datas       r   test_save_as_jsonlr*       s    	
$5666		 	  ?K>>+>>>	? ? ? ? ? ? ? ? ? ? ? ? ? ? ?
""""""s   >AAc                 >    t          j        |           }||k    sJ d S N)r   read_from_jsonl)r"   r   r)   s      r   test_read_as_jsonlr.   '   s*    %&677I
""""""r   c                  V    t          j        d          d             }  |              d S )Nnumpydependenciesc                      dd l } d S Nr   r0   r5   s    r   	test_funcz7test_requires_dependencies_decorator.<locals>.test_func-   s    r   r   requires_dependenciesr6   s    r   $test_requires_dependencies_decoratorr:   ,   s;    
 g666  76 IKKKKKr   c                  Z    t          j        ddg          d             }  |              d S )Nr0   pandasr1   c                      dd l } dd l}d S r4   r0   r<   r>   s     r   r6   z@test_requires_dependencies_decorator_multiple.<locals>.test_func5   s    r   r7   r9   s    r   -test_requires_dependencies_decorator_multipler?   4   sA    
 w.ABBB  CB IKKKKKr   c                      t          j        d          d             } t          j        t                    5   |              d d d            d S # 1 swxY w Y   d S )Nnot_a_packager1   c                      dd l } d S r4   rA   rC   s    r   r6   zDtest_requires_dependencies_decorator_import_error.<locals>.test_func>   s    r   r   r8   pytestraisesImportErrorr9   s    r   1test_requires_dependencies_decorator_import_errorrH   =   s    
 o>>>  ?> 
{	#	#  	                 s   AAAc                      t          j        ddg          d             } t          j        t                    5   |              d d d            d S # 1 swxY w Y   d S )NrA   r0   r1   c                      dd l } dd l}d S r4   rA   r0   rK   s     r   r6   zMtest_requires_dependencies_decorator_import_error_multiple.<locals>.test_funcG   s    r   rD   r9   s    r   :test_requires_dependencies_decorator_import_error_multiplerL   F   s    
 .HIII  JI 
{	#	#  	                 s   AAAc                  j    t          j        d           G d d                      }  |              d S )Nr0   r1   c                       e Zd Zd ZdS )@test_requires_dependencies_decorator_in_class.<locals>.TestClassc                     dd l }d S r4   r5   )selfr0   s     r   __init__zItest_requires_dependencies_decorator_in_class.<locals>.TestClass.__init__S   s    LLLLLr   N)__name__
__module____qualname__rR   r   r   r   	TestClassrO   Q   s#        	 	 	 	 	r   rV   r7   )rV   s    r   -test_requires_dependencies_decorator_in_classrW   P   sT    
 g666       76 IKKKKKr   iterator   )r   rY   
   )r   c                 :    t          j        |           dk    sJ d S r4   r   firstrX   s    r   test_first_gives_firstr_   Y   #    ;x  A%%%%%%r   r   c                     t          j        t                    5  t          j        |            d d d            d S # 1 swxY w Y   d S r,   )rE   rF   
ValueErrorr   r]   r^   s    r   test_first_raises_if_emptyrc   ^   s    	z	"	"  H                    <A A c                 :    t          j        |           dk    sJ d S r4   r\   r^   s    r   test_only_gives_onlyrf   d   r`   r   c                     t          j        t                    5  t          j        |           dk     d d d            d S # 1 swxY w Y   d S r4   rE   rF   rb   r   onlyr^   s    r   %test_only_raises_when_len_more_than_1rj   i   s    	z	"	" " "
8!!" " " " " " " " " " " " " " " " " "s   A  AAc                     t          j        t                    5  t          j        |            d d d            d S # 1 swxY w Y   d S r,   rh   r^   s    r   test_only_raises_if_emptyrl   o   s    	z	"	"  
8                 rd   )elementsnested_error_tolerance_pxsm_overlap_thresholdexpectationzSome lovely title))      )rq      )   rs   )rt   rr      )widthheight)page_number)r	   coordinatescoordinate_systemmetadatazSome lovely text))      )r|      rr   r~   rr   r}   rr   g      $@TzTitle(ix=0)zNarrativeText(ix=1)znested NarrativeText in Titlez100%z5.88%u   9pxˆ2u   18pxˆ2)largest_ngram_percentageoverlap_percentage_totalmax_areamin_area
total_area)overlapping_elementsoverlapping_caseoverlap_percentager{   z0. Title(ix=0)z1. NarrativeText(ix=1)zFpartial overlap sharing 50.0% of the text from1. NarrativeText(2-gram)z11.11%g      I@z#partial overlap with duplicate textz Something totally different herez$partial overlap without sharing text)r   )rr   rZ   )rs   rZ   )rs   r~   ))rY   r}   )r|   rt   )r~   rt   r   zSmall partial overlapz8.33%z3.23%u   20pxˆ2u   12pxˆ2u   32pxˆ2))rq   r~   )rq   rt   )rt   rt   )rt   r~   ))r~   rs   )r~   	   )r   r   )r   rs   Fc                 p    t          j        | ||          \  }}||d         k    sJ ||d         k    sJ d S )Nr   rY   )r   #catch_overlapping_and_nested_bboxes)rm   rp   rn   ro   overlapping_flagoverlapping_casess         r   (test_catch_overlapping_and_nested_bboxesr   u   sY    \ +0*S!+ +''
 {1~----A......r   )r   r   rE   unstructuredr   "unstructured.documents.coordinatesr   unstructured.documents.elementsr   r   r   fixturer   r   r"   r*   r.   r:   r?   rH   rL   rW   markparametrizeranger_   rc   rf   rj   rl   r   r   r   r   <module>r      sk    				        9 9 9 9 9 9 Q Q Q Q Q Q Q Q Q Q    2 2 2   # # ## # #
           q!ffeeBii!dEERSHH%UVV& & WV& b"X..  /.
 qc4q%:;;& & <;& q!ffeeBii%@AA" " BA"
 b"X..  /.
 T , @&0jr"&E&E&E,_;;;	   + @&0jr"&E&E&E,_;;;	    2?@U0V,K.48<8?(0(0*3% %	 #"	
J , @&0jr"&E&E&E,_;;;	   + @&0jr"&E&E&E,_;;;	    2BC[0\-0.68<8?(0(0*3% % ##	
L , @&0jr"&E&E&E,_;;;	   , @&0jr"&E&E&E,_;;;	    2BC[0\,Q.68<8?(0(0*3% %	 #"	
J , @&0jr"&E&E&E,_;;;	   ; @&0jr"&E&E&E,_;;;	    2BC[0\,R.6898?(0(0*3% %	 #"	
J , B&0jr"&E&E&E,_;;;	   + @&0jr"&E&E&E,_;;;	    2BC[0\,C.58<8?(1(1*3% %	 #"	
J , @&0jr"&E&E&E,_;;;	   + @&0jr"&E&E&E,_;;;	   BK#	
cDG GP/ /QG GP/ / /r   