
    j                    v    d dl mZ d dlmZ d dlmZmZ d dlmZm	Z	  G d de          Z
 G d de          Zd	S )
    )annotations)ABC)ListOptional)ElementNarrativeTextc                      e Zd ZdZd ZddZddZedd	            Zedd            Z	ddZ
ddZd Zedd            Zedd            ZdS )DocumentzWThe base class for all document types. A document consists of an ordered list of pages.c                "    d | _         d | _        d S N)_pages	_elementsselfs    e/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/documents/base.py__init__zDocument.__init__   s    ,026    returnstrc                J    d                     d | j        D                       S )N

c                ,    g | ]}t          |          S  r   ).0pages     r   
<listcomp>z$Document.__str__.<locals>.<listcomp>   s    ===$CII===r   )joinpagesr   s    r   __str__zDocument.__str__   s%    {{==$*===>>>r   List[NarrativeText]c                    g }| j         D ]6}|j        D ],}t          |t                    r|                    |           -7|S )z?Pulls out all of the narrative text sections from the document.)r   elements
isinstancer   append)r   	narrativer   elements       r   get_narrativezDocument.get_narrative   s]    )+	J 	. 	.D= . .g}55 .$$W---. r   
List[Page]c                <    | j         t          d          | j         S )1Gets all elements from pages in sequential order.NzTWhen subclassing, _pages should always be populated before using the pages property.)r   NotImplementedErrorr   s    r   r   zDocument.pages   s-     ;%,   {r   List[Element]c                J    | j         d | j        D             | _         | j         S )r+   Nc                &    g | ]}|j         D ]}|S r   )r#   )r   r   els      r   r   z%Document.elements.<locals>.<listcomp>*   s'    PPPT$-PPBbPPPPr   )r   r   r   s    r   r#   zDocument.elements&   s+     >!PPTZPPPDN~r   r'   r   c                    | j         }d |D             }|                    t          |                    dz   }| j                            ||d                   S )zRReturns a single page document containing all the elements after the given elementc                ,    g | ]}t          |          S r   idr   r0   s     r   r   z*Document.after_element.<locals>.<listcomp>0       111"r"vv111r      Nr#   indexr4   	__class__from_elements)r   r'   r#   element_ids	start_idxs        r   after_elementzDocument.after_element-   sY    =11111%%bkk22Q6	~++HYZZ,@AAAr   c                    | j         }d |D             }|                    t          |                    }| j                            |d|                   S )zSReturns a single page document containing all the elements before the given elementc                ,    g | ]}t          |          S r   r3   r5   s     r   r   z+Document.before_element.<locals>.<listcomp>7   r6   r   Nr8   )r   r'   r#   r<   end_idxs        r   before_elementzDocument.before_element4   sT    =11111##BwKK00~++HXgX,>???r   c                    t          d                    d |                                 D                                  dS )z3Prints the narrative text sections of the document.r   c                ,    g | ]}t          |          S r   r   r5   s     r   r   z,Document.print_narrative.<locals>.<listcomp>=   s    BBBr3r77BBBr   N)printr   r(   r   s    r   print_narrativezDocument.print_narrative;   s=    fkkBBT-?-?-A-ABBBCCDDDDDr   r#   c                j    |rt          d          }||_        |g}ng }|                     |          S )z?Generates a new instance of the class from a list of `Element`sr   )number)Pager#   
from_pages)clsr#   r   r   s       r   r;   zDocument.from_elements?   s@      	q>>>D$DMFEEE~~e$$$r   r   c                (     |             }||_         |S )z<Generates a new instance of the class from a list of `Page`s)r   )rK   r   docs      r   rJ   zDocument.from_pagesJ   s     cee

r   N)r   r   )r   r!   )r   r)   )r   r-   )r'   r   r   r
   )r#   r-   r   r
   )r   r)   r   r
   )__name__
__module____qualname____doc__r   r    r(   propertyr   r#   r>   rB   rF   classmethodr;   rJ   r   r   r   r
   r
   	   s       aa7 7 7? ? ? ?       X    XB B B B@ @ @ @E E E % % % [%    [  r   r
   c                       e Zd ZdZddZd ZdS )rI   zA page consists of an ordered set of elements. The intent of the ordering is to align
    with the order in which a person would read the document.rH   intc                "    || _         g | _        d S r   )rH   r#   )r   rH   s     r   r   zPage.__init__V   s    !')r   c                J    d                     d | j        D                       S )Nr   c                ,    g | ]}t          |          S r   r   )r   r'   s     r   r   z Page.__str__.<locals>.<listcomp>[   s    FFFWCLLFFFr   )r   r#   r   s    r   r    zPage.__str__Z   s%    {{FFFFFGGGr   N)rH   rU   )rN   rO   rP   rQ   r   r    r   r   r   rI   rI   R   sH        A A* * * *H H H H Hr   rI   N)
__future__r   abcr   typingr   r   unstructured.documents.elementsr   r   r
   rI   r   r   r   <module>r]      s    " " " " " "       ! ! ! ! ! ! ! ! B B B B B B B BF F F F Fs F F FR	H 	H 	H 	H 	H3 	H 	H 	H 	H 	Hr   