
    j                        U d dl mZmZmZmZ d dlZd dlZd dlmZ d dl	m
Z
mZ d dlmZ d dlmZmZ d dlmZmZmZ d dlmZ d	eeef         d
efdZdZeed<    e             eej                   e            dddddddddddgdfdee         deee                  dee         dee         dedededee         dee         dee         deee                  ded
ee
         fd                                    ZdS )    )IOListOptionalUnionN)add_chunking_strategy)Elementprocess_metadata)VALID_PARSERS)FileTypeadd_metadata_with_filetype)exactly_oneget_last_modified_date get_last_modified_date_from_file)partition_htmlcontentsreturnc                 Z    t          | t                    r|                     d          S | S )Nzutf-8)
isinstancebytesdecode)r   s    c/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/unstructured/partition/md.pyoptional_decoder      s+    (E"" (w'''O    mdDETECTION_ORIGINFTautofilenamefiletexturlinclude_page_breaksinclude_metadataparsermetadata_filenamemetadata_last_modifiedchunking_strategy	languagesdetect_language_per_elementc                    |d}t          | |||           d}| Zt          |           }t          | d          5 }t          |                                          }ddd           n# 1 swxY w Y   n|1t          |          }t          |                                          }n~||t          j        |          }|j        st          d|j
                   |j                            dd          }|                    d          st          d	| d
          |j        }t          j        |dg          }t          ||||d||p||
|t           
  
        S )a  Partitions a markdown file into its constituent elements

    Parameters
    ----------
    filename
        A string defining the target filename path.
    file
        A file-like object using "rb" mode --> open(filename, "rb").
    text
        The string representation of the markdown document.
    url
        The URL of a webpage to parse. Only for URLs that return a markdown document.
    include_page_breaks
        If True, the output will include page breaks if the filetype supports it.
    include_metadata
        Determines whether or not metadata is included in the output.
    parser
        The parser to use for parsing the markdown document. If None, default parser will be used.
    metadata_last_modified
        The last modified date for the document.
    languages
        User defined value for `metadata.languages` if provided. Otherwise language is detected
        using naive Bayesian filter via `langdetect`. Multiple languages indicates text could be
        in either language.
        Additional Parameters:
            detect_language_per_element
                Detect language per element instead of at the document level.
    N )r   r   r   r    utf8)encodingzURL return an error: zContent-Typeztext/markdownz)Expected content type text/markdown. Got .tables)
extensionsr   )
r   r!   r"   r#   source_formatr$   r%   r'   r(   detection_origin)r   r   openr   readr   requestsgetok
ValueErrorstatus_codeheaders
startswithr   markdownr   r   )r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   kwargslast_modification_datefresponsecontent_typehtmls                     r   partition_mdrB      s   ^ |4SAAAA!!7!A!A(V,,, 	-"16688,,D	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 
	!A$!G!Gtyy{{++	<$${ 	MKX5IKKLLL'++NB??&&77 	KLKKK   }Txj999D/)+5O9O$?)   s   "A**A.1A.)typingr   r   r   r   r;   r4   unstructured.chunkingr   unstructured.documents.elementsr   r	   unstructured.documents.xmlr
    unstructured.file_utils.filetyper   r   unstructured.partition.commonr   r   r   unstructured.partition.htmlr   strr   r   r   __annotations__MDboolrB    r   r   <module>rO      sa   , , , , , , , , , , , , ,   7 7 7 7 7 7 E E E E E E E E 4 4 4 4 4 4 Q Q Q Q Q Q Q Q         
 7 6 6 6 6 6eCJ/ C      #    HK((" $ %! '+,0'+&,X(-U UsmU
2e9
U 3-U 
#	U
 U U U  }U %SMU  }U S	"U "&U 
']U U U  )( U U Ur   