
    j7                       d Z ddlmZ ddlZddlZddlZddlmZm	Z	 erddl
mZ ej                            d          Zej                            e          Zej                            e           eej        d<    ej        d          e_         ej        d	          e_        ej        e_         ej        d
ej                  e_         ej        d          Z G d dej                  ZdS )a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
    )annotationsN)TYPE_CHECKINGSequence)Markdownzhtml.parser
htmlparserz\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
z^([ ]*\n){2}c                       e Zd ZU dZd* fdZ fdZ fdZed+d	            Zd,dZ	d-dZ
d.dZd/dZd0dZd1dZd/dZd2dZd2dZd0dZd0dZd0dZd0dZd3 fd Zd3 fd!Zd4d5 fd$Zd%Zd&ed'<   d6d(Zd3d)Z xZS )7HTMLExtractorz
    Extract raw HTML from text.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
    is stored in `cleandoc` as a list of strings.
    mdr   c                    d|vrd|d<   t          dg          | _        dg| _         t                      j        |i | || _        d S )Nconvert_charrefsFhrr   )set
empty_tagslineno_start_cachesuper__init__r
   )selfr
   argskwargs	__class__s       ]/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/markdown/htmlparser.pyr   zHTMLExtractor.__init__S   s]    V++).F%& tf++#$# 	$)&)))    c                    d| _         d| _        g | _        g | _        g | _        dg| _        t                                                       dS )z1Reset this instance.  Loses all unprocessed data.Fr   N)inrawintailstack_cachecleandocr   r   resetr   r   s    r   r   zHTMLExtractor.reset`   sE    
 "
!##%#$#r   c                   t                                                       t          | j                  r[| j        r:| j        s3|                     t                              | j                             n|                     | j                   t          | j	                  rX| j
                            | j        j                            d                    | j	                                       g | _	        dS dS )zHandle any buffered data. N)r   closelenrawdatar   
cdata_elemhandle_datar   unescaper   r   appendr
   	htmlStashstorejoinr    s    r   r#   zHTMLExtractor.closek   s    t| 	/ $ /T_ /  !4!4T\!B!BCCCC  ...t{ 	M  !2!8!89M9M!N!NOOODKKK	 	r   returnintc                J   t          t          | j                  dz
  | j        dz
            D ]a}| j        |         }| j                            d|          }|dk    rt          | j                  }| j                            |dz              b| j        | j        dz
           S )zHReturns char index in `self.rawdata` for the start of the current line.    
)ranger$   r   linenor%   findr)   )r   iilast_line_start_poslf_poss       r   line_offsetzHTMLExtractor.line_offsetz   s     D344Q6AFF 	5 	5B"&"9""=\&&t-@AAF||T\**#**6!84444&t{1}55r   boolc                    | j         dk    rdS | j         dk    rdS | j        | j        | j        | j         z                                            dk    S )z
        Returns True if current position is at start of line.

        Allows for up to three blank spaces at start of line.
        r   T   Fr"   )offsetr%   r9   stripr   s    r   at_line_startzHTMLExtractor.at_line_start   sW     ;!4;??5|D,T-=-KKLRRTTXZZZr   tagstrc                    | j         | j        z   }t          j                            | j        |          }|r!| j        ||                                         S d                    |          S )z
        Returns the text of the end tag.

        If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
        z</{}>)r9   r=   r   	endendtagsearchr%   endformat)r   rA   startms       r   get_endtag_textzHTMLExtractor.get_endtag_text   sb      4;. ''e<< 	'<aeegg.. >>#&&&r   attrsSequence[tuple[str, str]]c                   || j         v r|                     ||           d S | j                            |          rC| j        s|                                 r(| j        s!d| _        | j                            d           | 	                                }| j        r6| j
                            |           | j                            |           d S | j                            |           || j        v r|                                  d S d S )NTr1   )r   handle_startendtagr
   is_block_levelr   r@   r   r   r)   get_starttag_textr   r   CDATA_CONTENT_ELEMENTSclear_cdata_mode)r   rA   rK   texts       r   handle_starttagzHTMLExtractor.handle_starttag   s   $/!!##C///F7!!#&& 	'DK 	'D<N<N<P<P 	'Y]Yc 	'DJM  &&&%%'': 	(Jc"""Kt$$$$$M  &&&d111%%''''' 21r   c                   |                      |          }| j        rH| j                            |           || j        v r,| j        r%| j                                        |k    rn| j        %t          | j                  dk    rt                              | j	        | j
        | j        z   t          |          z   d                    r| j                            d           nd| _        d| _        | j                            | j        j                            d                    | j                                       | j                            d           g | _        d S d S | j                            |           d S )Nr   r1   TFr"   

)rJ   r   r   r)   r   popr$   blank_line_rematchr%   r9   r=   r   r   r
   r*   r+   r,   )r   rA   rS   s      r   handle_endtagzHTMLExtractor.handle_endtag   sj   ##C((: 	'Kt$$$dj  j z~~''3.. j  4:!## &&t|D4Dt{4RUXY]U^U^4^4_4_'`aa 'K&&t,,,, #'DK"
$$TW%6%<%<RWWT[=Q=Q%R%RSSS$$V,,,  $# M  &&&&&r   datac                    | j         rd|v rd| _         | j        r| j                            |           d S | j                            |           d S )Nr1   F)r   r   r   r)   r   r   r[   s     r   r'   zHTMLExtractor.handle_data   s]    ; 	 44<<DK: 	'Kt$$$$$M  &&&&&r   is_blockc                   | j         s| j        r| j                            |           dS |                                 r |rt
                              | j        | j        | j	        z   t          |          z   d                   r|dz  }nd| _        | j        r| j        d         nd}|                    d          s/|                    d          r| j                            d           | j                            | j        j                            |                     | j                            d           dS | j                            |           dS )z Handle empty tags (`<data>`). Nr1   Tr2   r"   rV   )r   r   r   r)   r@   rX   rY   r%   r9   r=   r$   r   endswithr
   r*   r+   )r   r[   r^   items       r   handle_empty_tagzHTMLExtractor.handle_empty_tag   sR   : 	' 	'Kt$$$$$!! 	'h 	'""4<0@4;0NQTUYQZQZ0Z0[0[#\]] # #(,=4=$$2D==(( +T]]4-@-@ +$$T***M  !2!8!8!>!>???M  (((((M  &&&&&r   c                    |                      |                                 | j                            |                     d S )Nr^   )rb   rP   r
   rO   )r   rA   rK   s      r   rN   z HTMLExtractor.handle_startendtag   s>    d4466AWAWX[A\A\]]]]]r   namec                Z    |                      d                    |          d           d S )Nz&#{};Frd   rb   rG   r   re   s     r   handle_charrefzHTMLExtractor.handle_charref   s-    gnnT22UCCCCCr   c                Z    |                      d                    |          d           d S )Nz&{};Frd   rg   rh   s     r   handle_entityrefzHTMLExtractor.handle_entityref   s-    fmmD11EBBBBBr   c                Z    |                      d                    |          d           d S )Nz	<!--{}-->Trd   rg   r]   s     r   handle_commentzHTMLExtractor.handle_comment   s/    k0066FFFFFr   c                Z    |                      d                    |          d           d S )Nz<!{}>Trd   rg   r]   s     r   handle_declzHTMLExtractor.handle_decl   s-    gnnT22TBBBBBr   c                Z    |                      d                    |          d           d S )Nz<?{}?>Trd   rg   r]   s     r   	handle_pizHTMLExtractor.handle_pi  s-    hood33dCCCCCr   c                    |                     d          rdnd}|                     d                    ||          d           d S )NzCDATA[z]]>z]>z<![{}{}Trd   )
startswithrb   rG   )r   r[   rF   s      r   unknown_declzHTMLExtractor.unknown_decl  sK    x00:eedi..tS99DIIIIIr   ic                    |                                  s| j        r!t                                          |          S |                     d           |dz   S )Nz<?   )r@   r   r   parse_pir'   r   ru   r   s     r   rx   zHTMLExtractor.parse_pi  sW     	'4; 	'77##A&&& 	1ur   c                    |                                  s| j        r!t                                          |          S |                     d           |dz   S )Nz<!rw   )r@   r   r   parse_html_declarationr'   ry   s     r   r{   z$HTMLExtractor.parse_html_declaration  sW     	54; 	57711!444 	1ur   r   reportc                    t                                          ||          }|dk    rdS |                     | j        ||         d           |S )Nr2   Frd   )r   parse_bogus_commentrb   r%   )r   ru   r|   posr   s       r   r~   z!HTMLExtractor.parse_bogus_comment  sU     gg))!V44"992dl1S51EBBB
r   Nz
str | None_HTMLExtractor__starttag_textc                    | j         S )z)Return full source of start tag: `<...>`.)r   r?   s    r   rP   zHTMLExtractor.get_starttag_text'  s    ##r   c                   d | _         |                     |          }|dk     r|S | j        }|||         | _         g }t          j                            ||dz             }|s
J d            |                                }|                    d                                          x| _	        }||k     rt          j
                            ||          }|sn|                    ddd          \  }	}
}|
sd }nI|d d         dcxk    r|dd          k    s"n |d d         dcxk    r|dd          k    rn n
|dd         }|rt                              |          }|                    |	                                |f           |                                }||k     |||                                         }|d	vr|                                 \  }}d
| j         v rM|| j                             d
          z   }t!          | j                   | j                             d
          z
  }n|t!          | j                   z   }|                     |||                    |S |                    d          r|                     ||           n4|| j        v r|                     |           |                     ||           |S )Nr   r0   z#unexpected call to parse_starttag()rw   r<   'r2   ")>/>r1   r   )r   check_for_whole_start_tagr%   r   tagfind_tolerantrY   rF   grouplowerlasttagattrfind_tolerantr(   r)   r>   getposcountr$   rfindr'   r`   rN   rQ   set_cdata_moderT   )r   ru   endposr%   rK   rY   krA   rI   attrnamerest	attrvaluerF   r4   r=   s                  r   parse_starttagzHTMLExtractor.parse_starttag+  s   #//22A::M,&qx0 +11'1Q3??;;;;;uIIKK"[[^^11333s&jj,227A>>A ()1a(8(8%HdI , 		2A2$8888)BCC.88882A2#777723377777%adO	 ;&//	::	LL(..**I6777A &jj ah%%''k!!![[]]NFFt+++$"6"<"<T"B"BBT122/55d;;<  #d&:";";;WQvX.///M<< 	-##C//// d111##C(((  e,,,r   )r
   r   )r-   r.   )r-   r:   )rA   rB   r-   rB   )rA   rB   rK   rL   )rA   rB   )r[   rB   )r[   rB   r^   r:   )re   rB   )ru   r.   r-   r.   )r   )ru   r.   r|   r.   r-   r.   )r-   rB   )__name__
__module____qualname____doc__r   r   r#   propertyr9   r@   rJ   rT   rZ   r'   rb   rN   ri   rk   rm   ro   rq   rt   rx   r{   r~   r   __annotations__rP   r   __classcell__)r   s   @r   r	   r	   J   sY              	 	 	 	 	     
6 
6 
6 X
6[ [ [ [' ' ' '( ( ( (*' ' ' '6' ' ' '' ' ' '.^ ^ ^ ^D D D DC C C CG G G GC C C CD D D DJ J J J                 #'O&&&&$ $ $ $0 0 0 0 0 0 0 0r   r	   )r   
__future__r   reimportlib.util	importlibsystypingr   r   markdownr   util	find_specspecmodule_from_specr   loaderexec_modulemodulescompilepiclose	entityref
incompleteVERBOSElocatestarttagend_tolerantrX   
HTMLParserr	    r   r   <module>r      so  (  # " " " " " 				     



 * * * * * * * * "!!!!!!
 ~..^,,T22
   
 # # #&L   RZ''
 !rz"?@@
  #,
 (2
 4 Z) )
 %$ 
?++Q Q Q Q QJ) Q Q Q Q Qr   