
    Ug7                    &   S r SSKJr  SSKrSSKrSSKrSSKJrJ	r	  \(       a  SSK
Jr  \R                  R                  S5      r\R                  R                  \5      r\R"                  R%                  \5        \\R&                  S'   \R(                  " S5      \l        \R(                  " S	5      \l        \R,                  \l        \R(                  " S
\R0                  5      \l        \R(                  " S5      r " S S\R6                  5      rg)a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
    )annotationsN)TYPE_CHECKINGSequence)Markdownzhtml.parser
htmlparserz\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
z^([ ]*\n){2}c                  6  ^  \ rS rSr% SrSU 4S jjrU 4S jrU 4S jr\SS j5       r	SS jr
S S jrS!S	 jrS"S
 jrS#S jrS$S jrS"S jrS%S jrS%S jrS#S jrS#S jrS#S jrS#S jrS&U 4S jjrS&U 4S jjrS'S(U 4S jjjrSrS\S'   S)S jrS&S jrSrU =r$ )*HTMLExtractorJ   z
Extract raw HTML from text.

The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
[`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
is stored in `cleandoc` as a list of strings.
c                z   > SU;  a  SUS'   [        S/5      U l        S/U l        [        TU ]  " U0 UD6  Xl        g )Nconvert_charrefsFhrr   )set
empty_tagslineno_start_cachesuper__init__md)selfr   argskwargs	__class__s       5/usr/lib/python3/dist-packages/markdown/htmlparser.pyr   HTMLExtractor.__init__S   sH    V+).F%& tf+#$# 	$)&)    c                x   > SU l         SU l        / U l        / U l        / U l        S/U l        [        TU ]  5         g)z1Reset this instance.  Loses all unprocessed data.Fr   N)inrawintailstack_cachecleandocr   r   resetr   r   s    r   r!   HTMLExtractor.reset`   s9    
 "
!##%#$#r   c                   > [         TU ]  5         [        U R                  5      (       al  U R                  (       a@  U R
                  (       d/  U R                  [        R                  U R                  5      5        OU R                  U R                  5        [        U R                  5      (       a_  U R                  R                  U R                  R                  R                  SR                  U R                  5      5      5        / U l	        gg)zHandle any buffered data. N)r   closelenrawdatar   
cdata_elemhandle_datar   unescaper   r    appendr   	htmlStashstorejoinr"   s    r   r&   HTMLExtractor.closek   s    t|| $$T__  !4!4T\\!BC  .t{{MM  !2!2!8!89M!NODK r   c                n   [        [        U R                  5      S-
  U R                  S-
  5       Hg  nU R                  U   nU R                  R                  SU5      nUS:X  a  [        U R                  5      nU R                  R                  US-   5        Mi     U R                  U R                  S-
     $ )zHReturns char index in `self.rawdata` for the start of the current line.    
)ranger'   r   linenor(   findr,   )r   iilast_line_start_poslf_poss       r   line_offsetHTMLExtractor.line_offsetz   s     D334Q6AFB"&"9"9""=\\&&t-@AF|T\\*##**6!84 G &&t{{1}55r   c                    U R                   S:X  a  gU R                   S:  a  gU R                  U R                  U R                  U R                   -    R                  5       S:H  $ )zn
Returns True if current position is at start of line.

Allows for up to three blank spaces at start of line.
r   T   Fr%   )offsetr(   r;   stripr   s    r   at_line_startHTMLExtractor.at_line_start   sV     ;;!;;?||D,,T-=-=-KLRRTXZZZr   c                    U R                   U R                  -   n[        R                  R	                  U R
                  U5      nU(       a  U R
                  X#R                  5        $ SR                  U5      $ )z
Returns the text of the end tag.

If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
z</{}>)r;   r?   r   	endendtagsearchr(   endformat)r   tagstartms       r   get_endtag_textHTMLExtractor.get_endtag_text   s]       4;;.  ''e<<<eeg.. >>#&&r   c                ^   XR                   ;   a  U R                  X5        g U R                  R                  U5      (       aY  U R                  (       d&  U R                  5       (       a3  U R                  (       d"  SU l        U R                  R                  S5        U R                  5       nU R                  (       a7  U R                  R                  U5        U R                  R                  U5        g U R                  R                  U5        XR                  ;   a  U R                  5         g g )NTr3   )r   handle_startendtagr   is_block_levelr   rB   r   r    r,   get_starttag_textr   r   CDATA_CONTENT_ELEMENTSclear_cdata_mode)r   rI   attrstexts       r   handle_starttagHTMLExtractor.handle_starttag   s    //!##C/77!!#&&DKKD<N<N<P<PY]YcYcDJMM  &%%'::JJc"KKt$MM  &111%%' 2r   c                j   U R                  U5      nU R                  (       Gau  U R                  R                  U5        XR                  ;   aC  U R                  (       a2  U R                  R                  5       U:X  a  OU R                  (       a  M2  [        U R                  5      S:X  a  [        R                  U R                  U R                  U R                  -   [        U5      -   S  5      (       a  U R                  R                  S5        OSU l        SU l        U R                  R                  U R                  R                  R!                  SR#                  U R                  5      5      5        U R                  R                  S5        / U l        g g U R                  R                  U5        g )Nr   r3   TFr%   

)rL   r   r   r,   r   popr'   blank_line_rematchr(   r;   r?   r   r    r   r-   r.   r/   )r   rI   rU   s      r   handle_endtagHTMLExtractor.handle_endtag   s1   ##C(:::KKt$jj jjzz~~'3. jjj 4::!# &&t||D4D4Dt{{4RUXY]U^4^4_'`aaKK&&t, #'DK"
$$TWW%6%6%<%<RWWT[[=Q%RS$$V,  $ MM  &r   c                    U R                   (       a  SU;   a  SU l         U R                  (       a  U R                  R                  U5        g U R                  R                  U5        g )Nr3   F)r   r   r   r,   r    r   datas     r   r*   HTMLExtractor.handle_data   sA    ;;44<DK::KKt$MM  &r   c                &   U R                   (       d  U R                  (       a  U R                  R                  U5        gU R	                  5       (       Ga"  U(       Ga  [
        R                  U R                  U R                  U R                  -   [        U5      -   S 5      (       a  US-  nOSU l        U R                  (       a  U R                  S   OSnUR                  S5      (       d1  UR                  S5      (       a  U R                  R                  S5        U R                  R                  U R                  R                  R                  U5      5        U R                  R                  S5        gU R                  R                  U5        g)zHandle empty tags (`<data>`). Nr3   Tr4   r%   rY   )r   r   r   r,   rB   r[   r\   r(   r;   r?   r'   r    endswithr   r-   r.   )r   ra   is_blockitems       r   handle_empty_tagHTMLExtractor.handle_empty_tag   s   ::KKt$!!h""4<<0@0@4;;0NQTUYQZ0Z0[#\]] #(,4==$2D==((T]]4-@-@$$T*MM  !2!2!8!8!>?MM  (MM  &r   c                r    U R                  U R                  5       U R                  R                  U5      S9  g )Nre   )rg   rQ   r   rP   )r   rI   rT   s      r   rO    HTMLExtractor.handle_startendtag   s.    d446AWAWX[A\]r   c                B    U R                  SR                  U5      SS9  g )Nz&#{};Frj   rg   rH   r   names     r   handle_charrefHTMLExtractor.handle_charref   s    gnnT2UCr   c                B    U R                  SR                  U5      SS9  g )Nz&{};Frj   rm   rn   s     r   handle_entityrefHTMLExtractor.handle_entityref   s    fmmD1EBr   c                B    U R                  SR                  U5      SS9  g )Nz	<!--{}-->Trj   rm   r`   s     r   handle_commentHTMLExtractor.handle_comment   s     k006Fr   c                B    U R                  SR                  U5      SS9  g )Nz<!{}>Trj   rm   r`   s     r   handle_declHTMLExtractor.handle_decl   s    gnnT2TBr   c                B    U R                  SR                  U5      SS9  g )Nz<?{}?>Trj   rm   r`   s     r   	handle_piHTMLExtractor.handle_pi  s    hood3dCr   c                v    UR                  S5      (       a  SOSnU R                  SR                  X5      SS9  g )NzCDATA[z]]>z]>z<![{}{}Trj   )
startswithrg   rH   )r   ra   rG   s      r   unknown_declHTMLExtractor.unknown_decl  s5    x00edi..t9DIr   c                   > U R                  5       (       d  U R                  (       a  [        TU ]  U5      $ U R	                  S5        US-   $ )Nz<?   )rB   r   r   parse_pir*   r   ir   s     r   r   HTMLExtractor.parse_pi  sA    4;;7#A&& 	1ur   c                   > U R                  5       (       d  U R                  (       a  [        TU ]  U5      $ U R	                  S5        US-   $ )Nz<!r   )rB   r   r   parse_html_declarationr*   r   s     r   r   $HTMLExtractor.parse_html_declaration  sA    4;;71!44 	1ur   c                l   > [         TU ]  X5      nUS:X  a  gU R                  U R                  X SS9  U$ )Nr4   Frj   )r   parse_bogus_commentrg   r(   )r   r   reportposr   s       r   r   !HTMLExtractor.parse_bogus_comment  s?     g)!4"9dll11EB
r   Nz
str | None_HTMLExtractor__starttag_textc                    U R                   $ )z)Return full source of start tag: `<...>`.)r   rA   s    r   rQ   HTMLExtractor.get_starttag_text'  s    ###r   c                    S U l         U R                  U5      nUS:  a  U$ U R                  nX1U U l         / n[        R                  R                  X1S-   5      nU(       d   S5       eUR                  5       nUR                  S5      R                  5       =U l	        nXb:  a  [        R                  R                  X65      nU(       d  OUR                  SSS5      u  pnU
(       d  S nO0US S Ss=:X  a	  USS  :X  d  O  US S Ss=:X  a	  USS  :X  a  O  OUSS nU(       a  [        R                  U5      nUR                  U	R                  5       U45        UR                  5       nXb:  a  M  X6U R                  5       nUS	;  a  U R                  5       u  pS
U R                   ;   aO  XR                   R                  S
5      -   n[!        U R                   5      U R                   R#                  S
5      -
  nOU[!        U R                   5      -   nU R%                  X1U 5        U$ UR'                  S5      (       a  U R)                  Xt5        U$ XpR*                  ;   a  U R-                  U5        U R/                  Xt5        U$ )Nr   r2   z#unexpected call to parse_starttag()r   r>   'r4   ")>/>r3   r   )r   check_for_whole_start_tagr(   r   tagfind_tolerantr\   rG   grouplowerlasttagattrfind_tolerantr+   r,   r@   getposcountr'   rfindr*   rd   rO   rR   set_cdata_moderV   )r   r   endposr(   rT   r\   krI   rK   attrnamerest	attrvaluerG   r6   r?   s                  r   parse_starttagHTMLExtractor.parse_starttag+  sI   #//2A:M,,&0 ++11'Q3?;;;uIIK"[[^1133sj,,227>A()1a(8%HI 	2A$8)BC.82A#7237%aO	&//	:	LL(..*I67A j %%'k!![[]NFt+++"6"6"<"<T"BBT112//55d;<  #d&:&:";;Wv./M<<##C/  111##C(  ,r   )
__starttag_textr   r    r   r   r   r   r   r   r   )r   r   )returnint)r   bool)rI   strr   r   )rI   r   rT   zSequence[tuple[str, str]])rI   r   )ra   r   )ra   r   re   r   )ro   r   )r   r   r   r   )r   )r   r   r   r   r   r   )r   r   ) __name__
__module____qualname____firstlineno____doc__r   r!   r&   propertyr;   rB   rL   rV   r]   r*   rg   rO   rp   rs   rv   ry   r|   r   r   r   r   r   __annotations__rQ   r   __static_attributes____classcell__)r   s   @r   r	   r	   J   s    	 
6 
6['(*'6''.^DCGCDJ  #'OZ&$0 0r   r	   )r   
__future__r   reimportlib.util	importlibsystypingr   r   markdownr   util	find_specspecmodule_from_specr   loaderexec_modulemodulescompilepiclose	entityref
incompleteVERBOSElocatestarttagend_tolerantr[   
HTMLParserr	    r   r   <module>r      s   ( # 	  
 *!
 ~~.^^,,T2
   
 #&L  ZZ'
 zz"?@
  #,,
 (*

 4 ZZ)
 %$ 

?+QJ)) Qr   