
    Xj@                    `   U d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ  ee	          Z
dddddddddd		Zd
ed<   dZ G d d          Zi Zded<    ej                    Zd6dZddddddddddddddddd d!ddd"d#ddd$d%ddd&dd'dgZd(d)d*d+d,d-d.d/d0d		Zd1Z G d2 d3          Zd7d5ZdS )8    )annotationsN)urlparse)
get_logger      
      )	               r      r   	   zdict[int, int]_CATEGORY_RATEc                      e Zd ZddZd ZdS )_TokenBucketrate_per_minuteintc                R    || _         g | _        t          j                    | _        d S N)rate_timestampsasyncioLock_lock)selfr   s     [/lsinfo/ai/hellotax_ai/data_center/backend/app/services/tax_data_processor/anti_blocking.py__init__z_TokenBucket.__init__   s"    #	(*\^^


    c                d  K   | j         4 d {V  t          j                    dfd| j        D             | _        t	          | j                  | j        k    r| j        d         }|z
  z
  dz   }|dk    ret                              d|dd           t          j	        |           d {V  t          j                    fd| j        D             | _        | j        
                    t          j                               d d d           d {V  d S # 1 d {V swxY w Y   d S )	Ng      N@c                &    g | ]}|z
  k     |S  r#   .0tnowwindows     r   
<listcomp>z(_TokenBucket.acquire.<locals>.<listcomp>   s'    PPPasQw?O?O?O?O?Or    r   g?u   令牌桶满，等待 .2fsc                &    g | ]}|z
  k     |S r#   r#   r$   s     r   r)   z(_TokenBucket.acquire.<locals>.<listcomp>"   s(    'X'X'XasQwQWGWGWGWGWGWr    )r   time	monotonicr   lenr   loggerdebugr   sleepappend)r   oldestwaitr'   r(   s      @@r   acquirez_TokenBucket.acquire   s     : 	6 	6 	6 	6 	6 	6 	6 	6.""CFPPPPP4+;PPPD4#$$	11)!,v.5!88LL!E$!E!E!E!EFFF!----------.**C'X'X'X'X'X43C'X'X'XD$##DN$4$4555	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6 	6s   C:D
D),D)N)r   r   )__name__
__module____qualname__r   r6   r#   r    r   r   r      s7        $ $ $ $
6 6 6 6 6r    r   zdict[tuple, _TokenBucket]_bucketsdomainstrcategory_idr   returnc                  K   | |f}t           4 d {V  |t          vr7t                              |t                    }t          |          t          |<   t          |         cd d d           d {V  S # 1 d {V swxY w Y   d S r   )_buckets_lockr:   r   get_DEFAULT_RATEr   )r;   r=   keyr   s       r   _get_bucketrD   '   s     ;
C        h!%%k=AAD(..HSM}	                             s   AA44
A>A>zoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36z#zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7Win32)ualangplatformzoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36zzh-CN,zh;q=0.9,en;q=0.8zuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36MacIntelzuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36zzh-CN,zh;q=0.9zPMozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0z;zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2zTMozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0z#zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3z}Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0z/zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6zeMozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36zLinux x86_64z:https://fgk.chinatax.gov.cn/zcfgk/c100009/listflfg_fg.htmlz:https://fgk.chinatax.gov.cn/zcfgk/c100010/listflfg_fg.htmlz7https://fgk.chinatax.gov.cn/zcfgk/c102440/listflfg.htmlz<https://fgk.chinatax.gov.cn/zcfgk/c100011/list_guizhang.htmlz7https://fgk.chinatax.gov.cn/zcfgk/c102416/listflfg.htmlz7https://fgk.chinatax.gov.cn/zcfgk/c100012/listflfg.htmlz7https://fgk.chinatax.gov.cn/zcfgk/c100013/listflfg.htmlz7https://fgk.chinatax.gov.cn/zcfgk/c102424/listflfg.htmlz8https://fgk.chinatax.gov.cn/zcfgk/c100015/list_zcjd.htmlzhttps://fgk.chinatax.gov.cn/c                  ^    e Zd Zd eD             ZdddZd d!dZd"d#dZd$dZd Z	d Z
d ZdS )%AntiBlockingStrategyc                    g | ]
}|d          S )rF   r#   )r%   ps     r   r)   zAntiBlockingStrategy.<listcomp>3   s    111q1T7111r          ?      @r   Nfgk.chinatax.gov.cn	delay_minfloat	delay_maxr=   r   
proxy_listlist[str] | Noner;   r<   max_requests_per_minute
int | Nonec                    || _         || _        || _        |pg | _        || _        ||z   dz  }|| _        ||z
  dz  | _        || _        g | _        |pt          
                    |t                    | _        d S )Nr   r   )rQ   rS   r=   rT   r;   	_delay_mu_delay_sigma_override_raterequest_timesr   rA   rB   rV   )r   rQ   rS   r=   rT   r;   rV   mids           r   r   zAntiBlockingStrategy.__init__5   s    ""&$*9$)&2a75*,'>'p.BTBTU`boBpBp$$$r     urlc                "  K   |rt          |          n| j        }| j        }t          ||           d {V }| j        r|j        | j        k    r| j        |_        |                                 d {V  |                                  d {V  d S r   )_extract_domainr;   r=   rD   r[   r   r6   _apply_normal_delay)r   r_   r;   r=   buckets        r   before_requestz#AntiBlockingStrategy.before_requestB   s      ),=%%%$+&"6;77777777 	.6;$2E#E#E-FKnn&&(((((((((((r    r>   dictc                    t          j        t                    }||n| j        }t                              |t                    }|d         d|d         d|ddddd	d
dS )NrF   zUtext/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8rG   zgzip, deflate, brz
keep-alive1z	max-age=0documentnavigatezsame-origin)z
User-AgentAcceptzAccept-LanguagezAccept-EncodingReferer
ConnectionzUpgrade-Insecure-RequestszCache-ControlzSec-Fetch-DestzSec-Fetch-ModezSec-Fetch-Site)randomchoice_UA_PROFILESr=   	_REFERERSrA   _DEFAULT_REFERER)r   r=   profilecidreferers        r   get_headersz AntiBlockingStrategy.get_headersK   s    ---(4kk$:J--%566%dm  8O  dk  lr  ds  H[  ho  K  jm  @K  _i  }G  [h  i  i  	ir    
str | Nonec                F    | j         sd S t          j        | j                   S r   )rT   rm   rn   r   s    r   	get_proxyzAntiBlockingStrategy.get_proxyQ   s#     	4}T_---r    c                >   K   |                                   d {V  d S r   )rb   rx   s    r   apply_delayz AntiBlockingStrategy.apply_delayV   s0      &&(((((((((((r    c                
   K   d S r   r#   rx   s    r   check_rate_limitz%AntiBlockingStrategy.check_rate_limitY   s      r    c                
  K   t          j        | j        | j                  }t	          | j        t          | j        |                    }t          	                    d|dd           t          j        |           d {V  d S )Nu   延迟 r*   r+   )rm   gaussrY   rZ   maxrQ   minrS   r0   r1   r   r2   )r   delays     r   rb   z(AntiBlockingStrategy._apply_normal_delay\   s      T^T->??DNC$>$>??+u++++,,,mE"""""""""""r    )rN   rO   r   NrP   N)rQ   rR   rS   rR   r=   r   rT   rU   r;   r<   rV   rW   )r^   )r_   r<   r   )r=   rW   r>   re   )r>   rv   )r7   r8   r9   ro   USER_AGENTSr   rd   ru   ry   r{   r}   rb   r#   r    r   rK   rK   2   s        11L111Kq q q q q) ) ) ) )i i i i i. . . .
) ) )  # # # # #r    rK   r_   c                T    	 t          |           j        p| S # t          $ r | cY S w xY wr   )r   netloc	Exception)r_   s    r   ra   ra   b   s?    }}#*s*   


s    '')r;   r<   r=   r   r>   r   )r_   r<   r>   r<   )
__future__r   r   rm   r-   urllib.parser   common_loggingr   r7   r0   r   __annotations__rB   r   r:   r   r@   rD   ro   rp   rq   rK   ra   r#   r    r   <module>r      s   " " " " " " "    ! ! ! ! ! ! % % % % % %	H		%&1qRB2RTYZ![![ [ [ [ [6 6 6 6 6 6 6 6* ') ( ( ( (    I  Sx  FM  N  N  WH  Rk  y@  PA  PA  JA  Kp  ~H	  CI	  CI	  R	I  Sc  q{  K	|  K	|  EW  a^  ls  ~t  ~t  }S  ]B  PZ  v[  v[  dc  m^  ls  ]t  ]t  }d  nS  ao  vp  vp  qL  RN  SL  QO  TM  RK  PI  NG  LF	  G	  G		1 .# .# .# .# .# .# .# .#`     r    