
    j'                     6   d Z 	 ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
mZ ddlmZ ddlmZmZ dZdadad Zdd	Z	 ddZ ed          d             ZddZ G d dej        j                  Zd ZddZ G d de	j                  Zg dZdS )z+Centralized I/O security sentinel for NLTK.    N)	lru_cache)Path)unquoteurlparseFc                     g } dt           j        v r.t          t          t           j        d         dg                     } t          j                            dd          }| |f}t          t          |k    rt          S t                      }| |
                    t          j                  z   D ]}}|ry	 t          |d          r|j        n|}|                    t          t!          |                                                               `# t$          t&          t(          f$ r Y yw xY w~ddl}dd|                                fD ]|}	 t          |                                                                          }|                                r|                    |           `# t$          t&          t(          f$ r Y yw xY w|a|a|S )	zDDynamically determines allowed directories based on NLTK data paths.z	nltk.datapath	NLTK_DATA Nr   z~/nltk_dataz/usr/share/nltk_data)sysmoduleslistgetattrosenvironget_ALLOWED_ROOTS_CACHE_LAST_DATA_PATHSsetsplitpathsephasattrr   addr   strresolveOSError
ValueErrorRuntimeErrortempfile
gettempdir
expanduserexists)current_paths	env_pathscurrent_staterootspraw_pr   locs           V/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/nltk/pathsec.py_get_allowed_rootsr*      s    Mck!!WS[%=vrJJKK
{B//I"I.M',<,M,M##EEEY__RZ888   	")!V"4"4;!		$s5zz**22445555Z6   	 OOO5x7J7J7L7LM  	S		$$&&..00Axxzz 		!\2 	 	 	H	 !$Ls&   1ADD&%D&AF%%F?>F?NLTKc                 *   t          | t                    s#| r!t          |                                           sdS 	 t	          | d          r| j        nt          |           }d|v r9t          |          }|j        dv rdS |j        dk    rt          |j                  }	 t          |          
                                n# t          t          f$ rl |                                }d|v rB|                    d          dz   }t          |d|                   
                                nt          |          Y nw xY w|rzt	          |d          r|j        nt          |          }t          |          
                                }|k    s-                    |          st          d| d	 d
|           t!                      }	t#          fd|	D                       rdS 	 t          t%          j                              
                                k    s                              r^t#          fd|	D                       rdS d| d}
t(          rt+          |
          t-          j        d| d	 dt0          d           dS n# t          t          f$ r Y nw xY wd| d }
t(          rt+          |
          t-          j        |
t0          d           dS # t*          t          f$ r  t2          $ r t(          r Y dS w xY w)aG  
    Ensures file access is restricted to allowed data directories.

    :param path_input: The path to validate.
    :param context: Diagnostic context for warnings/errors.
    :param required_root: If provided, enforces that the path is strictly
                          within this specific directory (scoped sandbox).
    Nr   z://)httphttpsftpfilez.zip   Security Violation [z]: Path z escapes root c              3   N   K   | ]}|k    p                     |          V   d S N)is_relative_to).0roottargets     r)   	<genexpr>z validate_path.<locals>.<genexpr>x   s:      WWv~<!6!6t!<!<WWWWWW    c              3   $   K   | ]
}|k    V  d S r4    )r6   r7   cwds     r)   r9   z validate_path.<locals>.<genexpr>   s'      ==tsd{======r:   zS]: CWD access restricted in ENFORCE mode. Authorize via: nltk.data.path.append('.')zSecurity Warning [z allowed via CWD.   
stacklevelz]: Unauthorized path )
isinstanceintr   stripr   r   r   schemer   r   r   r   r   lowerfindr5   r*   anyr   getcwdENFORCEPermissionErrorwarningswarnRuntimeWarning	Exception)
path_inputcontextrequired_rootrawparsed	lower_rawzip_idxroot_rawscoped_rootallowed_rootsmsgr=   r8   s              @@r)   validate_pathrZ   F   s    *c"" * C
OO<Q<Q<S<S I!(V!<!<Qjoo#j//C<<c]]F} 888}&&fk**		##YY&&((FF$ 	# 	# 	#		I""#..0014c(7(m,,4466c	#  	 =&11(""'' 
 x..0022Kk))V-B-B;-O-O) _7__F__R]__  
 +,,WWWWWWWWW 	F	ry{{##++--C}} 5 5c : :}====}===== F@7 @ @ @   )#...MWWWWfWWW&#$   
 F   $ 	 	 	D	 LWKK6KK 	=!#&&&M#~!<<<<<<Z(       		 	 	si   AK+ K+ "!C K+ A:E>K+  EB(K+ ,A(J ?J K+ J,)K+ +J,,=K+ +#LLZipAuditc                    	 t          |                                          fd}t          | t          j                  r ||            dS t          j        | d          5 } ||           ddd           dS # 1 swxY w Y   dS # t
          t          f$ r  t          t          j        f$ r t          rt          d          Y dS w xY w)zEEnhanced Zip-Slip protection using Pathlib for cross-platform safety.c                    gn|                                  }|D ]}t          |d          r|j        nt          |          }d|v rt	          d|           |z                                  }|k    sP|                              s;d d| d}t          rt          |          t          j
        |t          d           d S )	Nfilename zNull byte in ZIP member: r2   z]: Traversal member 'z' detected.r>   r?   )namelistr   r^   r   r   r   r5   rI   rJ   rK   rL   rM   )	zfmembersnamename_strmember_pathrY   rP   specific_memberr8   s	         r)   _auditz$validate_zip_archive.<locals>._audit   s    %4%@!!bkkmm    I I,3D*,E,ET4==3t998##$%K%K%KLLL%099;;#v--1K1KF1S1S-dddxdddC I-c222 c>aHHHHI Ir:   rNzZip validation failed)
r   r   rA   zipfileZipFilerJ   r   r   
BadZipFilerI   )zip_obj_or_pathtarget_rootrf   rP   rg   ra   r8   s     ``  @r)   validate_zip_archivern      s\   ;k""**,,	I 	I 	I 	I 	I 	I 	I" ow77 	F?######66 "r


                 Z(   W'( ; ; ; 	;!"9:::	; 	; 	;;s<   AB B )B5B BB 	B
B =CC   )maxsizec                 x    	 t          j        | dt           j                  S # t          t          f$ r g cY S w xY w)z5Cached hostname resolution to mitigate DNS rebinding.N)proto)socketgetaddrinfoIPPROTO_TCPr   r   )hostnames    r)   _resolve_hostnamerw      sK    !(D8JKKKKZ    			s    # 99	NetworkIOc                    | r!t          |                                           sdS 	 t          t          |                     }|j        dk    r(t	          t          |j                  | d           dS |j        dvrBd| d|j         d}t          rt          |          t          j
        |t          d	
           dS t          |j        pd          D ]x}t          j        |d         d                   }|j        s|j        s|j        s|j        r:d| d| }t          rt          |          t          j
        |t          d	
           ydS # t          t(          f$ r  t*          $ r t          r Y dS w xY w)z-Hardened URL validation with SSRF protection.Nr0   z.file_schemerP   )r-   r.   r2   z]: Unsupported scheme 'z'.r>   r?   r
   r1   r   z!]: SSRF attempt to restricted IP )r   rC   r   rD   rZ   r   r   rI   rJ   rK   rL   rM   rw   rv   	ipaddress
ip_addressis_loopbackis_link_localis_multicast
is_privater   rN   )	url_inputrP   rS   rY   resultips         r)   validate_network_urlr      s    C	NN0022 #i..))=F""'&+..78P8P8PQQQQF= 111XwXXv}XXX   A%c***c>a@@@@F'(=2>> 	E 	EF%fQil33B~ E!1 ER_ E E[W[[WY[[ E)#...M#~!DDDD	E 	E Z(       		 	 	s    AE 6A	E BE #E98E9c                   "     e Zd ZdZ fdZ xZS )_ValidatingRedirectHandlerzIEnsures that every step of a redirect chain is re-validated against SSRF.c                 r    t          |d           t                                          ||||||          S )NNetworkRedirectrz   )r   superredirect_request)selfreqfpcoderY   headersnewurl	__class__s          r)   r   z+_ValidatingRedirectHandler.redirect_request   s9    V->????ww''RsGVLLLr:   )__name__
__module____qualname____doc__r   __classcell__r   s   @r)   r   r      sG        SSM M M M M M M M Mr:   r   c                     t          | d          r| j        nt          |           }t          |d           t          j                            t                                } |j        | g|R i |S )zCSecure wrapper for urllib.request.urlopen with redirect validation.full_urlzpathsec.urlopenrz   )	r   r   r   r   urllibrequestbuild_openerr   open)urlargskwargsurl_stropeners        r)   urlopenr      st    %c:66DcllCHHG*;<<<<^(()C)E)EFFF6;s,T,,,V,,,r:   rh   c                 L    t          | d           t          j        | fd|i|S )z!Secure wrapper for builtins.open.zpathsec.openrz   mode)rZ   builtinsr   )r0   r   r   s      r)   r   r      s2    $////=33D3F333r:   c                   :     e Zd ZdZ fdZd fd	Zd fd	Z xZS )rj   z#Secure wrapper for zipfile.ZipFile.c                     t          |t          t          f          rt          |d            t	                      j        |g|R i | d S )Nzpathsec.ZipFilerz   )rA   r   r   rZ   r   __init__)r   r0   r   r   r   s       r)   r   zZipFile.__init__  sZ    dS$K(( 	;$(9:::://///////r:   Nc                     t          | |pt          j                    |           t                                          |||          S )N)rf   )rn   r   rH   r   extract)r   memberr   pwdr   s       r)   r   zZipFile.extract  s=    T4#629;;OOOOwwvtS111r:   c                     t          | |pt          j                               t                                          |||           d S r4   )rn   r   rH   r   
extractall)r   r   rb   r   r   s       r)   r   zZipFile.extractall  s@    T4#629;;7774#.....r:   )NN)NNN)r   r   r   r   r   r   r   r   r   s   @r)   rj   rj     sz        --0 0 0 0 0
2 2 2 2 2 2/ / / / / / / / / /r:   rj   )rZ   r   rn   r   r   rj   rI   )r+   N)Nr[   )rx   )rh   ) r   r   r{   r   rs   r   urllib.requestr   rK   ri   	functoolsr   pathlibr   urllib.parser   r   rI   r   r   r*   rZ   rn   rw   r   r   HTTPRedirectHandlerr   r   r   rj   __all__r<   r:   r)   <module>r      s   2 1 1      				  



                   * * * * * * * *   % % %PT T T Tp AK!; !; !; !;H 3         FM M M M M!C M M M- - -4 4 4 4/ / / / /go / / /"  r:   