
    Xjp
                        d dl Z d dlZd dlZd dlmZ  ee          Z eg dd  edd          D             z   d  edd          D             z             Z	 ej
        d          Z ej
        d	          Zd
ZdddddddddZddddddedededz  dedz  dedz  dedefdZdedefdZdedefdZdededefdZd edz  d!edefd"ZdS )#    N)
get_logger)CONPRNAUXNULc                     g | ]}d | S )COM .0is     \/lsinfo/ai/hellotax_ai/data_center/backend/app/services/tax_data_processor/filename_utils.py
<listcomp>r   	   s    ;\;\;\!I!II;\;\;\       
   c                     g | ]}d | S )LPTr
   r   s     r   r   r   	   s;      `A  `A  `Ano`ifg`i`i  `A  `A  `Ar   z[<>:"/\\|?*\x00-\x1f\x7f]u   [​-‏‪-‮⁠-⁤﻿­]   
title_only
title_datetitle_docno_datetitle_subtitle)r                        z.md)
doc_numbercompose_date	sub_title	extensiontitlecategory_idr!   r"   r#   r$   returnc                8   t                               |d          }t          |           }|rt          |          nd}|rt          |          nd}	|rt          |          nd}
|dk    r|g}n9|dk    rd ||	fD             }n$|dk    rd ||
fD             }nd |||	fD             }|rd	                    |          nd
}|                                t          v rd	| }t          |t          t          |
                                          z
            }||z   S )Nr    r   r   c                     g | ]}||S r
   r
   r   ps     r   r   z!safe_filename.<locals>.<listcomp>   s    ;;;q;;;;r   r   c                     g | ]}||S r
   r
   r+   s     r   r   z!safe_filename.<locals>.<listcomp>   s    ???qQ????r   c                     g | ]}||S r
   r
   r+   s     r   r   z!safe_filename.<locals>.<listcomp>   s    HHHqaHHHHr   _untitled)_CATEGORY_RULESget	_sanitize_normalize_datejoinupper_RESERVED_NAMES_truncate_to_bytes
_MAX_BYTESlenencode)r%   r&   r!   r"   r#   r$   ruleclean_titleclean_docno
clean_dateclean_subtitlepartsstems                r   safe_filenamerC      sL   {,>??DE""K+5=)J'''2K2>F...BJ-6>Yy)))BN|			;;[*5;;;	!	!	!??[.9???HH[+zBHHH#3388E???Dzz||&&4zzdJY5E5E5G5G1H1H$HIID)r   textc                    | sdS t          j        d|           } t                              d|           } t                              d|           } t          j        dd|           } |                     d                                          S )Nr)   NFCr/   z[\s_]+)unicodedata	normalize_ZERO_WIDTH_REsub_ILLEGAL_CHARS_RErestrip)rD   s    r   r3   r3   #   sz     r --Db$''D  d++D6)S$''D::c??  """r   date_strc                     t          j        dd|           }t          |          dk    r|S t          |          dk    r
|d d         S | S )Nz\Dr)   r    r   )rL   rJ   r:   )rN   digitss     r   r4   r4   ,   sO    VE2x((F
6{{a
6{{abqbzOr   	max_bytesc                     |                      d          }t          |          |k    r| S |d |         }|                    dd          S )Nutf-8ignore)errors)r;   r:   decode)rD   rQ   encoded	truncateds       r   r8   r8   4   sP    kk'""G
7||y  

#IGH555r   
issue_date
source_urlc                 
   t          j        |                    d                                                    d d         }| rt	          |           nd}t          |          dk    r|                                sd}| d| S )NrS   r    00000000r/   )hashlibsha256r;   	hexdigestr4   r:   isdigit)rY   rZ   hash8rN   s       r   generate_directory_namerb   ;   s    N:,,W5566@@BB2A2FE.8Hz***jHMMQ8#3#3#5#5  h   r   )r]   rL   rG   common_loggingr   __name__logger	frozensetranger7   compilerK   rI   r9   r1   strintrC   r3   r4   r8   rb   r
   r   r   <module>rk      sA    				     % % % % % %	H		)888;\;\uuUVXZ||;\;\;\\  `A  `Asxsxyz|~ss  `A  `A  `A  A  B  BBJ?@@ QRR
"|8JO_dv  |N  Se  j|  }  }JNim  FJ  [`    3 sTz ^adh^h z}  AE  {E   WZ   eh    (#C #C # # # #c c    6S 6S 6S 6 6 6 6!d
 ! ! ! ! ! ! ! !r   