
    #jb                         d dl Z d dlZd dlmZ d dlmZmZmZ d dlZddl	m
Z
mZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ  G d de          Z G d d          ZdS )    N)partial)BinaryIOOptionalUnion   )DatasetFeatures
NamedSplitconfig)query_table)Json)tqdm)!get_json_field_paths_from_featurejson_decode_field)NestedDataStructureLikePathLike   )AbstractDatasetReaderc                        e Zd Z	 	 	 	 	 	 	 ddee         dee         dee         dede	de	d	ee         d
ee
         f fdZd Z xZS )JsonDatasetReaderNFpath_or_pathssplitfeatures	cache_dirkeep_in_memory	streamingfieldnum_procc	           
           t                      j        |f||||||d|	 || _        t          |t                    r|n| j        |i}t          d||||d|	| _        d S )N)r   r   r   r   r   r   )r   
data_filesr   r    )super__init__r   
isinstancedictr   r   builder)selfr   r   r   r   r   r   r   r   kwargs	__class__s             Z/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/datasets/io/json.pyr#   zJsonDatasetReader.__init__   s     			
)		
 		
 		
 		
 		
 
)3M4)H)Hitz[hNi 
$	
 

 
 
    c                     | j         r!| j                            | j                  }nRd }d }d }d }| j                            ||||| j                   | j                            | j        | j                  }|S )N)r   )download_configdownload_modeverification_mode	base_pathr   )r   	in_memory)r   r&   as_streaming_datasetr   download_and_preparer   
as_datasetr   )r'   datasetr-   r.   r/   r0   s         r*   readzJsonDatasetReader.read2   s    > 	_l77dj7IIGG #O M $IL-- /+"3# .    l--DJ$J]-^^Gr+   )NNNFFNN)__name__
__module____qualname__r   r   r   r
   r	   strboolintr#   r6   __classcell__)r)   s   @r*   r   r      s         '+'+$#"&
 
.x8
 
#
 8$	

 
 
 
 }
 3-
 
 
 
 
 
@      r+   r   c                       e Zd Z	 	 	 ddedeeef         dee         dee         dee	         f
dZ
defd	Zd
 ZdedefdZdS )JsonDatasetWriterNr5   path_or_buf
batch_sizer   storage_optionsc                     ||dk    rt          d| d          || _        || _        |r|nt          j        | _        || _        d| _        |pi | _        || _	        d S )Nr   z	num_proc z must be an integer > 0.zutf-8)

ValueErrorr5   r@   r   DEFAULT_MAX_BATCH_SIZErA   r   encodingrB   to_json_kwargs)r'   r5   r@   rA   r   rB   rG   s          r*   r#   zJsonDatasetWriter.__init__I   sw     HMMKKKKLLL&(2U**8U .4",r+   returnc                 N   | j                             dd           }| j                             dd          }| j                             d|dk    rdnd          }d| j         vr|dv r
d| j         d<   t          | j        t          t
          t          j        f          rd	nd }| j                             d
|          }|dvrt          d| d          |s$| j	        | j
        j        k     rt          d          t          | j        t          t
          t          j        f          rPt          j        | j        dfd
|i| j        pi 5 } | j        d|||d| j         }d d d            n# 1 swxY w Y   n0|rt          d| d           | j        d| j        ||d| j         }|S )Nr@   orientrecordslinesTFindex)r   tableinfercompression)NrO   gzipbz2xzz&`datasets` currently does not support z compressionzOutput JSON will not be formatted correctly when lines = False and batch_size < number of rows in the dataset. Use pandas.DataFrame.to_json() instead.wb)file_objrJ   rL   zUThe compression parameter is not supported when writing to a buffer, but compression=z1 was passed. Please provide a local path instead.r!   )rG   popr$   r@   r:   bytesosr   NotImplementedErrorrA   r5   num_rowsfsspecopenrB   _write)r'   _rJ   rL   default_compressionrP   bufferwrittens           r*   writezJsonDatasetWriter.write]   s`   ##M488$((9==#''99L9LRWXX$---&<N2N2N+0D( *4D4DsESUS^F_)`)`jggfj)--m=PQQBBB%&h{&h&h&hiii 	4<+@@@% i   d&eR[(ABB 	p $ 4?DHDXD^\^  j%$+ivfEiiUYUhiij j j j j j j j j j j j j j j
  )Hlw H H H   "dko4+;FRWoo[_[nooGs   E))E-0E-c                    |\  }}}}t          | j        j        t          ||| j        z             | j        j                  }|                                }t          | j        j                  D ]8}|^}}	||         	                    t          t          |	                    ||<   9 |j        dd ||d|}
|
                    d          s|
dz  }
|
                    | j                  S )N)rN   keyindices)json_field_path)r@   rJ   rL   
r!   )r   r5   dataslicerA   _indices	to_pandasr   r   applyr   r   to_jsonendswithencoderF   )r'   argsoffsetrJ   rL   rG   batchrf   coljson_field_subpathjson_strs              r*   _batch_jsonzJsonDatasetWriter._batch_json~   s   04-~,#fft677L)
 
 

 !!@AVWW 	j 	jO'6$C$s))'2CUg*h*h*hiiE#JJ 5=`T&``Q_``  && 	Ht}---r+   rU   c                    d}| j         | j         dk    rmt          t          dt          | j                  | j                  dd          D ]3}|                     |f          }||                    |          z  }4nt          | j                  | j        }	}t          j	        | j                   5 }
t          |

                    | j        fdt          d||	          D                       ||	z  r||	z  dz   n||	z  dd          D ]}||                    |          z  }	 ddd           n# 1 swxY w Y   |S )	zWrites the pyarrow table as JSON lines to a binary file handle.

        Caller is responsible for opening and closing the handle.
        r   Nr   bazCreating json from Arrow format)unitdescc                     g | ]}|f	S r!   r!   ).0rq   rL   rJ   rG   s     r*   
<listcomp>z,JsonDatasetWriter._write.<locals>.<listcomp>   s"    nnnV&&%@nnnr+   )totalry   rz   )r   hf_tqdmrangelenr5   rA   rv   rb   multiprocessingPoolimap)r'   rU   rJ   rL   rG   ra   rq   ru   rZ   rA   pools     ```      r*   r]   zJsonDatasetWriter._write   s    = DMQ$6$6!aT\**DO<<6   4 4
  ++VVUN,STT8>>(3334 $'t|#4#4dojH %dm44 
8 'II(nnnnnnuUVX`blOmOmnnn  ;CZ:Ok8z1Q66U]akUk:! ! ! 	8 	8H x~~h777GG	8
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 
8 s   9A5D;;D?D?)NNN)r7   r8   r9   r   r   r   r   r   r<   r%   r#   rb   rv   r]   r!   r+   r*   r?   r?   H   s        
 %)"&*.- -- 8X-.- SM	-
 3-- "$- - - -(s    B. . ."## 
# # # # # #r+   r?   )r   rX   	functoolsr   typingr   r   r   r[    r   r	   r
   r   
formattingr   packaged_modules.json.jsonr   utilsr   r   
utils.jsonr   r   utils.typingr   r   abcr   r   r?   r!   r+   r*   <module>r      sW       				       , , , , , , , , , ,  4 4 4 4 4 4 4 4 4 4 4 4 $ $ $ $ $ $ - - - - - - # # # # # # M M M M M M M M < < < < < < < < & & & & & &4 4 4 4 4- 4 4 4nj j j j j j j j j jr+   