
    "j                        d Z ddlZddlmZmZ ddlZddlmZ ddlm	Z	m
Z
mZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZ dd	lmZmZ  e ee                    Z ee          Z  ej!        d
d e D             e"          Z#ee"dz   ej$        dd%                    e           d ee                    f         Z& ed          Z'e'(                    dg d          ddddddej)        dfdedededee#dz   ej$        d          f         dede&deded dfd!            Z*e'(                    d"d#d$g          ddej)        dfd%ee" ej+        d&          f         d'ede&deded dfd(            Z,e'(                    d)g d*          ddej)        dfd%ee" ej+        d&          f         d+ee"dz   ej$        d,d-          f         d.ee"dz   ej$        d/          f         deded dfd0            Z-e'(                    d1d2d3g          ej)        dfd1ee" ej+        d4          f         deded dfd5            Z.dS )6a  Contains commands to interact with datasets on the Hugging Face Hub.

Usage:
    # list datasets on the Hub
    hf datasets ls

    # list datasets with a search query
    hf datasets ls --search "code"

    # get info about a dataset
    hf datasets info HuggingFaceFW/fineweb
    N)	Annotatedget_args)execute_raw_sql_query)CLIErrorRepositoryNotFoundErrorRevisionNotFoundError)DatasetSort_TExpandDatasetProperty_T   )	AuthorOpt	FilterOptFormatWithAutoOptLimitOptRevisionOpt	SearchOptTokenOptapi_object_to_dict
get_hf_apimake_expand_properties_parsertyper_factory)OutputFormatWithAutooutDatasetSortEnumc                     i | ]}||S  r   ).0ss     f/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/huggingface_hub/cli/datasets.py
<dictcomp>r   6   s    /L/L/L1/L/L/L    )typezComma-separated properties to return. When used, only the listed properties (and id) are returned. Example: '--expand=downloads,likes,tags'. Valid: z, .)helpcallbackz"Interact with datasets on the Hub.)r#   z	list | ls)zhf datasets lsz*hf datasets ls --sort downloads --limit 10zhf datasets ls --search "code")examples
   searchauthorfiltersortzSort results.limitexpandformattokenreturnc           	          t          |          }|r|j        nd}	d |                    ||| |	||          D             }
t          j        |
           dS )zList datasets on the Hub.r.   Nc                 ,    g | ]}t          |          S r   )r   )r   dataset_infos     r   
<listcomp>zdatasets_ls.<locals>.<listcomp>]   s0     
 
 
 	<((
 
 
r    )r)   r(   r'   r*   r+   r,   )r   valuelist_datasetsr   table)r'   r(   r)   r*   r+   r,   r-   r.   apisort_keyresultss              r   datasets_lsr;   E   s    , 5
!
!
!C!+tzztH
 
-- . 
 

 
 
G Igr    infoz&hf datasets info HuggingFaceFW/finewebz9hf datasets info my-dataset --expand downloads,likes,tags
dataset_idz+The dataset ID (e.g. `username/repo-name`).revisionc                    t          |          }	 |                    | ||          }nJ# t          $ r}t          d|  d          |d}~wt          $ r}t          d| d|  d          |d}~ww xY wt          j        |           dS )	z$Get info about a dataset on the Hub.r1   )repo_idr>   r,   z	Dataset 'z' not found.Nz
Revision 'z' not found on 'z'.)r   r3   r   r   r   r   dict)r=   r>   r,   r-   r.   r8   r<   es           r   datasets_inforC   k   s     5
!
!
!CU
XfUU" D D D;:;;;<<!C  U U ULHLLjLLLMMSTTUHTNNNNNs    + 
A2A		A2A--A2parquet)z(hf datasets parquet cfahlgren1/hub-statsz8hf datasets parquet cfahlgren1/hub-stats --subset modelsz6hf datasets parquet cfahlgren1/hub-stats --split trainz6hf datasets parquet cfahlgren1/hub-stats --format jsonsubsetz--subsetz(Filter parquet entries by subset/config.splitz Filter parquet entries by split.c                     t          |          }|                    | |          }fd|D             }d |D             }t          j        |g dd           dS )	z/List parquet file URLs available for a dataset.r1   )r@   configc                 .    g | ]}|j         k    |S )N)rF   )r   entryrF   s     r   r4   z$datasets_parquet.<locals>.<listcomp>   s)    TTT%emu{e?S?S?S?S?Sr    c                 D    g | ]}|j         |j        |j        |j        d S )rE   rF   urlsize)rH   rF   rM   rN   )r   rJ   s     r   r4   z$datasets_parquet.<locals>.<listcomp>   s;       af5<%+eiQVQ[\\  r    rL   rM   )headersid_keyN)r   list_dataset_parquet_filesr   r7   )	r=   rE   rF   r-   r.   r8   entriesfilteredr:   s	     `      r   datasets_parquetrT      s    " 5
!
!
!C,,Z,OOGTTTT7TTTH jr  G IgAAA%PPPPPPr    sqlzhf datasets sql "SELECT COUNT(*) AS rows FROM read_parquet('https://huggingface.co/api/datasets/cfahlgren1/hub-stats/parquet/models/train/0.parquet')"zhf datasets sql "SELECT * FROM read_parquet('https://huggingface.co/api/datasets/cfahlgren1/hub-stats/parquet/models/train/0.parquet') LIMIT 5" --format jsonzRaw SQL query to execute.c                     	 t          | |          }n/# t          $ r"}t          t          |                    |d}~ww xY wt	          j        |           dS )zAExecute a raw SQL query with DuckDB against dataset parquet URLs.)	sql_queryr.   N)r   ImportErrorr   strr   r7   )rU   r-   r.   resultrB   s        r   datasets_sqlr[      sf    &&EBBB & & &s1vvA%&Ifs    
A ;A )/__doc__enumtypingr   r   typerhuggingface_hub._dataset_viewerr   huggingface_hub.errorsr   r   r   huggingface_hub.hf_apir	   r
   
_cli_utilsr   r   r   r   r   r   r   r   r   r   r   _outputr   r   sorted_EXPAND_PROPERTIES_SORT_OPTIONSEnumrY   r   Optionjoin	ExpandOptdatasets_clicommandautor;   ArgumentrC   rT   r[   r   r    r   <module>rp      s?     & & & & & & & &  A A A A A A [ [ [ [ [ [ [ [ [ [ I I I I I I I I                          / . . . . . . . VHH%<==>> ''$)-/L/Lm/L/L/LSVWWW $JEL E  ei  en  en  oA  eB  eB  E  E  E../ABB  	 }"FGGG        	 4 9   $/***	,	     
   < 
0C    ! 4 9 #~u~3`aaaab  	
  
   $       ptZ^ 4 9Q Q#~u~3`aaaabQcDj,%,z@j"k"k"kklQ S4Z3U!V!V!VVWQ 	Q
 Q 
Q Q Q Q" 	 	c 	j    !5 9
 
	3,GHHHH	I

 
 
	
 
 
 
 
 
r    