o
    z3i:                     @   s   d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlZd dlZddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZ e	r^d dlmZ ddl m!Z! eG dd dZ"ddde#fddZ$dS )    N)	dataclassfield)BytesIO)Path)TYPE_CHECKINGAnyClassVarOptionalUnion   )config)DownloadConfig)
array_cast)is_local_pathxopen)no_op_if_value_is_nullstring_to_dictAudioDecoder   )FeatureTypec                   @   s^  e Zd ZU dZdZee ed< dZe	ed< dZ
ee ed< dZee ed< eddd	Zee ed
< dZee ed< ee e dZee ed< ed dddZeed< dd Zdeeeeedf defddZ	d#dedeeeeee	df f  ddfddZdedeedf f fddZdeej ej!f dej!fdd Z"d#dej!dej!fd!d"Z#dS )$Audioa
  Audio [`Feature`] to extract audio data from an audio file.

    Input: The Audio feature accepts as input:
    - A `str`: Absolute path to the audio file (i.e. random access is allowed).
    - A `pathlib.Path`: path to the audio file (i.e. random access is allowed).
    - A `dict` with the keys:

        - `path`: String with relative path of the audio file to the archive file.
        - `bytes`: Bytes content of the audio file.

      This is useful for parquet or webdataset files which embed audio files.

    - A `dict` with the keys:

        - `array`: Array containing the audio sample
        - `sampling_rate`: Integer corresponding to the sampling rate of the audio sample.

    - A `torchcodec.decoders.AudioDecoder`: torchcodec audio decoder object.

    Output: The Audio features output data as `torchcodec.decoders.AudioDecoder` objects, with additional keys:

    - `array`: Array containing the audio sample
    - `sampling_rate`: Integer corresponding to the sampling rate of the audio sample.

    Args:
        sampling_rate (`int`, *optional*):
            Target sampling rate. If `None`, the native sampling rate is used.
        num_channels (`int`, *optional*):
             The desired number of channels of the samples. By default, the number of channels of the source is used.
             Audio decoding will return samples with shape (num_channels, num_samples)
             Currently `None` (number of channels of the source, default), `1` (mono) or `2` (stereo) channels are supported.
             The `num_channels` argument is passed to `torchcodec.decoders.AudioDecoder`.

             <Added version="4.4.0"/>
        decode (`bool`, defaults to `True`):
            Whether to decode the audio data. If `False`,
            returns the underlying dictionary in the format `{"path": audio_path, "bytes": audio_bytes}`.
        stream_index (`int`, *optional*):
            The streaming index to use from the file. If `None` defaults to the "best" index.

    Example:

    ```py
    >>> from datasets import load_dataset, Audio
    >>> ds = load_dataset("PolyAI/minds14", name="en-US", split="train")
    >>> ds = ds.cast_column("audio", Audio(sampling_rate=44100, num_channels=2))
    >>> ds[0]["audio"]
    <datasets.features._torchcodec.AudioDecoder object at 0x11642b6a0>
    >>> audio = ds[0]["audio"]
    >>> audio.get_samples_played_in_range(0, 10)
    AudioSamples:
        data (shape): torch.Size([2, 110592])
        pts_seconds: 0.0
        duration_seconds: 2.507755102040816
        sample_rate: 44100
    ```
    Nsampling_rateTdecodenum_channelsstream_indexF)defaultrepriddictdtypebytespathpa_type)r   initr   _typec                 C   s   | j S N)r$   )self r)   e/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/datasets/features/audio.py__call__]   s   zAudio.__call__valuer   returnc              
   C   s.  zddl }ddlm} W n ty } ztd|d}~ww |du r&tdtjr0ddlm} nd}t	|t
r<d|dS t	|trJdt
| dS t	|ttfrV|ddS |durct	||rct|S d|v rt }|||d tj|d	 d
j|d| jd | ddS |ddurtj|d r|d dr|d	du rtd|drtj|d tjdtjd }ntj |d dddtjd }t }||||d	 d
j|d| jd | ddS d|ddS |ddus|ddur|d|ddS td| d)zEncode example into a format for Arrow.

        Args:
            value (`str`, `bytes`,`bytearray`,`dict`, `AudioDecoder`):
                Data passed as input to Audio feature.

        Returns:
            `dict`
        r   NAudioEncoder<To support encoding audio data, please install 'torchcodec'.zvalue must be providedr   r!   arrayr   sample_ratewavformatr   r#   pcmzBTo use PCM files, please specify a 'sampling_rate' in Audio objectr"   )r    i  hr)r    modezUAn audio sample should have one of 'path' or 'bytes' but they are missing or None in .)!torchtorchcodec.encodersr/   ImportError
ValueErrorr   TORCHCODEC_AVAILABLEtorchcodec.decodersr   
isinstancestrr   absoluter"   	bytearrayencode_torchcodec_audior   
from_numpyastypenpfloat32to_file_liker   getvaluegetosr#   isfileendswithKeyError
frombufferint16memmap)r(   r,   r<   r/   errr   bufferbytes_valuer)   r)   r*   encode_example`   s\   







"  
zAudio.encode_exampletoken_per_repo_idc                 C   sV  t jr
ddlm} ntd| jstd|d dur#|d |d fn|d df\}}|du r;|du r;td| d	|du rOt|rO||| j	| j
| jd
}nP|du r|pVi }|dd }|t jrgt jnt j}t||}	|	durz||	d nd}
t|
d}t|d|d}||| j	| j
| jd
}n||| j	| j
| jd
}||d|_||j_|S )a,  Decode example audio file into audio data.

        Args:
            value (`dict`):
                A dictionary with keys:

                - `path`: String with relative audio file path.
                - `bytes`: Bytes of the audio file.
            token_per_repo_id (`dict`, *optional*):
                To access and decode
                audio files from private repositories on the Hub, you can pass
                a dictionary repo_id (`str`) -> token (`bool` or `str`)

        Returns:
            `torchcodec.decoders.AudioDecoder`
        r   r   z<To support decoding audio data, please install 'torchcodec'.zMDecoding is disabled for this feature. Please use Audio(decode=True) instead.r"   Nr#   zJAn audio sample should have one of 'path' or 'bytes' but both are None in r;   )r   r3   r   ::repo_idtokenrbdownload_config)r#   r"   )r   r@   _torchcodecr   r>   r   RuntimeErrorr?   r   r   r   r   split
startswithHF_ENDPOINTHUB_DATASETS_URLHUB_DATASETS_HFFS_URLr   rM   r   r   _hf_encodedmetadatar#   )r(   r,   rY   r   r#   r"   audio
source_urlpatternsource_url_fieldsr^   ra   fr)   r)   r*   decode_example   s<   ,

zAudio.decode_exampler   c                 C   s,   ddl m} | jrtd|d|ddS )z[If in the decodable state, raise an error, otherwise flatten the feature into a dictionary.r   )Valuez'Cannot flatten a decoded Audio feature.binarystringr!   )featuresrq   r   r?   )r(   rq   r)   r)   r*   flatten   s   zAudio.flattenstoragec                 C   s  t j|jr%t jdgt| t  d}t jj||gddg|	 d}nt j
|jrQt|t  }t jdgt| t  d}t jj||gddg|	 d}nt j|jrvt jdgt| t  d}t jj||gddg|	 d}njt j|jr|jdrt dd |jd	d
D }nNt j|jr|jddkr|d}nt jdgt| t  d}|jddkr|d}nt jdgt| t  d}t jj||gddg|	 d}t|| jS )a  Cast an Arrow array to the Audio arrow storage type.
        The Arrow types that can be converted to the Audio pyarrow storage type are:

        - `pa.string()` - it must contain the "path" data
        - `pa.binary()` - it must contain the audio bytes
        - `pa.struct({"bytes": pa.binary()})`
        - `pa.struct({"path": pa.string()})`
        - `pa.struct({"bytes": pa.binary(), "path": pa.string()})`  - order doesn't matter

        Args:
            storage (`Union[pa.StringArray, pa.StructArray]`):
                PyArrow array to cast.

        Returns:
            `pa.StructArray`: Array in the Audio arrow storage type, that is
                `pa.struct({"bytes": pa.binary(), "path": pa.string()})`
        Ntyper"   r#   maskr1   c                 S   s$   g | ]}|d urt  |nd qS r'   )r   rX   .0xr)   r)   r*   
<listcomp>
     $ z&Audio.cast_storage.<locals>.<listcomp>F)zero_copy_onlyr   )patypes	is_stringrx   r1   lenrr   StructArrayfrom_arraysis_nullis_large_binaryr   rs   	is_binary	is_structget_all_field_indicesto_numpyget_field_indexr   r$   )r(   rv   bytes_array
path_arrayr)   r)   r*   cast_storage   s2      zAudio.cast_storagec                    s   du ri t fdd tj fdd| D t d}tjdd |d D t d}tjj||gd	dg|	 d
}t
|| jS )a8  Embed audio files into the Arrow array.

        Args:
            storage (`pa.StructArray`):
                PyArrow array to embed.

        Returns:
            `pa.StructArray`: Array in the Audio arrow storage type, that is
                `pa.struct({"bytes": pa.binary(), "path": pa.string()})`.
        Nc                    s   |  dd }|tjrtjntj}t||}|d ur# |d nd }t|d}t	| d|d}|
 W  d    S 1 s@w   Y  d S )NrZ   r[   r\   r]   r_   r`   )rd   re   r   rf   rg   rh   r   rM   r   r   read)r#   rl   rm   rn   r^   ra   ro   )rY   r)   r*   path_to_bytes&  s   

$z*Audio.embed_storage.<locals>.path_to_bytesc                    s8   g | ]}|d ur|d d u r |d n|d nd qS )Nr"   r#   r)   r{   )r   r)   r*   r~   3  s    *z'Audio.embed_storage.<locals>.<listcomp>rw   c                 S   s$   g | ]}|d urt j|nd qS r'   )rN   r#   basename)r|   r#   r)   r)   r*   r~   :  r   r#   r"   ry   )r   r   r1   	to_pylistrr   r   rs   r   r   r   r   r$   )r(   rv   rY   r   r   r)   )r   rY   r*   embed_storage  s    
zAudio.embed_storager'   )$__name__
__module____qualname____doc__r   r	   int__annotations__r   boolr   r   r   r   rC   r    r   r   structrr   rs   r$   r   r&   r+   r
   r"   rE   r   rX   rp   ru   StringArrayr   r   r   r)   r)   r)   r*   r      s.   
 :$ E
; .r   rk   r   r-   c              
   C   s   t | dr| jS zddlm} W n ty! } ztd|d }~ww |  }t }|jjd }||j	 |j
dj|d|d | d dS )	Nri   r   r.   r0   r2   r4   r5   r!   )hasattrri   r=   r/   r>   get_all_samplesr   datashapecpur3   rK   rL   )rk   r/   rU   samplesrV   r   r)   r)   r*   rF   A  s   

rF   )%rN   dataclassesr   r   ior   pathlibr   typingr   r   r   r	   r
   numpyrI   pyarrowr    r   download.download_configr   tabler   utils.file_utilsr   r   utils.py_utilsr   r   rA   r   rt   r   r   r   rF   r)   r)   r)   r*   <module>   s(      +