o
    z3iC                     @   s  U d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZmZ d dlZd dlZddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZmZm Z  ergd dl!Z"ddl#m$Z$ da%ee&e'  e(d< ej)dkrxdndZ*e+de+de+de+de+de+de+de+de+de+de+de+de+de+dgZ,eG d d! d!Z-d"e&e' fd#d$Z.d%d&d"e/fd'd(Z0d%d&d"e1fd)d*Z2d+ej3d"e1fd,d-Z4d.ee&e' e&e1 e&ej3 e&d& f d"e&e1 fd/d0Z5dS )1    N)	dataclassfield)BytesIO)Path)TYPE_CHECKINGAnyClassVarOptionalUnion   )config)DownloadConfig)
array_cast)is_local_pathxopen)first_non_null_valueno_op_if_value_is_nullstring_to_dict   )FeatureType_IMAGE_COMPRESSION_FORMATSlittle<>z|b1|u1z<u2z>u2z<i2z>i2z<u4z>u4z<i4z>i4z<f4z>f4z<f8z>f8c                   @   s*  e Zd ZU dZdZee ed< dZe	ed< e
dddZee ed< d	Zee ed
< ee e dZee ed< e
d dddZeed< dd Zdeeeeeejd	f defddZddedd	fddZdedeedf f fddZdeejej ej!f dej fddZ"ddej dej fddZ#dS ) Imagea=  Image [`Feature`] to read image data from an image file.

    Input: The Image feature accepts as input:
    - A `str`: Absolute path to the image file (i.e. random access is allowed).
    - A `pathlib.Path`: path to the image file (i.e. random access is allowed).
    - A `dict` with the keys:

        - `path`: String with relative path of the image file to the archive file.
        - `bytes`: Bytes of the image file.

      This is useful for parquet or webdataset files which embed image files.

    - An `np.ndarray`: NumPy array representing an image.
    - A `PIL.Image.Image`: PIL image object.

    Output: The Image features output data as `PIL.Image.Image` objects.

    Args:
        mode (`str`, *optional*):
            The mode to convert the image to. If `None`, the native mode of the image is used.
        decode (`bool`, defaults to `True`):
            Whether to decode the image data. If `False`,
            returns the underlying dictionary in the format `{"path": image_path, "bytes": image_bytes}`.

    Examples:

    ```py
    >>> from datasets import load_dataset, Image
    >>> ds = load_dataset("AI-Lab-Makerere/beans", split="train")
    >>> ds.features["image"]
    Image(decode=True, id=None)
    >>> ds[0]["image"]
    <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x500 at 0x15E52E7F0>
    >>> ds = ds.cast_column('image', Image(decode=False))
    {'bytes': None,
     'path': '/root/.cache/huggingface/datasets/downloads/extracted/b0a21163f78769a2cf11f58dfc767fb458fc7cea5c05dccc0144a2c0f0bc1292/train/healthy/healthy_train.85.jpg'}
    ```
    NmodeTdecodeF)defaultrepridPIL.Image.Imagedtypebytespathpa_type)r   initr   _typec                 C   s   | j S N)r&   )self r+   e/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/datasets/features/image.py__call___   s   zImage.__call__valuereturnc                 C   s  t jrddl}ntdt|trt|}t|tr |ddS t|t	r.t|
 ddS t|ttfr:d|dS t|tjrDt|S t||jjrOt|S |ddurftj|d rfd|ddS |ddust|ddur|d|ddS td| d	)
a   Encode example into a format for Arrow.

        Args:
            value (`str`, `np.ndarray`, `PIL.Image.Image` or `dict`):
                Data passed as input to Image feature.

        Returns:
            `dict` with "path" and "bytes" fields
        r   N4To support encoding images, please install 'Pillow'.r%   r$   r%   r#   r$   zUAn image sample should have one of 'path' or 'bytes' but they are missing or None in .)r   PIL_AVAILABLE	PIL.ImageImportError
isinstancelistnparraystrr   absoluter$   	bytearrayndarrayencode_np_arrayr   encode_pil_imagegetosr%   isfile
ValueError)r*   r.   PILr+   r+   r,   encode_exampleb   s,   








zImage.encode_examplec                 C   s~  | j stdtjrddl}ddl}ntd|du ri }|d |d }}|du r|du r6td| dt|rA|j	
|}nV|d	d
 }|tjrQtjntj}t||}	|	durd||	d nd}
t|
d}t|d|d}t| }W d   n1 sw   Y  |j	
|}n|j	
t|}|  | |j	jjjdur|j|}| jr| j|jkr|| j}|S )aq  Decode example image file into image data.

        Args:
            value (`str` or `dict`):
                A string with the absolute image file path, a dictionary with
                keys:

                - `path`: String with absolute or relative image file path.
                - `bytes`: The bytes of the image file.
            token_per_repo_id (`dict`, *optional*):
                To access and decode
                image files from private repositories on the Hub, you can pass
                a dictionary repo_id (`str`) -> token (`bool` or `str`).

        Returns:
            `PIL.Image.Image`
        zMDecoding is disabled for this feature. Please use Image(decode=True) instead.r   Nz4To support decoding images, please install 'Pillow'.r%   r$   zCAn image should have one of 'path' or 'bytes' but both are None in r2   ::repo_idtokenrbdownload_config)r   RuntimeErrorr   r3   r4   PIL.ImageOpsr5   rC   r   r   opensplit
startswithHF_ENDPOINTHUB_DATASETS_URLHUB_DATASETS_HFFS_URLr   r@   r   r   r   readloadgetexifExifTagsBaseOrientationImageOpsexif_transposer   convert)r*   r.   token_per_repo_idrD   r%   bytes_image
source_urlpatternsource_url_fieldsrJ   rM   fr+   r+   r,   decode_example   sD   



zImage.decode_exampler   c                 C   s(   ddl m} | jr| S |d|ddS )zfIf in the decodable state, return the feature itself, otherwise flatten the feature into a dictionary.r   )Valuebinarystringr#   )featuresrg   r   )r*   rg   r+   r+   r,   flatten   s   zImage.flattenstoragec              
   C   sL  t j|jr&z	|t  }W n t jy% } ztd| |d}~ww t j|jrKt j	dgt
| t  d}t jj||gddg| d}nt j|jrwt|t  }t j	dgt
| t  d}t jj||gddg| d}nt j|jrt j	dgt
| t  d}t jj||gddg| d}nt j|jr|jddkr|d}nt j	dgt
| t  d}|jddkr|d}nt j	dgt
| t  d}t jj||gddg| d}n5t j|jr t j	dd	 | D t  d}t j	dgt
| t  d}t jj||gddg| d}t|| jS )
a  Cast an Arrow array to the Image arrow storage type.
        The Arrow types that can be converted to the Image pyarrow storage type are:

        - `pa.string()` - it must contain the "path" data
        - `pa.large_string()` - it must contain the "path" data (will be cast to string if possible)
        - `pa.binary()` - it must contain the image bytes
        - `pa.struct({"bytes": pa.binary()})`
        - `pa.struct({"path": pa.string()})`
        - `pa.struct({"bytes": pa.binary(), "path": pa.string()})`  - order doesn't matter
        - `pa.list(*)` - it must contain the image array data

        Args:
            storage (`Union[pa.StringArray, pa.StructArray, pa.ListArray]`):
                PyArrow array to cast.

        Returns:
            `pa.StructArray`: Array in the Image arrow storage type, that is
                `pa.struct({"bytes": pa.binary(), "path": pa.string()})`.
        zvFailed to cast large_string to string for Image feature. This can happen if string values exceed 2GB. Original error: Ntyper$   r%   maskr   c                 S   s*   g | ]}|d urt t|d nd qS )Nr$   )r>   r8   r9   ).0arrr+   r+   r,   
<listcomp>
  s   * z&Image.cast_storage.<locals>.<listcomp>)patypesis_large_stringrn   castri   ArrowInvalidrC   	is_stringr9   lenrh   StructArrayfrom_arraysis_nullis_large_binaryr   	is_binary	is_structget_field_indexr   is_list	to_pylistr&   )r*   rl   ebytes_array
path_arrayr+   r+   r,   cast_storage   sV       zImage.cast_storagec                    s   du ri t fdd tj fdd| D t d}tjdd |d D t d}tjj||gd	dg|	 d
}t
|| jS )a8  Embed image files into the Arrow array.

        Args:
            storage (`pa.StructArray`):
                PyArrow array to embed.

        Returns:
            `pa.StructArray`: Array in the Image arrow storage type, that is
                `pa.struct({"bytes": pa.binary(), "path": pa.string()})`.
        Nc                    s   |  dd }|tjrtjntj}t||}|d ur# |d nd }t|d}t	| d|d}|
 W  d    S 1 s@w   Y  d S )NrF   rG   rH   rI   rK   rL   )rQ   rR   r   rS   rT   rU   r   r@   r   r   rV   )r%   rb   rc   rd   rJ   rM   re   )r_   r+   r,   path_to_bytes!  s   

$z*Image.embed_storage.<locals>.path_to_bytesc                    s8   g | ]}|d ur|d d u r |d n|d nd qS )Nr$   r%   r+   )rq   x)r   r+   r,   rs   .  s    *z'Image.embed_storage.<locals>.<listcomp>rm   c                 S   s$   g | ]}|d urt j|nd qS r)   )rA   r%   basename)rq   r%   r+   r+   r,   rs   5  s   $ r%   r$   ro   )r   rt   r9   r   rh   r   ri   r{   r|   r}   r   r&   )r*   rl   r_   r   r   r+   )r   r_   r,   embed_storage  s    
zImage.embed_storager)   )$__name__
__module____qualname____doc__r   r	   r:   __annotations__r   boolr   r    r"   r   rt   structrh   ri   r&   r   r(   r-   r
   r$   r<   dictr8   r=   rE   rf   rk   StringArrayr{   	ListArrayr   r   r+   r+   r+   r,   r   .   s   
 '$$)=$>r   r/   c                  C   sR   t jrdd l} ntdtd u r'| j  tt| jj	
 t| jj
 @ atS )Nr   r0   )r   r3   r4   r5   r   r   r'   r7   setOPENkeysSAVE)rD   r+   r+   r,   list_image_compression_formats<  s   

$r   ra   r!   c                 C   sB   t  }| jt v r| j}n	| jdv rdnd}| j||d | S )zmConvert a PIL Image object to bytes using native compression if possible, otherwise use PNG/TIFF compression.)1LLARGBRGBAPNGTIFF)format)r   r   r   r   savegetvalue)ra   bufferr   r+   r+   r,   image_to_bytesI  s   r   c                 C   s.   t | dr| jdkr| jd dS d t| dS )Nfilename r1   )hasattrr   r   )ra   r+   r+   r,   r?   T  s   r?   r9   c           	      C   s>  t jrdd l}ntd| j}|jdkr|jnt}|j}|j}d }| j	dd  rM|dvr7t
d| d| dtd	}||krLtd
| d| d nB|tv rT|}n;|dkr|| t| }t|tv ryt|}td
| d| d n|d }|dksX|d u rt
d| dt |j| |}d t|dS )Nr   r0   =r   )uizUnsupported array dtype z for image encoding. Only z' is supported for multi-channel arrays.r   zDowncasting array dtype z to z to be compatible with 'Pillow'r   zCannot downcast dtype z- to a valid image dtype. Valid image dtypes: r1   )r   r3   r4   r5   r"   	byteorder_NATIVE_BYTEORDERkinditemsizeshape	TypeErrorr8   warningswarn_VALID_IMAGE_ARRAY_DTPYESr:   r   	fromarrayastyper   )	r9   rD   r"   dtype_byteorder
dtype_kinddtype_itemsize
dest_dtype	dtype_strra   r+   r+   r,   r>   [  sB   


r>   objsc                    s   t jrddl}ntd| rIt| \}}t|tr dd | D S t|tjr3t	t
  fdd| D S t||jjrGt	t  fdd| D S | S | S )zmEncode a list of objects into a format suitable for creating an extension array of type `ImageExtensionType`.r   Nr0   c                 S   s"   g | ]}|d ur|d dnd qS )Nr1   r+   rq   objr+   r+   r,   rs     s   " z2objects_to_list_of_image_dicts.<locals>.<listcomp>c                       g | ]} |qS r+   r+   r   obj_to_image_dict_funcr+   r,   rs         c                    r   r+   r+   r   r   r+   r,   rs     r   )r   r3   r4   r5   r   r6   r:   r8   r=   r   r>   r   r?   )r   rD   _r   r+   r   r,   objects_to_list_of_image_dicts  s   

r   )6rA   sysr   dataclassesr   r   ior   pathlibr   typingr   r   r   r	   r
   numpyr8   pyarrowrt   r   r   download.download_configr   tabler   utils.file_utilsr   r   utils.py_utilsr   r   r   r4   rD   rj   r   r   r7   r:   r   r   r   r"   r   r   r   r$   r   r   r?   r=   r>   r   r+   r+   r+   r,   <module>   s^   
   + 