
    #jG                        U d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZmZ d dlZd dlZddlmZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZm Z m!Z! er
d dl"Z#ddl$m%Z% da&ee'e(                  e)d<   ej*        dk    rdndZ+ ej,        d           ej,        d           ej,        d           ej,        d           ej,        d           ej,        d           ej,        d           ej,        d           ej,        d           ej,        d           ej,        d           ej,        d           ej,        d           ej,        d          gZ-e G d  d!                      Z.d"e'e(         fd#Z/d$d%d"e0fd&Z1d$d%d"e2fd'Z3d(ej4        d"e2fd)Z5d*ee'e(         e'e2         e'ej4                 e'd%         f         d"e'e2         fd+Z6dS ),    N)	dataclassfield)BytesIO)Path)TYPE_CHECKINGAnyClassVarOptionalUnion   )config)DownloadConfig)
array_cast)is_local_pathis_remote_urlxopen)first_non_null_valueno_op_if_value_is_nullstring_to_dict   )FeatureType_IMAGE_COMPRESSION_FORMATSlittle<>z|b1|u1z<u2z>u2z<i2z>i2z<u4z>u4z<i4z>i4z<f4z>f4z<f8z>f8c            	          e Zd ZU dZdZee         ed<   dZe	ed<    e
dd          Zee         ed<   d	Zee         ed
<    ej         ej                     ej                    d          Zee         ed<    e
d dd          Zeed<   d Zdeeeeeej        d	f         defdZddedd	fdZdedeedf         f         fdZdeej        ej         ej!        f         dej         fdZ"	 ddej         de	de	dej         fdZ#dS )Imagea=  Image [`Feature`] to read image data from an image file.

    Input: The Image feature accepts as input:
    - A `str`: Absolute path to the image file (i.e. random access is allowed).
    - A `pathlib.Path`: path to the image file (i.e. random access is allowed).
    - A `dict` with the keys:

        - `path`: String with relative path of the image file to the archive file.
        - `bytes`: Bytes of the image file.

      This is useful for parquet or webdataset files which embed image files.

    - An `np.ndarray`: NumPy array representing an image.
    - A `PIL.Image.Image`: PIL image object.

    Output: The Image features output data as `PIL.Image.Image` objects.

    Args:
        mode (`str`, *optional*):
            The mode to convert the image to. If `None`, the native mode of the image is used.
        decode (`bool`, defaults to `True`):
            Whether to decode the image data. If `False`,
            returns the underlying dictionary in the format `{"path": image_path, "bytes": image_bytes}`.

    Examples:

    ```py
    >>> from datasets import load_dataset, Image
    >>> ds = load_dataset("AI-Lab-Makerere/beans", split="train")
    >>> ds.features["image"]
    Image(decode=True, id=None)
    >>> ds[0]["image"]
    <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=500x500 at 0x15E52E7F0>
    >>> ds = ds.cast_column('image', Image(decode=False))
    {'bytes': None,
     'path': '/root/.cache/huggingface/datasets/downloads/extracted/b0a21163f78769a2cf11f58dfc767fb458fc7cea5c05dccc0144a2c0f0bc1292/train/healthy/healthy_train.85.jpg'}
    ```
    NmodeTdecodeF)defaultrepridPIL.Image.Imagedtypebytespathpa_type)r!   initr"   _typec                     | j         S N)r)   )selfs    a/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/datasets/features/image.py__call__zImage.__call___   s
    |    valuereturnc                    t           j        rddl}nt          d          t	          |t
                    rt          j        |          }t	          |t                    r|ddS t	          |t                    r$t          |
                                          ddS t	          |t          t          f          rd|dS t	          |t          j                  rt          |          S t	          ||j        j                  rt!          |          S |                    d          =t$          j                            |d                   rd|                    d          dS |                    d          |                    d          +|                    d          |                    d          dS t+          d| d	          )
a   Encode example into a format for Arrow.

        Args:
            value (`str`, `np.ndarray`, `PIL.Image.Image` or `dict`):
                Data passed as input to Image feature.

        Returns:
            `dict` with "path" and "bytes" fields
        r   N4To support encoding images, please install 'Pillow'.r(   r'   r(   r&   r'   zUAn image sample should have one of 'path' or 'bytes' but they are missing or None in .)r   PIL_AVAILABLE	PIL.ImageImportError
isinstancelistnparraystrr   absoluter'   	bytearrayndarrayencode_np_arrayr   encode_pil_imagegetosr(   isfile
ValueError)r.   r2   PILs      r/   encode_examplezImage.encode_exampleb   s     	VTUUUeT"" 	$HUOOEeS!! 	!D111t$$ 	 0 011DAAAy122 	 5111rz** 	"5)))sy// 	#E***YYv*rw~~eFm/L/L*!599V+<+<===YYw+uyy/@/@/L"YYw//69J9JKKKphmppp  r1   c                    | j         st          d          t          j        r	ddl}ddl}nt          d          |i }|d         |d         }}|7|t          d| d          t          |          r|j	        
                    |          }n|                    d	          d
         }|                    t          j                  rt          j        nt          j        }t!          ||          }	|	|                    |	d                   nd}
t%          |
          }t'          |d|          5 }t)          |                                          }ddd           n# 1 swxY w Y   |j	        
                    |          }n'|j	        
                    t)          |                    }|                                 |                                                    |j	        j        j        j                  |j                            |          }| j        r*| j        |j        k    r|                    | j                  }|S )aq  Decode example image file into image data.

        Args:
            value (`str` or `dict`):
                A string with the absolute image file path, a dictionary with
                keys:

                - `path`: String with absolute or relative image file path.
                - `bytes`: The bytes of the image file.
            token_per_repo_id (`dict`, *optional*):
                To access and decode
                image files from private repositories on the Hub, you can pass
                a dictionary repo_id (`str`) -> token (`bool` or `str`).

        Returns:
            `PIL.Image.Image`
        zMDecoding is disabled for this feature. Please use Image(decode=True) instead.r   Nz4To support decoding images, please install 'Pillow'.r(   r'   zCAn image should have one of 'path' or 'bytes' but both are None in r7   ::repo_idtokenrbdownload_config)r    RuntimeErrorr   r8   r9   PIL.ImageOpsr:   rH   r   r   opensplit
startswithHF_ENDPOINTHUB_DATASETS_URLHUB_DATASETS_HFFS_URLr   rE   r   r   r   readloadgetexifExifTagsBaseOrientationImageOpsexif_transposer   convert)r.   r2   token_per_repo_idrI   r(   bytes_image
source_urlpatternsource_url_fieldsrP   rS   fs                r/   decode_examplezImage.decode_example   sn   $ { 	pnooo 	VTUUU$ "V}eGnf>| !ogl!o!o!oppp && 3INN400EE!%D!1!1"!5J &001CDD://#9 
 )7z7(K(K%O`Ol)--.?	.JKKKrv  '55&A&A&AOtT?KKK 3q!(!2!23 3 3 3 3 3 3 3 3 3 3 3 3 3 3INN622EEINN76??33E

==??sy16BCCOL//66E9 	-ej00MM$),,Es   5"E##E'*E'r   c                 N    ddl m} | j        r| n |d           |d          dS )zfIf in the decodable state, return the feature itself, otherwise flatten the feature into a dictionary.r   )Valuebinarystringr&   )featuresrn   r    )r.   rn   s     r/   flattenzImage.flatten   sK    ###### {DD xh 	
r1   storagec                    t           j                            |j                  rR	 |                    t          j                              }n*# t           j        $ r}t          d|           |d}~ww xY wt           j                            |j                  rrt          j	        dgt          |          z  t          j                              }t           j                            ||gddg|                                          }nZt           j                            |j                  rt!          |t          j                              }t          j	        dgt          |          z  t          j                              }t           j                            ||gddg|                                          }nt           j                            |j                  rrt          j	        dgt          |          z  t          j                              }t           j                            ||gddg|                                          }nt           j                            |j                  r|j                            d          dk    r|                    d          }n8t          j	        dgt          |          z  t          j                              }|j                            d          dk    r|                    d          }n8t          j	        dgt          |          z  t          j                              }t           j                            ||gddg|                                          }nt           j                            |j                  rt          j	        d |                                D             t          j                              }t          j	        dgt          |          z  t          j                              }t           j                            ||gddg|                                          }t!          || j                  S )	a  Cast an Arrow array to the Image arrow storage type.
        The Arrow types that can be converted to the Image pyarrow storage type are:

        - `pa.string()` - it must contain the "path" data
        - `pa.large_string()` - it must contain the "path" data (will be cast to string if possible)
        - `pa.binary()` - it must contain the image bytes
        - `pa.struct({"bytes": pa.binary()})`
        - `pa.struct({"path": pa.string()})`
        - `pa.struct({"bytes": pa.binary(), "path": pa.string()})`  - order doesn't matter
        - `pa.list(*)` - it must contain the image array data

        Args:
            storage (`Union[pa.StringArray, pa.StructArray, pa.ListArray]`):
                PyArrow array to cast.

        Returns:
            `pa.StructArray`: Array in the Image arrow storage type, that is
                `pa.struct({"bytes": pa.binary(), "path": pa.string()})`.
        zvFailed to cast large_string to string for Image feature. This can happen if string values exceed 2GB. Original error: Ntyper'   r(   maskr   c                 d    g | ]-}|'t          t          j        |                    d         nd .S )Nr'   )rC   r=   r>   ).0arrs     r/   
<listcomp>z&Image.cast_storage.<locals>.<listcomp>
  s8    uuuZ]CO#//88QUuuur1   )patypesis_large_stringrv   castrp   ArrowInvalidrH   	is_stringr>   lenro   StructArrayfrom_arraysis_nullis_large_binaryr   	is_binary	is_structget_field_indexr   is_list	to_pylistr)   )r.   rs   ebytes_array
path_arrays        r/   cast_storagezImage.cast_storage   s   ( 8##GL11 	!,,ry{{33?    +'(+ +  	 8gl++ 	(D6CLL#8ry{{KKKKn00+w1G'SYIZahapaparar0ssGGX%%gl33 	  G 4&3w<<"7bikkJJJJn00':1FRXHY`g`o`o`q`q0rrGGX-- 	4&3w<<"7bikkJJJJn00':1FRXHY`g`o`o`q`q0rrGGX-- 	|++G4499%mmG44 hvG'<29;;OOO|++F33q88$]]622

Xtfs7||&;")++NNN
n00+z1JWV\L]dkdsdsdudu0vvGGXgl++ 	(uuahararatatuuuY[[  K 4&3w<<"7bikkJJJJn00j)GV+<;CVCVCXCX 1  G '4<000s   &A A4A//A4local_filesremote_filesc                    i t           fd            t          j        fd|                                D             t          j                              }t          j        fd|                    d                                          D             t          j                              }t          j                            ||gddg|	                                          }t          || j                  S )	a7  Embed image files into the Arrow array.

        Args:
            storage (`pa.StructArray`):
                PyArrow array to embed.
            token_per_repo_id (`dict`, optional):
                Dictionary repo_id -> token to fetch the files bytes.
            local_files (`bool`, defaults to `True`)
                Whether to embed local files data in the array

                <Added version="4.8.5"/>
            remote_files (`bool`, defaults to `True`)
                Whether to embed remote files data in the array.
                E.g. files with paths that start with hf:// or https://

                <Added version="4.8.5"/>

        Returns:
            `pa.StructArray`: Array in the Image arrow storage type, that is
                `pa.struct({"bytes": pa.binary(), "path": pa.string()})`.
        Nc                    |                      d          d         }|                    t          j                  rt          j        nt          j        }t          ||          }|                    |d                   nd }t          |          }t          | d|          5 }|
                                cd d d            S # 1 swxY w Y   d S )NrL   rM   rN   rO   rQ   rR   )rW   rX   r   rY   rZ   r[   r   rE   r   r   r\   )r(   rh   ri   rj   rP   rS   rk   re   s          r/   path_to_bytesz*Image.embed_storage.<locals>.path_to_bytes.  s   D))"-J+5+@+@AS+T+Tv''Z`Zv  !/z7 C CK\Kh%))*;I*FGGGnrE,5999OtT?CCC  qvvxx                                   s   %CC
C
c                     g | ]U}|O|d         ?rt          |d                   sr&t          |d                   r |d                   n	|d         nd VS )Nr'   r(   )r   r   )rz   xr   r   r   s     r/   r|   z'Image.embed_storage.<locals>.<listcomp>;  s     
 
 
  =	 z)% **7&	*B*B *HT *YfghiogpYqYq * "M!F),,, 7 
 
 
r1   ru   c                     g | ]I}|Crt          |          sr.t          |          rt          j                            |          n|nd JS r-   )r   r   rF   r(   basename)rz   r(   r   r   s     r/   r|   z'Image.embed_storage.<locals>.<listcomp>I  s     	 	 	  # $(5d(;(;AMR_`dReReBG$$T*** 	 	 	r1   r(   r'   rw   )r   r}   r>   r   ro   r   rp   r   r   r   r   r)   )r.   rs   re   r   r   r   r   r   s     ```  @r/   embed_storagezImage.embed_storage  sI   0 $ "			  		  		  		  
 			  h
 
 
 
 
 
 !**,,
 
 
 
 
 
 X	 	 	 	 	 $MM&11;;==	 	 	 
 
 

 .,,k:-FRXHY`g`o`o`q`q,rr'4<000r1   r-   )NTT)$__name__
__module____qualname____doc__r   r
   r?   __annotations__r    boolr   r#   r%   r	   r}   structro   rp   r)   r   r+   r0   r   r'   rA   dictr=   rB   rJ   rl   rr   StringArrayr   	ListArrayr   r    r1   r/   r   r   .   s        % %N D(3-FDd777B777,E8C=,,,&RYibikk'R'RSSGXc]SSSwU???E3???  'E#uirzSd*d$e 'jn ' ' ' 'R; ;D ;EV ; ; ; ;z
}d33E.FFG 
 
 
 
<1E".".",*V$W <1\^\j <1 <1 <1 <1~ osC1 C1~C1LPC1gkC1	C1 C1 C1 C1 C1 C1r1   r   r3   c                  X   t           j        rdd l} nt          d          t          }| j                                         t          t          | j        j	        
                                          t          | j        j        
                                          z            at          S )Nr   r5   )r   r8   r9   r:   r   r   r*   r<   setOPENkeysSAVE)rI   s    r/   list_image_compression_formatsr   Y  s     RPQQQ ")	%)#cin.A.A.C.C*D*Ds39>K^K^K`K`GaGa*a%b%b"%%r1   rg   r$   c                     t                      }| j        t                      v r| j        }n| j        dv rdnd}|                     ||           |                                S )zmConvert a PIL Image object to bytes using native compression if possible, otherwise use PNG/TIFF compression.)1LLARGBRGBAPNGTIFF)format)r   r   r   r   savegetvalue)rg   bufferr   s      r/   image_to_bytesr   f  sd    YYF|57777*(GGGV	JJvfJ%%%??r1   c                 p    t          | d          r| j        dk    r
| j        d dS d t          |           dS )Nfilename r6   )hasattrr   r   )rg   s    r/   rD   rD   q  sF    uj!! >en&:&:666~e'<'<===r1   r>   c                    t           j        rdd l}nt          d          | j        }|j        dk    r|j        nt          }|j        }|j        }d }| j	        dd          rP|dvrt          d| d| d          t          j        d	          }||k    rt          j        d
| d| d           n|t          v r|}n|dk    rk||z   t          |          z   }t          j        |          t          v r0t          j        |          }t          j        d
| d| d           n|dz  }|dk    k|t          d| dt                     |j                            |                     |                    }d t'          |          dS )Nr   r5   =r   )uizUnsupported array dtype z for image encoding. Only z' is supported for multi-channel arrays.r   zDowncasting array dtype z to z to be compatible with 'Pillow'r   zCannot downcast dtype z- to a valid image dtype. Valid image dtypes: r6   )r   r8   r9   r:   r%   	byteorder_NATIVE_BYTEORDERkinditemsizeshape	TypeErrorr=   warningswarn_VALID_IMAGE_ARRAY_DTPYESr?   r   	fromarrayastyper   )	r>   rI   r%   dtype_byteorder
dtype_kinddtype_itemsize
dest_dtype	dtype_strrg   s	            r/   rC   rC   x  s    RPQQQKE).C)?)?eooEVOJ^NJ {122 Z''5J   Xe__
JMkUkk
kkklll	+	+	+

!!'*4s>7J7JJIx	""&???Xi00
oooJoooppp1$ !! xxx]vxx   IZ 8 899E>%#8#8999r1   objsc                    t           j        rddl}nt          d          | rt	          |           \  }}t          |t                    rd | D             S t          |t          j                  r"t          t                    fd| D             S t          ||j        j                  r"t          t                    fd| D             S | S | S )zmEncode a list of objects into a format suitable for creating an extension array of type `ImageExtensionType`.r   Nr5   c                      g | ]}||d dnd S )Nr6   r   )rz   objs     r/   r|   z2objects_to_list_of_image_dicts.<locals>.<listcomp>  s*    ^^^RUCOS4000^^^r1   c                 &    g | ]} |          S r   r   rz   r   obj_to_image_dict_funcs     r/   r|   z2objects_to_list_of_image_dicts.<locals>.<listcomp>  %    @@@C**3//@@@r1   c                 &    g | ]} |          S r   r   r   s     r/   r|   z2objects_to_list_of_image_dicts.<locals>.<listcomp>  r   r1   )r   r8   r9   r:   r   r;   r?   r=   rB   r   rC   r   rD   )r   rI   _r   r   s       @r/   objects_to_list_of_image_dictsr     s      RPQQQ %d++3c3 	_^^Y]^^^^c2:&& 	%;O%L%L"@@@@4@@@@SY_-- 	%;<L%M%M"@@@@4@@@@Kr1   )7rF   sysr   dataclassesr   r   ior   pathlibr   typingr   r   r	   r
   r   numpyr=   pyarrowr}   r   r   download.download_configr   tabler   utils.file_utilsr   r   r   utils.py_utilsr   r   r   r9   rI   rq   r   r   r<   r?   r   r   r   r%   r   r   r   r'   r   r   rD   rB   rC   r   r   r1   r/   <module>r      sH   					 



  ( ( ( ( ( ( ( (             @ @ @ @ @ @ @ @ @ @ @ @ @ @               5 5 5 5 5 5       B B B B B B B B B B Y Y Y Y Y Y Y Y Y Y  &%%%%%% 37 HT#Y/ 6 6 6=H44CC#  BHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOOBHUOO $ g1 g1 g1 g1 g1 g1 g1 g1T	
&S	 
& 
& 
& 
&+     >- >$ > > > >(:2: (:$ (: (: (: (:V
S	4:tBJ'7>O9PP
Q	$Z     r1   