
    |jH                    P   U d dl mZ d dlmZmZmZmZ d dlmZ er%d dl	m
Z
 d dlmZ d dlmZ ddlmZ ed	         Zd
ed<   d dlZd dlmZ d dlZd dlmZ d dlmZ g Zd dZd!dZ G d deedef                            Z d	Z!d Z"d Z#d Z$ G d dee%d                            Z&dS )"    )annotations)TYPE_CHECKINGAnyCallableLiteral)	TypeAlias)Sequence)_DTypeLiteral)
_Transform   )_ImageDataType)	z.jpgz.jpegz.pngz.ppmz.bmpz.pgmz.tifz.tiffz.webpr   _AllowedExtensionsN)Image)Dataset)
try_importfilenamestr
extensionsSequence[str]returnboolc                    t          |t          t          f          s
J d            t          d |D                       }|                                                     |          S )zChecks if a file is a valid extension.

    Args:
        filename (str): path to a file
        extensions (list[str]|tuple[str]): extensions to consider

    Returns:
        bool: True if the filename ends with one of given extensions
    z#`extensions` must be list or tuple.c                6    g | ]}|                                 S  )lower).0xs     m/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/paddle/vision/datasets/folder.py
<listcomp>z'has_valid_extension.<locals>.<listcomp>A   s     666a		666    )
isinstancelisttupler   endswith)r   r   s     r   has_valid_extensionr%   4   sk     j4-00  - 0 66:66677J>>$$Z000r    c                ,   g }t           j                            |           } fd}t          |                                          D ]}t           j                            | |          }t           j                            |          sBt          t          j        |d                    D ]b\  }}}	t          |	          D ]L}
t           j                            ||
          } ||          r|||         f}|                    |           Mc|S )Nc                $    t          |           S Nr%   r   r   s    r   is_valid_filez#make_dataset.<locals>.is_valid_fileK   s    &q*555r    Tfollowlinks)	ospath
expandusersortedkeysjoinisdirwalkappend)dirclass_to_idxr   r+   imagestargetdroot_fnamesfnamer/   items     `          r   make_datasetrA   E   s*   F
'

S
!
!C	6 	6 	6 	6 	6 **,,-- 	( 	(GLLf%%w}}Q 	%bgaT&B&B&BCC 	( 	(OD!V ( (w||D%00 =&& ( ,v"67DMM$'''	(	( Mr    c                      e Zd ZU dZded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   	 	 	 	 d!d"dZd#dZd$dZd  ZdS )%DatasetFoldera"  A generic data loader where the samples are arranged in this way:

    .. code-block:: text

        root/class_a/1.ext
        root/class_a/2.ext
        root/class_a/3.ext

        root/class_b/123.ext
        root/class_b/456.ext
        root/class_b/789.ext

    Args:
        root (str): Root directory path.
        loader (Callable|None, optional): A function to load a sample given its path. Default: None.
        extensions (list[str]|tuple[str]|None, optional): A list of allowed extensions.
            Both :attr:`extensions` and :attr:`is_valid_file` should not be passed.
            If this value is not set, the default is to use ('.jpg', '.jpeg', '.png',
            '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
        transform (Callable|None, optional): A function/transform that takes in
            a sample and returns a transformed version. Default: None.
        is_valid_file (Callable|None, optional): A function that takes path of a file
            and check if the file is a valid file. Both :attr:`extensions` and
            :attr:`is_valid_file` should not be passed. Default: None.

    Returns:
        :ref:`api_paddle_io_Dataset`. An instance of DatasetFolder.

    Attributes:
        classes (list[str]): List of the class names.
        class_to_idx (dict[str, int]): Dict with items (class_name, class_index).
        samples (list[tuple[str, int]]): List of (sample_path, class_index) tuples.
        targets (list[int]): The class_index value for each image in the dataset.

    Example:

        .. code-block:: python

            >>> import shutil
            >>> import tempfile
            >>> import cv2
            >>> import numpy as np
            >>> import paddle.vision.transforms as T
            >>> from pathlib import Path
            >>> from paddle.vision.datasets import DatasetFolder

            >>> def make_fake_file(img_path: str):
            ...     if img_path.endswith((".jpg", ".png", ".jpeg")):
            ...         fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
            ...         cv2.imwrite(img_path, fake_img)
            ...     elif img_path.endswith(".txt"):
            ...         with open(img_path, "w") as f:
            ...             f.write("This is a fake file.")

            >>> def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
            ...     root = Path(root)
            ...     root.mkdir(parents=True, exist_ok=True)
            ...     for subpath in directory_hierarchy:
            ...         if isinstance(subpath, str):
            ...             filepath = root / subpath
            ...             file_maker(str(filepath))
            ...         else:
            ...             dirname = list(subpath.keys())[0]
            ...             make_directory(root / dirname, subpath[dirname])

            >>> directory_hierarchy = [
            ...     {"class_0": [
            ...         "abc.jpg",
            ...         "def.png"]},
            ...     {"class_1": [
            ...         "ghi.jpeg",
            ...         "jkl.png",
            ...         {"mno": [
            ...             "pqr.jpeg",
            ...             "stu.jpg"]}]},
            ...     "this_will_be_ignored.txt",
            ... ]

            >>> # You can replace this with any directory to explore the structure
            >>> # of generated data. e.g. fake_data_dir = "./temp_dir"
            >>> fake_data_dir = tempfile.mkdtemp()
            >>> make_directory(fake_data_dir, directory_hierarchy)
            >>> data_folder_1 = DatasetFolder(fake_data_dir)
            >>> print(data_folder_1.classes)
            ['class_0', 'class_1']
            >>> print(data_folder_1.class_to_idx)
            {'class_0': 0, 'class_1': 1}
            >>> print(data_folder_1.samples)
            >>> # doctest: +SKIP(it's different with windows)
            [('./temp_dir/class_0/abc.jpg', 0), ('./temp_dir/class_0/def.png', 0),
             ('./temp_dir/class_1/ghi.jpeg', 1), ('./temp_dir/class_1/jkl.png', 1),
             ('./temp_dir/class_1/mno/pqr.jpeg', 1), ('./temp_dir/class_1/mno/stu.jpg', 1)]
            >>> # doctest: -SKIP
            >>> print(data_folder_1.targets)
            [0, 0, 1, 1, 1, 1]
            >>> print(len(data_folder_1))
            6

            >>> for i in range(len(data_folder_1)):
            ...     img, label = data_folder_1[i]
            ...     # do something with img and label
            ...     print(type(img), img.size, label)
            ...     # <class 'PIL.Image.Image'> (32, 32) 0


            >>> transform = T.Compose(
            ...     [
            ...         T.Resize(64),
            ...         T.ToTensor(),
            ...         T.Normalize(
            ...             mean=[0.5, 0.5, 0.5],
            ...             std=[0.5, 0.5, 0.5],
            ...             to_rgb=True,
            ...         ),
            ...     ]
            ... )

            >>> data_folder_2 = DatasetFolder(
            ...     fake_data_dir,
            ...     loader=lambda x: cv2.imread(x),  # load image with OpenCV
            ...     extensions=(".jpg",),  # only load *.jpg files
            ...     transform=transform,  # apply transform to every image
            ... )

            >>> print([img_path for img_path, label in data_folder_2.samples])
            >>> # doctest: +SKIP(it's different with windows)
            ['./temp_dir/class_0/abc.jpg', './temp_dir/class_1/mno/stu.jpg']
            >>> # doctest: -SKIP
            >>> print(len(data_folder_2))
            2

            >>> for img, label in iter(data_folder_2):
            ...     # do something with img and label
            ...     print(type(img), img.shape, label)  # type: ignore
            ...     # <class 'paddle.Tensor'> [3, 64, 64] 0

            >>> shutil.rmtree(fake_data_dir)
    $Callable[..., _ImageDataType] | Noneloader#Sequence[_AllowedExtensions] | Noner   _Transform[Any, Any] | None	transform	list[str]classeszdict[str, int]r8   zlist[tuple[str, int]]samplestargetsr
   dtypeNr<   r   r+   _ImageDataType | Noner   Nonec                   || _         || _        |t          }|                     | j                   \  }}t	          | j         |||          }t          |          dk    r0t          d| j         z   dz   d                    |          z             |t          n|| _	        || _
        || _        || _        || _        d |D             | _        t          j                    | _        d S )Nr   z&Found 0 directories in subfolders of: 
Supported extensions are: ,c                    g | ]
}|d          S )   r   )r   ss     r   r   z*DatasetFolder.__init__.<locals>.<listcomp>  s    ...!...r    )r<   rH   IMG_EXTENSIONS_find_classesrA   lenRuntimeErrorr3   default_loaderrE   r   rJ   r8   rK   rL   paddleget_default_dtyperM   )	selfr<   rE   r   rH   r+   rJ   r8   rK   s	            r   __init__zDatasetFolder.__init__   s     	"'J $ 2 249 = =I|Z
 
 w<<1<tyH L1 13688J3G3GH  )/nnF$(..g...-//


r    r7    tuple[list[str], dict[str, int]]c                    d t          j        |          D                                              fdt          t	                              D             }|fS )a  
        Finds the class folders in a dataset.

        Args:
            dir (string): Root directory path.

        Returns:
            tuple: (classes, class_to_idx) where classes are relative to (dir),
                    and class_to_idx is a dictionary.

        c                D    g | ]}|                                 |j        S r   )is_dirname)r   r;   s     r   r   z/DatasetFolder._find_classes.<locals>.<listcomp>  s'    AAAaahhjjA16AAAr    c                "    i | ]}|         |S r   r   )r   irJ   s     r   
<dictcomp>z/DatasetFolder._find_classes.<locals>.<dictcomp>!  s    CCC!
ACCCr    )r.   scandirsortrangerX   )r]   r7   r8   rJ   s      @r   rW   zDatasetFolder._find_classes  s`     BA2:c??AAACCCCuS\\/B/BCCC$$r    indexinttuple[_ImageDataType, int]c                    | j         |         \  }}|                     |          }| j        |                     |          }||fS )z
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        rK   rE   rH   )r]   rj   r/   r:   samples        r   __getitem__zDatasetFolder.__getitem__$  sH     |E*fT"">%^^F++Fv~r    c                *    t          | j                  S r(   rX   rK   r]   s    r   __len__zDatasetFolder.__len__3      4<   r    NNNNr<   r   rE   rD   r   rF   rH   rG   r+   rN   r   rO   )r7   r   r   r_   )rj   rk   r   rl   )	__name__
__module____qualname____doc____annotations__r^   rW   rp   rt   r   r    r   rC   rC   \   s         I IV 10003333****    """"
 8<:>15/3 0  0  0  0  0D% % % %"   ! ! ! ! !r    rC   r   c                    t          | d          5 }t          j         |          }|                    d          cd d d            S # 1 swxY w Y   d S )NrbRGB)openr   convert)r/   fimgs      r   
pil_loaderr   D  s    	dD		 "Qjmm{{5!!" " " " " " " " " " " " " " " " " "s   )AAAc                |    t          d          }|                    |                    |           |j                  S )Ncv2)r   cvtColorimreadCOLOR_BGR2RGB)r/   r   s     r   
cv2_loaderr   J  s2    
U

C<<

4((#*;<<<r    c                f    ddl m}  |            dk    rt          |           S t          |           S )Nr   )get_image_backendr   )paddle.visionr   r   r   )r/   r   s     r   rZ   rZ   O  sD    //////e##$$r    c                  ^    e Zd ZU dZded<   ded<   ded<   ded	<   	 	 	 	 dddZddZddZd
S )ImageFolderam  A generic data loader where the samples are arranged in this way:

    .. code-block:: text

        root/1.ext
        root/2.ext
        root/sub_dir/3.ext

    Args:
        root (str): Root directory path.
        loader (Callable|None, optional): A function to load a sample given its path. Default: None.
        extensions (list[str]|tuple[str]|None, optional): A list of allowed extensions.
            Both :attr:`extensions` and :attr:`is_valid_file` should not be passed.
            If this value is not set, the default is to use ('.jpg', '.jpeg', '.png',
            '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
        transform (Callable|None, optional): A function/transform that takes in
            a sample and returns a transformed version. Default: None.
        is_valid_file (Callable|None, optional): A function that takes path of a file
            and check if the file is a valid file. Both :attr:`extensions` and
            :attr:`is_valid_file` should not be passed. Default: None.

    Returns:
        :ref:`api_paddle_io_Dataset`. An instance of ImageFolder.

    Attributes:
        samples (list[str]): List of sample path.

    Example:

        .. code-block:: python

            >>> import shutil
            >>> import tempfile
            >>> import cv2
            >>> import numpy as np
            >>> import paddle.vision.transforms as T
            >>> from pathlib import Path
            >>> from paddle.vision.datasets import ImageFolder

            >>> def make_fake_file(img_path: str):
            ...     if img_path.endswith((".jpg", ".png", ".jpeg")):
            ...         fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
            ...         cv2.imwrite(img_path, fake_img)
            ...     elif img_path.endswith(".txt"):
            ...         with open(img_path, "w") as f:
            ...             f.write("This is a fake file.")

            >>> def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
            ...     root = Path(root)
            ...     root.mkdir(parents=True, exist_ok=True)
            ...     for subpath in directory_hierarchy:
            ...         if isinstance(subpath, str):
            ...             filepath = root / subpath
            ...             file_maker(str(filepath))
            ...         else:
            ...             dirname = list(subpath.keys())[0]
            ...             make_directory(root / dirname, subpath[dirname])

            >>> directory_hierarchy = [
            ...     "abc.jpg",
            ...     "def.png",
            ...     {"ghi": [
            ...         "jkl.jpeg",
            ...         {"mno": [
            ...             "pqr.jpg"]}]},
            ...     "this_will_be_ignored.txt",
            ... ]

            >>> # You can replace this with any directory to explore the structure
            >>> # of generated data. e.g. fake_data_dir = "./temp_dir"
            >>> fake_data_dir = tempfile.mkdtemp()
            >>> make_directory(fake_data_dir, directory_hierarchy)
            >>> image_folder_1 = ImageFolder(fake_data_dir)
            >>> print(image_folder_1.samples)
            >>> # doctest: +SKIP(it's different with windows)
            ['./temp_dir/abc.jpg', './temp_dir/def.png',
             './temp_dir/ghi/jkl.jpeg', './temp_dir/ghi/mno/pqr.jpg']
            >>> # doctest: -SKIP
            >>> print(len(image_folder_1))
            4

            >>> for i in range(len(image_folder_1)):
            ...     (img,) = image_folder_1[i]
            ...     # do something with img
            ...     print(type(img), img.size)
            ...     # <class 'PIL.Image.Image'> (32, 32)


            >>> transform = T.Compose(
            ...     [
            ...         T.Resize(64),
            ...         T.ToTensor(),
            ...         T.Normalize(
            ...             mean=[0.5, 0.5, 0.5],
            ...             std=[0.5, 0.5, 0.5],
            ...             to_rgb=True,
            ...         ),
            ...     ]
            ... )

            >>> image_folder_2 = ImageFolder(
            ...     fake_data_dir,
            ...     loader=lambda x: cv2.imread(x),  # load image with OpenCV
            ...     extensions=(".jpg",),  # only load *.jpg files
            ...     transform=transform,  # apply transform to every image
            ... )

            >>> print(image_folder_2.samples)
            >>> # doctest: +SKIP(it's different with windows)
            ['./temp_dir/abc.jpg', './temp_dir/ghi/mno/pqr.jpg']
            >>> # doctest: -SKIP
            >>> print(len(image_folder_2))
            2

            >>> for (img,) in iter(image_folder_2):
            ...     # do something with img
            ...     print(type(img), img.shape)  # type: ignore
            ...     # <class 'paddle.Tensor'> [3, 64, 64]

            >>> shutil.rmtree(fake_data_dir)
    rD   rE   rF   r   rI   rK   rG   rH   Nr<   r   r+   rN   r   rO   c                @   || _         t          g }t          j                            |          }fd}t          t          j        |d                    D ]X\  }}}	t          |	          D ]B}
t          j                            ||
          } ||          r|                    |           CYt          |          dk    r0t          d| j         z   dz   d                              z             |t          n|| _        | _        || _        || _        d S )Nc                $    t          |           S r(   r)   r*   s    r   r+   z+ImageFolder.__init__.<locals>.is_valid_file  s    *1j999r    Tr,   r   z Found 0 files in subfolders of: rQ   rR   )r<   rV   r.   r/   r0   r1   r5   r3   r6   rX   rY   rZ   rE   r   rK   rH   )r]   r<   rE   r   rH   r+   rK   r/   r=   r>   r?   r   s      `        r   r^   zImageFolder.__init__  sK    	'Jw!!$''!: : : : :  &bgd&E&E&EFF 	& 	&OD!V & &GLLu-- =## &NN1%%%&
 w<<16B F1 13688J3G3GH  )/nnF$"r    rj   rk   list[_ImageDataType]c                    | j         |         }|                     |          }| j        |                     |          }|gS )zn
        Args:
            index (int): Index

        Returns:
            sample of specific index.
        rn   )r]   rj   r/   ro   s       r   rp   zImageFolder.__getitem__  sA     |E"T"">%^^F++Fxr    c                *    t          | j                  S r(   rr   rs   s    r   rt   zImageFolder.__len__  ru   r    rv   rw   )rj   rk   r   r   )r   rk   )rx   ry   rz   r{   r|   r^   rp   rt   r   r    r   r   r   X  s         x xt 10003333****
 8<:>15/3%# %# %# %# %#N   ! ! ! ! ! !r    r   )r   r   r   r   r   r   r(   )'
__future__r   typingr   r   r   r   typing_extensionsr   collections.abcr	   paddle._typing.dtype_liker
   #paddle.vision.transforms.transformsr   imager   r   r|   r.   PILr   r[   	paddle.ior   paddle.utilsr   __all__r%   rA   r#   rk   rC   rV   r   r   rZ   r"   r   r   r    r   <module>r      s   # " " " " " " 8 8 8 8 8 8 8 8 8 8 8 8 ' ' ' ' ' ' ((((((777777>>>>>>&&&&&&$+	
% 
 
 
 
 
			              # # # # # #
1 1 1 1"   .X! X! X! X! X!GE"2C"789 X! X! X!v
" " "= = =
     v! v! v! v! v!'$/01 v! v! v! v! v!r    