o
    ~ri                     @   sX  d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
mZ d dlZd dlZddlmZmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZ e rd dl Z!d dl"Z!e!j#j$Z%e rd d	l&m'Z' e%j(e'j)e%j*e'j*e%j+e'j+e%j,e'j,e%j-e'j-e%j.e'j.iZ/d
d e/0 D Z1ni Z/i Z1e rd dl2Z2e3e4Z5edej6de7d e7ej6 e7d f Z8G dd deZ9G dd deZ:e;e<e=e<B e7e; B f Z>dd Z?G dd deZ@dd ZAdd ZBde7fddZCdd ZDdd  ZEd!d" ZFd#ej6d$eGfd%d&ZHdpd(e=d$e7e8 fd)d*ZI	'dpde7e8 e8B d(e=d$e8fd+d,ZJ	'dpde7e8 e8B d(e=d$e7e8 fd-d.ZKd$ej6fd/d0ZL	dqd#ej6d1e=eMe=d2f B dB d$e9fd3d4ZNdqd#ej6d5e9e<B dB d$e=fd6d7ZOdqd#ej6d8e9dB d$eMe=e=f fd9d:ZPd;eMe=e=f d<e=d=e=d$eMe=e=f fd>d?ZQd@ee
 d$e7e
 fdAdBZRe9jSfde7edej6f  d5e<e9B d$e7e= fdCdDZTdEe;e<e7eMB f d$eGfdFdGZUdEe;e<e7eMB f d$eGfdHdIZVdJee;e<e7eMB f  d$eGfdKdLZWdJee;e<e7eMB f  d$eGfdMdNZXdqd#ee<df dOeYdB d$dfdPdQZZ	dqdee7eMe<df dOeYdB d$ede7d e7e7d  f fdRdSZ[												drdTeGdB dUeYdB dVeGdB dWeYe7eY B dB dXeYe7eY B dB dYeGdB dZe;e<e=f e=B dB d[eGdB d\e;e<e=f dB d]eGdB d^e;e<e=f dB d_ed`dae=f dB fdbdcZ\G ddde deZ]dfe:dgeMe:d2f dJe7e; d$dfdhdiZ^dje7e< dke7e< fdldmZ_e G dndo doZ`dS )s    N)Iterable)	dataclassfields)BytesIO)AnyUnion   )	ExplicitEnumis_numpy_arrayis_torch_availableis_torch_tensoris_torchvision_availableis_vision_availableloggingrequires_backendsto_numpy)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDIMAGENET_STANDARD_MEANIMAGENET_STANDARD_STDOPENAI_CLIP_MEANOPENAI_CLIP_STD)InterpolationModec                 C   s   i | ]\}}||qS  r   ).0kvr   r   f/lsinfo/ai/hellotax_ai/llm_service/venv_embed/lib/python3.10/site-packages/transformers/image_utils.py
<dictcomp>@   s    r   zPIL.Image.Imageztorch.Tensorc                   @      e Zd ZdZdZdS )ChannelDimensionchannels_firstchannels_lastN)__name__
__module____qualname__FIRSTLASTr   r   r   r   r    R       r    c                   @   r   )AnnotationFormatcoco_detectioncoco_panopticN)r#   r$   r%   COCO_DETECTIONCOCO_PANOPTICr   r   r   r   r)   W   r(   r)   c                 C   s   t  o	t| tjjS N)r   
isinstancePILImageimgr   r   r   is_pil_image_   s   r4   c                   @   s   e Zd ZdZdZdZdS )	ImageTypepillowtorchnumpyN)r#   r$   r%   r0   TORCHNUMPYr   r   r   r   r5   c   s    r5   c                 C   s<   t | rtjS t| rtjS t| rtjS tdt|  )NzUnrecognized image type )	r4   r5   r0   r   r9   r
   r:   
ValueErrortypeimager   r   r   get_image_typei   s   r?   c                 C   s   t | pt| pt| S r.   )r4   r
   r   r2   r   r   r   is_valid_images      r@   imagesc                 C   s   | o
t dd | D S )Nc                 s       | ]}t |V  qd S r.   )r@   r   r>   r   r   r   	<genexpr>x       z*is_valid_list_of_images.<locals>.<genexpr>all)rB   r   r   r   is_valid_list_of_imagesw   s   rI   c                 C   s\   t | d trdd | D S t | d tjrtj| ddS t | d tjr,tj| ddS d S )Nr   c                 S      g | ]	}|D ]}|qqS r   r   )r   sublistitemr   r   r   
<listcomp>}       z$concatenate_list.<locals>.<listcomp>axis)dim)r/   listnpndarrayconcatenater7   Tensorcat)
input_listr   r   r   concatenate_list{   s   rY   c                 C   s:   t | ttfr| D ]	}t|s dS q	dS t| sdS dS )NFT)r/   rR   tuplevalid_imagesr@   )imgsr3   r   r   r   r[      s   r[   c                 C   s   t | ttfrt| d S dS )Nr   F)r/   rR   rZ   r@   r2   r   r   r   
is_batched   s   r]   r>   returnc                 C   s,   | j tjkrdS t| dkot| dkS )zV
    Checks to see whether the pixel values have already been rescaled to [0, 1].
    Fr   r   )dtyperS   uint8minmaxr=   r   r   r   is_scaled_image   s   rc      expected_ndimsc                 C   s   t | r| S t| r| gS t| r9| j|d krt| } | S | j|kr(| g} | S td|d  d| d| j dtdt|  d)a  
    Ensure that the output is a list of images. If the input is a single image, it is converted to a list of length 1.
    If the input is a batch of images, it is converted to a list of images.

    Args:
        images (`ImageInput`):
            Image of images to turn into a list of images.
        expected_ndims (`int`, *optional*, defaults to 3):
            Expected number of dimensions for a single input image. If the input image has a different number of
            dimensions, an error is raised.
    r   z%Invalid image shape. Expected either z or z dimensions, but got z dimensions.z]Invalid image type. Expected either PIL.Image.Image, numpy.ndarray, or torch.Tensor, but got .)r]   r4   r@   ndimrR   r;   r<   rB   re   r   r   r   make_list_of_images   s&   	
ri   c                 C   s   t | ttfr tdd | D r tdd | D r dd | D S t | ttfrJt| rJt| d s8| d j|kr:| S | d j|d krJdd | D S t| ret| sW| j|krZ| gS | j|d kret| S td	|  )
a  
    Ensure that the output is a flat list of images. If the input is a single image, it is converted to a list of length 1.
    If the input is a nested list of images, it is converted to a flat list of images.
    Args:
        images (`Union[list[ImageInput], ImageInput]`):
            The input image.
        expected_ndims (`int`, *optional*, defaults to 3):
            The expected number of dimensions for a single input image.
    Returns:
        list: A list of images or a 4d array of images.
    c                 s       | ]
}t |ttfV  qd S r.   r/   rR   rZ   r   images_ir   r   r   rE          z+make_flat_list_of_images.<locals>.<genexpr>c                 s       | ]
}t |p
| V  qd S r.   rI   rl   r   r   r   rE      rn   c                 S   rJ   r   r   r   img_listr3   r   r   r   rM      rN   z,make_flat_list_of_images.<locals>.<listcomp>r   r   c                 S   rJ   r   r   rq   r   r   r   rM      rN   z*Could not make a flat list of images from 	r/   rR   rZ   rH   rI   r4   rg   r@   r;   rh   r   r   r   make_flat_list_of_images   s$   rt   c                 C   s   t | ttfrtdd | D rtdd | D r| S t | ttfrFt| rFt| d s3| d j|kr6| gS | d j|d krFdd | D S t| rct| sS| j|krW| ggS | j|d krct| gS td)	as  
    Ensure that the output is a nested list of images.
    Args:
        images (`Union[list[ImageInput], ImageInput]`):
            The input image.
        expected_ndims (`int`, *optional*, defaults to 3):
            The expected number of dimensions for a single input image.
    Returns:
        list: A list of list of images or a list of 4d array of images.
    c                 s   rj   r.   rk   rl   r   r   r   rE      rn   z-make_nested_list_of_images.<locals>.<genexpr>c                 s   ro   r.   rp   rl   r   r   r   rE      rn   r   r   c                 S      g | ]}t |qS r   )rR   rD   r   r   r   rM         z.make_nested_list_of_images.<locals>.<listcomp>z]Invalid input type. Must be a single image, a list of images, or a list of batches of images.rs   rh   r   r   r   make_nested_list_of_images   s$   
rw   c                 C   s@   t | stdt|  t rt| tjjrt| S t	| S )NzInvalid image type: )
r@   r;   r<   r   r/   r0   r1   rS   arrayr   r2   r   r   r   to_numpy_array  s
   
ry   num_channels.c                 C   s   |dur|nd}t |tr|fn|}| jdkrd\}}n| jdkr&d\}}n| jdkr0d\}}ntd| j | j| |v rS| j| |v rStd	| j d
 tjS | j| |v r]tjS | j| |v rgtj	S td)a[  
    Infers the channel dimension format of `image`.

    Args:
        image (`np.ndarray`):
            The image to infer the channel dimension of.
        num_channels (`int` or `tuple[int, ...]`, *optional*, defaults to `(1, 3)`):
            The number of channels of the image.

    Returns:
        The channel dimension of the image.
    Nr   rd   rd   )r            )r|   r}   z(Unsupported number of image dimensions: z4The channel dimension is ambiguous. Got image shape z. Assuming channels are the first dimension. Use the [input_data_format](https://huggingface.co/docs/transformers/main/internal/image_processing_utils#transformers.image_transforms.rescale.input_data_format) parameter to assign the channel dimension.z(Unable to infer channel dimension format)
r/   intrg   r;   shapeloggerwarningr    r&   r'   )r>   rz   	first_dimlast_dimr   r   r   infer_channel_dimension_format  s&   





r   input_data_formatc                 C   sF   |du rt | }|tjkr| jd S |tjkr| jd S td| )a  
    Returns the channel dimension axis of the image.

    Args:
        image (`np.ndarray`):
            The image to get the channel dimension axis of.
        input_data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format of the image. If `None`, will infer the channel dimension from the image.

    Returns:
        The channel dimension axis of the image.
    Nrd   r   Unsupported data format: )r   r    r&   rg   r'   r;   )r>   r   r   r   r   get_channel_dimension_axisD  s   



r   channel_dimc                 C   sZ   |du rt | }|tjkr| jd | jd fS |tjkr&| jd | jd fS td| )a  
    Returns the (height, width) dimensions of the image.

    Args:
        image (`np.ndarray`):
            The image to get the dimensions of.
        channel_dim (`ChannelDimension`, *optional*):
            Which dimension the channel dimension is in. If `None`, will infer the channel dimension from the image.

    Returns:
        A tuple of the image's height and width.
    Nr   )r   r    r&   r   r'   r;   )r>   r   r   r   r   get_image_sizeZ  s   

r   
image_size
max_height	max_widthc           
      C   sB   | \}}|| }|| }t ||}t|| }t|| }	||	fS )a  
    Computes the output image size given the input image and the maximum allowed height and width. Keep aspect ratio.
    Important, even if image_height < max_height and image_width < max_width, the image will be resized
    to at least one of the edges be equal to max_height or max_width.

    For example:
        - input_size: (100, 200), max_height: 50, max_width: 50 -> output_size: (25, 50)
        - input_size: (100, 200), max_height: 200, max_width: 500 -> output_size: (200, 400)

    Args:
        image_size (`tuple[int, int]`):
            The image to resize.
        max_height (`int`):
            The maximum allowed height.
        max_width (`int`):
            The maximum allowed width.
    )ra   r   )
r   r   r   heightwidthheight_scalewidth_scale	min_scale
new_height	new_widthr   r   r   #get_image_size_for_max_height_widthr  s   
r   valuesc                 C   s   dd t |  D S )zO
    Return the maximum value across all indices of an iterable of values.
    c                 S   ru   r   )rb   )r   values_ir   r   r   rM     rv   z&max_across_indices.<locals>.<listcomp>)zip)r   r   r   r   max_across_indices  s   r   c                 C   sb   |t jkrtdd | D \}}}||fS |t jkr*tdd | D \}}}||fS td| )zH
    Get the maximum height and width across all images in a batch.
    c                 S      g | ]}|j qS r   r   r   r3   r   r   r   rM         z(get_max_height_width.<locals>.<listcomp>c                 S   r   r   r   r   r   r   r   rM     r   z"Invalid channel dimension format: )r    r&   r   r'   r;   )rB   r   _r   r   r   r   r   get_max_height_width  s   

r   
annotationc                 C   sV   t | tr)d| v r)d| v r)t | d ttfr)t| d dks't | d d tr)dS dS )Nimage_idannotationsr   TFr/   dictrR   rZ   lenr   r   r   r   "is_valid_annotation_coco_detection  s   "r   c                 C   s^   t | tr-d| v r-d| v r-d| v r-t | d ttfr-t| d dks+t | d d tr-dS dS )Nr   segments_info	file_namer   TFr   r   r   r   r   !is_valid_annotation_coco_panoptic  s   "r   r   c                 C      t dd | D S )Nc                 s   rC   r.   )r   r   annr   r   r   rE     rF   z3valid_coco_detection_annotations.<locals>.<genexpr>rG   r   r   r   r    valid_coco_detection_annotations     r   c                 C   r   )Nc                 s   rC   r.   )r   r   r   r   r   rE     rF   z2valid_coco_panoptic_annotations.<locals>.<genexpr>rG   r   r   r   r   valid_coco_panoptic_annotations  r   r   timeoutc              
   C   s   t tdg t| trf| ds| dr%tjtt	j
| |ddj} nLtj| r2tj| } n?| dr>| dd } zt|  }tjt|} W n! tye } z
td	|  d
| d}~ww t| tjjsqtdtj| } | d} | S )a3  
    Loads `image` to a PIL Image.

    Args:
        image (`str` or `PIL.Image.Image`):
            The image to convert to the PIL Image format.
        timeout (`float`, *optional*):
            The timeout value in seconds for the URL request.

    Returns:
        `PIL.Image.Image`: A PIL Image.
    visionzhttp://zhttps://T)r   follow_redirectszdata:image/,r   zIncorrect image source. Must be a valid URL starting with `http://` or `https://`, a valid path to an image file, or a base64 encoded string. Got z. Failed with NzuIncorrect format used for image. Should be an url linking to an image, a base64 string, a local path, or a PIL image.RGB)r   
load_imager/   str
startswithr0   r1   openr   httpxgetcontentospathisfilesplitbase64decodebytesencode	Exceptionr;   	TypeErrorImageOpsexif_transposeconvert)r>   r   b64er   r   r   r     s0   
 

r   c                    sX   t | ttfr&t| rt | d ttfr fdd| D S  fdd| D S t|  dS )a  Loads images, handling different levels of nesting.

    Args:
      images: A single image, a list of images, or a list of lists of images to load.
      timeout: Timeout for loading images.

    Returns:
      A single image, a list of images, a list of lists of images.
    r   c                    s   g | ]} fd d|D qS )c                       g | ]}t | d qS r   r   rD   r   r   r   rM         z*load_images.<locals>.<listcomp>.<listcomp>r   )r   image_groupr   r   r   rM     s    zload_images.<locals>.<listcomp>c                    r   r   r   rD   r   r   r   rM     r   r   )r/   rR   rZ   r   r   )rB   r   r   r   r   load_images  s
   r   
do_rescalerescale_factordo_normalize
image_mean	image_stddo_padpad_sizedo_center_crop	crop_size	do_resizesizeresamplePILImageResamplingr   c                 C   s|   | r
|du r
t d|r|du rt d|r"|du s|du r"t d|r,|du r,t d|	r:|
dur6|dus<t ddS dS )a  
    Checks validity of typically used arguments in an `ImageProcessor` `preprocess` method.
    Raises `ValueError` if arguments incompatibility is caught.
    Many incompatibilities are model-specific. `do_pad` sometimes needs `size_divisor`,
    sometimes `size_divisibility`, and sometimes `size`. New models and processors added should follow
    existing arguments when possible.

    Nz=`rescale_factor` must be specified if `do_rescale` is `True`.zgDepending on the model, `size_divisor` or `pad_size` or `size` must be specified if `do_pad` is `True`.zP`image_mean` and `image_std` must both be specified if `do_normalize` is `True`.z<`crop_size` must be specified if `do_center_crop` is `True`.zA`size` and `resample` must be specified if `do_resize` is `True`.)r;   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   validate_preprocess_arguments  s   r   c                   @   s   e Zd ZdZdd ZdddZdd Zd	ejd
e	e
B dejfddZd ddZdd Zd!ddZd"ddZdd Zdd Zd#ddZdS )$ImageFeatureExtractionMixinzD
    Mixin that contain utilities for preparing image features.
    c                 C   s8   t |tjjtjfst|stdt| dd S d S )Nz	Got type zU which is not supported, only `PIL.Image.Image`, `np.ndarray` and `torch.Tensor` are.)r/   r0   r1   rS   rT   r   r;   r<   selfr>   r   r   r   _ensure_format_supported@  s
   z4ImageFeatureExtractionMixin._ensure_format_supportedNc                 C   s   |  | t|r| }t|tjrE|du r t|jd tj}|jdkr3|j	d dv r3|
ddd}|r9|d }|tj}tj|S |S )a"  
        Converts `image` to a PIL Image. Optionally rescales it and puts the channel dimension back as the last axis if
        needed.

        Args:
            image (`PIL.Image.Image` or `numpy.ndarray` or `torch.Tensor`):
                The image to convert to the PIL Image format.
            rescale (`bool`, *optional*):
                Whether or not to apply the scaling factor (to make pixel values integers between 0 and 255). Will
                default to `True` if the image type is a floating type, `False` otherwise.
        Nr   rd   r{   r   r|      )r   r   r8   r/   rS   rT   flatfloatingrg   r   	transposeastyper`   r0   r1   	fromarray)r   r>   rescaler   r   r   to_pil_imageG  s   
z(ImageFeatureExtractionMixin.to_pil_imagec                 C   s&   |  | t|tjjs|S |dS )z
        Converts `PIL.Image.Image` to RGB format.

        Args:
            image (`PIL.Image.Image`):
                The image to convert.
        r   )r   r/   r0   r1   r   r   r   r   r   convert_rgbe  s   

z'ImageFeatureExtractionMixin.convert_rgbr>   scaler^   c                 C   s   |  | || S )z7
        Rescale a numpy image by scale amount
        )r   )r   r>   r   r   r   r   r   s  s   
z#ImageFeatureExtractionMixin.rescaleTc                 C   s   |  | t|tjjrt|}t|r| }|du r&t|jd tj	n|}|r4| 
|tjd}|rB|jdkrB|ddd}|S )a  
        Converts `image` to a numpy array. Optionally rescales it and puts the channel dimension as the first
        dimension.

        Args:
            image (`PIL.Image.Image` or `np.ndarray` or `torch.Tensor`):
                The image to convert to a NumPy array.
            rescale (`bool`, *optional*):
                Whether or not to apply the scaling factor (to make pixel values floats between 0. and 1.). Will
                default to `True` if the image is a PIL Image or an array/tensor of integers, `False` otherwise.
            channel_first (`bool`, *optional*, defaults to `True`):
                Whether or not to permute the dimensions of the image to put the channel dimension first.
        Nr   p?rd   r|   r   )r   r/   r0   r1   rS   rx   r   r8   r   integerr   r   float32rg   r   )r   r>   r   channel_firstr   r   r   ry   z  s   

z*ImageFeatureExtractionMixin.to_numpy_arrayc                 C   sD   |  | t|tjjr|S t|r|d}|S tj|dd}|S )z
        Expands 2-dimensional `image` to 3 dimensions.

        Args:
            image (`PIL.Image.Image` or `np.ndarray` or `torch.Tensor`):
                The image to expand.
        r   rO   )r   r/   r0   r1   r   	unsqueezerS   expand_dimsr   r   r   r   r     s   

z'ImageFeatureExtractionMixin.expand_dimsFc                 C   sh  |  | t|tjjr| j|dd}n|r3t|tjr'| |tj	d}nt
|r3| | d}t|tjrXt|tjsHt||j}t|tjsWt||j}n6t
|rddl}t||jswt|tjrr||}n||}t||jst|tjr||}n||}|jdkr|jd dv r||ddddf  |ddddf  S || | S )a  
        Normalizes `image` with `mean` and `std`. Note that this will trigger a conversion of `image` to a NumPy array
        if it's a PIL Image.

        Args:
            image (`PIL.Image.Image` or `np.ndarray` or `torch.Tensor`):
                The image to normalize.
            mean (`list[float]` or `np.ndarray` or `torch.Tensor`):
                The mean (per channel) to use for normalization.
            std (`list[float]` or `np.ndarray` or `torch.Tensor`):
                The standard deviation (per channel) to use for normalization.
            rescale (`bool`, *optional*, defaults to `False`):
                Whether or not to rescale the image to be between 0 and 1. If a PIL image is provided, scaling will
                happen automatically.
        T)r   r   r   Nrd   r{   )r   r/   r0   r1   ry   rS   rT   r   r   r   r   floatrx   r_   r7   rV   
from_numpytensorrg   r   )r   r>   meanstdr   r7   r   r   r   	normalize  s6   


(z%ImageFeatureExtractionMixin.normalizec                 C   sJ  |dur|nt j}| | t|tjjs| |}t|tr#t|}t|t	s.t
|dkr|rBt|t	r9||fn|d |d f}n\|j\}}||krO||fn||f\}}	t|t	r\|n|d }
||
krf|S |
t	|
|	 | }}|dur||
krtd| d| ||krt	|| | |}}||kr||fn||f}|j||dS )a  
        Resizes `image`. Enforces conversion of input to PIL.Image.

        Args:
            image (`PIL.Image.Image` or `np.ndarray` or `torch.Tensor`):
                The image to resize.
            size (`int` or `tuple[int, int]`):
                The size to use for resizing the image. If `size` is a sequence like (h, w), output size will be
                matched to this.

                If `size` is an int and `default_to_square` is `True`, then image will be resized to (size, size). If
                `size` is an int and `default_to_square` is `False`, then smaller edge of the image will be matched to
                this number. i.e, if height > width, then image will be rescaled to (size * height / width, size).
            resample (`int`, *optional*, defaults to `PILImageResampling.BILINEAR`):
                The filter to user for resampling.
            default_to_square (`bool`, *optional*, defaults to `True`):
                How to convert `size` when it is a single int. If set to `True`, the `size` will be converted to a
                square (`size`,`size`). If set to `False`, will replicate
                [`torchvision.transforms.Resize`](https://pytorch.org/vision/stable/transforms.html#torchvision.transforms.Resize)
                with support for resizing only the smallest edge and providing an optional `max_size`.
            max_size (`int`, *optional*, defaults to `None`):
                The maximum allowed for the longer edge of the resized image: if the longer edge of the image is
                greater than `max_size` after being resized according to `size`, then the image is resized again so
                that the longer edge is equal to `max_size`. As a result, `size` might be overruled, i.e the smaller
                edge may be shorter than `size`. Only used if `default_to_square` is `False`.

        Returns:
            image: A resized `PIL.Image.Image`.
        Nr   r   zmax_size = zN must be strictly greater than the requested size for the smaller edge size = )r   )r   BILINEARr   r/   r0   r1   r   rR   rZ   r   r   r   r;   resize)r   r>   r   r   default_to_squaremax_sizer   r   shortlongrequested_new_short	new_shortnew_longr   r   r   r     s4   


$
z"ImageFeatureExtractionMixin.resizec                 C   sx  |  | t|ts||f}t|st|tjr8|jdkr"| |}|jd dv r0|jdd n|jdd }n
|j	d |j	d f}|d |d  d }||d  }|d |d  d }||d  }t|t
jjrr|||||fS |jd dv }|st|tjr|ddd}t|r|ddd}|dkr||d kr|dkr||d kr|d||||f S |jdd t|d |d t|d |d f }	t|tjrtj||	d}
n	t|r||	}
|	d |d  d }||d  }|	d	 |d  d }||d  }||
d||||f< ||7 }||7 }||7 }||7 }|
dtd|t|
jd |td|t|
jd	 |f }
|
S )
a  
        Crops `image` to the given size using a center crop. Note that if the image is too small to be cropped to the
        size given, it will be padded (so the returned result has the size asked).

        Args:
            image (`PIL.Image.Image` or `np.ndarray` or `torch.Tensor` of shape (n_channels, height, width) or (height, width, n_channels)):
                The image to resize.
            size (`int` or `tuple[int, int]`):
                The size to which crop the image.

        Returns:
            new_image: A center cropped `PIL.Image.Image` or `np.ndarray` or `torch.Tensor` of shape: (n_channels,
            height, width).
        r|   r   r{   r   N.r   r   r   )r   r/   rZ   r   rS   rT   rg   r   r   r   r0   r1   cropr   permuterb   
zeros_like	new_zerosra   )r   r>   r   image_shapetopbottomleftrightr   	new_shape	new_imagetop_pad
bottom_padleft_pad	right_padr   r   r   center_crop%  sP   



,(2
4z'ImageFeatureExtractionMixin.center_cropc                 C   s>   |  | t|tjjr| |}|dddddddf S )a  
        Flips the channel order of `image` from RGB to BGR, or vice versa. Note that this will trigger a conversion of
        `image` to a NumPy array if it's a PIL Image.

        Args:
            image (`PIL.Image.Image` or `np.ndarray` or `torch.Tensor`):
                The image whose color channels to flip. If `np.ndarray` or `torch.Tensor`, the channel dimension should
                be first.
        Nr   )r   r/   r0   r1   ry   r   r   r   r   flip_channel_orderp  s   


z.ImageFeatureExtractionMixin.flip_channel_orderr   c                 C   sL   |dur|nt jj}| | t|t jjs| |}|j||||||dS )a  
        Returns a rotated copy of `image`. This method returns a copy of `image`, rotated the given number of degrees
        counter clockwise around its centre.

        Args:
            image (`PIL.Image.Image` or `np.ndarray` or `torch.Tensor`):
                The image to rotate. If `np.ndarray` or `torch.Tensor`, will be converted to `PIL.Image.Image` before
                rotating.

        Returns:
            image: A rotated `PIL.Image.Image`.
        N)r   expandcenter	translate	fillcolor)r0   r1   NEARESTr   r/   r   rotate)r   r>   angler   r  r  r  r  r   r   r   r    s   

z"ImageFeatureExtractionMixin.rotater.   )NT)F)NTN)Nr   NNN)r#   r$   r%   __doc__r   r   r   rS   rT   r   r   r   ry   r   r   r   r  r  r  r   r   r   r   r   ;  s    

 

4CKr   annotation_formatsupported_annotation_formatsc                 C   sX   | |vrt dt d| | tju rt|st d| tju r(t|s*t dd S d S )NzUnsupported annotation format: z must be one of zInvalid COCO detection annotations. Annotations must a dict (single image) or list of dicts (batch of images) with the following keys: `image_id` and `annotations`, with the latter being a list of annotations in the COCO format.zInvalid COCO panoptic annotations. Annotations must a dict (single image) or list of dicts (batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with the latter being a list of annotations in the COCO format.)r;   formatr)   r,   r   r-   r   )r   r!  r   r   r   r   validate_annotations  s   

r#  valid_processor_keyscaptured_kwargsc                 C   s:   t |t | }|rd|}td| d d S d S )Nz, zUnused or unrecognized kwargs: rf   )set
differencejoinr   r   )r$  r%  unused_keysunused_key_strr   r   r   validate_kwargs  s
   
r+  c                   @   s   e Zd ZU dZdZedB ed< dZedB ed< dZedB ed< dZ	edB ed< dZ
edB ed< dZedB ed< d	d
 ZdddZdd Zdd Zdd Zdd Zdd ZdddZdefddZdS )SizeDictz>
    Hashable dictionary to store image size information.
    Nr   r   longest_edgeshortest_edger   r   c                 C   s$   t | |r
t| |S td| d)NKey z not found in SizeDict.)hasattrgetattrKeyErrorr   keyr   r   r   __getitem__  s   

zSizeDict.__getitem__c                 C   s&   t | |rt| |d urt| |S |S r.   r0  r1  )r   r4  defaultr   r   r   r     s   
zSizeDict.getc                 c   s4    t | D ]}t| |j}|d ur|j|fV  qd S r.   )r   r1  name)r   fvalr   r   r   __iter__  s   zSizeDict.__iter__c                 C   s    t | j| j| j| j| j| jfS r.   )hashr   r   r-  r.  r   r   r   r   r   r   __hash__  s    zSizeDict.__hash__c                 C   s   t | |ot| |d uS r.   r6  r3  r   r   r   __contains__  rA   zSizeDict.__contains__c                 C   s,   t | |std| dt| || d S )Nr/  z" is not a valid field of SizeDict.)r0  r2  object__setattr__)r   r4  valuer   r   r   __setitem__  s   
zSizeDict.__setitem__c                    sX   t  trt kS t  tr*tfddtD t fddtD kS tS )Nc                 3       | ]	}t  |jV  qd S r.   r1  r8  r   r9  r=  r   r   rE     s    z"SizeDict.__eq__.<locals>.<genexpr>c                 3   rD  r.   rE  rF  )otherr   r   rE     s    
)r/   r   r,  rZ   r   NotImplemented)r   rG  r   )rG  r   r   __eq__  s   

$
zSizeDict.__eq__r^   c                 C   s6   t |ttB rt| }|t| tdi |S tS )Nr   )r/   r   r,  updaterH  r   rG  mergedr   r   r   __or__  s
   zSizeDict.__or__c                 C   s(   t |trt|}|t|  |S tS r.   )r/   r   rJ  rH  rK  r   r   r   __ror__  s
   
zSizeDict.__ror__r.   )r^   r,  )r#   r$   r%   r  r   r   __annotations__r   r-  r.  r   r   r5  r   r;  r>  r?  rC  rI  rM  r   rN  r   r   r   r   r,    s"   
 

	r,  )rd   r.   )NNNNNNNNNNNN)ar   r   collections.abcr   dataclassesr   r   ior   typingr   r   r   r8   rS   utilsr	   r
   r   r   r   r   r   r   r   utils.constantsr   r   r   r   r   r   	PIL.Imager0   PIL.ImageOpsr1   
Resamplingr   torchvision.transformsr   r  NEAREST_EXACTBOXr   HAMMINGBICUBICLANCZOSpil_torch_interpolation_mappingitemstorch_pil_interpolation_mappingr7   
get_loggerr#   r   rT   rR   
ImageInputr    r)   r   r   r   AnnotationTyper4   r5   r?   r@   rI   rY   r[   r]   boolrc   ri   rt   rw   ry   rZ   r   r   r   r   r   r&   r   r   r   r   r   r   r   r   r   r   r#  r+  r,  r   r   r   r   <module>   s6  , 
	

	(

(

'

"'&



""$+
	

.  a

