
    |j,                     b   d dl Z i ad ZdedededefdZd Z ed	          d
             Z ed          d             Z	 ed          d             Z
d Z ed          d             Z ed          d             Z ed          d             Z ed          d             Z ed          d             Z ed          d             Z ed          d             Zd Z ed          d              Z ed!          d"             Z ed#          d$             Z ed%          d&             Z ed'          d(             Z ed)          d*             Z ed+          d,             Z ed-          d.             Z ed/          d0             Z ed1          d2             ZdS )3    Nc                     d}| D ]}||z  }|S )N    )spvs      b/lsinfo/ai/hellotax_ai/data_center/backend/venv/lib/python3.11/site-packages/paddle/utils/flops.pyprodr
      s&    	A  	QH    op_typeinput_shapesattrsreturnc                     | t           vrdS t           |          }	  |||          }n# t          $ r}Y d}~dS d}~ww xY w|S )z
    count FLOPs for operation.

    Args:
        op_type (str): the type of operation.
        input_shapes (dict): the shapes of inputs.
        attrs (dict): the attributes of the operation.

    Returns:
        the total FLOPs of the operation.
    r   N)_FLOPS_COMPUTE_FUNC_MAP	Exception)r   r   r   funcflopses         r	   r   r      sh     ---q&w/	Du--EE 	 	 	11111	s   ' 
<<c                       fd}|S )z<
    register flops computation function for operation.
    c                     | t           <   | S N)r   )r   r   s    r	   registerz register_flops.<locals>.register8   s    +/(r   r   )r   r   s   ` r	   register_flopsr   3   s#    
    
 Or   c_embeddingc                     dS )zbFLOPs computation for c_embedding op.
    For c_embedding(input):
        equation: flops = 0
    r   r   r   r   s     r	   _c_embedding_flopsr   @   	     1r   conv2dc                 8   t          |                     d                    dk    r|                     d          d         nd}|                     d          d         }|                     d          d         }|                    d          }|                    d          }|                    d          }|                    d	          }|d         }	|d
         }
|d         }t          |dd                   }t          |dd                   }t          |          }t          |t                    r|n|g|z  }t          |t                    r|n|g|z  }t          |t                    r|n|g|z  }g }t	          |          D ]L\  }}|d||         z  z   ||         ||         d
z
  z  d
z   z
  ||         z  d
z   }|                    |           M||z  }t          |          |
z  |z  }|	t          |          z  }||z  }d|z  }d}|||z  }||z   S )a   FLOPs computation for conv2d op.
    For conv2d(input,filter):
        active_elements = batch_size * numel(output)
        conv_flops = 2 * macs_per_position_conv * active_elements
        bias_flops = out_channels * active_elements
        equation: flops = conv_flops + bias_flops
    Biasr   NInputFilterpaddingsstrides	dilationsgroupsr      )lengetlist
isinstance	enumerateappendr
   )r   r   biasinputweightpaddingstridedilationr(   
batch_sizein_channelsout_channelskernel_dims
input_dimslengthr%   r&   r'   output_dimsidx	input_dim
output_dimfilters_per_channelmacs_conv_per_positionactive_elementsoverall_conv_macsoverall_conv_flopsoverall_bias_flopss                               r	   _conv2d_flopsrF   I   s    |''((1,, 	  ## 	
 W%%a(Eh''*Fii
##GYYy!!Fyy%%HYYx  FqJ(K!9Lvabbz""KeABBiJ__F gt$$	 
   fd##	 
   h%%	 
   K#J// ' 'Y(3- ~S!1A!56:< S\	 	

 	:&&&&&&0[K'*==  !4#4#44O.@..)O; 222r   dropoutc                     dS )zZFLOPs computation for dropout op.
    For dropout(input):
        equation: flops = 0
    r   r   r   s     r	   _dropout_flopsrI      r   r   c                    |                      d          d         }|                      d          d         }t          |          }t          |          }t          ||          }g }t          |          D ]Q}||k     r||dz
  |z
           nd}	||k     r||dz
  |z
           nd}
|                    t          |	|
                     Rt          |          S )NXr   Yr   )r+   r*   maxranger/   r
   )r   r   input_xinput_ydim_xdim_y
dim_outputoutputiin_xin_ys              r	   _elementwise_flops_computerX      s    s##A&Gs##A&GLLELLEUE""JF: ' ')*Uwuqy1}%%)*Uwuqy1}%%c$oo&&&&<<r   elementwise_addc                 "    t          | |          S )a7  FLOPs computation for elementwise_add op.
    For elementwise_add(input,other):
        input_shapes = [shape_of_input, shape_of_other]
        shape_of_input = [dim1, dim2, dim3 ...]
        shape_of_other = [odim1, odim2, odim3...]
        equation: flops = max(dim1, odim1) * max(dim2, odim2) * max()...
    rX   r   s     r	   _elementwise_add_flopsr\           &lE:::r   elementwise_mulc                 "    t          | |          S )a6  FLOPs computation for elementwise_mul op.
    For elementwise_mul(input,other):
        input_shapes = [shape_of_input, shape_of_other]
        shape_of_input = [dim1, dim2, dim3 ...]
        shape_of_other = [odim1, odim2, odim3...]
        equation: flops = max(dim1, odim1) * max(dim2, odim2)* max()...
    r[   r   s     r	   _elementwise_mul_flopsr`      r]   r   elementwise_divc                 "    t          | |          S )a1  FLOPs computation for elementwise_div op.
    For elementwise_div(input,other):
        input_shapes = [shape_of_input, shape_of_other]
        shape_of_input = [dim1, dim2, dim3 ...]
        shape_of_other = [odim1, odim2, odim3...]
        equation: flops = max(dim1,odim1)*max(dim2,odim2)*max()...
    r[   r   s     r	   _elementwise_div_flopsrc      r]   r   geluc                 \    |                      d          d         }t          |          dz  S )zFLOPs computation for gelu op.
    For gelu(input):
        equation: flops = 5 * (numel)total number of elements in the input tensor.
    rK   r      r+   r
   r   r   r1   s      r	   _gelu_flopsri      ,     S!!!$E;;?r   
layer_normc                     |                      d          d         }t          |          dz  }|                     d          r|t          |          z  }|S )a  FLOPs computation for layer_norm op.
    For layer_norm(input):
        equation:
        1): WITHOUT epsilon flops = 7 * (numel)total number of elements in the input tensor.
        2): WITH epsilon flops = 8 * (numel)total number of elements in the input tensor.
    rK   r      epsilonrg   )r   r   r1   r   s       r	   _layer_norm_flopsro      sT     S!!!$EKK!OEyy eLr   matmulc           	         t          j        |                     d|                     ddgg                    d                   }t          j        |                     d|                     ddgg                    d                   }|                    d          s|                    d          r|d         |d	         c|d	<   |d<   |                    d
          s|                    d          r|d         |d	         c|d	<   |d<   t          |          }t          |          }t	          ||          }g }t          |dd	          D ]K}||k    r|||z
           nd}	||k    r|||z
           nd}
|                    t	          |	|
                     Lt          |          |d         z  |d	         z  |d	         z  }d|z  S )a  FLOPs computation for matmul op.
    For matmul(input,other):
        input_shapes = [shape_of_input, shape_of_other]
        shape_of_input =                  [dim1,dim2 ...dim_n_1,dim_n]  length:n
        shape_of_other = [odim1,odim2 ... odim(n-m)... odim_m_1,dim_m]  length:m
        suppose n > m and dim_n = odim_m_1:
        shape_of_output = [dim1, dim2 ... max(dim(n-m), odim(n-m)), max(dim(n-m+1), odim(n-m+1)) ... dim_n_1, dim_m]
        equation: flops = 2 * numel(output) * dim_n
    rK   xr   rL   ytranspose_Xtranspose_xtranspose_Ytranspose_yr)   r   copydeepcopyr+   r*   rM   rN   r/   r
   r   r   x_shapey_shaperQ   rR   
output_lenoutput_shaper=   x_idxy_idxmacss               r	   _matmul_flopsr      s    ml..saSE::;;A> G ml..saSE::;;A> G yy <599]#;#; <#*2; WR[yy <599]#;#; <#*2; WR[LLELLEUE""JLZB'' / /(+u$$!(+u$$!Cu--....+gbk9GBKGDt8Or   	matmul_v2c                    t          j        |                     d          d                   }t          j        |                     d          d                   }|                    d          r|d         |d         c|d<   |d<   |                    d          r|d         |d         c|d<   |d<   t          |          }t          |          }t	          ||          }g }t          |dd          D ]K}||k    r|||z
           nd	}	||k    r|||z
           nd	}
|                    t	          |	|
                     Lt          |          |d         z  |d         z  |d         z  }d|z  S )
a  FLOPs computation for matmul_v2 op.
    For matmul_v2(input,other):
        input_shapes = [shape_of_input, shape_of_other]
        shape_of_input =                   [dim1, dim2 ...dim_n_1, dim_n] length:n
        shape_of_other = [odim1, odim2 ... odim(n-m) ... odim_m_1, dim_m] length:m
        suppose n > m and dim_n = odim_m_1:
        shape_of_output = [dim1, dim2 ... max(dim(n-m), odim(n-m)), max(dim(n-m+1), odim(n-m+1))...dim_n_1, dim_m]
        equation: flops = 2 * numel(outputs) * dim_n
    rK   r   rL   trans_xrv   rw   trans_yr)   r   rz   r}   s               r	   _matmul_v2_flopsr     su    mL,,S11!455GmL,,S11!455Gyy <#*2; WR[yy <#*2; WR[LLELLEUE""JLZB'' / /(+u$$!(+u$$!Cu--....+gbk9GBKGDt8Or   c                 V    |                      d          d         }t          |          S )zFLOPs computation for relu_like ops.
    For elu/leaky_relu/prelu/relu/relu6/silu (input):
        equation: flops = (numel)total number of elements in the input tensor.
    rK   r   rg   rh   s      r	   _relu_class_flopsr   /  s(    
 S!!!$E;;r   eluc                 "    t          | |          S r   r   r   s     r	   
_elu_flopsr   8      \5111r   
leaky_reluc                 "    t          | |          S r   r   r   s     r	   _leaky_relu_flopsr   =  r   r   preluc                 "    t          | |          S r   r   r   s     r	   _prelu_flopsr   B  r   r   reluc                 "    t          | |          S r   r   r   s     r	   _relu_flopsr   G  r   r   relu6c                 "    t          | |          S r   r   r   s     r	   _relu6_flopsr   L  r   r   siluc                 "    t          | |          S r   r   r   s     r	   _silu_flopsr   Q  r   r   reshape2c                     dS )z\FLOPs computation for reshape2 op.
    For reshape2(input):
        equation: flops = 0
    r   r   r   s     r	   _reshape2_flopsr   V  r   r   softmaxc                 \    |                      d          d         }t          |          dz  S )zFLOPs computation for softmax op.
    For softmax(input):
        equation: flops = 3 * (numel)total number of elements in the input tensor.
    rK   r      rg   rh   s      r	   _softmax_flopsr   _  rj   r   
transpose2c                     dS )z`FLOPs computation for transpose2 op.
    For transpose2(input):
        equation: flops = 0
    r   r   r   s     r	   _transpose2_flopsr   i  r   r   poolc                 V    |                      d          d         }t          |          S )zFLOPs computation for pool op.
    For pool(input):
        equation: flops = (numel)total number of elements in the input tensor.
    rK   r   rg   rh   s      r	   _pool_flopsr   r  s(     S!!!$E;;r   )r{   r   r
   strdictintr   r   r   rF   rI   rX   r\   r`   rc   ri   ro   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	   <module>r      s      3 d 4 C    0
 
 
    K3 K3 K3\ 	     !""; ; #"; !""; ; #"; !""; ; #";       ! ! !H   :   2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
   	          r   