
    #j                       d Z ddlmZ ddlZddlZddlmZ ddlmZ ddl	Z	ddl
mZ ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ  ej        e          ZdRdZdSdTdZdUdZdVdZdSdWdZdXdZdSdZdYdZ 	 dZd[dZ!d\d Z"	 d]d^d!Z#d_d%Z$	 d`d(Z%dd)dad,Z&	 dbdcd-Z'd.d/ddd1Z 	 	 	 	 dedfd4Z(d.dd5dgd7Z)	 	 	 	 	 dhdid8Z*djd?Z+	 	 	 	 	 	 	 dkdldBZ, G dC dD          Z- G dE dF          Z.dUdGZ/ej0        eej1        e!ej2        e#ej3        e%ej4        eej5        eej6        e'ej7        eej8        e(ej9        e*ej:        e,ej;        e/iZ<	 	 	 dmdndLZ= G dM dNe          Z> G dO dPe          Z?dodQZ@dS )pz$PyTorch optimization for BERT model.    )annotationsN)partial)Any)	Optimizer)LambdaLRReduceLROnPlateau   )LayerWiseDummyOptimizerLayerWiseDummyScheduler)SchedulerType)loggingc                    dS Nr	    )_s    c/lsinfo/ai/hellotax_ai/base_platform/venv/lib/python3.11/site-packages/transformers/optimization.py_get_constant_lambdar   #   s    1    	optimizerr   
last_epochintc                0    t          | t          |          S )a  
    Create a schedule with a constant learning rate, using the learning rate set in optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    r   )r   r   )r   r   s     r   get_constant_scheduler   '   s     I3
KKKKr   c                    t          | fi |S )a  
    Create a schedule with a constant learning rate that decreases when a metric has stopped improving.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        kwargs (`dict`, *optional*):
            Extra parameters to be passed to the scheduler. See `torch.optim.lr_scheduler.ReduceLROnPlateau`
            for possible parameters.

    Return:
        `torch.optim.lr_scheduler.ReduceLROnPlateau` with the appropriate schedule.
    )r   r   kwargss     r   get_reduce_on_plateau_scheduler   8   s     Y11&111r   current_stepnum_warmup_stepsc               l    | |k     r-t          |           t          t          d|                    z  S dS )N      ?floatmax)r    r!   s     r   ,_get_constant_schedule_with_warmup_lr_lambdar'   J   s9    &&&\""U3s4D+E+E%F%FFF3r   c                R    t          t          |          }t          | ||          S )ad  
    Create a schedule with a constant learning rate preceded by a warmup period during which the learning rate
    increases linearly between 0 and the initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    r!   r   )r   r'   r   )r   r!   r   	lr_lambdas       r   !get_constant_schedule_with_warmupr+   P   s-    " DWghhhIIyZ@@@@r   num_training_stepsc                   | |k     r-t          |           t          t          d|                    z  S t          dt          || z
            t          t          d||z
                      z            S )Nr	           r$   )r    r!   r,   s      r   *_get_linear_schedule_with_warmup_lr_lambdar/   e   sp    &&&\""U3q2B+C+C%D%DDDsE,|;<<uSL^aqLqErEr?s?sstttr   c                R    t          t          ||          }t          | ||          S )a  
    Create a schedule with a learning rate that decreases linearly from the initial lr set in the optimizer to 0, after
    a warmup period during which it increases linearly from 0 to the initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    r!   r,   )r   r/   r   )r   r!   r,   r   r*   s        r   get_linear_schedule_with_warmupr2   k   s5    & 2)-  I
 Iy*555r   
num_cyclesr%   c               ^   | |k     r-t          |           t          t          d|                    z  S t          | |z
            t          t          d||z
                      z  }t          dddt          j        t          j        t          |          z  dz  |z            z   z            S )Nr	   r.         ?r#          @r%   r&   mathcospir    r!   r,   r3   progresss        r   *_get_cosine_schedule_with_warmup_lr_lambdar=      s     &&&\""U3q2B+C+C%D%DDD\$4455c!EWZjEj>k>k8l8llHsC3$'E*4E4E*E*Kh*V!W!WWXYYYr   r5   c                T    t          t          |||          }t          | ||          S )a  
    Create a schedule with a learning rate that decreases following the values of the cosine function between the
    initial lr set in the optimizer to 0, after a warmup period during which it increases linearly between 0 and the
    initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the defaults is to just decrease from the max value to 0
            following a half-cosine).
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    r!   r,   r3   )r   r=   r   r   r!   r,   r3   r   r*   s         r   get_cosine_schedule_with_warmuprA      s8    2 2)-	  I Iy*555r   c               n   | |k     r-t          |           t          t          d|                    z  S t          | |z
            t          t          d||z
                      z  }|dk    rdS t          dddt          j        t          j        t          |          |z  dz  z            z   z            S )Nr	   r#   r.   r5   r7   r;   s        r   =_get_cosine_with_hard_restarts_schedule_with_warmup_lr_lambdarC      s     &&&\""U3q2B+C+C%D%DDD\$4455c!EWZjEj>k>k8l8llH3ssC3$'eJ6G6G(6RVY5Y*Z![![[\]]]r   c                T    t          t          |||          }t          | ||          S )a  
    Create a schedule with a learning rate that decreases following the values of the cosine function between the
    initial lr set in the optimizer to 0, with several hard restarts, after a warmup period during which it increases
    linearly between 0 and the initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`int`, *optional*, defaults to 1):
            The number of hard restarts to use.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    r?   )r   rC   r   r@   s         r   2get_cosine_with_hard_restarts_schedule_with_warmuprE      s8    0 E)-	  I Iy*555r   lr_endpowerlr_initc                   | |k     r-t          |           t          t          d|                    z  S | |k    r||z  S ||z
  }||z
  }d| |z
  |z  z
  }|||z  z  |z   }	|	|z  S r   r$   )
r    r!   r,   rF   rG   rH   lr_rangedecay_stepspct_remainingdecays
             r   4_get_polynomial_decay_schedule_with_warmup_lr_lambdarN      s     &&&\""U3q2B+C+C%D%DDD	*	*	*V#(+;;\,<<KK=%//&8wr   Hz>r#   c                    | j         d         }||k    st          d| d| d          t          t          |||||          }t	          | ||          S )a  
    Create a schedule with a learning rate that decreases as a polynomial decay from the initial lr set in the
    optimizer to end lr defined by *lr_end*, after a warmup period during which it increases linearly from 0 to the
    initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        lr_end (`float`, *optional*, defaults to 1e-7):
            The end LR.
        power (`float`, *optional*, defaults to 1.0):
            Power factor.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.

    Note: *power* defaults to 1.0 as in the fairseq implementation, which in turn is based on the original BERT
    implementation at
    https://github.com/google-research/bert/blob/f39e881b169b9d53bea03d2d341b31707a6c052b/optimization.py#L37

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.

    lrzlr_end (z#) must be smaller than initial lr ())r!   r,   rF   rG   rH   )defaults
ValueErrorr   rN   r   )r   r!   r,   rF   rG   r   rH   r*   s           r   )get_polynomial_decay_schedule_with_warmuprU      sx    >  &GfYFYYwYYYZZZ<)-  I Iy*555r   )	timescalerV   
int | Nonec                   | |k     r-t          |           t          t          d|                    z  S ||z
  }dt          j        | |z   |z            z  }|S )Nr	   r#   )r%   r&   r8   sqrt)r    r!   rV   shiftrM   s        r   $_get_inverse_sqrt_schedule_lr_lambdar[      sb    &&&\""U3q2B+C+C%D%DDD((E$)\E1Y>???ELr   c                `    ||pd}t          t          ||          }t          | ||          S )a  
    Create a schedule with an inverse square-root learning rate, from the initial lr set in the optimizer, after a
    warmup period which increases lr linearly from 0 to the initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        timescale (`int`, *optional*, defaults to `num_warmup_steps`):
            Time scale.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    Ni'  )r!   rV   r   )r   r[   r   )r   r!   rV   r   r*   s        r   get_inverse_sqrt_scheduler]   (  s@    . $.	<O_ktuuuIIyZ@@@@r   r.   )min_lr_rater^   c               x   | |k     r-t          |           t          t          d|                    z  S t          | |z
            t          t          d||z
                      z  }ddt          j        t          j        t          |          z  dz  |z            z   z  }|d|z
  z  |z   }t          d|          S )Nr	   r5   r#   r6   r   r7   )r    r!   r,   r3   r^   r<   factors          r   r=   r=   F  s     &&&\""U3q2B+C+C%D%DDD\$4455c!EWZjEj>k>k8l8llHC$(47U:->->#>#Dx#OPPPQFq;'+5Fq&>>r   min_lrfloat | Nonec                    ||t          d          ||| j        d         z  }n|t          d          t          t          ||||          }t	          | ||          S )a  
    Create a schedule with a learning rate that decreases following the values of the cosine function between the
    initial lr set in the optimizer to min_lr, after a warmup period during which it increases linearly between 0 and the
    initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the defaults is to just decrease from the max value to 0
            following a half-cosine).
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
        min_lr (`float`, *optional*):
            The minimum learning rate to reach after the cosine schedule.
        min_lr_rate (`float`, *optional*):
            The minimum learning rate as a ratio of the initial learning rate. If set, `min_lr` should not be set.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    N/Only one of min_lr or min_lr_rate should be setrQ   LOne of min_lr or min_lr_rate should be set through the `lr_scheduler_kwargs`)r!   r,   r3   r^   )rT   rS   r   r=   r   )r   r!   r,   r3   r   ra   r^   r*   s           r   +get_cosine_with_min_lr_schedule_with_warmuprf   Q  s    F k5JKKK		y1$77		ghhh2)-  I Iy*555r   )r^   warmup_lr_raterg   c                  t          |           } t          |          }t          |          }| |k     rF|| dz   t          d|          z  S t          |          }|d|z
  | z  t          d|dz
            z  z   S | |z
  dz   t          d||z
            z  }ddt          j        t          j        |z  dz  |z            z   z  }|d|z
  z  |z   }t          d|          S )Nr#   r	   r5   r6   r   r7   )r    r!   r,   r3   r^   rg   r<   r`   s           r   ;_get_cosine_with_min_lr_schedule_with_warmup_lr_rate_lambdari     s    &&L-..122&&&! 3&#c3C*D*DDD">22N!S>%9l$KsSTVfijVjOkOk$lll//#5#cCUXhCh:i:ijHC$(47Z#7##=#HIIIJFq;'+5Fq&>>r   c                    ||t          d          ||| j        d         z  }n|t          d          t          t          |||||          }t	          | ||          S )a  
    Create a schedule with a learning rate that decreases following the values of the cosine function between the
    initial lr set in the optimizer to min_lr, after a warmup period during which it increases linearly between 0 and the
    initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the defaults is to just decrease from the max value to 0
            following a half-cosine).
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
        min_lr (`float`, *optional*):
            The minimum learning rate to reach after the cosine schedule.
        min_lr_rate (`float`, *optional*):
            The minimum learning rate as a ratio of the initial learning rate. If set, `min_lr` should not be set.
        warmup_lr_rate (`float`, *optional*):
            The minimum learning rate as a ratio of the start learning rate. If not set, `warmup_lr_rate` will be treated as float(1/num_warmup_steps).

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    Nrd   rQ   re   )r!   r,   r3   r^   rg   )rT   rS   r   ri   r   )	r   r!   r,   r3   r   ra   r^   rg   r*   s	            r   3get_cosine_with_min_lr_schedule_with_warmup_lr_raterk     s    L k5JKKK		y1$77		ghhhC)-%  I Iy*555r   num_stable_stepsnum_decay_stepswarmup_typestr
decay_typemin_lr_ratioc                  | |k     rt          |           t          t          d|                    z  }|dk    r|}	nN|dk    r(ddt          j        t          j        |z            z
  z  }	n |dk    rdt          j        d|z
            z
  }	|	d|z
  z  |z   }	t          d|	          S | ||z   k     rdS | ||z   |z   k     rt          | |z
  |z
            t          t          d|                    z  }|dk    rd|z
  }	n^|dk    r;ddt          j        t          j        t          |          z  dz  |z            z   z  }	n|dk    rdt          j        |          z
  }	|	d|z
  z  |z   }	t          d|	          S |S )	Nr	   linearcosiner5   r#   1-sqrtr.   r6   )r%   r&   r8   r9   r:   rY   )
r    r!   rl   rm   rn   rp   rq   r3   r<   r`   s
             r   _get_wsd_scheduler_lambdarv     s    &&&&&s16F/G/G)H)HH(""FFH$$C$(47X+=">">>?FFH$$49S8^444F3-.=3&)9999s&)99OKKK(88;KKLLuUXYZ\kUlUlOmOmm!!8^FF8##C$(47U:5F5F+F+Lx+W"X"XXYFF8##49X...F3-.=3r   rs   rt   c
           
     "   ||t          d          ||t          j        d           |dvrt          d| d          |dvrt          d| d          |||z
  |z
  }t          t          |||||||          }
t          | |
|	          S )	a  
    Create a schedule with a learning rate that has three stages:
    1. warmup: increase from min_lr_ratio times the initial learning rate to the initial learning rate following a warmup_type.
    2. stable: constant learning rate.
    3. decay: decrease from the initial learning rate to min_lr_ratio times the initial learning rate following a decay_type.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_decay_steps (`int`):
            The number of steps for the decay phase.
        num_training_steps (`int`, *optional*):
            The total number of training steps. This is the sum of the warmup, stable and decay steps. If `num_stable_steps` is not provided, the stable phase will be `num_training_steps - num_warmup_steps - num_decay_steps`.
        num_stable_steps (`int`, *optional*):
            The number of steps for the stable phase. Please ensure that `num_warmup_steps + num_stable_steps + num_decay_steps` equals `num_training_steps`, otherwise the other steps will default to the minimum learning rate.
        warmup_type (`str`, *optional*, defaults to "linear"):
            The type of warmup to use. Can be 'linear', 'cosine' or '1-sqrt'.
        decay_type (`str`, *optional*, defaults to "cosine"):
            The type of decay to use. Can be 'linear', 'cosine' or '1-sqrt'.
        min_lr_ratio (`float`, *optional*, defaults to 0):
            The minimum learning rate as a ratio of the initial learning rate.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the defaults is to just decrease from the max value to 0
            following a half-cosine).
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    Nz@Either num_training_steps or num_stable_steps must be specified.zZBoth num_training_steps and num_stable_steps are specified. num_stable_steps will be used.)rs   rt   ru   zUnknown warmup type: z), expected 'linear', 'cosine' or '1-sqrt'zUnknown decay type: )r!   rl   rm   rn   rp   rq   r3   )rT   warningswarnr   rv   r   )r   r!   rm   r,   rl   rn   rp   rq   r3   r   r*   s              r   get_wsd_schedulerz     s    Z !&6&>[\\\%*:*Frsss888gggghhh777e
eeefff-0@@?R!))'!	 	 	I Iy*555r   c                  2    e Zd ZdZddZdd	ZddZddZdS )StreamingAverageaB  Rolling window average for smoothing metric values.

    Maintains a sliding window of values and computes their average,
    useful for smoothing noisy metric values before making learning rate decisions.

    Args:
        window_size (`int`):
            The maximum number of values to keep in the rolling window.
    window_sizer   returnNonec                0    || _         g | _        d| _        d S )Nr.   r}   valuessum)selfr}   s     r   __init__zStreamingAverage.__init__P  s     +#%r   valuer%   c                   | j                             |           | xj        |z  c_        t          | j                   | j        k    r*| j                             d          }| xj        |z  c_        | j        t          | j                   z  S )z3Add a value and return the current rolling average.r   )r   appendr   lenr}   pop)r   r   removeds      r   	streamavgzStreamingAverage.streamavgU  sz    5!!!Et{d...kooa((GHHHHx#dk****r   dict[str, Any]c                P    | j         | j                                        | j        dS )Nr   )r}   r   copyr   r   s    r   
state_dictzStreamingAverage.state_dict`  s-    +k&&((8
 
 	
r   r   c                    |                     d| j                  | _        |                     dg                                           | _        |                     dd          | _        d S )Nr}   r   r   r.   )getr}   r   r   r   )r   r   s     r   load_state_dictz StreamingAverage.load_state_dictg  sU    %>>-9IJJ nnXr227799>>%--r   N)r}   r   r~   r   )r   r%   r~   r%   r~   r   r   r   r~   r   )__name__
__module____qualname____doc__r   r   r   r   r   r   r   r|   r|   E  sn            
	+ 	+ 	+ 	+
 
 
 
. . . . . .r   r|   c                  z    e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 d7d8d%Zd9d:d*Zd;d-Zd<d.Zd<d/Zd=d0Z	d>d2Z
d?d4Zd@d6Zd&S )AGreedyLRa  Adaptive learning rate scheduler that responds to training metrics.

    GreedyLR dynamically adjusts the learning rate based on training performance:
    - Increases LR when metrics improve consistently (divides by factor)
    - Decreases LR when metrics plateau (multiplies by factor)

    This differs from traditional schedulers like cosine annealing by responding
    to actual training dynamics rather than following a predetermined schedule.

    Reference: `GreedyLR: A Novel Adaptive Learning Rate Scheduler <https://arxiv.org/abs/2512.14527>`_

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        mode (`str`, *optional*, defaults to `"min"`):
            One of 'min' or 'max'. In 'min' mode, LR will be reduced when the
            metric has stopped decreasing; in 'max' mode when it has stopped increasing.
        factor (`float`, *optional*, defaults to 0.95):
            Factor by which the learning rate will be adjusted. LR is multiplied by
            factor on plateau and divided by factor on improvement. Must be < 1.0.
        patience (`int`, *optional*, defaults to 10):
            Number of epochs with no improvement after which learning rate will be adjusted.
        threshold (`float`, *optional*, defaults to 1e-06):
            Threshold for measuring the new optimum.
        threshold_mode (`str`, *optional*, defaults to `"abs"`):
            One of 'rel' or 'abs'.
        cooldown (`int`, *optional*, defaults to 0):
            Number of epochs to wait before resuming normal operation after LR has been reduced.
        warmup (`int`, *optional*, defaults to 0):
            Number of epochs to wait before resuming normal operation after LR has been increased.
        min_lr (`float` or `list[float]`, *optional*, defaults to 0.001):
            A lower bound on the learning rate.
        max_lr (`float` or `list[float]`, *optional*, defaults to 1.0):
            An upper bound on the learning rate.
        eps (`float`, *optional*, defaults to 1e-08):
            Minimal decay applied to lr.
        verbose (`bool`, *optional*, defaults to `False`):
            If True, prints a message to stdout for each update.
        smooth (`bool`, *optional*, defaults to `False`):
            If True, applies streaming average smoothing to metrics.
        window_size (`int`, *optional*, defaults to 50):
            The window size for the streaming average when smooth=True.
        reset_start (`int`, *optional*, defaults to 500):
            Number of steps to wait at min_lr before resetting to initial state.

    Example:
        ```python
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        >>> scheduler = GreedyLR(optimizer, mode="min", patience=10)
        >>> for epoch in range(100):
        ...     train(...)
        ...     val_loss = validate(...)
        ...     scheduler.step(val_loss)
        ```
    minffffff?
   ư>absr   MbP?r#   :0yE>F2     r   r   modero   r`   r%   patiencer   	thresholdthreshold_modecooldownwarmupra   float | list[float]max_lrepsverboseboolsmoothr}   reset_startr~   r   c                   |dk    rt          d          t          |t                    s$t          t	          |          j         d          || _        || _        || _        || _	        || _
        || _        d| _        d| _        || _        || _        || _        || _        || _        || _        || _        || _        d| _        t          |	t.          t0          f          rnt3          |	          t3          |j                  k    r4t          dt3          |j                   dt3          |	                     t/          |	          | _        n|	gt3          |j                  z  | _        t          |
t.          t0          f          rnt3          |
          t3          |j                  k    r4t          dt3          |j                   dt3          |
                     t/          |
          | _        n|
gt3          |j                  z  | _        d |j        D             | _        | j                                        | _        |d	k    rtA          d
          ntA          d          | _!        d| _"        d| _#        |dvrt          d| d          |dvrt          d| d          d | _$        |rtK          |          | _$        d S d S )Nr#   zFactor should be < 1.0.z is not an Optimizerr   z	expected z min_lrs, got z max_lrs, got c                    g | ]
}|d          S rQ   r   .0groups     r   
<listcomp>z%GreedyLR.__init__.<locals>.<listcomp>  s    JJJ%%+JJJr   r   inf-inf)r   r&   zmode z is unknown!)relr   zthreshold mode )&rT   
isinstancer   	TypeErrortyper   r   r`   r   r   r   r   cooldown_counterwarmup_counterr   r   r   r   r   r}   r   reset_start_originalr   listtupler   param_groupsmin_lrsmax_lrs	_init_lrsr   _last_lrr%   bestnum_bad_epochsnum_good_epochs_streaming_avgr|   )r   r   r   r`   r   r   r   r   r   ra   r   r   r   r   r}   r   s                   r   r   zGreedyLR.__init__  s   $ S==6777)Y// 	OtI7MMMNNN"   !	",&&$/!ftUm,, 	B6{{c)"89999 !eS1G-H-H!e!eX[\bXcXc!e!efff<<DLL"8c)*@&A&AADLftUm,, 	B6{{c)"89999 !eS1G-H-H!e!eX[\bXcXc!e!efff<<DLL"8c)*@&A&AADLJJ93IJJJ++--+/5==5<<<eFmm	 ~%%7T777888//K~KKKLLL7; 	@"2;"?"?D	@ 	@r   NmetricsepochrW   c                   t          |          }| j        r!| j        | j                            |          }|
| j        dz   }|| _        | j        dk    r| xj        dz  c_        d| _        d| _        n| j        dk    r| xj        dz  c_        d| _        d| _        n| 	                    || j
                  r|| _
        d| _        | xj        dz  c_        n| xj        dz  c_        d| _        | j        | j        k    r)|                     |           | j        | _        d| _        n8| j        | j        k    r(|                     |           | j        | _        d| _        d | j        j        D             | _        dS )a!  Perform a scheduler step based on the given metrics.

        Args:
            metrics (`float`):
                The metric value to use for LR adjustment decisions.
            epoch (`int`, *optional*):
                The current epoch number. If None, uses internal counter.
        Nr	   r   c                    g | ]
}|d          S r   r   r   s     r   r   z!GreedyLR.step.<locals>.<listcomp>  s    NNNtNNNr   )r%   r   r   r   r   r   r   r   r   	is_betterr   r   _increase_lrr   
_reduce_lrr   r   r   r   )r   r   r   currents       r   stepzGreedyLR.step  s    ..; 	=4.:)33G<<G=Oa'E 1$$!!Q&!!"#D#$D   1$$1$"#D#$D  ~~gty11 )#	&'#$$)$$$##q(##'($#dm33!!%(((&*k#'($$$t}44&&&(,%&'#NN$.2MNNNr   r   r   c                    | j         dk    r*| j        dk    r||d| j        z
  z  k     S ||| j        z
  k     S | j        dk    r||d| j        z   z  k    S ||| j        z   k    S )Nr   r   r#   )r   r   r   )r   r   r   s      r   r   zGreedyLR.is_better  s}    9"e++t~)=!>>>!666"e++t~)=!>>>!666r   c           
        d}t          | j        j                  D ]\  }}t          |d                   }t	          || j        z  | j        |                   }||z
  | j        k    r&||d<   | j        rt          d| d| d|dd           |d         | j        |         k    rd}|r1| xj
        d	z  c_
        | j
        d
k    r|                                  d S d S d S )NTrQ   Epoch z": reducing learning rate of group  to .4e.Fr	   r   )	enumerater   r   r%   r&   r`   r   r   r   printr   _reset)r   r   
all_at_miniparam_groupold_lrnew_lrs          r   r   zGreedyLR._reduce_lr'  s   
'(CDD 
	# 
	#NA{;t,--F$+-t|A??F))$*D!< b`5``A``SY````aaa4 4<?22"
 	!1$$	 	$$r   c           
     4   t          | j        j                  D ]q\  }}t          |d                   }t	          || j        z  | j        |                   }||z
  | j        k    r&||d<   | j        rt          d| d| d|dd           r| j
        | _        d S )NrQ   r   z$: increasing learning rate of group r   r   r   )r   r   r   r%   r   r`   r   r   r   r   r   r   )r   r   r   r   r   r   s         r   r   zGreedyLR._increase_lr:  s    '(CDD 	d 	dNA{;t,--F$+-t|A??F))$*D!< db5bbabbU[bbbbccc4r   c                   t          | j        j                  D ]\  }}| j        |         |d<   | j        dk    rt          d          nt          d          | _        d| _        d| _        d| _	        d| _
        | j        | _        | j        r | j        t          | j                  | _        | j        rt%          d           d S d S )NrQ   r   r   r   r   z!Scheduler reset to initial state.)r   r   r   r   r   r%   r   r   r   r   r   r   r   r   r   r|   r}   r   r   )r   r   r   s      r   r   zGreedyLR._resetF  s    '(CDD 	2 	2NA{ $q 1K$(I$6$6E%LLLE&MM	  !4; 	E4.:"243C"D"DD< 	7566666	7 	7r   list[float]c                    | j         S )z8Return last computed learning rate by current scheduler.)r   r   s    r   get_last_lrzGreedyLR.get_last_lrW  s
    }r   r   c                   i d| j         d| j        d| j        d| j        d| j        d| j        d| j        d| j        d	| j        d
| j	        d| j
        d| j        d| j        d| j        d| j        d| j        d| j        | j        | j        | j        | j        | j        | j        d}| j        r#| j        | j                                        |d<   |S )z2Return the state of the scheduler as a dictionary.r`   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r}   r   r   r   r   Nr   )r`   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r}   r   r   r   r   r   r   )r   states     r   r   zGreedyLR.state_dict[  s^   
dk
t|
 t|
 	

 t|
 
 dk
  5
 d1
 DI
 
 d1
 DI
 d1
 t3
  48!
" $/#
$ k++$($=/
 
 
4 ; 	G4.:&*&9&D&D&F&FE"#r   r   c                   |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d	| j	                  | _	        |                     d
| j
                  | _
        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        |                     d| j                  | _        d|v r@| j        t3          | j                  | _        | j                            |d                    d|v r*t7          | j        j        | j                  D ]\  }}||d<   dS dS )zLoad state from a dictionary.r`   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r}   r   r   r   r   r   NrQ   )r   r`   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r}   r   r   r   r   r   r|   r   zipr   r   )r   r   r   rQ   s       r   r   zGreedyLR.load_state_dict|  s    nnXt{;;!~~i>>!~~i>>"z4=AA!~~i>>"z4=AA nnXt{;; */A4CX Y Y(nn-=t?RSSNN64955	#T^DD(nn-=t?RSSNN64955	(nn-=t?RSS)~~.?AUVV>>%22$..tGG nnXt{;;%>>-9IJJ%>>-9IJJ$.NN3I4Kd$e$e!"z4=AA#T^DDz))"*&6t7G&H&H#//
;K0LMMM###&t~'BDM#R#R ' 'R$&D!! $#' 'r   )r   r   r   r   r   r   r   r   r#   r   FFr   r   ) r   r   r   ro   r`   r%   r   r   r   r%   r   ro   r   r   r   r   ra   r   r   r   r   r%   r   r   r   r   r}   r   r   r   r~   r   N)r   r%   r   rW   r~   r   )r   r%   r   r%   r~   r   )r   r   r~   r   )r~   r   )r~   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   m  s       6 6v #&*&)!E@ E@ E@ E@ E@N,O ,O ,O ,O ,O\
7 
7 
7 
7   &
5 
5 
5 
57 7 7 7"      B!' !' !' !' !' !'r   r   c                    t          | fi |S )a  
    Create an adaptive learning rate scheduler that adjusts LR based on training metrics.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        kwargs (`dict`, *optional*):
            Extra parameters passed to the scheduler. See [`GreedyLR`] for possible parameters.

    Return:
        [`GreedyLR`] with the appropriate schedule.
    )r   r   s     r   get_greedy_scheduler     s     I(((((r   namestr | SchedulerTypescheduler_specific_kwargsdict | Nonec                  	 t          |           } t          |          }|t          |t                    rm|j        }i 	|D ]}t          | ||         |||          	|<    	fd}|D ]}|j        r|                    |           t          ||j	        d                   S | t           j
        k    r ||          S |i }| t           j        k    r	 ||fi |S | t           j        k    r	 ||fi |S |t          |  d          | t           j        k    r |||          S | t           j        k    r ||fd|i|S | t           j        k    r ||f||d	|S |t          |  d
           ||f||d	|S )a  
    Unified API to get any scheduler from its name.

    Args:
        name (`str` or `SchedulerType`):
            The name of the scheduler to use.
        optimizer (`torch.optim.Optimizer`):
            The optimizer that will be used during training.
        num_warmup_steps (`int`, *optional*):
            The number of warmup steps to do. This is not required by all schedulers (hence the argument being
            optional), the function will raise an error if it's unset and the scheduler type requires it.
        num_training_steps (`int``, *optional*):
            The number of training steps to do. This is not required by all schedulers (hence the argument being
            optional), the function will raise an error if it's unset and the scheduler type requires it.
        scheduler_specific_kwargs (`dict`, *optional*):
            Extra parameters for schedulers such as cosine with restarts. Mismatched scheduler types and scheduler
            parameters will cause the scheduler function to raise a TypeError.
    N)r   r!   r,   r   c                <    |                                            d S r   )r   )paramscheduler_dicts    r   scheduler_hookz%get_scheduler.<locals>.scheduler_hook  s"     5!&&(((((r   rQ   )optimizer_dictrQ   z; requires `num_warmup_steps`, please provide that argument.r)   r!   r1   z= requires `num_training_steps`, please provide that argument.)r   TYPE_TO_SCHEDULER_FUNCTIONr   r
   r   get_schedulerrequires_grad"register_post_accumulate_grad_hookr   rS   CONSTANTREDUCE_ON_PLATEAUGREEDYrT   CONSTANT_WITH_WARMUPINVERSE_SQRTWARMUP_STABLE_DECAY)
r   r   r!   r,   r   schedule_funcr   r   r   r   s
            @r   r   r     sb   2 D.t4M I7N!O!O"1# 	 	E$1(/!1#5*C% % %N5!!	) 	) 	) 	) 	)
 $ 	I 	IE" I88HHH&nI[\`Iabbbb}%%%}Y''' ($&!}...}YDD*CDDD}###}YDD*CDDD D]]]^^^}111}Y9IJJJJ})))}Ygg9IgMfggg }000}
-1
 
 (	
 
 	
 !D___```=)-  $	  r   c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 d fd		Zed
             Zed             Zed             Zed             Z	 e
j                    dd            Z xZS )	Adafactora9  
    AdaFactor pytorch implementation can be used as a drop in replacement for Adam original fairseq code:
    https://github.com/pytorch/fairseq/blob/master/fairseq/optim/adafactor.py

    Paper: *Adafactor: Adaptive Learning Rates with Sublinear Memory Cost* https://huggingface.co/papers/1804.04235 Note that
    this optimizer internally adjusts the learning rate depending on the `scale_parameter`, `relative_step` and
    `warmup_init` options. To use a manual (external) learning rate schedule you should set `scale_parameter=False` and
    `relative_step=False`.

    Arguments:
        params (`Iterable[nn.parameter.Parameter]`):
            Iterable of parameters to optimize or dictionaries defining parameter groups.
        lr (`float`, *optional*):
            The external learning rate.
        eps (`tuple[float, float]`, *optional*, defaults to `(1e-30, 0.001)`):
            Regularization constants for square gradient and parameter scale respectively
        clip_threshold (`float`, *optional*, defaults to 1.0):
            Threshold of root mean square of final gradient update
        decay_rate (`float`, *optional*, defaults to -0.8):
            Coefficient used to compute running averages of square
        beta1 (`float`, *optional*):
            Coefficient used for computing running averages of gradient
        weight_decay (`float`, *optional*, defaults to 0.0):
            Weight decay (L2 penalty)
        scale_parameter (`bool`, *optional*, defaults to `True`):
            If True, learning rate is scaled by root mean square
        relative_step (`bool`, *optional*, defaults to `True`):
            If True, time-dependent learning rate is computed instead of external learning rate
        warmup_init (`bool`, *optional*, defaults to `False`):
            Time-dependent learning rate computation depends on whether warm-up initialization is being used

    This implementation handles low-precision (FP16, bfloat) values, but we have not thoroughly tested.

    Recommended T5 finetuning settings (https://discuss.huggingface.co/t/t5-finetuning-tips/684/3):

        - Training without LR warmup or clip_threshold is not recommended.

           - use scheduled LR warm-up to fixed LR
           - use clip_threshold=1.0 (https://huggingface.co/papers/1804.04235)
        - Disable relative updates
        - Use scale_parameter=False
        - Additional optimizer operations like gradient clipping should not be used alongside Adafactor

    Example:

    ```python
    Adafactor(model.parameters(), scale_parameter=False, relative_step=False, warmup_init=False, lr=1e-3)
    ```

    Others reported the following combination to work well:

    ```python
    Adafactor(model.parameters(), scale_parameter=True, relative_step=True, warmup_init=True, lr=None)
    ```

    When using `lr=None` with [`Trainer`] you will most likely need to use [`~optimization.AdafactorSchedule`]
    scheduler as following:

    ```python
    from transformers.optimization import Adafactor, AdafactorSchedule

    optimizer = Adafactor(model.parameters(), scale_parameter=True, relative_step=True, warmup_init=True, lr=None)
    lr_scheduler = AdafactorSchedule(optimizer)
    trainer = Trainer(..., optimizers=(optimizer, lr_scheduler))
    ```

    Usage:

    ```python
    # replace AdamW with Adafactor
    optimizer = Adafactor(
        model.parameters(),
        lr=1e-3,
        eps=(1e-30, 1e-3),
        clip_threshold=1.0,
        decay_rate=-0.8,
        beta1=None,
        weight_decay=0.0,
        relative_step=False,
        scale_parameter=False,
        warmup_init=False,
    )
    ```NgKH9r   r#   皙r.   TFc           
         ||	rt          d          |
r|	st          d          ||||||||	|
d	}t                                          ||           d S )Nz;Cannot combine manual `lr` and `relative_step=True` optionsz0`warmup_init=True` requires `relative_step=True`)	rQ   r   clip_threshold
decay_ratebeta1weight_decayscale_parameterrelative_stepwarmup_init)rT   superr   )r   paramsrQ   r   r  r  r  r  r  r  r  rS   	__class__s               r   r   zAdafactor.__init__v  s     >m>Z[[[ 	Q} 	QOPPP ,$(.*&

 

 	*****r   c                   | d         }| d         r@| d         rd|d         z  nd}t          |dt          j        |d                   z            }d}| d         r"t          | d	         d
         |d                   }||z  S )NrQ   r  r  r   r   g{Gz?r#   r  r   r	   RMS)r   r8   rY   r&   )r   param_staterel_step_szmin_stepparam_scales        r   _get_lrzAdafactor._get_lr  s    !$'' 	N5@5OYtk&111UYHhdiF8K.L.L(LMMK() 	Ik%03[5GHHK[((r   c                D    t          |          dk    }| d         d u}||fS )N   r  )r   )r   param_shapefactoreduse_first_moments       r   _get_optionszAdafactor._get_options  s0    {##q(&w/t;)))r   c                \    |                      d          |                                 dz  z  S )Nr  r5   )normnumel)tensors    r   _rmszAdafactor._rms  s$    {{1~~3!677r   c                    | |                      dd          z                                                      d          }|                    d                                          }t	          j        ||          S )Nr   T)dimkeepdim)meanrsqrt_	unsqueezersqrttorchmul)exp_avg_sq_rowexp_avg_sq_colr_factorc_factors       r   _approx_sq_gradzAdafactor._approx_sq_grad  sm     #^%8%8R%8%N%NNVVXXbbceff!++B//5577y8,,,r   c                	   d}|
 |            }| j         D ]}|d         D ]}|j        |j        }|j        t          j        t          j        hv r|                                }|j        rt          d          | j	        |         }|j
        }|                     ||          \  }}	t          |          dk    rd|d<   |	rt          j        |          |d<   |rpt          j        |dd                                       |          |d<   t          j        |dd	         |dd         z                                 |          |d
<   nt          j        |          |d<   d|d<   n}|	r|d                             |          |d<   |r=|d                             |          |d<   |d
                             |          |d
<   n|d                             |          |d<   |}
|j        t          j        t          j        hv r|
                                }
|dxx         dz  cc<   |                     |
          |d<   |                     ||          }dt%          j        |d         |d                   z
  }|dz  |d         d         z   }|r|d         }|d
         }|                    |                              |                    d          d|z
             |                    |                              |                    d	          d|z
             |                     ||          }|                    |           n\|d         }|                    |                              |d|z
             |                                                    |          }|                    |                     |          |d         z                      d                     |                    |           |	rC|d         }|                    |d                                       |d|d         z
             |}|d         dk    r!|
                    |
|d          |z             |
                    |            |j        t          j        t          j        hv r|                    |
           |S )z
        Performs a single optimization step

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr  z,Adafactor does not support sparse gradients.r   r   exp_avgr   r1  r*  r2  
exp_avg_sqr  r	   r#   r  r  r   )r(  )alphar  )r   r  r  )r   graddtyper/  float16bfloat16r%   	is_sparseRuntimeErrorr   shaper!  r   
zeros_likezerostor&  r  r8   powmul_add_r+  r5  r.  div_clamp_copy_)r   closurelossr   pr:  r   
grad_shaper  r   p_data_fp32rQ   beta2tupdater1  r2  r8  r7  s                     r   r   zAdafactor.step  s    799D& M	) M	)E8_ L) L)6>v:%-!@@@::<<D> W&'UVVV
1!Z
-1->->uj-Q-Q**u::??$%E&M' B+0+;D+A+Ai( E27+j"o2N2N2Q2QRV2W2W./27+j"oPZ[][^[^P_>_2`2`2c2cdh2i2i.//.3.>t.D.Dl+#$E%LL' E+0+;+>+>t+D+Di( K278H2I2L2LT2R2R./278H2I2L2LT2R2R.//.3L.A.D.DT.J.Jl+7u}en==="-"3"3"5"5Kf"#yy55e\\%//txfu\7JKKK'U5\!_4 ;%*+;%<N%*+;%<N"''//44V[[R[5H5HQTW]Q]4___"''//44V[[R[5H5HQTW]Q]4___ "11..QQFKK%%%%!&|!4JOOF++00f0NNN'--//44T::FTYYv..7G1HHPPUXPYYZZZB# %#I.GLLw0055fQwEW5YYY$F(A--$$[%:O9ORT9T$VVV  &)))7u}en===GGK(((YL)\ r   )	Nr  r#   r	  Nr.   TTFr   )r   r   r   r   r   staticmethodr  r!  r&  r5  r/  no_gradr   __classcell__r  s   @r   r  r  !  s       R Rn + + + + + +> ) ) \) * * \*
 8 8 \8 - - \- U]__[ [ [ _[ [ [ [ [r   r  c                  *     e Zd ZdZd fd	Zd Z xZS )AdafactorSchedulea8  
    Since [`~optimization.Adafactor`] performs its own scheduling, if the training loop relies on a scheduler (e.g.,
    for logging), this class creates a proxy object that retrieves the current lr values from the optimizer.

    It returns `initial_lr` during startup and the actual `lr` during stepping.
    r.   c                    fd}|j         D ]}|d<   t                                          ||           |j         D ]}|d= d S )Nc                    S r   r   )r   
initial_lrs    r   r*   z-AdafactorSchedule.__init__.<locals>.lr_lambda  s	    r   rY  )r   r  r   )r   r   rY  r*   r   r  s     `  r   r   zAdafactorSchedule.__init__  s    	 	 	 	 	 + 	- 	-E",E,I...+ 	$ 	$El##	$ 	$r   c                p    | j         fdj        D             }t          |          dk    r| j        }|S )Nc                    g | ]B}|d          d         j                             |j        |d          d                            CS )r  r   )r:  r  r   )r   r   opts     r   r   z,AdafactorSchedule.get_lr.<locals>.<listcomp>%  sS     
 
 
Xq!&2 KKsyx);<==222r   r   )r   r   r   base_lrs)r   lrsr\  s     @r   get_lrzAdafactorSchedule.get_lr#  sR    n
 
 
 
)
 
 

 s88q==-C
r   r.   )r   r   r   r   r   r_  rS  rT  s   @r   rV  rV    sV         $ $ $ $ $ $	 	 	 	 	 	 	r   rV  c                "    t          | |          S )aX  
    Get a proxy schedule for [`~optimization.Adafactor`]

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        initial_lr (`float`, *optional*, defaults to 0.0):
            Initial lr

    Return:
        [`~optimization.Adafactor`] proxy schedule object.


    )rV  )r   rY  s     r   get_adafactor_schedulerb  /  s     Y
333r   r   )r   )r   r   r   r   )r   r   )r    r   r!   r   )r   r   r!   r   r   r   )r    r   r!   r   r,   r   )r    r   r!   r   r,   r   r3   r%   )r5   r   )
r   r   r!   r   r,   r   r3   r%   r   r   )r    r   r!   r   r,   r   r3   r   )r	   r   )
r   r   r!   r   r,   r   r3   r   r   r   )r    r   r!   r   r,   r   rF   r%   rG   r%   rH   r   )rO   r#   r   )r    r   r!   r   rV   rW   )Nr   )r   r   r!   r   rV   rW   r   r   )
r    r   r!   r   r,   r   r3   r%   r^   r%   )r5   r   NN)r   r   r!   r   r,   r   r3   r%   r   r   ra   rb   r^   rb   )r    r   r!   r   r,   r   r3   r%   r^   r%   rg   rb   )r5   r   NNN)r   r   r!   r   r,   r   r3   r%   r   r   ra   rb   r^   rb   rg   rb   )r    r   r!   r   rl   r   rm   r   rn   ro   rp   ro   rq   r%   r3   r%   )NNrs   rt   r   r5   r   )r   r   r!   r   rm   r   r,   rW   rl   rW   rn   ro   rp   ro   rq   r%   r3   r%   r   r   )NNN)
r   r   r   r   r!   rW   r,   rW   r   r   r`  )Ar   
__future__r   r8   rx   	functoolsr   typingr   r/  torch.optimr   torch.optim.lr_schedulerr   r   trainer_pt_utilsr
   r   trainer_utilsr   utilsr   
get_loggerr   loggerr   r   r   r'   r+   r/   r2   r=   rA   rC   rE   rN   rU   r[   r]   rf   ri   rk   rv   rz   r|   r   r   LINEARCOSINECOSINE_WITH_RESTARTS
POLYNOMIALr   r  r  r   COSINE_WITH_MIN_LRCOSINE_WARMUP_WITH_MIN_LRr  r  r   r   r  rV  rb  r   r   r   <module>rs     s   + * " " " " " "                ! ! ! ! ! ! @ @ @ @ @ @ @ @ N N N N N N N N ( ( ( ( ( (       
	H	%	%   L L L L L"2 2 2 2$   A A A A A*u u u u6 6 6 66Z Z Z Z vx6 6 6 6 6D^ ^ ^ ^ rt6 6 6 6 6B   , Y[+6 +6 +6 +6\ os       bdA A A A A> sv       $16 16 16 16 16t #'     :  $#'56 56 56 56 56p# # # #T &*#'F6 F6 F6 F6 F6R%. %. %. %. %. %. %. %.Pp' p' p' p' p' p' p' p'f	) ) ) )" 99&(ZG1&(I 9#%C$&Q+-`%'7- & $(%)-1^ ^ ^ ^ ^Bm m m m m	 m m m`       <4 4 4 4 4 4r   