Source code for aitoolbox.torchtrain.schedulers.warmup

import math

from aitoolbox.torchtrain.schedulers.basic import LambdaLRScheduler

class ConstantWithWarmupScheduler(LambdaLRScheduler):
    def __init__(self, num_warmup_steps, last_epoch=-1, **kwargs):
        """Constant learning rate schedule that starts with a linear warmup

        The learning rate multiplier grows linearly from 0 to 1 over the first
        ``num_warmup_steps`` steps and is then held at 1, i.e. at the initial
        lr set in the optimizer.

        Args:
            num_warmup_steps (int): The number of steps for the warmup phase
            last_epoch (int): The index of the last epoch when resuming training
            **kwargs: learning rate scheduler additional parameters
        """
        def warmup_then_constant(step: int):
            in_warmup = step < num_warmup_steps
            if not in_warmup:
                # Warmup finished: keep the optimizer's initial lr unchanged
                return 1.0

            # The max() guard avoids dividing by zero when no warmup is requested
            warmup_span = float(max(1.0, num_warmup_steps))
            return float(step) / warmup_span

        # The scheduler is registered to execute at batch end, not at epoch end
        super().__init__(
            lr_lambda=warmup_then_constant,
            execute_epoch_end=False,
            execute_batch_end=True,
            last_epoch=last_epoch,
            **kwargs
        )
        self.callback_name = 'Warmed up constant learning rate scheduler'
class CosineWithWarmupScheduler(LambdaLRScheduler):
    def __init__(self, num_warmup_steps, num_training_steps, num_cycles=0.5, last_epoch=-1, **kwargs):
        """Cosine decay learning rate schedule that starts with a linear warmup

        The learning rate multiplier grows linearly from 0 to 1 over the first
        ``num_warmup_steps`` steps. Afterwards it follows a cosine curve
        between the initial lr set in the optimizer and 0 over the remaining
        ``num_training_steps - num_warmup_steps`` steps.

        Args:
            num_warmup_steps (int): The number of steps for the warmup phase
            num_training_steps (int): The total number of training steps
            num_cycles (float): The number of waves in the cosine schedule. The
                default of 0.5 gives a single half-cosine that decreases from
                the max value to 0.
            last_epoch (int): The index of the last epoch when resuming training
            **kwargs: learning rate scheduler additional parameters
        """
        def cosine_after_warmup(step):
            if step < num_warmup_steps:
                # Linear ramp; max() guards against a zero-length warmup
                warmup_span = float(max(1, num_warmup_steps))
                return float(step) / warmup_span

            # Fraction of the post-warmup phase that has been completed so far
            decay_span = float(max(1, num_training_steps - num_warmup_steps))
            progress = float(step - num_warmup_steps) / decay_span

            cosine_term = math.cos(math.pi * float(num_cycles) * 2.0 * progress)
            # Clamp at zero so the multiplier can never become negative
            return max(0.0, 0.5 * (1.0 + cosine_term))

        # The scheduler is registered to execute at batch end, not at epoch end
        super().__init__(
            lr_lambda=cosine_after_warmup,
            execute_epoch_end=False,
            execute_batch_end=True,
            last_epoch=last_epoch,
            **kwargs
        )
        self.callback_name = 'Warmed up cosine decreasing learning rate scheduler'
class HardRestartsCosineWithWarmupScheduler(LambdaLRScheduler):
    def __init__(self, num_warmup_steps, num_training_steps, num_cycles=0.5, last_epoch=-1, **kwargs):
        """Cosine learning rate schedule with hard restarts and a linear warmup

        The learning rate multiplier grows linearly from 0 to 1 over the first
        ``num_warmup_steps`` steps. Afterwards it follows a half-cosine decay
        that jumps back to the max value each time ``num_cycles * progress``
        crosses a whole number, where ``progress`` is the completed fraction of
        the post-warmup phase. Once the post-warmup phase is complete the
        multiplier is 0.

        Args:
            num_warmup_steps (int): The number of steps for the warmup phase
            num_training_steps (int): The total number of training steps
            num_cycles (float): The number of cosine decay cycles spread over
                the post-warmup phase. With the default of 0.5 only the first
                half of one decay is traversed, so the multiplier goes from 1
                down to 0.5 before being set to 0 at the end of training.
            last_epoch (int): The index of the last epoch when resuming training
            **kwargs: learning rate scheduler additional parameters
        """
        def restarting_cosine(step):
            if step < num_warmup_steps:
                # Linear ramp; max() guards against a zero-length warmup
                warmup_span = float(max(1, num_warmup_steps))
                return float(step) / warmup_span

            # Fraction of the post-warmup phase that has been completed so far
            decay_span = float(max(1, num_training_steps - num_warmup_steps))
            progress = float(step - num_warmup_steps) / decay_span
            if progress >= 1.0:
                # Training budget exhausted
                return 0.0

            # Position inside the current cycle; the modulo produces the restart
            cycle_position = (float(num_cycles) * progress) % 1.0
            return max(0.0, 0.5 * (1.0 + math.cos(math.pi * cycle_position)))

        # The scheduler is registered to execute at batch end, not at epoch end
        super().__init__(
            lr_lambda=restarting_cosine,
            execute_epoch_end=False,
            execute_batch_end=True,
            last_epoch=last_epoch,
            **kwargs
        )
        self.callback_name = 'Warmed up hard restarts cosine learning rate scheduler'
class LinearWithWarmupScheduler(LambdaLRScheduler):
    def __init__(self, num_warmup_steps, num_training_steps, last_epoch=-1, **kwargs):
        """Linear decay learning rate schedule that starts with a linear warmup

        The learning rate multiplier grows linearly from 0 to 1 over the first
        ``num_warmup_steps`` steps and then decreases linearly from the initial
        lr set in the optimizer to 0 at ``num_training_steps``.

        Especially useful in the context of BERT-like models. Implementation
        based on HuggingFace Transformers library's
        ``get_linear_schedule_with_warmup()`` method.

        Args:
            num_warmup_steps (int): The number of steps for the warmup phase
            num_training_steps (int): The total number of training steps
            last_epoch (int): The index of the last epoch when resuming training
            **kwargs: learning rate scheduler additional parameters
        """
        def linear_after_warmup(step: int):
            if step < num_warmup_steps:
                # Linear ramp; max() guards against a zero-length warmup
                warmup_span = float(max(1, num_warmup_steps))
                return float(step) / warmup_span

            steps_left = float(num_training_steps - step)
            decay_span = float(max(1, num_training_steps - num_warmup_steps))
            # Clamp at zero for steps beyond the declared training length
            return max(0.0, steps_left / decay_span)

        # The scheduler is registered to execute at batch end, not at epoch end
        super().__init__(
            lr_lambda=linear_after_warmup,
            execute_epoch_end=False,
            execute_batch_end=True,
            last_epoch=last_epoch,
            **kwargs
        )
        self.callback_name = 'Warmed up linearly decreasing learning rate scheduler'