Source code for slugnet.optimizers
import numpy as np

from slugnet.initializations import _zero

class Optimizer(object):
    def __init__(self, lr=0.001, clip=-1, decay=0., lr_min=0., lr_max=np.inf):
        self.lr = lr
        self.clip = clip
        self.decay = decay
        self.lr_min = lr_min
        self.lr_max = lr_max
        self.iterations = 0

    def update(self, params, grads):
        """Update parameters.

        Parameters
        ----------
        params : list
            A list of parameters in the model.
        grads : list
            A list of gradients in the model.
        """
        self.iterations += 1
        # Decay the learning rate once per update, then clamp it to
        # [lr_min, lr_max].
        self.lr *= 1. / (1. + self.decay * self.iterations)
        self.lr = np.clip(self.lr, self.lr_min, self.lr_max)

    def __str__(self):
        return self.__class__.__name__
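

# A minimal usage sketch (not part of the library): the base class only
# maintains the learning-rate schedule. With a hypothetical weight/gradient
# pair, each call shrinks lr by a factor of 1 / (1 + decay * t) at
# iteration t, as implemented in Optimizer.update above.
def _demo_optimizer_decay():
    opt = Optimizer(lr=0.1, decay=0.5)
    w, g = [np.ones(3)], [np.full(3, 0.2)]
    opt.update(w, g)  # t = 1: lr = 0.1 / (1 + 0.5) ~= 0.0667
    opt.update(w, g)  # t = 2: lr ~= 0.0667 / (1 + 1.0) ~= 0.0333
    return opt.lr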


class SGD(Optimizer):
    """
    Optimize model parameters using common stochastic gradient descent.
    """

    def update(self, params, grads):
        for p, g in zip(params, grads):
            p -= self.lr * npdl_clip(g, self.clip)
        super(SGD, self).update(params, grads)


def npdl_clip(grad, boundary):
    # Clip gradients element-wise to [-boundary, boundary]; a non-positive
    # boundary disables clipping.
    if boundary > 0:
        return np.clip(grad, -boundary, boundary)
    return grad
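

# A minimal usage sketch (not part of the library): one SGD step on a
# hypothetical two-element weight vector; with the default clip=-1 the
# gradient passes through npdl_clip unchanged.
def _demo_sgd_step():
    opt = SGD(lr=0.1)
    w = [np.array([1.0, -2.0])]
    g = [np.array([0.5, 0.5])]
    opt.update(w, g)  # w <- w - 0.1 * g = [0.95, -2.05]
    return w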


class RMSProp(Optimizer):
    """RMSProp updates.

    Scale learning rates by dividing by the moving average of the root mean
    squared (RMS) gradients. See [1]_ for further description.

    :param rho: Gradient moving average decay factor.
    :type rho: float
    :param epsilon: Small value added for numerical stability.
    :type epsilon: float

    `rho` should be between 0 and 1. A value of `rho` close to 1 decays the
    moving average slowly, while a value close to 0 decays it quickly.

    Using the step size :math:`\\eta` and a decay factor :math:`\\rho`, the
    learning rate :math:`\\eta_t` is calculated as:

    .. math::
        r_t &= \\rho r_{t-1} + (1 - \\rho) g^2\\\\
        \\eta_t &= \\frac{\\eta}{\\sqrt{r_t + \\epsilon}}

    References
    ----------
    .. [1] Tieleman, T. and Hinton, G. (2012):
           Neural Networks for Machine Learning, Lecture 6.5 - rmsprop.
           Coursera. http://www.youtube.com/watch?v=O3sxAc4hxZU (formula @5:20)
    """

    def __init__(self, rho=0.9, epsilon=1e-6, *args, **kwargs):
        super(RMSProp, self).__init__(*args, **kwargs)
        self.rho = rho
        self.epsilon = epsilon
        self.cache = None
        self.iterations = 0

    def update(self, params, grads):
        # Initialize the running average of squared gradients on first use.
        if self.cache is None:
            self.cache = [_zero(p.shape) for p in params]

        # Accumulate squared gradients and scale each step by the RMS.
        for i, (c, p, g) in enumerate(zip(self.cache, params, grads)):
            c = self.rho * c + (1 - self.rho) * np.power(g, 2)
            p -= (self.lr * g / np.sqrt(c + self.epsilon))
            self.cache[i] = c
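

# A minimal usage sketch (not part of the library): a single RMSProp step
# on a hypothetical one-element parameter. On the first call the cache is
# zero, so r_1 = (1 - rho) * g^2 and the step is lr * g / sqrt(r_1 + eps).
def _demo_rmsprop_step():
    opt = RMSProp(rho=0.9, epsilon=1e-6, lr=0.001)
    w = [np.array([1.0])]
    g = [np.array([0.5])]
    opt.update(w, g)
    # r_1 = 0.1 * 0.25 = 0.025; step ~= 0.001 * 0.5 / sqrt(0.025) ~= 0.00316
    return w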