Source code for researchutils.chainer.functions.activation.grad_clip_lstm

import numpy
import chainer.functions as F

from chainer.functions.activation import lstm
from chainer.functions.activation.lstm import _extract_gates


[docs]class GradClipLSTM(lstm.LSTM):
    """
    Long short-term memory unit with forget gate and gradient clipping before each gates.
    It has two inputs (c, x) and two outputs (c, h), where c indicates the cell
    state. x must have four times channels compared to the number of units.

    Gradient clipping is done during backward process
    and not before applying the gradient to weights.
    
    See: https://arxiv.org/abs/1308.0850
    """

    def __init__(self, clip_min, clip_max):
        super(GradClipLSTM, self).__init__()
        clip_min = clip_min if clip_min is not None else numpy.finfo(
            numpy.float32).min
        clip_max = clip_max if clip_max is not None else numpy.finfo(
            numpy.float32).max
        self.clip_min = float(clip_min)
        self.clip_max = float(clip_max)

[docs]    def backward(self, inputs, grads):
        gc_prev, gx = super(GradClipLSTM, self).backward(inputs, grads)
        return gc_prev, F.clip(gx, self.clip_min, self.clip_max)


def grad_clip_lstm(c_prev, x, clip_min=None, clip_max=None):
    return GradClipLSTM(clip_min, clip_max).apply((c_prev, x))