m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
self.updates.append((self.m_schedule, m_schedule_new))
ms = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
vs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
self.weights = ms + vs
for p, g, m, v in zip(params, grads, ms, vs):
// the following equations given in [1]
g_prime = g / (1. - m_schedule_new)
After Change
m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
self.updates.append((self.m_schedule, m_schedule_new))
shapes = [x.shape for x in K.batch_get_value(params)]
ms = [K.zeros(shape) for shape in shapes]
vs = [K.zeros(shape) for shape in shapes]