# Before: routing of dense vs. sparse params is tied to the optimizer name.
self.sparse_params = []
for k, p in params:
    if p.requires_grad:
        if self.method != "sparseadam" or "embed" not in k:
            self.params.append(p)
        else:
            self.sparse_params.append(p)
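The split matters because embedding tables built with sparse=True emit sparse gradient tensors, and torch.optim.Adam raises an error on sparse gradients, while optim.SparseAdam accepts them. A standalone sketch of that behavior (the layer sizes here are arbitrary, not taken from this code):

import torch
import torch.nn as nn

emb = nn.Embedding(100, 16, sparse=True)  # sparse=True => sparse gradients
emb(torch.tensor([1, 2, 3])).sum().backward()
print(emb.weight.grad.is_sparse)          # True; plain Adam would reject this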
After Change
dense = []
sparse = []
for name, param in params:
    if not param.requires_grad:
        continue
    # TODO: Find a better way to check for sparse gradients.
    if "embed" in name:
        sparse.append(param)
    else:
        dense.append(param)
self.optimizer = MultipleOptimizer(
    [optim.Adam(dense, lr=self.learning_rate,
                betas=self.betas, eps=1e-8),
     optim.SparseAdam(sparse, lr=self.learning_rate,
                      betas=self.betas, eps=1e-8)])
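MultipleOptimizer is used above but not defined in this excerpt. A minimal sketch of such a wrapper, assuming all it needs to do is fan each call out to the wrapped optimizers (the class body below is an assumption, not the excerpt's actual definition):

class MultipleOptimizer(object):
    # Assumed helper: forwards zero_grad()/step() to every wrapped optimizer.
    def __init__(self, op):
        self.optimizers = op

    def zero_grad(self):
        for op in self.optimizers:
            op.zero_grad()

    def step(self):
        for op in self.optimizers:
            op.step()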