def _norm(array):
    # flatten to 1-D and accumulate the squared norm in float32
    x = array.reshape((-1,)).astype("float32", copy=False)
    return nd.dot(x, x)
norm_arrays = [_norm(arr) for arr in arrays]
# group norm arrays by ctx
def group_by_ctx(arr_list):
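The body of `group_by_ctx` is not shown in this fragment; below is a minimal sketch of one way such a grouping could work, assuming the intent is to bucket the per-array norm NDArrays by the device context they live on (the `collections.defaultdict` approach and the `groups` name are assumptions, not the committed code):

import collections

def group_by_ctx(arr_list):
    # bucket each NDArray under the device context it resides on
    groups = collections.defaultdict(list)
    for arr in arr_list:
        groups[arr.context].append(arr)
    return groups

# e.g. group_by_ctx(norm_arrays) maps each device context to the norms stored on it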
After Change
mx.autograd.backward(ls)
def step(self, batch_size, max_norm=None):
    """Makes one step of parameter update. Should be called after
    `fp16_optimizer.backward()`, and outside of `record()` scope.

    Parameters
    ----------
    batch_size : int
        Batch size of data processed. Gradient will be normalized by `1/batch_size`.
        Set this to 1 if you normalized loss manually with `loss = mean(loss)`.
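For context, a rough usage sketch of the API this docstring describes, assuming a trainer-like wrapper named `fp16_optimizer` that exposes `backward()` and `step()`; the network, loss function, and data loader names are placeholders rather than code from this change:

import mxnet as mx

for data, label in data_loader:
    with mx.autograd.record():
        out = net(data)
        ls = loss_fn(out, label)
    # the wrapper computes gradients (calling mx.autograd.backward internally)
    fp16_optimizer.backward(ls)
    # parameter update happens outside the record() scope;
    # gradients are normalized by 1/batch_size inside step()
    fp16_optimizer.step(batch_size=data.shape[0])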