f19ace982075ea009af81f5e9f687cc2276f50ea,scripts/bert/fp16_utils.py,,grad_global_norm,#Any#Any#,24

Before Change


            ctx = arr.context
            groups[ctx].append(arr)
        return groups
    norm_groups = group_by_ctx(norm_arrays)

    # reduce
    ctx, dtype = arrays[0].context, "float32"
    norms = [nd.add_n(*g).as_in_context(ctx) for g in norm_groups.values()]
    total_norm = nd.add_n(*norms).sqrt()
    scale = total_norm / max_norm
    # is_finite = 0 if NaN or Inf, 1 otherwise.

After Change


            Batch size of data processed. Gradient will be normalized by `1/batch_size`.
            Set this to 1 if you normalized loss manually with `loss = mean(loss)`.
        max_norm : NDArray, optional, default is None
            max value for global 2-norm of gradients.
        """
        self.fp32_trainer.allreduce_grads()
        step_size = batch_size * self._scaler.loss_scale
        if max_norm:
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 5

Instances


Project Name: dmlc/gluon-nlp
Commit Name: f19ace982075ea009af81f5e9f687cc2276f50ea
Time: 2020-01-20
Author: 50716238+MoisesHer@users.noreply.github.com
File Name: scripts/bert/fp16_utils.py
Class Name:
Method Name: grad_global_norm


Project Name: mozilla/bugbug
Commit Name: f16992b25bb153df3ab87c5111db2a101cf68c73
Time: 2020-04-09
Author: mcastelluccio@mozilla.com
File Name: bugbug/models/testselect.py
Class Name: TestSelectModel
Method Name: train_test_split


Project Name: keras-team/keras
Commit Name: 8e95a38e7a4be3a3edd8139dbd26e994e50d0a0c
Time: 2021-04-04
Author: scottzhu@google.com
File Name: keras/distribute/multi_worker_test.py
Class Name: KerasMultiWorkerTestIndependentWorker
Method Name: testSimpleModelIndependentWorkerSync


Project Name: dmlc/gluon-cv
Commit Name: 2318052dc79966bf36675606b7d992a347418292
Time: 2019-01-07
Author: cheungchih@gmail.com
File Name: scripts/detection/ssd/train_ssd.py
Class Name:
Method Name: