// In order to normalize the localization loss, we perform element-wise division by the default box widths and heights.
// Deviations in xmin and xmax are divided by their respective default box widths, deviations in ymin and ymax are divided
// by their respective default box heights.
absolute_loss = tf.abs(y_true - y_pred) / self.loc_norm
square_loss = 0.5 * (y_true - y_pred)**2 / self.loc_norm
l1_loss = tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5)
return tf.reduce_sum(l1_loss, axis=-1)