109 + 'adam': torch.optim.Adam, 110 + 'sgdm': partial(torch.optim.SGD, momentum=0.9), 111 + 'adamax': torch.optim.Adamax 112 + } 113 + opt_func = opt_dict[opt_name] 114 + opt = SphericalOptimizer(opt_func,
adam_beta1 ... 0.9 adam_beta2 ... 0.999 adam_eps ... 1e-08 add_bias_linear ... False add_gate ... True adlr_autoresume ...
epsilon=1e-6, exclude_from_weight_decay=None, name="AdamWeightDecayOptimizer"): """Constructs a AdamWeightDecayOptimizer.""" super(AdamWeightDecayOptimizer, self).__init__(False, name) self.learning_rate = learning_rate self.weight_decay_rate = weight_decay_rate self.beta_1 = beta_1 se...