diff --git a/transformer_lm/main.py b/transformer_lm/main.py index e5ec37dd..044af58c 100644 --- a/transformer_lm/main.py +++ b/transformer_lm/main.py @@ -189,7 +189,7 @@ if __name__ == "__main__": "--num_iters", type=int, default=100000, help="Iterations to train for." ) parser.add_argument( - "--learning_rate", type=float, default=3e-4, help="SGD learning rate." + "--learning_rate", type=float, default=3e-4, help="AdamW learning rate." ) parser.add_argument( "--weight_decay", type=float, default=1e-5, help="Set the weight decay"