diff --git a/llms/mlx_lm/lora.py b/llms/mlx_lm/lora.py
index fdc400c6..b68c99ab 100644
--- a/llms/mlx_lm/lora.py
+++ b/llms/mlx_lm/lora.py
@@ -324,7 +324,7 @@ def evaluate_model(args, model: nn.Module, tokenizer: TokenizerWrapper, test_set
             delta=args.delta,
             loss_type=args.dpo_loss_type,
         )
-        print(f"Test loss {test_loss:.8f}, Rewards: {test_rewards[0]:.8f}, {test_rewards[1]:.8f}")
+        print(f"Test loss {test_loss:.3f}, Rewards: {test_rewards[0]:.3f}, {test_rewards[1]:.3f}")
     else:
         test_loss = evaluate(
             model=model,
diff --git a/llms/mlx_lm/tuner/dpo_trainer.py b/llms/mlx_lm/tuner/dpo_trainer.py
index 8979ec0d..2f4a74b6 100644
--- a/llms/mlx_lm/tuner/dpo_trainer.py
+++ b/llms/mlx_lm/tuner/dpo_trainer.py
@@ -327,7 +327,7 @@ def train_dpo(
         if rank == 0:
             print(
                 f"Iter {it}: "
-                f"Val loss {val_loss:.8f}, "
+                f"Val loss {val_loss:.3f}, "
                 f"Val chosen reward {val_rewards[0]:.3f}, "
                 f"Val rejected reward {val_rewards[1]:.3f}, "
                 f"Val accuracy {val_metrics['accuracies']:.3f}, "
@@ -375,7 +375,7 @@ def train_dpo(
         if rank == 0:
             print(
-                f"Iter {it}: Train loss {train_loss:.8f}, "
+                f"Iter {it}: Train loss {train_loss:.3f}, "
                 f"Chosen reward {train_rewards[0]:.3f}, "
                 f"Rejected reward {train_rewards[1]:.3f}, "
                 f"Accuracy {avg_metrics['accuracies']:.3f}, "