From 0bb001121e77a933b517e4368f625b4c1b03cb79 Mon Sep 17 00:00:00 2001
From: Goekdeniz-Guelmez
Date: Wed, 22 Jan 2025 21:39:29 +0100
Subject: [PATCH] niits

---
 llms/mlx_lm/lora.py               | 4 ++--
 llms/mlx_lm/tuner/orpo_trainer.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llms/mlx_lm/lora.py b/llms/mlx_lm/lora.py
index 89f49f54..35c20274 100644
--- a/llms/mlx_lm/lora.py
+++ b/llms/mlx_lm/lora.py
@@ -331,7 +331,7 @@ def evaluate_model(args, model: nn.Module, tokenizer: TokenizerWrapper, test_set
             delta=args.delta,
             loss_type=args.dpo_loss_type,
         )
-        print(f"Test loss {test_loss:.3f}, Rewards: {test_rewards[0]:.3f}, {test_rewards[1]:.3f}")
+        print(f"Test loss {test_loss:.8f}, Rewards: {test_rewards[0]:.3f}, {test_rewards[1]:.3f}")
     elif args.training_mode == "orpo":
         test_loss, test_rewards = evaluate_orpo(
             model=model,
@@ -343,7 +343,7 @@ def evaluate_model(args, model: nn.Module, tokenizer: TokenizerWrapper, test_set
             beta=args.beta,
             reward_scaling=args.reward_scaling,
         )
-        print(f"Test loss {test_loss:.3f}, Rewards: {test_rewards[0]:.3f}, {test_rewards[1]:.3f}")
+        print(f"Test loss {test_loss:.8f}, Rewards: {test_rewards[0]:.3f}, {test_rewards[1]:.3f}")
     else:
         test_loss = evaluate(
             model=model,
diff --git a/llms/mlx_lm/tuner/orpo_trainer.py b/llms/mlx_lm/tuner/orpo_trainer.py
index 4bba813a..cadfb049 100644
--- a/llms/mlx_lm/tuner/orpo_trainer.py
+++ b/llms/mlx_lm/tuner/orpo_trainer.py
@@ -312,7 +312,7 @@ def train_orpo(
             if rank == 0:
                 print(
                     f"Iter {it}: "
-                    f"Val loss {val_loss:.3f}, "
+                    f"Val loss {val_loss:.8f}, "
                     f"Val chosen reward {val_rewards[0]:.3f}, "
                     f"Val rejected reward {val_rewards[1]:.3f}, "
                     f"Val took {val_time:.3f}s",
@@ -353,7 +353,7 @@

             if rank == 0:
                 print(
-                    f"Iter {it}: Train loss {train_loss:.3f}, "
+                    f"Iter {it}: Train loss {train_loss:.8f}, "
                     f"Chosen reward {train_rewards[0]:.3f}, "
                     f"Rejected reward {train_rewards[1]:.3f}, "
                     f"Learning Rate {learning_rate:.3e}, "