mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-27 11:21:32 +08:00
nits
This commit is contained in:
parent
4098c3bd2f
commit
0bb001121e
@ -331,7 +331,7 @@ def evaluate_model(args, model: nn.Module, tokenizer: TokenizerWrapper, test_set
|
|||||||
delta=args.delta,
|
delta=args.delta,
|
||||||
loss_type=args.dpo_loss_type,
|
loss_type=args.dpo_loss_type,
|
||||||
)
|
)
|
||||||
print(f"Test loss {test_loss:.3f}, Rewards: {test_rewards[0]:.3f}, {test_rewards[1]:.3f}")
|
print(f"Test loss {test_loss:.8f}, Rewards: {test_rewards[0]:.3f}, {test_rewards[1]:.3f}")
|
||||||
elif args.training_mode == "orpo":
|
elif args.training_mode == "orpo":
|
||||||
test_loss, test_rewards = evaluate_orpo(
|
test_loss, test_rewards = evaluate_orpo(
|
||||||
model=model,
|
model=model,
|
||||||
@ -343,7 +343,7 @@ def evaluate_model(args, model: nn.Module, tokenizer: TokenizerWrapper, test_set
|
|||||||
beta=args.beta,
|
beta=args.beta,
|
||||||
reward_scaling=args.reward_scaling,
|
reward_scaling=args.reward_scaling,
|
||||||
)
|
)
|
||||||
print(f"Test loss {test_loss:.3f}, Rewards: {test_rewards[0]:.3f}, {test_rewards[1]:.3f}")
|
print(f"Test loss {test_loss:.8f}, Rewards: {test_rewards[0]:.3f}, {test_rewards[1]:.3f}")
|
||||||
else:
|
else:
|
||||||
test_loss = evaluate(
|
test_loss = evaluate(
|
||||||
model=model,
|
model=model,
|
||||||
|
@ -312,7 +312,7 @@ def train_orpo(
|
|||||||
if rank == 0:
|
if rank == 0:
|
||||||
print(
|
print(
|
||||||
f"Iter {it}: "
|
f"Iter {it}: "
|
||||||
f"Val loss {val_loss:.3f}, "
|
f"Val loss {val_loss:.8f}, "
|
||||||
f"Val chosen reward {val_rewards[0]:.3f}, "
|
f"Val chosen reward {val_rewards[0]:.3f}, "
|
||||||
f"Val rejected reward {val_rewards[1]:.3f}, "
|
f"Val rejected reward {val_rewards[1]:.3f}, "
|
||||||
f"Val took {val_time:.3f}s",
|
f"Val took {val_time:.3f}s",
|
||||||
@ -353,7 +353,7 @@ def train_orpo(
|
|||||||
|
|
||||||
if rank == 0:
|
if rank == 0:
|
||||||
print(
|
print(
|
||||||
f"Iter {it}: Train loss {train_loss:.3f}, "
|
f"Iter {it}: Train loss {train_loss:.8f}, "
|
||||||
f"Chosen reward {train_rewards[0]:.3f}, "
|
f"Chosen reward {train_rewards[0]:.3f}, "
|
||||||
f"Rejected reward {train_rewards[1]:.3f}, "
|
f"Rejected reward {train_rewards[1]:.3f}, "
|
||||||
f"Learning Rate {learning_rate:.3e}, "
|
f"Learning Rate {learning_rate:.3e}, "
|
||||||
|
Loading…
Reference in New Issue
Block a user