freeze ref model

This commit is contained in:
Goekdeniz-Guelmez 2025-02-09 15:30:51 +01:00
parent 9ba6146a76
commit 39e9469059

View File

@@ -295,13 +295,12 @@ def train_model(
if args.reference_model_path: if args.reference_model_path:
reference_model, _ = load(args.reference_model_path) reference_model, _ = load(args.reference_model_path)
reference_model = reference_model.freeze()
else: else:
reference_model, _ = load(args.model) reference_model, _ = load(args.model)
train_grpo( train_grpo(
model=model, model=model,
ref_model=reference_model, ref_model=reference_model.freeze(),
tokenizer=tokenizer, tokenizer=tokenizer,
optimizer=opt, optimizer=opt,
train_dataset=train_set, train_dataset=train_set,
@@ -340,11 +339,11 @@ def evaluate_model(args, model: nn.Module, tokenizer: TokenizerWrapper, test_set
if args.reference_model_path: if args.reference_model_path:
reference_model, _ = load(args.reference_model_path) reference_model, _ = load(args.reference_model_path)
else: else:
reference_model = model reference_model, _ = load(args.model)
test_loss, _, test_rewards = evaluate_grpo( test_loss, _, test_rewards = evaluate_grpo(
model=model, model=model,
ref_model=reference_model, ref_model=reference_model.freeze(),
dataset=test_set, dataset=test_set,
tokenizer=tokenizer, tokenizer=tokenizer,
batch_size=args.batch_size, batch_size=args.batch_size,