Generation refactor: part 2 (#1099)

* unify with stream_generate

* fixes

* nit

* some cleanup, warnings, tests

* fix test + faster min p + test

* version
Awni Hannun
2024-11-23 11:47:06 -08:00
committed by GitHub
parent 004eb4cc9d
commit 0f135396ae
13 changed files with 184 additions and 197 deletions


@@ -23,14 +23,6 @@ max_tokens = 1_000
 # Specify if tokens and timing information will be printed
 verbose = True
-# Some optional arguments for causal language model generation
-generation_args = {
-    "temp": 0.7,
-    "repetition_penalty": 1.2,
-    "repetition_context_size": 20,
-    "top_p": 0.95,
-}
 # Generate a response with the specified settings
 response = generate(
     model=model,
@@ -38,5 +30,4 @@ response = generate(
     prompt=prompt,
     max_tokens=max_tokens,
     verbose=verbose,
-    **generation_args,
 )
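
With this refactor, generate() no longer forwards raw sampling keyword arguments such as temp, top_p, or repetition_penalty, which is why generation_args is dropped from the example. Below is a minimal sketch of an equivalent call against the post-refactor API, assuming make_sampler and make_logits_processors from mlx_lm.sample_utils; the checkpoint name is only illustrative.

# Sketch (assumed post-refactor API): sampling settings become a sampler,
# and the repetition penalty becomes a logits processor.
from mlx_lm import generate, load
from mlx_lm.sample_utils import make_logits_processors, make_sampler

# Illustrative checkpoint; substitute any model converted for MLX.
model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")

# temp/top_p now configure a sampler ...
sampler = make_sampler(temp=0.7, top_p=0.95)

# ... while repetition penalties are applied as logits processors.
logits_processors = make_logits_processors(
    repetition_penalty=1.2, repetition_context_size=20
)

response = generate(
    model,
    tokenizer,
    prompt="Why is the sky blue?",
    max_tokens=1_000,
    verbose=True,
    sampler=sampler,
    logits_processors=logits_processors,
)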