Make attention faster for some models (#574)

* make attention faster for a couple models

* remove unused generation flags

* add comment on lora

* include text files as well
This commit is contained in:
Awni Hannun
2024-03-14 21:35:54 -07:00
committed by GitHub
parent 3f3741d229
commit e4b19bb9e1
6 changed files with 35 additions and 56 deletions

View File

@@ -61,19 +61,6 @@ def build_parser():
"--model",
help="The path to the local model directory or Hugging Face repo.",
)
parser.add_argument(
"--max-tokens",
"-m",
type=int,
help="The maximum number of tokens to generate",
)
parser.add_argument("--temp", type=float, help="The sampling temperature")
parser.add_argument(
"--prompt",
"-p",
type=str,
help="The prompt for generation",
)
# Training args
parser.add_argument(