mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-01 04:14:38 +08:00
Make attention faster for some models (#574)
* make attention faster for a couple of models
* remove unused generation flags
* add comment on lora
* include text files as well
This commit is contained in:
@@ -61,19 +61,6 @@ def build_parser():
|
||||
"--model",
|
||||
help="The path to the local model directory or Hugging Face repo.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-tokens",
|
||||
"-m",
|
||||
type=int,
|
||||
help="The maximum number of tokens to generate",
|
||||
)
|
||||
parser.add_argument("--temp", type=float, help="The sampling temperature")
|
||||
parser.add_argument(
|
||||
"--prompt",
|
||||
"-p",
|
||||
type=str,
|
||||
help="The prompt for generation",
|
||||
)
|
||||
|
||||
# Training args
|
||||
parser.add_argument(
|
||||
|
Reference in New Issue
Block a user