Some improvements to LoRA (#528)

* set cache_limit

* remove set cache_limit

* cleanup

* add gradient checkpointing

* fix sort

* monkey patch call for checkpoint (see the sketch after this list)

* fix example config
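
The gradient-checkpointing change is described above as a monkey patch of the layer's call. Below is a minimal sketch of how that technique can be wired up with MLX's `mx.checkpoint`; the helper name `grad_checkpoint` and the exact call structure are assumptions, not the verbatim code from this commit:

```python
# Sketch of gradient checkpointing via monkey patching in MLX.
# The helper name and wiring are assumptions, not the commit's exact code.
import mlx.core as mx
import mlx.nn as nn


def grad_checkpoint(layer: nn.Module):
    """Patch the layer's class __call__ so every instance of that class
    recomputes its intermediate activations during the backward pass
    instead of storing them."""
    fn = type(layer).__call__

    def checkpointed_fn(module, *args, **kwargs):
        def inner_fn(params, *args, **kwargs):
            # Re-bind the parameters so the checkpointed function can
            # rerun the original forward when gradients are computed.
            module.update(params)
            return fn(module, *args, **kwargs)

        return mx.checkpoint(inner_fn)(
            module.trainable_parameters(), *args, **kwargs
        )

    type(layer).__call__ = checkpointed_fn
```

Applied once to a single transformer block (e.g. `grad_checkpoint(model.layers[0])`), the patch affects every block of that class; activations inside each block are then recomputed in the backward pass rather than stored, trading compute for memory.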
Author: Awni Hannun
Date: 2024-03-12 20:02:03 -07:00
Committed by: GitHub
Parent: e56d9015ef
Commit: 39084e81c2
4 changed files with 68 additions and 25 deletions


@@ -1,6 +1,5 @@
# The path to the local model directory or Hugging Face repo.
model: "mlx_model"
# Whether or not to train (boolean)
train: true
@@ -49,6 +48,9 @@ test_batches: 500
# Maximum sequence length.
max_seq_length: 2048
# Use gradient checkpointing to reduce memory use.
grad_checkpoint: false
# LoRA parameters can only be specified in a config file
lora_parameters:
# The layer keys to apply LoRA to.
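For context, the options touched by this diff would sit in a config roughly like the following; the `keys`, `rank`, `scale`, and `dropout` entries are illustrative assumptions, not lines from this commit:

```yaml
# Illustrative config sketch (fields under lora_parameters are assumed).
model: "mlx_model"       # local model directory or Hugging Face repo
train: true              # whether or not to train
max_seq_length: 2048     # maximum sequence length
grad_checkpoint: false   # use gradient checkpointing to reduce memory use

lora_parameters:
  # The layer keys to apply LoRA to.
  keys: ["self_attn.q_proj", "self_attn.v_proj"]
  rank: 8
  scale: 10.0
  dropout: 0.0
```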