Some improvements to LoRA (#528)

* set cache_limit

* remove set cache_limit

* cleanup

* add gradient checkpointing

* fix sort

* monkey patch call for checkpoint (see the sketch after this list)

* fix example config
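
The gradient-checkpointing change is described above as a monkey patch of the layer's call. Below is a minimal sketch of how that technique can be wired up with MLX's `mx.checkpoint`; the helper name `grad_checkpoint` and the exact call structure are assumptions, not the verbatim code from this commit:

```python
# Sketch of gradient checkpointing via monkey patching in MLX.
# The helper name and wiring are assumptions, not the commit's exact code.
import mlx.core as mx
import mlx.nn as nn


def grad_checkpoint(layer: nn.Module):
    """Patch the layer's class __call__ so every instance of that class
    recomputes its intermediate activations during the backward pass
    instead of storing them."""
    fn = type(layer).__call__

    def checkpointed_fn(module, *args, **kwargs):
        def inner_fn(params, *args, **kwargs):
            # Re-bind the parameters so the checkpointed function can
            # rerun the original forward when gradients are computed.
            module.update(params)
            return fn(module, *args, **kwargs)

        return mx.checkpoint(inner_fn)(
            module.trainable_parameters(), *args, **kwargs
        )

    type(layer).__call__ = checkpointed_fn
```

Applied once to a single transformer block (e.g. `grad_checkpoint(model.layers[0])`), the patch affects every block of that class; activations inside each block are then recomputed in the backward pass rather than stored, trading compute for memory.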
Author: Awni Hannun
Date: 2024-03-12 20:02:03 -07:00
Committed by: GitHub
Parent: e56d9015ef
Commit: 39084e81c2
4 changed files with 68 additions and 25 deletions


@@ -1,6 +1,5 @@
# The path to the local model directory or Hugging Face repo.
model: "mlx_model"
# Whether or not to train (boolean)
train: true
@@ -49,6 +48,9 @@ test_batches: 500
# Maximum sequence length.
max_seq_length: 2048
# Use gradient checkpointing to reduce memory use.
grad_checkpoint: false
# LoRA parameters can only be specified in a config file
lora_parameters:
# The layer keys to apply LoRA to.
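For context, the options touched by this diff would sit in a config roughly like the following; the `keys`, `rank`, `scale`, and `dropout` entries are illustrative assumptions, not lines from this commit:

```yaml
# Illustrative config sketch (fields under lora_parameters are assumed).
model: "mlx_model"       # local model directory or Hugging Face repo
train: true              # whether or not to train
max_seq_length: 2048     # maximum sequence length
grad_checkpoint: false   # use gradient checkpointing to reduce memory use

lora_parameters:
  # The layer keys to apply LoRA to.
  keys: ["self_attn.q_proj", "self_attn.v_proj"]
  rank: 8
  scale: 10.0
  dropout: 0.0
```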