From 8ec802f468c1dcbc1f3c7556aa572373605d12db Mon Sep 17 00:00:00 2001
From: Chime Ogbuji
Date: Sun, 3 Nov 2024 19:41:09 -0500
Subject: [PATCH] Updates to LoRA documentation

---
 llms/mlx_lm/LORA.md | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/llms/mlx_lm/LORA.md b/llms/mlx_lm/LORA.md
index 4714c282..d553cecc 100644
--- a/llms/mlx_lm/LORA.md
+++ b/llms/mlx_lm/LORA.md
@@ -283,7 +283,7 @@ it on the command line. For example, pass `--data mlx-community/wikisql` to
 train on the pre-formatted WikiSQL data.
 
 Otherwise, provide a mapping of keys in the dataset to the features MLX LM
-expects. Use a YAML config to specify the Hugging Face dataset arguments. For
+expects. Use a YAML config to specify the Hugging Face (HF) dataset arguments. For
 example:
 
 ```yaml
@@ -295,11 +295,30 @@ hf_dataset:
 - Use `prompt_feature` and `completion_feature` to specify keys for a
   `completions` dataset. Use `text_feature` to specify the key for a `text`
-  dataset.
+  dataset. Use `chat_feature` to specify the key for a chat dataset.
 
 - To specify the train, valid, or test splits, set the corresponding
   `{train,valid,test}_split` argument.
 
+You can specify a list of HF datasets with the `hf_datasets` (plural)
+configuration, whose value is a list of records, each with the same structure
+as above. For example:
+
+```yaml
+hf_datasets:
+  - hf_dataset:
+      name: "Open-Orca/OpenOrca"
+      train_split: "train[:90%]"
+      valid_split: "train[-10%:]"
+      prompt_feature: "question"
+      completion_feature: "response"
+  - hf_dataset:
+      name: "trl-lib/ultrafeedback_binarized"
+      train_split: "train[:90%]"
+      valid_split: "train[-10%:]"
+      chat_feature: "chosen"
+```
+
 - Arguments specified in `config` will be passed as keyword arguments to
   [`datasets.load_dataset`](https://huggingface.co/docs/datasets/v2.20.0/en/package_reference/loading_methods#datasets.load_dataset).
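To illustrate how a multi-dataset config like the `hf_datasets` example above could map onto `datasets.load_dataset` calls, here is a minimal sketch. The `load_plan` helper and the inline `config` dict are hypothetical (they mirror the YAML example, not the actual MLX LM loader); the `train[:90%]` / `train[-10%:]` split-slicing syntax is standard Hugging Face `datasets` usage.

```python
# Illustrative sketch only: mirrors the `hf_datasets` YAML example as a Python
# dict and shows which (name, split) pairs would be handed to
# datasets.load_dataset(name, split=split). Not the actual MLX LM loader.

config = {
    "hf_datasets": [
        {"hf_dataset": {
            "name": "Open-Orca/OpenOrca",
            "train_split": "train[:90%]",
            "valid_split": "train[-10%:]",
            "prompt_feature": "question",
            "completion_feature": "response",
        }},
        {"hf_dataset": {
            "name": "trl-lib/ultrafeedback_binarized",
            "train_split": "train[:90%]",
            "valid_split": "train[-10%:]",
            "chat_feature": "chosen",
        }},
    ]
}

SPLIT_KEYS = ("train_split", "valid_split", "test_split")


def load_plan(cfg):
    """Collect (dataset name, split) pairs from each record in hf_datasets."""
    plan = []
    for record in cfg["hf_datasets"]:
        ds = record["hf_dataset"]
        for key in SPLIT_KEYS:
            if key in ds:
                plan.append((ds["name"], ds[key]))
    return plan


# Each pair would become: datasets.load_dataset(name, split=split)
for name, split in load_plan(config):
    print(name, split)
```

Note that slicing splits such as `train[:90%]` and `train[-10%:]` let a single upstream `train` split serve as both training and validation data without overlap.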