Merge 8bd8121713 into 4b2a0df237

2025-08-28 16:16:27 +08:00 · 2025-08-25 09:31:14 -07:00 · 2025-08-25 09:31:14 -07:00 · 95036ccf9b
commit 95036ccf9b
parent 4b2a0df237 8bd8121713
1 changed files with 36 additions and 400 deletions
--- a/wwdc25/Explore_language_models_on_Apple_silicon_with_MLX.ipynb
+++ b/wwdc25/Explore_language_models_on_Apple_silicon_with_MLX.ipynb
@ -10,7 +10,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "id": "a9f4b67f",
   "metadata": {},
   "outputs": [],
@ -31,7 +31,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "id": "5a45bf5a",
   "metadata": {},
   "outputs": [],
@ -58,30 +58,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "id": "51bd2ed4",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Fetching 7 files: 100%|███████████████████████| 7/7 [00:00<00:00, 120328.39it/s]\n",
-      "==========\n",
-      "Here's a simple implementation of the QuickSort algorithm in Swift. This version uses Swift's built-in `swapAt()` function to swap elements in an array.\n",
-      "\n",
-      "```swift\n",
-      "func quickSort(_ array: inout [Int], _ low: Int, _ high: Int) {\n",
-      "    if low < high {\n",
-      "        let pivotIndex = partition(array, low, high)\n",
-      "        quickSort(&array, low, pivot\n",
-      "==========\n",
-      "Prompt: 12 tokens, 78.111 tokens-per-sec\n",
-      "Generation: 100 tokens, 32.263 tokens-per-sec\n",
-      "Peak memory: 4.138 GB\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "!mlx_lm.generate --model \"mlx-community/Mistral-7B-Instruct-v0.3-4bit\" \\\n",
    "                 --prompt \"Write a quick sort in Swift\""
@ -97,60 +77,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "id": "f7add212",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Fetching 7 files: 100%|███████████████████████| 7/7 [00:00<00:00, 100205.22it/s]\n",
-      "==========\n",
-      "Here's a simple implementation of the QuickSort algorithm in Swift. This version uses Swift's built-in `swapAt()` function to swap elements in an array.\n",
-      "\n",
-      "```swift\n",
-      "func quickSort(_ array: inout [Int], _ low: Int, _ high: Int) {\n",
-      "    if low < high {\n",
-      "        let pivotIndex = partition(array, low, high)\n",
-      "        quickSort(&array, low, pivotIndex - 1)\n",
-      "        quickSort(&array, pivotIndex + 1, high)\n",
-      "    }\n",
-      "}\n",
-      "\n",
-      "func partition(_ array: inout [Int], _ low: Int, _ high: Int) -> Int {\n",
-      "    let pivot = array[high]\n",
-      "    var i = low\n",
-      "    for j in low..<high {\n",
-      "        if array[j] < pivot {\n",
-      "            swapAt(&array, i, j)\n",
-      "            i += 1\n",
-      "        }\n",
-      "    }\n",
-      "    swapAt(&array, i, high)\n",
-      "    return i\n",
-      "}\n",
-      "\n",
-      "func swapAt(_ array: inout [Int], _ i: Int, _ j: Int) {\n",
-      "    let temp = array[i]\n",
-      "    array[i] = array[j]\n",
-      "    array[j] = temp\n",
-      "}\n",
-      "\n",
-      "// Example usage:\n",
-      "var arr = [3,6,8,5,2,1,9,7,4]\n",
-      "quickSort(&arr, 0, arr.count - 1)\n",
-      "print(arr) // Output: [1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
-      "```\n",
-      "\n",
-      "This code sorts an array of integers in ascending order using the QuickSort algorithm. The `quickSort` function takes an array, a starting index, and an ending index, and recursively sorts the subarrays on either side of the pivot element. The `partition` function finds the pivot index, and the `swapAt` function swaps two elements at given indices.\n",
-      "==========\n",
-      "Prompt: 12 tokens, 79.511 tokens-per-sec\n",
-      "Generation: 448 tokens, 31.514 tokens-per-sec\n",
-      "Peak memory: 4.184 GB\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "!mlx_lm.generate --model \"mlx-community/Mistral-7B-Instruct-v0.3-4bit\" \\\n",
    "                 --prompt \"Write a quick sort in Swift\" \\\n",
@ -170,62 +100,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "id": "e042a321",
   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "cf86d861ac5e4879a194bfbc3f0e908d",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "==========\n",
-      "Here's a simple implementation of the QuickSort algorithm in Swift. This version uses Swift's built-in `swapAt()` function to swap elements in an array.\n",
-      "\n",
-      "```swift\n",
-      "func quickSort(_ array: inout [Int], _ low: Int, _ high: Int) {\n",
-      "    if low < high {\n",
-      "        let pivotIndex = partition(array, low, high)\n",
-      "        quickSort(&array, low, pivotIndex - 1)\n",
-      "        quickSort(&array, pivotIndex + 1, high)\n",
-      "    }\n",
-      "}\n",
-      "\n",
-      "func partition(_ array: inout [Int], _ low: Int, _ high: Int) -> Int {\n",
-      "    let pivot = array[high]\n",
-      "    var i = low\n",
-      "    for j in low..<high {\n",
-      "        if array[j] < pivot {\n",
-      "            swapAt(&array, i, j)\n",
-      "            i += 1\n",
-      "        }\n",
-      "    }\n",
-      "    swapAt(&array, i, high)\n",
-      "    return i\n",
-      "}\n",
-      "\n",
-      "func swapAt(_ array: inout [Int], _ i: Int, _ j: Int) {\n",
-      "    let temp\n",
-      "==========\n",
-      "Prompt: 12 tokens, 78.600 tokens-per-sec\n",
-      "Generation: 256 tokens, 31.893 tokens-per-sec\n",
-      "Peak memory: 4.184 GB\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# Using MLX LM from Python\n",
    "\n",
@ -255,25 +133,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
   "id": "629dfa50",
   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6602e0adecbe4b58ba99e514fc9c9032",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
   "source": [
    "from mlx_lm import load, generate\n",
    "\n",
@ -302,24 +165,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
   "id": "a3b56bdc",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Attention(\n",
-      "  (q_proj): QuantizedLinear(input_dims=4096, output_dims=4096, bias=False, group_size=64, bits=4)\n",
-      "  (k_proj): QuantizedLinear(input_dims=4096, output_dims=1024, bias=False, group_size=64, bits=4)\n",
-      "  (v_proj): QuantizedLinear(input_dims=4096, output_dims=1024, bias=False, group_size=64, bits=4)\n",
-      "  (o_proj): QuantizedLinear(input_dims=4096, output_dims=4096, bias=False, group_size=64, bits=4)\n",
-      "  (rope): RoPE(128, traditional=False)\n",
-      ")\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "print(model.layers[0].self_attn)"
   ]
@ -334,62 +183,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
   "id": "775fd3f3",
   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ed3f0f097da64b379819f577a29dc9f6",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "==========\n",
-      "Here's a simple implementation of the QuickSort algorithm in Swift. This version uses Swift's built-in `swapAt()` function to swap elements in an array.\n",
-      "\n",
-      "```swift\n",
-      "func quickSort(_ array: inout [Int], _ low: Int, _ high: Int) {\n",
-      "    if low < high {\n",
-      "        let pivotIndex = partition(array, low, high)\n",
-      "        quickSort(&array, low, pivotIndex - 1)\n",
-      "        quickSort(&array, pivotIndex + 1, high)\n",
-      "    }\n",
-      "}\n",
-      "\n",
-      "func partition(_ array: inout [Int], _ low: Int, _ high: Int) -> Int {\n",
-      "    let pivot = array[high]\n",
-      "    var i = low\n",
-      "    for j in low..<high {\n",
-      "        if array[j] < pivot {\n",
-      "            swapAt(&array, i, j)\n",
-      "            i += 1\n",
-      "        }\n",
-      "    }\n",
-      "    swapAt(&array, i, high)\n",
-      "    return i\n",
-      "}\n",
-      "\n",
-      "func swapAt(_ array: inout [Int], _ i: Int, _ j: Int) {\n",
-      "    let temp\n",
-      "==========\n",
-      "Prompt: 12 tokens, 76.085 tokens-per-sec\n",
-      "Generation: 256 tokens, 31.792 tokens-per-sec\n",
-      "Peak memory: 8.155 GB\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from mlx_lm import load, generate\n",
    "from mlx_lm.models.cache import make_prompt_cache\n",
@ -420,30 +217,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
   "id": "0d669073",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "==========\n",
-      "Imagine you have a big box full of toys. You want to sort them so that all the red toys are together, all the blue toys are together, and all the green toys are together.\n",
-      "\n",
-      "1. First, you pick one toy (this is your pivot toy).\n",
-      "2. Then, you look at all the other toys one by one. If a toy is not red, you move it to the left if it's not red, and if it's blue, you move it to the right. You keep doing this until you have looked at all the toys.\n",
-      "3. Now, you have a group of toys on the left that are red or blue, and a group of toys on the right that are green or blue. You swap the pivot toy with one of the toys in the group on the left or right, depending on whether you want red toys on the left or right.\n",
-      "4. Now, you repeat the same process with the group of toys on the left and the group of toys on the right, until all the toys are sorted!\n",
-      "\n",
-      "This is a quick way to sort a big box of toys, and it's called QuickSort!\n",
-      "==========\n",
-      "Prompt: 16 tokens, 116.542 tokens-per-sec\n",
-      "Generation: 245 tokens, 29.632 tokens-per-sec\n",
-      "Peak memory: 8.155 GB\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "prompt = \"how can I explain it to a five year old?\"\n",
    "messages = [{\"role\": \"user\", \"content\": prompt}]\n",
@ -468,22 +245,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
   "id": "f8218994",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[INFO] Loading\n",
-      "Fetching 9 files: 100%|███████████████████████| 9/9 [00:00<00:00, 161319.38it/s]\n",
-      "[INFO] Using dtype: float16\n",
-      "[INFO] Quantizing\n",
-      "[INFO] Quantized model with 4.500 bits per weight.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "import os\n",
    "mlx_path=\"./mistral-7b-v0.3-4bit\"\n",
@ -496,24 +261,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
   "id": "d4e62b96",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Size of original bfloat16 model\n",
-      "===============================\n",
-      "3.8174 GB\n",
-      "\n",
-      "Size of quantized model\n",
-      "===============================\n",
-      "13.5049 GB\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "import subprocess\n",
    "\n",
@ -523,12 +274,13 @@
    "    size_gb = size_mb / 1024\n",
    "    return size_gb\n",
    "\n",
-    "directory_path = './mistral-7b-v0.3-4bit'\n",
+    "\n",
+    "directory_path = os.path.expanduser('~/.cache/huggingface/hub/models--mlx-community--Mistral-7B-Instruct-v0.3')\n",
    "print(\"Size of original bfloat16 model\")\n",
    "print(\"===============================\")\n",
    "print(f\"{get_directory_size_mb(directory_path):2.4f} GB\")\n",
    "print()\n",
-    "directory_path = os.path.expanduser('~/.cache/huggingface/hub/models--mlx-community--Mistral-7B-Instruct-v0.3')\n",
+    "directory_path = './mistral-7b-v0.3-4bit'\n",
    "print(\"Size of quantized model\")\n",
    "print(\"===============================\")\n",
    "print(f\"{get_directory_size_mb(directory_path):2.4f} GB\")"
@ -544,79 +296,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
   "id": "9d2cd325",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[INFO] Loading\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "15c0aac6b06b4541ab1d5d20f5c5a255",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ba81f7892189428cadcfed19173a0731",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "consolidated.safetensors:  42%|####1     | 10.5G/25.0G [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[INFO] Using dtype: bfloat16\n",
-      "[INFO] Quantizing\n",
-      "[INFO] Quantized model with 4.574 bits per weight.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "60442f8dcab848ef9771a8e2b5516a13",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "README.md:   0%|          | 0.00/7.82k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Size of mixed 4-6-bit quantized model\n",
-      "============================\n",
-      "3.8799 GB\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "# Model quantization with MLX LM in Python\n",
    "\n",
@ -658,23 +341,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
   "id": "5efb794d",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "==========\n",
-      "The latest Super Bowl, Super Bowl LV (55), was played on February 7, 2021, between the Kansas City Chiefs and the Tampa Bay Buccaneers. The Tampa Bay Buccaneers, led by quarterback Tom Brady, won the game, making it his seventh Super Bowl victory. This made Tom Brady the most successful quarterback in Super Bowl history.\n",
-      "==========\n",
-      "Prompt: 11 tokens, 8.131 tokens-per-sec\n",
-      "Generation: 87 tokens, 31.385 tokens-per-sec\n",
-      "Peak memory: 4.137 GB\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "!mlx_lm.generate --model \"./mistral-7b-v0.3-4bit\" \\\n",
    "    --prompt \"Who played in the latest super bowl?\""
@ -690,7 +360,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
   "id": "b4c31126",
   "metadata": {},
   "outputs": [],
@ -713,23 +383,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
   "id": "7dcf9874",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "==========\n",
-      "In the latest Super Bowl, the Philadelphia Eagles soared to victory, claiming their championship title with a resounding 40-22 win over the Kansas City Chiefs. The Eagles' triumphant flight was led by their fearless leader, Jalen Hurts, who not only secured his place in the annals of Super Bowl history but also etched his name into the hearts of Eagles fans everywhere. This wasn't just any Super Bowl; it was Super Bowl\n",
-      "==========\n",
-      "Prompt: 11 tokens, 28.533 tokens-per-sec\n",
-      "Generation: 100 tokens, 30.986 tokens-per-sec\n",
-      "Peak memory: 4.151 GB\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "!mlx_lm.generate --model \"./mistral-7b-v0.3-4bit\" \\\n",
    "                 --prompt \"Who played in the latest super bowl?\" \\\n",
@ -746,18 +403,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
   "id": "8935f7b6",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Loading pretrained model\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "!mlx_lm.fuse --model \"./mistral-7b-v0.3-4bit\" \\\n",
    "            --adapter-path \"adapters\" \\\n",
@ -774,23 +423,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": null,
   "id": "343a8977",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "==========\n",
-      "The latest Super Bowl, Super Bowl LIX, was played between the Philadelphia Eagles and the Kansas City Chiefs. The Philadelphia Eagles emerged victorious, with Jalen Hurts leading the charge for the Eagles.\n",
-      "==========\n",
-      "Prompt: 11 tokens, 11.760 tokens-per-sec\n",
-      "Generation: 46 tokens, 32.194 tokens-per-sec\n",
-      "Peak memory: 4.137 GB\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "!mlx_lm.generate --model \"./fused-mistral-7b-v0.3-4bit\" \\\n",
    "                 --prompt \"Who played in the latest super bowl?\" \\\n",
@ -800,7 +436,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "mlx",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
@ -814,7 +450,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.12.9"
+   "version": "3.9.17"
  }
 },
 "nbformat": 4,