Files

164 lines
115 KiB
Plaintext
Raw Permalink Normal View History

2025-12-17 10:53:43 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAGzCAYAAAA8I13DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAACxh0lEQVR4nOzdd1gU19cH8O/SewcBRUBQBFTAjg0VFSyIxlhQFERNYje22LH3EmPsCmjsGgua2AOKil1QAyI2sIANKdLLef/wZX6uC7iLIEjO53nmedw7d+6cWXdnDzP33hEREYExxhhjrAqQq+gAGGOMMcbKCic2jDHGGKsyOLFhjDHGWJXBiQ1jjDHGqgxObBhjjDFWZXBiwxhjjLEqgxMbxhhjjFUZnNgwxhhjrMrgxIYxxhhjVQYnNv8Bbdu2Rdu2bSs6jAoVGxuLTp06QVtbGyKRCIcPH67okBhjpWRhYQFfX99y3UdoaChEIhFCQ0PLdT+s7HFiUwnduXMH33//PczNzaGiooLq1aujY8eOWLNmTUWH9s3y8fHBnTt3sGDBAvzxxx9o3LhxRYdU5bx//x7+/v5wd3eHnp4eRCIRgoKCiq0fHR0Nd3d3aGhoQE9PDwMHDsTr168l6hUUFGDp0qWwtLSEiooKGjRogN27d5c6zlOnTmHIkCGoV68e5OXlYWFhUWxdWfYt7fFI41t5LxmrlIhVKhcvXiQlJSWytramefPm0ebNm2nWrFnUqVMnsrKyKlWbLi4u5OLiUraBfkMyMjIIAE2fPr2iQ6nSHj9+TACoZs2a1LZtWwJAgYGBRdZ9+vQpGRgYkJWVFa1evZoWLFhAurq65ODgQNnZ2WJ1p0yZQgBo2LBhtGnTJuratSsBoN27d5cqTh8fH1JRUaEWLVpQjRo1yNzcvNi60u5bluORxrfyXlaUrKwsysnJKdd9hISEEAAKCQkp1/2wsseJTSXTpUsXMjQ0pHfv3kmse/nyZanarIjEJjc3t1Qn9LKUmZlJ+fn5FBcXRwBo2bJlZdb2+/fvy6ytqiIrK4sSEhKIiOjatWsl/hgPHz6cVFVVKS4uTig7ffo0AaCNGzcKZc+ePSNFRUUaOXKkUFZQUECtW7emGjVqUF5ensxxPn/+XPhR7Nq1a7GJjSz7lvZ4pPWtvJdV2beW2OTn51NmZmZFh1Ep8K2oSubhw4ewt7eHjo6OxDojIyOx13l5eZg3bx6srKygrKwMCwsLTJs2DdnZ2cW2//LlSygoKGDOnDkS62JiYiASifD7778LZcnJyRg3bhzMzMygrKwMa2trLFmyBAUFBUKdJ0+eQCQSYfny5fj111+FeKKiooqNQyQSYdSoUdi5cydsbGygoqKCRo0a4fz58xJ1nz9/Dj8/P1SrVg3Kysqwt7dHQECAWJ3C++F79uzBjBkzUL16daipqWH8+PEwNzcHAEyaNAkikUjs1sOtW7fQuXNnaGlpQUNDA66urrh8+bJY20FBQRCJRDh37hxGjBgBIyMj1KhRA8CH/kv16tXD7du34eLiAjU1NVhbW+PAgQMAgHPnzqFZs2ZQVVWFjY0Nzpw5I9Z2XFwcRowYARsbG6iqqkJfXx+9e/fGkydPiozh4sWLGD9+PAwNDaGuro6ePXsWecvh+PHjcHFxgaamJrS0tNCkSRPs2rVLrM6VK1fg7u4ObW1tqKmpwcXFBRcvXpRo6969e4iPj5co/5SysjKMjY0/Ww8A/vzzT3Tr1g01a9YUyjp06IA6depg3759QtmRI0eQm5uLESNGCGUikQjDhw/Hs2fPEB4eDgD4559/ICcnh1mzZontZ9euXRCJRFi/fr1QZmpqCkVFxc/GKO2+pT0eIkK7du1gaGiIV69eCfVycnJQv359WFlZIT09HUDFvpcl+eeff9C6dWuoq6tDR0cHnp6eiI6OFqsze/ZsiEQiPHjwAL6+vtDR0YG2tjYGDx6MjIyMz+4jNjYWvXr1grGxMV
RUVFCjRg3069cPKSkpQp1P+9jI8v0oKCjA7NmzYWpqCjU1NbRr1w5RUVFS99uR9ntTlKysLMyePRt16tSBiooKTExM8N133+Hhw4dCnfT0dEyYMEE459rY2GD58uUgIrG2Pj6H2tvbQ1lZGSdOnAAg3TmzSqvozIqJ69SpE2lqatKdO3c+W9fHx4cA0Pfff09r166lQYMGEQDq0aOHWL1Pr9i0b9+e7OzsJNqbM2cOycvLU2JiIhERpaenU4MGDUhfX5+mTZtGGzZsoEGDBpFIJKKxY8cK2xVeNrezs6NatWrR4sWLadWqVWJ/QX4KANWrV48MDAxo7ty5tGTJEjI3NydVVVWxY09MTKQaNWqQmZkZzZ07l9avX0/du3cnALRq1SqhXuFfV3Z2duTo6EgrV66kRYsWUWRkJK1atYoAkJeXF/3xxx906NAhIiK6e/cuqaurk4mJCc2bN48WL15MlpaWpKysTJcvXxbaDgwMFNp2cXGhNWvW0OLFi4X31tTUlMzMzGjSpEm0Zs0asrOzI3l5edqzZw8ZGxvT7Nmz6ddff6Xq1auTtrY2paamCm3v37+fHBwcaNasWbRp0yaaNm0a6erqkrm5OaWnp0vE4OTkRO3bt6c1a9bQhAkTSF5envr06SP23gYGBpJIJKJ69erRggULaO3atTR06FAaOHCgUOfs2bOkpKREzs7OtGLFClq1ahU1aNCAlJSU6MqVKxL/V7Je8SvpKsOzZ88IAC1ZskRinbe3N+np6Qmvhw4dSurq6lRQUCBW78GDBwSAfvvtN6Fs5MiRpKCgQDdu3CAiohcvXpCenh516NBBYvtCJV2xkXbfshzPo0ePSENDg3r27CmUTZkyhUQiEZ07d67IOCrivSzK6dOnSUFBgerUqUNLly6lOXPmkIGBAenq6tLjx4+Fev7+/sJn9bvvvqN169bR0KFDCQBNnjy5xH1kZ2eTpaUlmZqa0vz582nLli00Z84catKkCT158kSoZ25uTj4+PsJrWb4fkydPJgDk4eFBv//+Ow0bNoxq1KhBBgYGYm0WdcVGlu/Np/Ly8sjV1ZUAUL9+/ej333+nRYsWUfv27enw4cNE9OEKWvv27UkkEtHQoUPp999/Jw8PDwJA48aNE2sPANna2pKhoSHNmTOH1q5dS7du3ZL6nFmVcWJTyZw6dYrk5eVJXl6enJ2dafLkyXTy5EmJ+8kREREEgIYOHSpWPnHiRAJA//zzj1D2aWKzceNGAiCRPNnZ2VH79u2F1/PmzSN1dXW6f/++WL0pU6aQvLw8xcfHE9H/EhstLS169eqVVMcJgADQ9evXhbK4uDhSUVERO+kPGTKETExM6M2bN2Lb9+vXj7S1tSkjI4OI/ncSqlWrllBWqDC+T29F9ejRg5SUlOjhw4dC2YsXL0hTU5PatGkjlBWeNFu1aiVxud7FxYUA0K5du4Sye/fuEQCSk5MTS5BOnjwp8QP1aaxEROHh4QSAtm/fLhHDpz/SP//8M8nLy1NycjIRESUnJ5OmpiY1a9ZM4rJ04XYFBQVUu3ZtcnNzE2srIyODLC0tqWPHjmLblXViU7ju4+MrNGnSJAJAWVlZRPQh8ahVq5ZEvfT0dAJAU6ZMESuztrYme3t7ysrKoq5du5KWllaJCXZJiY20+5bleIj+9/3bsWMHXb58meTl5SV+tD5WEe9lURwdHcnIyIjevn0rlEVGRpKcnBwNGjRIKCtMbPz8/MS279mzJ+nr65e4j1u3bhEA2r9/f4n1iktsPvf9SExMJAUFBYk//mbPnk0ASkxsZP3efCogIIAA0MqVKyXWFbZ3+PBhAkDz588XW//999+TSCSiBw8eCGWF55h///1XrK6058yqjG9FVTIdO3ZEeHg4unfvjsjISCxduhRubm6oXr06goODhXp///03AGD8+PFi20+YMAEA8NdffxW7j++++w4KCgrYu3evUHb37l1ERUWhb9++Qtn+/fvRunVr6Orq4s2bN8
LSoUMH5OfnS9w26tWrFwwNDaU+VmdnZzRq1Eh4XbNmTXh6euLkyZPIz88HEeHPP/+Eh4cHiEgsBjc3N6SkpODmzZt
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot the effect of SIMD vectorization and CUDA offloading on the solver performance.\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"# Each timing below is the fastest elapsed time among more than 10 iterations,\n",
"# in seconds (see the y-axis label). The grid size is 100 x 100 x 100.\n",
"\n",
"t_local_no_simd = 0.955768\n",
"t_local_avx = 0.281595\n",
"t_fugaku_no_simd = 5.10849\n",
"# Despite the `_avx` suffix this is the ARM SVE run (see its legend label below).\n",
"t_fugaku_avx = 2.58907\n",
"t_local_cuda = 0.058076\n",
"\n",
"# Speed-up of each optimized run relative to its non-SIMD baseline.\n",
"# The CUDA run is compared against the local (intel) non-SIMD baseline.\n",
"speedup_local = t_local_no_simd / t_local_avx\n",
"speedup_fugaku = t_fugaku_no_simd / t_fugaku_avx\n",
"speedup_cuda = t_local_no_simd / t_local_cuda\n",
"\n",
"\n",
"fig, ax = plt.subplots()\n",
"ax.set_title('Solver performance: 100x100x100 on single core')\n",
"ax.set_ylabel('Elapsed time 1 iteration (best) [s]')\n",
"ax.set_xticks([0, 1, 2])\n",
"ax.set_xticklabels(['intel core i7', 'Fujitsu A64FX', 'NVIDIA GeForce GTX 1070'])\n",
"ax.set_ylim(0, 10)\n",
"\n",
"# For every platform the baseline (red) is drawn first and the faster,\n",
"# optimized run (blue) is drawn on top of it at the same x position.\n",
"ax.bar(0, t_local_no_simd, color='r', label='local (no SIMD)')\n",
"ax.bar(0, t_local_avx, color='b', label='local (AVX+FMA 256bits)')\n",
"ax.bar(1, t_fugaku_no_simd, color='r', label='Fugaku (no SIMD)')\n",
"ax.bar(1, t_fugaku_avx, color='b', label='Fugaku (ARM SVE+FMA 512bits)')\n",
"# Same baseline as the bar at x=0: it is left unlabelled so that the legend\n",
"# does not list 'local (no SIMD)' twice.\n",
"ax.bar(2, t_local_no_simd, color='r')\n",
"ax.bar(2, t_local_cuda, color='b', label='local (CUDA NVIDIA GeForce GTX 1070)')\n",
"ax.legend()\n",
"\n",
"# Annotate each platform with its speed-up in a large font; the y positions\n",
"# are hand-picked to sit just above the corresponding baseline bar.\n",
"ax.text(0, 1.4, 'speed up: {:.2f}x'.format(speedup_local), fontsize=15, ha='center')\n",
"ax.text(1, 5.5, 'speed up: {:.2f}x'.format(speedup_fugaku), fontsize=15, ha='center')\n",
"ax.text(2, 1.4, 'speed up: {:.2f}x'.format(speedup_cuda), fontsize=15, ha='center')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnIAAAHWCAYAAADzS2TwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAACYV0lEQVR4nOzdd1gU1/s28HsBKVKlg6JgQYoNjQWNSJSIGFs0FiwgYsdC1FjytffeomKjaOwGo8QkGjWKBLGLFbEERSN2BUEBhfP+4Y95XXcpqyis3p/r2ivZM2fOeWZ2XJ49M3NGJoQQICIiIiK1o1HSARARERHRu2EiR0RERKSmmMgRERERqSkmckRERERqiokcERERkZpiIkdERESkppjIEREREakpJnJEREREaoqJHBEREZGaYiJHEplMhsmTJ5d0GB9FREQEZDIZbty4UWxtnjhxAo0bN4a+vj5kMhni4+OLrW1lrl69ipYtW8LY2BgymQw7d+78oP2VlBs3bkAmkyEiIuK925o8eTJkMtn7B/UOinM73mRvb4/evXsXa5sfkqenJzw9PYtct0aNGh82IPqs9e7dG/b29oXW+1D/fosDE7lidv78eXz33XeoVKkSdHV1Ub58eXz99df46aefSjq0z9LMmTM/SoLz8uVLdO7cGY8fP8aiRYvw888/o1KlSh+0T39/f5w/fx4zZszAzz//jC+++OKD9kcl58iRI5g8eTKePn1a0qEUuzt37mDy5MnF/sPn+fPnWL58OVq2bAkbGxsYGhrCzc0NISEhyMnJUaifm5uLuXPnwsHBAbq6uqhVqxY2b96stO2EhAS0atUKBgYGMDU1Ra9evfDgwYN3ivPEiRMYMmQIXF1doa+vj4oVK6JLly64cuXKe/WtyvYURl325WdLULGJjY0V2traomrVqmLatGlizZo1YuLEiaJly5aiSpUqJR1eoQCISZMmlXQYxUpfX1/4+/srlL969Uq8ePFC5ObmFks/CQkJAoBYs2ZNsbRXmOfPnwsA4n//+99H6a8kJSUlCQAiPDz8vdt6+fKlePHixfsH9Q7eZzvmzZsnAIikpCSFZZmZmSI7O/v9A/xIsrKyRFZWlvT+xIkT+e6XZs2aCVdX13fq5/z580ImkwkvLy8xd+5csXLlSvHtt98KAMLPz0+h/tixYwUA0a9fP7F69WrxzTffCABi8+bNcvVu3bolzM3NRZUqVcSSJUvEjBkzRLly5UTt2rXltquoOnXqJKytrcXQoUPFmjVrxLRp04SVlZXQ19cX58+ff+e+i7o9RaEu+/JdZGdni8zMzELrFef3UHFjIleMWrduLSwsLMSTJ08Ult27d+/jB6Si0p7I5eTkqPxHOL9ErrhFR0cLAGL79u3F1mZ6enq+y27evCkAiHnz5n2U/j60jIyMfJeV5i9QVXyoRE7dfahE7sGDB+LChQsK5QEBAQKAuHr1qlR2+/ZtUaZMGREUFCSV5ebmiqZNm4oKFSqIV69eSeWDBg0Senp64ubNm1LZvn37BACxatUqleOMjY1VSFquXLkidHR0RI8ePeTKi9q3KttTFOqyL1Wh6vddaf4eYiJXjKpXry48PT2LVBeACAoKEhs2bBCOjo5CR0dH1K1bV0RHRyvUvX37tggICBCWlpZCW1tbuLi4iNDQUIV6mZmZYuLEiaJKlSpCW1tbVKhQQfzwww8KvzYyMzNFcHCwMDc3FwYGBqJt27bi1q1bRUrkwsPDlf5BOXjwoAAgDh48KJXlfQmfPHlSuLu7C11dXWFvby9CQkJU3kcuLi5CS0tL/Prrr0KI13/Y3N3dhampqdDV1RV169ZVSKIAKLzykrr8tmP58uXCxcVFaGtrCxsbGzF48GClifmb/P39Ffpp1qyZtPzAgQPiyy+/FGXLlhXGxsaiXbt24tKlS3JtTJo0SQAQFy9eFL6+vsLExETUqVNHaX95dd98VapUSVp++vRp0apVK2FoaC
j09fVF8+bNRVxcnFwbedt/6NAhMWjQIGFhYSFMTEzE2bNnBQCxa9cuqe7JkycFAOHm5ibXRqtWrUSDBg2k9zt37hStW7cWNjY2QltbW1SuXFlMnTpV4Y/Gm8dF06ZNhZ6enhg+fLgQQognT54If39/YWRkJIyNjYWfn584c+ZMkb5As7OzxeTJk0XVqlWFjo6OMDU1FU2aNBF//fWXwr57U95xtm3bNuHs7Cx0dXVFo0aNxLlz54QQQqxcuVJUqVJF6OjoiGbNmikcM5UqVVL6Y6FZs2Zyx4GyPwRnz54V/v7+wsHBQejo6AgrKysREBAgHj58qBDz26+8OJT1f/36dfHdd9+JcuXKCT09PdGwYUOxe/duuTp5/2a3bt0qpk+fLsqXLy90dHRE8+bN5f4wK/M+x8mb+yUvhrdfefso71i5ePGi8PT0FHp6esLW1lbMmTOnwPgKEhUVJQCIqKgoqWz58uXSv783bdq0SQAQMTExUpmlpaXo3LmzQruOjo6iRYsWQojXiYunp6cwNzeX+xGflZUlatSoISpXrlxoIlG3bl1Rt25dubKi9K3K9hw4cEDIZDIxYcIEuXobN24UAMSKFSsKjPFj7MuCPHz4UPTs2VMYGhpK3xfx8fEK/878/f2Fvr6+uHbtmvDx8REGBgaiffv20rI3vz+FeL/voZKgpfrJWMpPpUqVEBcXhwsXLhTpAt3o6Ghs3boVw4YNg46ODlasWIFWrVrh+PHj0vr37t1Do0aNIJPJMGTIEFhYWODPP/9EYGAg0tLSEBwcDOD1NQnt2rXDP//8g/79+8PZ2Rnnz5/HokWLcOXKFbnrxPr27YsNGzage/fuaNy4Mf7++2988803H2KX4MmTJ2jdujW6dOkCX19fbNu2DYMGDYK2tjb69OlT6Pp///03tm3bhiFDhsDc3Fy6KHXJkiVo164devTogezsbGzZsgWdO3fG7t27pW35+eef0bdvXzRo0AD9+/cHAFSpUiXfviZPnowpU6bAy8sLgwYNQmJiIkJCQnDixAnExsaiTJkyStcbMGAAypcvj5kzZ2LYsGGoX78+rKysAAD79++Hj48PKleujMmTJ+PFixf46aef0KRJE5w+fVrhItvOnTujWrVqmDlzJoQQSvvr2LEjTExM8P3338PX1xetW7eGgYEBAODixYto2rQpjIyMMHr0aJQpUwarVq2Cp6cnoqOj0bBhQ7m2Bg8eDAsLC0ycOBEZGRmoUaMGTExMcPjwYbRr1w4AEBMTAw0NDZw9exZpaWkwMjJCbm4ujhw5Iu1X4PUNJAYGBhgxYgQMDAzw999/Y+LEiUhLS8O8efPk+n306BF8fHzQrVs39OzZE1ZWVhBCoH379vjnn38wcOBAODs749dff4W/v3++n9nbn9+sWbOkzzwtLQ0nT57E6dOn8fXXXxe4bkxMDKKiohAUFAQAmDVrFtq0aYPRo0djxYoVGDx4MJ48eYK5c+eiT58++Pvvv4sUU2H27duHf//9FwEBAbC2tsbFixexevVqXLx4EUePHoVMJkPHjh1x5coVbN68GYsWLYK5uTkAwMLCQmmb9+7dQ+PGjfH8+XMMGzYMZmZmWLduHdq1a4dffvkF3377rVz92bNnQ0NDA6NGjUJqairmzp2LHj164NixY/nG/T7HyZucnZ0xdepUTJw4Ef3790fTpk0BAI0bN5bqPHnyBK1atULHjh3RpUsX/PLLLxgzZgxq1qwJHx+fou/s/3P37l0AkPYjAJw5cwb6+vpwdnaWq9ugQQNp+Zdffon//vsP9+/fV3o9aoMGDfDHH38AeH3jWFhYGGrVqoWBAwdix44dAIBJkybh4sWLOHToEPT19fONUQiBe/fuwdXVVSorat+qbE/z5s0xePBgzJo1Cx06dEDdunWRkpKCoUOHwsvLCwMHDsw3RuDj7Mv85Obmom3btjh+/DgGDRoEJycn7Nq1K9/vi1evXsHb2xtffvkl5s+fj7Jlyyqt97
7fQyWiZPPIT8tff/0lNDU1haampnB3dxejR48We/fuVXr9Cv7vl+fJkyelsps3bwpdXV3x7bffSmWBgYHCxsZG7he
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot the speed-up ratios for the 200 x 200 x 200 grid.\n",
"# Relies on `plt` (matplotlib.pyplot) imported in the first plotting cell.\n",
"\n",
"# NOTE(review): every timing is divided by 10, presumably to turn a total over\n",
"# 10 iterations into a per-iteration time -- confirm against the benchmark logs.\n",
"t_local_no_simd = 186.571 / 10\n",
"t_local_avx = 25.903 / 10\n",
"t_gpu = 5.45952 / 10\n",
"\n",
"# Speed-ups are relative to the non-SIMD run, which is therefore 1.0 by definition.\n",
"speedup_local = 1.0\n",
"speedup_avx = t_local_no_simd / t_local_avx\n",
"speedup_cuda = t_local_no_simd / t_gpu\n",
"\n",
"\n",
"fig, ax = plt.subplots()\n",
"\n",
"ax.set_title('Speed up ratio for forward simulation with 200x200x200 grid')\n",
"\n",
"ax.set_ylabel('Speed up ratio')\n",
"ax.set_xticks([0, 1, 2])\n",
"ax.set_xticklabels(['baseline', 'intel core i7 with SIMD', 'NVIDIA GeForce GTX 1070'])\n",
"\n",
"# One bar per configuration; the baseline bar uses `speedup_local` so the\n",
"# reference value is defined in exactly one place.\n",
"ax.bar(0, speedup_local, color='r', label='baseline (intel core i7 no SIMD)')\n",
"ax.bar(1, speedup_avx, color='b', label='intel core i7 + \\nmemory relocation\\n + SIMD (AVX+FMA 256bits)')\n",
"ax.bar(2, speedup_cuda, color='g', label='memory relocation \\n+ gpu (CUDA NVIDIA GeForce GTX 1070)')\n",
"ax.legend()\n",
"\n",
"# tight_layout is applied before saving so the written PNG uses the adjusted layout.\n",
"plt.tight_layout()\n",
"plt.savefig('speedup_200x200x200.png', dpi=300)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "1472887337ddd1717660a5ad84f7170e87aa3d39bd2f4b7d54c31e0901f516a9"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}