def display_bar(x_axis, y_axes, title=None, y_index=None):
    """Draw a grouped bar chart and optionally highlight the largest bar of one series.

    Args:
        x_axis: Labels for the x axis. When ``y_axes`` is a DataFrame this is
            forwarded to ``DataFrame.set_index`` (a column key or an
            array-like of the same length); otherwise it is used as the index
            of the frame that gets plotted.
        y_axes: Values to plot. Supported types are ``pd.DataFrame``, ``dict``
            (series name -> values), ``pd.Series`` and ``np.ndarray``. Any
            other type makes the function return without plotting.
        title: Optional chart title.
        y_index: Optional name of the plotted column whose largest value is
            highlighted in orange. Ignored when it is not a column of the
            plotted data.

    Returns:
        None. The chart is drawn through pandas' plotting backend.
    """
    if isinstance(y_axes, pd.DataFrame):
        data = y_axes.set_index(x_axis)
    elif isinstance(y_axes, dict):
        data = pd.DataFrame(y_axes, index=x_axis)
    elif isinstance(y_axes, pd.Series):
        data = pd.DataFrame({"": y_axes}, index=x_axis)
    elif isinstance(y_axes, np.ndarray):
        # A raw array carries no labels, so pair it with x_axis by position.
        # Wrapping it in a Series first would re-align on a 0..n-1 index and
        # turn every value into NaN for non-integer x labels.
        data = pd.DataFrame({"": y_axes}, index=x_axis)
    else:
        return

    fig = data.plot.bar(title=title)
    # Label every series, not only the first one.
    for container in fig.containers:
        fig.bar_label(container)

    # Test against the plotted columns: `in` on a Series checks index labels
    # and on an ndarray checks element values, neither of which is a column.
    if y_index is None or y_index not in data.columns:
        return
    # Only highlight when containers map one-to-one onto the columns;
    # otherwise the positional lookup below could recolour the wrong series.
    if len(fig.containers) != len(data.columns):
        return

    # get index label(s) of the top1 value in the requested series
    top1_indices = data[y_index].nlargest(1).index
    series_bars = fig.containers[list(data.columns).index(y_index)]
    # change the color for the top1 bar of that series only
    for label, bar in zip(data.index, series_bars):
        if label in top1_indices:
            bar.set_color('#FFA500')  # highlight in orange
b/profiler/msprof_analyze/cluster_analyse/recipes/p2p_pairing/stats.ipynb new file mode 100644 index 00000000000..0573ba03bb6 --- /dev/null +++ b/profiler/msprof_analyze/cluster_analyse/recipes/p2p_pairing/stats.ipynb @@ -0,0 +1,4068 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "## Slow Rank\n", + "集群场景通信算子快慢卡汇总分析\n", + "\n", + "1.根据卡粒度,统计每个Rank上的影响因子\n", + "\n", + "2.将统计的结果按柱状图呈现,TOP影响的极为慢卡候选" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 数据准备" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "import plotly.offline as pyo\n", + "\n", + "from IPython.display import display, HTML\n", + "\n", + "import cluster_display\n", + "\n", + "display(HTML(\"\"))\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_rows', None)\n", + "pyo.init_notebook_mode()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "展示各Rank受影响程度的统计表" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
slowAffectCount
rankId
010
19
215
\n", + "
" + ], + "text/plain": [ + " slowAffectCount\n", + "rankId \n", + "0 10\n", + "1 9\n", + "2 15" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = pd.read_csv(\"rank_stats.csv\", index_col=\"rankId\")\n", + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAHCCAYAAABPFau9AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjMsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvZiW1igAAAAlwSFlzAAAPYQAAD2EBqD+naQAAKZlJREFUeJzt3QmYzXX///E3JmOJsUSWMEK2se9LokSS6C4iZVJ37qyhLNMvssQgabI0yp1IlroTSfdtQoSsQ6Q7YbLNbWlaNMPI0Mz5X+/PdZ3znzFjGM75nDnnPB/X9b1mzvd853w/Z4z5vub9Wb55HA6HQwAAACzJa+tEAAAAivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQSg0NBQefrppyWQjRs3TvLkySO//vqrt5sCBBzCB+BH9u/fL4899phUqlRJChQoIOXLl5f7779fZs2aJbnNxo0bzcXfueXLl09Kly5t2n/gwAFvNw+ABwV58sUB2LN161Zp166dVKxYUZ577jkpU6aMxMfHy/bt2+Wtt96SwYMHS240ZMgQadKkiVy+fFm+++47mTt3rgkm33//vXkPAPwP4QPwE5MmTZKQkBDZtWuXFCtWLMNzCQkJklvdfffdptrhVL16denfv7988MEHMnLkSK+2DYBn0O0C+ImffvpJateunSl4KO3OuJYjR45I9+7dpUSJElKoUCFp3ry5fPHFF67n9QbYt912mwwfPty1Ly0tzZxPu0z++OMP1/6pU6dKUFCQnD9//obCiPP9pDd9+nRp2bKllCxZUgoWLCiNGjWSTz75JNPXaxfOoEGDZOXKlRIWFibBwcHm+7JmzZprnvv48eNStWpV83U///xzjtsO4PoQPgA/oeM8du/ebborckovtHphj4mJkQEDBpgqysWLF+Xhhx+WFStWuC7qrVq1kk2bNrm+TrtJEhMTzefffPONa//mzZulQYMGcuutt+a4LceOHTMfixcvnmG/dh3pa06YMEEmT55swo2GpfQByWnLli3mffTs2VOmTZtm3sujjz4qv/3221XPq2GnTZs2UqRIEdPtc/vtt+e47QCukwOAX/jyyy8d+fLlM1uLFi0cI0eOdMTExDguXbqU6dhKlSo5wsPDXY+HDh3q0F8Hmzdvdu07d+6co3Llyo7Q0FBHamqq2ff666+b109KSjKPZ86caV6radOmjlGjRpl9emyxYsUcw4YNy7a9GzZsMOecP3++45dffnGcOnXKsWbNGkfVqlUdefLkcezcuTPD8RcuXMjwWN9XWFiY4957782wX18zf/78jri4ONe+ffv2mf2zZs1y7Xv11VfNPj33gQMHHOXKlXM0adLE8fvvv1/jOw3gZlH5APyEzmrZtm2bqVbs27fP/MXfsWNHM+Nl1apV2X7tv//9b2natKm0bt3atU+rFv369TOViB9++MHVJZKammoGtzorHLpPN/1caeVFu2Cc3SfX8swzz0ipUqWkXLly8sADD5hKyqJFi8wg1PS0q8Xp7Nmz5jg9x549ezK9Zvv27aVKlSqux3Xr1pWiRYuarqUraXvvueceM/143bp1mSouANy
P8AH4Eb1gf/rpp+bivHPnTomIiJBz586ZAZ3OAHG1sQ460PNKNWvWdD2vGjZsaMaDOIOGM3xod0VsbKzp3nA+lz7IZGfs2LGydu1a073Tp08fEyry5s38q2n16tVmHIpOIdZxKRpYoqOjXd0+6emMnytpqNDvy5W6dOliulq0y0kDCgDPI3wAfih//vwmiOjYCL1A6zTWf/3rXzf9urfccos0a9bMjPuIi4uTM2fOmPChQUPPsWPHDhM+atSoYcLB9ahTp46pVHTr1k0WLlxoKjc6VVinCTvpa+p+DR5vv/22qdRoYHniiSfMQNgr6QDYrGR1rI4F0fEeixcvztH3AsCNI3wAfq5x48bm4+nTp7MdrHrw4MFM+3/88UfX804aNrSqol0UOvtFg4ZWInRGiYYE3bQScqOmTJliKig66NVp+fLlJnhodUK7aTp16mQCizu8/vrr8uyzz5oBqkuWLHHLawLIHuED8BMbNmzI8i97rRKorLpVnB588EETKHTMiFNycrK8++67ZixErVq1MoSPlJQUiYqKMhUPnQXj3K9jNU6dOnXd4z2yomM1tBqxYMECU1lxVjL0PDrexEnHouh02pulr6vvU7umwsPDrzk+BsDNI3wAfkJXMNUL94svvijz5s2TOXPmSO/eveXll182AaJv375X/drRo0ebqaVaUdAxGM5gcfToUZkxY0aGMRgtWrQw01y1UpI+ZGi149ChQ+bzmwkfasSIEa6Aozp37iwXLlwwA1J1BVSdbqvdP7omhzvo+/vwww+lQ4cO0qNHD/nqq6/c8roAskb4APyELsKly6trpUMXAtNNqxnanaBjMbJafMxJg4fOYHHeB0YHquq4kc8//1weeeSRDMcWLlzYrLdx5aBSZ+CoUKFChm6aG+0qatu2rWtA6b333ivvvfeeqYQMHTpUli5dahYyu7JtNzueRRct00GtXbt2Nd8zAJ6RR+fbeui1AQAAMqHyAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrgiSXSUtLMysk6o2enCsnAgCA3E1X7tAbWeodqrO6OWSuDh8aPHSRIgAA4Hv0ppB33HGHb4UPrXg4G8/trQEA8A1JSUmmeOC8jvtU+HB2tWjwIHwAAOBbrmfIBANOAQCAVYQPAABgFeEDAOD3Nm3aJF26dDEzMbRbYOXKlRmef/rpp83+9NsDDzzgtfb6u1w35uN6paamyuXLl73dDAQgvfV6vnz5vN0MADmQnJws9erVk2eeeUb+9re/ZXmMho3333/f9Tg4ONhiCwNLkC/OIz5z5oz88ccf3m4KAlixYsWkTJkyrEUD+IhOnTqZLTsaNvT/NTzP58KHM3iULl1aChUqxC9/WA+/Fy5ckISEBPO4bNmy3m4SADfZuHGjubYUL15c7r33XnnttdekZMmS3m6WXwryta4WZ/DgBwLeUrBgQfNRA4j+LNIFA/g+7XLR7pjKlSvLTz/9JC+//LKplGzbto3/44EePpxjPLTiAXiT82dQfyb5xQT4vp49e7o+r1OnjtStW1eqVKliqiH33XefV9vmj3xytgtdLfA2fgYB/3bnnXfKbbfdJnFxcd5uil/yyfABAIAn/e9//5PffvuNcV0eQvjwMp1b3q1bN6+2QQdQPvroo2Y5e/2LXsfVZLUPAHzV+fPnZe/evWZTR48eNZ+fOHHCPDdixAjZvn27HDt2TNavXy9du3aVqlWrSseOHb3ddL/kU2M+shM6+gur5zs2pbP4kqVLl8qTTz4pzz//vMyZMyfDcwsXLpTNmzfL1q1bTZkxJCRE5s6dm2nfzWrbtq3Ur19foqKiMj23fPlymTVrlnz77bdmYLGWPB977DEZNGiQlChRQmwZN26cWXzI+QsKgH+IjY2Vdu3auR4PHz7cfAwPD5fo6Gj57rvvzO9C/UNLFyLr0KGDTJw4kbU+PMRvwgey995778nIkSPlnXfekTfeeEMKFCjgek5HdtesWVP
CwsKy3ecp//d//ydTp06VYcOGyeTJk81//MOHD5sAtGjRInnhhRc83gYA/k3/+NGp8lcTExNjtT2Bjm4XSz755BMzglqnaeo04fbt25sV966UkpIiQ4YMMVM4NSC0bt1adu3a5Xq+cePGMn36dNdj7bLRFTe1bOjsp9RukvSDpLS8qBWM0aNHy1133SWffvpphv+QGkZ06WH9On2c1T5n21566SUpX768FC5cWJo1a2ZGgqf3zTffmON1NojOldeS5dmzZ0330tdffy1vvfWWa+liLW/u3LnTBA493+uvvy4tW7aU0NBQuf/++001RP8qcdK/TnT0ef78+aV69eommDjpa+lrpq9Y6F8wus/ZRv2oj7Wkqt9HbaOe7+DBg+b5BQsWyPjx42Xfvn2uNuo+AIB7Ufmw4PTp09KrVy+ZNm2aPPLII3Lu3DnTpZFVCtfqhF50tfxXqVIl8zV6Adcwod0P99xzj7mIagjQr9fX0dU2t2zZYuap6wVew4H2VTrpcsGdO3c2XSfa9aJVkCeeeMI8p0FEQ8n3339vPtcLu8pqn3aB/PDDD7Js2TJTnVixYoU55/79+6VatWrmwq9T0nT5Yg0ZQUFBsmHDBtONoo8PHTpkKikTJkwwr1eqVCl588035dZbb5UBAwZk+b3T96b0XFoB0S4bDW6rV6+Wvn37yh133JGhlHq9lRYNO3p+7YbS9mpoevzxx817XrNmjaxbt84c647uJiCgJZ8QSfnV263wD8G3iRSuKP6A8GEpfPz1119mARsNFEqrIFfSSoj+da9/bTuXAZ43b56sXbvWBAYdEKVVBf1cL+h6odRgoBdNDSQaBPSjBhSntLQ083o6nsI5l/3FF1801RBdTEcDjVYA9HXSLyt85T4dlKUhRj9q8FAagPRCrfu1eqFBSSsKb7/9tut1ateu7fpcX09fN/15tHtFx3do9SY7Wu3R6okzpGh/rQ4O0/05DR+TJk1yfY80ZGkwu3jxoqlKaRDS0MQSy4Cbgsfn1UXSLnq7Jf4hbwGRLgf9IoDQ7WKB3sxIKwIaOLp3724ChXZFXEnHWeiiVa1atXLt04ty06ZN5cCBA+bx3XffbSonOjBTqxx6EdVA4uxa0H3ObhKlwUVDzYMPPmge6+BR7dKYP39+jt6DVjc08Gi3jV6gnZueT9utnJWPnMiuDzY9ff/pvy9KHzu/Lzmhiwc5OafROZdLB+BGWvEgeLhP2kW/qSJR+bBAV8DUEKDjLr788ktThdDS/44dO3L8WtoNoWFGw4Yu+6tBok2bNqb6od0aWklIX/nQKsnvv//uWhLcWQ3Rkd06viFv3uvLnzqmRN/H7t27M63oqSFEpT/H9dIwo11GGrquVf3IjvN9pA8zV7vrcfrzOBcL0+8JAMAOKh+W6EVO/1LXC75WLbQLQscxpOccTKnjD9JfQHXAaa1atVz7NFzoWAodEKpVDu060Zkp2p2gf8nrBV3pAjmfffaZGaPhnN+um55fKy8ahK5XgwYNTOVDKwQ6niT95uyi0IqCDua8Gn1v+hrp6dgTDTbpu2rSc64vou8v/fdF6WPn90XHbzi7uJxuZLpsVm0EALgXlQ8LtMKhF2WdN66zWPTxL7/8Yi6oWoFw0hkk/fv3N2M7NFBUrFjRjKPQBb+effZZ13EaOLR6ohfcGjVquPbNnj3bdOs46WwQnVnTo0ePTMuBazeMVkV0nMj10EDTu3dv6dOnjxmsqWFE34O+Lw0dOm4iIiLCdC3puAwdyKkXcg1J2ibt7tFZLPredWaKVkv0PeqMGR1kq+NQTp48aQbk6pgSHWCrU211to8ONNXvib4PPa8OOP3888/NYFjnwFCtujRv3lymTJlixrJoSHrllVdy/G+lbXQuPqSDWYsUKcI8fwBwMyofFugqoVql0Au+XsT1oqgXcOeg0vT04qkriz711FPSsGFDcxHW+ec6bdVJx31oN0H67hUNH/oXe/rxHjquQy/mWd2HRM+xatU
q+fXX6+8/1IGlGj40KOhUV53mq1UZDUlK35tWU3Sqqo5TadGiham86ABO5wBV7bLRaoUGJx28qnSNjyVLlphgojN7dJCqDijVUOOcaqvn0hkzOsBUn9f1SrQ9V75fHdjbqFEjGTp0qLkddk7p90UDmQ5i1Tbq4mwAAPfK47jeEX+WJCUlmemNiYmJ5qKdns5IcM7SSL9IFmAbP4vAdfh9j8iaRt5uhX95YLdIiYaSG2V3/b4SlQ8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYJVPhg9Wo4S38TMIAAGyyJguWqXLaJ86dcqswaCPs1rDAvAUnZl+6dIls8Ca/iw67/gLAPDT8KG/7HVdBV1CWwMI4C16d15dXO16740DAPDR8KH0L039pa8rWXIPDniDrtKqq7ZSdQOAAAkfSn/p651Jb+YuqAAAwDuoGQMAAKsIHwAAIHeHD707a5cuXcxtz7X7Y+XKlVc9Vm+rrsdERUXdbDsBAECgho/k5GSpV6+ezJkzJ9vjVqxYIdu3bzchBQAA4IYHnHbq1Mls2Tl58qQMHjxYYmJipHPnzjk9BQAA8GNBnlj58amnnpIRI0ZI7dq1r3l8SkqK2ZySkpLc3SQAAODPA06nTp1q1kAYMmTIdR0fGRkpISEhrq1ChQrubhIAAPDX8LF792556623ZMGCBde9AFNERIQkJia6tvj4eHc2CQAA+HP42Lx5syQkJJgVSLX6odvx48flxRdflNDQ0Cy/Jjg4WIoWLZphAwAA/sutYz50rEf79u0z7OvYsaPZ37dvX3eeCgAABEr4OH/+vMTFxbkeHz16VPbu3SslSpQwFY+SJUtmOF6XQC9TpoxUr17dPS0GAACBFT5iY2OlXbt2rsfDhw83H8PDw81YDwAAALeGj7Zt24rD4bju448dO5bTUwAAAD/GvV0AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAAC5O3xs2rRJunTpIuXKlZM8efLIypUrXc9dvnxZRo0aJXXq1JHChQubY/r06SOnTp1yd7sBAECghI/k5GSpV6+ezJkzJ9NzFy5ckD179siYMWPMx08//VQOHjwoDz/8sLvaCwAAfFxQTr+gU6dOZstKSEiIrF27NsO+2bNnS9OmTeXEiRNSsWLFG28pAAAIzPCRU4mJiaZ7plixYlk+n5KSYjanpKQkTzcJAAD464DTixcvmjEgvXr1kqJFi2Z5TGRkpKmYOLcKFSp4skkAAMBfw4cOPu3Ro4c4HA6Jjo6+6nERERGmOuLc4uPjPdUkAADgr90uzuBx/Phx+eqrr65a9VDBwcFmAwAAgSHIU8Hj8OHDsmHDBilZsqS7TwEAAAIpfJw/f17i4uJcj48ePSp79+6VEiVKSNmyZeWxxx4z02xXr14tqampcubMGXOcPp8/f373th4AAPh/+IiNjZV27dq5Hg8fPtx8DA8Pl3HjxsmqVavM4/r162f4Oq2CtG3b9uZbDAAAAit8aIDQQaRXk91zAAAA3NsFAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8/sGnTJunSpYuUK1dO8uTJIytXrszwvMPhkLFjx0r
ZsmWlYMGC0r59ezl8+LDX2gsACGyEDz+QnJws9erVkzlz5mT5/LRp02TmzJkyd+5c2bFjhxQuXFg6duwoFy9etN5WAACCvN0A3LxOnTqZLSta9YiKipJXXnlFunbtavZ98MEHcvvtt5sKSc+ePS23FgAQ6Kh8+LmjR4/KmTNnTFeLU0hIiDRr1ky2bdvm1bYBAAIT4cPPafBQWulITx87nwMAwCbCBwAAsIrw4efKlCljPv78888Z9utj53MAANhE+PBzlStXNiFj/fr1rn1JSUlm1kuLFi282jYAQGBitosfOH/+vMTFxWUYZLp3714pUaKEVKxYUYYOHSqvvfaaVKtWzYSRMWPGmDVBunXr5tV2AwACU44rHyxolfvExsZKgwYNzKaGDx9uPtd/BzVy5EgZPHiw9OvXT5o0aWLCypo1a6RAgQJebjkAIBDlOHywoFXu07ZtWxP6rtwWLFhgnteQOGHCBDO7Rf8d1q1bJ3fddZe3mw0ACFA57nZhQSsAAJBrBpzeyIJWKSkpZgBk+g0AAPivIG8vaBUZGSnjx48XXxQ6+gtvN8EvHJvS2dtNAAAE0lTbiIgISUxMdG3x8fHebhIAAPCV8HEjC1oFBwdL0aJFM2wAAMB/uTV8sKAVAABw+5gPFrQCAABWw4cuaNWuXTvXY13QSoWHh5t1JXRBK10LRBe0+uOPP6R169YsaAUAAG48fDgXtLoa54JWugEAAOS62S4AACCwED4AAIBVhA8AAGAV4QMAAFhF+ABgxblz58xU/EqVKknBggWlZcuWsmvXLm83C4AXED4AWPH3v/9d1q5dK4sWLZL9+/dLhw4dzE0oT5486e2mAbCM8AHA4/78809Zvny5TJs2Tdq0aSNVq1aVcePGmY/R0dHebh4AywgfADzur7/+ktTU1EyLDWr3y5YtW7zWLgDeQfgA4HFFihQx93eaOHGinDp1ygSRDz/8ULZt2yanT5/2dvMAWEb4AGCFjvXQ1ZHLly9v7mY9c+ZM6dWrl+TNy68hINDwvx6AFVWqVJGvv/7a3JwyPj5edu7cKZcvX5Y777zT200DYBnhA4BVhQsXlrJly8rZs2clJiZGunbt6u0mAcjtN5YDgBuhQUO7XapXry5xcXEyYsQIqVGjhvTt29fbTQNgGZUPAFYkJibKwIEDTeDo06ePtG7d2gSSW265xdtNA2AZlQ8AVvTo0cNsAEDlAwAAWEX4AAAAVtHtAviR0NFfeLsJfuPYlM7ebgLgt6h8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMC3w0dqaqqMGTNGKleuLAULFpQqVarIxIkTxeFwuPtUAADABwW5+wWnTp0q0dHRsnDhQqldu7bExsZK3759JSQkRIYMGeLu0wEAgEAPH1u3bpWuXbtK586dzePQ0FBZunSp7Ny5092nAgAAPsjt3S4tW7aU9evXy6FDh8zjffv2yZYtW6RTp05ZHp+SkiJJSUkZNgAA4L/cXvkYPXq0CRA1atSQfPnymTEgkyZNkt69e2d5fGRkpIwfP97dzQAAAIFS+fj4449l8eLFsmTJEtmzZ48Z+zF9+nTzMSsRERGSmJjo2uLj493dJAAA4M+VjxEjRpjqR8+ePc3jOnXqyPHjx02FIzw8PNPxwcHBZgMAAIHB7ZWPCxcuSN68GV9Wu1/S0tLcfSoAAOCD3F756NKlixnjUbFiRTPV9ttvv5UZM2bIM8884+5TAQAAH+T28DFr1iyzyNiAAQMkISFBypUrJ//4xz9k7Nix7j4VAADwQW4PH0WKFJGoqCizAQAAXIl7uwAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKw
ifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAHw/fJw8eVKefPJJKVmypBQsWFDq1KkjsbGxnjgVAADwMUHufsGzZ89Kq1atpF27dvKf//xHSpUqJYcPH5bixYu7+1QAAMAHuT18TJ06VSpUqCDvv/++a1/lypXdfRoAAOCj3N7tsmrVKmncuLF0795dSpcuLQ0aNJB58+Zd9fiUlBRJSkrKsAEAAP/l9vBx5MgRiY6OlmrVqklMTIz0799fhgwZIgsXLszy+MjISAkJCXFtWjUBAAD+y+3hIy0tTRo2bCiTJ082VY9+/frJc889J3Pnzs3y+IiICElMTHRt8fHx7m4SAADw5/BRtmxZqVWrVoZ9NWvWlBMnTmR5fHBwsBQtWjTDBgAA/Jfbw4fOdDl48GCGfYcOHZJKlSq5+1QAAMAHuT18DBs2TLZv3266XeLi4mTJkiXy7rvvysCBA919KgAA4IPcHj6aNGkiK1askKVLl0pYWJhMnDhRoqKipHfv3u4+FQAA8EFuX+dDPfTQQ2YDAAC4Evd2AQAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAA+Ff4mDJliuTJk0eGDh3q6VMBAIBADx+7du2Sd955R+rWrevJ0wAAAB/isfBx/vx56d27t8ybN0+KFy/uqdMAAAAf47HwMXDgQOncubO0b98+2+NSUlIkKSkpwwYAAPxXkCdedNmyZbJnzx7T7XItkZGRMn78eE80AwAABELlIz4+Xl544QVZvHixFChQ4JrHR0RESGJiomvTrwcAAP7L7ZWP3bt3S0JCgjRs2NC1LzU1VTZt2iSzZ8823Sz58uVzPRccHGw2AAAQGNwePu677z7Zv39/hn19+/aVGjVqyKhRozIEDwAAEHjcHj6KFCkiYWFhGfYVLlxYSpYsmWk/AAAIPKxwCgAAfH+2y5U2btxo4zQAAMAHUPkAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAADg2+EjMjJSmjRpIkWKFJHSpUtLt27d5ODBg+4+DQAA8FFuDx9ff/21DBw4ULZv3y5r166Vy5cvS4cOHSQ5OdndpwIAAD4oyN0vuGbNmgyPFyxYYCogu3fvljZt2rj7dAAAwMd4fMxHYmKi+ViiRAlPnwoAAARi5SO9tLQ0GTp0qLRq1UrCwsKyPCYlJcVsTklJSZ5sEgAA8OfKh479+P7772XZsmXZDlANCQlxbRUqVPBkkwAAgL+Gj0GDBsnq1atlw4YNcscdd1z1uIiICNM149zi4+M91SQAAOCP3S4Oh0MGDx4sK1askI0bN0rlypWzPT44ONhsAAAgMAR5oqtlyZIl8tlnn5m1Ps6cOWP2a5dKwYIF3X06AAAQ6N0u0dHRpvukbdu2UrZsWdf
20UcfuftUAADAB3mk2wUAAOBquLcLAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAP8LHnDlzJDQ0VAoUKCDNmjWTnTt3eupUAAAg0MPHRx99JMOHD5dXX31V9uzZI/Xq1ZOOHTtKQkKCJ04HAAACPXzMmDFDnnvuOenbt6/UqlVL5s6dK4UKFZL58+d74nQAACCQw8elS5dk9+7d0r59+/9/krx5zeNt27a5+3QAAMDHBLn7BX/99VdJTU2V22+/PcN+ffzjjz9mOj4lJcVsTomJieZjUlKS5HZpKRe83QS/4Av/1r6Cn0n34efSDZLOi/Aj6f7vaVBSrv4/43A47IePnIqMjJTx48dn2l+hQgWvtAf2hUR5uwVAZvxcIne6R3K7c+fOSUhIiN3wcdttt0m+fPnk559/zrBfH5cpUybT8REREWZwqlNaWpr8/vvvUrJkScmTJ4+7mxdQNIVqiIuPj5eiRYt6uzkAP5PIlfi5dA+teGjwKFeu3DWPdXv4yJ8/vzRq1EjWr18v3bp1cwUKfTxo0KBMxwcHB5stvWLFirm7WQFN/zPxHwq5CT+TyI34ubx516p4eLTbRSsZ4eHh0rhxY2natKlERUVJcnKymf0CAAACm0fCx+OPPy6//PKLjB07Vs6cOSP169eXNWvWZBqECgAAAo/HBpxqF0tW3SywR7uzdKG3K7u1AG/hZxK5ET+X9uVxXM+cGAAAADfhxnIAAMAqwgcAALCK8AEAAKwifAAAAKu8vrw63HtfHb1zsN7AT6c4K11VtmXLlvL0009LqVKlvN1EAACY7eIvdu3aJR07dpRChQqZOwg711TRZe11ddkLFy5ITEyMWfgNAALZn3/+ae6+XqJECalVq1aG5y5evCgff/yx9OnTx2vtCwSEDz/RvHlzqVevnsydOzfTPXH0n/j555+X7777zlRFgNxC76Wh6ytoxQ6w4dChQ9KhQwc5ceKE+V3ZunVrWbZsmZQtW9b1B5vem0Tvzg7PYcyHn9i3b58MGzYsy5vx6T59bu/evV5pG3A1ehPJhQsXersZCCCjRo2SsLAwSUhIkIMHD0qRIkWkVatWJozAHsZ8+Akd27Fz506pUaNGls/rcyxvD9tWrVqV7fNHjhyx1hZAbd26VdatW2fuwK7b559/LgMGDJC7775bNmzYIIULF/Z2EwMC4cNPvPTSS9KvXz/Tj3nfffdlGvMxb948mT59urebiQCjd7bWylt2vbtZVesAT473CAoKyvDzFx0dbW4Hcs8998iSJUu82r5AQfjwEwMHDjQp/s0335S3337b1V+ZL18+adSokSxYsEB69Ojh7WYiwGg/uv48du3aNcvntStQfz4BW7Q6HBsbKzVr1sywf/bs2ebjww8/7KWWBRbGfPgRvZvw9u3bzcyWkydPmk0/130ED3iDBgutxl3NtaoigLs98sgjsnTp0iyf0wDSq1cvfiYtYLYLAI/ZvHmzJCcnywMPPJDl8/qc/hWq5W4AgYPwAQAArKLbBQAAWEX4AAAAVhE+AACAVYQPAF6hNzvUdUBuxrhx46R+/fpuaxMAOwgfAHIlXZumWLFi3m4GAA8gfADIkUuXLnm7CQB8HOEDQLbatm1rlp4eOnSoWUW3Y8eOMmPGDKlTp465D0aFChXMvTHOnz+fqWoRExNjVpK89dZbzVofp0+fvup5du3aJaVKlZKpU6de9ZgpU6aYWwfozcCeffZZc/tzAL6H8AHgmvTOs/nz55dvvvlG5s6dK3n
z5pWZM2fKf//7X/PcV199JSNHjszwNbq6rt5PaNGiRbJp0yZz11C9B1FW9Ovvv/9+mTRpkrnraFY+/vhjM8Zj8uTJZmEy59LtAHwP93YBcE3VqlWTadOmuR5Xr17d9XloaKi89tpr8vzzz2cIA5cvXzZBpUqVKuaxVk8mTJiQ6bVXrFghffr0kX/+85/mFgFXExUVZaoduik9p96dlOoH4HuofAC4pitv/qYXfb17cvny5U0XyFNPPSW//fabqXY4FSpUyBU8lFYqEhISMrzOjh07pHv37qY6kl3wUAcOHJBmzZpl2NeiRYubfGcAvIHwAeCadGyH07Fjx+Shhx6SunXryvLly82N4+bMmZNpMOott9xyzZvIaTjRu4zOnz/fVEoABAbCB4Ac0bCRlpYmb7zxhjRv3lzuuusuOXXq1A29lg5g1fEecXFx5s7L2QUQHbiqlZL09I7NAHwP4QNAjlStWtWEhFmzZsmRI0dMl4mO7bhRpUuXNgHkxx9/NLcz/+uvv7I87oUXXjAVkvfff18OHTokr776qhnwCsD3ED4A5Ei9evXMVFudEhsWFiaLFy+WyMjIm3rNMmXKmACyf/9+6d27t6SmpmY6RseEjBkzxsyq0TEox48fl/79+9/UeQF4Rx7HlZ2wAAAAHkTlAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAIDb9P94PJfyxs0aRAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cluster_display.display_bar(x_axis=df.index, y_axes=df, title=\"Slow Rank\", y_index=\"slowAffectCount\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/profiler/msprof_analyze/cluster_analyse/recipes/slow_rank/slow_rank.py b/profiler/msprof_analyze/cluster_analyse/recipes/slow_rank/slow_rank.py new file mode 100644 index 00000000000..5a0d9c8c341 --- /dev/null +++ b/profiler/msprof_analyze/cluster_analyse/recipes/slow_rank/slow_rank.py @@ -0,0 +1,190 @@ +# Copyright (c) 2025, Huawei Technologies Co., Ltd. +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os
from collections import defaultdict

import pandas as pd
import numpy as np

from msprof_analyze.cluster_analyse.recipes.base_recipe_analysis import BaseRecipeAnalysis
from msprof_analyze.prof_common.constant import Constant
from msprof_analyze.prof_common.logger import get_logger
from msprof_analyze.prof_exports.cluster_time_summary_export import CommunicationTimeExport
from msprof_analyze.cluster_analyse.recipes.slow_rank.dixon_table import DIXON_TABLE_995

logger = get_logger()


def judge_norm(time_list, threshold=3):
    """Flag slow-rank candidates with a mean +/- ``threshold`` * std rule.

    Args:
        time_list: Communication times, one entry per rank sample.
        threshold: Number of standard deviations that defines the band.

    Returns:
        Positions in ``time_list`` that are flagged as slow ranks; empty when
        nothing is flagged or ``time_list`` is empty.
    """
    times = list(time_list)
    if not times:
        # max()/min() would raise on an empty sequence.
        return []

    t_max = max(times)
    t_min = min(times)
    t_mean = np.mean(times)
    t_std = np.std(times)
    threshold_high = t_mean + threshold * t_std
    threshold_low = t_mean - threshold * t_std

    # Entries whose time is below the lower threshold are treated as slow ranks.
    outliers_idx = [i for i, cost in enumerate(times) if cost < threshold_low]

    # If any entry exceeds the upper threshold, the entry with the shortest
    # time is also added to the slow-rank list.
    if t_max > threshold_high:
        min_idx = times.index(t_min)
        if min_idx not in outliers_idx:
            outliers_idx.append(min_idx)
    return outliers_idx


def judge_dixon(time_list):
    """Flag the shortest entry as a slow rank using Dixon's test.

    Args:
        time_list: Communication times, one entry per rank sample.

    Returns:
        A one-element list with the position of the smallest value when the
        test statistic exceeds ``DIXON_TABLE_995[n]``; otherwise an empty list.
        Also empty for fewer than three samples, for more than
        ``Constant.MAX_DIXON_NUM`` samples, and when there are at most three
        distinct values.
    """
    times = list(time_list)
    n = len(times)
    # No statistic is defined in this function for n > MAX_DIXON_NUM;
    # previously that path fell through to an unbound `flag`.
    if n <= 2 or n > Constant.MAX_DIXON_NUM:
        return []
    sorted_list = sorted(times)

    # Guard against a possibly zero denominator in the statistic.
    if len(set(sorted_list)) <= 3:
        return []

    # Dixon statistic: (k-th smallest - smallest) / (m-th largest - smallest).
    # Which order statistics are used depends on the sample size.
    if n <= Constant.DIXON_THRESHOLD_1:
        low_pos, high_pos = 1, -1
    elif n <= Constant.DIXON_THRESHOLD_2:
        low_pos, high_pos = 1, -2
    elif n <= Constant.DIXON_THRESHOLD_3:
        low_pos, high_pos = 2, -2
    else:
        low_pos, high_pos = 2, -3

    smallest = sorted_list[0]
    spread = sorted_list[high_pos] - smallest
    flag = (sorted_list[low_pos] - smallest) / spread if spread else 0

    # Look up the critical value by sample size; a larger statistic means the
    # smallest value is an outlier, i.e. the shortest entry is the slow rank.
    if flag > DIXON_TABLE_995[n]:
        return [times.index(smallest)]
    return []


def judge_slow_rank(time_list):
    """Pick Dixon's test or the three-sigma rule based on the list length."""
    if len(time_list) <= Constant.MAX_DIXON_NUM:
        return judge_dixon(time_list)
    else:
        return judge_norm(time_list)


class SlowRankAnalysis(BaseRecipeAnalysis):
    """Recipe that counts, per rank, how often it is flagged as a slow rank."""

    def __init__(self, params):
        super().__init__(params)
        logger.info("Slow Rank Analysis init.")

    @property
    def base_dir(self):
        """Name of the directory that contains this recipe module."""
        return os.path.basename(os.path.dirname(__file__))

    def reducer_func(self, mapper_res):
        """Concatenate the non-None mapper results; return None if none remain."""
        mapper_res = list(filter(lambda df: df is not None, mapper_res))
        if not mapper_res:
            logger.error("Mapper data is None.")
            return None
        concated_df = pd.concat(mapper_res)
        return concated_df

    def run(self, context):
        """Collect communication ops, vote for slow ranks and export the result."""
        if self._is_msprof:
            logger.warning("Slow rank analysis do not support msprof db now.")
            return

        mapper_res = self.mapper_func(context)
        comm_ops_df = self.reducer_func(mapper_res)
        if comm_ops_df is None:
            return

        analyzer = SlowRankVoteAnalysis(comm_ops_df)
        perpector_df = analyzer.run()

        if self._export_type == Constant.DB:
            self.save_db(perpector_df)
        elif self._export_type == "notebook":
            self.save_notebook(perpector_df)
        else:
            # The old message named "notebook" here although notebook export
            # is handled above; report the export type actually requested.
            logger.error(f"SlowRank analysis is not supported for {self._export_type} export type.")

    def save_db(self, perpector_df):
        """Dump the per-rank result into the "SlowRank" table of the cluster analyzer DB."""
        self.dump_data(perpector_df, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER, "SlowRank")

    def save_notebook(self, perpector_df):
        """Dump the result as CSV and emit the notebook plus its display helper."""
        self.dump_data(perpector_df, "rank_stats.csv")
        self.create_notebook("stats.ipynb")
        self.add_helper_file("cluster_display.py")

    def _mapper_func(self, data_map, analysis_class):
        """Read communication-time rows for one profiler DB described by ``data_map``."""
        profiler_db_path = data_map.get(Constant.PROFILER_DB_PATH)
        step_range = data_map.get(Constant.STEP_RANGE)
        df = CommunicationTimeExport(profiler_db_path, analysis_class, step_range).read_export_db()
        return df


class SlowRankVoteAnalysis:
    """Vote for slow ranks over groups of communication ops.

    ``comm_ops`` is a DataFrame; the columns read here are ``opName``,
    ``groupName``, ``rankId`` and ``communication_time``.
    """

    def __init__(self, comm_ops):
        self.comm_ops = comm_ops

    def grouping_ops(self):
        """Group communication ops by communication group and op name.

        Side effect: ``self.comm_ops`` is replaced by the filtered frame with
        a fresh 0..n-1 index, so the returned positions index into it.

        Returns:
            Nested mapping ``groupName -> opName -> [row positions]``.
        """
        ops = self.comm_ops
        # Ops whose name contains "send" or "receive" are excluded.
        ops = ops[~ops["opName"].str.contains("send")]
        ops = ops[~ops["opName"].str.contains("receive")]

        # Exclude every group whose op names do not all occur equally often.
        counts_per_op = ops.groupby(["groupName", "opName"]).size()
        distinct_counts = counts_per_op.groupby(level="groupName").nunique()
        exclude_groups = distinct_counts[distinct_counts > 1].index
        # Build a new frame instead of dropping in place on a filtered slice.
        ops = ops[~ops["groupName"].isin(exclude_groups)].reset_index(drop=True)
        self.comm_ops = ops

        grouped_ops_dict = defaultdict(lambda: defaultdict(list))
        group_name_arr = ops["groupName"].values
        op_name_arr = ops["opName"].values
        for idx, (group_name, op_name) in enumerate(zip(group_name_arr, op_name_arr)):
            grouped_ops_dict[group_name][op_name].append(idx)
        return grouped_ops_dict

    def run(self):
        """Group the ops and return the per-rank slow-count DataFrame."""
        grouped_ops_dict = self.grouping_ops()
        perpector_df = self.analysis(grouped_ops_dict)
        return perpector_df

    def analysis(self, grouped_ops_dict):
        """Count how often each rank is flagged by ``judge_slow_rank``.

        Args:
            grouped_ops_dict: Output of ``grouping_ops``.

        Returns:
            DataFrame indexed by ``rankId`` with one column ``slowAffectCount``.
        """
        rank_id_arr = self.comm_ops["rankId"].values
        comm_time_arr = self.comm_ops["communication_time"].values
        perpector_dict = defaultdict(int)
        for ops_same_group in grouped_ops_dict.values():
            for ops_list in ops_same_group.values():
                time_list = [comm_time_arr[op_idx] for op_idx in ops_list]
                # Flagged positions refer to time_list; map them back through
                # ops_list to the row, then to that row's rank id.
                for rank_idx in judge_slow_rank(time_list):
                    slow_rank = rank_id_arr[ops_list[rank_idx]]
                    perpector_dict[slow_rank] += 1

        # Build the frame in one step rather than growing it row by row.
        perpector_df = pd.DataFrame({
            "rankId": list(perpector_dict.keys()),
            "slowAffectCount": list(perpector_dict.values()),
        })
        perpector_df.set_index(["rankId"], inplace=True)
        return perpector_df