From 97c27ee5673842c4f9c9186b3333782eeea69f4c Mon Sep 17 00:00:00 2001 From: yang_feida Date: Mon, 19 May 2025 16:35:17 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E8=A1=A5=E5=85=85=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- omniadvisor/README.md | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/omniadvisor/README.md b/omniadvisor/README.md index 7ff5fdfe4..5979b9c85 100755 --- a/omniadvisor/README.md +++ b/omniadvisor/README.md @@ -1,8 +1,32 @@ -1 对负载进行调优 -python tunning.py -l 16 -r backend -rt 1 -t iterative -各参数说明,可以通过python3 tunning.py --help 查看 +1 负载劫持 +首先要对用户负载进行劫持,获取用户负载及相关信息,以便后续能够调优。使能负载劫持,步骤如下: +- ① 修改 omniadvisor/script/spark-submit 脚本中 hijack_path 值,需要填实际的 hijack.py 文件的路径 +- ② 替换正在使用的 spark-submit 脚本(可通过 `which spark-submit` 查看)为 ① 中的 spark-submit 文件 +- ③ 启用环境变量:`export enable_omniadvisor=true` +说明:完成以上步骤之后,用户下发相同的负载,会劫持用户的命令,并替换用户配置为系统推荐的最优的配置(初始状态下保持用户的默认配置)。推荐配置若执行失败,则退化为用户的默认配置 + +2 查询负载等信息 +执行负载并劫持之后,在后台管理页面查看到相关信息 +- ① 若之前从未创建过用户,使用命令:`python init.py createsuperuser`,按指示设置用户名和密码。 +- ② `python init.py runserver`(可选 绑定 0.0.0.0:8000) +- ③ 修改settings.py中的 ALLOWED_HOSTS(可选) +- ④ 登录后台管理页面:`localhost:8000/admin/` (替换实际的ip) +说明:进入页面后,可以看到APP下有若干数据表,其中 loads 就是负载表 + +3 启用调优 +通过在根目录下,执行 `python tunning.py --help` 查看各参数说明 +- ① 各参数的详细介绍 + - --load-id,即步骤2中loads表查询到的负载的id + - --retest-way,复测方式(复测是为了保证结果的可靠),可选前台或者后台复测。若是前台复测,当用户提交任务时,跟随任务下发;若后台复测,则是由 + omniadvisor触发 + - --tuning-method,调优方法,不同的调优方法原理不同,iterative,即迭代调优,本质上使用贝叶斯优化方法;expert,即专家调优,通过诊断资源瓶颈, + 给出调优建议;transfer,迁移调优,迁移相似负载的调优经验到陌生负载;native 算子加速,将Spark原生算子替换为C++ Native算子,同时使能CPU向量化执行,实现性能加速 +- ② 命令示例 + - `python tunning.py -l 1 -r backend -t expert`。说明:使用专家调优,对id为1的负载,进行调优,期间在后台复测,复测次数为 + common_config.cfg 中 tuning.retest.times 的值 + - `python tunning.py -l 1 -r backend`。说明:使用omniadvisor中默认的调优策略以及历史调优记录,共同决定当前的调优方式 +说明:调优结束后,会更新负载的最优配置信息,以便步骤1能够使用最优配置 + + -2 负载劫持优化 -如果需要开启优化,需要加上环境变量 enable_omniadvisor=true -(对于开发人员,如果需要指定hijack.py文件,那么可以在环境变量中添加:HIJACK_PATH=xxx) -- Gitee From 4707bb581bd8d40a99213b092954c22d8ded7bdc Mon Sep 17 00:00:00 2001 From: yang_feida Date: Wed, 21 May 2025 14:52:16 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=B7=B2=E7=9F=A5?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- omniadvisor/README.md | 2 +- omniadvisor/docs/CONTRIBUTING.md | 21 ------ omniadvisor/docs/FAQ.md | 66 +++++++++++-------- .../omniadvisor/interface/config_tuning.py | 8 ++- .../repository/load_prefetch_repository.py | 34 ++++++++++ .../src/omniadvisor/service/retest_service.py | 3 +- .../service/tuning_result/tuning_result.py | 2 +- .../tuning_result/tuning_result_history.py | 35 ++++++++-- omniadvisor/src/server/app/models.py | 7 +- .../interface/test_config_tuning.py | 10 +-- 10 files changed, 119 insertions(+), 69 deletions(-) delete mode 100644 omniadvisor/docs/CONTRIBUTING.md create mode 100644 omniadvisor/src/omniadvisor/repository/load_prefetch_repository.py diff --git a/omniadvisor/README.md b/omniadvisor/README.md index 5979b9c85..9cc3f4bf5 100755 --- a/omniadvisor/README.md +++ b/omniadvisor/README.md @@ -10,7 +10,7 @@ - ① 若之前从未创建过用户,使用命令:`python init.py createsuperuser`,按指示设置用户名和密码。 - ② `python init.py runserver`(可选 绑定 0.0.0.0:8000) - ③ 修改settings.py中的 ALLOWED_HOSTS(可选) -- ④ 登录后台管理页面:`localhost:8000/admin/` (替换实际的ip) +- ④ 登录后台管理页面:`localhost:8000/admin/` (替换实际的ip,若访问不通,须执行步骤 ②③) 说明:进入页面后,可以看到APP下有若干数据表,其中 loads 就是负载表 3 启用调优 diff --git a/omniadvisor/docs/CONTRIBUTING.md b/omniadvisor/docs/CONTRIBUTING.md deleted file mode 100644 index c0e12ed97..000000000 --- a/omniadvisor/docs/CONTRIBUTING.md +++ /dev/null @@ -1,21 +0,0 @@ -# 开发者须知 - ---- -## 自动导包配置 - -由于 PyCharm 的导包机制可能存在不合理之处,在 `alt` + `enter` 快捷键下,容易出现导包的路径错误,需手动执行以下操作: - -**设置 `src` 为资源目录**: - - 右键点击 `src` 文件夹。 - - 选择 `Mark Directory as` -> `Resources Root`。 - - 重启 PyCharm 以生效。 ---- -## 如何在django admin的页面中显示一个model? - - 注册一个model(参考`server\app\models.py`) - - 在admin后台显示model(参考`server\app\admin.py`) - - 其他高阶用法持续更新中。。。 - ---- -## 在哪写数据库增删改查? - -在 `omniadvisor\repository.py`中,组织django中注册的model,实现增删改查功能 \ No newline at end of file diff --git a/omniadvisor/docs/FAQ.md b/omniadvisor/docs/FAQ.md index c79ffd6d1..15a518750 100644 --- a/omniadvisor/docs/FAQ.md +++ b/omniadvisor/docs/FAQ.md @@ -1,45 +1,53 @@ # FAQ 文档 - - --- -## 如何进度Django 后台管理页面: + +## 进入Django 后台管理页面?: 在`src`目录下,按照以下步骤操作 -### 数据库迁移 -```bash -python init.py makemigrations -python init.py migrate -``` -### 创建超级用户 +### 创建超级用户(若未曾创建) + ```bash python init.py createsuperuser ``` + 按照提示输入用户名、邮箱和密码 ### 启动开发服务 + ```bash python init.py runserver ``` -## 如何拉起单元测试? +### 登录管理页面 + +访问以下地址以登录 Django 后台管理页面: +[http://127.0.0.1:8000/admin](http://127.0.0.1:8000/admin) + + +--- +## 拉单元测试? 在 `omniadvisor` 根目录下,按照以下步骤运行单元测试: ### 安装测试环境依赖 + ```bash poetry install --with test ``` + 该命令用于安装测试环境所需的依赖。 ### 激活虚拟环境 + ```bash poetry shell ``` ### 运行单元测试 -- **多线程运行测试**: + +- **多线程运行测试(须安装 pytest-xdist)**: ```bash python -m pytest -n auto ``` @@ -49,33 +57,35 @@ poetry shell ``` ### 退出虚拟环境 + ```bash exit ``` --- -## 如何进入 Django 后台管理页面? +## 自动导包配置? -在 `src/django_server` 目录下,按照以下步骤操作: +由于 PyCharm 的导包机制可能存在不合理之处,在 `alt` + `enter` 快捷键下,容易出现导包的路径错误,需手动执行以下操作: -### 数据库迁移 -```bash -python manage.py makemigrations -python manage.py migrate -``` +**设置 `src` 为资源目录**: + - 右键点击 `src` 文件夹。 + - 选择 `Mark Directory as` -> `Resources Root`。 + - 重启 PyCharm 以生效。 -### 创建超级用户 -```bash -python manage.py createsuperuser -``` -按照提示输入用户名、邮箱和密码。 +--- +## 在django admin的页面中显示一个model? + - 注册一个model(参考`server\app\models.py`) + - 在admin后台显示model(参考`server\app\admin.py`) + - 其他高阶用法持续更新中。。。 -### 启动开发服务器 +--- +### 数据库迁移? ```bash -python manage.py runserver +python init.py makemigrations +python init.py migrate ``` -### 登录管理页面 -访问以下地址以登录 Django 后台管理页面: -[http://127.0.0.1:8000/admin](http://127.0.0.1:8000/admin) +--- + + diff --git a/omniadvisor/src/omniadvisor/interface/config_tuning.py b/omniadvisor/src/omniadvisor/interface/config_tuning.py index 962bed803..d81a1a0f9 100644 --- a/omniadvisor/src/omniadvisor/interface/config_tuning.py +++ b/omniadvisor/src/omniadvisor/interface/config_tuning.py @@ -29,7 +29,7 @@ def _parse_tuning_args(): help='If choose hijacking, a test will execute immediately') parser.add_argument("-t", "--tuning-method", type=str, required=False, - choices=OA_CONF.TuningMethod.all, + choices=[method for method in OA_CONF.TuningMethod.all if method != OA_CONF.TuningMethod.user], help='Support three tune method, for iterative, use BO; for expert ...') args = parser.parse_args() @@ -60,7 +60,9 @@ def unified_tuning(load, retest_way: str, tuning_method: str): elif tuning_method == OA_CONF.TuningMethod.transfer: perf_history = get_tuning_result_history(load) other_history = get_other_tuning_result_history(load) - next_config, method_extend = TransferTuning.tune(perf_history.tuning_history, other_history) + next_config, method_extend = TransferTuning.tune( + perf_history.tuning_history, [other.tuning_history for other in other_history] + ) else: raise ValueError(f'Not supported tuning method: {tuning_method}') @@ -84,7 +86,7 @@ def unified_tuning(load, retest_way: str, tuning_method: str): raise perf_history = get_tuning_result_history(load) # 更新最优配置 - if perf_history.best_config: + if perf_history.best_config and perf_history.best_config != next_config: LoadRepository.update_best_config(load, perf_history.best_config) else: # 更新待测试配置即可 diff --git a/omniadvisor/src/omniadvisor/repository/load_prefetch_repository.py b/omniadvisor/src/omniadvisor/repository/load_prefetch_repository.py new file mode 100644 index 000000000..c093f1b58 --- /dev/null +++ b/omniadvisor/src/omniadvisor/repository/load_prefetch_repository.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. +from django.db.models import Prefetch +from server.app.models import DatabaseTuningRecord, DatabaseLoad +from omniadvisor.repository.repository import Repository + + +class LoadPrefetchRepository(Repository): + + @classmethod + def get_loads_exclude(cls, exclude_load_id: str): + """ + 获取 exclude_load_id 以外的 load 及 tuning record的聚合 + :param exclude_load_id: 排除的load id + :return: + """ + + # 预取相关TuningRecord和ExamRecord + lower_str = DatabaseTuningRecord.__name__.lower() + + # lookup 参数为固定格式 xx__set + tuning_record_prefetch = Prefetch( + f'{lower_str}_set', + queryset=DatabaseTuningRecord.objects.select_related('load') + ) + + # 获取所有不包含exclude_load的Load记录 + loads = DatabaseLoad.objects.exclude(id=exclude_load_id) \ + .prefetch_related(tuning_record_prefetch) \ + .all() + + return loads diff --git a/omniadvisor/src/omniadvisor/service/retest_service.py b/omniadvisor/src/omniadvisor/service/retest_service.py index cddb5b133..c92138d9a 100644 --- a/omniadvisor/src/omniadvisor/service/retest_service.py +++ b/omniadvisor/src/omniadvisor/service/retest_service.py @@ -32,4 +32,5 @@ def retest(load: Load, config: dict): spark_output) tuning_result = get_tuning_result(load, config) if tuning_result.failed_times >= OA_CONF.config_fail_threshold: - raise RuntimeError('The number of retest failures has reached the failure threshold.') + global_logger.info('The number of retest failures has reached the failure threshold.') + return diff --git a/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result.py b/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result.py index 925df6265..c90840ae5 100644 --- a/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result.py +++ b/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result.py @@ -103,7 +103,7 @@ class TuningResult: """ if self.failed_times >= OA_CONF.config_fail_threshold: overall_status = OA_CONF.ExecStatus.fail - elif len(self._exam_records) == OA_CONF.tuning_retest_times: + elif len(self._exam_records) >= OA_CONF.tuning_retest_times: overall_status = OA_CONF.ExecStatus.success else: overall_status = OA_CONF.ExecStatus.running diff --git a/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py b/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py index 5985cc12f..2ad37512d 100644 --- a/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py +++ b/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py @@ -1,6 +1,7 @@ -from itertools import chain from typing import List +from omniadvisor.repository.exam_record_repository import ExamRecordRepository +from omniadvisor.repository.load_prefetch_repository import LoadPrefetchRepository from omniadvisor.repository.model.load import Load from omniadvisor.repository.tuning_record_repository import TuningRecordRepository from omniadvisor.service.tuning_result.tuning_result import TuningResult @@ -9,9 +10,35 @@ from omniadvisor.service.tuning_result.tuning_result import get_tuning_result from common.constant import OA_CONF -def get_other_tuning_result_history(load: Load): - # 暂未实现 - return None +def get_other_tuning_result_history(exclude_load: Load): + """ + 查询 exclude_load 以外的所有 tuning result history + :param exclude_load + :return: + """ + # 预取相关TuningRecord和ExamRecord + result = [] + loads = LoadPrefetchRepository.get_loads_exclude(exclude_load.id) + for load in loads: + # 获取该Load下的所有TuningRecord + tuning_records = load.databasetuningrecord_set.all() + tuning_results = [] + for tuning_record in tuning_records: + # 查询匹配的ExamRecord + exam_records = ExamRecordRepository.query_by_load_and_config(Load(load), tuning_record.config) + tuning_results.append( + TuningResult( + tuning_record=tuning_record, + exam_records=exam_records + ) + ) + + result.append(TuningResultHistory( + load=load, + tuning_results=tuning_results + )) + + return result def get_next_tuning_method(load) -> str: diff --git a/omniadvisor/src/server/app/models.py b/omniadvisor/src/server/app/models.py index 17243fce9..50f17da67 100644 --- a/omniadvisor/src/server/app/models.py +++ b/omniadvisor/src/server/app/models.py @@ -28,12 +28,7 @@ class DatabaseTuningRecord(models.Model): """ 调优记录模型 """ - TUNING_METHOD_CHOICES = { - (OA_CONF.TuningMethod.user, 'UserDefault'), - (OA_CONF.TuningMethod.iterative, 'Iterative'), - (OA_CONF.TuningMethod.expert, 'Expert'), - (OA_CONF.TuningMethod.transfer, 'Transfer'), - } + TUNING_METHOD_CHOICES = {(method, method.upper()) for method in OA_CONF.TuningMethod.all} id = models.AutoField(primary_key=True) load = models.ForeignKey( diff --git a/omniadvisor/tests/omniadvisor/interface/test_config_tuning.py b/omniadvisor/tests/omniadvisor/interface/test_config_tuning.py index 603375f68..30935faca 100644 --- a/omniadvisor/tests/omniadvisor/interface/test_config_tuning.py +++ b/omniadvisor/tests/omniadvisor/interface/test_config_tuning.py @@ -49,7 +49,7 @@ class TestTuning: with patch('omniadvisor.repository.load_repository.LoadRepository.query_by_id'), \ patch('omniadvisor.repository.tuning_record_repository.TuningRecordRepository.create'), \ patch('omniadvisor.service.retest_service.spark_run') as mock_spark_run, \ - patch('omniadvisor.interface.config_tuning.get_tuning_result_history'), \ + patch('omniadvisor.interface.config_tuning.get_tuning_result_history') as mock_tuning_result_history, \ patch('omniadvisor.service.retest_service.get_tuning_result') as mock_get_tuning_result, \ patch('omniadvisor.interface.config_tuning.remove_tuning_result') as mock_remove_tuning_result, \ patch('algo.iterative.tuning.SmacAppendTuning.tune') as mock_smac_tuning, \ @@ -62,13 +62,15 @@ class TestTuning: mock_tuning_result = MagicMock() mock_tuning_result.failed_times = OA_CONF.config_fail_threshold mock_get_tuning_result.return_value = mock_tuning_result - with pytest.raises(RuntimeError): - unified_tuning(load=self.load, retest_way=OA_CONF.RetestWay.backend, tuning_method=self.tuning_method) + tuning_result_history = MagicMock() + tuning_result_history.best_config = {**self.load.default_config, **self.tune_return_val[0]} + mock_tuning_result_history.return_value = tuning_result_history + unified_tuning(load=self.load, retest_way=OA_CONF.RetestWay.backend, tuning_method=self.tuning_method) mock_smac_tuning.assert_called_once() mock_spark_run.assert_called_once() mock_update_best.assert_not_called() - mock_remove_tuning_result.assert_called_once() + mock_remove_tuning_result.assert_not_called() assert 'Retest failed in round 1. Exception source: Spark exception' in caplog.text -- Gitee From 6d3f0c4639614e26f8598b97294d6d23832f2410 Mon Sep 17 00:00:00 2001 From: yang_feida Date: Thu, 22 May 2025 14:33:14 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=B0=83=E4=BC=98?= =?UTF-8?q?=E7=AD=96=E7=95=A5=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- omniadvisor/config/common_config.cfg | 1 + omniadvisor/src/common/constant.py | 9 ++++--- .../omniadvisor/interface/config_tuning.py | 27 +++++++++---------- .../tuning_result/tuning_result_history.py | 20 -------------- 4 files changed, 18 insertions(+), 39 deletions(-) diff --git a/omniadvisor/config/common_config.cfg b/omniadvisor/config/common_config.cfg index 8c8c0aa9e..0baba7fcd 100755 --- a/omniadvisor/config/common_config.cfg +++ b/omniadvisor/config/common_config.cfg @@ -1,6 +1,7 @@ [common] # 调优复测次数 tuning.retest.times=3 +tuning.strategy=[["transfer", 1],["expert", 2],["iterative", 10]] [spark] # Spark History Server的URL 仅用于Rest模式 diff --git a/omniadvisor/src/common/constant.py b/omniadvisor/src/common/constant.py index 3725f1d96..e5d041860 100644 --- a/omniadvisor/src/common/constant.py +++ b/omniadvisor/src/common/constant.py @@ -1,3 +1,4 @@ +import json import os import configparser @@ -63,10 +64,6 @@ class OmniAdvisorConf: backend = 'backend' all = [hijacking, backend] - # 规定每种调优方式的轮次 - # TODO 瞎拍的 - tuning_strategies = [(TuningMethod.expert, 5), (TuningMethod.iterative, 10)] - # 评估配置失效的阈值,当配置执行失败次数大于等于此值,则配置失效 config_fail_threshold = 1 @@ -80,4 +77,8 @@ class OmniAdvisorConf: spark_history_rest_url = _common_config.get('spark', 'spark.history.rest.url') timeout_ratio = _common_config.getfloat('spark', 'spark.sql.timeout.ratio') + # 规定每种调优方式的轮次 瞎拍的 + tuning_strategies = json.loads(_common_config.get('common', 'tuning.strategy')) + + OA_CONF = OmniAdvisorConf() diff --git a/omniadvisor/src/omniadvisor/interface/config_tuning.py b/omniadvisor/src/omniadvisor/interface/config_tuning.py index d81a1a0f9..14d19bd24 100644 --- a/omniadvisor/src/omniadvisor/interface/config_tuning.py +++ b/omniadvisor/src/omniadvisor/interface/config_tuning.py @@ -4,15 +4,16 @@ from algo.expert.tuning import ExpertTuning from algo.iterative.tuning import SmacAppendTuning from algo.native.tuning import NativeTuning from algo.transfer.tuning import TransferTuning +from common.constant import OA_CONF + from omniadvisor.repository.load_repository import LoadRepository from omniadvisor.repository.tuning_record_repository import TuningRecordRepository from omniadvisor.service.retest_service import retest from omniadvisor.service.tuning_result.tuning_result import remove_tuning_result from omniadvisor.service.tuning_result.tuning_result_history import get_tuning_result_history, \ - get_next_tuning_method, get_other_tuning_result_history -from omniadvisor.utils.logger import global_logger + get_other_tuning_result_history -from common.constant import OA_CONF +from omniadvisor.utils.logger import global_logger def _parse_tuning_args(): @@ -130,15 +131,11 @@ def main(): global_logger.info('The retest status of latest tuning result is still running, please wait.') return - if args.tuning_method: - # 用户输入了强制调优的请求,以用户为准 - unified_tuning(load, args.retest_way, args.tuning_method) - else: - tuning_method = get_next_tuning_method(load) - if not tuning_method: - global_logger.info( - 'The tuning times reaches default settings, if you want to append tuning times, add \'-t\' in your ' - 'command' - ) - return - unified_tuning(load, args.retest_way, tuning_method) + for tuning_strategy in OA_CONF.tuning_strategies: + tuning_method = tuning_strategy[0] + tuning_times = tuning_strategy[1] + for _ in range(tuning_times): + try: + unified_tuning(load, args.retest_way, tuning_method) + except Exception: + global_logger.info('The tuning method: %s encountered an exception, trying next.', tuning_method) diff --git a/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py b/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py index 2ad37512d..37df6bc1e 100644 --- a/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py +++ b/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py @@ -7,8 +7,6 @@ from omniadvisor.repository.tuning_record_repository import TuningRecordReposito from omniadvisor.service.tuning_result.tuning_result import TuningResult from omniadvisor.service.tuning_result.tuning_result import get_tuning_result -from common.constant import OA_CONF - def get_other_tuning_result_history(exclude_load: Load): """ @@ -41,24 +39,6 @@ def get_other_tuning_result_history(exclude_load: Load): return result -def get_next_tuning_method(load) -> str: - """ - 根据 load,获取调优历史记录,并根据OA_CONF中的tuning_strategies决定本轮的调优方法 - :param load - :return: 当前的调优方法 - """ - tuning_result_history = get_tuning_result_history(load) - statistics = tuning_result_history.collect_method_calls - next_tuning_method = None - for tuning_method, expected_tuning_times in OA_CONF.tuning_strategies: - # 历史中该方法的调优次数已经超过了设定的调优次数,跳过(超过是因为用户强制使用) - if statistics.get(tuning_method, 0) >= expected_tuning_times: - continue - next_tuning_method = tuning_method - break - return next_tuning_method - - def get_tuning_result_history(load: Load): """ 指定负载,获取调优结果所有历史 -- Gitee From 9cd53cd0a91315dcce871b81b0e6095e0ca8d105 Mon Sep 17 00:00:00 2001 From: yang_feida Date: Thu, 22 May 2025 15:34:57 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E8=AF=84=E5=AE=A1=E6=84=8F=E8=A7=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- omniadvisor/README.md | 4 +-- .../omniadvisor/interface/config_tuning.py | 27 +++++++++++-------- .../repository/load_prefetch_repository.py | 22 +++++++++++---- .../tuning_result/tuning_result_history.py | 7 +++-- 4 files changed, 38 insertions(+), 22 deletions(-) diff --git a/omniadvisor/README.md b/omniadvisor/README.md index 9cc3f4bf5..3f6ca35dd 100755 --- a/omniadvisor/README.md +++ b/omniadvisor/README.md @@ -18,13 +18,13 @@ - ① 各参数的详细介绍 - --load-id,即步骤2中loads表查询到的负载的id - --retest-way,复测方式(复测是为了保证结果的可靠),可选前台或者后台复测。若是前台复测,当用户提交任务时,跟随任务下发;若后台复测,则是由 - omniadvisor触发 + 系统触发 - --tuning-method,调优方法,不同的调优方法原理不同,iterative,即迭代调优,本质上使用贝叶斯优化方法;expert,即专家调优,通过诊断资源瓶颈, 给出调优建议;transfer,迁移调优,迁移相似负载的调优经验到陌生负载;native 算子加速,将Spark原生算子替换为C++ Native算子,同时使能CPU向量化执行,实现性能加速 - ② 命令示例 - `python tunning.py -l 1 -r backend -t expert`。说明:使用专家调优,对id为1的负载,进行调优,期间在后台复测,复测次数为 common_config.cfg 中 tuning.retest.times 的值 - - `python tunning.py -l 1 -r backend`。说明:使用omniadvisor中默认的调优策略以及历史调优记录,共同决定当前的调优方式 + - `python tunning.py -l 1 -r backend`。说明:使用系统中默认的调优策略以及历史调优记录,共同决定当前的调优方式 说明:调优结束后,会更新负载的最优配置信息,以便步骤1能够使用最优配置 diff --git a/omniadvisor/src/omniadvisor/interface/config_tuning.py b/omniadvisor/src/omniadvisor/interface/config_tuning.py index 14d19bd24..c5f4741e9 100644 --- a/omniadvisor/src/omniadvisor/interface/config_tuning.py +++ b/omniadvisor/src/omniadvisor/interface/config_tuning.py @@ -50,19 +50,19 @@ def unified_tuning(load, retest_way: str, tuning_method: str): # 推荐下一个配置 if tuning_method == OA_CONF.TuningMethod.iterative: - perf_history = get_tuning_result_history(load) - next_config, method_extend = SmacAppendTuning.tune(perf_history.tuning_history) + tuning_result_history = get_tuning_result_history(load) + next_config, method_extend = SmacAppendTuning.tune(tuning_result_history.tuning_history) elif tuning_method == OA_CONF.TuningMethod.expert: - perf_history = get_tuning_result_history(load) - next_config, method_extend = ExpertTuning.tune(perf_history.tuning_history) + tuning_result_history = get_tuning_result_history(load) + next_config, method_extend = ExpertTuning.tune(tuning_result_history.tuning_history) elif tuning_method == OA_CONF.TuningMethod.native: - perf_history = get_tuning_result_history(load) - next_config, method_extend = NativeTuning.tune(perf_history.tuning_history) + tuning_result_history = get_tuning_result_history(load) + next_config, method_extend = NativeTuning.tune(tuning_result_history.tuning_history) elif tuning_method == OA_CONF.TuningMethod.transfer: - perf_history = get_tuning_result_history(load) + tuning_result_history = get_tuning_result_history(load) other_history = get_other_tuning_result_history(load) next_config, method_extend = TransferTuning.tune( - perf_history.tuning_history, [other.tuning_history for other in other_history] + tuning_result_history.tuning_history, [other.tuning_history for other in other_history] ) else: raise ValueError(f'Not supported tuning method: {tuning_method}') @@ -85,10 +85,10 @@ def unified_tuning(load, retest_way: str, tuning_method: str): global_logger.info('Remove tuning result because the status is running.') remove_tuning_result(load, next_config) raise - perf_history = get_tuning_result_history(load) + tuning_result_history = get_tuning_result_history(load) # 更新最优配置 - if perf_history.best_config and perf_history.best_config != next_config: - LoadRepository.update_best_config(load, perf_history.best_config) + if tuning_result_history.best_config and tuning_result_history.best_config != next_config: + LoadRepository.update_best_config(load, tuning_result_history.best_config) else: # 更新待测试配置即可 LoadRepository.update_test_config(load, next_config) @@ -131,6 +131,11 @@ def main(): global_logger.info('The retest status of latest tuning result is still running, please wait.') return + if args.tuning_method or args.retest_way == OA_CONF.RetestWay.hijacking: + unified_tuning(load, args.retest_way, args.tuning_method) + return + + # 当且仅当后台复测,且未指定调优方法时,会连续拉起调优 for tuning_strategy in OA_CONF.tuning_strategies: tuning_method = tuning_strategy[0] tuning_times = tuning_strategy[1] diff --git a/omniadvisor/src/omniadvisor/repository/load_prefetch_repository.py b/omniadvisor/src/omniadvisor/repository/load_prefetch_repository.py index c093f1b58..c8a1dba3b 100644 --- a/omniadvisor/src/omniadvisor/repository/load_prefetch_repository.py +++ b/omniadvisor/src/omniadvisor/repository/load_prefetch_repository.py @@ -4,18 +4,21 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. from django.db.models import Prefetch from server.app.models import DatabaseTuningRecord, DatabaseLoad +from omniadvisor.repository.model.load import Load +from omniadvisor.repository.model.tuning_record import TuningRecord from omniadvisor.repository.repository import Repository class LoadPrefetchRepository(Repository): @classmethod - def get_loads_exclude(cls, exclude_load_id: str): + def get_loads_prefetch(cls, exclude_load: Load): """ - 获取 exclude_load_id 以外的 load 及 tuning record的聚合 - :param exclude_load_id: 排除的load id + 获取 exclude_load 的 id 以外的 load 及 tuning record的聚合 + :param exclude_load: 排除的load id :return: """ + res = [] # 预取相关TuningRecord和ExamRecord lower_str = DatabaseTuningRecord.__name__.lower() @@ -27,8 +30,17 @@ class LoadPrefetchRepository(Repository): ) # 获取所有不包含exclude_load的Load记录 - loads = DatabaseLoad.objects.exclude(id=exclude_load_id) \ + loads = DatabaseLoad.objects.exclude(id=exclude_load.id) \ .prefetch_related(tuning_record_prefetch) \ .all() - return loads + for prefetch_load in loads: + database_tuning_records = prefetch_load.databasetuningrecord_set.all() + res.append( + ( + Load(prefetch_load), + [TuningRecord(database_tuning_record) for database_tuning_record in database_tuning_records] + ) + ) + + return res diff --git a/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py b/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py index 37df6bc1e..a9dbbed41 100644 --- a/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py +++ b/omniadvisor/src/omniadvisor/service/tuning_result/tuning_result_history.py @@ -16,14 +16,13 @@ def get_other_tuning_result_history(exclude_load: Load): """ # 预取相关TuningRecord和ExamRecord result = [] - loads = LoadPrefetchRepository.get_loads_exclude(exclude_load.id) - for load in loads: + load_tuning_records_pairs = LoadPrefetchRepository.get_loads_prefetch(exclude_load) + for load, tuning_records in load_tuning_records_pairs: # 获取该Load下的所有TuningRecord - tuning_records = load.databasetuningrecord_set.all() tuning_results = [] for tuning_record in tuning_records: # 查询匹配的ExamRecord - exam_records = ExamRecordRepository.query_by_load_and_config(Load(load), tuning_record.config) + exam_records = ExamRecordRepository.query_by_load_and_config(load, tuning_record.config) tuning_results.append( TuningResult( tuning_record=tuning_record, -- Gitee