import logging
import os
import signal
from logging import handlers

from bayes_opt import BayesianOptimization

from . import benchmark
from .character import WORKLOAD_TYPE
from .db_agent import new_db_agent
from .db_env import DB_Env
from .knob import load_knobs_from_json_file
from .recommend import recommend_knobs
from .recorder import Recorder
from .utils import YELLOW_FMT


def prompt_restart_risks():
    """Warn the user that tuning may restart the database several times.

    Loops until the user answers ``yes`` (returns) or ``no`` (exits the
    process with status 0). Any other answer re-prompts.
    """
    hint = "WARN: The database may restart several times during tuning, continue or not [yes|no]:"
    answer = input(YELLOW_FMT.format(hint))
    while True:
        if answer.lower() == 'no':
            print(
                YELLOW_FMT.format(
                    "FATAL: Tuning program will exit because you are not currently allowed to interrupt business."
                )
            )
            exit(0)
        elif answer.lower() == 'yes':
            return
        else:
            answer = input(YELLOW_FMT.format("Please input yes or no:"))


def set_logger(filename):
    """Configure the root logger for the tuning run.

    Creates the log directory (mode 0700) if needed, then attaches:
    - a stream handler at WARNING level for the console, and
    - a rotating file handler (100 MiB per file, 5 backups) at INFO level.

    :param filename: Path of the log file to write.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    dirname = os.path.dirname(filename)
    if not os.path.exists(dirname):
        os.makedirs(dirname, mode=0o700)

    stream_hdlr = logging.StreamHandler()
    stream_hdlr.setLevel(logging.WARNING)

    rota_hdlr = handlers.RotatingFileHandler(filename=filename,
                                             maxBytes=1024 * 1024 * 100,
                                             backupCount=5)
    rota_hdlr.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
                                  '%m-%d %H:%M:%S')
    rota_hdlr.setFormatter(formatter)

    logger.addHandler(stream_hdlr)
    logger.addHandler(rota_hdlr)


def procedure_main(mode, db_info, config):
    """
    This is the real entry for tuning programs.

    :param mode: Three modes: tune, train and recommend.
    :param db_info: Dict data structure. db_info is used to store information about
        the database to be connected and is transferred through the command line
        or configuration file.
    :param config: Information read from xtuner.conf.
    :return: Exit status code.
    """
    # Set the minimum permission on the output files.
    os.umask(0o0077)
    # Initialize logger.
    set_logger(config['logfile'])
    logging.info('Starting... (mode: %s)', mode)
    db_agent = new_db_agent(db_info)

    # Clarify the scenario:
    if config['scenario'] in WORKLOAD_TYPE.TYPES:
        db_agent.metric.set_scenario(config['scenario'])
    else:
        config['scenario'] = db_agent.metric.workload_type
    # Clarify tune strategy:
    if config['tune_strategy'] == 'auto':
        # If more iterations are allowed, reinforcement learning is preferred.
        if config['rl_steps'] * config['max_episode_steps'] > 1500:
            config['tune_strategy'] = 'rl'
        else:
            config['tune_strategy'] = 'gop'

    logging.info("Configurations: %s.", config)
    # If 'tuning_list' is set in the configuration file and we are not in
    # recommend mode, load the tuning-list file; otherwise obtain the list of
    # knobs to tune automatically via recommend_knobs().
    if config['tuning_list'].strip() != '' and mode != 'recommend':
        print("You have configured the tuning list, so use this list to tune.")
        knobs = load_knobs_from_json_file(config['tuning_list'])
    else:
        print("Start to recommend knobs. Just a moment, please.")
        knobs = recommend_knobs(mode, db_agent.metric)
    if not knobs:
        logging.fatal('No recommended best_knobs for the database. Stop the execution.')
        return -1

    # If the recommend mode is not used,
    # the benchmark running and best_knobs tuning process need to be iterated.
    if mode != 'recommend':
        prompt_restart_risks()  # Users need to be informed of risks.

        recorder = Recorder(config['recorder_file'])
        # Read benchmark_script, benchmark_path and benchmark_cmd from the
        # configuration file; together they identify the benchmark driver
        # instance used to measure database performance.
        bm = benchmark.get_benchmark_instance(config['benchmark_script'],
                                              config['benchmark_path'],
                                              config['benchmark_cmd'],
                                              db_info)
        # Initialize the tuning environment. It wraps one tuning iteration and
        # keeps the same interface as the reinforcement-learning gym.Env class.
        env = DB_Env(db_agent, benchmark=bm, recorder=recorder,
                     drop_cache=config['drop_cache'],
                     mem_penalty=config['used_mem_penalty_term'])
        env.set_tuning_knobs(knobs)

        print('The benchmark will start to run iteratively. '
              'This process may take a long time. Please wait a moment.')
        if mode == 'tune':
            # Run once the performance under the default knob configuration.
            # Its id is 0, aka the first one.
            original_knobs = db_agent.get_default_normalized_vector()
            env.step(original_knobs)
            try:
                if config['tune_strategy'] == 'gop':
                    global_search(env, config)
                else:
                    raise ValueError('Incorrect tune strategy: %s.'
                                     % config['tune_strategy'])

            except KeyboardInterrupt:
                # Ignore further SIGINTs so the result-generation below is not
                # interrupted a second time.
                signal.signal(signal.SIGINT, signal.SIG_IGN)
                print("Trigger an interrupt via the keyboard. "
                      "Continue to generate current tuning results.")

            # Rollback/reset to the original/initial knobs while the tuning process is finished.
            db_agent.set_knob_normalized_vector(original_knobs)
            # Modify the variable `knobs` with tuned result.
            recorder.give_best(knobs)
        else:
            raise ValueError('Incorrect mode value: %s.' % mode)

    # After the above process is executed, the tuned best_knobs are output.
    knobs.output_formatted_knobs()
    if config['output_tuning_result'] != '':
        with open(config['output_tuning_result'], 'w+') as fp:
            # In reinforcement learning training mode,
            # only the training knob list is dumped, but the recommended knob result is not dumped.
            # This is because, in tune mode of reinforcement learning,
            # users can directly load the dumped file as the knob tuning list.
            knobs.dump(fp, dump_report_knobs=mode != 'train')
    logging.info('X-Tuner is executed and ready to exit. '
                 'Please refer to the log for details of the execution process.')
    return 0


def global_search(env, config):
    """Run a global-optimization ("gop") algorithm over the tuning environment.

    Currently only Bayesian optimization ('bayes') is supported. Each probe of
    the optimizer performs one env.step() with a normalized knob vector in
    [0, 1] per knob and maximizes the returned reward.

    :param env: DB_Env instance whose knobs have been set via set_tuning_knobs().
    :param config: Dict read from xtuner.conf; uses 'gop_algorithm' and
        'max_iterations'.
    :raises ValueError: If config['gop_algorithm'] names an unsupported algorithm.
    """
    method = config['gop_algorithm']
    if method == 'bayes':
        action = [0 for _ in range(env.nb_actions)]
        # Each knob is tuned over the normalized range [0, 1].
        pbound = {name: (0, 1) for name in env.db.ordered_knob_list}

        # Black-box objective function adapting env.step() to the interface of
        # the third-party bayes_opt library.
        def performance_function(**params):
            if not len(params) == env.nb_actions:
                raise AssertionError('Failed to check the input feature dimension.')

            for name, val in params.items():
                index = env.db.ordered_knob_list.index(name)
                action[index] = val

            s, r, d, _ = env.step(action)
            return r  # Wishes to maximize.

        optimizer = BayesianOptimization(
            f=performance_function,
            pbounds=pbound
        )
        # More iterations give a more accurate result but take more time.
        optimizer.maximize(
            n_iter=config['max_iterations']
        )
    else:
        # Previously an unknown algorithm fell through silently, making the
        # whole "tuning" run a no-op; fail loudly instead.
        raise ValueError('Incorrect gop algorithm: %s.' % method)