import logging
import os
import signal
from logging import handlers

from bayes_opt import BayesianOptimization

from . import benchmark
from .character import WORKLOAD_TYPE
from .db_agent import new_db_agent
from .db_env import DB_Env
from .knob import load_knobs_from_json_file
from .recommend import recommend_knobs
from .recorder import Recorder
from .utils import YELLOW_FMT


def prompt_restart_risks():
    """Warn the user that tuning may restart the database several times.

    Loops until the user answers ``yes`` (returns) or ``no`` (exits the
    process with status 0). Any other answer re-prompts.
    """
    hint = "WARN: The database may restart several times during tuning, continue or not [yes|no]:"
    answer = input(YELLOW_FMT.format(hint))
    while True:
        if answer.lower() == 'no':
            print(
                YELLOW_FMT.format(
                    "FATAL: Tuning program will exit because you are not currently allowed to interrupt business."
                )
            )
            exit(0)
        elif answer.lower() == 'yes':
            return
        else:
            answer = input(YELLOW_FMT.format("Please input yes or no:"))


def set_logger(filename):
    """Configure the root logger for the tuning run.

    Creates the log directory (mode 0700) if needed, then attaches:
    - a stream handler at WARNING level for the console, and
    - a rotating file handler (100 MiB per file, 5 backups) at INFO level.

    :param filename: Path of the log file to write.
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    dirname = os.path.dirname(filename)
    if not os.path.exists(dirname):
        os.makedirs(dirname, mode=0o700)

    stream_hdlr = logging.StreamHandler()
    stream_hdlr.setLevel(logging.WARNING)

    rota_hdlr = handlers.RotatingFileHandler(filename=filename,
                                             maxBytes=1024 * 1024 * 100,
                                             backupCount=5)
    rota_hdlr.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
                                  '%m-%d %H:%M:%S')
    rota_hdlr.setFormatter(formatter)

    logger.addHandler(stream_hdlr)
    logger.addHandler(rota_hdlr)


def procedure_main(mode, db_info, config):
    """
    This is the real entry for tuning programs.

    :param mode: Three modes: tune, train and recommend.
    :param db_info: Dict data structure. db_info is used to store information about
        the database to be connected and is transferred through the command line
        or configuration file.
    :param config: Information read from xtuner.conf.
    :return: Exit status code.
    """
    # Set the minimum permission on the output files.
    os.umask(0o0077)
    # Initialize logger.
    set_logger(config['logfile'])
    logging.info('Starting... (mode: %s)', mode)
    db_agent = new_db_agent(db_info)

    # Clarify the scenario:
    if config['scenario'] in WORKLOAD_TYPE.TYPES:
        db_agent.metric.set_scenario(config['scenario'])
    else:
        config['scenario'] = db_agent.metric.workload_type
    # Clarify tune strategy:
    if config['tune_strategy'] == 'auto':
        # If more iterations are allowed, reinforcement learning is preferred.
        if config['rl_steps'] * config['max_episode_steps'] > 1500:
            config['tune_strategy'] = 'rl'
        else:
            config['tune_strategy'] = 'gop'

    logging.info("Configurations: %s.", config)
    # If 'tuning_list' is set in the configuration file and we are not in
    # recommend mode, load the tuning-list file; otherwise obtain the list of
    # knobs to tune automatically via recommend_knobs().
    if config['tuning_list'].strip() != '' and mode != 'recommend':
        print("You have configured the tuning list, so use this list to tune.")
        knobs = load_knobs_from_json_file(config['tuning_list'])
    else:
        print("Start to recommend knobs. Just a moment, please.")
        knobs = recommend_knobs(mode, db_agent.metric)
    if not knobs:
        logging.fatal('No recommended best_knobs for the database. Stop the execution.')
        return -1

    # If the recommend mode is not used,
    # the benchmark running and best_knobs tuning process need to be iterated.
    if mode != 'recommend':
        prompt_restart_risks()  # Users need to be informed of risks.

        recorder = Recorder(config['recorder_file'])
        # Read benchmark_script, benchmark_path and benchmark_cmd from the
        # configuration file; together they identify the benchmark driver
        # instance used to measure database performance.
        bm = benchmark.get_benchmark_instance(config['benchmark_script'],
                                              config['benchmark_path'],
                                              config['benchmark_cmd'],
                                              db_info)
        # Initialize the tuning environment. It wraps one tuning iteration and
        # keeps the same interface as the reinforcement-learning gym.Env class.
        env = DB_Env(db_agent, benchmark=bm, recorder=recorder,
                     drop_cache=config['drop_cache'],
                     mem_penalty=config['used_mem_penalty_term'])
        env.set_tuning_knobs(knobs)

        print('The benchmark will start to run iteratively. '
              'This process may take a long time. Please wait a moment.')
        if mode == 'tune':
            # Run once the performance under the default knob configuration.
            # Its id is 0, aka the first one.
            original_knobs = db_agent.get_default_normalized_vector()
            env.step(original_knobs)
            try:
                if config['tune_strategy'] == 'gop':
                    global_search(env, config)
                else:
                    raise ValueError('Incorrect tune strategy: %s.'
                                     % config['tune_strategy'])

            except KeyboardInterrupt:
                # Ignore further SIGINTs so the result-generation below is not
                # interrupted a second time.
                signal.signal(signal.SIGINT, signal.SIG_IGN)
                print("Trigger an interrupt via the keyboard. "
                      "Continue to generate current tuning results.")

            # Rollback/reset to the original/initial knobs while the tuning process is finished.
            db_agent.set_knob_normalized_vector(original_knobs)
            # Modify the variable `knobs` with tuned result.
            recorder.give_best(knobs)
        else:
            raise ValueError('Incorrect mode value: %s.' % mode)

    # After the above process is executed, the tuned best_knobs are output.
    knobs.output_formatted_knobs()
    if config['output_tuning_result'] != '':
        with open(config['output_tuning_result'], 'w+') as fp:
            # In reinforcement learning training mode,
            # only the training knob list is dumped, but the recommended knob result is not dumped.
            # This is because, in tune mode of reinforcement learning,
            # users can directly load the dumped file as the knob tuning list.
            knobs.dump(fp, dump_report_knobs=mode != 'train')
    logging.info('X-Tuner is executed and ready to exit. '
                 'Please refer to the log for details of the execution process.')
    return 0


def global_search(env, config):
    """Run a global-optimization ("gop") algorithm over the tuning environment.

    Currently only Bayesian optimization ('bayes') is supported. Each probe of
    the optimizer performs one env.step() with a normalized knob vector in
    [0, 1] per knob and maximizes the returned reward.

    :param env: DB_Env instance whose knobs have been set via set_tuning_knobs().
    :param config: Dict read from xtuner.conf; uses 'gop_algorithm' and
        'max_iterations'.
    :raises ValueError: If config['gop_algorithm'] names an unsupported algorithm.
    """
    method = config['gop_algorithm']
    if method == 'bayes':
        action = [0 for _ in range(env.nb_actions)]
        # Each knob is tuned over the normalized range [0, 1].
        pbound = {name: (0, 1) for name in env.db.ordered_knob_list}

        # Black-box objective function adapting env.step() to the interface of
        # the third-party bayes_opt library.
        def performance_function(**params):
            if not len(params) == env.nb_actions:
                raise AssertionError('Failed to check the input feature dimension.')

            for name, val in params.items():
                index = env.db.ordered_knob_list.index(name)
                action[index] = val

            s, r, d, _ = env.step(action)
            return r  # Wishes to maximize.

        optimizer = BayesianOptimization(
            f=performance_function,
            pbounds=pbound
        )
        # More iterations give a more accurate result but take more time.
        optimizer.maximize(
            n_iter=config['max_iterations']
        )
    else:
        # Previously an unknown algorithm fell through silently, making the
        # whole "tuning" run a no-op; fail loudly instead.
        raise ValueError('Incorrect gop algorithm: %s.' % method)