diff --git a/docs/zh/server/performance/tuning_framework/oeaware/oeaware_user_guide.md b/docs/zh/server/performance/tuning_framework/oeaware/oeaware_user_guide.md
index 0ecf0f4bec77cda8b7aa537ad9aa891d9b5e5702..312775c74741257629b5a8e8c7f6be701585115c 100644
--- a/docs/zh/server/performance/tuning_framework/oeaware/oeaware_user_guide.md
+++ b/docs/zh/server/performance/tuning_framework/oeaware/oeaware_user_guide.md
@@ -24,21 +24,21 @@ yum install oeAware-manager
 systemctl start oeaware
 ```
 
-Configuration file
+### Configuration File
 
-Configuration file path: /etc/oeAware/config.yaml
+Configuration file path: `/etc/oeAware/config.yaml`.
 
 ```yaml
 log_path: /var/log/oeAware # log storage path
-log_level: 1 # log level 1: DEBUG 2: NFO 3: WARN 4: ERROR
+log_level: 1 # log level 1: DEBUG 2: INFO 3: WARN 4: ERROR
 enable_list: # plugins enabled by default
-- name: libtest.so # plugin only: enables all instances of this plugin
-- name: libtest1.so # plugin with instances: enables only the listed instances
-instances:
-- instance1
-- instance2
-...
-...
+  - name: libtest.so # plugin only: enables all instances of this plugin
+  - name: libtest1.so # plugin with instances: enables only the listed instances
+    instances:
+      - instance1
+      - instance2
+      ...
+  ...
 plugin_list: # downloadable packages
   - name: test # the name must be unique; if duplicated, the first entry is used
     description: hello world
@@ -54,7 +54,7 @@ systemctl restart oeaware
 
 ### Plugin Description
 
-**Plugin definition**: each plugin corresponds to one .so file. Plugins are divided into collection plugins, scenario-awareness plugins, and tuning plugins.
+**Plugin definition**: each plugin corresponds to one `.so` file. Plugins are divided into collection plugins, scenario-awareness plugins, and tuning plugins.
 
 **Instance definition**: the scheduling unit within the service is the instance, and one plugin contains multiple instances. For example, a collection plugin can include several collection items, each of which is an instance.
 
@@ -62,7 +62,7 @@ systemctl restart oeaware
 
 By default, the service loads the plugins found in the plugin storage path.
 
-Plugin path: /usr/lib64/oeAware-plugin/
+Plugin path: `/usr/lib64/oeAware-plugin/`.
 
 Plugins can also be loaded manually.
 
@@ -70,7 +70,7 @@ systemctl restart oeaware
 oeawarectl -l | --load <plugin name>
 ```
 
-Example
+Example:
 
 ```shell
 [root@localhost ~]# oeawarectl -l libthread_collect.so
@@ -85,7 +85,7 @@ Plugin loaded successfully.
 oeawarectl -r <plugin name> | --remove <plugin name>
 ```
 
-Example
+Example:
 
 ```shell
 [root@localhost ~]# oeawarectl -r libthread_collect.so
@@ -103,36 +103,57 @@
 oeawarectl -q # query all plugins loaded in the system
 oeawarectl --query <plugin name> # query the specified plugin
 ```
 
-Example
+Example:
 
 ```shell
-[root@localhost ~]# oeawarectl -q 
+[root@localhost ~]# oeawarectl -q
 Show plugins and instances status.
 ------------------------------------------------------------
+libthread_scenario.so
+    thread_scenario(available, close, count: 0)
+libanalysis_oeaware.so
+    hugepage_analysis(available, close, count: 0)
+    dynamic_smt_analysis(available, close, count: 0)
+    smc_d_analysis(available, close, count: 0)
+    xcall_analysis(available, close, count: 0)
+    net_hirq_analysis(available, close, count: 0)
+    numa_analysis(available, close, count: 0)
+    docker_coordination_burst_analysis(available, close, count: 0)
+    microarch_tidnocmp_analysis(available, close, count: 0)
+libscenario_numa.so
+    scenario_numa(available, close, count: 12)
 libsystem_tune.so
     stealtask_tune(available, close, count: 0)
+    dynamic_smt_tune(available, close, count: 0)
     smc_tune(available, close, count: 0)
     xcall_tune(available, close, count: 0)
+    transparent_hugepage_tune(available, close, count: 0)
     seep_tune(available, close, count: 0)
-libpmu.so
-    pmu_counting_collector(available, close, count: 0)
-    pmu_sampling_collector(available, close, count: 0)
-    pmu_spe_collector(available, close, count: 0)
-    pmu_uncore_collector(available, close, count: 0)
+    preload_tune(available, close, count: 0)
+    binary_tune(available, close, count: 0)
+    numa_sched_tune(available, close, count: 0)
+    net_hard_irq_tune(available, close, count: 0)
+    multi_net_path_tune(available, close, count: 0)
 libdocker_tune.so
     docker_cpu_burst(available, close, count: 0)
-libthread_scenario.so
-    thread_scenario(available, close, count: 0)
-libsystem_collector.so
-    thread_collector(available, close, count: 0)
-    kernel_config(available, close, count: 0)
-    command_collector(available, close, count: 0)
+    docker_burst(available, close, count: 0)
+    load_based_scheduling_tune(available, close, count: 0)
+libpmu.so
+    pmu_counting_collector(available, close, count: 0)
+    pmu_sampling_collector(available, close, count: 12)
+    pmu_spe_collector(available, close, count: 12)
+    pmu_uncore_collector(available, close, count: 12)
 libdocker_collector.so
     docker_collector(available, close, count: 0)
+libtune_numa.so
+    tune_numa_mem_access(available, close, count: 12)
 libub_tune.so
     unixbench_tune(available, close, count: 0)
-libanalysis_oeaware.so
-    analysis_aware(available, close, count: 0)
+libsystem_collector.so
+    thread_collector(available, close, count: 0)
+    kernel_config(available, close, count: 0)
+    command_collector(available, close, count: 0)
+    env_info_collector(available, close, count: 0)
+    net_interface_info(available, close, count: 0)
 ------------------------------------------------------------
 format:
 [plugin]
@@ -144,6 +165,14 @@ enable cnt: number of instances enabled.
 
 On failure, an error description is returned.
 
+#### Querying Tuning Instance Information
+
+```shell
+oeawarectl --info
+```
+
+Displays the descriptions and running status of the tuning instances.
+
 #### Querying the Subscription Relationships of Running Instances
 
 ```shell
@@ -155,7 +184,7 @@ oeawarectl --query-dep=<plugin instance> # query the subscription graph of a running instance
 
 If an instance is not running, its subscription relationships are not displayed.
 
-Example
+Example:
 
 ```sh
 oeawarectl -e thread_scenario
@@ -229,13 +258,29 @@ oeawarectl -i | --install # specify a package name obtained with --list
 oeawarectl analysis -h
 usage: oeawarectl analysis [options]...
 options
-  -t|--time set analysis duration in seconds(default 30s), range from 1 to 100.
-  -r|--realtime show real time report.
-  -v|--verbose show verbose information.
-  -h|--help show this help message.
+  -t|--time                    set analysis duration in seconds(default 30s), range from 1 to 100.
+  -r|--realtime                show real time report.
+  -v|--verbose                 show verbose information.
+  -h|--help                    show this help message.
+  --l1-miss-threshold          set l1 tlbmiss threshold.
+  --l2-miss-threshold          set l2 tlbmiss threshold.
+  --out-path                   set the path of the analysis report.
+  --dynamic-smt-threshold      set dynamic smt cpu threshold.
+  --pid                        set the pid to be analyzed.
+  --numa-thread-threshold      set numa sched thread creation threshold.
+  --smc-change-rate            set smc connections change rate threshold.
+  --smc-localnet-flow          set smc local net flow threshold.
+  --host-cpu-usage-threshold   set host cpu usage threshold.
+  --docker-cpu-usage-threshold set docker cpu usage threshold.
 ```
 
-Example
+`--l1-miss-threshold` sets the L1 TLB miss threshold; a miss rate above this threshold is reported as high.
+
+`--l2-miss-threshold` sets the L2 TLB miss threshold; a miss rate above this threshold is reported as high.
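+
+As a hypothetical sketch of how these options combine (the threshold values below are purely illustrative), an analysis run could be started as:
+
+```shell
+oeawarectl analysis -t 60 --l1-miss-threshold 5 --l2-miss-threshold 10
+```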
+
+Example:
 
 Run the following command to output a system analysis report.
 
 ```bash
 oeawarectl analysis -t 10
 ```
 
-The result is as follows
-
-```bash
-============================================================================================
-                                   Summary Analysis Report
-============================================================================================
- ========================================= Suggest =========================================
- Tune Instance        | Suggest | Note
- stealtask_tune       | No      | CpuRatio(average) : 0.17%
- smc_tune             | No      | Collecting very little network access
- gazelle              | No      | Collecting very little network access
- tune_numa_mem_access | No      | No access
- ========================================= Network =========================================
-
- Local network communication distribution
-          Node0    Node1    Node2    Node3
-          0.00%    0.00%    0.00%    0.00%
- Remote network communication distribution(receive)
- matrix representation of network thread nodes to irq nodes
-          Node0    Node1    Node2    Node3
- Node0  100.00%    0.00%    0.00%    0.00%
- Node1    0.00%    0.00%    0.00%    0.00%
- Node2    0.00%    0.00%    0.00%    0.00%
- Node3    0.00%    0.00%    0.00%    0.00%
- ======================================== Solution ========================================
- No solution.
-```
-
 The report consists of three parts:
 
-- Suggest: gives tuning suggestions based on the system's runtime state.
-- Network: network usage.
-- Solution: gives concrete tuning methods.
+- Data Analysis: analyzes system performance data based on the system's runtime state.
+- Analysis Conclusion: gives the conclusion of the system analysis.
+- Analysis Suggestion: gives concrete tuning methods.
 
 ### Help
 
@@ -284,6 +301,7 @@ oeawarectl analysis -t 10
 ```shell
 usage: oeawarectl [options]...
   options
+   analysis                   run analysis mode.
    -l|--load [plugin]         load plugin.
    -r|--remove [plugin]       remove plugin from system.
    -e|--enable [instance]     enable the plugin instance.
@@ -293,7 +311,9 @@ usage: oeawarectl [options]...
    -Q                         query all instances dependencies.
    --query-dep [instance]     query the instance dependency.
    --list                     the list of supported plugins.
+   --info                     the list of InfoCmd plugins.
    -i|--install [plugin]      install plugin from the list.
+   --reload-conf              reload config file(now only support log level).
    --help                     show this help message.
 ```
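+
+For example, after changing `log_level` in `/etc/oeAware/config.yaml`, the new level can be applied without restarting the service; a minimal sketch using the `--reload-conf` option above (which, per the help text, currently only reloads the log level):
+
+```shell
+oeawarectl --reload-conf
+```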
 
@@ -361,17 +381,17 @@ protected:
 
 | Attribute | Type | Description |
 | --- | --- | --- |
 | name | string | instance name |
 | version | string | instance version (reserved) |
 | description | string | instance description |
 | supportTopics | vector\<Topic\> | supported topics |
-| priority | int | instance execution priority (tuning > scenario-awareness > collection)|
+| priority | int | instance execution priority (tuning > scenario-awareness > collection) |
 | type | int | instance type, encoded in bit flags: bit 2 marks a run-once instance, bit 3 a collection instance, bit 4 a scenario-awareness instance, and bit 5 a tuning instance |
 | period | int | instance execution period in ms; period must be a multiple of 10 |
 
 ### Interface Description
 
 | Function | Parameters | Return value | Description |
 | --- | --- | --- | --- |
 | Result OpenTopic(const Topic &topic) | topic: the topic to open | | opens the given topic |
 | void CloseTopic(const Topic &topic) | topic: the topic to close | | closes the given topic |
 | void UpdateData(const DataList &dataList) | dataList: the subscribed data | | while a topic is subscribed, the subscribed topic delivers new data through UpdateData once per period |
@@ -409,18 +429,20 @@ public:
     }
 }
 oeaware::Result Enable(const std::string &param = "") override {
+    // Note: Subscribe, Publish, and SetDataListTopic are not part of the
+    // Interface base class shown above; they are provided by the oeAware framework.
     Subscribe(oeaware::Topic{"thread_collector", "thread_collector", ""});
     return oeaware::Result(OK);
 }
 void Disable() override {
 
 }
 void Run() override {
     DataList dataList;
     oeaware::SetDataListTopic(&dataList, "test", "test", "");
     dataList.len = 1;
     dataList.data = new void* [1];
     dataList.data[0] = &pubData;
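+    // Publish() hands the filled DataList to the framework, which forwards it to
+    // every subscriber of the "test" topic; Run() is invoked once per period.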
     Publish(dataList);
 }
 private:
@@ -439,8 +461,8 @@ extern "C" void GetInstance(std::vector> &in
 
 | Instance | Architecture | Description | topic |
 | --- | --- | --- | --- |
-| pmu_counting_collector | aarch64 | collects counting events |cycles,net:netif_rx,L1-dcache-load-misses,L1-dcache-loads,L1-icache-load-misses,L1-icache-loads,branch-load-misses,branch-loads,dTLB-load-misses,dTLB-loads,iTLB-load-misses,iTLB-loads,cache-references,cache-misses,l2d_tlb_refill,l2d_cache_refill,l1d_tlb_refill,l1d_cache_refill,inst_retired,instructions |
-| pmu_sampling_collector | aarch64 | collects sampling events | cycles, skb:skb_copy_datagram_iovec,net:napi_gro_receive_entry |
+| pmu_counting_collector | aarch64 | collects counting events | cycles,net:netif_rx,L1-dcache-load-misses,L1-dcache-loads,L1-icache-load-misses,L1-icache-loads,branch-load-misses,branch-loads,dTLB-load-misses,dTLB-loads,iTLB-load-misses,iTLB-loads,cache-references,cache-misses,l2d_tlb_refill,l2d_cache_refill,l1d_tlb_refill,l1d_cache_refill,l1d_tlb,l1i_tlb,l1i_tlb_refill,l2d_tlb,l2i_tlb,l2i_tlb_refill,inst_retired,instructions,sched:sched_process_fork,sched:sched_process_exit |
+| pmu_sampling_collector | aarch64 | collects sampling events | cycles,skb:skb_copy_datagram_iovec,net:napi_gro_receive_entry |
 | pmu_spe_collector | aarch64 | collects SPE events | spe |
 | pmu_uncore_collector | aarch64 | collects uncore events | uncore |
 
@@ -448,7 +470,7 @@
 
 Collecting SPE events depends on hardware capability: this plugin relies on the SPE feature in the BIOS, which must be enabled before the plugin runs.
 
-Run perf list | grep arm_spe to check whether SPE is already enabled. If it is enabled, the following is displayed
+Run `perf list | grep arm_spe` to check whether SPE is already enabled. If it is enabled, the following is displayed:
 
 ```sh
 arm_spe_0//                                        [Kernel PMU event]
 ```
 
 If SPE is not enabled, enable it with the following steps.
 
-Check the status of the BIOS option MISC Config --> SPE. If the status is Disable , change it to Enable. If the option cannot be found, the BIOS version may be too old.
+Check the status of the BIOS option MISC Config --> SPE. If the status is Disable, change it to Enable. If the option cannot be found, the BIOS version may be too old.
 
-Log in to the system, run vim /etc/grub2-efi.cfg, locate the boot entry of the corresponding kernel version, and append "kpti=off" at the end. For example:
+Log in to the system, run `vim /boot/efi/EFI/openEuler/grub.cfg`, locate the boot entry of the corresponding kernel version, and append `kpti=off` at the end. For example:
 
 ```sh
 linux   /vmlinuz-4.19.90-2003.4.0.0036.oe1.aarch64 root=/dev/mapper/openeuler-root ro rd.lvm.lv=openeuler/root rd.lvm.lv=openeuler/swap video=VGA-1:640x480-32@60me rhgb quiet smmu.bypassdev=0x1000:0x17 smmu.bypassdev=0x1000:0x15 crashkernel=1024M,high video=efifb:off video=VGA-1:640x480-32@60me kpti=off
 ```
 
-Press "ESC", type ":wq", and press "Enter" to save and exit. Run the reboot command to restart the server.
+Press **ESC**, type `:wq`, and press **Enter** to save and exit. Run the reboot command to restart the server.
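+
+After the reboot, you can confirm that the parameter took effect; a minimal check using standard tooling:
+
+```shell
+cat /proc/cmdline | grep kpti
+```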
 
 ### libsystem_collector.so
 
@@ -508,6 +530,7 @@ fsdisk
 
 | Instance | Architecture | Description | Subscribes to |
 | --- | --- | --- | --- |
-| analysis_aware | analyzes the workload characteristics of the current environment and gives optimization suggestions | aarch64 | pmu_spe_collector::spe, pmu_counting_collector::net:netif_rx, pmu_sampling_collector::cycles, pmu_sampling_collector::skb:skb_copy_datagram_iovec, pmu_sampling_collector::net:napi_gro_receive_entry |
+| analysis_aware | aarch64 | analyzes the workload characteristics of the current environment and gives optimization suggestions | pmu_spe_collector::spe, pmu_counting_collector::net:netif_rx, pmu_sampling_collector::cycles, pmu_sampling_collector::skb:skb_copy_datagram_iovec, pmu_sampling_collector::net:napi_gro_receive_entry |
 
 ### libsystem_tune.so
 
@@ -519,10 +542,18 @@
 | smc_tune | aarch64 | enables SMC acceleration, transparently accelerating connections that use the TCP protocol | none |
 | xcall_tune | aarch64 | improves system performance by reducing system call overhead | thread_collector::thread_collector |
 | seep_tune | aarch64 | enables the smart power mode to reduce system energy consumption | none |
+| transparent_hugepage_tune | aarch64/x86 | enables transparent huge pages to reduce TLB misses | none |
+| preload_tune | aarch64 | transparently preloads dynamic libraries | none |
+| binary_tune | aarch64 | binds special binaries running in containers to physical cores; the programs to tune are identified by parsing special sections in their ELF files, and CPU affinity is applied according to the configuration to improve performance | env_info::static, env_info::realtime, thread_collector::thread_collector, docker_collector::docker_collector |
+| cluster_tune | aarch64 | enables CPU cluster scheduling to optimize performance | none |
+| dynamic_smt_tune | aarch64 | prefers physical cores in low-load scenarios to reduce SMT interference between sibling cores | none |
+| numa_sched_tune | aarch64 | for scenarios with a NUMA bottleneck, keeps threads scheduled within the same NUMA node for as much of their lifetime as possible | none |
+| hardirq_tune | aarch64 | binds the interrupts of NIC queues to the same NUMA node as the workloads that use them, reducing cross-NUMA access | none |
+| multi_net_path | aarch64 | NIC multi-path tuning: each interrupt handles only the traffic on its own NUMA node | none |
 
 #### Configuration Files
 
-xcall.yaml
+##### xcall.yaml
 
 ``` yaml
 redis: # thread name
@@ -533,9 +564,18 @@ node:
 - xcall_1: 1
 ```
 
-#### Restrictions
+**Restrictions**: xcall_tune depends on a kernel feature: the kernel must be built with FAST_SYSCALL enabled, and the xcall field must be added to the kernel cmdline.
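+
+To check whether the running kernel's cmdline already carries the field, a minimal check (the exact field value depends on your kernel configuration):
+
+```shell
+cat /proc/cmdline | grep xcall
+```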
 
-xcall_tune depends on a kernel feature: the kernel must be built with FAST_SYSCALL enabled, and the xcall field must be added to the cmdline.
+##### preload.yaml
+
+Path: `/etc/oeAware/preload.yaml`
+
+```yaml
+- appname: ""
+  so: ""
+```
+
+Running the `oeawarectl -e preload_tune` command loads the configured shared objects into the corresponding processes according to this configuration file.
 
 ### libub_tune.so
 
 The unixbench tuning plugin.
 
@@ -550,10 +590,13 @@
 | Instance | Architecture | Description | Subscribes to |
 | --- | --- | --- | --- |
 | docker_cpu_burst | aarch64 | when a burst load occurs, CPUBurst can temporarily grant a container extra CPU resources, easing the performance bottleneck caused by the CPU limit | pmu_counting_collector::cycles,docker_collector::docker_collector |
+| docker_coordination_burst_tune | aarch64 | tracks the CPU quotas of multiple containers and redistributes idle CPU capacity to containers that are short of compute | none |
+| load_based_scheduling_tune | aarch64 | for containers whose load exceeds the threshold, automatically enables tidal scheduling so that resources are distributed more evenly across containers | docker_collector::docker_collector, env_info_collector::static, pmu_sampling_collector::cycles |
+| docker_cluster_affinity | aarch64 | on systems with a cluster architecture, schedules containers with awareness of the cluster topology, tracks the CPU load across containers, and adjusts quota resources between containers (for scenarios with unbalanced resource load across multiple containers) | l3c_hit, docker_collector::docker_collector |
 
 ## External Plugins
 
-External plugins must be installed with the following command, for example, installing the numafast plugins
+External plugins must be installed with the following command, for example, installing the numafast plugins.
 
 ```sh
 oeawarectl -i numafast
 ```
 
@@ -571,6 +614,76 @@
 | Instance | Architecture | Description | Subscribes to |
 | --- | --- | --- | --- |
 | tune_numa_mem_access | aarch64 | periodically migrates threads and memory to reduce cross-NUMA memory access | scenario_numa::system_score, pmu_spe_collector::spe, pmu_counting_collector::cycles |
 
+#### Using tune_numa_mem_access
+
+All parameters of tune_numa_mem_access and their effects can be listed with `--help`:
+
+```shell
+[root@localhost ~]# oeawarectl -e tune_numa_mem_access -cmd "--help cmd"
+Instance enabled failed, because Invalid parameter: --help cmd, please check.
+/etc/numafast.yaml parse success.
+opt not found
+Usage: oeaware -e tune_numa_mem_access --cmd "[options][]"
+   or vim /etc/numafast.yaml and set options
+   attr:c => support conf by cmdline, y => support conf by yaml, r => support reload yaml online
+Options:
+  -i, --sampling-interval   attr:cy, every sampling interval n msec, range is [100, 100000], default is 100
+  -t, --sampling-times      attr:cy, every optimizing have n times sampling, range is [1, 1000] default is 10
+  -m, --tune-mode           attr:cy, tune mode, mode can be [b, t, p], default is b
+                            b: migrate page and thread
+                            t: migrate thread only
+                            p: migrate page only
+  -w, --load-way            attr:cy, load way, can be [b, c], default is b
+                            b: balance the load of threads on all numa nodes
+                            c: centralize processes to fewer numas based on load
+  --smt                     attr:cy, smt mode, can be [off, phy-first, load-first], default is phy-first
+                            off: disable smt
+                            phy-first: migrate threads to physical cores first, may limit load
+                            load-first: migrate threads to physical cores based on load, limit load
+  -h, --help                attr:c, show help info, type can be [cmd, yaml], default is cmd
+  -v, --version             attr:c, show version info
+  -W, --whitelist
+                            attr:cy, only migrate process in the list, regexp list split by comma, if not set, migrate all process.
+  -b, --blacklist
+                            attr:cy, do not migrate process in the list, regexp list split by comma, priority higher than whitelist.
+  --precise-load            attr:cy, load control precisely
+  --mem-numa-aggregation    attr:cy, process memory aggregate by numa
+  other options refer to /etc/numafast.yaml
+
+[root@localhost format]# oeawarectl -e tune_numa_mem_access -cmd "--help yaml"
+Instance enabled failed, because show help message:
+Usage: vim /etc/numafast.yaml and set options
+sampling-interval: # every sampling interval n msec, range is [100, 100000], default is 100
+sampling-times: # every optimizing have n times sampling, range is [1, 1000] default is 10
+tune-mode: # tune mode, mode can be [b, t, p], default is b
+           # b: migrate page and thread
+           # t: migrate thread only
+           # p: migrate page only
+load-way: # load way, can be [b, c], default is b
+          # b: balance the load of threads on all numa nodes
+          # c: centralize processes to fewer numas based on load
+smt: # smt mode, can be [off, phy-first, load-first], default is phy-first
+     # off: disable smt
+     # phy-first: migrate threads to physical cores first, may limit load
+     # load-first: migrate threads to physical cores based on load, limit load
+whitelist: [] # only migrate process in the list, regexp list split by comma, if not set, migrate all process.
+group: # process affinity group
+       # - [process1, process2, ...]
+min-numa-score: # min numa score, range is [0 ,1000], default is 955
+max-numa-score: # max numa score, range is [0, 1000], default is 975
+min-rx-ops-per-ms: # min rx ops per ms, default is 10000
+numa-ratio: [] # process initial load distribution for each node
+page-reserve: # page reserve, range is [0, 4294967295], default is 100000
+precise-load: # load control precisely
+mem-numa-aggregation: # process memory aggregate by numa
+process: # process config
+         # - name: process1 # process name, /proc/pid/comm
+         #   params-regex: "" # process params regex, /proc/pid/cmdline
+         #   algorithm: "" # process algorithm, support [MigrateThreadsToOneNode, BalanceProcNum]
+         #   migrate-all-memory: "" # migrate all memory, support [true, false]
+         #   default-mig-mem-node: "" # default migrate memory node, support [0, numa_node_num - 1]
+         #   net-affinity: "" # process net affinity, set net interface name
+```
+
 ## SDK Usage
 
 ```C