diff --git a/tools/Boostkit_ISV_Tool.md b/tools/Boostkit_ISV_Tool.md
index 13a1781016c893ecffe370c7a6e235d056656c6b..99cacf093c664e7c6eadddccf72d1c94e57b1871 100644
--- a/tools/Boostkit_ISV_Tool.md
+++ b/tools/Boostkit_ISV_Tool.md
@@ -14,6 +14,7 @@
 - **注意**:只需修改要采集的特性相关的配置,其余配置保持不变即可
 - **注意**:采集过程中会尝试采集对应解决方案下的所有特性,因此对未使能特性的采集出现报错是正常现象,不影响最终结果
+- 新版工具同时支持对毕昇JDK、HPCkit特性的信息收集

 ### **1.2采集工具配置文件说明**

@@ -72,6 +73,18 @@

 spark的安装路径,默认为/home/

+> hive_bin_path=hive
+
+hive 可执行文件路径
+
+
+
+> hive_local_mode=0
+
+是否使用 local 模式启动 hive
+
+
+
 > omnioperator_dir=/opt/omni-operator

 算子加速的安装路径,默认为/opt/omni-operator
@@ -102,7 +115,7 @@ spark的安装路径,默认为/home/

 > omnimv_dir=/omnimv

-omnimv 目录路径, 用于物化视图特性验证
+omnimv 目录路径, 用于spark场景下的物化视图特性验证

 > omnidata_launcher_server=
@@ -140,6 +153,14 @@ shuffle加速相关jar包路径,用于shuffle加速特性验证

 shuffle加速 ock 安装路径,用于shuffle加速特性验证

+> omnishield_jar=/home/omnishield/omnishield-1.0-SNAPSHOT.jar
+
+omnishield相关的jar包路径,用于机密大数据特性验证
+
+> doris_install_dir=/opt/tools/installed/doris-2.1.2-rc04
+
+doris的安装目录,用于Doris指令优化特性验证
+

 #### **1.2.2分布式存储配置文件说明**

 ![bigdata](./BoostKit_pic/storage.png)
@@ -178,13 +199,28 @@ ceph 配置文件路径,用于“压缩算法”特性验收

 > non_ceph_bin=/tmp/non_ceph.bin

-非ceph场景下的软件二进制路径,用于“存储加速算法库”特性验收
+非ceph场景下的软件二进制路径,用于“存储加速算法库(KSAL)”特性验收

 > non_ceph_pid=38799

-非ceph场景下的软件进程id,用于“存储加速算法库”特性验收
+非ceph场景下的软件进程id,用于“存储加速算法库(KSAL)”特性验收

+> nvmf_tgt_bin=/tmp/nvmf_tgt
+
+nvmf_tgt 二进制路径,用于“KAE使能SPDK”中的crc特性验收
+
+> rdma_ceph_conf=/etc/ceph/ceph.conf
+
+ceph14 请填写ceph配置文件路径,ceph17请填写osd配置文件路径,比如 /var/lib/ceph/{ceph cluster id}/osd.0/config.
+用于“RDMA网络加速”特性验收
+
+> ceph_bin=ceph
+
+ceph 二进制路径,用于“RDMA网络加速”特性验收
+
+> boostio_bin=/tmp/bio_test
+
+集成BOOSTIO的软件可执行文件路径。boostio集成部署时,需要填写该字段,用于“BoostIO”特性验收

 #### **1.2.3数据库配置文件说明**
@@ -257,7 +293,7 @@ check 默认为False, 置为True 则开启ARM原生特性使能信息收集功

 > instuction_container=kbox_3

-分别为待采集的kbox、视频流、指令流容器名称。要求3个容器均存在,否则会退出脚本。
+分别为待采集的kbox、视频流、指令流容器名称。


@@ -293,33 +329,29 @@ check 默认为False, 置为True 则开启加速库特性使能信息收集功

 引用数学库的字节码文件位置

+> crc32_path=/home/test.bin

-#### **1.2.6虚拟化配置文件说明**
-
-![bigdata](./BoostKit_pic/virtual.png)
-
-
+使用到了“CRC32指令优化”的二进制或动态库的路径,用于“CRC32指令优化”特性验收

-注意:
+> zstd_bin=/home/test.bin

-1.OVS流表网卡加速特性需再host测执行,配置文件无需修改
+使用了 zstd压缩算法的二进制文件路径,用于“Zstd压缩算法”特性验收

-2.虚拟化DPU卸载 特性需要再DPU测执行采集工具, 配置文件无需修改
+> lz4_bin=/home/test.bin

-3.OVS流表归一化 特性验证前需准备环境,让物理机B上的虚拟机持续ping物理机A上的虚拟机,然后在物理机A上运行采集工具
-该特性默认支持验收, 配置文件无需修改
+使用了 lz4压缩算法的二进制文件路径,用于“Lz4压缩算法”特性验收

-OpenStack 验证需要以下信息
+#### **1.2.6虚拟化配置文件说明**

-> server_name=vm1
+![bigdata](./BoostKit_pic/virtual.png)

-> network=port1_vdpa01
-> flavor=4U4G80G
-> volume=ceph1_centos2
+注意:

-> availability_zone=nova:dpu01
+1.OVS流表网卡加速特性需在host侧执行,配置文件无需修改
+2.K8S NUMA自适应插件、K8S MPAM插件验收时配置文件无需修改
+3.virsh console 命令同时只能有一个用户在线,采集前需让已登录的用户退出

 > vm_ip=71.14.48.104
@@ -341,6 +373,67 @@ OpenStack 验证需要以下信息

 虚拟机名称, 用于验收高性能云盘优化特性

+> schedule_vm_name=vm1
+
+虚拟机名称,用于验收虚拟化调度优化特性
+
+> schedule_vm_user=root
+
+虚拟机用户名,用于验收虚拟化调度优化特性
+
+> schedule_vm_password=password
+
+虚拟机密码,用于验收虚拟化调度优化特性
+
+> hot_swap_vm_name=vm1
+
+虚拟机名称,用于验收热插拔特性
+
+> hot_swap_vm_user=root
+
+虚拟机用户名,用于验收热插拔特性
+
+> hot_swap_vm_password=password
+
+虚拟机密码,用于验收热插拔特性
+
+> hot_swap_cpu_count=4
+
+热插拔cpu测试时,设置的 cpu 数量,用于验收热插拔特性
+
+> hot_swap_mem_xml_path=/tmp/mem.xml
+
+用于热插拔内存测试的xml,用于验收热插拔特性。
+例: 将 NUMA node 3 的内存设置为 1024M,对应的 xml 内容示例
+```xml
+<memory model='dimm'>
+  <target>
+    <size unit='MiB'>1024</size>
+    <node>3</node>
+  </target>
+</memory>
+```
+
+> kae_live_migrate_vm_name=vm_name
+
+虚拟机名称,用于KAE加速热迁移特性验收
+
+> hardware_acc_vm_name=vm1
+
+虚拟机名称,用于验收虚拟化硬件辅助加速特性
+
+> hardware_acc_vm_user=root
+
+虚拟机用户名,用于验收虚拟化硬件辅助加速特性
+
+> hardware_acc_vm_password=password
+
+虚拟机密码,用于验收虚拟化硬件辅助加速特性
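+
+注:调度优化、热插拔、硬件辅助加速等特性均通过 virsh console 自动登录上述虚拟机执行检查命令(见注意事项第3条)。采集前可先手动确认 console 能正常登录,示意如下(vm1 为假设的虚拟机名):
+
+```shell
+virsh console vm1
+# 出现登录提示即说明 console 可用;确认后按 Ctrl+] 退出,避免占用会话导致采集卡住
+```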
+ +> sr_iov_pod=test_pod + +pod 名称,用于验收K8s SR-IOV直通插件特性 + #### **1.2.7 机密计算配置文件说明** ![bigdata](./BoostKit_pic/virt.png) @@ -375,32 +468,19 @@ check 默认为False, 置为True 则开启数据库相关特性使能信息收 check 默认为False, 置为True 则开启HPC相关特性使能信息收集功能 -> acc_lib +> common_acc_lib_bin=/tmp/binary -引用HPC加速库改造二进制文件绝对路径 +引用HPC通用加速库改造的二进制文件绝对路径, 多个路径用空格分隔 -> sme +> domain_acc_lib_bin=/tmp/binary -引用HPC SME改造二进制文件绝对路径 +引用HPC领域加速库改造的二进制文件绝对路径, 多个路径用空格分隔 -> sve_source_code - -填写有用到 sve 的源码目录或文件的绝对路径 ### 1.3 采集工具使用说明 -#### 1.3.1 二进制使用 - ``` -./collect_msg +bash collect_msg.sh # 同级目录下具有config.ini 配置文件,且已按1.2中对应解决方案完成相关参数配置 -``` - -#### 1.3.2 shell 脚本使用 - -``` -bash collect_msg.sh -# 同级目录下具有config.ini 配置文件, 且已按1.2中对应解决方案完成相关参数配置 -``` - +``` \ No newline at end of file diff --git a/tools/collect_msg.sh b/tools/collect_msg.sh index 113d2d8a1cb5211f1b8ccdf807d2d02c149c919c..480f1ae9961cefd66195693a125605d508b48d37 100644 --- a/tools/collect_msg.sh +++ b/tools/collect_msg.sh @@ -67,11 +67,7 @@ collect_arm_native(){ kbox_container=$1 video_container=$2 instruction_container=$3 - # check tested container whether exist - containers=($1 $2 $3) - for i in ${containers[@]};do - docker ps -a | grep -wq $i || { echo "the $i container doesn't found,please check!"; exit 1; } - done + # kbox基础云手机 rm -f $log_path/arm_native.log @@ -91,58 +87,9 @@ collect_arm_native(){ #################虚拟化特性信息收集################## -collect_virtual_host(){ - sudo systemctl status waasagent.service |grep "Active" > $log_path/virtual_sense.log - waasctl --version >> $log_path/virtual_sense.log - - ovs_appctl_res=$(ovs-appctl --version 2>&1) - - if [[ $ovs_appctl_res =~ "command not found" ]]; - then - echo "ovs-appctl: command not found" > $log_path/virtual_dpu_flow.log - else - echo "ovs-appctl version: $ovs_appctl_res" > $log_path/virtual_dpu_flow.log - script -a -c 'ovs-appctl hwoff/dump-hwoff-flows' $log_path/virtual_dpu_flow.log - fi -} - - -collect_virtual_dpu(){ - server_name=$1 - network=$2 - flavor=$3 - volume=$4 - availability_zone=$5 - - # 需要再DPU测执行 - dpak_ovs_ctl_res=$(dpak-ovs-ctl -h 2>&1) - if [[ $dpak_ovs_ctl_res =~ "command not found" ]]; - then - echo "请确定已在DPU测执行该工具" - echo "dpak_ovs_ctl: command not found" > $log_path/virtual_dpu.log - else - echo "dpak_ovs_ctl version: $dpak_ovs_ctl_res" > $log_path/virtual_dpu.log - script -a -c 'dpak-ovs-ctl hwoff/dump-hwoff-flows' $log_path/virtual_dpu.log - fi - - /usr/libexec/spdk/scripts/hw_dpu_rpc.py get_version >> $log_path/virtual_dpu.log 2>&1 - /usr/libexec/spdk/scripts/hw_dpu_rpc.py get_controllers >> $log_path/virtual_dpu.log 2>&1 - - # 创建虚机 - openstack_res=$(openstack --version 2>&1) - - if [[ $openstack_res =~ "command not found" ]]; - then - echo "请确定已在DPU测执行该工具" - echo "openstack: command not found" >> $log_path/virtual_dpu.log - else - echo "openstack version: $openstack_res" >> $log_path/virtual_dpu.log - openstack server create $server_name --network $network --flavor $flavor --volume $volume --availability-zone $availability_zone >> $log_path/virtual_dpu.log - echo "等待虚机创建完成" - sleep 120 - echo "server_name: $server_name" >> $log_path/virtual_dpu.log - openstack server list >> $log_path/virtual_dpu.log - fi +collect_virtual_ovs_xpf(){ + ovs-appctl hwoff/dump-hwoff-flows > $log_path/virtual_ovs_xpf.log + echo $? >> $log_path/virtual_ovs_xpf.log } @@ -160,9 +107,146 @@ collect_virtual_ceph(){ } -collect_virtual_ovs_xpf(){ - ovs-appctl hwoff/dump-hwoff-flows > $log_path/virtual_ovs_xpf.log - echo $? 
>> $log_path/virtual_ovs_xpf.log
+collect_virtual_schedule(){
+    vm_name=$1
+    vm_user=$2
+    vm_password=$3
+    expect << EOF > $log_path/virtual_schedule.log
+        spawn virsh console $vm_name
+        expect "Escape character is \\^]"
+        send "\r"
+        expect "login:"
+        send {$vm_user}
+        send "\r"
+        expect "Password:"
+        send {$vm_password}
+        send "\r"
+        expect "# "
+        send "numactl -H | grep available --color=never \r"
+        expect "# "
+        send "cat /proc/sys/kernel/sched_cluster \r"
+        expect "# "
+        send "dmesg | grep PV --color=never \r"
+        expect "# "
+        send "dmesg | grep 'NMI watchdog' --color=never \r"
+        expect "# "
+        send "lscpu \r"
+        expect "# "
+        send "exit\r"
+        expect eof
+EOF
+
+    virsh dumpxml $vm_name | grep memnode > $log_path/virtual_schedule_xml.log
+    virsh dumpxml $vm_name | grep cacheinfo >> $log_path/virtual_schedule_xml.log
+}
+
+
+collect_virtual_hot_swap(){
+    hot_swap_vm_name=$1
+    hot_swap_vm_user=$2
+    hot_swap_vm_password=$3
+    hot_swap_cpu_count=$4
+    hot_swap_mem_xml_path=$5
+
+    echo $hot_swap_cpu_count > $log_path/virtual_hot_swap_vcpu.log
+    if virsh setvcpus $hot_swap_vm_name --count $hot_swap_cpu_count --live; then
+        expect << EOF >> $log_path/virtual_hot_swap_vcpu.log
+            spawn virsh console $hot_swap_vm_name
+            expect "Escape character is \\^]"
+            send "\r"
+            expect "login:"
+            send {$hot_swap_vm_user}
+            send "\r"
+            expect "Password:"
+            send {$hot_swap_vm_password}
+            send "\r"
+            expect "# "
+            send "lscpu | grep 'On-line CPU' --color=never \r"
+            expect "# "
+            send "exit\r"
+            expect eof
+EOF
+    fi
+
+    cp $hot_swap_mem_xml_path $log_path/virtual_hot_swap_mem_xml.log
+    expect << EOF > $log_path/virtual_hot_swap_mem.log
+        spawn virsh console $hot_swap_vm_name
+        expect "Escape character is \\^]"
+        send "\r"
+        expect "login:"
+        send {$hot_swap_vm_user}
+        send "\r"
+        expect "Password:"
+        send {$hot_swap_vm_password}
+        send "\r"
+        expect "# "
+        send "numactl -H \r"
+        expect "# "
+        send "exit\r"
+        expect eof
+EOF
+}
+
+
+collect_virtual_live_migrate(){
+    kae_live_migrate_vm_name=$1
+    virsh dumpxml $kae_live_migrate_vm_name > $log_path/virtual_live_migrate_xml.log
+    virsh start $kae_live_migrate_vm_name
+    nohup timeout 20 watch -gt -n 0.2 cat /sys/class/uacce/hisi_zip-*/available_instances > $log_path/virtual_live_migrate.log &
+}
+
+
+collect_virtual_hardware_acc(){
+    vm_name=$1
+    vm_user=$2
+    vm_password=$3
+    dmesg | grep GIC > $log_path/virtual_hardware_acc.log
+    expect << EOF >> $log_path/virtual_hardware_acc.log
+        spawn virsh console $vm_name
+        expect "Escape character is \\^]"
+        send "\r"
+        expect "login:"
+        send {$vm_user}
+        send "\r"
+        expect "Password:"
+        send {$vm_password}
+        send "\r"
+        expect "# "
+        send "dmesg | grep SGI --color=never \r"
+        expect "# "
+        send "exit\r"
+        expect eof
+EOF
+}
+
+
+collect_k8s_numa(){
+    kubectl get pod | grep numaadj- > $log_path/virtual_k8s_numa.log
+}
+
+
+collect_virtual_sr_iov(){
+    pod=$1
+    kubectl -n kube-system get pod | grep kube-sriov-device-plugin- > $log_path/virtual_sr_iov.log
+    kubectl exec -it $pod -- ls /dev >> $log_path/virtual_sr_iov.log
+}
+
+
+collect_k8s_mpam(){
+    kubectl get pod | grep mpam-controller-daemon-agent- > $log_path/virtual_k8s_mpam.log
+}
+
+
+collect_virtual_host(){
+    ovs_appctl_res=$(ovs-appctl --version 2>&1)
+
+    if [[ $ovs_appctl_res =~ "command not found" ]];
+    then
+        echo "ovs-appctl: command not found" > $log_path/virtual_dpu_flow.log
+    else
+        echo "ovs-appctl version: $ovs_appctl_res" > $log_path/virtual_dpu_flow.log
+        script -a -c 'ovs-appctl hwoff/dump-hwoff-flows' $log_path/virtual_dpu_flow.log
+    fi
 }
@@ -216,10 +300,10 @@ collect_database_other_db(){
     greenplum_port=$3
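+    # kae_version:KAE 1.0 时 available_instances 位于 hisi_zip*/attrs/ 下,其他版本直接位于 hisi_zip*/ 设备目录下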
     kae_version=$4
     greenplum_kae_sql=$5
-    readelf -a $other_other_db_bin | grep bolt > $log_path/database_other_db.log
-    objdump -d $other_other_db_bin | grep crc32cb >> $log_path/database_other_db.log
+    readelf -a $other_db_bin | grep bolt > $log_path/database_other_db.log
+    objdump -d $other_db_bin | grep crc32cb >> $log_path/database_other_db.log
     if [ "$kae_version" == "1.0" ]; then
-        nohup timout 20 watch -gt -n 0.2 cat /sys/class/uacce/hisi_zip*/attrs/available_instances > $log_path/database_greenplum_kae.log &
+        nohup timeout 20 watch -gt -n 0.2 cat /sys/class/uacce/hisi_zip*/attrs/available_instances > $log_path/database_greenplum_kae.log &
     else
         nohup timeout 20 watch -gt -n 0.2 cat /sys/class/uacce/hisi_zip*/available_instances > $log_path/database_greenplum_kae.log &
     fi
@@ -259,9 +343,11 @@ collect_virtcca_msg(){
         expect "Escape character is \\^]"
         send "\r"
         expect "login:"
-        send "$username\r"
+        send {$username}
+        send "\r"
         expect "Password:"
-        send "$passwd\r"
+        send {$passwd}
+        send "\r"
         expect "# "
         send "ls -l /\r"
         expect "# "
@@ -288,6 +374,10 @@ collect_acceleration_library(){
     math_jar=$4
     math_java=$5
     openssl speed -elapsed -engine kae rsa2048 > $log_path/acceleration_library.log 2>&1
+    lspci | grep ZIP >> $log_path/acceleration_library.log
+    ls -l /usr/local/kaezip/lib/ >> $log_path/acceleration_library.log
+    ls -l /usr/local/kaezstd/lib/ >> $log_path/acceleration_library.log
+    ls -l /usr/local/kaelz4/lib/ >> $log_path/acceleration_library.log
     ldd $1 >> $log_path/acceleration_library.log
     ldd $2 >> $log_path/acceleration_library.log
     ldd $3 >> $log_path/acceleration_library.log
@@ -296,6 +386,25 @@
 }


+collect_acceleration_library_crc32(){
+    crc32_path=$1
+    objdump -d $crc32_path | grep crc32cb > $log_path/acceleration_library_crc32.log
+}
+
+
+collect_acceleration_library_kail_dnn(){
+    env | grep LD_LIBRARY_PATH | grep '/usr/local/kail/lib' > $log_path/acceleration_library_kail_dnn.log
+}
+
+
+collect_acceleration_library_compress(){
+    zstd_bin=$1
+    lz4_bin=$2
+    ldd $zstd_bin > $log_path/acceleration_library_compress.log
+    ldd $lz4_bin | grep liblz4.so | awk '{print $3}' | xargs -I {} nm -D {} >> $log_path/acceleration_library_compress.log
+    ldd $(which gzip) >> $log_path/acceleration_library_compress.log
+}
+
 ###############分布式存储特性信息收集###############
 # $1 ec_pool 名字
 collect_storage_acc(){
@@ -331,17 +440,45 @@
     systemctl status ceph.target > $log_path/storage_comporess.log
         \cp "$ceph_conf".bak "$ceph_conf"
     fi
+
+    ceph daemon osd.0 config show | grep compaction_enabled > $log_path/storage_compaction.log
+    ceph daemon osd.0 config show | grep compaction_opt_space_enabled >> $log_path/storage_compaction.log
+
     ldd $storage_maintain_bin > $log_path/storage_maintain_tool.log
     lib_rocksdb=$(ldd $rocksdb_bin | grep librocksdb | awk '{print $3}')
     ldd $lib_rocksdb > $log_path/storage_rocksdb.log
     ldd $ucache_bin > $log_path/storage_ucache.log
+    ceph -s > $log_path/storage_spdk_io.log
+    cephadm shell sh /var/lib/ceph/spdk_lib/scripts/setup.sh status | grep NVMe >> $log_path/storage_spdk_io.log
+
     if ldd $non_ceph_bin | grep ksal; then
         timeout 20 perf top -p $non_ceph_pid > $log_path/storage_non_ceph_perf_top.log
     fi
 }
+
+
+collect_storage_spdk(){
+    nvmf_tgt_bin=$1
+    ls /usr/local/lib/engines-*/ > $log_path/storage_kae.log
+    ldd $nvmf_tgt_bin > $log_path/storage_spdk_crc.log
+}
+
+
+collect_storage_rdma(){
+    rdma_ceph_conf=$1
+    ceph_bin=$2
+    cat $rdma_ceph_conf > $log_path/storage_rdma.log
+    $ceph_bin -s >> $log_path/storage_rdma.log
+}
+
+
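+# BoostIO 特性采集:记录 bio 相关进程,并用 ldd 检查集成 BOOSTIO 的可执行文件($1)的动态库链接情况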
+collect_storage_boostio(){
+    boostio_bin=$1
+    ps -ef | grep bio > $log_path/storage_boostio.log
+    ldd $boostio_bin >> $log_path/storage_boostio.log
+}
+
 ###############大数据特性信息收集##################
 collect_bigdata_kal(){
     algotithm_list=$1
@@ -507,7 +644,7 @@
 }


-collect_bigdata_operator(){
+collect_bigdata_operator_spark(){
     # 日志记录位置 log_path/bigdata_operator.log
     spark_path=$1
     database=$2
@@ -581,8 +718,42 @@
 }


+collect_bigdata_operator_hive(){
+    hive_bin_path=$1
+    hive_local_mode=$2
+    omnioperator_dir=$3
+
+    extra_options=''
+    if [ "$hive_local_mode" == "1" ]; then
+        extra_options='--hiveconf tez.local.mode=true --hiveconf tez.runtime.optimize.local.fetch=true'
+    fi
+    OMNI_HOME=${omnioperator_dir} HIVE_AUX_JARS_PATH=${omnioperator_dir}/lib LD_LIBRARY_PATH=${omnioperator_dir}/lib:$LD_LIBRARY_PATH \
+    $hive_bin_path --database default --hiveconf tez.task.launch.env=OMNI_HOME=${omnioperator_dir},LD_LIBRARY_PATH=${omnioperator_dir}/lib --hiveconf hive.cbo.enable=true --hiveconf hive.exec.reducers.max=600 --hiveconf hive.exec.compress.intermediate=true --hiveconf hive.tez.container.size=8192 --hiveconf tez.am.resource.memory.mb=8192 --hiveconf tez.task.resource.memory.mb=8192 --hiveconf tez.runtime.io.sort.mb=128 --hiveconf hive.merge.tezfiles=true --hiveconf tez.am.container.reuse.enabled=true --hiveconf hive.exec.pre.hooks=com.huawei.boostkit.hive.OmniExecuteWithHookContext --hiveconf hive.vectorized.execution.enabled=false --hiveconf hive.fetch.task.conversion=none $extra_options > $log_path/bigdata_operator.log << EOF
+    drop table if exists test_omni_hive;
+    create table if not exists test_omni_hive(id int, name string, age int) stored as orc;
+    insert into test_omni_hive values(1, 'alice', 22),(2, 'bob', 23),(3, 'amy', 24);
+    explain select * from test_omni_hive order by id;
+    select * from test_omni_hive order by id;
+    exit;
+EOF
+}
+
+
 collect_bigdata_hbase(){
+    hbase shell << EOF
+        create 'OnlySingleIndexTable', 'C0','C1'
+        put 'OnlySingleIndexTable', 'row1', 'C0:F0', 'aaa'
+        put 'OnlySingleIndexTable', 'row1', 'C0:F1', 'bbb,bbb'
+        put 'OnlySingleIndexTable', 'row1', 'C1:F0', 'bbb'
+        put 'OnlySingleIndexTable', 'row2', 'C0:F0', 'opo'
+        put 'OnlySingleIndexTable', 'row2', 'C0:F1', 'bbb'
+        put 'OnlySingleIndexTable', 'row2', 'C1:F0', 'sdasd'
+        put 'OnlySingleIndexTable', 'row3', 'C0:F0', 'icic'
+        put 'OnlySingleIndexTable', 'row3', 'C0:F1', '23123'
+        put 'OnlySingleIndexTable', 'row3', 'C1:F0', 'bbb'
+        exit
+EOF
     hbase com.huawei.boostkit.hindex.mapreduce.GlobalTableIndexer -Dtablename.to.index=OnlySingleIndexTable -Dindexspecs.to.addandbuild='osindex=>C0:[F1]'
     hbase shell << EOF > $log_path/bigdata_hbase.log 2>&1
     debug
@@ -597,7 +768,7 @@ collect_bigdata_tune_up(){
     mysql_username=$2
     mysql_password=$3
     mysql_database=$4
-    if [ -e $omniadvisor_dir/omniadvisor ];
+    if [ -e $omniadvisor_dir/logs/omniadvisor ];
     then
         echo "omniadvisor.log" >> $log_path/bigdata_tune_up.log
     else
@@ -706,45 +877,53 @@
 EOF
 }


+collect_bigdata_omni_shield(){
+    spark_path=$1
+    omnishield_jar=$2
+    $spark_path/bin/spark-submit --class org.apache.spark.examples.SparkPi --master local \
+        --conf spark.driver.extraClassPath=$omnishield_jar --conf spark.executor.extraClassPath=$omnishield_jar \
+        --jars $omnishield_jar --executor-memory 20G --num-executors 2 --conf spark.authenticate=true \
+        --conf spark.authenticate.secret=testsecret --conf spark.network.crypto.enabled=true \
+        --conf 
spark.network.crypto.config.cipher.classes=com.huawei.boostkit.omnishield.cipher.SM4Cipher \ + --conf spark.network.crypto.cipher=SM4/GCM/NOPadding $spark_path/examples/jars/spark-examples_2.12-3.3.1.jar > $log_path/bigdata_omni_shield.log 2>&1 +} + + +collect_bigdata_omni_yarn() { + grep "LoadBasedCapacityScheduler" /usr/local/hadoop/logs/hadoop-root-resourcemanager-*.log > $log_path/bigdata_omni_yarn.log 2>&1 +} + + +collect_bigdata_doris() { + doris_install_dir=$1 + grep neonmovemask_bulk $doris_install_dir/thirdparty/src/bitshuffle-0.5.1/src/bitshuffle_core.c > $log_path/bigdata_doris.log 2>&1 +} + + collect_bigdata_components(){ spark_path=$1 + doris_install_dir=$2 $spark_path/bin/spark-sql --version && echo spark > $log_path/bigdata_components.log hive --version && echo hive >> $log_path/bigdata_components.log hbase version && echo hbase >> $log_path/bigdata_components.log + ls $doris_install_dir && echo doris >> $log_path/bigdata_components.log } #################HPC特性信息收集################## # $1 #用户可执行文件路径 -collect_hpc_acceleration_library(){ - bin_path=$1 - rm -rf $log_path/hpc_acceleration_library.log - touch $log_path/hpc_acceleration_library.log - ldd $bin_path > $log_path/hpc_acceleration_library.log 2>&1 +collect_hpc_common_acceleration_library(){ + common_acc_lib_bin=$1 + ldd $common_acc_lib_bin > $log_path/hpc_common_acceleration_library.log 2>&1 } -# $1 #用户可执行文件路径 -collect_sme_acceleration_library(){ - bin_path=$1 - ifsme=`lscpu|grep Flags|grep sme` - if [ -n "$ifsme" ]; then - bin_path=$1 #用户可执行文件路径 - rm -rf $log_path/hpc_SME_library.log - touch $log_path/hpc_SME_library.log - ldd $bin_path | grep SME >> $log_path/hpc_SME_library.log 2>&1 - objdump -d $bin_path >> $log_path/hpc_SME_library.log 2>&1 - else - echo "架构不支持SME" >> $log_path/hpc_SME_library.log - fi - -} -collect_sve_source_code(){ - grep -r arm_sve.h "$1" > $log_path/hpc_sve.log +collect_hpc_domain_acceleration_library(){ + domain_acc_lib_bin=$1 + ldd $domain_acc_lib_bin > $log_path/hpc_domain_acceleration_library.log 2>&1 } - ################环境信息收集####################### collect_os_msg(){ echo os: $(cat /etc/os-release |grep PRETTY_NAME=|awk -F= '{print $2}') > $log_path/os_msg.log @@ -755,6 +934,8 @@ collect_os_msg(){ main(){ + # remove \r at end of line + sed -i 's/\r$//' $config_file select_project=$(acquire_select_project) echo "开始收集BoostKit 特性信息如下:$select_project" mkdir -p $log_path @@ -813,6 +994,12 @@ main(){ math_jar=$(acquire_value Acclib math_jar) math_java=$(acquire_value Acclib math_java) collect_acceleration_library "$system_lib" "$hmpp_lib" "$math_lib" "$math_jar" "$math_java" + crc32_path=$(acquire_value Acclib crc32_path) + collect_acceleration_library_crc32 $crc32_path + collect_acceleration_library_kail_dnn + zstd_bin=$(acquire_value Acclib zstd_bin) + lz4_bin=$(acquire_value Acclib lz4_bin) + collect_acceleration_library_compress "$zstd_bin" "$lz4_bin" echo "acceleration_library collect msg Done..." 
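+        # 下方 Storage 分支在 KSAL 等原有采集项基础上,新增 KAE使能SPDK(crc)、RDMA网络加速、BoostIO 的信息采集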
    elif [ $per_project = "Storage" ]; then
@@ -825,6 +1012,13 @@ main(){
        non_ceph_bin=$(acquire_value Storage non_ceph_bin)
        non_ceph_pid=$(acquire_value Storage non_ceph_pid)
        collect_storage_acc "$ec_pool_name" "$ceph_conf" "$storage_maintain_bin" "$rocksdb_bin" "$ucache_bin" "$non_ceph_bin" "$non_ceph_pid"
+        nvmf_tgt_bin=$(acquire_value Storage nvmf_tgt_bin)
+        collect_storage_spdk "$nvmf_tgt_bin"
+        rdma_ceph_conf=$(acquire_value Storage rdma_ceph_conf)
+        ceph_bin=$(acquire_value Storage ceph_bin)
+        collect_storage_rdma "$rdma_ceph_conf" "$ceph_bin"
+        boostio_bin=$(acquire_value Storage boostio_bin)
+        collect_storage_boostio "$boostio_bin"
        echo "Storage collect msg Done..."

    elif [ $per_project = "Bigdata" ]; then
@@ -834,6 +1028,8 @@ main(){
        dataset_list=$(acquire_value Bigdata dataset_list)
        spark_path=$(acquire_value Bigdata spark_path)
        database=$(acquire_value Bigdata database)
+        hive_bin_path=$(acquire_value Bigdata hive_bin_path)
+        hive_local_mode=$(acquire_value Bigdata hive_local_mode)
        omnioperator_dir=$(acquire_value Bigdata omnioperator_dir)
        omniadvisor_dir=$(acquire_value Bigdata omniadvisor_dir)
        mysql_username=$(acquire_value Bigdata mysql_username)
@@ -848,43 +1044,60 @@ main(){
        zookeeper_address=$(acquire_value Bigdata zookeeper_address)
        zookeeper_path=$(acquire_value Bigdata zookeeper_path)
        shuffle_jars=$(acquire_value Bigdata shuffle_jars)
        shuffle_ock_path=$(acquire_value Bigdata shuffle_ock_path)
-        collect_bigdata_components $spark_path
+        omnishield_jar=$(acquire_value Bigdata omnishield_jar)
+        doris_install_dir=$(acquire_value Bigdata doris_install_dir)
+        collect_bigdata_components $spark_path $doris_install_dir
        collect_bigdata_kal "${algorithms_list[@]}" $algorithms_path "${dataset_list[@]}"
-        collect_bigdata_operator $spark_path $database $omnioperator_dir
+        collect_bigdata_operator_spark $spark_path $database $omnioperator_dir
+        collect_bigdata_operator_hive $hive_bin_path $hive_local_mode $omnioperator_dir
        collect_bigdata_hbase
        collect_bigdata_tune_up $omniadvisor_dir $mysql_username $mysql_password $mysql_database_name
        collect_bigdata_omnimv "$omnimv_dir"
        collect_bigdata_omni_push_down "$omnidata_launcher_server" "$omnidata_launcher" "$push_down_jars" "$push_down_conf" "$spark_path" "$database" "$omnidata_install_path" "$zookeeper_address" "$zookeeper_path"
        collect_bigdata_omni_shuffle "$spark_path" "$shuffle_jars" "$database" "$shuffle_ock_path"
+        collect_bigdata_omni_shield $spark_path $omnishield_jar
+        collect_bigdata_omni_yarn
+        collect_bigdata_doris $doris_install_dir
        echo "Bigdata collect msg Done..."

    elif [ $per_project = "Virtual" ]; then
        echo "start collect Virtual msg..."
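+        # 虚拟化分支按顺序采集:OVS流表卸载、高性能云盘、调度优化、热插拔、KAE加速热迁移、硬件辅助加速,以及 K8s NUMA/SR-IOV/MPAM 插件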
- collect_virtual_host - server_name=$(acquire_value Virtual server_name) - network=$(acquire_value Virtual network) - flavor=$(acquire_value Virtual flavor) - volume=$(acquire_value Virtual volume) - availability_zone=$(acquire_value Virtual availability_zone) - collect_virtual_dpu $server_name $network $flavor $volume $availability_zone + collect_virtual_ovs_xpf vm_ip=$(acquire_value Virtual vm_ip) vm_user=$(acquire_value Virtual vm_user) vm_password=$(acquire_value Virtual vm_password) vm_ceph_disk_name=$(acquire_value Virtual vm_ceph_disk_name) vm_name=$(acquire_value Virtual vm_name) collect_virtual_ceph "$vm_ip" "$vm_user" "$vm_password" "$vm_ceph_disk_name" "$vm_name" - collect_virtual_ovs_xpf + schedule_vm_name=$(acquire_value Virtual schedule_vm_name) + schedule_vm_user=$(acquire_value Virtual schedule_vm_user) + schedule_vm_password=$(acquire_value Virtual schedule_vm_password) + collect_virtual_schedule "$schedule_vm_name" "$schedule_vm_user" "$schedule_vm_password" + hot_swap_vm_name=$(acquire_value Virtual hot_swap_vm_name) + hot_swap_vm_user=$(acquire_value Virtual hot_swap_vm_user) + hot_swap_vm_password=$(acquire_value Virtual hot_swap_vm_password) + hot_swap_cpu_count=$(acquire_value Virtual hot_swap_cpu_count) + hot_swap_mem_xml_path=$(acquire_value Virtual hot_swap_mem_xml_path) + collect_virtual_hot_swap "$hot_swap_vm_name" "$hot_swap_vm_user" "$hot_swap_vm_password" "$hot_swap_cpu_count" "$hot_swap_mem_xml_path" + kae_live_migrate_vm_name=$(acquire_value Virtual kae_live_migrate_vm_name) + collect_virtual_live_migrate "$kae_live_migrate_vm_name" + hardware_acc_vm_name=$(acquire_value Virtual hardware_acc_vm_name) + hardware_acc_vm_user=$(acquire_value Virtual hardware_acc_vm_user) + hardware_acc_vm_password=$(acquire_value Virtual hardware_acc_vm_password) + collect_virtual_hardware_acc "$hardware_acc_vm_name" "$hardware_acc_vm_user" "$hardware_acc_vm_password" + collect_k8s_numa + sr_iov_pod=$(acquire_value Virtual sr_iov_pod) + collect_virtual_sr_iov "$sr_iov_pod" + collect_k8s_mpam + collect_virtual_host echo "Virtual collect msg Done..." elif [ $per_project = "HPC" ]; then echo "start collect HPC msg..." - acc_lib=$(acquire_value HPC acc_lib) - sme=$(acquire_value HPC sme) - sve_source_code=$(acquire_value HPC sve_source_code) - collect_hpc_acceleration_library $acc_lib - collect_sme_acceleration_library $sme - collect_sve_source_code $sve_source_code + common_acc_lib_bin=$(acquire_value HPC common_acc_lib_bin) + collect_hpc_common_acceleration_library "$common_acc_lib_bin" + domain_acc_lib_bin=$(acquire_value HPC domain_acc_lib_bin) + collect_hpc_domain_acceleration_library "$domain_acc_lib_bin" echo "HPC collect msg Done..." 
fi done diff --git a/tools/config.ini b/tools/config.ini index 1b54908ab159ec72b6b704c9d60f6501eb94496f..73939a3d3d1fea3a3b7d488f5e8395efe3d2bc39 100644 --- a/tools/config.ini +++ b/tools/config.ini @@ -38,12 +38,7 @@ greenplum_kae_sql= [Virtual] check=False -# OpenStack 验证需要 -server_name=vm1 -network=port1_vdpa01 -flavor=4U4G80G -volume=ceph1_centos2 -availability_zone=nova:dpu01 + # 高性能云盘 vm_ip=71.14.48.104 vm_user=root @@ -51,22 +46,49 @@ vm_password=Huawei12#$ vm_ceph_disk_name=vdb vm_name=vm1 +# 虚拟化调度优化 +schedule_vm_name=vm1 +schedule_vm_user=root +schedule_vm_password=password + +# 热插拔 +hot_swap_vm_name=vm_name +hot_swap_vm_user=root +hot_swap_vm_password=password +hot_swap_cpu_count=4 +hot_swap_mem_xml_path=/tmp/mem.xml + +# KAE加速热迁移 +kae_live_migrate_vm_name=vm_name + +# 虚拟化硬件辅助加速 +hardware_acc_vm_name=vm_name +hardware_acc_vm_user=root +hardware_acc_vm_password=password + +# K8s SR-IOV直通插件 +sr_iov_pod=test_pod [Acclib] check=False # 工具默认支持验收 KAE 特性,不需要作额外配置 #引用系统库二进制位置 -system_lib=/home/system_lib.so +system_lib=/home/system_bin #引用HMPP库二进制位置 -HMPP_lib=/home/HMPP_lib.so +HMPP_lib=/home/HMPP_bin # 数学库三个配置只需填其中之一即可 # 引用数学库二进制位置 -math_lib=/home/math_lib.so +math_lib=/home/math_bin # 引用数学库的jar包位置 math_jar=/home/math.jar # 引用数学库的字节码文件位置 math_java=/home/math.class +crc32_path=/home/test.bin + +# 压缩库 +zstd_bin=/home/test.bin +lz4_bin=/home/test.bin [Bigdata] check=False @@ -75,9 +97,13 @@ algorithms_path=/home/KAL_TEST_DIR algorithms_name=algorithms_name1 algorithms_name2 algorithms_name3 dataset_list=dataset1 dataset2 dataset3 -# 算子加速 spark_path=/usr/local/spark +# 算子加速 Spark 场景配置 database=tpcds_bin_partitioned_varchar_orc_2 +# 算子加速 Hive 场景配置 +hive_bin_path=hive +hive_local_mode=0 +# 算子加速安装目录 omnioperator_dir=/opt/omni-operator # 参数调优 @@ -98,7 +124,13 @@ push_down_conf=spark.executor.extraClassPath=./* zookeeper_address=agent1:2181,agent2:2181,agent3:2181 zookeeper_path=/sdi/status # shuffle加速 -shuffle_jars=/home/ockadmin/opt/ock/jars/* +shuffle_jars=/home/ockadmin/opt/ock/jar/* +shuffle_ock_path=/home/ockadmin/opt/ock +# 机密大数据 +omnishield_jar=/home/omnishield/omnishield-1.0-SNAPSHOT.jar + +# doris 加速 +doris_install_dir=/opt/tools/installed/doris-2.1.2-rc04 [Storage] check=False @@ -111,10 +143,17 @@ ucache_bin=/tmp/ucache.bin non_ceph_bin=/tmp/non_ceph.bin non_ceph_pid=38799 +# KAE使能SPDK 中的 crc 特性验证 +nvmf_tgt_bin=/tmp/nvmf_tgt +# RDMA网络加速 +# ceph14 请填写ceph配置文件路径,ceph17请填写osd配置文件路径,比如 /var/lib/ceph/{ceph cluster id}/osd.0/config +rdma_ceph_conf=/etc/ceph/ceph.conf +ceph_bin=ceph + +# BoostIO +boostio_bin=/tmp/bio_test + [HPC] check=False -acc_lib=/home/lib -sme=/home/lib -# 填写用到 sve 的源码目录或文件的绝对路径 -sve_source_code=/tmp/src - +common_acc_lib_bin=/tmp/binary +domain_acc_lib_bin=/tmp/binary
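+# 注:common_acc_lib_bin 与 domain_acc_lib_bin 均支持填写多个路径,用空格分隔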