From 16318d8f67e7dec21df5212939242f635b17ad5f Mon Sep 17 00:00:00 2001
From: trivial <14522336+nontrivial@user.noreply.gitee.com>
Date: Sun, 30 Jun 2024 03:22:46 +0800
Subject: [PATCH] Eulix Report

---
 .../posts/2024-01/arm_kubernetes_test.md | 481 ++++++++++++++++++
 1 file changed, 481 insertions(+)
 create mode 100644 src/content/posts/2024-01/arm_kubernetes_test.md

diff --git a/src/content/posts/2024-01/arm_kubernetes_test.md b/src/content/posts/2024-01/arm_kubernetes_test.md
new file mode 100644
index 0000000..b3a3b45
--- /dev/null
+++ b/src/content/posts/2024-01/arm_kubernetes_test.md
@@ -0,0 +1,481 @@
+---
+title: Arm平台上的Kubernetes测试
+author: 林之昕
+pubDate: 2024-06-14
+categories: ['2024 年第一期']
+description: 'Arm上的Kubernetes环境搭建与性能测试方式'
+---
+
+## 环境搭建
+[Kubernetes][K8S]的官方运行要求是有一个可行的Go运行环境即可,在后续根据官方文档进行集成测试时,还依赖于etcd。
+首先我们需要在服务器上安装Go,这一步可以通过系统的包管理器解决,测试机上可以通过
+
+``` bash
+sudo dnf install golang
+```
+来获取Go,但由于包管理器安装的Golang版本较旧,在进行测试时会出现达不到版本要求的错误,我们通过以下方式,将目前最新版本的Go(1.22.4)安装在服务器上。
+``` bash
+wget https://go.dev/dl/go1.22.4.linux-arm64.tar.gz
+mkdir -p ~/golang
+tar -C ~/golang -xzf go1.22.4.linux-arm64.tar.gz
+echo "export PATH=\"\$PATH:$HOME/golang/go/bin\"" >> ~/.bashrc
+source ~/.bashrc
+```
+可以看到我们将Go安装在用户目录下,由于Arm服务器是多人共享的,这种方式不会影响到其他人的Golang执行环境。
+
+### 安装etcd
+在[Kubernetes][K8S]的官方文档中的[集成测试][INTEGRATION]部分中看到,集成测试依赖于etcd,官方文档提供了两种不同的方式安装etcd依赖,在这次测试中,我们选择了第一种方式来安装依赖。
+
+> Kubernetes的Repo中提供了一个install-etcd.sh的脚本,这个脚本会在third_party下安装一个仅本项目可见的etcd
+
+``` bash
+./hack/install-etcd.sh # Installs in ./third_party/etcd
+echo "export PATH=\"\$PATH:$(pwd)/third_party/etcd\"" >> ~/.bashrc
+source ~/.bashrc
+```
+
+### 安装rsync, perf等工具
+这里通过包管理器直接安装较方便
+
+``` bash
+sudo dnf -y install rsync perf
+```
+
+到这里我们就安装好了本次测试中Kubernetes需要的依赖与性能测试所需要的工具
+上述环境搭建内容,可以总结为一个`env_setup.sh`的脚本,内容如下
+``` bash
+#!/bin/bash
+
+main() {
+  set -o errexit
+
+  echo "PLEASE RUN THIS SCRIPT UNDER ROOT OF KUBERNETES REPOSITORY TO SETUP etcd !!!"
+  if [ "${1:-}" == "-y" ]
+  then
+    install
+  else
+    echo "PACKAGES WILL BE INSTALLED. THIS MAY BREAK YOUR EXISTING TOOLCHAIN."
+    echo "YOU ACCEPT ALL RESPONSIBILITY BY PROCEEDING."
+    read -r -p "Proceed? [y/N] : " yn
+
+    case "$yn" in
+      Y|y) install;;
+      *) ;; # anything else (including an empty answer) installs nothing
+    esac
+  fi
+
+  echo "Script complete."
+}
+
+install() {
+  set -x
+  UNAME=$(uname | tr "[:lower:]" "[:upper:]" )
+
+  case "$UNAME" in
+
+    LINUX)
+      version=$(grep VERSION_ID /etc/os-release | cut -d '"' -f 2)
+      case "$version" in
+        18.04) install_linux ;;
+        20.04) install_linux ;;
+        22.03) install_linux ;;
+        22.04) install_linux ;;
+        *) give_up ;;
+      esac
+      ;;
+
+    *) give_up ;;
+  esac
+}
+
+give_up() {
+  set +x
+  echo "Unsupported distribution '$UNAME' (VERSION_ID='${version:-unknown}')"
+  echo
+  exit 1
+}
+
+install_linux() {
+  if ! command -v go >/dev/null 2>&1; then
+    wd=$(pwd)
+    cd ~/
+    # install go
+    if [ ! -f go1.22.4.linux-arm64.tar.gz ]; then # skip the download when the tarball is already in $HOME
+      echo "Fetching Go tar file"
+      wget https://go.dev/dl/go1.22.4.linux-arm64.tar.gz
+    fi
+    mkdir -p ~/golang
+    tar -C ~/golang -xzf go1.22.4.linux-arm64.tar.gz
+    echo "export PATH=\"\$PATH:$HOME/golang/go/bin\"" >> ~/.bashrc
+    cd "$wd" # back to the Kubernetes checkout; ./hack/install-etcd.sh is relative to it
+  fi
+  # setup etcd
+  ./hack/install-etcd.sh # Installs in ./third_party/etcd
+  echo "export PATH=\"\$PATH:$(pwd)/third_party/etcd\"" >> ~/.bashrc
+  source ~/.bashrc
+
+  # Install packages.
+  sudo dnf -y install \
+    rsync \
+    perf
+
+  go version
+}
+
+main "$@"
+
+```
+
+## 对Kubernetes进行测试
+先通过`make all`构造Kubernetes项目,接下来我们就需要对Kubernetes进行测试,根据官方文档的描述
+* [testing guide][TEST]
+* [integration test][INTEGRATION]
+* [e2e test][E2E]
+
+我们分别需要进行`单元测试`,`集成测试`与`端到端(E2E)`测试,我们分别通过
+* `make test`来进行单元测试
+* `make test-integration`来进行集成测试
+* `kubetest --build --up --test --down`来进行E2E测试
+但由于我们需要通过perf来收集各项测试在Arm平台上的性能数据,我们不能通过以上方式来进行测试。
+因此我们需要通过`go test $testpath -v -run $testname`的方式来执行所有测试。
+
+## Kubernetes在Arm平台上的性能测试
+上面提到,由于测试目标是各项小测试在Arm平台上的性能表现,我们的测试方式需要进行更改,因此本项目分发了一个`performance_counter_920.sh`的脚本来对测试数据进行收集,通过以下方式来使用该脚本。
+
+``` bash
+./performance_counter_920.sh "go test ./pkg/apis/core/validation -v -run ^TestValidatePods$" ./ValidationPodsPerf
+```
+
+由于脚本将perf数据临时存放在performance.txt中,不支持多个测试同时进行,我们对该脚本进行修改,使其能支持多个不同测试同时进行,结果如下
+
+``` bash
+#!/bin/sh
+
+# $1: Complete execution command $2: store folder for generated file
+# eg: ./performance_counter.sh "./hackbench -s 512 -l 200 -g 15 -f 25 -P" /home
+
+if [ $# -ne 2 ]; then
+  echo "Usage: ./performance_counter.sh parameter1 parameter2"
+  exit 1
+fi
+
+echo "parameter1=$1"
+
+result=$(echo "$1" | sed 's:.*/::')
+
+file_name=$(echo "$result" | sed 's/ //g')
+
+tmp=_tmp
+tmp_name=$file_name$tmp
+
+echo "file name : $file_name"
+echo "tmp name : $tmp_name"
+echo "tmp txt : $tmp_name.txt"
+
+if [ -f "$tmp_name.txt" ]; then
+  rm -f "$tmp_name.txt"
+  echo "$tmp_name.txt has been deleted"
+fi
+
+mkdir -p "$2"
+perf stat --sync -e duration_time,task-clock,cycles,instructions,cache-references,cache-misses,branches,branch-misses,L1-dcache-loads,L1-dcache-load-misses,LLC-load-misses,LLC-loads -r 1 -o "$tmp_name.txt" $1 > "$2/$file_name.log" # $1 stays unquoted on purpose: it must split into command + arguments
+tail -n 1 "$2/$file_name.log" >> testResult
+
+awk '{print $1, $2, $3}' "$tmp_name.txt" > "$file_name.txt"
+
+mv "$file_name.txt" "$tmp_name.txt"
+
+duration_time=$(grep "duration_time" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+task_clock=$(grep "task-clock" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+cpu_cycle=$(grep "cycles" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+instruction=$(grep "instructions" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+cache_references=$(grep "cache-references" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+cache_misses=$(grep "cache-misses" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+branches=$(grep "branches" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+branch_misses=$(grep "branch-misses" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+L1_dcache_loads=$(grep "L1-dcache-loads" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+L1_dcache_load_misses=$(grep "L1-dcache-load-misses" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+LLC_load_misses=$(grep "LLC-load-misses" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+LLC_loads=$(grep "LLC-loads" "$tmp_name.txt" | awk '{print $1}' | sed 's/,//g')
+
+printf "\n\n"
+
+echo "Avg 10 times duration time: $duration_time"
+
+printf "Avg 10 times task clock: %.3f\n" "${task_clock:-0}"
+
+echo "Avg 10 times cpu-cycles: $cpu_cycle"
+
+echo "Avg 10 times instructions: $instruction"
+
+echo "Avg 10 times cache references: $cache_references"
+
+echo "Avg 10 times cache misses: $cache_misses"
+
+echo "Avg 10 times branches: $branches"
+
+echo "Avg 10 times branch misses: $branch_misses"
+
+echo "Avg 10 times L1 dcache loads: $L1_dcache_loads"
+
+echo "Avg 10 times L1 dcache load misses: $L1_dcache_load_misses"
+
+echo "Avg 10 times LLC load misses: $LLC_load_misses"
+
+echo "Avg 10 times LLC load: $LLC_loads"
+
+IPC=$(echo "scale=3; $instruction / $cpu_cycle" | bc)
+printf "Avg 10 times IPC: %.3f\n" "${IPC:-0}"
+
+if [ -f "$file_name.txt" ]; then
+  rm -f "$file_name.txt"
+  echo "$file_name.txt has been deleted"
+fi
+
+echo "$duration_time" >> "$file_name.txt"
+echo "$task_clock" >> "$file_name.txt"
+echo "$cpu_cycle" >> "$file_name.txt"
+echo "$instruction" >> "$file_name.txt"
+echo "$cache_references" >> "$file_name.txt"
+echo "$cache_misses" >> "$file_name.txt"
+echo "$branches" >> "$file_name.txt"
+echo "$branch_misses" >> "$file_name.txt"
+echo "$L1_dcache_loads" >> "$file_name.txt"
+echo "$L1_dcache_load_misses" >> "$file_name.txt"
+echo "$LLC_load_misses" >> "$file_name.txt"
+echo "$LLC_loads" >> "$file_name.txt"
+printf "%.3f\n" "${IPC:-0}" >> "$file_name.txt"
+
+mv "$file_name.txt" "$2"
+
+rm -f "$tmp_name.txt"
+
+```
+
+在对该脚本进行修改后,会将测试的运行结果通过append的方式添加到testResult的末尾,便于后续对测试成功数量进行统计
+我们通过
+
+``` bash
+grep -c '^ok' testResult
+```
+
+来统计测试成功数量
+
+## 半自动化测试方式
+上面提到,我们通过脚本对Kubernetes进行测试,是不是到这里就结束了呢,实际上还没有,这里还有一个问题,单元测试与集成测试的数量非常多,约有几千个,我们该如何利用这个脚本来进行测试呢,手写所有测试命令肯定是不现实的。
+
+> ***!!! 运行不同种类*** `(unit, integration, e2e)` ***测试前,记得清空 `testResult` 内的内容***
+
+### 生成需要运行的测试
+我们需要一个能够生成所有测试的方法,刚好Go在1.9的版本更新后,支持通过`go test ./testpath/... -list Test`的方式来列出所有测试,并且不执行这些测试.在这里,我们可以通过
+
+``` bash
+go test ./pkg/... -json -list Test > test_list.json
+```
+
+将对应的测试结果与测试路径以`json`格式存到`test_list.json`中,并对该文件进行处理,输出一个每行格式为
+
+``` bash
+./performance_counter_920.sh "CmdToRun" pathToStorePerfResult
+```
+
+的文件,这样,通过执行该文件中的每一行,我们就可以运行所有单元测试,并统计其在Arm平台上的性能。在进行并行测试时,生成的文件中去掉了`CmdToRun`周边的双引号,原因会在下面提到。这里,我们通过getTest.go来对test_list.json这个文件来进行处理
+
+``` go
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"strings"
+)
+
+func main() {
+	args := os.Args
+	if len(args) != 4 {
+		fmt.Println("usage: prog jsonfile output PathToTestResult")
+		os.Exit(-1)
+	}
+	file, err := os.Open(args[1])
+	if err != nil {
+		fmt.Println("ERR:", err)
+		os.Exit(-1)
+	}
+	testPath := make([]string, 0)
+	testNames := make([]string, 0)
+
+	f, err := os.Create(args[2])
+	if err != nil {
+		fmt.Println("ERR:", err)
+		os.Exit(-1)
+	}
+	defer f.Close()
+
+	dec := json.NewDecoder(file)
+	for dec.More() { // stops quietly at end of input; Decode errors below are real errors
+		mp := make(map[string]interface{})
+		if err := dec.Decode(&mp); err != nil {
+			fmt.Printf("ERR: %v\n", err)
+			break
+		}
+		action, ok := mp["Action"].(string)
+		if !ok || action != "output" {
+			continue
+		}
+		path, _ := mp["Package"].(string)
+		testName, _ := mp["Output"].(string)
+		parts := strings.SplitN(path, "/", 3)
+		if !strings.HasPrefix(testName, "Test") || len(parts) < 3 {
+			continue
+		}
+		path = "./" + parts[2] // drop the first two import-path elements: "k8s.io/kubernetes/pkg/x" -> "./pkg/x"
+		testName = strings.TrimSpace(testName) // the listed name ends with a newline
+		testPath = append(testPath, path)
+		testNames = append(testNames, testName)
+	}
+
+	// f.WriteString("#!/bin/bash\n")
+	target := args[3]
+	if !strings.HasSuffix(target, "/") {
+		target += "/"
+	}
+	for i := range testPath {
+		// run test
+		// f.WriteString("./performance_counter_920.sh \"go test "+testPath[i]+" -v -run "+testNames[i]+"\" ")
+		f.WriteString("./performance_counter_920.sh go test " + testPath[i] + " -v -run " + testNames[i] + " ")
+		// where to store test result
+		f.WriteString(target + testPath[i][2:] + "\n")
+	}
+}
+
+```
+
+通过`go build getTest.go`编译该程序后,通过`./getTest jsonFile output PathToStoreResult`的方式来执行该文件,其中`jsonFile`是`go test ./test/... -json -list Test > test_list.json`生成的json文件,`output`是生成的包含需要运行的程序的文件,`PathToStoreResult`是测试结果需要存储的目录名,由于部分单元测试名称相同,该程序会在存储目录中创建一个与单元测试所在地址对应的目录,如
+
+``` bash
+./getTest test_list.json unit_test UTESTResult
+```
+
+会使测试结果存在`UTESTResult`下,并在其中创建对应的目录,假设`TestSoyo`位于`./mygo/soyo_test.go`下,运行生成的文件中的命令,就会将测试的结果与性能数据存放在`UTESTResult/mygo/`中,测试结果的名称为`BlahBlahBlahTestSoyo.log`,性能数据的名称为`BlahBlahBlahTestSoyo.txt`。我们可以修改上述代码,使`unit_test`生成为`unit_test.sh`,并直接在bash中运行他,直接去掉部分注释即可(这里保留了注释掉的部分代码,可以看到原来的想法)。
+
+### 并发运行单元测试
+但是花了这么多时间,只是生成这个测试,然后一行一行执行,是不是太麻烦而且非常慢呢,毕竟我们已经修改了性能测试的`performance_counter_920.sh`脚本,让其支持并行测试,又得到了一个需要执行的所有测试的文件,我们是否可以利用go本身对此的便利支持,来高效的进行我们的测试呢?
+
+当然可以这样了,虽然本来懒了,想鸽掉这一部分,但是串行跑测试,测下来太慢太花时间了,就实现了一个简单的并行测试,内容在RunManyTest.go中
+
+``` go
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+	"sync"
+)
+
+func main() {
+	args := os.Args
+	if len(args) < 2 {
+		fmt.Println("usage: prog tasks [optional]parallel")
+		os.Exit(-1)
+	}
+	file, err := os.Open(args[1])
+	if err != nil {
+		fmt.Printf("Err: %v\n", err)
+		os.Exit(-1)
+	}
+	defer file.Close()
+
+	var wg sync.WaitGroup
+	scanner := bufio.NewScanner(file)
+	parallel := 8
+	if len(args) >= 3 {
+		parallel, err = strconv.Atoi(args[2])
+		if err != nil || parallel < 1 { // zero workers would block forever on the channel send below
+			fmt.Printf("Err: invalid parallel value %q\n", args[2])
+			os.Exit(-1)
+		}
+	}
+	tasks := make(chan *exec.Cmd, parallel)
+	for i := 0; i < parallel; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for cmd := range tasks {
+				out, err := cmd.Output()
+				if err != nil {
+					fmt.Printf("Err: %v\n", err)
+				}
+				fmt.Printf("%s output", out)
+			}
+		}()
+	}
+	for scanner.Scan() {
+		fields := strings.Fields(scanner.Text())
+		if len(fields) < 3 { // need: script, command word(s), result directory
+			continue
+		}
+		last := len(fields) - 1 // the middle words are re-joined into the script's single command argument
+		tasks <- exec.Command(fields[0], strings.Join(fields[1:last], " "), fields[last])
+	}
+	close(tasks)
+	if err := scanner.Err(); err != nil {
+		fmt.Printf("Err: %v\n", err)
+	}
+
+	wg.Wait()
+
+	fmt.Println("Task Done")
+}
+```
+
+我们通过`go build RunManyTest.go`编译后,通过
+``` bash
+./RunManyTest unit_test parallel
+```
+来并发执行测试,如果不填parallel,默认会生成8个worker来进行测试,由于单元测试负载较低,因此不会影响到测试正确性,实测大大减小了测试时间。
+
+## 测试结果
+最终进行了4389个单元测试,587个集成测试,27个e2e测试,
+* ***单元测试*** 4388/4389 PASS
+* ***集成测试*** 567/587 PASS
+* ***E2E*** 26/27 PASS
+
+其中`单元测试`失败的主要原因是缺失`Authz test case`,`集成测试`失败的主要原因目前来看是网络问题引起的,但其网络测试方式皆在本地,我个人目前没有网络相关的知识,并不具备查错能力,暂时搁置。而`E2E测试`的测试方式与[官方E2E测试文档][E2E]相差较大,暂时无法分析是什么原因造成的。
+
+## 最后
+写报告的时候看了一下,RISC-V的服务器好像还没发,就先摸了。不过测试方法是类似的,相对来说,上面这个半自动的方式,也可以写成bash脚本,然后就省事了,不过,暂时就先鸽了吧。
+
+## Ref
+可能会用到的几个链接:
+
+* ***Kubernetes Repo***: [https://github.com/kubernetes/kubernetes][K8S]
+
+* ***Kubernetes Developer Document***: [https://git.k8s.io/community/contributors/devel#readme][Developer]
+
+* ***Kubernetes Testing Guide***: [https://github.com/kubernetes/community/blob/master/contributors/devel/sig-testing/testing.md][TEST]
+
+* ***Kubernetes Integration Test***: [https://github.com/kubernetes/community/blob/master/contributors/devel/sig-testing/integration-tests.md][INTEGRATION]
+
+* ***Kubernetes E2E Test***: [https://github.com/kubernetes/community/blob/master/contributors/devel/sig-testing/e2e-tests.md][E2E]
+
+[K8S]: https://github.com/kubernetes/kubernetes
+[TEST]: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-testing/testing.md
+[INTEGRATION]: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-testing/integration-tests.md
+[E2E]: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-testing/e2e-tests.md
+[Developer]: https://git.k8s.io/community/contributors/devel#readme
\ No newline at end of file
-- 
Gitee