diff --git a/.gitignore b/.gitignore index d9c35d35bb36f36c52cac26ed99d63a114b6bc35..e056f68bb5412c2d6ce31dc684de6a6ab79a9737 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ .out .vscode .fortest +/unit_test.log diff --git a/Dockerfile b/Dockerfile index 51f0e4c16cc083b5403cfc55cb520bfd297b2000..bbaf598b3d5d6d62921a75092b61bf87ee7ad056 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,4 @@ FROM scratch -COPY ./rubik /rubik +COPY ./build/rubik /rubik ENTRYPOINT ["/rubik"] + diff --git a/Makefile b/Makefile index 4285721a055bd085c3026987137a444286d2f319..522d1d6a52dac4f567bae93ec9c8e17a639e94cc 100644 --- a/Makefile +++ b/Makefile @@ -22,13 +22,15 @@ BUILD_TIME := $(shell date "+%Y-%m-%d") GIT_COMMIT := $(if $(shell git rev-parse --short HEAD),$(shell git rev-parse --short HEAD),$(shell cat ./git-commit | head -c 7)) DEBUG_FLAGS := -gcflags="all=-N -l" +EXTRALDFLAGS := -extldflags=-ftrapv \ + -extldflags=-Wl,-z,relro,-z,now -linkmode=external -extldflags=-static + LD_FLAGS := -ldflags '-buildid=none -tmpdir=$(TMP_DIR) \ -X isula.org/rubik/pkg/version.GitCommit=$(GIT_COMMIT) \ -X isula.org/rubik/pkg/version.BuildTime=$(BUILD_TIME) \ -X isula.org/rubik/pkg/version.Version=$(VERSION) \ -X isula.org/rubik/pkg/version.Release=$(RELEASE) \ - -extldflags=-ftrapv \ - -extldflags=-Wl,-z,relro,-z,now -linkmode=external -extldflags=-static' + $(EXTRALDFLAGS)' GO_BUILD=CGO_ENABLED=1 \ CGO_CFLAGS="-fstack-protector-strong -fPIE" \ @@ -51,13 +53,21 @@ help: @echo "make cover # generate coverage report" @echo "make install # install files to /var/lib/rubik" -release: +prepare: mkdir -p $(TMP_DIR) $(BUILD_DIR) - rm -rf $(TMP_DIR) && mkdir -p $(ORG_PATH) $(TMP_DIR) + rm -rf $(TMP_DIR) && mkdir -p $(TMP_DIR) + +release: prepare $(GO_BUILD) -o $(BUILD_DIR)/rubik $(LD_FLAGS) rubik.go sed 's/__RUBIK_IMAGE__/rubik:$(VERSION)-$(RELEASE)/g' hack/rubik-daemonset.yaml > $(BUILD_DIR)/rubik-daemonset.yaml cp hack/rubik.service $(BUILD_DIR) +debug: prepare + EXTRALDFLAGS="" + go build 
$(LD_FLAGS) $(DEBUG_FLAGS) -o $(BUILD_DIR)/rubik rubik.go + sed 's/__RUBIK_IMAGE__/rubik:$(VERSION)-$(RELEASE)/g' hack/rubik-daemonset.yaml > $(BUILD_DIR)/rubik-daemonset.yaml + cp hack/rubik.service $(BUILD_DIR) + image: release docker build -f Dockerfile -t rubik:$(VERSION)-$(RELEASE) . @@ -88,3 +98,4 @@ install: install -d -m 0750 $(INSTALL_DIR) cp -f $(BUILD_DIR)/* $(INSTALL_DIR) cp -f $(BUILD_DIR)/rubik.service /lib/systemd/system/ + diff --git a/README.en.md b/README.en.md deleted file mode 100644 index 9c945f92fb0d067024d73dab0e1268dca020692f..0000000000000000000000000000000000000000 --- a/README.en.md +++ /dev/null @@ -1,52 +0,0 @@ -# rubik - -## Description - -The current global cloud infrastructure service expenditure is huge. However, the average CPU utilization of data center user clusters is very low, which is a huge waste of resources. Therefore, improving the utilization of data center resources is an important issue that needs to be solved urgently. - -Deployment of various types of services togather can significantly improve the utilization of cluster resources, but it also brings the problem of co-peaking, this issue can lead to partial service quality of service (QoS) compromise. How to ensure that the application's QoS is not damaged after improving the utilization of resources is a key technical challenge. - -To this end, we propose the Rubik resource utilization improvement solution, Rubik literally means Rubik's Cube, The Rubik’s Cube was invented in 1974 by Ernõ Rubik, a Hungarian architecture professor. In our solution, Rubik symbolizes being able to manage servers in an orderly manner. - -Rubik currently supports the following features: -- pod's CPU priority configure. -- pod's memory priority configure. 
- -## Build - -Pull the source code: -```sh -git clone https://gitee.com/openeuler/rubik.git -``` - -Enter the source code directory to compile: -```sh -cd rubik -make -``` - -Make rubik image: -```sh -make image -``` - -Install the relevant deployment files on the system: -```sh -sudo make install -``` -## Deployment - -### Prepare environment - -- OS: openeuler 21.09/22.03/22.09+ -- kubernetes: 1.17.0+ - -### Deploy rubik as daemonset - -```sh -kubectl apply -f /var/lib/rubik/rubik-daemonset.yaml -``` - -## Copyright - -Mulan PSL v2 diff --git a/README.md b/README.md deleted file mode 100644 index cb4ed8def92ae243c35f16acc821071a16809eb0..0000000000000000000000000000000000000000 --- a/README.md +++ /dev/null @@ -1,116 +0,0 @@ -# rubik - -## 概述 - -当前全球云基础设施服务支出费用庞大,然而数据中心用户集群的平均CPU利用率却很低,存在巨大的资源浪费。因此,提升数据中心资源利用率是当前急需解决的一个重要问题。 - -将多种类型业务混合部署能够显著提升集群资源利用率,但也带来了共峰问题,该问题会导致部分业务服务质量(QoS)受损。如何在提升资源利用率之后,保障业务QoS不受损是技术上的关键挑战。 - -为此我们提出了Rubik资源利用率提升解决方案,Rubik字面意思为魔方,魔方由Rubik在1974年发明,故Rubik既是人名也指代魔方,在我们的解决方案中,Rubik象征着能够将服务器管理的有条不紊。 - -Rubik当前支持如下特性: - -- [pod CPU优先级配置](./docs/modules.md/#pod-cpu优先级) -- [pod memory优先级配置](./docs/modules.md#pod-内存优先级) -- [pod 访存带宽和LLC限制](./docs/modules.md#dyncache-访存带宽和llc限制) -- [pod blkio配置](./docs/modules.md/#blkio) -- [pod memory cgroup分级](./docs/modules.md/#memory) -- [pod quota burst配置](./docs/modules.md/#quota-burst) -- [基于iocost io权重控制](./docs/modules.md/#rubik支持基于iocost的io权重控制) - -## 编译 - -拉取源代码: - -```sh -git clone https://gitee.com/openeuler/rubik.git -``` - -进入源码目录编译: - -```sh -cd rubik -make -``` - -制作rubik镜像 - -```bash -make image -``` - -将相关部署文件安装到系统中: - -```sh -sudo make install -``` - -## 部署 - -### 环境准备 - -- OS: openEuler 21.09/22.03/22.09+ -- kubernetes: 1.17.0+ - -### rubik daemonset部署 - -在master节点上使用kubectl命令部署rubik daemonset: - -```sh -kubectl apply -f /var/lib/rubik/rubik-daemonset.yaml -``` - -## 常用配置 - -通过以上方式部署的rubik将以默认配置启动,若用户需要修改rubik的配置,可通过修改rubik-daemonset.yaml文件中的config.json部分后重新部署rubik daemonset。 - 
-以下介绍几个常见配置,其他配置详见[配置文档](./docs/config.md) - -### Pod优先级自动配置 - -若在rubik config中配置autoConfig为true开启了Pod自动感知配置功能,用户仅需在部署业务pod时在yaml中通过annotation指定其优先级,部署后rubik会自动感知当前节点pod的创建与更新,并根据用户配置的优先级设置pod优先级。 - -### 依赖于kubelet的Pod优先级配置 - -由于自动配置依赖于来自api-server pod创建事件的通知,具有一定的延迟性,无法在进程启动之前及时完成优先级的配置,导致业务性能可能存在抖动。用户可以关闭自动配置,通过修改kubelet,向rubik发送http请求,在更早的时间点调用rubik配置pod优先级,http接口具体使用方法详见[http接口文档](./docs/http_API.md) - -### 支持自动校对Pod优先级 - -rubik支持在启动时对当前节点Pod QoS优先级配置一致性进行校对,这里的一致性是指k8s集群中的配置和rubik对pod优先级的配置之间的一致性。可以通过config选项autoCheck控制是否开启校对功能,默认关闭。若开启校对Pod优先级功能,启动或重启rubik时,rubik会自动校验并更正当前节点pod优先级配置。 - -## 在离线业务配置示例 - -```yaml -apiVersion: v1 -kind: Pod -metadata: - name: nginx - namespace: qosexample - annotations: - volcano.sh/preemptable: "true" # volcano.sh/preemptable为true代表业务为离线业务,false代表业务为在线业务,默认为false -spec: - containers: - - name: nginx - image: nginx - resources: - limits: - memory: "200Mi" - cpu: "1" - requests: - memory: "200Mi" - cpu: "1" -``` - -## 注意事项 - -约束限制详见[约束限制文档](./docs/limitation.md) - -## 如何贡献 - -我们很高兴能有新的贡献者加入! 
- -在一切开始之前,请签署[CLA协议](https://openeuler.org/en/cla.html) - -## 版权 - -rubik遵从**Mulan PSL v2**版权协议 diff --git a/docs/api.md b/docs/api.md deleted file mode 100644 index 14f1a889f1e672ed52b0c57a5815f74b4d4d5cc4..0000000000000000000000000000000000000000 --- a/docs/api.md +++ /dev/null @@ -1,74 +0,0 @@ -# http接口 - -rubik包含如下http接口 - -## 设置、更新Pod优先级接口 - -接口语法: - -```bash -HTTP POST /run/rubik/rubik.sock -{ - "Pods": { - "podaaa": { - "CgroupPath": "kubepods/burstable/podaaa", - "QosLevel": 0 - }, - "podbbb": { - "CgroupPath": "kubepods/burstable/podbbb", - "QosLevel": -1 - } - } -} -``` - -参数说明: - -- pods map必须提供pods。 - -- podUID map必须提供每个pod的UID。 - -- QosLevel int必须提供优先级。 - - - 0:默认值,在线业务。 - - -1:离线业务。 - - 其他:非法,不支持。 - -- CgroupPath string必须提供Pod的cgroup子路径。 - -说明: - -- 请求并发量1000QPS,并发量越界报错。 -- 单个请求pod数100个,请求数量越界报错。 - -示例如下: - -```sh -curl -v -H "Accept: application/json" -H "Content-type: application/json" -X POST --data '{"Pods": {"podaaa": {"CgroupPath": "kubepods/burstable/podaaa","QosLevel": 0},"podbbb": {"CgroupPath": "kubepods/burstable/podbbb","QosLevel": -1}}}' --unix-socket /run/rubik/rubik.sock http://localhost/ -``` - -## 探活接口 - -rubik作为HTTP服务,提供探活接口用于帮助判断rubik服务是否还在运行。 - -接口形式:HTTP/GET /ping - -示例如下: - -```sh -curl -XGET --unix-socket /run/rubik/rubik.sock http://localhost/ping -``` - -## 版本信息查询接口 - -rubik支持通过HTTP请求查询版本号。 - -接口形式:HTTP/GET /version - -示例如下: - -```sh -curl -XGET --unix-socket /run/rubik/rubik.sock http://localhost/version -{"Version":"0.0.1","Release":"1","Commit":"29910e6","BuildTime":"2021-05-12"} -``` - diff --git a/docs/config.md b/docs/config.md deleted file mode 100644 index 68b4a4a529d3e531155f8ab871f3af2c9f2712cb..0000000000000000000000000000000000000000 --- a/docs/config.md +++ /dev/null @@ -1,90 +0,0 @@ -# Rubik配置说明 - -## 基本配置说明 - -Rubik执行程序由Go语言实现,并编译为静态可执行文件,以便尽可能与系统依赖解耦。 - -Rubik除支持 `-v` 参数查询版本信息之外,不支持其他参数,版本信息输出示例如下所示,该信息中的内容和格式可能随着版本发生变化。 - -``` -rubik -v -Version: 0.1.0 -Release: -Go Version: go1.15.15 -Git Commit: 9bafc90 
-Built: 2022-06-24 -OS/Arch: linux/amd64 -``` - -Rubik启动时会解析配置文件,配置文件的路径固定为 `/var/lib/rubik/config.json` ,为避免配置混乱,暂不支持指定其他路径。 - -配置文件采用json格式,字段键采用驼峰命名规则,且首字母小写。 - -配置文件示例内容如下: - -```json -{ - "autoConfig": true, - "autoCheck": false, - "logDriver": "stdio", - "logDir": "/var/log/rubik", - "logSize": 1024, - "logLevel": "info", - "cgroupRoot": "/sys/fs/cgroup", - "cacheConfig": { - "enable": false, - "defaultLimitMode": "static", - "adjustInterval": 1000, - "perfDuration": 1000, - "l3Percent": { - "low": 20, - "mid": 30, - "high": 50 - }, - "memBandPercent": { - "low": 10, - "mid": 30, - "high": 50 - } - }, - "blkioConfig": { - "enable": false - }, - "memoryConfig": { - "enable": true, - "strategy": "none", - "checkInterval": 5 - } -} -``` - -常用配置项说明: - -| 配置键[=默认值] | 类型 | 描述 | 示例值 | -|---------------------------|--------|-----------------------------------------------------|----------------------| -| autoConfig=false | bool | 自动配置开关,自动配置即自行拉取Pod信息并配置给系统 | false, true | -| autoCheck=false | bool | 自动检查开关,自动纠正因故障等原因导致的错误配置 | false, true | -| logDriver=stdio | string | 日志驱动,支持标准输出和文件 | stdio, file | -| logDir=/var/log/rubik | string | 日志保存目录 | /var/log/rubik | -| logSize=1024 | int | 总日志大小,单位MB,适用于logDriver=file | [10, 2**20] | -| logLevel=info | string | 日志级别 | debug, info, error | -| cgroupRoot=/sys/fs/cgroup | string | 系统cgroup挂载点路径 | /sys/fs/cgroup | -| cacheConfig | map | 动态控制CPU高速缓存模块(dynCache)的相关配置 | | -| .enable=false | bool | dynCache功能启用开关 | false, true | -| .defaultLimitMode=static | string | dynCache控制模式 | static, dynamic | -| .adjustInterval=1000 | int | dynCache动态控制间隔时间,单位ms | [10, 10000] | -| .perfDuration=1000 | int | dynCache性能perf执行时长,单位ms | [10, 10000] | -| .l3Percent | map | dynCache控制中L3各级别对应水位(%) | | -| ..low=20 | int | L3低水位组控制线 | [10, 100] | -| ..mid=30 | int | L3中水位组控制线 | [low, 100] | -| ..high=50 | int | L3高水位组控制线 | [mid, 100] | -| .memBandPercent | map | dynCache控制中MB各级别对应水位(%) | | -| ..low=10 | int | MB低水位组控制线 | [10, 100] | -| ..mid=30 | 
int | MB中水位组控制线 | [low, 100] | -| ..high=50 | int | MB高水位组控制线 | [mid, 100] | -| blkioConfig | map | IO控制模块相关配置 | | -| .enable=false | bool | IO控制模块使能开关 | | -| memoryConfig | map | 内存控制模块相关配置 | | -| .enable=false | bool | 内存控制模块使能开关 | | -| .strategy=none | string | 内存动态分级回收控制策略 | none, dynlevel, fssr | -| .checkInterval=5 | string | 内存动态分级回收控制策略检测间隔 | (0, 30] | diff --git a/docs/limitation.md b/docs/limitation.md deleted file mode 100644 index f6efab37f450f18a31837a7adab7080371639c45..0000000000000000000000000000000000000000 --- a/docs/limitation.md +++ /dev/null @@ -1,49 +0,0 @@ -# 约束限制 - -## 规格 - -- 磁盘:1GB+ - -- 内存:100MB+ - -- 单个请求超时时间:120s - -- 单个请求Pod上限:100个 - -- HTTP请求并发量上限:1000QPS - -## 运行时 - -- 每个k8s节点只能部署一个rubik,多个rubik会冲突。 - -- rubik不提供端口访问,只能通过sock通信。 - -- rubik只接收合法http请求路径及网络协议:http://localhost/(POST)、http://localhost/ping(GET)、http://localhost/version(GET) - -- rubik不接受任何命令行参数,若添加参数启动会报错退出。 - -- 容器挂载目录时,rubik本地套接字/run/rubik的目录权限需由业务侧保证最小权限(如700)。 - -- 如果rubik进程进入T、D状态,则服务端不可用,此时服务不会响应任何请求。为了避免此情况的发生,请在客户端设置超时时间,避免无限等待。 - -## Pod优先级设置 - -- 禁止低优先级往高优先级切换。如业务A先被设置为低优先级(-1),接着请求设置为高优先级(0),rubik报错。 - -- 用户添加注解、修改注解、修改yaml中的注解并重新apply等操作不会触发Pod重建。rubik不会监控Pod注解变化情况,因此Pod的优先级和注解一致性需要kubelet及上层组件保证。 - -- 禁止将任务从在线组迁移到离线组后再迁移回在线组,此操作会导致该任务QoS异常。 - -- 禁止将重要的系统服务和内核线程加入到离线组中,否则可能导致调度不及时,进而导致系统异常。 - -- cpu和memory的在线、离线配置需要统一,否则可能导致两个子系统的QoS冲突。 - -- kubelet创建pod时需要调用rubik并确保成功,否则不保证数据一致性。 - -- 使用混部后,原始的cpu share功能存在限制。具体表现为: - - - 若当前cpu中同时存在在线任务和离线任务,则离线任务的cpu share无法生效。 - - - 若当前cpu中只有在线任务或只有离线任务,cpu share能生效 - -- 用户态的优先级反转、smt、cache、numa负载均衡、离线任务的负载均衡,当前不支持。 diff --git a/docs/modules.md b/docs/modules.md deleted file mode 100644 index df5c02fa85d30260940c5c72c3d121a5e35ed578..0000000000000000000000000000000000000000 --- a/docs/modules.md +++ /dev/null @@ -1,372 +0,0 @@ -# 模块介绍 - -## Pod CPU优先级 - -rubik支持业务CPU优先级配置,针对在离线业务混合部署的场景,确保在线业务相对离线业务的CPU资源抢占。 - -**前置条件**: - -- 建议内核版本openEuler-22.03+。内核支持针对cgroup的cpu优先级配置,cpu子系统存在接口cpu.qos_level。 - -### CPU优先级内核接口 - -- 
/sys/fs/cgroup/cpu目录下容器的cgroup中,如`/sys/fs/cgroup/cpu/kubepods/burstable//`目录 - - cpu.qos_level:开启CPU优先级配置,默认值为0, 有效值为0和-1。 - - 0:标识为在线业务 - - -1:标识为离线业务 - -### CPU优先级配置详解 - -rubik会根据pod的yaml文件中的注解`volcano.sh/preemptable`自动配置cpu.qos_level, 默认为false。 - -``` -annotations: - volcano.sh/preemptable: true -``` - -- true:代表业务为离线业务, -- false:代表业务为在线业务 - ---------------------- - -## pod 内存优先级 - -rubik支持业务memory优先级配置,针对在离线业务混合部署的场景,确保OOM时优先kill离线业务。 - -**前置条件**: - -- 建议内核版本openEuler-22.03+。内核支持针对cgroup的memory优先级配置,memory子系统存在接口memory.qos_level。 -- 开启内存优先级支持: `echo 1 > /proc/sys/vm/memcg_qos_enable` - -### 内存优先级内核接口 - -- /proc/sys/vm/memcg_qos_enable:开启内存优先级特性,默认值为0,有效值为0和1。开启命令为:`echo 1 > /proc/sys/vm/memcg_qos_enable` - - 0:表示关闭特性 - - 1:表示开启特性。 - -- /sys/fs/cgroup/memory目录下容器的cgroup中,如`/sys/fs/cgroup/memory/kubepods/burstable//`目录 - - memory.qos_level:开启内存优先级配置,默认值为0,有效值为0和-1。 - - 0:标识为在线业务 - - -1:标识为离线业务 - -### 内存优先级配置详解 - -rubik会根据pod的yaml文件中的注解`volcano.sh/preemptable`自动配置memory.qos_level,参考[CPU优先级配置详解](#cpu优先级配置详解) - ---------------------- - -## dynCache 访存带宽和LLC限制 - -rubik支持业务的Pod访存带宽(memory bandwidth)和LLC(Last Level Cache)限制,通过限制离线业务的访存带宽/LLC使用,减少其对在线业务的干扰。 - -**前置条件**: - -- cache/访存限制功能仅支持物理机,不支持虚拟机。 - - X86物理机,需要OS支持且开启intel RDT的CAT和MBA功能,内核启动项cmdline需要添加`rdt=l3cat,mba` - - ARM物理机,需要OS支持且开启mpam功能,内核启动项需要添加`mpam=acpi`。 - -- 由于内核限制,RDT mode当前不支持pseudo-locksetup模式。 - -**rubik新增权限和目录**: - -- 挂载目录: `/sys/fs/resctrl`。 rubik需要读取和设置/sys/fs/resctrl目录下的文件,该目录需在rubik启动前挂载,且需保障在rubik运行过程中不被卸载。 -- 权限: SYS_ADMIN. 设置主机/sys/fs/resctrl目录下的文件需要rubik容器被赋有SYS_ADMIN权限。 -- namepsace: pid namespace. 
rubik需要获取业务容器进程在主机上的pid,所以rubik容器需与主机共享pid namespace。 - -**rubik rdt 控制组**: - -rubik在RDT resctrl目录(默认为 /sys/fs/resctrl)下创建5个控制组,分别为rubik_max、rubik_high、rubik_middle、rubik_low、rubik_dynamic。rubik启动后,将水位线写入对应控制组的schemata。其中,low、middle、high的水位线可在cacheConfig中配置;max控制组为默认最大值, dynamic控制组初始水位线和low控制组一致。 - -离线业务pod启动时通过注解`volcano.sh/cache-limit`设置其cache level,并被加入到指定的控制组中, 如下列配置的pod将被加入rubik_low控制组: - -``` -annotations: - volcano.sh/cache-limit: "low" -``` - -**rubik dynamic控制组**: - -当存在level为dynamic的离线pod时,rubik通过采集当前节点在线业务pod的cache miss 和 llc miss 指标,调整rubik_dynamic控制组的水位线,实现对dynamic控制组内离线应用pod的动态控制。 - -### dynCache内核接口 - -- /sys/fs/resctrl: 在该目录下创建5个控制组目录,并修改其schemata和tasks文件。 - -### dynCache配置详解 - -dynCache功能相关的配置在`cacheConfig`中: - -``` -"cacheConfig": { - "enable": false, - "defaultLimitMode": "static", - "adjustInterval": 1000, - "perfDuration": 1000, - "l3Percent": { - "low": 20, - "mid": 30, - "high": 50 - }, - "memBandPercent": { - "low": 10, - "mid": 30, - "high": 50 - } - }, -``` - -- l3Percent 和 memBandPercent: - 通过 l3Percent 和 memBandPercent 配置low, mid, high控制组的水位线。 - - 比如当环境的`rdt bitmask=fffff, numa=2`时, rubik_low的控制组将根据 l3Percent low=20 和 memBandPercent low=10 两个参数, 将为/sys/fs/resctrl/rubik_low控制组配置: - - ``` - L3:0=f;1=f - MB:0=10;1=10 - ``` - -- defaultLimitMode: 如果离线pod未指定`volcano.sh/cache-limit`注解,将根据cacheConfig的defaultLimitMode来决定pod将被加入哪个控制组: - - defaultLimitMode为static时,pod将被加入到rubik_max控制组 - - defaultLimitMode为dynamic时,pod将被加入到rubik_dynamic控制组 -- adjustInterval: dynCache动态调整rubik_dynamic控制组的间隔时间,单位ms,默认1000ms -- perfDuration: dynCache性能perf执行时长,单位ms,默认1000ms - -### dynCache注意事项 - -- dynCache仅针对离线pod,对在线业务不生效。 -- 若业务容器运行过程中被手动重启(容器ID不变但容器进程PID变化),针对该容器的dynCache无法生效。 -- 业务容器启动并已设置dynCache级别后,不支持对其限制级别进行修改。 -- 动态限制组的调控灵敏度受到rubik配置文件内adjustInterval、perfDuration值以及节点在线业务pod数量的影响,每次调整(若干扰检测结果为需要调整)间隔在区间[adjustInterval+perfDuration, adjustInterval+perfDuration*pod数量]内波动,用户可根据灵敏度需求调整配置项。 - ---------------------- - -## blkio - 
-Pod的blkio的配置以`volcano.sh/blkio-limit`注解的形式,在pod创建的时候配置,或者在pod运行期间通过kubectl annotate进行动态的修改,支持离线和在线pod。 - -配置内容为4个列表: -| 项 | 说明 | -| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------- | -| device_read_bps | 用于设定设备执行“读”操作字节的上限。该配置为list,可以对多个device进行配置,device指定需要限制的块设备,value限定上限值,单位为byte | -| device_read_iops | 用于设定设备执行“读”操作次数的上限。该配置为list,可以对多个device进行配置,device指定需要限制的块设备 | -| device_write_bps | 用于设定设备执行 “写” 操作次数的上限。该配置为list,可以对多个device进行配置,device指定需要限制的块设备,value限定上限值,单位为byte | -| device_write_iops | 用于设定设备执行“写”操作字节的上限。该配置为list,可以对多个device进行配置,device指定需要限制的块设备 | - -### blkio内核接口 - -- /sys/fs/cgroup/blkio目录下容器的cgroup中,如`/sys/fs/cgroup/blkio/kubepods/burstable//`目录: - - blkio.throttle.read_bps_device - - blkio.throttle.read_iops_device - - blkio.throttle.write_bps_device - - blkio.throttle.write_iops_device - -配置的key:value和cgroup的key:value的配置规则一致: - -- 写入时会转换成环境page size的倍数 -- 只有minor为0的device配置才会生效 -- 如果取消限速,可将值设为0 - -### blkio配置详解 - -**rubik开启关闭blkio功能**: - -rubik提供blkio配置功能的开关,在`blkioConfig`中 - -``` -"blkioConfig": { - "enable": true -} -``` - -- enable: IO控制模块使能开关, 默认为false - -**pod配置样例**: - -通过pod的注解配置时可提供四个列表,分别是write_bps, write_iops, read_bps, read_iops, read_byte. 
- -- 创建时: 在yaml文件中 - - ``` - volcano.sh/blkio-limit: '{"device_read_bps":[{"device":"/dev/sda1","value":"10485760"}, {"device":"/dev/sda","value":"20971520"}], - "device_write_bps":[{"device":"/dev/sda1","value":"20971520"}], - "device_read_iops":[{"device":"/dev/sda1","value":"200"}], - "device_write_iops":[{"device":"/dev/sda1","value":"300"}]}' - ``` - -- 修改annotation: 可通过 kubectl annotate动态修改,如: - ```kubectl annotate --overwrite pods volcano.sh/blkio-limit='{"device_read_bps":[{"device":"/dev/vda", "value":"211715200"}]}'``` - ---------------------- - -## memory - -rubik中支持多种内存策略。针对不同场景使用不同的内存分配方案,以解决多场景内存分配。 - -dynlevel策略:基于内核cgroup的多级别控制。通过监测节点内存压力,多级别动态调整离线业务的memory cgroup,尽可能地保障在线业务服务质量。 - -fssr策略:基于内核cgroup的动态水位线控制。memory.high是内核提供的memcg级的水位线接口,rubik动态检测内存压力,动态调整离线应用的memory.high上限,实现对离线业务的内存压制,保障在线业务的服务质量。 - -### memory dynlevel策略内核接口 - -- /sys/fs/cgroup/memory目录下容器的cgroup中,如`/sys/fs/cgroup/memory/kubepods/burstable//`目录。dynlevel策略会依据当前节点的内存压力大小,依次调整节点离线应用容器的下列值: - - - memory.soft_limit_in_bytes - - memory.force_empty - - memory.limit_in_bytes - - /proc/sys/vm/drop_caches - -### memory dynlevel策略配置详解 - -rubik提供memory的指定策略和控制间隔,在`memoryConfig`中 - -``` -"memoryConfig": { - "enable": true, - "strategy": "none", - "checkInterval": 5 - } -``` - -- enable 为是否打开该配置的开关 -- strategy为memory的策略名称,现支持 dynlevel/fssr/none,默认为none。 - - none: 即不设置任何策略,不会对内存进行调整。 - - dynlevel: 动态分级调整策略。 - - fssr: 快压制慢恢复策略。1)rubik启动时,默认配置所有离线的memory.high为总内存的80%。2)当内存压力增加,可用内存freeMemory < reservedMemory(预留内存,totalMemory * 5%) 时认为内存紧张,此时压缩所有离线的memory.high, 压缩量为总内存的10%,即最新的memory.high=memory.high-totalMemory * 10%。3)当持续一段时间总内存比较富裕,即可用内存freeMemory > 3 * reservedMemory(totalMemory * 5%)时认为内存富裕,此时释放总内存的1%给离线应用,memory.high=memory.high+totalMemory * 1%, 直到memory free 介于reservedMemory与3 * reservedMemory之间。 - -- checkInterval为策略的周期性检查的时间,单位为秒, 默认为5。 - -### memory fssr策略内核接口 - -- /sys/fs/cgroup/memory目录下容器的cgroup中,如`/sys/fs/cgroup/memory/kubepods/burstable//`目录。fssr策略会依据当前节点的内存压力大小,依次调整节点离线应用容器的下列值: 
-- memory.high - -### memory fssr策略配置详解 - -rubik提供memory的指定策略和控制间隔,在`memoryConfig`中 -``` -"memoryConfig": { - "enable": true, - "strategy": "fssr", - "checkInterval": 5 - } -``` - -- enable 为是否打开该配置的开关 -- strategy为memory的策略名称,现支持 dynlevel/fssr/none 两个选项,默认为none。 - - none: 即不设置任何策略,不会对内存进行调整。 - - dynlevel: 动态分级调整策略。 - - fssr: 快压制慢恢复策略。1)rubik启动时,默认配置所有离线的memory.high为总内存的80%。2)当内存压力增加,可用内存freeMemory < reservedMemory(预留内存,totalMemory * 5%) 时认为内存紧张,此时压缩所有离线的memory.high, 压缩量为总内存的10%,即最新的memory.high=memory.high-totalMemory * 10%。3)当持续一段时间总内存比较富裕,即可用内存freeMemory > 3 * reservedMemory(totalMemory * 5%)时认为内存富裕,此时释放总内存的1%给离线应用,memory.high=memory.high+totalMemory * 1%, 直到memory free 介于reservedMemory与3 * reservedMemory之间。 - -- checkInterval为策略的周期性检查的时间,单位为秒, 默认为5。 - ---------------------- - -## quota burst - -Pod的quota burst的配置以`volcano.sh/quota-burst-time`注解的形式,在pod创建的时候配置,或者在pod运行期间通过kubectl annotate进行动态的修改,支持离线和在线pod。 - -Pod的quota burst默认单位是microseconds, 其允许容器的cpu使用率低于quota时累积cpu资源,并在cpu利用率超过quota时,使用容器累积的cpu资源。 - -### quota burst内核接口 - -- /sys/fs/cgroup/cpu目录下容器的cgroup中,如`/sys/fs/cgroup/cpu/kubepods/burstable//`目录,注解的值将被写入下列文件中: - - cpu.cfs_burst_us - -- 注解`volcano.sh/quota-burst-time`的值和cpu.cfs_burst_us的约束一致: - - 当cpu.cfs_quota_us不为-1,需满足cpu.cfs_burst_us + cpu.cfs_quota_us <= 2^44-1 且 cpu.cfs_burst_us <= cpu.cfs_quota_us - - 当cpu.cfs_quota_us为-1,cpu.cfs_burst_us最大没有限制,取决于系统最大可设置的值 - - -**pod配置样例** - -- 创建时: 在yaml文件中 - - ``` - metadata: - annotations: - volcano.sh/quota-burst-time : "2000" - ``` - -- 修改annotation: 可通过 kubectl annotate动态修改,如: - - ```kubectl annotate --overwrite pods volcano.sh/quota-burst-time='3000'``` - ---------------------- - -## rubik支持基于iocost的io权重控制 - -### 依赖说明 -rubik支持通过在cgroup v1下的iocost控制不同pod的io权重分配。因此需要内核支持如下特性: -- 内核支持cgroup v1 blkcg iocost -- 内核支持cgroup v1 writeback - -### rubik实现说明 - -```mermaid -sequenceDiagram -actor user as actor -participant apiserver -participant rubik -participant kernel; -participant cgroup; -user->>kernel: use 
iocost_coef_gen.py to get iocost parameter -user->>rubik: deploy rubik and enable rubik iocost feature and take iocost parameter -activate rubik; -rubik->>apiserver: listen and watch -rubik->>rubik: parsing iocost parameters -rubik->>cgroup : configure iocost parameters -user->>apiserver : deploy pod -apiserver->>rubik : send pod configure -rubik->>cgroup : parse pod's configure and bind memcg with blkcg and config pod's iocost.weight -``` - -步骤如下 -- 部署rubik时,rubik解析配置并设置iocost相关参数 -- rubik注册监听事件到k8s api-server -- pod被部署时将pod配置信息等回调到rubik -- rubik解析pod配置信息,并根据qos level配置pod iocost权重 - -### rubik协议说明 -```json -"nodeConfig": [ - { - "nodeName": "slaver01", - "iocostEnable": true, - "iocostConfig": [ - { - "dev": "sda", - "enable": false, - "model": "linear", - "param": { - "rbps": 174610612, - "rseqiops": 41788, - "rrandiops": 371, - "wbps": 178587889, - "wseqiops": 42792, - "wrandiops": 379 - } - } - ] - } - ] -``` - -| 配置项 | 类型 | 说明 | -| ----------- | ----------- | ------ | -| nodeConfig | 数组 | node节点配置信息 | -| nodeName | string | 要配置的节点名称 | -| iocostEnable | bool | 该node节点是否使用iocost | -| iocostConfig | 数组 | 针对不同物理磁盘的配置数组,当iocostEnable为true时会被读取 | -| dev | string | 物理磁盘名称 | -| enable | bool | 该物理磁盘是否启用iocost | -| model | string | iocost的模型名称,linear为内核自带线性模型 | -| param | object | 该参数针对model参数配置,当model为linear时,下面的参数都是linear相关参数 | -| r(w)bps | int64 | 该物理块设备最大读(写)带宽 | -| r(w)seqiops | int64 | 该物理块设备最大顺序读(写)iops | -| r(w)randiops | int64 | 该物理块设备最大随机读(写)iops | - - -### 其他 -- iocost linear模型相关参数可以通过iocost_coef_gen.py脚本获取,可以从[link](https://github.com/torvalds/linux/blob/master/tools/cgroup/iocost_coef_gen.py)获得。 - -- 在blkcg根系统文件下存在`blkio.cost.qos`和`blkio.cost.model`两个文件接口。实现方式和接口说明可以访问openEuler内核文档。 \ No newline at end of file diff --git a/go.sum b/go.sum index 6e9ecd91491f13501768ffa8720d2d91c431a799..4962dc59ea7fcfc81215f8d0bc62dfbb8b8face7 100644 --- a/go.sum +++ b/go.sum @@ -78,6 +78,7 @@ github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXP 
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= @@ -138,9 +139,11 @@ github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/X github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs= github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= @@ -167,6 +170,7 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1: github.com/rogpeppe/go-internal v1.3.0/go.mod 
h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= @@ -322,6 +326,7 @@ golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjs golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= @@ -377,6 +382,7 @@ google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= diff --git a/hack/rubik-daemonset.yaml b/hack/rubik-daemonset.yaml index df166d06bc6b500ba3d6950372e5cabbce2471a8..3cbd93adff7c08a7f9392d854f779a904da5c352 100644 --- a/hack/rubik-daemonset.yaml +++ b/hack/rubik-daemonset.yaml @@ -34,28 +34,22 @@ metadata: data: config.json: | { - "autoCheck": false, + "agent": { "logDriver": "stdio", "logDir": "/var/log/rubik", "logSize": 1024, "logLevel": "info", "cgroupRoot": "/sys/fs/cgroup", - "cacheConfig": { - "enable": false, - "defaultLimitMode": "static", - "adjustInterval": 1000, - "perfDuration": 1000, - "l3Percent": { - "low": 20, - "mid": 30, - "high": 50 - }, - "memBandPercent": { - "low": 10, - "mid": 30, - "high": 50 - } - } + "enabledFeatures": [ + "preemption" + ] + }, + "preemption": { + "resource": [ + "cpu", + "memory" + ] + } } --- apiVersion: apps/v1 @@ -106,6 +100,9 @@ spec: - name: sysfs mountPath: /sys/fs readOnly: false + - name: devfs + mountPath: /dev + readOnly: false - name: config-volume mountPath: /var/lib/rubik terminationGracePeriodSeconds: 30 @@ -119,6 +116,9 @@ spec: - name: sysfs hostPath: path: /sys/fs + - name: devfs + hostPath: + path: /dev - name: config-volume configMap: name: rubik-config diff --git a/hack/static_check.sh b/hack/static_check.sh old mode 100755 new mode 100644 diff --git a/hack/unit_test.sh b/hack/unit_test.sh old mode 100755 new mode 100644 diff --git a/pkg/api/api.go b/pkg/api/api.go new file mode 100644 index 0000000000000000000000000000000000000000..da4228efdc92aa0988e94a766940723095c94f2c --- /dev/null +++ b/pkg/api/api.go @@ -0,0 +1,102 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Jiaqi Yang +// Create: 2023-01-05 +// Description: This file contains important interfaces used in the project + +// Package api is interface collection +package api + +import ( + "context" + + "isula.org/rubik/pkg/core/typedef" +) + +// Registry provides an interface for service discovery +type Registry interface { + Init() error + Register(*Service, string) error + Deregister(*Service, string) error + GetService(string) (*Service, error) + ListServices() ([]*Service, error) +} + +// ServiceDescriber describes services +type ServiceDescriber interface { + ID() string +} + +// EventFunc is the event handler for Service +type EventFunc interface { + AddFunc(podInfo *typedef.PodInfo) error + UpdateFunc(old, new *typedef.PodInfo) error + DeleteFunc(podInfo *typedef.PodInfo) error +} + +// Service contains progress that all services need to have +type Service interface { + ServiceDescriber + EventFunc +} + +// PersistentService is an abstract persistent running service +type PersistentService interface { + ServiceDescriber + // Run is a service processing logic, which is blocking (implemented in an infinite loop, etc.) 
+ Run(ctx context.Context) +} + +// ListOption is for filtering podInfo +type ListOption func(pi *typedef.PodInfo) bool + +// Viewer collect on/offline pods info +type Viewer interface { + ListContainersWithOptions(options ...ListOption) map[string]*typedef.ContainerInfo + ListPodsWithOptions(options ...ListOption) map[string]*typedef.PodInfo +} + +// Publisher is a generic interface for Observables +type Publisher interface { + Subscribe(s Subscriber) error + Unsubscribe(s Subscriber) + Publish(topic typedef.EventType, event typedef.Event) +} + +// Subscriber is a common interface for subscribers +type Subscriber interface { + ID() string + NotifyFunc(eventType typedef.EventType, event typedef.Event) + TopicsFunc() []typedef.EventType +} + +// EventHandler is the processing interface for change events +type EventHandler interface { + HandleEvent(eventType typedef.EventType, event typedef.Event) + EventTypes() []typedef.EventType +} + +// Informer is an interface for external pod data sources to interact with rubik +type Informer interface { + Publisher + Start(ctx context.Context) +} + +// Logger is the handler to print the log +type Logger interface { + // Errorf logs bugs that affect normal functionality + Errorf(f string, args ...interface{}) + // Warnf logs produce unexpected results + Warnf(f string, args ...interface{}) + // Infof logs normal messages + Infof(f string, args ...interface{}) + // Debugf logs verbose messages + Debugf(f string, args ...interface{}) +} diff --git a/pkg/autoconfig/autoconfig.go b/pkg/autoconfig/autoconfig.go deleted file mode 100644 index 5a494e048fe280ed52fb7525518ad5a111a54d3e..0000000000000000000000000000000000000000 --- a/pkg/autoconfig/autoconfig.go +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. 
-// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Danni Xia -// Create: 2021-07-22 -// Description: qos auto config - -// Package autoconfig is for qos auto config -package autoconfig - -import ( - "time" - - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/fields" - "k8s.io/client-go/informers" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/cache" - - "isula.org/rubik/pkg/config" - log "isula.org/rubik/pkg/tinylog" -) - -const invalidErr = "Auto config error: invalid pod type" - -// EventHandler is used to process pod events pushed by Kubernetes APIServer. -type EventHandler interface { - AddEvent(pod *corev1.Pod) - UpdateEvent(oldPod *corev1.Pod, newPod *corev1.Pod) - DeleteEvent(pod *corev1.Pod) -} - -// Backend is Rubik struct. -var Backend EventHandler - -// Init initializes the callback function for the pod event. -func Init(kubeClient *kubernetes.Clientset, nodeName string) error { - const ( - reSyncTime = 30 - specNodeNameField = "spec.nodeName" - ) - kubeInformerFactory := informers.NewSharedInformerFactoryWithOptions(kubeClient, - time.Duration(reSyncTime)*time.Second, - informers.WithTweakListOptions(func(options *metav1.ListOptions) { - // set Options to return only pods on the current node. 
- options.FieldSelector = fields.OneTermEqualSelector(specNodeNameField, nodeName).String() - })) - kubeInformerFactory.Core().V1().Pods().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: addHandler, - UpdateFunc: updateHandler, - DeleteFunc: deleteHandler, - }) - kubeInformerFactory.Start(config.ShutdownChan) - return nil -} - -func addHandler(obj interface{}) { - pod, ok := obj.(*corev1.Pod) - if !ok { - log.Errorf(invalidErr) - return - } - - Backend.AddEvent(pod) -} - -func updateHandler(old, new interface{}) { - oldPod, ok1 := old.(*corev1.Pod) - newPod, ok2 := new.(*corev1.Pod) - if !ok1 || !ok2 { - log.Errorf(invalidErr) - return - } - - Backend.UpdateEvent(oldPod, newPod) -} - -func deleteHandler(obj interface{}) { - pod, ok := obj.(*corev1.Pod) - if !ok { - log.Errorf(invalidErr) - return - } - - Backend.DeleteEvent(pod) -} diff --git a/pkg/blkio/blkio.go b/pkg/blkio/blkio.go deleted file mode 100644 index 573b3bf5277078ebed3bf1304abadd97f2d494c1..0000000000000000000000000000000000000000 --- a/pkg/blkio/blkio.go +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Song Yanting -// Create: 2022-6-7 -// Description: blkio setting for pods - -// Package blkio now only support byte unit. -// For example, limit read operation maximum 10 MBps, set value 10485760 -// More units will be supported like MB, KB... 
-package blkio - -import ( - "bytes" - "encoding/json" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "strings" - "syscall" - - corev1 "k8s.io/api/core/v1" - - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/util" -) - -const ( - deviceReadBpsFile = "blkio.throttle.read_bps_device" - deviceWriteBpsFile = "blkio.throttle.write_bps_device" - deviceReadIopsFile = "blkio.throttle.read_iops_device" - deviceWriteIopsFile = "blkio.throttle.write_iops_device" -) - -// DeviceConfig defines blkio device configurations -type DeviceConfig struct { - DeviceName string `json:"device,omitempty"` - DeviceValue string `json:"value,omitempty"` -} - -// BlkConfig defines blkio device configurations -type BlkConfig struct { - DeviceReadBps []DeviceConfig `json:"device_read_bps,omitempty"` - DeviceWriteBps []DeviceConfig `json:"device_write_bps,omitempty"` - DeviceReadIops []DeviceConfig `json:"device_read_iops,omitempty"` - DeviceWriteIops []DeviceConfig `json:"device_write_iops,omitempty"` -} - -// SetBlkio set blkio limtis according to annotation -func SetBlkio(pod *corev1.Pod) { - cfg := decodeBlkioCfg(pod.Annotations[constant.BlkioKey]) - if cfg == nil { - return - } - blkioLimit(pod, cfg, false) -} - -// WriteBlkio updates blkio limtis according to annotation -func WriteBlkio(old *corev1.Pod, new *corev1.Pod) { - if new.Status.Phase != corev1.PodRunning { - return - } - - if old.Annotations[constant.BlkioKey] == new.Annotations[constant.BlkioKey] { - return - } - - // empty old blkio limits - if oldCfg := decodeBlkioCfg(old.Annotations[constant.BlkioKey]); oldCfg != nil { - blkioLimit(old, oldCfg, true) - } - - // set new blkio limits - if newCfg := decodeBlkioCfg(new.Annotations[constant.BlkioKey]); newCfg != nil { - blkioLimit(new, newCfg, false) - } -} - -func blkioLimit(pod *corev1.Pod, cfg *BlkConfig, empty bool) { - if len(cfg.DeviceReadBps) > 0 { - tryWriteBlkioLimit(pod, cfg.DeviceReadBps, 
deviceReadBpsFile, empty) - } - if len(cfg.DeviceWriteBps) > 0 { - tryWriteBlkioLimit(pod, cfg.DeviceWriteBps, deviceWriteBpsFile, empty) - } - if len(cfg.DeviceReadIops) > 0 { - tryWriteBlkioLimit(pod, cfg.DeviceReadIops, deviceReadIopsFile, empty) - } - if len(cfg.DeviceWriteIops) > 0 { - tryWriteBlkioLimit(pod, cfg.DeviceWriteIops, deviceWriteIopsFile, empty) - } -} - -func decodeBlkioCfg(blkioCfg string) *BlkConfig { - if len(blkioCfg) == 0 { - return nil - } - log.Infof("blkioCfg is %v", blkioCfg) - cfg := &BlkConfig{ - DeviceReadBps: []DeviceConfig{}, - DeviceWriteBps: []DeviceConfig{}, - DeviceReadIops: []DeviceConfig{}, - DeviceWriteIops: []DeviceConfig{}, - } - reader := bytes.NewReader([]byte(blkioCfg)) - if err := json.NewDecoder(reader).Decode(cfg); err != nil { - log.Errorf("decode blkioCfg failed with error: %v", err) - return nil - } - return cfg -} - -func tryWriteBlkioLimit(pod *corev1.Pod, devCfgs []DeviceConfig, deviceFilePath string, empty bool) { - for _, devCfg := range devCfgs { - devName, devLimit := devCfg.DeviceName, devCfg.DeviceValue - - fi, err := os.Stat(devName) - if err != nil { - log.Errorf("stat %s failed with error %v", devName, err) - continue - } - if fi.Mode()&os.ModeDevice == 0 { - log.Errorf("%s is not a device", devName) - continue - } - - if st, ok := fi.Sys().(*syscall.Stat_t); ok { - devno := st.Rdev - major, minor := devno/256, devno%256 - var limit string - if empty == true { - limit = fmt.Sprintf("%v:%v 0", major, minor) - } else { - limit = fmt.Sprintf("%v:%v %s", major, minor, devLimit) - } - writeBlkioLimit(pod, limit, deviceFilePath) - } else { - log.Errorf("failed to get Sys(), %v has type %v", devName, st) - } - } -} - -func writeBlkioLimit(pod *corev1.Pod, limit, deviceFilePath string) { - const ( - dockerPrefix = "docker://" - containerdPrefix = "containerd://" - blkioPath = "blkio" - ) - podCgroupPath := util.GetPodCgroupPath(pod) - for _, container := range pod.Status.ContainerStatuses { - containerID := 
strings.TrimPrefix(container.ContainerID, dockerPrefix) - containerID = strings.TrimPrefix(containerID, containerdPrefix) - containerPath := filepath.Join(podCgroupPath, containerID) - containerBlkFilePath := filepath.Join(config.CgroupRoot, blkioPath, containerPath, deviceFilePath) - - err := ioutil.WriteFile(containerBlkFilePath, []byte(limit), constant.DefaultFileMode) - if err != nil { - log.Errorf("writeBlkioLimit write %v to %v failed with error: %v", limit, containerBlkFilePath, err) - continue - } - log.Infof("writeBlkioLimit write %s to %v success", limit, containerBlkFilePath) - } -} diff --git a/pkg/blkio/blkio_test.go b/pkg/blkio/blkio_test.go deleted file mode 100644 index 96ef3d8a5d7fecaa66d0d5049e9018709da539b8..0000000000000000000000000000000000000000 --- a/pkg/blkio/blkio_test.go +++ /dev/null @@ -1,276 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. 
-// Author: Song Yanting -// Create: 2022-6-7 -// Description: blkio test -package blkio - -import ( - "fmt" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "strconv" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/typedef" - "isula.org/rubik/pkg/util" -) - -const ( - blkioAnnotation = `{"device_read_bps":[{"device":"/dev/sda1","value":"52428800"}, {"device":"/dev/sda","value":"105857600"}], -"device_write_bps":[{"device":"/dev/sda1","value":"105857600"}], -"device_read_iops":[{"device":"/dev/sda1","value":"200"}], -"device_write_iops":[{"device":"/dev/sda1","value":"300"}]}` - invBlkioAnnotation = `{"device_read_bps":[{"device":"/dev/sda1","value":"52428800"}, {"device":"/dev/sda","value":"105857600"}}` -) - -var ( - devicePaths = map[string]string{ - "device_read_bps": deviceReadBpsFile, - "device_write_bps": deviceWriteBpsFile, - "device_read_iops": deviceReadIopsFile, - "device_write_iops": deviceWriteIopsFile, - } - status = corev1.PodStatus{ - ContainerStatuses: []corev1.ContainerStatus{ - {ContainerID: "docker://aaa"}, - }, - QOSClass: corev1.PodQOSBurstable, - Phase: corev1.PodRunning, - } - containerDir = filepath.Join(constant.TmpTestDir, "blkio/kubepods/burstable/podaaa/aaa") -) - -func getMajor(devName string) (major int, err error) { - cmd := fmt.Sprintf("ls -l %v | awk -F ' ' '{print $5}'", devName) - out, err := exec.Command("/bin/bash", "-c", cmd).Output() - if err != nil { - return -1, err - } - o := strings.TrimSuffix(strings.TrimSpace(string(out)), ",") - return strconv.Atoi(o) -} - -func getMinor(devName string) (minor int, err error) { - cmd := fmt.Sprintf("ls -l %v | awk -F ' ' '{print $6}'", devName) - out, err := exec.Command("sh", "-c", cmd).Output() - if err != nil { - return -1, err - } - o := strings.TrimSuffix(strings.TrimSpace(string(out)), "\n") - 
return strconv.Atoi(o) -} - -func TestBlkioAnnotation1(t *testing.T) { - // valid blkiocfg format - cfg := decodeBlkioCfg(blkioAnnotation) - assert.True(t, len(cfg.DeviceReadBps) > 0) - assert.True(t, len(cfg.DeviceWriteBps) > 0) - assert.True(t, len(cfg.DeviceReadIops) > 0) - assert.True(t, len(cfg.DeviceWriteIops) > 0) - - assert.Equal(t, "/dev/sda1", cfg.DeviceReadBps[0].DeviceName) - assert.Equal(t, "52428800", cfg.DeviceReadBps[0].DeviceValue) - assert.Equal(t, "/dev/sda", cfg.DeviceReadBps[1].DeviceName) - assert.Equal(t, "105857600", cfg.DeviceReadBps[1].DeviceValue) - - assert.Equal(t, "/dev/sda1", cfg.DeviceWriteBps[0].DeviceName) - assert.Equal(t, "105857600", cfg.DeviceWriteBps[0].DeviceValue) - - assert.Equal(t, "/dev/sda1", cfg.DeviceReadIops[0].DeviceName) - assert.Equal(t, "200", cfg.DeviceReadIops[0].DeviceValue) - - assert.Equal(t, "/dev/sda1", cfg.DeviceWriteIops[0].DeviceName) - assert.Equal(t, "300", cfg.DeviceWriteIops[0].DeviceValue) - - // invalid blkiocfg format - cfg = decodeBlkioCfg(invBlkioAnnotation) - assert.Equal(t, (*BlkConfig)(nil), cfg) -} - -func TestBlkioAnnotation2(t *testing.T) { - // valid blkiocfg format, valid + invalid device name - s1 := `{"device_read_bps":[{"device":"/dev/sda1","value":"10485760"}, {"device":"/dev/sda","value":"10485760"}], - "device_read_iops":[{"device":"/dev/sda1","value":"200"}, {"device":"/dev/123","value":"123"}]}` - cfg := decodeBlkioCfg(s1) - assert.True(t, len(cfg.DeviceReadBps) == 2) - assert.True(t, len(cfg.DeviceWriteBps) == 0) - assert.True(t, len(cfg.DeviceReadIops) == 2) - assert.True(t, len(cfg.DeviceWriteIops) == 0) - - assert.Equal(t, "/dev/sda1", cfg.DeviceReadBps[0].DeviceName) - assert.Equal(t, "10485760", cfg.DeviceReadBps[0].DeviceValue) - assert.Equal(t, "/dev/sda", cfg.DeviceReadBps[1].DeviceName) - assert.Equal(t, "10485760", cfg.DeviceReadBps[1].DeviceValue) - - assert.Equal(t, "/dev/sda1", cfg.DeviceReadIops[0].DeviceName) - assert.Equal(t, "200", 
cfg.DeviceReadIops[0].DeviceValue) - assert.Equal(t, "/dev/123", cfg.DeviceReadIops[1].DeviceName) - assert.Equal(t, "123", cfg.DeviceReadIops[1].DeviceValue) -} - -func listDevices() []string { - dir, _ := ioutil.ReadDir("/sys/block") - devices := make([]string, 0, len(dir)) - for _, f := range dir { - devices = append(devices, f.Name()) - } - return devices -} - -func genarateRandDev(n int, devices []string) string { - const bytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-" - b := make([]byte, n) - valid := true - for valid { - valid = false - for i := range b { - b[i] = bytes[typedef.RandInt(len(bytes))] - } - for _, device := range devices { - if device == string(b) { - valid = true - } - } - } - return string(b) -} - -func mkdirHelper(t *testing.T) { - assert.NoError(t, os.RemoveAll(constant.TmpTestDir)) - for _, fname := range []string{deviceReadBpsFile, deviceWriteBpsFile, deviceReadIopsFile, deviceWriteIopsFile} { - assert.NoError(t, util.CreateFile(filepath.Join(containerDir, fname))) - } -} - -func TestSetBlkio(t *testing.T) { - testFunc := func(s1, deviceType, devicePath string, devices []string) { - mkdirHelper(t) - defer os.RemoveAll(constant.TmpTestDir) - - config.CgroupRoot = constant.TmpTestDir - pod := &corev1.Pod{ - ObjectMeta: v1.ObjectMeta{ - UID: "aaa", - Annotations: map[string]string{ - constant.BlkioKey: s1, - }, - }, - Status: status, - } - - SetBlkio(pod) - - expected := "" - for _, device := range devices { - major, err := getMajor("/dev/" + device) - if major == 0 || err != nil { - continue - } - minor, err := getMinor("/dev/" + device) - if err != nil { - continue - } - expected += fmt.Sprintf("%v:%v %v", major, minor, 10485760) - } - actual, _ := ioutil.ReadFile(filepath.Join(containerDir, devicePath)) - - assert.Equal(t, expected, strings.TrimSuffix(string(actual), "\n")) - } - - // test valid devices names from /sys/block - devices := listDevices() - for deviceType, devicePath := range devicePaths { - for _, 
device := range devices { - cfg := `{"` + deviceType + `":[{"device":"/dev/` + device + `","value":"10485760"}]}` - testFunc(cfg, deviceType, devicePath, []string{device}) - } - } - - // test invalid device names from random generated characters - for deviceType, devicePath := range devicePaths { - invalidDeviceName := genarateRandDev(3, devices) - cfg := `{"` + deviceType + `":[{"device":"/dev/` + invalidDeviceName + `","value":"10485760"}]}` - testFunc(cfg, deviceType, devicePath, []string{invalidDeviceName}) - - } - - // test valid devices names + invalid devices names - for deviceType, devicePath := range devicePaths { - for _, device := range devices { - invalidDeviceName := genarateRandDev(3, devices) - cfg := `{"` + deviceType + `":[{"device":"/dev/` + device + `","value":"10485760"}, {"device":"/dev/` + invalidDeviceName + `","value":"10485760"}]}` - testFunc(cfg, deviceType, devicePath, []string{device, invalidDeviceName}) - } - } -} - -func TestWriteBlkio(t *testing.T) { - mkdirHelper(t) - defer os.RemoveAll(constant.TmpTestDir) - - config.CgroupRoot = constant.TmpTestDir - old := corev1.Pod{ - ObjectMeta: v1.ObjectMeta{ - UID: "aaa", - Annotations: map[string]string{ - constant.BlkioKey: "", - }, - }, - Status: status, - } - SetBlkio(&old) - - testFunc := func(newCfg, deviceType, devicePath string, devices []string) { - new := corev1.Pod{ - ObjectMeta: v1.ObjectMeta{ - UID: "aaa", - Annotations: map[string]string{ - constant.BlkioKey: newCfg, - }, - }, - Status: status, - } - WriteBlkio(&old, &new) - - old = new - expected := "" - for _, device := range devices { - major, _ := getMajor("/dev/" + device) - if major == 0 { - continue - } - minor, _ := getMinor("/dev/" + device) - expected += fmt.Sprintf("%v:%v %v", major, minor, 10485760) - } - file := filepath.Join(containerDir, devicePath) - actual, _ := ioutil.ReadFile(file) - - assert.Equal(t, expected, strings.TrimSuffix(string(actual), "\n")) - } - - // test valid devices names from /sys/block - 
devices := listDevices() - for deviceType, devicePath := range devicePaths { - for _, device := range devices { - newCfg := `{"` + deviceType + `":[{"device":"/dev/` + device + `","value":"10485760"}]}` - testFunc(newCfg, deviceType, devicePath, []string{device}) - } - } -} diff --git a/pkg/cachelimit/cachelimit.go b/pkg/cachelimit/cachelimit.go deleted file mode 100644 index 9b668bb8a9cad1aa4c8e3404645dd0c666b94881..0000000000000000000000000000000000000000 --- a/pkg/cachelimit/cachelimit.go +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. 
-// Author: Danni Xia -// Create: 2022-01-18 -// Description: offline pod cache limit function - -// Package cachelimit is for cache limiting -package cachelimit - -import ( - "io/ioutil" - "os" - "path/filepath" - "strings" - - securejoin "github.com/cyphar/filepath-securejoin" - "github.com/pkg/errors" - "k8s.io/apimachinery/pkg/types" - - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" - "isula.org/rubik/pkg/util" -) - -const ( - resctrlDir = "/sys/fs/resctrl" - noProErr = "no such process" -) - -// SyncLevel sync cache limit level -func SyncLevel(pi *typedef.PodInfo) error { - level := pi.CacheLimitLevel - if level == "" { - if defaultLimitMode == staticMode { - pi.CacheLimitLevel = maxLevel - } else { - pi.CacheLimitLevel = dynamicLevel - } - } - if !levelValid(pi.CacheLimitLevel) { - return errors.Errorf("invalid cache limit level %v for pod: %v", level, pi.UID) - } - return nil -} - -// syncCacheLimit sync cache limit for offline pods, as new processes may generate during pod running, -// they should be moved to resctrl directory -func syncCacheLimit() { - offlinePods := cpm.ListOfflinePods() - for _, p := range offlinePods { - if err := SyncLevel(p); err != nil { - log.Errorf("sync cache limit level err: %v", err) - continue - } - if err := writeTasksToResctrl(p, resctrlDir); err != nil { - log.Errorf("set cache limit for pod %v err: %v", p.UID, err) - } - } -} - -// SetCacheLimit set cache limit for offline pods -func SetCacheLimit(pi *typedef.PodInfo) error { - log.Logf("setting cache limit level=%v for pod %s", pi.CacheLimitLevel, pi.UID) - - return writeTasksToResctrl(pi, resctrlDir) -} - -func writeTasksToResctrl(pi *typedef.PodInfo, resctrlRoot string) error { - taskRootPath := filepath.Join(config.CgroupRoot, "cpu", pi.CgroupPath) - if !util.PathExist(taskRootPath) { - log.Infof("path %v not exist, maybe pod %v is deleted", taskRootPath, pi.UID) - return nil - } - - 
tasks, _, err := getTasks(pi, taskRootPath) - if err != nil { - return err - } - if len(tasks) == 0 { - return nil - } - - resctrlTaskFile := filepath.Join(resctrlRoot, dirPrefix+pi.CacheLimitLevel, "tasks") - for _, task := range tasks { - if err := ioutil.WriteFile(resctrlTaskFile, []byte(task), constant.DefaultFileMode); err != nil { - if strings.Contains(err.Error(), noProErr) { - log.Errorf("pod %s task %s not exist", pi.UID, task) - continue - } - return errors.Errorf("add task %v to file %v error: %v", task, resctrlTaskFile, err) - } - } - - return nil -} - -func getTasks(pi *typedef.PodInfo, taskRootPath string) ([]string, []string, error) { - file := "cgroup.procs" - var taskList, containers []string - err := filepath.Walk(taskRootPath, func(path string, f os.FileInfo, err error) error { - if f != nil && f.IsDir() { - containerID := filepath.Base(f.Name()) - if cpm.ContainerExist(types.UID(pi.UID), containerID) { - return nil - } - cgFilePath, err := securejoin.SecureJoin(path, file) - if err != nil { - return errors.Errorf("join path failed for %s and %s: %v", path, file, err) - } - tasks, err := ioutil.ReadFile(filepath.Clean(cgFilePath)) - if err != nil { - return errors.Errorf("read task file %v err: %v", cgFilePath, err) - } - if strings.TrimSpace(string(tasks)) == "" { - return nil - } - if containerID != filepath.Base(taskRootPath) { - containers = append(containers, containerID) - } - taskList = append(taskList, strings.Split(strings.TrimSpace(string(tasks)), "\n")...) 
- } - return nil - }) - - return taskList, containers, err -} - -func levelValid(level string) bool { - switch level { - case lowLevel: - case middleLevel: - case highLevel: - case maxLevel: - case dynamicLevel: - default: - return false - } - - return true -} diff --git a/pkg/cachelimit/cachelimit_init.go b/pkg/cachelimit/cachelimit_init.go deleted file mode 100644 index 2abfc6f85e57883826e8d31b9fcbe20977f0c1ee..0000000000000000000000000000000000000000 --- a/pkg/cachelimit/cachelimit_init.go +++ /dev/null @@ -1,376 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. 
-// Author: Danni Xia -// Create: 2022-01-18 -// Description: offline pod cache limit directory init function - -package cachelimit - -import ( - "fmt" - "io/ioutil" - "os" - "path/filepath" - "strconv" - "strings" - "time" - - "github.com/pkg/errors" - "k8s.io/apimachinery/pkg/util/wait" - - "isula.org/rubik/pkg/checkpoint" - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/perf" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" - "isula.org/rubik/pkg/util" -) - -const ( - schemataFile = "schemata" - numaNodeDir = "/sys/devices/system/node" - dirPrefix = "rubik_" - perfEvent = "perf_event" - - lowLevel = "low" - middleLevel = "middle" - highLevel = "high" - maxLevel = "max" - dynamicLevel = "dynamic" - - staticMode = "static" - dynamicMode = "dynamic" - - defaultL3PercentMax = 100 - defaultMbPercentMax = 100 - minAdjustInterval = 10 - maxAdjustInterval = 10000 - // minimum perf duration, unit ms - minPerfDur = 10 - // maximum perf duration, unit ms - maxPerfDur = 10000 - minPercent = 10 - maxPercent = 100 - - base2, base16, bitSize = 2, 16, 32 -) - -var ( - numaNum, l3PercentDynamic, mbPercentDynamic int - defaultLimitMode string - enable bool - cpm *checkpoint.Manager -) - -type cacheLimitSet struct { - level string - clDir string - L3Percent int - MbPercent int -} - -// Init init and starts cache limit -func Init(m *checkpoint.Manager, cfg *config.CacheConfig) error { - enable = true - if !isHostPidns("/proc/self/ns/pid") { - return errors.New("share pid namespace with host is needed for cache limit") - } - if !perf.HwSupport() { - return errors.New("hardware event perf not supported") - } - if err := checkCacheCfg(cfg); err != nil { - return err - } - if err := checkResctrlExist(cfg); err != nil { - return err - } - if err := initCacheLimitDir(cfg); err != nil { - return errors.Errorf("cache limit directory create failed: %v", err) - } - defaultLimitMode = cfg.DefaultLimitMode - cpm = m - - go 
wait.Until(syncCacheLimit, time.Second, config.ShutdownChan) - missMax, missMin := 20, 10 - dynamicFunc := func() { startDynamic(cfg, missMax, missMin) } - go wait.Until(dynamicFunc, time.Duration(cfg.AdjustInterval)*time.Millisecond, config.ShutdownChan) - return nil -} - -func isHostPidns(path string) bool { - ns, err := os.Readlink(path) - if err != nil { - log.Errorf("get pid namespace inode error: %v", err) - return false - } - hostPidInode := "4026531836" - return strings.Trim(ns, "pid:[]") == hostPidInode -} - -func checkCacheCfg(cfg *config.CacheConfig) error { - defaultLimitMode = cfg.DefaultLimitMode - if defaultLimitMode != staticMode && defaultLimitMode != dynamicMode { - return errors.Errorf("invalid cache limit mode: %s, should be %s or %s", - cfg.DefaultLimitMode, staticMode, dynamicMode) - } - if cfg.AdjustInterval < minAdjustInterval || cfg.AdjustInterval > maxAdjustInterval { - return errors.Errorf("adjust interval %d out of range [%d,%d]", - cfg.AdjustInterval, minAdjustInterval, maxAdjustInterval) - } - if cfg.PerfDuration < minPerfDur || cfg.PerfDuration > maxPerfDur { - return errors.Errorf("perf duration %d out of range [%d,%d]", cfg.PerfDuration, minPerfDur, maxPerfDur) - } - for _, per := range []int{cfg.L3Percent.Low, cfg.L3Percent.Mid, cfg.L3Percent.High, cfg.MemBandPercent.Low, - cfg.MemBandPercent.Mid, cfg.MemBandPercent.High} { - if per < minPercent || per > maxPercent { - return errors.Errorf("cache limit percentage %d out of range [%d,%d]", per, minPercent, maxPercent) - } - } - if cfg.L3Percent.Low > cfg.L3Percent.Mid || cfg.L3Percent.Mid > cfg.L3Percent.High { - return errors.Errorf("cache limit config L3Percent does not satisfy constraint low<=mid<=high") - } - if cfg.MemBandPercent.Low > cfg.MemBandPercent.Mid || cfg.MemBandPercent.Mid > cfg.MemBandPercent.High { - return errors.Errorf("cache limit config MemBandPercent does not satisfy constraint low<=mid<=high") - } - - return nil -} - -// initCacheLimitDir init multi-level 
cache limit directories -func initCacheLimitDir(cfg *config.CacheConfig) error { - log.Infof("init cache limit directory") - - var err error - if numaNum, err = getNUMANum(numaNodeDir); err != nil { - return errors.Errorf("get NUMA nodes number error: %v", err) - } - - l3PercentDynamic = cfg.L3Percent.Low - mbPercentDynamic = cfg.MemBandPercent.Low - cacheLimitList := []*cacheLimitSet{ - newCacheLimitSet(cfg.DefaultResctrlDir, dynamicLevel, l3PercentDynamic, mbPercentDynamic), - newCacheLimitSet(cfg.DefaultResctrlDir, lowLevel, cfg.L3Percent.Low, cfg.MemBandPercent.Low), - newCacheLimitSet(cfg.DefaultResctrlDir, middleLevel, cfg.L3Percent.Mid, cfg.MemBandPercent.Mid), - newCacheLimitSet(cfg.DefaultResctrlDir, highLevel, cfg.L3Percent.High, cfg.MemBandPercent.High), - newCacheLimitSet(cfg.DefaultResctrlDir, maxLevel, defaultL3PercentMax, defaultMbPercentMax), - } - - for _, cl := range cacheLimitList { - if err = cl.writeResctrlSchemata(numaNum); err != nil { - return err - } - } - - log.Infof("init cache limit directory success") - return nil -} - -func newCacheLimitSet(basePath, level string, l3Per, mbPer int) *cacheLimitSet { - return &cacheLimitSet{ - level: level, - L3Percent: l3Per, - MbPercent: mbPer, - clDir: filepath.Join(filepath.Clean(basePath), dirPrefix+level), - } -} - -// calcLimitedCacheValue calculate number of cache way could be used according to L3 limit percent -func calcLimitedCacheValue(path string, l3Percent int) (string, error) { - l3BinaryMask, err := getBinaryMask(path) - if err != nil { - return "", err - } - ten, hundred, binValue := 10, 100, 0 - binLen := l3BinaryMask * l3Percent / hundred - if binLen == 0 { - binLen = 1 - } - for i := 0; i < binLen; i++ { - binValue = binValue*ten + 1 - } - decValue, err := strconv.ParseInt(strconv.Itoa(binValue), base2, bitSize) - if err != nil { - return "", errors.Errorf("transfer %v to decimal format error: %v", binValue, err) - } - - return strconv.FormatInt(decValue, base16), nil -} - -func (cl 
*cacheLimitSet) setClDir() error { - if len(cl.clDir) == 0 { - return errors.Errorf("cache limit path empty") - } - if err := os.Mkdir(cl.clDir, constant.DefaultDirMode); err != nil && !os.IsExist(err) { - return errors.Errorf("create cache limit directory error: %v", err) - } - return nil -} - -func (cl *cacheLimitSet) writeResctrlSchemata(numaNum int) error { - // get cbm mask like "fffff" means 20 cache way - maskFile := filepath.Join(filepath.Dir(cl.clDir), "info", "L3", "cbm_mask") - llc, err := calcLimitedCacheValue(maskFile, cl.L3Percent) - if err != nil { - return errors.Errorf("get limited cache value from L3 percent error: %v", err) - } - - if err := cl.setClDir(); err != nil { - return err - } - schemetaFile := filepath.Join(cl.clDir, schemataFile) - var content string - for i := 0; i < numaNum; i++ { - content = content + fmt.Sprintf("L3:%d=%s\n", i, llc) + fmt.Sprintf("MB:%d=%d\n", i, cl.MbPercent) - } - if err := ioutil.WriteFile(schemetaFile, []byte(content), constant.DefaultFileMode); err != nil { - return errors.Errorf("write %s to file %s error: %v", content, schemetaFile, err) - } - - return nil -} - -func (cl *cacheLimitSet) doFlush() error { - if err := cl.writeResctrlSchemata(numaNum); err != nil { - return errors.Errorf("adjust dynamic cache limit to l3:%v mb:%v error: %v", - cl.L3Percent, cl.MbPercent, err) - } - l3PercentDynamic = cl.L3Percent - mbPercentDynamic = cl.MbPercent - - return nil -} - -func (cl *cacheLimitSet) flush(cfg *config.CacheConfig, step int) error { - l3 := nextPercent(l3PercentDynamic, cfg.L3Percent.Low, cfg.L3Percent.High, step) - mb := nextPercent(mbPercentDynamic, cfg.MemBandPercent.Low, cfg.MemBandPercent.High, step) - if l3PercentDynamic == l3 && mbPercentDynamic == mb { - return nil - } - log.Infof("flush L3 from %v to %v, Mb from %v to %v", cl.L3Percent, l3, cl.MbPercent, mb) - cl.L3Percent, cl.MbPercent = l3, mb - return cl.doFlush() -} - -func nextPercent(value, min, max, step int) int { - value += step - if 
value < min { - return min - } - if value > max { - return max - } - return value -} - -// startDynamic start monitor online pod qos and adjust dynamic cache limit value -func startDynamic(cfg *config.CacheConfig, missMax, missMin int) { - if !dynamicExist() { - return - } - - stepMore, stepLess := 5, -50 - needMore := true - limiter := newCacheLimitSet(cfg.DefaultResctrlDir, dynamicLevel, l3PercentDynamic, mbPercentDynamic) - - onlinePods := cpm.ListOnlinePods() - for _, p := range onlinePods { - cacheMiss, LLCMiss := getPodCacheMiss(p, cfg.PerfDuration) - if cacheMiss >= missMax || LLCMiss >= missMax { - log.Infof("online pod %v cache miss: %v LLC miss: %v exceeds maxmiss, lower offline cache limit", - p.UID, cacheMiss, LLCMiss) - - if err := limiter.flush(cfg, stepLess); err != nil { - log.Errorf(err.Error()) - } - return - } - if cacheMiss >= missMin || LLCMiss >= missMin { - needMore = false - } - } - - if !needMore { - return - } - if err := limiter.flush(cfg, stepMore); err != nil { - log.Errorf(err.Error()) - } -} - -func dynamicExist() bool { - offlinePods := cpm.ListOfflinePods() - for _, p := range offlinePods { - err := SyncLevel(p) - if err != nil { - continue - } - if p.CacheLimitLevel == dynamicLevel { - return true - } - } - return false -} - -func getPodCacheMiss(pi *typedef.PodInfo, perfDu int) (int, int) { - cgroupPath := filepath.Join(config.CgroupRoot, perfEvent, pi.CgroupPath) - if !util.PathExist(cgroupPath) { - return 0, 0 - } - - stat, err := perf.CgroupStat(cgroupPath, time.Duration(perfDu)*time.Millisecond) - if err != nil { - return 0, 0 - } - - return int(100.0 * float64(stat.CacheMisses) / (1.0 + float64(stat.CacheReferences))), - int(100.0 * float64(stat.LLCMiss) / (1.0 + float64(stat.LLCAccess))) -} - -// ClEnabled return if cache limit is enabled -func ClEnabled() bool { - return enable -} - -// checkResctrlExist check if resctrl directory exists -func checkResctrlExist(cfg *config.CacheConfig) error { - if 
!util.PathExist(cfg.DefaultResctrlDir) { - return errors.Errorf("path %v not exist, not support cache limit", cfg.DefaultResctrlDir) - } - schemataPath := filepath.Join(cfg.DefaultResctrlDir, schemataFile) - if !util.PathExist(schemataPath) { - return errors.Errorf("path %v not exist, check if %v directory is mounted", - schemataPath, cfg.DefaultResctrlDir) - } - return nil -} - -func getNUMANum(path string) (int, error) { - files, err := filepath.Glob(filepath.Join(path, "node*")) - if err != nil { - return 0, err - } - return len(files), nil -} - -// getBinaryMask get l3 limit mask like "7ff" and transfer it to binary like "111 1111 1111", return binary length 11 -func getBinaryMask(path string) (int, error) { - maskValue, err := ioutil.ReadFile(filepath.Clean(path)) - if err != nil { - return -1, errors.Errorf("get L3 mask value error: %v", err) - } - - // transfer mask to binary format - decMask, err := strconv.ParseInt(strings.TrimSpace(string(maskValue)), base16, bitSize) - if err != nil { - return -1, errors.Errorf("transfer L3 mask value %v to decimal format error: %v", string(maskValue), err) - } - return len(strconv.FormatInt(decMask, base2)), nil -} diff --git a/pkg/cachelimit/cachelimit_init_test.go b/pkg/cachelimit/cachelimit_init_test.go deleted file mode 100644 index fe8f5417e9e8b1151bb9d42fbac0e24120379940..0000000000000000000000000000000000000000 --- a/pkg/cachelimit/cachelimit_init_test.go +++ /dev/null @@ -1,1019 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. 
-// See the Mulan PSL v2 for more details. -// Author: Xiang Li -// Create: 2022-05-16 -// Description: offline pod cache limit directory init function - -package cachelimit - -import ( - "fmt" - "io/ioutil" - "math" - "os" - "path/filepath" - "strconv" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - "isula.org/rubik/pkg/checkpoint" - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/perf" - "isula.org/rubik/pkg/try" - "isula.org/rubik/pkg/typedef" -) - -// TestGetNUMANum testcase -func TestGetNUMANum(t *testing.T) { - threeNodeDir := try.GenTestDir().String() - for i := 0; i < 3; i++ { - nodeDir := filepath.Join(threeNodeDir, fmt.Sprintf("node%d", i)) - try.MkdirAll(nodeDir, constant.DefaultDirMode) - } - - type args struct { - path string - } - tests := []struct { - name string - args args - want int - wantErr bool - compare bool - }{ - { - name: "TC-right numa folder", - args: args{path: numaNodeDir}, - wantErr: false, - compare: false, - }, - { - name: "TC-three numa foler", - args: args{path: threeNodeDir}, - want: 3, - wantErr: false, - compare: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := getNUMANum(tt.args.path) - if (err != nil) != tt.wantErr { - t.Errorf("getNUMANum() error = %v, wantErr %v", err, tt.wantErr) - return - } - - if tt.compare { - if got != tt.want { - t.Errorf("getNUMANum() = %v, want %v", got, tt.want) - } - } - }) - } -} - -// TestGetBinaryMask testcase -func TestGetBinaryMask(t *testing.T) { - file7ff := filepath.Join(try.GenTestDir().String(), "7ff") - file3ff := filepath.Join(try.GenTestDir().String(), "3ff") - fileNotHex := filepath.Join(try.GenTestDir().String(), "nohex") - - tests := []struct { - preHook func(t *testing.T) - name string - path string - want int - wantErr bool - }{ - { - name: "TC-7ff", - path: file7ff, - want: 11, - wantErr: false, - preHook: func(t *testing.T) { - try.WriteFile(file7ff, []byte("7ff"), 
constant.DefaultFileMode) - }, - }, - { - name: "TC-3ff", - path: file3ff, - want: 10, - wantErr: false, - preHook: func(t *testing.T) { - try.WriteFile(file3ff, []byte("3ff"), constant.DefaultFileMode) - }, - }, - { - name: "TC-not hex format", - path: fileNotHex, - wantErr: true, - preHook: func(t *testing.T) { - try.WriteFile(fileNotHex, []byte("ghi"), constant.DefaultFileMode) - }, - }, - { - name: "TC-file not exist", - path: "/file/not/exist", - wantErr: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.preHook != nil { - tt.preHook(t) - } - got, err := getBinaryMask(tt.path) - if (err != nil) != tt.wantErr { - t.Errorf("getBinaryMask() error = %v, wantErr %v, file = %v", err, tt.wantErr, tt.path) - return - } - if err == nil { - if got != tt.want { - t.Errorf("getBinaryMask() = %v, want %v", got, tt.want) - } - } - }) - } -} - -// TestCalcLimitedCacheValue testcase -func TestCalcLimitedCacheValue(t *testing.T) { - testFile := filepath.Join(try.GenTestDir().String(), "testFile") - type fields struct { - level string - L3Percent int - MbPercent int - } - type args struct { - path string - } - tests := []struct { - preHook func(t *testing.T) - postHook func(t *testing.T) - name string - fields fields - args args - want string - wantErr bool - }{ - { - name: "TC-7ff", - args: args{testFile}, - want: "1", - fields: fields{ - L3Percent: 10, - MbPercent: 10, - }, - preHook: func(t *testing.T) { - try.WriteFile(testFile, []byte("7ff"), constant.DefaultFileMode) - }, - }, - { - name: "TC-fffff", - args: args{testFile}, - want: "3", - fields: fields{ - L3Percent: 10, - MbPercent: 10, - }, - preHook: func(t *testing.T) { - try.WriteFile(testFile, []byte("fffff"), constant.DefaultFileMode) - }, - }, - { - name: "TC-ff", - args: args{testFile}, - want: "1", - fields: fields{ - L3Percent: 10, - }, - preHook: func(t *testing.T) { - try.WriteFile(testFile, []byte("ff"), constant.DefaultFileMode) - }, - }, - } - for _, tt := range 
tests { - t.Run(tt.name, func(t *testing.T) { - clSet := &cacheLimitSet{ - level: tt.fields.level, - L3Percent: tt.fields.L3Percent, - MbPercent: tt.fields.MbPercent, - } - if tt.preHook != nil { - tt.preHook(t) - } - got, err := calcLimitedCacheValue(tt.args.path, clSet.L3Percent) - if (err != nil) != tt.wantErr { - t.Errorf("cacheLimitSet.calcLimitedCacheValue() error = %v, wantErr %v", err, tt.wantErr) - return - } - if got != tt.want { - t.Errorf("cacheLimitSet.calcLimitedCacheValue() = %v, want %v", got, tt.want) - } - if tt.postHook != nil { - tt.postHook(t) - } - }) - } -} - -// TestWriteResctrlSchemata testcase -func TestWriteResctrlSchemata(t *testing.T) { - testFolder := try.GenTestDir().String() - assert.NoError(t, setMaskFile(t, testFolder, "3ff")) - type fields struct { - level string - clDir string - L3Percent int - MbPercent int - } - type args struct { - llc string - numaNum int - } - tests := []struct { - preHook func(t *testing.T) - postHook func(t *testing.T) - name string - fields fields - args args - wantErr bool - }{ - { - name: "TC-normal", - fields: fields{ - level: lowLevel, - clDir: filepath.Join(testFolder, "normal"), - L3Percent: 30, - MbPercent: 30, - }, - args: args{llc: "3ff", numaNum: 2}, - wantErr: false, - }, - { - name: "TC-cache limit dir not set", - fields: fields{ - level: lowLevel, - L3Percent: 30, - MbPercent: 30, - }, - wantErr: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - clSet := &cacheLimitSet{ - level: tt.fields.level, - clDir: tt.fields.clDir, - L3Percent: tt.fields.L3Percent, - MbPercent: tt.fields.MbPercent, - } - if tt.preHook != nil { - tt.preHook(t) - } - if err := clSet.writeResctrlSchemata(tt.args.numaNum); (err != nil) != tt.wantErr { - t.Errorf("cacheLimitSet.writeResctrlSchemata() error = %v, wantErr %v", err, tt.wantErr) - } - if tt.postHook != nil { - tt.postHook(t) - } - }) - } -} - -// TestCheckCacheCfg testcase -func TestCheckCacheCfg(t *testing.T) { - type args 
struct { - cfg config.CacheConfig - } - tests := []struct { - name string - args args - wantErr bool - wantMsg string - }{ - { - name: "TC-static mode config", - args: args{cfg: config.CacheConfig{ - DefaultLimitMode: staticMode, - AdjustInterval: minAdjustInterval + 1, - PerfDuration: minPerfDur + 1, - L3Percent: config.MultiLvlPercent{ - Low: minPercent + 1, - Mid: maxPercent/2 + 1, - High: maxPercent - 1, - }, - MemBandPercent: config.MultiLvlPercent{ - Low: minPercent + 1, - Mid: maxPercent/2 + 1, - High: maxPercent - 1, - }, - }}, - }, - { - name: "TC-invalid mode config", - args: args{cfg: config.CacheConfig{ - DefaultLimitMode: "invalid mode", - }}, - wantErr: true, - wantMsg: dynamicMode, - }, - { - name: "TC-invalid adjust interval less than min value", - args: args{cfg: config.CacheConfig{ - DefaultLimitMode: staticMode, - AdjustInterval: minAdjustInterval - 1, - }}, - wantErr: true, - wantMsg: strconv.Itoa(minAdjustInterval), - }, - { - name: "TC-invalid adjust interval greater than max value", - args: args{cfg: config.CacheConfig{ - DefaultLimitMode: staticMode, - AdjustInterval: maxAdjustInterval + 1, - }}, - wantErr: true, - wantMsg: strconv.Itoa(maxAdjustInterval), - }, - { - name: "TC-invalid perf duration less than min value", - args: args{cfg: config.CacheConfig{ - DefaultLimitMode: staticMode, - AdjustInterval: maxAdjustInterval/2 + 1, - PerfDuration: minPerfDur - 1, - }}, - wantErr: true, - wantMsg: strconv.Itoa(minPerfDur), - }, - { - name: "TC-invalid perf duration greater than max value", - args: args{cfg: config.CacheConfig{ - DefaultLimitMode: staticMode, - AdjustInterval: maxAdjustInterval/2 + 1, - PerfDuration: maxPerfDur + 1, - }}, - wantErr: true, - wantMsg: strconv.Itoa(maxPerfDur), - }, - { - name: "TC-invalid percent value", - args: args{cfg: config.CacheConfig{ - DefaultLimitMode: staticMode, - AdjustInterval: maxAdjustInterval/2 + 1, - PerfDuration: maxPerfDur/2 + 1, - L3Percent: config.MultiLvlPercent{ - Low: minPercent - 1, - }, 
- }}, - wantErr: true, - wantMsg: strconv.Itoa(minPercent), - }, - { - name: "TC-invalid l3 percent low value larger than mid value", - args: args{cfg: config.CacheConfig{ - DefaultLimitMode: staticMode, - AdjustInterval: maxAdjustInterval/2 + 1, - PerfDuration: maxPerfDur/2 + 1, - L3Percent: config.MultiLvlPercent{ - Low: minPercent + 2, - Mid: minPercent + 1, - High: minPercent + 1, - }, - MemBandPercent: config.MultiLvlPercent{ - Low: minPercent, - Mid: minPercent + 1, - High: minPercent + 2, - }, - }}, - wantErr: true, - wantMsg: "low<=mid<=high", - }, - { - name: "TC-invalid memband percent mid value larger than high value", - args: args{cfg: config.CacheConfig{ - DefaultLimitMode: staticMode, - AdjustInterval: maxAdjustInterval/2 + 1, - PerfDuration: maxPerfDur/2 + 1, - L3Percent: config.MultiLvlPercent{ - Low: minPercent, - Mid: minPercent + 1, - High: minPercent + 2, - }, - MemBandPercent: config.MultiLvlPercent{ - Low: minPercent, - Mid: maxPercent/2 + 1, - High: maxPercent / 2, - }, - }}, - wantErr: true, - wantMsg: "low<=mid<=high", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := checkCacheCfg(&tt.args.cfg) - if (err != nil) != tt.wantErr { - t.Errorf("checkCacheCfg() error = %v, wantErr %v", err, tt.wantErr) - } - if err != nil && !strings.Contains(err.Error(), tt.wantMsg) { - t.Errorf("checkCacheCfg() error = %v, wantMsg %v", err, tt.wantMsg) - } - }) - } -} - -func setMaskFile(t *testing.T, resctrlDir string, data string) error { - maskDir := filepath.Join(resctrlDir, "info", "L3") - maskFile := filepath.Join(maskDir, "cbm_mask") - if err := os.MkdirAll(maskDir, constant.DefaultDirMode); err != nil { - return err - } - if err := ioutil.WriteFile(maskFile, []byte(data), constant.DefaultFileMode); err != nil { - return err - } - return nil -} - -// TestInitCacheLimitDir testcase -func TestInitCacheLimitDir(t *testing.T) { - resctrlDir := try.GenTestDir().String() - type args struct { - cfg config.CacheConfig - } - 
tests := []struct { - setMaskFile func(t *testing.T) error - name string - args args - wantErr bool - }{ - { - name: "TC-valid cache limit dir setting", - args: args{cfg: config.CacheConfig{ - DefaultResctrlDir: resctrlDir, - DefaultLimitMode: staticMode, - }}, - setMaskFile: func(t *testing.T) error { - return setMaskFile(t, resctrlDir, "3ff") - }, - }, - { - name: "TC-empty resctrl dir", - args: args{config.CacheConfig{ - DefaultResctrlDir: "", - DefaultLimitMode: staticMode, - }}, - wantErr: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.setMaskFile != nil { - assert.NoError(t, tt.setMaskFile(t)) - } - if err := initCacheLimitDir(&tt.args.cfg); (err != nil) != tt.wantErr { - t.Errorf("initCacheLimitDir() error = %v, wantErr %v", err, tt.wantErr) - } - }) - } -} - -// TestSetClDir testcase -func TestSetClDir(t *testing.T) { - testRoot := try.GenTestDir().String() - _, err := os.Create(filepath.Join(testRoot, "test")) - assert.NoError(t, err) - type fields struct { - level string - clDir string - L3Percent int - MbPercent int - } - tests := []struct { - name string - fields fields - wantErr bool - }{ - { - name: "TC-normal cache limit dir", - fields: fields{clDir: testRoot}, - }, - { - name: "TC-empty dir", - wantErr: true, - }, - { - name: "TC-path not exist", - fields: fields{clDir: "/path/not/exist"}, - wantErr: true, - }, - { - name: "TC-path not exist", - fields: fields{clDir: filepath.Join(testRoot, "test", "test")}, - wantErr: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - clSet := &cacheLimitSet{ - level: tt.fields.level, - clDir: tt.fields.clDir, - L3Percent: tt.fields.L3Percent, - MbPercent: tt.fields.MbPercent, - } - if err := clSet.setClDir(); (err != nil) != tt.wantErr { - t.Errorf("cacheLimitSet.setClDir() error = %v, wantErr %v", err, tt.wantErr) - } - }) - } -} - -// TestCheckResctrlExist testcase -func TestCheckResctrlExist(t *testing.T) { - resctrlDir := 
try.GenTestDir().String() - resctrlDirNoSchemataFile := try.GenTestDir().String() - schemataPath := filepath.Join(resctrlDir, schemataFile) - _, err := os.Create(schemataPath) - assert.NoError(t, err) - type args struct { - cfg config.CacheConfig - } - tests := []struct { - name string - args args - wantErr bool - }{ - { - name: "TC-resctrl exist", - args: args{cfg: config.CacheConfig{ - DefaultResctrlDir: resctrlDir, - }}, - }, - { - name: "TC-resctrl exist but not schemata file", - args: args{cfg: config.CacheConfig{ - DefaultResctrlDir: resctrlDirNoSchemataFile, - }}, - wantErr: true, - }, - { - name: "TC-resctrl not exist", - args: args{cfg: config.CacheConfig{ - DefaultResctrlDir: "/path/not/exist", - }}, - wantErr: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if err := checkResctrlExist(&tt.args.cfg); (err != nil) != tt.wantErr { - t.Errorf("checkResctrlExist() error = %v, wantErr %v", err, tt.wantErr) - } - }) - } -} - -// TestDoFlush testcase -func TestAdjustCacheLimit(t *testing.T) { - resctrlDir := try.GenTestDir().String() - assert.NoError(t, setMaskFile(t, resctrlDir, "3ff")) - - type fields struct { - level string - clDir string - L3Percent int - MbPercent int - } - type args struct { - clValue string - } - tests := []struct { - preHook func(t *testing.T) - name string - fields fields - args args - wantErr bool - }{ - { - name: "TC-adjust success", - fields: fields{ - level: lowLevel, - clDir: filepath.Join(filepath.Clean(resctrlDir), dirPrefix+lowLevel), - L3Percent: 10, - MbPercent: 10, - }, - }, - { - name: "TC-l3PercentDynamic", - fields: fields{ - level: lowLevel, - clDir: filepath.Join(filepath.Clean(resctrlDir), dirPrefix+lowLevel), - L3Percent: l3PercentDynamic, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - clSet := &cacheLimitSet{ - level: tt.fields.level, - clDir: tt.fields.clDir, - L3Percent: tt.fields.L3Percent, - MbPercent: tt.fields.MbPercent, - } - if tt.preHook 
!= nil { - tt.preHook(t) - } - if err := clSet.doFlush(); (err != nil) != tt.wantErr { - t.Errorf("clSet.doFlush() error = %v, wantErr %v", err, tt.wantErr) - } - }) - } -} - -func TestGetPodCacheMiss(t *testing.T) { - if !perf.HwSupport() { - t.Skipf("%s only run on physical machine", t.Name()) - } - testCGRoot := filepath.Join(config.CgroupRoot, "perf_event", t.Name()) - type fields struct { - podID string - cgroupPath string - cacheLimitLevel string - containers map[string]*typedef.ContainerInfo - } - type args struct { - cgroupRoot string - perfDu int - } - tests := []struct { - preHook func(t *testing.T) - postHook func(t *testing.T) - name string - fields fields - args args - want int - }{ - { - name: "TC-get pod cache miss success", - fields: fields{ - podID: "abcd", - cgroupPath: t.Name(), - cacheLimitLevel: lowLevel, - containers: make(map[string]*typedef.ContainerInfo), - }, - preHook: func(t *testing.T) { - try.MkdirAll(testCGRoot, constant.DefaultDirMode) - try.WriteFile(filepath.Join(testCGRoot, "tasks"), []byte(fmt.Sprint(os.Getpid())), constant.DefaultFileMode) - }, - postHook: func(t *testing.T) { - try.WriteFile(filepath.Join(config.CgroupRoot, "perf_event", "tasks"), []byte(fmt.Sprint(os.Getpid())), constant.DefaultFileMode) - try.RemoveAll(testCGRoot) - }, - args: args{cgroupRoot: config.CgroupRoot, perfDu: 1}, - }, - { - name: "TC-get pod cache miss failed", - fields: fields{ - podID: "abcd", - cgroupPath: t.Name(), - cacheLimitLevel: middleLevel, - containers: make(map[string]*typedef.ContainerInfo), - }, - }, - } - for _, tt := range tests { - name := t.Name() - fmt.Println(name) - t.Run(tt.name, func(t *testing.T) { - p := &typedef.PodInfo{ - UID: tt.fields.podID, - CgroupPath: tt.fields.cgroupPath, - CacheLimitLevel: tt.fields.cacheLimitLevel, - Containers: tt.fields.containers, - } - if tt.preHook != nil { - tt.preHook(t) - } - getPodCacheMiss(p, tt.args.perfDu) - if tt.postHook != nil { - tt.postHook(t) - } - }) - } -} - -func 
TestStartDynamic(t *testing.T) { - if !perf.HwSupport() { - t.Skipf("%s only run on physical machine", t.Name()) - } - initCpm() - startDynamic(&config.CacheConfig{}, 0, 0) - resctrlDir := try.GenTestDir().String() - testCGRoot := filepath.Join(config.CgroupRoot, "perf_event", t.Name()) - assert.NoError(t, setMaskFile(t, resctrlDir, "3ff")) - - type args struct { - minWaterLine, maxWaterLine, wantL3, wantMb, WantFinalL3, wantFinalMb int - cfg config.CacheConfig - } - tests := []struct { - preHook func(t *testing.T) - postHook func(t *testing.T) - name string - args args - }{ - { - name: "TC-start dynamic", - args: args{cfg: config.CacheConfig{ - DefaultResctrlDir: resctrlDir, - DefaultLimitMode: dynamicMode, - PerfDuration: 10, - L3Percent: config.MultiLvlPercent{ - High: 50, - Low: 20, - Mid: 30, - }, - MemBandPercent: config.MultiLvlPercent{ - High: 50, - Low: 10, - Mid: 30, - }, - }, - minWaterLine: 0, - maxWaterLine: 0, - wantL3: 20, - wantMb: 10, - WantFinalL3: 20, - wantFinalMb: 10, - }, - preHook: func(t *testing.T) { - pi := &typedef.PodInfo{ - UID: "abcde", - CgroupPath: filepath.Base(testCGRoot), - CacheLimitLevel: lowLevel, - Containers: make(map[string]*typedef.ContainerInfo), - } - cpm.Checkpoint.Pods[pi.UID] = pi - try.MkdirAll(testCGRoot, constant.DefaultDirMode) - try.WriteFile(filepath.Join(testCGRoot, "tasks"), []byte(fmt.Sprint(os.Getpid())), constant.DefaultFileMode) - }, - postHook: func(t *testing.T) { - try.WriteFile(filepath.Join(config.CgroupRoot, "perf_event", "tasks"), []byte(fmt.Sprint(os.Getpid())), constant.DefaultFileMode) - try.RemoveAll(testCGRoot) - cpm.Checkpoint.Pods = make(map[string]*typedef.PodInfo) - }, - }, - { - name: "TC-start dynamic with very high water line", - args: args{cfg: config.CacheConfig{ - DefaultResctrlDir: resctrlDir, - DefaultLimitMode: dynamicMode, - PerfDuration: 10, - L3Percent: config.MultiLvlPercent{ - High: 50, - Low: 20, - Mid: 30, - }, - MemBandPercent: config.MultiLvlPercent{ - High: 50, - Low: 10, 
- Mid: 30, - }, - }, - minWaterLine: math.MaxInt64, - maxWaterLine: math.MaxInt64, - wantL3: 25, - wantMb: 15, - WantFinalL3: 50, - wantFinalMb: 50, - }, - preHook: func(t *testing.T) { - pi := &typedef.PodInfo{ - UID: "abcde", - CgroupPath: filepath.Base(testCGRoot), - CacheLimitLevel: lowLevel, - Containers: make(map[string]*typedef.ContainerInfo), - } - cpm.Checkpoint.Pods[pi.UID] = pi - try.MkdirAll(testCGRoot, constant.DefaultDirMode) - try.WriteFile(filepath.Join(testCGRoot, "tasks"), []byte(fmt.Sprint(os.Getpid())), constant.DefaultFileMode) - }, - postHook: func(t *testing.T) { - try.WriteFile(filepath.Join(config.CgroupRoot, "perf_event", "tasks"), []byte(fmt.Sprint(os.Getpid())), constant.DefaultFileMode) - try.RemoveAll(testCGRoot) - cpm.Checkpoint.Pods = make(map[string]*typedef.PodInfo) - }, - }, - { - name: "TC-start dynamic with low min water line", - args: args{cfg: config.CacheConfig{ - DefaultResctrlDir: resctrlDir, - DefaultLimitMode: dynamicMode, - PerfDuration: 10, - L3Percent: config.MultiLvlPercent{ - High: 50, - Low: 20, - Mid: 30, - }, - MemBandPercent: config.MultiLvlPercent{ - High: 50, - Low: 10, - Mid: 30, - }, - }, - minWaterLine: 0, - maxWaterLine: math.MaxInt64, - wantL3: 20, - wantMb: 10, - WantFinalL3: 20, - wantFinalMb: 10, - }, - preHook: func(t *testing.T) { - pi := &typedef.PodInfo{ - UID: "abcde", - CgroupPath: filepath.Base(testCGRoot), - CacheLimitLevel: lowLevel, - Containers: make(map[string]*typedef.ContainerInfo), - } - cpm.Checkpoint.Pods[pi.UID] = pi - try.MkdirAll(testCGRoot, constant.DefaultDirMode) - try.WriteFile(filepath.Join(testCGRoot, "tasks"), []byte(fmt.Sprint(os.Getpid())), constant.DefaultFileMode) - }, - postHook: func(t *testing.T) { - try.WriteFile(filepath.Join(config.CgroupRoot, "perf_event", "tasks"), []byte(fmt.Sprint(os.Getpid())), constant.DefaultFileMode) - try.RemoveAll(testCGRoot) - cpm.Checkpoint.Pods = make(map[string]*typedef.PodInfo) - }, - }, - } - for _, tt := range tests { - 
t.Run(tt.name, func(t *testing.T) { - if tt.preHook != nil { - tt.preHook(t) - } - - l3PercentDynamic = tt.args.cfg.L3Percent.Low - mbPercentDynamic = tt.args.cfg.MemBandPercent.Low - startDynamic(&tt.args.cfg, tt.args.maxWaterLine, tt.args.minWaterLine) - assert.Equal(t, tt.args.wantL3, l3PercentDynamic) - assert.Equal(t, tt.args.wantMb, mbPercentDynamic) - for i := 0; i < 10; i++ { - startDynamic(&tt.args.cfg, tt.args.maxWaterLine, tt.args.minWaterLine) - } - assert.Equal(t, tt.args.WantFinalL3, l3PercentDynamic) - assert.Equal(t, tt.args.wantFinalMb, mbPercentDynamic) - if tt.postHook != nil { - tt.postHook(t) - } - }) - } -} - -func TestClEnabled(t *testing.T) { - oldEnbaled := enable - tests := []struct { - preHook func(t *testing.T) - postHook func(t *testing.T) - name string - want bool - }{ - { - name: "TC-return enabled", - preHook: func(t *testing.T) { - enable = true - }, - postHook: func(t *testing.T) { - enable = oldEnbaled - }, - want: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.preHook != nil { - tt.preHook(t) - } - if got := ClEnabled(); got != tt.want { - t.Errorf("ClEnabled() = %v, want %v", got, tt.want) - } - if tt.postHook != nil { - tt.postHook(t) - } - }) - } -} - -// TestDynamicExist test dynamicExist -func TestDynamicExist(t *testing.T) { - initCpm() - cpm.Checkpoint.Pods["podabc"].CacheLimitLevel = lowLevel - assert.Equal(t, false, dynamicExist()) - cpm.Checkpoint.Pods["podabc"].CacheLimitLevel = dynamicLevel - assert.Equal(t, true, dynamicExist()) -} - -// TestIsHostPidns test isHostPidns -func TestIsHostPidns(t *testing.T) { - assert.Equal(t, false, isHostPidns(filepath.Join(constant.TmpTestDir, "path/not/exist/pid"))) - assert.Equal(t, true, isHostPidns("/proc/self/ns/pid")) -} - -// TestInit test Init -func TestInit(t *testing.T) { - resctrlDir := try.GenTestDir().String() - schemataPath := filepath.Join(resctrlDir, schemataFile) - _, err := os.Create(schemataPath) - assert.NoError(t, err) 
- assert.NoError(t, setMaskFile(t, resctrlDir, "3ff")) - var TC1WantErr bool - if !perf.HwSupport() { - TC1WantErr = true - } - type args struct { - cfg config.CacheConfig - } - tests := []struct { - preHook func(t *testing.T) - postHook func(t *testing.T) - name string - args args - wantErr bool - }{ - { - name: "TC-normal testcase", - wantErr: TC1WantErr, - args: args{cfg: config.CacheConfig{ - DefaultResctrlDir: resctrlDir, - DefaultLimitMode: dynamicMode, - PerfDuration: 10, - L3Percent: config.MultiLvlPercent{ - High: 100, - Low: 10, - Mid: 50, - }, - MemBandPercent: config.MultiLvlPercent{ - High: 100, - Low: 10, - Mid: 50, - }, - AdjustInterval: 10, - }}, - }, - { - name: "TC-invalid cache config", - wantErr: true, - args: args{cfg: config.CacheConfig{ - AdjustInterval: 0, - }}, - }, - { - name: "TC-resctrl not exist", - wantErr: true, - args: args{cfg: config.CacheConfig{ - DefaultResctrlDir: "/path/not/exist", - DefaultLimitMode: dynamicMode, - PerfDuration: 10, - L3Percent: config.MultiLvlPercent{ - High: 100, - Low: 10, - Mid: 50, - }, - MemBandPercent: config.MultiLvlPercent{ - High: 100, - Low: 10, - Mid: 50, - }, - AdjustInterval: 10, - }}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.preHook != nil { - tt.preHook(t) - } - var cfg config.CacheConfig - cfg = tt.args.cfg - m := &checkpoint.Manager{ - Checkpoint: &checkpoint.Checkpoint{ - Pods: make(map[string]*typedef.PodInfo), - }, - } - if err := Init(m, &cfg); (err != nil) != tt.wantErr { - t.Errorf("Init() error = %v, wantErr %v", err, tt.wantErr) - } - if tt.postHook != nil { - tt.postHook(t) - } - }) - } -} diff --git a/pkg/cachelimit/cachelimit_test.go b/pkg/cachelimit/cachelimit_test.go deleted file mode 100644 index ec7a36fad370a2e4c109af04777a8e90dfaef027..0000000000000000000000000000000000000000 --- a/pkg/cachelimit/cachelimit_test.go +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. 
-// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Xiang Li, Danni Xia -// Create: 2022-05-16 -// Description: offline pod cache limit function - -// Package cachelimit is for cache limiting -package cachelimit - -import ( - "io/ioutil" - "os" - "path/filepath" - "strings" - "testing" - - "isula.org/rubik/pkg/checkpoint" - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/try" - "isula.org/rubik/pkg/typedef" - - "github.com/stretchr/testify/assert" -) - -var podInfo = typedef.PodInfo{ - CgroupPath: "kubepods/podaaa", - Offline: true, - CacheLimitLevel: "dynamic", -} - -func initCpm() { - podID := "podabc" - cpm = &checkpoint.Manager{ - Checkpoint: &checkpoint.Checkpoint{ - Pods: map[string]*typedef.PodInfo{ - podID: &podInfo, - }, - }, - } -} - -// TestLevelValid testcase -func TestLevelValid(t *testing.T) { - type args struct { - level string - } - tests := []struct { - name string - args args - want bool - }{ - { - name: "TC-normal cache limit level", - args: args{level: lowLevel}, - want: true, - }, - { - name: "TC-abnormal cache limit level", - args: args{level: "abnormal level"}, - want: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := levelValid(tt.args.level); got != tt.want { - t.Errorf("levelValid() = %v, want %v", got, tt.want) - } - }) - } -} - -// TestNewCacheLimitPodInfo test NewCacheLimitPodInfo -func TestSyncLevel(t *testing.T) { - podInfo := typedef.PodInfo{ - CgroupPath: "kubepods/podaaa", - Offline: true, - CacheLimitLevel: "invalid", - } 
- err := SyncLevel(&podInfo) - assert.Equal(t, true, err != nil) - - podInfo.CacheLimitLevel = lowLevel - err = SyncLevel(&podInfo) - assert.NoError(t, err) - assert.Equal(t, podInfo.CacheLimitLevel, lowLevel) - - defaultLimitMode = staticMode - podInfo.CacheLimitLevel = "" - err = SyncLevel(&podInfo) - assert.NoError(t, err) - assert.Equal(t, podInfo.CacheLimitLevel, maxLevel) - - defaultLimitMode = dynamicMode - podInfo.CacheLimitLevel = "" - err = SyncLevel(&podInfo) - assert.NoError(t, err) - assert.Equal(t, podInfo.CacheLimitLevel, dynamicLevel) -} - -// TestWriteTasksToResctrl test writeTasksToResctrl -func TestWriteTasksToResctrl(t *testing.T) { - initCpm() - err := SyncLevel(&podInfo) - assert.NoError(t, err) - - testDir := try.GenTestDir().String() - config.CgroupRoot = testDir - - pid, procsFile, container := "12345", "cgroup.procs", "container1" - podCPUCgroupPath := filepath.Join(testDir, "cpu", podInfo.CgroupPath) - try.MkdirAll(filepath.Join(podCPUCgroupPath, container), constant.DefaultDirMode) - err = writeTasksToResctrl(&podInfo, testDir) - // pod cgroup.procs not exist, return error - assert.Equal(t, true, err != nil) - _, err = os.Create(filepath.Join(podCPUCgroupPath, procsFile)) - assert.NoError(t, err) - try.WriteFile(filepath.Join(podCPUCgroupPath, container, procsFile), []byte(pid), constant.DefaultFileMode) - - err = writeTasksToResctrl(&podInfo, testDir) - // resctrl tasks file not exist, return error - assert.Equal(t, true, err != nil) - - resctrlSubDir, taskFile := dirPrefix+podInfo.CacheLimitLevel, "tasks" - try.MkdirAll(filepath.Join(testDir, resctrlSubDir), constant.DefaultDirMode) - err = writeTasksToResctrl(&podInfo, testDir) - // write success - assert.NoError(t, err) - bytes, err := ioutil.ReadFile(filepath.Join(testDir, resctrlSubDir, taskFile)) - assert.NoError(t, err) - assert.Equal(t, pid, strings.TrimSpace(string(bytes))) - - // container pid already written - err = writeTasksToResctrl(&podInfo, testDir) - assert.NoError(t, 
err) - - config.CgroupRoot = constant.DefaultCgroupRoot -} - -// TestSetCacheLimit test SetCacheLimit -func TestSetCacheLimit(t *testing.T) { - initCpm() - err := SetCacheLimit(&podInfo) - assert.NoError(t, err) -} - -// TestSyncCacheLimit test syncCacheLimit -func TestSyncCacheLimit(t *testing.T) { - initCpm() - syncCacheLimit() -} diff --git a/pkg/checkpoint/checkpoint.go b/pkg/checkpoint/checkpoint.go deleted file mode 100644 index c33ef50ed90da34292a88e76b35b9cd415e2aad5..0000000000000000000000000000000000000000 --- a/pkg/checkpoint/checkpoint.go +++ /dev/null @@ -1,272 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Jiaqi Yang -// Create: 2022-04-27 -// Description: provide pods checkpoint management - -// Package checkpoint provide pods checkpoint management. -package checkpoint - -import ( - "path/filepath" - "strings" - "sync" - - corev1 "k8s.io/api/core/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" - "isula.org/rubik/pkg/util" -) - -// Checkpoint stores the binding between the CPU and pod container. 
-type Checkpoint struct { - Pods map[string]*typedef.PodInfo `json:"pods,omitempty"` -} - -// Manager manage checkpoint -type Manager struct { - Checkpoint *Checkpoint - CgroupRoot string - sync.Mutex -} - -// NewManager create manager -func NewManager(cgroupRoot string) *Manager { - return &Manager{ - Checkpoint: &Checkpoint{ - Pods: make(map[string]*typedef.PodInfo, 0), - }, - CgroupRoot: cgroupRoot, - } -} - -// AddPod returns pod info from pod ID -func (cm *Manager) AddPod(pod *corev1.Pod) { - // Before adding pod to the checkpoint, ensure that the pod is in the running state. Otherwise, problems may occur. - // Only pod.Status.Phase == corev1.PodRunning - cm.Lock() - defer cm.Unlock() - if pod == nil || string(pod.UID) == "" { - return - } - if _, ok := cm.Checkpoint.Pods[string(pod.UID)]; ok { - log.Debugf("pod %v is existed", string(pod.UID)) - return - } - log.Debugf("add pod %v", string(pod.UID)) - cm.Checkpoint.Pods[string(pod.UID)] = NewPodInfo(pod, cm.CgroupRoot) -} - -// GetPod returns pod info from pod ID -func (cm *Manager) GetPod(podID types.UID) *typedef.PodInfo { - cm.Lock() - defer cm.Unlock() - return cm.Checkpoint.Pods[string(podID)].Clone() -} - -// PodExist returns true if there is a pod whose key is podID in the checkpoint -func (cm *Manager) PodExist(podID types.UID) bool { - cm.Lock() - defer cm.Unlock() - _, ok := cm.Checkpoint.Pods[string(podID)] - return ok -} - -// ContainerExist returns true if there is a pod whose key is podID in the checkpoint -func (cm *Manager) ContainerExist(podID types.UID, containerID string) bool { - cm.Lock() - defer cm.Unlock() - if _, ok := cm.Checkpoint.Pods[string(podID)]; !ok { - return false - } - _, ok := cm.Checkpoint.Pods[string(podID)].Containers[containerID] - return ok -} - -// DelPod delete pod from checkpoint -func (cm *Manager) DelPod(podID types.UID) { - cm.Lock() - defer cm.Unlock() - if _, ok := cm.Checkpoint.Pods[string(podID)]; !ok { - log.Debugf("pod %v is not existed", string(podID)) - 
return - } - log.Debugf("delete pod %v", podID) - delete(cm.Checkpoint.Pods, string(podID)) -} - -// UpdatePod updates pod information based on pods -func (cm *Manager) UpdatePod(pod *corev1.Pod) { - cm.Lock() - defer cm.Unlock() - old, ok := cm.Checkpoint.Pods[string(pod.UID)] - if !ok { - log.Debugf("pod %v is not existed", string(pod.UID)) - return - } - log.Debugf("update pod %v", string(pod.UID)) - updatePodInfoNoLock(old, pod) -} - -// SyncFromCluster synchronizing data from the kubernetes cluster using the list mechanism at the beginning -func (cm *Manager) SyncFromCluster(items []corev1.Pod) { - cm.Lock() - defer cm.Unlock() - for _, pod := range items { - if string(pod.UID) == "" { - continue - } - log.Debugf("add pod %v", string(pod.UID)) - cm.Checkpoint.Pods[string(pod.UID)] = NewPodInfo(&pod, cm.CgroupRoot) - } -} - -// filter filtering for list functions -type filter func(pi *typedef.PodInfo) bool - -// listContainersWithFilters filters and returns deep copy objects of all containers -func (cm *Manager) listContainersWithFilters(filters ...filter) map[string]*typedef.ContainerInfo { - cm.Lock() - defer cm.Unlock() - cc := make(map[string]*typedef.ContainerInfo, len(cm.Checkpoint.Pods)) - - for _, pod := range cm.Checkpoint.Pods { - if !mergeFilters(pod, filters) { - continue - } - for _, ci := range pod.Containers { - cc[ci.ID] = ci.Clone() - } - } - - return cc -} - -// ListPodsWithFilters filters and returns deep copy objects of all pod -func (cm *Manager) listPodsWithFilters(filters ...filter) map[string]*typedef.PodInfo { - cm.Lock() - defer cm.Unlock() - pc := make(map[string]*typedef.PodInfo, 0) - - for _, pod := range cm.Checkpoint.Pods { - if !mergeFilters(pod, filters) { - continue - } - pc[pod.UID] = pod.Clone() - } - return pc -} - -func mergeFilters(pi *typedef.PodInfo, filters []filter) bool { - for _, f := range filters { - if !f(pi) { - return false - } - } - return true -} - -// ListOfflineContainers filtering offline containers -func 
(cm *Manager) ListOfflineContainers() map[string]*typedef.ContainerInfo { - return cm.listContainersWithFilters(func(pi *typedef.PodInfo) bool { - return pi.Offline && pi.Namespace != v1.NamespaceSystem - }) -} - -// ListAllContainers returns all containers copies -func (cm *Manager) ListAllContainers() map[string]*typedef.ContainerInfo { - return cm.listContainersWithFilters() -} - -// ListAllPods returns all pods copies -func (cm *Manager) ListAllPods() map[string]*typedef.PodInfo { - return cm.listPodsWithFilters() -} - -// ListOfflinePods returns all pods copies -func (cm *Manager) ListOfflinePods() map[string]*typedef.PodInfo { - return cm.listPodsWithFilters(func(pi *typedef.PodInfo) bool { - return pi.Offline && pi.Namespace != v1.NamespaceSystem - }) -} - -// ListOnlinePods returns all pods copies -func (cm *Manager) ListOnlinePods() map[string]*typedef.PodInfo { - return cm.listPodsWithFilters(func(pi *typedef.PodInfo) bool { - return !pi.Offline && pi.Namespace != v1.NamespaceSystem - }) -} - -// NewPodInfo create PodInfo -func NewPodInfo(pod *corev1.Pod, cgroupRoot string) *typedef.PodInfo { - pi := &typedef.PodInfo{ - Name: pod.Name, - UID: string(pod.UID), - Containers: make(map[string]*typedef.ContainerInfo, 0), - CgroupPath: util.GetPodCgroupPath(pod), - Namespace: pod.Namespace, - CgroupRoot: cgroupRoot, - } - updatePodInfoNoLock(pi, pod) - return pi -} - -// updatePodInfoNoLock updates PodInfo from the pod of Kubernetes. -// UpdatePodInfoNoLock does not lock pods during the modification. -// Therefore, ensure that the pod is being used only by this function. -// Currently, the checkpoint manager variable is locked when this function is invoked. 
-func updatePodInfoNoLock(pi *typedef.PodInfo, pod *corev1.Pod) { - const ( - dockerPrefix = "docker://" - containerdPrefix = "containerd://" - ) - pi.Name = pod.Name - pi.Offline = util.IsOffline(pod) - pi.CacheLimitLevel = util.GetPodCacheLimit(pod) - pi.QuotaBurst = util.GetQuotaBurst(pod) - - nameID := make(map[string]string, len(pod.Status.ContainerStatuses)) - for _, c := range pod.Status.ContainerStatuses { - // Rubik is compatible with dockerd and containerd container engines. - cid := strings.TrimPrefix(c.ContainerID, dockerPrefix) - cid = strings.TrimPrefix(cid, containerdPrefix) - - // the container may be in the creation or deletion phase. - if len(cid) == 0 { - log.Debugf("no container id found of container %v", c.Name) - continue - } - nameID[c.Name] = cid - } - // update ContainerInfo in a PodInfo - for _, c := range pod.Spec.Containers { - ci, ok := pi.Containers[c.Name] - // add a container - if !ok { - log.Debugf("add new container %v", c.Name) - pi.AddContainerInfo(typedef.NewContainerInfo(c, string(pod.UID), nameID[c.Name], - pi.CgroupRoot, pi.CgroupPath)) - continue - } - // The container name remains unchanged, and other information about the container is updated. - ci.ID = nameID[c.Name] - ci.CgroupAddr = filepath.Join(pi.CgroupPath, ci.ID) - } - // delete a container that does not exist - for name := range pi.Containers { - if _, ok := nameID[name]; !ok { - log.Debugf("delete container %v", name) - delete(pi.Containers, name) - } - } -} diff --git a/pkg/checkpoint/checkpoint_test.go b/pkg/checkpoint/checkpoint_test.go deleted file mode 100644 index e9167a624a6f2e8d660e1c7b9e2cd04922e62b2e..0000000000000000000000000000000000000000 --- a/pkg/checkpoint/checkpoint_test.go +++ /dev/null @@ -1,397 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. 
-// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Jiaqi Yang -// Create: 2022-05-10 -// Description: checkpoint DT test - -package checkpoint - -import ( - "testing" - - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/typedef" -) - -var containerInfos = []*typedef.ContainerInfo{ - { - Name: "FooCon", - ID: "testCon1", - PodID: "testPod1", - CgroupRoot: constant.DefaultCgroupRoot, - CgroupAddr: "kubepods/testPod1/testCon1", - }, - { - Name: "BarCon", - ID: "testCon2", - PodID: "testPod2", - CgroupRoot: constant.DefaultCgroupRoot, - CgroupAddr: "kubepods/testPod2/testCon2", - }, - { - Name: "BiuCon", - ID: "testCon3", - PodID: "testPod3", - CgroupRoot: constant.DefaultCgroupRoot, - CgroupAddr: "kubepods/testPod3/testCon3", - }, - { - Name: "PahCon", - ID: "testCon4", - PodID: "testPod4", - CgroupRoot: constant.DefaultCgroupRoot, - CgroupAddr: "kubepods/testPod4/testCon4", - }, -} - -var podInfos = []*typedef.PodInfo{ - // allow quota adjustment - { - Name: "FooPod", - UID: containerInfos[0].PodID, - Containers: map[string]*typedef.ContainerInfo{ - containerInfos[0].Name: containerInfos[0], - }, - }, - // allow quota adjustment - { - Name: "BarPod", - UID: containerInfos[1].PodID, - Containers: map[string]*typedef.ContainerInfo{ - containerInfos[1].Name: containerInfos[1], - }, - }, - // quota adjustment is not allowed - { - Name: "BiuPod", - UID: containerInfos[2].PodID, - Containers: map[string]*typedef.ContainerInfo{ - containerInfos[2].Name: containerInfos[2], - }, - 
}, - // quota adjustment is not allowed - { - Name: "PahPod", - UID: containerInfos[3].PodID, - Containers: map[string]*typedef.ContainerInfo{ - containerInfos[3].Name: containerInfos[3], - }, - }, -} - -var coreV1Pods = []corev1.Pod{ - { - Status: corev1.PodStatus{ - Phase: corev1.PodFailed, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - UID: types.UID("testPod5"), - Name: "BiuPod", - }, - Status: corev1.PodStatus{ - Phase: corev1.PodRunning, - QOSClass: corev1.PodQOSGuaranteed, - ContainerStatuses: []corev1.ContainerStatus{ - { - Name: "BiuCon", - ContainerID: "docker://testCon5", - }, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "BiuCon", - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - "cpu": *resource.NewQuantity(2, resource.DecimalSI), - }, - Limits: corev1.ResourceList{ - "cpu": *resource.NewQuantity(3, resource.DecimalSI), - "memory": *resource.NewQuantity(300, resource.DecimalSI), - }, - }, - }, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - UID: types.UID(podInfos[1].UID), - Name: podInfos[1].Name, - }, - Status: corev1.PodStatus{ - Phase: corev1.PodRunning, - QOSClass: corev1.PodQOSGuaranteed, - ContainerStatuses: []corev1.ContainerStatus{ - { - Name: "BarCon1", - ContainerID: "docker://testCon6", - }, - { - Name: "BarCon2", - }, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "BarCon1", - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - "cpu": *resource.NewQuantity(2, resource.DecimalSI), - }, - Limits: corev1.ResourceList{ - "cpu": *resource.NewQuantity(2, resource.DecimalSI), - "memory": *resource.NewQuantity(100, resource.DecimalSI), - }, - }, - }, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - UID: types.UID(podInfos[0].UID), - Name: "FooPod", - }, - Status: corev1.PodStatus{ - Phase: corev1.PodRunning, - QOSClass: corev1.PodQOSGuaranteed, - ContainerStatuses: []corev1.ContainerStatus{ - { - Name: "FooCon", - 
ContainerID: "docker://" + containerInfos[0].ID, - }, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "FooCon", - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - "cpu": *resource.NewQuantity(2, resource.DecimalSI), - }, - Limits: corev1.ResourceList{ - "cpu": *resource.NewQuantity(3, resource.DecimalSI), - "memory": *resource.NewQuantity(300, resource.DecimalSI), - }, - }, - }, - }, - }, - }, -} - -// TestManagerAddPod tests AddPod of Manager -func TestManagerAddPod(t *testing.T) { - var ( - podNum1 = 1 - podNum2 = 2 - ) - cpm := &Manager{ - Checkpoint: &Checkpoint{ - Pods: map[string]*typedef.PodInfo{ - podInfos[0].UID: podInfos[0].Clone(), - }, - }, - } - // 1. add pods that do not exist - assert.Equal(t, len(cpm.Checkpoint.Pods), podNum1) - var mockPahPod = &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: types.UID(podInfos[3].UID), - Name: podInfos[3].Name, - }, - Status: corev1.PodStatus{ - Phase: corev1.PodRunning, - QOSClass: corev1.PodQOSBurstable, - ContainerStatuses: []corev1.ContainerStatus{ - { - Name: "PahCon", - ContainerID: "docker://" + containerInfos[3].ID, - }, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "PahCon", - }, - }, - }, - } - cpm.AddPod(mockPahPod) - assert.Equal(t, len(cpm.Checkpoint.Pods), podNum2) - // 2.join a joined pods - cpm.AddPod(mockPahPod) - assert.Equal(t, len(cpm.Checkpoint.Pods), podNum2) - - // 3.add a pod whose name is empty - cpm.AddPod(&coreV1Pods[0]) - assert.Equal(t, len(cpm.Checkpoint.Pods), podNum2) -} - -// TestManagerDelPod tests DelPod of Manager -func TestManagerDelPod(t *testing.T) { - var ( - podNum0 = 0 - podNum1 = 1 - ) - cpm := &Manager{ - Checkpoint: &Checkpoint{ - Pods: map[string]*typedef.PodInfo{ - podInfos[0].UID: podInfos[0].Clone(), - }, - }, - } - // 1. 
delete pods that do not exist - assert.Equal(t, len(cpm.Checkpoint.Pods), podNum1) - cpm.DelPod(types.UID(podInfos[1].UID)) - assert.Equal(t, len(cpm.Checkpoint.Pods), podNum1) - // 2. delete existed pods - cpm.DelPod(types.UID(podInfos[0].UID)) - assert.Equal(t, len(cpm.Checkpoint.Pods), podNum0) -} - -// TestManagerUpdatePod test UpdatePod function of Manager -func TestManagerUpdatePod(t *testing.T) { - var managerUpdatePodTests = []struct { - pod *corev1.Pod - judgement func(t *testing.T, m *Manager) - name string - }{ - { - name: "TC1 - update a non-added pod", - pod: &coreV1Pods[1], - judgement: func(t *testing.T, m *Manager) { - podNum2 := 2 - assert.Equal(t, podNum2, len(m.Checkpoint.Pods)) - }, - }, - } - cpm := &Manager{ - Checkpoint: &Checkpoint{ - Pods: map[string]*typedef.PodInfo{ - podInfos[0].UID: podInfos[0].Clone(), - podInfos[1].UID: podInfos[1].Clone(), - }, - }, - } - - for _, tt := range managerUpdatePodTests { - t.Run(tt.name, func(t *testing.T) { - cpm.UpdatePod(tt.pod) - tt.judgement(t, cpm) - }) - } -} - -// TestManagerListPodsAndContainers tests methods of list pods and containers of Manager -func TestManagerListPodsAndContainers(t *testing.T) { - var ( - podNum3 = 3 - podNum4 = 4 - ) - // The pod names in Kubernetes must be unique. The same pod cannot have the same name, but different pods can have the same name. - cpm := &Manager{ - Checkpoint: &Checkpoint{ - Pods: map[string]*typedef.PodInfo{ - podInfos[0].UID: podInfos[0].Clone(), - podInfos[1].UID: podInfos[1].Clone(), - podInfos[2].UID: podInfos[2].Clone(), - }, - }, - } - // 1. Containers with Different Pods with Different Names - assert.Equal(t, len(cpm.ListAllPods()), podNum3) - assert.Equal(t, len(cpm.ListAllContainers()), podNum3) - // 2. 
Containers with the same name in different pods - var podWithSameNameCon = &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: types.UID("testPod5"), - Name: "FakeFooPod", - }, - Status: corev1.PodStatus{ - Phase: corev1.PodRunning, - ContainerStatuses: []corev1.ContainerStatus{ - { - Name: "FooCon", - ContainerID: "docker://testCon5", - }, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "FooCon", - }, - }, - }, - } - - cpm.AddPod(podWithSameNameCon) - assert.Equal(t, len(cpm.ListAllContainers()), podNum4) -} - -// TestManagerSyncFromCluster tests SyncFromCluster of Manager -func TestManagerSyncFromCluster(t *testing.T) { - cpm := NewManager("") - cpm.Checkpoint = &Checkpoint{ - Pods: make(map[string]*typedef.PodInfo, 0), - } - - cpm.SyncFromCluster(coreV1Pods) - expPodNum := 3 - assert.Equal(t, len(cpm.Checkpoint.Pods), expPodNum) - - pi2 := cpm.GetPod(coreV1Pods[1].UID) - assert.Equal(t, "BiuPod", pi2.Name) -} - -// TestMangerPodExist tests the PodExist of Manger -func TestMangerPodExist(t *testing.T) { - tests := []struct { - name string - id types.UID - want bool - }{ - { - name: "TC1 - check a non-existed pod", - id: types.UID(podInfos[0].UID), - want: true, - }, - { - name: "TC2 - check an existed pod", - id: types.UID(podInfos[1].UID), - want: false, - }, - } - cpm := NewManager("") - cpm.Checkpoint = &Checkpoint{ - Pods: map[string]*typedef.PodInfo{ - podInfos[0].UID: podInfos[0].Clone(), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equal(t, tt.want, cpm.PodExist(tt.id)) - }) - } -} diff --git a/pkg/constant/constant.go b/pkg/common/constant/constant.go similarity index 58% rename from pkg/constant/constant.go rename to pkg/common/constant/constant.go index a1d9476742d50d6c5b2dbb2e8b26631af3d3f3ec..c2a5ce4483110e92b3498788b78b6dcad16c42da 100644 --- a/pkg/constant/constant.go +++ b/pkg/common/constant/constant.go @@ -15,56 +15,43 @@ package constant import ( - "errors" "os" - "time" ) 
+// the files and directories used by the system by default const ( - // RubikSock is path for rubik socket file - RubikSock = "/run/rubik/rubik.sock" // ConfigFile is rubik config file ConfigFile = "/var/lib/rubik/config.json" - // DefaultLogDir is default log dir - DefaultLogDir = "/var/log/rubik" // LockFile is rubik lock file LockFile = "/run/rubik/rubik.lock" - // ReadTimeout is timeout for http read - ReadTimeout = 60 * time.Second - // WriteTimeout is timeout for http write - WriteTimeout = 60 * time.Second - // DefaultSucceedCode is succeed code - DefaultSucceedCode = 0 // DefaultCgroupRoot is mount point DefaultCgroupRoot = "/sys/fs/cgroup" - // CPUCgroupFileName is name of cgroup file used for cpu qos level setting - CPUCgroupFileName = "cpu.qos_level" - // MemoryCgroupFileName is name of cgroup file used for memory qos level setting - MemoryCgroupFileName = "memory.qos_level" - // DefaultFileMode is file mode for cgroup files - DefaultFileMode os.FileMode = 0600 - // DefaultDirMode is dir default mode - DefaultDirMode os.FileMode = 0700 - // DefaultUmask is default umask - DefaultUmask = 0077 - // MaxCgroupPathLen is max cgroup path length for pod - MaxCgroupPathLen = 4096 - // MaxPodIDLen is max pod id length - MaxPodIDLen = 256 - // MaxPodsPerRequest is max pods number per http request - MaxPodsPerRequest = 100 // TmpTestDir is tmp directory for test TmpTestDir = "/tmp/rubik-test" - // TaskChanCapacity is capacity for task chan - TaskChanCapacity = 1024 - // WorkerNum is number of workers - WorkerNum = 1 +) + +// kubernetes related configuration +const ( // KubepodsCgroup is kubepods root cgroup KubepodsCgroup = "kubepods" // PodCgroupNamePrefix is pod cgroup name prefix PodCgroupNamePrefix = "pod" // NodeNameEnvKey is node name environment variable key NodeNameEnvKey = "RUBIK_NODE_NAME" +) + +// File permission +const ( + // DefaultUmask is default umask + DefaultUmask = 0077 + // DefaultFileMode is file mode for cgroup files + DefaultFileMode 
os.FileMode = 0600 + // DefaultDirMode is dir default mode + DefaultDirMode os.FileMode = 0700 +) + +// Pod Annotation +const ( // PriorityAnnotationKey is annotation key to mark offline pod PriorityAnnotationKey = "volcano.sh/preemptable" // CacheLimitAnnotationKey is annotation key to set L3/Mb resctrl group @@ -73,41 +60,48 @@ const ( QuotaBurstAnnotationKey = "volcano.sh/quota-burst-time" // BlkioKey is annotation key to set blkio limit BlkioKey = "volcano.sh/blkio-limit" - // DefaultMemCheckInterval indicates the default memory check interval 5s. - DefaultMemCheckInterval = 5 - // DefaultMaxMemCheckInterval indicates the default max memory check interval 30s. - DefaultMaxMemCheckInterval = 30 - // DefaultMemStrategy indicates the default memory strategy. - DefaultMemStrategy = "none" + // QuotaAnnotationKey is annotation key to mark whether to enable the quota turbo + QuotaAnnotationKey = "volcano.sh/quota-turbo" ) -// LevelType is type definition of qos level -type LevelType int32 - +// log config const ( - // MinLevel is min level for qos level - MinLevel LevelType = -1 - // MaxLevel is max level for qos level - MaxLevel LevelType = 0 + LogDriverStdio = "stdio" + LogDriverFile = "file" + LogLevelDebug = "debug" + LogLevelInfo = "info" + LogLevelWarn = "warn" + LogLevelError = "error" + LogLevelStack = "stack" + DefaultLogDir = "/var/log/rubik" + DefaultLogLevel = LogLevelInfo + DefaultLogSize = 1024 + // LogEntryKey is the key representing EntryName in the context + LogEntryKey = "module" ) -// Int is type casting for type LevelType -func (l LevelType) Int() int { - return int(l) -} - +// exit code const ( - // ErrCodeFailed for normal failed - ErrCodeFailed = 1 + // NORMALEXIT for the normal exit code + NormalExitCode int = iota + // ArgumentErrorExitCode for normal failed + ArgumentErrorExitCode + // RepeatRunExitCode for repeat run exit + RepeatRunExitCode + // ErrorExitCode failed during run + ErrorExitCode ) -// error define ref from 
src/internal/oserror/errors.go -var ( - // ErrFileTooBig file too big - ErrFileTooBig = errors.New("file too big") +// qos level +const ( + Offline = -1 + Online = 0 ) +// cgroup file name const ( - // InvalidBurst for invalid quota burst - InvalidBurst = -1 + // CPUCgroupFileName is name of cgroup file used for cpu qos level setting + CPUCgroupFileName = "cpu.qos_level" + // MemoryCgroupFileName is name of cgroup file used for memory qos level setting + MemoryCgroupFileName = "memory.qos_level" ) diff --git a/pkg/tinylog/tinylog.go b/pkg/common/log/log.go similarity index 61% rename from pkg/tinylog/tinylog.go rename to pkg/common/log/log.go index 6c5ca988c55e3f0f7d3266f6c4a647171df1d053..d9a5cc7bbf9647a513d2fab7bf244d26f50b364a 100644 --- a/pkg/tinylog/tinylog.go +++ b/pkg/common/log/log.go @@ -12,7 +12,7 @@ // Description: This file is used for rubik log // Package tinylog is for rubik log -package tinylog +package log import ( "context" @@ -25,44 +25,41 @@ import ( "sync/atomic" "time" - "isula.org/rubik/pkg/constant" + "isula.org/rubik/pkg/common/constant" ) // CtxKey used for UUID type CtxKey string const ( - // UUID is log uuid - UUID = "uuid" - - logStdio = 0 - logFile = 1 - logDriverStdio = "stdio" - logDriverFile = "file" - - logDebug = 0 - logInfo = 1 - logError = 2 - logStack = 20 - logStackFrom = 2 - logLevelInfo = "info" - logLevelStack = "stack" - - logFileNum = 10 - logSizeMin int64 = 10 // 10MB - logSizeMax int64 = 1024 * 1024 // 1TB - unitMB int64 = 1024 * 1024 + logStack = 20 + logStackFrom = 2 + logFileNum = 10 + logSizeMin int64 = 10 // 10MB + logSizeMax int64 = 1024 * 1024 // 1TB + unitMB int64 = 1024 * 1024 +) + +const ( + stdio int = iota + file +) + +const ( + logDebug int = iota + logInfo + logWarn + logError ) var ( - logDriver = logStdio + logDriver = stdio logFname = filepath.Join(constant.DefaultLogDir, "rubik.log") - logLevel = 0 + logLevel = logInfo logSize int64 = 1024 logFileMaxSize int64 logFileSize int64 - - lock = sync.Mutex{} + 
lock = sync.Mutex{} ) func makeLogDir(logDir string) error { @@ -77,23 +74,23 @@ func makeLogDir(logDir string) error { return nil } -// InitConfig init log config +// InitConfig initializes log config func InitConfig(driver, logdir, level string, size int64) error { if driver == "" { - driver = logDriverStdio + driver = constant.LogDriverStdio } - if driver != logDriverStdio && driver != logDriverFile { + if driver != constant.LogDriverStdio && driver != constant.LogDriverFile { return fmt.Errorf("invalid log driver %s", driver) } - logDriver = logStdio - if driver == logDriverFile { - logDriver = logFile + logDriver = stdio + if driver == constant.LogDriverFile { + logDriver = file } if level == "" { - level = logLevelInfo + level = constant.LogLevelInfo } - levelstr, err := logLevelFromString(level) + levelstr, err := levelFromString(level) if err != nil { return err } @@ -105,7 +102,7 @@ func InitConfig(driver, logdir, level string, size int64) error { logSize = size logFileMaxSize = logSize / logFileNum - if driver == "file" { + if driver == constant.LogDriverFile { if err := makeLogDir(logdir); err != nil { return err } @@ -130,48 +127,52 @@ func DropError(args ...interface{}) { } } -func logLevelToString(level int) string { +func levelToString(level int) string { switch level { case logDebug: - return "debug" + return constant.LogLevelDebug case logInfo: - return "info" + return constant.LogLevelInfo + case logWarn: + return constant.LogLevelWarn case logError: - return "error" + return constant.LogLevelError case logStack: - return logLevelStack + return constant.LogLevelStack default: return "" } } -func logLevelFromString(level string) (int, error) { +func levelFromString(level string) (int, error) { switch level { - case "debug": + case constant.LogLevelDebug: return logDebug, nil - case "info", "": + case constant.LogLevelInfo, "": return logInfo, nil - case "error": + case constant.LogLevelWarn: + return logWarn, nil + case constant.LogLevelError: 
return logError, nil default: return logInfo, fmt.Errorf("invalid log level %s", level) } } -func logRename() { +func renameLogFile() { for i := logFileNum - 1; i > 1; i-- { - old := logFname + fmt.Sprintf(".%d", i-1) - new := logFname + fmt.Sprintf(".%d", i) - if _, err := os.Stat(old); err == nil { - DropError(os.Rename(old, new)) + oldFile := logFname + fmt.Sprintf(".%d", i-1) + newFile := logFname + fmt.Sprintf(".%d", i) + if _, err := os.Stat(oldFile); err == nil { + DropError(os.Rename(oldFile, newFile)) } } DropError(os.Rename(logFname, logFname+".1")) } -func logRotate(line int64) string { +func rotateLog(line int64) string { if atomic.AddInt64(&logFileSize, line) > logFileMaxSize*unitMB { - logRename() + renameLogFile() atomic.StoreInt64(&logFileSize, line) } @@ -179,7 +180,7 @@ func logRotate(line int64) string { } func writeLine(line string) { - if logDriver == logStdio { + if logDriver == stdio { fmt.Printf("%s", line) return } @@ -187,7 +188,7 @@ func writeLine(line string) { lock.Lock() defer lock.Unlock() - f, err := os.OpenFile(logRotate(int64(len(line))), os.O_CREATE|os.O_APPEND|os.O_WRONLY, constant.DefaultFileMode) + f, err := os.OpenFile(rotateLog(int64(len(line))), os.O_CREATE|os.O_APPEND|os.O_WRONLY, constant.DefaultFileMode) if err != nil { return } @@ -196,12 +197,12 @@ func writeLine(line string) { DropError(f.Close()) } -func logf(level string, format string, args ...interface{}) { +func output(level string, format string, args ...interface{}) { tag := fmt.Sprintf("%s [rubik] level=%s ", time.Now().Format("2006-01-02 15:04:05.000"), level) raw := fmt.Sprintf(format, args...) 
+ "\n" depth := 1 - if level == logLevelStack { + if level == constant.LogLevelStack { depth = logStack } @@ -213,44 +214,44 @@ func logf(level string, format string, args ...interface{}) { fs = strings.Split("."+fs[len(fs)-1], ".") fn := fs[len(fs)-1] line = tag + fmt.Sprintf("%s:%d:%s() ", file, linum, fn) + raw - } else if level == logLevelStack { + } else if level == constant.LogLevelStack { break } writeLine(line) } } -// Logf log info level -func Logf(format string, args ...interface{}) { - if logInfo >= logLevel { - logf(logLevelToString(logInfo), format, args...) +// Warnf log warn level +func Warnf(format string, args ...interface{}) { + if logWarn >= logLevel { + output(levelToString(logWarn), format, args...) } } // Infof log info level func Infof(format string, args ...interface{}) { if logInfo >= logLevel { - logf(logLevelToString(logInfo), format, args...) + output(levelToString(logInfo), format, args...) } } // Debugf log debug level func Debugf(format string, args ...interface{}) { if logDebug >= logLevel { - logf(logLevelToString(logDebug), format, args...) + output(levelToString(logDebug), format, args...) } } // Errorf log error level func Errorf(format string, args ...interface{}) { if logError >= logLevel { - logf(logLevelToString(logError), format, args...) + output(levelToString(logError), format, args...) } } // Stackf log stack dump func Stackf(format string, args ...interface{}) { - logf("stack", format, args...) + output("stack", format, args...) 
} // Entry is log entry @@ -266,19 +267,19 @@ func WithCtx(ctx context.Context) *Entry { } func (e *Entry) level(l int) string { - uuid, ok := e.Ctx.Value(CtxKey(UUID)).(string) + id, ok := e.Ctx.Value(CtxKey(constant.LogEntryKey)).(string) if ok { - return logLevelToString(l) + " UUID=" + uuid + return levelToString(l) + " " + constant.LogEntryKey + "=" + id } - return logLevelToString(l) + return levelToString(l) } -// Logf write logs -func (e *Entry) Logf(f string, args ...interface{}) { +// Warnf write logs +func (e *Entry) Warnf(f string, args ...interface{}) { if logInfo < logLevel { return } - logf(e.level(logInfo), f, args...) + output(e.level(logWarn), f, args...) } // Infof write logs @@ -286,7 +287,7 @@ func (e *Entry) Infof(f string, args ...interface{}) { if logInfo < logLevel { return } - logf(e.level(logInfo), f, args...) + output(e.level(logInfo), f, args...) } // Debugf write verbose logs @@ -294,7 +295,7 @@ func (e *Entry) Debugf(f string, args ...interface{}) { if logDebug < logLevel { return } - logf(e.level(logDebug), f, args...) + output(e.level(logDebug), f, args...) } // Errorf write error logs @@ -302,5 +303,20 @@ func (e *Entry) Errorf(f string, args ...interface{}) { if logError < logLevel { return } - logf(e.level(logError), f, args...) + output(e.level(logError), f, args...) 
} + +// EmptyLog is an empty log structure without any log processing +type EmptyLog struct{} + +// Warnf write logs +func (e *EmptyLog) Warnf(f string, args ...interface{}) {} + +// Infof write logs +func (e *EmptyLog) Infof(f string, args ...interface{}) {} + +// Debugf write verbose logs +func (e *EmptyLog) Debugf(f string, args ...interface{}) {} + +// Errorf write error logs +func (e *EmptyLog) Errorf(f string, args ...interface{}) {} diff --git a/pkg/tinylog/tinylog_test.go b/pkg/common/log/log_test.go similarity index 90% rename from pkg/tinylog/tinylog_test.go rename to pkg/common/log/log_test.go index f36b04086d4ed7446720ce3510a7ff3858f23821..8476b17b0ef115bbf25eff99b5d702830441ddea 100644 --- a/pkg/tinylog/tinylog_test.go +++ b/pkg/common/log/log_test.go @@ -11,7 +11,7 @@ // Create: 2021-05-24 // Description: This file is used for testing tinylog -package tinylog +package log import ( "context" @@ -24,8 +24,8 @@ import ( "github.com/stretchr/testify/assert" - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/try" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/test/try" ) // test_rubik_set_logdriver_0001 @@ -34,7 +34,7 @@ func TestInitConfigLogDriver(t *testing.T) { logFilePath := filepath.Join(logDir, "rubik.log") // case: rubik.log already exist. 
- try.WriteFile(logFilePath, []byte(""), constant.DefaultFileMode) + try.WriteFile(logFilePath, "") err := InitConfig("file", logDir, "", logSize) assert.NoError(t, err) @@ -44,9 +44,9 @@ func TestInitConfigLogDriver(t *testing.T) { // logDriver is file err = InitConfig("file", logDir, "", logSize) assert.NoError(t, err) - assert.Equal(t, logFile, logDriver) + assert.Equal(t, file, logDriver) logString := "Test InitConfig with logDriver file" - Logf(logString) + Infof(logString) b, err := ioutil.ReadFile(logFilePath) assert.NoError(t, err) assert.Equal(t, true, strings.Contains(string(b), logString)) @@ -55,10 +55,10 @@ func TestInitConfigLogDriver(t *testing.T) { os.Remove(logFilePath) err = InitConfig("stdio", logDir, "", logSize) assert.NoError(t, err) - assert.Equal(t, logStdio, logDriver) + assert.Equal(t, stdio, logDriver) logString = "Test InitConfig with logDriver stdio" - Logf(logString) - b, err = ioutil.ReadFile(logFilePath) + Infof(logString) + _, err = ioutil.ReadFile(logFilePath) assert.Equal(t, true, err != nil) // logDriver invalid @@ -68,7 +68,7 @@ func TestInitConfigLogDriver(t *testing.T) { // logDriver is null err = InitConfig("", logDir, "", logSize) assert.NoError(t, err) - assert.Equal(t, logStdio, logDriver) + assert.Equal(t, stdio, logDriver) } // test_rubik_set_logdir_0001 @@ -80,7 +80,7 @@ func TestInitConfigLogDir(t *testing.T) { err := InitConfig("file", logDir, "", logSize) assert.NoError(t, err) logString := "Test InitConfig with logDir valid" - Logf(logString) + Infof(logString) b, err := ioutil.ReadFile(logFilePath) assert.NoError(t, err) assert.Equal(t, true, strings.Contains(string(b), logString)) @@ -153,7 +153,7 @@ func TestInitConfigLogLevel(t *testing.T) { Debugf(debugLogSting) Infof(infoLogSting) Errorf(errorLogSting) - Logf(logLogString) + Infof(logLogString) b, err := ioutil.ReadFile(logFilePath) assert.NoError(t, err) assert.Equal(t, tt.debug, strings.Contains(string(b), debugLogSting)) @@ -162,11 +162,11 @@ func 
TestInitConfigLogLevel(t *testing.T) { assert.Equal(t, tt.error, strings.Contains(string(b), errorLogSting)) os.Remove(logFilePath) - ctx := context.WithValue(context.Background(), CtxKey(UUID), "abc123") + ctx := context.WithValue(context.Background(), CtxKey(constant.LogEntryKey), "abc123") WithCtx(ctx).Debugf(debugLogSting) WithCtx(ctx).Infof(infoLogSting) WithCtx(ctx).Errorf(errorLogSting) - WithCtx(ctx).Logf(logLogString) + WithCtx(ctx).Warnf(logLogString) b, err = ioutil.ReadFile(logFilePath) assert.NoError(t, err) assert.Equal(t, tt.debug, strings.Contains(string(b), debugLogSting)) @@ -195,12 +195,12 @@ func TestInitConfigLogSize(t *testing.T) { err = InitConfig("file", logDir, "", logSize) assert.NoError(t, err) for i := 0; i < printLine; i++ { - Logf(strings.Repeat("TestInitConfigLogSize log", repeat)) + Infof(strings.Repeat("TestInitConfigLogSize log", repeat)) } err = InitConfig("file", logDir, "", int64(testSize)) assert.NoError(t, err) for i := 0; i < printLine; i++ { - Logf(strings.Repeat("TestInitConfigLogSize log", repeat)) + Infof(strings.Repeat("TestInitConfigLogSize log", repeat)) } var size int64 err = filepath.Walk(logDir, func(_ string, f os.FileInfo, _ error) error { @@ -248,16 +248,16 @@ func TestDropError(t *testing.T) { // TestLogOthers is log other tests func TestLogOthers(t *testing.T) { logDir := filepath.Join(try.GenTestDir().String(), "regular-file") - try.WriteFile(logDir, []byte{}, constant.DefaultFileMode) + try.WriteFile(logDir, "") err := makeLogDir(logDir) assert.Equal(t, true, err != nil) - level1 := 3 - s := logLevelToString(level1) + const outOfRangeLogLevel = 100 + s := levelToString(outOfRangeLogLevel) assert.Equal(t, "", s) - level2 := 20 - s = logLevelToString(level2) + const stackLoglevel = 20 + s = levelToString(stackLoglevel) assert.Equal(t, "stack", s) logDriver = 1 diff --git a/pkg/perf/perf.go b/pkg/common/perf/perf.go similarity index 85% rename from pkg/perf/perf.go rename to pkg/common/perf/perf.go index 
a79aa3cfa32bac1e5f43bb30cb234820a5f09042..68fe6e3fe150a1817e5397938290a2673d22e210 100644 --- a/pkg/perf/perf.go +++ b/pkg/common/perf/perf.go @@ -16,32 +16,30 @@ package perf import ( "encoding/binary" - "path/filepath" "runtime" "time" - "unsafe" "github.com/pkg/errors" "golang.org/x/sys/unix" - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - log "isula.org/rubik/pkg/tinylog" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/core/typedef/cgroup" ) var ( hwSupport = false ) -// HwSupport tell if the os support perf hw pmu events. -func HwSupport() bool { +// Support tell if the os support perf hw pmu events. +func Support() bool { return hwSupport } -// PerfStat is perf stat info -type PerfStat struct { +// Stat is perf stat info +type Stat struct { Instructions uint64 - CpuCycles uint64 + CPUCycles uint64 CacheMisses uint64 CacheReferences uint64 LLCAccess uint64 @@ -56,42 +54,45 @@ type cgEvent struct { } type eventConfig struct { - config uint64 - eType uint32 eventName string + eType uint32 + config uint64 } func getEventConfig() []eventConfig { + const eight, sixteen = 8, 16 return []eventConfig{ { + eventName: "instructions", eType: unix.PERF_TYPE_HARDWARE, config: unix.PERF_COUNT_HW_INSTRUCTIONS, - eventName: "instructions", }, { + eventName: "cycles", eType: unix.PERF_TYPE_HARDWARE, config: unix.PERF_COUNT_HW_CPU_CYCLES, - eventName: "cycles", }, { + eventName: "cachereferences", eType: unix.PERF_TYPE_HARDWARE, config: unix.PERF_COUNT_HW_CACHE_REFERENCES, - eventName: "cachereferences", }, { + eventName: "cachemiss", eType: unix.PERF_TYPE_HARDWARE, config: unix.PERF_COUNT_HW_CACHE_MISSES, - eventName: "cachemiss", }, { - eType: unix.PERF_TYPE_HW_CACHE, - config: unix.PERF_COUNT_HW_CACHE_LL | unix.PERF_COUNT_HW_CACHE_OP_READ<<8 | unix.PERF_COUNT_HW_CACHE_RESULT_MISS<<16, eventName: "llcmiss", + eType: unix.PERF_TYPE_HW_CACHE, + config: unix.PERF_COUNT_HW_CACHE_LL | 
unix.PERF_COUNT_HW_CACHE_OP_READ< fileMaxSize { - return nil, constant.ErrFileTooBig + + if size > fileMaxSize { + return nil, fmt.Errorf("file too big") + } + return ReadFile(path) +} + +// FileSize returns the size of file +func FileSize(path string) (int64, error) { + if !PathExist(path) { + return 0, fmt.Errorf("%v: No such file or directory", path) + } + st, err := os.Lstat(path) + if err != nil { + return 0, err } - return ioutil.ReadFile(path) // nolint: gosec + return st.Size(), nil } // PathExist returns true if the path exists @@ -72,30 +86,70 @@ func PathExist(path string) bool { return true } -// CreateLockFile creates a lock file -func CreateLockFile(p string) (*os.File, error) { - path := filepath.Clean(p) - if err := os.MkdirAll(filepath.Dir(path), constant.DefaultDirMode); err != nil { +// LockFile locks a file, creating a file if it does not exist +func LockFile(path string) (*os.File, error) { + lock, err := CreateFile(path) + if err != nil { return nil, err } + if err := syscall.Flock(int(lock.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { + return lock, err + } + return lock, nil +} - lock, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, constant.DefaultFileMode) - if err != nil { - return nil, err +// UnlockFile unlock file - this function used cleanup resource, +func UnlockFile(lock *os.File) error { + // errors will ignored to make sure more source is cleaned. 
+ if err := syscall.Flock(int(lock.Fd()), syscall.LOCK_UN); err != nil { + return err } + return nil +} - if err = syscall.Flock(int(lock.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { - log.DropError(lock.Close()) - return nil, err +// ReadFile reads a file +func ReadFile(path string) ([]byte, error) { + path = filepath.Clean(path) + if IsDir(path) { + return nil, fmt.Errorf("%v is not a file", path) } + return ioutil.ReadFile(path) +} - return lock, nil +// WriteFile writes a file, if the file does not exist, create the file (including the upper directory) +func WriteFile(path, content string) error { + if IsDir(path) { + return fmt.Errorf("%v is not a file", path) + } + // try to create parent directory + dirPath := filepath.Dir(path) + if !PathExist(dirPath) { + if err := os.MkdirAll(dirPath, constant.DefaultDirMode); err != nil { + return fmt.Errorf("error create dir %v: %v", dirPath, err) + } + } + return ioutil.WriteFile(path, []byte(content), constant.DefaultFileMode) } -// RemoveLockFile removes lock file - this function used cleanup resource, -// errors will ignored to make sure more source is cleaned. 
-func RemoveLockFile(lock *os.File, path string) { - log.DropError(syscall.Flock(int(lock.Fd()), syscall.LOCK_UN)) - log.DropError(lock.Close()) - log.DropError(os.Remove(path)) +// AppendFile appends content to the file +func AppendFile(path, content string) error { + if !PathExist(path) { + return fmt.Errorf("%v: No such file or directory", path) + } + if IsDir(path) { + return fmt.Errorf("%v is not a file", path) + } + f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, constant.DefaultFileMode) + defer func() { + if err != f.Close() { + return + } + }() + if err != nil { + return fmt.Errorf("error open file: %v", err) + } + if _, err := f.WriteString(content); err != nil { + return fmt.Errorf("error write file: %v", err) + } + return nil } diff --git a/pkg/util/file_test.go b/pkg/common/util/file_test.go similarity index 43% rename from pkg/util/file_test.go rename to pkg/common/util/file_test.go index 9f7f3dc662655ed5bca7ae68101bc9c005276b6b..5620fedef5a771af7674e7068154377b1dc3ac73 100644 --- a/pkg/util/file_test.go +++ b/pkg/common/util/file_test.go @@ -11,24 +11,27 @@ // Create: 2021-04-17 // Description: filepath related common functions testing +// Package util is common utilitization package util import ( + "fmt" "io/ioutil" "os" "path/filepath" + "reflect" "testing" "github.com/stretchr/testify/assert" - - "isula.org/rubik/pkg/constant" + "isula.org/rubik/pkg/common/constant" ) -// TestIsDirectory is IsDirectory function test -func TestIsDirectory(t *testing.T) { +// TestIsDir is IsDir function test +func TestIsDir(t *testing.T) { + os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode) + defer os.RemoveAll(constant.TmpTestDir) directory, err := ioutil.TempDir(constant.TmpTestDir, t.Name()) assert.NoError(t, err) - defer os.RemoveAll(directory) filePath, err := ioutil.TempFile(directory, t.Name()) assert.NoError(t, err) @@ -60,7 +63,7 @@ func TestIsDirectory(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if 
got := IsDirectory(tt.args.path); got != tt.want { + if got := IsDir(tt.args.path); got != tt.want { t.Errorf("IsDirectory() = %v, want %v", got, tt.want) } }) @@ -71,9 +74,10 @@ func TestIsDirectory(t *testing.T) { // TestPathIsExist is PathExist function test func TestPathIsExist(t *testing.T) { + os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode) + defer os.RemoveAll(constant.TmpTestDir) filePath, err := ioutil.TempDir(constant.TmpTestDir, "file_exist") assert.NoError(t, err) - defer os.RemoveAll(filePath) type args struct { path string @@ -105,9 +109,10 @@ func TestPathIsExist(t *testing.T) { // TestReadSmallFile is test for read file func TestReadSmallFile(t *testing.T) { + os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode) + defer os.RemoveAll(constant.TmpTestDir) filePath, err := ioutil.TempDir(constant.TmpTestDir, "read_file") assert.NoError(t, err) - defer os.RemoveAll(filePath) // case1: ok err = ioutil.WriteFile(filepath.Join(filePath, "ok"), []byte{}, constant.DefaultFileMode) @@ -117,7 +122,7 @@ func TestReadSmallFile(t *testing.T) { // case2: too big size := 20000000 - big := make([]byte, size, size) + big := make([]byte, size) err = ioutil.WriteFile(filepath.Join(filePath, "big"), big, constant.DefaultFileMode) assert.NoError(t, err) _, err = ReadSmallFile(filepath.Join(filePath, "big")) @@ -134,9 +139,11 @@ func TestCreateLockFile(t *testing.T) { err := os.RemoveAll(lockFile) assert.NoError(t, err) - lock, err := CreateLockFile(lockFile) + lock, err := LockFile(lockFile) assert.NoError(t, err) - RemoveLockFile(lock, lockFile) + UnlockFile(lock) + assert.NoError(t, lock.Close()) + assert.NoError(t, os.Remove(lockFile)) } // TestLockFail is CreateLockFile fail test @@ -148,22 +155,210 @@ func TestLockFail(t *testing.T) { _, err = os.Create(filepath.Join(constant.TmpTestDir, "rubik-lock")) assert.NoError(t, err) - _, err = CreateLockFile(filepath.Join(constant.TmpTestDir, "rubik-lock", "rubik.lock")) + _, err = 
LockFile(filepath.Join(constant.TmpTestDir, "rubik-lock", "rubik.lock")) assert.Equal(t, true, err != nil) err = os.RemoveAll(filepath.Join(constant.TmpTestDir, "rubik-lock")) assert.NoError(t, err) err = os.MkdirAll(lockFile, constant.DefaultDirMode) assert.NoError(t, err) - _, err = CreateLockFile(lockFile) + _, err = LockFile(lockFile) assert.Equal(t, true, err != nil) err = os.RemoveAll(lockFile) assert.NoError(t, err) - _, err = CreateLockFile(lockFile) + _, err = LockFile(lockFile) assert.NoError(t, err) - _, err = CreateLockFile(lockFile) + _, err = LockFile(lockFile) assert.Equal(t, true, err != nil) err = os.RemoveAll(lockFile) assert.NoError(t, err) } + +// TestReadFile tests ReadFile +func TestReadFile(t *testing.T) { + os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode) + defer os.RemoveAll(constant.TmpTestDir) + type args struct { + path string + } + tests := []struct { + name string + args args + pre func(t *testing.T) + post func(t *testing.T) + want []byte + wantErr bool + }{ + { + name: "TC1-path is dir", + args: args{ + path: constant.TmpTestDir, + }, + pre: func(t *testing.T) { + _, err := ioutil.TempDir(constant.TmpTestDir, "TC1") + assert.NoError(t, err) + }, + post: func(t *testing.T) { + assert.NoError(t, os.RemoveAll(constant.TmpTestDir)) + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.pre != nil { + tt.pre(t) + } + got, err := ReadFile(tt.args.path) + if (err != nil) != tt.wantErr { + t.Errorf("ReadFile() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ReadFile() = %v, want %v", got, tt.want) + } + if tt.post != nil { + tt.post(t) + } + + }) + } +} + +// TestWriteFile tests WriteFile +func TestWriteFile(t *testing.T) { + os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode) + defer os.RemoveAll(constant.TmpTestDir) + var filePath = filepath.Join(constant.TmpTestDir, "cpu", "kubepods", "PodXXX") + type args struct { + path 
string + content string + } + tests := []struct { + name string + args args + pre func(t *testing.T) + post func(t *testing.T) + wantErr bool + }{ + { + name: "TC1-path is dir", + args: args{ + path: constant.TmpTestDir, + }, + pre: func(t *testing.T) { + _, err := ioutil.TempDir(constant.TmpTestDir, "TC1") + assert.NoError(t, err) + }, + post: func(t *testing.T) { + assert.NoError(t, os.RemoveAll(constant.TmpTestDir)) + }, + wantErr: true, + }, + { + name: "TC2-create dir & write file", + args: args{ + path: filePath, + content: "1", + }, + pre: func(t *testing.T) { + assert.NoError(t, os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode)) + }, + post: func(t *testing.T) { + assert.NoError(t, os.RemoveAll(constant.TmpTestDir)) + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.pre != nil { + tt.pre(t) + } + if err := WriteFile(tt.args.path, tt.args.content); (err != nil) != tt.wantErr { + t.Errorf("WriteFile() error = %v, wantErr %v", err, tt.wantErr) + } + if tt.post != nil { + tt.post(t) + } + }) + } +} + +func TestAppendFile(t *testing.T) { + os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode) + defer os.RemoveAll(constant.TmpTestDir) + var ( + dirPath = filepath.Join(constant.TmpTestDir, "cpu", "kubepods", "PodXXX") + filePath = filepath.Join(dirPath, "cpu.cfs_quota_us") + ) + type args struct { + path string + content string + } + tests := []struct { + name string + args args + pre func(t *testing.T) + post func(t *testing.T) + wantErr bool + }{ + { + name: "TC1-path is dir", + args: args{ + path: constant.TmpTestDir, + }, + pre: func(t *testing.T) { + _, err := ioutil.TempDir(constant.TmpTestDir, "TC1") + assert.NoError(t, err) + }, + post: func(t *testing.T) { + assert.NoError(t, os.RemoveAll(filepath.Join(constant.TmpTestDir, "TC1"))) + }, + wantErr: true, + }, + { + name: "TC2-empty path", + args: args{ + path: dirPath, + }, + pre: func(t *testing.T) { + assert.NoError(t, 
os.RemoveAll(constant.TmpTestDir)) + }, + wantErr: true, + }, + { + name: "TC3-write file success", + args: args{ + path: filePath, + content: "1", + }, + pre: func(t *testing.T) { + assert.NoError(t, os.MkdirAll(dirPath, constant.DefaultDirMode)) + assert.NoError(t, ioutil.WriteFile(filePath, []byte(""), constant.DefaultFileMode)) + }, + post: func(t *testing.T) { + assert.NoError(t, os.RemoveAll(constant.TmpTestDir)) + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.pre != nil { + tt.pre(t) + } + err := AppendFile(tt.args.path, tt.args.content) + if (err != nil) != tt.wantErr { + t.Errorf("AppendFile() error = %v, wantErr %v", err, tt.wantErr) + } + if err != nil { + fmt.Printf("error: %v\n", err) + } + if tt.post != nil { + tt.post(t) + } + }) + } +} diff --git a/pkg/common/util/math.go b/pkg/common/util/math.go new file mode 100644 index 0000000000000000000000000000000000000000..c0031b35532666c5fa1277dbfc719a008290f228 --- /dev/null +++ b/pkg/common/util/math.go @@ -0,0 +1,63 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Jiaqi Yang +// Date: 2023-02-08 +// Description: This file is used for math + +package util + +import ( + "fmt" + "math" +) + +// Div calculates the quotient of the divisor and the dividend, and it takes +// parameters (dividend, divisor, maximum out of range, precision, and format) +// format indicates the output format, for example, "%.2f" with two decimal places. 
+func Div(dividend, divisor float64, args ...interface{}) float64 { + var ( + format = "" + accuracy float64 = 0 + maxValue = math.MaxFloat64 + ) + const ( + maxValueIndex int = iota + accuracyIndex + formatIndex + ) + if len(args) > maxValueIndex { + if value, ok := args[maxValueIndex].(float64); ok { + maxValue = value + } + } + if len(args) > accuracyIndex { + if value, ok := args[accuracyIndex].(float64); ok { + accuracy = value + } + } + if len(args) > formatIndex { + if value, ok := args[formatIndex].(string); ok { + format = value + } + } + if divisor == 0 { + return maxValue + } + if math.Abs(divisor) <= accuracy { + return maxValue + } + ans := dividend / divisor + if len(format) != 0 { + if value, err := ParseFloat64(fmt.Sprintf(format, ans)); err == nil { + ans = value + } + } + return ans +} diff --git a/pkg/common/util/math_test.go b/pkg/common/util/math_test.go new file mode 100644 index 0000000000000000000000000000000000000000..5ac2be35cbba14c623d7e6c3f2b2f2316f01dcb8 --- /dev/null +++ b/pkg/common/util/math_test.go @@ -0,0 +1,92 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Jiaqi Yang +// Date: 2023-03-24 +// Description: This file is used for testing math + +// Package util is common utilitization +package util + +import ( + "testing" +) + +// TestDiv tests Div +func TestDiv(t *testing.T) { + const ( + dividend float64 = 100.0 + divisor float64 = 1.0 + maxValue float64 = 70.0 + accuracy float64 = 2.0 + format string = "%.2f" + ) + type args struct { + dividend float64 + divisor float64 + args []interface{} + } + tests := []struct { + name string + args args + want float64 + }{ + { + name: "TC1-dividend: 100, divisor: 1, default arguments", + args: args{ + dividend: dividend, + divisor: divisor, + }, + want: dividend, + }, + { + name: "TC2-dividend: 100, divisor: 0, maxValue: 70", + args: args{ + dividend: dividend, + divisor: 0, + args: []interface{}{ + maxValue, + }, + }, + want: maxValue, + }, + { + name: "TC3-dividend: 100, divisor: 1, maxValue: 70, accuracy: 2", + args: args{ + dividend: dividend, + divisor: divisor, + args: []interface{}{ + maxValue, + accuracy, + }, + }, + want: maxValue, + }, + { + name: `TC4-dividend: 3, divisor: 8, format: %.2f`, + args: args{ + dividend: 3, + divisor: 8, + args: []interface{}{ + maxValue, + accuracy, + format, + }, + }, + want: 0.38, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := Div(tt.args.dividend, tt.args.divisor, tt.args.args...); got != tt.want { + t.Errorf("Div() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/config/config.go b/pkg/config/config.go index 5938c87bc1b27a95eb6fa8da2cd41ab4374abb52..16ab6e17163f59fdccc96df27fb313c3e1b1420c 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -1,4 +1,4 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. // rubik licensed under the Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan PSL v2. 
// You may obtain a copy of Mulan PSL v2 at: @@ -7,165 +7,125 @@ // IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR // PURPOSE. // See the Mulan PSL v2 for more details. -// Author: Danni Xia -// Create: 2021-04-26 -// Description: config load +// Author: Jiaqi Yang +// Create: 2023-02-01 +// Description: This file contains configuration content and provides external interaction functions +// Package config is used to manage the configuration of rubik package config import ( - "bytes" "encoding/json" - "path/filepath" + "fmt" - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/util" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/util" ) -var ( - // CgroupRoot is cgroup mount point - CgroupRoot = constant.DefaultCgroupRoot - // ShutdownFlag is rubik shutdown flag - ShutdownFlag int32 - // ShutdownChan is rubik shutdown channel - ShutdownChan = make(chan struct{}) -) - -// Config defines the configuration for rubik -type Config struct { - AutoCheck bool `json:"autoCheck,omitempty"` - LogDriver string `json:"logDriver,omitempty"` - LogDir string `json:"logDir,omitempty"` - LogSize int `json:"logSize,omitempty"` - LogLevel string `json:"logLevel,omitempty"` - CgroupRoot string `json:"cgroupRoot,omitempty"` - CacheCfg CacheConfig `json:"cacheConfig,omitempty"` - BlkioCfg BlkioConfig `json:"blkioConfig,omitempty"` - MemCfg MemoryConfig `json:"memoryConfig,omitempty"` - NodeConfig []NodeConfig `json:"nodeConfig,omitempty"` -} +const agentKey = "agent" -// CacheConfig define cache limit related config -type CacheConfig struct { - Enable bool `json:"enable,omitempty"` - DefaultLimitMode string `json:"defaultLimitMode,omitempty"` - DefaultResctrlDir string `json:"-"` - AdjustInterval int `json:"adjustInterval,omitempty"` - PerfDuration int `json:"perfDuration,omitempty"` - L3Percent MultiLvlPercent `json:"l3Percent,omitempty"` - MemBandPercent MultiLvlPercent `json:"memBandPercent,omitempty"` +// 
sysConfKeys saves the system configuration key, which is the service name except +var sysConfKeys = map[string]struct{}{ + agentKey: {}, } -// BlkioConfig defines blkio related configurations. -type BlkioConfig struct { - Enable bool `json:"enable,omitempty"` -} - -// MultiLvlPercent define multi level percentage -type MultiLvlPercent struct { - Low int `json:"low,omitempty"` - Mid int `json:"mid,omitempty"` - High int `json:"high,omitempty"` +// Config saves all configuration information of rubik +type Config struct { + ConfigParser + Agent *AgentConfig + Fields map[string]interface{} } -type MemoryConfig struct { - Enable bool `json:"enable,omitempty"` - Strategy string `json:"strategy,omitempty"` - CheckInterval int `json:"checkInterval,omitempty"` +// AgentConfig is the configuration of rubik, including important basic configurations such as logs +type AgentConfig struct { + LogDriver string `json:"logDriver,omitempty"` + LogLevel string `json:"logLevel,omitempty"` + LogSize int64 `json:"logSize,omitempty"` + LogDir string `json:"logDir,omitempty"` + CgroupRoot string `json:"cgroupRoot,omitempty"` + EnabledFeatures []string `json:"enabledFeatures,omitempty"` } -// NodeConfig define node configuration for each node -type NodeConfig struct { - NodeName string `json:"nodeName,omitempty"` - IOcostEnable bool `json:"iocostEnable,omitempty"` - IOcostConfig []IOcostConfig `json:"iocostConfig,omitempty"` +// NewConfig returns an config object pointer +func NewConfig(pType parserType) *Config { + c := &Config{ + ConfigParser: defaultParserFactory.getParser(pType), + Agent: &AgentConfig{ + LogDriver: constant.LogDriverStdio, + LogSize: constant.DefaultLogSize, + LogLevel: constant.DefaultLogLevel, + LogDir: constant.DefaultLogDir, + CgroupRoot: constant.DefaultCgroupRoot, + }, + } + return c } -// IOcostConfig define iocost for node -type IOcostConfig struct { - Dev string `json:"dev,omitempty"` - Enable bool `json:"enable,omitempty"` - Model string 
`json:"model,omitempty"` - Param Param `json:"param,omitempty"` +// loadConfigFile loads data from configuration file +func loadConfigFile(config string) ([]byte, error) { + buffer, err := util.ReadSmallFile(config) + if err != nil { + return nil, err + } + return buffer, nil } -// Param for linear model -type Param struct { - Rbps int64 `json:"rbps,omitempty"` - Rseqiops int64 `json:"rseqiops,omitempty"` - Rrandiops int64 `json:"rrandiops,omitempty"` - Wbps int64 `json:"wbps,omitempty"` - Wseqiops int64 `json:"wseqiops,omitempty"` - Wrandiops int64 `json:"wrandiops,omitempty"` +// parseAgentConfig parses config as AgentConfig +func (c *Config) parseAgentConfig() error { + content, ok := c.Fields[agentKey] + if !ok { + // not setting agent means using the default configuration file + return nil + } + if err := c.UnmarshalSubConfig(content, c.Agent); err != nil { + return err + } + return nil } -// NewConfig returns new config load from config file -func NewConfig(path string) (*Config, error) { +// LoadConfig loads and parses configuration data from the file, and save it to the Config +func (c *Config) LoadConfig(path string) error { if path == "" { path = constant.ConfigFile } - - defaultLogSize, defaultAdInt, defaultPerfDur := 1024, 1000, 1000 - defaultLowL3, defaultMidL3, defaultHighL3, defaultLowMB, defaultMidMB, defaultHighMB := 20, 30, 50, 10, 30, 50 - cfg := Config{ - LogDriver: "stdio", - LogDir: constant.DefaultLogDir, - LogSize: defaultLogSize, - LogLevel: "info", - CgroupRoot: constant.DefaultCgroupRoot, - CacheCfg: CacheConfig{ - Enable: false, - DefaultLimitMode: "static", - DefaultResctrlDir: "/sys/fs/resctrl", - AdjustInterval: defaultAdInt, - PerfDuration: defaultPerfDur, - L3Percent: MultiLvlPercent{ - Low: defaultLowL3, - Mid: defaultMidL3, - High: defaultHighL3, - }, - MemBandPercent: MultiLvlPercent{ - Low: defaultLowMB, - Mid: defaultMidMB, - High: defaultHighMB, - }, - }, - BlkioCfg: BlkioConfig{ - Enable: false, - }, - MemCfg: MemoryConfig{ - 
Enable: false, - Strategy: constant.DefaultMemStrategy, - CheckInterval: constant.DefaultMemCheckInterval, - }, - } - - defer func() { - CgroupRoot = cfg.CgroupRoot - }() - - if !util.PathExist(path) { - return &cfg, nil + data, err := loadConfigFile(path) + if err != nil { + return fmt.Errorf("error loading config file %s: %w", path, err) } - - b, err := util.ReadSmallFile(filepath.Clean(path)) + fields, err := c.ParseConfig(data) if err != nil { - return nil, err + return fmt.Errorf("error parsing config: %v", err) } - - reader := bytes.NewReader(b) - if err := json.NewDecoder(reader).Decode(&cfg); err != nil { - return nil, err + c.Fields = fields + if err := c.parseAgentConfig(); err != nil { + return fmt.Errorf("error parsing agent config: %v", err) } - - return &cfg, nil + return nil } -// String return string format. -func (cfg *Config) String() string { - data, err := json.MarshalIndent(cfg, "", " ") +func (c *Config) String() string { + data, err := json.MarshalIndent(c.Fields, "", " ") if err != nil { return "{}" } - return string(data) + return fmt.Sprintf("%s", string(data)) +} + +// filterNonServiceKeys returns true when inputting a non-service name +func (c *Config) filterNonServiceKeys(name string) bool { + // 1. ignore system configured key values + _, ok := sysConfKeys[name] + return ok +} + +// UnwrapServiceConfig returns service configuration, indexed by service name +func (c *Config) UnwrapServiceConfig() map[string]interface{} { + serviceConfig := make(map[string]interface{}) + for name, conf := range c.Fields { + if c.filterNonServiceKeys(name) { + continue + } + serviceConfig[name] = conf + } + return serviceConfig } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 86c3df83144786b6efa3ca26fb261605a9b6450d..dbbd2e4dc30ed3c2330faca359396ad99a689885 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -1,4 +1,4 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. 
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. // rubik licensed under the Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan PSL v2. // You may obtain a copy of Mulan PSL v2 at: @@ -7,13 +7,15 @@ // IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR // PURPOSE. // See the Mulan PSL v2 for more details. -// Author: Danni Xia -// Create: 2021-05-07 -// Description: config load test +// Author: Jiaqi Yang +// Create: 2023-02-01 +// Description: This file is used to test the functions of config.go +// Package config is used to manage the configuration of rubik package config import ( + "fmt" "io/ioutil" "os" "path/filepath" @@ -21,107 +23,60 @@ import ( "github.com/stretchr/testify/assert" - "isula.org/rubik/pkg/constant" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/util" ) -var ( - cfgA = ` +var rubikConfig string = ` { - "autoCheck": true, - "logDriver": "file", - "logDir": "/tmp/rubik-test", - "logSize": 2048, - "logLevel": "debug", - "cgroupRoot": "/tmp/rubik-test/cgroup" -}` -) + "agent": { + "logDriver": "stdio", + "logDir": "/var/log/rubik", + "logSize": 2048, + "logLevel": "info" + }, + "blkio":{}, + "qos": { + "subSys": ["cpu", "memory"] + }, + "cacheLimit": { + "defaultLimitMode": "static", + "adjustInterval": 1000, + "perfDuration": 1000, + "l3Percent": { + "low": 0, + "mid": 10, + "high": 100 + }, + "memBandPercent": { + "low": 10, + "mid": 30, + "high": 50 + } + } +} +` -// TestNewConfig is NewConfig function test func TestNewConfig(t *testing.T) { - tmpConfigFile := filepath.Join(constant.TmpTestDir, "config.json") - os.Remove(tmpConfigFile) - - // coverage - NewConfig("") - - // test_rubik_load_config_file_0001 - defaultLogSize := 1024 - cfg, err := NewConfig(tmpConfigFile) - assert.NoError(t, err) - assert.Equal(t, cfg.AutoCheck, false) - assert.Equal(t, cfg.LogDriver, "stdio") - assert.Equal(t, cfg.LogDir, 
constant.DefaultLogDir) - assert.Equal(t, cfg.LogSize, defaultLogSize) - assert.Equal(t, cfg.LogLevel, "info") - assert.Equal(t, cfg.CgroupRoot, constant.DefaultCgroupRoot) - - // test_rubik_load_config_file_0003 - err = os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - assert.NoError(t, err) + if !util.PathExist(constant.TmpTestDir) { + if err := os.Mkdir(constant.TmpTestDir, constant.DefaultDirMode); err != nil { + assert.NoError(t, err) + } + } - logSize := 2048 - err = ioutil.WriteFile(tmpConfigFile, []byte(cfgA), constant.DefaultFileMode) - assert.NoError(t, err) - cfg, err = NewConfig(tmpConfigFile) - assert.NoError(t, err) - assert.Equal(t, cfg.AutoCheck, true) - assert.Equal(t, cfg.LogDriver, "file") - assert.Equal(t, cfg.LogDir, "/tmp/rubik-test") - assert.Equal(t, cfg.LogSize, logSize) - assert.Equal(t, cfg.LogLevel, "debug") - assert.Equal(t, cfg.CgroupRoot, "/tmp/rubik-test/cgroup") + defer os.RemoveAll(constant.TmpTestDir) - // test_rubik_load_config_file_0002 - err = ioutil.WriteFile(tmpConfigFile, []byte("abc"), constant.DefaultFileMode) - assert.NoError(t, err) - _, err = NewConfig(tmpConfigFile) - assert.Contains(t, err.Error(), "invalid character") - - size := 20000000 - big := make([]byte, size, size) - err = ioutil.WriteFile(tmpConfigFile, big, constant.DefaultFileMode) - assert.NoError(t, err) - _, err = NewConfig(tmpConfigFile) - assert.Contains(t, err.Error(), "too big") - - err = os.Remove(tmpConfigFile) - assert.NoError(t, err) -} - -// TestConfig_String is config string function test -func TestConfig_String(t *testing.T) { tmpConfigFile := filepath.Join(constant.TmpTestDir, "config.json") - os.Remove(tmpConfigFile) - - cfg, err := NewConfig(tmpConfigFile) - assert.NoError(t, err) - - cfgString := cfg.String() - assert.Equal(t, cfgString, `{ - "logDriver": "stdio", - "logDir": "/var/log/rubik", - "logSize": 1024, - "logLevel": "info", - "cgroupRoot": "/sys/fs/cgroup", - "cacheConfig": { - "defaultLimitMode": "static", - 
"adjustInterval": 1000, - "perfDuration": 1000, - "l3Percent": { - "low": 20, - "mid": 30, - "high": 50 - }, - "memBandPercent": { - "low": 10, - "mid": 30, - "high": 50 - } - }, - "blkioConfig": {}, - "memoryConfig": { - "strategy": "none", - "checkInterval": 5 - } -}`) + defer os.Remove(tmpConfigFile) + if err := ioutil.WriteFile(tmpConfigFile, []byte(rubikConfig), constant.DefaultFileMode); err != nil { + assert.NoError(t, err) + return + } + + c := NewConfig(JSON) + if err := c.LoadConfig(tmpConfigFile); err != nil { + assert.NoError(t, err) + return + } + fmt.Printf("config: %v", c) } diff --git a/pkg/config/jsonparser.go b/pkg/config/jsonparser.go new file mode 100644 index 0000000000000000000000000000000000000000..7cbc6b21b1cff279aad05e2f6a2e74bc475242f5 --- /dev/null +++ b/pkg/config/jsonparser.go @@ -0,0 +1,64 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Jiaqi Yang +// Create: 2023-02-01 +// Description: This file contains parsing functions for the json language + +// Package config is used to manage the configuration of rubik +package config + +import ( + "encoding/json" +) + +// defaultJsonParser is globally unique json parser +var defaultJsonParser *jsonParser + +// jsonParser is used to parse json +type jsonParser struct{} + +// getJsonParser gets the globally unique json parser +func getJsonParser() *jsonParser { + if defaultJsonParser == nil { + defaultJsonParser = &jsonParser{} + } + return defaultJsonParser +} + +// ParseConfig parses json data as map[string]interface{} +func (parser *jsonParser) ParseConfig(data []byte) (map[string]interface{}, error) { + m := make(map[string]interface{}) + if err := json.Unmarshal(data, &m); err != nil { + return nil, err + } + return m, nil +} + +// UnmarshalSubConfig deserializes interface to structure +func (p *jsonParser) UnmarshalSubConfig(data interface{}, v interface{}) error { + jsonString, err := json.Marshal(data) + if err != nil { + return err + } + // 1. convert json string to struct + return json.Unmarshal(jsonString, v) +} + +// MarshalIndent serializes interface to string +func (p *jsonParser) MarshalIndent(data interface{}, prefix, indent string) (string, error) { + if data == nil { + return "", nil + } + res, err := json.MarshalIndent(data, prefix, indent) + if err != nil { + return "", err + } + return string(res), nil +} diff --git a/pkg/config/parserfactory.go b/pkg/config/parserfactory.go new file mode 100644 index 0000000000000000000000000000000000000000..d7a74c7f2afd669be2ec44351494f47e3cf0908e --- /dev/null +++ b/pkg/config/parserfactory.go @@ -0,0 +1,46 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Create: 2023-02-01
+// Description: This file contains factory classes for configuring parsers for different languages
+
+// Package config is used to manage the configuration of rubik
+package config
+
+// parserType enumerates the supported configuration languages
+type parserType int8
+
+// JSON selects the json parser
+const JSON parserType = iota
+
+// ConfigParser is the behaviour every language-specific configuration parser
+// has to provide: parsing raw bytes, decoding a sub-config and pretty-printing.
+type ConfigParser interface {
+	ParseConfig(data []byte) (map[string]interface{}, error)
+	UnmarshalSubConfig(data interface{}, v interface{}) error
+	MarshalIndent(v interface{}, prefix, indent string) (string, error)
+}
+
+// parserFactory hands out parser instances by parserType
+type parserFactory struct{}
+
+// defaultParserFactory is the globally unique parser factory
+var defaultParserFactory = &parserFactory{}
+
+// getParser returns the parser instance that matches pType.
+// json is the only implementation visible here, so it also serves as the
+// fallback for any other parser type.
+func (factory *parserFactory) getParser(pType parserType) ConfigParser {
+	if pType == JSON {
+		return getJsonParser()
+	}
+	// any other type falls back to the json parser
+	return getJsonParser()
+}
diff --git a/pkg/core/publisher/genericpublisher.go b/pkg/core/publisher/genericpublisher.go
new file mode 100644
index 0000000000000000000000000000000000000000..adad52bdd41505d19dca584c3671ed5682b896c3
--- /dev/null
+++ b/pkg/core/publisher/genericpublisher.go
@@ -0,0 +1,113 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Create: 2023-01-05
+// Description: This file implements pod publisher
+
+// Package publisher implements publisher interface
+package publisher
+
+import (
+	"fmt"
+	"sync"
+
+	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/common/log"
+	"isula.org/rubik/pkg/core/typedef"
+)
+
+type (
+	// subscriberIDs records subscriber's ID
+	subscriberIDs map[string]struct{}
+	// NotifyFunc is used to notify subscribers of events
+	NotifyFunc func(typedef.EventType, typedef.Event)
+)
+
+// defaultPublisher is the globally unique generic publisher entity. It is created eagerly at
+// package initialization so that concurrent first calls cannot race on a lazy assignment.
+var defaultPublisher = newGenericPublisher()
+
+// genericPublisher is the structure to publish Event
+type genericPublisher struct {
+	sync.RWMutex
+	// topicSubscribersMap is a collection of subscribers organized by interested topics
+	topicSubscribersMap map[typedef.EventType]subscriberIDs
+	// subscribers is the set of notification methods divided by ID
+	subscribers map[string]NotifyFunc
+}
+
+// newGenericPublisher creates a genericPublisher instance with empty, ready-to-use maps
+func newGenericPublisher() *genericPublisher {
+	return &genericPublisher{
+		subscribers:         make(map[string]NotifyFunc),
+		topicSubscribersMap: make(map[typedef.EventType]subscriberIDs),
+	}
+}
+
+// getGenericPublisher returns the globally unique generic publisher entity
+func getGenericPublisher() *genericPublisher {
+	return defaultPublisher
+}
+
+// subscriberExisted confirms the existence of the subscriber based on the ID
+func (pub *genericPublisher) subscriberExisted(id string) bool {
+	pub.RLock()
+	defer pub.RUnlock()
+	_, ok := pub.subscribers[id]
+	return ok
+}
+
+// Subscribe registers a api.Subscriber for every topic returned by its TopicsFunc.
+// The existence check and the registration happen under a single write lock, so two
+// concurrent calls with the same ID cannot both succeed.
+// An error is returned when a subscriber with the same ID is already registered.
+func (pub *genericPublisher) Subscribe(s api.Subscriber) error {
+	id := s.ID()
+	pub.Lock()
+	defer pub.Unlock()
+	if _, ok := pub.subscribers[id]; ok {
+		return fmt.Errorf("subscriber %v has registered", id)
+	}
+	for _, topic := range s.TopicsFunc() {
+		if _, ok := pub.topicSubscribersMap[topic]; !ok {
+			pub.topicSubscribersMap[topic] = make(subscriberIDs)
+		}
+		pub.topicSubscribersMap[topic][id] = struct{}{}
+		log.Debugf("%s subscribes topic %s", id, topic)
+	}
+	pub.subscribers[id] = s.NotifyFunc
+	return nil
+}
+
+// Unsubscribe unsubscribes the indicated subscriber from the topics returned by its TopicsFunc.
+// The existence check and the removal happen under a single write lock.
+// An unknown subscriber is only reported with a warning.
+func (pub *genericPublisher) Unsubscribe(s api.Subscriber) {
+	id := s.ID()
+	pub.Lock()
+	defer pub.Unlock()
+	if _, ok := pub.subscribers[id]; !ok {
+		log.Warnf("subscriber %v has not registered", id)
+		return
+	}
+	for _, topic := range s.TopicsFunc() {
+		// delete on a missing topic entry (nil map) is a no-op
+		delete(pub.topicSubscribersMap[topic], id)
+		log.Debugf("%s unsubscribes topic %s", id, topic)
+	}
+	delete(pub.subscribers, id)
+}
+
+// Publish publishes Event to subscribers interested in specified topic.
+// The callbacks are collected under the read lock and invoked after it has been released,
+// so a callback may call Subscribe/Unsubscribe without deadlocking on the publisher lock.
+// Subscribers registered with a nil callback are skipped instead of causing a panic.
+func (pub *genericPublisher) Publish(eventType typedef.EventType, data typedef.Event) {
+	log.Debugf("publish %s event", eventType.String())
+	pub.RLock()
+	notifiers := make([]NotifyFunc, 0, len(pub.topicSubscribersMap[eventType]))
+	for id := range pub.topicSubscribersMap[eventType] {
+		if notify := pub.subscribers[id]; notify != nil {
+			notifiers = append(notifiers, notify)
+		}
+	}
+	pub.RUnlock()
+	for _, notify := range notifiers {
+		notify(eventType, data)
+	}
+}
diff --git a/pkg/core/publisher/genericpublisher_test.go b/pkg/core/publisher/genericpublisher_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..16cd832035a7cbdd3d35815cc5b59921724ae612
--- /dev/null
+++ b/pkg/core/publisher/genericpublisher_test.go
@@ -0,0 +1,143 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Create: 2023-03-23
+// Description: This file tests pod publisher
+
+// Package publisher implements publisher interface
+package publisher
+
+import (
+	"testing"
+
+	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/core/typedef"
+)
+
+// mockSubscriber is used to mock a subscriber
+type mockSubscriber struct {
+	name string
+}
+
+// ID returns the unique ID of the subscriber
+func (s *mockSubscriber) ID() string {
+	return s.name
+}
+
+// NotifyFunc notifies subscriber event
+func (s *mockSubscriber) NotifyFunc(eventType typedef.EventType, event typedef.Event) {}
+
+// TopicsFunc returns the topics that the subscriber is interested in
+func (s *mockSubscriber) TopicsFunc() []typedef.EventType {
+	return []typedef.EventType{typedef.RAWPODADD}
+}
+
+// Test_genericPublisher_Subscribe tests Subscribe of genericPublisher.
+// Besides the returned error it checks that the subscriber is registered afterwards.
+func Test_genericPublisher_Subscribe(t *testing.T) {
+	const subID = "ID"
+	type fields struct {
+		topicSubscribersMap map[typedef.EventType]subscriberIDs
+		subscribers         map[string]NotifyFunc
+	}
+	type args struct {
+		s api.Subscriber
+	}
+	tests := []struct {
+		name    string
+		fields  fields
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "TC1-subscriber existed",
+			fields: fields{
+				topicSubscribersMap: map[typedef.EventType]subscriberIDs{
+					typedef.INFOADD: map[string]struct{}{subID: {}},
+				},
+				subscribers: map[string]NotifyFunc{subID: nil},
+			},
+			args: args{
+				s: &mockSubscriber{name: subID},
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC2-subscriber is not existed",
+			fields: fields{
+				topicSubscribersMap: make(map[typedef.EventType]subscriberIDs),
+				subscribers:         make(map[string]NotifyFunc),
+			},
+			args: args{
+				s: &mockSubscriber{name: subID},
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			pub := newGenericPublisher()
+			pub.subscribers = tt.fields.subscribers
+			pub.topicSubscribersMap = tt.fields.topicSubscribersMap
+			if err := pub.Subscribe(tt.args.s); (err != nil) != tt.wantErr {
+				t.Errorf("genericPublisher.Subscribe() error = %v, wantErr %v", err, tt.wantErr)
+			}
+			// the ID is registered in both cases: pre-existing in TC1, newly added in TC2
+			if !pub.subscriberExisted(subID) {
+				t.Errorf("genericPublisher.Subscribe() subscriber %v is not registered", subID)
+			}
+			pub.Publish(typedef.RAWPODADD, "a")
+		})
+	}
+}
+
+// Test_genericPublisher_Unsubscribe tests Unsubscribe of genericPublisher.
+// It checks that the subscriber is no longer registered afterwards.
+func Test_genericPublisher_Unsubscribe(t *testing.T) {
+	const subID = "ID"
+	type fields struct {
+		topicSubscribersMap map[typedef.EventType]subscriberIDs
+		subscribers         map[string]NotifyFunc
+	}
+	type args struct {
+		s api.Subscriber
+	}
+	tests := []struct {
+		name   string
+		fields fields
+		args   args
+	}{
+		{
+			name: "TC1-subscriber existed",
+			fields: fields{
+				topicSubscribersMap: map[typedef.EventType]subscriberIDs{
+					typedef.INFOADD: map[string]struct{}{subID: {}},
+				},
+				subscribers: map[string]NotifyFunc{subID: nil},
+			},
+			args: args{
+				s: &mockSubscriber{name: subID},
+			},
+		},
+		{
+			name: "TC2-subscriber is not existed",
+			fields: fields{
+				topicSubscribersMap: make(map[typedef.EventType]subscriberIDs),
+				subscribers:         make(map[string]NotifyFunc),
+			},
+			args: args{
+				s: &mockSubscriber{name: subID},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			pub := &genericPublisher{
+				topicSubscribersMap: tt.fields.topicSubscribersMap,
+				subscribers:         tt.fields.subscribers,
+			}
+			pub.Unsubscribe(tt.args.s)
+			if pub.subscriberExisted(subID) {
+				t.Errorf("genericPublisher.Unsubscribe() subscriber %v is still registered", subID)
+			}
+		})
+	}
+}
diff --git a/pkg/core/publisher/publisherfactory.go b/pkg/core/publisher/publisherfactory.go
new file mode 100644
index 0000000000000000000000000000000000000000..e457b668cd168f0b4b3b296bfd4dfaadd2cc5b57
--- /dev/null
+++ b/pkg/core/publisher/publisherfactory.go
@@ -0,0 +1,45 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023.
All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Create: 2023-01-05
+// Description: This file contains publisher factory
+
+// Package publisher implements publisher interface
+package publisher
+
+import "isula.org/rubik/pkg/api"
+
+// publihserType enumerates the available publisher implementations.
+// NOTE(review): the name is misspelled ("publihser"); it is kept as is because the
+// package tests refer to it by this name.
+type publihserType int8
+
+// GENERIC indicates the generic publisher type
+const GENERIC publihserType = iota
+
+// Factory is the factory class of the publisher entity
+type Factory struct{}
+
+// publisherFactory is the single Factory instance shared by all callers
+var publisherFactory = &Factory{}
+
+// GetPublisherFactory returns the shared publisher factory instance
+func GetPublisherFactory() *Factory {
+	return publisherFactory
+}
+
+// GetPublisher returns the publisher entity according to the publisher type.
+// nil is returned for any type other than GENERIC.
+func (f *Factory) GetPublisher(publisherType publihserType) api.Publisher {
+	if publisherType == GENERIC {
+		return getGenericPublisher()
+	}
+	return nil
+}
diff --git a/pkg/core/publisher/publisherfactory_test.go b/pkg/core/publisher/publisherfactory_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..b84fde105ac2e6d212997438c5126c6bb404e1dd
--- /dev/null
+++ b/pkg/core/publisher/publisherfactory_test.go
@@ -0,0 +1,46 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Create: 2023-03-23
+// Description: This file tests publisher factory
+
+// Package publisher implements publisher interface
+package publisher
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestGetPublisherFactory checks that GetPublisherFactory returns a usable Factory:
+// GENERIC yields a publisher, a type outside the declared constants yields nil.
+func TestGetPublisherFactory(t *testing.T) {
+	// unknownType is not one of the declared publisher types
+	const unknownType publihserType = 100
+	type testCase struct {
+		name string
+		want *Factory
+	}
+	cases := []testCase{
+		{name: "TC1-success", want: &Factory{}},
+	}
+	for i := range cases {
+		tc := cases[i]
+		t.Run(tc.name, func(t *testing.T) {
+			factory := GetPublisherFactory()
+			if equal := reflect.DeepEqual(factory, tc.want); !equal {
+				t.Errorf("GetPublisherFactory() = %v, want %v", factory, tc.want)
+			}
+			assert.NotNil(t, factory.GetPublisher(GENERIC))
+			assert.Nil(t, factory.GetPublisher(unknownType))
+		})
+	}
+}
diff --git a/pkg/core/subscriber/genericsubscriber.go b/pkg/core/subscriber/genericsubscriber.go
new file mode 100644
index 0000000000000000000000000000000000000000..5df89d2597e266314e75f3c2e613100d0e98058e
--- /dev/null
+++ b/pkg/core/subscriber/genericsubscriber.go
@@ -0,0 +1,48 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Create: 2023-01-05
+// Description: This file implements the generic subscriber functionality
+
+// Package subscriber implements generic subscriber interface
+package subscriber
+
+import (
+	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/core/typedef"
+)
+
+// genericSubscriber pairs an ID with an api.EventHandler and forwards
+// notifications and topic queries to that handler
+type genericSubscriber struct {
+	id string
+	api.EventHandler
+}
+
+// NewGenericSubscriber returns the generic subscriber entity built from handler and id
+func NewGenericSubscriber(handler api.EventHandler, id string) *genericSubscriber {
+	sub := new(genericSubscriber)
+	sub.id = id
+	sub.EventHandler = handler
+	return sub
+}
+
+// ID returns the unique ID of the subscriber
+func (sub *genericSubscriber) ID() string {
+	return sub.id
+}
+
+// NotifyFunc notifies the subscriber of an event by delegating to the handler's HandleEvent
+func (sub *genericSubscriber) NotifyFunc(eventType typedef.EventType, event typedef.Event) {
+	sub.EventHandler.HandleEvent(eventType, event)
+}
+
+// TopicsFunc returns the topics that the subscriber is interested in,
+// i.e. the handler's EventTypes
+func (sub *genericSubscriber) TopicsFunc() []typedef.EventType {
+	return sub.EventHandler.EventTypes()
+}
diff --git a/pkg/memory/status_test.go b/pkg/core/subscriber/genericsubscriber_test.go
similarity index 31%
rename from pkg/memory/status_test.go
rename to pkg/core/subscriber/genericsubscriber_test.go
index 080307b9cd40308f60a91e21e2f5c7496233708e..65bfe918b02fc9fba654d75cbaf2b3a50b666464 100644
--- a/pkg/memory/status_test.go
+++ b/pkg/core/subscriber/genericsubscriber_test.go
@@ -1,4 +1,4 @@
-// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
 // rubik licensed under the Mulan PSL v2.
 // You can use this software according to the terms and conditions of the Mulan PSL v2.
 // You may obtain a copy of Mulan PSL v2 at:
@@ -7,79 +7,56 @@
 // IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 // PURPOSE.
 // See the Mulan PSL v2 for more details.
-// Author: Yang Feiyu
-// Create: 2022-6-7
-// Description: tests for memory status functions
+// Author: Jiaqi Yang
+// Create: 2023-03-23
+// Description: This file tests the generic subscriber functionality
 
-package memory
+// Package subscriber implements generic subscriber interface
+package subscriber
 
 import (
 	"testing"
 
 	"github.com/stretchr/testify/assert"
+
+	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/core/typedef"
 )
 
-func TestSetStatus(t *testing.T) {
-	s := newStatus()
-	s.pressureLevel = relieve
-	s.set(normal)
-	assert.Equal(t, s.pressureLevel, normal)
-	assert.Equal(t, s.relieveCnt, 0)
-}
+type mockEventHandler struct{}
 
-func TestIsNormal(t *testing.T) {
-	s := newStatus()
-	s.pressureLevel = relieve
-	assert.Equal(t, s.isNormal(), false)
-	s.set(normal)
-	assert.Equal(t, s.isNormal(), true)
-	assert.Equal(t, s.relieveCnt, 0)
-}
+// HandleEvent handles the event from publisher
+func (h *mockEventHandler) HandleEvent(eventType typedef.EventType, event typedef.Event) {}
 
-func TestIsRelieve(t *testing.T) {
-	s := newStatus()
-	s.pressureLevel = relieve
-	assert.Equal(t, s.isRelieve(), true)
+// EventTypes returns the interested event types
+func (h *mockEventHandler) EventTypes() []typedef.EventType {
+	return nil
 }
 
-func TestGetLevelInPressure(t *testing.T) {
+// TestNewGenericSubscriber tests NewGenericSubscriber
+func TestNewGenericSubscriber(t *testing.T) {
+	type args struct {
+		handler api.EventHandler
+		id      string
+	}
 	tests := []struct {
-		freePercentage float64
-		level          levelInt
+		name string
+		args args
 	}{
 		{
-			freePercentage: 0.04,
-			level:          critical,
-		},
-		{
-			freePercentage: 0.09,
-			level:          high,
-		},
-		{
-			freePercentage: 0.13,
-			level:          mid,
-		},
-		{
-			freePercentage: 0.25,
-			level:          low,
+			name: "TC1-NewGenericSubscriber/ID/NotifyFunc/TopicsFunc",
+			args: args{
+				handler: &mockEventHandler{},
+				id:      "rubik",
+			},
 		},
 	}
-
 	for _, tt := range tests {
-		tmp := getLevelInPressure(tt.freePercentage)
-		assert.Equal(t, tmp, tt.level)
+		t.Run(tt.name, func(t *testing.T) {
+			got := NewGenericSubscriber(tt.args.handler, tt.args.id)
+			assert.Equal(t, "rubik", got.ID())
+			got.NotifyFunc(typedef.INFOADD, nil)
+			got.TopicsFunc()
+		})
 	}
 }
-
-func TestTransitionStatus(t *testing.T) {
-	s := newStatus()
-	s.transitionStatus(0.04)
-	assert.Equal(t, s.pressureLevel, critical)
-
-	s.transitionStatus(0.6)
-	assert.Equal(t, s.pressureLevel, relieve)
-	s.relieveCnt = relieveMaxCnt
-
-	s.transitionStatus(0.6)
-	assert.Equal(t, s.pressureLevel, normal)
-}
diff --git a/pkg/core/typedef/cgroup/common.go b/pkg/core/typedef/cgroup/common.go
new file mode 100644
index 0000000000000000000000000000000000000000..237015a6f89e0d7ba6bd37cc42149a733bca3b5f
--- /dev/null
+++ b/pkg/core/typedef/cgroup/common.go
@@ -0,0 +1,205 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Create: 2023-01-05
+// Description: This file defines cgroupAttr and CgroupKey
+
+// Package cgroup uses map to manage cgroup parameters and provides a friendly and simple cgroup usage method
+package cgroup
+
+import (
+	"fmt"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/common/util"
+)
+
+// rootDir is the cgroup mount directory that relative cgroup paths are joined to
+var rootDir = constant.DefaultCgroupRoot
+
+// AbsoluteCgroupPath returns the absolute path of the cgroup,
+// i.e. the given elements joined below the current mount directory
+func AbsoluteCgroupPath(elem ...string) string {
+	elem = append([]string{rootDir}, elem...)
+	return filepath.Join(elem...)
+}
+
+// ReadCgroupFile reads data from the cgroup file addressed by elem (relative to the mount directory)
+func ReadCgroupFile(elem ...string) ([]byte, error) {
+	return readCgroupFile(AbsoluteCgroupPath(elem...))
+}
+
+// WriteCgroupFile writes content to the cgroup file addressed by elem (relative to the mount directory)
+func WriteCgroupFile(content string, elem ...string) error {
+	return writeCgroupFile(AbsoluteCgroupPath(elem...), content)
+}
+
+// readCgroupFile reads the file at cgPath and fails early when the path does not exist
+func readCgroupFile(cgPath string) ([]byte, error) {
+	if !util.PathExist(cgPath) {
+		return nil, fmt.Errorf("%v: no such file or directory", cgPath)
+	}
+	return util.ReadFile(cgPath)
+}
+
+// writeCgroupFile writes content to the file at cgPath and fails early when the path does not exist
+func writeCgroupFile(cgPath, content string) error {
+	if !util.PathExist(cgPath) {
+		return fmt.Errorf("%v: no such file or directory", cgPath)
+	}
+	return util.WriteFile(cgPath, content)
+}
+
+// InitMountDir sets the mount directory of the cgroup file system
+func InitMountDir(arg string) {
+	rootDir = arg
+}
+
+// GetMountDir returns the mount point path of the cgroup
+func GetMountDir() string {
+	return rootDir
+}
+
+type (
+	// Key uniquely determines the cgroup value of the container or pod
+	Key struct {
+		// SubSys refers to the subsystem of the cgroup
+		SubSys string
+		// FileName represents the cgroup file name
+		FileName string
+	}
+	// Attr represents a single cgroup attribute, and Err represents whether the Value is available
+	Attr struct {
+		Value string
+		Err   error
+	}
+	// SetterAndGetter is used for set and get value to/from cgroup file
+	SetterAndGetter interface {
+		SetCgroupAttr(*Key, string) error
+		GetCgroupAttr(*Key) *Attr
+	}
+)
+
+// Expect judges whether Attr is consistent with the input.
+// arg may be an int, an int64 or a string; any other type is rejected with an error.
+// The error stored in Attr, a conversion failure or a value mismatch is returned as error.
+func (attr *Attr) Expect(arg interface{}) error {
+	if attr.Err != nil {
+		return attr.Err
+	}
+
+	switch arg := arg.(type) {
+	case int:
+		value, err := attr.Int()
+		if err != nil {
+			return fmt.Errorf("fail to convert: %v", err)
+		}
+		if value != arg {
+			return fmt.Errorf("%v is not equal to %v", value, arg)
+		}
+	case string:
+		if attr.Value != arg {
+			return fmt.Errorf("%v is not equal to %v", attr.Value, arg)
+		}
+	case int64:
+		value, err := attr.Int64()
+		if err != nil {
+			return fmt.Errorf("fail to convert: %v", err)
+		}
+		if value != arg {
+			return fmt.Errorf("%v is not equal to %v", value, arg)
+		}
+	default:
+		return fmt.Errorf("invalid expect type: %T", arg)
+	}
+	return nil
+}
+
+// Int64 parses Attr as int64 type
+func (attr *Attr) Int64() (int64, error) {
+	if attr.Err != nil {
+		return 0, attr.Err
+	}
+	return util.ParseInt64(attr.Value)
+}
+
+// Int parses Attr as int type
+func (attr *Attr) Int() (int, error) {
+	if attr.Err != nil {
+		return 0, attr.Err
+	}
+	return strconv.Atoi(attr.Value)
+}
+
+// Int64Map parses Attr as map[string]int64 type
+func (attr *Attr) Int64Map() (map[string]int64, error) {
+	if attr.Err != nil {
+		return nil, attr.Err
+	}
+	return util.ParseInt64Map(attr.Value)
+}
+
+// CPUStat parses Attr as CPUStat type
+func (attr *Attr) CPUStat() (*CPUStat, error) {
+	if attr.Err != nil {
+		return nil, attr.Err
+	}
+	return NewCPUStat(attr.Value)
+}
+
+// Hierarchy is used to represent a cgroup path
+type Hierarchy struct {
+	MountPoint string `json:"mountPoint,omitempty"`
+	Path       string `json:"cgroupPath"`
+}
+
+// NewHierarchy creates a Hierarchy instance
+func NewHierarchy(mountPoint, path string) *Hierarchy {
+	return &Hierarchy{
+		MountPoint: mountPoint,
+		Path:       path,
+	}
+}
+
+// cgroupFilePath returns the path of the cgroup file identified by key.
+// The package-level mount directory is used when the hierarchy has no mount point of its own.
+func (h *Hierarchy) cgroupFilePath(key *Key) string {
+	mountPoint := rootDir
+	if len(h.MountPoint) > 0 {
+		mountPoint = h.MountPoint
+	}
+	return filepath.Join(mountPoint, key.SubSys, h.Path, key.FileName)
+}
+
+// SetCgroupAttr sets value to the cgroup file.
+// An invalid key or a missing cgroup file is reported as an error.
+func (h *Hierarchy) SetCgroupAttr(key *Key, value string) error {
+	if err := validateCgroupKey(key); err != nil {
+		return err
+	}
+	return writeCgroupFile(h.cgroupFilePath(key), value)
+}
+
+// GetCgroupAttr gets cgroup file content.
+// The returned Attr carries the file content with surrounding whitespace trimmed,
+// or the error met while validating the key or reading the file.
+func (h *Hierarchy) GetCgroupAttr(key *Key) *Attr {
+	if err := validateCgroupKey(key); err != nil {
+		return &Attr{Err: err}
+	}
+	data, err := readCgroupFile(h.cgroupFilePath(key))
+	if err != nil {
+		return &Attr{Err: err}
+	}
+	return &Attr{Value: strings.TrimSpace(string(data)), Err: nil}
+}
+
+// validateCgroupKey is used to verify the validity of the cgroup key:
+// the key must be non-nil and both SubSys and FileName must be set
+func validateCgroupKey(key *Key) error {
+	if key == nil {
+		return fmt.Errorf("key cannot be empty")
+	}
+	if len(key.SubSys) == 0 || len(key.FileName) == 0 {
+		return fmt.Errorf("invalid key")
+	}
+	return nil
+}
diff --git a/pkg/core/typedef/cgroup/common_test.go b/pkg/core/typedef/cgroup/common_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..6138594e2a81fd018daebc6eb26dba583df9efac
--- /dev/null
+++ b/pkg/core/typedef/cgroup/common_test.go
@@ -0,0 +1,632 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang +// Create: 2023-03-23 +// Description: This file test common function of cgroup + +// Package cgroup uses map to manage cgroup parameters and provides a friendly and simple cgroup usage method +package cgroup + +import ( + "fmt" + "os" + "path/filepath" + "reflect" + "testing" + + "github.com/stretchr/testify/assert" + + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/util" +) + +// TestReadCgroupFile tests ReadCgroupFile +func TestReadCgroupFile(t *testing.T) { + InitMountDir(constant.TmpTestDir) + defer InitMountDir(constant.DefaultCgroupRoot) + pathElems := []string{"cpu", "kubepods", "PodXXX", "ContYYY", "cpu.cfs_quota_us"} + const value = "-1\n" + tests := []struct { + name string + args []string + pre func(t *testing.T) + post func(t *testing.T) + want []byte + wantErr bool + }{ + { + name: "TC1-non existed path", + args: pathElems, + wantErr: true, + want: nil, + }, + { + name: "TC2-successfully", + args: pathElems, + pre: func(t *testing.T) { + assert.NoError(t, util.WriteFile( + filepath.Join(constant.TmpTestDir, filepath.Join(pathElems...)), + value)) + }, + post: func(t *testing.T) { + assert.NoError(t, os.RemoveAll(constant.TmpTestDir)) + }, + wantErr: false, + want: []byte(value), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.pre != nil { + tt.pre(t) + } + got, err := ReadCgroupFile(tt.args...) 
+ if (err != nil) != tt.wantErr { + t.Errorf("ReadCgroupFile() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("ReadCgroupFile() = %v, want %v", got, tt.want) + } + if tt.post != nil { + tt.post(t) + } + }) + } +} + +// TestWriteCgroupFile tests WriteCgroupFile +func TestWriteCgroupFile(t *testing.T) { + InitMountDir(constant.TmpTestDir) + defer InitMountDir(constant.DefaultCgroupRoot) + pathElems := []string{"cpu", "kubepods", "PodXXX", "ContYYY", "cpu.cfs_quota_us"} + const value = "-1\n" + type args struct { + content string + elem []string + } + tests := []struct { + name string + args args + wantErr bool + pre func(t *testing.T) + post func(t *testing.T) + }{ + { + name: "TC1-non existed path", + args: args{ + content: value, + elem: pathElems, + }, + wantErr: true, + }, + { + name: "TC2-successfully", + args: args{ + content: value, + elem: pathElems, + }, + pre: func(t *testing.T) { + assert.NoError(t, util.WriteFile( + filepath.Join(constant.TmpTestDir, filepath.Join(pathElems...)), + value)) + }, + post: func(t *testing.T) { + assert.NoError(t, os.RemoveAll(constant.TmpTestDir)) + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.pre != nil { + tt.pre(t) + } + if err := WriteCgroupFile(tt.args.content, tt.args.elem...); (err != nil) != tt.wantErr { + t.Errorf("WriteCgroupFile() error = %v, wantErr %v", err, tt.wantErr) + } + if tt.post != nil { + tt.post(t) + } + }) + } +} + +// TestAttr_Expect tests Expect of Attr +func TestAttr_Expect(t *testing.T) { + const ( + intValue int = 1 + int64Value int64 = 1 + stringValue string = "rubik" + float64Value float64 = 1.0 + ) + type fields struct { + Value string + Err error + } + type args struct { + arg interface{} + } + tests := []struct { + name string + fields fields + args args + wantErr bool + }{ + { + name: "TC1-attribute error", + fields: fields{ + Value: "", + Err: fmt.Errorf("fail to get 
value"), + }, + wantErr: true, + }, + { + name: "TC2.1-expect int 1(parse fail)", + fields: fields{ + Value: "", + }, + args: args{ + arg: intValue, + }, + wantErr: true, + }, + { + name: "TC2.2-expect int 1(not equal value)", + fields: fields{ + Value: "100", + }, + args: args{ + arg: intValue, + }, + wantErr: true, + }, + { + name: "TC2.3-expect int 1(success)", + fields: fields{ + Value: "1", + }, + args: args{ + arg: intValue, + }, + wantErr: false, + }, + { + name: "TC3.1-expect int64 1(parse fail)", + fields: fields{ + Value: "", + }, + args: args{ + arg: int64Value, + }, + wantErr: true, + }, + { + name: "TC3.2-expect int64 1(not equal value)", + fields: fields{ + Value: "100", + }, + args: args{ + arg: int64Value, + }, + wantErr: true, + }, + { + name: "TC3.3-expect int64 1(success)", + fields: fields{ + Value: "1", + }, + args: args{ + arg: int64Value, + }, + wantErr: false, + }, + { + name: "TC4.1-expect string rubik(not equal value)", + fields: fields{ + Value: "-1", + }, + args: args{ + arg: stringValue, + }, + wantErr: true, + }, + { + name: "TC4.2-expect string rubik(success)", + fields: fields{ + Value: stringValue, + }, + args: args{ + arg: stringValue, + }, + wantErr: false, + }, + { + name: "TC5-expect float64(undefined type)", + args: args{ + arg: float64Value, + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + attr := &Attr{ + Value: tt.fields.Value, + Err: tt.fields.Err, + } + if err := attr.Expect(tt.args.arg); (err != nil) != tt.wantErr { + t.Errorf("Attr.Expect() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +// TestAttr_Int64 tests Int64 of Attr +func TestAttr_Int64(t *testing.T) { + const int64Value int64 = 1 + type fields struct { + Value string + Err error + } + tests := []struct { + name string + fields fields + want int64 + wantErr bool + }{ + { + name: "TC1-attribute error", + fields: fields{ + Err: fmt.Errorf("fail to get value"), + }, + wantErr: true, + }, + { + name: 
"TC2-expect int64 1(success)", + fields: fields{ + Value: "1", + }, + want: int64Value, + wantErr: false, + }, + { + name: "TC3-expect int64 1(error parse)", + fields: fields{ + Value: "rubik", + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + attr := &Attr{ + Value: tt.fields.Value, + Err: tt.fields.Err, + } + got, err := attr.Int64() + if (err != nil) != tt.wantErr { + t.Errorf("Attr.Int64() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("Attr.Int64() = %v, want %v", got, tt.want) + } + }) + } +} + +// TestAttr_Int tests Int of Attr +func TestAttr_Int(t *testing.T) { + const intValue int = 1 + type fields struct { + Value string + Err error + } + tests := []struct { + name string + fields fields + want int + wantErr bool + }{ + { + name: "TC1-attribute error", + fields: fields{ + Err: fmt.Errorf("fail to get value"), + }, + wantErr: true, + }, + { + name: "TC2-expect int 1(success)", + fields: fields{ + Value: "1", + }, + want: intValue, + wantErr: false, + }, + { + name: "TC3-expect int 1(error parse)", + fields: fields{ + Value: "rubik", + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + attr := &Attr{ + Value: tt.fields.Value, + Err: tt.fields.Err, + } + got, err := attr.Int() + if (err != nil) != tt.wantErr { + t.Errorf("Attr.Int() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("Attr.Int() = %v, want %v", got, tt.want) + } + }) + } +} + +// TestAttr_Int64Map tests Int64Map of Attr +func TestAttr_Int64Map(t *testing.T) { + m := map[string]int64{"a": 1, "b": 2} + type fields struct { + Value string + Err error + } + tests := []struct { + name string + fields fields + want map[string]int64 + wantErr bool + }{ + + { + name: "TC1-attribute error", + fields: fields{ + Err: fmt.Errorf("fail to get value"), + }, + wantErr: true, + }, + { + name: "TC2-expect int 1(success)", + fields: 
fields{ + Value: `a 1 + b 2`, + }, + want: m, + wantErr: false, + }, + { + name: "TC3-expect int 1(error parse)", + fields: fields{ + Value: "rubik", + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + attr := &Attr{ + Value: tt.fields.Value, + Err: tt.fields.Err, + } + got, err := attr.Int64Map() + if (err != nil) != tt.wantErr { + t.Errorf("Attr.Int64Map() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("Attr.Int64Map() = %v, want %v", got, tt.want) + } + }) + } +} + +// TestAttr_CPUStat tests CPUStat of Attr +func TestAttr_CPUStat(t *testing.T) { + res := &CPUStat{ + NrPeriods: 1, + NrThrottled: 1, + ThrottledTime: 1, + } + type fields struct { + Value string + Err error + } + tests := []struct { + name string + fields fields + want *CPUStat + wantErr bool + }{ + { + name: "TC1-attribute error", + fields: fields{ + Err: fmt.Errorf("fail to get value"), + }, + wantErr: true, + }, + { + name: "TC2-expect int 1(success)", + fields: fields{ + Value: `nr_periods 1 + nr_throttled 1 + throttled_time 1`, + }, + want: res, + wantErr: false, + }, + { + name: "TC3-expect int 1(error parse)", + fields: fields{ + Value: "rubik", + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + attr := &Attr{ + Value: tt.fields.Value, + Err: tt.fields.Err, + } + got, err := attr.CPUStat() + if (err != nil) != tt.wantErr { + t.Errorf("Attr.CPUStat() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("Attr.CPUStat() = %v, want %v", got, tt.want) + } + }) + } +} + +// TestHierarchy_SetCgroupAttr tests SetCgroupAttr of Hierarchy +func TestHierarchy_SetCgroupAttr(t *testing.T) { + type args struct { + key *Key + value string + } + tests := []struct { + name string + path string + args args + wantErr bool + }{ + { + name: "TC1.1-empty key", + args: args{}, + wantErr: true, + }, + { + name: 
"TC1.2-empty Subsys", + args: args{ + key: &Key{}, + }, + wantErr: true, + }, + { + name: "TC2-", + args: args{ + key: &Key{ + SubSys: "cpu", + FileName: "cpu.cfs_quota_us", + }, + value: "1", + }, + path: "kubepods/PodXXX/ContXXX", + wantErr: true, + }, + } + defer os.RemoveAll(constant.TmpTestDir) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + h := NewHierarchy(constant.TmpTestDir, tt.path) + if err := h.SetCgroupAttr(tt.args.key, tt.args.value); (err != nil) != tt.wantErr { + t.Errorf("Hierarchy.SetCgroupAttr() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +// TestHierarchy_GetCgroupAttr tests GetCgroupAttr of Hierarchy +func TestHierarchy_GetCgroupAttr(t *testing.T) { + const ( + contPath = "kubepods/PodXXX/ContXXX" + value = " 1\n" + ) + var quotaKey = &Key{ + SubSys: "cpu", + FileName: "cpu.cfs_quota_us", + } + tests := []struct { + name string + path string + args *Key + want string + wantErr bool + pre func(t *testing.T) + post func(t *testing.T) + }{ + { + name: "TC1.1-empty key", + args: nil, + wantErr: true, + }, + { + name: "TC2-empty path", + args: quotaKey, + path: contPath, + wantErr: true, + }, + { + name: "TC3-success", + args: quotaKey, + path: contPath, + pre: func(t *testing.T) { + assert.NoError(t, util.WriteFile( + filepath.Join(constant.TmpTestDir, quotaKey.SubSys, contPath, quotaKey.FileName), + value)) + }, + post: func(t *testing.T) { + assert.NoError(t, os.RemoveAll(constant.TmpTestDir)) + }, + want: "1", + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + h := NewHierarchy(constant.TmpTestDir, tt.path) + if tt.pre != nil { + tt.pre(t) + } + got := h.GetCgroupAttr(tt.args) + if (got.Err != nil) != tt.wantErr { + t.Errorf("Hierarchy.GetCgroupAttr() error = %v, wantErr %v", got.Err, tt.wantErr) + } + if got.Err == nil { + if got.Value != tt.want { + t.Errorf("Hierarchy.GetCgroupAttr() = %v, want %v", got, tt.want) + } + } + if tt.post != nil { + tt.post(t) + } + 
}) + } +} + +// TestAbsoluteCgroupPath tests AbsoluteCgroupPath +func TestAbsoluteCgroupPath(t *testing.T) { + tests := []struct { + name string + args []string + want string + }{ + { + name: "TC1-AbsoluteCgroupPath", + args: []string{"a", "b"}, + want: GetMountDir() + "/a/b", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := AbsoluteCgroupPath(tt.args...); got != tt.want { + t.Errorf("AbsoluteCgroupPath() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/sync/sync.go b/pkg/core/typedef/cgroup/cpu.go similarity index 40% rename from pkg/sync/sync.go rename to pkg/core/typedef/cgroup/cpu.go index aa063ea6ddd3aff2190635ecf30fb24fe566013d..0104b8cea7ee4a590a939c1346488db9a858c579 100644 --- a/pkg/sync/sync.go +++ b/pkg/core/typedef/cgroup/cpu.go @@ -7,40 +7,38 @@ // IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR // PURPOSE. // See the Mulan PSL v2 for more details. -// Author: Danni Xia -// Create: 2021-04-20 -// Description: qos setting sync +// Author: Jiaqi Yang +// Date: 2023-02-11 +// Description: This file provides the relevant data structures and methods of the cgroup cpu subsystem -package sync +package cgroup -import ( - "isula.org/rubik/pkg/cachelimit" - "isula.org/rubik/pkg/qos" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" -) +import "isula.org/rubik/pkg/common/util" -// Sync qos setting -func Sync(pods map[string]*typedef.PodInfo) error { - for _, pod := range pods { - if err := qos.SetQosLevel(pod); err != nil { - log.Errorf("sync set pod %v qoslevel error: %v", pod.UID, err) - } - if cachelimit.ClEnabled() { - syncCache(pod) - } +type ( + // CPUStat saves the cpu.stat data + CPUStat struct { + NrPeriods int64 + NrThrottled int64 + ThrottledTime int64 } +) - return nil -} - -func syncCache(pi *typedef.PodInfo) { - err := cachelimit.SyncLevel(pi) +// NewCPUStat creates a new CPUStat object and returns its pointer +func NewCPUStat(data
string) (*CPUStat, error) { + const ( + throttlePeriodNumFieldName = "nr_periods" + throttleNumFieldName = "nr_throttled" + throttleTimeFieldName = "throttled_time" + ) + stringInt64Map, err := util.ParseInt64Map(data) if err != nil { - log.Errorf("sync pod %v level error: %v", pi.UID, err) - return - } - if err = cachelimit.SetCacheLimit(pi); err != nil { - log.Errorf("sync pod %v cache limit error: %v", pi.UID, err) + return nil, err } + + return &CPUStat{ + NrPeriods: stringInt64Map[throttlePeriodNumFieldName], + NrThrottled: stringInt64Map[throttleNumFieldName], + ThrottledTime: stringInt64Map[throttleTimeFieldName], + }, nil } diff --git a/pkg/core/typedef/containerinfo.go b/pkg/core/typedef/containerinfo.go new file mode 100644 index 0000000000000000000000000000000000000000..39cb2cc01359e6c44e1004e63550ba230223f78b --- /dev/null +++ b/pkg/core/typedef/containerinfo.go @@ -0,0 +1,100 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Jiaqi Yang +// Create: 2023-01-05 +// Description: This file defines ContainerInfo + +// Package typedef defines core struct and methods for rubik +package typedef + +import ( + "path/filepath" + "strings" + "sync" + + "isula.org/rubik/pkg/core/typedef/cgroup" +) + +// ContainerEngineType indicates the type of container engine +type ContainerEngineType int8 + +const ( + // UNDEFINED means undefined container engine + UNDEFINED ContainerEngineType = iota + // DOCKER means docker container engine + DOCKER + // CONTAINERD means containerd container engine + CONTAINERD +) + +var ( + supportEnginesPrefixMap = map[ContainerEngineType]string{ + DOCKER: "docker://", + CONTAINERD: "containerd://", + } + currentContainerEngines = UNDEFINED + setContainerEnginesOnce sync.Once +) + +// Support returns true when the container uses the container engine +func (engine *ContainerEngineType) Support(cont *RawContainer) bool { + if *engine == UNDEFINED { + return false + } + return strings.HasPrefix(cont.status.ContainerID, engine.Prefix()) +} + +// Prefix returns the ID prefix of the container engine +func (engine *ContainerEngineType) Prefix() string { + prefix, ok := supportEnginesPrefixMap[*engine] + if !ok { + return "" + } + return prefix +} + +// ContainerInfo contains the interested information of container +type ContainerInfo struct { + cgroup.Hierarchy + Name string `json:"name"` + ID string `json:"id"` + RequestResources ResourceMap `json:"requests,omitempty"` + LimitResources ResourceMap `json:"limits,omitempty"` +} + +// NewContainerInfo creates a ContainerInfo instance +func NewContainerInfo(id, podCgroupPath string, rawContainer *RawContainer) *ContainerInfo { + requests, limits := rawContainer.GetResourceMaps() + return &ContainerInfo{ + Name: rawContainer.status.Name, + ID: id, + Hierarchy: cgroup.Hierarchy{Path: filepath.Join(podCgroupPath, id)}, + RequestResources: requests, + LimitResources: limits, + } +} + +func fixContainerEngine(containerID string) 
{ + for engine, prefix := range supportEnginesPrefixMap { + if strings.HasPrefix(containerID, prefix) { + currentContainerEngines = engine + return + } + } + currentContainerEngines = UNDEFINED +} + +// DeepCopy returns deepcopy object. +func (cont *ContainerInfo) DeepCopy() *ContainerInfo { + copyObject := *cont + copyObject.LimitResources = cont.LimitResources.DeepCopy() + copyObject.RequestResources = cont.RequestResources.DeepCopy() + return &copyObject +} diff --git a/pkg/core/typedef/event.go b/pkg/core/typedef/event.go new file mode 100644 index 0000000000000000000000000000000000000000..ee9d886ce6b167a0d3f0b7afaa52b850947f72cb --- /dev/null +++ b/pkg/core/typedef/event.go @@ -0,0 +1,59 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang +// Create: 2023-01-05 +// Description: This file defines event type and event + +// Package typedef defines core struct and methods for rubik +package typedef + +type ( + // EventType is the type of event published by generic publisher + EventType int8 + // Event is the event published by generic publisher + Event interface{} +) + +const ( + // RAWPODADD means Kubernetes starts a new Pod event + RAWPODADD EventType = iota + // RAWPODUPDATE means Kubernetes updates Pod event + RAWPODUPDATE + // RAWPODDELETE means Kubernetes deletes Pod event + RAWPODDELETE + // INFOADD means PodManager adds pod information event + INFOADD + // INFOUPDATE means PodManager updates pod information event + INFOUPDATE + // INFODELETE means PodManager deletes pod information event + INFODELETE + // RAWPODSYNCALL means Full amount of kubernetes pods + RAWPODSYNCALL +) + +const undefinedType = "undefined" + +var eventTypeToString = map[EventType]string{ + RAWPODADD: "addrawpod", + RAWPODUPDATE: "updaterawpod", + RAWPODDELETE: "deleterawpod", + INFOADD: "addinfo", + INFOUPDATE: "updateinfo", + INFODELETE: "deleteinfo", + RAWPODSYNCALL: "syncallrawpods", +} + +// String returns the string of the current event type +func (t EventType) String() string { + if str, ok := eventTypeToString[t]; ok { + return str + } + return undefinedType +} diff --git a/pkg/core/typedef/podinfo.go b/pkg/core/typedef/podinfo.go new file mode 100644 index 0000000000000000000000000000000000000000..907f02b0f62e8d2048fb145d0708b513ce0883d4 --- /dev/null +++ b/pkg/core/typedef/podinfo.go @@ -0,0 +1,74 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Jiaqi Yang +// Create: 2023-01-05 +// Description: This file defines podInfo + +// Package typedef defines core struct and methods for rubik +package typedef + +import ( + "isula.org/rubik/pkg/core/typedef/cgroup" +) + +// PodInfo represents pod +type PodInfo struct { + cgroup.Hierarchy + Name string `json:"name"` + UID string `json:"uid"` + Namespace string `json:"namespace"` + IDContainersMap map[string]*ContainerInfo `json:"containers,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` +} + +// NewPodInfo creates the PodInfo instance +func NewPodInfo(pod *RawPod) *PodInfo { + return &PodInfo{ + Name: pod.Name, + Namespace: pod.Namespace, + UID: pod.ID(), + Hierarchy: cgroup.Hierarchy{Path: pod.CgroupPath()}, + IDContainersMap: pod.ExtractContainerInfos(), + Annotations: pod.DeepCopy().Annotations, + } +} + +// DeepCopy returns deepcopy object +func (pod *PodInfo) DeepCopy() *PodInfo { + if pod == nil { + return nil + } + var ( + contMap map[string]*ContainerInfo + annoMap map[string]string + ) + // nil is different from empty value in golang + if pod.IDContainersMap != nil { + contMap = make(map[string]*ContainerInfo) + for id, cont := range pod.IDContainersMap { + contMap[id] = cont.DeepCopy() + } + } + if pod.Annotations != nil { + annoMap = make(map[string]string) + for k, v := range pod.Annotations { + annoMap[k] = v + } + } + + return &PodInfo{ + Name: pod.Name, + UID: pod.UID, + Hierarchy: pod.Hierarchy, + Namespace: pod.Namespace, + Annotations: annoMap, + IDContainersMap: contMap, + } +} diff --git a/pkg/core/typedef/podinfo_test.go b/pkg/core/typedef/podinfo_test.go new file mode 
100644 index 0000000000000000000000000000000000000000..26e314f7be52091ad3c9418fa44e089ddc86828a --- /dev/null +++ b/pkg/core/typedef/podinfo_test.go @@ -0,0 +1,84 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Jiaqi Yang +// Date: 2023-02-14 +// Description: This file tests podInfo + +// Package typedef defines core struct and methods for rubik +package typedef + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "isula.org/rubik/pkg/common/constant" +) + +func TestPodInfo_DeepCopy(t *testing.T) { + const ( + oldPodName = "FooPod" + newPodName = "NewFooPod" + oldPodID = "testPod1" + newPodID = "newTestPod1" + oldQuota = "true" + newQuota = "false" + oldContName = "FooCon" + newContName = "NewFooPod" + oldReqCPU float64 = 1.2 + newReqCPU float64 = 2.7 + oldReqMem float64 = 500 + newReqMem float64 = 350 + contID = "testCon1" + oldLimitCPU float64 = 9.0 + oldLimitMem float64 = 300 + ) + oldPod := &PodInfo{ + Name: oldPodName, + UID: oldPodID, + Annotations: map[string]string{ + constant.QuotaAnnotationKey: oldQuota, + constant.PriorityAnnotationKey: "true", + }, + IDContainersMap: map[string]*ContainerInfo{ + contID: { + Name: oldContName, + RequestResources: ResourceMap{ResourceCPU: oldReqCPU, ResourceMem: oldReqMem}, + LimitResources: ResourceMap{ResourceCPU: 9.0, ResourceMem: 300}, + }, + }, + } + copyPod := oldPod.DeepCopy() + copyPod.Name = newContName + copyPod.UID = newPodID + copyPod.Annotations[constant.QuotaAnnotationKey] = 
newQuota + copyPod.IDContainersMap[contID].Name = newContName + copyPod.IDContainersMap[contID].RequestResources[ResourceCPU] = newReqCPU + copyPod.IDContainersMap[contID].RequestResources[ResourceMem] = newReqMem + + assert.Equal(t, oldPodName, oldPod.Name) + assert.Equal(t, oldPodID, oldPod.UID) + assert.Equal(t, oldContName, oldPod.IDContainersMap[contID].Name) + assert.Equal(t, oldQuota, oldPod.Annotations[constant.QuotaAnnotationKey]) + assert.Equal(t, oldReqCPU, oldPod.IDContainersMap[contID].RequestResources[ResourceCPU]) + assert.Equal(t, oldReqMem, oldPod.IDContainersMap[contID].RequestResources[ResourceMem]) + assert.Equal(t, oldLimitCPU, oldPod.IDContainersMap[contID].LimitResources[ResourceCPU]) + assert.Equal(t, oldLimitMem, oldPod.IDContainersMap[contID].LimitResources[ResourceMem]) + + oldNilMapPod := &PodInfo{ + Name: oldPodName, + UID: oldPodID, + Annotations: map[string]string{ + constant.QuotaAnnotationKey: oldQuota, + }, + } + copyPod = oldNilMapPod.DeepCopy() + assert.Equal(t, copyPod, oldNilMapPod) + +} diff --git a/pkg/core/typedef/rawpod.go b/pkg/core/typedef/rawpod.go new file mode 100644 index 0000000000000000000000000000000000000000..59dfb595cef7316d89b0f78fc28e396123af9a1f --- /dev/null +++ b/pkg/core/typedef/rawpod.go @@ -0,0 +1,211 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Jiaqi Yang +// Create: 2023-01-05 +// Description: This file defines RawPod which encapsulates kubernetes pods + +// Package typedef defines core struct and methods for rubik +package typedef + +import ( + "fmt" + "path/filepath" + "strings" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + + "isula.org/rubik/pkg/common/constant" +) + +const ( + configHashAnnotationKey = "kubernetes.io/config.hash" + // RUNNING means the Pod is in the running phase + RUNNING = corev1.PodRunning +) + +type ( + // RawContainer is kubernetes container structure + RawContainer struct { + /* + The container information of kubernetes will be stored in pod.Status.ContainerStatuses + and pod.Spec.Containers respectively. + The container ID information is stored in ContainerStatuses. + Currently our use of container information is limited to ID and Name, + so we only implemented a simple RawContainer structure. + You can continue to expand RawContainer in the future, + such as saving the running state of the container.
+ */ + status corev1.ContainerStatus + spec corev1.Container + } + // RawPod represents kubernetes pod structure + RawPod corev1.Pod + // ResourceType indicates the resource type, such as memory or CPU + ResourceType uint8 + // ResourceMap represents the available value of a certain type of resource + ResourceMap map[ResourceType]float64 +) + +const ( + // ResourceCPU indicates CPU resources + ResourceCPU ResourceType = iota + // ResourceMem represents memory resources + ResourceMem +) + +// ExtractPodInfo returns podInfo from RawPod +func (pod *RawPod) ExtractPodInfo() *PodInfo { + if pod == nil { + return nil + } + return NewPodInfo(pod) +} + +// Running returns true when pod is in the running phase +func (pod *RawPod) Running() bool { + if pod == nil { + return false + } + return pod.Status.Phase == RUNNING +} + +// ID returns the unique identity of pod +func (pod *RawPod) ID() string { + if pod == nil { + return "" + } + return string(pod.UID) +} + +// CgroupPath returns cgroup path of raw pod +func (pod *RawPod) CgroupPath() string { + id := string(pod.UID) + if configHash := pod.Annotations[configHashAnnotationKey]; configHash != "" { + id = configHash + } + + qosClassPath := "" + switch pod.Status.QOSClass { + case corev1.PodQOSGuaranteed: + case corev1.PodQOSBurstable: + qosClassPath = strings.ToLower(string(corev1.PodQOSBurstable)) + case corev1.PodQOSBestEffort: + qosClassPath = strings.ToLower(string(corev1.PodQOSBestEffort)) + default: + return "" + } + /* + example: + 1. Burstable: pod requests are less than the value of limits and not 0; + kubepods/burstable/pod34152897-dbaf-11ea-8cb9-0653660051c3 + 2. BestEffort: pod requests and limits are both 0; + kubepods/besteffort/pod34152897-dbaf-11ea-8cb9-0653660051c3 + 3.
Guaranteed: pod requests are equal to the value set by limits; + kubepods/pod34152897-dbaf-11ea-8cb9-0653660051c3 + */ + return filepath.Join(constant.KubepodsCgroup, qosClassPath, constant.PodCgroupNamePrefix+id) +} + +// ListRawContainers returns all RawContainers in the RawPod +func (pod *RawPod) ListRawContainers() map[string]*RawContainer { + if pod == nil { + return nil + } + var nameRawContainersMap = make(map[string]*RawContainer) + for _, containerStatus := range pod.Status.ContainerStatuses { + // Since corev1.Container only exists the container name, use Name as the unique key + nameRawContainersMap[containerStatus.Name] = &RawContainer{ + status: containerStatus, + } + } + for _, container := range pod.Spec.Containers { + cont, ok := nameRawContainersMap[container.Name] + if !ok { + continue + } + cont.spec = container + } + return nameRawContainersMap +} + +// ExtractContainerInfos returns container information from Pod +func (pod *RawPod) ExtractContainerInfos() map[string]*ContainerInfo { + var idContainersMap = make(map[string]*ContainerInfo, 0) + // 1. get list of raw containers + nameRawContainersMap := pod.ListRawContainers() + if len(nameRawContainersMap) == 0 { + return idContainersMap + } + + // 2. 
generate ID-Container mapping + podCgroupPath := pod.CgroupPath() + for _, rawContainer := range nameRawContainersMap { + id, err := rawContainer.GetRealContainerID() + if id == "" || err != nil { + continue + } + idContainersMap[id] = NewContainerInfo(id, podCgroupPath, rawContainer) + } + return idContainersMap +} + +// GetRealContainerID parses the containerID of k8s +func (cont *RawContainer) GetRealContainerID() (string, error) { + /* + Note: + An UNDEFINED container engine was used when the function was executed for the first time + it seems unlikely to support different container engines at runtime, + So we don't consider the case of midway container engine changes + `fixContainerEngine` is only executed when `getRealContainerID` is called for the first time + */ + setContainerEnginesOnce.Do(func() { fixContainerEngine(cont.status.ContainerID) }) + + if !currentContainerEngines.Support(cont) { + return "", fmt.Errorf("fatal error : unsupported container engine") + } + + cid := cont.status.ContainerID[len(currentContainerEngines.Prefix()):] + // the container may be in the creation or deletion phase. 
+ if len(cid) == 0 { + return "", nil + } + return cid, nil +} + +// GetResourceMaps returns the number of requests and limits of CPU and memory resources +func (cont *RawContainer) GetResourceMaps() (ResourceMap, ResourceMap) { + const milli float64 = 1000 + var ( + // high precision + converter = func(value *resource.Quantity) float64 { + return float64(value.MilliValue()) / milli + } + iterator = func(resourceItems *corev1.ResourceList) ResourceMap { + results := make(ResourceMap) + results[ResourceCPU] = converter(resourceItems.Cpu()) + results[ResourceMem] = converter(resourceItems.Memory()) + return results + } + ) + return iterator(&cont.spec.Resources.Requests), iterator(&cont.spec.Resources.Limits) +} + +// DeepCopy returns the deep copy object of ResourceMap +func (m ResourceMap) DeepCopy() ResourceMap { + if m == nil { + return nil + } + res := make(ResourceMap, len(m)) + for k, v := range m { + res[k] = v + } + return res +} diff --git a/pkg/feature/feature.go b/pkg/feature/feature.go new file mode 100644 index 0000000000000000000000000000000000000000..d5e9e2efe00ed42eda54398943ffcc832c30c721 --- /dev/null +++ b/pkg/feature/feature.go @@ -0,0 +1,32 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: hanchao +// Create: 2023-03-11 +// Description: This file for defining Features + +// Package feature +package feature + +const ( + // PreemptionFeature is the Preemption feature name + PreemptionFeature = "preemption" + // DynCacheFeature is the DynCache feature name + DynCacheFeature = "dynCache" + // IOLimitFeature is the IOLimit feature name + IOLimitFeature = "ioLimit" + // IOCostFeature is the IOCost feature name + IOCostFeature = "ioCost" + // DynMemoryFeature is the DynMemory feature name + DynMemoryFeature = "dynMemory" + // QuotaBurstFeature is the QuotaBurst feature name + QuotaBurstFeature = "quotaBurst" + // QuotaTurboFeature is the QuotaTurbo feature name + QuotaTurboFeature = "quotaTurbo" +) diff --git a/pkg/informer/apiserverinformer.go b/pkg/informer/apiserverinformer.go new file mode 100644 index 0000000000000000000000000000000000000000..0f8daaa1be5df808fe45ff1e3d77ac6934d994d4 --- /dev/null +++ b/pkg/informer/apiserverinformer.go @@ -0,0 +1,128 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Jiaqi Yang +// Create: 2023-01-05 +// Description: This file defines apiinformer which interacts with kubernetes apiserver + +// Package informer implements informer interface +package informer + +import ( + "context" + "fmt" + "os" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/cache" + + "isula.org/rubik/pkg/api" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/core/typedef" +) + +// APIServerInformer interacts with k8s api server and forward data to the internal +type APIServerInformer struct { + api.Publisher + client *kubernetes.Clientset + nodeName string +} + +// NewAPIServerInformer creates an APIServerInformer instance +func NewAPIServerInformer(publisher api.Publisher) (api.Informer, error) { + informer := &APIServerInformer{ + Publisher: publisher, + } + + // create apiserver client + client, err := initKubeClient() + if err != nil { + return nil, fmt.Errorf("fail to init kubenetes client: %v", err) + } + informer.client = client + + // filter pods on current nodes + nodeName := os.Getenv(constant.NodeNameEnvKey) + if nodeName == "" { + return nil, fmt.Errorf("missing %s", constant.NodeNameEnvKey) + } + informer.nodeName = nodeName + + return informer, nil +} + +// initKubeClient initializes kubeClient +func initKubeClient() (*kubernetes.Clientset, error) { + conf, err := rest.InClusterConfig() + if err != nil { + return nil, err + } + + kubeClient, err := kubernetes.NewForConfig(conf) + if err != nil { + return nil, err + } + + return kubeClient, nil +} + +// Start starts and enables APIServerInformer +func (informer *APIServerInformer) Start(ctx context.Context) { + const specNodeNameField = "spec.nodeName" + // set options to return only pods on the current node.
+ var fieldSelector = fields.OneTermEqualSelector(specNodeNameField, informer.nodeName).String() + informer.listFunc(fieldSelector) + informer.watchFunc(ctx, fieldSelector) +} + +func (informer *APIServerInformer) listFunc(fieldSelector string) { + pods, err := informer.client.CoreV1().Pods("").List(context.Background(), + metav1.ListOptions{FieldSelector: fieldSelector}) + if err != nil { + log.Errorf("error listing all pods: %v", err) + return + } + informer.Publish(typedef.RAWPODSYNCALL, pods.Items) +} + +func (informer *APIServerInformer) watchFunc(ctx context.Context, fieldSelector string) { + const reSyncTime = 30 + kubeInformerFactory := informers.NewSharedInformerFactoryWithOptions(informer.client, + time.Duration(reSyncTime)*time.Second, + informers.WithTweakListOptions(func(options *metav1.ListOptions) { + options.FieldSelector = fieldSelector + })) + kubeInformerFactory.Core().V1().Pods().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: informer.AddFunc, + UpdateFunc: informer.UpdateFunc, + DeleteFunc: informer.DeleteFunc, + }) + kubeInformerFactory.Start(ctx.Done()) +} + +// AddFunc handles the raw pod increase event +func (informer *APIServerInformer) AddFunc(obj interface{}) { + informer.Publish(typedef.RAWPODADD, obj) +} + +// UpdateFunc handles the raw pod update event +func (informer *APIServerInformer) UpdateFunc(oldObj, newObj interface{}) { + informer.Publish(typedef.RAWPODUPDATE, newObj) +} + +// DeleteFunc handles the raw pod deletion event +func (informer *APIServerInformer) DeleteFunc(obj interface{}) { + informer.Publish(typedef.RAWPODDELETE, obj) +} diff --git a/pkg/informer/informerfactory.go b/pkg/informer/informerfactory.go new file mode 100644 index 0000000000000000000000000000000000000000..69de266bcf086f9bd9058b2f1493d055ef205304 --- /dev/null +++ b/pkg/informer/informerfactory.go @@ -0,0 +1,57 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Jiaqi Yang +// Create: 2023-01-28 +// Description: This file defines informerFactory which return the informer creator + +// Package informer implements informer interface +package informer + +import ( + "fmt" + + "isula.org/rubik/pkg/api" +) + +type ( + // the definition of informer type + informerType int8 + // informer's factory class + informerFactory struct{} + informerCreator func(publisher api.Publisher) (api.Informer, error) +) + +const ( + // APISERVER instructs the informer to interact with the api server of kubernetes to obtain data + APISERVER informerType = iota +) + +// defaultInformerFactory is globally unique informer factory +var defaultInformerFactory *informerFactory + +// GetInformerCreator returns the constructor of the informer of the specified type +func (factory *informerFactory) GetInformerCreator(iType informerType) informerCreator { + switch iType { + case APISERVER: + return NewAPIServerInformer + default: + return func(publisher api.Publisher) (api.Informer, error) { + return nil, fmt.Errorf("infomer not implemented") + } + } +} + +// GetInformerFactory returns the Informer factory class entity +func GetInformerFactory() *informerFactory { + if defaultInformerFactory == nil { + defaultInformerFactory = &informerFactory{} + } + return defaultInformerFactory +} diff --git a/pkg/iocost/iocost.go b/pkg/iocost/iocost.go deleted file mode 100644 index 99d6da1d3da0ef12ef82192ba72398adbbe134d2..0000000000000000000000000000000000000000 --- a/pkg/iocost/iocost.go +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright 
(c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: HuangYuqing -// Create: 2022-10-26 -// Description: iocost setting for pods. - -// Package iocost is for iocost. -package iocost - -import ( - "fmt" - "io/ioutil" - "os" - "path/filepath" - "strconv" - "strings" - "syscall" - "unicode" - - "github.com/pkg/errors" - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" - "isula.org/rubik/pkg/util" -) - -const ( - iocostModelFile = "blkio.cost.model" - iocostWeightFile = "blkio.cost.weight" - iocostQosFile = "blkio.cost.qos" - wbBlkioinoFile = "memory.wb_blkio_ino" - blkSubName = "blkio" - memSubName = "memory" - offlineWeight = 10 - onlineWeight = 1000 - paramMaxLen = 512 - devNoMax = 256 - scale = 10 - sysDevBlock = "/sys/dev/block" -) - -var ( - hwSupport = false - iocostEnable = false -) - -// HwSupport tell if the os support iocost. -func HwSupport() bool { - return hwSupport -} - -func init() { - qosFile := filepath.Join(constant.DefaultCgroupRoot, blkSubName, iocostQosFile) - if util.PathExist(qosFile) { - hwSupport = true - } -} - -// SetIOcostEnable set iocost disable or enable -func SetIOcostEnable(status bool) { - iocostEnable = status -} - -// ConfigIOcost for config iocost in cgroup v1. 
-func ConfigIOcost(iocostConfigArray []config.IOcostConfig) error { - if !iocostEnable { - return errors.Errorf("iocost feature is disable") - } - - if err := clearIOcost(); err != nil { - log.Infof(err.Error()) - } - - for _, iocostConfig := range iocostConfigArray { - if !iocostConfig.Enable { - // notice: dev's iocost is disable by clearIOcost - continue - } - - devno, err := getBlkDeviceNo(iocostConfig.Dev) - if err != nil { - log.Errorf(err.Error()) - continue - } - - if iocostConfig.Model == "linear" { - if err := configLinearModel(iocostConfig.Param, devno); err != nil { - log.Errorf(err.Error()) - continue - } - } else { - log.Errorf("curent rubik not support non-linear model") - continue - } - - if err := configQos(true, devno); err != nil { - log.Errorf(err.Error()) - continue - } - } - return nil -} - -// SetPodWeight set pod weight -func SetPodWeight(pod *typedef.PodInfo) error { - if !iocostEnable { - return errors.Errorf("iocost feature is disable") - } - weightFile := filepath.Join(pod.CgroupRoot, - blkSubName, pod.CgroupPath, iocostWeightFile) - if err := configWeight(pod.Offline, weightFile); err != nil { - return err - } - if err := bindMemcgBlkio(pod.Containers); err != nil { - return err - } - return nil -} - -// ShutDown for clear iocost if feature is enable. 
-func ShutDown() error { - if !iocostEnable { - return errors.Errorf("iocost feature is disable") - } - if err := clearIOcost(); err != nil { - return err - } - return nil -} - -func getBlkDeviceNo(devName string) (string, error) { - devPath := filepath.Join("/dev", devName) - fi, err := os.Stat(devPath) - if err != nil { - return "", errors.Errorf("stat %s failed with error: %v", devName, err) - } - - if fi.Mode()&os.ModeDevice == 0 { - return "", errors.Errorf("%s is not a device", devName) - } - - st, ok := fi.Sys().(*syscall.Stat_t) - if !ok { - return "", errors.Errorf("failed to get Sys(), %v has type %v", devName, st) - } - - devno := st.Rdev - major, minor := devno/devNoMax, devno%devNoMax - return fmt.Sprintf("%v:%v", major, minor), nil -} - -func configWeight(offline bool, file string) error { - var weight uint64 = offlineWeight - if !offline { - weight = onlineWeight - } - return writeIOcost(file, strconv.FormatUint(weight, scale)) -} - -func configQos(enable bool, devno string) error { - t := 0 - if enable { - t = 1 - } - qosStr := fmt.Sprintf("%v enable=%v ctrl=user min=100.00 max=100.00", devno, t) - filePath := filepath.Join(config.CgroupRoot, blkSubName, iocostQosFile) - return writeIOcost(filePath, qosStr) -} - -func configLinearModel(linearModelParam config.Param, devno string) error { - if linearModelParam.Rbps <= 0 || linearModelParam.Rseqiops <= 0 || linearModelParam.Rrandiops <= 0 || - linearModelParam.Wbps <= 0 || linearModelParam.Wseqiops <= 0 || linearModelParam.Wrandiops <= 0 { - return errors.Errorf("invalid iocost.params, the value must not 0") - } - paramStr := fmt.Sprintf("%v rbps=%v rseqiops=%v rrandiops=%v wbps=%v wseqiops=%v wrandiops=%v", - devno, - linearModelParam.Rbps, linearModelParam.Rseqiops, linearModelParam.Rrandiops, - linearModelParam.Wbps, linearModelParam.Wseqiops, linearModelParam.Wrandiops) - filePath := filepath.Join(config.CgroupRoot, blkSubName, iocostModelFile) - return writeIOcost(filePath, paramStr) -} - -func 
bindMemcgBlkio(containers map[string]*typedef.ContainerInfo) error { - for _, container := range containers { - memPath := container.CgroupPath(memSubName) - blkPath := container.CgroupPath(blkSubName) - ino, err := getDirInode(blkPath) - if err != nil { - log.Errorf("get director:%v, inode err:%v", blkPath, err.Error()) - continue - } - wbBlkFile := filepath.Join(memPath, wbBlkioinoFile) - if err := writeIOcost(wbBlkFile, strconv.FormatUint(ino, scale)); err != nil { - log.Errorf("write file %v err:%v", wbBlkFile, err.Error()) - continue - } - } - return nil -} - -func getDirInode(file string) (uint64, error) { - fi, err := os.Stat(file) - if err != nil { - return 0, err - } - st, ok := fi.Sys().(*syscall.Stat_t) - if !ok { - return 0, errors.Errorf("failed to get Sys(), %v has type %v", file, st) - } - return st.Ino, nil -} - -func clearIOcost() error { - qosFilePath := filepath.Join(config.CgroupRoot, blkSubName, iocostQosFile) - qosParamByte, err := ioutil.ReadFile(qosFilePath) - if err != nil { - return errors.Errorf("read file:%v failed, err:%v", qosFilePath, err.Error()) - } - - if len(qosParamByte) == 0 { - return errors.Errorf("read file:%v is empty", qosFilePath) - } - - qosParams := strings.Split(string(qosParamByte), "\n") - for _, param := range qosParams { - paramList := strings.FieldsFunc(param, unicode.IsSpace) - if len(paramList) != 0 { - if err := configQos(false, paramList[0]); err != nil { - return errors.Errorf("write file:%v failed, err:%v", qosFilePath, err.Error()) - } - } - } - return nil -} - -func writeIOcost(file, param string) error { - if len(param) > paramMaxLen { - return errors.Errorf("param size exceeds %v", paramMaxLen) - } - if !util.PathExist(file) { - return errors.Errorf("path %v not exist, maybe iocost is unsupport", file) - } - err := ioutil.WriteFile(file, []byte(param), constant.DefaultFileMode) - return err -} diff --git a/pkg/lib/cpu/quotaturbo/client.go b/pkg/lib/cpu/quotaturbo/client.go new file mode 100644 index 
0000000000000000000000000000000000000000..a1e3277ee62b973176468605afd0f1f03f97d4b8
--- /dev/null
+++ b/pkg/lib/cpu/quotaturbo/client.go
@@ -0,0 +1,52 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Date: 2023-03-09
+// Description: This file is used for quota turbo client
+
+// Package quotaturbo is for Quota Turbo feature
+package quotaturbo
+
+import (
+	"fmt"
+)
+
+// Client is quotaTurbo client
+type Client struct {
+	*StatusStore
+	Driver
+}
+
+// NewClient returns a quotaTurbo client instance
+func NewClient() *Client {
+	return &Client{
+		StatusStore: NewStatusStore(),
+		Driver:      &EventDriver{},
+	}
+}
+
+// AdjustQuota is used to update status and adjust cgroup quota value
+func (c *Client) AdjustQuota() error {
+	if err := c.updateCPUUtils(); err != nil {
+		return fmt.Errorf("fail to get current cpu utilization: %v", err)
+	}
+	if len(c.cpuQuotas) == 0 {
+		return nil
+	}
+	var errs error
+	if err := c.updateCPUQuotas(); err != nil {
+		errs = appendErr(errs, err)
+	}
+	c.adjustQuota(c.StatusStore)
+	if err := c.writeQuota(); err != nil {
+		errs = appendErr(errs, err)
+	}
+	return errs
+}
diff --git a/pkg/lib/cpu/quotaturbo/client_test.go b/pkg/lib/cpu/quotaturbo/client_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..9389fa147c45045abdd2e6760d1e0c5c12808b9e
--- /dev/null
+++ b/pkg/lib/cpu/quotaturbo/client_test.go
@@ -0,0 +1,115 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Date: 2023-03-09
+// Description: This file is used for testing quota turbo client
+
+// Package quotaturbo is for Quota Turbo feature
+package quotaturbo
+
+import (
+	"path/filepath"
+	"runtime"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/core/typedef/cgroup"
+	"isula.org/rubik/test/try"
+)
+
+// TestClient_AdjustQuota tests AdjustQuota of Client
+func TestClient_AdjustQuota(t *testing.T) {
+	const (
+		contPath      = "kubepods/testPod1/testCon1"
+		podPath       = "kubepods/testPod1"
+		cpuPeriodFile = "cpu.cfs_period_us"
+		cpuQuotaFile  = "cpu.cfs_quota_us"
+		cpuUsageFile  = "cpuacct.usage"
+		cpuStatFile   = "cpu.stat"
+		stat          = `nr_periods 1
+		nr_throttled 1
+		throttled_time 1
+		`
+		quota  = "200000"
+		period = "100000"
+		usage  = "1234567"
+	)
+	tests := []struct {
+		name    string
+		wantErr bool
+		pre     func(t *testing.T, c *Client)
+		post    func(t *testing.T)
+	}{
+		{
+			name:    "TC1-empty CPUQuotas",
+			wantErr: false,
+		},
+		{
+			name: "TC2-fail to updateCPUQuota causing absent of path",
+			pre: func(t *testing.T, c *Client) {
+				c.SetCgroupRoot(constant.TmpTestDir)
+				try.RemoveAll(filepath.Join(constant.TmpTestDir, "cpu", contPath))
+				try.MkdirAll(filepath.Join(constant.TmpTestDir, "cpu", contPath), constant.DefaultDirMode)
+
+				assert.Equal(t, 0, len(c.cpuQuotas))
+				c.AddCgroup(contPath, float64(runtime.NumCPU()))
+				c.cpuQuotas[contPath] = &CPUQuota{
+					Hierarchy: &cgroup.Hierarchy{
+						MountPoint: c.CgroupRoot,
+						Path:       contPath,
+					},
+					cpuLimit:    float64(runtime.NumCPU()) - 1,
+					curThrottle: &cgroup.CPUStat{},
+					preThrottle: &cgroup.CPUStat{},
+				}
+				assert.Equal(t, 1, len(c.cpuQuotas))
+			},
+			post: func(t *testing.T) {
+				try.RemoveAll(filepath.Join(constant.TmpTestDir, "cpu", contPath))
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC3-success",
+			pre: func(t *testing.T, c *Client) {
+				c.SetCgroupRoot(constant.TmpTestDir)
+				try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuPeriodFile), period)
+				try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuQuotaFile), quota)
+				try.WriteFile(filepath.Join(constant.TmpTestDir, "cpuacct", contPath, cpuUsageFile), usage)
+				try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuStatFile), stat)
+				try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", podPath, cpuPeriodFile), period)
+				try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", podPath, cpuQuotaFile), quota)
+				assert.NoError(t, c.AddCgroup(contPath, float64(runtime.NumCPU())-1))
+				assert.Equal(t, 1, len(c.cpuQuotas))
+			},
+			post: func(t *testing.T) {
+				try.RemoveAll(constant.TmpTestDir)
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c := NewClient()
+			// run the case's cleanup hook when the subtest ends, whatever its outcome
+			if tt.post != nil {
+				defer tt.post(t)
+			}
+			if tt.pre != nil {
+				tt.pre(t, c)
+			}
+			if err := c.AdjustQuota(); (err != nil) != tt.wantErr {
+				t.Errorf("Client.AdjustQuota() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
diff --git a/pkg/lib/cpu/quotaturbo/config.go b/pkg/lib/cpu/quotaturbo/config.go
new file mode 100644
index 0000000000000000000000000000000000000000..919501c2a0456e23618f029e766158e37504cf5f
--- /dev/null
+++ b/pkg/lib/cpu/quotaturbo/config.go
@@ -0,0 +1,144 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Date: 2023-03-07
+// Description: This file is used for quota turbo config
+
+// Package quotaturbo is for Quota Turbo feature
+package quotaturbo
+
+import (
+	"fmt"
+
+	"isula.org/rubik/pkg/common/constant"
+)
+
+const (
+	defaultHighWaterMark     = 60
+	defaultAlarmWaterMark    = 80
+	defaultElevateLimit      = 1.0
+	defaultSlowFallbackRatio = 0.1
+	defaultCPUFloatingLimit  = 10.0
+)
+
+// Config defines configuration of QuotaTurbo
+type Config struct {
+	/*
+		If the CPU utilization exceeds HighWaterMark, it will trigger a slow fallback.
+	*/
+	HighWaterMark int `json:"highWaterMark,omitempty"`
+	/*
+		If the CPU utilization exceeds the Alarm WaterMark, it will trigger a fast fallback;
+		otherwise it will trigger a slow increase
+	*/
+	AlarmWaterMark int `json:"alarmWaterMark,omitempty"`
+	/*
+		Cgroup Root indicates the mount point of the system Cgroup, the default is /sys/fs/cgroup
+	*/
+	CgroupRoot string `json:"cgroupRoot,omitempty"`
+	/*
+		ElevateLimit is the maximum percentage(%) of the total amount of
+		a single promotion to the total amount of nodes
+		Default is 1.0
+	*/
+	ElevateLimit float64 `json:"elevateLimit,omitempty"`
+	/*
+		Slow Fallback Ratio is used to control the rate of slow fallback. Default is 0.1
+	*/
+	SlowFallbackRatio float64 `json:"slowFallbackRatio,omitempty"`
+	/*
+		CPUFloatingLimit indicates the Upper Percentage Change of the CPU utilization of the node
+		within the specified time period.
+		Only when the floating rate is lower than the upper limit can the quota be increased,
+		and the decrease is not limited
+		Default is 10.0
+	*/
+	CPUFloatingLimit float64 `json:"cpuFloatingLimit,omitempty"`
+}
+
+// NewConfig returns a quota Turbo config instance with default values
+func NewConfig() *Config {
+	return &Config{
+		HighWaterMark:     defaultHighWaterMark,
+		AlarmWaterMark:    defaultAlarmWaterMark,
+		CgroupRoot:        constant.DefaultCgroupRoot,
+		ElevateLimit:      defaultElevateLimit,
+		SlowFallbackRatio: defaultSlowFallbackRatio,
+		CPUFloatingLimit:  defaultCPUFloatingLimit,
+	}
+}
+
+// validateWaterMark requires 0 <= HighWaterMark < AlarmWaterMark <= 100 (equal marks are rejected)
+func (c *Config) validateWaterMark() error {
+	const minQuotaTurboWaterMark, maxQuotaTurboWaterMark = 0, 100
+	outOfRange := func(num int) bool {
+		return num < minQuotaTurboWaterMark || num > maxQuotaTurboWaterMark
+	}
+	if c.AlarmWaterMark <= c.HighWaterMark || outOfRange(c.HighWaterMark) || outOfRange(c.AlarmWaterMark) {
+		return fmt.Errorf("alarmWaterMark must be greater than highWaterMark, and both must range from 0 to 100")
+	}
+	return nil
+}
+
+// SetAlarmWaterMark sets AlarmWaterMark of QuotaTurbo; on a validation error the old value is kept
+func (c *Config) SetAlarmWaterMark(arg int) error {
+	tmp := c.AlarmWaterMark
+	c.AlarmWaterMark = arg
+	if err := c.validateWaterMark(); err != nil {
+		c.AlarmWaterMark = tmp
+		return err
+	}
+	return nil
+}
+
+// SetHighWaterMark sets HighWaterMark of QuotaTurbo; on a validation error the old value is kept
+func (c *Config) SetHighWaterMark(arg int) error {
+	tmp := c.HighWaterMark
+	c.HighWaterMark = arg
+	if err := c.validateWaterMark(); err != nil {
+		c.HighWaterMark = tmp
+		return err
+	}
+	return nil
+}
+
+// SetCgroupRoot sets CgroupRoot of QuotaTurbo
+func (c *Config) SetCgroupRoot(arg string) {
+	c.CgroupRoot = arg
+}
+
+// SetlEvateLimit sets ElevateLimit of QuotaTurbo; values outside [0,100] are rejected
+func (c *Config) SetlEvateLimit(arg float64) error {
+	if arg < minimumUtilization || arg > maximumUtilization {
+		return fmt.Errorf("the size range of ElevateLimit is [0,100]")
+	}
+	c.ElevateLimit = arg
+	return nil
+}
+
+// SetSlowFallbackRatio sets SlowFallbackRatio of QuotaTurbo
+func (c *Config) SetSlowFallbackRatio(arg float64) {
+	c.SlowFallbackRatio = arg
+}
+
+// SetCPUFloatingLimit sets CPUFloatingLimit of QuotaTurbo; values outside [0,100] are rejected
+func (c *Config) SetCPUFloatingLimit(arg float64) error {
+	if arg < minimumUtilization || arg > maximumUtilization {
+		return fmt.Errorf("the size range of CPUFloatingLimit is [0,100]")
+	}
+	c.CPUFloatingLimit = arg
+	return nil
+}
+
+// GetConfig returns a copy of the QuotaTurbo configuration
+func (c *Config) GetConfig() *Config {
+	copyConf := *c
+	return &copyConf
+}
diff --git a/pkg/lib/cpu/quotaturbo/config_test.go b/pkg/lib/cpu/quotaturbo/config_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..e41c4878a419de224a4092c155f1e81ed5f8455d
--- /dev/null
+++ b/pkg/lib/cpu/quotaturbo/config_test.go
@@ -0,0 +1,282 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+// http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang +// Date: 2023-03-07 +// Description: This file is used for testing quota turbo config + +// Package quotaturbo is for Quota Turbo feature +package quotaturbo + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/assert" + + "isula.org/rubik/pkg/common/constant" +) + +func TestConfig_SetAlarmWaterMark(t *testing.T) { + type fields struct { + HighWaterMark int + } + type args struct { + arg int + } + tests := []struct { + name string + fields fields + args args + wantErr bool + }{ + { + name: "TC1-set alarmWaterMark successfully", + fields: fields{ + HighWaterMark: 60, + }, + args: args{ + arg: 100, + }, + wantErr: false, + }, + { + name: "TC2-alarmWaterMark = highwatermark", + fields: fields{ + HighWaterMark: 60, + }, + args: args{ + arg: 60, + }, + wantErr: true, + }, + { + name: "TC2.1-alarmWaterMark < highwatermark", + fields: fields{ + HighWaterMark: 60, + }, + args: args{ + arg: 59, + }, + wantErr: true, + }, + { + name: "TC3-alarmWaterMark > 100", + fields: fields{ + HighWaterMark: 60, + }, + args: args{ + arg: 101, + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := &Config{ + HighWaterMark: tt.fields.HighWaterMark, + } + if err := c.SetAlarmWaterMark(tt.args.arg); (err != nil) != tt.wantErr { + t.Errorf("Config.SetAlarmWaterMark() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +// TestConfig_SetHighWaterMark tests SetHighWaterMark of Config +func TestConfig_SetHighWaterMark(t *testing.T) { + type fields struct { + AlarmWaterMark int + } + type args struct { + arg int + } + tests := []struct { + name string + fields fields + args args + wantErr bool + }{ + { + name: "TC1-set highWaterMark successfully", + fields: fields{ + AlarmWaterMark: 80, + }, + args: args{ + arg: 10, + }, + wantErr: false, + }, + { + name: "TC2-alarmWaterMark = highwatermark", + fields: fields{ + AlarmWaterMark: 80, + }, + args: args{ + arg: 80, + }, + wantErr: true, + }, + { + name: 
"TC2.1-alarmWaterMark < highwatermark", + fields: fields{ + AlarmWaterMark: 80, + }, + args: args{ + arg: 81, + }, + wantErr: true, + }, + { + name: "TC3-highWaterMark < 0", + fields: fields{ + AlarmWaterMark: 60, + }, + args: args{ + arg: -1, + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := &Config{ + AlarmWaterMark: tt.fields.AlarmWaterMark, + } + if err := c.SetHighWaterMark(tt.args.arg); (err != nil) != tt.wantErr { + t.Errorf("Config.SetHighWaterMark() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +// TestConfig_SetlEvateLimit tests SetlEvateLimit of Config +func TestConfig_SetlEvateLimit(t *testing.T) { + const ( + normal = 2.0 + larger = 100.01 + negative = -0.01 + ) + tests := []struct { + name string + arg float64 + wantErr bool + }{ + { + name: "TC1-set EvateLimit successfully", + arg: normal, + wantErr: false, + }, + { + name: "TC2-too large EvateLimit", + arg: larger, + wantErr: true, + }, + { + name: "TC3-negative EvateLimit", + arg: negative, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := NewConfig() + err := c.SetlEvateLimit(tt.arg) + if (err != nil) != tt.wantErr { + t.Errorf("Config.SetlEvateLimit() error = %v, wantErr %v", err, tt.wantErr) + } + if err == nil { + assert.Equal(t, tt.arg, c.ElevateLimit) + } else { + assert.Equal(t, defaultElevateLimit, c.ElevateLimit) + } + }) + } +} + +// TestConfig_SetCPUFloatingLimit tests SetCPUFloatingLimit of Config +func TestConfig_SetCPUFloatingLimit(t *testing.T) { + const ( + normal = 20.0 + larger = 100.01 + negative = -0.01 + ) + tests := []struct { + name string + arg float64 + wantErr bool + }{ + { + name: "TC1-set CPUFloatingLimit successfully", + arg: normal, + wantErr: false, + }, + { + name: "TC2-too large CPUFloatingLimit", + arg: larger, + wantErr: true, + }, + { + name: "TC3-negative CPUFloatingLimit", + arg: negative, + wantErr: true, + }, + } + for _, tt := range tests { 
+ t.Run(tt.name, func(t *testing.T) { + c := NewConfig() + err := c.SetCPUFloatingLimit(tt.arg) + if (err != nil) != tt.wantErr { + t.Errorf("Config.SetCPUFloatingLimit() error = %v, wantErr %v", err, tt.wantErr) + } + if err == nil { + assert.Equal(t, tt.arg, c.CPUFloatingLimit) + } else { + assert.Equal(t, defaultCPUFloatingLimit, c.CPUFloatingLimit) + } + }) + } +} + +// TestOther tests other function of Config +func TestOther(t *testing.T) { + tests := []struct { + name string + want *Config + }{ + { + name: "TC1-test other", + want: &Config{ + HighWaterMark: defaultHighWaterMark, + AlarmWaterMark: defaultAlarmWaterMark, + CgroupRoot: constant.DefaultCgroupRoot, + ElevateLimit: defaultElevateLimit, + SlowFallbackRatio: defaultSlowFallbackRatio, + CPUFloatingLimit: defaultCPUFloatingLimit, + }, + }, + } + const slowFallback = 3.0 + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := NewConfig() + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("NewConfig() = %v, want %v", got, tt.want) + } + got.SetCgroupRoot(constant.TmpTestDir) + assert.Equal(t, got.CgroupRoot, constant.TmpTestDir) + got.SetSlowFallbackRatio(slowFallback) + assert.Equal(t, got.SlowFallbackRatio, slowFallback) + copyConf := got.GetConfig() + if !reflect.DeepEqual(got, copyConf) { + t.Errorf("GetConfig() = %v, want %v", got, copyConf) + } + }) + } +} diff --git a/pkg/lib/cpu/quotaturbo/cpu.go b/pkg/lib/cpu/quotaturbo/cpu.go new file mode 100644 index 0000000000000000000000000000000000000000..72415b76f746c085eb0343bf285294aa03f30148 --- /dev/null +++ b/pkg/lib/cpu/quotaturbo/cpu.go @@ -0,0 +1,111 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Jiaqi Yang +// Date: 2023-02-08 +// Description: This file is used for computing cpu utilization + +// Package quotaturbo is for Quota Turbo feature +package quotaturbo + +import ( + "fmt" + "io/ioutil" + "math" + "strings" + + "isula.org/rubik/pkg/common/util" +) + +const ( + maximumUtilization float64 = 100 + minimumUtilization float64 = 0 +) + +// ProcStat store /proc/stat data +type ProcStat struct { + name string + user float64 + nice float64 + system float64 + idle float64 + iowait float64 + irq float64 + softirq float64 + steal float64 + guest float64 + guestNice float64 + total float64 + busy float64 +} + +// getProcStat create a proc stat object +func getProcStat() (ProcStat, error) { + const ( + procStatFilePath = "/proc/stat" + nameLineNum = 0 + userIndex = 0 + niceIndex = 1 + systemIndex = 2 + idleIndex = 3 + iowaitIndex = 4 + irqIndex = 5 + softirqIndex = 6 + stealIndex = 7 + guestIndex = 8 + guestNiceIndex = 9 + statsFieldsCount = 10 + supportFieldNumber = 11 + ) + data, err := ioutil.ReadFile(procStatFilePath) + if err != nil { + return ProcStat{}, err + } + // format of the first line of the file /proc/stat : + // name user nice system idle iowait irq softirq steal guest guest_nice + line := strings.Fields(strings.Split(string(data), "\n")[0]) + if len(line) < supportFieldNumber { + return ProcStat{}, fmt.Errorf("too few fields and check the kernel version") + } + var fields [statsFieldsCount]float64 + for i := 0; i < statsFieldsCount; i++ { + fields[i], err = util.ParseFloat64(line[i+1]) + if err != nil { + return ProcStat{}, err + } + } + ps := ProcStat{ + name: line[nameLineNum], + user: 
fields[userIndex], + nice: fields[niceIndex], + system: fields[systemIndex], + idle: fields[idleIndex], + iowait: fields[iowaitIndex], + irq: fields[irqIndex], + softirq: fields[softirqIndex], + steal: fields[stealIndex], + guest: fields[guestIndex], + guestNice: fields[guestNiceIndex], + } + ps.busy = ps.user + ps.system + ps.nice + ps.iowait + ps.irq + ps.softirq + ps.steal + ps.total = ps.busy + ps.idle + return ps, nil +} + +// calculateUtils calculate the CPU utilization rate based on the two interval /proc/stat +func calculateUtils(t1, t2 ProcStat) float64 { + if t2.busy <= t1.busy { + return minimumUtilization + } + if t2.total <= t1.total { + return maximumUtilization + } + return math.Min(maximumUtilization, + math.Max(minimumUtilization, util.Div(t2.busy-t1.busy, t2.total-t1.total)*maximumUtilization)) +} diff --git a/pkg/lib/cpu/quotaturbo/cpu_test.go b/pkg/lib/cpu/quotaturbo/cpu_test.go new file mode 100644 index 0000000000000000000000000000000000000000..ead2d058baf3669678329eac49d920abe83eb2bb --- /dev/null +++ b/pkg/lib/cpu/quotaturbo/cpu_test.go @@ -0,0 +1,57 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Jiaqi Yang +// Date: 2023-02-20 +// Description: This file is used for testing cpu.go + +// Package quotaturbo is for Quota Turbo feature +package quotaturbo + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestCalculateUtils tests calculateUtils +func TestCalculateUtils(t *testing.T) { + var ( + n1 float64 = 1 + n2 float64 = 2 + n3 float64 = 3 + n4 float64 = 4 + ) + + var ( + t1 = ProcStat{ + total: n2, + busy: n1, + } + t2 = ProcStat{ + total: n4, + busy: n2, + } + t3 = ProcStat{ + total: n3, + busy: n3, + } + ) + // normal return result + const ( + util float64 = 50 + minimumUtilization float64 = 0 + maximumUtilization float64 = 100 + ) + assert.Equal(t, util, calculateUtils(t1, t2)) + // busy errors + assert.Equal(t, minimumUtilization, calculateUtils(t2, t1)) + // total errors + assert.Equal(t, maximumUtilization, calculateUtils(t2, t3)) +} diff --git a/pkg/lib/cpu/quotaturbo/cpuquota.go b/pkg/lib/cpu/quotaturbo/cpuquota.go new file mode 100644 index 0000000000000000000000000000000000000000..cb8fad7f9379992675815dd3020790a798c1d24c --- /dev/null +++ b/pkg/lib/cpu/quotaturbo/cpuquota.go @@ -0,0 +1,261 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Jiaqi Yang
+// Date: 2023-02-20
+// Description: cpu container cpu quota data and methods
+
+// Package quotaturbo is for Quota Turbo feature
+package quotaturbo
+
+import (
+	"fmt"
+	"path"
+	"time"
+
+	"isula.org/rubik/pkg/common/util"
+	"isula.org/rubik/pkg/core/typedef/cgroup"
+)
+
+const (
+	// numberOfRestrictedCycles is the number of periods in which the quota limits the CPU usage.
+	numberOfRestrictedCycles = 60
+	// The default value of the cfs_period_us file is 100ms
+	defaultCFSPeriodUs int64 = 100000
+)
+
+var (
+	cpuPeriodKey    = &cgroup.Key{SubSys: "cpu", FileName: "cpu.cfs_period_us"}
+	cpuQuotaKey     = &cgroup.Key{SubSys: "cpu", FileName: "cpu.cfs_quota_us"}
+	cpuAcctUsageKey = &cgroup.Key{SubSys: "cpuacct", FileName: "cpuacct.usage"}
+	cpuStatKey      = &cgroup.Key{SubSys: "cpu", FileName: "cpu.stat"}
+)
+
+// cpuUsage cpu time used by the container at timestamp
+type cpuUsage struct {
+	timestamp int64
+	usage     int64
+}
+
+// CPUQuota stores the CPU quota information of a single container.
+type CPUQuota struct {
+	// basic container information
+	*cgroup.Hierarchy
+	// expect cpu limit
+	cpuLimit float64
+	// current throttling data for the container
+	curThrottle *cgroup.CPUStat
+	// previous throttling data for container
+	preThrottle *cgroup.CPUStat
+	// container cfs_period_us
+	period int64
+	// current cpu quota of the container
+	curQuota int64
+	// cpu quota of the container in the next period
+	nextQuota int64
+	// the delta of the cpu quota to be adjusted based on the decision.
+	quotaDelta float64
+	// the upper limit of the container cpu quota
+	heightLimit float64
+	// maximum quota that can be used by a container in the next period,
+	// calculated based on the total usage in the past N-1 cycles
+	maxQuotaNextPeriod float64
+	// container cpu usage sequence
+	cpuUsages []cpuUsage
+}
+
+// NewCPUQuota create a cpu quota object
+func NewCPUQuota(h *cgroup.Hierarchy, cpuLimit float64) (*CPUQuota, error) {
+	var defaultQuota = cpuLimit * float64(defaultCFSPeriodUs)
+	cq := &CPUQuota{
+		Hierarchy:          h,
+		cpuLimit:           cpuLimit,
+		cpuUsages:          make([]cpuUsage, 0),
+		quotaDelta:         0,
+		curThrottle:        &cgroup.CPUStat{NrThrottled: 0, ThrottledTime: 0},
+		preThrottle:        &cgroup.CPUStat{NrThrottled: 0, ThrottledTime: 0},
+		period:             defaultCFSPeriodUs,
+		curQuota:           int64(defaultQuota),
+		nextQuota:          int64(defaultQuota),
+		heightLimit:        defaultQuota,
+		maxQuotaNextPeriod: defaultQuota,
+	}
+	if err := cq.update(); err != nil {
+		return cq, err
+	}
+	// The throttle data before and after the initialization is the same.
+	cq.preThrottle = cq.curThrottle
+	return cq, nil
+}
+
+func (c *CPUQuota) update() error {
+	var errs error
+	if err := c.updatePeriod(); err != nil {
+		errs = appendErr(errs, err)
+	}
+	if err := c.updateThrottle(); err != nil {
+		errs = appendErr(errs, err)
+	}
+	if err := c.updateQuota(); err != nil {
+		errs = appendErr(errs, err)
+	}
+	if err := c.updateUsage(); err != nil {
+		errs = appendErr(errs, err)
+	}
+	if errs != nil {
+		return errs
+	}
+	return nil
+}
+
+func (c *CPUQuota) updatePeriod() error {
+	us, err := c.GetCgroupAttr(cpuPeriodKey).Int64()
+	// If an error occurs, the period remains unchanged or the default value is used.
+	if err != nil {
+		return err
+	}
+	c.period = us
+	return nil
+}
+
+func (c *CPUQuota) updateThrottle() error {
+	// update suppression times and duration
+	// if data cannot be obtained from cpu.stat, the value remains unchanged.
+	c.preThrottle = c.curThrottle
+	cs, err := c.GetCgroupAttr(cpuStatKey).CPUStat()
+	if err != nil {
+		return err
+	}
+	c.curThrottle = cs
+	return nil
+}
+
+func (c *CPUQuota) updateQuota() error {
+	c.quotaDelta = 0
+	curQuota, err := c.GetCgroupAttr(cpuQuotaKey).Int64()
+	if err != nil {
+		return err
+	}
+	c.curQuota = curQuota
+	return nil
+}
+
+func (c *CPUQuota) updateUsage() error {
+	latest, err := c.GetCgroupAttr(cpuAcctUsageKey).Int64()
+	if err != nil {
+		return err
+	}
+	c.cpuUsages = append(c.cpuUsages, cpuUsage{timestamp: time.Now().UnixNano(), usage: latest})
+	// bound the history: once numberOfRestrictedCycles samples are held, drop the oldest one.
+	if len(c.cpuUsages) >= numberOfRestrictedCycles {
+		c.cpuUsages = c.cpuUsages[1:]
+	}
+	return nil
+}
+
+func writeQuota(mountPoint string, paths []string, delta int64) error {
+	type cgroupQuotaPair struct {
+		h     *cgroup.Hierarchy
+		value int64
+	}
+	var (
+		writed []cgroupQuotaPair
+		save   = func(mountPoint, path string, delta int64) error {
+			h := cgroup.NewHierarchy(mountPoint, path)
+			curQuota, err := h.GetCgroupAttr(cpuQuotaKey).Int64()
+			if err != nil {
+				return fmt.Errorf("error getting cgroup %v quota: %v", path, err)
+			}
+			if curQuota == -1 {
+				return nil
+			}
+
+			nextQuota := curQuota + delta
+			if err := h.SetCgroupAttr(cpuQuotaKey, util.FormatInt64(nextQuota)); err != nil {
+				return fmt.Errorf("error setting cgroup %v quota (%v to %v): %v", path, curQuota, nextQuota, err)
+			}
+			writed = append(writed, cgroupQuotaPair{h: h, value: curQuota})
+			return nil
+		}
+
+		fallback = func() {
+			for i := len(writed) - 1; i >= 0; i-- {
+				if err := writed[i].h.SetCgroupAttr(cpuQuotaKey, util.FormatInt64(writed[i].value)); err != nil {
+					fmt.Printf("error recovering cgroup %v quota %v: %v\n", writed[i].h.Path, writed[i].value, err)
+				}
+			}
+		}
+	)
+
+	if delta > 0 {
+		// update the parent cgroup first, then update the child cgroup
+		for i, j := 0, len(paths)-1; i < j; i, j = i+1, j-1 {
+			paths[i], paths[j] = paths[j], paths[i]
+		}
+	}
+
+	for _, path := range paths {
+		if err := save(mountPoint, path, delta); err != nil {
+			fallback()
+			return err
+		}
+	}
+	return nil
+}
+
+// writeQuota applies nextQuota-curQuota to this cgroup and to each ancestor path below kubepods
+func (c *CPUQuota) writeQuota() error {
+	var (
+		delta    = c.nextQuota - c.curQuota
+		paths    []string
+		fullPath = c.Path
+	)
+	if delta == 0 {
+		return nil
+	}
+	// the upper cgroup needs to be updated synchronously
+	if len(fullPath) == 0 {
+		return fmt.Errorf("invalid cgroup path: %v", fullPath)
+	}
+	for {
+		/*
+			a non-slash start will end up with .
+			start with a slash and end up with slash
+		*/
+		if fullPath == "." || fullPath == "/" || fullPath == "kubepods" || fullPath == "/kubepods" {
+			break
+		}
+		paths = append(paths, fullPath)
+		fullPath = path.Dir(fullPath)
+	}
+	if len(paths) == 0 {
+		return fmt.Errorf("empty cgroup path")
+	}
+	if err := writeQuota(c.MountPoint, paths, delta); err != nil {
+		return err
+	}
+	c.curQuota = c.nextQuota
+	return nil
+}
+
+func (c *CPUQuota) recoverQuota() error {
+	// period ranges from 1000(us) to 1000000(us) and does not overflow.
+	c.nextQuota = int64(c.cpuLimit * float64(c.period))
+	return c.writeQuota()
+}
+
+func appendErr(errs error, err error) error {
+	if errs == nil {
+		return err
+	}
+	if err == nil {
+		return errs
+	}
+	return fmt.Errorf("%s \n* %s", errs.Error(), err.Error())
+}
diff --git a/pkg/lib/cpu/quotaturbo/cpuquota_test.go b/pkg/lib/cpu/quotaturbo/cpuquota_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..feba814da457d248ce39394ba0a26bbe1acfe8bd
--- /dev/null
+++ b/pkg/lib/cpu/quotaturbo/cpuquota_test.go
@@ -0,0 +1,305 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Date: 2023-02-20
+// Description: This file is used for testing cpu_quota
+
+// Package quotaturbo is for Quota Turbo feature
+package quotaturbo
+
+import (
+	"path"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/common/util"
+	"isula.org/rubik/pkg/core/typedef/cgroup"
+	"isula.org/rubik/test/try"
+)
+
+// TestNewCPUQuota tests NewCPUQuota
+func TestNewCPUQuota(t *testing.T) {
+	const (
+		cpuPeriodFile = "cpu.cfs_period_us"
+		cpuQuotaFile  = "cpu.cfs_quota_us"
+		cpuUsageFile  = "cpuacct.usage"
+		cpuStatFile   = "cpu.stat"
+		validStat     = `nr_periods 1
+		nr_throttled 1
+		throttled_time 1
+		`
+		throttleTime int64 = 1
+		quota              = "200000"
+		quotaValue   int64 = 200000
+		period             = "100000"
+		periodValue  int64 = 100000
+		usage              = "1234567"
+		usageValue   int64 = 1234567
+	)
+
+	var (
+		cgPath = "kubepods/testPod1/testCon1"
+		h      = &cgroup.Hierarchy{
+			MountPoint: constant.TmpTestDir,
+			Path:       cgPath,
+		}
+
+		contPath       = filepath.Join(constant.TmpTestDir, "cpu", cgPath, "")
+		contPeriodPath = filepath.Join(contPath, cpuPeriodFile)
+		contQuotaPath  = filepath.Join(contPath, cpuQuotaFile)
+		contUsagePath  = filepath.Join(constant.TmpTestDir, "cpuacct", cgPath, cpuUsageFile)
+		contStatPath   = filepath.Join(contPath, cpuStatFile)
+	)
+
+	try.RemoveAll(constant.TmpTestDir)
+	try.MkdirAll(contPath, constant.DefaultDirMode)
+	try.MkdirAll(path.Dir(contUsagePath), constant.DefaultDirMode)
+	defer try.RemoveAll(constant.TmpTestDir)
+
+	const cpuLimit = 2.0
+	// absent of period file
+	try.RemoveAll(contPeriodPath)
+	_, err := NewCPUQuota(h, cpuLimit)
+	assert.Error(t, err, "should lacking of period file")
+	try.WriteFile(contPeriodPath, period)
+
+	// absent of throttle file
+	try.RemoveAll(contStatPath)
+	_, err = NewCPUQuota(h, cpuLimit)
+	assert.Error(t, err, "should lacking of throttle file")
+	try.WriteFile(contStatPath, validStat)
+
+	// absent of quota file
+	try.RemoveAll(contQuotaPath)
+	_, err = NewCPUQuota(h, cpuLimit)
+	assert.Error(t, err, "should lacking of quota file")
+	try.WriteFile(contQuotaPath, quota)
+
+	// absent of usage file
+	try.RemoveAll(contUsagePath)
+	_, err = NewCPUQuota(h, cpuLimit)
+	assert.Error(t, err, "should lacking of usage file")
+	try.WriteFile(contUsagePath, usage)
+
+	cq, err := NewCPUQuota(h, cpuLimit)
+	assert.NoError(t, err)
+	assert.Equal(t, usageValue, cq.cpuUsages[0].usage)
+	assert.Equal(t, quotaValue, cq.curQuota)
+	assert.Equal(t, periodValue, cq.period)
+
+	cu := make([]cpuUsage, numberOfRestrictedCycles)
+	for i := 0; i < numberOfRestrictedCycles; i++ {
+		cu[i] = cpuUsage{}
+	}
+	cq.cpuUsages = cu
+	assert.NoError(t, cq.updateUsage())
+}
+
+// TestCPUQuota_WriteQuota tests WriteQuota of CPUQuota
+func TestCPUQuota_WriteQuota(t *testing.T) {
+	const (
+		largerQuota             = "200000"
+		largerQuotaVal    int64 = 200000
+		smallerQuota            = "100000"
+		smallerQuotaVal   int64 = 100000
+		unlimitedQuota          = "-1"
+		unlimitedQuotaVal int64 = -1
+		periodUs                = "100000"
+		cpuPeriodFile           = "cpu.cfs_period_us"
+		cpuQuotaFile            = "cpu.cfs_quota_us"
+	)
+
+	var (
+		cgPath         = "kubepods/testPod1/testCon1"
+		contPath       = filepath.Join(constant.TmpTestDir, "cpu", cgPath, "")
+		podPeriodPath  = filepath.Join(filepath.Dir(contPath), cpuPeriodFile)
+		podQuotaPath   = filepath.Join(filepath.Dir(contPath), cpuQuotaFile)
+		contPeriodPath = filepath.Join(contPath, cpuPeriodFile)
+		contQuotaPath  = filepath.Join(contPath, cpuQuotaFile)
+		assertValue    = func(t *testing.T, paths []string, value string) {
+			for _, p := range paths {
+				data, err := util.ReadFile(p)
+				assert.NoError(t, err)
+				assert.Equal(t, value, strings.TrimSpace(string(data)))
+			}
+		}
+	)
+
+	try.RemoveAll(constant.TmpTestDir)
+	defer try.RemoveAll(constant.TmpTestDir)
+
+	type fields struct {
+		Hierarchy *cgroup.Hierarchy
+		curQuota  int64
+		nextQuota int64
+	}
+	tests := []struct {
+		name    string
+		pre     func()
+		fields  fields
+		post    func(t *testing.T, cq *CPUQuota)
+		wantErr bool
+	}{
+		{
+			name: "TC1-empty cgroup path",
+			fields: fields{
+				Hierarchy: &cgroup.Hierarchy{},
+				nextQuota: largerQuotaVal,
+				curQuota:  smallerQuotaVal,
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC2-fail to get paths",
+			fields: fields{
+				Hierarchy: &cgroup.Hierarchy{
+					Path: "/",
+				},
+				nextQuota: largerQuotaVal,
+				curQuota:  smallerQuotaVal,
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC3-None of the paths exist",
+			fields: fields{
+				Hierarchy: &cgroup.Hierarchy{
+					MountPoint: constant.TmpTestDir,
+					Path:       "kubepods/testPod1/testCon1",
+				},
+				nextQuota: largerQuotaVal,
+				curQuota:  smallerQuotaVal,
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC4-Only pod path existed",
+			fields: fields{
+				Hierarchy: &cgroup.Hierarchy{
+					MountPoint: constant.TmpTestDir,
+					Path:       "kubepods/testPod1/testCon1",
+				},
+				// write the pod first and then write the container
+				nextQuota: largerQuotaVal,
+				curQuota:  smallerQuotaVal,
+			},
+			pre: func() {
+				try.WriteFile(podQuotaPath, smallerQuota)
+				try.WriteFile(podPeriodPath, periodUs)
+				try.RemoveAll(contQuotaPath)
+				try.RemoveAll(contPeriodPath)
+			},
+			post: func(t *testing.T, cq *CPUQuota) {
+				// Unable to write to container, so restore pod as it is
+				assertValue(t, []string{podQuotaPath}, smallerQuota)
+				assert.Equal(t, smallerQuotaVal, cq.curQuota)
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC5-success delta > 0",
+			fields: fields{
+				Hierarchy: &cgroup.Hierarchy{
+					MountPoint: constant.TmpTestDir,
+					Path:       "kubepods/testPod1/testCon1",
+				},
+				nextQuota: largerQuotaVal,
+				curQuota:  smallerQuotaVal,
+			},
+			pre: func() {
+				try.WriteFile(podQuotaPath, smallerQuota)
+				try.WriteFile(podPeriodPath, periodUs)
+				try.WriteFile(contQuotaPath, smallerQuota)
+				try.WriteFile(contPeriodPath, periodUs)
+			},
+			post: func(t *testing.T, cq *CPUQuota) {
+				assertValue(t, []string{podQuotaPath, contQuotaPath}, largerQuota)
+				assert.Equal(t, largerQuotaVal, cq.curQuota)
+			},
+			wantErr: false,
+		},
+		{
+			name: "TC6-success delta < 0",
+			fields: fields{
+				Hierarchy: &cgroup.Hierarchy{
+					MountPoint: constant.TmpTestDir,
+					Path:       "kubepods/testPod1/testCon1",
+				},
+				nextQuota: smallerQuotaVal,
+				curQuota:  largerQuotaVal,
+			},
+			pre: func() {
+				try.WriteFile(podQuotaPath, largerQuota)
+				try.WriteFile(podPeriodPath, periodUs)
+				try.WriteFile(contQuotaPath, largerQuota)
+				try.WriteFile(contPeriodPath, periodUs)
+			},
+			post: func(t *testing.T, cq *CPUQuota) {
+				assertValue(t, []string{podQuotaPath, contQuotaPath}, smallerQuota)
+				assert.Equal(t, smallerQuotaVal, cq.curQuota)
+			},
+			wantErr: false,
+		},
+		{
+			name: "TC6.1-success delta < 0 unlimited pod",
+			fields: fields{
+				Hierarchy: &cgroup.Hierarchy{
+					MountPoint: constant.TmpTestDir,
+					Path:       "kubepods/testPod1/testCon1",
+				},
+				nextQuota: smallerQuotaVal,
+				curQuota:  largerQuotaVal,
+			},
+			pre: func() {
+				try.WriteFile(podQuotaPath, unlimitedQuota)
+				try.WriteFile(podPeriodPath, periodUs)
+				try.WriteFile(contQuotaPath, largerQuota)
+				try.WriteFile(contPeriodPath, periodUs)
+			},
+			post: func(t *testing.T, cq *CPUQuota) {
+				assertValue(t, []string{contQuotaPath}, smallerQuota)
+				assertValue(t, []string{podQuotaPath}, unlimitedQuota)
+				assert.Equal(t, smallerQuotaVal, cq.curQuota)
+			},
+			wantErr: false,
+		},
+		{
+			name: "TC7-success delta = 0",
+			fields: fields{
+				Hierarchy: &cgroup.Hierarchy{},
+				nextQuota: smallerQuotaVal,
+				curQuota:  smallerQuotaVal,
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c := &CPUQuota{
+				Hierarchy: tt.fields.Hierarchy,
+				curQuota:  tt.fields.curQuota,
+				nextQuota: tt.fields.nextQuota,
+			}
+			if tt.pre != nil {
+				tt.pre()
+			}
+			if err := c.writeQuota(); (err != nil) != tt.wantErr {
+				t.Errorf("CPUQuota.WriteQuota() error = %v, wantErr %v", err, tt.wantErr)
+			}
+			if tt.post != nil {
+				tt.post(t, c)
+			}
+		})
+	}
+}
diff --git a/pkg/lib/cpu/quotaturbo/driverevent.go b/pkg/lib/cpu/quotaturbo/driverevent.go
new file mode 100644
index 0000000000000000000000000000000000000000..bd8197374f0c31523b11c820921b2872a6e50d1f
--- /dev/null
+++ b/pkg/lib/cpu/quotaturbo/driverevent.go
@@ -0,0 +1,181 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Date: 2023-02-16
+// Description: event driver method for quota turbo
+
+// Package quotaturbo is for Quota Turbo feature
+package quotaturbo
+
+import (
+	"fmt"
+	"math"
+	"runtime"
+
+	"isula.org/rubik/pkg/common/util"
+)
+
+// Driver uses different methods based on different policies.
+type Driver interface {
+	// adjustQuota calculates the quota in the next period based on the customized policy, upper limit, and quota.
+	adjustQuota(status *StatusStore)
+}
+
+// EventDriver is the event based quota adjustment driver.
+type EventDriver struct{}
+
+// adjustQuota accumulates the quota deltas of all containers and derives each container's nextQuota from them
+func (e *EventDriver) adjustQuota(status *StatusStore) {
+	e.slowFallback(status)
+	e.fastFallback(status)
+	// Ensure that the CPU usage does not change by more than 10% within one minute.
+	// Otherwise, the available quota rollback continues but does not increase.
+	if !sharpFluctuates(status) {
+		e.elevate(status)
+	} else {
+		fmt.Printf("CPU utilization fluctuates by more than %.2f\n", status.CPUFloatingLimit)
+	}
+	for _, c := range status.cpuQuotas {
+		// get height limit
+		const easingMultiple = 2.0
+		// the ceiling is easingMultiple times the baseline quota cpuLimit * period
+		c.heightLimit = easingMultiple * c.cpuLimit * float64(c.period)
+		// get the maximum available ensuring that the overall utilization does not exceed the limit.
+		c.maxQuotaNextPeriod = getMaxQuota(c)
+		// c.Period ranges from 1000(us) to 1000000(us) and does not overflow.
+		c.nextQuota = int64(math.Max(math.Min(float64(c.curQuota)+c.quotaDelta, c.maxQuotaNextPeriod),
+			c.cpuLimit*float64(c.period)))
+	}
+}
+
+// elevate boosts when cpu is suppressed
+func (e *EventDriver) elevate(status *StatusStore) {
+	// the CPU usage of the current node is lower than the warning watermark.
+	// U + R <= a & a > U ======> a - U >= R && a - U > 0 =====> a - U >= R
+	if float64(status.AlarmWaterMark)-status.getLastCPUUtil() < status.ElevateLimit {
+		return
+	}
+	// sumDelta : total number of cores to be adjusted
+	var sumDelta float64 = 0
+	delta := make(map[string]float64, 0)
+	for id, c := range status.cpuQuotas {
+		if c.curThrottle.NrThrottled > c.preThrottle.NrThrottled {
+			delta[id] = NsToUs(c.curThrottle.ThrottledTime-c.preThrottle.ThrottledTime) /
+				float64(c.curThrottle.NrThrottled-c.preThrottle.NrThrottled) / float64(c.period)
+			sumDelta += delta[id]
+		}
+	}
+	// the container quota does not need to be increased in this round.
+	if sumDelta == 0 {
+		return
+	}
+	// the total increase cannot exceed ( status.ElevateLimit% ) of the total available CPUs of the node.
+	increase := math.Min(sumDelta, util.PercentageToDecimal(status.ElevateLimit)*float64(runtime.NumCPU()))
+	coefficient := increase / sumDelta
+	for id, quotaDelta := range delta {
+		status.cpuQuotas[id].quotaDelta += coefficient * quotaDelta * float64(status.cpuQuotas[id].period)
+	}
+}
+
+// fastFallback decreases the quota to ensure that the CPU utilization of the node is below the warning water level
+// when the water level exceeds the warning water level.
+func (e *EventDriver) fastFallback(status *StatusStore) {
+	// The CPU usage of the current node is greater than the warning watermark, triggering a fast rollback.
+	if float64(status.AlarmWaterMark) > status.getLastCPUUtil() {
+		return
+	}
+	// sub: the total number of CPU quotas to be reduced on a node (zero or negative at this point).
+	sub := util.PercentageToDecimal(float64(status.AlarmWaterMark)-status.getLastCPUUtil()) * float64(runtime.NumCPU())
+	// sumDelta :total number of cpu cores that can be decreased.
+	var sumDelta float64 = 0
+	delta := make(map[string]float64, 0)
+	for id, c := range status.cpuQuotas {
+		delta[id] = float64(c.curQuota)/float64(c.period) - c.cpuLimit
+		sumDelta += delta[id]
+	}
+	if sumDelta <= 0 {
+		return
+	}
+	// proportional adjustment of each business quota.
+	for id, quotaDelta := range delta {
+		status.cpuQuotas[id].quotaDelta += (quotaDelta / sumDelta) * sub * float64(status.cpuQuotas[id].period)
+	}
+}
+
+// slowFallback triggers quota callback of unpressed containers when the CPU utilization exceeds the control watermark.
+func (e *EventDriver) slowFallback(status *StatusStore) {
+	// Roll back only above the high watermark; equal or inverted watermarks would divide by zero or flip the sign.
+	if float64(status.HighWaterMark) > status.getLastCPUUtil() || status.AlarmWaterMark <= status.HighWaterMark {
+		return
+	}
+	coefficient := (status.getLastCPUUtil() - float64(status.HighWaterMark)) /
+		float64(status.AlarmWaterMark-status.HighWaterMark) * status.SlowFallbackRatio
+	for id, c := range status.cpuQuotas {
+		originQuota := int64(c.cpuLimit * float64(c.period))
+		if c.curQuota > originQuota && c.curThrottle.NrThrottled == c.preThrottle.NrThrottled {
+			status.cpuQuotas[id].quotaDelta += coefficient * float64(originQuota-c.curQuota)
+		}
+	}
+}
+
+// sharpFluctuates reports whether the spread of the recorded node CPU utilizations exceeds CPUFloatingLimit.
+func sharpFluctuates(status *StatusStore) bool {
+	var (
+		lowest  float64 = maximumUtilization
+		highest float64 = minimumUtilization
+	)
+	for _, u := range status.cpuUtils {
+		lowest = math.Min(lowest, u.util)
+		highest = math.Max(highest, u.util)
+	}
+	return highest-lowest > status.CPUFloatingLimit
+}
+
+// getMaxQuota calculate the maximum available quota in the next period based on the container CPU usage in N-1 periods.
+func getMaxQuota(c *CPUQuota) float64 {
+	if len(c.cpuUsages) <= 1 {
+		return c.heightLimit
+	}
+	// the time unit is nanosecond
+	first := c.cpuUsages[0]
+	last := c.cpuUsages[len(c.cpuUsages)-1]
+	timeDelta := NsToUs(last.timestamp - first.timestamp)
+	coefficient := float64(len(c.cpuUsages)) / float64(len(c.cpuUsages)-1)
+	maxAvailable := c.cpuLimit * timeDelta * coefficient
+	used := NsToUs(last.usage - first.usage)
+	remainingUsage := maxAvailable - used
+	origin := c.cpuLimit * float64(c.period)
+	const (
+		// To prevent sharp service jitters, the Rubik proactively decreases the traffic in advance
+		// when the available balance reaches a certain threshold.
+		// The limitMultiplier is used to control the relationship between the upper limit and the threshold.
+		// Experiments show that the value 3 is efficient and secure.
+		limitMultiplier = 3
+		precision       = 1e-10
+	)
+	var threshold = limitMultiplier * c.heightLimit
+	remainingQuota := util.Div(remainingUsage, timeDelta, math.MaxFloat64, precision) *
+		float64(len(c.cpuUsages)-1) * float64(c.period)
+
+	// gradually decrease beyond the threshold to prevent sudden dips.
+	res := remainingQuota
+	if remainingQuota <= threshold {
+		res = origin + util.Div((c.heightLimit-origin)*remainingQuota, threshold, threshold, precision)
+	}
+	// The utilization must not exceed the height limit and must not be less than the cpuLimit.
+	return math.Max(math.Min(res, c.heightLimit), origin)
+}
+
+// NsToUs converts nanoseconds into microseconds
+func NsToUs(ns int64) float64 {
+	// number of nanoseconds contained in 1 microsecond
+	const nanoSecPerMicroSec float64 = 1000
+	return util.Div(float64(ns), nanoSecPerMicroSec)
+}
diff --git a/pkg/lib/cpu/quotaturbo/driverevent_test.go b/pkg/lib/cpu/quotaturbo/driverevent_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..926b4570066aa255bb968425ea22c704fc73725d
--- /dev/null
+++ b/pkg/lib/cpu/quotaturbo/driverevent_test.go
@@ -0,0 +1,604 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Date: 2023-02-20
+// Description: This file is used for testing driverevent.go
+
+// Package quotaturbo is for Quota Turbo feature
+package quotaturbo
+
+import (
+	"math"
+	"runtime"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/common/util"
+	"isula.org/rubik/pkg/core/typedef/cgroup"
+)
+
+// TestEventDriverElevate tests elevate: utilization above the alarm watermark, an unthrottled and a throttled container
+func TestEventDriverElevate(t *testing.T) {
+	var elevateTests = []struct {
+		status     *StatusStore
+		judgements func(t *testing.T, status *StatusStore)
+		name       string
+	}{
+		{
+			name: "TC1 - CPU usage >= the alarmWaterMark.",
+			status: &StatusStore{
+				Config: &Config{
+					AlarmWaterMark: 60,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon1": {},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 90,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				var delta float64 = 0
+				conID := "testCon1"
+				assert.Equal(t, delta, status.cpuQuotas[conID].quotaDelta)
+			},
+		},
+		{
+			name: "TC2 - the container is not suppressed.",
+			status: &StatusStore{
+				Config: &Config{
+					AlarmWaterMark: 70,
+					ElevateLimit:   defaultElevateLimit,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon2": {
+						Hierarchy: &cgroup.Hierarchy{
+							MountPoint: constant.DefaultCgroupRoot,
+							Path:       "kubepods/testPod2/testCon2",
+						},
+						// not suppressed: NrThrottled is identical in the current and previous samples
+						curThrottle: &cgroup.CPUStat{
+							NrThrottled:   1,
+							ThrottledTime: 10,
+						},
+						preThrottle: &cgroup.CPUStat{
+							NrThrottled:   1,
+							ThrottledTime: 10,
+						},
+						period: 100000,
+					},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 60,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				var delta float64 = 0
+				conID := "testCon2"
+				assert.Equal(t, delta, status.cpuQuotas[conID].quotaDelta)
+			},
+		},
+		{
+			name: "TC3 - increase the quota of the suppressed container",
+			status: &StatusStore{
+				Config: &Config{
+					AlarmWaterMark: 60,
+					ElevateLimit:   defaultElevateLimit,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon3": {
+						Hierarchy: &cgroup.Hierarchy{
+							MountPoint: constant.DefaultCgroupRoot,
+							Path:       "kubepods/testPod3/testCon3",
+						},
+						curThrottle: &cgroup.CPUStat{
+							NrThrottled:   50,
+							ThrottledTime: 200000,
+						},
+						preThrottle: &cgroup.CPUStat{
+							NrThrottled:   40,
+							ThrottledTime: 100000,
+						},
+						period: 100000,
+					},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 40,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				conID := "testCon3"
+				c := status.cpuQuotas[conID]
+				coefficient := math.Min(float64(0.0001),
+					util.PercentageToDecimal(status.ElevateLimit)*float64(runtime.NumCPU())) /
+					float64(0.0001)
+				delta := coefficient * float64(0.0001) * float64(c.period)
+				assert.True(t, status.cpuQuotas[conID].quotaDelta == delta)
+			},
+		},
+	}
+
+	e := &EventDriver{}
+	for _, tt := range elevateTests {
+		t.Run(tt.name, func(t *testing.T) {
+			e.elevate(tt.status)
+			tt.judgements(t, tt.status)
+		})
+	}
+}
+
+// TestSlowFallback tests slowFallback: below the high watermark, a throttled container and an unthrottled one
+func TestSlowFallback(t *testing.T) {
+	var slowFallBackTests = []struct {
+		status     *StatusStore
+		judgements func(t *testing.T, status *StatusStore)
+		name       string
+	}{
+		{
+			name: "TC1-CPU usage <= the highWaterMark.",
+			status: &StatusStore{
+				Config: &Config{
+					HighWaterMark:     60,
+					SlowFallbackRatio: defaultSlowFallbackRatio,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon4": {
+						Hierarchy: &cgroup.Hierarchy{
+							MountPoint: constant.DefaultCgroupRoot,
+							Path:       "kubepods/testPod4/testCon4",
+						},
+					},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 40,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				conID := "testCon4"
+				var delta float64 = 0
+				assert.Equal(t, delta, status.cpuQuotas[conID].quotaDelta)
+			},
+		},
+		{
+			name: "TC2-the container is suppressed.",
+			status: &StatusStore{
+				Config: &Config{
+					AlarmWaterMark:    80,
+					HighWaterMark:     50,
+					SlowFallbackRatio: defaultSlowFallbackRatio,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon5": {
+						Hierarchy: &cgroup.Hierarchy{
+							MountPoint: constant.DefaultCgroupRoot,
+							Path:       "kubepods/testPod5/testCon5",
+						},
+						cpuLimit: 1,
+						curThrottle: &cgroup.CPUStat{
+							NrThrottled: 10,
+						},
+						preThrottle: &cgroup.CPUStat{
+							NrThrottled: 0,
+						},
+						period:   100000,
+						curQuota: 200000,
+					},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 70,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				var delta float64 = 0
+				conID := "testCon5"
+				assert.Equal(t, delta, status.cpuQuotas[conID].quotaDelta)
+			},
+		},
+		{
+			name: "TC3-decrease the quota of the uncompressed containers",
+			status: &StatusStore{
+				Config: &Config{
+					AlarmWaterMark:    90,
+					HighWaterMark:     40,
+					SlowFallbackRatio: defaultSlowFallbackRatio,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon6": {
+						Hierarchy: &cgroup.Hierarchy{
+							MountPoint: constant.DefaultCgroupRoot,
+							Path:       "kubepods/testPod6/testCon6",
+						},
+						cpuLimit: 2,
+						// not suppressed: NrThrottled is identical in the current and previous samples
+						curThrottle: &cgroup.CPUStat{
+							NrThrottled:   10,
+							ThrottledTime: 100000,
+						},
+						preThrottle: &cgroup.CPUStat{
+							NrThrottled:   10,
+							ThrottledTime: 100000,
+						},
+						period:   100000,
+						curQuota: 400000,
+					},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 60.0,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				conID := "testCon6"
+				c := status.cpuQuotas[conID]
+				coefficient := (status.getLastCPUUtil() - float64(status.HighWaterMark)) /
+					float64(status.AlarmWaterMark-status.HighWaterMark) * status.SlowFallbackRatio
+				delta := coefficient *
+					((float64(c.cpuLimit) * float64(c.period)) - float64(c.curQuota))
+				assert.Equal(t, delta, status.cpuQuotas[conID].quotaDelta)
+			},
+		},
+	}
+	e := &EventDriver{}
+	for _, tt := range slowFallBackTests {
+		t.Run(tt.name, func(t *testing.T) {
+			e.slowFallback(tt.status)
+			tt.judgements(t, tt.status)
+		})
+	}
+}
+
+// TestFastFallback tests fastFallback: below the alarm watermark, quota at its baseline, and quota above its baseline
+func TestFastFallback(t *testing.T) {
+	var fastFallBackTests = []struct {
+		status     *StatusStore
+		judgements func(t *testing.T, status *StatusStore)
+		name       string
+	}{
+		{
+			name: "TC1-CPU usage <= the AlarmWaterMark.",
+			status: &StatusStore{
+				Config: &Config{
+					AlarmWaterMark: 30,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon7": {
+						Hierarchy: &cgroup.Hierarchy{
+							MountPoint: constant.DefaultCgroupRoot,
+							Path:       "kubepods/testPod7/testCon7",
+						},
+					},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 10,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				conID := "testCon7"
+				var delta float64 = 0
+				assert.Equal(t, delta, status.cpuQuotas[conID].quotaDelta)
+			},
+		},
+		{
+			name: "TC2-the quota of container is not increased.",
+			status: &StatusStore{
+				Config: &Config{
+					AlarmWaterMark: 30,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon8": {
+						Hierarchy: &cgroup.Hierarchy{
+							MountPoint: constant.DefaultCgroupRoot,
+							Path:       "kubepods/testPod8/testCon8",
+						},
+						cpuLimit: 1,
+						period:   100,
+						curQuota: 100,
+					},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 48,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				var delta float64 = 0
+				conID := "testCon8"
+				assert.Equal(t, delta, status.cpuQuotas[conID].quotaDelta)
+			},
+		},
+		{
+			name: "TC3-decrease the quota of the containers",
+			status: &StatusStore{
+				Config: &Config{
+					AlarmWaterMark: 65,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon9": {
+						Hierarchy: &cgroup.Hierarchy{
+							MountPoint: constant.DefaultCgroupRoot,
+							Path:       "kubepods/testPod9/testCon9",
+						},
+						cpuLimit: 3,
+						period:   10000,
+						curQuota: 40000,
+					},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 90,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				conID := "testCon9"
+				c := status.cpuQuotas[conID]
+				delta := util.PercentageToDecimal(float64(status.AlarmWaterMark)-status.getLastCPUUtil()) *
+					float64(runtime.NumCPU()) * float64(c.period)
+				assert.Equal(t, delta, status.cpuQuotas[conID].quotaDelta)
+			},
+		},
+	}
+	e := &EventDriver{}
+	for _, tt := range fastFallBackTests {
+		t.Run(tt.name, func(t *testing.T) {
+			e.fastFallback(tt.status)
+			tt.judgements(t, tt.status)
+		})
+	}
+}
+
+// TestSharpFluctuates tests sharpFluctuates with a utilization spread just above and just below CPUFloatingLimit
+func TestSharpFluctuates(t *testing.T) {
+	const cpuUtil90 = 90
+	var sharpFluctuatesTests = []struct {
+		status *StatusStore
+		want   bool
+		name   string
+	}{
+		{
+			name: "TC1-the cpu changes rapidly",
+			status: &StatusStore{
+				Config: &Config{
+					CPUFloatingLimit: defaultCPUFloatingLimit,
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: cpuUtil90,
+					},
+					{
+						util: cpuUtil90 - defaultCPUFloatingLimit - 1,
+					},
+				},
+			},
+			want: true,
+		},
+		{
+			name: "TC2-the cpu changes steadily",
+			status: &StatusStore{
+				Config: &Config{
+					CPUFloatingLimit: defaultCPUFloatingLimit,
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: cpuUtil90,
+					},
+					{
+						util: cpuUtil90 - defaultCPUFloatingLimit + 1,
+					},
+				},
+			},
+			want: false,
+		},
+	}
+	for _, tt := range sharpFluctuatesTests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.True(t, sharpFluctuates(tt.status) == tt.want)
+		})
+	}
+}
+
+// TestEventDriverAdjustQuota tests adjustQuota end to end and checks the resulting quotaDelta
+func TestEventDriverAdjustQuota(t *testing.T) {
+	var eDriverAdjustQuotaTests = []struct {
+		status     *StatusStore
+		judgements func(t *testing.T, status *StatusStore)
+		name       string
+	}{
+		{
+			name: "TC1-no promotion",
+			status: &StatusStore{
+				Config: &Config{
+					AlarmWaterMark: 80,
+					HighWaterMark:  73,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon10": {
+						Hierarchy: &cgroup.Hierarchy{
+							MountPoint: constant.DefaultCgroupRoot,
+							Path:       "kubepods/testPod10/testCon10",
+						},
+						cpuLimit: 1,
+						period:   80,
+						curQuota: 100,
+					},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 1,
+					},
+					{
+						util: -defaultCPUFloatingLimit,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				var delta float64 = 0
+				conID := "testCon10"
+				assert.Equal(t, delta, status.cpuQuotas[conID].quotaDelta)
+			},
+		},
+		{
+			name: "TC2-make a promotion",
+			status: &StatusStore{
+				Config: &Config{
+					AlarmWaterMark: 97,
+					HighWaterMark:  73,
+				},
+				cpuQuotas: map[string]*CPUQuota{
+					"testCon11": {
+						Hierarchy: &cgroup.Hierarchy{
+							MountPoint: constant.DefaultCgroupRoot,
+							Path:       "kubepods/testPod11/testCon11",
+						},
+						cpuLimit: 2,
+						curThrottle: &cgroup.CPUStat{
+							NrThrottled:   1,
+							ThrottledTime: 200,
+						},
+						preThrottle: &cgroup.CPUStat{
+							NrThrottled:   0,
+							ThrottledTime: 100,
+						},
+						period:   2000,
+						curQuota: 5000,
+					},
+				},
+				cpuUtils: []cpuUtil{
+					{
+						util: 10,
+					},
+				},
+			},
+			judgements: func(t *testing.T, status *StatusStore) {
+				conID := "testCon11"
+				c := status.cpuQuotas[conID]
+				coefficient := math.Min(float64(0.00005), util.PercentageToDecimal(status.ElevateLimit)*
+					float64(runtime.NumCPU())) / float64(0.00005)
+				delta := coefficient * float64(0.00005) * float64(c.period)
+				assert.Equal(t, delta, status.cpuQuotas[conID].quotaDelta)
+			},
+		},
+	}
+	e := &EventDriver{}
+	for _, tt := range eDriverAdjustQuotaTests {
+		t.Run(tt.name, func(t *testing.T) {
+			e.adjustQuota(tt.status)
+			tt.judgements(t, tt.status)
+		})
+	}
+}
+
+// TestGetMaxQuota tests getMaxQuota for an empty window and for results below, above and clamped to the limits
+func TestGetMaxQuota(t *testing.T) {
+	var getMaxQuotaTests = []struct {
+		cq         *CPUQuota
+		judgements func(t *testing.T, cq *CPUQuota)
+		name       string
+	}{
+		{
+			name: "TC1-empty cpu usage",
+			cq: &CPUQuota{
+				heightLimit: 100,
+				cpuUsages:   []cpuUsage{},
+			},
+			judgements: func(t *testing.T, cq *CPUQuota) {
+				var res float64 = 100
+				assert.Equal(t, res, getMaxQuota(cq))
+			},
+		},
+		{
+			name: "TC2-The remaining value is less than 3 times the upper limit.",
+			cq: &CPUQuota{
+				cpuUsages: []cpuUsage{
+					{100000, 100000},
+					{200000, 200000},
+				},
+				Hierarchy: &cgroup.Hierarchy{
+					MountPoint: constant.DefaultCgroupRoot,
+					Path:       "kubepods/testPod1/testCon1",
+				},
+				cpuLimit:    4,
+				period:      100,
+				heightLimit: 800,
+			},
+			judgements: func(t *testing.T, cq *CPUQuota) {
+				const res = 400 + float64(400*700)/float64(3*800)
+				assert.Equal(t, res, getMaxQuota(cq))
+			},
+		},
+		{
+			name: "TC3-The remaining value is greater than 3 times the limit height.",
+			cq: &CPUQuota{
+				cpuUsages: []cpuUsage{
+					{10000, 0},
+					{20000, 0},
+					{30000, 0},
+					{40000, 0},
+					{50000, 0},
+					{60000, 0},
+					{70000, 0},
+					{80000, 100},
+				},
+				Hierarchy: &cgroup.Hierarchy{
+					MountPoint: constant.DefaultCgroupRoot,
+					Path:       "kubepods/testPod1/testCon1",
+				},
+				cpuLimit:    1,
+				period:      100,
+				heightLimit: 200,
+			},
+			judgements: func(t *testing.T, cq *CPUQuota) {
+				var res float64 = 200
+				assert.Equal(t, res, getMaxQuota(cq))
+			},
+		},
+		{
+			name: "TC4-The remaining value is less than the initial value.",
+			cq: &CPUQuota{
+				cpuUsages: []cpuUsage{
+					{100, 0},
+					{200, 1000000},
+				},
+				Hierarchy: &cgroup.Hierarchy{
+					MountPoint: constant.DefaultCgroupRoot,
+					Path:       "kubepods/testPod1/testCon1",
+				},
+				cpuLimit:    10,
+				period:      10,
+				heightLimit: 150,
+			},
+			judgements: func(t *testing.T, cq *CPUQuota) {
+				var res float64 = 100
+				assert.Equal(t, res, getMaxQuota(cq))
+			},
+		},
+	}
+	for _, tt := range getMaxQuotaTests {
+		t.Run(tt.name, func(t *testing.T) {
+			tt.judgements(t, tt.cq)
+		})
+	}
+}
diff --git a/pkg/lib/cpu/quotaturbo/statusstore.go b/pkg/lib/cpu/quotaturbo/statusstore.go
new file mode 100644
index 0000000000000000000000000000000000000000..7c9f26396273f21ade0f75e45c909e9672618087
--- /dev/null
+++ b/pkg/lib/cpu/quotaturbo/statusstore.go
@@ -0,0 +1,196 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
// Author: Jiaqi Yang
// Date: 2023-02-20
// Description: QuotaTurbo Status Store

// Package quotaturbo is for Quota Turbo feature
package quotaturbo

import (
	"fmt"
	"path/filepath"
	"runtime"
	"sync"
	"time"

	"isula.org/rubik/pkg/common/util"
	"isula.org/rubik/pkg/core/typedef/cgroup"
)

// cpuUtil is one sample of the node CPU utilization.
type cpuUtil struct {
	// time the sample was taken; updateCPUUtils fills it with time.Now().UnixNano()
	timestamp int64
	// utilization derived from the previous and the current /proc/stat reading;
	// the very first sample is recorded as 0 because there is nothing to compare with.
	// NOTE(review): presumably a percentage (the tests use values such as 20 and 90) -
	// confirm against calculateUtils.
	util float64
}

// StatusStore is the information of node/containers obtained for quotaTurbo.
// It bundles the configuration, the per-cgroup quota state and the recent
// history of node CPU utilization.
type StatusStore struct {
	// configuration of the QuotaTurbo; embedded, so its fields (e.g. CgroupRoot)
	// are accessed directly on the store
	*Config
	// guards cpuQuotas: AddCgroup and RemoveCgroup take it around every access to the map
	sync.RWMutex
	// CPU quota state of every managed cgroup, keyed by the cgroupPath given to AddCgroup
	cpuQuotas map[string]*CPUQuota
	// cpu utilization sequence for N consecutive cycles; updateCPUUtils appends
	// one sample per call and drops samples older than one minute
	cpuUtils []cpuUtil
	// /proc/stat of the previous period; a negative total means "no previous sample yet"
	lastProcStat ProcStat
}

// NewStatusStore returns a pointer to a StatusStore that is ready for use:
// the configuration comes from NewConfig, the quota map and the utilization
// history are empty, and lastProcStat carries the -1 sentinel so that the
// first updateCPUUtils call records a utilization of 0.
func NewStatusStore() *StatusStore {
	return &StatusStore{
		Config: NewConfig(),
		lastProcStat: ProcStat{
			total: -1,
			busy:  -1,
		},
		cpuQuotas: make(map[string]*CPUQuota, 0),
		cpuUtils:  make([]cpuUtil, 0),
	}
}

// AddCgroup registers the cgroup identified by cgroupPath so that its quota is adjusted.
//
// cgroupPath is combined with the configured CgroupRoot into a cgroup.Hierarchy;
// cpuLimit is the CPU limit of the cgroup and is validated by isAdjustmentAllowed.
//
// An error is returned when
//   - cgroupPath is empty,
//   - CgroupRoot is not configured,
//   - isAdjustmentAllowed rejects the cgroup or the limit,
//   - NewCPUQuota fails to build the quota state.
//
// On success the new CPUQuota is stored under cgroupPath (replacing any
// previous entry with the same key) while holding the write lock.
//
// Note: CgroupRoot is read through the embedded *Config and the result is
// written into cpuQuotas, so both must have been initialized (NewStatusStore
// does that).
func (store *StatusStore) AddCgroup(cgroupPath string, cpuLimit float64) error {
	if len(cgroupPath) == 0 {
		return fmt.Errorf("cgroup path should not be empty")
	}
	if store.CgroupRoot == "" {
		return fmt.Errorf("undefined cgroup mount point, please set it firstly")
	}
	h := cgroup.NewHierarchy(store.CgroupRoot, cgroupPath)
	if !isAdjustmentAllowed(h, cpuLimit) {
		return fmt.Errorf("cgroup not allow to adjust")
	}
	c, err := NewCPUQuota(h, cpuLimit)
	if err != nil {
		return fmt.Errorf("error creating cpu quota: %v", err)
	}
	// only the map update needs the lock; the checks above do not touch cpuQuotas
	store.Lock()
	store.cpuQuotas[cgroupPath] = c
	store.Unlock()
	return nil
}

// RemoveCgroup removes a cgroup that no longer needs to be adjusted.
// A cgroupPath that is not being managed is ignored and nil is returned.
+func (store *StatusStore) RemoveCgroup(cgroupPath string) error { + store.RLock() + cq, ok := store.cpuQuotas[cgroupPath] + store.RUnlock() + if !ok { + return nil + } + safeDel := func(id string) error { + store.Lock() + delete(store.cpuQuotas, id) + store.Unlock() + return nil + } + + if !util.PathExist(filepath.Join(cq.MountPoint, "cpu", cq.Path)) { + return safeDel(cgroupPath) + } + if err := cq.recoverQuota(); err != nil { + return fmt.Errorf("fail to recover cpu.cfs_quota_us for cgroup %s : %v", cq.Path, err) + } + return safeDel(cgroupPath) +} + +// GetAllCgroup returns all cgroup paths that are adjusting quota +func (store *StatusStore) GetAllCgroup() []string { + var res = make([]string, 0) + for _, cq := range store.cpuQuotas { + res = append(res, cq.Path) + } + return res +} + +// getLastCPUUtil obtain the latest cpu utilization +func (store *StatusStore) getLastCPUUtil() float64 { + if len(store.cpuUtils) == 0 { + return 0 + } + return store.cpuUtils[len(store.cpuUtils)-1].util +} + +// updateCPUUtils updates the cpu usage of a node +func (store *StatusStore) updateCPUUtils() error { + var ( + curUtil float64 = 0 + index = 0 + t cpuUtil + ) + ps, err := getProcStat() + if err != nil { + return err + } + if store.lastProcStat.total >= 0 { + curUtil = calculateUtils(store.lastProcStat, ps) + } + store.lastProcStat = ps + cur := time.Now().UnixNano() + store.cpuUtils = append(store.cpuUtils, cpuUtil{ + timestamp: cur, + util: curUtil, + }) + // retain utilization data for only one minute + const minuteTimeDelta = int64(time.Minute) + for index, t = range store.cpuUtils { + if cur-t.timestamp <= minuteTimeDelta { + break + } + } + if index > 0 { + store.cpuUtils = store.cpuUtils[index:] + } + return nil +} + +func (store *StatusStore) updateCPUQuotas() error { + var errs error + for id, c := range store.cpuQuotas { + if err := c.update(); err != nil { + errs = appendErr(errs, fmt.Errorf("error updating cpu quota %v: %v", id, err)) + } + } + return errs +} 
+ +// writeQuota writes the calculated quota value into the cgroup file and takes effect +func (store *StatusStore) writeQuota() error { + var errs error + for id, c := range store.cpuQuotas { + if err := c.writeQuota(); err != nil { + errs = appendErr(errs, fmt.Errorf("error writing cgroup quota %v: %v", id, err)) + } + } + return errs +} + +// isAdjustmentAllowed judges whether quota adjustment is allowed +func isAdjustmentAllowed(h *cgroup.Hierarchy, cpuLimit float64) bool { + // 1. containers whose cgroup path does not exist are not considered. + if !util.PathExist(filepath.Join(h.MountPoint, "cpu", h.Path)) { + return false + } + + /* + 2. abnormal CPULimit + a). containers that do not limit the quota => cpuLimit = 0 + b). cpuLimit = 0 : k8s allows the CPULimit to be 0, but the quota is not limited. + c). cpuLimit >= all cores + */ + if cpuLimit <= 0 || + cpuLimit >= float64(runtime.NumCPU()) { + return false + } + return true +} diff --git a/pkg/lib/cpu/quotaturbo/statusstore_test.go b/pkg/lib/cpu/quotaturbo/statusstore_test.go new file mode 100644 index 0000000000000000000000000000000000000000..49ccddc1513d597936e16fb63b35a7a27aea597e --- /dev/null +++ b/pkg/lib/cpu/quotaturbo/statusstore_test.go @@ -0,0 +1,567 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Jiaqi Yang +// Date: 2023-02-16 +// Description: This file is used for testing statusstore.go + +// Package quotaturbo is for Quota Turbo feature +package quotaturbo + +import ( + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/core/typedef/cgroup" + "isula.org/rubik/test/try" +) + +// TestIsAdjustmentAllowed tests isAdjustmentAllowed +func TestIsAdjustmentAllowed(t *testing.T) { + const contPath1 = "kubepods/testPod1/testCon1" + + try.RemoveAll(constant.TmpTestDir) + defer try.RemoveAll(constant.TmpTestDir) + + tests := []struct { + h *cgroup.Hierarchy + cpuLimit float64 + pre func() + post func() + name string + want bool + }{ + { + name: "TC1-allow adjustment", + h: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath1, + }, + cpuLimit: float64(runtime.NumCPU()) - 1, + pre: func() { + try.MkdirAll(filepath.Join(constant.TmpTestDir, "cpu", contPath1, "cpu.cfs_quota_us"), + constant.DefaultFileMode) + }, + post: func() { + try.RemoveAll(filepath.Join(constant.TmpTestDir, "cpu", contPath1)) + }, + want: true, + }, + { + name: "TC2-cgroup path is not existed", + h: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath1, + }, + cpuLimit: float64(runtime.NumCPU()) - 1, + pre: func() { + try.RemoveAll(filepath.Join(constant.TmpTestDir, "cpu", contPath1)) + }, + want: false, + }, + { + name: "TC3-cpulimit = 0", + h: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath1, + }, + cpuLimit: 0, + pre: func() { + try.MkdirAll(filepath.Join(constant.TmpTestDir, "cpu", contPath1, "cpu.cfs_quota_us"), + constant.DefaultFileMode) + }, + post: func() { + try.RemoveAll(filepath.Join(constant.TmpTestDir, "cpu", contPath1)) + }, + want: false, + }, + { + name: "TC4-cpulimit over max", + h: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath1, + }, + cpuLimit: float64(runtime.NumCPU()) + 1, + 
pre: func() { + try.MkdirAll(filepath.Join(constant.TmpTestDir, "cpu", contPath1, "cpu.cfs_quota_us"), + constant.DefaultFileMode) + }, + post: func() { + try.RemoveAll(filepath.Join(constant.TmpTestDir, "cpu", contPath1)) + }, + want: false, + }, + { + name: "TC5-cpurequest over max", + h: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath1, + }, + cpuLimit: 0, + pre: func() { + try.MkdirAll(filepath.Join(constant.TmpTestDir, "cpu", contPath1, "cpu.cfs_quota_us"), + constant.DefaultFileMode) + }, + post: func() { + try.RemoveAll(filepath.Join(constant.TmpTestDir, "cpu", contPath1)) + }, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.pre != nil { + tt.pre() + } + assert.Equal(t, isAdjustmentAllowed(tt.h, tt.cpuLimit), tt.want) + if tt.post != nil { + tt.post() + } + }) + } +} + +// TestStatusStore_RemoveCgroup tests RemoveCgroup of StatusStore +func TestStatusStore_RemoveCgroup(t *testing.T) { + const ( + podPath = "kubepods/testPod1" + contPath = "kubepods/testPod1/testCon1" + ) + type fields struct { + Config *Config + cpuQuotas map[string]*CPUQuota + } + type args struct { + cgroupPath string + } + tests := []struct { + name string + fields fields + args args + wantErr bool + pre func() + post func(t *testing.T, d *StatusStore) + }{ + { + name: "TC1-empty cgroupPath", + args: args{ + cgroupPath: "", + }, + fields: fields{ + cpuQuotas: make(map[string]*CPUQuota), + }, + wantErr: false, + }, + { + name: "TC2-cgroupPath is not existed", + args: args{ + cgroupPath: contPath, + }, + fields: fields{ + cpuQuotas: map[string]*CPUQuota{ + contPath: { + Hierarchy: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath, + }, + }, + }, + }, + wantErr: false, + }, + { + name: "TC3-cgroupPath existed but can not set", + args: args{ + cgroupPath: contPath, + }, + fields: fields{ + Config: &Config{ + CgroupRoot: constant.TmpTestDir, + }, + cpuQuotas: map[string]*CPUQuota{ + contPath: { + 
Hierarchy: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath, + }, + curQuota: 100000, + nextQuota: 200000, + }, + }, + }, + pre: func() { + try.MkdirAll(filepath.Join(constant.TmpTestDir, "cpu", contPath), constant.DefaultDirMode) + }, + post: func(t *testing.T, d *StatusStore) { + try.RemoveAll(filepath.Join(constant.TmpTestDir, "cpu", contPath)) + }, + wantErr: true, + }, + { + name: "TC4-remove cgroupPath successfully", + args: args{ + cgroupPath: contPath, + }, + fields: fields{ + cpuQuotas: map[string]*CPUQuota{ + contPath: { + Hierarchy: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath, + }, + cpuLimit: 2, + period: 100000, + curQuota: 250000, + nextQuota: 240000, + }, + }, + }, + pre: func() { + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, "cpu.cfs_quota_us"), "250000") + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", podPath, "cpu.cfs_quota_us"), "-1") + }, + post: func(t *testing.T, d *StatusStore) { + val := strings.TrimSpace(try.ReadFile( + filepath.Join(constant.TmpTestDir, "cpu", contPath, "cpu.cfs_quota_us")).String()) + assert.Equal(t, "200000", val) + val = strings.TrimSpace(try.ReadFile( + filepath.Join(constant.TmpTestDir, "cpu", podPath, "cpu.cfs_quota_us")).String()) + assert.Equal(t, "-1", val) + assert.Equal(t, 0, len(d.cpuQuotas)) + try.RemoveAll(constant.TmpTestDir) + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + d := &StatusStore{ + Config: tt.fields.Config, + cpuQuotas: tt.fields.cpuQuotas, + } + if tt.pre != nil { + tt.pre() + } + if err := d.RemoveCgroup(tt.args.cgroupPath); (err != nil) != tt.wantErr { + t.Errorf("StatusStore.RemoveCgroup() error = %v, wantErr %v", err, tt.wantErr) + } + if tt.post != nil { + tt.post(t, d) + } + }) + } +} + +// TestStatusStore_AddCgroup tests AddCgroup of StatusStore +func TestStatusStore_AddCgroup(t *testing.T) { + const ( + contPath = "kubepods/testPod1/testCon1" + 
cpuPeriodFile = "cpu.cfs_period_us" + cpuQuotaFile = "cpu.cfs_quota_us" + cpuUsageFile = "cpuacct.usage" + cpuStatFile = "cpu.stat" + stat = `nr_periods 1 + nr_throttled 1 + throttled_time 1 + ` + quota = "200000" + period = "100000" + usage = "1234567" + ) + type fields struct { + Config *Config + cpuQuotas map[string]*CPUQuota + } + type args struct { + cgroupPath string + cpuLimit float64 + } + tests := []struct { + name string + fields fields + args args + wantErr bool + pre func(t *testing.T, d *StatusStore) + post func(t *testing.T, d *StatusStore) + }{ + { + name: "TC1-empty cgroup path", + args: args{ + cgroupPath: "", + }, + fields: fields{ + cpuQuotas: make(map[string]*CPUQuota), + }, + wantErr: true, + }, + { + name: "TC2-empty cgroup mount point", + args: args{ + cgroupPath: contPath, + }, + fields: fields{ + Config: &Config{ + CgroupRoot: "", + }, + }, + wantErr: true, + }, + { + name: "TC3-cgroup not allow to adjust", + args: args{ + cgroupPath: contPath, + cpuLimit: 3, + }, + fields: fields{ + Config: &Config{ + CgroupRoot: constant.TmpTestDir, + }, + }, + wantErr: true, + }, + { + name: "TC4-failed to create CPUQuota", + args: args{ + cgroupPath: contPath, + cpuLimit: 3, + }, + fields: fields{ + Config: &Config{ + CgroupRoot: constant.TmpTestDir, + }, + cpuQuotas: make(map[string]*CPUQuota), + }, + pre: func(t *testing.T, d *StatusStore) { + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuPeriodFile), period) + }, + post: func(t *testing.T, d *StatusStore) { + try.RemoveAll(constant.TmpTestDir) + }, + wantErr: true, + }, + { + name: "TC5-add successfully", + args: args{ + cgroupPath: contPath, + cpuLimit: 2, + }, + fields: fields{ + Config: &Config{ + CgroupRoot: constant.TmpTestDir, + }, + cpuQuotas: make(map[string]*CPUQuota), + }, + pre: func(t *testing.T, d *StatusStore) { + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuPeriodFile), period) + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", 
contPath, cpuQuotaFile), quota) + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpuacct", contPath, cpuUsageFile), usage) + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuStatFile), stat) + }, + post: func(t *testing.T, d *StatusStore) { + assert.Equal(t, 1, len(d.cpuQuotas)) + try.RemoveAll(constant.TmpTestDir) + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + d := &StatusStore{ + Config: tt.fields.Config, + cpuQuotas: tt.fields.cpuQuotas, + } + if tt.pre != nil { + tt.pre(t, d) + } + if err := d.AddCgroup(tt.args.cgroupPath, tt.args.cpuLimit); (err != nil) != tt.wantErr { + t.Errorf("StatusStore.AddCgroup() error = %v, wantErr %v", err, tt.wantErr) + } + if tt.post != nil { + tt.post(t, d) + } + }) + } +} + +// TestStatusStoreGetLastCPUUtil tests getLastCPUUtil of StatusStore +func TestStatusStore_getLastCPUUtil(t *testing.T) { + // 1. empty CPU Utils + d := &StatusStore{} + t.Run("TC1-empty CPU Util", func(t *testing.T) { + util := float64(0.0) + assert.Equal(t, util, d.getLastCPUUtil()) + }) + // 2. CPU Utils + cpuUtil20 := 20 + d = &StatusStore{cpuUtils: []cpuUtil{{ + util: float64(cpuUtil20), + }}} + t.Run("TC2-CPU Util is 20", func(t *testing.T) { + util := float64(20.0) + assert.Equal(t, util, d.getLastCPUUtil()) + }) +} + +// TestQuotaTurboUpdateCPUUtils tests updateCPUUtils of QuotaTurbo and NewProcStat +func TestStatusStore_updateCPUUtils(t *testing.T) { + status := NewStatusStore() + // 1. obtain the cpu usage for the first time + if err := status.updateCPUUtils(); err != nil { + assert.NoError(t, err) + } + num1 := 1 + assert.Equal(t, num1, len(status.cpuUtils)) + // 2. obtain the cpu usage for the second time + if err := status.updateCPUUtils(); err != nil { + assert.NoError(t, err) + } + num2 := 2 + assert.Equal(t, num2, len(status.cpuUtils)) + // 3. 
obtain the cpu usage after 1 minute + var minuteTimeDelta int64 = 60000000001 + status.cpuUtils[0].timestamp -= minuteTimeDelta + if err := status.updateCPUUtils(); err != nil { + assert.NoError(t, err) + } + assert.Equal(t, num2, len(status.cpuUtils)) +} + +// TestStatusStore_updateCPUQuotas tests updateCPUQuotas of StatusStore +func TestStatusStore_updateCPUQuotas(t *testing.T) { + const ( + contPath = "kubepods/testPod1/testCon1" + cpuPeriodFile = "cpu.cfs_period_us" + cpuQuotaFile = "cpu.cfs_quota_us" + cpuUsageFile = "cpuacct.usage" + cpuStatFile = "cpu.stat" + stat = `nr_periods 1 + nr_throttled 1 + throttled_time 1 + ` + quota = "200000" + period = "100000" + usage = "1234567" + ) + type fields struct { + Config *Config + cpuQuotas map[string]*CPUQuota + cpuUtils []cpuUtil + } + tests := []struct { + name string + fields fields + wantErr bool + pre func() + post func() + }{ + { + name: "TC1-fail to get CPUQuota", + fields: fields{ + Config: &Config{ + CgroupRoot: constant.TmpTestDir, + }, + cpuQuotas: map[string]*CPUQuota{ + contPath: { + Hierarchy: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath, + }, + }, + }, + cpuUtils: make([]cpuUtil, 0), + }, + wantErr: true, + }, + { + name: "TC2-update successfully", + fields: fields{ + Config: &Config{ + CgroupRoot: constant.TmpTestDir, + }, + cpuQuotas: map[string]*CPUQuota{ + contPath: { + Hierarchy: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath, + }, + }, + }, + cpuUtils: make([]cpuUtil, 0), + }, + pre: func() { + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuPeriodFile), period) + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuQuotaFile), quota) + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpuacct", contPath, cpuUsageFile), usage) + try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuStatFile), stat) + }, + post: func() { + try.RemoveAll(constant.TmpTestDir) + }, + wantErr: false, + }, + } + for 
_, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + d := &StatusStore{ + Config: tt.fields.Config, + cpuQuotas: tt.fields.cpuQuotas, + cpuUtils: tt.fields.cpuUtils, + } + if tt.pre != nil { + tt.pre() + } + if err := d.updateCPUQuotas(); (err != nil) != tt.wantErr { + t.Errorf("StatusStore.update() error = %v, wantErr %v", err, tt.wantErr) + } + if tt.post != nil { + tt.post() + } + }) + } +} + +// TestStatusStore_writeQuota tests writeQuota of StatusStore +func TestStatusStore_writeQuota(t *testing.T) { + const contPath = "kubepods/testPod1/testCon1" + tests := []struct { + name string + cpuQuotas map[string]*CPUQuota + wantErr bool + }{ + { + name: "TC1-empty cgroup path", + cpuQuotas: map[string]*CPUQuota{ + contPath: { + Hierarchy: &cgroup.Hierarchy{ + MountPoint: constant.TmpTestDir, + Path: contPath, + }, + curQuota: 100000, + nextQuota: 200000, + }, + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + d := &StatusStore{ + cpuQuotas: tt.cpuQuotas, + } + if err := d.writeQuota(); (err != nil) != tt.wantErr { + t.Errorf("StatusStore.writeQuota() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/pkg/memory/dynlevel.go b/pkg/memory/dynlevel.go deleted file mode 100644 index 3c783506fd9f59cadab2a129ee686e3f0930efdb..0000000000000000000000000000000000000000 --- a/pkg/memory/dynlevel.go +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. 
-// Author: Yang Feiyu -// Create: 2022-6-7 -// Description: memory setting for pods - -package memory - -import ( - "io/ioutil" - "path/filepath" - "time" - - "k8s.io/apimachinery/pkg/util/wait" - - "isula.org/rubik/pkg/constant" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" -) - -type memoryInfo struct { - total int64 - free int64 - available int64 -} - -type dynLevel struct { - m *MemoryManager - memInfo memoryInfo - st status -} - -func newDynLevel(m *MemoryManager) (f *dynLevel) { - return &dynLevel{ - st: newStatus(), - m: m, - } -} - -func (f *dynLevel) Run() { - go wait.Until(f.timerProc, time.Duration(f.m.checkInterval)*time.Second, f.m.stop) -} - -// UpdateConfig is used to update memory config -func (f *dynLevel) UpdateConfig(pod *typedef.PodInfo) { - // there is no config need to update -} - -func (f *dynLevel) timerProc() { - f.updateStatus() - log.Logf("memory manager updates status with memory free: %v, memory total: %v", f.memInfo.free, f.memInfo.total) - f.reclaim() - log.Logf("memory manager reclaims done and pressure level is %s", &f.st) -} - -func (f *dynLevel) updateStatus() { - memInfo, err := getMemoryInfo() - if err != nil { - log.Errorf("getMemoryInfo failed with error: %v, it should not happen", err) - return - } - f.memInfo = memInfo - f.st.transitionStatus(float64(memInfo.free) / float64(memInfo.total)) -} - -func (f *dynLevel) limitOfflineContainers(ft fileType) { - containers := f.m.cpm.ListOfflineContainers() - for _, c := range containers { - if err := f.limitContainer(c, ft); err != nil { - log.Errorf("limit memory for container: %v failed, filetype: %v, err: %v", c.ID, ft, err) - } - } -} - -func (f *dynLevel) limitContainer(c *typedef.ContainerInfo, ft fileType) error { - path := c.CgroupPath("memory") - limit, err := readMemoryFile(filepath.Join(path, memoryUsageFile)) - if err != nil { - return err - } - - for i := 0; i < maxRetry; i++ { - limit += int64(float64(f.memInfo.free) * extraFreePercentage) - if 
err = writeMemoryLimit(path, typedef.FormatInt64(limit), ft); err == nil { - break - } - log.Errorf("failed to write memory limit from path: %v, will retry now, retry num: %v", path, i) - } - - return err -} - -// dropCaches will echo 3 > /proc/sys/vm/drop_caches -func (f *dynLevel) dropCaches() { - var err error - for i := 0; i < maxRetry; i++ { - if err = ioutil.WriteFile(dropCachesFilePath, []byte("3"), constant.DefaultFileMode); err == nil { - log.Logf("drop caches success") - return - } - log.Errorf("drop caches failed, error: %v, will retry later, retry num: %v", err, i) - } -} - -func (f *dynLevel) forceEmptyOfflineContainers() { - containers := f.m.cpm.ListOfflineContainers() - for _, c := range containers { - if err := writeForceEmpty(c.CgroupPath("memory")); err != nil { - log.Errorf("force empty for container: %v failed, err: %v", c.ID, err) - } - - } -} - -func (f *dynLevel) reclaimInPressure() { - switch f.st.pressureLevel { - case low: - // do soft limit - f.limitOfflineContainers(msoftLimit) - case mid: - f.forceEmptyOfflineContainers() - case high: - // do hard limit - f.limitOfflineContainers(mlimit) - case critical: - // drop caches and do hard limit - f.dropCaches() - f.limitOfflineContainers(mlimit) - } -} - -func (f *dynLevel) reclaimInRelieve() { - f.st.relieveCnt++ - containers := f.m.cpm.ListOfflineContainers() - for _, c := range containers { - recoverContainerMemoryLimit(c, f.st.relieveCnt == relieveMaxCnt) - } -} - -func (f *dynLevel) reclaim() { - if f.st.isNormal() { - return - } - - if f.st.isRelieve() { - f.reclaimInRelieve() - return - } - - f.reclaimInPressure() -} - -func writeForceEmpty(cgroupPath string) error { - var err error - for i := 0; i < maxRetry; i++ { - if err = writeMemoryFile(cgroupPath, memoryForceEmptyFile, "0"); err == nil { - log.Logf("force cgroup memory %v empty success", cgroupPath) - return nil - } - log.Errorf("force clean memory failed for %s: %v, will retry later, retry num: %v", cgroupPath, err, i) - } - - 
return err -} - -func recoverContainerMemoryLimit(c *typedef.ContainerInfo, reachMax bool) { - // ratio 0.1 means, newLimit = oldLimit * 1.1 - const ratio = 0.1 - var memLimit int64 - path := c.CgroupPath("memory") - if reachMax { - memLimit = maxSysMemLimit - if err := writeMemoryLimit(path, typedef.FormatInt64(memLimit), mlimit); err != nil { - log.Errorf("failed to write memory limit from path:%v container:%v", path, c.ID) - } - - if err := writeMemoryLimit(path, typedef.FormatInt64(memLimit), msoftLimit); err != nil { - log.Errorf("failed to write memory soft limit from path:%v container:%v", path, c.ID) - } - return - } - - memLimit, err := readMemoryFile(filepath.Join(path, memoryLimitFile)) - if err != nil { - log.Errorf("failed to read from path:%v container:%v", path, c.ID) - return - } - - memLimit = int64(float64(memLimit) * (1 + ratio)) - if memLimit < 0 { - // it means the limit value has reached max, just return - return - } - - if err := writeMemoryLimit(path, typedef.FormatInt64(memLimit), mlimit); err != nil { - log.Errorf("failed to write memory limit from path:%v container:%v", path, c.ID) - } -} diff --git a/pkg/memory/fssr.go b/pkg/memory/fssr.go deleted file mode 100644 index b024d34a14b7b9191c57c7101303fc51bc404c72..0000000000000000000000000000000000000000 --- a/pkg/memory/fssr.go +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: hanchao -// Create: 2022-9-2 -// Description: -// 1. 
When Rubik starts, all offline memory.high is configured to 80% of total memory by default. -// 2. When memory pressure increases: Available memory freeMemory < reservedMemory(totalMemory * 5%). -// newly memory.high=memory.high-totalMemory * 10%. -// 3. When memory is rich over a period of time: freeMemory > 3 * reservedMemory, In this case, 1% of -// the totalMemory is reserved for offline applications. High=memory.high+totalMemory * 1% until memory -// free is between reservedMemory and 3 * reservedMemory. - -// Package memory provide memory reclaim strategy for offline tasks. -package memory - -import ( - "time" - - "k8s.io/apimachinery/pkg/util/wait" - - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" -) - -type fssrStatus int - -const ( - reservePercentage = 0.05 - waterlinePercentage = 0.8 - relievePercentage = 0.02 - reclaimPercentage = 0.1 - prerelieveInterval = "30m" - reserveRatio = 3 - highAsyncRatio = 90 -) - -const ( - fssrNormal fssrStatus = iota - fssrReclaim - fssrPreRelieve - fssrRelieve -) - -type fssr struct { - mmgr *MemoryManager - preRelieveStartDate time.Time - st fssrStatus - total int64 - limit int64 - reservedMemory int64 - highAsyncRatio int64 -} - -func newFssr(m *MemoryManager) (f *fssr) { - f = new(fssr) - f.init(m) - return f -} - -func (f *fssr) init(m *MemoryManager) { - memInfo, err := getMemoryInfo() - if err != nil { - log.Infof("initialization of fssr failed") - return - } - - f.mmgr = m - f.total = memInfo.total - f.reservedMemory = int64(reservePercentage * float64(f.total)) - f.limit = int64(waterlinePercentage * float64(f.total)) - f.st = fssrNormal - f.highAsyncRatio = highAsyncRatio - f.initOfflineContainerLimit() - - log.Infof("total: %v, reserved Memory: %v, limit memory: %v", f.total, f.reservedMemory, f.limit) -} - -func (f *fssr) Run() { - go wait.Until(f.timerProc, time.Duration(f.mmgr.checkInterval)*time.Second, f.mmgr.stop) -} - -// UpdateConfig is used to update memory config -func (f *fssr) 
UpdateConfig(pod *typedef.PodInfo) { - for _, c := range pod.Containers { - f.initContainerMemoryLimit(c) - } -} - -func (f *fssr) timerProc() { - f.updateStatus() - if f.needAdjust() { - newLimit := f.calculateNewLimit() - f.adjustOfflineContainerMemory(newLimit) - } -} - -func (f *fssr) initOfflineContainerLimit() { - if f.mmgr.cpm == nil { - log.Infof("init offline container limit failed, cpm is nil") - return - } - - containers := f.mmgr.cpm.ListOfflineContainers() - for _, c := range containers { - f.initContainerMemoryLimit(c) - } -} - -func (f *fssr) needAdjust() bool { - if f.st == fssrReclaim || f.st == fssrRelieve { - return true - } - return false -} - -func (f *fssr) updateStatus() { - curMemInfo, err := getMemoryInfo() - if err != nil { - log.Errorf("get memory info failed, err:%v", err) - return - } - oldStatus := f.st - - // Use free instead of Available - if curMemInfo.free < f.reservedMemory { - f.st = fssrReclaim - } else if curMemInfo.free > reserveRatio*f.reservedMemory { - switch f.st { - case fssrNormal: - f.st = fssrPreRelieve - f.preRelieveStartDate = time.Now() - case fssrPreRelieve, fssrRelieve: - t, _ := time.ParseDuration(prerelieveInterval) - if f.preRelieveStartDate.Add(t).Before(time.Now()) { - f.st = fssrRelieve - } - case fssrReclaim: - f.st = fssrNormal - default: - log.Errorf("status incorrect, this should not happen") - } - } - - log.Infof("update change status from %v to %v, cur available %v, cur free %v", - oldStatus, f.st, curMemInfo.available, curMemInfo.free) -} - -func (f *fssr) calculateNewLimit() int64 { - newLimit := f.limit - if f.st == fssrReclaim { - newLimit = f.limit - int64(reclaimPercentage*float64(f.total)) - if newLimit < 0 || newLimit <= f.reservedMemory { - newLimit = f.reservedMemory - log.Infof("reclaim offline containers current limit %v is too small, set as reserved memory %v", newLimit, f.reservedMemory) - } - } else if f.st == fssrRelieve { - newLimit = f.limit + int64(relievePercentage*float64(f.total)) 
- if newLimit > int64(waterlinePercentage*float64(f.total)) { - newLimit = int64(waterlinePercentage * float64(f.total)) - log.Infof("relieve offline containers limit soft memory exceeds waterline, set limit as waterline %v", waterlinePercentage*float64(f.total)) - } - } - return newLimit -} - -func (f *fssr) initContainerMemoryLimit(c *typedef.ContainerInfo) { - path := c.CgroupPath("memory") - if err := writeMemoryLimit(path, typedef.FormatInt64(f.limit), mhigh); err != nil { - log.Errorf("failed to initialize the limit soft memory of offline container %v: %v", c.ID, err) - } else { - log.Infof("initialize the limit soft memory of the offline container %v to %v successfully", c.ID, f.limit) - } - - if err := writeMemoryLimit(path, typedef.FormatInt64(f.highAsyncRatio), mhighAsyncRatio); err != nil { - log.Errorf("failed to initialize the async high ration of offline container %v: %v", c.ID, err) - } else { - log.Infof("initialize the async high ration of the offline container %v:%v success", c.ID, f.highAsyncRatio) - } -} - -func (f *fssr) adjustOfflineContainerMemory(limit int64) { - if f.mmgr.cpm == nil { - log.Infof("reclaim offline containers failed, cpm is nil") - return - } - - containers := f.mmgr.cpm.ListOfflineContainers() - for _, c := range containers { - path := c.CgroupPath("memory") - if err := writeMemoryLimit(path, typedef.FormatInt64(limit), mhigh); err != nil { - log.Errorf("relieve offline containers limit soft memory %v failed, err is %v", c.ID, err) - } else { - f.limit = limit - } - } -} diff --git a/pkg/memory/memory.go b/pkg/memory/memory.go deleted file mode 100644 index f25bd34cdb78eb33a452b50eacb2f616b19f7453..0000000000000000000000000000000000000000 --- a/pkg/memory/memory.go +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. 
-// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Song Yanting -// Create: 2022-06-10 -// Description: memory setting for pods - -package memory - -import ( - "bufio" - "bytes" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "strconv" - "strings" - - securejoin "github.com/cyphar/filepath-securejoin" - "github.com/pkg/errors" - - "isula.org/rubik/pkg/checkpoint" - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" -) - -const ( - mlimit fileType = iota - msoftLimit - mhigh - mhighAsyncRatio -) - -const ( - dropCachesFilePath = "/proc/sys/vm/drop_caches" - memoryLimitFile = "memory.limit_in_bytes" - memorySoftLimitFile = "memory.soft_limit_in_bytes" - memoryHighFile = "memory.high" - memoryHighAsyncRatioFile = "memory.high_async_ratio" - memoryUsageFile = "memory.usage_in_bytes" - memoryForceEmptyFile = "memory.force_empty" - // maxSysMemLimit 9223372036854771712 is the default cgroup memory limit value - maxSysMemLimit = 9223372036854771712 - maxRetry = 3 - relieveMaxCnt = 5 - extraFreePercentage = 0.02 -) - -type fileType int - -type memDriver interface { - Run() - UpdateConfig(pod *typedef.PodInfo) -} - -// MemoryManager manages memory reclaim works. 
-type MemoryManager struct { - cpm *checkpoint.Manager - md memDriver - checkInterval int - stop chan struct{} -} - -// NewMemoryManager creates a new memory manager -func NewMemoryManager(cpm *checkpoint.Manager, memConfig config.MemoryConfig) (*MemoryManager, error) { - interval := memConfig.CheckInterval - if err := validateInterval(interval); err != nil { - return nil, err - } - log.Logf("new memory manager with interval:%d", interval) - mm := MemoryManager{ - cpm: cpm, - checkInterval: interval, - stop: config.ShutdownChan, - } - switch memConfig.Strategy { - case "fssr": - mm.md = newFssr(&mm) - case "dynlevel": - mm.md = newDynLevel(&mm) - case "none": - log.Infof("strategy is set to none") - return nil, nil - default: - return nil, errors.Errorf("unsupported memStrategy, expect dynlevel|fssr|none") - } - return &mm, nil -} - -func validateInterval(interval int) error { - if interval > 0 && interval <= constant.DefaultMaxMemCheckInterval { - return nil - } - return errors.Errorf("check interval should between 0 and %v", constant.DefaultMemCheckInterval) -} - -// Run wait every interval and execute run -func (m *MemoryManager) Run() { - m.md.Run() -} - -// UpdateConfig is used to update memory config -func (m *MemoryManager) UpdateConfig(pod *typedef.PodInfo) { - m.md.UpdateConfig(pod) -} - -func writeMemoryLimit(cgroupPath string, value string, ft fileType) error { - var filename string - switch ft { - case mlimit: - filename = memoryLimitFile - case msoftLimit: - filename = memorySoftLimitFile - case mhigh: - filename = memoryHighFile - case mhighAsyncRatio: - filename = memoryHighAsyncRatioFile - default: - return errors.Errorf("unsupported file type %v", ft) - } - - if err := writeMemoryFile(cgroupPath, filename, value); err != nil { - return errors.Errorf("set memory file:%s/%s=%s failed, err:%v", cgroupPath, filename, value, err) - } - - return nil -} - -func writeMemoryFile(cgroupPath, filename, value string) error { - cgFilePath, err := 
securejoin.SecureJoin(cgroupPath, filename) - if err != nil { - return errors.Errorf("join path failed for %s and %s: %v", cgroupPath, filename, err) - } - - return ioutil.WriteFile(cgFilePath, []byte(value), constant.DefaultFileMode) -} - -func readMemoryFile(path string) (int64, error) { - const ( - base, width = 10, 64 - ) - content, err := ioutil.ReadFile(filepath.Clean(path)) - if err != nil { - return 0, err - } - - memBytes := strings.Split(string(content), "\n")[0] - return strconv.ParseInt(memBytes, base, width) -} - -// getMemoryInfo returns memory info -func getMemoryInfo() (memoryInfo, error) { - var m memoryInfo - var total, free, available int64 - const memInfoFile = "/proc/meminfo" - - f, err := os.Open(memInfoFile) - if err != nil { - return m, err - } - - defer f.Close() - - // MemTotal: 15896176 kB - // MemFree: 3811032 kB - scan := bufio.NewScanner(f) - for scan.Scan() { - if bytes.HasPrefix(scan.Bytes(), []byte("MemTotal:")) { - if _, err := fmt.Sscanf(scan.Text(), "MemTotal:%d", &total); err != nil { - return m, err - } - } - - if bytes.HasPrefix(scan.Bytes(), []byte("MemFree:")) { - if _, err := fmt.Sscanf(scan.Text(), "MemFree:%d", &free); err != nil { - return m, err - } - } - - if bytes.HasPrefix(scan.Bytes(), []byte("MemAvailable:")) { - if _, err := fmt.Sscanf(scan.Text(), "MemAvailable:%d", &available); err != nil { - return m, err - } - } - } - - if total == 0 || free == 0 || available == 0 { - return m, errors.Errorf("Memory value should be larger than 0, MemTotal:%d, MemFree:%d, MemAvailable:%d", total, free, available) - } - - m.free = free * 1024 - m.total = total * 1024 - m.available = available * 1024 - - return m, nil -} diff --git a/pkg/memory/status.go b/pkg/memory/status.go deleted file mode 100644 index c3be203d7932ba0382f5599d81b747a293bd0153..0000000000000000000000000000000000000000 --- a/pkg/memory/status.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. 
-// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Yang Feiyu -// Create: 2022-6-7 -// Description: memory status functions - -package memory - -import log "isula.org/rubik/pkg/tinylog" - -const ( - // lowPressure means free / total < 30% - lowPressure = 0.3 - midPressure = 0.15 - highPressure = 0.1 - criticalPressure = 0.05 -) - -type levelInt int - -const ( - normal levelInt = iota - relieve - low - mid - high - critical -) - -type status struct { - pressureLevel levelInt - relieveCnt int -} - -func newStatus() status { - return status{ - pressureLevel: normal, - } -} - -func (s *status) set(pressureLevel levelInt) { - s.pressureLevel = pressureLevel - s.relieveCnt = 0 -} - -func (s *status) isNormal() bool { - return s.pressureLevel == normal -} - -func (s *status) isRelieve() bool { - return s.pressureLevel == relieve -} - -func (s *status) transitionStatus(freePercentage float64) { - if freePercentage > lowPressure { - switch s.pressureLevel { - case normal: - case low, mid, high, critical: - log.Logf("change status from pressure to relieve") - s.set(relieve) - case relieve: - if s.relieveCnt == relieveMaxCnt { - s.set(normal) - log.Logf("change status from relieve to normal") - } - } - return - } - s.pressureLevel = getLevelInPressure(freePercentage) -} - -func (s *status) String() string { - switch s.pressureLevel { - case normal: - return "normal" - case relieve: - return "relieve" - case low: - return "low" - case mid: - return "mid" - case high: - return "high" - case critical: - return "critical" - default: - return 
"unknown" - } -} - -func getLevelInPressure(freePercentage float64) levelInt { - var pressureLevel levelInt - if freePercentage <= criticalPressure { - pressureLevel = critical - } else if freePercentage <= highPressure { - pressureLevel = high - } else if freePercentage <= midPressure { - pressureLevel = mid - } else { - pressureLevel = low - } - return pressureLevel -} diff --git a/pkg/podmanager/podcache.go b/pkg/podmanager/podcache.go new file mode 100644 index 0000000000000000000000000000000000000000..9e021c4e0585a0cefab6d81f6e21d36484348320 --- /dev/null +++ b/pkg/podmanager/podcache.go @@ -0,0 +1,116 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Jiaqi Yang +// Create: 2023-01-12 +// Description: This file defines pod cache storing pod information + +// Package podmanager implements cache connecting informer and module manager +package podmanager + +import ( + "sync" + + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/core/typedef" +) + +// PodCache is used to store PodInfo +type PodCache struct { + sync.RWMutex + Pods map[string]*typedef.PodInfo +} + +// NewPodCache returns a PodCache object (pointer) +func NewPodCache() *PodCache { + return &PodCache{ + Pods: make(map[string]*typedef.PodInfo, 0), + } +} + +// getPod returns the deepcopy object of pod +func (cache *PodCache) getPod(podID string) *typedef.PodInfo { + cache.RLock() + defer cache.RUnlock() + return cache.Pods[podID].DeepCopy() +} + +// podExist returns true if there is a pod whose key is podID in the pods +func (cache *PodCache) podExist(podID string) bool { + cache.RLock() + _, ok := cache.Pods[podID] + cache.RUnlock() + return ok +} + +// addPod adds pod information +func (cache *PodCache) addPod(pod *typedef.PodInfo) { + if pod == nil || pod.UID == "" { + return + } + if ok := cache.podExist(pod.UID); ok { + log.Debugf("pod %v is existed", string(pod.UID)) + return + } + cache.Lock() + cache.Pods[pod.UID] = pod + cache.Unlock() + log.Debugf("add pod %v", string(pod.UID)) +} + +// delPod deletes pod information +func (cache *PodCache) delPod(podID string) { + if ok := cache.podExist(podID); !ok { + log.Debugf("pod %v is not existed", string(podID)) + return + } + cache.Lock() + delete(cache.Pods, podID) + cache.Unlock() + log.Debugf("delete pod %v", podID) +} + +// updatePod updates pod information +func (cache *PodCache) updatePod(pod *typedef.PodInfo) { + if pod == nil || pod.UID == "" { + return + } + cache.Lock() + cache.Pods[pod.UID] = pod + cache.Unlock() + log.Debugf("update pod %v", pod.UID) +} + +// substitute replaces all the data in the cache +func (cache *PodCache) substitute(pods []*typedef.PodInfo) { + 
cache.Lock() + defer cache.Unlock() + cache.Pods = make(map[string]*typedef.PodInfo, 0) + if len(pods) == 0 { + return + } + for _, pod := range pods { + if pod == nil || pod.UID == "" { + continue + } + cache.Pods[pod.UID] = pod + log.Debugf("substituting pod %v", pod.UID) + } +} + +// listPod returns deep copies of all cached pods; the read lock is taken before cache.Pods is touched at all (including len) +func (cache *PodCache) listPod() map[string]*typedef.PodInfo { + cache.RLock() + defer cache.RUnlock() + res := make(map[string]*typedef.PodInfo, len(cache.Pods)) + for id, pi := range cache.Pods { + res[id] = pi.DeepCopy() + } + return res +} diff --git a/pkg/podmanager/podmanager.go b/pkg/podmanager/podmanager.go new file mode 100644 index 0000000000000000000000000000000000000000..8e3eef2f7cb619e0da2b86de98407bbe545623af --- /dev/null +++ b/pkg/podmanager/podmanager.go @@ -0,0 +1,262 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang +// Create: 2023-01-12 +// Description: This file defines PodManager passing and processing raw pod data + +// Package podmanager implements manager connecting informer and module manager +package podmanager + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" + + "isula.org/rubik/pkg/api" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/core/subscriber" + "isula.org/rubik/pkg/core/typedef" +) + +// PodManagerName is the unique identity of PodManager +const PodManagerName = "DefaultPodManager" + +// PodManager manages pod cache and pushes cache change events based on external input +type PodManager struct { + api.Subscriber + api.Publisher + Pods *PodCache +} + +// NewPodManager returns a PodManager pointer +func NewPodManager(publisher api.Publisher) *PodManager { + manager := &PodManager{ + Pods: NewPodCache(), + Publisher: publisher, + } + manager.Subscriber = subscriber.NewGenericSubscriber(manager, PodManagerName) + return manager +} + +// HandleEvent handles the event from publisher +func (manager *PodManager) HandleEvent(eventType typedef.EventType, event typedef.Event) { + switch eventType { + case typedef.RAWPODADD, typedef.RAWPODUPDATE, typedef.RAWPODDELETE: + manager.handleWatchEvent(eventType, event) + case typedef.RAWPODSYNCALL: + manager.handleListEvent(eventType, event) + default: + log.Infof("fail to process %s type event", eventType.String()) + } +} + +// handleWatchEvent handles the watch event +func (manager *PodManager) handleWatchEvent(eventType typedef.EventType, event typedef.Event) { + pod, err := eventToRawPod(event) + if err != nil { + log.Warnf(err.Error()) + return + } + + switch eventType { + case typedef.RAWPODADD: + manager.addFunc(pod) + case typedef.RAWPODUPDATE: + manager.updateFunc(pod) + case typedef.RAWPODDELETE: + manager.deleteFunc(pod) + default: + log.Errorf("code problem, should not go here...") + } +} + +// handleListEvent handles the list event +func (manager *PodManager) 
handleListEvent(eventType typedef.EventType, event typedef.Event) { + pods, err := eventToRawPods(event) + if err != nil { + log.Errorf(err.Error()) + return + } + switch eventType { + case typedef.RAWPODSYNCALL: + manager.sync(pods) + default: + log.Errorf("code problem, should not go here...") + } +} + +// EventTypes returns the event types this manager is interested in +func (manager *PodManager) EventTypes() []typedef.EventType { + return []typedef.EventType{typedef.RAWPODADD, + typedef.RAWPODUPDATE, + typedef.RAWPODDELETE, + typedef.RAWPODSYNCALL, + } +} + +// eventToRawPod converts the event interface to RawPod pointer +func eventToRawPod(e typedef.Event) (*typedef.RawPod, error) { + pod, ok := e.(*corev1.Pod) + if !ok { + return nil, fmt.Errorf("fail to get *typedef.RawPod which type is %T", e) + } + rawPod := typedef.RawPod(*pod) + return &rawPod, nil +} + +// eventToRawPods converts the event interface to RawPod pointer slice +func eventToRawPods(e typedef.Event) ([]*typedef.RawPod, error) { + pods, ok := e.([]corev1.Pod) + if !ok { + return nil, fmt.Errorf("fail to get *typedef.RawPod which type is %T", e) + } + toRawPodPointer := func(pod corev1.Pod) *typedef.RawPod { + tmp := typedef.RawPod(pod) + return &tmp + } + var pointerPods []*typedef.RawPod + for _, pod := range pods { + pointerPods = append(pointerPods, toRawPodPointer(pod)) + } + return pointerPods, nil +} + +// addFunc handles the pod add event +func (manager *PodManager) addFunc(pod *typedef.RawPod) { + // condition 1: only add running pod + if !pod.Running() { + log.Debugf("pod %v is not running", pod.UID) + return + } + // condition 2: pod is not in the cache yet + if manager.Pods.podExist(pod.ID()) { + log.Debugf("pod %v has added", pod.UID) + return + } + // step1: get pod information + podInfo := pod.ExtractPodInfo() + if podInfo == nil { + log.Errorf("fail to strip info from raw pod") + return + } + // step2.
add pod information + manager.tryAdd(podInfo) +} + +// updateFunc handles the pod update event +func (manager *PodManager) updateFunc(pod *typedef.RawPod) { + // step1: a pod that is no longer running is removed from the cache (if present) + if !pod.Running() { + manager.tryDelete(pod.ID()) + return + } + + // add or update information for running pod + podInfo := pod.ExtractPodInfo() + if podInfo == nil { + log.Errorf("fail to strip info from raw pod") + return + } + // Order matters: update first, then add; otherwise a pod just added would also be published as updated + // step2: process a running pod that already exists in the cache + manager.tryUpdate(podInfo) + // step3: process a running pod that does not exist in the cache yet + manager.tryAdd(podInfo) +} + +// deleteFunc handles the pod delete event +func (manager *PodManager) deleteFunc(pod *typedef.RawPod) { + manager.tryDelete(pod.ID()) +} + +// tryAdd adds the pod info and publishes INFOADD if the pod is not cached yet +func (manager *PodManager) tryAdd(podInfo *typedef.PodInfo) { + // only add when the pod does not exist in the cache + if !manager.Pods.podExist(podInfo.UID) { + manager.Pods.addPod(podInfo) + manager.Publish(typedef.INFOADD, podInfo.DeepCopy()) + } +} + +// tryUpdate updates the pod info and publishes INFOUPDATE (old, new) if the pod is already cached +func (manager *PodManager) tryUpdate(podInfo *typedef.PodInfo) { + // only update when the pod exists in the cache + if manager.Pods.podExist(podInfo.UID) { + oldPod := manager.Pods.getPod(podInfo.UID) + manager.Pods.updatePod(podInfo) + manager.Publish(typedef.INFOUPDATE, []*typedef.PodInfo{oldPod, podInfo.DeepCopy()}) + } +} + +// tryDelete deletes the pod info and publishes INFODELETE if the pod is cached +func (manager *PodManager) tryDelete(id string) { + // only delete when the pod exists in the cache + oldPod := manager.Pods.getPod(id) + if oldPod != nil { + manager.Pods.delPod(id) + manager.Publish(typedef.INFODELETE, oldPod) + } +} + +// sync replaces all Pod information sent over +func (manager *PodManager) sync(pods []*typedef.RawPod) { + var newPods []*typedef.PodInfo + for _, pod := range pods { + if pod == nil || !pod.Running() { + continue + } + newPods = append(newPods,
pod.ExtractPodInfo()) + } + manager.Pods.substitute(newPods) +} + +// ListOfflinePods returns offline pods +func (manager *PodManager) ListOfflinePods() ([]*typedef.PodInfo, error) { + return nil, nil +} + +// ListOnlinePods returns online pods +func (manager *PodManager) ListOnlinePods() ([]*typedef.PodInfo, error) { + return nil, nil +} + +func withOption(pi *typedef.PodInfo, opts []api.ListOption) bool { + for _, opt := range opts { + if !opt(pi) { + return false + } + } + return true +} + +// ListContainersWithOptions filters and returns deep copy objects of all containers +func (manager *PodManager) ListContainersWithOptions(options ...api.ListOption) map[string]*typedef.ContainerInfo { + conts := make(map[string]*typedef.ContainerInfo) + for _, pod := range manager.ListPodsWithOptions(options...) { + for _, ci := range pod.IDContainersMap { + conts[ci.ID] = ci + } + } + return conts +} + +// ListPodsWithOptions filters and returns deep copy objects of all pods +func (manager *PodManager) ListPodsWithOptions(options ...api.ListOption) map[string]*typedef.PodInfo { + // already deep copied + allPods := manager.Pods.listPod() + pods := make(map[string]*typedef.PodInfo, len(allPods)) + for _, pod := range allPods { + if !withOption(pod, options) { + continue + } + pods[pod.UID] = pod + } + return pods +} diff --git a/pkg/podmanager/podmanager_test.go b/pkg/podmanager/podmanager_test.go new file mode 100644 index 0000000000000000000000000000000000000000..85e4ffa064eee99f9f5f4c25539930e58500b611 --- /dev/null +++ b/pkg/podmanager/podmanager_test.go @@ -0,0 +1,100 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Jiaqi Yang +// Date: 2023-02-17 +// Description: This file is used for testing podmanager + +package podmanager + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/assert" + + "isula.org/rubik/pkg/api" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/core/typedef" +) + +func TestPodManager_ListContainersWithOptions(t *testing.T) { + var ( + cont1 = &typedef.ContainerInfo{ + ID: "testCon1", + } + cont2 = &typedef.ContainerInfo{ + ID: "testCon2", + } + cont3 = &typedef.ContainerInfo{ + ID: "testCon3", + } + ) + + type fields struct { + pods *PodCache + } + type args struct { + options []api.ListOption + } + tests := []struct { + name string + fields fields + args args + want map[string]*typedef.ContainerInfo + }{ + // TODO: Add test cases. 
+ { + name: "TC1-filter priority container", + args: args{ + []api.ListOption{ + func(pi *typedef.PodInfo) bool { + return pi.Annotations[constant.PriorityAnnotationKey] == "true" + }, + }, + }, + fields: fields{ + pods: &PodCache{ + Pods: map[string]*typedef.PodInfo{ + "testPod1": { + UID: "testPod1", + IDContainersMap: map[string]*typedef.ContainerInfo{ + cont1.ID: cont1, + cont2.ID: cont2, + }, + Annotations: map[string]string{ + constant.PriorityAnnotationKey: "true", + }, + }, + "testPod2": { + IDContainersMap: map[string]*typedef.ContainerInfo{ + cont3.ID: cont3, + }, + }, + }, + }, + }, + want: map[string]*typedef.ContainerInfo{ + cont1.ID: cont1, + cont2.ID: cont2, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + manager := &PodManager{ + Pods: tt.fields.pods, + } + if got := manager.ListContainersWithOptions(tt.args.options...); !reflect.DeepEqual(got, tt.want) { + assert.Equal(t, tt.want, got) + t.Errorf("PodManager.ListContainersWithOptions() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/qos/qos.go b/pkg/qos/qos.go deleted file mode 100644 index 35aaed5e222ad0968fe0e75b86c22c99f3bf049a..0000000000000000000000000000000000000000 --- a/pkg/qos/qos.go +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. 
-// Author: Xiang Li -// Create: 2021-04-17 -// Description: QoS setting for pods - -package qos - -import ( - "io/ioutil" - "os" - "path/filepath" - "strconv" - "strings" - - securejoin "github.com/cyphar/filepath-securejoin" - "github.com/pkg/errors" - - "isula.org/rubik/pkg/constant" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" - "isula.org/rubik/pkg/util" -) - -// SupportCgroupTypes are supported cgroup types for qos setting -var SupportCgroupTypes = []string{"cpu", "memory"} - -// SetQosLevel set pod qos_level -func SetQosLevel(pod *typedef.PodInfo) error { - if err := setQos(pod); err != nil { - return errors.Errorf("set qos for pod %s(%s) error: %v", pod.Name, pod.UID, err) - } - if err := validateQos(pod); err != nil { - return errors.Errorf("validate qos for pod %s(%s) error: %v", pod.Name, pod.UID, err) - } - - log.Logf("Set pod %s(UID=%s, offline=%v) qos level OK", pod.Name, pod.UID, pod.Offline) - return nil -} - -func UpdateQosLevel(pod *typedef.PodInfo) error { - if err := validateQos(pod); err != nil { - log.Logf("Checking pod %s(%s) value failed: %v, reset it", err, pod.Name, pod.UID) - if err := setQos(pod); err != nil { - return errors.Errorf("set qos for pod %s(%s) error: %v", pod.Name, pod.UID, err) - } - } - - return nil -} - -// setQos is used for setting pod's qos level following it's cgroup path -func setQos(pod *typedef.PodInfo) error { - if len(pod.UID) > constant.MaxPodIDLen { - return errors.Errorf("Pod id too long") - } - - // default qos_level is online, no need to set online pod qos_level - if !pod.Offline { - log.Logf("Set level=%v for pod %s(%s)", constant.MaxLevel, pod.Name, pod.UID) - return nil - } - log.Logf("Set level=%v for pod %s(%s)", constant.MinLevel, pod.Name, pod.UID) - - cgroupMap, err := initCgroupPath(pod.CgroupRoot, pod.CgroupPath) - if err != nil { - return err - } - - for kind, cgPath := range cgroupMap { - switch kind { - case "cpu": - if err := setQosLevel(cgPath, 
constant.CPUCgroupFileName, int(constant.MinLevel)); err != nil { - return err - } - case "memory": - if err := setQosLevel(cgPath, constant.MemoryCgroupFileName, int(constant.MinLevel)); err != nil { - return err - } - } - } - - return nil -} - -func setQosLevel(root, file string, target int) error { - if !util.IsDirectory(root) { - return errors.Errorf("Invalid cgroup path %q", root) - } - if old, err := getQosLevel(root, file); err == nil && target > old { - return errors.Errorf("Not support change qos level from low to high") - } - // walk through all sub paths - if err := filepath.Walk(root, func(path string, f os.FileInfo, err error) error { - if f != nil && f.IsDir() { - cgFilePath, err := securejoin.SecureJoin(path, file) - if err != nil { - return errors.Errorf("Join path failed for %s and %s: %v", path, file, err) - } - if err = ioutil.WriteFile(cgFilePath, []byte(strconv.Itoa(target)), - constant.DefaultFileMode); err != nil { - return errors.Errorf("Setting qos level failed for %s=%d: %v", cgFilePath, target, err) - } - } - - return nil - }); err != nil { - return err - } - - return nil -} - -// validateQos is used for checking pod's qos level if equal to the value it should be set up to -func validateQos(pod *typedef.PodInfo) error { - var ( - cpuInfo, memInfo int - err error - qosLevel int - ) - - if !pod.Offline { - qosLevel = int(constant.MaxLevel) - } else { - qosLevel = int(constant.MinLevel) - } - - cgroupMap, err := initCgroupPath(pod.CgroupRoot, pod.CgroupPath) - if err != nil { - return err - } - for kind, cgPath := range cgroupMap { - switch kind { - case "cpu": - if cpuInfo, err = getQosLevel(cgPath, constant.CPUCgroupFileName); err != nil { - return errors.Errorf("read %s failed: %v", constant.CPUCgroupFileName, err) - } - case "memory": - if memInfo, err = getQosLevel(cgPath, constant.MemoryCgroupFileName); err != nil { - return errors.Errorf("read %s failed: %v", constant.MemoryCgroupFileName, err) - } - } - } - - if (cpuInfo != qosLevel) 
|| (memInfo != qosLevel) { - return errors.Errorf("check level failed") - } - - return nil -} - -func getQosLevel(root, file string) (int, error) { - var ( - qosLevel int - rootQos []byte - err error - ) - - rootQos, err = util.ReadSmallFile(filepath.Join(root, file)) // nolint - if err != nil { - return constant.ErrCodeFailed, errors.Errorf("get root qos level failed: %v", err) - } - // walk through all sub paths - if err = filepath.Walk(root, func(path string, f os.FileInfo, err error) error { - if f != nil && f.IsDir() { - cgFilePath, err := securejoin.SecureJoin(path, file) - if err != nil { - return errors.Errorf("join path failed: %v", err) - } - data, err := util.ReadSmallFile(filepath.Clean(cgFilePath)) - if err != nil { - return errors.Errorf("get qos level failed: %v", err) - } - if strings.Compare(string(data), string(rootQos)) != 0 { - return errors.Errorf("qos differs") - } - } - return nil - }); err != nil { - return constant.ErrCodeFailed, err - } - qosLevel, err = strconv.Atoi(strings.TrimSpace(string(rootQos))) - if err != nil { - return constant.ErrCodeFailed, err - } - - return qosLevel, nil -} - -// initCgroupPath return pod's cgroup full path -func initCgroupPath(cgroupRoot, cgroupPath string) (map[string]string, error) { - if cgroupRoot == "" { - cgroupRoot = constant.DefaultCgroupRoot - } - cgroupMap := make(map[string]string, len(SupportCgroupTypes)) - for _, kind := range SupportCgroupTypes { - if err := checkCgroupPath(cgroupPath); err != nil { - return nil, err - } - fullPath := filepath.Join(cgroupRoot, kind, cgroupPath) - if len(fullPath) > constant.MaxCgroupPathLen { - return nil, errors.Errorf("length of cgroup path exceeds max limit %d", constant.MaxCgroupPathLen) - } - cgroupMap[kind] = fullPath - } - - return cgroupMap, nil -} - -func checkCgroupPath(path string) error { - pathPrefix, blacklist := "kubepods", []string{"kubepods", "kubepods/besteffort", "kubepods/burstable"} - cPath := filepath.Clean(path) - - if 
!strings.HasPrefix(cPath, pathPrefix) { - return errors.Errorf("invalid cgroup path %v, should start with %v", path, pathPrefix) - } - - for _, invalidPath := range blacklist { - if cPath == invalidPath { - return errors.Errorf("invalid cgroup path %v, without podID", path) - } - } - - return nil -} diff --git a/pkg/qos/qos_test.go b/pkg/qos/qos_test.go deleted file mode 100644 index c90a64912390628674a6a0ebef2d33fde69318ab..0000000000000000000000000000000000000000 --- a/pkg/qos/qos_test.go +++ /dev/null @@ -1,392 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. 
-// Author: Xiang Li -// Create: 2021-04-17 -// Description: QoS testing - -package qos - -import ( - "fmt" - "io/ioutil" - "os" - "path/filepath" - "reflect" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/typedef" -) - -type getQosTestArgs struct { - root string - file string -} - -type getQosTestCase struct { - name string - args getQosTestArgs - want int - wantErr bool -} - -const ( - qosFileWithValueNegativeOne string = "qos_level_with_negative_one" - qosFileWithValueZero string = "qos_level_with_value_zero" - qosFileWithValueInvalid string = "qos_level_with_value_invalid" -) - -func newGetTestCases(qosDir string) []getQosTestCase { - return []getQosTestCase{ - { - name: "TC1-get qos diff with value -1", - args: getQosTestArgs{root: qosDir, file: qosFileWithValueNegativeOne}, - want: 1, - wantErr: true, - }, - { - name: "TC2-get qos ok with value 0", - args: getQosTestArgs{root: qosDir, file: qosFileWithValueZero}, - want: 0, - wantErr: false, - }, - { - name: "TC3-get qos failed with invalid value", - args: getQosTestArgs{root: qosDir, file: qosFileWithValueInvalid}, - want: 1, - wantErr: true, - }, - { - name: "TC4-get qos failed with invalid file", - args: getQosTestArgs{root: qosDir, file: "file/not/exist"}, - want: 1, - wantErr: true, - }, - { - name: "TC5-get qos failed with not exist file", - args: getQosTestArgs{root: "/path/not/exist/file", file: "file_not_exist"}, - want: 1, - wantErr: true, - }, - } -} - -// test_rubik_check_cgroup_qoslevel_with_podinfo_0001 -func Test_getQos(t *testing.T) { - err := os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - assert.NoError(t, err) - defer os.RemoveAll(constant.TmpTestDir) - qosDir, err := ioutil.TempDir(constant.TmpTestDir, "qos") - assert.NoError(t, err) - - os.MkdirAll(filepath.Join(qosDir, "diff"), constant.DefaultDirMode) - err = ioutil.WriteFile(filepath.Join(qosDir, qosFileWithValueNegativeOne), []byte("-1"), 
constant.DefaultFileMode) - assert.NoError(t, err) - err = ioutil.WriteFile(filepath.Join(qosDir, "diff", qosFileWithValueNegativeOne), []byte("0"), - constant.DefaultFileMode) - assert.NoError(t, err) - for _, dir := range []string{"", "diff"} { - err = ioutil.WriteFile(filepath.Join(qosDir, dir, qosFileWithValueZero), []byte("0"), - constant.DefaultFileMode) - assert.NoError(t, err) - err = ioutil.WriteFile(filepath.Join(qosDir, dir, qosFileWithValueInvalid), []byte("a"), - constant.DefaultFileMode) - assert.NoError(t, err) - } - - tests := newGetTestCases(qosDir) - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := getQosLevel(tt.args.root, tt.args.file) - if (err != nil) != tt.wantErr { - t.Errorf("getQosLevel() error = %v, wantErr %v", err, tt.wantErr) - return - } - if got != tt.want { - t.Errorf("getQosLevel() got = %v, want %v", got, tt.want) - } - }) - } -} - -type setQoSTestArgs struct { - root string - file string - qosLevel int -} - -type setQosTestCase struct { - name string - args setQoSTestArgs - wantErr bool -} - -func newSetTestCases(qosDir string, qosFilePath *os.File) []setQosTestCase { - return []setQosTestCase{ - { - name: "TC1-set qos ok with value -1", - args: setQoSTestArgs{ - root: qosDir, - file: "cpu", - qosLevel: -1, - }, - wantErr: false, - }, - { - name: "TC1.1-set qos not ok with previous value is -1", - args: setQoSTestArgs{ - root: qosDir, - file: "cpu", - qosLevel: 0, - }, - wantErr: true, - }, - { - name: "TC2-set qos not ok with empty cgroup path", - args: setQoSTestArgs{ - root: "", - file: "cpu", - qosLevel: 0, - }, - wantErr: true, - }, - { - name: "TC3-set qos not ok with invalid cgroup path", - args: setQoSTestArgs{ - root: qosFilePath.Name(), - file: "cpu", - qosLevel: 0, - }, - wantErr: true, - }, - } -} - -// Test_setQos is setQosLevel function test -func Test_setQos(t *testing.T) { - err := os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - assert.NoError(t, err) - defer 
os.RemoveAll(constant.TmpTestDir) - qosDir, err := ioutil.TempDir(constant.TmpTestDir, "qos") - assert.NoError(t, err) - qosFilePath, err := ioutil.TempFile(qosDir, "qos_file") - assert.NoError(t, err) - - tests := newSetTestCases(qosDir, qosFilePath) - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if err := setQosLevel(tt.args.root, tt.args.file, tt.args.qosLevel); (err != nil) != tt.wantErr { - t.Errorf("setQosLevel() error = %v, wantErr %v", err, tt.wantErr) - } - }) - } - err = qosFilePath.Close() - assert.NoError(t, err) -} - -type fields struct { - CgroupRoot string - CgroupPath string -} - -type podInfoTestcase struct { - name string - fields fields - want map[string]string - wantErr bool -} - -func newPodInfoTestcases(cgRoot string) []podInfoTestcase { - return []podInfoTestcase{ - { - name: "TC1-get cgroup path ok with pre-define cgroupRoot", - fields: fields{ - CgroupRoot: cgRoot, - CgroupPath: "kubepods/podaaa", - }, - want: map[string]string{"cpu": filepath.Join(cgRoot, "cpu", "kubepods/podaaa"), - "memory": filepath.Join(cgRoot, "memory", "kubepods/podaaa")}, - }, - { - name: "TC2-get cgroup path ok with non define cgroupRoot", - fields: fields{ - CgroupPath: "kubepods/podbbb", - }, - want: map[string]string{"cpu": filepath.Join(constant.DefaultCgroupRoot, "cpu", - "kubepods/podbbb"), "memory": filepath.Join(constant.DefaultCgroupRoot, "memory", "kubepods/podbbb")}, - }, - { - name: "TC3-get invalid cgroup path", - fields: fields{CgroupPath: "invalid/cgroup/prefix/podbbb"}, - wantErr: true, - }, - { - name: "TC4-cgroup path too long", - fields: fields{ - CgroupPath: "kubepods/cgroup/prefix/podbbb" + strings.Repeat("/long", constant.MaxCgroupPathLen), - }, - wantErr: true, - }, - { - name: "TC5-cgroup invalid cgroup path kubepods", - fields: fields{CgroupPath: "kubepods"}, - wantErr: true, - }, - { - name: "TC6-cgroup invalid cgroup path kubepods/besteffort", - fields: fields{CgroupRoot: "", CgroupPath: 
"kubepods/besteffort/../besteffort"}, - wantErr: true, - }, - { - name: "TC7-cgroup invalid cgroup path kubepods/burstable", - fields: fields{CgroupRoot: "", CgroupPath: "kubepods/burstable//"}, - wantErr: true, - }, - } -} - -// test_rubik_check_podinfo_0002 -func TestPodInfo_CgroupFullPath(t *testing.T) { - cgRoot := filepath.Join(constant.TmpTestDir, t.Name()) - - tests := newPodInfoTestcases(cgRoot) - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - pod := &typedef.PodInfo{ - CgroupRoot: tt.fields.CgroupRoot, - CgroupPath: tt.fields.CgroupPath, - } - cgroupMap, err := initCgroupPath(pod.CgroupRoot, pod.CgroupPath) - fmt.Println(err) - if (err != nil) != tt.wantErr { - t.Errorf("initCgroupPath() = %v, want %v", err, tt.wantErr) - } else if !reflect.DeepEqual(cgroupMap, tt.want) { - t.Errorf("initCgroupPath() = %v, want %v", cgroupMap, tt.want) - } - }) - } -} - -type setQosFields struct { - CgroupPath string - Offline bool - PodID string -} - -type setQosTestCase2 struct { - name string - fields setQosFields - wantSetErr bool - wantValidateErr bool -} - -func newSetQosTestCase2() []setQosTestCase2 { - repeatName := 70 - return []setQosTestCase2{ - { - name: "TC1-setup qos ok", - fields: setQosFields{ - CgroupPath: "kubepods/besteffort/poda5cb0d50-1234-1234-1234-e0ae4b7884b2", - Offline: true, - PodID: "poda5cb0d50-1234-1234-1234-e0ae4b7884b2", - }, - }, - { - name: "TC3-setup too long podID", - fields: setQosFields{ - CgroupPath: "kubepods/besteffort/poda5cb0d50-1234-1234-1234-e0ae4b7884b3", - Offline: false, - PodID: "poda5cb0d50" + strings.Repeat("-1234", repeatName), - }, - wantValidateErr: true, - wantSetErr: true, - }, - { - name: "TC4-setup invalid cgroupPath", - fields: setQosFields{ - CgroupPath: "besteffort/poda5cb0d50-1234-1234-e0ae4b7884b2", - Offline: true, - PodID: "poda5cb0d50-1234-1234-e0ae4b7884b2", - }, - wantValidateErr: true, - wantSetErr: true, - }, - { - name: "TC5-not exist qos file", - fields: setQosFields{ - CgroupPath: 
"kubepods/besteffort/poda5cb0d50-1234-1234-1234-e0ae4b7884b3", - Offline: true, - PodID: "poda5cb0d50-1234-1234-1234-e0ae4b7884b2", - }, - wantValidateErr: true, - wantSetErr: true, - }, - } -} - -// test_rubik_check_podinfo_0001 -// test_rubik_check_cgroup_qoslevel_with_podinfo_0001 -// test_rubik_check_cgroup_qoslevel_with_podinfo_0002 -func TestPodInfo_SetQos(t *testing.T) { - err := os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - assert.NoError(t, err) - defer os.RemoveAll(constant.TmpTestDir) - cgRoot, err := ioutil.TempDir(constant.TmpTestDir, t.Name()) - assert.NoError(t, err) - podCPUCgroup := filepath.Join(cgRoot, "/cpu/kubepods/besteffort/poda5cb0d50-1234-1234-1234-e0ae4b7884b2") - podMemoryCgroup := filepath.Join(cgRoot, "/memory/kubepods/besteffort/poda5cb0d50-1234-1234-1234-e0ae4b7884b2") - tests := newSetQosTestCase2() - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - pod := &typedef.PodInfo{ - UID: tt.fields.PodID, - CgroupPath: tt.fields.CgroupPath, - CgroupRoot: cgRoot, - Offline: tt.fields.Offline, - } - if err != nil { - if !tt.wantSetErr { - t.Errorf("new PodInfo for %s failed: %v", tt.fields.PodID, err) - } - return - } - os.MkdirAll(podCPUCgroup, constant.DefaultFileMode) - os.MkdirAll(podMemoryCgroup, constant.DefaultFileMode) - if err := setQos(pod); (err != nil) != tt.wantSetErr { - t.Errorf("setQos() error = %v, wantErr %v", err, tt.wantSetErr) - } - if err := validateQos(pod); (err != nil) != tt.wantValidateErr { - t.Errorf("validateQos() error = %v, wantErr %v", err, tt.wantValidateErr) - } - err = os.RemoveAll(podCPUCgroup) - assert.NoError(t, err) - err = os.RemoveAll(podMemoryCgroup) - assert.NoError(t, err) - }) - } - - // test cgroup qoslevel differ with pod qoslevel - pod := &typedef.PodInfo{ - UID: "poda5cb0d50-1234-1234-1234-e0ae4b7884b2", - CgroupPath: "kubepods/besteffort/poda5cb0d50-1234-1234-1234-e0ae4b7884b2", - CgroupRoot: cgRoot, - Offline: false, - } - os.MkdirAll(podCPUCgroup, 
constant.DefaultFileMode) - os.MkdirAll(podMemoryCgroup, constant.DefaultFileMode) - err = setQos(pod) - assert.NoError(t, err) - pod.Offline = true - err = validateQos(pod) - assert.Equal(t, true, err != nil) -} diff --git a/pkg/quota/quota_burst.go b/pkg/quota/quota_burst.go deleted file mode 100644 index 641e51422c14f007bc1dc0f8a11a1a38c2a328f5..0000000000000000000000000000000000000000 --- a/pkg/quota/quota_burst.go +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. 
-// Author: Yanting Song -// Create: 2022-07-19 -// Description: quota burst setting for pods - -// Package quota is for quota settings -package quota - -import ( - "io/ioutil" - "math/big" - "os" - "path/filepath" - - "github.com/pkg/errors" - - "isula.org/rubik/pkg/constant" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" -) - - -// SetPodsQuotaBurst sync pod's burst quota when autoconfig is set -func SetPodsQuotaBurst(podInfos map[string]*typedef.PodInfo) { - for _, pi := range podInfos { - setPodQuotaBurst(pi) - } -} - -// UpdatePodQuotaBurst update pod's burst quota -func UpdatePodQuotaBurst(opi, npi *typedef.PodInfo) { - // cpm.GetPod returns nil if pod.UID not exist - if opi == nil || npi == nil { - log.Errorf("quota-burst got invalid nil podInfo") - return - } - if opi.QuotaBurst == npi.QuotaBurst { - return - } - setPodQuotaBurst(npi) -} - -// SetPodQuotaBurst set each container's cpu.cfs_burst_ns -func SetPodQuotaBurst(podInfo *typedef.PodInfo) { - // cpm.GetPod returns nil if pod.UID not exist - if podInfo == nil { - log.Errorf("quota-burst got invalid nil podInfo") - return - } - setPodQuotaBurst(podInfo) -} - -func setPodQuotaBurst(podInfo *typedef.PodInfo) { - if podInfo.QuotaBurst == constant.InvalidBurst { - return - } - burst := big.NewInt(podInfo.QuotaBurst).String() - for _, c := range podInfo.Containers { - err := setCtrQuotaBurst([]byte(burst), c) - if err != nil { - log.Errorf("set container quota burst failed: %v", err) - } - } -} - -func setCtrQuotaBurst(burst []byte, c *typedef.ContainerInfo) error { - const ( - fname = "cpu.cfs_burst_us" - subsys = "cpu" - ) - cgpath := c.CgroupPath(subsys) - fpath := filepath.Join(cgpath, fname) - - if _, err := os.Stat(fpath); err != nil && os.IsNotExist(err) { - return errors.Errorf("quota-burst path=%v missing", fpath) - } - - if err := ioutil.WriteFile(fpath, burst, constant.DefaultFileMode); err != nil { - return errors.Errorf("quota-burst path=%v setting failed: %v", fpath, err) 
- } - log.Infof("quota-burst path=%v setting success", cgpath) - return nil -} diff --git a/pkg/quota/quota_burst_test.go b/pkg/quota/quota_burst_test.go deleted file mode 100644 index 7169efb356373c7f0740d2c7da23a70b48fc087c..0000000000000000000000000000000000000000 --- a/pkg/quota/quota_burst_test.go +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Yanting Song -// Create: 2022-07-19 -// Description: This file is used for test quota burst - -package quota - -import ( - "io/ioutil" - "os" - "path/filepath" - "strconv" - "testing" - - "github.com/stretchr/testify/assert" - - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/typedef" -) - -const ( - cfsBurstUs = "cpu.cfs_burst_us" - cpuSubsys = "cpu" -) - -var cis = []*typedef.ContainerInfo{ - { - Name: "FooCon", - ID: "testCon1", - PodID: "testPod1", - CgroupRoot: constant.TmpTestDir, - CgroupAddr: "kubepods/testPod1/testCon1", - }, - { - Name: "BarCon", - ID: "testCon2", - PodID: "testPod2", - CgroupRoot: constant.TmpTestDir, - CgroupAddr: "kubepods/testPod2/testCon2", - }, - { - Name: "BiuCon", - ID: "testCon3", - PodID: "testPod3", - CgroupRoot: constant.TmpTestDir, - CgroupAddr: "kubepods/testPod3/testCon3", - }, - { - Name: "NotExist", - ID: "testCon4", - PodID: "testPod4", - CgroupRoot: constant.TmpTestDir, - CgroupAddr: "kubepods/testPod4/testCon4", - }, -} - -var pis = []*typedef.PodInfo{ - // valid QuotaBurstQuota value - { - Name: "FooPod", - UID: cis[0].PodID, - 
Containers: map[string]*typedef.ContainerInfo{ - cis[0].Name: cis[0], - }, - QuotaBurst: 0, - }, - // invalid QuotaBurstQuota value - { - Name: "BarPod", - UID: cis[1].PodID, - Containers: map[string]*typedef.ContainerInfo{ - cis[1].Name: cis[1], - }, - QuotaBurst: -1, - }, - // valid QuotaBurstQuota value - { - Name: "BiuPod", - UID: cis[2].PodID, - Containers: map[string]*typedef.ContainerInfo{ - cis[2].Name: cis[2], - }, - QuotaBurst: 1, - }, -} - -var notExistPod = &typedef.PodInfo{ - Name: "NotExistPod", - UID: cis[3].PodID, - Containers: map[string]*typedef.ContainerInfo{ - cis[3].Name: cis[3], - }, - QuotaBurst: 0, -} - -type updateQuotaBurstTestCase struct { - oldPodInfo *typedef.PodInfo - newPodInfo *typedef.PodInfo - name string - wantValue string -} - -type podQuotaBurstTestCase struct { - podInfo *typedef.PodInfo - name string - wantValue string -} - -func createCgroupPath(t *testing.T) error { - for _, ci := range pis { - for _, ctr := range ci.Containers { - ctrAddr := filepath.Join(constant.TmpTestDir, cpuSubsys, ctr.CgroupAddr) - if err := os.MkdirAll(ctrAddr, constant.DefaultDirMode); err != nil { - return err - } - if _, err := os.Create(filepath.Join(ctrAddr, cfsBurstUs)); err != nil { - return err - } - } - } - return nil -} - -// TestSetPodsQuotaBurst tests auto check pod's quota burst -func TestSetPodsQuotaBurst(t *testing.T) { - err := os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - assert.NoError(t, err) - defer os.RemoveAll(constant.TmpTestDir) - - pods := make(map[string]*typedef.PodInfo, len(pis)) - for _, pi := range pis { - pods[pi.Name] = pi - } - if err := createCgroupPath(t); err != nil { - t.Errorf("createCgroupPath got %v ", err) - } - - SetPodsQuotaBurst(pods) - - for i, pi := range pis { - ctrAddr := filepath.Join(constant.TmpTestDir, cpuSubsys, cis[i].CgroupAddr) - var quotaBurst []byte - if quotaBurst, err = ioutil.ReadFile(filepath.Join(ctrAddr, cfsBurstUs)); err != nil { - t.Errorf("readFile got %v ", err) - } - 
var expected string - if pi.QuotaBurst == constant.InvalidBurst { - expected = "" - } else { - expected = strconv.Itoa(int(pi.QuotaBurst)) - } - assert.Equal(t, expected, string(quotaBurst)) - } -} - -// TestUpdateCtrQuotaBurst tests update pod's quota burst -func TestUpdateCtrQuotaBurst(t *testing.T) { - err := os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - assert.NoError(t, err) - defer os.RemoveAll(constant.TmpTestDir) - - if err := createCgroupPath(t); err != nil { - t.Errorf("createCgroupPath got %v ", err) - } - for _, tt := range []updateQuotaBurstTestCase{ - { - name: "TC1-update quota burst with old podinfo is nil", - oldPodInfo: nil, - newPodInfo: pis[0], - wantValue: "", - }, - { - name: "TC2-update quota burst without change", - oldPodInfo: pis[0], - newPodInfo: pis[0], - wantValue: "", - }, - { - name: "TC3-update quota burst", - oldPodInfo: pis[1], - newPodInfo: pis[0], - wantValue: "0", - }, - } { - t.Run(tt.name, func(t *testing.T) { - UpdatePodQuotaBurst(tt.oldPodInfo, tt.newPodInfo) - for _, ctr := range tt.newPodInfo.Containers { - ctrAddr := filepath.Join(constant.TmpTestDir, cpuSubsys, ctr.CgroupAddr) - var quotaBurst []byte - if quotaBurst, err = ioutil.ReadFile(filepath.Join(ctrAddr, cfsBurstUs)); err != nil { - t.Errorf("readFile got %v ", err) - } - assert.Equal(t, tt.wantValue, string(quotaBurst)) - } - }) - } -} - -// TestSetPodQuotaBurst tests set quota burst of pod -func TestSetPodQuotaBurst(t *testing.T) { - err := os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - assert.NoError(t, err) - defer os.RemoveAll(constant.TmpTestDir) - if err := createCgroupPath(t); err != nil { - t.Errorf("createCgroupPath got %v ", err) - } - err = setCtrQuotaBurst([]byte("0"), notExistPod.Containers["NotExist"]) - assert.Contains(t, err.Error(), "missing") - - for i, tt := range []podQuotaBurstTestCase{ - { - name: "TC1-set pod burst with valid quota value 0", - podInfo: pis[0], - wantValue: "0", - }, - { - name: "TC2-set pod burst 
with invalid quota value", - podInfo: pis[1], - wantValue: "", - }, - { - name: "TC3-set pod burst with nil podinfo", - podInfo: nil, - wantValue: "", - }, - } { - t.Run(tt.name, func(t *testing.T) { - SetPodQuotaBurst(tt.podInfo) - ctrAddr := filepath.Join(constant.TmpTestDir, cpuSubsys, cis[i].CgroupAddr) - var quotaBurst []byte - if quotaBurst, err = ioutil.ReadFile(filepath.Join(ctrAddr, cfsBurstUs)); err != nil { - t.Errorf("readFile got %v ", err) - } - assert.Equal(t, tt.wantValue, string(quotaBurst)) - }) - } -} diff --git a/pkg/autoconfig/autoconfig_test.go b/pkg/rubik/import.go similarity index 44% rename from pkg/autoconfig/autoconfig_test.go rename to pkg/rubik/import.go index 3abd1b46d875f129b29554c6f388fb2d6d845c51..a083f9a75dc7a14a37ede5f4daeb2967c4621062 100644 --- a/pkg/autoconfig/autoconfig_test.go +++ b/pkg/rubik/import.go @@ -1,4 +1,4 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. // rubik licensed under the Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan PSL v2. // You may obtain a copy of Mulan PSL v2 at: @@ -7,32 +7,19 @@ // IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR // PURPOSE. // See the Mulan PSL v2 for more details. 
-// Author: Danni Xia -// Create: 2022-07-10 -// Description: autoconfig test +// Author: Jiaqi Yang +// Create: 2023-01-28 +// Description: This file defines the services supported by rubik -package autoconfig +// Package rubik defines the overall logic +package rubik import ( - "testing" - - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - "k8s.io/client-go/kubernetes" + // introduce packages to auto register service + _ "isula.org/rubik/pkg/services/dyncache" + _ "isula.org/rubik/pkg/services/iocost" + _ "isula.org/rubik/pkg/services/preemption" + _ "isula.org/rubik/pkg/services/quotaburst" + _ "isula.org/rubik/pkg/services/quotaturbo" + _ "isula.org/rubik/pkg/version" ) - -// TestInit test Init -func TestInit(t *testing.T) { - kb := &kubernetes.Clientset{} - err := Init(kb, "nodeName") - assert.NoError(t, err) -} - -// TestAddUpdateDelHandler test Handler -func TestAddUpdateDelHandler(t *testing.T) { - oldObj := corev1.Pod{} - newObj := corev1.Pod{} - addHandler(oldObj) - updateHandler(oldObj, newObj) - deleteHandler(oldObj) -} diff --git a/pkg/rubik/rubik.go b/pkg/rubik/rubik.go index 14e9cf821c9b764e2d64f222dd14cf335f589b27..94dc37380007db70d5545c65b01890cb9712fd83 100644 --- a/pkg/rubik/rubik.go +++ b/pkg/rubik/rubik.go @@ -1,4 +1,4 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. // rubik licensed under the Mulan PSL v2. // You can use this software according to the terms and conditions of the Mulan PSL v2. // You may obtain a copy of Mulan PSL v2 at: @@ -7,11 +7,11 @@ // IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR // PURPOSE. // See the Mulan PSL v2 for more details. 
-// Author: Danni Xia -// Create: 2021-05-20 -// Description: This file is used for rubik struct +// Author: Jiaqi Yang +// Create: 2023-01-28 +// Description: This file defines rubik agent to control the life cycle of each component -// Package rubik is for rubik struct +// Package rubik defines the overall logic package rubik import ( @@ -19,343 +19,166 @@ import ( "fmt" "os" "os/signal" - "sync/atomic" "syscall" - "github.com/coreos/go-systemd/daemon" - "github.com/pkg/errors" "golang.org/x/sys/unix" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" - "isula.org/rubik/pkg/autoconfig" - "isula.org/rubik/pkg/blkio" - "isula.org/rubik/pkg/cachelimit" - "isula.org/rubik/pkg/checkpoint" + "isula.org/rubik/pkg/api" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/common/util" "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/iocost" - "isula.org/rubik/pkg/memory" - "isula.org/rubik/pkg/perf" - "isula.org/rubik/pkg/qos" - "isula.org/rubik/pkg/quota" - "isula.org/rubik/pkg/sync" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/util" + "isula.org/rubik/pkg/core/publisher" + "isula.org/rubik/pkg/core/typedef/cgroup" + "isula.org/rubik/pkg/informer" + "isula.org/rubik/pkg/podmanager" + "isula.org/rubik/pkg/services" ) -// Rubik defines rubik struct -type Rubik struct { - config *config.Config - kubeClient *kubernetes.Clientset - cpm *checkpoint.Manager - mm *memory.MemoryManager - nodeName string +// Agent runs a series of rubik services and manages data +type Agent struct { + config *config.Config + podManager *podmanager.PodManager + informer api.Informer + servicesManager *ServiceManager } -// NewRubik creates a new rubik object -func NewRubik(cfgPath string) (*Rubik, error) { - cfg, err := config.NewConfig(cfgPath) - if err != nil { - return nil, errors.Errorf("load config failed: %v", err) - 
} - - if err = log.InitConfig(cfg.LogDriver, cfg.LogDir, cfg.LogLevel, int64(cfg.LogSize)); err != nil { - return nil, errors.Errorf("init log config failed: %v", err) - } - - r := &Rubik{ - config: cfg, - } - - if err := r.initComponents(); err != nil { +// NewAgent returns an agent for given configuration +func NewAgent(cfg *config.Config) (*Agent, error) { + publisher := publisher.GetPublisherFactory().GetPublisher(publisher.GENERIC) + serviceManager := NewServiceManager() + if err := serviceManager.InitServices(cfg.Agent.EnabledFeatures, + cfg.UnwrapServiceConfig(), cfg); err != nil { return nil, err } - - return r, nil -} - -func (r *Rubik) initComponents() error { - if err := r.initKubeClient(); err != nil { - return err - } - - if err := r.initCheckpoint(); err != nil { - return err - } - - if err := r.initEventHandler(); err != nil { - return err - } - - if err := r.initNodeConfig(); err != nil { - return err - } - - if r.config.MemCfg.Enable { - if err := r.initMemoryManager(); err != nil { - return err - } + a := &Agent{ + config: cfg, + podManager: podmanager.NewPodManager(publisher), + servicesManager: serviceManager, } - - return nil -} - -// Monitor monitors shutdown signal -func (r *Rubik) Monitor() { - <-config.ShutdownChan - os.Exit(1) -} - -// Sync sync pods qos level -func (r *Rubik) Sync() error { - if !r.config.AutoCheck { - return nil - } - return sync.Sync(r.cpm.ListOfflinePods()) + return a, nil } -// CacheLimit init cache limit module -func (r *Rubik) CacheLimit() error { - if r.config.CacheCfg.Enable { - if r.cpm == nil { - return fmt.Errorf("checkpoint is not initialized before cachelimit") - } - return cachelimit.Init(r.cpm, &r.config.CacheCfg) - } - return nil -} - -// QuotaBurst sync all pod's cpu burst quota -func (r *Rubik) QuotaBurst() { - if r.config.AutoCheck { - quota.SetPodsQuotaBurst(r.cpm.ListAllPods()) - } -} - -// initKubeClient initialize kubeClient if autoconfig is enabled -func (r *Rubik) initKubeClient() error { - conf, 
err := rest.InClusterConfig() - if err != nil { +// Run starts and runs the agent until receiving stop signal +func (a *Agent) Run(ctx context.Context) error { + log.Infof("agent run with config:\n%s", a.config.String()) + if err := a.startInformer(ctx); err != nil { return err } - - kubeClient, err := kubernetes.NewForConfig(conf) - if err != nil { + if err := a.startServiceHandler(ctx); err != nil { return err } - - r.kubeClient = kubeClient - log.Infof("the kube-client is initialized successfully") + <-ctx.Done() + a.stopInformer() + a.stopServiceHandler() return nil } -// initEventHandler initialize the event handler and set the rubik callback function corresponding to the pod event. -func (r *Rubik) initEventHandler() error { - if r.kubeClient == nil { - return fmt.Errorf("kube-client is not initialized") - } - - autoconfig.Backend = r - if err := autoconfig.Init(r.kubeClient, r.nodeName); err != nil { - return err - } - - log.Infof("the event-handler is initialized successfully") - return nil -} - -func (r *Rubik) initMemoryManager() error { - mm, err := memory.NewMemoryManager(r.cpm, r.config.MemCfg) +// startInformer starts informer to obtain external data +func (a *Agent) startInformer(ctx context.Context) error { + publisher := publisher.GetPublisherFactory().GetPublisher(publisher.GENERIC) + informer, err := informer.GetInformerFactory().GetInformerCreator(informer.APISERVER)(publisher) if err != nil { - return err + return fmt.Errorf("fail to set informer: %v", err) } - - r.mm = mm - log.Infof("init memory manager ok") + if err := informer.Subscribe(a.podManager); err != nil { + return fmt.Errorf("fail to subscribe informer: %v", err) + } + a.informer = informer + informer.Start(ctx) return nil } -func (r *Rubik) initCheckpoint() error { - if r.kubeClient == nil { - return fmt.Errorf("kube-client is not initialized") - } - - cpm := checkpoint.NewManager(r.config.CgroupRoot) - node := os.Getenv(constant.NodeNameEnvKey) - if node == "" { - return 
fmt.Errorf("missing %s", constant.NodeNameEnvKey) - } - - r.nodeName = node - const specNodeNameFormat = "spec.nodeName=%s" - pods, err := r.kubeClient.CoreV1().Pods("").List(context.Background(), - metav1.ListOptions{FieldSelector: fmt.Sprintf(specNodeNameFormat, node)}) - if err != nil { - return err - } - cpm.SyncFromCluster(pods.Items) - - r.cpm = cpm - log.Infof("the checkpoint is initialized successfully") - return nil +// stopInformer stops the informer +func (a *Agent) stopInformer() { + a.informer.Unsubscribe(a.podManager) } -func (r *Rubik) initNodeConfig() error { - for _, nc := range r.config.NodeConfig { - if nc.NodeName == r.nodeName && nc.IOcostEnable { - iocost.SetIOcostEnable(true) - return iocost.ConfigIOcost(nc.IOcostConfig) - } +// startServiceHandler starts and runs the service +func (a *Agent) startServiceHandler(ctx context.Context) error { + if err := a.servicesManager.Setup(a.podManager); err != nil { + return fmt.Errorf("error setting service handler: %v", err) } + a.servicesManager.Start(ctx) + a.podManager.Subscribe(a.servicesManager) return nil } -// AddEvent handle add event from informer -func (r *Rubik) AddEvent(pod *corev1.Pod) { - // Rubik does not process add event of pods that are not in the running state. 
- if pod.Status.Phase != corev1.PodRunning { - return - } - r.cpm.AddPod(pod) - - pi := r.cpm.GetPod(pod.UID) - if err := qos.SetQosLevel(pi); err != nil { - log.Errorf("AddEvent handle error: %v", err) - } - quota.SetPodQuotaBurst(pi) - - if r.config.BlkioCfg.Enable { - blkio.SetBlkio(pod) - } - if r.config.MemCfg.Enable { - r.mm.UpdateConfig(pi) - } - iocost.SetPodWeight(pi) +// stopServiceHandler stops sending data to the ServiceManager +func (a *Agent) stopServiceHandler() { + a.podManager.Unsubscribe(a.servicesManager) + a.servicesManager.Stop() } -// UpdateEvent handle update event from informer -func (r *Rubik) UpdateEvent(oldPod *corev1.Pod, newPod *corev1.Pod) { - // Rubik does not process updates of pods that are not in the running state - // if the container is not running, delete it. - if newPod.Status.Phase != corev1.PodRunning { - r.cpm.DelPod(newPod.UID) - return +// runAgent creates and runs rubik's agent +func runAgent(ctx context.Context) error { + // 1. read configuration + c := config.NewConfig(config.JSON) + if err := c.LoadConfig(constant.ConfigFile); err != nil { + return fmt.Errorf("error loading config: %v", err) } - // after the Rubik is started, the pod adding events are transferred through the update handler of Kubernetes. - if !r.cpm.PodExist(newPod.UID) { - r.cpm.AddPod(newPod) - - if r.config.BlkioCfg.Enable { - blkio.SetBlkio(newPod) - } - - pi := r.cpm.GetPod(newPod.UID) - quota.SetPodQuotaBurst(pi) - - if r.config.MemCfg.Enable { - r.mm.UpdateConfig(pi) - } - iocost.SetPodWeight(pi) - } else { - opi := r.cpm.GetPod(oldPod.UID) - r.cpm.UpdatePod(newPod) - if r.config.BlkioCfg.Enable { - blkio.WriteBlkio(oldPod, newPod) - } - npi := r.cpm.GetPod(newPod.UID) - quota.UpdatePodQuotaBurst(opi, npi) - iocost.SetPodWeight(npi) + // 2. 
enable log system + if err := log.InitConfig(c.Agent.LogDriver, c.Agent.LogDir, c.Agent.LogLevel, c.Agent.LogSize); err != nil { + return fmt.Errorf("error initializing log: %v", err) } - cpmPod := r.cpm.GetPod(newPod.UID) - if err := qos.UpdateQosLevel(cpmPod); err != nil { - log.Errorf("UpdateEvent handle error: %v", err) - } -} + // 3. enable cgroup system + cgroup.InitMountDir(c.Agent.CgroupRoot) -// DeleteEvent handle update event from informer -func (r *Rubik) DeleteEvent(pod *corev1.Pod) { - r.cpm.DelPod(pod.UID) -} + // 4. init service components + services.InitServiceComponents(defaultRubikFeature) -func run(fcfg string) int { - rubik, err := NewRubik(fcfg) + // 5. Create and run the agent + agent, err := NewAgent(c) if err != nil { - fmt.Printf("new rubik failed: %v\n", err) - return constant.ErrCodeFailed - } - - if rubik.mm != nil { - rubik.mm.Run() + return fmt.Errorf("error new agent: %v", err) } - - log.Infof("perf hw support = %v", perf.HwSupport()) - if err = rubik.CacheLimit(); err != nil { - log.Errorf("cache limit init error: %v", err) - return constant.ErrCodeFailed + if err := agent.Run(ctx); err != nil { + return fmt.Errorf("error running agent: %v", err) } - - rubik.QuotaBurst() - if err = rubik.Sync(); err != nil { - log.Errorf("sync qos level failed: %v", err) - } - - log.Logf("Start rubik with cfg\n%v", rubik.config) - go signalHandler() - - // Notify systemd that rubik is ready SdNotify() only tries to - // notify if the NOTIFY_SOCKET environment is set, so it's - // safe to call when systemd isn't present. - // Ignore the return values here because they're not valid for - // platforms that don't use systemd. For platforms that use - // systemd, rubik doesn't log if the notification failed. - _, _ = daemon.SdNotify(false, daemon.SdNotifyReady) - - rubik.Monitor() - - return 0 + return nil } -// Run start rubik server -func Run(fcfg string) int { +// Run runs agent and process signal +func Run() int { + // 0. 
file mask permission setting and parameter checking unix.Umask(constant.DefaultUmask) if len(os.Args) > 1 { fmt.Println("args not allowed") - return constant.ErrCodeFailed - } - - lock, err := util.CreateLockFile(constant.LockFile) + return constant.ArgumentErrorExitCode + } + // 1. apply file locks, only one rubik process can run at the same time + lock, err := util.LockFile(constant.LockFile) + defer func() { + lock.Close() + log.DropError(os.Remove(constant.LockFile)) + }() if err != nil { fmt.Printf("set rubik lock failed: %v, check if there is another rubik running\n", err) - return constant.ErrCodeFailed + return constant.RepeatRunExitCode } + defer util.UnlockFile(lock) - ret := run(fcfg) + ctx, cancel := context.WithCancel(context.Background()) + // 2. handle external signals + go handleSignals(cancel) - util.RemoveLockFile(lock, constant.LockFile) - return ret + // 3. run rubik-agent + if err := runAgent(ctx); err != nil { + log.Errorf("error running rubik agent: %v", err) + return constant.ErrorExitCode + } + return constant.NormalExitCode } -func signalHandler() { +func handleSignals(cancel context.CancelFunc) { signalChan := make(chan os.Signal, 1) signal.Notify(signalChan, syscall.SIGTERM, syscall.SIGINT) - - var forceCount int32 = 3 for sig := range signalChan { if sig == syscall.SIGTERM || sig == syscall.SIGINT { - if atomic.AddInt32(&config.ShutdownFlag, 1) == 1 { - log.Infof("Signal %v received and starting exit...", sig) - close(config.ShutdownChan) - } - - if atomic.LoadInt32(&config.ShutdownFlag) >= forceCount { - log.Infof("3 interrupts signal received, forcing rubik shutdown") - os.Exit(1) - } + log.Infof("signal %v received and starting exit...", sig) + cancel() } } } diff --git a/pkg/rubik/rubik_feature.go b/pkg/rubik/rubik_feature.go new file mode 100644 index 0000000000000000000000000000000000000000..9cd41bc234948541b8ccb04bfba9048bd26cd629 --- /dev/null +++ b/pkg/rubik/rubik_feature.go @@ -0,0 +1,51 @@ +// Copyright (c) Huawei 
Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: hanchao +// Create: 2023-03-11 +// Description: This file for defining rubik support features + +// Package rubik +package rubik + +import ( + "isula.org/rubik/pkg/feature" + "isula.org/rubik/pkg/services" +) + +var defaultRubikFeature = []services.FeatureSpec{ + { + Name: feature.PreemptionFeature, + Default: true, + }, + { + Name: feature.DynCacheFeature, + Default: true, + }, + { + Name: feature.IOLimitFeature, + Default: true, + }, + { + Name: feature.IOCostFeature, + Default: true, + }, + { + Name: feature.DynMemoryFeature, + Default: true, + }, + { + Name: feature.QuotaBurstFeature, + Default: true, + }, + { + Name: feature.QuotaTurboFeature, + Default: true, + }, +} diff --git a/pkg/rubik/rubik_test.go b/pkg/rubik/rubik_test.go deleted file mode 100644 index 1a23b36f46e14d20b8f43b01c6b88cd86d9ab25f..0000000000000000000000000000000000000000 --- a/pkg/rubik/rubik_test.go +++ /dev/null @@ -1,275 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. 
-// See the Mulan PSL v2 for more details. -// Author: Danni Xia -// Create: 2021-05-20 -// Description: This file is used for rubik package test - -package rubik - -import ( - "fmt" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "strings" - "syscall" - "testing" - "time" - - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - - "isula.org/rubik/pkg/checkpoint" - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - "isula.org/rubik/pkg/typedef" - "isula.org/rubik/pkg/util" -) - -func TestNewRubik(t *testing.T) { - os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - defer os.RemoveAll(constant.TmpTestDir) - tmpConfigFile := filepath.Join(constant.TmpTestDir, "config.json") - os.Remove(tmpConfigFile) - - os.MkdirAll(tmpConfigFile, constant.DefaultDirMode) - _, err := NewRubik(tmpConfigFile) - assert.Equal(t, true, err != nil) - - err = os.RemoveAll(tmpConfigFile) - assert.NoError(t, err) - _, err = os.Create(tmpConfigFile) - assert.NoError(t, err) - - err = reCreateConfigFile(tmpConfigFile, `{ - "autoCheck": true, - "logDriver": "file", - "logDir": "/tmp/rubik-test", - "logSize": 2048, - "logLevel": "debug", - "cgroupRoot": "/tmp/rubik-test/cgroup" -}`) - assert.Equal(t, true, strings.Contains(err.Error(), "must be defined")) - - err = reCreateConfigFile(tmpConfigFile, `{ - "logLevel": "debugabc" -}`) - assert.Equal(t, true, err != nil) -} - -func reCreateConfigFile(path, content string) error { - err := os.Remove(path) - if err != nil { - return err - } - fd, err := os.Create(path) - if err != nil { - return err - } - defer fd.Close() - _, err = fd.WriteString(content) - if err != nil { - return err - } - _, err = NewRubik(path) - if err != nil { - return err - } - - return nil -} - -var cfgA = ` -{ - "autoCheck": true, - "logDriver": "file", - "logDir": "/tmp/rubik-test", - "logSize": 2048, - "logLevel": "debug", - "cgroupRoot": 
"/tmp/rubik-test/cgroup" -}` - -// TestRunAbnormality test run server abnormality -func TestRunAbnormality(t *testing.T) { - old := os.Args - defer func() { - os.Args = old - }() - configFile := "config.json" - fcfg := filepath.Join(constant.TmpTestDir, configFile) - err := os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - if err != nil { - assert.NoError(t, err) - } - err = ioutil.WriteFile(fcfg, []byte(cfgA), constant.DefaultFileMode) - if err != nil { - assert.NoError(t, err) - } - // case: argument not valid - os.Args = []string{"invalid", "failed"} - ret := Run(fcfg) - assert.Equal(t, constant.ErrCodeFailed, ret) - os.Args = []string{"rubik"} - // case: file is locked - lock, err := util.CreateLockFile(constant.LockFile) - ret = Run(fcfg) // set rubik lock failed: ... - assert.Equal(t, constant.ErrCodeFailed, ret) - util.RemoveLockFile(lock, constant.LockFile) - // case: invalid config.json - err = ioutil.WriteFile(fcfg, []byte("invalid"), constant.DefaultFileMode) - if err != nil { - assert.NoError(t, err) - } - ret = Run(fcfg) - assert.Equal(t, constant.ErrCodeFailed, ret) -} - -// TestRun test run server -func TestRun(t *testing.T) { - if os.Getenv("BE_TESTRUN") == "1" { - // case: config.json missing, use default config.json - ret := Run("/dev/should/not/exist") - fmt.Println(ret) - return - } - cmd := exec.Command(os.Args[0], "-test.run=TestRun") - cmd.Env = append(os.Environ(), "BE_TESTRUN=1") - err := cmd.Start() - assert.NoError(t, err) - sleepTime := 3 - time.Sleep(time.Duration(sleepTime) * time.Second) - err = cmd.Process.Signal(syscall.SIGINT) - assert.NoError(t, err) -} - -// TestCacheLimit is CacheLimit function test -func TestCacheLimit(t *testing.T) { - rubik := &Rubik{ - config: &config.Config{ - CacheCfg: config.CacheConfig{ - Enable: true, - DefaultLimitMode: "invalid", - DefaultResctrlDir: constant.TmpTestDir + "invalid", - }, - }, - cpm: &checkpoint.Manager{ - Checkpoint: &checkpoint.Checkpoint{ - Pods: 
make(map[string]*typedef.PodInfo), - }, - }, - } - - err := rubik.CacheLimit() - assert.Equal(t, true, err != nil) - rubik.config.CacheCfg.Enable = false - err = rubik.CacheLimit() - assert.NoError(t, err) -} - -// TestSmallRun test run function -func TestSmallRun(t *testing.T) { - tmpConfigFile := filepath.Join(constant.TmpTestDir, "config.json") - os.Remove(tmpConfigFile) - err := os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - assert.NoError(t, err) - fd, err := os.Create(tmpConfigFile) - assert.NoError(t, err) - fd.WriteString(`{ - "cacheConfig": { - "enable": true, - "defaultLimitMode": "invalid" - } -}`) - assert.NoError(t, err) - err = fd.Close() - assert.NoError(t, err) - - res := run(tmpConfigFile) - assert.Equal(t, constant.ErrCodeFailed, res) -} - -// TestInitKubeClient test initKubeClient -func TestInitKubeClient(t *testing.T) { - r := &Rubik{} - err := r.initKubeClient() - assert.Equal(t, true, strings.Contains(err.Error(), "must be defined")) -} - -// TestInitEventHandler test initEventHandler -func TestInitEventHandler(t *testing.T) { - r := &Rubik{} - err := r.initEventHandler() - assert.Equal(t, true, strings.Contains(err.Error(), "kube-client is not initialized")) - - r.kubeClient = &kubernetes.Clientset{} - err = r.initEventHandler() - assert.NoError(t, err) -} - -// TestInitCheckpoint test initCheckpoint -func TestInitCheckpoint(t *testing.T) { - r := &Rubik{config: &config.Config{CgroupRoot: ""}} - err := r.initCheckpoint() - assert.Equal(t, true, strings.Contains(err.Error(), "kube-client is not initialized")) - - r.kubeClient = &kubernetes.Clientset{} - err = r.initCheckpoint() - assert.Equal(t, true, strings.Contains(err.Error(), "missing")) -} - -// TestAddUpdateDelEvent test Event -func TestAddUpdateDelEvent(t *testing.T) { - cfg, err := config.NewConfig("") - assert.NoError(t, err) - r := &Rubik{config: cfg} - - cpm := checkpoint.NewManager("") - r.cpm = cpm - oldPod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: 
"aaa", - Name: "podaaa", - }, - Status: corev1.PodStatus{ - Phase: corev1.PodPending, - }, - } - r.AddEvent(oldPod) - oldPod.Status.Phase = corev1.PodRunning - r.AddEvent(oldPod) - assert.Equal(t, "podaaa", r.cpm.Checkpoint.Pods["aaa"].Name) - - newPod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - UID: "aaa", - Name: "podbbb", - }, - Status: corev1.PodStatus{ - Phase: corev1.PodPending, - }, - } - r.UpdateEvent(oldPod, newPod) - _, ok := r.cpm.Checkpoint.Pods["aaa"] - assert.Equal(t, false, ok) - - r.AddEvent(oldPod) - newPod.Status.Phase = corev1.PodRunning - r.UpdateEvent(oldPod, newPod) - assert.Equal(t, "podbbb", r.cpm.Checkpoint.Pods["aaa"].Name) - - r.DeleteEvent(newPod) - _, ok = r.cpm.Checkpoint.Pods["aaa"] - assert.Equal(t, false, ok) - r.UpdateEvent(oldPod, newPod) - assert.Equal(t, "podbbb", r.cpm.Checkpoint.Pods["aaa"].Name) -} diff --git a/pkg/rubik/servicemanager.go b/pkg/rubik/servicemanager.go new file mode 100644 index 0000000000000000000000000000000000000000..ba63b180ad1d36e6b333bba50ae00443ef8ed979 --- /dev/null +++ b/pkg/rubik/servicemanager.go @@ -0,0 +1,291 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Jiaqi Yang +// Create: 2023-01-28 +// Description: This file defines ServiceManager to manage the lifecycle of services + +// Package services implements service registration, discovery and management functions +package rubik + +import ( + "context" + "fmt" + "sync" + "time" + + "k8s.io/apimachinery/pkg/util/wait" + + "isula.org/rubik/pkg/api" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/config" + "isula.org/rubik/pkg/core/subscriber" + "isula.org/rubik/pkg/core/typedef" + "isula.org/rubik/pkg/services" +) + +// serviceManagerName is the unique ID of the service manager +const serviceManagerName = "serviceManager" + +// ServiceManager is used to manage the lifecycle of services +type ServiceManager struct { + api.Subscriber + api.Viewer + sync.RWMutex + RunningServices map[string]services.Service +} + +// NewServiceManager creates a servicemanager object +func NewServiceManager() *ServiceManager { + manager := &ServiceManager{ + RunningServices: make(map[string]services.Service), + } + manager.Subscriber = subscriber.NewGenericSubscriber(manager, serviceManagerName) + return manager +} + +// InitServices parses the to-be-run services config and loads them to the ServiceManager +func (manager *ServiceManager) InitServices(features []string, + serviceConfig map[string]interface{}, parser config.ConfigParser) error { + for _, feature := range features { + s, err := services.GetServiceComponent(feature) + if err != nil { + return fmt.Errorf("get component failed %s: %v", feature, err) + } + + if err := s.SetConfig(func(configName string, v interface{}) error { + config := serviceConfig[configName] + if config == nil { + return fmt.Errorf("this configuration is not available,configName:%v", configName) + } + if err := parser.UnmarshalSubConfig(config, v); err != nil { + return fmt.Errorf("this configuration failed to be serialized,configName:%v,error:%v", configName, err) + } + return nil + }); err != nil { + return fmt.Errorf("set configuration 
failed, err:%v", err) + } + + conf, err := parser.MarshalIndent(s.GetConfig(), "", "\t") + if err != nil { + return fmt.Errorf("fail to get service %v configuration: %v", s.ID(), err) + } + + if err := manager.AddRunningService(feature, s); err != nil { + return err + } + + if len(conf) != 0 { + log.Infof("service %v will run with configuration:\n%v", s.ID(), conf) + } else { + log.Infof("service %v will run", s.ID()) + } + } + return nil +} + +// AddRunningService adds a to-be-run service +func (manager *ServiceManager) AddRunningService(name string, s services.Service) error { + manager.Lock() + defer manager.Unlock() + if _, existed := manager.RunningServices[name]; existed { + return fmt.Errorf("service name conflict: %s", name) + } + manager.RunningServices[name] = s + return nil +} + +// HandleEvent is used to handle PodInfo events pushed by the publisher +func (manager *ServiceManager) HandleEvent(eventType typedef.EventType, event typedef.Event) { + defer func() { + if err := recover(); err != nil { + log.Errorf("panic occurr: %v", err) + } + }() + switch eventType { + case typedef.INFOADD: + manager.addFunc(event) + case typedef.INFOUPDATE: + manager.updateFunc(event) + case typedef.INFODELETE: + manager.deleteFunc(event) + default: + log.Infof("service manager fail to process %s type", eventType.String()) + } +} + +// EventTypes returns the type of event the serviceManager is interested in +func (manager *ServiceManager) EventTypes() []typedef.EventType { + return []typedef.EventType{typedef.INFOADD, typedef.INFOUPDATE, typedef.INFODELETE} +} + +// terminatingRunningServices handles services exits during the setup and exit phases +func terminatingServices(serviceMap map[string]services.Service, viewer api.Viewer) { + for name, s := range serviceMap { + if s.IsRunner() { + if err := s.Stop(); err != nil { + log.Errorf("fail to stop service %v: %v", name, err) + } else { + log.Infof("service %v stop successfully", name) + } + } + if err := 
s.Terminate(viewer); err != nil { + log.Errorf("fail to terminate service %v: %v", name, err) + } else { + log.Infof("service %v terminate successfully", name) + } + } +} + +// Setup pre-starts services, such as preparing the environment, etc. +func (manager *ServiceManager) Setup(v api.Viewer) error { + // only when viewer is prepared + if v == nil { + return fmt.Errorf("viewer should not be empty") + } + manager.Viewer = v + + var preStarted = make(map[string]services.Service, 0) + manager.RLock() + for name, s := range manager.RunningServices { + /* + Try to prestart the service. If any service fails, rubik exits + and invokes the terminate function to terminate the prestarted service. + */ + if err := s.PreStart(manager.Viewer); err != nil { + terminatingServices(preStarted, manager.Viewer) + return fmt.Errorf("fail to preStart service %v: %v", name, err) + } + preStarted[name] = s + log.Infof("service %v pre-start successfully", name) + } + manager.RUnlock() + return nil +} + +// Start starts and runs the persistent service +func (manager *ServiceManager) Start(ctx context.Context) { + /* + The Run function of the service will be called continuously until the context is canceled. + When a service panics while running, recover will catch the violation + and briefly restart for a short period of time. 
+ */ + const restartDuration = 2 * time.Second + runner := func(ctx context.Context, id string, runFunc func(ctx context.Context)) { + var restartCount int64 = 0 + wait.UntilWithContext(ctx, func(ctx context.Context) { + defer func() { + if err := recover(); err != nil { + log.Errorf("service %s catch a panic: %v", id, err) + } + }() + if restartCount > 0 { + log.Warnf("service %s has restart %v times", id, restartCount) + } + restartCount++ + runFunc(ctx) + }, restartDuration) + } + + for id, s := range manager.RunningServices { + if s.IsRunner() { + go runner(ctx, id, s.Run) + } + } +} + +// Stop terminates the running service +func (manager *ServiceManager) Stop() error { + manager.RLock() + terminatingServices(manager.RunningServices, manager.Viewer) + manager.RUnlock() + return nil +} + +// addFunc handles pod addition events +func (manager *ServiceManager) addFunc(event typedef.Event) { + podInfo, ok := event.(*typedef.PodInfo) + if !ok { + log.Warnf("receive invalid event: %T", event) + return + } + + const retryCount = 5 + addOnce := func(s services.Service, podInfo *typedef.PodInfo, wg *sync.WaitGroup) { + wg.Add(1) + for i := 0; i < retryCount; i++ { + if err := s.AddPod(podInfo); err != nil { + log.Errorf("service %s add func failed: %v", s.ID(), err) + } else { + break + } + } + wg.Done() + } + manager.RLock() + var wg sync.WaitGroup + for _, s := range manager.RunningServices { + go addOnce(s, podInfo.DeepCopy(), &wg) + } + wg.Wait() + manager.RUnlock() +} + +// updateFunc handles pod update events +func (manager *ServiceManager) updateFunc(event typedef.Event) { + podInfos, ok := event.([]*typedef.PodInfo) + if !ok { + log.Warnf("receive invalid event: %T", event) + return + } + const podInfosLen = 2 + if len(podInfos) != podInfosLen { + log.Warnf("pod infos contains more than two pods") + return + } + runOnce := func(s services.Service, old, new *typedef.PodInfo, wg *sync.WaitGroup) { + wg.Add(1) + log.Debugf("update Func with service: %s", s.ID()) + 
if err := s.UpdatePod(old, new); err != nil { + log.Errorf("service %s update func failed: %v", s.ID(), err) + } + wg.Done() + } + manager.RLock() + var wg sync.WaitGroup + for _, s := range manager.RunningServices { + go runOnce(s, podInfos[0], podInfos[1], &wg) + } + wg.Wait() + manager.RUnlock() +} + +// deleteFunc handles pod deletion events +func (manager *ServiceManager) deleteFunc(event typedef.Event) { + podInfo, ok := event.(*typedef.PodInfo) + if !ok { + log.Warnf("receive invalid event: %T", event) + return + } + + deleteOnce := func(s services.Service, podInfo *typedef.PodInfo, wg *sync.WaitGroup) { + wg.Add(1) + if err := s.DeletePod(podInfo); err != nil { + log.Errorf("service %s delete func failed: %v", s.ID(), err) + } + wg.Done() + } + manager.RLock() + var wg sync.WaitGroup + for _, s := range manager.RunningServices { + go deleteOnce(s, podInfo.DeepCopy(), &wg) + } + wg.Wait() + manager.RUnlock() +} diff --git a/pkg/services/component.go b/pkg/services/component.go new file mode 100644 index 0000000000000000000000000000000000000000..9f6728bb25a3a34e8f567d3312dbf112b9b6741a --- /dev/null +++ b/pkg/services/component.go @@ -0,0 +1,65 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: hanchao +// Create: 2023-03-11 +// Description: This file is used to initilize all components + +// Package services +package services + +import ( + "isula.org/rubik/pkg/feature" + "isula.org/rubik/pkg/services/dyncache" + "isula.org/rubik/pkg/services/helper" + "isula.org/rubik/pkg/services/iocost" + "isula.org/rubik/pkg/services/iolimit" + "isula.org/rubik/pkg/services/preemption" + "isula.org/rubik/pkg/services/quotaburst" + "isula.org/rubik/pkg/services/quotaturbo" +) + +// ServiceComponent is the handler function of initialization. +type ServiceComponent func(name string) error + +var ( + serviceComponents = map[string]ServiceComponent{ + feature.PreemptionFeature: initPreemptionFactory, + feature.DynCacheFeature: initDynCacheFactory, + feature.IOLimitFeature: initIOLimitFactory, + feature.IOCostFeature: initIOCostFactory, + feature.DynMemoryFeature: initDynCacheFactory, + feature.QuotaBurstFeature: initQuotaBurstFactory, + feature.QuotaTurboFeature: initQuotaTurboFactory, + } +) + +func initIOLimitFactory(name string) error { + return helper.AddFactory(name, iolimit.IOLimitFactory{ObjName: name}) +} + +func initIOCostFactory(name string) error { + return helper.AddFactory(name, iocost.IOCostFactory{ObjName: name}) +} + +func initDynCacheFactory(name string) error { + return helper.AddFactory(name, dyncache.DynCacheFactory{ObjName: name}) +} + +func initQuotaTurboFactory(name string) error { + return helper.AddFactory(name, quotaturbo.QuotaTurboFactory{ObjName: name}) +} + +func initQuotaBurstFactory(name string) error { + return helper.AddFactory(name, quotaburst.BurstFactory{ObjName: name}) +} + +func initPreemptionFactory(name string) error { + return helper.AddFactory(name, preemption.PreemptionFactory{ObjName: name}) +} diff --git a/pkg/services/dyncache/dynache_init.go b/pkg/services/dyncache/dynache_init.go new file mode 100644 index 0000000000000000000000000000000000000000..55ef6427ff7ceb1cc46b34d73a955916faebcf04 --- /dev/null +++ 
b/pkg/services/dyncache/dynache_init.go @@ -0,0 +1,194 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Xiang Li +// Create: 2023-02-21 +// Description: This file will init cache limit directories before services running + +// Package dyncache is the service used for cache limit setting +package dyncache + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/common/perf" + "isula.org/rubik/pkg/common/util" +) + +const ( + base2, base16, bitSize = 2, 16, 32 +) + +type limitSet struct { + dir string + level string + l3Percent int + mbPercent int +} + +// InitCacheLimitDir init multi-level cache limit directories +func (c *DynCache) InitCacheLimitDir() error { + log.Debugf("init cache limit directory") + const ( + defaultL3PercentMax = 100 + defaultMbPercentMax = 100 + ) + if !perf.Support() { + return fmt.Errorf("perf event need by service %s not supported", c.ID()) + } + if err := checkHostPidns(c.config.DefaultPidNameSpace); err != nil { + return err + } + if err := checkResctrlPath(c.config.DefaultResctrlDir); err != nil { + return err + } + numaNum, err := getNUMANum(c.Attr.NumaNodeDir) + if err != nil { + return fmt.Errorf("get NUMA nodes number error: %v", err) + } + c.Attr.NumaNum = numaNum + c.Attr.L3PercentDynamic = c.config.L3Percent.Low + c.Attr.MemBandPercentDynamic = c.config.MemBandPercent.Low + + cacheLimitList := []*limitSet{ + 
c.newCacheLimitSet(levelDynamic, c.Attr.L3PercentDynamic, c.Attr.MemBandPercentDynamic), + c.newCacheLimitSet(levelLow, c.config.L3Percent.Low, c.config.MemBandPercent.Low), + c.newCacheLimitSet(levelMiddle, c.config.L3Percent.Mid, c.config.MemBandPercent.Mid), + c.newCacheLimitSet(levelHigh, c.config.L3Percent.High, c.config.MemBandPercent.High), + c.newCacheLimitSet(levelMax, defaultL3PercentMax, defaultMbPercentMax), + } + + for _, cl := range cacheLimitList { + if err := cl.writeResctrlSchemata(c.Attr.NumaNum); err != nil { + return err + } + } + + log.Debugf("init cache limit directory success") + return nil +} + +func (c *DynCache) newCacheLimitSet(level string, l3Per, mbPer int) *limitSet { + return &limitSet{ + level: level, + l3Percent: l3Per, + mbPercent: mbPer, + dir: filepath.Join(filepath.Clean(c.config.DefaultResctrlDir), resctrlDirPrefix+level), + } +} + +func (cl *limitSet) setDir() error { + if err := os.Mkdir(cl.dir, constant.DefaultDirMode); err != nil && !os.IsExist(err) { + return fmt.Errorf("create cache limit directory error: %v", err) + } + return nil +} + +func (cl *limitSet) writeResctrlSchemata(numaNum int) error { + // get cbm mask like "fffff" means 20 cache way + maskFile := filepath.Join(filepath.Dir(cl.dir), "info", "L3", "cbm_mask") + llc, err := calcLimitedCacheValue(maskFile, cl.l3Percent) + if err != nil { + return fmt.Errorf("get limited cache value from L3 percent error: %v", err) + } + + if err := cl.setDir(); err != nil { + return err + } + schemetaFile := filepath.Join(cl.dir, schemataFileName) + var content string + var l3List, mbList []string + for i := 0; i < numaNum; i++ { + l3List = append(l3List, fmt.Sprintf("%d=%s", i, llc)) + mbList = append(mbList, fmt.Sprintf("%d=%d", i, cl.mbPercent)) + } + l3 := fmt.Sprintf("L3:%s\n", strings.Join(l3List, ";")) + mb := fmt.Sprintf("MB:%s\n", strings.Join(mbList, ";")) + content = l3 + mb + if err := util.WriteFile(schemetaFile, content); err != nil { + return fmt.Errorf("write %s 
to file %s error: %v", content, schemetaFile, err) + } + + return nil +} + +func getNUMANum(path string) (int, error) { + files, err := filepath.Glob(filepath.Join(path, "node*")) + if err != nil { + return 0, err + } + return len(files), nil +} + +// getBinaryMask get l3 limit mask like "7ff" and transfer it to binary like "111 1111 1111", return binary length 11 +func getBinaryMask(path string) (int, error) { + maskValue, err := util.ReadFile(path) + if err != nil { + return -1, fmt.Errorf("get L3 mask value error: %v", err) + } + + // transfer mask to binary format + decMask, err := strconv.ParseInt(strings.TrimSpace(string(maskValue)), base16, bitSize) + if err != nil { + return -1, fmt.Errorf("transfer L3 mask value %v to decimal format error: %v", string(maskValue), err) + } + return len(strconv.FormatInt(decMask, base2)), nil +} + +// calcLimitedCacheValue calculate number of cache way could be used according to L3 limit percent +func calcLimitedCacheValue(path string, l3Percent int) (string, error) { + l3BinaryMask, err := getBinaryMask(path) + if err != nil { + return "", err + } + ten, hundred, binValue := 10, 100, 0 + binLen := l3BinaryMask * l3Percent / hundred + if binLen == 0 { + binLen = 1 + } + for i := 0; i < binLen; i++ { + binValue = binValue*ten + 1 + } + decValue, err := strconv.ParseInt(strconv.Itoa(binValue), base2, bitSize) + if err != nil { + return "", fmt.Errorf("transfer %v to decimal format error: %v", binValue, err) + } + + return strconv.FormatInt(decValue, base16), nil +} + +func checkHostPidns(path string) error { + ns, err := os.Readlink(path) + if err != nil { + return fmt.Errorf("get pid namespace inode error: %v", err) + } + hostPidInode := "4026531836" + if strings.Trim(ns, "pid:[]") != hostPidInode { + return fmt.Errorf("not share pid ns with host") + } + return nil +} + +func checkResctrlPath(path string) error { + if !util.PathExist(path) { + return fmt.Errorf("resctrl path %v not exist, not support cache limit", path) + } + 
schemataPath := filepath.Join(path, schemataFileName) + if !util.PathExist(schemataPath) { + return fmt.Errorf("resctrl schemata file %v not exist, check if %v directory is mounted", + schemataPath, path) + } + return nil +} diff --git a/pkg/services/dyncache/dynamic.go b/pkg/services/dyncache/dynamic.go new file mode 100644 index 0000000000000000000000000000000000000000..09bde4c4a318a366d785dac146c15af7d636634f --- /dev/null +++ b/pkg/services/dyncache/dynamic.go @@ -0,0 +1,131 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Xiang Li +// Create: 2023-02-21 +// Description: This file is used for dynamic cache limit level setting + +// Package dyncache is the service used for cache limit setting +package dyncache + +import ( + "fmt" + "time" + + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/common/perf" + "isula.org/rubik/pkg/common/util" + "isula.org/rubik/pkg/core/typedef" + "isula.org/rubik/pkg/core/typedef/cgroup" +) + +// StartDynamic will continuously run to detect online pod cache miss and +// limit offline pod cache usage +func (c *DynCache) StartDynamic() { + if !c.dynamicExist() { + return + } + + stepMore, stepLess := 5, -50 + needMore := true + limiter := c.newCacheLimitSet(levelDynamic, c.Attr.L3PercentDynamic, c.Attr.MemBandPercentDynamic) + + for _, p := range c.listOnlinePods() { + cacheMiss, llcMiss := getPodCacheMiss(p, c.config.PerfDuration) + if cacheMiss >= c.Attr.MaxMiss || llcMiss >= c.Attr.MaxMiss { + log.Infof("online pod %v cache miss: %v LLC miss: %v exceeds maxmiss, lower offline cache limit", + p.UID, cacheMiss, llcMiss) + + if err := c.flush(limiter, stepLess); err != nil { + log.Errorf(err.Error()) + } + return + } + if cacheMiss >= c.Attr.MinMiss || llcMiss >= c.Attr.MinMiss { + needMore = false + } + } + + if needMore { + if err := c.flush(limiter, stepMore); err != nil { + log.Errorf(err.Error()) + } + } +} + +func getPodCacheMiss(pod *typedef.PodInfo, perfDu int) (int, int) { + cgroupPath := cgroup.AbsoluteCgroupPath("perf_event", pod.Path, "") + if !util.PathExist(cgroupPath) { + return 0, 0 + } + + stat, err := perf.CgroupStat(cgroupPath, time.Duration(perfDu)*time.Millisecond) + if err != nil { + return 0, 0 + } + const ( + probability = 100.0 + bias = 1.0 + ) + return int(probability * float64(stat.CacheMisses) / (bias + float64(stat.CacheReferences))), + int(probability * float64(stat.LLCMiss) / (bias + float64(stat.LLCAccess))) +} + +func (c *DynCache) dynamicExist() bool { + for _, pod 
:= range c.listOfflinePods() { + if err := c.syncLevel(pod); err != nil { + continue + } + if pod.Annotations[constant.CacheLimitAnnotationKey] == levelDynamic { + return true + } + } + return false +} + +func (c *DynCache) flush(limitSet *limitSet, step int) error { + var nextPercent = func(value, min, max, step int) int { + value += step + if value < min { + return min + } + if value > max { + return max + } + return value + + } + l3 := nextPercent(c.Attr.L3PercentDynamic, c.config.L3Percent.Low, c.config.L3Percent.High, step) + mb := nextPercent(c.Attr.MemBandPercentDynamic, c.config.MemBandPercent.Low, c.config.MemBandPercent.High, step) + if c.Attr.L3PercentDynamic == l3 && c.Attr.MemBandPercentDynamic == mb { + return nil + } + log.Infof("flush L3 from %v to %v, Mb from %v to %v", limitSet.l3Percent, l3, limitSet.mbPercent, mb) + limitSet.l3Percent, limitSet.mbPercent = l3, mb + return c.doFlush(limitSet) +} + +func (c *DynCache) doFlush(limitSet *limitSet) error { + if err := limitSet.writeResctrlSchemata(c.Attr.NumaNum); err != nil { + return fmt.Errorf("adjust dynamic cache limit to l3:%v mb:%v error: %v", + limitSet.l3Percent, limitSet.mbPercent, err) + } + c.Attr.L3PercentDynamic = limitSet.l3Percent + c.Attr.MemBandPercentDynamic = limitSet.mbPercent + + return nil +} + +func (c *DynCache) listOnlinePods() map[string]*typedef.PodInfo { + onlineValue := "false" + return c.Viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool { + return pi.Annotations[constant.PriorityAnnotationKey] == onlineValue + }) +} diff --git a/pkg/services/dyncache/dynamic_test.go b/pkg/services/dyncache/dynamic_test.go new file mode 100644 index 0000000000000000000000000000000000000000..e538437d46f97b682a39fc160318de53dd281f93 --- /dev/null +++ b/pkg/services/dyncache/dynamic_test.go @@ -0,0 +1,282 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Xiang Li
+// Create: 2023-02-21
+// Description: This file is testcases for dynamic cache limit level setting
+
+// Package dyncache is the service used for cache limit setting
+package dyncache
+
+import (
+	"fmt"
+	"math"
+	"os"
+	"testing"
+
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/common/perf"
+	"isula.org/rubik/pkg/core/typedef/cgroup"
+	"isula.org/rubik/pkg/services/helper"
+	"isula.org/rubik/test/try"
+)
+
+// TestCacheLimit_StartDynamic tests StartDynamic of CacheLimit.
+//
+// Every case runs StartDynamic once against the same fake resctrl/NUMA layout; the cases
+// differ only in the pod mix, the MaxMiss/MinMiss water lines and the cache limit annotation
+// given to offline pods. The test is skipped when perf is not supported.
+func TestCacheLimit_StartDynamic(t *testing.T) {
+	if !perf.Support() {
+		t.Skipf("%s only run on physical machine", t.Name())
+	}
+	try.InitTestCGRoot(constant.DefaultCgroupRoot)
+	type fields struct {
+		Config   *Config
+		Attr     *Attr
+		Name     string
+		FakePods []*try.FakePod
+	}
+	type hook func(t *testing.T, c *DynCache, fakePods []*try.FakePod)
+
+	// perfTasks returns a fresh cgroup file map holding the perf_event "tasks" entry of one fake pod.
+	perfTasks := func() map[*cgroup.Key]string {
+		return map[*cgroup.Key]string{
+			{SubSys: "perf_event", FileName: "tasks"}: fmt.Sprintf("%d", os.Getegid()),
+		}
+	}
+	// onlineAndOffline generates one online pod followed by one offline pod.
+	onlineAndOffline := func() []*try.FakePod {
+		return []*try.FakePod{
+			try.GenFakeOnlinePod(perfTasks()),
+			try.GenFakeOfflinePod(perfTasks()),
+		}
+	}
+	// prepare builds the preHook shared by all cases: it creates the fake resctrl and NUMA
+	// directories, switches the service to dynamic mode with a short perf duration, tags every
+	// offline pod with the given cache limit level and installs the pod manager as viewer.
+	prepare := func(level string) hook {
+		return func(t *testing.T, c *DynCache, fakePods []*try.FakePod) {
+			resctrlDir := try.GenTestDir().String()
+			setMaskFile(t, resctrlDir, "3ff")
+			c.Attr.NumaNodeDir = try.GenTestDir().String()
+			const numaNode = 4
+			genNumaNodes(c.Attr.NumaNodeDir, numaNode)
+			c.config.DefaultResctrlDir = resctrlDir
+			c.config.DefaultLimitMode = modeDynamic
+			c.config.PerfDuration = 10
+			for _, pod := range fakePods {
+				if pod.Annotations[constant.PriorityAnnotationKey] == "true" {
+					pod.Annotations[constant.CacheLimitAnnotationKey] = level
+				}
+			}
+			c.Viewer = genPodManager(fakePods)
+		}
+	}
+	// cleanup is the postHook shared by all cases: it removes the fake pods' paths and both test directories.
+	cleanup := func(t *testing.T, c *DynCache, fakePods []*try.FakePod) {
+		for _, pod := range fakePods {
+			pod.CleanPath()
+		}
+		try.RemoveAll(c.config.DefaultResctrlDir)
+		try.RemoveAll(c.Attr.NumaNodeDir)
+	}
+
+	tests := []struct {
+		name     string
+		fields   fields
+		preHook  hook
+		postHook hook
+	}{
+		{
+			name: "TC-normal dynamic setting",
+			fields: fields{
+				FakePods: onlineAndOffline(),
+				Config:   genDefaultConfig(),
+				Attr:     &Attr{MaxMiss: 20, MinMiss: 10},
+			},
+			preHook:  prepare("dynamic"),
+			postHook: cleanup,
+		},
+		{
+			name: "TC-max and min miss both 0",
+			fields: fields{
+				FakePods: onlineAndOffline(),
+				Config:   genDefaultConfig(),
+				Attr:     &Attr{MaxMiss: 0, MinMiss: 0},
+			},
+			preHook:  prepare("dynamic"),
+			postHook: cleanup,
+		},
+		{
+			name: "TC-start dynamic with very high water line",
+			fields: fields{
+				FakePods: onlineAndOffline(),
+				Config:   genDefaultConfig(),
+				Attr:     &Attr{MaxMiss: math.MaxInt64, MinMiss: math.MaxInt64},
+			},
+			preHook:  prepare("dynamic"),
+			postHook: cleanup,
+		},
+		{
+			name: "TC-start dynamic with low min water line",
+			fields: fields{
+				FakePods: onlineAndOffline(),
+				Config:   genDefaultConfig(),
+				Attr:     &Attr{MaxMiss: math.MaxInt64, MinMiss: 0},
+			},
+			preHook:  prepare("dynamic"),
+			postHook: cleanup,
+		},
+		{
+			// only online pods are generated here and the hook uses the "static" level
+			name: "TC-dynamic not exist",
+			fields: fields{
+				FakePods: []*try.FakePod{
+					try.GenFakeOnlinePod(perfTasks()),
+					try.GenFakeOnlinePod(perfTasks()),
+				},
+				Config: genDefaultConfig(),
+				Attr:   &Attr{MaxMiss: 20, MinMiss: 10},
+			},
+			preHook:  prepare("static"),
+			postHook: cleanup,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c := &DynCache{
+				config: tt.fields.Config,
+				Attr:   tt.fields.Attr,
+				ServiceBase: helper.ServiceBase{
+					Name: tt.fields.Name,
+				},
+			}
+			if tt.preHook != nil {
+				tt.preHook(t, c, tt.fields.FakePods)
+			}
+			c.StartDynamic()
+			if tt.postHook != nil {
+				tt.postHook(t, c, tt.fields.FakePods)
+			}
+		})
+	}
+}
diff --git a/pkg/services/dyncache/dyncache.go b/pkg/services/dyncache/dyncache.go
new file mode 100644
index 0000000000000000000000000000000000000000..3204d903c76c803ddf1b28b5a7d1aa49d179f870
--- /dev/null
+++ b/pkg/services/dyncache/dyncache.go
@@ -0,0 +1,233 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Xiang Li
+// Create: 2023-02-21
+// Description: This file is cache limit service
+
+// Package dyncache is the service used for cache limit setting
+package dyncache
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"k8s.io/apimachinery/pkg/util/wait"
+
+	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/services/helper"
+)
+
+// default values used by newConfig and NewDynCache
+const (
+	// default adjust interval, unit ms
+	defaultAdInt = 1000
+	// default perf duration, unit ms
+	defaultPerfDur = 1000
+	// default L3 cache percentages for the low/middle/high levels
+	defaultLowL3  = 20
+	defaultMidL3  = 30
+	defaultHighL3 = 50
+	// default memory bandwidth percentages for the low/middle/high levels
+	defaultLowMB  = 10
+	defaultMidMB  = 30
+	defaultHighMB = 50
+	// default maximum and minimum cache miss values
+	defaultMaxMiss = 20
+	defaultMinMiss = 10
+	// default file system locations
+	defaultResctrlDir   = "/sys/fs/resctrl"
+	defaultNumaNodeDir  = "/sys/devices/system/node"
+	defaultPidNameSpace = "/proc/self/ns/pid"
+	// the two cache limit modes accepted by Config.Validate
+	modeStatic  = "static"
+	modeDynamic = "dynamic"
+)
+
+// boundary values enforced by Config.Validate
+const (
+	// minimum and maximum of every L3/memory bandwidth percentage
+	minPercent = 10
+	maxPercent = 100
+	// minimum perf duration, unit ms
+	minPerfDur = 10
+	// maximum perf duration, unit ms
+	maxPerfDur = 10000
+	// min adjust interval, unit ms
+	minAdjustInterval = 10
+	// max adjust interval, unit ms
+	maxAdjustInterval = 10000
+)
+
+// MultiLvlPercent defines the percentage of each level (low, middle, high)
+type MultiLvlPercent struct {
+	Low  int `json:"low,omitempty"`
+	Mid  int `json:"mid,omitempty"`
+	High int `json:"high,omitempty"`
+}
+
+// Config is cache limit config
+type Config struct {
+	// DefaultLimitMode is the default cache limit mode, either static or dynamic
+	// (newConfig sets it to static)
+	DefaultLimitMode string `json:"defaultLimitMode,omitempty"`
+	// DefaultResctrlDir is the path of resctrl control group
+	// the resctrl dir is not supposed to be altered or exposed
+	DefaultResctrlDir string `json:"-"`
+	// DefaultPidNameSpace is the pid namespace used for test whether share host pid
+	// the value is not supposed to be altered or exposed
+	DefaultPidNameSpace string `json:"-"`
+	// AdjustInterval is dynamic cache adjust interval time, unit ms
+	AdjustInterval int `json:"adjustInterval,omitempty"`
+	// PerfDuration is online pod perf detection duration time, unit ms
+	PerfDuration int `json:"perfDuration,omitempty"`
+	// L3Percent is L3 cache percent for each level
+	L3Percent MultiLvlPercent `json:"l3Percent,omitempty"`
+	// MemBandPercent is memory bandwidth percent for each level
+	MemBandPercent MultiLvlPercent `json:"memBandPercent,omitempty"`
+}
+
+// DynCache is cache limit service structure
+type DynCache struct {
+	helper.ServiceBase
+	config *Config
+	Attr   *Attr
+	Viewer api.Viewer
+}
+
+// Attr is cache limit attribute differ from config
+type Attr struct {
+	// NumaNodeDir is the path for numa node
+	NumaNodeDir string
+	// NumaNum stores numa number on physical machine
+	NumaNum int
+	// L3PercentDynamic stores l3 percent for dynamic cache limit setting
+	// the value could be changed
+	L3PercentDynamic int
+	// MemBandPercentDynamic stores memory band percent for dynamic cache limit setting
+	// the value could be changed
+	MemBandPercentDynamic int
+	// MaxMiss is the maximum value of cache miss
+	MaxMiss int
+	// MinMiss is the minimum value of cache miss
+	MinMiss int
+}
+
+// DynCacheFactory is the factory of dyncache.
+type DynCacheFactory struct {
+	// ObjName is the name handed to NewDynCache by NewObj
+	ObjName string
+}
+
+// Name to get the dyncache factory name.
+func (i DynCacheFactory) Name() string {
+	return "DynCacheFactory"
+}
+
+// NewObj to create object of dyncache.
+func (i DynCacheFactory) NewObj() (interface{}, error) {
+	var obj interface{} = NewDynCache(i.ObjName)
+	return obj, nil
+}
+
+// newConfig builds the configuration a service starts from: static mode, the default
+// resctrl and pid namespace paths, default timings and default per-level percentages.
+func newConfig() *Config {
+	l3 := MultiLvlPercent{Low: defaultLowL3, Mid: defaultMidL3, High: defaultHighL3}
+	memBand := MultiLvlPercent{Low: defaultLowMB, Mid: defaultMidMB, High: defaultHighMB}
+
+	conf := new(Config)
+	conf.DefaultLimitMode = modeStatic
+	conf.DefaultResctrlDir = defaultResctrlDir
+	conf.DefaultPidNameSpace = defaultPidNameSpace
+	conf.AdjustInterval = defaultAdInt
+	conf.PerfDuration = defaultPerfDur
+	conf.L3Percent = l3
+	conf.MemBandPercent = memBand
+	return conf
+}
+
+// NewDynCache returns a cache limit service called name, carrying default attributes and config
+func NewDynCache(name string) *DynCache {
+	attr := &Attr{
+		NumaNodeDir: defaultNumaNodeDir,
+		MaxMiss:     defaultMaxMiss,
+		MinMiss:     defaultMinMiss,
+	}
+	svc := &DynCache{Attr: attr, config: newConfig()}
+	svc.ServiceBase.Name = name
+	return svc
+}
+
+// IsRunner reports that dynCache is a persistent service
+func (c *DynCache) IsRunner() bool {
+	return true
+}
+
+// PreStart stores the viewer and initializes the cache limit directories
+func (c *DynCache) PreStart(viewer api.Viewer) error {
+	c.Viewer = viewer
+	return c.InitCacheLimitDir()
+}
+
+// GetConfig returns the current Config
+func (c *DynCache) GetConfig() interface{} {
+	return c.config
+}
+
+// Run implements the service run function: SyncCacheLimit is repeated every second in a
+// background goroutine while StartDynamic is repeated every AdjustInterval milliseconds on
+// the calling goroutine, both until ctx is done.
+func (c *DynCache) Run(ctx context.Context) {
+	stop := ctx.Done()
+	go wait.Until(c.SyncCacheLimit, time.Second, stop)
+	adjustEvery := time.Duration(c.config.AdjustInterval) * time.Millisecond
+	wait.Until(c.StartDynamic, adjustEvery, stop)
+}
+
+// SetConfig fills a fresh default Config through f, validates it and only then installs it
+func (c *DynCache) SetConfig(f helper.ConfigHandler) error {
+	candidate := newConfig()
+	err := f(c.Name, candidate)
+	if err == nil {
+		err = candidate.Validate()
+	}
+	if err != nil {
+		return err
+	}
+	c.config = candidate
+	return nil
+}
+
+// Validate checks the service's config: the limit mode, the adjust interval and perf
+// duration ranges, every percentage range, and the low<=mid<=high ordering of both
+// percentage groups. The first violation found is returned as an error.
+func (conf *Config) Validate() error {
+	switch conf.DefaultLimitMode {
+	case modeStatic, modeDynamic:
+		// supported mode
+	default:
+		return fmt.Errorf("invalid cache limit mode: %s, should be %s or %s",
+			conf.DefaultLimitMode, modeStatic, modeDynamic)
+	}
+
+	within := func(value, low, high int) bool {
+		return value >= low && value <= high
+	}
+	if !within(conf.AdjustInterval, minAdjustInterval, maxAdjustInterval) {
+		return fmt.Errorf("adjust interval %d out of range [%d,%d]",
+			conf.AdjustInterval, minAdjustInterval, maxAdjustInterval)
+	}
+	if !within(conf.PerfDuration, minPerfDur, maxPerfDur) {
+		return fmt.Errorf("perf duration %d out of range [%d,%d]", conf.PerfDuration, minPerfDur, maxPerfDur)
+	}
+
+	l3, memBand := conf.L3Percent, conf.MemBandPercent
+	for _, per := range []int{l3.Low, l3.Mid, l3.High, memBand.Low, memBand.Mid, memBand.High} {
+		if !within(per, minPercent, maxPercent) {
+			return fmt.Errorf("cache limit percentage %d out of range [%d,%d]", per, minPercent, maxPercent)
+		}
+	}
+	if !(l3.Low <= l3.Mid && l3.Mid <= l3.High) {
+		return fmt.Errorf("cache limit config L3Percent does not satisfy constraint low<=mid<=high")
+	}
+	if !(memBand.Low <= memBand.Mid && memBand.Mid <= memBand.High) {
+		return fmt.Errorf("cache limit config MemBandPercent does not satisfy constraint low<=mid<=high")
+	}
+	return nil
+}
diff --git a/pkg/services/dyncache/dyncache_init_test.go b/pkg/services/dyncache/dyncache_init_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..9836748732fcbb07c1fd8b515d646de24c1982b1
--- /dev/null
+++ b/pkg/services/dyncache/dyncache_init_test.go
@@ -0,0 +1,229 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Xiang Li
+// Create: 2023-02-21
+// Description: This file will init cache limit directories before services running
+
+// Package dyncache is the service used for cache limit setting
+package dyncache
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/common/perf"
+	"isula.org/rubik/pkg/services/helper"
+	"isula.org/rubik/test/try"
+)
+
+// setMaskFile lays out a minimal fake resctrl tree under resctrlDir: info/L3/cbm_mask
+// holding data, and a root schemata file with four L3 and four MB entries.
+func setMaskFile(t *testing.T, resctrlDir string, data string) {
+	const rootSchemata = "L3:0=7fff;1=7fff;2=7fff;3=7fff\nMB:0=100;1=100;2=100;3=100"
+	l3InfoDir := filepath.Join(resctrlDir, "info", "L3")
+	try.MkdirAll(l3InfoDir, constant.DefaultDirMode).OrDie()
+	try.WriteFile(filepath.Join(l3InfoDir, "cbm_mask"), data).OrDie()
+	try.WriteFile(filepath.Join(resctrlDir, schemataFileName), rootSchemata).OrDie()
+}
+
+// genNumaNodes creates the directories node0 .. node<num-1> under path
+func genNumaNodes(path string, num int) {
+	for idx := 0; idx < num; idx++ {
+		nodeDir := filepath.Join(path, fmt.Sprintf("node%d", idx))
+		try.MkdirAll(nodeDir, constant.DefaultDirMode).OrDie()
+	}
+}
+
+// TestCacheLimit_InitCacheLimitDir tests InitCacheLimitDir of CacheLimit
+func TestCacheLimit_InitCacheLimitDir(t *testing.T) {
+	if !perf.Support() {
+		t.Skipf("%s only run on physical machine", t.Name())
+	}
+	type fields struct {
+		Config *Config
+		Attr   *Attr
+		Name   string
+	}
+	type hook = func(t *testing.T, c *DynCache)
+
+	// fakeLayout points the service at a fresh fake resctrl dir with the given cbm mask and
+	// at a fresh numa dir containing numaNodes node directories.
+	fakeLayout := func(mask string, numaNodes int) hook {
+		return func(t *testing.T, c *DynCache) {
+			c.config.DefaultResctrlDir = try.GenTestDir().String()
+			c.config.DefaultLimitMode = modeStatic
+			setMaskFile(t, c.config.DefaultResctrlDir, mask)
+			c.Attr.NumaNodeDir = try.GenTestDir().String()
+			genNumaNodes(c.Attr.NumaNodeDir, numaNodes)
+		}
+	}
+	// removeLayout deletes the resctrl and numa directories of the service
+	removeLayout := func(t *testing.T, c *DynCache) {
+		try.RemoveAll(c.config.DefaultResctrlDir)
+		try.RemoveAll(c.Attr.NumaNodeDir)
+	}
+	// fakePidNameSpace creates the file "pid:[4026531836x]" and exposes it as "pid" through
+	// link (os.Symlink or os.Link), then uses "pid" as the pid namespace path.
+	fakePidNameSpace := func(link func(oldname, newname string) error) hook {
+		return func(t *testing.T, c *DynCache) {
+			nsDir := try.GenTestDir().String()
+			target := try.WriteFile(filepath.Join(nsDir, "pid:[4026531836x]"), "")
+			nsPath := filepath.Join(nsDir, "pid")
+
+			assert.NoError(t, link(target.String(), nsPath))
+			c.config.DefaultPidNameSpace = nsPath
+		}
+	}
+	// removePidNameSpace deletes the directory holding the fake pid namespace file
+	removePidNameSpace := func(t *testing.T, c *DynCache) {
+		try.RemoveAll(filepath.Dir(c.config.DefaultPidNameSpace))
+	}
+
+	tests := []struct {
+		name     string
+		fields   fields
+		wantErr  bool
+		preHook  hook
+		postHook hook
+	}{
+		{
+			name:    "TC1-normal cache limit dir setting",
+			fields:  fields{Config: genDefaultConfig(), Attr: &Attr{}},
+			preHook: fakeLayout("7fff", 4),
+			postHook: func(t *testing.T, c *DynCache) {
+				// schemata content expected in every rubik_<level> resctrl group
+				wantSchemata := map[string]string{
+					"rubik_max":     "L3:0=7fff;1=7fff;2=7fff;3=7fff\nMB:0=100;1=100;2=100;3=100\n",
+					"rubik_high":    "L3:0=7f;1=7f;2=7f;3=7f\nMB:0=50;1=50;2=50;3=50\n",
+					"rubik_middle":  "L3:0=f;1=f;2=f;3=f\nMB:0=30;1=30;2=30;3=30\n",
+					"rubik_low":     "L3:0=7;1=7;2=7;3=7\nMB:0=10;1=10;2=10;3=10\n",
+					"rubik_dynamic": "L3:0=7;1=7;2=7;3=7\nMB:0=10;1=10;2=10;3=10\n",
+				}
+				for group, want := range wantSchemata {
+					got := try.ReadFile(filepath.Join(c.config.DefaultResctrlDir, group, schemataFileName)).String()
+					assert.Equal(t, want, got)
+				}
+				removeLayout(t, c)
+			},
+		},
+		{
+			name:     "TC2-not share with host pid namespace",
+			wantErr:  true,
+			fields:   fields{Config: genDefaultConfig(), Attr: &Attr{}},
+			preHook:  fakePidNameSpace(os.Symlink),
+			postHook: removePidNameSpace,
+		},
+		{
+			name:     "TC3-pid namespace file is not link file",
+			wantErr:  true,
+			fields:   fields{Config: genDefaultConfig(), Attr: &Attr{}},
+			preHook:  fakePidNameSpace(os.Link),
+			postHook: removePidNameSpace,
+		},
+		{
+			name:    "TC4-resctrl path not exist",
+			wantErr: true,
+			fields:  fields{Config: genDefaultConfig(), Attr: &Attr{}},
+			preHook: func(t *testing.T, c *DynCache) {
+				c.config.DefaultResctrlDir = "/resctrl/path/is/not/exist"
+			},
+		},
+		{
+			name:    "TC5-resctrl schemata file not exist",
+			wantErr: true,
+			fields:  fields{Config: genDefaultConfig(), Attr: &Attr{}},
+			preHook: func(t *testing.T, c *DynCache) {
+				c.config.DefaultResctrlDir = try.GenTestDir().String()
+			},
+			postHook: func(t *testing.T, c *DynCache) {
+				try.RemoveAll(c.config.DefaultResctrlDir)
+			},
+		},
+		{
+			name:    "TC6-no numa path",
+			wantErr: true,
+			fields:  fields{Config: genDefaultConfig(), Attr: &Attr{}},
+			preHook: func(t *testing.T, c *DynCache) {
+				c.Attr.NumaNodeDir = "/numa/node/path/is/not/exist"
+			},
+		},
+		{
+			name:     "TC7-empty cbm mask file",
+			wantErr:  true,
+			fields:   fields{Config: genDefaultConfig(), Attr: &Attr{}},
+			preHook:  fakeLayout("", 0),
+			postHook: removeLayout,
+		},
+		{
+			name:     "TC8-low cmb mask value",
+			fields:   fields{Config: genDefaultConfig(), Attr: &Attr{}},
+			preHook:  fakeLayout("1", 0),
+			postHook: removeLayout,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c := &DynCache{
+				config: tt.fields.Config,
+				Attr:   tt.fields.Attr,
+				ServiceBase: helper.ServiceBase{
+					Name: tt.fields.Name,
+				},
+			}
+			if tt.preHook != nil {
+				tt.preHook(t, c)
+			}
+			err := c.InitCacheLimitDir()
+			if gotErr := err != nil; gotErr != tt.wantErr {
+				t.Errorf("CacheLimit.InitCacheLimitDir() error = %v, wantErr %v", err, tt.wantErr)
+			}
+			if tt.postHook != nil {
+				tt.postHook(t, c)
+			}
+		})
+	}
+}
diff --git a/pkg/services/dyncache/dyncache_test.go b/pkg/services/dyncache/dyncache_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..79216537d23ab8c37a5ae3093565e6bf5f92a79b
--- /dev/null
+++ b/pkg/services/dyncache/dyncache_test.go
@@ -0,0 +1,327 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Xiang Li +// Create: 2023-02-21 +// Description: This file is testcase for cache limit service + +// Package dyncache is the service used for cache limit setting +package dyncache + +import ( + "reflect" + "strconv" + "strings" + "testing" + + "isula.org/rubik/pkg/api" + "isula.org/rubik/pkg/services/helper" +) + +const ( + moduleName = "dynCache" +) + +// TestCacheLimit_StartDynamic tests StartDynamic of CacheLimit +func TestCacheLimit_Validate(t *testing.T) { + const num2 = 2 + type fields struct { + Config *Config + Attr *Attr + Name string + } + tests := []struct { + name string + fields fields + wantErr bool + wantMsg string + }{ + { + name: "TC-static mode config", + fields: fields{ + Config: &Config{ + DefaultLimitMode: modeStatic, + AdjustInterval: minAdjustInterval + 1, + PerfDuration: minPerfDur + 1, + L3Percent: MultiLvlPercent{ + Low: minPercent + 1, + Mid: maxPercent/num2 + 1, + High: maxPercent - 1, + }, + MemBandPercent: MultiLvlPercent{ + Low: minPercent + 1, + Mid: maxPercent/num2 + 1, + High: maxPercent - 1, + }, + }, + }, + }, + { + name: "TC-invalid mode config", + wantErr: true, + wantMsg: modeDynamic, + fields: fields{ + Config: &Config{ + DefaultLimitMode: "invalid mode", + }, + }, + }, + { + name: "TC-invalid adjust interval less than min value", + wantErr: true, + wantMsg: strconv.Itoa(minAdjustInterval), + fields: fields{ + Config: &Config{ + DefaultLimitMode: modeStatic, + AdjustInterval: minAdjustInterval - 1, + }, + }, + }, + { + name: "TC-invalid adjust interval greater than max value", + wantErr: true, + wantMsg: strconv.Itoa(maxAdjustInterval), + fields: fields{ + Config: &Config{ + DefaultLimitMode: modeStatic, + AdjustInterval: maxAdjustInterval + 1, + }, + }, + }, + { + name: "TC-invalid perf duration less than min value", + wantErr: true, + wantMsg: strconv.Itoa(minPercent), + fields: fields{ + Config: &Config{ + DefaultLimitMode: modeStatic, + AdjustInterval: maxAdjustInterval/num2 + 1, + PerfDuration: minPerfDur - 
1, + }, + }, + }, + { + name: "TC-invalid perf duration greater than max value", + wantErr: true, + wantMsg: strconv.Itoa(maxPerfDur), + fields: fields{ + Config: &Config{ + DefaultLimitMode: modeStatic, + AdjustInterval: maxAdjustInterval/num2 + 1, + PerfDuration: maxPerfDur + 1, + }, + }, + }, + { + name: "TC-invalid percent value", + wantErr: true, + wantMsg: strconv.Itoa(minPercent), + fields: fields{ + Config: &Config{ + DefaultLimitMode: modeStatic, + AdjustInterval: maxAdjustInterval/num2 + 1, + PerfDuration: maxPerfDur/num2 + 1, + L3Percent: MultiLvlPercent{ + Low: minPerfDur - 1, + }, + }, + }, + }, + { + name: "TC-invalid l3 percent low value larger than mid value", + wantErr: true, + wantMsg: "low<=mid<=high", + fields: fields{ + Config: &Config{ + DefaultLimitMode: modeStatic, + AdjustInterval: maxAdjustInterval/num2 + 1, + PerfDuration: maxPerfDur/num2 + 1, + L3Percent: MultiLvlPercent{ + Low: minPercent + num2, + Mid: minPercent + 1, + High: minPercent + 1, + }, + MemBandPercent: MultiLvlPercent{ + Low: minPercent, + Mid: minPercent + 1, + High: minPercent + num2, + }, + }, + }, + }, + { + name: "TC-invalid memband percent mid value larger than high value", + wantErr: true, + wantMsg: "low<=mid<=high", + fields: fields{ + Config: &Config{ + DefaultLimitMode: modeStatic, + AdjustInterval: maxAdjustInterval/num2 + 1, + PerfDuration: maxPerfDur/num2 + 1, + L3Percent: MultiLvlPercent{ + Low: minPercent, + Mid: minPercent + 1, + High: minPercent + num2, + }, + MemBandPercent: MultiLvlPercent{ + Low: minPercent, + Mid: maxPercent/num2 + 1, + High: maxPercent / num2, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := &DynCache{ + config: tt.fields.Config, + Attr: tt.fields.Attr, + ServiceBase: helper.ServiceBase{ + Name: tt.fields.Name, + }, + } + err := c.config.Validate() + if (err != nil) != tt.wantErr { + t.Errorf("CacheLimit.Validate() error = %v, wantErr %v", err, tt.wantErr) + } + if err != nil && 
!strings.Contains(err.Error(), tt.wantMsg) { + t.Errorf("CacheLimit.Validate() error = %v, wantMsg %v", err, tt.wantMsg) + } + }) + } +} + +func TestNewCacheLimit(t *testing.T) { + tests := []struct { + name string + want *DynCache + }{ + { + name: "TC-do nothing", + want: &DynCache{ + ServiceBase: helper.ServiceBase{ + Name: moduleName, + }, + Attr: &Attr{ + NumaNodeDir: defaultNumaNodeDir, + MaxMiss: defaultMaxMiss, + MinMiss: defaultMinMiss, + }, + config: &Config{ + DefaultLimitMode: modeStatic, + DefaultResctrlDir: defaultResctrlDir, + DefaultPidNameSpace: defaultPidNameSpace, + AdjustInterval: defaultAdInt, + PerfDuration: defaultPerfDur, + L3Percent: MultiLvlPercent{ + Low: defaultLowL3, + Mid: defaultMidL3, + High: defaultHighL3, + }, + MemBandPercent: MultiLvlPercent{ + Low: defaultLowMB, + Mid: defaultMidMB, + High: defaultHighMB, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := NewDynCache(moduleName); !reflect.DeepEqual(got, tt.want) { + t.Errorf("NewCacheLimit() = %v, want %v", got, tt.want) + } + }) + } +} + +// TestCacheLimit_PreStart tests PreStart +func TestCacheLimit_PreStart(t *testing.T) { + type fields struct { + Config *Config + Attr *Attr + Viewer api.Viewer + Name string + } + type args struct { + viewer api.Viewer + } + tests := []struct { + name string + fields fields + args args + wantErr bool + preHook func(t *testing.T, c *DynCache) + postHook func(t *testing.T, c *DynCache) + }{ + { + name: "TC-just call function", + wantErr: true, + fields: fields{ + Config: genDefaultConfig(), + Attr: &Attr{}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := &DynCache{ + config: tt.fields.Config, + Attr: tt.fields.Attr, + Viewer: tt.fields.Viewer, + ServiceBase: helper.ServiceBase{ + Name: tt.fields.Name, + }, + } + if err := c.PreStart(tt.args.viewer); (err != nil) != tt.wantErr { + t.Errorf("CacheLimit.PreStart() error = %v, wantErr %v", err, tt.wantErr) + 
} + }) + } +} + +// TestCacheLimit_ID tests ID +func TestCacheLimit_ID(t *testing.T) { + type fields struct { + Config *Config + Attr *Attr + Viewer api.Viewer + Name string + } + tests := []struct { + name string + fields fields + want string + }{ + { + name: "TC-return service's name", + fields: fields{ + Name: "cacheLimit", + }, + want: "cacheLimit", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := &DynCache{ + config: tt.fields.Config, + Attr: tt.fields.Attr, + Viewer: tt.fields.Viewer, + ServiceBase: helper.ServiceBase{ + Name: tt.fields.Name, + }, + } + if got := c.ID(); got != tt.want { + t.Errorf("CacheLimit.ID() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/services/dyncache/sync.go b/pkg/services/dyncache/sync.go new file mode 100644 index 0000000000000000000000000000000000000000..8307c41bd34ab0740121669cded0b99a40167656 --- /dev/null +++ b/pkg/services/dyncache/sync.go @@ -0,0 +1,118 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Xiang Li +// Create: 2023-02-21 +// Description: This file is used for cache limit sync setting + +// Package dyncache is the service used for cache limit setting +package dyncache + +import ( + "fmt" + "path/filepath" + "strings" + + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/common/util" + "isula.org/rubik/pkg/core/typedef" + "isula.org/rubik/pkg/core/typedef/cgroup" +) + +const ( + levelLow = "low" + levelMiddle = "middle" + levelHigh = "high" + levelMax = "max" + levelDynamic = "dynamic" + + resctrlDirPrefix = "rubik_" + schemataFileName = "schemata" +) + +var validLevel = map[string]bool{ + levelLow: true, + levelMiddle: true, + levelHigh: true, + levelMax: true, + levelDynamic: true, +} + +// SyncCacheLimit will continuously set cache limit with corresponding offline pods +func (c *DynCache) SyncCacheLimit() { + for _, p := range c.listOfflinePods() { + if err := c.syncLevel(p); err != nil { + log.Errorf("sync cache limit level err: %v", err) + continue + } + if err := c.writeTasksToResctrl(p); err != nil { + log.Errorf("set cache limit for pod %v err: %v", p.UID, err) + continue + } + } +} + +// writeTasksToResctrl will write tasks running in containers into resctrl group +func (c *DynCache) writeTasksToResctrl(pod *typedef.PodInfo) error { + if !util.PathExist(cgroup.AbsoluteCgroupPath("cpu", pod.Path, "")) { + // just return since pod maybe deleted + return nil + } + var taskList []string + cgroupKey := &cgroup.Key{SubSys: "cpu", FileName: "cgroup.procs"} + for _, container := range pod.IDContainersMap { + key := container.GetCgroupAttr(cgroupKey) + if key.Err != nil { + return key.Err + } + taskList = append(taskList, strings.Split(key.Value, "\n")...) 
+ } + if len(taskList) == 0 { + return nil + } + + resctrlTaskFile := filepath.Join(c.config.DefaultResctrlDir, + resctrlDirPrefix+pod.Annotations[constant.CacheLimitAnnotationKey], "tasks") + for _, task := range taskList { + if err := util.WriteFile(resctrlTaskFile, task); err != nil { + if strings.Contains(err.Error(), "no such process") { + log.Errorf("pod %s task %s not exist", pod.UID, task) + continue + } + return fmt.Errorf("add task %v to file %v error: %v", task, resctrlTaskFile, err) + } + } + + return nil +} + +// syncLevel sync cache limit level +func (c *DynCache) syncLevel(pod *typedef.PodInfo) error { + if pod.Annotations[constant.CacheLimitAnnotationKey] == "" { + if c.config.DefaultLimitMode == modeStatic { + pod.Annotations[constant.CacheLimitAnnotationKey] = levelMax + } else { + pod.Annotations[constant.CacheLimitAnnotationKey] = levelDynamic + } + } + + level := pod.Annotations[constant.CacheLimitAnnotationKey] + if isValid, ok := validLevel[level]; !ok || !isValid { + return fmt.Errorf("invalid cache limit level %v for pod: %v", level, pod.UID) + } + return nil +} + +func (c *DynCache) listOfflinePods() map[string]*typedef.PodInfo { + offlineValue := "true" + return c.Viewer.ListPodsWithOptions(func(pi *typedef.PodInfo) bool { + return pi.Annotations[constant.PriorityAnnotationKey] == offlineValue + }) +} diff --git a/pkg/services/dyncache/sync_test.go b/pkg/services/dyncache/sync_test.go new file mode 100644 index 0000000000000000000000000000000000000000..80021185bb965de76cb9d25ead223040453511e2 --- /dev/null +++ b/pkg/services/dyncache/sync_test.go @@ -0,0 +1,261 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Xiang Li
+// Create: 2023-02-21
+// Description: This file is testcase for cache limit sync function
+
+// Package dyncache is the service used for cache limit setting
+package dyncache
+
+import (
+	"path/filepath"
+	"testing"
+
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/core/typedef"
+	"isula.org/rubik/pkg/core/typedef/cgroup"
+	"isula.org/rubik/pkg/podmanager"
+	"isula.org/rubik/pkg/services/helper"
+	"isula.org/rubik/test/try"
+)
+
+func genDefaultConfig() *Config { // builds a static-mode Config from the package defaults
+	return &Config{
+		DefaultLimitMode:    modeStatic,
+		DefaultResctrlDir:   defaultResctrlDir,
+		DefaultPidNameSpace: defaultPidNameSpace,
+		AdjustInterval:      defaultAdInt,
+		PerfDuration:        defaultPerfDur,
+		L3Percent:           MultiLvlPercent{Low: defaultLowL3, Mid: defaultMidL3, High: defaultHighL3},
+		MemBandPercent:      MultiLvlPercent{Low: defaultLowMB, Mid: defaultMidMB, High: defaultHighMB},
+	}
+}
+
+func genPodManager(fakePods []*try.FakePod) *podmanager.PodManager { // indexes the fake pods by UID
+	pm := &podmanager.PodManager{
+		Pods: &podmanager.PodCache{
+			Pods: make(map[string]*typedef.PodInfo, 0),
+		},
+	}
+	for _, pod := range fakePods {
+		pm.Pods.Pods[pod.UID] = pod.PodInfo
+	}
+	return pm
+}
+
+func cleanFakePods(fakePods []*try.FakePod) { // removes every fake pod path, aborting on failure
+	for _, pod := range fakePods {
+		pod.CleanPath().OrDie()
+	}
+}
+
+// TestCacheLimit_SyncCacheLimit runs SyncCacheLimit against fake pods (no assertions); each case gets its own config copy.
+func TestCacheLimit_SyncCacheLimit(t *testing.T) {
+	resctrlDir := try.GenTestDir().String()
+	defer try.RemoveAll(resctrlDir)
+	try.InitTestCGRoot(try.TestRoot)
+	defaultConfig := genDefaultConfig()
+	defaultConfig.DefaultResctrlDir = resctrlDir
+	type fields struct {
+		Config   *Config
+		Attr     *Attr
+		Name     string
+		FakePods []*try.FakePod
+	}
+
+	tests := []struct {
+		name    string
+		fields  fields
+		preHook func(t *testing.T, c *DynCache, fakePods []*try.FakePod)
+	}{
+		{
+			name: "TC1-normal case",
+			fields: fields{
+				FakePods: []*try.FakePod{
+					try.GenFakeOfflinePod(map[*cgroup.Key]string{
+						{SubSys: "cpu", FileName: "cgroup.procs"}: "12345",
+					}).WithContainers(1),
+				},
+				Config: defaultConfig,
+				Attr:   &Attr{},
+			},
+			preHook: func(t *testing.T, c *DynCache, fakePods []*try.FakePod) {
+				manager := genPodManager(fakePods)
+				for _, pod := range manager.Pods.Pods {
+					pod.Annotations[constant.CacheLimitAnnotationKey] = "low"
+					try.WriteFile(filepath.Join(defaultConfig.DefaultResctrlDir,
+						resctrlDirPrefix+pod.Annotations[constant.CacheLimitAnnotationKey], "tasks"), "")
+				}
+				c.Viewer = manager
+			},
+		},
+		{
+			name: "TC2-empty annotation with static mode config",
+			fields: fields{
+				FakePods: []*try.FakePod{
+					try.GenFakeOfflinePod(map[*cgroup.Key]string{
+						{SubSys: "cpu", FileName: "cgroup.procs"}: "12345",
+					}).WithContainers(1),
+				},
+				Config: defaultConfig,
+				Attr:   &Attr{},
+			},
+			preHook: func(t *testing.T, c *DynCache, fakePods []*try.FakePod) {
+				manager := genPodManager(fakePods)
+				for _, pod := range manager.Pods.Pods {
+					try.WriteFile(filepath.Join(defaultConfig.DefaultResctrlDir,
+						resctrlDirPrefix+pod.Annotations[constant.CacheLimitAnnotationKey], "tasks"), "")
+				}
+				c.Viewer = manager
+			},
+		},
+		{
+			name: "TC3-empty annotation with dynamic mode config",
+			fields: fields{
+				FakePods: []*try.FakePod{
+					try.GenFakeOfflinePod(map[*cgroup.Key]string{
+						{SubSys: "cpu", FileName: "cgroup.procs"}: "12345",
+					}).WithContainers(1),
+				},
+				Config: defaultConfig,
+				Attr:   &Attr{},
+			},
+			preHook: func(t *testing.T, c *DynCache, fakePods []*try.FakePod) {
+				manager := genPodManager(fakePods)
+				for _, pod := range manager.Pods.Pods {
+					try.WriteFile(filepath.Join(defaultConfig.DefaultResctrlDir,
+						resctrlDirPrefix+pod.Annotations[constant.CacheLimitAnnotationKey], "tasks"), "")
+				}
+				c.Viewer = manager
+				c.config.DefaultLimitMode = levelDynamic
+			},
+		},
+		{
+			name: "TC4-invalid annotation",
+			fields: fields{
+				FakePods: []*try.FakePod{
+					try.GenFakeOfflinePod(map[*cgroup.Key]string{
+						{SubSys: "cpu", FileName: "cgroup.procs"}: "12345",
+					}).WithContainers(1),
+				},
+				Config: defaultConfig,
+				Attr:   &Attr{},
+			},
+			preHook: func(t *testing.T, c *DynCache, fakePods []*try.FakePod) {
+				manager := genPodManager(fakePods)
+				for _, pod := range manager.Pods.Pods {
+					pod.Annotations[constant.CacheLimitAnnotationKey] = "invalid"
+					try.WriteFile(filepath.Join(defaultConfig.DefaultResctrlDir,
+						resctrlDirPrefix+pod.Annotations[constant.CacheLimitAnnotationKey], "tasks"), "")
+				}
+				c.Viewer = manager
+			},
+		},
+		{
+			name: "TC5-pod just deleted",
+			fields: fields{
+				FakePods: []*try.FakePod{
+					try.GenFakeOfflinePod(map[*cgroup.Key]string{
+						{SubSys: "cpu", FileName: "cgroup.procs"}: "12345",
+					}).WithContainers(1),
+				},
+				Config: defaultConfig,
+				Attr:   &Attr{},
+			},
+			preHook: func(t *testing.T, c *DynCache, fakePods []*try.FakePod) {
+				manager := genPodManager(fakePods)
+				for _, pod := range manager.Pods.Pods {
+					pod.Annotations[constant.CacheLimitAnnotationKey] = "low"
+					try.RemoveAll(cgroup.AbsoluteCgroupPath("cpu", pod.Path, ""))
+				}
+				c.Viewer = manager
+			},
+		},
+		{
+			name: "TC6-pod without cgroup.procs",
+			fields: fields{
+				FakePods: []*try.FakePod{
+					try.GenFakeOfflinePod(map[*cgroup.Key]string{
+						{SubSys: "cpu", FileName: "aaa"}: "12345",
+					}).WithContainers(1),
+				},
+				Config: defaultConfig,
+				Attr:   &Attr{},
+			},
+			preHook: func(t *testing.T, c *DynCache, fakePods []*try.FakePod) {
+				manager := genPodManager(fakePods)
+				for _, pod := range manager.Pods.Pods {
+					pod.Annotations[constant.CacheLimitAnnotationKey] = "low"
+					try.WriteFile(filepath.Join(defaultConfig.DefaultResctrlDir,
+						resctrlDirPrefix+pod.Annotations[constant.CacheLimitAnnotationKey], "tasks"), "")
+				}
+				c.Viewer = manager
+			},
+		},
+		{
+			name: "TC7-pod without containers",
+			fields: fields{
+				FakePods: []*try.FakePod{
+					try.GenFakeOfflinePod(map[*cgroup.Key]string{
+						{SubSys: "cpu", FileName: "cgroup.procs"}: "12345",
+					}),
+				},
+				Config: defaultConfig,
+				Attr:   &Attr{},
+			},
+			preHook: func(t *testing.T, c *DynCache, fakePods []*try.FakePod) {
+				manager := genPodManager(fakePods)
+				for _, pod := range manager.Pods.Pods {
+					pod.Annotations[constant.CacheLimitAnnotationKey] = "low"
+					try.WriteFile(filepath.Join(defaultConfig.DefaultResctrlDir,
+						resctrlDirPrefix+pod.Annotations[constant.CacheLimitAnnotationKey], "tasks"), "")
+				}
+				c.Viewer = manager
+			},
+		},
+		{
+			name: "TC8-invalid resctrl group path",
+			fields: fields{
+				FakePods: []*try.FakePod{
+					try.GenFakeOfflinePod(map[*cgroup.Key]string{
+						{SubSys: "cpu", FileName: "cgroup.procs"}: "12345",
+					}).WithContainers(1),
+				},
+				Config: defaultConfig,
+				Attr:   &Attr{},
+			},
+			preHook: func(t *testing.T, c *DynCache, fakePods []*try.FakePod) {
+				manager := genPodManager(fakePods)
+				for _, pod := range manager.Pods.Pods {
+					pod.Annotations[constant.CacheLimitAnnotationKey] = "low"
+				}
+				c.Viewer = manager
+				c.config.DefaultResctrlDir = "/dev/null"
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			cfg := *tt.fields.Config // private copy: hooks mutate c.config and must not affect later cases
+			c := &DynCache{
+				config: &cfg,
+				Attr:   tt.fields.Attr,
+				ServiceBase: helper.ServiceBase{
+					Name: tt.fields.Name,
+				},
+			}
+			if tt.preHook != nil {
+				tt.preHook(t, c, tt.fields.FakePods)
+			}
+			c.SyncCacheLimit()
+			cleanFakePods(tt.fields.FakePods)
+		})
+	}
+}
diff --git a/pkg/services/helper/factory.go b/pkg/services/helper/factory.go
new file mode 100644
index 0000000000000000000000000000000000000000..4134bffda03fd1d78c84802e15f0a84d7601f7a2
--- /dev/null
+++ b/pkg/services/helper/factory.go
@@ -0,0 +1,54 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: hanchao +// Create: 2023-03-11 +// Description: This file is used to implement the factory + +// Package helper +package helper + +import ( + "errors" + "fmt" + "sync" +) + +// ServiceFactory is to define Service Factory +type ServiceFactory interface { + Name() string + NewObj() (interface{}, error) +} + +var ( + rwlock sync.RWMutex + serviceFactories = map[string]ServiceFactory{} +) + +// AddFactory is to add a service factory +func AddFactory(name string, factory ServiceFactory) error { + rwlock.Lock() + defer rwlock.Unlock() + if _, found := serviceFactories[name]; found { + return fmt.Errorf("factory is already exists") + } + serviceFactories[name] = factory + return nil +} + +// GetComponent is to get the interface of object. +func GetComponent(name string) (interface{}, error) { + rwlock.RLock() + defer rwlock.RUnlock() + if f, found := serviceFactories[name]; found { + return f.NewObj() + } else { + return nil, errors.New("factory is not found") + } +} diff --git a/pkg/services/helper/service_base.go b/pkg/services/helper/service_base.go new file mode 100644 index 0000000000000000000000000000000000000000..3015e898cdc7c4804cbfbc78f288a1648fe0756d --- /dev/null +++ b/pkg/services/helper/service_base.go @@ -0,0 +1,93 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: hanchao +// Create: 2023-03-11 +// Description: This file is the base of service. + +// Package helper +package helper + +import ( + "context" + + "isula.org/rubik/pkg/api" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/core/typedef" +) + +// ServiceBase is the basic class of a service. +type ServiceBase struct { + Name string +} + +// NewServiceBase returns the instance of service +func NewServiceBase(serviceName string) *ServiceBase { + return &ServiceBase{ + Name: serviceName, + } +} + +// ConfigHandler is that obtains the configured callback function. +type ConfigHandler func(configName string, d interface{}) error + +// SetConfig is an interface that invoke the ConfigHandler to obtain the corresponding configuration. 
+func (s *ServiceBase) SetConfig(ConfigHandler) error { + return nil +} + +// PreStart is an interface for calling a collection of methods when the service is pre-started +func (s *ServiceBase) PreStart(api.Viewer) error { + log.Warnf("%v: PreStart interface is not implemented", s.Name) + return nil +} + +// Terminate is an interface that calls a collection of methods when the service terminates +func (s *ServiceBase) Terminate(api.Viewer) error { + log.Warnf("%v: Terminate interface is not implemented", s.Name) + return nil +} + +// ID is an interface that calls a collection of methods returning service's ID +func (s *ServiceBase) ID() string { + return s.Name +} + +// IsRunner to Confirm whether it is a runner +func (s *ServiceBase) IsRunner() bool { + return false +} + +// Run to start runner +func (s *ServiceBase) Run(context.Context) {} + +// Stop to stop runner +func (s *ServiceBase) Stop() error { + return nil +} + +// AddPod to deal the event of adding a pod. +func (s *ServiceBase) AddPod(*typedef.PodInfo) error { + return nil +} + +// UpdatePod to deal the pod update event. +func (S *ServiceBase) UpdatePod(old, new *typedef.PodInfo) error { + return nil +} + +// DeletePod to deal the pod deletion event. +func (s *ServiceBase) DeletePod(*typedef.PodInfo) error { + return nil +} + +// GetConfig returns the config of service +func (s *ServiceBase) GetConfig() interface{} { + return nil +} diff --git a/pkg/services/iocost/iocost.go b/pkg/services/iocost/iocost.go new file mode 100644 index 0000000000000000000000000000000000000000..9d9eb599117f5e8edd6f2a997e034c75af5c1552 --- /dev/null +++ b/pkg/services/iocost/iocost.go @@ -0,0 +1,245 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. 
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: hanchao
+// Create: 2023-03-11
+// Description: This file is used to implement iocost
+
+// Package iocost implements the iocost service, which configures the blkio.cost.* cgroup files.
+package iocost
+
+import (
+	"fmt"
+	"os"
+	"strings"
+	"unicode"
+
+	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/common/log"
+	"isula.org/rubik/pkg/common/util"
+	"isula.org/rubik/pkg/core/typedef"
+	"isula.org/rubik/pkg/core/typedef/cgroup"
+	"isula.org/rubik/pkg/services/helper"
+)
+
+const (
+	blkcgRootDir  = "blkio"  // cgroup subsystem that holds the blkio.cost.* files
+	memcgRootDir  = "memory" // cgroup subsystem that receives memory.wb_blkio_ino
+	offlineWeight = 10       // default weight used by configPodIOCostWeight
+	onlineWeight  = 1000     // weight used when the priority annotation equals "true"
+	scale         = 10       // numeric base handed to strconv.FormatUint, i.e. decimal
+)
+
+// LinearParam holds the six coefficients that ConfigIOCostModel writes for the linear model.
+type LinearParam struct {
+	Rbps      int64 `json:"rbps,omitempty"`
+	Rseqiops  int64 `json:"rseqiops,omitempty"`
+	Rrandiops int64 `json:"rrandiops,omitempty"`
+	Wbps      int64 `json:"wbps,omitempty"`
+	Wseqiops  int64 `json:"wseqiops,omitempty"`
+	Wrandiops int64 `json:"wrandiops,omitempty"`
+}
+
+// IOCostConfig is the iocost configuration of one block device.
+type IOCostConfig struct {
+	Dev    string      `json:"dev,omitempty"`    // device name, resolved by getBlkDeviceNo
+	Enable bool        `json:"enable,omitempty"` // passed to ConfigIOCostQoS
+	Model  string      `json:"model,omitempty"`  // configIOCost accepts only "linear"
+	Param  LinearParam `json:"param,omitempty"`  // passed to ConfigIOCostModel
+}
+
+// NodeConfig is the iocost configuration of one node; SetConfig matches NodeName against the node name or "global".
+type NodeConfig struct {
+	NodeName     string         `json:"nodeName,omitempty"`
+	IOCostConfig []IOCostConfig `json:"config,omitempty"`
+}
+
+// IOCost is the iocost service; it embeds helper.ServiceBase.
+type IOCost struct {
+	helper.ServiceBase
+}
+
+var (
+	nodeName string // set by IOCostFactory.NewObj from the constant.NodeNameEnvKey environment variable
+)
+
+// IOCostFactory is the factory of IOCost; ObjName becomes the Name of the services it creates.
+type IOCostFactory struct {
+	ObjName string
+}
+
+// Name returns the fixed factory name "IOCostFactory".
+func (i IOCostFactory) Name() string { + return "IOCostFactory" +} + +// NewObj to create object of IOCost. +func (i IOCostFactory) NewObj() (interface{}, error) { + if ioCostSupport() { + nodeName = os.Getenv(constant.NodeNameEnvKey) + return &IOCost{ServiceBase: helper.ServiceBase{Name: i.ObjName}}, nil + } + return nil, fmt.Errorf("this machine not support iocost") +} + +// ioCostSupport tell if the os support iocost. +func ioCostSupport() bool { + cmdLine, err := os.ReadFile("/proc/cmdline") + if err != nil { + log.Warnf("get /pro/cmdline error") + return false + } + + if !strings.Contains(string(cmdLine), "cgroup1_writeback") { + log.Warnf("this machine not support writeback, please add 'cgroup1_writeback' to cmdline") + return false + } + + qosFile := cgroup.AbsoluteCgroupPath(blkcgRootDir, iocostQosFile) + modelFile := cgroup.AbsoluteCgroupPath(blkcgRootDir, iocostModelFile) + return util.PathExist(qosFile) && util.PathExist(modelFile) +} + +// SetConfig to config nodeConfig configure +func (io *IOCost) SetConfig(f helper.ConfigHandler) error { + if f == nil { + return fmt.Errorf("config handler function callback is nil") + } + + var nodeConfigs []NodeConfig + if err := f(io.Name, &nodeConfigs); err != nil { + return err + } + + var nodeConfig *NodeConfig + for _, config := range nodeConfigs { + if config.NodeName == nodeName { + nodeConfig = &config + break + } + if config.NodeName == "global" { + nodeConfig = &config + } + } + return io.loadConfig(nodeConfig) +} + +func (io *IOCost) loadConfig(nodeConfig *NodeConfig) error { + // ensure that previous configuration is cleared. 
+ if err := io.clearIOCost(); err != nil { + log.Errorf("clear iocost err:%v", err) + return err + } + + // no config, return + if nodeConfig == nil { + log.Warnf("no matching node exist:%v", nodeName) + return nil + } + + io.configIOCost(nodeConfig.IOCostConfig) + return nil +} + +// PreStart is the pre-start action +func (io *IOCost) PreStart(viewer api.Viewer) error { + return io.dealExistedPods(viewer) +} + +// Terminate is the terminating action +func (b *IOCost) Terminate(viewer api.Viewer) error { + if err := b.clearIOCost(); err != nil { + return err + } + return nil +} + +func (b *IOCost) dealExistedPods(viewer api.Viewer) error { + pods := viewer.ListPodsWithOptions() + for _, pod := range pods { + if err := b.configPodIOCostWeight(pod); err != nil { + log.Errorf("config pod iocost failed, err:%v", err) + } + } + return nil +} + +// AddPod to deal the event of adding a pod. +func (b *IOCost) AddPod(podInfo *typedef.PodInfo) error { + return b.configPodIOCostWeight(podInfo) +} + +// UpdatePod to deal the pod update event. +func (b *IOCost) UpdatePod(old, new *typedef.PodInfo) error { + return b.configPodIOCostWeight(new) +} + +// DeletePod to deal the pod deletion event. 
+func (b *IOCost) DeletePod(podInfo *typedef.PodInfo) error { + return nil +} + +func (b *IOCost) configIOCost(configs []IOCostConfig) { + for _, config := range configs { + devno, err := getBlkDeviceNo(config.Dev) + if err != nil { + log.Errorf("this device not found:%v", config.Dev) + continue + } + if config.Model == "linear" { + if err := ConfigIOCostModel(devno, config.Param); err != nil { + log.Errorf("this device not found:%v", err) + continue + } + } else { + log.Errorf("non-linear models are not supported") + continue + } + if err := ConfigIOCostQoS(devno, config.Enable); err != nil { + log.Errorf("Config iocost qos failed:%v", err) + } + } +} + +// clearIOCost used to disable all iocost +func (b *IOCost) clearIOCost() error { + qosbytes, err := cgroup.ReadCgroupFile(blkcgRootDir, iocostQosFile) + if err != nil { + return err + } + + if len(qosbytes) == 0 { + return nil + } + + qosParams := strings.Split(string(qosbytes), "\n") + for _, qosParam := range qosParams { + words := strings.FieldsFunc(qosParam, unicode.IsSpace) + if len(words) != 0 { + if err := ConfigIOCostQoS(words[0], false); err != nil { + return err + } + } + } + return nil +} + +func (b *IOCost) configPodIOCostWeight(podInfo *typedef.PodInfo) error { + var weight uint64 = offlineWeight + if podInfo.Annotations[constant.PriorityAnnotationKey] == "true" { + weight = onlineWeight + } + for _, container := range podInfo.IDContainersMap { + if err := ConfigContainerIOCostWeight(container.Path, weight); err != nil { + return err + } + } + return nil +} diff --git a/pkg/services/iocost/iocost_origin.go b/pkg/services/iocost/iocost_origin.go new file mode 100644 index 0000000000000000000000000000000000000000..d37109f58bbfefe514c425f124b89e56c517f9ce --- /dev/null +++ b/pkg/services/iocost/iocost_origin.go @@ -0,0 +1,92 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. 
+// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: hanchao +// Create: 2023-03-11 +// Description: This file is used to implement system iocost interface + +// Package iocost +package iocost + +import ( + "fmt" + "strconv" + + "isula.org/rubik/pkg/core/typedef/cgroup" +) + +const ( + // iocost model file + iocostModelFile = "blkio.cost.model" + // iocost weight file + iocostWeightFile = "blkio.cost.weight" + // iocost weight qos file + iocostQosFile = "blkio.cost.qos" + // cgroup writeback file + wbBlkioinoFile = "memory.wb_blkio_ino" +) + +// ConfigIOCostQoS for config iocost qos. +func ConfigIOCostQoS(devno string, enable bool) error { + t := 0 + if enable { + t = 1 + } + qosParam := fmt.Sprintf("%v enable=%v ctrl=user min=100.00 max=100.00", devno, t) + return cgroup.WriteCgroupFile(qosParam, blkcgRootDir, iocostQosFile) +} + +// ConfigIOCostModel for config iocost model +func ConfigIOCostModel(devno string, p interface{}) error { + var paramStr string + switch param := p.(type) { + case LinearParam: + if param.Rbps <= 0 || param.Rseqiops <= 0 || param.Rrandiops <= 0 || + param.Wbps <= 0 || param.Wseqiops <= 0 || param.Wrandiops <= 0 { + return fmt.Errorf("invalid params, linear params must be greater than 0") + } + + paramStr = fmt.Sprintf("%v rbps=%v rseqiops=%v rrandiops=%v wbps=%v wseqiops=%v wrandiops=%v", + devno, + param.Rbps, param.Rseqiops, param.Rrandiops, + param.Wbps, param.Wseqiops, param.Wrandiops, + ) + default: + return fmt.Errorf("model param is errror") + } + return cgroup.WriteCgroupFile(paramStr, blkcgRootDir, iocostModelFile) +} + +// 
ConfigContainerIOCostWeight for config iocost weight +// cgroup v1 iocost cannot be inherited. Therefore, only the container level can be configured. +func ConfigContainerIOCostWeight(containerRelativePath string, weight uint64) error { + if err := cgroup.WriteCgroupFile(strconv.FormatUint(weight, scale), blkcgRootDir, + containerRelativePath, iocostWeightFile); err != nil { + return err + } + if err := bindMemcgBlkcg(containerRelativePath); err != nil { + return err + } + return nil +} + +// bindMemcgBlkcg for bind memcg and blkcg +func bindMemcgBlkcg(containerRelativePath string) error { + blkcgPath := cgroup.AbsoluteCgroupPath(blkcgRootDir, containerRelativePath) + ino, err := getDirInode(blkcgPath) + if err != nil { + return err + } + + if err := cgroup.WriteCgroupFile(strconv.FormatUint(ino, scale), + memcgRootDir, containerRelativePath, wbBlkioinoFile); err != nil { + return err + } + return nil +} diff --git a/pkg/iocost/iocost_test.go b/pkg/services/iocost/iocost_test.go similarity index 33% rename from pkg/iocost/iocost_test.go rename to pkg/services/iocost/iocost_test.go index 910710fc60bb48553fa51c7c60c52d60054ce2fa..a906d85e2b1ce1e51e4019f2dca65d65a5d2f38f 100644 --- a/pkg/iocost/iocost_test.go +++ b/pkg/services/iocost/iocost_test.go @@ -1,22 +1,8 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: hanchao -// Create: 2022-10-28 -// Description: iocost test - -// Package iocost is for iocost. 
package iocost import ( + "encoding/json" "fmt" - "io/ioutil" "os" "path/filepath" "strconv" @@ -25,48 +11,155 @@ import ( "unicode" "github.com/stretchr/testify/assert" - "isula.org/rubik/pkg/config" - "isula.org/rubik/pkg/constant" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/try" - "isula.org/rubik/pkg/typedef" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/core/typedef/cgroup" + "isula.org/rubik/pkg/services/helper" + "isula.org/rubik/test/try" ) -const paramsLen = 2 +const ( + sysDevBlock = "/sys/dev/block" + objName = "ioCost" + paramsLen = 2 +) + +type ResultItem struct { + testName string + devno string + qosCheck bool + modelCheck bool + qosParam string + modelParam string +} -func TestIOcostFeatureSwitch(t *testing.T) { - if !HwSupport() { - t.Skipf("%s only run on support iocost machine", t.Name()) +var ( + iocostConfigTestItems []IOCostConfig + resultItmes []ResultItem +) + +func createTestIteam(dev, devno string, enable bool, val int64) (*IOCostConfig, *ResultItem) { + qosStr := devno + " enable=0" + name := "Test iocost disable" + if enable { + qosStr = devno + " enable=1" + name = fmt.Sprintf("Test iocost enable: val=%v", val) } - SetIOcostEnable(false) - err := ConfigIOcost(nil) - assert.Equal(t, err.Error(), "iocost feature is disable") - err = SetPodWeight(nil) - assert.Equal(t, err.Error(), "iocost feature is disable") - err = ShutDown() - assert.Equal(t, err.Error(), "iocost feature is disable") + cfg := IOCostConfig{ + Dev: dev, + Enable: enable, + Model: "linear", + Param: LinearParam{ + Rbps: val, Rseqiops: val, Rrandiops: val, + Wbps: val, Wseqiops: val, Wrandiops: val, + }, + } + res := ResultItem{ + testName: name, + devno: devno, + qosCheck: true, + modelCheck: enable, + qosParam: qosStr, + modelParam: fmt.Sprintf("%v ctrl=user model=linear rbps=%v rseqiops=%v rrandiops=%v wbps=%v wseqiops=%v wrandiops=%v", + devno, val, val, val, val, val, val), + } + return &cfg, &res } +func createIOCostConfigTestItems() { 
+ devs, err := getAllBlockDevice() + if err != nil { + panic("get blkck devices error") + } + for dev, devno := range devs { + for _, val := range []int64{600, 700, 800} { + for _, e := range []bool{true, false, true} { + cfg, res := createTestIteam(dev, devno, e, val) + iocostConfigTestItems = append(iocostConfigTestItems, *cfg) + resultItmes = append(resultItmes, *res) + } + } + } -// TestIocostConfig is testing for IocostConfig interface. -func TestIocostConfig(t *testing.T) { - if !HwSupport() { - t.Skipf("%s only run on support iocost machine", t.Name()) + var dev, devno string + for k, v := range devs { + dev = k + devno = v + break } - SetIOcostEnable(true) + cfg, res := createTestIteam(dev, devno, true, 900) + iocostConfigTestItems = append(iocostConfigTestItems, *cfg) + resultItmes = append(resultItmes, *res) + + /* + cfg, res = createTestIteam(dev, devno, true, 1000) + res.testName = "Test iocost config no dev" + cfg.Dev = "XXX" + */ +} + +func TestIOCostSupport(t *testing.T) { + assert.Equal(t, ioCostSupport(), true) + cgroup.InitMountDir("/var/tmp/rubik") + assert.Equal(t, ioCostSupport(), false) + cgroup.InitMountDir(constant.DefaultCgroupRoot) +} + +func TestIOCostID(t *testing.T) { + obj := IOCost{ServiceBase: helper.ServiceBase{Name: objName}} + assert.Equal(t, obj.ID(), objName) +} + +func TestIOCostSetConfig(t *testing.T) { + obj := IOCost{ServiceBase: helper.ServiceBase{Name: objName}} + err := obj.SetConfig(nil) + assert.Error(t, err) + + err = obj.SetConfig(func(configName string, d interface{}) error { + return fmt.Errorf("config handler error test") + }) + assert.Error(t, err) + assert.EqualError(t, err, "config handler error test") + + for i, item := range iocostConfigTestItems { + nodeConfig := NodeConfig{ + NodeName: "global", + IOCostConfig: []IOCostConfig{item}, + } + + t.Run(resultItmes[i].testName, func(t *testing.T) { + var nodeConfigs []NodeConfig + nodeConfigs = append(nodeConfigs, nodeConfig) + cfgStr, err := 
json.Marshal(nodeConfigs) + assert.NoError(t, err) + err = obj.SetConfig(func(configName string, d interface{}) error { + assert.Equal(t, configName, objName) + return json.Unmarshal(cfgStr, d) + }) + assert.NoError(t, err) + checkResult(t, &resultItmes[i]) + }) + } +} + +func TestConfigIOCost(t *testing.T) { + obj := IOCost{ServiceBase: helper.ServiceBase{Name: objName}} + assert.Equal(t, obj.ID(), objName) + + var devname, devno string devs, err := getAllBlockDevice() assert.NoError(t, err) - var devname, devno string + assert.NotEmpty(t, devs) + for k, v := range devs { devname = k devno = v break } - tests := []struct { + testItems := []struct { name string - config config.IOcostConfig + config IOCostConfig qosCheck bool modelCheck bool qosParam string @@ -74,11 +167,11 @@ func TestIocostConfig(t *testing.T) { }{ { name: "Test iocost enable", - config: config.IOcostConfig{ + config: IOCostConfig{ Dev: devname, Enable: true, Model: "linear", - Param: config.Param{ + Param: LinearParam{ Rbps: 600, Rseqiops: 600, Rrandiops: 600, Wbps: 600, Wseqiops: 600, Wrandiops: 600, }, @@ -92,11 +185,11 @@ func TestIocostConfig(t *testing.T) { }, { name: "Test iocost disable", - config: config.IOcostConfig{ + config: IOCostConfig{ Dev: devname, Enable: false, Model: "linear", - Param: config.Param{ + Param: LinearParam{ Rbps: 600, Rseqiops: 600, Rrandiops: 600, Wbps: 600, Wseqiops: 600, Wrandiops: 600, }, @@ -106,12 +199,12 @@ func TestIocostConfig(t *testing.T) { qosParam: devno + " enable=0", }, { - name: "Test iocost enable", - config: config.IOcostConfig{ + name: "Test modifying iocost linear parameters", + config: IOCostConfig{ Dev: devname, Enable: true, Model: "linear", - Param: config.Param{ + Param: LinearParam{ Rbps: 500, Rseqiops: 500, Rrandiops: 500, Wbps: 500, Wseqiops: 500, Wrandiops: 500, }, @@ -124,12 +217,12 @@ func TestIocostConfig(t *testing.T) { "wbps=500 wseqiops=500 wrandiops=500", }, { - name: "Test iocost no dev error", - config: config.IOcostConfig{ - 
Dev: "xxx", - Enable: true, + name: "Test iocost disable", + config: IOCostConfig{ + Dev: devname, + Enable: false, Model: "linear", - Param: config.Param{ + Param: LinearParam{ Rbps: 600, Rseqiops: 600, Rrandiops: 600, Wbps: 600, Wseqiops: 600, Wrandiops: 600, }, @@ -139,30 +232,27 @@ func TestIocostConfig(t *testing.T) { qosParam: devno + " enable=0", }, { - name: "Test iocost enable", - config: config.IOcostConfig{ - Dev: devname, + name: "Test iocost no dev error", + config: IOCostConfig{ + Dev: "xxx", Enable: true, Model: "linear", - Param: config.Param{ - Rbps: 500, Rseqiops: 500, Rrandiops: 500, - Wbps: 500, Wseqiops: 500, Wrandiops: 500, + Param: LinearParam{ + Rbps: 600, Rseqiops: 600, Rrandiops: 600, + Wbps: 600, Wseqiops: 600, Wrandiops: 600, }, }, qosCheck: true, - modelCheck: true, - qosParam: devno + " enable=1", - modelParam: devno + " ctrl=user model=linear " + - "rbps=500 rseqiops=500 rrandiops=500 " + - "wbps=500 wseqiops=500 wrandiops=500", + modelCheck: false, + qosParam: devno + " enable=0", }, { name: "Test iocost non-linear error", - config: config.IOcostConfig{ + config: IOCostConfig{ Dev: devname, Enable: true, Model: "linearx", - Param: config.Param{ + Param: LinearParam{ Rbps: 600, Rseqiops: 600, Rrandiops: 600, Wbps: 600, Wseqiops: 600, Wrandiops: 600, }, @@ -171,31 +261,13 @@ func TestIocostConfig(t *testing.T) { modelCheck: false, qosParam: devno + " enable=0", }, - { - name: "Test iocost enable", - config: config.IOcostConfig{ - Dev: devname, - Enable: true, - Model: "linear", - Param: config.Param{ - Rbps: 500, Rseqiops: 500, Rrandiops: 500, - Wbps: 500, Wseqiops: 500, Wrandiops: 500, - }, - }, - qosCheck: true, - modelCheck: true, - qosParam: devno + " enable=1", - modelParam: devno + " ctrl=user model=linear " + - "rbps=500 rseqiops=500 rrandiops=500 " + - "wbps=500 wseqiops=500 wrandiops=500", - }, { name: "Test iocost param error", - config: config.IOcostConfig{ + config: IOCostConfig{ Dev: devname, Enable: true, Model: "linear", 
- Param: config.Param{ + Param: LinearParam{ Rbps: 600, Rseqiops: 600, Rrandiops: 600, Wbps: 600, Wseqiops: 600, Wrandiops: 0, }, @@ -206,18 +278,16 @@ func TestIocostConfig(t *testing.T) { }, } - for _, tt := range tests { + for _, tt := range testItems { t.Run(tt.name, func(t *testing.T) { - params := []config.IOcostConfig{ + params := []IOCostConfig{ tt.config, } - err := ConfigIOcost(params) - assert.NoError(t, err) + obj.configIOCost(params) if tt.qosCheck { - filePath := filepath.Join(config.CgroupRoot, blkSubName, iocostQosFile) - qosParamByte, err := ioutil.ReadFile(filePath) + qos, err := cgroup.ReadCgroupFile(blkcgRootDir, iocostQosFile) assert.NoError(t, err) - qosParams := strings.Split(string(qosParamByte), "\n") + qosParams := strings.Split(string(qos), "\n") for _, qosParam := range qosParams { paramList := strings.FieldsFunc(qosParam, unicode.IsSpace) if len(paramList) >= paramsLen && strings.Compare(paramList[0], devno) == 0 { @@ -227,8 +297,7 @@ func TestIocostConfig(t *testing.T) { } } if tt.modelCheck { - filePath := filepath.Join(config.CgroupRoot, blkSubName, iocostModelFile) - modelParamByte, err := ioutil.ReadFile(filePath) + modelParamByte, err := cgroup.ReadCgroupFile(blkcgRootDir, iocostModelFile) assert.NoError(t, err) modelParams := strings.Split(string(modelParamByte), "\n") for _, modelParam := range modelParams { @@ -243,138 +312,21 @@ func TestIocostConfig(t *testing.T) { } } -// TestSetPodWeight is testing for SetPodWeight interface. 
-func TestSetPodWeight(t *testing.T) { - if !HwSupport() { - t.Skipf("%s only run on support iocost machine", t.Name()) - } - - // deploy enviroment - const testCgroupPath = "/rubik-test" - rubikBlkioTestPath := filepath.Join(config.CgroupRoot, blkSubName, testCgroupPath) - rubikMemTestPath := filepath.Join(config.CgroupRoot, memSubName, testCgroupPath) - try.MkdirAll(rubikBlkioTestPath, constant.DefaultDirMode) - try.MkdirAll(rubikMemTestPath, constant.DefaultDirMode) - defer try.RemoveAll(rubikBlkioTestPath) - defer try.RemoveAll(rubikMemTestPath) - SetIOcostEnable(true) - - tests := []struct { - name string - pod *typedef.PodInfo - wantErr bool - want string - }{ - { - name: "Test online qos level", - pod: &typedef.PodInfo{ - CgroupRoot: config.CgroupRoot, - CgroupPath: testCgroupPath, - Offline: false, - }, - wantErr: false, - want: "default 1000\n", - }, - { - name: "Test offline qos level", - pod: &typedef.PodInfo{ - CgroupRoot: config.CgroupRoot, - CgroupPath: testCgroupPath, - Offline: true, - }, - wantErr: false, - want: "default 10\n", - }, - { - name: "Test error cgroup path", - pod: &typedef.PodInfo{ - CgroupRoot: config.CgroupRoot, - CgroupPath: "var/log/rubik/rubik-test", - Offline: true, - }, - wantErr: true, - want: "default 10\n", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := SetPodWeight(tt.pod) - if tt.wantErr { - assert.Equal(t, err != nil, true) - return - } - assert.NoError(t, err) - weightFile := filepath.Join(rubikBlkioTestPath, "blkio.cost.weight") - weightOnline, err := ioutil.ReadFile(weightFile) - assert.NoError(t, err) - assert.Equal(t, string(weightOnline), tt.want) - }) - } -} - -// TestBindMemcgBlkio is testing for bindMemcgBlkio -func TestBindMemcgBlkio(t *testing.T) { - if !HwSupport() { - t.Skipf("%s only run on support iocost machine", t.Name()) - } - - // deploy enviroment - const testCgroupPath = "rubik-test" - rubikBlkioTestPath := filepath.Join(config.CgroupRoot, blkSubName, 
testCgroupPath) - rubikMemTestPath := filepath.Join(config.CgroupRoot, memSubName, testCgroupPath) - try.MkdirAll(rubikBlkioTestPath, constant.DefaultDirMode) - try.MkdirAll(rubikMemTestPath, constant.DefaultDirMode) - defer try.RemoveAll(rubikBlkioTestPath) - defer try.RemoveAll(rubikMemTestPath) - SetIOcostEnable(true) - - containers := make(map[string]*typedef.ContainerInfo, 5) - for i := 0; i < 5; i++ { - dirName := "container" + strconv.Itoa(i) - blkContainer := filepath.Join(rubikBlkioTestPath, dirName) - memContainer := filepath.Join(rubikMemTestPath, dirName) - try.MkdirAll(blkContainer, constant.DefaultDirMode) - try.MkdirAll(memContainer, constant.DefaultDirMode) - containers[dirName] = &typedef.ContainerInfo{ - Name: dirName, - CgroupRoot: config.CgroupRoot, - CgroupAddr: filepath.Join(testCgroupPath, dirName), - } - } - err := bindMemcgBlkio(containers) - assert.NoError(t, err) - - for key, container := range containers { - memcgPath := container.CgroupPath(memSubName) - blkcgPath := container.CgroupPath(blkSubName) - ino, err := getDirInode(blkcgPath) - assert.NoError(t, err) - wbBlkioInfo := filepath.Join(memcgPath, wbBlkioinoFile) - blkioInoStr, err := ioutil.ReadFile(wbBlkioInfo) - assert.NoError(t, err) - result := fmt.Sprintf("wb_blkio_path:/%v/%v\nwb_blkio_ino:%v\n", testCgroupPath, key, ino) - assert.Equal(t, result, string(blkioInoStr)) - } -} - -// TestClearIOcost is testing for ClearIOcost interface. 
func TestClearIOcost(t *testing.T) { - if !HwSupport() { - t.Skipf("%s only run on support iocost machine", t.Name()) - } + obj := IOCost{ServiceBase: helper.ServiceBase{Name: objName}} + assert.Equal(t, obj.ID(), objName) devs, err := getAllBlockDevice() assert.NoError(t, err) - filePath := filepath.Join(config.CgroupRoot, blkSubName, iocostQosFile) for _, devno := range devs { qosStr := fmt.Sprintf("%v enable=1", devno) - err := writeIOcost(filePath, qosStr) + err := cgroup.WriteCgroupFile(qosStr, blkcgRootDir, iocostQosFile) assert.NoError(t, err) } - err = ShutDown() + + err = obj.Terminate(nil) assert.NoError(t, err) - qosParamByte, err := ioutil.ReadFile(filePath) + qosParamByte, err := cgroup.ReadCgroupFile(blkcgRootDir, iocostQosFile) assert.NoError(t, err) qosParams := strings.Split(string(qosParamByte), "\n") for _, qosParam := range qosParams { @@ -385,171 +337,91 @@ func TestClearIOcost(t *testing.T) { } } -// TestGetBlkDevice is testing for get block device interface. -func TestGetBlkDevice(t *testing.T) { - if !HwSupport() { - t.Skipf("%s only run on support iocost machine", t.Name()) - } - - devs, err := getAllBlockDevice() - assert.NoError(t, err) - for index, dev := range devs { - devno, err := getBlkDeviceNo(index) - assert.NoError(t, err) - assert.Equal(t, dev, devno) - } - _, err = getBlkDeviceNo("") - assert.Equal(t, err != nil, true) - _, err = getBlkDeviceNo("xxx") - assert.Equal(t, err != nil, true) -} - -// TestConfigQos is testing for ConfigQos interface. 
-func TestConfigQos(t *testing.T) { - if !HwSupport() { - t.Skipf("%s only run on support iocost machine", t.Name()) - } - - SetIOcostEnable(true) - devs, err1 := getAllBlockDevice() - assert.NoError(t, err1) - var devno string - for _, v := range devs { - devno = v - break - } +func TestSetPodWeight(t *testing.T) { + // deploy enviroment + const podCgroupPath = "/rubik-podtest" + rubikBlkioTestPath := cgroup.AbsoluteCgroupPath(blkcgRootDir, podCgroupPath) + rubikMemTestPath := cgroup.AbsoluteCgroupPath(memcgRootDir, podCgroupPath) + try.MkdirAll(rubikBlkioTestPath, constant.DefaultDirMode) + try.MkdirAll(rubikMemTestPath, constant.DefaultDirMode) + //defer try.RemoveAll(rubikBlkioTestPath) + //defer try.RemoveAll(rubikMemTestPath) + containerPath := podCgroupPath + "/container" + strconv.Itoa(0) + try.MkdirAll(cgroup.AbsoluteCgroupPath(memcgRootDir, containerPath), constant.DefaultDirMode) + try.MkdirAll(cgroup.AbsoluteCgroupPath(blkcgRootDir, containerPath), constant.DefaultDirMode) tests := []struct { - name string - enable bool - want string + name string + cgroupPath string + weight int + wantErr bool + want string + errMsg string }{ { - name: "Test qos disable", - enable: false, - want: devno + " enable=0", - }, - { - name: "Test qos enable", - enable: true, - want: devno + " enable=1", + name: "Test online qos level", + cgroupPath: containerPath, + weight: onlineWeight, + wantErr: false, + want: "default 1000\n", }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := configQos(tt.enable, devno) - assert.NoError(t, err) - filePath := filepath.Join(config.CgroupRoot, blkSubName, iocostQosFile) - qosParamByte, err := ioutil.ReadFile(filePath) - assert.NoError(t, err) - qosParams := strings.Split(string(qosParamByte), "\n") - for _, qosParam := range qosParams { - paramList := strings.FieldsFunc(qosParam, unicode.IsSpace) - if len(paramList) >= paramsLen && strings.Compare(paramList[0], devno) == 0 { - assert.Equal(t, tt.want, 
qosParam[:len(tt.want)])
-					break
-				}
-			}
-		})
-	}
-}
-
-// TestConfigLinearModel is testing for ConfigLinearModel interface.
-func TestConfigLinearModel(t *testing.T) {
-	if !HwSupport() {
-		t.Skipf("%s only run on support iocost machine", t.Name())
-	}
-
-	SetIOcostEnable(true)
-	devs, err := getAllBlockDevice()
-	assert.NoError(t, err)
-	var devno string
-	for _, v := range devs {
-		devno = v
-		break
-	}
-
-	tests := []struct {
-		name        string
-		linearModel config.Param
-		wantErr     bool
-		modelParam  string
-	}{
 		{
-			name: "Test linear model",
-			linearModel: config.Param{
-				Rbps: 500, Rseqiops: 500, Rrandiops: 500,
-				Wbps: 500, Wseqiops: 500, Wrandiops: 500,
-			},
-			wantErr: false,
-			modelParam: devno + " ctrl=user model=linear " +
-				"rbps=500 rseqiops=500 rrandiops=500 " +
-				"wbps=500 wseqiops=500 wrandiops=500",
+			name:       "Test offline qos level",
+			cgroupPath: containerPath,
+			weight:     offlineWeight,
+			wantErr:    false,
+			want:       "default 10\n",
 		},
 		{
-			name: "Test linear model",
-			linearModel: config.Param{
-				Rbps: 600, Rseqiops: 600, Rrandiops: 600,
-				Wbps: 600, Wseqiops: 600, Wrandiops: 600,
-			},
-			wantErr: false,
-			modelParam: devno + " ctrl=user model=linear " +
-				"rbps=600 rseqiops=600 rrandiops=600 " +
-				"wbps=600 wseqiops=600 wrandiops=600",
+			name:       "Test error cgroup path",
+			cgroupPath: "/var/log/rubik/rubik-test",
+			weight:     offlineWeight,
+			wantErr:    true,
+			// Must match the ENOENT text reported by the OS exactly.
+			errMsg: "no such file or directory",
 		},
 		{
-			name: "Test missing parameter",
-			linearModel: config.Param{
-				Rseqiops: 600, Rrandiops: 600,
-				Wbps: 600, Wseqiops: 600, Wrandiops: 600,
-			},
-			wantErr: true,
+			name:       "Test error value",
+			cgroupPath: containerPath,
+			weight:     100000,
+			wantErr:    true,
+			errMsg:     "invalid argument",
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			err := configLinearModel(tt.linearModel, devno)
+			err := ConfigContainerIOCostWeight(tt.cgroupPath, uint64(tt.weight))
 			if tt.wantErr {
-				assert.Equal(t, err != nil, true)
+				// Guard against a nil error so a missing failure is reported
+				// as an assertion failure instead of a nil-pointer panic.
+				if assert.Error(t, err) {
+					assert.Contains(t, err.Error(), tt.errMsg)
+				}
 				return
} assert.NoError(t, err) - filePath := filepath.Join(config.CgroupRoot, blkSubName, iocostModelFile) - modelParamByte, err := ioutil.ReadFile(filePath) + + // check weight + weight, err := cgroup.ReadCgroupFile(blkcgRootDir, tt.cgroupPath, iocostWeightFile) assert.NoError(t, err) + assert.Equal(t, string(weight), tt.want) - modelParams := strings.Split(string(modelParamByte), "\n") - for _, modelParam := range modelParams { - paramList := strings.FieldsFunc(modelParam, unicode.IsSpace) - if len(paramList) >= paramsLen && strings.Compare(paramList[0], devno) == 0 { - assert.Equal(t, tt.modelParam, modelParam[:len(tt.modelParam)]) - break - } - } + // check cgroup writeback + ino, err := getDirInode(cgroup.AbsoluteCgroupPath(blkcgRootDir, tt.cgroupPath)) + assert.NoError(t, err) + result := fmt.Sprintf("wb_blkio_path:%v\nwb_blkio_ino:%v\n", tt.cgroupPath, ino) + blkioInoStr, err := cgroup.ReadCgroupFile(memcgRootDir, tt.cgroupPath, wbBlkioinoFile) + assert.NoError(t, err) + assert.Equal(t, result, string(blkioInoStr)) }) } } -func TestPartFunction(t *testing.T) { - const testCgroup = "/var/rubikcgroup/" - qosParam := strings.Repeat("a", paramMaxLen+1) - err := writeIOcost(testCgroup, qosParam) - assert.Equal(t, err.Error(), "param size exceeds "+strconv.Itoa(paramMaxLen)) - - _, err = getDirInode(testCgroup) - assert.Equal(t, true, err != nil) -} - func getAllBlockDevice() (map[string]string, error) { - files, err := ioutil.ReadDir(sysDevBlock) + files, err := os.ReadDir(sysDevBlock) if err != nil { - log.Infof("read dir %v failed, err:%v", sysDevBlock, err.Error()) return nil, err } + devs := make(map[string]string) for _, f := range files { - if f.Mode()&os.ModeSymlink != 0 { + if f.Type()&os.ModeSymlink != 0 { fullName := filepath.Join(sysDevBlock, f.Name()) realPath, err := os.Readlink(fullName) if err != nil { @@ -569,3 +441,39 @@ func getAllBlockDevice() (map[string]string, error) { } return devs, nil } + +func checkResult(t *testing.T, result *ResultItem) { + 
if result.qosCheck {
+		qos, err := cgroup.ReadCgroupFile(blkcgRootDir, iocostQosFile)
+		assert.NoError(t, err)
+		qosParams := strings.Split(string(qos), "\n")
+		for _, qosParam := range qosParams {
+			paramList := strings.FieldsFunc(qosParam, unicode.IsSpace)
+			if len(paramList) >= paramsLen && strings.Compare(paramList[0], result.devno) == 0 {
+				// Prefix comparison without slicing: a row shorter than the
+				// expectation must fail the assertion, not panic the test binary.
+				assert.True(t, strings.HasPrefix(qosParam, result.qosParam),
+					"qos row %q does not start with %q", qosParam, result.qosParam)
+				break
+			}
+		}
+	}
+	if result.modelCheck {
+		modelParamByte, err := cgroup.ReadCgroupFile(blkcgRootDir, iocostModelFile)
+		assert.NoError(t, err)
+		modelParams := strings.Split(string(modelParamByte), "\n")
+		for _, modelParam := range modelParams {
+			paramList := strings.FieldsFunc(modelParam, unicode.IsSpace)
+			if len(paramList) >= paramsLen && strings.Compare(paramList[0], result.devno) == 0 {
+				// Same prefix check as for the qos row above.
+				assert.True(t, strings.HasPrefix(modelParam, result.modelParam),
+					"model row %q does not start with %q", modelParam, result.modelParam)
+				break
+			}
+		}
+	}
+}
+
+// TestMain skips the whole package when ioCostSupport reports that iocost is
+// unavailable; otherwise it prepares the config test items and runs the tests.
+func TestMain(m *testing.M) {
+	if !ioCostSupport() {
+		fmt.Println("this machine not support iocost")
+		return
+	}
+	createIOCostConfigTestItems()
+	m.Run()
+}
diff --git a/pkg/services/iocost/utils.go b/pkg/services/iocost/utils.go
new file mode 100644
index 0000000000000000000000000000000000000000..7b6fd9f7eba4cf6242b72f7ca61f00a56d91712c
--- /dev/null
+++ b/pkg/services/iocost/utils.go
@@ -0,0 +1,59 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: hanchao
+// Create: 2023-03-11
+// Description: This file is used to implement blkio system call
+
+// Package iocost
+package iocost
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"syscall"
+)
+
+const (
+	// devNoMax is kept for compatibility; device numbers are decoded with the
+	// Linux dev_t bit layout below instead of a plain division by 256.
+	devNoMax = 256
+)
+
+// getBlkDeviceNo returns the "major:minor" device number of the block device
+// /dev/<devName>.
+//
+// It returns an error when the node cannot be stat'ed, is not a block device,
+// or when the platform does not expose a *syscall.Stat_t.
+func getBlkDeviceNo(devName string) (string, error) {
+	devPath := filepath.Join("/dev", devName)
+	fi, err := os.Stat(devPath)
+	if err != nil {
+		return "", fmt.Errorf("stat %s failed with error: %v", devName, err)
+	}
+
+	if fi.Mode()&os.ModeDevice == 0 {
+		return "", fmt.Errorf("%s is not a device", devName)
+	}
+	// Character devices also carry os.ModeDevice; only block devices are valid here.
+	if fi.Mode()&os.ModeCharDevice != 0 {
+		return "", fmt.Errorf("%s is not a block device", devName)
+	}
+
+	st, ok := fi.Sys().(*syscall.Stat_t)
+	if !ok {
+		return "", fmt.Errorf("failed to get Sys(), %v has type %T", devName, fi.Sys())
+	}
+
+	// Linux dev_t layout (same as glibc gnu_dev_major/gnu_dev_minor):
+	// the major uses bits 8-19 and 32-63, the minor bits 0-7 and 20-31.
+	// A plain devno/256, devno%256 split is wrong once the minor exceeds 255.
+	dev := uint64(st.Rdev)
+	major := ((dev >> 32) & 0xfffff000) | ((dev >> 8) & 0x00000fff)
+	minor := ((dev >> 12) & 0xffffff00) | (dev & 0x000000ff)
+	return fmt.Sprintf("%v:%v", major, minor), nil
+}
+
+// getDirInode returns the inode number of the given path.
+// It returns an error when the path cannot be stat'ed or when the platform
+// does not expose a *syscall.Stat_t.
+func getDirInode(file string) (uint64, error) {
+	fi, err := os.Stat(file)
+	if err != nil {
+		return 0, err
+	}
+	st, ok := fi.Sys().(*syscall.Stat_t)
+	if !ok {
+		return 0, fmt.Errorf("failed to get Sys(), %v has type %T", file, fi.Sys())
+	}
+	return st.Ino, nil
+}
diff --git a/api/api.go b/pkg/services/iolimit/iolimit.go
similarity index 35%
rename from api/api.go
rename to pkg/services/iolimit/iolimit.go
index d9ecb6474aeb137f2ab51501446739da0098fa0a..dcc70d2428d66cb442151b232261967a967de5df 100644
--- a/api/api.go
+++ b/pkg/services/iolimit/iolimit.go
@@ -1,4 +1,4 @@
-// Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
 // rubik licensed under the Mulan PSL v2.
 // You can use this software according to the terms and conditions of the Mulan PSL v2.
 // You may obtain a copy of Mulan PSL v2 at:
@@ -7,36 +7,41 @@
 // IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
 // PURPOSE.
 // See the Mulan PSL v2 for more details.
-// Author: Xiang Li -// Create: 2021-04-17 -// Description: api definition for rubik +// Author: hanchao +// Create: 2023-03-11 +// Description: This file is used to implement iolimit -// Package api is for api definition -package api +// Package iolimit +package iolimit -// PodQoS describe Pod QoS settings -type PodQoS struct { - CgroupPath string `json:"CgroupPath"` - QosLevel int `json:"QosLevel"` - CacheLimitLevel string `json:"CacheLimitLevel"` +import ( + "isula.org/rubik/pkg/services/helper" +) + +// DeviceConfig defines blkio device configurations. +type DeviceConfig struct { + DeviceName string `json:"device,omitempty"` + DeviceValue string `json:"value,omitempty"` +} + +// IOLimit is the class of IOLimit. +type IOLimit struct { + helper.ServiceBase } -// SetQosRequest is request get from north end -type SetQosRequest struct { - Pods map[string]PodQoS `json:"Pods"` +// IOLimitFactory is the factory of IOLimit. +type IOLimitFactory struct { + ObjName string } -// SetQosResponse is response format for http responser -type SetQosResponse struct { - ErrCode int `json:"code"` - Message string `json:"msg"` +// Name to get the IOLimit factory name. +func (i IOLimitFactory) Name() string { + return "IOLimitFactory" } -// VersionResponse is version response for http responser -type VersionResponse struct { - Version string `json:"Version"` - Release string `json:"Release"` - GitCommit string `json:"Commit"` - BuildTime string `json:"BuildTime"` - Usage string `json:"Usage,omitempty"` +// NewObj to create object of IOLimit. 
+func (i IOLimitFactory) NewObj() (interface{}, error) { + return &IOLimit{ + ServiceBase: *helper.NewServiceBase(i.ObjName), + }, nil } diff --git a/pkg/services/preemption/preemption.go b/pkg/services/preemption/preemption.go new file mode 100644 index 0000000000000000000000000000000000000000..ce436a3405d5505a7224e8c5c64827534ae0618e --- /dev/null +++ b/pkg/services/preemption/preemption.go @@ -0,0 +1,188 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Xiang Li +// Create: 2023-02-10 +// Description: This file implement qos level setting service + +// Package qos is the service used for qos level setting +package preemption + +import ( + "fmt" + "strconv" + + "isula.org/rubik/pkg/api" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/core/typedef" + "isula.org/rubik/pkg/core/typedef/cgroup" + "isula.org/rubik/pkg/services/helper" +) + +var supportCgroupTypes = map[string]*cgroup.Key{ + "cpu": {SubSys: "cpu", FileName: constant.CPUCgroupFileName}, + "memory": {SubSys: "memory", FileName: constant.MemoryCgroupFileName}, +} + +// Preemption define service which related to qos level setting +type Preemption struct { + helper.ServiceBase + config PreemptionConfig +} + +// PreemptionConfig define which resources need to use the preemption +type PreemptionConfig struct { + Resource []string `json:"resource,omitempty"` +} + +// PreemptionFactory is the factory os Preemption. 
+type PreemptionFactory struct {
+	ObjName string
+}
+
+// Name to get the Preemption factory name.
+func (i PreemptionFactory) Name() string {
+	return "PreemptionFactory"
+}
+
+// NewObj to create object of Preemption.
+func (i PreemptionFactory) NewObj() (interface{}, error) {
+	return &Preemption{ServiceBase: helper.ServiceBase{Name: i.ObjName}}, nil
+}
+
+// SetConfig loads the service configuration through the handler and stores it
+// only after it has passed validation.
+func (q *Preemption) SetConfig(f helper.ConfigHandler) error {
+	var c PreemptionConfig
+	if err := f(q.Name, &c); err != nil {
+		return err
+	}
+	if err := c.Validate(); err != nil {
+		return err
+	}
+	q.config = c
+	return nil
+}
+
+// PreStart is the pre-start action: it applies the qos level to every pod the
+// viewer already knows. Failures are logged and do not abort the start.
+func (q *Preemption) PreStart(viewer api.Viewer) error {
+	for _, pod := range viewer.ListPodsWithOptions() {
+		// A nil entry would otherwise be dereferenced by the log call below.
+		if pod == nil {
+			continue
+		}
+		if err := q.SetQoSLevel(pod); err != nil {
+			log.Errorf("failed to set the qos level for the previously started pod %v: %v", pod.Name, err)
+		}
+	}
+	return nil
+}
+
+// AddPod implement add function when pod is added in k8s:
+// set the qos level first, then verify the cgroup files hold the expected value.
+func (q *Preemption) AddPod(pod *typedef.PodInfo) error {
+	if err := q.SetQoSLevel(pod); err != nil {
+		return err
+	}
+	if err := q.ValidateConfig(pod); err != nil {
+		return err
+	}
+	return nil
+}
+
+// UpdatePod implement update function when pod info is changed.
+// Only the high-to-low transition is supported; the level is rewritten only
+// when the cgroup files do not already match the new level.
+func (q *Preemption) UpdatePod(old, new *typedef.PodInfo) error {
+	oldQos, newQos := getQoSLevel(old), getQoSLevel(new)
+	switch {
+	case newQos == oldQos:
+		return nil
+	case newQos > oldQos:
+		return fmt.Errorf("does not support pod qos level setting from low to high")
+	default:
+		if err := q.ValidateConfig(new); err != nil {
+			if err := q.SetQoSLevel(new); err != nil {
+				return fmt.Errorf("failed to update the qos level of pod %s(%s): %v", new.Name, new.UID, err)
+			}
+		}
+	}
+	return nil
+}
+
+// DeletePod implement delete function when pod is deleted by k8s
+func (q *Preemption) DeletePod(pod *typedef.PodInfo) error {
+	return nil
+}
+
+// ValidateConfig will validate pod's qos level between value from
+// cgroup file and the one from pod info.
+// It returns an error for a nil pod or on the first pod/container cgroup file
+// whose value differs from the expected level.
+func (q *Preemption) ValidateConfig(pod *typedef.PodInfo) error {
+	if pod == nil {
+		return fmt.Errorf("empty pod info")
+	}
+	targetLevel := getQoSLevel(pod)
+	for _, r := range q.config.Resource {
+		if err := pod.GetCgroupAttr(supportCgroupTypes[r]).Expect(targetLevel); err != nil {
+			return fmt.Errorf("failed to validate the qos level configuration of pod %s: %v", pod.Name, err)
+		}
+		for id, container := range pod.IDContainersMap {
+			if err := container.GetCgroupAttr(supportCgroupTypes[r]).Expect(targetLevel); err != nil {
+				// Report the failing container (map key) together with its pod.
+				return fmt.Errorf("failed to validate the qos level configuration of container %v of pod %s: %v",
+					id, pod.Name, err)
+			}
+		}
+	}
+	return nil
+}
+
+// SetQoSLevel set pod and all containers' qos level within it.
+// Online pods are left untouched; a nil pod is rejected.
+func (q *Preemption) SetQoSLevel(pod *typedef.PodInfo) error {
+	if pod == nil {
+		return fmt.Errorf("empty pod info")
+	}
+	qosLevel := getQoSLevel(pod)
+	if qosLevel == constant.Online {
+		log.Infof("pod %s has already been set to online", pod.Name)
+		return nil
+	}
+
+	for _, r := range q.config.Resource {
+		if err := pod.SetCgroupAttr(supportCgroupTypes[r], strconv.Itoa(qosLevel)); err != nil {
+			return err
+		}
+		for _, container := range pod.IDContainersMap {
+			if err := container.SetCgroupAttr(supportCgroupTypes[r], strconv.Itoa(qosLevel)); err != nil {
+				return err
+			}
+		}
+	}
+	log.Infof("the qos level of pod %s(%s) is set to %d successfully", pod.Name, pod.UID, qosLevel)
+	return nil
+}
+
+// getQoSLevel maps the priority annotation to a qos level: only the exact
+// value "true" means offline; a nil pod, a missing annotation or any other
+// value means online.
+func getQoSLevel(pod *typedef.PodInfo) int {
+	if pod == nil {
+		return constant.Online
+	}
+	if pod.Annotations[constant.PriorityAnnotationKey] == "true" {
+		return constant.Offline
+	}
+	return constant.Online
+}
+
+// Validate will validate the qos service config
+func (conf *PreemptionConfig) Validate() error {
+	if len(conf.Resource) == 0 {
+		return fmt.Errorf("empty qos config")
+	}
+	for _, r := range conf.Resource {
+		if _, ok := 
supportCgroupTypes[r]; !ok { + return fmt.Errorf("does not support setting the %s subsystem", r) + } + } + return nil +} diff --git a/pkg/services/preemption/preemption_test.go b/pkg/services/preemption/preemption_test.go new file mode 100644 index 0000000000000000000000000000000000000000..5c716d4fab7feed3d1ffa7c651caa31a07fdae1e --- /dev/null +++ b/pkg/services/preemption/preemption_test.go @@ -0,0 +1,225 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Xiang Li +// Create: 2023-02-10 +// Description: This file test qos level setting service + +// Package qos is the service used for qos level setting +package preemption + +import ( + "testing" + + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/core/typedef/cgroup" + "isula.org/rubik/pkg/services/helper" + "isula.org/rubik/test/try" +) + +func init() { + try.InitTestCGRoot(try.TestRoot) +} + +type fields struct { + Name string + Config PreemptionConfig +} +type args struct { + old *try.FakePod + new *try.FakePod +} + +type test struct { + name string + fields fields + args args + wantErr bool + preHook func(*try.FakePod) *try.FakePod +} + +var getCommonField = func(subSys []string) fields { + return fields{ + Name: "qos", + Config: PreemptionConfig{Resource: subSys}, + } +} + +// TestPreemptionAddFunc tests AddFunc of Preemption +func TestPreemptionAddFunc(t *testing.T) { + const containerNum = 3 + var addFuncTC = []test{ + { + name: "TC1-set offline pod qos ok", + fields: 
getCommonField([]string{"cpu", "memory"}),
+			args: args{
+				new: try.GenFakeOfflinePod(map[*cgroup.Key]string{
+					supportCgroupTypes["cpu"]:    "0",
+					supportCgroupTypes["memory"]: "0",
+				}),
+			},
+		},
+		{
+			name:   "TC2-set online pod qos ok",
+			fields: getCommonField([]string{"cpu", "memory"}),
+			args: args{
+				new: try.GenFakeOnlinePod(map[*cgroup.Key]string{
+					supportCgroupTypes["cpu"]:    "0",
+					supportCgroupTypes["memory"]: "0",
+				}).WithContainers(containerNum),
+			},
+		},
+		{
+			name:    "TC3-empty pod info",
+			fields:  getCommonField([]string{"cpu", "memory"}),
+			wantErr: true,
+		},
+		{
+			name:   "TC4-invalid annotation key",
+			fields: getCommonField([]string{"cpu"}),
+			args: args{
+				new: try.GenFakeBestEffortPod(map[*cgroup.Key]string{supportCgroupTypes["cpu"]: "0"}),
+			},
+			preHook: func(pod *try.FakePod) *try.FakePod {
+				newPod := pod.DeepCopy()
+				newPod.Annotations["undefine"] = "true"
+				return newPod
+			},
+		},
+		{
+			name:   "TC5-invalid annotation value",
+			fields: getCommonField([]string{"cpu"}),
+			args: args{
+				new: try.GenFakeBestEffortPod(map[*cgroup.Key]string{supportCgroupTypes["cpu"]: "0"}),
+			},
+			preHook: func(pod *try.FakePod) *try.FakePod {
+				newPod := pod.DeepCopy()
+				newPod.Annotations[constant.PriorityAnnotationKey] = "undefine"
+				return newPod
+			},
+		},
+	}
+
+	for _, tt := range addFuncTC {
+		t.Run(tt.name, func(t *testing.T) {
+			q := &Preemption{
+				ServiceBase: helper.ServiceBase{
+					Name: tt.fields.Name,
+				},
+				config: tt.fields.Config,
+			}
+			// The hook returns a modified copy; it must replace the pod under
+			// test, otherwise the annotation cases exercise the unmodified pod.
+			pod := tt.args.new
+			if tt.preHook != nil {
+				pod = tt.preHook(pod)
+			}
+			// A case without a pod exercises the nil-pod path of AddPod so its
+			// wantErr expectation is actually checked.
+			var err error
+			if pod == nil {
+				err = q.AddPod(nil)
+			} else {
+				err = q.AddPod(pod.PodInfo)
+			}
+			if (err != nil) != tt.wantErr {
+				t.Errorf("QoS.AddPod() error = %v, wantErr %v", err, tt.wantErr)
+			}
+			if pod != nil {
+				pod.CleanPath().OrDie()
+			}
+		})
+	}
+}
+
+// TestPreemptionUpdatePod tests UpdatePod of Preemption
+func TestPreemptionUpdatePod(t *testing.T) {
+	var updateFuncTC = []test{
+		{
+			name:   "TC1-online to offline",
+			fields: getCommonField([]string{"cpu"}),
+			args:   args{old: 
try.GenFakeOnlinePod(map[*cgroup.Key]string{supportCgroupTypes["cpu"]: "0"}).WithContainers(3)}, + preHook: func(pod *try.FakePod) *try.FakePod { + newPod := pod.DeepCopy() + newAnnotation := make(map[string]string, 0) + newAnnotation[constant.PriorityAnnotationKey] = "true" + newPod.Annotations = newAnnotation + return newPod + }, + }, + { + name: "TC2-offline to online", + fields: getCommonField([]string{"cpu"}), + args: args{old: try.GenFakeOfflinePod(map[*cgroup.Key]string{supportCgroupTypes["cpu"]: "0"})}, + preHook: func(pod *try.FakePod) *try.FakePod { + newPod := pod.DeepCopy() + newAnnotation := make(map[string]string, 0) + newAnnotation[constant.PriorityAnnotationKey] = "false" + newPod.Annotations = newAnnotation + return newPod + }, + wantErr: true, + }, + { + name: "TC3-online to online", + fields: getCommonField([]string{"cpu"}), + args: args{old: try.GenFakeOnlinePod(map[*cgroup.Key]string{supportCgroupTypes["cpu"]: "0"})}, + preHook: func(pod *try.FakePod) *try.FakePod { + return pod.DeepCopy() + }, + }, + } + + for _, tt := range updateFuncTC { + t.Run(tt.name, func(t *testing.T) { + q := &Preemption{ + ServiceBase: helper.ServiceBase{ + Name: tt.fields.Name, + }, + config: tt.fields.Config, + } + if tt.preHook != nil { + tt.args.new = tt.preHook(tt.args.old) + } + if err := q.UpdatePod(tt.args.old.PodInfo, tt.args.new.PodInfo); (err != nil) != tt.wantErr { + t.Errorf("QoS.UpdatePod() error = %v, wantErr %v", err, tt.wantErr) + } + tt.args.new.CleanPath().OrDie() + tt.args.old.CleanPath().OrDie() + }) + } +} + +func TestPreemptionValidate(t *testing.T) { + var validateTC = []test{ + { + name: "TC1-normal config", + fields: fields{ + Name: "qos", + Config: PreemptionConfig{Resource: []string{"cpu", "memory"}}, + }, + }, + { + name: "TC2-abnormal config", + fields: fields{ + Name: "undefine", + Config: PreemptionConfig{Resource: []string{"undefine"}}, + }, + wantErr: true, + }, + { + name: "TC3-empty config", + wantErr: true, + }, + } + + for _, tt 
:= range validateTC { + t.Run(tt.name, func(t *testing.T) { + q := &Preemption{ + ServiceBase: helper.ServiceBase{ + Name: tt.fields.Name, + }, + config: tt.fields.Config, + } + if err := q.config.Validate(); (err != nil) != tt.wantErr { + t.Errorf("QoS.Validate() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/pkg/services/quotaburst/quotaburst.go b/pkg/services/quotaburst/quotaburst.go new file mode 100644 index 0000000000000000000000000000000000000000..b59cccdee5c65b1ba5c1c9812fc79f7385155b10 --- /dev/null +++ b/pkg/services/quotaburst/quotaburst.go @@ -0,0 +1,190 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Jiaqi Yang +// Date: 2023-03-01 +// Description: This file is used for quota burst + +// Package quotaburst is for Quota Burst +package quotaburst + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "runtime" + "strings" + + "isula.org/rubik/pkg/api" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/log" + "isula.org/rubik/pkg/common/util" + "isula.org/rubik/pkg/core/typedef" + "isula.org/rubik/pkg/core/typedef/cgroup" + "isula.org/rubik/pkg/services/helper" +) + +// Burst is used to control cpu burst +type Burst struct { + helper.ServiceBase +} + +// BurstFactory is the factory os Burst. +type BurstFactory struct { + ObjName string +} + +// Name to get the Burst factory name. +func (i BurstFactory) Name() string { + return "BurstFactory" +} + +// NewObj to create object of Burst. 
+func (i BurstFactory) NewObj() (interface{}, error) {
+	return &Burst{ServiceBase: helper.ServiceBase{Name: i.ObjName}}, nil
+}
+
+// AddPod implement add function when pod is added in k8s
+func (conf *Burst) AddPod(podInfo *typedef.PodInfo) error {
+	return setPodQuotaBurst(podInfo)
+}
+
+// UpdatePod implement update function when pod info is changed.
+// The burst is rewritten only when the quota burst annotation changed.
+func (conf *Burst) UpdatePod(oldPod, newPod *typedef.PodInfo) error {
+	if newPod == nil {
+		return fmt.Errorf("empty pod info")
+	}
+	// Without an old pod there is nothing to compare against, so apply the new value.
+	if oldPod != nil &&
+		oldPod.Annotations[constant.QuotaBurstAnnotationKey] == newPod.Annotations[constant.QuotaBurstAnnotationKey] {
+		return nil
+	}
+	return setPodQuotaBurst(newPod)
+}
+
+// DeletePod implement delete function when pod is deleted by k8s
+func (conf *Burst) DeletePod(podInfo *typedef.PodInfo) error {
+	return nil
+}
+
+// PreStart is the pre-start action: it applies the quota burst to every pod
+// the viewer already knows. Failures are logged and do not abort the start.
+func (conf *Burst) PreStart(viewer api.Viewer) error {
+	pods := viewer.ListPodsWithOptions()
+	for _, pod := range pods {
+		// A nil entry would otherwise be dereferenced by the log call below.
+		if pod == nil {
+			continue
+		}
+		if err := setPodQuotaBurst(pod); err != nil {
+			log.Errorf("error prestart cont %v: %v", pod.Name, err)
+		}
+	}
+	return nil
+}
+
+// setPodQuotaBurst writes the annotated burst value to every container of the
+// pod and then the accumulated value to the pod itself.
+// It returns an error only for a nil pod or an invalid annotation; cgroup
+// write failures are logged and skipped.
+func setPodQuotaBurst(podInfo *typedef.PodInfo) error {
+	if podInfo == nil {
+		return fmt.Errorf("empty pod info")
+	}
+	if podInfo.Annotations[constant.QuotaBurstAnnotationKey] == "" {
+		return nil
+	}
+	burst, err := parseQuotaBurst(podInfo)
+	if err != nil {
+		return err
+	}
+	var podBurst int64
+	const subsys = "cpu"
+	// 1. Try to write container burst value firstly
+	for _, c := range podInfo.IDContainersMap {
+		cgpath := cgroup.AbsoluteCgroupPath(subsys, c.Path, "")
+		if err := setQuotaBurst(burst, cgpath); err != nil {
+			log.Errorf("set container quota burst failed: %v", err)
+			continue
+		}
+		/*
+			Only when the burst value of the container is successfully set,
+			the burst value of the pod will be accumulated.
+			Ensure that Pod data must be written successfully
+		*/
+		podBurst += burst
+	}
+	// 2. Try to write pod burst value
+	podPath := cgroup.AbsoluteCgroupPath(subsys, podInfo.Path, "")
+	if err := setQuotaBurst(podBurst, podPath); err != nil {
+		log.Errorf("set pod quota burst failed: %v", err)
+	}
+	return nil
+}
+
+// setQuotaBurst writes burst to cpu.cfs_burst_us under cgpath after checking
+// that the file exists and that the value is compatible with the cgroup quota.
+func setQuotaBurst(burst int64, cgpath string) error {
+	const burstFileName = "cpu.cfs_burst_us"
+	fpath := filepath.Join(cgpath, burstFileName)
+	// check whether cgroup support cpu burst
+	if _, err := os.Stat(fpath); err != nil && os.IsNotExist(err) {
+		return fmt.Errorf("quota-burst path=%v missing", fpath)
+	}
+	if err := matchQuota(burst, cgpath); err != nil {
+		return err
+	}
+	// try to write cfs_burst_us
+	if err := ioutil.WriteFile(fpath, []byte(util.FormatInt64(burst)), constant.DefaultFileMode); err != nil {
+		return fmt.Errorf("quota-burst path=%v setting failed: %v", fpath, err)
+	}
+	log.Infof("quota-burst path=%v setting success", fpath)
+	return nil
+}
+
+// matchQuota checks that burst may be applied to the cgroup at cgpath:
+// the quota must be limited, below period*NumCPU, and not smaller than burst.
+func matchQuota(burst int64, cgpath string) error {
+	const (
+		cpuPeriodFileName = "cpu.cfs_period_us"
+		cpuQuotaFileName  = "cpu.cfs_quota_us"
+	)
+	quotaStr, err := util.ReadSmallFile(filepath.Join(cgpath, cpuQuotaFileName))
+	if err != nil {
+		return fmt.Errorf("fail to read %s: %v", cpuQuotaFileName, err)
+	}
+	quota, err := util.ParseInt64(strings.TrimSpace(string(quotaStr)))
+	if err != nil {
+		return fmt.Errorf("fail to parse quota as int64: %v", err)
+	}
+
+	periodStr, err := util.ReadSmallFile(filepath.Join(cgpath, cpuPeriodFileName))
+	if err != nil {
+		return fmt.Errorf("fail to read %s: %v", cpuPeriodFileName, err)
+	}
+	period, err := util.ParseInt64(strings.TrimSpace(string(periodStr)))
+	if err != nil {
+		return fmt.Errorf("fail to parse period as int64: %v", err)
+	}
+
+	/*
+		The current pod has been allowed to use all cores, usually there are two situations:
+		1.the pod quota is -1 (in this case, there must be a container with a quota of -1)
+		2.the pod quota exceeds the maximum value (the cumulative quota value of all containers
+		exceeds the maximum value)
+	*/
+	maxQuota := period * int64(runtime.NumCPU())
+	// A negative quota means "unlimited" and belongs to situation 1 above; it
+	// is smaller than maxQuota numerically, so it needs an explicit check.
+	if quota < 0 || quota >= maxQuota {
+		return fmt.Errorf("burst fail when quota exceed the maxQuota")
+	}
+	/*
+		All containers under the pod have set cpulimit, and the cumulative value is less than the maximum core.
+		At this time, the quota of the pod should be the accumulated value of the quota of all pods.
+		If the burst value of the container is set successfully, then the burst value of the Pod
+		must be set successfully
+	*/
+	if quota < burst {
+		return fmt.Errorf("burst should be less than or equal to quota")
+	}
+	return nil
+}
+
+// parseQuotaBurst checks CPU quota burst annotation value.
+// It returns -1 and an error when the value is not an int64 or is negative.
+func parseQuotaBurst(pod *typedef.PodInfo) (int64, error) {
+	const invalidVal int64 = -1
+	val, err := util.ParseInt64(pod.Annotations[constant.QuotaBurstAnnotationKey])
+	if err != nil {
+		return invalidVal, err
+	}
+
+	if val < 0 {
+		return invalidVal, fmt.Errorf("quota burst value should be positive")
+	}
+	return val, nil
+}
diff --git a/pkg/services/quotaburst/quotaburst_test.go b/pkg/services/quotaburst/quotaburst_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..08e4659fe653e00897574cf7aa80a1959ca752e5
--- /dev/null
+++ b/pkg/services/quotaburst/quotaburst_test.go
@@ -0,0 +1,264 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Date: 2023-03-02
+// Description: This file is used for testing quota burst
+
+// Package quotaburst is for Quota Burst
+package quotaburst
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/core/typedef"
+	"isula.org/rubik/pkg/core/typedef/cgroup"
+	"isula.org/rubik/pkg/podmanager"
+	"isula.org/rubik/pkg/services/helper"
+	"isula.org/rubik/test/try"
+)
+
+const (
+	// moduleName is the service name given to every Burst instance built in these tests
+	moduleName = "quotaburst"
+)
+
+// cgroup keys of the cpu subsystem files prepared for the fake pods
+var (
+	cfsBurstUs  = &cgroup.Key{SubSys: "cpu", FileName: "cpu.cfs_burst_us"}
+	cfsQuotaUs  = &cgroup.Key{SubSys: "cpu", FileName: "cpu.cfs_quota_us"}
+	cfsPeriodUs = &cgroup.Key{SubSys: "cpu", FileName: "cpu.cfs_period_us"}
+)
+
+// TestBurst_AddPod tests AddPod
+// NOTE(review): the TC-3.x and TC-4 cases expect no error although the burst cannot be
+// written; the visible tail of AddPod only logs a setQuotaBurst failure and returns nil -
+// confirm the container path behaves the same.
+func TestBurst_AddPod(t *testing.T) {
+	type args struct {
+		pod   *try.FakePod
+		burst string
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "TC-1: set burst successfully",
+			args: args{
+				pod: try.GenFakeGuaranteedPod(map[*cgroup.Key]string{
+					cfsBurstUs:  "0",
+					cfsPeriodUs: "100000",
+					cfsQuotaUs:  "100000",
+				}).WithContainers(1),
+				burst: "1000",
+			},
+			wantErr: false,
+		},
+		{
+			name: "TC-2.1: parseQuotaBurst invalid burst < 0",
+			args: args{
+				pod:   try.GenFakeGuaranteedPod(map[*cgroup.Key]string{}),
+				burst: "-100",
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC-2.2: parseQuotaBurst invalid burst non int64",
+			args: args{
+				pod:   try.GenFakeGuaranteedPod(map[*cgroup.Key]string{}),
+				burst: "abc",
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC-3.1: matchQuota quota file not existed",
+			args: args{
+				pod: try.GenFakeGuaranteedPod(map[*cgroup.Key]string{
+					cfsBurstUs: "0",
+				}).WithContainers(1),
+				burst: "10000",
+			},
+			wantErr: false,
+		},
+		{
+			name: "TC-3.2: matchQuota quota value invalid",
+			args: args{
+				pod: try.GenFakeGuaranteedPod(map[*cgroup.Key]string{
+					cfsBurstUs: "0",
+					cfsQuotaUs: "abc",
+				}),
+				burst: "10000",
+			},
+			wantErr: false,
+		},
+		{
+			name: "TC-3.3: matchQuota period file not existed",
+			args: args{
+				pod: try.GenFakeGuaranteedPod(map[*cgroup.Key]string{
+					cfsBurstUs: "0",
+					cfsQuotaUs: "10000",
+				}),
+				burst: "10000",
+			},
+			wantErr: false,
+		},
+		{
+			name: "TC-3.4: matchQuota period value invalid",
+			args: args{
+				pod: try.GenFakeGuaranteedPod(map[*cgroup.Key]string{
+					cfsBurstUs:  "0",
+					cfsPeriodUs: "abc",
+					cfsQuotaUs:  "10000",
+				}),
+				burst: "10000",
+			},
+			wantErr: false,
+		},
+		{
+			// a period of 0 makes maxQuota 0, so any non-negative quota reaches the maximum
+			name: "TC-3.5: matchQuota quota > max",
+			args: args{
+				pod: try.GenFakeGuaranteedPod(map[*cgroup.Key]string{
+					cfsBurstUs:  "0",
+					cfsPeriodUs: "0",
+					cfsQuotaUs:  "10000",
+				}),
+				burst: "10000",
+			},
+			wantErr: false,
+		},
+		{
+			name: "TC-3.6: matchQuota quota < burst",
+			args: args{
+				pod: try.GenFakeGuaranteedPod(map[*cgroup.Key]string{
+					cfsBurstUs:  "0",
+					cfsPeriodUs: "10000",
+					cfsQuotaUs:  "10000",
+				}).WithContainers(1),
+				burst: "200000000",
+			},
+			wantErr: false,
+		},
+		{
+			name: "Tc-4: burst file not existed",
+			args: args{
+				pod:   try.GenFakeGuaranteedPod(map[*cgroup.Key]string{}),
+				burst: "1",
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			conf := Burst{ServiceBase: helper.ServiceBase{Name: moduleName}}
+			// the burst value reaches AddPod through the pod annotation
+			if tt.args.burst != "" {
+				tt.args.pod.Annotations[constant.QuotaBurstAnnotationKey] = tt.args.burst
+			}
+			if err := conf.AddPod(tt.args.pod.PodInfo); (err != nil) != tt.wantErr {
+				t.Errorf("Burst.AddPod() error = %v, wantErr %v", err, tt.wantErr)
+			}
+			tt.args.pod.CleanPath().OrDie()
+		})
+	}
+	// NOTE(review): presumably restores the cgroup mount dir changed by the fake pods - confirm
+	cgroup.InitMountDir(constant.DefaultCgroupRoot)
+}
+
+// TestOther tests DeletePod and ID
+func TestOther(t *testing.T) {
+	const tcName = "TC1-test Other"
+	t.Run(tcName, func(t *testing.T) {
+		got := Burst{ServiceBase: helper.ServiceBase{Name: moduleName}}
+		assert.NoError(t, got.DeletePod(&typedef.PodInfo{}))
+		assert.Equal(t, moduleName, got.ID())
+	})
+}
+
+// TestBurst_UpdatePod tests UpdatePod
+func TestBurst_UpdatePod(t *testing.T) {
+	type args struct {
+		oldPod *typedef.PodInfo
+		newPod *typedef.PodInfo
+	}
+	// wantErr is left at its zero value (false) in every case
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "TC1-same burst",
+			args: args{
+				oldPod: &typedef.PodInfo{
+					Annotations: map[string]string{
+						constant.QuotaBurstAnnotationKey: "10",
+					},
+				},
+				newPod: &typedef.PodInfo{
+					Annotations: map[string]string{
+						constant.QuotaBurstAnnotationKey: "10",
+					},
+				},
+			},
+		},
+		{
+			name: "TC2-different burst",
+			args: args{
+				oldPod: &typedef.PodInfo{
+					Annotations: make(map[string]string),
+				},
+				newPod: &typedef.PodInfo{
+					Annotations: map[string]string{
+						constant.QuotaBurstAnnotationKey: "10",
+					},
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			conf := Burst{ServiceBase: helper.ServiceBase{Name: moduleName}}
+			if err := conf.UpdatePod(tt.args.oldPod, tt.args.newPod); (err != nil) != tt.wantErr {
+				t.Errorf("Burst.UpdatePod() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+// TestBurst_PreStart tests PreStart
+func TestBurst_PreStart(t *testing.T) {
+	type args struct {
+		viewer api.Viewer
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			// the viewer holds a single empty pod
+			name: "TC1-set pod",
+			args: args{
+				viewer: &podmanager.PodManager{
+					Pods: &podmanager.PodCache{
+						Pods: map[string]*typedef.PodInfo{
+							"testPod1": {},
+						},
+					},
+				},
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			conf := Burst{ServiceBase: helper.ServiceBase{Name: moduleName}}
+			if err := conf.PreStart(tt.args.viewer); (err != nil) != tt.wantErr {
+				t.Errorf("Burst.PreStart() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
diff --git a/pkg/services/quotaturbo/quotaturbo.go b/pkg/services/quotaturbo/quotaturbo.go
new file mode 100644
index 0000000000000000000000000000000000000000..974b2a1ce6620635e9a8560dd98a1f00cb34e0bf
--- /dev/null
+++ b/pkg/services/quotaturbo/quotaturbo.go
@@ -0,0 +1,268 @@
+// Copyright (c) Huawei Technologies Co., Ltd.
2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Date: 2023-02-16
+// Description: quota turbo method(dynamically adjusting container quotas)
+
+// Package quotaturbo is for Quota Turbo
+package quotaturbo
+
+import (
+	"context"
+	"fmt"
+	"path/filepath"
+	"time"
+
+	"k8s.io/apimachinery/pkg/util/wait"
+
+	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/common/log"
+	"isula.org/rubik/pkg/common/util"
+	"isula.org/rubik/pkg/core/typedef"
+	"isula.org/rubik/pkg/core/typedef/cgroup"
+	"isula.org/rubik/pkg/lib/cpu/quotaturbo"
+	"isula.org/rubik/pkg/services/helper"
+)
+
+// default values that NewConfig fills into Config
+const (
+	defaultHightWaterMark = 60
+	defaultAlarmWaterMark = 80
+	// Run multiplies the sync interval by time.Millisecond
+	defaultQuotaTurboSyncInterval = 100
+)
+
+// cgroup keys of the cpu subsystem files read and written when pod quotas are recovered
+var (
+	cpuPeriodKey = &cgroup.Key{SubSys: "cpu", FileName: "cpu.cfs_period_us"}
+	cpuQuotaKey  = &cgroup.Key{SubSys: "cpu", FileName: "cpu.cfs_quota_us"}
+)
+
+// QuotaTurboFactory is the QuotaTurbo factory class
+type QuotaTurboFactory struct {
+	// ObjName is the service name passed to NewQuotaTurbo by NewObj
+	ObjName string
+}
+
+// Name returns the factory class name
+func (i QuotaTurboFactory) Name() string {
+	return "QuotaTurboFactory"
+}
+
+// NewObj returns a QuotaTurbo object named after ObjName; the error is always nil
+func (i QuotaTurboFactory) NewObj() (interface{}, error) {
+	return NewQuotaTurbo(i.ObjName), nil
+}
+
+// Config is the config of QuotaTurbo
+// Validate accepts water marks from 0 to 100 with AlarmWaterMark greater than
+// HighWaterMark, and a SyncInterval from 100 to 10000.
+type Config struct {
+	HighWaterMark  int `json:"highWaterMark,omitempty"`
+	AlarmWaterMark int `json:"alarmWaterMark,omitempty"`
+	SyncInterval   int `json:"syncInterval,omitempty"`
+}
+
+// NewConfig returns quotaTurbo
config instance
+func NewConfig() *Config {
+	return &Config{
+		HighWaterMark:  defaultHightWaterMark,
+		AlarmWaterMark: defaultAlarmWaterMark,
+		SyncInterval:   defaultQuotaTurboSyncInterval,
+	}
+}
+
+// QuotaTurbo manages all container CPU quota data on the current node.
+type QuotaTurbo struct {
+	conf   *Config
+	client *quotaturbo.Client
+	Viewer api.Viewer
+	helper.ServiceBase
+}
+
+// NewQuotaTurbo generates a QuotaTurbo named n with the default config and a new client
+func NewQuotaTurbo(n string) *QuotaTurbo {
+	return &QuotaTurbo{
+		ServiceBase: helper.ServiceBase{
+			Name: n,
+		},
+		conf:   NewConfig(),
+		client: quotaturbo.NewClient(),
+	}
+}
+
+// syncCgroups updates the cgroups in the client according to the current whitelist container list:
+// cgroups whose base name is not a key of conts are removed, containers not yet known are added.
+func (qt *QuotaTurbo) syncCgroups(conts map[string]*typedef.ContainerInfo) {
+	var (
+		existedCgroupPaths   = qt.client.GetAllCgroup()
+		existedCgroupPathMap = make(map[string]struct{}, len(existedCgroupPaths))
+	)
+	// delete containers marked as no need to adjust quota
+	for _, path := range existedCgroupPaths {
+		// the last path element is used as the container id
+		id := filepath.Base(path)
+		existedCgroupPathMap[id] = struct{}{}
+		if _, found := conts[id]; !found {
+			if err := qt.client.RemoveCgroup(path); err != nil {
+				log.Errorf("error removing container %v: %v", id, err)
+			} else {
+				log.Infof("remove container %v", id)
+			}
+		}
+	}
+	for id, cont := range conts {
+		/*
+			Currently, modifying the cpu limit and container id of the container will cause the container to restart,
+			so it is considered that the cgroup path and cpulimit will not change during the life cycle of the container
+		*/
+		if _, ok := existedCgroupPathMap[id]; ok {
+			continue
+		}
+		// add container to quotaturbo
+		if err := qt.client.AddCgroup(cont.Path, cont.LimitResources[typedef.ResourceCPU]); err != nil {
+			log.Errorf("error adding container %v: %v", cont.Name, err)
+		} else {
+			log.Infof("add container %v", id)
+		}
+	}
+}
+
+// AdjustQuota synchronizes the client with conts and then adjusts the quotas once;
+// a failure is only logged.
+func (qt *QuotaTurbo) AdjustQuota(conts map[string]*typedef.ContainerInfo) {
+	qt.syncCgroups(conts)
+	if err := qt.client.AdjustQuota(); err != nil {
+		log.Errorf("error occur when adjust quota: %v", err)
+	}
+}
+
+// Run adjusts the quota of the trust list container cyclically.
+// Containers are selected through pods whose quota annotation equals "true";
+// the loop runs every SyncInterval milliseconds until ctx is done.
+func (qt *QuotaTurbo) Run(ctx context.Context) {
+	wait.Until(
+		func() {
+			qt.AdjustQuota(qt.Viewer.ListContainersWithOptions(
+				func(pod *typedef.PodInfo) bool {
+					return pod.Annotations[constant.QuotaAnnotationKey] == "true"
+				}))
+		},
+		time.Millisecond*time.Duration(qt.conf.SyncInterval),
+		ctx.Done())
+}
+
+// Validate verifies that the quotaTurbo parameter is set correctly:
+// both water marks must lie in [0, 100] with AlarmWaterMark strictly greater than
+// HighWaterMark, and SyncInterval must lie in [100, 10000].
+func (conf *Config) Validate() error {
+	const (
+		minQuotaTurboWaterMark, maxQuotaTurboWaterMark       = 0, 100
+		minQuotaTurboSyncInterval, maxQuotaTurboSyncInterval = 100, 10000
+	)
+	outOfRange := func(num, min, max int) bool {
+		return num < min || num > max
+	}
+	if conf.AlarmWaterMark <= conf.HighWaterMark ||
+		outOfRange(conf.HighWaterMark, minQuotaTurboWaterMark, maxQuotaTurboWaterMark) ||
+		outOfRange(conf.AlarmWaterMark, minQuotaTurboWaterMark, maxQuotaTurboWaterMark) {
+		// equal water marks are rejected as well, so the requirement is "greater than"
+		return fmt.Errorf("alarmWaterMark should be greater than highWaterMark, both of which range from 0 to 100")
+	}
+	if outOfRange(conf.SyncInterval, minQuotaTurboSyncInterval, maxQuotaTurboSyncInterval) {
+		return fmt.Errorf("synchronization time ranges from 100 (0.1s) to 10000 (10s)")
+	}
+	return nil
+}
+
+// SetConfig lets f fill a default Config, validates it and stores it only when it is valid
+func (qt *QuotaTurbo) SetConfig(f helper.ConfigHandler) error {
+	var conf = NewConfig()
+	if err := f(qt.Name, conf); err != nil {
+		return err
+	}
+	if err := conf.Validate(); err != nil {
+		return err
+	}
+	qt.conf = conf
+	return nil
+}
+
+// GetConfig returns Config
+func (qt *QuotaTurbo) GetConfig() interface{} {
+	return qt.conf
+}
+
+// IsRunner returns true, which tells callers that quotaTurbo is a persistent service
+func (qt *QuotaTurbo) IsRunner() bool {
+	return true
+}
+
+// PreStart is the pre-start action
+func (qt *QuotaTurbo) PreStart(viewer api.Viewer) error {
+	// 1. set the parameters of the quotaturbo client
+	qt.client.CgroupRoot = cgroup.GetMountDir()
+	qt.client.HighWaterMark = qt.conf.HighWaterMark
+	qt.client.AlarmWaterMark = qt.conf.AlarmWaterMark
+	qt.Viewer = viewer
+
+	// 2. attempts to fix all currently running pods and containers
+	pods := viewer.ListPodsWithOptions()
+	for _, pod := range pods {
+		recoverOnePodQuota(pod)
+	}
+	return nil
+}
+
+// Terminate enters the service termination process: the quota of every pod is recovered
+func (qt *QuotaTurbo) Terminate(viewer api.Viewer) error {
+	pods := viewer.ListPodsWithOptions()
+	for _, pod := range pods {
+		recoverOnePodQuota(pod)
+	}
+	return nil
+}
+
+// recoverOnePodQuota resets the cpu quota of pod and of its containers from the cpu limits.
+// The pod quota is first set to -1. Each container gets -1 when its cpu limit is 0 and
+// limit * period otherwise. The pod quota is set to the sum of the container quotas only
+// when every container is limited and every container quota could be computed.
+// Failures are logged, never returned.
+func recoverOnePodQuota(pod *typedef.PodInfo) {
+	const unlimited = "-1"
+	if err := pod.SetCgroupAttr(cpuQuotaKey, unlimited); err != nil {
+		log.Errorf("Fail to set the cpu quota of the pod %v to -1: %v", pod.UID, err)
+		return
+	}
+
+	var (
+		podQuota int64
+		// keepPodUnlimited is set when the sum of the container quotas must not cap the pod
+		keepPodUnlimited = false
+	)
+
+	for _, cont := range pod.IDContainersMap {
+		// cpulimit is 0 means no quota limit
+		if cont.LimitResources[typedef.ResourceCPU] == 0 {
+			keepPodUnlimited = true
+			if err := cont.SetCgroupAttr(cpuQuotaKey, unlimited); err != nil {
+				log.Errorf("Fail to set the cpu quota of the container %v to -1: %v", cont.ID, err)
+				continue
+			}
+			log.Debugf("Set the cpu quota of the container %v to -1", cont.ID)
+			continue
+		}
+
+		period, err := cont.GetCgroupAttr(cpuPeriodKey).Int64()
+		if err != nil {
+			log.Errorf("Fail to get cpu period of container %v : %v", cont.ID, err)
+			// the quota of this container is unknown: a partial sum would cap the pod
+			// below the limits of its containers, so the pod stays unlimited
+			keepPodUnlimited = true
+			continue
+		}
+
+		contQuota := int64(cont.LimitResources[typedef.ResourceCPU] * float64(period))
+		podQuota += contQuota
+		if err := cont.SetCgroupAttr(cpuQuotaKey, util.FormatInt64(contQuota)); err != nil {
+			log.Errorf("Fail to set the cpu quota of the container %v: %v", cont.ID, err)
+			continue
+		}
+		log.Debugf("Set the cpu quota of the container %v to %v", cont.ID, contQuota)
+	}
+	if !keepPodUnlimited {
+		if err := pod.SetCgroupAttr(cpuQuotaKey, util.FormatInt64(podQuota)); err != nil {
+			// report the value that was actually written (the old message said -1)
+			log.Errorf("Fail to set the cpu quota of the pod %v to %v: %v", pod.UID, podQuota, err)
+			return
+		}
+		log.Debugf("Set the cpu quota of the pod %v to %v", pod.UID, podQuota)
+	}
+}
diff --git a/pkg/services/quotaturbo/quotaturbo_test.go b/pkg/services/quotaturbo/quotaturbo_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..49ff033799b72832389096a66463d1f15a54b824
--- /dev/null
+++ b/pkg/services/quotaturbo/quotaturbo_test.go
@@ -0,0 +1,496 @@
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: Jiaqi Yang
+// Date: 2023-03-11
+// Description: This file is used for testing quotaturbo.go
+
+package quotaturbo
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+
+	"isula.org/rubik/pkg/common/constant"
+	"isula.org/rubik/pkg/common/util"
+	"isula.org/rubik/pkg/core/typedef"
+	"isula.org/rubik/pkg/core/typedef/cgroup"
+	"isula.org/rubik/pkg/podmanager"
+	"isula.org/rubik/pkg/services/helper"
+	"isula.org/rubik/test/try"
+)
+
+// sameQuota reports whether the cpu.cfs_quota_us file under path holds want.
+// A read or parse failure is reported on t and yields false.
+func sameQuota(t *testing.T, path string, want int64) bool {
+	const cfsQuotaUsFileName = "cpu.cfs_quota_us"
+	data, err := util.ReadSmallFile(filepath.Join(path, cfsQuotaUsFileName))
+	if err != nil {
+		assert.NoError(t, err)
+		return false
+	}
+	quota, err := util.ParseInt64(strings.ReplaceAll(string(data), "\n", ""))
+	if err != nil {
+		assert.NoError(t, err)
+		return false
+	}
+	return quota == want
+}
+
+// TestQuotaTurbo_Terminate tests Terminate function
+func TestQuotaTurbo_Terminate(t *testing.T) {
+	const (
+		fooContName   = "Foo"
+		barContName   = "Bar"
+		podUID        = "testPod1"
+		wrongPodQuota = "600000"
+		wrongFooQuota = "300000"
+		wrongBarQuota = "200000"
+		podPath       = "/sys/fs/cgroup/cpu/kubepods/testPod1/"
+		fooPath       = "/sys/fs/cgroup/cpu/kubepods/testPod1/testCon1"
+		barPath       = "/sys/fs/cgroup/cpu/kubepods/testPod1/testCon2"
+	)
+
+	var (
+		fooCont = &typedef.ContainerInfo{
+			Name:           fooContName,
+			ID:             "testCon1",
+			Hierarchy:      cgroup.Hierarchy{Path: "kubepods/testPod1/testCon1"},
+			LimitResources: make(typedef.ResourceMap),
+		}
+		barCont = &typedef.ContainerInfo{
+			Name:           barContName,
+			ID:             "testCon2",
+			Hierarchy:      cgroup.Hierarchy{Path: "kubepods/testPod1/testCon2"},
+			LimitResources: make(typedef.ResourceMap),
+		}
+		pod = &typedef.PodInfo{
+			UID:       "testPod1",
+			Hierarchy: cgroup.Hierarchy{Path: "kubepods/testPod1"},
+			IDContainersMap: map[string]*typedef.ContainerInfo{
+				fooCont.ID: fooCont,
+				barCont.ID: barCont,
+			},
+		}
+		// a cpu limit of 0 means the container is unlimited
+		tests = []struct {
+			postfunc    func(t *testing.T)
+			fooCPULimit float64
+			barCPULimit float64
+			name        string
+		}{
+			{
+				name:        "TC1-one unlimited container is existed",
+				fooCPULimit: 2,
+				barCPULimit: 0,
+				postfunc: func(t *testing.T) {
+					var (
+						unlimited       int64 = -1
+						correctFooQuota int64 = 200000
+					)
+					assert.True(t, sameQuota(t, podPath, unlimited))
+					assert.True(t, sameQuota(t, fooPath, correctFooQuota))
+					assert.True(t, sameQuota(t, barPath, unlimited))
+				},
+			},
+			{
+				// both limits are non-zero (the old name said "unlimited")
+				name:        "TC2-all containers are limited",
+				fooCPULimit: 2,
+				barCPULimit: 1,
+				postfunc: func(t *testing.T) {
+					var (
+						correctPodQuota int64 = 300000
+						correctFooQuota int64 = 200000
+						correctBarQuota int64 = 100000
+					)
+					assert.True(t, sameQuota(t, podPath, correctPodQuota))
+					assert.True(t, sameQuota(t, fooPath, correctFooQuota))
+					assert.True(t, sameQuota(t, barPath, correctBarQuota))
+				},
+			},
+			{
+				// both limits are zero (the old name said "limited")
+				name:        "TC3-all containers are unlimited",
+				fooCPULimit: 0,
+				barCPULimit: 0,
+				postfunc: func(t *testing.T) {
+					var unLimited int64 = -1
+					assert.True(t, sameQuota(t, podPath, unLimited))
+					assert.True(t, sameQuota(t, fooPath, unLimited))
+					assert.True(t, sameQuota(t, barPath, unLimited))
+				},
+			},
+		}
+	)
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var (
+				pm = &podmanager.PodManager{
+					Pods: &podmanager.PodCache{
+						Pods: map[string]*typedef.PodInfo{
+							podUID: pod,
+						},
+					},
+				}
+				qt = &QuotaTurbo{
+					Viewer: pm,
+				}
+			)
+
+			try.MkdirAll(fooPath, constant.DefaultDirMode)
+			try.MkdirAll(barPath, constant.DefaultDirMode)
+			defer func() {
+				try.RemoveAll(podPath)
+			}()
+
+			fooCont.LimitResources[typedef.ResourceCPU] = tt.fooCPULimit
+			barCont.LimitResources[typedef.ResourceCPU] = tt.barCPULimit
+
+			// start from wrong quotas so that the recovery is observable
+			assert.NoError(t, pod.SetCgroupAttr(cpuQuotaKey, wrongPodQuota))
+			assert.NoError(t, fooCont.SetCgroupAttr(cpuQuotaKey, wrongFooQuota))
+			assert.NoError(t, barCont.SetCgroupAttr(cpuQuotaKey, wrongBarQuota))
+			assert.NoError(t, qt.Terminate(pm))
+			tt.postfunc(t)
+		})
+	}
+}
+
+// TestQuotaTurbo_PreStart tests PreStart
+func TestQuotaTurbo_PreStart(t *testing.T) {
+	const (
+		fooContName = "Foo"
+		podUID      = "testPod1"
+		podPath     = "/sys/fs/cgroup/cpu/kubepods/testPod1/"
+	)
+	var (
+		fooCont = &typedef.ContainerInfo{
+			Name:           fooContName,
+			ID:             "testCon1",
+			Hierarchy:      cgroup.Hierarchy{Path: "kubepods/testPod1/testCon1"},
+			LimitResources: make(typedef.ResourceMap),
+		}
+		pm = &podmanager.PodManager{
+			Pods: &podmanager.PodCache{
+				Pods: map[string]*typedef.PodInfo{
+					podUID: {
+						UID:       podUID,
+						Hierarchy: cgroup.Hierarchy{Path: "kubepods/testPod1"},
+						IDContainersMap: map[string]*typedef.ContainerInfo{
+							fooCont.ID: fooCont,
+						},
+					},
+				},
+			},
+		}
+		name = "quotaturbo"
+		qt   = NewQuotaTurbo(name)
+	)
+	testName := "TC1- test Prestart"
+	t.Run(testName, func(t *testing.T) {
+		// PreStart is exercised three times: before the pod path exists, after it was
+		// created, and with a cpu limit set on the container
+		qt.PreStart(pm)
+		try.MkdirAll(podPath, constant.DefaultDirMode)
+		defer try.RemoveAll(podPath)
+		qt.PreStart(pm)
+		pm.Pods.Pods[podUID].IDContainersMap[fooCont.ID].
+			LimitResources[typedef.ResourceCPU] = math.Min(1, float64(runtime.NumCPU())-1)
+		qt.PreStart(pm)
+	})
+}
+
+// TestConfig_Validate test Validate function
+func TestConfig_Validate(t *testing.T) {
+	type fields struct {
+		HighWaterMark  int
+		AlarmWaterMark int
+		SyncInterval   int
+	}
+	tests := []struct {
+		name    string
+		fields  fields
+		wantErr bool
+	}{
+		{
+			name: "TC1-alarmWaterMark is less or equal to highWaterMark",
+			fields: fields{
+				HighWaterMark:  100,
+				AlarmWaterMark: 60,
+				SyncInterval:   1000,
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC2-highWater mark exceed the max quota turbo water mark(100)",
+			fields: fields{
+				HighWaterMark:  1000,
+				AlarmWaterMark: 100000,
+				SyncInterval:   1000,
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC3-sync interval out of range(100-10000)",
+			fields: fields{
+				HighWaterMark:  60,
+				AlarmWaterMark: 80,
+				SyncInterval:   1,
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC4-normal case",
+			fields: fields{
+				HighWaterMark:  60,
+				AlarmWaterMark: 100,
+				SyncInterval:   1000,
+			},
+			wantErr: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			conf := &Config{
+				HighWaterMark:  tt.fields.HighWaterMark,
+				AlarmWaterMark: tt.fields.AlarmWaterMark,
+				SyncInterval:   tt.fields.SyncInterval,
+			}
+			if err := conf.Validate(); (err != nil) != tt.wantErr {
+				t.Errorf("Config.Validate() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+// TestQuotaTurbo_SetConfig tests SetConfig
+func TestQuotaTurbo_SetConfig(t *testing.T) {
+	const name = "quotaturbo"
+	type args struct {
+		f helper.ConfigHandler
+	}
+	tests := []struct {
+		name    string
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "TC1-error function",
+			args: args{
+				f: func(configName string, d interface{}) error { return fmt.Errorf("error occurs") },
+			},
+			wantErr: true,
+		},
+		{
+			name: "TC2-success",
+			args: args{
+				f: func(configName string, d interface{}) error { return nil },
+			},
+			wantErr: false,
+		},
+		{
+			name: "TC3-invalid config",
+			args: args{
+				f: func(configName string, d interface{}) error {
+					c, ok := d.(*Config)
+					if !ok {
+						t.Error("fail to convert config")
+						// c is nil here, do not dereference it
+						return nil
+					}
+					c.AlarmWaterMark = 101
+					return nil
+				},
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			qt := NewQuotaTurbo(name)
+			if err := qt.SetConfig(tt.args.f); (err != nil) != tt.wantErr {
+				t.Errorf("QuotaTurbo.SetConfig() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
+// TestQuotaTurbo_Other tests the factory, IsRunner and ID
+func TestQuotaTurbo_Other(t *testing.T) {
+	const name = "quotaturbo"
+	tests := []struct {
+		name string
+		want bool
+	}{
+		{
+			name: "TC1-test other",
+			want: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			f := &QuotaTurboFactory{ObjName: name}
+			f.Name()
+			instance, err := f.NewObj()
+			assert.NoError(t, err)
+			qt, ok := instance.(*QuotaTurbo)
+			if !ok {
+				// stop here: qt is nil and must not be used below
+				t.Fatal("fail to convert QuotaTurbo")
+			}
+			if got := qt.IsRunner(); got != tt.want {
+				t.Errorf("QuotaTurbo.IsRunner() = %v, want %v", got, tt.want)
+			}
+			qt.ID()
+
+		})
+	}
+}
+
+// TestQuotaTurbo_AdjustQuota tests AdjustQuota
+func TestQuotaTurbo_AdjustQuota(t *testing.T) {
+	const (
+		name          = "quotaturbo"
+		cpuPeriodFile = "cpu.cfs_period_us"
+		cpuQuotaFile  = "cpu.cfs_quota_us"
+		cpuUsageFile  = "cpuacct.usage"
+		cpuStatFile   = "cpu.stat"
+		stat          = `nr_periods 1
+		nr_throttled 1
+		throttled_time 1
+		`
+		quota  = "200000"
+		period = "100000"
+		usage  = "1234567"
+		minCPU = 2
+	)
+	var (
+		fooCont = &typedef.ContainerInfo{
+			Name:      "Foo",
+			ID:        "testCon1",
+			Hierarchy: cgroup.Hierarchy{Path: "kubepods/testPod1/testCon1"},
+			LimitResources: typedef.ResourceMap{
+				typedef.ResourceCPU: math.Min(minCPU, float64(runtime.NumCPU()-1)),
+			},
+		}
+		barCont = &typedef.ContainerInfo{
+			Name:      "Bar",
+			ID:        "testCon2",
+			Hierarchy: cgroup.Hierarchy{Path: "kubepods/testPod2/testCon2"},
+			LimitResources: typedef.ResourceMap{
+				typedef.ResourceCPU: math.Min(minCPU, float64(runtime.NumCPU()-1)),
+			},
+		}
+		// preEnv writes the period, quota, usage and stat files of one container under the test dir
+		preEnv = func(contPath string) {
+			try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuPeriodFile), period)
+			try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuQuotaFile), quota)
+			try.WriteFile(filepath.Join(constant.TmpTestDir, "cpuacct", contPath, cpuUsageFile), usage)
+			try.WriteFile(filepath.Join(constant.TmpTestDir, "cpu", contPath, cpuStatFile), stat)
+		}
+	)
+	type args struct {
+		conts map[string]*typedef.ContainerInfo
+	}
+	tests := []struct {
+		name string
+		args args
+		pre  func(t *testing.T, qt *QuotaTurbo)
+		post func(t *testing.T)
+	}{
+		{
+			// only the files of bar exist, so adding foo fails; bar is not in conts and is removed
+			name: "TC1-fail add foo container & remove bar container successfully",
+			args: args{
+				conts: map[string]*typedef.ContainerInfo{
+					fooCont.ID: fooCont,
+				},
+			},
+			pre: func(t *testing.T, qt *QuotaTurbo) {
+				preEnv(barCont.Path)
+				assert.NoError(t, qt.client.AddCgroup(barCont.Path, barCont.LimitResources[typedef.ResourceCPU]))
+				assert.Equal(t, 1, len(qt.client.GetAllCgroup()))
+			},
+			post: func(t *testing.T) {
+				try.RemoveAll(constant.TmpTestDir)
+			},
+		},
+		{
+			// both containers are already in the client and in conts: nothing to add or remove
+			name: "TC2-no container added or removed",
+			args: args{
+				conts: map[string]*typedef.ContainerInfo{
+					fooCont.ID: fooCont,
+					barCont.ID: barCont,
+				},
+			},
+			pre: func(t *testing.T, qt *QuotaTurbo) {
+				preEnv(barCont.Path)
+				preEnv(fooCont.Path)
+				assert.NoError(t, qt.client.AddCgroup(barCont.Path, barCont.LimitResources[typedef.ResourceCPU]))
+				assert.NoError(t, qt.client.AddCgroup(fooCont.Path, fooCont.LimitResources[typedef.ResourceCPU]))
+				const cgroupLen = 2
+				assert.Equal(t, cgroupLen, len(qt.client.GetAllCgroup()))
+			},
+			post: func(t *testing.T) {
+				try.RemoveAll(constant.TmpTestDir)
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			qt := NewQuotaTurbo(name)
+			qt.client.SetCgroupRoot(constant.TmpTestDir)
+			if tt.pre != nil {
+				tt.pre(t, qt)
+			}
+			qt.AdjustQuota(tt.args.conts)
+			if tt.post != nil {
+				tt.post(t)
+			}
+		})
+	}
+}
+
+// TestQuotaTurbo_Run starts Run in a goroutine and cancels its context after 200ms
+func TestQuotaTurbo_Run(t *testing.T) {
+	const name = "quotaturbo"
+	var fooPod = &typedef.PodInfo{
+		Name:      "Foo",
+		UID:       "testPod1",
+		Hierarchy: cgroup.Hierarchy{Path: "kubepods/testPod1"},
+		Annotations: map[string]string{
+			constant.QuotaAnnotationKey: "true",
+		},
+	}
+	tests := []struct {
+		name string
+	}{
+		{
+			name: "TC1-run and cancel",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			qt := NewQuotaTurbo(name)
+			pm := &podmanager.PodManager{
+				Pods: &podmanager.PodCache{
+					Pods: map[string]*typedef.PodInfo{
+						fooPod.UID: fooPod,
+					},
+				},
+			}
+			ctx, cancel := context.WithCancel(context.Background())
+			qt.Viewer = pm
+			go qt.Run(ctx)
+			const sleepTime = time.Millisecond * 200
+			time.Sleep(sleepTime)
+			cancel()
+		})
+	}
+}
diff --git a/pkg/services/service.go b/pkg/services/service.go
new file mode 100644
index 0000000000000000000000000000000000000000..9ba07d8e7e741501c4df0381a4e964c22e5111ad
--- /dev/null
+++ b/pkg/services/service.go
@@ -0,0 +1,103 @@
+//
Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+// rubik licensed under the Mulan PSL v2.
+// You can use this software according to the terms and conditions of the Mulan PSL v2.
+// You may obtain a copy of Mulan PSL v2 at:
+//     http://license.coscl.org.cn/MulanPSL2
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+// PURPOSE.
+// See the Mulan PSL v2 for more details.
+// Author: hanchao
+// Create: 2023-03-11
+// Description: This file is the Interface set of services
+
+// Package services defines the interfaces implemented by services and the helpers
+// that initialize and look up service components.
+package services
+
+import (
+	"context"
+	"fmt"
+
+	"isula.org/rubik/pkg/api"
+	"isula.org/rubik/pkg/common/log"
+	"isula.org/rubik/pkg/core/typedef"
+	"isula.org/rubik/pkg/services/helper"
+)
+
+// PodEvent for listening to pod changes.
+type PodEvent interface {
+	// AddPod handles the addition of a pod.
+	AddPod(*typedef.PodInfo) error
+	// UpdatePod handles the update of a pod config.
+	UpdatePod(old, new *typedef.PodInfo) error
+	// DeletePod handles the deletion of a pod.
+	DeletePod(*typedef.PodInfo) error
+}
+
+// Runner for background service process.
+type Runner interface {
+	// IsRunner confirms whether the service is a runner
+	IsRunner() bool
+	// Run starts the runner
+	Run(context.Context)
+	// Stop stops the runner
+	Stop() error
+}
+
+// Service interface contains methods which must be implemented by all services.
+type Service interface {
+	Runner
+	PodEvent
+	// ID is the name of plugin, must be unique.
+	ID() string
+	// SetConfig is an interface that invokes the ConfigHandler to obtain the corresponding configuration.
+	SetConfig(helper.ConfigHandler) error
+	// GetConfig is an interface for obtaining service running configurations.
+	GetConfig() interface{}
+	// PreStart is an interface for calling a collection of methods when the service is pre-started
+	PreStart(api.Viewer) error
+	// Terminate is an interface that calls a collection of methods when the service terminates
+	// it will stop runner and clear configuration
+	Terminate(api.Viewer) error
+}
+
+// FeatureSpec defines the feature name and whether the feature is enabled.
+type FeatureSpec struct {
+	// feature name
+	Name string
+	// Default is the default enablement state for the feature
+	Default bool
+}
+
+// InitServiceComponents initializes the service components named by specs.
+// Specs whose Default is false are skipped with a warning; unknown names and
+// init failures are logged and do not stop the loop.
+func InitServiceComponents(specs []FeatureSpec) {
+	for _, spec := range specs {
+		if !spec.Default {
+			log.Warnf("feature is disabled by default:%v", spec.Name)
+			continue
+		}
+
+		// look up the init function registered under the feature name
+		initFunc, found := serviceComponents[spec.Name]
+		if !found {
+			log.Errorf("init service failed, name:%v", spec.Name)
+			continue
+		}
+
+		if err := initFunc(spec.Name); err != nil {
+			log.Warnf("init component failed, name:%v,error:%v", spec.Name, err)
+		}
+	}
+}
+
+// GetServiceComponent to get the component service interface.
+func GetServiceComponent(name string) (Service, error) {
+	si, err := helper.GetComponent(name)
+	if err != nil {
+		return nil, fmt.Errorf("get service failed, name:%v,err:%v", name, err)
+	}
+	// the component must implement Service and must not be a nil interface value
+	srv, ok := si.(Service)
+	if !ok || srv == nil {
+		return nil, fmt.Errorf("failed to convert the type,name:%v", name)
+	}
+	return srv, nil
+}
diff --git a/pkg/services/service_test.go b/pkg/services/service_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..5b7b608b65f34a6299036d8c8cc3df6abcaffdff
--- /dev/null
+++ b/pkg/services/service_test.go
@@ -0,0 +1,71 @@
+package services
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"isula.org/rubik/pkg/feature"
+)
+
+// defaultFeature enables every feature used by TestInitServiceComponents
+var defaultFeature = []FeatureSpec{
+	{
+		Name:    feature.PreemptionFeature,
+		Default: true,
+	},
+	{
+		Name:    feature.DynCacheFeature,
+		Default: true,
+	},
+	{
+		Name:    feature.IOLimitFeature,
+		Default: true,
+	},
+	{
+		Name:    feature.IOCostFeature,
+		Default: true,
+	},
+	{
+		Name:    feature.DynMemoryFeature,
+		Default: true,
+	},
+	{
+		Name:    feature.QuotaBurstFeature,
+		Default: true,
+	},
+	{
+		Name:    feature.QuotaTurboFeature,
+		Default: true,
+	},
+}
+
+// TestErrorInitServiceComponents checks that an unknown feature and a disabled feature cannot be fetched
+func TestErrorInitServiceComponents(t *testing.T) {
+	errFeatures := []FeatureSpec{
+		{
+			Name:    "testFeature",
+			Default: true,
+		},
+		{
+			Name:    feature.QuotaTurboFeature,
+			Default: false,
+		},
+	}
+
+	InitServiceComponents(errFeatures)
+	// the loop variable is named spec so that it does not shadow the feature package
+	for _, spec := range errFeatures {
+		_, err := GetServiceComponent(spec.Name)
+		// err.Error() on a nil error would panic, so fail cleanly first
+		if assert.Error(t, err) {
+			assert.Contains(t, err.Error(), "get service failed")
+		}
+	}
+}
+
+// TestInitServiceComponents checks that every default feature is registered under its own name,
+// or is reported as not supported on this machine
+func TestInitServiceComponents(t *testing.T) {
+	InitServiceComponents(defaultFeature)
+	for _, spec := range defaultFeature {
+		s, err := GetServiceComponent(spec.Name)
+		if err != nil {
+			assert.Contains(t, err.Error(), "this machine not support")
+			continue
+		}
+		assert.Equal(t, s.ID(), spec.Name)
+	}
+}
diff --git a/pkg/typedef/types.go b/pkg/typedef/types.go
deleted file mode 100644
index
a9a1e28dceae2db8a3f6dfcb04cd322df9849201..0000000000000000000000000000000000000000 --- a/pkg/typedef/types.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Jing Rui -// Create: 2021-04-27 -// Description: This file contains default constants used in the project - -// Package typedef is general used types. -package typedef - -import ( - "path/filepath" - - corev1 "k8s.io/api/core/v1" -) - -// ContainerInfo represent container -type ContainerInfo struct { - // Basic Information - Name string `json:"name"` - ID string `json:"id"` - PodID string `json:"podID"` - CgroupRoot string `json:"cgroupRoot"` - CgroupAddr string `json:"cgroupAddr"` -} - -// NewContainerInfo create container info -func NewContainerInfo(container corev1.Container, podID, conID, cgroupRoot, podCgroupPath string) *ContainerInfo { - c := ContainerInfo{ - Name: container.Name, - ID: conID, - PodID: podID, - CgroupRoot: cgroupRoot, - CgroupAddr: filepath.Join(podCgroupPath, conID), - } - return &c -} - -// CgroupPath return full cgroup path -func (ci *ContainerInfo) CgroupPath(subsys string) string { - if ci == nil || ci.Name == "" { - return "" - } - return filepath.Join(ci.CgroupRoot, subsys, ci.CgroupAddr) -} - -// Clone return deepcopy object. 
-func (ci *ContainerInfo) Clone() *ContainerInfo { - copy := *ci - return © -} - -// PodInfo represent pod -type PodInfo struct { - // Basic Information - Containers map[string]*ContainerInfo `json:"containers,omitempty"` - Name string `json:"name"` - UID string `json:"uid"` - CgroupPath string `json:"cgroupPath"` - Namespace string `json:"namespace"` - CgroupRoot string `json:"cgroupRoot"` - - // Service Information - Offline bool `json:"offline"` - CacheLimitLevel string `json:"cacheLimitLevel,omitempty"` - - // value of quota burst - QuotaBurst int64 `json:"quotaBurst"` -} - -// Clone return deepcopy object -func (pi *PodInfo) Clone() *PodInfo { - if pi == nil { - return nil - } - copy := *pi - // deepcopy reference object - copy.Containers = make(map[string]*ContainerInfo, len(pi.Containers)) - for _, c := range pi.Containers { - copy.Containers[c.Name] = c.Clone() - } - return © -} - -// AddContainerInfo store container info to checkpoint -func (pi *PodInfo) AddContainerInfo(containerInfo *ContainerInfo) { - // key should not be empty - if containerInfo.Name == "" { - return - } - pi.Containers[containerInfo.Name] = containerInfo -} diff --git a/pkg/typedef/types_test.go b/pkg/typedef/types_test.go deleted file mode 100644 index fe368b74bb364ac581269a05f604dcd45a03864c..0000000000000000000000000000000000000000 --- a/pkg/typedef/types_test.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. 
-// Author: Jing Rui -// Create: 2022-07-10 -// Description: This file contains default constants used in the project - -// Package typedef is general used types. -package typedef - -import ( - "io/ioutil" - "log" - "os" - "path/filepath" - "reflect" - "testing" - - "github.com/stretchr/testify/assert" - "isula.org/rubik/pkg/constant" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" -) - -func init() { - err := os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode) - if err != nil { - log.Fatalf("Failed to create tmp test dir for testing!") - } -} - -func genContainer() corev1.Container { - c := corev1.Container{} - c.Name = "testContainer" - c.Resources.Requests = make(corev1.ResourceList) - c.Resources.Limits = make(corev1.ResourceList) - c.Resources.Requests["cpu"] = *resource.NewMilliQuantity(10000, resource.DecimalSI) - c.Resources.Limits["cpu"] = *resource.NewMilliQuantity(10000, resource.DecimalSI) - c.Resources.Limits["memory"] = *resource.NewMilliQuantity(10000, resource.DecimalSI) - - return c -} - -// TestNewContainerInfo is testcase for NewContainerInfo -func TestNewContainerInfo(t *testing.T) { - cgRoot, err := ioutil.TempDir(constant.TmpTestDir, "cgRoot") - assert.NoError(t, err) - defer os.RemoveAll(cgRoot) - podCGPath, err := ioutil.TempDir(constant.TmpTestDir, "pod") - assert.NoError(t, err) - defer os.RemoveAll(cgRoot) - - c := genContainer() - type args struct { - container corev1.Container - podID string - conID string - cgroupRoot string - podCgroupPath string - } - tests := []struct { - want *ContainerInfo - name string - args args - }{ - { - name: "TC", - args: args{container: c, podID: "podID", cgroupRoot: cgRoot, conID: "cID", podCgroupPath: podCGPath}, - want: &ContainerInfo{ - Name: "testContainer", - ID: "cID", - PodID: "podID", - CgroupRoot: cgRoot, - CgroupAddr: filepath.Join(podCGPath, "cID"), - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := 
NewContainerInfo(tt.args.container, tt.args.podID, tt.args.conID, tt.args.cgroupRoot, tt.args.podCgroupPath); !reflect.DeepEqual(got, tt.want) { - t.Errorf("NewContainerInfo() = %v, want %v", got, tt.want) - } - }) - } -} - -// TestContainerInfo_CgroupPath is testcase for ContainerInfo.CgroupPath -func TestContainerInfo_CgroupPath(t *testing.T) { - cgRoot, err := ioutil.TempDir(constant.TmpTestDir, "cgRoot") - assert.NoError(t, err) - defer os.RemoveAll(cgRoot) - podCGPath, err := ioutil.TempDir(constant.TmpTestDir, "pod") - assert.NoError(t, err) - defer os.RemoveAll(podCGPath) - - emptyCi := &ContainerInfo{} - assert.Equal(t, "", emptyCi.CgroupPath("cpu")) - - ci := emptyCi.Clone() - - ci.Name = "testContainer" - ci.ID = "cID" - ci.PodID = "podID" - ci.CgroupRoot = cgRoot - ci.CgroupAddr = filepath.Join(podCGPath, "cID") - assert.Equal(t, ci.CgroupPath("cpu"), - filepath.Join(cgRoot, "cpu", filepath.Join(podCGPath, "cID"))) -} - -// TestPodInfo_Clone is testcase for PodInfo.Clone -func TestPodInfo_Clone(t *testing.T) { - cgRoot, err := ioutil.TempDir(constant.TmpTestDir, "cgRoot") - assert.NoError(t, err) - defer os.RemoveAll(cgRoot) - podCGPath, err := ioutil.TempDir(constant.TmpTestDir, "pod") - assert.NoError(t, err) - defer os.RemoveAll(podCGPath) - emptyPI := &PodInfo{} - pi := emptyPI.Clone() - pi.Containers = make(map[string]*ContainerInfo) - pi.Name = "testPod" - pi.UID = "abcd" - pi.CgroupPath = cgRoot - - containerWithOutName := genContainer() - containerWithOutName.Name = "" - - emptyNameCI := NewContainerInfo(containerWithOutName, "testPod", "cID", cgRoot, podCGPath) - pi.AddContainerInfo(emptyNameCI) - assert.Equal(t, len(pi.Containers), 0) - - ci := NewContainerInfo(genContainer(), "testPod", "cID", cgRoot, podCGPath) - pi.AddContainerInfo(ci) - newPi := pi.Clone() - assert.Equal(t, len(newPi.Containers), 1) -} diff --git a/pkg/util/pod.go b/pkg/util/pod.go deleted file mode 100644 index 
1669f162ffeb8c10f555598bfcde2d035cac185b..0000000000000000000000000000000000000000 --- a/pkg/util/pod.go +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. -// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Danni Xia -// Create: 2022-05-25 -// Description: Pod related common functions - -package util - -import ( - "path/filepath" - "strings" - - corev1 "k8s.io/api/core/v1" - - "isula.org/rubik/pkg/constant" - log "isula.org/rubik/pkg/tinylog" - "isula.org/rubik/pkg/typedef" -) - -const configHashAnnotationKey = "kubernetes.io/config.hash" - -// IsOffline judges whether pod is offline pod -func IsOffline(pod *corev1.Pod) bool { - return pod.Annotations[constant.PriorityAnnotationKey] == "true" -} - -func GetPodCacheLimit(pod *corev1.Pod) string { - return pod.Annotations[constant.CacheLimitAnnotationKey] -} - -// GetQuotaBurst checks CPU quota burst annotation value. 
-func GetQuotaBurst(pod *corev1.Pod) int64 { - quota := pod.Annotations[constant.QuotaBurstAnnotationKey] - if quota == "" { - return constant.InvalidBurst - } - - quotaBurst, err := typedef.ParseInt64(quota) - if err != nil { - log.Errorf("pod %s burst quota annotation value %v is invalid, expect integer", pod.Name, quotaBurst) - return constant.InvalidBurst - } - if quotaBurst < 0 { - log.Errorf("pod %s burst quota annotation value %v is invalid, expect positive", pod.Name, quotaBurst) - return constant.InvalidBurst - } - return quotaBurst -} - -// GetPodCgroupPath returns cgroup path of pod -func GetPodCgroupPath(pod *corev1.Pod) string { - var cgroupPath string - id := string(pod.UID) - if configHash := pod.Annotations[configHashAnnotationKey]; configHash != "" { - id = configHash - } - - switch pod.Status.QOSClass { - case corev1.PodQOSGuaranteed: - cgroupPath = filepath.Join(constant.KubepodsCgroup, constant.PodCgroupNamePrefix+id) - case corev1.PodQOSBurstable: - cgroupPath = filepath.Join(constant.KubepodsCgroup, strings.ToLower(string(corev1.PodQOSBurstable)), - constant.PodCgroupNamePrefix+id) - case corev1.PodQOSBestEffort: - cgroupPath = filepath.Join(constant.KubepodsCgroup, strings.ToLower(string(corev1.PodQOSBestEffort)), - constant.PodCgroupNamePrefix+id) - } - - return cgroupPath -} diff --git a/pkg/util/pod_test.go b/pkg/util/pod_test.go deleted file mode 100644 index e8f5fb2223a871425c97405c7be710203c65de91..0000000000000000000000000000000000000000 --- a/pkg/util/pod_test.go +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. 
-// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Jingxiao Lu -// Create: 2022-05-25 -// Description: tests for pod.go - -package util - -import ( - "path/filepath" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - corev1 "k8s.io/api/core/v1" - - "isula.org/rubik/pkg/constant" -) - -const ( - trueStr = "true" -) - -func TestIsOffline(t *testing.T) { - var pod = &corev1.Pod{} - pod.Annotations = make(map[string]string) - pod.Annotations[constant.PriorityAnnotationKey] = trueStr - if !IsOffline(pod) { - t.Fatalf("%s failed for Annotations is %s", t.Name(), trueStr) - } - - delete(pod.Annotations, constant.PriorityAnnotationKey) - if IsOffline(pod) { - t.Fatalf("%s failed for Annotations no such key", t.Name()) - } -} - -// TestGetQuotaBurst is testcase for GetQuotaBurst -func TestGetQuotaBurst(t *testing.T) { - pod := &corev1.Pod{} - pod.Annotations = make(map[string]string) - maxInt64PlusOne := "9223372036854775808" - tests := []struct { - name string - quotaBurst string - want int64 - }{ - { - name: "TC1-valid quota burst", - quotaBurst: "1", - want: 1, - }, - { - name: "TC2-empty quota burst", - quotaBurst: "", - want: -1, - }, - { - name: "TC3-zero quota burst", - quotaBurst: "0", - want: 0, - }, - { - name: "TC4-negative quota burst", - quotaBurst: "-100", - want: -1, - }, - { - name: "TC5-float quota burst", - quotaBurst: "100.34", - want: -1, - }, - { - name: "TC6-nonnumerical quota burst", - quotaBurst: "nonnumerical", - want: -1, - }, - { - name: "TC7-exceed max int64", - quotaBurst: maxInt64PlusOne, - want: -1, - }, - } - for _, tt := range tests { - pod.Annotations[constant.QuotaBurstAnnotationKey] = tt.quotaBurst - assert.Equal(t, 
GetQuotaBurst(pod), tt.want) - } -} - -func TestGetPodCgroupPath(t *testing.T) { - var pod = &corev1.Pod{} - pod.UID = "AAA" - var guaranteedPath = filepath.Join(constant.KubepodsCgroup, constant.PodCgroupNamePrefix+string(pod.UID)) - var burstablePath = filepath.Join(constant.KubepodsCgroup, strings.ToLower(string(corev1.PodQOSBurstable)), constant.PodCgroupNamePrefix+string(pod.UID)) - var besteffortPath = filepath.Join(constant.KubepodsCgroup, strings.ToLower(string(corev1.PodQOSBestEffort)), constant.PodCgroupNamePrefix+string(pod.UID)) - pod.Annotations = make(map[string]string) - - // no pod.Annotations[configHashAnnotationKey] - pod.Status.QOSClass = corev1.PodQOSGuaranteed - if !assert.Equal(t, GetPodCgroupPath(pod), guaranteedPath) { - t.Fatalf("%s failed for PodQOSGuaranteed without configHash", t.Name()) - } - pod.Status.QOSClass = corev1.PodQOSBurstable - if !assert.Equal(t, GetPodCgroupPath(pod), burstablePath) { - t.Fatalf("%s failed for PodQOSBurstable without configHash", t.Name()) - } - pod.Status.QOSClass = corev1.PodQOSBestEffort - if !assert.Equal(t, GetPodCgroupPath(pod), besteffortPath) { - t.Fatalf("%s failed for PodQOSBestEffort without configHash", t.Name()) - } - pod.Status.QOSClass = "" - if !assert.Equal(t, GetPodCgroupPath(pod), "") { - t.Fatalf("%s failed for not setting QOSClass without configHash", t.Name()) - } - - // has pod.Annotations[configHashAnnotationKey] - pod.Annotations[configHashAnnotationKey] = "BBB" - var id = pod.Annotations[configHashAnnotationKey] - guaranteedPath = filepath.Join(constant.KubepodsCgroup, constant.PodCgroupNamePrefix+id) - burstablePath = filepath.Join(constant.KubepodsCgroup, strings.ToLower(string(corev1.PodQOSBurstable)), constant.PodCgroupNamePrefix+id) - besteffortPath = filepath.Join(constant.KubepodsCgroup, strings.ToLower(string(corev1.PodQOSBestEffort)), constant.PodCgroupNamePrefix+id) - pod.Status.QOSClass = corev1.PodQOSGuaranteed - if !assert.Equal(t, GetPodCgroupPath(pod), 
guaranteedPath) { - t.Fatalf("%s failed for PodQOSGuaranteed with configHash", t.Name()) - } - pod.Status.QOSClass = corev1.PodQOSBurstable - if !assert.Equal(t, GetPodCgroupPath(pod), burstablePath) { - t.Fatalf("%s failed for PodQOSBurstable with configHash", t.Name()) - } - pod.Status.QOSClass = corev1.PodQOSBestEffort - if !assert.Equal(t, GetPodCgroupPath(pod), besteffortPath) { - t.Fatalf("%s failed for PodQOSBestEffort with configHash", t.Name()) - } - pod.Status.QOSClass = "" - if !assert.Equal(t, GetPodCgroupPath(pod), "") { - t.Fatalf("%s failed for not setting QOSClass with configHash", t.Name()) - } -} diff --git a/pkg/version/version.go b/pkg/version/version.go index e285a9b08c71f46e8d7bc41095ddaf3980ce09b9..e8163fbce6ee954747df985691c5710d2580fe82 100644 --- a/pkg/version/version.go +++ b/pkg/version/version.go @@ -21,9 +21,9 @@ import ( ) var ( - // Version represents rubik version + // Version represents rubik Version Version string - // Release represents rubik release number + // Release represents rubik Release number Release string // GitCommit represents git commit number GitCommit string @@ -33,7 +33,8 @@ var ( func init() { var showVersion bool - if len(os.Args) == 2 && os.Args[1] == "-v" { + const maxArgLen = 2 + if len(os.Args) == maxArgLen && os.Args[1] == "-v" { showVersion = true } diff --git a/rubik.go b/rubik.go index fd8bedbecc536e7c6518a3774bd9429fddc61a3c..3c7676b769c2c84f49d0d2cbc2bf4a37e9c7b6cd 100644 --- a/rubik.go +++ b/rubik.go @@ -1,26 +1,11 @@ -// Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -// rubik licensed under the Mulan PSL v2. -// You can use this software according to the terms and conditions of the Mulan PSL v2. 
-// You may obtain a copy of Mulan PSL v2 at: -// http://license.coscl.org.cn/MulanPSL2 -// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -// PURPOSE. -// See the Mulan PSL v2 for more details. -// Author: Jingrui -// Create: 2021-04-17 -// Description: This file is main program of rubik - package main import ( "os" - _ "isula.org/rubik/pkg/version" - "isula.org/rubik/pkg/constant" "isula.org/rubik/pkg/rubik" ) func main() { - os.Exit(rubik.Run(constant.ConfigFile)) + os.Exit(rubik.Run()) } diff --git a/test/try/pod.go b/test/try/pod.go new file mode 100644 index 0000000000000000000000000000000000000000..18cb0ecbc8312c671db56aeab091c3626aba5a2a --- /dev/null +++ b/test/try/pod.go @@ -0,0 +1,214 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Xiang Li +// Create: 2023-02-10 +// Description: This file contains pod info and cgroup construct + +package try + +import ( + "fmt" + "path/filepath" + "strings" + + "github.com/google/uuid" + corev1 "k8s.io/api/core/v1" + + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/util" + "isula.org/rubik/pkg/core/typedef" + "isula.org/rubik/pkg/core/typedef/cgroup" +) + +// FakePod is used for pod testing +type FakePod struct { + *typedef.PodInfo + // Keys is cgroup key list + Keys map[*cgroup.Key]string + CGRoot string +} + +const idLen = 8 + +// GenFakeContainerInfo will only generate fake container info under specific pod +func GenFakeContainerInfo(pod *FakePod) *typedef.ContainerInfo { + containerID := genContainerID() + var fakeContainer = &typedef.ContainerInfo{ + Name: fmt.Sprintf("fakeContainer-%s", containerID[:idLen]), + ID: containerID, + Hierarchy: cgroup.Hierarchy{Path: filepath.Join(pod.Path, containerID)}, + RequestResources: make(typedef.ResourceMap, 0), + LimitResources: make(typedef.ResourceMap, 0), + } + return fakeContainer +} + +// GenFakePodInfo will only generate fake pod info but no cgroup files been +func GenFakePodInfo(qosClass corev1.PodQOSClass) *typedef.PodInfo { + podID := uuid.New().String() + // generate fake pod info + var fakePod = &typedef.PodInfo{ + Name: fmt.Sprintf("fakepod-%s", podID[:idLen]), + Namespace: "test", + UID: constant.PodCgroupNamePrefix + podID, + Hierarchy: cgroup.Hierarchy{Path: genRelativeCgroupPath(qosClass, podID)}, + Annotations: make(map[string]string, 0), + } + return fakePod +} + +// NewFakePod return fake pod info struct +func NewFakePod(keys map[*cgroup.Key]string, qosClass corev1.PodQOSClass) *FakePod { + return &FakePod{ + Keys: keys, + PodInfo: GenFakePodInfo(qosClass), + CGRoot: GetTestCGRoot(), + } +} + +func (pod *FakePod) genFakePodCgroupPath() Ret { + if !util.PathExist(TestRoot) { + MkdirAll(TestRoot, constant.DefaultDirMode).OrDie() + } + 
cgroup.InitMountDir(pod.CGRoot) + // generate fake cgroup path + for key, value := range pod.Keys { + // generate pod absolute cgroup path + podCGFilePath := cgroup.AbsoluteCgroupPath(key.SubSys, pod.Path, key.FileName) + if err := WriteFile(podCGFilePath, value); err.err != nil { + return err + } + } + return pod.genFakeContainersCgroupPath() +} + +func (pod *FakePod) genFakeContainersCgroupPath() Ret { + if len(pod.IDContainersMap) == 0 { + return newRet(nil) + } + + for key, value := range pod.Keys { + for _, container := range pod.IDContainersMap { + // generate container absolute cgroup path + containerCGFilePath := cgroup.AbsoluteCgroupPath(key.SubSys, container.Path, key.FileName) + if err := WriteFile(containerCGFilePath, value); err.err != nil { + return err + } + } + } + return newRet(nil) +} + +// WithContainers will generate containers under pod with container num +func (pod *FakePod) WithContainers(containerNum int) *FakePod { + pod.IDContainersMap = make(map[string]*typedef.ContainerInfo, containerNum) + for i := 0; i < containerNum; i++ { + fakeContainer := GenFakeContainerInfo(pod) + pod.IDContainersMap[fakeContainer.ID] = fakeContainer + } + pod.genFakeContainersCgroupPath() + return pod +} + +// CleanPath will delete fakepod's cgroup folders and files +func (pod *FakePod) CleanPath() Ret { + if pod == nil { + return newRet(nil) + } + for key := range pod.Keys { + path := cgroup.AbsoluteCgroupPath(key.SubSys, pod.Path, key.FileName) + if len(key.FileName) != 0 { + path = filepath.Dir(path) + } + if err := RemoveAll(path); err.err != nil { + return err + } + } + return newRet(nil) +} + +func genContainerID() string { + const delimiter = "-" + // format: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + // length: 36 + // delimiter no: 4 + uuid1 := uuid.New().String() + uuid2 := uuid.New().String() + // now one uuid length is 64 for sure + containerID := strings.ReplaceAll(uuid1, delimiter, "") + strings.ReplaceAll(uuid2, delimiter, "") + return containerID +} 
+ +// GenFakePod gen fake pod info +func GenFakePod(keys map[*cgroup.Key]string, qosClass corev1.PodQOSClass) *FakePod { + fakePod := NewFakePod(keys, qosClass) + fakePod.genFakePodCgroupPath().OrDie() + return fakePod +} + +// GenFakeBurstablePod generate pod with qos class burstable +func GenFakeBurstablePod(keys map[*cgroup.Key]string) *FakePod { + return GenFakePod(keys, corev1.PodQOSBurstable) +} + +// GenFakeBestEffortPod generate pod with qos class best effort +func GenFakeBestEffortPod(keys map[*cgroup.Key]string) *FakePod { + return GenFakePod(keys, corev1.PodQOSBestEffort) +} + +// GenFakeGuaranteedPod generate pod with qos class guaranteed +func GenFakeGuaranteedPod(keys map[*cgroup.Key]string) *FakePod { + return GenFakePod(keys, corev1.PodQOSGuaranteed) +} + +// GenFakeOnlinePod generate online pod +func GenFakeOnlinePod(keys map[*cgroup.Key]string) *FakePod { + fakePod := GenFakeGuaranteedPod(keys) + fakePod.Annotations[constant.PriorityAnnotationKey] = "false" + return fakePod +} + +// GenFakeOfflinePod generate offline pod +func GenFakeOfflinePod(keys map[*cgroup.Key]string) *FakePod { + fakePod := GenFakeBurstablePod(keys) + fakePod.Annotations[constant.PriorityAnnotationKey] = "true" + return fakePod +} + +func genRelativeCgroupPath(qosClass corev1.PodQOSClass, id string) string { + path := "" + switch qosClass { + case corev1.PodQOSGuaranteed: + path = "" + case corev1.PodQOSBurstable: + path = strings.ToLower(string(corev1.PodQOSBurstable)) + case corev1.PodQOSBestEffort: + path = strings.ToLower(string(corev1.PodQOSBestEffort)) + } + return filepath.Join(constant.KubepodsCgroup, path, constant.PodCgroupNamePrefix+id) +} + +// DeepCopy returns fake pod deepcopy object +func (pod *FakePod) DeepCopy() *FakePod { + if pod == nil || pod.PodInfo == nil { + return nil + } + var copyKeys map[*cgroup.Key]string + if pod.Keys != nil { + copyKeys = make(map[*cgroup.Key]string, len(pod.Keys)) + for k, v := range pod.Keys { + copyKeys[k] = v + } + } + 
return &FakePod{ + Keys: copyKeys, + PodInfo: pod.PodInfo.DeepCopy(), + } +} diff --git a/test/try/pod_test.go b/test/try/pod_test.go new file mode 100644 index 0000000000000000000000000000000000000000..45b3e060c94d6293ad51ddbebdb740d4de3db96a --- /dev/null +++ b/test/try/pod_test.go @@ -0,0 +1,270 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. +// Author: Xiang Li +// Create: 2023-02-10 +// Description: This file contains pod info and cgroup construct + +package try + +import ( + "path/filepath" + "reflect" + "strings" + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/util" + "isula.org/rubik/pkg/core/typedef" + "isula.org/rubik/pkg/core/typedef/cgroup" + corev1 "k8s.io/api/core/v1" +) + +func TestNewFakePod(t *testing.T) { + id := constant.PodCgroupNamePrefix + uuid.New().String() + type args struct { + keys map[*cgroup.Key]string + qosClass corev1.PodQOSClass + } + tests := []struct { + name string + args args + want *FakePod + }{ + { + name: "TC1-new fake best effort pod", + args: args{ + keys: map[*cgroup.Key]string{{SubSys: "cpu", FileName: constant.CPUCgroupFileName}: "0"}, + qosClass: corev1.PodQOSBestEffort, + }, + want: &FakePod{ + Keys: map[*cgroup.Key]string{{SubSys: "cpu", FileName: constant.CPUCgroupFileName}: "0"}, + PodInfo: &typedef.PodInfo{ + Name: "fakepod-" + id[:idLen], + UID: id, + Namespace: "test", + Hierarchy: cgroup.Hierarchy{ + Path: 
filepath.Join(constant.KubepodsCgroup, + strings.ToLower(string(corev1.PodQOSBestEffort)), + id)}, + }, + }, + }, + { + name: "TC2-new fake guaranteed pod", + args: args{ + keys: map[*cgroup.Key]string{{SubSys: "cpu", FileName: constant.CPUCgroupFileName}: "0"}, + qosClass: corev1.PodQOSGuaranteed, + }, + want: &FakePod{ + Keys: map[*cgroup.Key]string{{SubSys: "cpu", FileName: constant.CPUCgroupFileName}: "0"}, + PodInfo: &typedef.PodInfo{ + Name: "fakepod-" + id[:idLen], + UID: id, + Namespace: "test", + Hierarchy: cgroup.Hierarchy{ + Path: filepath.Join(constant.KubepodsCgroup, id), + }, + }, + }, + }, + { + name: "TC3-new fake burstable pod", + args: args{ + keys: map[*cgroup.Key]string{{SubSys: "cpu", FileName: constant.CPUCgroupFileName}: "0"}, + qosClass: corev1.PodQOSBurstable, + }, + want: &FakePod{ + Keys: map[*cgroup.Key]string{{SubSys: "cpu", FileName: constant.CPUCgroupFileName}: "0"}, + PodInfo: &typedef.PodInfo{ + Name: "fakepod-" + id[:idLen], + UID: id, + Namespace: "test", + Hierarchy: cgroup.Hierarchy{ + Path: filepath.Join(constant.KubepodsCgroup, + strings.ToLower(string(corev1.PodQOSBurstable)), + id)}, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakePod := NewFakePod(tt.args.keys, tt.args.qosClass) + assert.Equal(t, fakePod.Namespace, tt.want.Namespace) + assert.Equal(t, len(fakePod.Name), len(tt.want.Name)) + assert.Equal(t, len(fakePod.UID), len(tt.want.UID)) + assert.Equal(t, len(fakePod.Path), len(tt.want.Path)) + }) + } +} + +func TestGenFakePod(t *testing.T) { + type args struct { + keys map[*cgroup.Key]string + qosClass corev1.PodQOSClass + containerNum int + } + tests := []struct { + name string + args args + }{ + { + name: "TC1-generate burstable pod", + args: args{ + keys: map[*cgroup.Key]string{{SubSys: "cpu", FileName: constant.CPUCgroupFileName}: "0"}, + qosClass: corev1.PodQOSBestEffort, + }, + }, + { + name: "TC2-generate guaranteed pod with 3 containers", + args: args{ + keys: 
map[*cgroup.Key]string{{SubSys: "cpu", FileName: constant.CPUCgroupFileName}: "0"}, + qosClass: corev1.PodQOSGuaranteed, + containerNum: 3, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakePod := GenFakePod(tt.args.keys, tt.args.qosClass) + if tt.args.qosClass != corev1.PodQOSGuaranteed { + // guaranteed pod does not have path prefix like "guaranteed/podxxx" + assert.Equal(t, true, strings.Contains(fakePod.Path, strings.ToLower(string(corev1.PodQOSBestEffort)))) + + } + for key, val := range tt.args.keys { + podCgroupFile := cgroup.AbsoluteCgroupPath(key.SubSys, fakePod.Path, key.FileName) + assert.Equal(t, true, util.PathExist(podCgroupFile)) + ret := ReadFile(podCgroupFile) + assert.NoError(t, ret.err) + assert.Equal(t, val, ret.val) + } + if tt.args.containerNum != 0 { + fakePod.WithContainers(tt.args.containerNum) + for key, val := range tt.args.keys { + for _, c := range fakePod.IDContainersMap { + containerCgroupFile := cgroup.AbsoluteCgroupPath(key.SubSys, c.Path, key.FileName) + assert.Equal(t, true, util.PathExist(containerCgroupFile)) + ret := ReadFile(containerCgroupFile) + assert.NoError(t, ret.err) + assert.Equal(t, val, ret.val) + } + } + } + fakePod.CleanPath().OrDie() + }) + } +} + +func TestGenParticularFakePod(t *testing.T) { + type args struct { + keys map[*cgroup.Key]string + } + tests := []struct { + name string + kind string + wantAnnotation string + }{ + { + name: "TC1-generate online pod", + kind: "online", + wantAnnotation: "false", + }, + { + name: "TC2-generate offline pod", + kind: "offline", + wantAnnotation: "true", + }, + { + name: "TC3-generate burstable pod", + kind: "burstable", + wantAnnotation: "", + }, + { + name: "TC4-generate besteffort pod", + kind: "besteffort", + wantAnnotation: "", + }, + { + name: "TC5-generate guaranteed pod", + kind: "guaranteed", + wantAnnotation: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + keys := 
map[*cgroup.Key]string{{SubSys: "cpu", FileName: constant.CPUCgroupFileName}: "0"} + var fakePod *FakePod + switch tt.kind { + case "online": + fakePod = GenFakeOnlinePod(keys) + case "offline": + fakePod = GenFakeOfflinePod(keys) + case "burstable": + fakePod = GenFakeBurstablePod(keys) + case "besteffort": + fakePod = GenFakeBestEffortPod(keys) + case "guaranteed": + fakePod = GenFakeGuaranteedPod(keys) + } + assert.Equal(t, tt.wantAnnotation, fakePod.Annotations[constant.PriorityAnnotationKey]) + fakePod.CleanPath().OrDie() + }) + } +} + +func TestFakePod_DeepCopy(t *testing.T) { + type fields struct { + PodInfo *typedef.PodInfo + Keys map[*cgroup.Key]string + } + + keys := map[*cgroup.Key]string{{SubSys: "cpu", FileName: constant.CPUCgroupFileName}: "0"} + podInfo := NewFakePod(keys, corev1.PodQOSGuaranteed).PodInfo + tests := []struct { + name string + fields fields + want *FakePod + }{ + { + name: "TC1-deep copy", + fields: fields{ + PodInfo: podInfo, + Keys: keys, + }, + want: &FakePod{ + PodInfo: podInfo, + Keys: keys, + }, + }, + { + name: "TC2-empty copy", + fields: fields{ + PodInfo: nil, + Keys: nil, + }, + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pod := &FakePod{ + PodInfo: tt.fields.PodInfo, + Keys: tt.fields.Keys, + } + if got := pod.DeepCopy(); !reflect.DeepEqual(got, tt.want) { + assert.Equal(t, got, tt.want) + t.Errorf("FakePod.DeepCopy() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/try/try.go b/test/try/try.go similarity index 74% rename from pkg/try/try.go rename to test/try/try.go index a0c4a33eebdf5e76efc15f179c499ca93a291af3..2a80abbfcfb7b4f778c8178c8b5897e810ff3db6 100644 --- a/pkg/try/try.go +++ b/test/try/try.go @@ -10,7 +10,7 @@ // Author: jingrui // Create: 2022-04-17 // Description: try provide some helper functions for unit-test. -// + // Package try provide some helper function for unit-test, if you want // to use try outside unit-test, please add notes. 
// @@ -25,13 +25,14 @@ package try import ( "fmt" - "io/ioutil" "os" + "path/filepath" securejoin "github.com/cyphar/filepath-securejoin" "github.com/google/uuid" - "isula.org/rubik/pkg/constant" + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/common/util" ) // Ret provide some action for error. @@ -100,28 +101,52 @@ func RemoveAll(path string) Ret { } // WriteFile wrap error to Ret. -func WriteFile(filename string, data []byte, perm os.FileMode) Ret { +func WriteFile(filename string, data string) Ret { ret := newRet(nil) ret.val = filename - if err := ioutil.WriteFile(filename, data, perm); err != nil { + if err := util.WriteFile(filename, data); err != nil { ret.err = err } return ret } +// ReadFile wrap error to Ret +func ReadFile(filename string) Ret { + ret := newRet(nil) + val, err := util.ReadFile(filename) + if err != nil { + ret.err = err + } + ret.val = string(val) + return ret +} + const ( - testdir = "/tmp/rubik-test" + // TestRoot is the root path for all test cases + TestRoot = "/tmp/rubik-test" ) +var rootDir = TestRoot + +// InitTestCGRoot sets the directory of the cgroup file system for testcases +func InitTestCGRoot(arg string) { + rootDir = arg +} + +// GetTestCGRoot return the directory of the cgroup file system for testcases +func GetTestCGRoot() string { + return rootDir +} + // GenTestDir gen testdir func GenTestDir() Ret { - name := fmt.Sprintf("%s/%s", testdir, uuid.New().String()) - ret := MkdirAll(name, constant.DefaultDirMode) - ret.val = name + path := filepath.Join(TestRoot, uuid.New().String()) + ret := MkdirAll(path, constant.DefaultDirMode) + ret.val = path return ret } // DelTestDir del testdir, this function only need call once. 
func DelTestDir() Ret { - return RemoveAll(testdir) + return RemoveAll(TestRoot) } diff --git a/pkg/try/try_test.go b/test/try/try_test.go similarity index 84% rename from pkg/try/try_test.go rename to test/try/try_test.go index 6d17d02829e8b979b760c430be26fe5b9f44be9d..7fbc257fbf1671edb693178f885dd62c4e436092 100644 --- a/pkg/try/try_test.go +++ b/test/try/try_test.go @@ -20,8 +20,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - - "isula.org/rubik/pkg/constant" ) // Test_OrDie test try some-func or die. @@ -29,7 +27,7 @@ func Test_OrDie(t *testing.T) { ret := GenTestDir() ret.OrDie() dname := ret.String() - WriteFile(SecureJoin(dname, "die.txt").String(), []byte("ok"), constant.DefaultFileMode).OrDie() + WriteFile(SecureJoin(dname, "die.txt").String(), "ok").OrDie() RemoveAll(dname).OrDie() } @@ -38,6 +36,6 @@ func Test_ErrMessage(t *testing.T) { ret := GenTestDir() assert.Equal(t, ret.ErrMessage(), "") dname := ret.String() - WriteFile(SecureJoin(dname, "log.txt").String(), []byte("ok"), constant.DefaultFileMode).ErrMessage() + WriteFile(SecureJoin(dname, "log.txt").String(), "ok").ErrMessage() assert.Equal(t, RemoveAll(dname).ErrMessage(), "") } diff --git a/tests/data/fuzz-test/README.md b/tests/data/fuzz-test/README.md deleted file mode 100644 index 84a17c6ea6d2e1c70a4113299e07da7695dc8197..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/README.md +++ /dev/null @@ -1,26 +0,0 @@ -## How to Construct the go fuzz test - -> Note -> Before you start, make sure you have `go-fuzz-build` and `go-fuzz` binaries, you can get them at [go-fuzz](https://github.com/dvyukov/go-fuzz) - -1. cd `rubik/tests/data/fuzz-test` and make folder the form like `fuzz-test-xxx` -2. 
put the materials used by fuzz in the folder you created, they looks like the following form: -```bash -$ tree fuzz-test-newconfig - fuzz-test-newconfig # test case root dir - |-- corpus # dir to store mutation corpus - | |-- case1 # mutation corpus1 - | |-- case2 # mutation corpus2 - | |-- case3 # mutation corpus3 - |-- Fuzz # fuzz go file - |-- path # record relative path to put the Fuzz file in the package -``` -3. when the above meterials are ready, go to `rubik/tests/src` -4. the **ONLY Three Things** you need to do is: - 1. copy `TEMPLATE` file to the name you want(*must start with `fuzz_test`*), for example `fuzz_test_xxx.sh` - 2. change the variable `test_name` in the script you just copy same as the name you just gave(keep same with the folder you create in the first step) - 3. uncomment the last line `main "$1"` -5. To run single go fuzz shell script by doing `$ bash fuzz_test_xxx.sh`, it will stop fuzzing after 1 minute. - If you want to change the default run time, you could do like `$ bash fuzz_test_xxx.sh 2h` to keep running 2 hours -6. To run **all** go fuzz shell scripts by first go to `rubik/tests`, then run `$ bash test.sh 2h`. 
- It will run all go fuzz testcases and will stop fuzzing after `2h * number of go fuzz testcases` diff --git a/tests/data/fuzz-test/fuzz-test-newconfig/Fuzz b/tests/data/fuzz-test/fuzz-test-newconfig/Fuzz deleted file mode 100644 index 74b89a6a8cd65a03f16059329d486b0532385e0b..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-newconfig/Fuzz +++ /dev/null @@ -1,31 +0,0 @@ -package config - -import ( - "io/ioutil" - "os" - "path/filepath" - - "isula.org/rubik/pkg/constant" -) - -func Fuzz(data []byte) int { - if err := os.MkdirAll(constant.TmpTestDir, constant.DefaultDirMode); err != nil { - return -1 - } - defer os.RemoveAll(constant.TmpTestDir) - tmpDir, err := ioutil.TempDir(constant.TmpTestDir, "fuzz") - if err != nil { - return -1 - } - configFile := filepath.Join(tmpDir, "fuzz_config.json") - if err := ioutil.WriteFile(configFile, data, constant.DefaultFileMode); err != nil { - return -1 - } - _, err = NewConfig(configFile) - if err != nil { - return -1 - } - - return 1 -} - diff --git a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case1 b/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case1 deleted file mode 100644 index 469f609d14c4d0c48bb1c24086b4b379d607bfcf..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case1 +++ /dev/null @@ -1,24 +0,0 @@ -{ - "autoCheck": false, - "logDriver": "stdio", - "logDir": "/var/log/rubik", - "logSize": 1024, - "logLevel": "info", - "cgroupRoot": "/sys/fs/cgroup", - "cacheConfig": { - "enable": false, - "defaultLimitMode": "static", - "adjustInterval": 1000, - "perfDuration": 1000, - "l3Percent": { - "low": 20, - "mid": 30, - "high": 50 - }, - "memBandPercent": { - "low": 10, - "mid": 30, - "high": 50 - } - } -} diff --git a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case2 b/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case2 deleted file mode 100644 index 2b570bd4fb474f48c6df3efbecdf1671343f5357..0000000000000000000000000000000000000000 --- 
a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case2 +++ /dev/null @@ -1,7 +0,0 @@ -{ - "logDriver": "file", - "logDir": "/var/lib/rubik/logs", - "logSize": 1024, - "logLevel": "info", - "cgroupRoot": "/sys/fs/cgroup" -} diff --git a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case3 b/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case3 deleted file mode 100644 index 7107b3065f280ee9cf712cbadea14385bb0504a9..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case3 +++ /dev/null @@ -1,7 +0,0 @@ -{ - "logDriver": "stdio", - "logDir": "", - "logSize": 1024, - "logLevel": "info", - "cgroupRoot": "/sys/fs/cgroup" -} diff --git a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case4 b/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case4 deleted file mode 100644 index d05d9b46d500881cd68efe9a94a88ea6fb176e2c..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case4 +++ /dev/null @@ -1,7 +0,0 @@ -{ - "logDriver": "stdio", - "logDir": "/var/lib/rubik/logs", - "logSize": 9999999999, - "logLevel": "info", - "cgroupRoot": "/sys/fs/cgroup" -} diff --git a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case5 b/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case5 deleted file mode 100644 index f6dc7bcc3a2479e56ac89014a00e308b2853be2c..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case5 +++ /dev/null @@ -1,7 +0,0 @@ -{ - "logDriver": "stdio", - "logDir": "/var/lib/rubik/logs", - "logSize": 1024, - "logLevel": "!@#!@$", - "cgroupRoot": "/sys/fs/cgroup" -} diff --git a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case6 b/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case6 deleted file mode 100644 index b87bd493b9b3e38346675a3a96f6167e1dd857ac..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case6 +++ /dev/null @@ -1,7 +0,0 @@ -{ - "logDriver": "stdio", - "logDir": "/var/lib/rubik/logs", 
- "logSize": 1024, - "logLevel": "info", - "cgroupRoot": "/path/not/exist" -} diff --git a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case7 b/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case7 deleted file mode 100644 index 2d58391138e2194c224a061e6204b0dd8420ae0f..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-newconfig/corpus/case7 +++ /dev/null @@ -1,18 +0,0 @@ -{ - "cacheConfig": { - "enable": false, - "defaultLimitMode": "dynamic", - "adjustInterval": 1000000, - "perfDuration": 999999, - "l3Percent": { - "low": 20, - "mid": 30, - "high": 50 - }, - "memBandPercent": { - "low": 10, - "mid": 30, - "high": 50 - } - } -} \ No newline at end of file diff --git a/tests/data/fuzz-test/fuzz-test-newconfig/path b/tests/data/fuzz-test/fuzz-test-newconfig/path deleted file mode 100644 index b9ce5cd10a54d2c42c6d49c76016a6086bbda613..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-newconfig/path +++ /dev/null @@ -1 +0,0 @@ -pkg/config diff --git a/tests/data/fuzz-test/fuzz-test-pinghandler/Fuzz b/tests/data/fuzz-test/fuzz-test-pinghandler/Fuzz deleted file mode 100644 index 94ded66128f0baf3caf40cdc93a451d5284b073b..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-pinghandler/Fuzz +++ /dev/null @@ -1,23 +0,0 @@ -package httpserver - -import ( - "bufio" - "bytes" - "net/http" - "net/http/httptest" -) - -func Fuzz(data []byte) int { - r, err := http.NewRequest("GET", "/ping", bufio.NewReader(bytes.NewReader(data))) - if err != nil { - return -1 - } - w := httptest.NewRecorder() - handler := setupHandler() - handler.ServeHTTP(w, r) - if status := w.Code; status != http.StatusOK { - return -1 - } - - return 1 -} diff --git a/tests/data/fuzz-test/fuzz-test-pinghandler/corpus/case1 b/tests/data/fuzz-test/fuzz-test-pinghandler/corpus/case1 deleted file mode 100644 index 0f080be2fb989ab5163c28fb2b32815ebc65c53f..0000000000000000000000000000000000000000 --- 
a/tests/data/fuzz-test/fuzz-test-pinghandler/corpus/case1 +++ /dev/null @@ -1 +0,0 @@ -{"Pods": {"pod70f2828b-3f9c-42e2-97da-01c6072af4a6": {"CgroupPath": "kubepods/besteffort/pod70f2828b-3f9c-42e2-97da-01c6072af4a6", "QoSLevel": 0 }}} \ No newline at end of file diff --git a/tests/data/fuzz-test/fuzz-test-pinghandler/path b/tests/data/fuzz-test/fuzz-test-pinghandler/path deleted file mode 100644 index 90468247e568cac14b527f0f4ed193cb2a616bdb..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-pinghandler/path +++ /dev/null @@ -1 +0,0 @@ -pkg/httpserver diff --git a/tests/data/fuzz-test/fuzz-test-roothandler/Fuzz b/tests/data/fuzz-test/fuzz-test-roothandler/Fuzz deleted file mode 100644 index 36910b51c515751fcaacc44b4322657fb8742fc1..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-roothandler/Fuzz +++ /dev/null @@ -1,23 +0,0 @@ -package httpserver - -import ( - "bufio" - "bytes" - "net/http" - "net/http/httptest" -) - -func Fuzz(data []byte) int { - r, err := http.NewRequest("GET", "/", bufio.NewReader(bytes.NewReader(data))) - if err != nil { - return -1 - } - w := httptest.NewRecorder() - handler := setupHandler() - handler.ServeHTTP(w, r) - if status := w.Code; status != http.StatusOK { - return -1 - } - - return 1 -} diff --git a/tests/data/fuzz-test/fuzz-test-roothandler/corpus/case1 b/tests/data/fuzz-test/fuzz-test-roothandler/corpus/case1 deleted file mode 100644 index 91ab13639eb93a8da001e86e25c6f60609958980..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-roothandler/corpus/case1 +++ /dev/null @@ -1 +0,0 @@ -{"Pods": {"pod70f2828b-3f9c-42e2-97da-01c6072af4a6": {"CgroupPath": "kubepods/besteffort/pod70f2828b-3f9c-42e2-97da-01c6072af4a6", "QoSLevel": 0, "CacheLimitLevel": "low"}}} diff --git a/tests/data/fuzz-test/fuzz-test-roothandler/path b/tests/data/fuzz-test/fuzz-test-roothandler/path deleted file mode 100644 index 
90468247e568cac14b527f0f4ed193cb2a616bdb..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-roothandler/path +++ /dev/null @@ -1 +0,0 @@ -pkg/httpserver diff --git a/tests/data/fuzz-test/fuzz-test-versionhandler/Fuzz b/tests/data/fuzz-test/fuzz-test-versionhandler/Fuzz deleted file mode 100644 index 11ed700df6f9d9b1db8f6c1bf0cfab74ddb5b267..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-versionhandler/Fuzz +++ /dev/null @@ -1,23 +0,0 @@ -package httpserver - -import ( - "bufio" - "bytes" - "net/http" - "net/http/httptest" -) - -func Fuzz(data []byte) int { - r, err := http.NewRequest("GET", "/version", bufio.NewReader(bytes.NewReader(data))) - if err != nil { - return -1 - } - w := httptest.NewRecorder() - handler := setupHandler() - handler.ServeHTTP(w, r) - if status := w.Code; status != http.StatusOK { - return -1 - } - - return 1 -} diff --git a/tests/data/fuzz-test/fuzz-test-versionhandler/corpus/case1 b/tests/data/fuzz-test/fuzz-test-versionhandler/corpus/case1 deleted file mode 100644 index 0f080be2fb989ab5163c28fb2b32815ebc65c53f..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-versionhandler/corpus/case1 +++ /dev/null @@ -1 +0,0 @@ -{"Pods": {"pod70f2828b-3f9c-42e2-97da-01c6072af4a6": {"CgroupPath": "kubepods/besteffort/pod70f2828b-3f9c-42e2-97da-01c6072af4a6", "QoSLevel": 0 }}} \ No newline at end of file diff --git a/tests/data/fuzz-test/fuzz-test-versionhandler/path b/tests/data/fuzz-test/fuzz-test-versionhandler/path deleted file mode 100644 index 90468247e568cac14b527f0f4ed193cb2a616bdb..0000000000000000000000000000000000000000 --- a/tests/data/fuzz-test/fuzz-test-versionhandler/path +++ /dev/null @@ -1 +0,0 @@ -pkg/httpserver diff --git a/tests/lib/commonlib.sh b/tests/lib/commonlib.sh deleted file mode 100755 index 1b6421f6e0bc819798222d8e303a614a71b02a24..0000000000000000000000000000000000000000 --- a/tests/lib/commonlib.sh +++ /dev/null @@ -1,282 +0,0 @@ 
-#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2021-05-15 -# Description: common lib for integration test - -TOP_DIR=$(git rev-parse --show-toplevel) -RUBIK_TEST_ROOT="${TOP_DIR}"/.fortest -RUBIK_LIB=${RUBIK_TEST_ROOT}/rubik_lib -RUBIK_RUN=${RUBIK_TEST_ROOT}/rubik_run -RUBIK_LOG=${RUBIK_TEST_ROOT}/rubik_log -RUBIK_CONFIG="${RUBIK_LIB}"/config.json -RUBIK_NAME="rubik-agent" -QOS_HANDLER="http://localhost" -VERSION_HANDLER="http://localhost/version" -PING_HANDLER="http://localhost/ping" -PAUSE_IMAGE="k8s.gcr.io/pause:3.2" -OPENEULER_IMAGE="openeuler-22.03-lts:latest" -SKIP_FLAG=111 - -mkdir -p "${RUBIK_TEST_ROOT}" -mkdir -p "${RUBIK_LIB}" "${RUBIK_RUN}" "${RUBIK_LOG}" -exit_flag=0 - -## Description: build_rubik_img will build rubik image -# Usage: build_rubik_img -# Input: $1: baseimage default is scratch -# $2: images tag default is rubik_version -# Output: rubik image -# Example: build_rubik_img -function build_rubik_img() { - image_base=${1:-"scratch"} - image_tag=${2:-"fortest"} - rubik_img="rubik:$image_tag" - if [ "$image_base" != "scratch" ]; then - cp "${RUBIK_LIB}"/Dockerfile "${RUBIK_LIB}"/Dockerfilebak - sed -i "s#scratch#${image_base}#g" "${RUBIK_LIB}"/Dockerfile - fi - if [ ! -f "${TOP_DIR}"/rubik ]; then - make release - fi - cp "$TOP_DIR"/rubik "$RUBIK_LIB" - docker images | grep ^rubik | grep "${image_tag}" > /dev/null 2>&1 - if [ $? 
-ne 0 ]; then - docker build -f "${RUBIK_LIB}"/Dockerfile -t "${rubik_img}" "${RUBIK_LIB}" - [ "$image_base" != "scratch" ] && rm "${RUBIK_LIB}"/Dockerfile && mv "${RUBIK_LIB}"/Dockerfilebak "${RUBIK_LIB}"/Dockerfile - fi -} - -function generate_config_file() { - # get config from yaml config map - sed -n '/config.json:/{:a;n;/---/q;p;ba}' "$RUBIK_LIB"/rubik-daemonset.yaml > "${RUBIK_CONFIG}" - # disable autoConfig - sed -i 's/\"autoConfig\": true/\"autoConfig\": false/' "${RUBIK_CONFIG}" -} - -function prepare_rubik() { - runtime_check - if pgrep rubik > /dev/null 2>&1; then - echo "rubik is already running, please stop it first" - exit 1 - fi - cp "$TOP_DIR"/Dockerfile "$TOP_DIR"/hack/rubik-daemonset.yaml "${RUBIK_LIB}" - image_base=${1:-"scratch"} - image_tag=${2:-"fortest"} - rubik_img="rubik:$image_tag" - build_rubik_img "${image_base}" "${image_tag}" - generate_config_file -} - -function run_rubik() { - prepare_rubik - image_check - if [ ! -f "${RUBIK_CONFIG}" ]; then - rubik_pid=$(docker run -tid --name=${RUBIK_NAME} --pid=host --cap-add SYS_ADMIN \ - -v "${RUBIK_RUN}":/run/rubik -v "${RUBIK_LOG}":/var/log/rubik -v /sys/fs:/sys/fs "${rubik_img}") - else - rubik_pid=$(docker run -tid --name=${RUBIK_NAME} --pid=host --cap-add SYS_ADMIN \ - -v "${RUBIK_RUN}":/run/rubik -v "${RUBIK_LOG}":/var/log/rubik -v /sys/fs:/sys/fs \ - -v "${RUBIK_CONFIG}":/var/lib/rubik/config.json "${rubik_img}") - fi - return_code=$? - echo -n "$rubik_pid" - return $return_code -} - -function kill_rubik() { - docker inspect ${RUBIK_NAME} > /dev/null 2>&1 - if [ $? -eq 0 ]; then - docker logs ${RUBIK_NAME} - docker stop -t 0 ${RUBIK_NAME} - docker rm ${RUBIK_NAME} - fi -} - -function clean_all() { - rm -rf "${RUBIK_LIB}" "${RUBIK_RUN}" "${RUBIK_LOG}" - kill_rubik -} - -function runtime_check() { - if ! 
docker info > /dev/null 2>&1; then - echo "docker is not found, please install it via 'yum install docker'" - exit 1 - fi -} - -function image_check() { - openEuler_image="openeuler-22.03-lts" - pause_image="k8s.gcr.io/pause" - if ! docker images | grep $openEuler_image > /dev/null 2>&1; then - echo "openEuler image ${OPENEULER_IMAGE} is not found, please prepare it first before test begin" - exit 1 - fi - if ! docker images | grep ${pause_image} > /dev/null 2>&1; then - echo "pause image ${PAUSE_IMAGE} is not found, please prepare it first before test begin" - exit 1 - fi -} - -# Description: kernel_check will check wether the environment is met for testing -# Usage: kernel_check [...] -# Input: list of functional check requirements, default check all -# Output: 0(success) 1(fail) -# Example: kernel_check CPU、kernel_check CPU MEM CACHE、kernel_check ALL -function kernel_check() { - function cpu_check() { - ls /sys/fs/cgroup/cpu/cpu.qos_level > /dev/null 2>&1 - if [ $? -ne 0 ]; then - echo "ls /sys/fs/cgroup/cpu/cpu.qos.level failed" - return 1 - fi - } - function mem_check() { - if [ ! -f /proc/sys/vm/memcg_qos_enable ]; then - echo "/proc/sys/vm/memcg_qos_enable is not an ordinary file" - return 1 - else - echo -n 1 > /proc/sys/vm/memcg_qos_enable - fi - ls /sys/fs/cgroup/memory/memory.qos_level > /dev/null 2>&1 - if [ $? -ne 0 ]; then - echo "ls /sys/fs/cgroup/memory.qos_level failed" - return 1 - fi - } - function cache_check() { - ls /sys/fs/resctrl/schemata > /dev/null 2>&1 - if [ $? 
-ne 0 ]; then - echo "ls /sys/fs/resctrl/schemata failed" - return 1 - fi - } - function check_all() { - cpu_check - mem_check - cache_check - } - for functional in $@; do - case $functional in - "CPU") - cpu_check - ;; - "MEM") - mem_check - ;; - "CACHE") - cache_check - ;; - "ALL" | *) - check_all - ;; - esac - done -} - -# Description: curl_cmd performs like curl command -# Usage: curl_cmd $http_handler $json_data $protocol -# Reminds: NOT recommend to use this method directly, use rubik_ping/rubik_version/rubik_qos instead in most occasions -# Input: -# $1: http_handler -# $2: json_data -# $3: protocol -# Output: curl command execute return message -# Return: success(0) or fail(not 0) -# Example: -# data=$(gen_pods_json json1 json2) -# QOS_HANDLER="http://localhost" -# PING_HANDLER="http://localhost/ping" -# VERSION_HANDLER="http://localhost/version" -# protocol="GET" -# -# curl_cmd $QOS_HANDLER $data $protocol -# curl_cmd $PING_HANDLER $protocol -# curl_cmd $VERSION_HANDLER $protocol -function curl_cmd() { - http_handler=$1 - data=$2 - protocol=$3 - result=$(curl -s -H "Accept: application/json" -H "Content-type: application/json" -X "$protocol" --data "$(echo -n "$data")" --unix-socket "${RUBIK_RUN}"/rubik.sock "$http_handler") - return_code=$? 
- echo "$result" - return $return_code -} - -function rubik_ping() { - curl_cmd "$PING_HANDLER" "" "GET" -} - -function rubik_version() { - curl_cmd "$VERSION_HANDLER" "" "GET" -} - -function rubik_qos() { - curl_cmd "$QOS_HANDLER" "$1" "POST" -} - -# Description: gen_single_pod_json will generate single pod qos info for one pod -# Usage: gen_single_pod_json $pod_id $cgroup_path $qos_level -# Input: $1: pod id, $2: cgroup path, $3: qos level -# Output: single pod qos info json data -# Example: json1=$(gen_single_pod_json "podaaaaaa" "this/is/cgroup/path" 1) -function gen_single_pod_json() { - pod_id=$1 - cgroup_path=$2 - qos_level=$3 - jq -n -c -r --arg pid "$pod_id" --arg cp "$cgroup_path" --arg qos "$qos_level" '{"Pods":{($pid): {"CgroupPath": $cp, "QoSLevel": ($qos|tonumber)}}}' -} - -function fn_check_result() { - if [ "$1" = "$2" ]; then - echo "PASS" - else - echo "FAIL" - ((exit_flag++)) - fi -} - -function fn_check_result_noeq() { - if [ "$1" != "$2" ]; then - echo "PASS" - else - echo "FAIL" - ((exit_flag++)) - fi -} - -function fn_check_string_contain() { - if echo "$2" | grep "$1"; then - echo "PASS" - else - echo "FAIL" - ((exit_flag++)) - fi -} - -function fn_check_string_not_contain() { - if ! echo "$2" | grep "$1"; then - echo "PASS" - else - echo "FAIL" - ((exit_flag++)) - fi -} - -# Description: long_char will generate long string by repeat char 'a' N times -# Usage: long_char $length -# Input: $1: length of string -# Output: repeate string with given length -# Example: long_char 10 -function long_char() { - length=$1 - head -c "$length" < /dev/zero | tr '\0' '\141' -} diff --git a/tests/lib/fuzz_commonlib.sh b/tests/lib/fuzz_commonlib.sh deleted file mode 100755 index eb7d81328ea719117411adcfa2f297c95dc4c3a6..0000000000000000000000000000000000000000 --- a/tests/lib/fuzz_commonlib.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -# rubik licensed under the Mulan PSL v2. 
-# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2020-08-27 -# Description: common functions for fuzz tests - -# Description: check the log and return the result -# if crash, return 1 -# if not, return 0 -# Usage: check_result /path/to/log -# $1: the full path of log -function check_result() { - local log=$1 - local time="[$(date '+%Y-%m-%d %H:%M:%S')]" - result=$(grep "crash" "$log" | tail -1 | awk '{print $10}') - result=${result%%,} - if [[ $result -eq 0 ]]; then - echo "PASS$time: No crash found" - return 0 - else - echo "FAIL$time: Crash found, See details in $log" - return 1 - fi -} - -# Description: sleep x s/m/h and kill the process -# Usage: kill_after 1h 10232 -# Input: $1: time to sleep -# $2: pid to kill -function kill_after() { - local time_out=$1 - local pid_to_kill=$2 - sleep "$time_out" - for j in $(seq 1 100); do - kill -9 "$pid_to_kill" > /dev/null 2>&1 - if pgrep -a "$pid_to_kill"; then - sleep 0.2 - else - break - fi - if [[ $j -eq 100 ]]; then - return 1 - fi - done -} - -# Description: compile Fuzz.go -# Usage: make_fuzz_zip $fuzz_file $fuzz_dir $test_dir -# Input: $1: path to Fuzz.go file -# $2: dir to put the Fuzz.go file -# $3: dir store the build output -# Return: success 0; failed 1 -# Warning: all input should be abs path :-) -function make_fuzz_zip() { - fuzz_file=$1 - fuzz_dir=$2 - data_dir=$3 - cp "$fuzz_file" "$fuzz_dir" - pushd "$fuzz_dir" > /dev/null 2>&1 || return 1 - mv Fuzz Fuzz.go - if ! 
go-fuzz-build "$fuzz_dir"; then - echo "go-fuzz-build failed" && return 1 - fi - mv "$fuzz_dir"/*.zip "$data_dir" - rm "$fuzz_dir/Fuzz.go" - popd > /dev/null 2>&1 || return 1 -} - -# Description: set enviroment for go fuzz test -# Usage: set_env "fuzz-test-abc" $top_dir -# Input: $1: test name -# $2: abs path for rubik project -# Note: 1. test_name must start with fuzz-test, for example fuzz-test-abc -# 2. go fuzz file must have name "Fuzz.go" -# 3. top_dir must be the abs path for the rubik project -# shellcheck disable=SC2034 -function set_env() { - test_name=$1 - top_dir=$2 - - test_root=$top_dir/tests/data/fuzz-test - test_dir=$test_root/$test_name - fuzz_file=$test_dir/Fuzz - fuzz_dir="$top_dir"/"$(cat "$test_dir"/path)" - fuzz_corpus="$test_dir/corpus" - fuzz_log="$test_dir/$test_name.log" - fuzz_crashers="$test_dir/crashers" - fuzz_suppressions="$test_dir/suppressions" - fuzz_zip="" -} - -function clean_env() { - rm -rf "$fuzz_zip" "$fuzz_crashers" "$fuzz_suppressions" - find /tmp -maxdepth 1 -iname "*fuzz*" -exec rm -rf {} \; -} diff --git a/tests/src/TEMPLATE b/tests/src/TEMPLATE deleted file mode 100755 index f450653ffa773f7a5905a9b5c09a481d6bd23357..0000000000000000000000000000000000000000 --- a/tests/src/TEMPLATE +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. 
-# Create: 2021-05-12 -# Description: fuzz script template - -# top dir is path of where you put rubik project -top_dir=$(git rev-parse --show-toplevel) -# keep the name same as the folder you created before like "fuzz-test-xxx" -test_name="fuzz-test-root-handler" -# exit_flag is the flag to indicate if the test success(set 0) or failed(set 1) -exit_flag=0 -# get common functions used for test script -source "$top_dir"/tests/lib/fuzz_commonlib.sh - -# prepare the env before fuzz start -function pre_fun() { - # prepare env - set_env "${test_name}" "$top_dir" - # make fuzz zip file - make_fuzz_zip "$fuzz_file" "$fuzz_dir" "$test_dir" - fuzz_zip=$(ls "$test_dir"/*fuzz.zip) - if [[ -z "$fuzz_zip" ]]; then - echo "fuzz zip file not found" - exit 1 - fi -} - -# run fuzz -function test_fun() { - local time=$1 - if [[ -z "$time" ]]; then - time=1m - fi - go-fuzz -bin="$fuzz_zip" -workdir="$test_dir" &>> "$fuzz_log" & - pid=$! - if ! kill_after $time $pid > /dev/null 2>&1; then - echo "Can not kill process $pid" - fi - check_result "$fuzz_log" - res=$? - return $res -} - -function main() { - pre_fun - test_fun "$1" - res=$? - if [ $res -ne 0 ]; then - exit_flag=1 - else - clean_env - fi -} - -# uncomment following to make script working -main "$1" - -exit $exit_flag diff --git a/tests/src/fuzz_test_newconfig.sh b/tests/src/fuzz_test_newconfig.sh deleted file mode 100755 index 88fa3be898e0f950a943b3281e792e0395fa70ef..0000000000000000000000000000000000000000 --- a/tests/src/fuzz_test_newconfig.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. 
-# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2021-05-12 -# Description: fuzz new config -# Number: test_rubik_fuzz_0004 - -top_dir=$(git rev-parse --show-toplevel) -test_name="fuzz-test-newconfig" -exit_flag=0 -source "$top_dir"/tests/lib/fuzz_commonlib.sh - -function pre_fun() { - set_env "${test_name}" "$top_dir" - make_fuzz_zip "$fuzz_file" "$fuzz_dir" "$test_dir" - fuzz_zip=$(ls "$test_dir"/*fuzz.zip) - if [[ -z "$fuzz_zip" ]]; then - echo "fuzz zip file not found" - exit 1 - fi -} - -function test_fun() { - local time=$1 - if [[ -z "$time" ]]; then - time=1m - fi - go-fuzz -bin="$fuzz_zip" -workdir="$test_dir" &>> "$fuzz_log" & - pid=$! - if ! kill_after $time $pid > /dev/null 2>&1; then - echo "Can not kill process $pid" - fi - check_result "$fuzz_log" - res=$? - return $res -} - -function main() { - pre_fun - test_fun "$1" - res=$? - if [ $res -ne 0 ]; then - exit_flag=1 - else - clean_env - fi -} - -main "$1" -exit $exit_flag diff --git a/tests/src/fuzz_test_pinghandler.sh b/tests/src/fuzz_test_pinghandler.sh deleted file mode 100755 index 170d1911c5fe620479eb8de517cde3187ce509fb..0000000000000000000000000000000000000000 --- a/tests/src/fuzz_test_pinghandler.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. 
-# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2021-05-12 -# Description: fuzz ping handler -# Number: test_rubik_fuzz_0002 - -top_dir=$(git rev-parse --show-toplevel) -test_name="fuzz-test-pinghandler" -exit_flag=0 -source "$top_dir"/tests/lib/fuzz_commonlib.sh - -function pre_fun() { - set_env "${test_name}" "$top_dir" - make_fuzz_zip "$fuzz_file" "$fuzz_dir" "$test_dir" - fuzz_zip=$(ls "$test_dir"/*fuzz.zip) - if [[ -z "$fuzz_zip" ]]; then - echo "fuzz zip file not found" - exit 1 - fi -} - -function test_fun() { - local time=$1 - if [[ -z "$time" ]]; then - time=1m - fi - go-fuzz -bin="$fuzz_zip" -workdir="$test_dir" &>> "$fuzz_log" & - pid=$! - if ! kill_after $time $pid > /dev/null 2>&1; then - echo "Can not kill process $pid" - fi - check_result "$fuzz_log" - res=$? - return $res -} - -function main() { - pre_fun - test_fun "$1" - res=$? - if [ $res -ne 0 ]; then - exit_flag=1 - else - clean_env - fi -} - -main "$1" -exit $exit_flag diff --git a/tests/src/fuzz_test_roothandler.sh b/tests/src/fuzz_test_roothandler.sh deleted file mode 100755 index 0d21c206becce981fa5b508de1e7e8023ced3f34..0000000000000000000000000000000000000000 --- a/tests/src/fuzz_test_roothandler.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. 
-# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2021-05-12 -# Description: fuzz root handler -# Number: test_rubik_fuzz_0001 - -top_dir=$(git rev-parse --show-toplevel) -test_name="fuzz-test-roothandler" -exit_flag=0 -source "$top_dir"/tests/lib/fuzz_commonlib.sh - -function pre_fun() { - set_env "${test_name}" "$top_dir" - make_fuzz_zip "$fuzz_file" "$fuzz_dir" "$test_dir" - fuzz_zip=$(ls "$test_dir"/*fuzz.zip) - if [[ -z "$fuzz_zip" ]]; then - echo "fuzz zip file not found" - exit 1 - fi -} - -function test_fun() { - local time=$1 - if [[ -z "$time" ]]; then - time=1m - fi - go-fuzz -bin="$fuzz_zip" -workdir="$test_dir" &>> "$fuzz_log" & - pid=$! - if ! kill_after $time $pid > /dev/null 2>&1; then - echo "Can not kill process $pid" - fi - check_result "$fuzz_log" - res=$? - return $res -} - -function main() { - pre_fun - test_fun "$1" - res=$? - if [ $res -ne 0 ]; then - exit_flag=1 - else - clean_env - fi -} - -main "$1" -exit $exit_flag diff --git a/tests/src/fuzz_test_versionhandler.sh b/tests/src/fuzz_test_versionhandler.sh deleted file mode 100755 index 00a07560f54fbc1b64f19c2e86ba9127446d2315..0000000000000000000000000000000000000000 --- a/tests/src/fuzz_test_versionhandler.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. 
-# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2021-05-12 -# Description: fuzz version handler -# Number: test_rubik_fuzz_0003 - -top_dir=$(git rev-parse --show-toplevel) -test_name="fuzz-test-versionhandler" -exit_flag=0 -source "$top_dir"/tests/lib/fuzz_commonlib.sh - -function pre_fun() { - set_env "${test_name}" "$top_dir" - make_fuzz_zip "$fuzz_file" "$fuzz_dir" "$test_dir" - fuzz_zip=$(ls "$test_dir"/*fuzz.zip) - if [[ -z "$fuzz_zip" ]]; then - echo "fuzz zip file not found" - exit 1 - fi -} - -function test_fun() { - local time=$1 - if [[ -z "$time" ]]; then - time=1m - fi - go-fuzz -bin="$fuzz_zip" -workdir="$test_dir" &>> "$fuzz_log" & - pid=$! - if ! kill_after $time $pid > /dev/null 2>&1; then - echo "Can not kill process $pid" - fi - check_result "$fuzz_log" - res=$? - return $res -} - -function main() { - pre_fun - test_fun "$1" - res=$? - if [ $res -ne 0 ]; then - exit_flag=1 - else - clean_env - fi -} - -main "$1" -exit $exit_flag diff --git a/tests/src/test_rubik_flags_0001.sh b/tests/src/test_rubik_flags_0001.sh deleted file mode 100755 index bbbf087e9355faf282e96b245bec5880ac37cf5b..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_flags_0001.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. 
-# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2021-05-30 -# Description: rubik flag check 0001 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -test_fun() { - # check rubik binary - if [ ! -f "${top_dir}"/rubik ]; then - pushd "${top_dir}" || exit 1 > /dev/null 2>&1 - make release - popd || exit 1 > /dev/null 2>&1 - fi - - # check rubik flag - if "${top_dir}"/rubik -v > /dev/null 2>&1; then - if ! "${top_dir}"/rubik --help > /dev/null 2>&1 && ! "${top_dir}"/rubik -h > /dev/null 2>&1; then - echo "PASS" - else - echo "FAILED" - fi - else - echo "FAILED" - fi -} - -test_fun - -exit "$exit_flag" diff --git a/tests/src/test_rubik_offline_0001.sh b/tests/src/test_rubik_offline_0001.sh deleted file mode 100755 index 232b02c9a0de2e2507ba5deab5df9a0f75cdbc9c..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_offline_0001.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. 
-# Create: 2022-06-28 -# Description: 调整pod为离线业务后创建容器 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -function pre_fun() { - rubik_id=$(run_rubik) - fn_check_result $? 0 - pod_id=$(docker run -tid --cgroup-parent /kubepods "${PAUSE_IMAGE}") - fn_check_result $? 0 -} - -function test_fun() { - cgroup_path="kubepods/${pod_id}" - qos_level_offline=-1 - # generate json data - data_offline=$(gen_single_pod_json "$pod_id" "$cgroup_path" $qos_level_offline) - - # set pod to offline - result=$(rubik_qos "$data_offline") - fn_check_result $? 0 - fn_check_string_not_contain "set qos failed" "$result" - - # create container and check qos level - container_id=$(docker run -tid --cgroup-parent /kubepods/"${pod_id}" "${OPENEULER_IMAGE}" bash) - cpu_qos=$(cat /sys/fs/cgroup/cpu/"$cgroup_path"/"$cid"/cpu.qos_level) - mem_qos=$(cat /sys/fs/cgroup/memory/"$cgroup_path"/"$cid"/memory.qos_level) - fn_check_result "$cpu_qos" $qos_level_offline - fn_check_result "$mem_qos" $qos_level_offline -} - -function post_fun() { - clean_all - docker rm -f "$container_id" - fn_check_result $? 0 - docker rm -f "$pod_id" - fn_check_result $? 0 - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_offline_0002.sh b/tests/src/test_rubik_offline_0002.sh deleted file mode 100755 index 3eec239c2a97be71b95f4f5f8c59bd8959e11f1f..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_offline_0002.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. 
-# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2022-06-28 -# Description: 调整带业务的pod为离线业务 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -function pre_fun() { - rubik_id=$(run_rubik) - fn_check_result $? 0 - pod_id=$(docker run -tid --cgroup-parent /kubepods "${PAUSE_IMAGE}") - fn_check_result $? 0 - containers=() - total_num=50 -} - -function test_fun() { - cgroup_path="kubepods/${pod_id}" - qos_level_offline=-1 - # generate json data - data_offline=$(gen_single_pod_json "$pod_id" "$cgroup_path" $qos_level_offline) - - # create containers in the pod - for i in $(seq 1 ${total_num}); do - containers[$i]=$(docker run -tid --cgroup-parent /kubepods/"${pod_id}" "${OPENEULER_IMAGE}" bash) - fn_check_result $? 0 - done - - # set pod to offline - result=$(rubik_qos "$data_offline") - fn_check_result $? 0 - fn_check_string_not_contain "set qos failed" "$result" - - # check qos level for containers - for i in $(seq 1 ${total_num}); do - cpu_qos=$(cat /sys/fs/cgroup/cpu/"${cgroup_path}"/"${containers[$i]}"/cpu.qos_level) - mem_qos=$(cat /sys/fs/cgroup/memory/"${cgroup_path}"/"${containers[$i]}"/memory.qos_level) - fn_check_result "$cpu_qos" "$qos_level_offline" - fn_check_result "$mem_qos" "$qos_level_offline" - done -} - -function post_fun() { - docker rm -f "$rubik_id" - fn_check_result $? 0 - docker rm -f "${containers[@]}" - # Deleting multiple containers may time out. - [ "$?" -ne "0" ] && docker rm -f "${containers[@]}" - docker rm -f "$pod_id" - fn_check_result $? 0 - docker ps -a | grep -v "CONTAINER" - fn_check_result $? 
1 "cleanup" - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_offline_0003.sh b/tests/src/test_rubik_offline_0003.sh deleted file mode 100755 index 277c83990c69dd32df98509284ecae2b3d200b0f..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_offline_0003.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2022-06-28 -# Description: pod与container状态不一致测试.调整为离线 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -function pre_fun() { - rubik_id=$(run_rubik) - fn_check_result $? 0 - pod_id=$(docker run -tid --cgroup-parent /kubepods "${PAUSE_IMAGE}") - fn_check_result $? 0 -} - -function test_fun() { - container_id=$(docker run -tid --cgroup-parent /kubepods/"${pod_id}" "${OPENEULER_IMAGE}" bash) - cgroup_path="kubepods/${pod_id}" - qos_level=-1 - # generate json data - data=$(gen_single_pod_json "$pod_id" "$cgroup_path" $qos_level) - - # set container's qos to -1 - cpu_qos_path=/sys/fs/cgroup/cpu/${cgroup_path}/${container_id}/cpu.qos_level - mem_qos_path=/sys/fs/cgroup/memory/${cgroup_path}/${container_id}/memory.qos_level - echo -n ${qos_level} > "${cpu_qos_path}" - echo -n ${qos_level} > "${mem_qos_path}" - - # set pod's qos to -1 - result=$(rubik_qos "$data") - fn_check_result $? 
0 - fn_check_string_not_contain "set qos failed" "$result" - - # check set ok - cpu_qos=$(cat "${cpu_qos_path}") - mem_qos=$(cat "${mem_qos_path}") - fn_check_result "${cpu_qos}" "${qos_level}" - fn_check_result "${mem_qos}" "${qos_level}" -} - -function post_fun() { - docker rm -f "$rubik_id" - fn_check_result $? 0 - docker rm -f "$container_id" - fn_check_result $? 0 - docker rm -f "$pod_id" - fn_check_result $? 0 - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_online_0001.sh b/tests/src/test_rubik_online_0001.sh deleted file mode 100755 index 2543f1458ff131957922f0e98b2601de49e55e5a..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_online_0001.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2022-06-28 -# Description: 不支持将离线业务转为在线业务 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -function pre_fun() { - rubik_id=$(run_rubik) - fn_check_result $? 0 - pod_id=$(docker run -tid --cgroup-parent /kubepods "${PAUSE_IMAGE}") - fn_check_result $? 
0 -} - -function test_fun() { - container_id=$(docker run -tid --cgroup-parent /kubepods/"${pod_id}" "${OPENEULER_IMAGE}" bash) - cgroup_path="kubepods/${pod_id}" - qos_level_offline=-1 - qos_level_online=0 - # generate json data - data_offline=$(gen_single_pod_json "$pod_id" "$cgroup_path" $qos_level_offline) - data_online=$(gen_single_pod_json "$pod_id" "$cgroup_path" $qos_level_online) - - # set pod to offline - result=$(rubik_qos "$data_offline") - fn_check_result $? 0 - fn_check_string_not_contain "set qos failed" "$result" - - # set pod to online - result=$(rubik_qos "$data_online") - fn_check_result $? 0 - fn_check_string_contain "set qos failed" "$result" -} - -function post_fun() { - docker rm -f "$rubik_id" - fn_check_result $? 0 - docker rm -f "$container_id" - fn_check_result $? 0 - docker rm -f "$pod_id" - fn_check_result $? 0 - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_reply_cachelimit_abn.sh b/tests/src/test_rubik_reply_cachelimit_abn.sh deleted file mode 100755 index 90ea4c7519523a52088e61e6a67838a8d092c2f1..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_reply_cachelimit_abn.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2022-05-19 -# Description: rubik cachelimit 0002 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -function pre_fun() { - kernel_check CACHE - if [ $? 
-ne 0 ]; then - echo "Kernel not supported, skip test" - exit "${SKIP_FLAG}" - fi - run_rubik -} - -# pod not exist -function test_fun() { - local pod_name=podrubiktestpod - local cgroup_path=kubepods/podrubiktestpod - json_data=$(gen_single_pod_json ${pod_name} ${cgroup_path}) - result=$(rubik_qos "${json_data}") - if ! echo "$result" | grep "set qos failed"; then - ((exit_flag++)) - fi - rmdir /sys/fs/resctrl/rubik_* -} - -function post_fun() { - clean_all - if [[ $exit_flag -eq 0 ]]; then - echo "PASS" - else - echo "FAILED" - fi - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_reply_cachelimit_fun.sh b/tests/src/test_rubik_reply_cachelimit_fun.sh deleted file mode 100755 index 8ea83c81efff06bb7a90a2248cceefc849a4828e..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_reply_cachelimit_fun.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2022-05-19 -# Description: rubik cachelimit 0001 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -function pre_fun() { - kernel_check CACHE - if [ $? 
-ne 0 ]; then - echo "Kernel not supported, skip test" - exit "${SKIP_FLAG}" - fi - rubik_id=$(run_rubik) - pod_id=$(docker run -tid --cgroup-parent /kubepods "${PAUSE_IMAGE}") -} - -function test_fun() { - container_id=$(docker run -tid --cgroup-parent /kubepods/"${pod_id}""${openEuler_image}" bash) - qos_level=-1 - data=$(gen_single_pod_json "${pod_id}" "${cgroup_path}" $qos_level) - - result=$(curl -s -H "Accept: application/json" -H "Content-type: application/json" -X POST --data '{"Pods": {"podrubiktestpod": {"CgroupPath": "kubepods/podrubiktestpod","QosLevel": -1,"CacheLimitLevel": "max"}}}' --unix-socket /run/rubik/rubik.sock http://localhost/) - if [[ $? -ne 0 ]]; then - ((exit_flag++)) - fi - cat /sys/fs/resctrl/rubik_max/tasks | grep $$ - if [[ $? -ne 0 ]]; then - ((exit_flag++)) - fi -} - -function post_fun() { - echo $$ > /sys/fs/cgroup/cpu/cgroup.procs - echo $$ > /sys/fs/resctrl/tasks - rmdir /sys/fs/cgroup/cpu/"${cg}" - rmdir /sys/fs/cgroup/memory/"${cg}" - rmdir /sys/fs/resctrl/rubik_* - clean_all - if [[ $exit_flag -eq 0 ]]; then - echo "PASS" - else - echo "FAILED" - fi - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_reply_healthcheck_0001.sh b/tests/src/test_rubik_reply_healthcheck_0001.sh deleted file mode 100755 index 0e4aa7780f07c8e43aa5e51396c7b2359b3b0ba7..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_reply_healthcheck_0001.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. 
-# See the Mulan PSL v2 for more details. -# Create: 2021-05-15 -# Description: rubik reply healthcheck 0001 - -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -pre_fun() { - run_rubik -} - -test_fun() { - result=$(rubik_ping) - if [[ $? -eq 0 ]] && [[ ${result} =~ "ok" ]]; then - echo "PASS" - else - echo "FAILED" - ((exit_flag++)) - fi -} - -post_fun() { - clean_all - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_reply_healthcheck_0002.sh b/tests/src/test_rubik_reply_healthcheck_0002.sh deleted file mode 100755 index 7ca6698be65a8b95181015c1f5c064d877300dd4..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_reply_healthcheck_0002.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2021-05-15 -# Description: rubik reply healthcheck 0002 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -pre_fun() { - # empty, so no rubik working - continue -} - -test_fun() { - result=$(rubik_ping) - if [[ $? 
-ne 0 ]]; then - echo "PASS" - else - echo "FAILED" - ((exit_flag++)) - fi -} - -post_fun() { - clean_all - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_reply_http_parameter_0001.sh b/tests/src/test_rubik_reply_http_parameter_0001.sh deleted file mode 100755 index e7c50f1a33c63ce575bc3c19741f2f24d2ada852..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_reply_http_parameter_0001.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2022-06-28 -# Description: http接口有效参数测试 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -pre_fun() { - rubik_id=$(run_rubik) - fn_check_result $? 0 - pod_id=$(docker run -tid --cgroup-parent /kubepods "${PAUSE_IMAGE}") - fn_check_result $? 0 -} - -test_fun() { - # generate json data - container_id=$(docker run -tid --cgroup-parent /kubepods/"${pod_id}" "${OPENEULER_IMAGE}" bash) - cgroup_path="kubepods/${pod_id}" - qos_level=-1 - data=$(gen_single_pod_json "$pod_id" "$cgroup_path" $qos_level) - - # check http://localhost/ - result=$(rubik_qos "$data") - fn_check_result $? 0 - fn_check_string_not_contain "set qos failed" "$result" - - # check http://localhost/ping - result=$(rubik_ping) - fn_check_result $? 0 - fn_check_string_contain "ok" "$result" - - # check http://localhost/version - result=$(rubik_version) - fn_check_result $? 
0 - fn_check_string_contain "Version" "$result" -} - -post_fun() { - clean_all - docker rm -f "${container_id}" - fn_check_result $? 0 - docker rm -f "${pod_id}" - fn_check_result $? 0 - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_reply_http_parameter_0002.sh b/tests/src/test_rubik_reply_http_parameter_0002.sh deleted file mode 100755 index 5562fd6004385721107edb65236b71e8a1728d42..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_reply_http_parameter_0002.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2022-06-28 -# Description: http接口无效参数测试 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -function pre_fun() { - rubik_id=$(run_rubik) - fn_check_result $? 0 - pod_id=$(docker run -tid --cgroup-parent /kubepods "${PAUSE_IMAGE}") - fn_check_result $? 
0 -} - -function test_fun() { - # generate json data - container_id=$(docker run -tid --cgroup-parent /kubepods/"${pod_id}" "${OPENEULER_IMAGE}" bash) - cgroup_path="kubepods/${pod_id}" - qos_level=-1 - data=$(gen_single_pod_json "$pod_id" "$cgroup_path" $qos_level) - - # construct wrong handlers - super_long_handler="http://longlonglong/$(long_char 1000)" - https_handler="https://localhost/" - special_char_handler="!@#$!@#!@#" - invalid_handler="http://localhost/not_exist" - debug_handler="http://localhost/debug/pprof/" - profile_handler="http://localhost/debug/pprof/profile" - - result=$(curl_cmd "$super_long_handler" "$data" "POST") - fn_check_string_contain "404 page not found" "$result" - - curl_cmd "$https_handler" "$data" "POST" - fn_check_result_noeq $? 0 - - curl_cmd "$special_char_handler" "$data" "POST" - fn_check_result_noeq $? 0 - - result=$(curl_cmd "$invalid_handler" "$data" "POST") - fn_check_string_contain "404 page not found" "$result" - - result=$(curl_cmd "$debug_handler" "$data" "POST") - fn_check_string_contain "404 page not found" "$result" - - result=$(curl_cmd "$profile_handler" "$data" "POST") - fn_check_string_contain "404 page not found" "$result" -} - -function post_fun() { - docker rm -f "$rubik_id" - fn_check_result $? 0 - docker rm -f "$container_id" - fn_check_result $? 0 - docker rm -f "$pod_id" - fn_check_result $? 0 - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_reply_http_parameter_0003.sh b/tests/src/test_rubik_reply_http_parameter_0003.sh deleted file mode 100755 index 2ba6abd74d743e536aa182bc721c2b21f05bf802..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_reply_http_parameter_0003.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. 
-# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2022-06-28 -# Description: http接口无效参数测试 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -function pre_fun() { - rubik_id=$(run_rubik) - fn_check_result $? 0 - logfile=${RUBIK_TEST_ROOT}/rubik_"$rubik_id"_log - docker logs -f "$rubik_id" > "$logfile" & - pod_id=$(docker run -tid --cgroup-parent /kubepods "${PAUSE_IMAGE}") - fn_check_result $? 0 -} - -function test_fun() { - # generate validate json data - container_id=$(docker run -tid --cgroup-parent /kubepods/"${pod_id}" "${OPENEULER_IMAGE}" bash) - cgroup_path="kubepods/${pod_id}" - qos_level=-1 - validate_data=$(gen_single_pod_json "$pod_id" "$cgroup_path" $qos_level) - - # generate invalid data - # case1: pod id not exist - pod_id_not_exist=$(cat /proc/sys/kernel/random/uuid) - pod_id_not_exist_cgroup="kubepods/${pod_id_not_exist}" - invalid_data1=$(gen_single_pod_json "$pod_id_not_exist" "$pod_id_not_exist_cgroup" $qos_level) - result=$(rubik_qos "$invalid_data1") - fn_check_result $? 0 - fn_check_string_contain "set qos failed" "$result" - grep "set qos level error" "$logfile" - fn_check_result $? 0 - echo > "$logfile" - - # case2: cgroup path not exist - cgroup_path_not_exist="kubepods/cgroup/path/not/exist" - invalid_data2=$(gen_single_pod_json "$pod_id" "$cgroup_path_not_exist" $qos_level) - result=$(rubik_qos "$invalid_data2") - fn_check_result $? 0 - fn_check_string_contain "set qos failed" "$result" - grep "set qos level error" "$logfile" - fn_check_result $? 
0 - echo > "$logfile" - - # case3: super long cgroup path - cgroup_path_super_long="kubepods/$(long_char 10000)" - invalid_data3=$(gen_single_pod_json "$pod_id" "$cgroup_path_super_long" $qos_level) - result=$(rubik_qos "$invalid_data3") - fn_check_result $? 0 - fn_check_string_contain "set qos failed" "$result" - grep -i "length of cgroup path exceeds max limit 4096" "$logfile" - fn_check_result $? 0 - echo > "$logfile" - - # case4: invalid qos level - qos_level_invalid=-999 - invalid_data4=$(gen_single_pod_json "$pod_id" "$cgroup_path" $qos_level_invalid) - result=$(rubik_qos "$invalid_data4") - fn_check_result $? 0 - fn_check_string_contain "set qos failed" "$result" - grep -i "Invalid qos level number" "$logfile" - fn_check_result $? 0 - echo > "$logfile" - - # generate invalid data - # case5: pod id empty - pid_id_empty="" - pod_id_empty_cgroup="kubepods/${pod_id_empty}" - invalid_data5=$(gen_single_pod_json "$pod_id_empty" "$pod_id_empty_cgroup" $qos_level) - result=$(rubik_qos "$invalid_data5") - fn_check_result $? 0 - fn_check_string_contain "set qos failed" "$result" - grep "invalid cgroup path" "$logfile" - fn_check_result $? 0 - echo > "$logfile" - - # rubik will success with validate data - result=$(rubik_qos "$validate_data") - fn_check_result $? 0 - fn_check_string_not_contain "set qos failed" "$result" - echo > "$logfile" -} - -function post_fun() { - clean_all - docker rm -f "$container_id" - fn_check_result $? 0 - docker rm -f "$pod_id" - fn_check_result $? 0 - rm -f "$logfile" - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/src/test_rubik_reply_version_0001.sh b/tests/src/test_rubik_reply_version_0001.sh deleted file mode 100755 index 4c96a89dbe9b43652ada709aef4ea1486e36de3a..0000000000000000000000000000000000000000 --- a/tests/src/test_rubik_reply_version_0001.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. 
-# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# Create: 2021-05-15 -# Description: rubik reply healthcheck 0001 - -set -x -top_dir=$(git rev-parse --show-toplevel) -source "$top_dir"/tests/lib/commonlib.sh - -pre_fun() { - prepare_rubik - run_rubik -} - -test_fun() { - result=$(rubik_version) - if [[ $? -eq 0 ]]; then - field_number=$(echo "${result}" | grep -iE "version|release|commit|buildtime" -o | wc -l) - if [[ $field_number -eq 4 ]]; then - echo "PASS" - else - echo "FAILED" - ((exit_flag++)) - fi - else - echo "FAILED" - ((exit_flag++)) - fi -} - -post_fun() { - clean_all - exit "$exit_flag" -} - -pre_fun -test_fun -post_fun diff --git a/tests/test.sh b/tests/test.sh deleted file mode 100755 index f5ae6caa5587bd14ce923255e5184d57a5a5cf48..0000000000000000000000000000000000000000 --- a/tests/test.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash - -# Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. -# rubik licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. 
-# Create: 2021-04-15 -# Description: DT test script - -top_dir=$(git rev-parse --show-toplevel) - -# go fuzz test -function fuzz() { - failed=0 - while IFS= read -r testfile; do - printf "%-45s" "test $(basename "$testfile"): " | tee -a "${top_dir}"/tests/fuzz.log - bash "$testfile" "$1" | tee -a "${top_dir}"/tests/fuzz.log - if [ "$PIPESTATUS" -ne 0 ]; then - failed=1 - fi - # delete tmp files to avoid "no space left" problem - find /tmp -maxdepth 1 -iname "*fuzz*" -exec rm -rf {} \; - done < <(find "$top_dir"/tests/src -maxdepth 1 -name "fuzz_*.sh" -type f -print | sort) - exit $failed -} - -# integration test -function normal() { - source "${top_dir}"/tests/lib/commonlib.sh - failed=0 - while IFS= read -r testfile; do - filename=$(basename "$testfile") - DATE=$(date "+%Y%m%d%H%M%S") - export LOGFILE=${RUBIK_TEST_ROOT}/${filename}.${DATE}.log - printf "%-45s" "$filename: " - bash -x "$testfile" > "${LOGFILE}" 2>&1 - result=$? - if [ $result -eq "${SKIP_FLAG}" ]; then - echo -e "\033[33m SKIP \033[0m" - elif [ $result -ne 0 ]; then - echo -e "\033[31m FAIL \033[0m" - failed=1 - else - echo -e "\033[32m PASS \033[0m" - fi - done < <(find "$top_dir"/tests/src -maxdepth 1 -name "test_*" -type f -print | sort) - if [[ ${failed} -ne 0 ]]; then - exit $failed - else - clean_all - fi -} - -# main function to chose which kind of test -function main() { - case "$1" in - fuzz) - fuzz "$2" - ;; - *) - normal - ;; - esac -} - -main "$@"