From 8104a29b13e3d98c160e65d773a61752dbb22e7d Mon Sep 17 00:00:00 2001 From: vegbir Date: Sat, 4 Nov 2023 09:34:00 +0000 Subject: [PATCH] rubik: supports multi-level priority of CPU and memory QoS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Configuration changes Added the volcano.sh/qos-priority annotation for Kubernetes Pods, which is used to identify the priority of a Pod. The optional values are [support, batch, user, core, system] (the priority increases in order). If this option is not configured, the default is the user level. 2. Code optimization: Implement QoS related methods by defining the operator interface. 3. Function description Extend the two-level QoS Level to five-level QoS Level. If the volcano.sh/qos-priority and volcano.sh/preemptable annotations are set at the same time, the multi-level priority annotation takes precedence over the two-level priority annotation. volcano.sh/preemptable="true" is equivalent to volcano.sh/qos-priority="batch" volcano.sh/preemptable="false" is equivalent to volcano.sh/qos-priority="user" support and batch are offline services; user, core, and system are online services Signed-off-by: vegbir --- pkg/common/constant/constant.go | 8 +- pkg/common/constant/qos.go | 53 ++++++++++++ pkg/services/preemption/preemption.go | 44 ++++------ pkg/services/preemption/preemption_test.go | 2 +- pkg/services/preemption/qos.go | 97 ++++++++++++++++++++++ 5 files changed, 171 insertions(+), 33 deletions(-) create mode 100644 pkg/common/constant/qos.go create mode 100644 pkg/services/preemption/qos.go diff --git a/pkg/common/constant/constant.go b/pkg/common/constant/constant.go index 0bb7ee9..0f531e9 100644 --- a/pkg/common/constant/constant.go +++ b/pkg/common/constant/constant.go @@ -56,6 +56,8 @@ const ( const ( // PriorityAnnotationKey is annotation key to mark offline pod PriorityAnnotationKey = "volcano.sh/preemptable" + // MultiPriorityAnnotationKey is annotation key to set multiple levels of
priority + MultiPriorityAnnotationKey = "volcano.sh/qos-priority" // CacheLimitAnnotationKey is annotation key to set L3/Mb resctrl group CacheLimitAnnotationKey = "volcano.sh/cache-limit" // QuotaBurstAnnotationKey is annotation key to set cpu.cfs_burst_ns @@ -94,12 +96,6 @@ const ( ErrorExitCode ) -// qos level -const ( - Offline = -1 - Online = 0 -) - // cgroup file name const ( // CPUCgroupFileName is name of cgroup file used for cpu qos level setting diff --git a/pkg/common/constant/qos.go b/pkg/common/constant/qos.go new file mode 100644 index 0000000..48f57db --- /dev/null +++ b/pkg/common/constant/qos.go @@ -0,0 +1,53 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Yang Jiaqi +// Create: 2023-11-02 +// Description: This file contains default constants used in the project + +// Package constant is for constant definition +package constant + +// Possible value range of MultiPriorityAnnotationKey +const ( + // QoSMin is the low-resource, non-user processes + QoSMin string = "support" + // QoSLow is the background business processes + QoSLow string = "batch" + // QoSMid is the user interactive processes + QoSMid string = "user" + // QoSHigh is the critical business processes + QoSHigh string = "core" + // QoSMax is the system-level processes + QoSMax string = "system" + // QoSNone means the user did not indicate volcano.sh/qos-priority + QoSNone string = "" +) + +// five level qos +const ( + // MinQoSLevel represents the lowest level of five priority levels + MinQoSLevel = -2 + // LowQoSLevel represents the penultimate level of the five-level priority + LowQoSLevel = -1 + // MidQoSLevel represents the third level of five levels of priority + MidQoSLevel = 0 + // HighQoSLevel represents the second highest of the five priority levels + HighQoSLevel = 1 + // MaxQoSLevel represents the highest of five priority levels + MaxQoSLevel = 2 +) + +// two level qos +const ( + // Offline indicates the lower of the two levels of priority + Offline = -1 + // Online represents the higher of the two levels of priority + Online = 0 +) diff --git a/pkg/services/preemption/preemption.go b/pkg/services/preemption/preemption.go index 6a231f9..8d6a644 100644 --- a/pkg/services/preemption/preemption.go +++ b/pkg/services/preemption/preemption.go @@ -32,6 +32,9 @@ var supportCgroupTypes = map[string]*cgroup.Key{ "memory": {SubSys: "memory", FileName: constant.MemoryCgroupFileName}, } +// opt is an operator type singleton +var opt operator = &defaultOperator{} + // Preemption define service which related to qos level setting type Preemption struct { helper.ServiceBase @@ -98,17 +101,16 @@ func (q *Preemption) AddPod(pod *typedef.PodInfo) error
{ // UpdatePod implement update function when pod info is changed func (q *Preemption) UpdatePod(old, new *typedef.PodInfo) error { - oldQos, newQos := getQoSLevel(old), getQoSLevel(new) - switch { - case newQos == oldQos: + oldQos, newQos := opt.getQoSLevel(old), opt.getQoSLevel(new) + if newQos == oldQos { return nil - case newQos > oldQos: - return fmt.Errorf("does not support pod qos level setting from low to high") - default: - if err := q.validateConfig(new); err != nil { - if err := q.SetQoSLevel(new); err != nil { - return fmt.Errorf("failed to update the qos level of pod %s(%s): %v", new.Name, new.UID, err) - } + } + if err := opt.qosModifiable(oldQos, newQos); err != nil { + return err + } + if err := q.validateConfig(new); err != nil { + if err := q.SetQoSLevel(new); err != nil { + return fmt.Errorf("failed to update the qos level of pod %s(%s): %v", new.Name, new.UID, err) } } return nil @@ -122,7 +124,7 @@ func (q *Preemption) DeletePod(_ *typedef.PodInfo) error { // validateConfig will validate pod's qos level between value from // cgroup file and the one from pod info func (q *Preemption) validateConfig(pod *typedef.PodInfo) error { - targetLevel := getQoSLevel(pod) + targetLevel := opt.getQoSLevel(pod) for _, r := range q.config.Resource { if err := pod.GetCgroupAttr(supportCgroupTypes[r]).Expect(targetLevel); err != nil { return fmt.Errorf("failed to validate the qos level configuration of pod %s: %v", pod.Name, err) @@ -141,9 +143,10 @@ func (q *Preemption) SetQoSLevel(pod *typedef.PodInfo) error { if pod == nil { return fmt.Errorf("empty pod info") } - qosLevel := getQoSLevel(pod) - if qosLevel == constant.Online { - log.Infof("pod %s(%s) has already been set to online(%d)", pod.Name, pod.UID, qosLevel) + qosLevel := opt.getQoSLevel(pod) + + if qosLevel == opt.defaultQoSLevel() { + log.Infof("pod %s(%s) has already been set to %s(%d)", pod.Name, pod.UID, opt.defaultQoSLevelName(), qosLevel) return nil } @@ -163,21 +166,10 @@ func (q *Preemption) 
SetQoSLevel(pod *typedef.PodInfo) error { if errs != nil { return errs } - log.Infof("pod %s(%s) is set to offline(%d) successfully", pod.Name, pod.UID, qosLevel) + log.Infof("pod %s(%s) is set to %s(%d) successfully", pod.Name, pod.UID, opt.qosLevelName(qosLevel), qosLevel) return nil } -func getQoSLevel(pod *typedef.PodInfo) int { - if pod == nil { - return constant.Online - } - if pod.Offline() { - return constant.Offline - } - - return constant.Online -} - // Validate will validate the qos service config func (conf *PreemptionConfig) Validate() error { if len(conf.Resource) == 0 { diff --git a/pkg/services/preemption/preemption_test.go b/pkg/services/preemption/preemption_test.go index d1d763c..d285a3f 100644 --- a/pkg/services/preemption/preemption_test.go +++ b/pkg/services/preemption/preemption_test.go @@ -11,7 +11,7 @@ // Create: 2023-02-10 // Description: This file test qos level setting service -// Package qos is the service used for qos level setting +// Package preemption is the service used for qos level setting package preemption import ( diff --git a/pkg/services/preemption/qos.go b/pkg/services/preemption/qos.go new file mode 100644 index 0000000..ea470ec --- /dev/null +++ b/pkg/services/preemption/qos.go @@ -0,0 +1,97 @@ +// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved. +// rubik licensed under the Mulan PSL v2. +// You can use this software according to the terms and conditions of the Mulan PSL v2. +// You may obtain a copy of Mulan PSL v2 at: +// http://license.coscl.org.cn/MulanPSL2 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +// PURPOSE. +// See the Mulan PSL v2 for more details. 
+// Author: Xiang Li +// Create: 2023-11-04 +// Description: This file implements methods of different types of qos level + +// Package preemption is the service used for qos level setting +package preemption + +import ( + "fmt" + + "isula.org/rubik/pkg/common/constant" + "isula.org/rubik/pkg/core/typedef" +) + +// operator is a collection of functions related to priority type (five-level, two-level) +type operator interface { + getQoSLevel(*typedef.PodInfo) int + defaultQoSLevel() int + defaultQoSLevelName() string + qosModifiable(int, int) error + qosLevelName(int) string +} + +type defaultOperator struct{} + +var fiveLevelsQoSDict = map[string]int{ + constant.QoSMin: constant.MinQoSLevel, + constant.QoSLow: constant.LowQoSLevel, + constant.QoSMid: constant.MidQoSLevel, + constant.QoSHigh: constant.HighQoSLevel, + constant.QoSMax: constant.MaxQoSLevel, +} + +var fiveLevelsQoSNameDict = map[int]string{ + constant.MinQoSLevel: constant.QoSMin, + constant.LowQoSLevel: constant.QoSLow, + constant.MidQoSLevel: constant.QoSMid, + constant.HighQoSLevel: constant.QoSHigh, + constant.MaxQoSLevel: constant.QoSMax, +} + +func (o *defaultOperator) getQoSLevel(pod *typedef.PodInfo) int { + if pod == nil { + return o.defaultQoSLevel() + } + // prefer MultiPriorityAnnotationKey over PriorityAnnotationKey + qos, ok := fiveLevelsQoSDict[pod.Annotations[constant.MultiPriorityAnnotationKey]] + if ok { + return qos + } + if pod.Offline() { + return constant.Offline + } + return o.defaultQoSLevel() +} + +func (o *defaultOperator) defaultQoSLevel() int { + return constant.Online +} + +func (o *defaultOperator) defaultQoSLevelName() string { + return "online" +} + +func (o *defaultOperator) qosModifiable(original, updated int) error { + // Only the value 0 can be changed, the rest cannot be changed + if original == constant.Online { + return nil + } + switch { + case updated > original: + return fmt.Errorf("does not support pod qos level setting from low to high") + case updated <
original: + return fmt.Errorf("does not support pod qos level setting from high to low") + default: + return nil + } +} + +func (o *defaultOperator) qosLevelName(qosLevel int) string { + if _, existed := fiveLevelsQoSNameDict[qosLevel]; existed { + if qosLevel >= constant.Online { + return "online" + } + return "offline" + } + return "undefined" +} -- Gitee