From 90c3376908708754000c29143dbffa8d2a712b59 Mon Sep 17 00:00:00 2001 From: xiadanni Date: Thu, 4 Nov 2021 17:12:27 +0800 Subject: [PATCH] rubik: add qos level check and validate check qos level before setting cgroup and validate it after setting Signed-off-by: xiadanni --- pkg/qos/qos.go | 69 +++++++++++++++++++++++++++++++++++- pkg/sync/sync.go | 6 ++-- pkg/workerpool/workerpool.go | 8 +++-- 3 files changed, 78 insertions(+), 5 deletions(-) diff --git a/pkg/qos/qos.go b/pkg/qos/qos.go index 897db0d..abb5a3b 100644 --- a/pkg/qos/qos.go +++ b/pkg/qos/qos.go @@ -58,6 +58,9 @@ func NewPodInfo(ctx context.Context, podID string, cgmnt string, req api.PodQoS) CgroupRoot: cgmnt, Ctx: ctx, } + if err := checkQosLevel(pod.QosLevel); err != nil { + return nil, err + } if err := pod.initCgroupPath(); err != nil { return nil, err } @@ -102,6 +105,14 @@ func getQosLevel(root, file string) (int, error) { return qosLevel, nil } +func checkQosLevel(qosLevel int) error { + if qosLevel >= constant.MinLevel.Int() && qosLevel <= constant.MaxLevel.Int() { + return nil + } + + return errors.Errorf("invalid qos level number %d, should be 0 or -1", qosLevel) +} + // setQosLevel is actual function to do the setting job func setQosLevel(root, file string, target int) error { if !util.IsDirectory(root) { @@ -156,6 +167,38 @@ func (pod *PodInfo) SetQos() error { return nil } +// ValidateQos is used for checking pod's qos level if equal to the value it should be set up to +func (pod *PodInfo) ValidateQos() error { + var ( + cpuInfo, memInfo int + err error + ) + ctx := pod.Ctx + + tinylog.WithCtx(ctx).Logf("Checking level=%d for pod %s", pod.QosLevel, pod.PodID) + + for kind, cgPath := range pod.FullPath { + switch kind { + case "cpu": + if cpuInfo, err = getQosLevel(cgPath, constant.CPUCgroupFileName); err != nil { + return errors.Errorf("read %s for pod %q failed: %v", constant.CPUCgroupFileName, pod.PodID, err) + } + case "memory": + if memInfo, err = getQosLevel(cgPath, constant.MemoryCgroupFileName); err != nil { + return errors.Errorf("read %s for pod %q failed: %v", constant.MemoryCgroupFileName, pod.PodID, err) + } + } + } + + if (cpuInfo != pod.QosLevel) || (memInfo != pod.QosLevel) { + return errors.Errorf("checking level=%d for pod %s failed", pod.QosLevel, pod.PodID) + } + + tinylog.WithCtx(ctx).Logf("Checking level=%d for pod %s OK", pod.QosLevel, pod.PodID) + + return nil +} + // initCgroupPath return pod's cgroup full path func (pod *PodInfo) initCgroupPath() error { if pod.CgroupRoot == "" { @@ -163,10 +206,34 @@ func (pod *PodInfo) initCgroupPath() error { } cgroupMap := make(map[string]string, len(SupportCgroupTypes)) for _, kind := range SupportCgroupTypes { - cgroupMap[kind] = filepath.Join(pod.CgroupRoot, kind, pod.CgroupPath) + if err := checkCgroupPath(pod.CgroupPath); err != nil { + return err + } + fullPath := filepath.Join(pod.CgroupRoot, kind, pod.CgroupPath) + if len(fullPath) > constant.MaxCgroupPathLen { + return errors.Errorf("length of cgroup path exceeds max limit %d", constant.MaxCgroupPathLen) + } + cgroupMap[kind] = fullPath } pod.FullPath = cgroupMap return nil } + +func checkCgroupPath(path string) error { + pathPrefix, blacklist := "kubepods", []string{"kubepods", "kubepods/besteffort", "kubepods/burstable"} + cPath := filepath.Clean(path) + + if !strings.HasPrefix(cPath, pathPrefix) { + return errors.Errorf("invalid cgroup path %v, should start with %v", path, pathPrefix) + } + + for _, invalidPath := range blacklist { + if cPath == invalidPath { + return errors.Errorf("invalid cgroup path %v, without podID", path) + } + } + + return nil +} diff --git a/pkg/sync/sync.go b/pkg/sync/sync.go index 51a466a..cf5f4d5 100644 --- a/pkg/sync/sync.go +++ b/pkg/sync/sync.go @@ -89,8 +89,10 @@ func verifyOfflinePods(clientSet *kubernetes.Clientset) error { log.Errorf("get pod %v info for sync error: %v", pod.UID, err) continue } - sErr := podQosInfo.SetQos() - log.Logf("pod %v qos level check error: %v, reset qos error: %v", pod.UID, err, sErr) + if err = podQosInfo.ValidateQos(); err != nil { + sErr := podQosInfo.SetQos() + log.Logf("pod %v qos level check error: %v, reset qos error: %v", pod.UID, err, sErr) + } } return nil diff --git a/pkg/workerpool/workerpool.go b/pkg/workerpool/workerpool.go index 9a493c5..b9751a8 100644 --- a/pkg/workerpool/workerpool.go +++ b/pkg/workerpool/workerpool.go @@ -114,8 +114,8 @@ func (task *QosTask) context() context.Context { func (task *QosTask) do() error { var ( - errFlag bool - sErr error + errFlag bool + sErr, vErr error ) if len(task.req.Pods) > constant.MaxPodsPerRequest { @@ -132,6 +132,10 @@ func (task *QosTask) do() error { errFlag = true continue } + if vErr = pod.ValidateQos(); vErr != nil { + tinylog.WithCtx(task.ctx).Errorf("Validate pod %v qos level error: %v", podID, vErr) + errFlag = true + } } if errFlag { -- Gitee