From c2b0d3c5ae5ac5afc42716b17490601562acfa3f Mon Sep 17 00:00:00 2001
From: zhaoxiaohu
Date: Fri, 23 Aug 2024 10:07:54 +0800
Subject: [PATCH] Fix kubelet panic when allocate resource for pod.

Signed-off-by: zhaoxiaohu
---
 ...panic-when-allocate-resource-for-pod.patch | 116 ++++++++++++++++++
 kubernetes.spec                               |   9 +-
 2 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 0017-backport-Fix-kubelet-panic-when-allocate-resource-for-pod.patch

diff --git a/0017-backport-Fix-kubelet-panic-when-allocate-resource-for-pod.patch b/0017-backport-Fix-kubelet-panic-when-allocate-resource-for-pod.patch
new file mode 100644
index 0000000..29cea2a
--- /dev/null
+++ b/0017-backport-Fix-kubelet-panic-when-allocate-resource-for-pod.patch
@@ -0,0 +1,116 @@
+From 44140f192be2eea3a71b3b6372ef45e8535dd802 Mon Sep 17 00:00:00 2001
+From: zhaoxiaohu
+Date: Thu, 22 Aug 2024 16:39:45 +0800
+Subject: [PATCH] Fix kubelet panic when allocate resource for pod.
+
+Reference: https://github.com/kubernetes/kubernetes/pull/119561/commits/d6b8a660b081916f3fae3319581ec2c49a2f5a05
+
+Signed-off-by: zhaoxiaohu
+Signed-off-by: payall4u
+Signed-off-by: yuwang
+---
+ pkg/kubelet/cm/devicemanager/manager.go      | 12 ++--
+ pkg/kubelet/cm/devicemanager/manager_test.go | 60 ++++++++++++++++++++
+ 2 files changed, 67 insertions(+), 5 deletions(-)
+
+diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go
+index 95cf058f..1370675b 100644
+--- a/pkg/kubelet/cm/devicemanager/manager.go
++++ b/pkg/kubelet/cm/devicemanager/manager.go
+@@ -667,6 +667,13 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
+ 	// Create a closure to help with device allocation
+ 	// Returns 'true' once no more devices need to be allocated.
+ 	allocateRemainingFrom := func(devices sets.String) bool {
++		// When we call callGetPreferredAllocationIfAvailable below, we will release
++		// the lock and call the device plugin. If someone calls ListResource concurrently,
++		// device manager will recalculate the allocatedDevices map. Some entries with
++		// empty sets may be removed, so we reinit here.
++		if m.allocatedDevices[resource] == nil {
++			m.allocatedDevices[resource] = sets.NewString()
++		}
+ 		for device := range devices.Difference(allocated) {
+ 			m.allocatedDevices[resource].Insert(device)
+ 			allocated.Insert(device)
+@@ -683,11 +690,6 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
+ 		return allocated, nil
+ 	}
+ 
+-	// Needs to allocate additional devices.
+-	if m.allocatedDevices[resource] == nil {
+-		m.allocatedDevices[resource] = sets.NewString()
+-	}
+-
+ 	// Gets Devices in use.
+ 	devicesInUse := m.allocatedDevices[resource]
+ 	// Gets Available devices.
+diff --git a/pkg/kubelet/cm/devicemanager/manager_test.go b/pkg/kubelet/cm/devicemanager/manager_test.go
+index 9034498c..354dee50 100644
+--- a/pkg/kubelet/cm/devicemanager/manager_test.go
++++ b/pkg/kubelet/cm/devicemanager/manager_test.go
+@@ -1080,3 +1080,63 @@ func makeDevice(devOnNUMA checkpoint.DevicesPerNUMA, topology bool) map[string]p
+ 	}
+ 	return res
+ }
++
++func TestDevicesToAllocateConflictWithUpdateAllocatedDevices(t *testing.T) {
++	podToAllocate := "podToAllocate"
++	containerToAllocate := "containerToAllocate"
++	podToRemove := "podToRemove"
++	containerToRemove := "containerToRemove"
++	deviceID := "deviceID"
++	resourceName := "domain1.com/resource"
++
++	socket := filepath.Join(os.TempDir(), esocketName())
++	devs := []*pluginapi.Device{
++		{ID: deviceID, Health: pluginapi.Healthy},
++	}
++	p, e := esetup(t, devs, socket, resourceName, func(n string, d []pluginapi.Device) {})
++
++	waitUpdateAllocatedDevicesChan := make(chan struct{})
++	waitSetGetPreferredAllocChan := make(chan struct{})
++
++	p.SetGetPreferredAllocFunc(func(r *pluginapi.PreferredAllocationRequest, devs map[string]pluginapi.Device) (*pluginapi.PreferredAllocationResponse, error) {
++		waitSetGetPreferredAllocChan <- struct{}{}
++		<-waitUpdateAllocatedDevicesChan
++		return &pluginapi.PreferredAllocationResponse{
++			ContainerResponses: []*pluginapi.ContainerPreferredAllocationResponse{
++				{
++					DeviceIDs: []string{deviceID},
++				},
++			},
++		}, nil
++	})
++
++	testManager := &ManagerImpl{
++		endpoints:             make(map[string]endpointInfo),
++		healthyDevices:        make(map[string]sets.String),
++		unhealthyDevices:      make(map[string]sets.String),
++		allocatedDevices:      make(map[string]sets.String),
++		podDevices:            newPodDevices(),
++		activePods:            func() []*v1.Pod { return []*v1.Pod{} },
++		sourcesReady:          &sourcesReadyStub{},
++		topologyAffinityStore: topologymanager.NewFakeManager(),
++	}
++
++	testManager.endpoints[resourceName] = endpointInfo{
++		e: e,
++		opts: &pluginapi.DevicePluginOptions{
++			GetPreferredAllocationAvailable: true,
++		},
++	}
++	testManager.healthyDevices[resourceName] = sets.NewString(deviceID)
++	testManager.podDevices.insert(podToRemove, containerToRemove, resourceName, nil, nil)
++
++	go func() {
++		<-waitSetGetPreferredAllocChan
++		testManager.UpdateAllocatedDevices()
++		waitUpdateAllocatedDevicesChan <- struct{}{}
++	}()
++
++	set, err := testManager.devicesToAllocate(podToAllocate, containerToAllocate, resourceName, 1, sets.NewString())
++	assert.NoError(t, err)
++	assert.Equal(t, set, sets.NewString(deviceID))
++}
+-- 
+2.33.0
+
diff --git a/kubernetes.spec b/kubernetes.spec
index 1d048c4..7fd5a4c 100644
--- a/kubernetes.spec
+++ b/kubernetes.spec
@@ -3,7 +3,7 @@
 
 Name: kubernetes
 Version: 1.20.2
-Release: 21
+Release: 22
 Summary: Container cluster management
 License: ASL 2.0
 URL: https://k8s.io/kubernetes
@@ -40,6 +40,7 @@ Patch6012: 0013-Validate-etcd-paths.patch
 Patch6013: 0014-fix-node-address-validation.patch
 Patch6014: 0015-Add-ephemeralcontainer-to-imagepolicy-securityaccoun.patch
 Patch6015: 0016-Add-envFrom-to-serviceaccount-admission-plugin.patch
+Patch6016: 0017-backport-Fix-kubelet-panic-when-allocate-resource-for-pod.patch
 
 %description
 Container cluster management.
@@ -271,6 +272,12 @@ getent passwd kube >/dev/null || useradd -r -g kube -d / -s /sbin/nologin \
 %systemd_postun kubelet kube-proxy
 
 %changelog
+* Thu Aug 22 2024 zhaoxiaohu - 1.20.2-22
+- Type:bugfix
+- CVE:NA
+- SUG:NA
+- DESC:fix kubelet panic when allocate resource for pod. #119561
+
 * Mon Apr 29 2024 liuxu - 1.20.2-21
 - Type:bugfix
 - CVE:NA
-- 
Gitee