diff --git a/0010-bugfix-os-operator-and-proxy-fix-the-issue-that-some.patch b/0010-bugfix-os-operator-and-proxy-fix-the-issue-that-some.patch new file mode 100644 index 0000000000000000000000000000000000000000..1950652cd2b19de544ce1b4ef8f2a281b4bf80af --- /dev/null +++ b/0010-bugfix-os-operator-and-proxy-fix-the-issue-that-some.patch @@ -0,0 +1,272 @@ +From 8fce3e81822b0a5818adfb4ed5112030ed6b957e Mon Sep 17 00:00:00 2001 +From: liyuanr +Date: Tue, 10 Sep 2024 16:14:21 +0800 +Subject: [PATCH] bugfix (os-operator and proxy): fix the issue that some node + configurations are not delivered. + +When configuring node, the operator updates osinstance and then node. +The time of the two updates is affected by the response time of the APIServer. +The update time may be different. If the proxy completes the configuration immediately +after the osinstance is updated and the node label is updated, the configuration label +on the node is not deleted. As a result, the node is skipped during the next configuration. +Therefore, the osinstance and node label check is added to the proxy. The configuration is performed +only after the operator is updated. +In addition, the logs of the operator and proxy are optimized as follows: +1. Fix an issue where error logs are printed when the values of starttime and endtime are the same. +2. Delete the logs used during development from the times.go file. +3. The log about the successful deletion of the serial label by the operator is added. +4. Add a description before obtaining the logs of the node to be checked. (whether to add +serial labels or upgrade/configuration) +5. Logs are added when a node is being upgraded or configuration is returned in serial mode. +6. Some debug logs are added to the operator. 
+ +Signed-off-by: liyuanr +--- + .../proxy/src/controller/controller.rs | 26 ++++++++++++++++--- + KubeOS-Rust/proxy/src/controller/utils.rs | 6 ++--- + KubeOS-Rust/proxy/src/main.rs | 2 +- + cmd/operator/controllers/operation.go | 1 + + cmd/operator/controllers/os_controller.go | 12 ++++++--- + cmd/operator/controllers/times.go | 4 +-- + 6 files changed, 37 insertions(+), 14 deletions(-) + +diff --git a/KubeOS-Rust/proxy/src/controller/controller.rs b/KubeOS-Rust/proxy/src/controller/controller.rs +index 40405b2d..787a0e1c 100644 +--- a/KubeOS-Rust/proxy/src/controller/controller.rs ++++ b/KubeOS-Rust/proxy/src/controller/controller.rs +@@ -57,6 +57,7 @@ pub async fn reconcile( + return Ok(NO_REQUEUE) + } + }else { ++ debug!("osinstance correspending os name is None, not in upgrading or configuring"); + return Ok(REQUEUE_NORMAL) + } + +@@ -68,7 +69,7 @@ pub async fn reconcile( + .as_ref() + .ok_or(Error::MissingSubResource { value: String::from("node.status.node_info") })? + .os_image; +- debug!("os expected osversion is {},actual osversion is {}", os_cr.spec.osversion, node_os_image); ++ debug!("os expected osversion is {}, actual osversion is {}", os_cr.spec.osversion, node_os_image); + if check_version(&os_cr.spec.osversion, node_os_image) { + match ConfigType::SysConfig.check_config_version(&os, &osinstance) { + ConfigOperation::Reassign => { +@@ -94,10 +95,26 @@ pub async fn reconcile( + }, + _ => {}, + } ++ if node.labels().contains_key(LABEL_UPGRADING) || node.labels().contains_key(LABEL_CONFIGURING) { ++ if osinstance.spec.nodestatus == NODE_STATUS_IDLE { ++ info!( ++ "node has upgrade/config label , but osinstance.spec.nodestatus is idle. 
Operation:refesh node and wait reassgin" ++ ); ++ proxy_controller ++ .refresh_node( ++ node, ++ osinstance, ++ &get_config_version(os_cr.spec.upgradeconfigs.as_ref()), ++ ConfigType::UpgradeConfig, ++ ) ++ .await?; ++ return Ok(REQUEUE_NORMAL); ++ } + proxy_controller.set_config(&mut osinstance, ConfigType::SysConfig).await?; + proxy_controller + .refresh_node(node, osinstance, &get_config_version(os_cr.spec.sysconfigs.as_ref()), ConfigType::SysConfig) + .await?; ++ } + } else { + if os_cr.spec.opstype == NODE_STATUS_CONFIG { + return Err(Error::UpgradeBeforeConfig); +@@ -117,7 +134,7 @@ pub async fn reconcile( + if node.labels().contains_key(LABEL_UPGRADING) { + if osinstance.spec.nodestatus == NODE_STATUS_IDLE { + info!( +- "node has upgrade label ,but osinstance.spec.nodestatus is idle. Operation:refesh node and wait reassgin" ++ "node has upgrade label , but osinstance.spec.nodestatus is idle. Operation:refesh node and wait reassgin" + ); + proxy_controller + .refresh_node( +@@ -196,12 +213,13 @@ impl ProxyController { + let node_api: Api = Api::all(self.k8s_client.clone()); + let labels = node.labels_mut(); + if labels.contains_key(LABEL_UPGRADING) { ++ debug!("delete label {}", LABEL_UPGRADING); + labels.remove(LABEL_UPGRADING); + node = node_api.replace(&node.name(), &PostParams::default(), &node).await?; +- }else if labels.contains_key(LABEL_CONFIGURING) { ++ }else if labels.contains_key(LABEL_CONFIGURING){ ++ debug!("delete label {}", LABEL_CONFIGURING); + labels.remove(LABEL_CONFIGURING); + node = node_api.replace(&node.name(), &PostParams::default(), &node).await?; +- + } + if let Some(node_spec) = &node.spec { + if let Some(node_unschedulable) = node_spec.unschedulable { +diff --git a/KubeOS-Rust/proxy/src/controller/utils.rs b/KubeOS-Rust/proxy/src/controller/utils.rs +index 148ca24d..7e7b41d9 100644 +--- a/KubeOS-Rust/proxy/src/controller/utils.rs ++++ b/KubeOS-Rust/proxy/src/controller/utils.rs +@@ -47,7 +47,7 @@ impl ConfigType { + let 
os_config_version = get_config_version(os.spec.upgradeconfigs.as_ref()); + let osi_config_version = get_config_version(osinstance.spec.upgradeconfigs.as_ref()); + debug!( +- "os upgradeconfig version is{},osinstance spec upragdeconfig version is{}", ++ "os upgradeconfig version is {}, osinstance spec upragdeconfig version is {}", + os_config_version, osi_config_version + ); + if !check_version(&os_config_version, &osi_config_version) { +@@ -61,7 +61,7 @@ impl ConfigType { + let os_config_version = get_config_version(os.spec.sysconfigs.as_ref()); + let osi_config_version = get_config_version(osinstance.spec.sysconfigs.as_ref()); + debug!( +- "os sysconfig version is{},osinstance spec sysconfig version is{}", ++ "os sysconfig version is {},osinstance spec sysconfig version is {}", + os_config_version, osi_config_version + ); + if !check_version(&os_config_version, &osi_config_version) { +@@ -108,7 +108,7 @@ impl ConfigType { + }, + } + debug!( +- "osinstance soec config version is {},status config version is {}", ++ "osinstance spec config version is {}, status config version is {}", + spec_config_version, status_config_version + ); + if spec_config_version != status_config_version && osinstance.spec.nodestatus != NODE_STATUS_IDLE { +diff --git a/KubeOS-Rust/proxy/src/main.rs b/KubeOS-Rust/proxy/src/main.rs +index 5c122ba2..c15aebed 100644 +--- a/KubeOS-Rust/proxy/src/main.rs ++++ b/KubeOS-Rust/proxy/src/main.rs +@@ -27,7 +27,7 @@ use controller::{ + const PROXY_VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); + #[tokio::main] + async fn main() -> Result<()> { +- Builder::from_env(Env::default().default_filter_or("info")).target(Target::Stdout).init(); ++ Builder::from_env(Env::default().default_filter_or("proxy=info")).target(Target::Stdout).init(); + let client = Client::try_default().await?; + let os: Api = Api::all(client.clone()); + let controller_client = ControllerClient::new(client.clone()); +diff --git a/cmd/operator/controllers/operation.go 
b/cmd/operator/controllers/operation.go +index 5ac3d6d4..9f130479 100644 +--- a/cmd/operator/controllers/operation.go ++++ b/cmd/operator/controllers/operation.go +@@ -93,6 +93,7 @@ func deleteSerialLabel(ctx context.Context, r common.ReadStatusWriter, nodes []c + log.Error(err, "unable to delete serial label ", "node", node.Name+", skip this node") + errList = append(errList, err) + } ++ log.Info("delete node " + node.Name + " serial label " + values.LabelSerial + " successfully") + } + } + if len(errList) > 0 { +diff --git a/cmd/operator/controllers/os_controller.go b/cmd/operator/controllers/os_controller.go +index 9e2e8e49..f9e65b47 100644 +--- a/cmd/operator/controllers/os_controller.go ++++ b/cmd/operator/controllers/os_controller.go +@@ -75,7 +75,6 @@ func Reconcile(ctx context.Context, r common.ReadStatusWriter, req ctrl.Request) + " , the end time " + os.Spec.TimeWindow.EndTime) + return values.Requeue, nil + } +- + ops := os.Spec.OpsType + var opsInsatnce operation + switch ops { +@@ -106,6 +105,7 @@ func Reconcile(ctx context.Context, r common.ReadStatusWriter, req ctrl.Request) + if err != nil { + return values.RequeueNow, err + } ++ log.V(1).Info("get all nodes num is " + strconv.Itoa(len(allNodes))) + switch os.Spec.ExecutionMode { + case ExecutionModeParallel: + result, err := excuteParallelOperation(ctx, r, os, opsInsatnce, len(allNodes)) +@@ -197,6 +197,7 @@ func calNodeLimit(ctx context.Context, r common.ReadStatusWriter, + func assignOperation(ctx context.Context, r common.ReadStatusWriter, os upgradev1.OS, limit int, + opsInstance operation, requirements []labels.Requirement) (int, error) { + if limit == 0 { ++ log.V(1).Info("limit is 0 , do not need to assign operation") + return 0, nil + } + nodes, err := getNodes(ctx, r, limit+1, requirements...) 
// one more to see if all nodes updated +@@ -283,6 +284,7 @@ func setTimeInterval(timeInterval int) ctrl.Result { + + func excuteParallelOperation(ctx context.Context, r common.ReadStatusWriter, os upgradev1.OS, + opsInsatnce operation, nodeNum int) (ctrl.Result, error) { ++ log.V(1).Info("start parallel operation") + opsLabel := opsInsatnce.getOpsLabel() + opsLabel.op = selection.Exists + opsNodesReq, err := newopsNodesRequirement(os.Spec.NodeSelector, +@@ -294,6 +296,7 @@ func excuteParallelOperation(ctx context.Context, r common.ReadStatusWriter, os + if err != nil { + return values.RequeueNow, nil + } ++ log.V(1).Info("get limit is " + strconv.Itoa(limit)) + opsLabel.op = selection.DoesNotExist + noOpsNodesReq, err := newopsNodesRequirement(os.Spec.NodeSelector, + selection.Equals, opsLabel).createNodeRequirement(ctx, r) +@@ -308,6 +311,7 @@ func excuteParallelOperation(ctx context.Context, r common.ReadStatusWriter, os + + func excuteSerialOperation(ctx context.Context, r common.ReadStatusWriter, os upgradev1.OS, + opsInsatnce operation, nodeNum int) (ctrl.Result, error) { ++ log.V(1).Info("start serial operation") + opsLabel := opsInsatnce.getOpsLabel() + opsLabel.op = selection.Exists + opsNodesReq, err := newopsNodesRequirement(os.Spec.NodeSelector, +@@ -320,6 +324,7 @@ func excuteSerialOperation(ctx context.Context, r common.ReadStatusWriter, os up + return values.RequeueNow, nil + } + if len(opsNodeNum) > 0 { ++ log.V(1).Info("a node is being upgraded or configured. 
Wait until the node upgrade or configuration is complete.") + return values.Requeue, nil + } + +@@ -332,7 +337,7 @@ func excuteSerialOperation(ctx context.Context, r common.ReadStatusWriter, os up + if err != nil { + return values.RequeueNow, nil + } +- ++ log.V(1).Info("get the number of nodes which need to be added serial label num is " + strconv.Itoa(serialNodeLimit)) + noSerialNodesRequirement, err := newSerialNodesRequirement(os.Spec.NodeSelector, + selection.Equals, selection.DoesNotExist).createNodeRequirement(ctx, r) + if err != nil { +@@ -342,10 +347,12 @@ func excuteSerialOperation(ctx context.Context, r common.ReadStatusWriter, os up + serialOpsInstance := serialOps{ + label: opsInsatnce.getOpsLabel(), + } ++ log.V(1).Info("start add serial label to nodes") + if _, err := assignOperation(ctx, r, os, serialNodeLimit, serialOpsInstance, noSerialNodesRequirement); err != nil { + return values.RequeueNow, nil + } + ++ log.V(1).Info("start check nodes needed to be upgrade/configure or not") + serialLimit := 1 // 1 is the number of operation nodes when excution mode in serial + count, err := assignOperation(ctx, r, os, serialLimit, opsInsatnce, serialNodesRequirement) + if err != nil { +@@ -355,5 +362,4 @@ func excuteSerialOperation(ctx context.Context, r common.ReadStatusWriter, os up + return values.Requeue, nil + } + return setTimeInterval(os.Spec.TimeInterval), nil +- + } +diff --git a/cmd/operator/controllers/times.go b/cmd/operator/controllers/times.go +index 3a72cce9..f651c0e4 100644 +--- a/cmd/operator/controllers/times.go ++++ b/cmd/operator/controllers/times.go +@@ -62,14 +62,12 @@ func isWithinTimeWindow(start, end string) (bool, error) { + } + if endTime.Before(startTime) { + if layoutStart == DATE_TIME { +- return false, fmt.Errorf("invalid TimeWindow: Start %s Time is after end time %s", ++ return false, fmt.Errorf("invalid TimeWindow: start time %s is after end time %s", + startTime.Format(layoutStart), endTime.Format(layoutEnd)) + } + endTime = 
endTime.Add(oneDayTime) +- fmt.Printf("endtime time add 24 hour is %s\n", endTime.Format(layoutStart)) + if now.Before(startTime) { + now = now.Add(oneDayTime) +- fmt.Printf("now time add 24 hour is %s\n", now.Format(layoutStart)) + } + + } +-- +2.33.0.windows.2 + diff --git a/KubeOS.spec b/KubeOS.spec index 7dfcb473e2c431319c034f67c9b75c3baf0ec53a..3b1b21aa4e2f9f04239df58f7348422eaf4b21c3 100644 --- a/KubeOS.spec +++ b/KubeOS.spec @@ -2,7 +2,7 @@ Name: KubeOS Version: 1.0.6 -Release: 3 +Release: 4 Summary: O&M platform used to update the whole OS as an entirety License: Mulan PSL v2 Source0: https://gitee.com/openeuler/KubeOS/repository/archive/v%{version}.tar.gz @@ -15,6 +15,7 @@ Patch6: 0006-operator-delete-unnecessary-fmt-and-add-printing-for.patch Patch7: 0007-feat-os-operator-support-setting-TimeWindow-and-Time.patch Patch8: 0008-feat-os-proxy-add-ExcutionMode-to-os.patch Patch9: 0009-bugfix-fix-the-problem-that-proxy-will-get-all-os-fo.patch +Patch10: 0010-bugfix-os-operator-and-proxy-fix-the-issue-that-some.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: make rust cargo openssl-devel @@ -126,7 +127,13 @@ install -p -m 0600 ./files/os-release %{buildroot}/opt/kubeOS/files rm -rfv %{buildroot} %changelog -* Tue Jun 11 2024 Yuhang Wei - 1.0.6-3 +* Tue Sep 10 2024 liyuanrong - 1.0.6-4 +- Type:requirement +- CVE:NA +- SUG:restart +- DESC:fix the issue that some node configurations are not delivered + +* Wed Aug 21 2024 liyuanrong - 1.0.6-3 - Type:requirement - CVE:NA - SUG:restart