diff --git a/backport-Feature-lrmd-Perform-the-TLS-handshake-asynchronousl.patch b/backport-Feature-lrmd-Perform-the-TLS-handshake-asynchronousl.patch new file mode 100644 index 0000000000000000000000000000000000000000..f81e05e9b8464683d9acb5e49f353159dee06beb --- /dev/null +++ b/backport-Feature-lrmd-Perform-the-TLS-handshake-asynchronousl.patch @@ -0,0 +1,145 @@ +From d9c9b3af781be0ba2bc40c177d60fc3cc7ec1459 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Aug 2024 13:29:35 -0400 +Subject: [PATCH] Feature: lrmd: Perform the TLS handshake asynchronously. + +It can take some time for the gnutls handshake to complete, during which +time the cluster is stuck waiting. Instead, immediately attempt the +handshake. If that fails, then start a mainloop source that will +repeatedly attempt the handshake and report results when it finishes. + +Fixes T824 +--- + lib/lrmd/lrmd_client.c | 81 ++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 74 insertions(+), 7 deletions(-) + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index cbba4f6b14..dac316fbb7 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -100,6 +100,7 @@ typedef struct lrmd_private_s { + int expected_late_replies; + GList *pending_notify; + crm_trigger_t *process_notify; ++ crm_trigger_t *handshake_trigger; + #endif + + lrmd_event_callback callback; +@@ -626,6 +627,10 @@ lrmd_tls_connection_destroy(gpointer userdata) + g_list_free_full(native->pending_notify, lrmd_free_xml); + native->pending_notify = NULL; + } ++ if (native->handshake_trigger != NULL) { ++ mainloop_destroy_trigger(native->handshake_trigger); ++ native->handshake_trigger = NULL; ++ } + + free(native->remote->buffer); + free(native->remote->start_state); +@@ -1499,12 +1504,55 @@ add_tls_to_mainloop(lrmd_t *lrmd, bool do_handshake) + return rc; + } + ++struct handshake_data_s { ++ lrmd_t *lrmd; ++ time_t start_time; ++ int timeout_sec; ++}; ++ ++static gboolean ++try_handshake_cb(gpointer user_data) ++{ ++ struct handshake_data_s *hs = user_data; ++ lrmd_t *lrmd = hs->lrmd; ++ lrmd_private_t *native = lrmd->lrmd_private; ++ pcmk__remote_t *remote = native->remote; ++ ++ int rc = pcmk_rc_ok; ++ int tls_rc = GNUTLS_E_SUCCESS; ++ ++ if (time(NULL) >= hs->start_time + hs->timeout_sec) { ++ rc = ETIME; ++ ++ tls_handshake_failed(lrmd, GNUTLS_E_TIMEDOUT, rc); ++ free(hs); ++ return 0; ++ } ++ ++ rc = pcmk__tls_client_try_handshake(remote, &tls_rc); ++ ++ if (rc == pcmk_rc_ok) { ++ tls_handshake_succeeded(lrmd); ++ free(hs); ++ return 0; ++ } else if (rc == EAGAIN) { ++ mainloop_set_trigger(native->handshake_trigger); ++ return 1; ++ } else { ++ rc = EKEYREJECTED; ++ tls_handshake_failed(lrmd, tls_rc, rc); ++ free(hs); ++ return 0; ++ } ++} ++ + static void + lrmd_tcp_connect_cb(void *userdata, int rc, int sock) + { + lrmd_t *lrmd = userdata; + lrmd_private_t *native = lrmd->lrmd_private; + gnutls_datum_t psk_key = { NULL, 0 }; ++ int tls_rc = GNUTLS_E_SUCCESS; + + native->async_timer = 0; + +@@ -1517,9 +1565,7 @@ lrmd_tcp_connect_cb(void *userdata, int rc, int sock) + return; + } + +- /* The TCP connection was successful, so establish the TLS connection. +- * @TODO make this async to avoid blocking code in client +- */ ++ /* The TCP connection was successful, so establish the TLS connection. */ + + native->sock = sock; + +@@ -1546,11 +1592,32 @@ lrmd_tcp_connect_cb(void *userdata, int rc, int sock) + return; + } + +- if (tls_client_handshake(lrmd) != pcmk_rc_ok) { +- return; +- } ++ /* If the TLS handshake immediately succeeds or fails, we can handle that ++ * now without having to deal with mainloops and retries. Otherwise, add a ++ * trigger to keep trying until we get a result (or it times out). ++ */ ++ rc = pcmk__tls_client_try_handshake(native->remote, &tls_rc); ++ if (rc == EAGAIN) { ++ struct handshake_data_s *hs = NULL; + +- tls_handshake_succeeded(lrmd); ++ if (native->handshake_trigger != NULL) { ++ return; ++ } ++ ++ hs = pcmk__assert_alloc(1, sizeof(struct handshake_data_s)); ++ hs->lrmd = lrmd; ++ hs->start_time = time(NULL); ++ hs->timeout_sec = TLS_HANDSHAKE_TIMEOUT; ++ ++ native->handshake_trigger = mainloop_add_trigger(G_PRIORITY_LOW, try_handshake_cb, hs); ++ mainloop_set_trigger(native->handshake_trigger); ++ ++ } else if (rc == pcmk_rc_ok) { ++ tls_handshake_succeeded(lrmd); ++ ++ } else { ++ tls_handshake_failed(lrmd, tls_rc, rc); ++ } + } + + static int +-- +2.33.1.windows.1 + diff --git a/backport-Low-lrmd-Report-connection-failures-in-tls_handshake.patch b/backport-Low-lrmd-Report-connection-failures-in-tls_handshake.patch new file mode 100644 index 0000000000000000000000000000000000000000..ca5815c0b6e5a0f02b6bbb94821e47a59a0e5142 --- /dev/null +++ b/backport-Low-lrmd-Report-connection-failures-in-tls_handshake.patch @@ -0,0 +1,48 @@ +From 5d97a82227e19f7f567ab3a16264dc0162cd0cf7 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 14 Aug 2024 10:01:51 -0400 +Subject: [PATCH] Low: lrmd: Report connection failures in + tls_handshake_failed. + +This means we can also get rid of a couple calls to +report_async_connection_result that are no longer necessary because this +patch would cause duplicate calls. +--- + lib/lrmd/lrmd_client.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index ee0daae9b4..cbba4f6b14 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -1399,6 +1399,8 @@ tls_handshake_failed(lrmd_t *lrmd, int tls_rc, int rc) + "Pacemaker Remote server %s:%d failed: %s", + native->server, native->port, + (rc == EPROTO)? gnutls_strerror(tls_rc) : pcmk_rc_str(rc)); ++ report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); ++ + gnutls_deinit(*native->remote->tls_session); + gnutls_free(native->remote->tls_session); + native->remote->tls_session = NULL; +@@ -1545,7 +1547,6 @@ lrmd_tcp_connect_cb(void *userdata, int rc, int sock) + } + + if (tls_client_handshake(lrmd) != pcmk_rc_ok) { +- report_async_connection_result(lrmd, -EKEYREJECTED); + return; + } + +@@ -1676,10 +1677,6 @@ lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) + #ifdef HAVE_GNUTLS_GNUTLS_H + case pcmk__client_tls: + rc = lrmd_tls_connect_async(lrmd, timeout); +- if (rc) { +- /* connection failed, report rc now */ +- report_async_connection_result(lrmd, rc); +- } + break; + #endif + default: +-- +2.33.1.windows.1 + diff --git a/backport-Refactor-lrmd-Move-TLS-connection-success-failure-in.patch b/backport-Refactor-lrmd-Move-TLS-connection-success-failure-in.patch new file mode 100644 index 0000000000000000000000000000000000000000..9457bec959217ffff04db5dde0b3e3c08772ce59 --- /dev/null +++ b/backport-Refactor-lrmd-Move-TLS-connection-success-failure-in.patch @@ -0,0 +1,92 @@ +From 15894cd13edd0612e6213d3aaca2de07e06cc851 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Thu, 8 Aug 2024 13:26:40 -0400 +Subject: [PATCH] Refactor: lrmd: Move TLS connection success/failure into + functions. + +--- + lib/lrmd/lrmd_client.c | 44 ++++++++++++++++++++++++++++++------------ + 1 file changed, 32 insertions(+), 12 deletions(-) + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index 74292fb8fe..ee0daae9b4 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -67,8 +67,11 @@ gnutls_psk_client_credentials_t psk_cred_s; + static void lrmd_tls_disconnect(lrmd_t * lrmd); + static int global_remote_msg_id = 0; + static void lrmd_tls_connection_destroy(gpointer userdata); ++static int add_tls_to_mainloop(lrmd_t *lrmd, bool do_handshake); + #endif + ++static void report_async_connection_result(lrmd_t * lrmd, int rc); ++ + typedef struct lrmd_private_s { + uint64_t type; + char *token; +@@ -1386,6 +1389,32 @@ lrmd_gnutls_global_init(void) + } + gnutls_init = 1; + } ++ ++static void ++tls_handshake_failed(lrmd_t *lrmd, int tls_rc, int rc) ++{ ++ lrmd_private_t *native = lrmd->lrmd_private; ++ ++ crm_warn("Disconnecting after TLS handshake with " ++ "Pacemaker Remote server %s:%d failed: %s", ++ native->server, native->port, ++ (rc == EPROTO)? gnutls_strerror(tls_rc) : pcmk_rc_str(rc)); ++ gnutls_deinit(*native->remote->tls_session); ++ gnutls_free(native->remote->tls_session); ++ native->remote->tls_session = NULL; ++ lrmd_tls_connection_destroy(lrmd); ++} ++ ++static void ++tls_handshake_succeeded(lrmd_t *lrmd) ++{ ++ lrmd_private_t *native = lrmd->lrmd_private; ++ ++ crm_info("TLS connection to Pacemaker Remote server %s:%d succeeded", ++ native->server, native->port); ++ add_tls_to_mainloop(lrmd, true); ++ report_async_connection_result(lrmd, pcmk_rc2legacy(pcmk_rc_ok)); ++} + #endif + + static void +@@ -1420,15 +1449,9 @@ tls_client_handshake(lrmd_t *lrmd) + &tls_rc); + + if (rc != pcmk_rc_ok) { +- crm_warn("Disconnecting after TLS handshake with " +- "Pacemaker Remote server %s:%d failed: %s", +- native->server, native->port, +- (rc == EPROTO)? gnutls_strerror(tls_rc) : pcmk_rc_str(rc)); +- gnutls_deinit(*native->remote->tls_session); +- gnutls_free(native->remote->tls_session); +- native->remote->tls_session = NULL; +- lrmd_tls_connection_destroy(lrmd); ++ tls_handshake_failed(lrmd, tls_rc, rc); + } ++ + return rc; + } + +@@ -1526,10 +1549,7 @@ lrmd_tcp_connect_cb(void *userdata, int rc, int sock) + return; + } + +- crm_info("TLS connection to Pacemaker Remote server %s:%d succeeded", +- native->server, native->port); +- rc = add_tls_to_mainloop(lrmd, true); +- report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); ++ tls_handshake_succeeded(lrmd); + } + + static int +-- +2.33.1.windows.1 + diff --git a/pacemaker.spec b/pacemaker.spec index bbe602cd88e51ca72212390ef2555c11d558f61e..e7d6854e3f1cdd786829593635a7ba9e8a401f9d 100644 --- a/pacemaker.spec +++ b/pacemaker.spec @@ -17,7 +17,7 @@ ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) %global pcmkversion 2.1.8 -%global specversion 6 +%global specversion 7 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build %global commit 3980678f0372f2c7c294c01f61d63f0b2cafaad1 @@ -163,7 +163,10 @@ Patch7: backport-Log-pacemaker-based-client-name-can-be-NULL.patch Patch8: backport-Refactor-libcib-drop-op_common.patch Patch9: backport-Refactor-libcrmcommon-Add-pcmk__tls_client_try_hands.patch Patch10: backport-Low-tools-handle-orphans-when-outputting-node-histor.patch - +Patch11: backport-Refactor-lrmd-Move-TLS-connection-success-failure-in.patch +Patch12: backport-Low-lrmd-Report-connection-failures-in-tls_handshake.patch +Patch13: backport-Feature-lrmd-Perform-the-TLS-handshake-asynchronousl.patch + Requires: resource-agents Requires: %{pkgname_pcmk_libs} = %{version}-%{release} Requires: %{name}-cluster-libs = %{version}-%{release} @@ -770,6 +773,11 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Thu Dec 19 2024 liupei - 2.1.8-7 +- Feature: lrmd: Perform the TLS handshake asynchronously. +- Low: lrmd: Report connection failures in tls_handshake_failed. +- Refactor: lrmd: Move TLS connection success/failure into functions. + * Thu Dec 12 2024 liupei - 2.1.8-6 - Refactor: libcrmcommon: Add pcmk__tls_client_try_handshake. - Low: tools: handle orphans when outputting node history in crm_mon