From d7d7430dc510123f6ac8b8b033bff59278ef8927 Mon Sep 17 00:00:00 2001 From: eulerstorage Date: Mon, 24 Feb 2020 10:00:46 +0800 Subject: [PATCH] fix memory leak of glusterfs --- ...ore-fix-memory-pool-management-races.patch | 450 ++++++++++++++++++ glusterfs.spec | 10 +- 2 files changed, 459 insertions(+), 1 deletion(-) create mode 100644 0000-core-fix-memory-pool-management-races.patch diff --git a/0000-core-fix-memory-pool-management-races.patch b/0000-core-fix-memory-pool-management-races.patch new file mode 100644 index 0000000..48dca4f --- /dev/null +++ b/0000-core-fix-memory-pool-management-races.patch @@ -0,0 +1,450 @@ +From fa6a9ff44433c064188f57aec90a64a1b627006a Mon Sep 17 00:00:00 2001 +From: Xavi Hernandez +Date: Fri, 7 Feb 2020 10:19:57 +0100 +Subject: [PATCH] core: fix memory pool management races + +Objects allocated from a per-thread memory pool keep a reference to it +to be able to return the object to the pool when not used anymore. The +object holding this reference can have a long life cycle that could +survive a glfs_fini() call. + +This means that it's unsafe to destroy memory pools from glfs_fini(). + +Another side effect of destroying memory pools from glfs_fini() is that +the TLS variable that points to one of those pools cannot be reset for +all alive threads. This means that any attempt to allocate memory from +those threads will access already free'd memory, which is very +dangerous. + +To fix these issues, mem_pools_fini() doesn't destroy pool lists +anymore. Only at process termination the pools are destroyed. + +Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965 +Fixes: bz#1801684 +Signed-off-by: Xavi Hernandez + +diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c +index 63b6358..ae06f8b 100644 +--- a/libglusterfs/src/globals.c ++++ b/libglusterfs/src/globals.c +@@ -314,7 +314,18 @@ glusterfs_cleanup(void *ptr) + GF_FREE(thread_syncopctx.groups); + } + +- mem_pool_thread_destructor(); ++ mem_pool_thread_destructor(NULL); ++} ++ ++void ++gf_thread_needs_cleanup(void) ++{ ++ /* The value stored in free_key TLS is not really used for anything, but ++ * pthread implementation doesn't call the TLS destruction function unless ++ * it's != NULL. This function must be called whenever something is ++ * allocated for this thread so that glusterfs_cleanup() will be called ++ * and resources can be released. */ ++ (void)pthread_setspecific(free_key, (void *)1); + } + + static void +diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h +index e19a7cf..82cd662 100644 +--- a/libglusterfs/src/glusterfs/globals.h ++++ b/libglusterfs/src/glusterfs/globals.h +@@ -162,6 +162,9 @@ glusterfs_leaseid_exist(void); + int + glusterfs_globals_init(glusterfs_ctx_t *ctx); + ++void ++gf_thread_needs_cleanup(void); ++ + struct tvec_base * + glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx); + void +diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h +index e044175..90fb882 100644 +--- a/libglusterfs/src/glusterfs/mem-pool.h ++++ b/libglusterfs/src/glusterfs/mem-pool.h +@@ -244,24 +244,26 @@ typedef struct per_thread_pool { + } per_thread_pool_t; + + typedef struct per_thread_pool_list { +- /* +- * These first two members are protected by the global pool lock. When +- * a thread first tries to use any pool, we create one of these. We +- * link it into the global list using thr_list so the pool-sweeper +- * thread can find it, and use pthread_setspecific so this thread can +- * find it. When the per-thread destructor runs, we "poison" the pool +- * list to prevent further allocations. This also signals to the +- * pool-sweeper thread that the list should be detached and freed after +- * the next time it's swept. +- */ ++ /* thr_list is used to place the TLS pool_list into the active global list ++ * (pool_threads) or the inactive global list (pool_free_threads). It's ++ * protected by the global pool_lock. */ + struct list_head thr_list; +- unsigned int poison; ++ ++ /* This lock is used to update poison and the hot/cold lists of members ++ * of 'pools' array. */ ++ pthread_spinlock_t lock; ++ ++ /* This field is used to mark a pool_list as not being owned by any thread. ++ * This means that the sweeper thread won't be cleaning objects stored in ++ * its pools. mem_put() uses it to decide if the object being released is ++ * placed into its original pool_list or directly destroyed. */ ++ bool poison; ++ + /* + * There's really more than one pool, but the actual number is hidden + * in the implementation code so we just make it a single-element array + * here. + */ +- pthread_spinlock_t lock; + per_thread_pool_t pools[1]; + } per_thread_pool_list_t; + +@@ -306,7 +308,7 @@ void + mem_pool_destroy(struct mem_pool *pool); + + void +-mem_pool_thread_destructor(void); ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list); + + void + gf_mem_acct_enable_set(void *ctx); +diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c +index d0f8a64..e08e56c 100644 +--- a/libglusterfs/src/mem-pool.c ++++ b/libglusterfs/src/mem-pool.c +@@ -367,7 +367,6 @@ static __thread per_thread_pool_list_t *thread_pool_list = NULL; + #define POOL_SWEEP_SECS 30 + + typedef struct { +- struct list_head death_row; + pooled_obj_hdr_t *cold_lists[N_COLD_LISTS]; + unsigned int n_cold_lists; + } sweep_state_t; +@@ -384,36 +383,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; + static unsigned int init_count = 0; + static pthread_t sweeper_tid; + +-gf_boolean_t ++static bool + collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list) + { + unsigned int i; + per_thread_pool_t *pt_pool; +- gf_boolean_t poisoned; + + (void)pthread_spin_lock(&pool_list->lock); + +- poisoned = pool_list->poison != 0; +- if (!poisoned) { +- for (i = 0; i < NPOOLS; ++i) { +- pt_pool = &pool_list->pools[i]; +- if (pt_pool->cold_list) { +- if (state->n_cold_lists >= N_COLD_LISTS) { +- break; +- } +- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; ++ for (i = 0; i < NPOOLS; ++i) { ++ pt_pool = &pool_list->pools[i]; ++ if (pt_pool->cold_list) { ++ if (state->n_cold_lists >= N_COLD_LISTS) { ++ (void)pthread_spin_unlock(&pool_list->lock); ++ return true; + } +- pt_pool->cold_list = pt_pool->hot_list; +- pt_pool->hot_list = NULL; ++ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; + } ++ pt_pool->cold_list = pt_pool->hot_list; ++ pt_pool->hot_list = NULL; + } + + (void)pthread_spin_unlock(&pool_list->lock); + +- return poisoned; ++ return false; + } + +-void ++static void + free_obj_list(pooled_obj_hdr_t *victim) + { + pooled_obj_hdr_t *next; +@@ -425,82 +421,96 @@ free_obj_list(pooled_obj_hdr_t *victim) + } + } + +-void * ++static void * + pool_sweeper(void *arg) + { + sweep_state_t state; + per_thread_pool_list_t *pool_list; +- per_thread_pool_list_t *next_pl; +- per_thread_pool_t *pt_pool; +- unsigned int i; +- gf_boolean_t poisoned; ++ uint32_t i; ++ bool pending; + + /* + * This is all a bit inelegant, but the point is to avoid doing + * expensive things (like freeing thousands of objects) while holding a +- * global lock. Thus, we split each iteration into three passes, with ++ * global lock. Thus, we split each iteration into two passes, with + * only the first and fastest holding the lock. + */ + ++ pending = true; ++ + for (;;) { +- sleep(POOL_SWEEP_SECS); ++ /* If we know there's pending work to do (or it's the first run), we ++ * do collect garbage more often. */ ++ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS); ++ + (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); +- INIT_LIST_HEAD(&state.death_row); + state.n_cold_lists = 0; ++ pending = false; + + /* First pass: collect stuff that needs our attention. */ + (void)pthread_mutex_lock(&pool_lock); +- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list) ++ list_for_each_entry(pool_list, &pool_threads, thr_list) + { +- (void)pthread_mutex_unlock(&pool_lock); +- poisoned = collect_garbage(&state, pool_list); +- (void)pthread_mutex_lock(&pool_lock); +- +- if (poisoned) { +- list_move(&pool_list->thr_list, &state.death_row); ++ if (collect_garbage(&state, pool_list)) { ++ pending = true; + } + } + (void)pthread_mutex_unlock(&pool_lock); + +- /* Second pass: free dead pools. */ +- (void)pthread_mutex_lock(&pool_free_lock); +- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list) +- { +- for (i = 0; i < NPOOLS; ++i) { +- pt_pool = &pool_list->pools[i]; +- free_obj_list(pt_pool->cold_list); +- free_obj_list(pt_pool->hot_list); +- pt_pool->hot_list = pt_pool->cold_list = NULL; +- } +- list_del(&pool_list->thr_list); +- list_add(&pool_list->thr_list, &pool_free_threads); +- } +- (void)pthread_mutex_unlock(&pool_free_lock); +- +- /* Third pass: free cold objects from live pools. */ ++ /* Second pass: free cold objects from live pools. */ + for (i = 0; i < state.n_cold_lists; ++i) { + free_obj_list(state.cold_lists[i]); + } + (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + } ++ ++ return NULL; + } + + void +-mem_pool_thread_destructor(void) ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) + { +- per_thread_pool_list_t *pool_list = thread_pool_list; +- +- /* The pool-sweeper thread will take it from here. +- * +- * We can change 'poison' here without taking locks because the change +- * itself doesn't interact with other parts of the code and a simple write +- * is already atomic from the point of view of the processor. +- * +- * This change can modify what mem_put() does, but both possibilities are +- * fine until the sweeper thread kicks in. The real synchronization must be +- * between mem_put() and the sweeper thread. */ ++ per_thread_pool_t *pt_pool; ++ uint32_t i; ++ ++ if (pool_list == NULL) { ++ pool_list = thread_pool_list; ++ } ++ ++ /* The current thread is terminating. None of the allocated objects will ++ * be used again. We can directly destroy them here instead of delaying ++ * it until the next sweeper loop. */ + if (pool_list != NULL) { +- pool_list->poison = 1; ++ /* Remove pool_list from the global list to avoid that sweeper ++ * could touch it. */ ++ pthread_mutex_lock(&pool_lock); ++ list_del(&pool_list->thr_list); ++ pthread_mutex_unlock(&pool_lock); ++ ++ /* We need to protect hot/cold changes from potential mem_put() calls ++ * that reference this pool_list. Once poison is set to true, we are ++ * sure that no one else will touch hot/cold lists. The only possible ++ * race is when at the same moment a mem_put() is adding a new item ++ * to the hot list. We protect from that by taking pool_list->lock. ++ * After that we don't need the lock to destroy the hot/cold lists. */ ++ pthread_spin_lock(&pool_list->lock); ++ pool_list->poison = true; ++ pthread_spin_unlock(&pool_list->lock); ++ ++ for (i = 0; i < NPOOLS; i++) { ++ pt_pool = &pool_list->pools[i]; ++ ++ free_obj_list(pt_pool->hot_list); ++ pt_pool->hot_list = NULL; ++ ++ free_obj_list(pt_pool->cold_list); ++ pt_pool->cold_list = NULL; ++ } ++ ++ pthread_mutex_lock(&pool_free_lock); ++ list_add(&pool_list->thr_list, &pool_free_threads); ++ pthread_mutex_unlock(&pool_free_lock); ++ + thread_pool_list = NULL; + } + } +@@ -528,6 +538,30 @@ mem_pools_preinit(void) + init_done = GF_MEMPOOL_INIT_EARLY; + } + ++static __attribute__((destructor)) void ++mem_pools_postfini(void) ++{ ++ per_thread_pool_list_t *pool_list, *next; ++ ++ /* This is part of a process shutdown (or dlclose()) which means that ++ * most probably all threads should be stopped. However this is not the ++ * case for gluster and there are even legitimate situations in which we ++ * could have some threads alive. What is sure is that none of those ++ * threads should be using anything from this library, so destroying ++ * everything here should be fine and safe. */ ++ ++ list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list) ++ { ++ mem_pool_thread_destructor(pool_list); ++ } ++ ++ list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list) ++ { ++ list_del(&pool_list->thr_list); ++ FREE(pool_list); ++ } ++} ++ + /* Call mem_pools_init() once threading has been configured completely. This + * prevent the pool_sweeper thread from getting killed once the main() thread + * exits during deamonizing. */ +@@ -560,10 +594,6 @@ mem_pools_fini(void) + */ + break; + case 1: { +- per_thread_pool_list_t *pool_list; +- per_thread_pool_list_t *next_pl; +- unsigned int i; +- + /* if mem_pools_init() was not called, sweeper_tid will be invalid + * and the functions will error out. That is not critical. In all + * other cases, the sweeper_tid will be valid and the thread gets +@@ -571,32 +601,11 @@ mem_pools_fini(void) + (void)pthread_cancel(sweeper_tid); + (void)pthread_join(sweeper_tid, NULL); + +- /* At this point all threads should have already terminated, so +- * it should be safe to destroy all pending per_thread_pool_list_t +- * structures that are stored for each thread. */ +- mem_pool_thread_destructor(); +- +- /* free all objects from all pools */ +- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, +- thr_list) +- { +- for (i = 0; i < NPOOLS; ++i) { +- free_obj_list(pool_list->pools[i].hot_list); +- free_obj_list(pool_list->pools[i].cold_list); +- pool_list->pools[i].hot_list = NULL; +- pool_list->pools[i].cold_list = NULL; +- } +- +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } +- +- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads, +- thr_list) +- { +- list_del(&pool_list->thr_list); +- FREE(pool_list); +- } ++ /* There could be threads still running in some cases, so we can't ++ * destroy pool_lists in use. We can also not destroy unused ++ * pool_lists because some allocated objects may still be pointing ++ * to them. */ ++ mem_pool_thread_destructor(NULL); + + init_done = GF_MEMPOOL_INIT_DESTROY; + /* Fall through. */ +@@ -617,7 +626,7 @@ mem_pools_fini(void) + { + } + void +-mem_pool_thread_destructor(void) ++mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) + { + } + +@@ -738,13 +747,21 @@ mem_get_pool_list(void) + } + } + ++ /* There's no need to take pool_list->lock, because this is already an ++ * atomic operation and we don't need to synchronize it with any change ++ * in hot/cold lists. */ ++ pool_list->poison = false; ++ + (void)pthread_mutex_lock(&pool_lock); +- pool_list->poison = 0; + list_add(&pool_list->thr_list, &pool_threads); + (void)pthread_mutex_unlock(&pool_lock); + + thread_pool_list = pool_list; + ++ /* Ensure that all memory objects associated to the new pool_list are ++ * destroyed when the thread terminates. */ ++ gf_thread_needs_cleanup(); ++ + return pool_list; + } + +diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c +index 2eb7b49..9d987f8 100644 +--- a/libglusterfs/src/syncop.c ++++ b/libglusterfs/src/syncop.c +@@ -97,6 +97,13 @@ syncopctx_setfsgroups(int count, const void *groups) + + /* set/reset the ngrps, this is where reset of groups is handled */ + opctx->ngrps = count; ++ ++ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) { ++ /* This is the first time we are storing groups into the TLS structu ++ * so we mark the current thread so that it will be properly cleaned ++ * up when the thread terminates. */ ++ gf_thread_needs_cleanup(); ++ } + opctx->valid |= SYNCOPCTX_GROUPS; + + out: +-- +1.8.3.1 + diff --git a/glusterfs.spec b/glusterfs.spec index 00742f4..3932597 100644 --- a/glusterfs.spec +++ b/glusterfs.spec @@ -3,13 +3,15 @@ Name: glusterfs Version: 7.0 -Release: 3 +Release: 4 License: GPLv2 and LGPLv3+ Summary: Aggregating distributed file system URL: http://docs.gluster.org/ Source0: https://download.gluster.org/pub/gluster/glusterfs/7/7.0/glusterfs-7.0.tar.gz Source7: glusterfsd.service +Patch0: 0000-core-fix-memory-pool-management-races.patch + BuildRequires: systemd bison flex gcc make libtool ncurses-devel readline-devel libattr-devel BuildRequires: libxml2-devel openssl-devel libaio-devel libacl-devel python3-devel git perl BuildRequires: libtirpc-devel rpcgen libuuid-devel sqlite-devel lvm2-devel firewalld @@ -457,6 +459,12 @@ exit 0 %{_mandir}/man8/*gluster*.8* %changelog +* Mon Feb 24 2020 renxudong - 7.0-4 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:fix glfs_new memory leak + * Wed Feb 12 2020 Ruijun Ge - 7.0-3 - Type:bugfix - ID:NA -- Gitee