[PATCH] Fix pthreadpool fork behaviour

Volker Lendecke Volker.Lendecke at SerNet.DE
Wed Aug 30 14:08:18 UTC 2017


Hi!

At a customer site we've had tons of smbds stuck in

#0  0x00007fb9266bc579 in pthread_cond_destroy@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
#1  0x00007fb9217bd16a in pthreadpool_free (pool=0x7fb9285b0590) at ../source3/lib/pthreadpool/pthreadpool.c:246
#2  0x00007fb9217bd6a1 in pthreadpool_free (pool=<optimized out>) at ../source3/lib/pthreadpool/pthreadpool.c:301
#3  pthreadpool_destroy (pool=<optimized out>) at ../source3/lib/pthreadpool/pthreadpool.c:287
#4  0x00007fb9217bde6c in pthreadpool_tevent_destructor (pool=pool at entry=0x7fb9285aeeb0) at ../source3/lib/pthreadpool/pthreadpool_tevent.c:82
#5  0x00007fb9256c20e0 in _tc_free_internal (location=0x7fb9217c1c28 "../source3/lib/messages_dgm.c:1285", tc=0x7fb9285aee50) at ../lib/talloc/talloc.c:1078
#6  _tc_free_children_internal (location=0x7fb9217c1c28 "../source3/lib/messages_dgm.c:1285", ptr=0x7fb9285b0460, tc=0x7fb9285b0400) at ../lib/talloc/talloc.c:1593
#7  _tc_free_internal (tc=0x7fb9285b0400, location=0x7fb9217c1c28 "../source3/lib/messages_dgm.c:1285") at ../lib/talloc/talloc.c:1104
#8  0x00007fb9217bfbac in messaging_dgm_destroy () at ../source3/lib/messages_dgm.c:1285
#9  0x00007fb9217c086d in msg_dgm_ref_destructor (r=r at entry=0x7fb92867e320) at ../source3/lib/messages_dgm_ref.c:160
#10 0x00007fb9256c1ef3 in _tc_free_internal (tc=0x7fb92867e2c0, location=0x7fb923d92bfe "../source3/lib/messages.c:393") at ../lib/talloc/talloc.c:1078
#11 0x00007fb923d6f79e in messaging_reinit (msg_ctx=msg_ctx at entry=0x7fb9285b0390) at ../source3/lib/messages.c:393
#12 0x00007fb923d6305b in reinit_after_fork (msg_ctx=msg_ctx at entry=0x7fb9285b0390, ev_ctx=ev_ctx at entry=0x7fb9285af5f0, parent_longlived=parent_longlived at entry=true, comment=comment at entry=0x0) at ../source3/lib/util.c:477

which led to the attached patchset. The new test works fine on FreeBSD
11 too.

Comments?

Thanks, Volker

P.S: I think this isn't the whole story yet, but this patchset I
believe improves the situation a bit.

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt at sernet.de
-------------- next part --------------
From 36b368dec1f08f53b6ae4d3a84a18cc8982d3d1a Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 28 Aug 2017 16:38:19 +0200
Subject: [PATCH 1/2] pthreadpool: Fix fork behaviour

glibc's pthread_cond_wait(&c, &m) increments m.__data.__nusers, making
pthread_mutex_destroy return EBUSY. Thus we can't allow any thread waiting for
a job across a fork. Also, the state of the condvar itself is unclear across a
fork. Right now to me it looks like an initialized but unused condvar can be
used in the child. Busy worker threads don't cause any trouble here, they don't
hold mutexes or condvars. Also, they can't reach the condvar because _prepare
holds all mutexes.

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/pthreadpool/pthreadpool.c | 67 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/lib/pthreadpool/pthreadpool.c b/lib/pthreadpool/pthreadpool.c
index f97cdcc6c15..ca919763708 100644
--- a/lib/pthreadpool/pthreadpool.c
+++ b/lib/pthreadpool/pthreadpool.c
@@ -89,6 +89,13 @@ struct pthreadpool {
 	 * Number of idle threads
 	 */
 	int num_idle;
+
+	/*
+	 * Condition variable indicating that we should quickly go
+	 * away, making way for fork() without anybody waiting on
+	 * pool->condvar.
+	 */
+	pthread_cond_t *prefork_cond;
 };
 
 static pthread_mutex_t pthreadpools_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -148,6 +155,7 @@ int pthreadpool_init(unsigned max_threads, struct pthreadpool **presult,
 	pool->num_threads = 0;
 	pool->max_threads = max_threads;
 	pool->num_idle = 0;
+	pool->prefork_cond = NULL;
 
 	ret = pthread_mutex_lock(&pthreadpools_mutex);
 	if (ret != 0) {
@@ -169,6 +177,47 @@ int pthreadpool_init(unsigned max_threads, struct pthreadpool **presult,
 	return 0;
 }
 
+static void pthreadpool_prepare_pool(struct pthreadpool *pool)
+{
+	pthread_cond_t prefork_cond = PTHREAD_COND_INITIALIZER;
+	int ret;
+
+	ret = pthread_mutex_lock(&pool->mutex);
+	assert(ret == 0);
+
+	while (pool->num_idle != 0) {
+		/*
+		 * Exit all idle threads, which are all blocked in
+		 * pool->condvar. In the child we might destroy the
+		 * pool, which would result in undefined behaviour in
+		 * the pthread_cond_destroy(pool->condvar). glibc just
+		 * blocks here.
+		 */
+		pool->prefork_cond = &prefork_cond;
+
+		ret = pthread_cond_signal(&pool->condvar);
+		assert(ret == 0);
+
+		ret = pthread_cond_wait(&prefork_cond, &pool->mutex);
+		assert(ret == 0);
+
+		pool->prefork_cond = NULL;
+	}
+
+	ret = pthread_cond_destroy(&prefork_cond);
+	assert(ret == 0);
+
+	/*
+	 * Probably it's well-defined somewhere: What happens to
+	 * condvars after a fork? The rationale of pthread_atfork only
+	 * talks about mutexes. So better be safe than sorry and
+	 * destroy/reinit pool->condvar across a fork.
+	 */
+
+	ret = pthread_cond_destroy(&pool->condvar);
+	assert(ret == 0);
+}
+
 static void pthreadpool_prepare(void)
 {
 	int ret;
@@ -180,8 +229,7 @@ static void pthreadpool_prepare(void)
 	pool = pthreadpools;
 
 	while (pool != NULL) {
-		ret = pthread_mutex_lock(&pool->mutex);
-		assert(ret == 0);
+		pthreadpool_prepare_pool(pool);
 		pool = pool->next;
 	}
 }
@@ -194,6 +242,8 @@ static void pthreadpool_parent(void)
 	for (pool = DLIST_TAIL(pthreadpools);
 	     pool != NULL;
 	     pool = DLIST_PREV(pool)) {
+		ret = pthread_cond_init(&pool->condvar, NULL);
+		assert(ret == 0);
 		ret = pthread_mutex_unlock(&pool->mutex);
 		assert(ret == 0);
 	}
@@ -216,6 +266,8 @@ static void pthreadpool_child(void)
 		pool->head = 0;
 		pool->num_jobs = 0;
 
+		ret = pthread_cond_init(&pool->condvar, NULL);
+		assert(ret == 0);
 		ret = pthread_mutex_unlock(&pool->mutex);
 		assert(ret == 0);
 	}
@@ -406,6 +458,17 @@ static void *pthreadpool_server(void *arg)
 				&pool->condvar, &pool->mutex, &ts);
 			pool->num_idle -= 1;
 
+			if (pool->prefork_cond != NULL) {
+				/*
+				 * We must allow fork() to continue
+				 * without anybody waiting on
+				 * &pool->condvar.
+				 */
+				pthread_cond_signal(pool->prefork_cond);
+				pthreadpool_server_exit(pool);
+				return NULL;
+			}
+
 			if (res == ETIMEDOUT) {
 
 				if (pool->num_jobs == 0) {
-- 
2.11.0


From 0a9b890ac9f11bae92b4736a27503bae8eec9978 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Tue, 29 Aug 2017 21:57:54 +0200
Subject: [PATCH 2/2] pthreadpool: Test fork with an active thread

Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/pthreadpool/tests.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)

diff --git a/lib/pthreadpool/tests.c b/lib/pthreadpool/tests.c
index c4d2e6a1382..999118286eb 100644
--- a/lib/pthreadpool/tests.c
+++ b/lib/pthreadpool/tests.c
@@ -7,6 +7,7 @@
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <signal.h>
 #include "pthreadpool_pipe.h"
 #include "pthreadpool_tevent.h"
 
@@ -192,6 +193,113 @@ static int test_fork(void)
 	return 0;
 }
 
+static void busyfork_job(void *private_data)
+{
+	return;
+}
+
+static int test_busyfork(void)
+{
+	struct pthreadpool_pipe *p;
+	int fds[2];
+	struct pollfd pfd;
+	pid_t child, waitret;
+	int ret, jobnum, wstatus;
+
+	ret = pipe(fds);
+	if (ret == -1) {
+		perror("pipe failed");
+		return -1;
+	}
+
+	ret = pthreadpool_pipe_init(1, &p);
+	if (ret != 0) {
+		fprintf(stderr, "pthreadpool_pipe_init failed: %s\n",
+			strerror(ret));
+		return -1;
+	}
+
+	ret = pthreadpool_pipe_add_job(p, 1, busyfork_job, NULL);
+	if (ret != 0) {
+		fprintf(stderr, "pthreadpool_add_job failed: %s\n",
+			strerror(ret));
+		return -1;
+	}
+
+	ret = pthreadpool_pipe_finished_jobs(p, &jobnum, 1);
+	if (ret != 1) {
+		fprintf(stderr, "pthreadpool_pipe_finished_jobs failed\n");
+		return -1;
+	}
+
+	poll(NULL, 0, 200);
+
+	child = fork();
+	if (child < 0) {
+		perror("fork failed");
+		return -1;
+	}
+
+	if (child == 0) {
+		ret = pthreadpool_pipe_destroy(p);
+		if (ret != 0) {
+			fprintf(stderr, "pthreadpool_pipe_destroy failed: "
+				"%s\n", strerror(ret));
+			exit(1);
+		}
+		exit(0);
+	}
+
+	ret = close(fds[1]);
+	if (ret == -1) {
+		perror("close failed");
+		return -1;
+	}
+
+	pfd = (struct pollfd) { .fd = fds[0], .events = POLLIN };
+
+	ret = poll(&pfd, 1, 5000);
+	if (ret == -1) {
+		perror("poll failed");
+		return -1;
+	}
+	if (ret == 0) {
+		fprintf(stderr, "Child did not exit for 5 seconds\n");
+		/*
+		 * The child might hang forever in
+		 * pthread_cond_destroy for example. Be kind to the
+		 * system and kill it.
+		 */
+		kill(child, SIGTERM);
+		return -1;
+	}
+	if (ret != 1) {
+		fprintf(stderr, "poll returned %d -- huh??\n", ret);
+		return -1;
+	}
+
+	poll(NULL, 0, 200);
+
+	waitret = waitpid(child, &wstatus, WNOHANG);
+	if (waitret != child) {
+		fprintf(stderr, "waitpid returned %d\n", (int)waitret);
+		return -1;
+	}
+
+	if (!WIFEXITED(wstatus)) {
+		fprintf(stderr, "child did not properly exit\n");
+		return -1;
+	}
+
+	ret = WEXITSTATUS(wstatus);
+	if (ret != 0) {
+		fprintf(stderr, "child returned %d\n", ret);
+		return -1;
+	}
+
+	return 0;
+}
+
 static void test_tevent_wait(void *private_data)
 {
 	int *timeout = private_data;
@@ -301,6 +409,12 @@ int main(void)
 		return 1;
 	}
 
+	ret = test_busyfork();
+	if (ret != 0) {
+		fprintf(stderr, "test_busyfork failed\n");
+		return 1;
+	}
+
 	printf("success\n");
 	return 0;
 }
-- 
2.11.0



More information about the samba-technical mailing list