[PATCHES][BUG 13170] Fix crash in pthreadpool thread after failure from pthread_create
Christof Schmitt
cs at samba.org
Tue Dec 5 19:24:21 UTC 2017
On Mon, Dec 04, 2017 at 01:33:20PM +0100, Andreas Schneider via samba-technical wrote:
> On Thursday, 30 November 2017 20:14:35 CET Christof Schmitt via samba-
> technical wrote:
> > On Thu, Nov 30, 2017 at 01:40:00PM +0100, Volker Lendecke wrote:
> > > On Wed, Nov 29, 2017 at 12:23:34PM -0700, Christof Schmitt via samba-
> technical wrote:
> > > > As discussed in a side-thread, my initial patch lets the error surface
> > > > back to the client. Here is an updated patch that avoids this issue. If
> > > > no thread can be created, but another still exists, let that handle the
> > > > request. If no thread exists, then fallback to processing the job
> > > > synchronously, as the system might be hitting a resource limit.
> > >
> > > From a review perspective, this looks good. For the refactoring one of
> > > course RB+. However, pthreadpool being such a fragile piece of code I
> > > would love to see unit tests for this. This will require explicit
> > > error injects with some DEVELOPER only code, but is it possible to get
> > > this? If you don't have time, I might try to code something up.
> >
> > Yes, see attached patch. I am not sure if that is the best solution, but
> > it is easy enough. A more complicate approach would be LD_PRELOAD to
> > overwrite the libc functions.
>
> Use mocking support from cmocka to mock pthreadpool_create_thread()?
>
>
> See https://lwn.net/Articles/558106/
Thanks for the pointer. Mocking the function completely does not work,
as the good path requires creating an actual thread, but the --wrap
option to override the call to pthread_create in the linker is useful.
cmocka looks useful and the pthreadpool tests could be converted to
cmocka. I decided against this approach for now to minimize the changes
and to also allow an easy backport to 4.6 where cmocka is not yet
available.
Christof
-------------- next part --------------
From 3addce5db0504bc9850fc29b8fa77a327ea20a70 Mon Sep 17 00:00:00 2001
From: Christof Schmitt <cs at samba.org>
Date: Tue, 28 Nov 2017 10:49:36 -0700
Subject: [PATCH 1/3] pthreadpool: Move creating of thread to new function
No functional change, but this simplifies error handling.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=13170
Signed-off-by: Christof Schmitt <cs at samba.org>
---
lib/pthreadpool/pthreadpool.c | 79 ++++++++++++++++++++++++-------------------
1 file changed, 45 insertions(+), 34 deletions(-)
diff --git a/lib/pthreadpool/pthreadpool.c b/lib/pthreadpool/pthreadpool.c
index 23885aa..4e1e5d4 100644
--- a/lib/pthreadpool/pthreadpool.c
+++ b/lib/pthreadpool/pthreadpool.c
@@ -521,14 +521,56 @@ static void *pthreadpool_server(void *arg)
}
}
-int pthreadpool_add_job(struct pthreadpool *pool, int job_id,
- void (*fn)(void *private_data), void *private_data)
+static int pthreadpool_create_thread(struct pthreadpool *pool)
{
pthread_attr_t thread_attr;
pthread_t thread_id;
int res;
sigset_t mask, omask;
+ /*
+ * Create a new worker thread. It should not receive any signals.
+ */
+
+ sigfillset(&mask);
+
+ res = pthread_attr_init(&thread_attr);
+ if (res != 0) {
+ return res;
+ }
+
+ res = pthread_attr_setdetachstate(
+ &thread_attr, PTHREAD_CREATE_DETACHED);
+ if (res != 0) {
+ pthread_attr_destroy(&thread_attr);
+ return res;
+ }
+
+ res = pthread_sigmask(SIG_BLOCK, &mask, &omask);
+ if (res != 0) {
+ pthread_attr_destroy(&thread_attr);
+ return res;
+ }
+
+ res = pthread_create(&thread_id, &thread_attr, pthreadpool_server,
+ (void *)pool);
+
+ assert(pthread_sigmask(SIG_SETMASK, &omask, NULL) == 0);
+
+ pthread_attr_destroy(&thread_attr);
+
+ if (res == 0) {
+ pool->num_threads += 1;
+ }
+
+ return res;
+}
+
+int pthreadpool_add_job(struct pthreadpool *pool, int job_id,
+ void (*fn)(void *private_data), void *private_data)
+{
+ int res;
+
res = pthread_mutex_lock(&pool->mutex);
if (res != 0) {
return res;
@@ -570,43 +612,12 @@ int pthreadpool_add_job(struct pthreadpool *pool, int job_id,
return 0;
}
- /*
- * Create a new worker thread. It should not receive any signals.
- */
-
- sigfillset(&mask);
-
- res = pthread_attr_init(&thread_attr);
- if (res != 0) {
- pthread_mutex_unlock(&pool->mutex);
- return res;
- }
-
- res = pthread_attr_setdetachstate(
- &thread_attr, PTHREAD_CREATE_DETACHED);
+ res = pthreadpool_create_thread(pool);
if (res != 0) {
- pthread_attr_destroy(&thread_attr);
- pthread_mutex_unlock(&pool->mutex);
- return res;
- }
-
- res = pthread_sigmask(SIG_BLOCK, &mask, &omask);
- if (res != 0) {
- pthread_attr_destroy(&thread_attr);
pthread_mutex_unlock(&pool->mutex);
return res;
}
- res = pthread_create(&thread_id, &thread_attr, pthreadpool_server,
- (void *)pool);
- if (res == 0) {
- pool->num_threads += 1;
- }
-
- assert(pthread_sigmask(SIG_SETMASK, &omask, NULL) == 0);
-
- pthread_attr_destroy(&thread_attr);
-
pthread_mutex_unlock(&pool->mutex);
return res;
}
--
1.8.3.1
From 927d6f69a1177d8ade815b0256370a140b79d695 Mon Sep 17 00:00:00 2001
From: Christof Schmitt <cs at samba.org>
Date: Tue, 28 Nov 2017 10:59:06 -0700
Subject: [PATCH 2/3] pthreadpool: Undo put_job when returning error
When an error is returned to the caller of pthreadpool_add_job, the job
should not be kept in the internal job array. Otherwise the caller might
free the data structure and a later worker thread would still reference
it.
When it is not possible to create a single worker thread, the system
might be out of resources or hitting a configured limit. In this case
fall back to calling the job function synchronously instead of raising
the error to the caller and possibly back to the SMB client.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=13170
Signed-off-by: Christof Schmitt <cs at samba.org>
---
lib/pthreadpool/pthreadpool.c | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/lib/pthreadpool/pthreadpool.c b/lib/pthreadpool/pthreadpool.c
index 4e1e5d4..309aba9 100644
--- a/lib/pthreadpool/pthreadpool.c
+++ b/lib/pthreadpool/pthreadpool.c
@@ -429,6 +429,11 @@ static bool pthreadpool_put_job(struct pthreadpool *p,
return true;
}
+static void pthreadpool_undo_put_job(struct pthreadpool *p)
+{
+ p->num_jobs -= 1;
+}
+
static void *pthreadpool_server(void *arg)
{
struct pthreadpool *pool = (struct pthreadpool *)arg;
@@ -599,6 +604,9 @@ int pthreadpool_add_job(struct pthreadpool *pool, int job_id,
* We have idle threads, wake one.
*/
res = pthread_cond_signal(&pool->condvar);
+ if (res != 0) {
+ pthreadpool_undo_put_job(pool);
+ }
pthread_mutex_unlock(&pool->mutex);
return res;
}
@@ -614,8 +622,24 @@ int pthreadpool_add_job(struct pthreadpool *pool, int job_id,
res = pthreadpool_create_thread(pool);
if (res != 0) {
- pthread_mutex_unlock(&pool->mutex);
- return res;
+ if (pool->num_threads == 0) {
+ /*
+ * No thread could be created to run job,
+ * fallback to sync call.
+ */
+ pthreadpool_undo_put_job(pool);
+ pthread_mutex_unlock(&pool->mutex);
+
+ fn(private_data);
+ return pool->signal_fn(job_id, fn, private_data,
+ pool->signal_fn_private_data);
+ }
+
+ /*
+ * At least one thread is still available, let
+ * that one run the queued job.
+ */
+ res = 0;
}
pthread_mutex_unlock(&pool->mutex);
--
1.8.3.1
From d3f79236f12dbf1a5daf59e7f2c4cc09a8d3977f Mon Sep 17 00:00:00 2001
From: Christof Schmitt <cs at samba.org>
Date: Tue, 5 Dec 2017 11:48:10 -0700
Subject: [PATCH 3/3] pthreadpool: Add test for handling pthread_create failure
This test uses the --wrap option of the linker to simulate errors from
pthread_create.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=13170
Signed-off-by: Christof Schmitt <cs at samba.org>
---
lib/pthreadpool/tests.c | 148 ++++++++++++++++++++++++++++++++++++++++++
lib/pthreadpool/wscript_build | 1 +
2 files changed, 149 insertions(+)
diff --git a/lib/pthreadpool/tests.c b/lib/pthreadpool/tests.c
index 1aab80c..d768a75 100644
--- a/lib/pthreadpool/tests.c
+++ b/lib/pthreadpool/tests.c
@@ -382,6 +382,148 @@ static int test_tevent_1(void)
return 0;
}
+static int inject_pthread_create_failure;
+
+int __wrap_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine) (void *), void *arg);
+int __real_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine) (void *), void *arg);
+
+int __wrap_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine) (void *), void *arg)
+{
+ if (inject_pthread_create_failure != 0) {
+ return inject_pthread_create_failure;
+ }
+
+ return __real_pthread_create(thread, attr, start_routine, arg);
+}
+
+static void test_job_threadid(void *ptr)
+{
+ pthread_t *threadid = ptr;
+
+ *threadid = pthread_self();
+}
+
+static int test_create_do(struct tevent_context *ev,
+ struct pthreadpool_tevent *pool,
+ bool *in_main_thread)
+{
+ struct tevent_req *req;
+ pthread_t main_thread, worker_thread;
+ bool ok;
+ int ret;
+
+ main_thread = pthread_self();
+
+ req = pthreadpool_tevent_job_send(
+ ev, ev, pool, test_job_threadid, &worker_thread);
+ if (req == NULL) {
+ fprintf(stderr, "pthreadpool_tevent_job_send failed\n");
+ TALLOC_FREE(ev);
+ return ENOMEM;
+ }
+
+ ok = tevent_req_poll(req, ev);
+ if (!ok) {
+ ret = errno;
+ fprintf(stderr, "tevent_req_poll failed: %s\n",
+ strerror(ret));
+ TALLOC_FREE(ev);
+ return ret;
+ }
+
+
+ ret = pthreadpool_tevent_job_recv(req);
+ TALLOC_FREE(req);
+ if (ret != 0) {
+ fprintf(stderr, "tevent_req_recv failed: %s\n",
+ strerror(ret));
+ TALLOC_FREE(ev);
+ return ret;
+ }
+ *in_main_thread = pthread_equal(worker_thread, main_thread);
+
+ return 0;
+}
+
+static int test_create(void)
+{
+ struct tevent_context *ev;
+ struct pthreadpool_tevent *pool;
+ int ret;
+ bool in_main_thread;
+
+ ev = tevent_context_init(NULL);
+ if (ev == NULL) {
+ ret = errno;
+ fprintf(stderr, "tevent_context_init failed: %s\n",
+ strerror(ret));
+ return ret;
+ }
+ ret = pthreadpool_tevent_init(ev, 0, &pool);
+ if (ret != 0) {
+ fprintf(stderr, "pthreadpool_tevent_init failed: %s\n",
+ strerror(ret));
+ TALLOC_FREE(ev);
+ return ret;
+ }
+
+ /*
+ * When pthreadpool cannot create the first worker thread,
+ * this job will run in the sync fallback in the main thread.
+ */
+ inject_pthread_create_failure = EAGAIN;
+ ret = test_create_do(ev, pool, &in_main_thread);
+ if (ret != 0) {
+ fprintf(stderr, "First job failed.\n");
+ goto out;
+ }
+ if (!in_main_thread) {
+ fprintf(stderr, "First job did not run in main thread.\n");
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ * When a thread can be created, the job will run in the worker thread.
+ */
+ inject_pthread_create_failure = 0;
+ ret = test_create_do(ev, pool, &in_main_thread);
+ if (ret != 0) {
+ fprintf(stderr, "Second job failed.\n");
+ goto out;
+ }
+ if (in_main_thread) {
+ fprintf(stderr, "Second job did not run in worker thread.\n");
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ * Workerthread will still be active for a second; immediately
+ * running another job will also use the worker thread, even
+ * if a new thread cannot be created.
+ */
+ inject_pthread_create_failure = EAGAIN;
+ ret = test_create_do(ev, pool, &in_main_thread);
+ if (ret != 0) {
+ fprintf(stderr, "Third job failed.\n");
+ goto out;
+ }
+ if (in_main_thread) {
+ fprintf(stderr, "Third job did not run in worker thread.\n");
+ ret = -1;
+ goto out;
+ }
+
+out:
+ TALLOC_FREE(pool);
+ TALLOC_FREE(ev);
+ return ret;
+}
+
int main(void)
{
int ret;
@@ -423,6 +565,12 @@ int main(void)
return 1;
}
+ ret = test_create();
+ if (ret != 0) {
+ fprintf(stderr, "test_create failed\n");
+ return 1;
+ }
+
printf("success\n");
return 0;
}
diff --git a/lib/pthreadpool/wscript_build b/lib/pthreadpool/wscript_build
index d530463..b60557b 100644
--- a/lib/pthreadpool/wscript_build
+++ b/lib/pthreadpool/wscript_build
@@ -19,5 +19,6 @@ else:
bld.SAMBA_BINARY('pthreadpooltest',
source='tests.c',
deps='PTHREADPOOL',
+ ldflags='-Wl,--wrap=pthread_create',
enabled=bld.env.WITH_PTHREADPOOL,
install=False)
--
1.8.3.1
More information about the samba-technical
mailing list