[PATCH] for bug 11381

Volker Lendecke Volker.Lendecke at SerNet.DE
Wed Jul 8 12:14:56 UTC 2015


Hi!

While we are still discussing the proper way to fix bug
11381, let me propose the attached interim fix as an
emergency patch that restores pre-mutex tdb behaviour.

With best regards,

Volker

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt at sernet.de
-------------- next part --------------
From e2a2777d2b1013cab150c7f59f938e3fc32ebc20 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 6 Jul 2015 13:13:36 +0200
Subject: [PATCH 1/2] tdb: Fix bug 11381, deadlock

This fixes a deadlock in tdb that is a bad interaction between tdb_lockall
and tdb_traverse. This deadlock condition has been around even before
tdb mutexes, it's just that the kernel fcntl EDEADLK detection protected
us from this ABBA lock condition to become a real deadlock stalling
processes. With tdb mutexes, this deadlock protection is gone, so we do
lock dead.

This patch glosses over this particular ABBA condition, making tdb with
mutexes behave the same as tdb without mutexes. Admittedly this is no
real fix, but it works around a real user's problem.

Bug: https://bugzilla.samba.org/show_bug.cgi?id=11381
Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/tdb/common/traverse.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/lib/tdb/common/traverse.c b/lib/tdb/common/traverse.c
index 618670f..e18e3c3 100644
--- a/lib/tdb/common/traverse.c
+++ b/lib/tdb/common/traverse.c
@@ -245,13 +245,25 @@ _PUBLIC_ int tdb_traverse(struct tdb_context *tdb,
 		 tdb_traverse_func fn, void *private_data)
 {
 	struct tdb_traverse_lock tl = { NULL, 0, 0, F_WRLCK };
+	enum tdb_lock_flags lock_flags;
 	int ret;
 
 	if (tdb->read_only || tdb->traverse_read) {
 		return tdb_traverse_read(tdb, fn, private_data);
 	}
 
-	if (tdb_transaction_lock(tdb, F_WRLCK, TDB_LOCK_WAIT)) {
+	lock_flags = TDB_LOCK_WAIT;
+
+	if (tdb->allrecord_lock.count != 0) {
+		/*
+		 * This avoids a deadlock between tdb_lockall() and
+		 * tdb_traverse(). See
+		 * https://bugzilla.samba.org/show_bug.cgi?id=11381
+		 */
+		lock_flags = TDB_LOCK_NOWAIT;
+	}
+
+	if (tdb_transaction_lock(tdb, F_WRLCK, lock_flags)) {
 		return -1;
 	}
 
-- 
1.9.1


From befec5ce283dda35cd606a065d2b61bee2f5023a Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 6 Jul 2015 10:49:47 +0200
Subject: [PATCH 2/2] tdb: Reproducer for Bug 11381

Bug: https://bugzilla.samba.org/show_bug.cgi?id=11381
Signed-off-by: Volker Lendecke <vl at samba.org>
---
 lib/tdb/test/run-allrecord-traverse-deadlock.c | 203 +++++++++++++++++++++++++
 lib/tdb/wscript                                |   1 +
 2 files changed, 204 insertions(+)
 create mode 100644 lib/tdb/test/run-allrecord-traverse-deadlock.c

diff --git a/lib/tdb/test/run-allrecord-traverse-deadlock.c b/lib/tdb/test/run-allrecord-traverse-deadlock.c
new file mode 100644
index 0000000..2c58206
--- /dev/null
+++ b/lib/tdb/test/run-allrecord-traverse-deadlock.c
@@ -0,0 +1,203 @@
+#include "../common/tdb_private.h"
+#include "../common/io.c"
+#include "../common/tdb.c"
+#include "../common/lock.c"
+#include "../common/freelist.c"
+#include "../common/traverse.c"
+#include "../common/transaction.c"
+#include "../common/error.c"
+#include "../common/open.c"
+#include "../common/check.c"
+#include "../common/hash.c"
+#include "../common/mutex.c"
+#include "tap-interface.h"
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdarg.h>
+#include "logging.h"
+
+static void do_allrecord_lock(const char *name, int tdb_flags, int up,
+			      int down)
+{
+	struct tdb_context *tdb;
+	int ret;
+	ssize_t nread, nwritten;
+	char c = 0;
+
+	tdb = tdb_open_ex(name, 3, tdb_flags,
+			  O_RDWR|O_CREAT, 0755, &taplogctx, NULL);
+	ok(tdb, "tdb_open_ex should succeed");
+
+	ret = tdb_lockall(tdb);
+	ok(ret == 0, "tdb_lockall should succeed");
+
+	nwritten = write(up, &c, sizeof(c));
+	ok(nwritten == sizeof(c), "write should succeed");
+
+	nread = read(down, &c, sizeof(c));
+	ok(nread == sizeof(c), "read should succeed");
+
+	ret = tdb_traverse(tdb, NULL, NULL);
+	ok(ret == -1, "do_allrecord_lock: traverse should fail");
+
+	nwritten = write(up, &c, sizeof(c));
+	ok(nwritten == sizeof(c), "write should succeed");
+
+	exit(0);
+}
+
+static void do_traverse(const char *name, int tdb_flags, int up, int down)
+{
+	struct tdb_context *tdb;
+	int ret;
+	ssize_t nread, nwritten;
+	char c = 0;
+
+	tdb = tdb_open_ex(name, 3, tdb_flags,
+			  O_RDWR|O_CREAT, 0755, &taplogctx, NULL);
+	ok(tdb, "tdb_open_ex should succeed");
+
+	ret = tdb_traverse(tdb, NULL, NULL);
+	ok(ret == 1, "do_traverse: tdb_traverse should return 1 record");
+
+	nwritten = write(up, &c, sizeof(c));
+	ok(nwritten == sizeof(c), "write should succeed");
+
+	nread = read(down, &c, sizeof(c));
+	ok(nread == sizeof(c), "read should succeed");
+
+	exit(0);
+}
+
+/*
+ * Process 1: get the allrecord_lock on a tdb.
+ * Process 2: start a traverse, this will stall waiting for the
+ *            first chainlock: That is taken by the allrecord_lock
+ * Process 1: start a traverse: This will get EDEADLK in trying to
+ *            get the TRANSACTION_LOCK. It will deadlock for mutexes,
+ *            which don't have built-in deadlock detection.
+ */
+
+static int do_tests(const char *name, int tdb_flags)
+{
+	struct tdb_context *tdb;
+	int ret;
+	pid_t traverse_child, allrecord_child;
+	int traverse_down[2];
+	int traverse_up[2];
+	int allrecord_down[2];
+	int allrecord_up[2];
+	char c;
+	ssize_t nread, nwritten;
+	TDB_DATA key, data;
+
+	key.dsize = strlen("hi");
+	key.dptr = discard_const_p(uint8_t, "hi");
+	data.dsize = strlen("world");
+	data.dptr = discard_const_p(uint8_t, "world");
+
+	tdb = tdb_open_ex(name, 3, tdb_flags,
+			  O_RDWR|O_CREAT, 0755, &taplogctx, NULL);
+	ok(tdb, "tdb_open_ex should succeed");
+
+	ret = tdb_store(tdb, key, data, TDB_INSERT);
+	ok(ret == 0, "tdb_store should succeed");
+
+	ret = pipe(traverse_down);
+	ok(ret == 0, "pipe should succeed");
+
+	ret = pipe(traverse_up);
+	ok(ret == 0, "pipe should succeed");
+
+	ret = pipe(allrecord_down);
+	ok(ret == 0, "pipe should succeed");
+
+	ret = pipe(allrecord_up);
+	ok(ret == 0, "pipe should succeed");
+
+	allrecord_child = fork();
+	ok(allrecord_child != -1, "fork should succeed");
+
+	if (allrecord_child == 0) {
+		tdb_close(tdb);
+		close(traverse_up[0]);
+		close(traverse_up[1]);
+		close(traverse_down[0]);
+		close(traverse_down[1]);
+		close(allrecord_up[0]);
+		close(allrecord_down[1]);
+		do_allrecord_lock(name, tdb_flags,
+				  allrecord_up[1], allrecord_down[0]);
+		exit(0);
+	}
+	close(allrecord_up[1]);
+	close(allrecord_down[0]);
+
+	nread = read(allrecord_up[0], &c, sizeof(c));
+	ok(nread == sizeof(c), "read should succeed");
+
+	traverse_child = fork();
+	ok(traverse_child != -1, "fork should succeed");
+
+	if (traverse_child == 0) {
+		tdb_close(tdb);
+		close(traverse_up[0]);
+		close(traverse_down[1]);
+		close(allrecord_up[0]);
+		close(allrecord_down[1]);
+		do_traverse(name, tdb_flags,
+			    traverse_up[1], traverse_down[0]);
+		exit(0);
+	}
+	close(traverse_up[1]);
+	close(traverse_down[0]);
+
+	poll(NULL, 0, 1000);
+
+	nwritten = write(allrecord_down[1], &c, sizeof(c));
+	ok(nwritten == sizeof(c), "write should succeed");
+
+	nread = read(traverse_up[0], &c, sizeof(c));
+	ok(nread == sizeof(c), "read should succeed");
+
+	nwritten = write(traverse_down[1], &c, sizeof(c));
+	ok(nwritten == sizeof(c), "write should succeed");
+
+	nread = read(allrecord_up[0], &c, sizeof(c));
+	ok(nread == sizeof(c), "ret should succeed");
+
+	close(traverse_up[0]);
+	close(traverse_down[1]);
+	close(allrecord_up[0]);
+	close(allrecord_down[1]);
+	diag("%s tests done", name);
+	return exit_status();
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+	bool mutex_support;
+
+	mutex_support = tdb_runtime_check_for_robust_mutexes();
+
+	ret = do_tests("marklock-deadlock-fcntl.tdb",
+		       TDB_CLEAR_IF_FIRST |
+		       TDB_INCOMPATIBLE_HASH);
+	ok(ret == 0, "marklock-deadlock-fcntl.tdb tests should succeed");
+
+	if (!mutex_support) {
+		skip(1, "No robust mutex support, "
+			"skipping marklock-deadlock-mutex.tdb tests");
+		return exit_status();
+	}
+
+	ret = do_tests("marklock-deadlock-mutex.tdb",
+		       TDB_CLEAR_IF_FIRST |
+		       TDB_MUTEX_LOCKING |
+		       TDB_INCOMPATIBLE_HASH);
+	ok(ret == 0, "marklock-deadlock-mutex.tdb tests should succeed");
+
+	return exit_status();
+}
diff --git a/lib/tdb/wscript b/lib/tdb/wscript
index b86671e..c573d36 100644
--- a/lib/tdb/wscript
+++ b/lib/tdb/wscript
@@ -41,6 +41,7 @@ tdb1_unit_tests = [
     'run-wronghash-fail',
     'run-zero-append',
     'run-marklock-deadlock',
+    'run-allrecord-traverse-deadlock',
     'run-mutex-openflags2',
     'run-mutex-trylock',
     'run-mutex-allrecord-bench',
-- 
1.9.1



More information about the samba-technical mailing list