[PATCH] Fix bug #11375 - ordering issue in teardown in server exit.

Stefan Metzmacher metze at samba.org
Wed Nov 25 12:05:11 UTC 2015


Am 25.11.2015 um 08:05 schrieb Stefan Metzmacher:
> Hi Jeremy,
> 
>> On Tue, Nov 24, 2015 at 10:09:39PM +0100, Stefan Metzmacher wrote:
>>> smbXsrv_session_logoff() sets session->table = NULL;
>>> and should exit immediately the 2nd time.
>>> Am I missing something?
>>>
>>> This can only explained if it's NOT called twice,
>>> but the first time AFTER sconn is already gone.
>>
>> Yes, you're right. It must be the first call
>> after sconn is deleted.
>>
>>> A wild guess, could we have a bug in dbwrap_rbt traverse?
>>> That may skip entries randomly?
>>
>> That's a tricky recursive function. Maybe.
>> I'll take a close look....
> 
> I added some tests. I started to write this mail
> saying it all works, but I just got this:
> 
> And the failure reproduces very often, but
> valgrind doesn't give errors.
> 
> I'd guess it's related to __rb_rotate_right and __rb_rotate_left
> and our traverse code doesn't have this in mind.
> I'm wondering if we need to somehow make use of rb_next(), rb_prev(),
> rb_first() and rb_last().

I don't understand that code and added a linked list that
is used for the traverse.

metze
-------------- next part --------------
From dc4a5b3fb6d0281da524f01cddfd14febf40c10c Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 25 Nov 2015 10:17:34 +0100
Subject: [PATCH 1/4] dbwrap_rbt: use talloc_zero_size() instead of a partial
 ZERO_STRUCT()

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11375
BUG: https://bugzilla.samba.org/show_bug.cgi?id=11394

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/dbwrap/dbwrap_rbt.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/dbwrap/dbwrap_rbt.c b/lib/dbwrap/dbwrap_rbt.c
index 0764a2c..37d9649 100644
--- a/lib/dbwrap/dbwrap_rbt.c
+++ b/lib/dbwrap/dbwrap_rbt.c
@@ -153,7 +153,7 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag)
 		return NT_STATUS_INSUFFICIENT_RESOURCES;
 	}
 
-	node = talloc_size(db_ctx, reclen);
+	node = talloc_zero_size(db_ctx, reclen);
 	if (node == NULL) {
 		return NT_STATUS_NO_MEMORY;
 	}
@@ -172,8 +172,6 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag)
 		 */
 	}
 
-	ZERO_STRUCT(node->rb_node);
-
 	node->keysize = rec->key.dsize;
 	node->valuesize = data.dsize;
 
-- 
1.9.1


From a467a9c9dff5eb6c26e23da3dc11d4923b93cf58 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 25 Nov 2015 09:22:08 +0100
Subject: [PATCH 2/4] dbwrap_rbt: add nested traverse protection

Multiple dbwrap_traverse_read() calls are possible.

store() and delete() on a fetch locked record
are rejected during dbwrap_traverse_read().

A dbwrap_traverse() within a dbwrap_traverse_read()
behaves like a dbwrap_traverse_read().

Nested dbwrap_traverse() calls are not possible.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11375
BUG: https://bugzilla.samba.org/show_bug.cgi?id=11394

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/dbwrap/dbwrap_rbt.c | 71 ++++++++++++++++++++++++++++---------------------
 1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/lib/dbwrap/dbwrap_rbt.c b/lib/dbwrap/dbwrap_rbt.c
index 37d9649..622ab0d 100644
--- a/lib/dbwrap/dbwrap_rbt.c
+++ b/lib/dbwrap/dbwrap_rbt.c
@@ -27,6 +27,8 @@
 
 struct db_rbt_ctx {
 	struct rb_root tree;
+	size_t traverse_read;
+	bool traverse_write;
 };
 
 struct db_rbt_rec {
@@ -126,6 +128,10 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag)
 	ssize_t reclen;
 	TDB_DATA this_key, this_val;
 
+	if (db_ctx->traverse_read > 0) {
+		return NT_STATUS_MEDIA_WRITE_PROTECTED;
+	}
+
 	if (rec_priv->node != NULL) {
 
 		/*
@@ -222,6 +228,10 @@ static NTSTATUS db_rbt_delete(struct db_record *rec)
 		rec->db->private_data, struct db_rbt_ctx);
 	struct db_rbt_rec *rec_priv = (struct db_rbt_rec *)rec->private_data;
 
+	if (db_ctx->traverse_read > 0) {
+		return NT_STATUS_MEDIA_WRITE_PROTECTED;
+	}
+
 	if (rec_priv->node == NULL) {
 		return NT_STATUS_OK;
 	}
@@ -232,16 +242,6 @@ static NTSTATUS db_rbt_delete(struct db_record *rec)
 	return NT_STATUS_OK;
 }
 
-static NTSTATUS db_rbt_store_deny(struct db_record *rec, TDB_DATA data, int flag)
-{
-	return NT_STATUS_MEDIA_WRITE_PROTECTED;
-}
-
-static NTSTATUS db_rbt_delete_deny(struct db_record *rec)
-{
-	return NT_STATUS_MEDIA_WRITE_PROTECTED;
-}
-
 struct db_rbt_search_result {
 	TDB_DATA key;
 	TDB_DATA val;
@@ -414,13 +414,8 @@ static int db_rbt_traverse_internal(struct db_context *db,
 	ZERO_STRUCT(rec);
 	rec.db = db;
 	rec.private_data = &rec_priv;
-	if (rw) {
-		rec.store = db_rbt_store;
-		rec.delete_rec = db_rbt_delete;
-	} else {
-		rec.store = db_rbt_store_deny;
-		rec.delete_rec = db_rbt_delete_deny;
-	}
+	rec.store = db_rbt_store;
+	rec.delete_rec = db_rbt_delete;
 	db_rbt_parse_node(rec_priv.node, &rec.key, &rec.value);
 
 	ret = f(&rec, private_data);
@@ -440,18 +435,21 @@ static int db_rbt_traverse_internal(struct db_context *db,
 	return db_rbt_traverse_internal(db, rb_right, f, private_data, count, rw);
 }
 
-static int db_rbt_traverse(struct db_context *db,
-			   int (*f)(struct db_record *db,
-				    void *private_data),
-			   void *private_data)
+static int db_rbt_traverse_read(struct db_context *db,
+				int (*f)(struct db_record *db,
+					 void *private_data),
+				void *private_data)
 {
 	struct db_rbt_ctx *ctx = talloc_get_type_abort(
 		db->private_data, struct db_rbt_ctx);
 	uint32_t count = 0;
+	int ret;
 
-	int ret = db_rbt_traverse_internal(db, ctx->tree.rb_node,
-					   f, private_data, &count,
-					   true /* rw */);
+	ctx->traverse_read++;
+	ret = db_rbt_traverse_internal(db, ctx->tree.rb_node,
+				       f, private_data, &count,
+				       false /* rw */);
+	ctx->traverse_read--;
 	if (ret != 0) {
 		return -1;
 	}
@@ -461,18 +459,29 @@ static int db_rbt_traverse(struct db_context *db,
 	return count;
 }
 
-static int db_rbt_traverse_read(struct db_context *db,
-				int (*f)(struct db_record *db,
-					 void *private_data),
-				void *private_data)
+static int db_rbt_traverse(struct db_context *db,
+			   int (*f)(struct db_record *db,
+				    void *private_data),
+			   void *private_data)
 {
 	struct db_rbt_ctx *ctx = talloc_get_type_abort(
 		db->private_data, struct db_rbt_ctx);
 	uint32_t count = 0;
+	int ret;
+
+	if (ctx->traverse_write) {
+		return -1;
+	};
+
+	if (ctx->traverse_read > 0) {
+		return db_rbt_traverse_read(db, f, private_data);
+	}
 
-	int ret = db_rbt_traverse_internal(db, ctx->tree.rb_node,
-					   f, private_data, &count,
-					   false /* rw */);
+	ctx->traverse_write = true;
+	ret = db_rbt_traverse_internal(db, ctx->tree.rb_node,
+				       f, private_data, &count,
+				       true /* rw */);
+	ctx->traverse_write = false;
 	if (ret != 0) {
 		return -1;
 	}
-- 
1.9.1


From 43dbea33ecc00c48c2529ada666fc61ec4fb8da4 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 25 Nov 2015 09:22:08 +0100
Subject: [PATCH 3/4] dbwrap_rbt: fix modifying the db during traverse

We delete and add of records rebalace the tree, but our
traverse code doesn't handle that and skips records
randomly.

We maintain records in a linked list for now
in addition to the rbtree and use that list during
traverse.

This add a bit overhead, but at least it works reliable.
If someone finds a way to do reliable traverse with the
rebalanced tree, we can replace this commit.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11375
BUG: https://bugzilla.samba.org/show_bug.cgi?id=11394

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/dbwrap/dbwrap_rbt.c | 104 ++++++++++++++++++++++++++----------------------
 1 file changed, 57 insertions(+), 47 deletions(-)

diff --git a/lib/dbwrap/dbwrap_rbt.c b/lib/dbwrap/dbwrap_rbt.c
index 622ab0d..3b5e589 100644
--- a/lib/dbwrap/dbwrap_rbt.c
+++ b/lib/dbwrap/dbwrap_rbt.c
@@ -22,13 +22,15 @@
 #include "dbwrap/dbwrap_private.h"
 #include "dbwrap/dbwrap_rbt.h"
 #include "../lib/util/rbtree.h"
+#include "../lib/util/dlinklist.h"
 
 #define DBWRAP_RBT_ALIGN(_size_) (((_size_)+15)&~15)
 
 struct db_rbt_ctx {
 	struct rb_root tree;
+	struct db_rbt_node *nodes;
 	size_t traverse_read;
-	bool traverse_write;
+	struct db_rbt_node **traverse_nextp;
 };
 
 struct db_rbt_rec {
@@ -40,6 +42,7 @@ struct db_rbt_rec {
 struct db_rbt_node {
 	struct rb_node rb_node;
 	size_t keysize, valuesize;
+	struct db_rbt_node *prev, *next;
 };
 
 /*
@@ -123,7 +126,8 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag)
 	struct db_rbt_node *node;
 
 	struct rb_node ** p;
-	struct rb_node * parent;
+	struct rb_node *parent = NULL;
+	struct db_rbt_node *parent_node = NULL;
 
 	ssize_t reclen;
 	TDB_DATA this_key, this_val;
@@ -165,12 +169,19 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag)
 	}
 
 	if (rec_priv->node != NULL) {
+		if (db_ctx->traverse_nextp != NULL) {
+			if (*db_ctx->traverse_nextp == rec_priv->node) {
+				*db_ctx->traverse_nextp = node;
+			}
+		}
+
 		/*
 		 * We need to delete the key from the tree and start fresh,
 		 * there's not enough space in the existing record
 		 */
 
 		rb_erase(&rec_priv->node->rb_node, &db_ctx->tree);
+		DLIST_REMOVE(db_ctx->nodes, rec_priv->node);
 
 		/*
 		 * Keep the existing node around for a while: If the record
@@ -197,10 +208,11 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag)
 		TDB_DATA search_key, search_val;
 		int res;
 
-		parent = (*p);
-
 		r = db_rbt2node(*p);
 
+		parent = (*p);
+		parent_node = r;
+
 		db_rbt_parse_node(r, &search_key, &search_val);
 
 		res = db_rbt_compare(this_key, search_key);
@@ -217,6 +229,7 @@ static NTSTATUS db_rbt_store(struct db_record *rec, TDB_DATA data, int flag)
 	}
 
 	rb_link_node(&node->rb_node, parent, p);
+	DLIST_ADD_AFTER(db_ctx->nodes, node, parent_node);
 	rb_insert_color(&node->rb_node, &db_ctx->tree);
 
 	return NT_STATUS_OK;
@@ -236,7 +249,14 @@ static NTSTATUS db_rbt_delete(struct db_record *rec)
 		return NT_STATUS_OK;
 	}
 
+	if (db_ctx->traverse_nextp != NULL) {
+		if (*db_ctx->traverse_nextp == rec_priv->node) {
+			*db_ctx->traverse_nextp = rec_priv->node->next;
+		}
+	}
+
 	rb_erase(&rec_priv->node->rb_node, &db_ctx->tree);
+	DLIST_REMOVE(db_ctx->nodes, rec_priv->node);
 	TALLOC_FREE(rec_priv->node);
 
 	return NT_STATUS_OK;
@@ -383,56 +403,48 @@ static NTSTATUS db_rbt_parse_record(struct db_context *db, TDB_DATA key,
 }
 
 static int db_rbt_traverse_internal(struct db_context *db,
-				    struct rb_node *n,
 				    int (*f)(struct db_record *db,
 					     void *private_data),
 				    void *private_data, uint32_t* count,
 				    bool rw)
 {
-	struct rb_node *rb_right;
-	struct rb_node *rb_left;
-	struct db_record rec;
-	struct db_rbt_rec rec_priv;
+	struct db_rbt_ctx *ctx = talloc_get_type_abort(
+		db->private_data, struct db_rbt_ctx);
+	struct db_rbt_node *cur = NULL;
+	struct db_rbt_node *next = NULL;
 	int ret;
 
-	if (n == NULL) {
-		return 0;
-	}
-
-	rb_left = n->rb_left;
-	rb_right = n->rb_right;
+	for (cur = ctx->nodes; cur != NULL; cur = next) {
+		struct db_record rec;
+		struct db_rbt_rec rec_priv;
 
-	ret = db_rbt_traverse_internal(db, rb_left, f, private_data, count, rw);
-	if (ret != 0) {
-		return ret;
-	}
+		rec_priv.node = cur;
+		next = rec_priv.node->next;
 
-	rec_priv.node = db_rbt2node(n);
-	/* n might be altered by the callback function */
-	n = NULL;
+		ZERO_STRUCT(rec);
+		rec.db = db;
+		rec.private_data = &rec_priv;
+		rec.store = db_rbt_store;
+		rec.delete_rec = db_rbt_delete;
+		db_rbt_parse_node(rec_priv.node, &rec.key, &rec.value);
 
-	ZERO_STRUCT(rec);
-	rec.db = db;
-	rec.private_data = &rec_priv;
-	rec.store = db_rbt_store;
-	rec.delete_rec = db_rbt_delete;
-	db_rbt_parse_node(rec_priv.node, &rec.key, &rec.value);
-
-	ret = f(&rec, private_data);
-	(*count) ++;
-	if (ret != 0) {
-		return ret;
-	}
-
-	if (rec_priv.node != NULL) {
-		/*
-		 * If the current record is still there
-		 * we should take the current rb_right.
-		 */
-		rb_right = rec_priv.node->rb_node.rb_right;
+		if (rw) {
+			ctx->traverse_nextp = &next;
+		}
+		ret = f(&rec, private_data);
+		(*count) ++;
+		if (rw) {
+			ctx->traverse_nextp = NULL;
+		}
+		if (ret != 0) {
+			return ret;
+		}
+		if (rec_priv.node != NULL) {
+			next = rec_priv.node->next;
+		}
 	}
 
-	return db_rbt_traverse_internal(db, rb_right, f, private_data, count, rw);
+	return 0;
 }
 
 static int db_rbt_traverse_read(struct db_context *db,
@@ -446,7 +458,7 @@ static int db_rbt_traverse_read(struct db_context *db,
 	int ret;
 
 	ctx->traverse_read++;
-	ret = db_rbt_traverse_internal(db, ctx->tree.rb_node,
+	ret = db_rbt_traverse_internal(db,
 				       f, private_data, &count,
 				       false /* rw */);
 	ctx->traverse_read--;
@@ -469,7 +481,7 @@ static int db_rbt_traverse(struct db_context *db,
 	uint32_t count = 0;
 	int ret;
 
-	if (ctx->traverse_write) {
+	if (ctx->traverse_nextp != NULL) {
 		return -1;
 	};
 
@@ -477,11 +489,9 @@ static int db_rbt_traverse(struct db_context *db,
 		return db_rbt_traverse_read(db, f, private_data);
 	}
 
-	ctx->traverse_write = true;
-	ret = db_rbt_traverse_internal(db, ctx->tree.rb_node,
+	ret = db_rbt_traverse_internal(db,
 				       f, private_data, &count,
 				       true /* rw */);
-	ctx->traverse_write = false;
 	if (ret != 0) {
 		return -1;
 	}
-- 
1.9.1


From 1d0acd956a3fd886d92c22e61f8793a987043fec Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 25 Nov 2015 00:13:17 +0100
Subject: [PATCH 4/4] s3:torture: add traverse testing to LOCAL-RBTREE

BUG: https://bugzilla.samba.org/show_bug.cgi?id=11375
BUG: https://bugzilla.samba.org/show_bug.cgi?id=11394

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/torture/torture.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/source3/torture/torture.c b/source3/torture/torture.c
index ef75d214..6c34e4c 100644
--- a/source3/torture/torture.c
+++ b/source3/torture/torture.c
@@ -8347,11 +8347,29 @@ static bool rbt_testval(struct db_context *db, const char *key,
 	return ret;
 }
 
+static int local_rbtree_traverse_read(struct db_record *rec, void *private_data)
+{
+	int *count2 = (int *)private_data;
+	(*count2)++;
+	return 0;
+}
+
+static int local_rbtree_traverse_delete(struct db_record *rec, void *private_data)
+{
+	int *count2 = (int *)private_data;
+	(*count2)++;
+	dbwrap_record_delete(rec);
+	return 0;
+}
+
 static bool run_local_rbtree(int dummy)
 {
 	struct db_context *db;
 	bool ret = false;
 	int i;
+	NTSTATUS status;
+	int count = 0;
+	int count2 = 0;
 
 	db = db_open_rbt(NULL);
 
@@ -8394,6 +8412,27 @@ static bool run_local_rbtree(int dummy)
 	}
 
 	ret = true;
+	count = 0; count2 = 0;
+	status = dbwrap_traverse_read(db, local_rbtree_traverse_read,
+				      &count2, &count);
+	printf("%s: read1: %d %d, %s\n", __func__, count, count2, nt_errstr(status));
+	if ((count != count2) || (count != 1000)) {
+		ret = false;
+	}
+	count = 0; count2 = 0;
+	status = dbwrap_traverse(db, local_rbtree_traverse_delete,
+				 &count2, &count);
+	printf("%s: delete: %d %d, %s\n", __func__, count, count2, nt_errstr(status));
+	if ((count != count2) || (count != 1000)) {
+		ret = false;
+	}
+	count = 0; count2 = 0;
+	status = dbwrap_traverse_read(db, local_rbtree_traverse_read,
+				      &count2, &count);
+	printf("%s: read2: %d %d, %s\n", __func__, count, count2, nt_errstr(status));
+	if ((count != count2) || (count != 0)) {
+		ret = false;
+	}
 
  done:
 	TALLOC_FREE(db);
-- 
1.9.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <http://lists.samba.org/pipermail/samba-technical/attachments/20151125/8a2f41dc/signature.sig>


More information about the samba-technical mailing list