RFC: dbwrap_ctdb and empty vs deleted records

Ralph Böhme slow at samba.org
Sat Jul 23 11:47:59 UTC 2016


On Fri, Jul 22, 2016 at 11:49:34PM +0200, Ralph Böhme wrote:
> On Fri, Jul 22, 2016 at 04:42:35PM +0200, Michael Adam wrote:
> > Thinking aloud:
> > 
> > The example in the commit msg is from brlock code.
> > We'd need to lock a file, release it, have an
> > empty brl record, and before it gets vacuumed
> > call do_lock on the file again.
> > 
> > So possibly by a long vacuum interval and a specially
> > crafted sequence of file ops...
> 
> hm, my reading of the commit message of the revert was a bit
> different:
> 
>     do_lock()
>       -> grabs lock on brl record with brl_get_locks()
>         -> calls brl_lock()
>           -> calls brl_lock_posix or _windows_default()
>             -> calls contend_level2_oplocks_begin()
>               -> calls brl_locks_get_read_only()
>                 -> calls dbwrap_parse_record on the same brl record as above
> 
> This suggest that this can be triggered by a single request calling
> into do_lock(). Can it?

no, it can't. Sorry, wooly thinking late at night.

You need exactly the order of operations you described:

- lock byterange
- unlock -> this creates the tombstone record
- lock byterange again

I've added a brl test and with that am able to reproduce the deadlock
when 925625b52886d40b50fc is reverted.

My patch does not deadlock, so it seems it's ok. Updated patchset that
contains the test attached.

The test is not really needed to trigger the deadlock, earlier tests
in the smb2.lock testsuite will deadlock as well. But having an
explicit test will serve as documentation of the issue.

Please review&push if ok.

Cheerio!
-slow
-------------- next part --------------
From 2c3fbec888540b85e2582b6b6136fdb527da33f1 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Fri, 1 Jul 2016 17:45:53 +0200
Subject: [PATCH 1/3] dbwrap_ctdb: move struct db_ctdb_parse_record_state

Will be needed in the next commit. No change in behavour.

Bug: https://bugzilla.samba.org/show_bug.cgi?id=12005

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/lib/dbwrap/dbwrap_ctdb.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 3bbb9be..c45c257 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -99,6 +99,14 @@ static int db_ctdb_ltdb_parser(TDB_DATA key, TDB_DATA data,
 	return 0;
 }
 
+struct db_ctdb_parse_record_state {
+	void (*parser)(TDB_DATA key, TDB_DATA data, void *private_data);
+	void *private_data;
+	uint32_t my_vnn;
+	bool ask_for_readonly_copy;
+	bool done;
+};
+
 static NTSTATUS db_ctdb_ltdb_parse(
 	struct db_ctdb_ctx *db, TDB_DATA key,
 	void (*parser)(TDB_DATA key, struct ctdb_ltdb_header *header,
@@ -1215,14 +1223,6 @@ static struct db_record *db_ctdb_try_fetch_locked(struct db_context *db,
 	return fetch_locked_internal(ctx, mem_ctx, key, true);
 }
 
-struct db_ctdb_parse_record_state {
-	void (*parser)(TDB_DATA key, TDB_DATA data, void *private_data);
-	void *private_data;
-	uint32_t my_vnn;
-	bool ask_for_readonly_copy;
-	bool done;
-};
-
 static void db_ctdb_parse_record_parser(
 	TDB_DATA key, struct ctdb_ltdb_header *header,
 	TDB_DATA data, void *private_data)
-- 
2.7.4


From 12afa6c3febf3fb70ba0a207253b6b4bf77a439b Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Fri, 1 Jul 2016 17:21:04 +0200
Subject: [PATCH 2/3] dbwrap_ctdb: check for ltdb tombstone records

When fetching records from remote ctdb nodes via ctdbd_parse() or in
db_ctdb_traverse(), we already checks for tombstone records and skip
them. Add the same check to db_ctdb_ltdb_parser() when fetching records
from the ltbd.

See also bug: https://bugzilla.samba.org/show_bug.cgi?id=10008

Commit 925625b52886d40b50fc631bad8bdc81970f7598 reverted part of the
patch in bug 10008 due to a deadlock it introduced.

This patch avoids the deadlock because it ensures db_ctdb_parse_record()
returns NT_STATUS_NOT_FOUND if an empty record is found in the ltdb and
not calling ctdb.

Bug: https://bugzilla.samba.org/show_bug.cgi?id=12005

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/lib/dbwrap/dbwrap_ctdb.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index c45c257..4244d66 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -79,6 +79,7 @@ struct db_ctdb_ltdb_parse_state {
 	void (*parser)(TDB_DATA key, struct ctdb_ltdb_header *header,
 		       TDB_DATA data, void *private_data);
 	void *private_data;
+	bool empty_record;
 };
 
 static int db_ctdb_ltdb_parser(TDB_DATA key, TDB_DATA data,
@@ -91,6 +92,15 @@ static int db_ctdb_ltdb_parser(TDB_DATA key, TDB_DATA data,
 		return -1;
 	}
 
+	if (data.dsize == sizeof(struct ctdb_ltdb_header)) {
+		/*
+		 * Possible record tombstone or just a fresh and empty
+		 * record
+		 */
+		state->empty_record = true;
+		return -1;
+	}
+
 	state->parser(
 		key, (struct ctdb_ltdb_header *)data.dptr,
 		make_tdb_data(data.dptr + sizeof(struct ctdb_ltdb_header),
@@ -105,6 +115,7 @@ struct db_ctdb_parse_record_state {
 	uint32_t my_vnn;
 	bool ask_for_readonly_copy;
 	bool done;
+	bool empty_record;
 };
 
 static NTSTATUS db_ctdb_ltdb_parse(
@@ -113,6 +124,8 @@ static NTSTATUS db_ctdb_ltdb_parse(
 		       TDB_DATA data, void *private_data),
 	void *private_data)
 {
+	struct db_ctdb_parse_record_state *rec_state =
+		(struct db_ctdb_parse_record_state *)private_data;
 	struct db_ctdb_ltdb_parse_state state;
 	int ret;
 
@@ -122,6 +135,9 @@ static NTSTATUS db_ctdb_ltdb_parse(
 	ret = tdb_parse_record(db->wtdb->tdb, key, db_ctdb_ltdb_parser,
 			       &state);
 	if (ret == -1) {
+		if (state.empty_record) {
+			rec_state->empty_record = true;
+		}
 		return NT_STATUS_NOT_FOUND;
 	}
 	return NT_STATUS_OK;
@@ -1267,6 +1283,7 @@ static NTSTATUS db_ctdb_parse_record(struct db_context *db, TDB_DATA key,
 	state.parser = parser;
 	state.private_data = private_data;
 	state.my_vnn = ctdbd_vnn(ctx->conn);
+	state.empty_record = false;
 
 	if (ctx->transaction != NULL) {
 		struct db_ctdb_transaction_handle *h = ctx->transaction;
@@ -1301,6 +1318,18 @@ static NTSTATUS db_ctdb_parse_record(struct db_context *db, TDB_DATA key,
 		return NT_STATUS_OK;
 	}
 
+	if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND) && state.empty_record) {
+		/*
+		 * This is an empty record. ctdbd does not distinguish
+		 * between empty and deleted records. Samba right now
+		 * can live without empty records, so lets treat
+		 * zero-size (i.e. deleted) records as non-existing.
+		 *
+		 * See bugs 10008 and 12005.
+		 */
+		return NT_STATUS_NOT_FOUND;
+	}
+
 	ret = ctdbd_parse(ctx->conn, ctx->db_id, key,
 			  state.ask_for_readonly_copy, parser, private_data);
 	if (ret != 0) {
-- 
2.7.4


From a921818c476368b27d6e9ac4d002e9ce86fa36a3 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sat, 23 Jul 2016 11:08:13 +0200
Subject: [PATCH 3/3] s4/torture: add a test for ctdb-tombstrone-record
 deadlock

This tests for a possible deadlock between smbd and ctdb dealing with
ctdb tombstone records.

Commit 925625b52886d40b50fc631bad8bdc81970f7598 explains the deadlock in
more details and contains the fix. It's a fix for a regression
introduced by the patch for bug 10008 (1cae59ce112c).

If you ever want to use this test against that specific commit:

$ git checkout 925625b52886d40b50fc631bad8bdc81970f7598
$ git cherry-pick THIS_COMMIT

This should not deadlock on a ctdb cluster.

$ git revert 925625b52886d40b50fc631bad8bdc81970f7598

This will deadlock.

The patches in this bugreport (12005) avoid the deadlock as well.

Bug: https://bugzilla.samba.org/show_bug.cgi?id=12005

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source4/torture/smb2/lock.c | 46 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/source4/torture/smb2/lock.c b/source4/torture/smb2/lock.c
index 68e353d..dc1b91b 100644
--- a/source4/torture/smb2/lock.c
+++ b/source4/torture/smb2/lock.c
@@ -3033,6 +3033,51 @@ done:
 	return ret;
 }
 
+/**
+ * Test lock interaction between smbd and ctdb with tombstone records
+ */
+static bool test_deadlock(struct torture_context *torture,
+			  struct smb2_tree *tree)
+{
+	NTSTATUS status;
+	bool ret = true;
+	struct smb2_handle h;
+	uint8_t buf[200];
+	bool correct = true;
+	const char *fname = BASEDIR "\\deadlock.txt";
+
+	if (!lpcfg_clustering(torture->lp_ctx)) {
+		torture_skip(torture, "Test must be run on a ctdb cluster\n");
+		return true;
+	}
+
+	status = torture_smb2_testdir(tree, BASEDIR, &h);
+	CHECK_STATUS(status, NT_STATUS_OK);
+	smb2_util_close(tree, h);
+
+	status = torture_smb2_testfile(tree, fname, &h);
+	CHECK_STATUS(status, NT_STATUS_OK);
+
+	ZERO_STRUCT(buf);
+	status = smb2_util_write(tree, h, buf, 0, ARRAY_SIZE(buf));
+	CHECK_STATUS(status, NT_STATUS_OK);
+
+	ret = NT_STATUS_IS_OK(test_smb2_lock(tree, h, 0, 1, true));
+	EXPECTED(ret, true);
+	ret = NT_STATUS_IS_OK(test_smb2_unlock(tree, h, 0, 1));
+	EXPECTED(ret, true);
+
+	ret = NT_STATUS_IS_OK(test_smb2_lock(tree, h, 0, 1, true));
+	EXPECTED(ret, true);
+	ret = NT_STATUS_IS_OK(test_smb2_unlock(tree, h, 0, 1));
+	EXPECTED(ret, true);
+
+done:
+	smb2_util_close(tree, h);
+	smb2_deltree(tree, BASEDIR);
+	return correct;
+}
+
 /* basic testing of SMB2 locking
 */
 struct torture_suite *torture_smb2_lock_init(void)
@@ -3068,6 +3113,7 @@ struct torture_suite *torture_smb2_lock_init(void)
 	torture_suite_add_2smb2_test(suite, "overlap", test_overlap);
 	torture_suite_add_1smb2_test(suite, "truncate", test_truncate);
 	torture_suite_add_1smb2_test(suite, "replay", test_replay);
+	torture_suite_add_1smb2_test(suite, "ctdb-delrec-deadlock", test_deadlock);
 
 	suite->description = talloc_strdup(suite, "SMB2-LOCK tests");
 
-- 
2.7.4



More information about the samba-technical mailing list