Refactor dbwrap_ctdb a bit

Volker Lendecke Volker.Lendecke at SerNet.DE
Tue Nov 27 03:58:20 MST 2012


On Fri, Nov 23, 2012 at 11:09:12PM +0100, Michael Adam wrote:
> same here
> 
> > but I did them on purpose :-)
> 
> I took that for granted... :)

Attached is a new patchset that keeps the removal of the
NULL check and changes the calling convention for the
ltdb_parse functions. It also adds the code that made me
take a closer look at dbwrap_ctdb in the first place: it
converts the notify_trigger function to fire off the ctdb
fetch requests asynchronously. This should bring back part
of the baseline performance that we lost in the cluster case
when doing the scalability changes for notify.

This adds more than the initial patchset I sent, so watch
out before pushing.

With best regards,

Volker

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt at sernet.de
-------------- next part --------------
From 91b2f945e6d5ff896f3e8dfbec2d1158e5a19fb3 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 8 Nov 2012 14:15:25 +0100
Subject: [PATCH 01/31] dbwrap: Use dbwrap_parse_record in dbwrap_fetch_uint32_bystring

---
 lib/dbwrap/dbwrap_util.c |   40 +++++++++++++++++++++++++++-------------
 1 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/lib/dbwrap/dbwrap_util.c b/lib/dbwrap/dbwrap_util.c
index d0a34cc..4185fff 100644
--- a/lib/dbwrap/dbwrap_util.c
+++ b/lib/dbwrap/dbwrap_util.c
@@ -97,32 +97,46 @@ NTSTATUS dbwrap_store_int32_bystring(struct db_context *db, const char *keystr,
 	return status;
 }
 
+struct dbwrap_fetch_uint32_state {
+	NTSTATUS status;
+	uint32_t result;
+};
+
+static void dbwrap_fetch_uint32_parser(TDB_DATA key, TDB_DATA data,
+				       void *private_data)
+{
+	struct dbwrap_fetch_uint32_state *state =
+		(struct dbwrap_fetch_uint32_state *)private_data;
+
+	if (data.dsize != sizeof(state->result)) {
+		state->status = NT_STATUS_INTERNAL_DB_CORRUPTION;
+		return;
+	}
+	state->result = IVAL(data.dptr, 0);
+	state->status = NT_STATUS_OK;
+}
+
 NTSTATUS dbwrap_fetch_uint32_bystring(struct db_context *db,
 				      const char *keystr, uint32_t *val)
 {
-	TDB_DATA dbuf;
+	struct dbwrap_fetch_uint32_state state;
 	NTSTATUS status;
 
 	if (val == NULL) {
 		return NT_STATUS_INVALID_PARAMETER;
 	}
 
-	status = dbwrap_fetch_bystring(db, talloc_tos(), keystr, &dbuf);
+	state.status = NT_STATUS_INTERNAL_ERROR;
+
+	status = dbwrap_parse_record(db, string_term_tdb_data(keystr),
+				     dbwrap_fetch_uint32_parser, &state);
 	if (!NT_STATUS_IS_OK(status)) {
 		return status;
 	}
-
-	if ((dbuf.dptr == NULL) || (dbuf.dsize == 0)) {
-		return NT_STATUS_NOT_FOUND;
-	}
-	if (dbuf.dsize != sizeof(uint32_t)) {
-		TALLOC_FREE(dbuf.dptr);
-		return NT_STATUS_UNSUCCESSFUL;
+	if (NT_STATUS_IS_OK(state.status)) {
+		*val = state.result;
 	}
-
-	*val = IVAL(dbuf.dptr, 0);
-	TALLOC_FREE(dbuf.dptr);
-	return NT_STATUS_OK;
+	return state.status;
 }
 
 NTSTATUS dbwrap_store_uint32_bystring(struct db_context *db,
-- 
1.7.3.4


From 8d31ffbe4f802969f27d5cc7740164b8c2acd3ab Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 7 Nov 2012 21:24:27 +0100
Subject: [PATCH 02/31] s3: test dbwrap_ctdb

---
 source3/Makefile.in                |    1 +
 source3/torture/proto.h            |    1 +
 source3/torture/test_dbwrap_ctdb.c |  122 ++++++++++++++++++++++++++++++++++++
 source3/torture/torture.c          |    1 +
 source3/wscript_build              |    1 +
 5 files changed, 126 insertions(+), 0 deletions(-)
 create mode 100644 source3/torture/test_dbwrap_ctdb.c

diff --git a/source3/Makefile.in b/source3/Makefile.in
index a2d7906..3555687 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -1267,6 +1267,7 @@ SMBTORTURE_OBJ1 = torture/torture.o torture/nbio.o torture/scanner.o torture/uta
 		torture/test_notify.o \
 		torture/test_dbwrap_watch.o \
 		torture/test_idmap_tdb_common.o \
+		torture/test_dbwrap_ctdb.o \
 		torture/t_strappend.o
 
 SMBTORTURE_OBJ = $(SMBTORTURE_OBJ1) $(PARAM_OBJ) $(TLDAP_OBJ) \
diff --git a/source3/torture/proto.h b/source3/torture/proto.h
index 0c6fc70..4f4c9e2 100644
--- a/source3/torture/proto.h
+++ b/source3/torture/proto.h
@@ -110,5 +110,6 @@ bool run_notify_bench2(int dummy);
 bool run_notify_bench3(int dummy);
 bool run_dbwrap_watch1(int dummy);
 bool run_idmap_tdb_common_test(int dummy);
+bool run_local_dbwrap_ctdb(int dummy);
 
 #endif /* __TORTURE_H__ */
diff --git a/source3/torture/test_dbwrap_ctdb.c b/source3/torture/test_dbwrap_ctdb.c
new file mode 100644
index 0000000..c45c3f6
--- /dev/null
+++ b/source3/torture/test_dbwrap_ctdb.c
@@ -0,0 +1,122 @@
+/*
+ * Unix SMB/CIFS implementation.
+ * Test dbwrap_ctdb API
+ * Copyright (C) Volker Lendecke 2012
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "includes.h"
+#include "torture/proto.h"
+#include "system/filesys.h"
+#include "lib/dbwrap/dbwrap.h"
+#include "lib/dbwrap/dbwrap_ctdb.h"
+
+bool run_local_dbwrap_ctdb(int dummy)
+{
+	struct db_context *db;
+	int res;
+	bool ret = false;
+	NTSTATUS status;
+	uint32_t val;
+	/* Exercise transactions and uint32 store/fetch via db_open_ctdb. */
+	db = db_open_ctdb(talloc_tos(), "torture.tdb", 0, TDB_DEFAULT,
+			  O_RDWR, 0755, DBWRAP_LOCK_ORDER_1);
+	if (db == NULL) {
+		perror("db_open_ctdb failed");
+		goto fail;
+	}
+	/* An empty transaction must start and cancel cleanly. */
+	res = dbwrap_transaction_start(db);
+	if (res != 0) {
+		fprintf(stderr, "dbwrap_transaction_start failed\n");
+		goto fail;
+	}
+	res = dbwrap_transaction_cancel(db);
+	if (res != 0) {
+		fprintf(stderr, "dbwrap_transaction_cancel failed\n");
+		goto fail;
+	}
+	/* Real transaction: fetches must see the stores made inside it. */
+	res = dbwrap_transaction_start(db);
+	if (res != 0) {
+		fprintf(stderr, "dbwrap_transaction_start failed\n");
+		goto fail;
+	}
+
+	status = dbwrap_store_uint32_bystring(db, "foo", 1);
+	if (!NT_STATUS_IS_OK(status)) {
+		fprintf(stderr, "store_uint32 failed: %s\n",
+			nt_errstr(status));
+		goto fail;
+	}
+	status = dbwrap_fetch_uint32_bystring(db, "foo", &val);
+	if (!NT_STATUS_IS_OK(status)) {
+		fprintf(stderr, "fetch_uint32 failed: %s\n",
+			nt_errstr(status));
+		goto fail;
+	}
+	if (val != 1) {
+		fprintf(stderr, "fetch_uint32 gave %u, expected 1\n",
+			(unsigned)val);
+		goto fail;
+	}
+	/* Store and read back a second key. */
+	status = dbwrap_store_uint32_bystring(db, "bar", 5);
+	if (!NT_STATUS_IS_OK(status)) {
+		fprintf(stderr, "store_uint32 failed: %s\n",
+			nt_errstr(status));
+		goto fail;
+	}
+	status = dbwrap_fetch_uint32_bystring(db, "bar", &val);
+	if (!NT_STATUS_IS_OK(status)) {
+		fprintf(stderr, "fetch_uint32 failed: %s\n",
+			nt_errstr(status));
+		goto fail;
+	}
+	if (val != 5) {
+		fprintf(stderr, "fetch_uint32 gave %u, expected 5\n",
+			(unsigned)val);
+		goto fail;
+	}
+	/* Overwrite the first key and read back the new value. */
+	status = dbwrap_store_uint32_bystring(db, "foo", 2);
+	if (!NT_STATUS_IS_OK(status)) {
+		fprintf(stderr, "store_uint32 failed: %s\n",
+			nt_errstr(status));
+		goto fail;
+	}
+	status = dbwrap_fetch_uint32_bystring(db, "foo", &val);
+	if (!NT_STATUS_IS_OK(status)) {
+		fprintf(stderr, "fetch_uint32 failed: %s\n",
+			nt_errstr(status));
+		goto fail;
+	}
+	if (val != 2) {
+		fprintf(stderr, "fetch_uint32 gave %u, expected 2\n",
+			(unsigned)val);
+		goto fail;
+	}
+
+	res = dbwrap_transaction_commit(db);
+	if (res != 0) {
+		fprintf(stderr, "dbwrap_transaction_commit failed\n");
+		goto fail;
+	}
+
+	ret = true;
+fail:
+	TALLOC_FREE(db);
+	return ret;
+}
diff --git a/source3/torture/torture.c b/source3/torture/torture.c
index 0cca680..89b34e4 100644
--- a/source3/torture/torture.c
+++ b/source3/torture/torture.c
@@ -9150,6 +9150,7 @@ static struct {
 	{ "LOCAL-remove_duplicate_addrs2", run_local_remove_duplicate_addrs2, 0},
 	{ "local-tdb-opener", run_local_tdb_opener, 0 },
 	{ "local-tdb-writer", run_local_tdb_writer, 0 },
+	{ "LOCAL-DBWRAP-CTDB", run_local_dbwrap_ctdb, 0 },
 	{NULL, NULL, 0}};
 
 
diff --git a/source3/wscript_build b/source3/wscript_build
index b87b4d1..b9984fe 100755
--- a/source3/wscript_build
+++ b/source3/wscript_build
@@ -557,6 +557,7 @@ SMBTORTURE_SRC1 = '''torture/torture.c torture/nbio.c torture/scanner.c torture/
                 lib/tevent_barrier.c
                 torture/test_dbwrap_watch.c
                 torture/test_idmap_tdb_common.c
+                torture/test_dbwrap_ctdb.c
                 torture/t_strappend.c'''
 
 SMBTORTURE_SRC = '''${SMBTORTURE_SRC1}
-- 
1.7.3.4


From b42cefa2b7b16d5e577dd1b946bab305d5a8a8c1 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 7 Nov 2012 16:25:31 +0100
Subject: [PATCH 03/31] s3: Remove header==NULL code from db_ctdb_marshall_record

The only call chain (via db_ctdb_marshall_add) has header != NULL
---
 source3/lib/dbwrap/dbwrap_ctdb.c |   17 +++++------------
 1 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index e4c87ea..5be4bf7 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -177,9 +177,6 @@ static NTSTATUS db_ctdb_ltdb_store(struct db_ctdb_ctx *db,
 
 /*
   form a ctdb_rec_data record from a key/data pair
-
-  note that header may be NULL. If not NULL then it is included in the data portion
-  of the record
  */
 static struct ctdb_rec_data *db_ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32_t reqid,
 						  TDB_DATA key,
@@ -190,7 +187,7 @@ static struct ctdb_rec_data *db_ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32
 	struct ctdb_rec_data *d;
 
 	length = offsetof(struct ctdb_rec_data, data) + key.dsize +
-		data.dsize + (header?sizeof(*header):0);
+		data.dsize + sizeof(*header);
 	d = (struct ctdb_rec_data *)talloc_size(mem_ctx, length);
 	if (d == NULL) {
 		return NULL;
@@ -199,14 +196,10 @@ static struct ctdb_rec_data *db_ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32
 	d->reqid = reqid;
 	d->keylen = key.dsize;
 	memcpy(&d->data[0], key.dptr, key.dsize);
-	if (header) {
-		d->datalen = data.dsize + sizeof(*header);
-		memcpy(&d->data[key.dsize], header, sizeof(*header));
-		memcpy(&d->data[key.dsize+sizeof(*header)], data.dptr, data.dsize);
-	} else {
-		d->datalen = data.dsize;
-		memcpy(&d->data[key.dsize], data.dptr, data.dsize);
-	}
+
+	d->datalen = data.dsize + sizeof(*header);
+	memcpy(&d->data[key.dsize], header, sizeof(*header));
+	memcpy(&d->data[key.dsize+sizeof(*header)], data.dptr, data.dsize);
 	return d;
 }
 
-- 
1.7.3.4


From d484b6426e730760673376589aa1e6d3685f3daf Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 7 Nov 2012 16:39:16 +0100
Subject: [PATCH 04/31] s3: Slightly simplify db_ctdb_transaction_commit

Avoid an unnecessary "else".
---
 source3/lib/dbwrap/dbwrap_ctdb.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 5be4bf7..d352404 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -836,7 +836,8 @@ again:
 		if (new_seqnum == old_seqnum) {
 			/* Recovery prevented all our changes: retry. */
 			goto again;
-		} else if (new_seqnum != (old_seqnum + 1)) {
+		}
+		if (new_seqnum != (old_seqnum + 1)) {
 			DEBUG(0, (__location__ " ERROR: new_seqnum[%lu] != "
 				  "old_seqnum[%lu] + (0 or 1) after failed "
 				  "TRANS3_COMMIT - this should not happen!\n",
-- 
1.7.3.4


From 757fabfc7f9ecfbc82bfe5099845b62e60623d67 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 8 Nov 2012 11:37:30 +0100
Subject: [PATCH 05/31] s3: Add db_ctdb_ltdb_parse

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   42 ++++++++++++++++++++++++++++++++++++++
 1 files changed, 42 insertions(+), 0 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index d352404..abcdd35 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -88,6 +88,48 @@ static NTSTATUS tdb_error_to_ntstatus(struct tdb_context *tdb)
 	return map_nt_error_from_tdb(tret);
 }
 
+struct db_ctdb_ltdb_parse_state {
+	void (*parser)(TDB_DATA key, struct ctdb_ltdb_header *header,
+		       TDB_DATA data, void *private_data);
+	void *private_data;
+};
+
+static int db_ctdb_ltdb_parser(TDB_DATA key, TDB_DATA data,
+			       void *private_data)
+{
+	struct db_ctdb_ltdb_parse_state *state =
+		(struct db_ctdb_ltdb_parse_state *)private_data;
+
+	if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
+		return -1;
+	}
+	state->parser(
+		key, (struct ctdb_ltdb_header *)data.dptr,
+		make_tdb_data(data.dptr + sizeof(struct ctdb_ltdb_header),
+			      data.dsize - sizeof(struct ctdb_ltdb_header)),
+		state->private_data);
+	return 0;
+}
+
+static NTSTATUS db_ctdb_ltdb_parse(
+	struct db_ctdb_ctx *db, TDB_DATA key,
+	void (*parser)(TDB_DATA key, struct ctdb_ltdb_header *header,
+		       TDB_DATA data, void *private_data),
+	void *private_data)
+{
+	struct db_ctdb_ltdb_parse_state state;
+	int ret;
+
+	state.parser = parser;
+	state.private_data = private_data;
+
+	ret = tdb_parse_record(db->wtdb->tdb, key, db_ctdb_ltdb_parser,
+			       &state);
+	if (ret == -1) {
+		return NT_STATUS_NOT_FOUND;
+	}
+	return NT_STATUS_OK;
+}
 
 /**
  * fetch a record from the tdb, separating out the header
-- 
1.7.3.4


From 4c2c694fb092e21d3fdd500467a41f5badd74d99 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 8 Nov 2012 11:52:43 +0100
Subject: [PATCH 06/31] s3: Use db_ctdb_ltdb_parse in db_ctdb_ltdb_fetch

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   76 +++++++++++++++++++++++---------------
 1 files changed, 46 insertions(+), 30 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index abcdd35..5e09e1d 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -131,6 +131,39 @@ static NTSTATUS db_ctdb_ltdb_parse(
 	return NT_STATUS_OK;
 }
 
+struct db_ctdb_ltdb_fetch_state {
+	struct ctdb_ltdb_header *header;
+	TALLOC_CTX *mem_ctx;
+	TDB_DATA *data;
+	bool oom;
+};
+
+static void db_ctdb_ltdb_fetch_parser(
+	TDB_DATA key, struct ctdb_ltdb_header *header,
+	TDB_DATA data, void *private_data)
+{
+	struct db_ctdb_ltdb_fetch_state *state =
+		(struct db_ctdb_ltdb_fetch_state *)private_data;
+
+	if (state->header != NULL) {
+		memcpy(state->header, header, sizeof(struct ctdb_ltdb_header));
+	}
+	if (state->data == NULL) {
+		return;
+	}
+	state->data->dsize = data.dsize;
+	if (data.dsize == 0) {
+		state->data->dptr = NULL;
+		return;
+	}
+	state->data->dptr = talloc_memdup(state->mem_ctx, data.dptr,
+					  data.dsize);
+	if (state->data->dptr == NULL) {
+		state->oom = true;
+		return;
+	}
+}
+
 /**
  * fetch a record from the tdb, separating out the header
  * information and returning the body of the record.
@@ -141,12 +174,17 @@ static NTSTATUS db_ctdb_ltdb_fetch(struct db_ctdb_ctx *db,
 				   TALLOC_CTX *mem_ctx,
 				   TDB_DATA *data)
 {
-	TDB_DATA rec;
+	struct db_ctdb_ltdb_fetch_state state;
 	NTSTATUS status;
 
-	rec = tdb_fetch_compat(db->wtdb->tdb, key);
-	if (rec.dsize < sizeof(struct ctdb_ltdb_header)) {
-		status = NT_STATUS_NOT_FOUND;
+	state.header = header;
+	state.mem_ctx = mem_ctx;
+	state.data = data;
+	state.oom = false;
+
+	status = db_ctdb_ltdb_parse(db, key, db_ctdb_ltdb_fetch_parser,
+				    &state);
+	if (!NT_STATUS_IS_OK(status)) {
 		if (data) {
 			ZERO_STRUCTP(data);
 		}
@@ -154,34 +192,12 @@ static NTSTATUS db_ctdb_ltdb_fetch(struct db_ctdb_ctx *db,
 			header->dmaster = (uint32_t)-1;
 			header->rsn = 0;
 		}
-		goto done;
-	}
-
-	if (header) {
-		*header = *(struct ctdb_ltdb_header *)rec.dptr;
+		return status;
 	}
-
-	if (data) {
-		data->dsize = rec.dsize - sizeof(struct ctdb_ltdb_header);
-		if (data->dsize == 0) {
-			data->dptr = NULL;
-		} else {
-			data->dptr = (unsigned char *)talloc_memdup(mem_ctx,
-					rec.dptr
-					 + sizeof(struct ctdb_ltdb_header),
-					data->dsize);
-			if (data->dptr == NULL) {
-				status = NT_STATUS_NO_MEMORY;
-				goto done;
-			}
-		}
+	if (state.oom) {
+		return NT_STATUS_NO_MEMORY;
 	}
-
-	status = NT_STATUS_OK;
-
-done:
-	SAFE_FREE(rec.dptr);
-	return status;
+	return NT_STATUS_OK;
 }
 
 /*
-- 
1.7.3.4


From dae7cd4c26c9c91c0c79b1e094b45d15a2272df6 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Thu, 8 Nov 2012 12:00:11 +0100
Subject: [PATCH 07/31] s3: Use db_ctdb_ltdb_parse in db_ctdb_fetch_db_seqnum_from_db

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   41 +++++++++++++++++++------------------
 1 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 5e09e1d..8a0f806 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -741,6 +741,19 @@ static NTSTATUS db_ctdb_delete_transaction(struct db_record *rec)
 	return status;
 }
 
+static void db_ctdb_fetch_db_seqnum_parser(
+	TDB_DATA key, struct ctdb_ltdb_header *header,
+	TDB_DATA data, void *private_data)
+{
+	uint64_t *seqnum = (uint64_t *)private_data;
+
+	if (data.dsize != sizeof(uint64_t)) {
+		*seqnum = 0;
+		return;
+	}
+	memcpy(seqnum, data.dptr, sizeof(*seqnum));
+}
+
 /**
  * Fetch the db sequence number of a persistent db directly from the db.
  */
@@ -748,36 +761,24 @@ static NTSTATUS db_ctdb_fetch_db_seqnum_from_db(struct db_ctdb_ctx *db,
 						uint64_t *seqnum)
 {
 	NTSTATUS status;
-	const char *keyname = CTDB_DB_SEQNUM_KEY;
 	TDB_DATA key;
-	TDB_DATA data;
-	struct ctdb_ltdb_header header;
-	TALLOC_CTX *mem_ctx = talloc_stackframe();
 
 	if (seqnum == NULL) {
 		return NT_STATUS_INVALID_PARAMETER;
 	}
 
-	key = string_term_tdb_data(keyname);
+	key = string_term_tdb_data(CTDB_DB_SEQNUM_KEY);
 
-	status = db_ctdb_ltdb_fetch(db, key, &header, mem_ctx, &data);
-	if (!NT_STATUS_IS_OK(status) &&
-	    !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND))
-	{
-		goto done;
-	}
-
-	status = NT_STATUS_OK;
+	status = db_ctdb_ltdb_parse(
+		db, key, db_ctdb_fetch_db_seqnum_parser, seqnum);
 
-	if (data.dsize != sizeof(uint64_t)) {
+	if (NT_STATUS_IS_OK(status)) {
+		return NT_STATUS_OK;
+	}
+	if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
 		*seqnum = 0;
-		goto done;
+		return NT_STATUS_OK;
 	}
-
-	*seqnum = *(uint64_t *)data.dptr;
-
-done:
-	TALLOC_FREE(mem_ctx);
 	return status;
 }
 
-- 
1.7.3.4


From 6234944b45cae9fec45406a962968fe6b96ed3f5 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sat, 10 Nov 2012 14:42:21 +0100
Subject: [PATCH 08/31] s3: Slightly simplify db_ctdb_marshall_loop_next

Both callers give a key argument
---
 source3/lib/dbwrap/dbwrap_ctdb.c |    7 +++----
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 8a0f806..43b4b28 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -339,10 +339,9 @@ static struct ctdb_rec_data *db_ctdb_marshall_loop_next(struct ctdb_marshall_buf
 		*reqid = r->reqid;
 	}
 
-	if (key != NULL) {
-		key->dptr   = &r->data[0];
-		key->dsize  = r->keylen;
-	}
+	key->dptr   = &r->data[0];
+	key->dsize  = r->keylen;
+
 	if (data != NULL) {
 		data->dptr  = &r->data[r->keylen];
 		data->dsize = r->datalen;
-- 
1.7.3.4


From e6c676b5ada3258a93f034bd4e86dde82dabf6b2 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sat, 10 Nov 2012 14:46:10 +0100
Subject: [PATCH 09/31] s3: Factor out db_ctdb_marshall_loop_next_key from db_ctdb_marshall_loop_next

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   33 ++++++++++++++++++++++++---------
 1 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 43b4b28..567521c 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -324,24 +324,40 @@ static TDB_DATA db_ctdb_marshall_finish(struct ctdb_marshall_buffer *m)
      - pass r==NULL to start
      - loop the number of times indicated by m->count
 */
+static struct ctdb_rec_data *db_ctdb_marshall_loop_next_key(
+	struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r, TDB_DATA *key)
+{
+	if (r == NULL) {
+		r = (struct ctdb_rec_data *)&m->data[0];
+	} else {
+		r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r);
+	}
+
+	key->dptr   = &r->data[0];
+	key->dsize  = r->keylen;
+	return r;
+}
+
+/*
+   loop over a marshalling buffer
+
+     - pass r==NULL to start
+     - loop the number of times indicated by m->count
+*/
 static struct ctdb_rec_data *db_ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r,
 						     uint32_t *reqid,
 						     struct ctdb_ltdb_header *header,
 						     TDB_DATA *key, TDB_DATA *data)
 {
+	r = db_ctdb_marshall_loop_next_key(m, r, key);
 	if (r == NULL) {
-		r = (struct ctdb_rec_data *)&m->data[0];
-	} else {
-		r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r);
+		return NULL;
 	}
 
 	if (reqid != NULL) {
 		*reqid = r->reqid;
 	}
 
-	key->dptr   = &r->data[0];
-	key->dsize  = r->keylen;
-
 	if (data != NULL) {
 		data->dptr  = &r->data[r->keylen];
 		data->dsize = r->datalen;
@@ -1438,9 +1454,8 @@ static int db_ctdb_traverse(struct db_context *db,
 
 			for (i=0; i<mbuf->count; i++) {
 				TDB_DATA key;
-				rec =db_ctdb_marshall_loop_next(mbuf, rec,
-								NULL, NULL,
-								&key, NULL);
+				rec =db_ctdb_marshall_loop_next_key(
+					mbuf, rec, &key);
 				SMB_ASSERT(rec != NULL);
 
 				if (!tdb_exists(ltdb, key)) {
-- 
1.7.3.4


From ca1799e9c7474f0377a7bc88168ee1a2a54b9013 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sat, 10 Nov 2012 15:03:35 +0100
Subject: [PATCH 10/31] s3: Factor out db_ctdb_marshall_buf_parse from db_ctdb_marshall_loop_next

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   69 +++++++++++--------------------------
 1 files changed, 21 insertions(+), 48 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 567521c..703f29a 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -338,43 +338,22 @@ static struct ctdb_rec_data *db_ctdb_marshall_loop_next_key(
 	return r;
 }
 
-/*
-   loop over a marshalling buffer
-
-     - pass r==NULL to start
-     - loop the number of times indicated by m->count
-*/
-static struct ctdb_rec_data *db_ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r,
-						     uint32_t *reqid,
-						     struct ctdb_ltdb_header *header,
-						     TDB_DATA *key, TDB_DATA *data)
+static bool db_ctdb_marshall_buf_parse(
+	struct ctdb_rec_data *r, uint32_t *reqid,
+	struct ctdb_ltdb_header **header, TDB_DATA *data)
 {
-	r = db_ctdb_marshall_loop_next_key(m, r, key);
-	if (r == NULL) {
-		return NULL;
+	if (r->datalen < sizeof(struct ctdb_ltdb_header)) {
+		return false;
 	}
 
-	if (reqid != NULL) {
-		*reqid = r->reqid;
-	}
+	*reqid = r->reqid;
 
-	if (data != NULL) {
-		data->dptr  = &r->data[r->keylen];
-		data->dsize = r->datalen;
-		if (header != NULL) {
-			data->dptr += sizeof(*header);
-			data->dsize -= sizeof(*header);
-		}
-	}
+	data->dptr  = &r->data[r->keylen] + sizeof(struct ctdb_ltdb_header);
+	data->dsize = r->datalen - sizeof(struct ctdb_ltdb_header);
 
-	if (header != NULL) {
-		if (r->datalen < sizeof(*header)) {
-			return NULL;
-		}
-		*header = *(struct ctdb_ltdb_header *)&r->data[r->keylen];
-	}
+	*header = (struct ctdb_ltdb_header *)&r->data[r->keylen];
 
-	return r;
+	return true;
 }
 
 /**
@@ -460,8 +439,7 @@ static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf,
 					     TDB_DATA *pdata)
 {
 	struct ctdb_rec_data *rec = NULL;
-	struct ctdb_ltdb_header h;
-	bool found = false;
+	struct ctdb_ltdb_header *h = NULL;
 	TDB_DATA data;
 	int i;
 
@@ -469,9 +447,6 @@ static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf,
 		return false;
 	}
 
-	ZERO_STRUCT(h);
-	ZERO_STRUCT(data);
-
 	/*
 	 * Walk the list of records written during this
 	 * transaction. If we want to read one we have already
@@ -481,26 +456,24 @@ static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf,
 	 */
 
 	for (i=0; i<buf->count; i++) {
-		TDB_DATA tkey, tdata;
+		TDB_DATA tkey;
 		uint32_t reqid;
-		struct ctdb_ltdb_header hdr;
-
-		ZERO_STRUCT(hdr);
 
-		rec = db_ctdb_marshall_loop_next(buf, rec, &reqid, &hdr, &tkey,
-						 &tdata);
+		rec = db_ctdb_marshall_loop_next_key(buf, rec, &tkey);
 		if (rec == NULL) {
 			return false;
 		}
 
-		if (tdb_data_equal(key, tkey)) {
-			found = true;
-			data = tdata;
-			h = hdr;
+		if (!tdb_data_equal(key, tkey)) {
+			continue;
+		}
+
+		if (!db_ctdb_marshall_buf_parse(rec, &reqid, &h, &data)) {
+			return false;
 		}
 	}
 
-	if (!found) {
+	if (h == NULL) {
 		return false;
 	}
 
@@ -514,7 +487,7 @@ static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf,
 	}
 
 	if (pheader != NULL) {
-		*pheader = h;
+		*pheader = *h;
 	}
 
 	return true;
-- 
1.7.3.4


From 6bc93755a9298305414fb559e276bc43a2d8e2a3 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 12 Nov 2012 12:13:39 +0100
Subject: [PATCH 11/31] s3: Factor out parse_newest_in_marshall_buffer from pull_newest_from_marshall_buffer

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   66 +++++++++++++++++++++++++++++--------
 1 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 703f29a..160a97d 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -432,11 +432,11 @@ static int db_ctdb_transaction_start(struct db_context *db)
 	return 0;
 }
 
-static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf,
-					     TDB_DATA key,
-					     struct ctdb_ltdb_header *pheader,
-					     TALLOC_CTX *mem_ctx,
-					     TDB_DATA *pdata)
+static bool parse_newest_in_marshall_buffer(
+	struct ctdb_marshall_buffer *buf, TDB_DATA key,
+	void (*parser)(TDB_DATA key, struct ctdb_ltdb_header *header,
+		       TDB_DATA data, void *private_data),
+	void *private_data)
 {
 	struct ctdb_rec_data *rec = NULL;
 	struct ctdb_ltdb_header *h = NULL;
@@ -477,19 +477,55 @@ static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf,
 		return false;
 	}
 
-	if (pdata != NULL) {
-		data.dptr = (uint8_t *)talloc_memdup(mem_ctx, data.dptr,
-						     data.dsize);
-		if ((data.dsize != 0) && (data.dptr == NULL)) {
-			return false;
-		}
-		*pdata = data;
-	}
+	parser(key, h, data, private_data);
+
+	return true;
+}
+
+struct pull_newest_from_marshall_buffer_state {
+	struct ctdb_ltdb_header *pheader;
+	TALLOC_CTX *mem_ctx;
+	TDB_DATA *pdata;
+};
+
+static void pull_newest_from_marshall_buffer_parser(
+	TDB_DATA key, struct ctdb_ltdb_header *header,
+	TDB_DATA data, void *private_data)
+{
+	struct pull_newest_from_marshall_buffer_state *state =
+		(struct pull_newest_from_marshall_buffer_state *)private_data;
 
-	if (pheader != NULL) {
-		*pheader = *h;
+	if (state->pheader != NULL) {
+		memcpy(state->pheader, header, sizeof(*state->pheader));
 	}
+	if (state->pdata != NULL) {
+		state->pdata->dsize = data.dsize;
+		state->pdata->dptr = (uint8_t *)talloc_memdup(
+			state->mem_ctx, data.dptr, data.dsize);
+	}
+}
+
+static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf,
+					     TDB_DATA key,
+					     struct ctdb_ltdb_header *pheader,
+					     TALLOC_CTX *mem_ctx,
+					     TDB_DATA *pdata)
+{
+	struct pull_newest_from_marshall_buffer_state state;
+
+	state.pheader = pheader;
+	state.mem_ctx = mem_ctx;
+	state.pdata = pdata;
 
+	if (!parse_newest_in_marshall_buffer(
+		    buf, key, pull_newest_from_marshall_buffer_parser,
+		    &state)) {
+		return false;
+	}
+	if ((pdata != NULL) && (pdata->dsize != 0) && (pdata->dptr == NULL)) {
+		/* ENOMEM */
+		return false;
+	}
 	return true;
 }
 
-- 
1.7.3.4


From ed91d5ce3f55f30c4bb13af071ae9fbd2b28bc19 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 12 Nov 2012 13:03:56 +0100
Subject: [PATCH 12/31] s3: Avoid db_ctdb_fetch for persistent databases

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   44 ++++++++++++++++++++++++++++++++++++++
 1 files changed, 44 insertions(+), 0 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 160a97d..6bb5392 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -1347,15 +1347,59 @@ static NTSTATUS db_ctdb_fetch(struct db_context *db, TALLOC_CTX *mem_ctx,
 	return status;
 }
 
+struct db_ctdb_parse_record_state {
+	void (*parser)(TDB_DATA key, TDB_DATA data, void *private_data);
+	void *private_data;
+};
+
+static void db_ctdb_parse_record_parser(
+	TDB_DATA key, struct ctdb_ltdb_header *header,
+	TDB_DATA data, void *private_data)
+{
+	struct db_ctdb_parse_record_state *state =
+		(struct db_ctdb_parse_record_state *)private_data;
+	state->parser(key, data, state->private_data);
+}
+
 static NTSTATUS db_ctdb_parse_record(struct db_context *db, TDB_DATA key,
 				     void (*parser)(TDB_DATA key,
 						    TDB_DATA data,
 						    void *private_data),
 				     void *private_data)
 {
+	struct db_ctdb_ctx *ctx = talloc_get_type_abort(
+		db->private_data, struct db_ctdb_ctx);
+	struct db_ctdb_parse_record_state state;
 	NTSTATUS status;
 	TDB_DATA data;
 
+	state.parser = parser;
+	state.private_data = private_data;
+
+	if (ctx->transaction != NULL) {
+		struct db_ctdb_transaction_handle *h = ctx->transaction;
+		bool found;
+
+		/*
+		 * Transactions only happen for persistent db's.
+		 */
+
+		found = parse_newest_in_marshall_buffer(
+			h->m_write, key, db_ctdb_parse_record_parser, &state);
+
+		if (found) {
+			return NT_STATUS_OK;
+		}
+	}
+
+	if (db->persistent) {
+		/*
+		 * Persistent db, but not found in the transaction buffer
+		 */
+		return db_ctdb_ltdb_parse(
+			ctx, key, db_ctdb_parse_record_parser, &state);
+	}
+
 	status = db_ctdb_fetch(db, talloc_tos(), key, &data);
 	if (!NT_STATUS_IS_OK(status)) {
 		return status;
-- 
1.7.3.4


From 3a53bb3b6928a27d6e346e8309f580ca433e656a Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 12 Nov 2012 13:06:38 +0100
Subject: [PATCH 13/31] s3: Remove unused code for fetching persistent ctdb records

The only entry point here is parse_record, and this catches the persistent
case with a direct parse now
---
 source3/lib/dbwrap/dbwrap_ctdb.c |  129 --------------------------------------
 1 files changed, 0 insertions(+), 129 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 6bb5392..47d3b42 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -131,75 +131,6 @@ static NTSTATUS db_ctdb_ltdb_parse(
 	return NT_STATUS_OK;
 }
 
-struct db_ctdb_ltdb_fetch_state {
-	struct ctdb_ltdb_header *header;
-	TALLOC_CTX *mem_ctx;
-	TDB_DATA *data;
-	bool oom;
-};
-
-static void db_ctdb_ltdb_fetch_parser(
-	TDB_DATA key, struct ctdb_ltdb_header *header,
-	TDB_DATA data, void *private_data)
-{
-	struct db_ctdb_ltdb_fetch_state *state =
-		(struct db_ctdb_ltdb_fetch_state *)private_data;
-
-	if (state->header != NULL) {
-		memcpy(state->header, header, sizeof(struct ctdb_ltdb_header));
-	}
-	if (state->data == NULL) {
-		return;
-	}
-	state->data->dsize = data.dsize;
-	if (data.dsize == 0) {
-		state->data->dptr = NULL;
-		return;
-	}
-	state->data->dptr = talloc_memdup(state->mem_ctx, data.dptr,
-					  data.dsize);
-	if (state->data->dptr == NULL) {
-		state->oom = true;
-		return;
-	}
-}
-
-/**
- * fetch a record from the tdb, separating out the header
- * information and returning the body of the record.
- */
-static NTSTATUS db_ctdb_ltdb_fetch(struct db_ctdb_ctx *db,
-				   TDB_DATA key,
-				   struct ctdb_ltdb_header *header,
-				   TALLOC_CTX *mem_ctx,
-				   TDB_DATA *data)
-{
-	struct db_ctdb_ltdb_fetch_state state;
-	NTSTATUS status;
-
-	state.header = header;
-	state.mem_ctx = mem_ctx;
-	state.data = data;
-	state.oom = false;
-
-	status = db_ctdb_ltdb_parse(db, key, db_ctdb_ltdb_fetch_parser,
-				    &state);
-	if (!NT_STATUS_IS_OK(status)) {
-		if (data) {
-			ZERO_STRUCTP(data);
-		}
-		if (header) {
-			header->dmaster = (uint32_t)-1;
-			header->rsn = 0;
-		}
-		return status;
-	}
-	if (state.oom) {
-		return NT_STATUS_NO_MEMORY;
-	}
-	return NT_STATUS_OK;
-}
-
 /*
  * Store a record together with the ctdb record header
  * in the local copy of the database.
@@ -529,58 +460,6 @@ static bool pull_newest_from_marshall_buffer(struct ctdb_marshall_buffer *buf,
 	return true;
 }
 
-/*
-  fetch a record inside a transaction
- */
-static NTSTATUS db_ctdb_transaction_fetch(struct db_ctdb_ctx *db,
-					  TALLOC_CTX *mem_ctx,
-					  TDB_DATA key, TDB_DATA *data)
-{
-	struct db_ctdb_transaction_handle *h = db->transaction;
-	NTSTATUS status;
-	bool found;
-
-	found = pull_newest_from_marshall_buffer(h->m_write, key, NULL,
-						 mem_ctx, data);
-	if (found) {
-		return NT_STATUS_OK;
-	}
-
-	status = db_ctdb_ltdb_fetch(h->ctx, key, NULL, mem_ctx, data);
-
-	if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
-		*data = tdb_null;
-	}
-
-	return status;
-}
-
-/**
- * Fetch a record from a persistent database
- * without record locking and without an active transaction.
- *
- * This just fetches from the local database copy.
- * Since the databases are kept in syc cluster-wide,
- * there is no point in doing a ctdb call to fetch the
- * record from the lmaster. It does even harm since migration
- * of records bump their RSN and hence render the persistent
- * database inconsistent.
- */
-static NTSTATUS db_ctdb_fetch_persistent(struct db_ctdb_ctx *db,
-					 TALLOC_CTX *mem_ctx,
-					 TDB_DATA key, TDB_DATA *data)
-{
-	NTSTATUS status;
-
-	status = db_ctdb_ltdb_fetch(db, key, NULL, mem_ctx, data);
-
-	if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
-		*data = tdb_null;
-	}
-
-	return status;
-}
-
 static NTSTATUS db_ctdb_store_transaction(struct db_record *rec, TDB_DATA data, int flag);
 static NTSTATUS db_ctdb_delete_transaction(struct db_record *rec);
 
@@ -1296,14 +1175,6 @@ static NTSTATUS db_ctdb_fetch(struct db_context *db, TALLOC_CTX *mem_ctx,
 	NTSTATUS status;
 	TDB_DATA ctdb_data;
 
-	if (ctx->transaction) {
-		return db_ctdb_transaction_fetch(ctx, mem_ctx, key, data);
-	}
-
-	if (db->persistent) {
-		return db_ctdb_fetch_persistent(ctx, mem_ctx, key, data);
-	}
-
 	/* try a direct fetch */
 	ctdb_data = tdb_fetch_compat(ctx->wtdb->tdb, key);
 
-- 
1.7.3.4


From 6e9200b687a6a266e29190957c8033b146037032 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 12 Nov 2012 13:27:07 +0100
Subject: [PATCH 14/31] s3: Factor out db_ctdb_can_use_local_hdr from db_ctdb_can_use_local_copy

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   25 ++++++++++++++-----------
 1 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 47d3b42..974c337 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -970,18 +970,9 @@ static int db_ctdb_record_destr(struct db_record* data)
  * Check whether we have a valid local copy of the given record,
  * either for reading or for writing.
  */
-static bool db_ctdb_can_use_local_copy(TDB_DATA ctdb_data, bool read_only)
+static bool db_ctdb_can_use_local_hdr(const struct ctdb_ltdb_header *hdr,
+				      bool read_only)
 {
-	struct ctdb_ltdb_header *hdr;
-
-	if (ctdb_data.dptr == NULL)
-		return false;
-
-	if (ctdb_data.dsize < sizeof(struct ctdb_ltdb_header))
-		return false;
-
-	hdr = (struct ctdb_ltdb_header *)ctdb_data.dptr;
-
 #ifdef HAVE_CTDB_WANT_READONLY_DECL
 	if (hdr->dmaster != get_my_vnn()) {
 		/* If we're not dmaster, it must be r/o copy. */
@@ -997,6 +988,18 @@ static bool db_ctdb_can_use_local_copy(TDB_DATA ctdb_data, bool read_only)
 #endif
 }
 
+static bool db_ctdb_can_use_local_copy(TDB_DATA ctdb_data, bool read_only)
+{
+	if (ctdb_data.dptr == NULL)
+		return false;
+
+	if (ctdb_data.dsize < sizeof(struct ctdb_ltdb_header))
+		return false;
+
+	return db_ctdb_can_use_local_hdr(
+		(struct ctdb_ltdb_header *)ctdb_data.dptr, read_only);
+}
+
 static struct db_record *fetch_locked_internal(struct db_ctdb_ctx *ctx,
 					       TALLOC_CTX *mem_ctx,
 					       TDB_DATA key,
-- 
1.7.3.4


From 12f445f9e5f5514062947391a7645637c4a2330a Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 12 Nov 2012 13:36:48 +0100
Subject: [PATCH 15/31] s3: Directly parse local existing records in db_ctdb_parse_record

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   22 ++++++++++++++++++++++
 1 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 974c337..67109f5 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -1224,6 +1224,7 @@ static NTSTATUS db_ctdb_fetch(struct db_context *db, TALLOC_CTX *mem_ctx,
 struct db_ctdb_parse_record_state {
 	void (*parser)(TDB_DATA key, TDB_DATA data, void *private_data);
 	void *private_data;
+	bool done;
 };
 
 static void db_ctdb_parse_record_parser(
@@ -1235,6 +1236,19 @@ static void db_ctdb_parse_record_parser(
 	state->parser(key, data, state->private_data);
 }
 
+static void db_ctdb_parse_record_parser_nonpersistent(
+	TDB_DATA key, struct ctdb_ltdb_header *header,
+	TDB_DATA data, void *private_data)
+{
+	struct db_ctdb_parse_record_state *state =
+		(struct db_ctdb_parse_record_state *)private_data;
+
+	if (db_ctdb_can_use_local_hdr(header, true)) {
+		state->parser(key, data, state->private_data);
+		state->done = true;
+	}
+}
+
 static NTSTATUS db_ctdb_parse_record(struct db_context *db, TDB_DATA key,
 				     void (*parser)(TDB_DATA key,
 						    TDB_DATA data,
@@ -1274,6 +1288,14 @@ static NTSTATUS db_ctdb_parse_record(struct db_context *db, TDB_DATA key,
 			ctx, key, db_ctdb_parse_record_parser, &state);
 	}
 
+	state.done = false;
+
+	status = db_ctdb_ltdb_parse(
+		ctx, key, db_ctdb_parse_record_parser_nonpersistent, &state);
+	if (NT_STATUS_IS_OK(status) && state.done) {
+		return NT_STATUS_OK;
+	}
+
 	status = db_ctdb_fetch(db, talloc_tos(), key, &data);
 	if (!NT_STATUS_IS_OK(status)) {
 		return status;
-- 
1.7.3.4


From 0d50dfea6e65720482b05f5328419bca702f4bcd Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Mon, 12 Nov 2012 13:42:23 +0100
Subject: [PATCH 16/31] s3: Remove db_ctdb_fetch

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   66 ++++++-------------------------------
 1 files changed, 11 insertions(+), 55 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 67109f5..975cab6 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -1167,63 +1167,10 @@ static struct db_record *db_ctdb_try_fetch_locked(struct db_context *db,
 	return fetch_locked_internal(ctx, mem_ctx, key, true);
 }
 
-/*
-  fetch (unlocked, no migration) operation on ctdb
- */
-static NTSTATUS db_ctdb_fetch(struct db_context *db, TALLOC_CTX *mem_ctx,
-			      TDB_DATA key, TDB_DATA *data)
-{
-	struct db_ctdb_ctx *ctx = talloc_get_type_abort(db->private_data,
-							struct db_ctdb_ctx);
-	NTSTATUS status;
-	TDB_DATA ctdb_data;
-
-	/* try a direct fetch */
-	ctdb_data = tdb_fetch_compat(ctx->wtdb->tdb, key);
-
-	/*
-	 * See if we have a valid record and we are the dmaster. If so, we can
-	 * take the shortcut and just return it.
-	 * we bypass the dmaster check for persistent databases
-	 */
-	if (db_ctdb_can_use_local_copy(ctdb_data, true)) {
-		/*
-		 * We have a valid local copy - avoid the ctdb protocol op
-		 */
-		data->dsize = ctdb_data.dsize - sizeof(struct ctdb_ltdb_header);
-
-		data->dptr = (uint8_t *)talloc_memdup(
-			mem_ctx, ctdb_data.dptr+sizeof(struct ctdb_ltdb_header),
-			data->dsize);
-
-		SAFE_FREE(ctdb_data.dptr);
-
-		if (data->dptr == NULL) {
-			return NT_STATUS_NO_MEMORY;
-		}
-		return NT_STATUS_OK;
-	}
-
-	SAFE_FREE(ctdb_data.dptr);
-
-	/*
-	 * We weren't able to get it locally - ask ctdb to fetch it for us.
-	 * If we already had *something*, it's probably worth making a local
-	 * read-only copy.
-	 */
-	status = ctdbd_fetch(messaging_ctdbd_connection(), ctx->db_id, key,
-			     mem_ctx, data,
-			     ctdb_data.dsize >= sizeof(struct ctdb_ltdb_header));
-	if (!NT_STATUS_IS_OK(status)) {
-		DEBUG(5, ("ctdbd_fetch failed: %s\n", nt_errstr(status)));
-	}
-
-	return status;
-}
-
 struct db_ctdb_parse_record_state {
 	void (*parser)(TDB_DATA key, TDB_DATA data, void *private_data);
 	void *private_data;
+	bool ask_for_readonly_copy;
 	bool done;
 };
 
@@ -1246,6 +1193,13 @@ static void db_ctdb_parse_record_parser_nonpersistent(
 	if (db_ctdb_can_use_local_hdr(header, true)) {
 		state->parser(key, data, state->private_data);
 		state->done = true;
+	} else {
+		/*
+		 * We found something in the db, so it seems that this record,
+		 * while not usable locally right now, is popular. Ask for a
+		 * R/O copy.
+		 */
+		state->ask_for_readonly_copy = true;
 	}
 }
 
@@ -1289,6 +1243,7 @@ static NTSTATUS db_ctdb_parse_record(struct db_context *db, TDB_DATA key,
 	}
 
 	state.done = false;
+	state.ask_for_readonly_copy = false;
 
 	status = db_ctdb_ltdb_parse(
 		ctx, key, db_ctdb_parse_record_parser_nonpersistent, &state);
@@ -1296,7 +1251,8 @@ static NTSTATUS db_ctdb_parse_record(struct db_context *db, TDB_DATA key,
 		return NT_STATUS_OK;
 	}
 
-	status = db_ctdb_fetch(db, talloc_tos(), key, &data);
+	status = ctdbd_fetch(messaging_ctdbd_connection(), ctx->db_id, key,
+			     talloc_tos(), &data, state.ask_for_readonly_copy);
 	if (!NT_STATUS_IS_OK(status)) {
 		return status;
 	}
-- 
1.7.3.4


From 3180941742aac70e48a07dd8810984febff32237 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Fri, 23 Nov 2012 17:54:57 +0100
Subject: [PATCH 17/31] s3: Add ctdbd_parse

---
 source3/include/ctdbd_conn.h |    5 +++
 source3/lib/ctdbd_conn.c     |   71 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 0 deletions(-)

diff --git a/source3/include/ctdbd_conn.h b/source3/include/ctdbd_conn.h
index 5778a92..295d41e 100644
--- a/source3/include/ctdbd_conn.h
+++ b/source3/include/ctdbd_conn.h
@@ -65,6 +65,11 @@ NTSTATUS ctdbd_migrate(struct ctdbd_connection *conn, uint32_t db_id,
 NTSTATUS ctdbd_fetch(struct ctdbd_connection *conn, uint32_t db_id,
 		     TDB_DATA key, TALLOC_CTX *mem_ctx, TDB_DATA *data,
 		     bool local_copy);
+NTSTATUS ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
+		     TDB_DATA key, bool local_copy,
+		     void (*parser)(TDB_DATA key, TDB_DATA data,
+				    void *private_data),
+		     void *private_data);
 
 NTSTATUS ctdbd_traverse(uint32_t db_id,
 			void (*fn)(TDB_DATA key, TDB_DATA data,
diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c
index 84f26e0..3e1f331 100644
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -1494,6 +1494,77 @@ NTSTATUS ctdbd_fetch(struct ctdbd_connection *conn, uint32_t db_id,
 	return status;
 }
 
+/*
+ * Fetch a record and parse it
+ */
+NTSTATUS ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
+		     TDB_DATA key, bool local_copy,
+		     void (*parser)(TDB_DATA key, TDB_DATA data,
+				    void *private_data),
+		     void *private_data)
+{
+	struct ctdb_req_call req;
+	struct ctdb_reply_call *reply;
+	NTSTATUS status;
+	uint32_t flags;
+
+#ifdef HAVE_CTDB_WANT_READONLY_DECL
+	flags = local_copy ? CTDB_WANT_READONLY : 0;
+#else
+	flags = 0;
+#endif
+
+	ZERO_STRUCT(req);
+
+	req.hdr.length = offsetof(struct ctdb_req_call, data) + key.dsize;
+	req.hdr.ctdb_magic   = CTDB_MAGIC;
+	req.hdr.ctdb_version = CTDB_VERSION;
+	req.hdr.operation    = CTDB_REQ_CALL;
+	req.hdr.reqid        = ctdbd_next_reqid(conn);
+	req.flags            = flags;
+	req.callid           = CTDB_FETCH_FUNC;
+	req.db_id            = db_id;
+	req.keylen           = key.dsize;
+
+	status = ctdb_packet_send(
+		conn->pkt, 2,
+		data_blob_const(&req, offsetof(struct ctdb_req_call, data)),
+		data_blob_const(key.dptr, key.dsize));
+
+	if (!NT_STATUS_IS_OK(status)) {
+		DEBUG(3, ("ctdb_packet_send failed: %s\n", nt_errstr(status)));
+		return status;
+	}
+
+	status = ctdb_packet_flush(conn->pkt);
+
+	if (!NT_STATUS_IS_OK(status)) {
+		DEBUG(3, ("write to ctdbd failed: %s\n", nt_errstr(status)));
+		cluster_fatal("cluster dispatch daemon control write error\n");
+	}
+
+	status = ctdb_read_req(conn, req.hdr.reqid, NULL, (void *)&reply);
+
+	if (!NT_STATUS_IS_OK(status)) {
+		DEBUG(0, ("ctdb_read_req failed: %s\n", nt_errstr(status)));
+		goto fail;
+	}
+
+	if (reply->hdr.operation != CTDB_REPLY_CALL) {
+		DEBUG(0, ("received invalid reply\n"));
+		status = NT_STATUS_INTERNAL_ERROR;
+		goto fail;
+	}
+
+	parser(key, make_tdb_data(&reply->data[0], reply->datalen),
+	       private_data);
+
+	status = NT_STATUS_OK;
+ fail:
+	TALLOC_FREE(reply);
+	return status;
+}
+
 struct ctdbd_traverse_state {
 	void (*fn)(TDB_DATA key, TDB_DATA data, void *private_data);
 	void *private_data;
-- 
1.7.3.4


From 3cc61e793348cfb13ab9b90455cff07caed64969 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sat, 24 Nov 2012 14:14:37 +0000
Subject: [PATCH 18/31] s3: Use ctdbd_parse in db_ctdb_parse_record

---
 source3/lib/dbwrap/dbwrap_ctdb.c |   11 ++---------
 1 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 975cab6..4bfd57c 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -1213,7 +1213,6 @@ static NTSTATUS db_ctdb_parse_record(struct db_context *db, TDB_DATA key,
 		db->private_data, struct db_ctdb_ctx);
 	struct db_ctdb_parse_record_state state;
 	NTSTATUS status;
-	TDB_DATA data;
 
 	state.parser = parser;
 	state.private_data = private_data;
@@ -1251,14 +1250,8 @@ static NTSTATUS db_ctdb_parse_record(struct db_context *db, TDB_DATA key,
 		return NT_STATUS_OK;
 	}
 
-	status = ctdbd_fetch(messaging_ctdbd_connection(), ctx->db_id, key,
-			     talloc_tos(), &data, state.ask_for_readonly_copy);
-	if (!NT_STATUS_IS_OK(status)) {
-		return status;
-	}
-	parser(key, data, private_data);
-	TALLOC_FREE(data.dptr);
-	return NT_STATUS_OK;
+	return ctdbd_parse(messaging_ctdbd_connection(), ctx->db_id, key,
+			   state.ask_for_readonly_copy, parser, private_data);
 }
 
 struct traverse_state {
-- 
1.7.3.4


From f302cd4fc3a78c49b6cd42c7ebc9b5ecee19fe41 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sat, 24 Nov 2012 14:15:38 +0000
Subject: [PATCH 19/31] s3: remove ctdbd_fetch

---
 source3/include/ctdbd_conn.h |    3 --
 source3/lib/ctdbd_conn.c     |   81 ------------------------------------------
 2 files changed, 0 insertions(+), 84 deletions(-)

diff --git a/source3/include/ctdbd_conn.h b/source3/include/ctdbd_conn.h
index 295d41e..64cb1d5 100644
--- a/source3/include/ctdbd_conn.h
+++ b/source3/include/ctdbd_conn.h
@@ -62,9 +62,6 @@ NTSTATUS ctdbd_db_attach(struct ctdbd_connection *conn, const char *name,
 NTSTATUS ctdbd_migrate(struct ctdbd_connection *conn, uint32_t db_id,
 		       TDB_DATA key);
 
-NTSTATUS ctdbd_fetch(struct ctdbd_connection *conn, uint32_t db_id,
-		     TDB_DATA key, TALLOC_CTX *mem_ctx, TDB_DATA *data,
-		     bool local_copy);
 NTSTATUS ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
 		     TDB_DATA key, bool local_copy,
 		     void (*parser)(TDB_DATA key, TDB_DATA data,
diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c
index 3e1f331..3905f7a 100644
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -1414,87 +1414,6 @@ NTSTATUS ctdbd_migrate(struct ctdbd_connection *conn, uint32_t db_id,
 }
 
 /*
- * remotely fetch a record (read-only)
- */
-NTSTATUS ctdbd_fetch(struct ctdbd_connection *conn, uint32_t db_id,
-		     TDB_DATA key, TALLOC_CTX *mem_ctx, TDB_DATA *data,
-		     bool local_copy)
-{
-	struct ctdb_req_call req;
-	struct ctdb_reply_call *reply;
-	NTSTATUS status;
-	uint32_t flags;
-
-#ifdef HAVE_CTDB_WANT_READONLY_DECL
-	flags = local_copy ? CTDB_WANT_READONLY : 0;
-#else
-	flags = 0;
-#endif
-
-	ZERO_STRUCT(req);
-
-	req.hdr.length = offsetof(struct ctdb_req_call, data) + key.dsize;
-	req.hdr.ctdb_magic   = CTDB_MAGIC;
-	req.hdr.ctdb_version = CTDB_VERSION;
-	req.hdr.operation    = CTDB_REQ_CALL;
-	req.hdr.reqid        = ctdbd_next_reqid(conn);
-	req.flags            = flags;
-	req.callid           = CTDB_FETCH_FUNC;
-	req.db_id            = db_id;
-	req.keylen           = key.dsize;
-
-	status = ctdb_packet_send(
-		conn->pkt, 2,
-		data_blob_const(&req, offsetof(struct ctdb_req_call, data)),
-		data_blob_const(key.dptr, key.dsize));
-
-	if (!NT_STATUS_IS_OK(status)) {
-		DEBUG(3, ("ctdb_packet_send failed: %s\n", nt_errstr(status)));
-		return status;
-	}
-
-	status = ctdb_packet_flush(conn->pkt);
-
-	if (!NT_STATUS_IS_OK(status)) {
-		DEBUG(3, ("write to ctdbd failed: %s\n", nt_errstr(status)));
-		cluster_fatal("cluster dispatch daemon control write error\n");
-	}
-
-	status = ctdb_read_req(conn, req.hdr.reqid, NULL, (void *)&reply);
-
-	if (!NT_STATUS_IS_OK(status)) {
-		DEBUG(0, ("ctdb_read_req failed: %s\n", nt_errstr(status)));
-		goto fail;
-	}
-
-	if (reply->hdr.operation != CTDB_REPLY_CALL) {
-		DEBUG(0, ("received invalid reply\n"));
-		status = NT_STATUS_INTERNAL_ERROR;
-		goto fail;
-	}
-
-	data->dsize = reply->datalen;
-	if (data->dsize == 0) {
-		data->dptr = NULL;
-		goto done;
-	}
-
-	data->dptr = (uint8 *)talloc_memdup(mem_ctx, &reply->data[0],
-					    reply->datalen);
-	if (data->dptr == NULL) {
-		DEBUG(0, ("talloc failed\n"));
-		status = NT_STATUS_NO_MEMORY;
-		goto fail;
-	}
-
- done:
-	status = NT_STATUS_OK;
- fail:
-	TALLOC_FREE(reply);
-	return status;
-}
-
-/*
  * Fetch a record and parse it
  */
 NTSTATUS ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
-- 
1.7.3.4


From 07ca49fa19e23280ac3237ef71b68115b7011ccb Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sat, 24 Nov 2012 14:42:06 +0000
Subject: [PATCH 20/31] s3: Avoid a talloc_stackframe()

We have only a single allocation in this routine, so I think we can live
without a stackframe.
---
 source3/lib/dbwrap/dbwrap_ctdb.c |    6 ++----
 1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 4bfd57c..0e8b535 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -140,15 +140,13 @@ static NTSTATUS db_ctdb_ltdb_store(struct db_ctdb_ctx *db,
 				   struct ctdb_ltdb_header *header,
 				   TDB_DATA data)
 {
-	TALLOC_CTX *tmp_ctx = talloc_stackframe();
 	TDB_DATA rec;
 	int ret;
 
 	rec.dsize = data.dsize + sizeof(struct ctdb_ltdb_header);
-	rec.dptr = (uint8_t *)talloc_size(tmp_ctx, rec.dsize);
+	rec.dptr = (uint8_t *)talloc_size(talloc_tos(), rec.dsize);
 
 	if (rec.dptr == NULL) {
-		talloc_free(tmp_ctx);
 		return NT_STATUS_NO_MEMORY;
 	}
 
@@ -157,7 +155,7 @@ static NTSTATUS db_ctdb_ltdb_store(struct db_ctdb_ctx *db,
 
 	ret = tdb_store(db->wtdb->tdb, key, rec, TDB_REPLACE);
 
-	talloc_free(tmp_ctx);
+	talloc_free(rec.dptr);
 
 	return (ret == 0) ? NT_STATUS_OK
 			  : tdb_error_to_ntstatus(db->wtdb->tdb);
-- 
1.7.3.4


From d843eb44a8608a7dd3207778e30e674ab476cd6c Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sat, 24 Nov 2012 14:51:02 +0000
Subject: [PATCH 21/31] s3: Use tdb_null in db_ctdb_delete

---
 source3/lib/dbwrap/dbwrap_ctdb.c |    5 +----
 1 files changed, 1 insertions(+), 4 deletions(-)

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 0e8b535..7fed68d 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -909,7 +909,6 @@ static NTSTATUS db_ctdb_send_schedule_for_deletion(struct db_record *rec)
 
 static NTSTATUS db_ctdb_delete(struct db_record *rec)
 {
-	TDB_DATA data;
 	NTSTATUS status;
 
 	/*
@@ -917,9 +916,7 @@ static NTSTATUS db_ctdb_delete(struct db_record *rec)
 	 * tdb-level cleanup
 	 */
 
-	ZERO_STRUCT(data);
-
-	status = db_ctdb_store(rec, data, 0);
+	status = db_ctdb_store(rec, tdb_null, 0);
 	if (!NT_STATUS_IS_OK(status)) {
 		return status;
 	}
-- 
1.7.3.4


From 2b594a57c86e91abba75a0998bc63ed7f630ab38 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Tue, 30 Oct 2012 17:17:33 +0100
Subject: [PATCH 22/31] dbwrap: Add dbwrap_parse_records

This is preparatory work for tuning the notify implementation in the
clustered case. For notify, we have to read a lot of records, most of which
probably do not exist. Figuring out that these records do not exist
takes a while in the ctdb case. By implementing dbwrap_parse_records
we will give dbwrap_ctdb.c the chance to fire off all fetch requests
simultaneously and only have one real round-trip into ctdbd.
---
 lib/dbwrap/dbwrap.c |   41 +++++++++++++++++++++++++++++++++++++++++
 lib/dbwrap/dbwrap.h |    7 +++++++
 2 files changed, 48 insertions(+), 0 deletions(-)

diff --git a/lib/dbwrap/dbwrap.c b/lib/dbwrap/dbwrap.c
index 0e0422c..165b729 100644
--- a/lib/dbwrap/dbwrap.c
+++ b/lib/dbwrap/dbwrap.c
@@ -424,6 +424,47 @@ NTSTATUS dbwrap_parse_record(struct db_context *db, TDB_DATA key,
 	return db->parse_record(db, key, parser, private_data);
 }
 
+struct dbwrap_parse_records_state {
+	void (*parser)(TDB_DATA key, TDB_DATA data,
+		       unsigned key_index, void *private_data);
+	void *private_data;
+	unsigned idx;
+};
+
+static void dbwrap_parse_records_parser(TDB_DATA key, TDB_DATA data,
+					void *private_data)
+{
+	struct dbwrap_parse_records_state *state =
+		(struct dbwrap_parse_records_state *)private_data;
+	state->parser(key, data, state->idx, state->private_data);
+}
+
+NTSTATUS dbwrap_parse_records(struct db_context *db,
+			      TDB_DATA *keys, unsigned num_keys,
+			      void (*parser)(TDB_DATA key, TDB_DATA data,
+					     unsigned key_index,
+					     void *private_data),
+			      void *private_data)
+{
+	struct dbwrap_parse_records_state state;
+
+	state.parser = parser;
+	state.private_data = private_data;
+
+	for (state.idx = 0; state.idx < num_keys; state.idx++) {
+		NTSTATUS status;
+		status = dbwrap_parse_record(
+			db, keys[state.idx], dbwrap_parse_records_parser,
+			&state);
+		if (!NT_STATUS_IS_OK(status) &&
+		    !NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
+			return status;
+		}
+	}
+	return NT_STATUS_OK;
+}
+
+
 int dbwrap_wipe(struct db_context *db)
 {
 	if (db->wipe == NULL) {
diff --git a/lib/dbwrap/dbwrap.h b/lib/dbwrap/dbwrap.h
index 3b0d61c..42ef0e7 100644
--- a/lib/dbwrap/dbwrap.h
+++ b/lib/dbwrap/dbwrap.h
@@ -74,6 +74,13 @@ NTSTATUS dbwrap_parse_record(struct db_context *db, TDB_DATA key,
 			     void (*parser)(TDB_DATA key, TDB_DATA data,
 					    void *private_data),
 			     void *private_data);
+NTSTATUS dbwrap_parse_records(struct db_context *db,
+			      TDB_DATA *keys, unsigned num_keys,
+			      void (*parser)(TDB_DATA key, TDB_DATA data,
+					     unsigned key_index,
+					     void *private_data),
+			      void *private_data);
+
 int dbwrap_wipe(struct db_context *db);
 int dbwrap_check(struct db_context *db);
 int dbwrap_get_seqnum(struct db_context *db);
-- 
1.7.3.4


From f35a4cef413fba54a73cc78064cfce14904d8fbf Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sat, 24 Nov 2012 16:08:07 +0000
Subject: [PATCH 23/31] s3: Add ctdbd_parse_records

---
 source3/include/ctdbd_conn.h |    7 +++
 source3/lib/ctdbd_conn.c     |   97 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 0 deletions(-)

diff --git a/source3/include/ctdbd_conn.h b/source3/include/ctdbd_conn.h
index 64cb1d5..e6188cc 100644
--- a/source3/include/ctdbd_conn.h
+++ b/source3/include/ctdbd_conn.h
@@ -67,6 +67,13 @@ NTSTATUS ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
 		     void (*parser)(TDB_DATA key, TDB_DATA data,
 				    void *private_data),
 		     void *private_data);
+NTSTATUS ctdbd_parse_records(struct ctdbd_connection *conn, uint32_t db_id,
+			     TDB_DATA *keys, unsigned *key_indexes,
+			     unsigned num_keys,
+			     void (*parser)(TDB_DATA key, TDB_DATA data,
+					    unsigned key_index,
+					    void *private_data),
+			     void *private_data);
 
 NTSTATUS ctdbd_traverse(uint32_t db_id,
 			void (*fn)(TDB_DATA key, TDB_DATA data,
diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c
index 3905f7a..91795a9 100644
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -1484,6 +1484,103 @@ NTSTATUS ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
 	return status;
 }
 
+NTSTATUS ctdbd_parse_records(struct ctdbd_connection *conn, uint32_t db_id,
+			     TDB_DATA *keys, unsigned *key_indexes,
+			     unsigned num_keys,
+			     void (*parser)(TDB_DATA key, TDB_DATA data,
+					    unsigned key_index,
+					    void *private_data),
+			     void *private_data)
+{
+	TALLOC_CTX *frame = talloc_stackframe();
+	struct ctdb_req_call req;
+	uint32_t *reqids;
+	unsigned i, num_received;
+	NTSTATUS status;
+
+	reqids = talloc_array(talloc_tos(), uint32_t, num_keys);
+	if (reqids == NULL) {
+		TALLOC_FREE(frame);
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	ZERO_STRUCT(req);
+
+	req.hdr.ctdb_magic   = CTDB_MAGIC;
+	req.hdr.ctdb_version = CTDB_VERSION;
+	req.hdr.operation    = CTDB_REQ_CALL;
+	req.flags            = 0;
+	req.callid           = CTDB_FETCH_FUNC;
+	req.db_id            = db_id;
+
+	for (i=0; i<num_keys; i++) {
+		TDB_DATA key = keys[i];
+
+		reqids[i] = ctdbd_next_reqid(conn);
+
+		req.hdr.length = offsetof(struct ctdb_req_call, data) +
+			key.dsize;
+		req.hdr.reqid = reqids[i];
+		req.keylen = key.dsize;
+
+		status = ctdb_packet_send(
+			conn->pkt, 2,
+			data_blob_const(
+				&req, offsetof(struct ctdb_req_call, data)),
+			data_blob_const(key.dptr, key.dsize));
+		if (!NT_STATUS_IS_OK(status)) {
+			DEBUG(3, ("ctdb_packet_send failed: %s\n",
+				  nt_errstr(status)));
+			goto fail;
+		}
+	}
+
+	status = ctdb_packet_flush(conn->pkt);
+	if (!NT_STATUS_IS_OK(status)) {
+		DEBUG(3, ("write to ctdbd failed: %s\n", nt_errstr(status)));
+		cluster_fatal("cluster dispatch daemon control write error\n");
+	}
+
+	for (num_received = 0; num_received < num_keys; num_received++) {
+		struct ctdb_reply_call *reply;
+		uint32_t reqid;
+
+		status = ctdb_read_req(conn, 0, talloc_tos(), (void *)&reply);
+		if (!NT_STATUS_IS_OK(status)) {
+			DEBUG(10, ("ctdb_read_req failed: %s\n",
+				   nt_errstr(status)));
+			goto fail;
+		}
+		if (reply->hdr.operation != CTDB_REPLY_CALL) {
+			DEBUG(0, ("received invalid reply\n"));
+			status = NT_STATUS_INTERNAL_ERROR;
+			goto fail;
+		}
+		reqid = reply->hdr.reqid;
+
+		for (i=0; i<num_keys; i++) {
+			if (reqid == reqids[i]) {
+				break;
+			}
+		}
+		if (i == num_keys) {
+			DEBUG(10, ("Received unknown request number %u\n",
+				   (unsigned)reqid));
+			goto fail;
+		}
+
+		parser(keys[i], make_tdb_data(&reply->data[0], reply->datalen),
+		       key_indexes[i], private_data);
+
+		TALLOC_FREE(reply);
+		num_received += 1;
+	}
+	status = NT_STATUS_OK;
+fail:
+	TALLOC_FREE(frame);
+	return status;
+}
+
 struct ctdbd_traverse_state {
 	void (*fn)(TDB_DATA key, TDB_DATA data, void *private_data);
 	void *private_data;
-- 
1.7.3.4


From 101561896dbe8f0b5f674e3fa591cebbb03fb358 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Sat, 24 Nov 2012 17:00:34 +0000
Subject: [PATCH 24/31] s3: Add dbwrap_ctdb_parse_records for nonpersistent dbs

---
 lib/dbwrap/dbwrap.c              |    4 ++
 lib/dbwrap/dbwrap_private.h      |    6 ++
 source3/lib/ctdbd_conn.c         |    5 ++-
 source3/lib/dbwrap/dbwrap_ctdb.c |   98 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 112 insertions(+), 1 deletions(-)

diff --git a/lib/dbwrap/dbwrap.c b/lib/dbwrap/dbwrap.c
index 165b729..0f6623a 100644
--- a/lib/dbwrap/dbwrap.c
+++ b/lib/dbwrap/dbwrap.c
@@ -448,6 +448,10 @@ NTSTATUS dbwrap_parse_records(struct db_context *db,
 {
 	struct dbwrap_parse_records_state state;
 
+	if (db->parse_records != NULL) {
+		return db->parse_records(db, keys, num_keys, parser,
+					 private_data);
+	}
 	state.parser = parser;
 	state.private_data = private_data;
 
diff --git a/lib/dbwrap/dbwrap_private.h b/lib/dbwrap/dbwrap_private.h
index d49a568..8ae820a 100644
--- a/lib/dbwrap/dbwrap_private.h
+++ b/lib/dbwrap/dbwrap_private.h
@@ -59,6 +59,12 @@ struct db_context {
 				 void (*parser)(TDB_DATA key, TDB_DATA data,
 						void *private_data),
 				 void *private_data);
+	NTSTATUS (*parse_records)(struct db_context *db, TDB_DATA *keys,
+				  unsigned num_keys,
+				  void (*parser)(TDB_DATA key, TDB_DATA data,
+						 unsigned key_index,
+						 void *private_data),
+				  void *private_data);
 	int (*exists)(struct db_context *db,TDB_DATA key);
 	int (*wipe)(struct db_context *db);
 	int (*check)(struct db_context *db);
diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c
index 91795a9..0cbdb3c 100644
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -1513,6 +1513,8 @@ NTSTATUS ctdbd_parse_records(struct ctdbd_connection *conn, uint32_t db_id,
 	req.callid           = CTDB_FETCH_FUNC;
 	req.db_id            = db_id;
 
+	DEBUG(1, ("parse_records called for %u keys\n", num_keys));
+
 	for (i=0; i<num_keys; i++) {
 		TDB_DATA key = keys[i];
 
@@ -1558,6 +1560,8 @@ NTSTATUS ctdbd_parse_records(struct ctdbd_connection *conn, uint32_t db_id,
 		}
 		reqid = reply->hdr.reqid;
 
+		DEBUG(1, ("received reqid %u\n", (unsigned)reqid));
+
 		for (i=0; i<num_keys; i++) {
 			if (reqid == reqids[i]) {
 				break;
@@ -1573,7 +1577,6 @@ NTSTATUS ctdbd_parse_records(struct ctdbd_connection *conn, uint32_t db_id,
 		       key_indexes[i], private_data);
 
 		TALLOC_FREE(reply);
-		num_received += 1;
 	}
 	status = NT_STATUS_OK;
 fail:
diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 7fed68d..1ed630c 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -979,6 +979,8 @@ static bool db_ctdb_can_use_local_hdr(const struct ctdb_ltdb_header *hdr,
 	 */
 	return read_only || !(hdr->flags & CTDB_REC_RO_HAVE_DELEGATIONS);
 #else
+	DEBUG(10, ("hdr->dmaster=%u, my_vnn=%u\n",
+		   (unsigned)hdr->dmaster, (unsigned)get_my_vnn()));
 	return (hdr->dmaster == get_my_vnn());
 #endif
 }
@@ -1249,6 +1251,99 @@ static NTSTATUS db_ctdb_parse_record(struct db_context *db, TDB_DATA key,
 			   state.ask_for_readonly_copy, parser, private_data);
 }
 
+struct db_ctdb_parse_records_state {
+	void (*parser)(TDB_DATA key, TDB_DATA data, unsigned key_index,
+		       void *private_data);
+	unsigned key_index;
+	void *private_data;
+	bool done;
+};
+
+static void db_ctdb_parse_records_parser(
+	TDB_DATA key, struct ctdb_ltdb_header *header,
+	TDB_DATA data, void *private_data)
+{
+	struct db_ctdb_parse_records_state *state =
+		(struct db_ctdb_parse_records_state *)private_data;
+
+	DEBUG(1, ("parse_records_parser called for idx=%u\n",
+		  state->key_index));
+
+	if (db_ctdb_can_use_local_hdr(header, true)) {
+		state->parser(key, data, state->key_index,
+			      state->private_data);
+		state->done = true;
+	}
+}
+
+/*
+ * Parse a number of records. This callback function is only implemented for
+ * nonpersistent databases. Persistent databases are like local tdb
+ * files. Here the fallback from dbwrap.c kicks in and is just efficient
+ * enough. For nonpersistent tdbs we have to reach out to ctdbd and do this in
+ * parallel.
+ */
+
+static NTSTATUS db_ctdb_parse_records(
+	struct db_context *db, TDB_DATA *keys, unsigned num_keys,
+	void (*parser)(TDB_DATA key, TDB_DATA data, unsigned key_index,
+		       void *private_data),
+	void *private_data)
+{
+	TALLOC_CTX *frame = talloc_stackframe();
+	struct db_ctdb_ctx *ctx = talloc_get_type_abort(
+		db->private_data, struct db_ctdb_ctx);
+	struct db_ctdb_parse_records_state state;
+	TDB_DATA *remote_keys;
+	unsigned i, num_remote_keys;
+	unsigned *key_indexes;
+	NTSTATUS status = NT_STATUS_NO_MEMORY;
+
+	key_indexes = talloc_array(talloc_tos(), unsigned, num_keys);
+	if (key_indexes == NULL) {
+		goto fail;
+	}
+	remote_keys = talloc_array(talloc_tos(), TDB_DATA, num_keys);
+	if (remote_keys == NULL) {
+		goto fail;
+	}
+
+	num_remote_keys = 0;
+	state.parser = parser;
+	state.private_data = private_data;
+
+	for (i=0; i<num_keys; i++) {
+
+		state.done = false;
+		state.key_index = i;
+
+		status = db_ctdb_ltdb_parse(
+			ctx, keys[i], db_ctdb_parse_records_parser, &state);
+		if (NT_STATUS_IS_OK(status) && state.done) {
+			continue;
+		}
+
+		/*
+		 * Ask ctdb about this record
+		 */
+		key_indexes[num_remote_keys] = i;
+		remote_keys[num_remote_keys] = keys[i];
+		num_remote_keys += 1;
+	}
+
+	if (num_remote_keys != 0) {
+		status = ctdbd_parse_records(
+			messaging_ctdbd_connection(), ctx->db_id,
+			remote_keys, key_indexes, num_remote_keys,
+			parser, private_data);
+	} else {
+		status = NT_STATUS_OK;
+	}
+fail:
+	TALLOC_FREE(frame);
+	return status;
+}
+
 struct traverse_state {
 	struct db_context *db;
 	int (*fn)(struct db_record *rec, void *private_data);
@@ -1578,6 +1673,9 @@ struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx,
 	result->fetch_locked = db_ctdb_fetch_locked;
 	result->try_fetch_locked = db_ctdb_try_fetch_locked;
 	result->parse_record = db_ctdb_parse_record;
+	if (!result->persistent) {
+		result->parse_records = db_ctdb_parse_records;
+	}
 	result->traverse = db_ctdb_traverse;
 	result->traverse_read = db_ctdb_traverse_read;
 	result->get_seqnum = db_ctdb_get_seqnum;
-- 
1.7.3.4


From 98bcdf5588e56b62859134cccbe8953d210af86f Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 31 Oct 2012 13:02:19 +0100
Subject: [PATCH 25/31] s3: Avoid some talloc_realloc in notify_internal

In the nonclustered case we will only ever have one vnn in notify_index.tdb.
Even then, without this patch we called talloc_realloc when collecting vnns,
just so that we could use memcpy instead of an explicit copy with a for-loop.
The new code partitions the vnns we see when parsing a notify_index.tdb record
into our own vnn and all foreign vnns, and only collects the foreign ones in
an array.
---
 source3/smbd/notify_internal.c |   22 ++++++++++++++++------
 1 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/source3/smbd/notify_internal.c b/source3/smbd/notify_internal.c
index 938c57b..7a2b1f5 100644
--- a/source3/smbd/notify_internal.c
+++ b/source3/smbd/notify_internal.c
@@ -484,7 +484,7 @@ static void notify_trigger_index_parser(TDB_DATA key, TDB_DATA data,
 	struct notify_trigger_index_state *state =
 		(struct notify_trigger_index_state *)private_data;
 	uint32_t *new_vnns;
-	size_t i, num_vnns, num_new_vnns;
+	size_t i, num_vnns, num_new_vnns, num_remote_vnns;
 
 	if ((data.dsize % sizeof(uint32_t)) != 0) {
 		DEBUG(1, ("Invalid record size in notify index db: %u\n",
@@ -493,22 +493,32 @@ static void notify_trigger_index_parser(TDB_DATA key, TDB_DATA data,
 	}
 	new_vnns = (uint32_t *)data.dptr;
 	num_new_vnns = data.dsize / sizeof(uint32_t);
-
-	num_vnns = talloc_array_length(state->vnns);
+	num_remote_vnns = num_new_vnns;
 
 	for (i=0; i<num_new_vnns; i++) {
 		if (new_vnns[i] == state->my_vnn) {
 			state->found_my_vnn = true;
+			num_remote_vnns -= 1;
 		}
 	}
+	if (num_remote_vnns == 0) {
+		return;
+	}
 
+	num_vnns = talloc_array_length(state->vnns);
 	state->vnns = talloc_realloc(state->mem_ctx, state->vnns, uint32_t,
-				     num_vnns + num_new_vnns);
-	if ((num_vnns + num_new_vnns != 0) && (state->vnns == NULL)) {
+				     num_vnns + num_remote_vnns);
+	if (state->vnns == NULL) {
 		DEBUG(1, ("talloc_realloc failed\n"));
 		return;
 	}
-	memcpy(&state->vnns[num_vnns], data.dptr, data.dsize);
+
+	for (i=0; i<num_new_vnns; i++) {
+		if (new_vnns[i] != state->my_vnn) {
+			state->vnns[num_vnns] = new_vnns[i];
+			num_vnns += 1;
+		}
+	}
 }
 
 static int vnn_cmp(const void *p1, const void *p2)
-- 
1.7.3.4


From 6dc1a109f6e6a1b0bc92365121303a28bc12392c Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 31 Oct 2012 13:08:18 +0100
Subject: [PATCH 26/31] s3: Slightly simplify notify_trigger

This slightly straightens the for-loop that walks the path components.
---
 source3/smbd/notify_internal.c |   11 +++++------
 1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/source3/smbd/notify_internal.c b/source3/smbd/notify_internal.c
index 7a2b1f5..7c00964 100644
--- a/source3/smbd/notify_internal.c
+++ b/source3/smbd/notify_internal.c
@@ -620,6 +620,7 @@ void notify_trigger(struct notify_context *notify,
 
 	idx_state.mem_ctx = talloc_tos();
 	idx_state.vnns = NULL;
+	idx_state.found_my_vnn = false;
 	idx_state.my_vnn = get_my_vnn();
 
 	for (p = path; p != NULL; p = next_p) {
@@ -629,18 +630,16 @@ void notify_trigger(struct notify_context *notify,
 		next_p = strchr(p+1, '/');
 		recursive = (next_p != NULL);
 
-		idx_state.found_my_vnn = false;
-
 		dbwrap_parse_record(
 			notify->db_index,
 			make_tdb_data(discard_const_p(uint8_t, path), path_len),
 			notify_trigger_index_parser, &idx_state);
 
-		if (!idx_state.found_my_vnn) {
-			continue;
+		if (idx_state.found_my_vnn) {
+			notify_trigger_local(notify, action, filter,
+					     path, path_len, recursive);
+			idx_state.found_my_vnn = false;
 		}
-		notify_trigger_local(notify, action, filter,
-				     path, path_len, recursive);
 	}
 
 	ctdbd_conn = messaging_ctdbd_connection();
-- 
1.7.3.4


From 349ec52176379a3c1fa4ae5939be0d4e68389e95 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 31 Oct 2012 13:10:12 +0100
Subject: [PATCH 27/31] s3: Slightly simplify notify_trigger

We have a good chance that we did not collect any remote vnns. This
avoids trying to walk the remote vnns altogether.
---
 source3/smbd/notify_internal.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/source3/smbd/notify_internal.c b/source3/smbd/notify_internal.c
index 7c00964..e2b41c1 100644
--- a/source3/smbd/notify_internal.c
+++ b/source3/smbd/notify_internal.c
@@ -642,6 +642,10 @@ void notify_trigger(struct notify_context *notify,
 		}
 	}
 
+	if (idx_state.vnns == NULL) {
+		goto done;
+	}
+
 	ctdbd_conn = messaging_ctdbd_connection();
 	if (ctdbd_conn == NULL) {
 		goto done;
-- 
1.7.3.4


From 59be500cb1391d2d8fdf87148cc09a55ae117dbf Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 31 Oct 2012 13:11:19 +0100
Subject: [PATCH 28/31] s3: We don't collect our own vnn anymore

notify_trigger_index_parser no longer adds our own vnn to the vnn list
that it collects.
---
 source3/smbd/notify_internal.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/source3/smbd/notify_internal.c b/source3/smbd/notify_internal.c
index e2b41c1..e99f17f 100644
--- a/source3/smbd/notify_internal.c
+++ b/source3/smbd/notify_internal.c
@@ -664,9 +664,6 @@ void notify_trigger(struct notify_context *notify,
 		if (vnn == last_vnn) {
 			continue;
 		}
-		if (vnn == idx_state.my_vnn) {
-			continue;
-		}
 		if ((remote_blob == NULL) &&
 		    !notify_push_remote_blob(
 			    talloc_tos(), action, filter,
-- 
1.7.3.4


From 3136da125e16aacb6fbc17d186229736789e205c Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 31 Oct 2012 13:13:50 +0100
Subject: [PATCH 29/31] s3: Remove an optimization that became unnecessary

Since we now only collect nonlocal vnns in idx_state.vnns, at this point
we *know* we have something to send to a remote node. The previous code
avoided the call to notify_push_remote_blob with an if-statement that
has now become unnecessary.
---
 source3/smbd/notify_internal.c |   13 ++++++-------
 1 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/source3/smbd/notify_internal.c b/source3/smbd/notify_internal.c
index e99f17f..abc8c26 100644
--- a/source3/smbd/notify_internal.c
+++ b/source3/smbd/notify_internal.c
@@ -655,7 +655,12 @@ void notify_trigger(struct notify_context *notify,
 	qsort(idx_state.vnns, num_vnns, sizeof(uint32_t), vnn_cmp);
 
 	last_vnn = 0xffffffff;
-	remote_blob = NULL;
+
+	if (!notify_push_remote_blob(talloc_tos(), action, filter, path,
+				     &remote_blob, &remote_blob_len)) {
+		DEBUG(1, ("notify_push_remote_blob failed\n"));
+		goto done;
+	}
 
 	for (i=0; i<num_vnns; i++) {
 		uint32_t vnn = idx_state.vnns[i];
@@ -664,12 +669,6 @@ void notify_trigger(struct notify_context *notify,
 		if (vnn == last_vnn) {
 			continue;
 		}
-		if ((remote_blob == NULL) &&
-		    !notify_push_remote_blob(
-			    talloc_tos(), action, filter,
-			    path, &remote_blob, &remote_blob_len)) {
-			break;
-		}
 
 		status = ctdbd_messaging_send_blob(
 			ctdbd_conn, vnn, CTDB_SRVID_SAMBA_NOTIFY_PROXY,
-- 
1.7.3.4


From 9739704bd1d41ff1024e3a6af143efb67339de31 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 31 Oct 2012 13:49:52 +0100
Subject: [PATCH 30/31] s3: Use dbwrap_parse_records in notify_trigger

---
 source3/smbd/notify_internal.c |   68 ++++++++++++++++++++++++++++------------
 1 files changed, 48 insertions(+), 20 deletions(-)

diff --git a/source3/smbd/notify_internal.c b/source3/smbd/notify_internal.c
index abc8c26..a46a82c 100644
--- a/source3/smbd/notify_internal.c
+++ b/source3/smbd/notify_internal.c
@@ -40,6 +40,7 @@
 #include "ctdbd_conn.h"
 #include "ctdb_conn.h"
 #include "lib/util/tevent_unix.h"
+#include "lib/util/bitmap.h"
 
 struct notify_list {
 	struct notify_list *next, *prev;
@@ -475,11 +476,11 @@ struct notify_trigger_index_state {
 	TALLOC_CTX *mem_ctx;
 	uint32_t *vnns;
 	uint32_t my_vnn;
-	bool found_my_vnn;
+	struct bitmap *local;
 };
 
 static void notify_trigger_index_parser(TDB_DATA key, TDB_DATA data,
-					void *private_data)
+					unsigned key_idx, void *private_data)
 {
 	struct notify_trigger_index_state *state =
 		(struct notify_trigger_index_state *)private_data;
@@ -497,7 +498,7 @@ static void notify_trigger_index_parser(TDB_DATA key, TDB_DATA data,
 
 	for (i=0; i<num_new_vnns; i++) {
 		if (new_vnns[i] == state->my_vnn) {
-			state->found_my_vnn = true;
+			bitmap_set(state->local, key_idx);
 			num_remote_vnns -= 1;
 		}
 	}
@@ -603,12 +604,14 @@ void notify_trigger(struct notify_context *notify,
 		    uint32_t action, uint32_t filter, const char *path)
 {
 	struct ctdbd_connection *ctdbd_conn;
-	struct notify_trigger_index_state idx_state;
-	const char *p, *next_p;
-	size_t i, num_vnns;
+	struct notify_trigger_index_state idx_state = { 0, };
+	const char *p;
+	size_t i, num_vnns, num_subpaths;
 	uint32_t last_vnn;
 	uint8_t *remote_blob = NULL;
 	size_t remote_blob_len = 0;
+	TDB_DATA *subpaths = NULL;
+	NTSTATUS status;
 
 	DEBUG(10, ("notify_trigger called action=0x%x, filter=0x%x, "
 		   "path=%s\n", (unsigned)action, (unsigned)filter, path));
@@ -618,27 +621,51 @@ void notify_trigger(struct notify_context *notify,
 		return;
 	}
 
+	num_subpaths = 0;
+
+	for (p = path; p != NULL; p = strchr(p+1, '/')) {
+		num_subpaths += 1;
+	}
+
+	subpaths = talloc_array(talloc_tos(), TDB_DATA, num_subpaths);
+	if (subpaths == NULL) {
+		DEBUG(1, ("talloc_array failed\n"));
+		goto done;
+	}
+
+	i = 0;
+	for (p = path; p != NULL; p = strchr(p+1, '/')) {
+		subpaths[i] = make_tdb_data(
+			discard_const_p(uint8_t, path), p - path);
+		i += 1;
+	}
+
 	idx_state.mem_ctx = talloc_tos();
 	idx_state.vnns = NULL;
-	idx_state.found_my_vnn = false;
 	idx_state.my_vnn = get_my_vnn();
 
-	for (p = path; p != NULL; p = next_p) {
-		ptrdiff_t path_len = p - path;
-		bool recursive;
+	idx_state.local = bitmap_talloc(talloc_tos(), num_subpaths);
+	if (idx_state.local == NULL) {
+		DEBUG(1, ("bitmap_talloc failed\n"));
+		goto done;
+	}
 
-		next_p = strchr(p+1, '/');
-		recursive = (next_p != NULL);
+	status = dbwrap_parse_records(
+		notify->db_index, subpaths, num_subpaths,
+		notify_trigger_index_parser, &idx_state);
+	if (!NT_STATUS_IS_OK(status)) {
+		DEBUG(10, ("dbwrap_parse_records failed: %s\n",
+			   nt_errstr(status)));
+		goto done;
+	}
 
-		dbwrap_parse_record(
-			notify->db_index,
-			make_tdb_data(discard_const_p(uint8_t, path), path_len),
-			notify_trigger_index_parser, &idx_state);
+	for (i=0; i<num_subpaths; i++) {
+		if (bitmap_query(idx_state.local, i)) {
+			bool recursive = (i < (num_subpaths-1));
 
-		if (idx_state.found_my_vnn) {
 			notify_trigger_local(notify, action, filter,
-					     path, path_len, recursive);
-			idx_state.found_my_vnn = false;
+					     path, subpaths[i].dsize,
+					     recursive);
 		}
 	}
 
@@ -664,7 +691,6 @@ void notify_trigger(struct notify_context *notify,
 
 	for (i=0; i<num_vnns; i++) {
 		uint32_t vnn = idx_state.vnns[i];
-		NTSTATUS status;
 
 		if (vnn == last_vnn) {
 			continue;
@@ -683,8 +709,10 @@ void notify_trigger(struct notify_context *notify,
 	}
 
 done:
+	TALLOC_FREE(subpaths);
 	TALLOC_FREE(remote_blob);
 	TALLOC_FREE(idx_state.vnns);
+	TALLOC_FREE(idx_state.local);
 }
 
 static void notify_trigger_local(struct notify_context *notify,
-- 
1.7.3.4


From 7b8fea665afbf9e150db1998f75f351bf491c5fc Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 31 Oct 2012 13:51:31 +0100
Subject: [PATCH 31/31] s3: Introduce a talloc_stackframe in notify_trigger

Four individual TALLOC_FREE calls are enough to justify a stackframe, I think :-)
---
 source3/smbd/notify_internal.c |   16 +++++++---------
 1 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/source3/smbd/notify_internal.c b/source3/smbd/notify_internal.c
index a46a82c..ce757dc 100644
--- a/source3/smbd/notify_internal.c
+++ b/source3/smbd/notify_internal.c
@@ -603,14 +603,15 @@ static bool notify_pull_remote_blob(TALLOC_CTX *mem_ctx,
 void notify_trigger(struct notify_context *notify,
 		    uint32_t action, uint32_t filter, const char *path)
 {
+	TALLOC_CTX *frame = talloc_stackframe();
 	struct ctdbd_connection *ctdbd_conn;
-	struct notify_trigger_index_state idx_state = { 0, };
+	struct notify_trigger_index_state idx_state;
 	const char *p;
 	size_t i, num_vnns, num_subpaths;
 	uint32_t last_vnn;
-	uint8_t *remote_blob = NULL;
-	size_t remote_blob_len = 0;
-	TDB_DATA *subpaths = NULL;
+	uint8_t *remote_blob;
+	size_t remote_blob_len;
+	TDB_DATA *subpaths;
 	NTSTATUS status;
 
 	DEBUG(10, ("notify_trigger called action=0x%x, filter=0x%x, "
@@ -618,7 +619,7 @@ void notify_trigger(struct notify_context *notify,
 
 	/* see if change notify is enabled at all */
 	if (notify == NULL) {
-		return;
+		goto done;
 	}
 
 	num_subpaths = 0;
@@ -709,10 +710,7 @@ void notify_trigger(struct notify_context *notify,
 	}
 
 done:
-	TALLOC_FREE(subpaths);
-	TALLOC_FREE(remote_blob);
-	TALLOC_FREE(idx_state.vnns);
-	TALLOC_FREE(idx_state.local);
+	TALLOC_FREE(frame);
 }
 
 static void notify_trigger_local(struct notify_context *notify,
-- 
1.7.3.4



More information about the samba-technical mailing list