ctdb: Adding memory pool for queue callback

Swen Schillig swen at vnet.ibm.com
Thu Nov 8 08:22:41 UTC 2018


On Wed, 2018-11-07 at 18:19 +0100, Volker Lendecke wrote:
> On Wed, Nov 07, 2018 at 05:47:02PM +0100, Volker Lendecke via samba-
> technical wrote:
> > On Wed, Nov 07, 2018 at 05:28:04PM +0100, Swen Schillig wrote:
> > > showing values like 
> > > [swen at linux ~]$ ./a.out 
> > > It took 8.963634 seconds to execute 10 million talloc/free
> > > cycles.
> > > It took 5.951885 seconds to execute 10 million talloc(pool)/free
> > > cycles.
> > > It took 4.095244 seconds to execute 10 million malloc/free
> > > cycles.
> > > 
> > > So, I think there's still enough progress to justify the change.
> > 
> > This is with -O3, right? With -O0 the picture looks entirely
> > different to me. Are you 100% certain your production build uses -O3?
> 
> Just got external confirmation that you are in fact using -O2, which
> yields a performance improvement for the talloc_pool solution.
> 
> The patch has one bug: The pool is declared as a char*. This must be
> a TALLOC_CTX, which is a void*. Minor one, but required IMO.
Ok, done!
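
(For reference, since the benchmark source was not posted: a minimal
sketch of such a talloc vs. talloc(pool) vs. malloc loop, with an
assumed object size and the 10 million iterations from the numbers
above, could look like the following. Build with -ltalloc.)

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <talloc.h>

#define CYCLES   10000000       /* 10 million, as in the numbers above */
#define OBJ_SIZE 128            /* assumed allocation size */

static double stamp(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec + ts.tv_nsec / 1e9;
}

int main(void)
{
	TALLOC_CTX *pool = talloc_pool(NULL, 1024);
	double t;
	int i;

	t = stamp();
	for (i = 0; i < CYCLES; i++) {
		void *p = talloc_size(NULL, OBJ_SIZE);
		talloc_free(p);
	}
	printf("talloc/free:       %f s\n", stamp() - t);

	t = stamp();
	for (i = 0; i < CYCLES; i++) {
		/* allocation is carved out of the pre-allocated pool */
		void *p = talloc_size(pool, OBJ_SIZE);
		talloc_free(p);
	}
	printf("talloc(pool)/free: %f s\n", stamp() - t);

	t = stamp();
	for (i = 0; i < CYCLES; i++) {
		void *p = malloc(OBJ_SIZE);
		free(p);
	}
	printf("malloc/free:       %f s\n", stamp() - t);

	talloc_free(pool);
	return 0;
}
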
> 
> So based on that micro-benchmark I trust that you can measure a
> performance improvement in SMB-level benchmarks, otherwise it would
> not be so important for you to get this patch in. The patch has the
> downside that it chews a permanent 1k block of memory per ctdb client
> connection, and there can be a *LOT* of those.
Let's assume we have 1 million connections, which is more than a *LOT*;
that results in 1 GB of memory.
Considering we're talking about a server here, this is very little
compared to other requirements.
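
Also worth noting: the 1k is a fixed per-connection overhead, not a
limit. If an allocation does not fit into the remaining pool space,
talloc transparently falls back to a regular heap allocation. A small
sketch of that documented behaviour (the sizes are illustrative):

#include <assert.h>
#include <talloc.h>

int main(void)
{
	/* 1k pool, as proposed per ctdb client connection */
	TALLOC_CTX *pool = talloc_pool(NULL, 1024);

	/* fits: carved out of the pre-allocated pool block */
	void *small = talloc_size(pool, 512);

	/* does not fit: served from the normal heap instead,
	 * but still parented to the pool context as usual */
	void *large = talloc_size(pool, 64 * 1024);

	assert(small != NULL && large != NULL);

	talloc_free(large);
	talloc_free(small);	/* pool empty again -> space is reused */
	talloc_free(pool);
	return 0;
}
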
> 
> I know he is difficult to catch these days, but as I am not a core
> ctdb developer, I would love Amitay to give a quick comment.

I consider Martin a core CTDB developer as well, and he already gave
his +1, but sure, the more eyes that look at this the better.
... just as a side note, Amitay already "approved" something very
similar but slightly more far-reaching.

commit 32d867cf09a15626b991be414ab6440f68953f35
Author: Swen Schillig <swen at vnet.ibm.com>
Date:   Mon Jan 8 14:55:31 2018 +0100

    ctdb-common: Optimize sock_queue's memory management
    
    Make use of talloc pools for the sock_queue's memory requirements.
    
    Signed-off-by: Swen Schillig <swen at vnet.ibm.com>
    Reviewed-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>
    
    Autobuild-User(master): Martin Schwenke <martins at samba.org>
    Autobuild-Date(master): Tue Jan 30 18:12:32 CET 2018 on sn-devel-144

diff --git a/ctdb/common/sock_io.c b/ctdb/common/sock_io.c
index ef7cfeb2ea1..51341ce023e 100644
--- a/ctdb/common/sock_io.c
+++ b/ctdb/common/sock_io.c
@@ -94,6 +94,17 @@ struct sock_queue {
        size_t buflen, begin, end;
 };
 
+/*
+ * The reserved talloc headers, SOCK_QUEUE_OBJ_COUNT,
+ * and the pre-allocated pool-memory SOCK_QUEUE_POOL_SIZE,
+ * are used for the sub-objects queue->im, queue->queue, queue->fde
+ * and queue->buf.
+ * If the memory allocating sub-objects of struct sock_queue change,
+ * those values need to be adjusted.
+ */
+#define SOCK_QUEUE_OBJ_COUNT 4
+#define SOCK_QUEUE_POOL_SIZE 2048
+
 static bool sock_queue_set_fd(struct sock_queue *queue, int fd);
 static void sock_queue_handler(struct tevent_context *ev,
                               struct tevent_fd *fde, uint16_t flags,
@@ -111,10 +122,12 @@ struct sock_queue *sock_queue_setup(TALLOC_CTX *mem_ctx,
 {
        struct sock_queue *queue;
 
-       queue = talloc_zero(mem_ctx, struct sock_queue);
+       queue = talloc_pooled_object(mem_ctx, struct sock_queue,
+                                    SOCK_QUEUE_OBJ_COUNT, SOCK_QUEUE_POOL_SIZE);
        if (queue == NULL) {
                return NULL;
        }
+       memset(queue, 0, sizeof(struct sock_queue));
 
        queue->ev = ev;
        queue->callback = callback;
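
(Side note on the hunk above: unlike talloc_zero(), talloc_pooled_object()
does not zero the returned object, which is why the explicit memset() is
needed. Reduced to the bare pattern, with an illustrative struct and sizes:)

#include <stdint.h>
#include <string.h>
#include <talloc.h>

struct conn {
	char *name;
	uint8_t *buf;
};

/* headers reserved for up to 2 sub-allocations, 1k of pool payload */
#define CONN_OBJ_COUNT 2
#define CONN_POOL_SIZE 1024

static struct conn *conn_setup(TALLOC_CTX *mem_ctx)
{
	/* one allocation: the object itself plus an embedded pool */
	struct conn *c = talloc_pooled_object(mem_ctx, struct conn,
					      CONN_OBJ_COUNT,
					      CONN_POOL_SIZE);
	if (c == NULL) {
		return NULL;
	}
	memset(c, 0, sizeof(*c));  /* not zeroed by talloc_pooled_object() */

	/* these sub-allocations are carved from the embedded pool */
	c->name = talloc_strdup(c, "conn");
	c->buf = talloc_size(c, 512);
	if (c->name == NULL || c->buf == NULL) {
		talloc_free(c);
		return NULL;
	}
	return c;
}

int main(void)
{
	struct conn *c = conn_setup(NULL);
	talloc_free(c);
	return 0;
}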


Cheers Swen
-------------- next part --------------
From 4aef71f929c629c067de5d58f7cf3661d0e56569 Mon Sep 17 00:00:00 2001
From: Swen Schillig <swen at vnet.ibm.com>
Date: Mon, 12 Mar 2018 11:00:55 +0100
Subject: [PATCH 1/2] ctdb: Introduce buffer.offset to avoid memmove

The memmove operation is quite expensive, therefore a new buffer
attribute "offset" is introduced to support optimized buffer
processing.

Signed-off-by: Swen Schillig <swen at vnet.ibm.com>
---
 ctdb/common/ctdb_io.c | 55 ++++++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 16 deletions(-)

diff --git a/ctdb/common/ctdb_io.c b/ctdb/common/ctdb_io.c
index 32d8fc753a6..5bed7a61b31 100644
--- a/ctdb/common/ctdb_io.c
+++ b/ctdb/common/ctdb_io.c
@@ -43,6 +43,7 @@ struct ctdb_buffer {
 	uint8_t *data;
 	uint32_t length;
 	uint32_t size;
+	uint32_t offset;
 };
 
 struct ctdb_queue_pkt {
@@ -95,14 +96,14 @@ static void queue_process_event(struct tevent_context *ev, struct tevent_immedia
 static void queue_process(struct ctdb_queue *queue)
 {
 	uint32_t pkt_size;
-	uint8_t *data;
+	uint8_t *data = NULL;
 
 	if (queue->buffer.length < sizeof(pkt_size)) {
 		return;
 	}
 
 	/* Did we at least read the size into the buffer */
-	pkt_size = *(uint32_t *)queue->buffer.data;
+	pkt_size = *(uint32_t *)(queue->buffer.data + queue->buffer.offset);
 	if (pkt_size == 0) {
 		DEBUG(DEBUG_CRIT, ("Invalid packet of length 0\n"));
 		goto failed;
@@ -114,20 +115,26 @@ static void queue_process(struct ctdb_queue *queue)
 	}
 
 	/* Extract complete packet */
-	data = talloc_memdup(queue, queue->buffer.data, pkt_size);
+	data = talloc_memdup(queue,
+			     queue->buffer.data + queue->buffer.offset,
+			     pkt_size);
+
 	if (data == NULL) {
 		D_ERR("read error alloc failed for %u\n", pkt_size);
 		return;
 	}
 
-	/* Shift packet out from buffer */
-	if (queue->buffer.length > pkt_size) {
-		memmove(queue->buffer.data,
-			queue->buffer.data + pkt_size,
-			queue->buffer.length - pkt_size);
-	}
+	queue->buffer.offset += pkt_size;
 	queue->buffer.length -= pkt_size;
 
+	if (queue->buffer.offset < pkt_size ||
+	    queue->buffer.offset > queue->buffer.size) {
+		D_ERR("buffer offset overflow\n");
+		TALLOC_FREE(queue->buffer.data);
+		memset(&queue->buffer, 0, sizeof(queue->buffer));
+		goto failed;
+	}
+
 	if (queue->buffer.length > 0) {
 		/* There is more data to be processed, schedule an event */
 		tevent_schedule_immediate(queue->im, queue->ctdb->ev,
@@ -137,6 +144,7 @@ static void queue_process(struct ctdb_queue *queue)
 			TALLOC_FREE(queue->buffer.data);
 			queue->buffer.size = 0;
 		}
+		queue->buffer.offset = 0;
 	}
 
 	/* It is the responsibility of the callback to free 'data' */
@@ -145,10 +153,8 @@ static void queue_process(struct ctdb_queue *queue)
 
 failed:
 	queue->callback(NULL, 0, queue->private_data);
-
 }
 
-
 /*
   called when an incoming connection is readable
   This function MUST be safe for reentry via the queue callback!
@@ -156,7 +162,7 @@ failed:
 static void queue_io_read(struct ctdb_queue *queue)
 {
 	int num_ready = 0;
-	uint32_t pkt_size;
+	uint32_t pkt_size = 0;
 	ssize_t nread;
 	uint8_t *data;
 
@@ -185,12 +191,12 @@ static void queue_io_read(struct ctdb_queue *queue)
 		goto data_read;
 	}
 
-	if (queue->buffer.length < sizeof(pkt_size)) {
+	if (sizeof(pkt_size) > queue->buffer.length) {
 		/* data read is not sufficient to gather message size */
-		goto data_read;
+		goto buffer_shift;
 	}
 
-	pkt_size = *(uint32_t *)queue->buffer.data;
+	pkt_size = *(uint32_t *)(queue->buffer.data + queue->buffer.offset);
 	if (pkt_size > queue->buffer.size) {
 		data = talloc_realloc_size(queue,
 					   queue->buffer.data,
@@ -201,6 +207,21 @@ static void queue_io_read(struct ctdb_queue *queue)
 		}
 		queue->buffer.data = data;
 		queue->buffer.size = pkt_size;
+		/* fall through here as we might need to move the data as well */
+	}
+
+buffer_shift:
+	if (sizeof(pkt_size) > queue->buffer.size - queue->buffer.offset ||
+	    pkt_size > queue->buffer.size - queue->buffer.offset) {
+		/* Either the offset has progressed too far to host at least
+		 * the size information or the remaining space in the buffer
+		 * is not sufficient for the full message.
+		 * Therefore, move the data and try again.
+		 */
+		memmove(queue->buffer.data,
+			queue->buffer.data + queue->buffer.offset,
+			queue->buffer.length);
+		queue->buffer.offset = 0;
 	}
 
 data_read:
@@ -208,7 +229,9 @@ data_read:
 
 	if (num_ready > 0) {
 		nread = sys_read(queue->fd,
-				 queue->buffer.data + queue->buffer.length,
+				 queue->buffer.data +
+					queue->buffer.offset +
+					queue->buffer.length,
 				 num_ready);
 		if (nread <= 0) {
 			DEBUG(DEBUG_ERR, ("read error nread=%d\n", (int)nread));
-- 
2.17.2
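
(For illustration, the consumption pattern introduced by the first patch,
stripped of the ctdb specifics: a read offset is advanced for every
extracted packet, and the data is only compacted via memmove when the
remaining tail of the buffer can no longer hold the next packet. Names
and sizes below are illustrative, not taken from the patch.)

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
	uint8_t *data;
	uint32_t length;	/* unconsumed bytes */
	uint32_t size;		/* allocated size */
	uint32_t offset;	/* start of the unconsumed region */
};

/* consume one packet of pkt_size bytes without moving any memory */
static void buf_consume(struct buf *b, uint32_t pkt_size)
{
	b->offset += pkt_size;
	b->length -= pkt_size;
	if (b->length == 0) {
		b->offset = 0;	/* drained, start at the front again */
	}
}

/* compact only when the tail can no longer hold 'needed' bytes */
static void buf_make_room(struct buf *b, uint32_t needed)
{
	if (needed > b->size - b->offset) {
		memmove(b->data, b->data + b->offset, b->length);
		b->offset = 0;
	}
}

int main(void)
{
	struct buf b = { .data = malloc(1024), .size = 1024 };

	/* pretend three 100-byte packets were read into the buffer */
	b.length = 300;
	while (b.length >= 100) {
		buf_consume(&b, 100);	/* no memmove per packet */
	}
	buf_make_room(&b, 1024);	/* compacts only if necessary */
	printf("offset=%u length=%u\n", b.offset, b.length);

	free(b.data);
	return 0;
}
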


From 229c07d0c045917bc4884cef9db2244025df870d Mon Sep 17 00:00:00 2001
From: Swen Schillig <swen at vnet.ibm.com>
Date: Mon, 12 Mar 2018 17:56:21 +0100
Subject: [PATCH 2/2] ctdb: Adding memory pool for queue callback

The received packet is copied into a newly allocated memory chunk for further
processing by the assigned callback. Once this is done, the memory is freed.
This is repeated for each received packet, making memory allocation/free
an expensive task. To optimize this process, a memory pool is defined which
is sized identically to the queue's buffer.
During tests, more than 95% of all messages were smaller than the standard
buffer_size of 1k.

Signed-off-by: Swen Schillig <swen at vnet.ibm.com>
---
 ctdb/common/ctdb_io.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/ctdb/common/ctdb_io.c b/ctdb/common/ctdb_io.c
index 5bed7a61b31..d86540762ea 100644
--- a/ctdb/common/ctdb_io.c
+++ b/ctdb/common/ctdb_io.c
@@ -65,6 +65,7 @@ struct ctdb_queue {
 	size_t alignment;
 	void *private_data;
 	ctdb_queue_cb_fn_t callback;
+	TALLOC_CTX *data_pool;
 	const char *name;
 	uint32_t buffer_size;
 };
@@ -115,7 +116,7 @@ static void queue_process(struct ctdb_queue *queue)
 	}
 
 	/* Extract complete packet */
-	data = talloc_memdup(queue,
+	data = talloc_memdup(queue->data_pool,
 			     queue->buffer.data + queue->buffer.offset,
 			     pkt_size);
 
@@ -479,5 +480,11 @@ struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
 		queue->buffer_size = 1024;
 	}
 
+	queue->data_pool = talloc_pool(queue, queue->buffer_size);
+	if (queue->data_pool == NULL) {
+		TALLOC_FREE(queue);
+		return NULL;
+	}
+
 	return queue;
 }
-- 
2.17.2
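
(The second patch, reduced to its core: the per-packet copy handed to the
callback is taken from a talloc pool instead of the heap. Because the
callback frees each copy before the next packet is processed, the pool is
emptied after every packet and its space is reused, so the steady state
needs no extra malloc/free traffic. A minimal sketch; the 1k size mirrors
the default buffer_size and the callback is illustrative.)

#include <stdint.h>
#include <stdio.h>
#include <talloc.h>

static void packet_callback(uint8_t *data, uint32_t length)
{
	printf("processed %u bytes\n", length);
	/* as in ctdb_io.c, the callback frees the copy; once the pool
	 * is empty again its space is reused for the next packet */
	talloc_free(data);
}

int main(void)
{
	TALLOC_CTX *data_pool = talloc_pool(NULL, 1024);
	uint8_t wire[256] = { 0 };	/* stand-in for the receive buffer */
	int i;

	for (i = 0; i < 3; i++) {
		/* the per-packet copy is carved from the pool, so the
		 * hot path avoids a heap malloc/free per packet */
		uint8_t *pkt = talloc_memdup(data_pool, wire, sizeof(wire));
		if (pkt == NULL) {
			break;
		}
		packet_callback(pkt, sizeof(wire));
	}

	talloc_free(data_pool);
	return 0;
}
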


