ctdb: Adding memory pool for queue callback
Swen Schillig
swen at vnet.ibm.com
Thu Nov 8 08:22:41 UTC 2018
On Wed, 2018-11-07 at 18:19 +0100, Volker Lendecke wrote:
> On Wed, Nov 07, 2018 at 05:47:02PM +0100, Volker Lendecke via samba-
> technical wrote:
> > On Wed, Nov 07, 2018 at 05:28:04PM +0100, Swen Schillig wrote:
> > > showing values like
> > > [swen at linux ~]$ ./a.out
> > > It took 8.963634 seconds to execute 10 million talloc/free
> > > cycles.
> > > It took 5.951885 seconds to execute 10 million talloc(pool)/free
> > > cycles.
> > > It took 4.095244 seconds to execute 10 million malloc/free
> > > cycles.
> > >
> > > So, I think there's still enough progress to justify the change.
> >
> > This is with -O3, right? With -O0 the picture looks entirely
> > different
> > to me. Are you 100% certain your production build uses -O3?
>
> Just got external confirmation that you are in fact using -O2, which
> yields a performance improvement for the talloc_pool solution.
>
> The patch has one bug: The pool is declared as a char*. This must be
> a
> TALLOC_CTX, which is a void*. Minor one, but required IMO.
Ok, Done !
>
> So based on that micro-benchmark I trust you that you can measure a
> performance improvement in SMB-level benchmarks, otherwise it would
> not be so important for you to get this patch in. The patch has the
> downside that it chews a permanent 1k block of memory per ctdb client
> connection, and there can be a *LOT* of those.
Let's assume we have 1 million, which is more than a 'LOT'
..... this results in 1 GB of memory.
Considering we're talking about a server here, this is very little in
comparison to other requirements.
>
> I know he is difficult to catch these days, but as I am not a core
> ctdb developer, I would love Amitay to give a quick comment.
I'm considering Martin a core developer of CTDB as well, and he gave
his +1 already, but sure, the more eyes that look at this the better.
... just as a side note, Amitay did already "approve" something very
similar but slightly more progressive.
commit 32d867cf09a15626b991be414ab6440f68953f35
Author: Swen Schillig <swen at vnet.ibm.com>
Date: Mon Jan 8 14:55:31 2018 +0100
ctdb-common: Optimize sock_queue's memory managament
Make use of talloc pools for the sock_queue's memory requirements.
Signed-off-by: Swen Schillig <swen at vnet.ibm.com>
Reviewed-by: Amitay Isaacs <amitay at gmail.com>
Reviewed-by: Martin Schwenke <martin at meltin.net>
Autobuild-User(master): Martin Schwenke <martins at samba.org>
Autobuild-Date(master): Tue Jan 30 18:12:32 CET 2018 on sn-devel-144
diff --git a/ctdb/common/sock_io.c b/ctdb/common/sock_io.c
index ef7cfeb2ea1..51341ce023e 100644
--- a/ctdb/common/sock_io.c
+++ b/ctdb/common/sock_io.c
@@ -94,6 +94,17 @@ struct sock_queue {
size_t buflen, begin, end;
};
+/*
+ * The reserved talloc headers, SOCK_QUEUE_OBJ_COUNT,
+ * and the pre-allocated pool-memory SOCK_QUEUE_POOL_SIZE,
+ * are used for the sub-objects queue->im, queue->queue, queue->fde
+ * and queue->buf.
+ * If the memory allocating sub-objects of struct sock_queue change,
+ * those values need to be adjusted.
+ */
+#define SOCK_QUEUE_OBJ_COUNT 4
+#define SOCK_QUEUE_POOL_SIZE 2048
+
static bool sock_queue_set_fd(struct sock_queue *queue, int fd);
static void sock_queue_handler(struct tevent_context *ev,
struct tevent_fd *fde, uint16_t flags,
@@ -111,10 +122,12 @@ struct sock_queue *sock_queue_setup(TALLOC_CTX *mem_ctx,
{
struct sock_queue *queue;
- queue = talloc_zero(mem_ctx, struct sock_queue);
+ queue = talloc_pooled_object(mem_ctx, struct sock_queue,
+ SOCK_QUEUE_OBJ_COUNT, SOCK_QUEUE_POOL_SIZE);
if (queue == NULL) {
return NULL;
}
+ memset(queue, 0, sizeof(struct sock_queue));
queue->ev = ev;
queue->callback = callback;
Cheers Swen
-------------- next part --------------
From 4aef71f929c629c067de5d58f7cf3661d0e56569 Mon Sep 17 00:00:00 2001
From: Swen Schillig <swen at vnet.ibm.com>
Date: Mon, 12 Mar 2018 11:00:55 +0100
Subject: [PATCH 1/2] ctdb: Introduce buffer.offset to avoid memmove
The memmove operation is quite expensive; therefore,
a new buffer attribute "offset" is introduced to support
optimized buffer processing.
Signed-off-by: Swen Schillig <swen at vnet.ibm.com>
---
ctdb/common/ctdb_io.c | 55 ++++++++++++++++++++++++++++++-------------
1 file changed, 39 insertions(+), 16 deletions(-)
diff --git a/ctdb/common/ctdb_io.c b/ctdb/common/ctdb_io.c
index 32d8fc753a6..5bed7a61b31 100644
--- a/ctdb/common/ctdb_io.c
+++ b/ctdb/common/ctdb_io.c
@@ -43,6 +43,7 @@ struct ctdb_buffer {
uint8_t *data;
uint32_t length;
uint32_t size;
+ uint32_t offset;
};
struct ctdb_queue_pkt {
@@ -95,14 +96,14 @@ static void queue_process_event(struct tevent_context *ev, struct tevent_immedia
static void queue_process(struct ctdb_queue *queue)
{
uint32_t pkt_size;
- uint8_t *data;
+ uint8_t *data = NULL;
if (queue->buffer.length < sizeof(pkt_size)) {
return;
}
/* Did we at least read the size into the buffer */
- pkt_size = *(uint32_t *)queue->buffer.data;
+ pkt_size = *(uint32_t *)(queue->buffer.data + queue->buffer.offset);
if (pkt_size == 0) {
DEBUG(DEBUG_CRIT, ("Invalid packet of length 0\n"));
goto failed;
@@ -114,20 +115,26 @@ static void queue_process(struct ctdb_queue *queue)
}
/* Extract complete packet */
- data = talloc_memdup(queue, queue->buffer.data, pkt_size);
+ data = talloc_memdup(queue,
+ queue->buffer.data + queue->buffer.offset,
+ pkt_size);
+
if (data == NULL) {
D_ERR("read error alloc failed for %u\n", pkt_size);
return;
}
- /* Shift packet out from buffer */
- if (queue->buffer.length > pkt_size) {
- memmove(queue->buffer.data,
- queue->buffer.data + pkt_size,
- queue->buffer.length - pkt_size);
- }
+ queue->buffer.offset += pkt_size;
queue->buffer.length -= pkt_size;
+ if (queue->buffer.offset < pkt_size ||
+ queue->buffer.offset > queue->buffer.size) {
+ D_ERR("buffer offset overflow\n");
+ TALLOC_FREE(queue->buffer.data);
+ memset(&queue->buffer, 0, sizeof(queue->buffer));
+ goto failed;
+ }
+
if (queue->buffer.length > 0) {
/* There is more data to be processed, schedule an event */
tevent_schedule_immediate(queue->im, queue->ctdb->ev,
@@ -137,6 +144,7 @@ static void queue_process(struct ctdb_queue *queue)
TALLOC_FREE(queue->buffer.data);
queue->buffer.size = 0;
}
+ queue->buffer.offset = 0;
}
/* It is the responsibility of the callback to free 'data' */
@@ -145,10 +153,8 @@ static void queue_process(struct ctdb_queue *queue)
failed:
queue->callback(NULL, 0, queue->private_data);
-
}
-
/*
called when an incoming connection is readable
This function MUST be safe for reentry via the queue callback!
@@ -156,7 +162,7 @@ failed:
static void queue_io_read(struct ctdb_queue *queue)
{
int num_ready = 0;
- uint32_t pkt_size;
+ uint32_t pkt_size = 0;
ssize_t nread;
uint8_t *data;
@@ -185,12 +191,12 @@ static void queue_io_read(struct ctdb_queue *queue)
goto data_read;
}
- if (queue->buffer.length < sizeof(pkt_size)) {
+ if (sizeof(pkt_size) > queue->buffer.length) {
/* data read is not sufficient to gather message size */
- goto data_read;
+ goto buffer_shift;
}
- pkt_size = *(uint32_t *)queue->buffer.data;
+ pkt_size = *(uint32_t *)(queue->buffer.data + queue->buffer.offset);
if (pkt_size > queue->buffer.size) {
data = talloc_realloc_size(queue,
queue->buffer.data,
@@ -201,6 +207,21 @@ static void queue_io_read(struct ctdb_queue *queue)
}
queue->buffer.data = data;
queue->buffer.size = pkt_size;
+ /* fall through here as we might need to move the data as well */
+ }
+
+buffer_shift:
+ if (sizeof(pkt_size) > queue->buffer.size - queue->buffer.offset ||
+ pkt_size > queue->buffer.size - queue->buffer.offset) {
+ /* Either the offset has progressed too far to host at least
+ * the size information or the remaining space in the buffer
+ * is not sufficient for the full message.
+ * Therefore, move the data and try again.
+ */
+ memmove(queue->buffer.data,
+ queue->buffer.data + queue->buffer.offset,
+ queue->buffer.length);
+ queue->buffer.offset = 0;
}
data_read:
@@ -208,7 +229,9 @@ data_read:
if (num_ready > 0) {
nread = sys_read(queue->fd,
- queue->buffer.data + queue->buffer.length,
+ queue->buffer.data +
+ queue->buffer.offset +
+ queue->buffer.length,
num_ready);
if (nread <= 0) {
DEBUG(DEBUG_ERR, ("read error nread=%d\n", (int)nread));
--
2.17.2
From 229c07d0c045917bc4884cef9db2244025df870d Mon Sep 17 00:00:00 2001
From: Swen Schillig <swen at vnet.ibm.com>
Date: Mon, 12 Mar 2018 17:56:21 +0100
Subject: [PATCH 2/2] ctdb: Adding memory pool for queue callback
The received packet is copied into a newly allocated memory chunk for further
processing by the assigned callback. Once this is done, the memory is free'd.
This is repeated for each received packet making the memory allocation / free
an expensive task. To optimize this process, a memory pool is defined which
is sized identically to the queue's buffer.
During tests it could be seen that more than 95% of all messages were sized
below the standard buffer_size of 1k.
Signed-off-by: Swen Schillig <swen at vnet.ibm.com>
---
ctdb/common/ctdb_io.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/ctdb/common/ctdb_io.c b/ctdb/common/ctdb_io.c
index 5bed7a61b31..d86540762ea 100644
--- a/ctdb/common/ctdb_io.c
+++ b/ctdb/common/ctdb_io.c
@@ -65,6 +65,7 @@ struct ctdb_queue {
size_t alignment;
void *private_data;
ctdb_queue_cb_fn_t callback;
+ TALLOC_CTX *data_pool;
const char *name;
uint32_t buffer_size;
};
@@ -115,7 +116,7 @@ static void queue_process(struct ctdb_queue *queue)
}
/* Extract complete packet */
- data = talloc_memdup(queue,
+ data = talloc_memdup(queue->data_pool,
queue->buffer.data + queue->buffer.offset,
pkt_size);
@@ -479,5 +480,11 @@ struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
queue->buffer_size = 1024;
}
+ queue->data_pool = talloc_pool(queue, queue->buffer_size);
+ if (queue->data_pool == NULL) {
+ TALLOC_FREE(queue);
+ return NULL;
+ }
+
return queue;
}
--
2.17.2
More information about the samba-technical
mailing list