[SCM] CTDB repository - branch 1.2.40 updated - ctdb-1.2.67-3-g1dfb6c1

Wed Sep 11 03:15:58 CEST 2013

The branch, 1.2.40, has been updated
       via  1dfb6c1e5fdc9676a7de58bb974f8dddd16e9366 (commit)
       via  96a15964231716b4909d9d5ec19cc4ed1d744177 (commit)
       via  da21e4df641a668bbf379720fbfc556fdadf71a3 (commit)
      from  91f522f928f28b3c3463963aedd71a251545b910 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2.40


- Log -----------------------------------------------------------------
commit 1dfb6c1e5fdc9676a7de58bb974f8dddd16e9366
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Tue Sep 10 16:30:01 2013 +1000

    New version 1.2.68
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>

commit 96a15964231716b4909d9d5ec19cc4ed1d744177
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Sep 6 14:09:09 2013 +1000

    recoverd: Nodes to be used as forced rebalance targets must be healthy
    
    Right now, if a node is marked as a rebalance target then it can host
    IPs even if it is unhealthy, inactive, ...
    
    Also move the log message into the conditional so it is only generated
    if the PNN is actually going to be a rebalancing target.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit da21e4df641a668bbf379720fbfc556fdadf71a3
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Wed Aug 21 14:42:06 2013 +1000

    common/io: Limit the queue buffer size for fair scheduling via tevent
    
    If we process all the data available in a socket buffer, CTDB can stay busy
    processing lots of packets via the immediate event mechanism in tevent.  After
    processing an immediate event, tevent returns without calling epoll_wait.  So
    as long as there are immediate events, tevent will never poll other FDs.  CTDB
    will report this as an "Event handling took xx seconds" warning.  This is
    misleading, since CTDB is very busy processing packets but never gets to the
    point of polling FDs.
    
    The improvement in socket handling made this worse when handling the traverse
    control.  The socket buffer quickly filled up with lots of packets, and CTDB
    stayed busy processing those packets instead of polling other FDs and timer
    events.  This can lead to controls timing out and, in the worst case, other
    nodes marking the busy node as disconnected.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    (cherry picked from commit 92939c1178d04116d842708bc2d6a9c2950e36cc)

-----------------------------------------------------------------------

Summary of changes:
 common/ctdb_io.c           |   43 +++++++++++++++++++++++++++++++------------
 packaging/RPM/ctdb.spec.in |    5 ++++-
 server/ctdb_takeover.c     |    5 +++--
 3 files changed, 38 insertions(+), 15 deletions(-)


Changeset truncated at 500 lines:

diff --git a/common/ctdb_io.c b/common/ctdb_io.c
index 4e164d9..99c50c1 100644
--- a/common/ctdb_io.c
+++ b/common/ctdb_io.c
@@ -30,11 +30,14 @@
 #include "../include/ctdb_client.h"
 #include <stdarg.h>
 
+#define QUEUE_BUFFER_SIZE	(16*1024)
+
 /* structures for packet queueing - see common/ctdb_io.c */
 struct ctdb_buffer {
 	uint8_t *data;
 	uint32_t length;
 	uint32_t size;
+	uint32_t extend;
 };
 
 struct ctdb_queue_pkt {
@@ -114,7 +117,9 @@ static void queue_process(struct ctdb_queue *queue)
 	}
 
 	if (queue->buffer.length < pkt_size) {
-		DEBUG(DEBUG_DEBUG, ("Partial packet data read\n"));
+		if (pkt_size > QUEUE_BUFFER_SIZE) {
+			queue->buffer.extend = pkt_size;
+		}
 		return;
 	}
 
@@ -138,6 +143,11 @@ static void queue_process(struct ctdb_queue *queue)
 		/* There is more data to be processed, schedule an event */
 		tevent_schedule_immediate(queue->im, queue->ctdb->ev,
 					  queue_process_event, queue);
+	} else {
+		if (queue->buffer.size > QUEUE_BUFFER_SIZE) {
+			TALLOC_FREE(queue->buffer.data);
+			queue->buffer.size = 0;
+		}
 	}
 
 	/* It is the responsibility of the callback to free 'data' */
@@ -159,6 +169,7 @@ static void queue_io_read(struct ctdb_queue *queue)
 	int num_ready = 0;
 	ssize_t nread;
 	uint8_t *data;
+	int navail;
 
 	if (ioctl(queue->fd, FIONREAD, &num_ready) != 0) {
 		return;
@@ -170,29 +181,37 @@ static void queue_io_read(struct ctdb_queue *queue)
 
 	if (queue->buffer.data == NULL) {
 		/* starting fresh, allocate buf to read data */
-		queue->buffer.data = talloc_size(queue, num_ready);
+		queue->buffer.data = talloc_size(queue, QUEUE_BUFFER_SIZE);
 		if (queue->buffer.data == NULL) {
 			DEBUG(DEBUG_ERR, ("read error alloc failed for %u\n", num_ready));
 			goto failed;
 		}
-		queue->buffer.size = num_ready;
-	} else if (queue->buffer.length + num_ready > queue->buffer.size) {
+		queue->buffer.size = QUEUE_BUFFER_SIZE;
+	} else if (queue->buffer.extend > 0) {
 		/* extending buffer */
-		data = talloc_realloc_size(queue, queue->buffer.data, queue->buffer.length + num_ready);
+		data = talloc_realloc_size(queue, queue->buffer.data, queue->buffer.extend);
 		if (data == NULL) {
-			DEBUG(DEBUG_ERR, ("read error realloc failed for %u\n", queue->buffer.length + num_ready));
+			DEBUG(DEBUG_ERR, ("read error realloc failed for %u\n", queue->buffer.extend));
 			goto failed;
 		}
 		queue->buffer.data = data;
-		queue->buffer.size = queue->buffer.length + num_ready;
+		queue->buffer.size = queue->buffer.extend;
+		queue->buffer.extend = 0;
 	}
 
-	nread = read(queue->fd, queue->buffer.data + queue->buffer.length, num_ready);
-	if (nread <= 0) {
-		DEBUG(DEBUG_ERR, ("read error nread=%d\n", (int)nread));
-		goto failed;
+	navail = queue->buffer.size - queue->buffer.length;
+	if (num_ready > navail) {
+		num_ready = navail;
+	}
+
+	if (num_ready > 0) {
+		nread = read(queue->fd, queue->buffer.data + queue->buffer.length, num_ready);
+		if (nread <= 0) {
+			DEBUG(DEBUG_ERR, ("read error nread=%d\n", (int)nread));
+			goto failed;
+		}
+		queue->buffer.length += nread;
 	}
-	queue->buffer.length += nread;
 
 	queue_process(queue);
 	return;
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index b3daf2f..baaabbe 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -3,7 +3,7 @@ Name: ctdb
 Summary: Clustered TDB
 Vendor: Samba Team
 Packager: Samba Team <samba at samba.org>
-Version: 1.2.67
+Version: 1.2.68
 Release: 1GITHASH
 Epoch: 0
 License: GNU GPL version 3
@@ -155,6 +155,9 @@ development libraries for ctdb
 
 %changelog
 
+* Tue Sep 10 2013 : Version 1.2.68
+  - Use fixed size queue buffers for fair scheduling across tevent FDs
+  - Nodes to be used as forced rebalance targets must be healthy
 * Wed Aug 14 2013 : Version 1.2.67
   - When takeover fails, call fail callback only once and not once per IP
   - Do not send ipreallocated event to banned nodes
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c
index 9fdf227..721be29 100644
--- a/server/ctdb_takeover.c
+++ b/server/ctdb_takeover.c
@@ -1556,11 +1556,12 @@ void lcp2_init(struct ctdb_context * tmp_ctx,
 	while (force_rebalance_list != NULL) {
 		struct ctdb_rebalancenodes *next = force_rebalance_list->next;
 
-		if (force_rebalance_list->pnn <= nodemap->num) {
+		if (force_rebalance_list->pnn <= nodemap->num &&
+		    !(nodemap->nodes[force_rebalance_list->pnn].flags & mask)) {
 			(*newly_healthy)[force_rebalance_list->pnn] = true;
+			DEBUG(DEBUG_ERR,("During ipreallocation, forced rebalance of node %d\n", force_rebalance_list->pnn));
 		}
 
-		DEBUG(DEBUG_ERR,("During ipreallocation, forced rebalance of node %d\n", force_rebalance_list->pnn));
 		talloc_free(force_rebalance_list);
 		force_rebalance_list = next;
 	}


-- 
CTDB repository