[SCM] CTDB repository - branch master updated - ctdb-1.0.65-14-gd1057ed

Ronnie Sahlberg sahlberg at samba.org
Tue Dec 2 02:28:36 GMT 2008


The branch, master has been updated
       via  d1057ed6de7de9f2a64d8fa012c52647e89b515b (commit)
      from  e577a276900854622f4e9da9d1ccd7b484d0d1ec (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit d1057ed6de7de9f2a64d8fa012c52647e89b515b
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Dec 2 13:26:30 2008 +1100

    redesign how reloadnodes is implemented.
    
    modify the transport methods to allow individual connections to be
    restarted, and set up destructors properly.
    
    only tear down/set-up tcp connections to nodes removed from the cluster
    or nodes added to the cluster.
    Leave tcp connections to unchanged nodes connected.
    
    make "ctdb reloadnodes" explicitly cause a recovery of the cluster once
    the files have been reloaded

-----------------------------------------------------------------------

Summary of changes:
 include/ctdb_private.h |    3 ++-
 server/ctdb_recover.c  |   29 ++++++++++++++++++++++++++---
 tcp/tcp_connect.c      |    5 ++++-
 tcp/tcp_init.c         |   48 ++++++++++++++++++++++++++++++++++++------------
 tools/ctdb.c           |    3 +++
 5 files changed, 71 insertions(+), 17 deletions(-)


Changeset truncated at 500 lines:

diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 19f85f2..c40ffbd 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -236,8 +236,9 @@ struct ctdb_node {
 */
 struct ctdb_methods {
 	int (*initialise)(struct ctdb_context *); /* initialise transport structures */	
-	int (*start)(struct ctdb_context *); /* start protocol processing */	
+	int (*start)(struct ctdb_context *); /* start the transport */
 	int (*add_node)(struct ctdb_node *); /* setup a new node */	
+	int (*connect_node)(struct ctdb_node *); /* connect to node */
 	int (*queue_pkt)(struct ctdb_node *, uint8_t *data, uint32_t length);
 	void *(*allocate_pkt)(TALLOC_CTX *mem_ctx, size_t );
 	void (*shutdown)(struct ctdb_context *); /* shutdown transport */
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index 9be2eb2..8bed9e6 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -213,20 +213,43 @@ static void
 ctdb_reload_nodes_event(struct event_context *ev, struct timed_event *te, 
 			       struct timeval t, void *private_data)
 {
-	int i;
-
+	int i, num_nodes;
 	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	TALLOC_CTX *tmp_ctx;
+	struct ctdb_node **nodes;	
+
+	tmp_ctx = talloc_new(ctdb);
+
+	/* steal the old nodes file for a while */
+	talloc_steal(tmp_ctx, ctdb->nodes);
+	nodes = ctdb->nodes;
+	ctdb->nodes = NULL;
+	num_nodes = ctdb->num_nodes;
+	ctdb->num_nodes = 0;
 
+	/* load the new nodes file */
 	ctdb_load_nodes_file(ctdb);
 
 	for (i=0; i<ctdb->num_nodes; i++) {
+		/* keep any identical pre-existing nodes and connections */
+		if ((i < num_nodes) && ctdb_same_address(&ctdb->nodes[i]->address, &nodes[i]->address)) {
+			talloc_free(ctdb->nodes[i]);
+			ctdb->nodes[i] = talloc_steal(ctdb->nodes, nodes[i]);
+			continue;
+		}
+
+		/* any new or different nodes must be added */
 		if (ctdb->methods->add_node(ctdb->nodes[i]) != 0) {
 			DEBUG(DEBUG_CRIT, (__location__ " methods->add_node failed at %d\n", i));
 			ctdb_fatal(ctdb, "failed to add node. shutting down\n");
 		}
+		if (ctdb->methods->connect_node(ctdb->nodes[i]) != 0) {
+			DEBUG(DEBUG_CRIT, (__location__ " methods->add_connect failed at %d\n", i));
+			ctdb_fatal(ctdb, "failed to connect to node. shutting down\n");
+		}
 	}
-	ctdb->methods->start(ctdb);
 
+	talloc_free(tmp_ctx);
 	return;
 }
 
diff --git a/tcp/tcp_connect.c b/tcp/tcp_connect.c
index 0e892af..bd8af57 100644
--- a/tcp/tcp_connect.c
+++ b/tcp/tcp_connect.c
@@ -47,6 +47,7 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node)
 
 /*
   called when a complete packet has come in - should not happen on this socket
+  unless the other side closes the connection with RST or FIN
  */
 void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data)
 {
@@ -59,7 +60,8 @@ void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data)
 	}
 
 	ctdb_tcp_stop_connection(node);
-	tnode->connect_te = event_add_timed(node->ctdb->ev, tnode, timeval_zero(),
+	tnode->connect_te = event_add_timed(node->ctdb->ev, tnode,
+					    timeval_current_ofs(3, 0),
 					    ctdb_tcp_node_connect, node);
 }
 
@@ -149,6 +151,7 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
 		return;
 	}
 
+DEBUG(DEBUG_ERR,("create socket...\n"));
 	tnode->fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
 	set_nonblocking(tnode->fd);
 	set_close_on_exec(tnode->fd);
diff --git a/tcp/tcp_init.c b/tcp/tcp_init.c
index 8b33efe..737bd8e 100644
--- a/tcp/tcp_init.c
+++ b/tcp/tcp_init.c
@@ -25,6 +25,17 @@
 #include "../include/ctdb_private.h"
 #include "ctdb_tcp.h"
 
+static int tnode_destructor(struct ctdb_tcp_node *tnode)
+{
+	struct ctdb_node *node = talloc_find_parent_bytype(tnode, struct ctdb_node);
+
+	if (tnode->fd != -1) {
+		close(tnode->fd);
+		tnode->fd = -1;
+	}
+
+	return 0;
+}
 
 /*
   initialise tcp portion of a ctdb node 
@@ -37,6 +48,7 @@ static int ctdb_tcp_add_node(struct ctdb_node *node)
 
 	tnode->fd = -1;
 	node->private_data = tnode;
+	talloc_set_destructor(tnode, tnode_destructor);
 
 	tnode->out_queue = ctdb_queue_setup(node->ctdb, node, tnode->fd, CTDB_TCP_ALIGNMENT,
 					ctdb_tcp_tnode_cb, node);
@@ -70,21 +82,18 @@ static int ctdb_tcp_initialise(struct ctdb_context *ctdb)
 /*
   start the protocol going
 */
-static int ctdb_tcp_start(struct ctdb_context *ctdb)
+static int ctdb_tcp_connect_node(struct ctdb_node *node)
 {
-	int i;
+	struct ctdb_context *ctdb = node->ctdb;
+	struct ctdb_tcp_node *tnode = talloc_get_type(
+		node->private_data, struct ctdb_tcp_node);
 
-	/* startup connections to the other servers - will happen on
+	/* startup connection to the other server - will happen on
 	   next event loop */
-	for (i=0;i<ctdb->num_nodes;i++) {
-		struct ctdb_node *node = *(ctdb->nodes + i);
-		struct ctdb_tcp_node *tnode = talloc_get_type(
-			node->private_data, struct ctdb_tcp_node);
-		if (!ctdb_same_address(&ctdb->address, &node->address)) {
-			tnode->connect_te = event_add_timed(ctdb->ev, tnode, 
-							    timeval_zero(), 
-							    ctdb_tcp_node_connect, node);
-		}
+	if (!ctdb_same_address(&ctdb->address, &node->address)) {
+		tnode->connect_te = event_add_timed(ctdb->ev, tnode, 
+						    timeval_zero(), 
+						    ctdb_tcp_node_connect, node);
 	}
 
 	return 0;
@@ -119,6 +128,20 @@ static void ctdb_tcp_shutdown(struct ctdb_context *ctdb)
 	ctdb->private_data = NULL;
 }
 
+/*
+  start the transport
+*/
+static int ctdb_tcp_start(struct ctdb_context *ctdb)
+{
+	int i;
+
+	for (i=0; i<ctdb->num_nodes; i++) {
+		ctdb_tcp_connect_node(ctdb->nodes[i]);
+	}
+
+	return 0;
+}
+
 
 /*
   transport packet allocator - allows transport to control memory for packets
@@ -138,6 +161,7 @@ static const struct ctdb_methods ctdb_tcp_methods = {
 	.start        = ctdb_tcp_start,
 	.queue_pkt    = ctdb_tcp_queue_pkt,
 	.add_node     = ctdb_tcp_add_node,
+	.connect_node = ctdb_tcp_connect_node,
 	.allocate_pkt = ctdb_tcp_allocate_pkt,
 	.shutdown     = ctdb_tcp_shutdown,
 	.restart      = ctdb_tcp_restart,
diff --git a/tools/ctdb.c b/tools/ctdb.c
index 63fba20..c9656fe 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -2406,6 +2406,9 @@ static int control_reload_nodes_file(struct ctdb_context *ctdb, int argc, const
 		DEBUG(DEBUG_ERR, ("ERROR: Failed to reload nodes file on node %u. You MUST fix that node manually!\n", mypnn));
 	}
 
+	/* initiate a recovery */
+	control_recover(ctdb, argc, argv);
+
 	return 0;
 }
 


-- 
CTDB repository


More information about the samba-cvs mailing list