Rev 320: timeout pending controls immediately when a node becomes disconnected in http://samba.org/~tridge/ctdb

tridge at samba.org tridge at samba.org
Fri May 18 13:48:29 GMT 2007


------------------------------------------------------------
revno: 320
revision-id: tridge at samba.org-20070518134829-y384ldoxc07qxxi5
parent: tridge at samba.org-20070518132336-gjpdua5y9j3ydutx
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge
timestamp: Fri 2007-05-18 23:48:29 +1000
message:
  timeout pending controls immediately when a node becomes disconnected
modified:
  common/ctdb.c                  ctdb.c-20061127094323-t50f58d65iaao5of-2
  common/ctdb_daemon.c           ctdb_daemon.c-20070409200331-3el1kqgdb9m4ib0g-1
  common/ctdb_monitor.c          ctdb_monitor.c-20070518100625-8jf4ft1mjzmb22ck-1
  include/ctdb_private.h         ctdb_private.h-20061117234101-o3qt14umlg9en8z0-13
=== modified file 'common/ctdb.c'
--- a/common/ctdb.c	2007-05-18 13:23:36 +0000
+++ b/common/ctdb.c	2007-05-18 13:48:29 +0000
@@ -379,6 +379,7 @@
 	node->flags &= ~NODE_FLAGS_CONNECTED;
 	DEBUG(1,("%s: node %s is dead: %d connected\n", 
 		 node->ctdb->name, node->name, node->ctdb->num_connected));
+	ctdb_daemon_cancel_controls(node->ctdb, node);
 }
 
 /*

=== modified file 'common/ctdb_daemon.c'
--- a/common/ctdb_daemon.c	2007-05-18 09:19:35 +0000
+++ b/common/ctdb_daemon.c	2007-05-18 13:48:29 +0000
@@ -836,16 +836,18 @@
 
 
 struct daemon_control_state {
+	struct daemon_control_state *next, *prev; /* list linkage; presumably what DLIST_ADD/DLIST_REMOVE use to chain states on a node's pending_controls */
 	struct ctdb_client *client;
 	struct ctdb_req_control *c;
 	uint32_t reqid;
+	struct ctdb_node *node; /* destination node whose pending_controls list holds this state; NULL when destnode failed ctdb_validate_vnn */
 };
 
 /*
   callback when a control reply comes in
  */
 static void daemon_control_callback(struct ctdb_context *ctdb,
-				    uint32_t status, TDB_DATA data, 
+				    int32_t status, TDB_DATA data, 
 				    const char *errormsg,
 				    void *private_data)
 {
@@ -880,6 +882,30 @@
 }
 
 /*
+  fail all pending controls to a disconnected node: each queued state is unlinked and its callback is run with status -1, no data and the error "node is disconnected"
+ */
+void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node)
+{
+	struct daemon_control_state *state;
+	while ((state = node->pending_controls)) { /* keep taking the list head until the list is empty */
+		DLIST_REMOVE(node->pending_controls, state); /* unlink before the callback so the loop always advances */
+		daemon_control_callback(ctdb, -1, tdb_null, /* status is int32_t, so pass -1 directly rather than (uint32_t)-1 */
+					"node is disconnected", state);
+	}
+}
+
+/*
+  destroy a daemon_control_state: registered with talloc_set_destructor so a state being freed is unlinked from its node's pending_controls list
+ */
+static int daemon_control_destructor(struct daemon_control_state *state)
+{
+	if (state->node) { /* node is NULL when the destination vnn did not validate, so there is no list to unlink from */
+		DLIST_REMOVE(state->node->pending_controls, state);
+	}
+	return 0; /* always returns 0 */
+}
+
+/*
   this is called when the ctdb daemon received a ctdb request control
   from a local client over the unix domain socket
  */
@@ -900,6 +926,14 @@
 	state->client = client;
 	state->c = talloc_steal(state, c);
 	state->reqid = c->hdr.reqid;
+	if (ctdb_validate_vnn(client->ctdb, c->hdr.destnode)) {
+		state->node = client->ctdb->nodes[c->hdr.destnode];
+		DLIST_ADD(state->node->pending_controls, state);
+	} else {
+		state->node = NULL;
+	}
+
+	talloc_set_destructor(state, daemon_control_destructor);
 	
 	data.dptr = &c->data[0];
 	data.dsize = c->datalen;
@@ -912,6 +946,10 @@
 		DEBUG(0,(__location__ " Failed to send control to remote node %u\n",
 			 c->hdr.destnode));
 	}
+
+	if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
+		talloc_free(state);
+	}
 }
 
 /*

=== modified file 'common/ctdb_monitor.c'
--- a/common/ctdb_monitor.c	2007-05-18 13:23:36 +0000
+++ b/common/ctdb_monitor.c	2007-05-18 13:48:29 +0000
@@ -58,6 +58,7 @@
 		if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) {
 			DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn));
 			node->flags &= ~NODE_FLAGS_CONNECTED;
+			ctdb_daemon_cancel_controls(ctdb, node);
 			/* maybe tell the transport layer to kill the
 			   sockets as well?
 			*/

=== modified file 'include/ctdb_private.h'
--- a/include/ctdb_private.h	2007-05-18 13:23:36 +0000
+++ b/include/ctdb_private.h	2007-05-18 13:48:29 +0000
@@ -74,7 +74,7 @@
 
 /* used for callbacks in ctdb_control requests */
 typedef void (*ctdb_control_callback_fn_t)(struct ctdb_context *,
-					   uint32_t status, TDB_DATA data, 
+					   int32_t status, TDB_DATA data, 
 					   const char *errormsg,
 					   void *private_data);
 
@@ -93,6 +93,10 @@
 	/* used by the dead node monitoring */
 	uint32_t dead_count;
 	uint32_t rx_cnt;
+
+	/* a list of controls pending to this node, so we can time them out quickly
+	   if the node becomes disconnected */
+	struct daemon_control_state *pending_controls;
 };
 
 /*
@@ -823,4 +827,6 @@
 int ctdb_start_monitoring(struct ctdb_context *ctdb);
 void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
 
+void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node);
+
 #endif



More information about the samba-cvs mailing list