[SCM] CTDB repository - branch master updated - ctdb-2.3-68-g5cdad2b

Amitay Isaacs amitay at samba.org
Wed Aug 21 04:22:37 MDT 2013


The branch, master has been updated
       via  5cdad2b8ebd71a5e458c301d00eac00a211feeb3 (commit)
       via  0fe79662e20e347d9e1cb12a42cd356e33572402 (commit)
       via  444521c852749558f39dc6131acce9e47eefd489 (commit)
       via  4bf0b1c9d21986eecb7682f935bd6154c65533cc (commit)
       via  d8eb2e7fdd7645719370dad4f2faa5c3fffa8249 (commit)
      from  f9556a6f1fe0046308c8b363e6dcaf3f7ce6f2b7 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 5cdad2b8ebd71a5e458c301d00eac00a211feeb3
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Aug 9 17:00:10 2013 +1000

    tools/ctdb: Fix message in showban when node is banned
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 0fe79662e20e347d9e1cb12a42cd356e33572402
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Aug 9 16:58:42 2013 +1000

    tools/ctdb: Reimplement ban/unban using update_flags_wait_and_ipreallocate()
    
    This has the side effect of making these commands more resilient to
    control timeouts.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 444521c852749558f39dc6131acce9e47eefd489
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Aug 9 16:34:59 2013 +1000

    tools/ctdb: Factor out common pattern used in disable/enable/stop/continue
    
    Now we will only have one set of bugs.  :-)
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Pair-programmed-with: Amitay Isaacs <amitay at gmail.com>

commit 4bf0b1c9d21986eecb7682f935bd6154c65533cc
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Aug 9 15:41:37 2013 +1000

    tools/ctdb: Factor, simplify and improve robustness of ipreallocate code
    
    Having other functions call control_ipreallocate() suggests that
    it might look at the argc/argv arguments that are passed.  This is not
    the case.  Change the callers so they call the new ipreallocate()
    function instead.
    
    Broadcast CTDB_SRVID_TAKEOVER_RUN to all connected nodes.  Inactive
    nodes will ignore it.  This is safe since we only want 1 reply.  If we
    didn't get a response, we don't actually care if there's no active
    recovery master - just fire, wait, retry, ...
    
    Ignore some failures on the basis that they might be transient, so it
    is probably worth retrying.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit d8eb2e7fdd7645719370dad4f2faa5c3fffa8249
Author: Martin Schwenke <martin at meltin.net>
Date:   Thu Aug 15 04:38:02 2013 +1000

    tools/ctdb: Use ctdb_get_pnn() to get PNN of the current node
    
    This has already been stored at connect time and can't fail.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

-----------------------------------------------------------------------

Summary of changes:
 tools/ctdb.c |  376 ++++++++++++++++++++--------------------------------------
 1 files changed, 127 insertions(+), 249 deletions(-)


Changeset truncated at 500 lines:

diff --git a/tools/ctdb.c b/tools/ctdb.c
index b580779..eeff548 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -1973,75 +1973,53 @@ static void ctdb_every_second(struct event_context *ev, struct timed_event *te,
 				ctdb_every_second, ctdb);
 }
 
-/*
-  ask the recovery daemon on the recovery master to perform a ip reallocation
+/* Send an ipreallocate to the recovery daemon on all nodes.  Only the
+ * recovery master will answer.
  */
-static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char **argv)
+static int ipreallocate(struct ctdb_context *ctdb)
 {
-	int i, ret;
+	int ret;
 	TDB_DATA data;
 	struct takeover_run_reply rd;
-	struct ctdb_node_map *nodemap=NULL;
-	int count;
-	struct timeval tv = timeval_current();
+	struct timeval tv;
 
-	/* we need some events to trigger so we can timeout and restart
-	   the loop
-	*/
+	/* Time ticks to enable timeouts to be processed */
 	event_add_timed(ctdb->ev, ctdb, 
 				timeval_current_ofs(1, 0),
 				ctdb_every_second, ctdb);
 
-	rd.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
-	if (rd.pnn == -1) {
-		DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
-		return -1;
-	}
+	rd.pnn = ctdb_get_pnn(ctdb);
 	rd.srvid = getpid();
 
-	/* register a message port for receiveing the reply so that we
-	   can receive the reply
-	*/
+	/* Register message port for reply from recovery master */
 	ctdb_client_set_message_handler(ctdb, rd.srvid, ip_reallocate_handler, NULL);
 
 	data.dptr = (uint8_t *)&rd;
 	data.dsize = sizeof(rd);
 
 again:
-	/* get the number of nodes and node flags */
-	if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap) != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-		sleep(1);
-		goto again;
-	}
-
-	ipreallocate_finished = false;
-	count = 0;
-	for (i=0; i<nodemap->num;i++) {
-		if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
-			continue;
-		} else {
-			/* Send to all active nodes. Only recmaster will reply. */
-			ret = ctdb_client_send_message(ctdb, i, CTDB_SRVID_TAKEOVER_RUN, data);
-			if (ret != 0) {
-				DEBUG(DEBUG_ERR,("Failed to send ip takeover run request message to %u\n", options.pnn));
-				return -1;
-			}
-			count++;
-		}
-	}
-	if (count == 0) {
-		DEBUG(DEBUG_ERR,("No recmaster available, no need to wait for cluster convergence\n"));
-		return 0;
+	/* Send to all connected nodes. Only recmaster replies */
+	ret = ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
+				       CTDB_SRVID_TAKEOVER_RUN, data);
+	if (ret != 0) {
+		/* This can only happen if the socket is closed and
+		 * there's no way to recover from that, so don't try
+		 * again.
+		 */
+		DEBUG(DEBUG_WARNING,
+		      ("Failed to send IP reallocation request to connected nodes\n"));
+		return -1;
 	}
 
 	tv = timeval_current();
-	/* this loop will terminate when we have received the reply */
+	/* This loop terminates when the reply is received */
 	while (timeval_elapsed(&tv) < 5.0 && !ipreallocate_finished) {
 		event_loop_once(ctdb->ev);
 	}
 
 	if (!ipreallocate_finished) {
+		DEBUG(DEBUG_NOTICE,
+		      ("Still waiting for confirmation of IP reallocation\n"));
 		goto again;
 	}
 
@@ -2049,6 +2027,11 @@ again:
 }
 
 
+static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+	return ipreallocate(ctdb);
+}
+
 /*
   add a public ip address to a node
  */
@@ -2907,157 +2890,114 @@ static int control_getpid(struct ctdb_context *ctdb, int argc, const char **argv
 	return 0;
 }
 
-/*
-  disable a remote node
- */
-static int control_disable(struct ctdb_context *ctdb, int argc, const char **argv)
+typedef bool update_flags_handler_t(struct ctdb_context *ctdb, void *data);
+
+static int update_flags_and_ipreallocate(struct ctdb_context *ctdb,
+					      void *data,
+					      update_flags_handler_t handler,
+					      uint32_t flag,
+					      const char *desc,
+					      bool set_flag)
 {
-	int ret;
-	struct ctdb_node_map *nodemap=NULL;
+	struct ctdb_node_map *nodemap = NULL;
+	bool flag_is_set;
 
-	/* check if the node is already disabled */
+	/* Check if the node is already in the desired state */
 	if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
 		DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
 		exit(10);
 	}
-	if (nodemap->nodes[options.pnn].flags & NODE_FLAGS_PERMANENTLY_DISABLED) {
-		DEBUG(DEBUG_ERR,("Node %d is already disabled.\n", options.pnn));
+	flag_is_set = nodemap->nodes[options.pnn].flags & flag;
+	if (set_flag == flag_is_set) {
+		DEBUG(DEBUG_NOTICE, ("Node %d is %s %s\n", options.pnn,
+				     (set_flag ? "already" : "not"), desc));
 		return 0;
 	}
 
 	do {
-		ret = ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn, NODE_FLAGS_PERMANENTLY_DISABLED, 0);
-		if (ret != 0) {
-			DEBUG(DEBUG_ERR, ("Unable to disable node %u\n", options.pnn));
-			return ret;
+		if (!handler(ctdb, data)) {
+			DEBUG(DEBUG_WARNING,
+			      ("Failed to send control to set state %s on node %u, try again\n",
+			       desc, options.pnn));
 		}
 
 		sleep(1);
 
-		/* read the nodemap and verify the change took effect */
-		if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
-			DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-			exit(10);
+		/* Read the nodemap and verify the change took effect.
+		 * Even if the above control/handler timed out then it
+		 * could still have worked!
+		 */
+		if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE,
+					 ctdb, &nodemap) != 0) {
+			DEBUG(DEBUG_WARNING,
+			      ("Unable to get nodemap from local node, try again\n"));
 		}
+		flag_is_set = nodemap->nodes[options.pnn].flags & flag;
+	} while (nodemap == NULL || (set_flag != flag_is_set));
 
-	} while (!(nodemap->nodes[options.pnn].flags & NODE_FLAGS_PERMANENTLY_DISABLED));
-	ret = control_ipreallocate(ctdb, argc, argv);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
-		return ret;
-	}
-
-	return 0;
+	return ipreallocate(ctdb);
 }
 
-/*
-  enable a disabled remote node
- */
-static int control_enable(struct ctdb_context *ctdb, int argc, const char **argv)
+/* Administratively disable a node */
+static bool update_flags_disabled(struct ctdb_context *ctdb, void *data)
 {
-	int ret;
-
-	struct ctdb_node_map *nodemap=NULL;
-
-
-	/* check if the node is already enabled */
-	if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-		exit(10);
-	}
-	if (!(nodemap->nodes[options.pnn].flags & NODE_FLAGS_PERMANENTLY_DISABLED)) {
-		DEBUG(DEBUG_ERR,("Node %d is already enabled.\n", options.pnn));
-		return 0;
-	}
-
-	do {
-		ret = ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn, 0, NODE_FLAGS_PERMANENTLY_DISABLED);
-		if (ret != 0) {
-			DEBUG(DEBUG_ERR, ("Unable to enable node %u\n", options.pnn));
-			return ret;
-		}
-
-		sleep(1);
+	return ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn,
+				  NODE_FLAGS_PERMANENTLY_DISABLED, 0) == 0;
+}
 
-		/* read the nodemap and verify the change took effect */
-		if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
-			DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-			exit(10);
-		}
+static int control_disable(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+	return update_flags_and_ipreallocate(ctdb, NULL,
+						  update_flags_disabled,
+						  NODE_FLAGS_PERMANENTLY_DISABLED,
+						  "disabled",
+						  true /* set_flag*/);
+}
 
-	} while (nodemap->nodes[options.pnn].flags & NODE_FLAGS_PERMANENTLY_DISABLED);
+/* Administratively re-enable a node */
+static bool update_flags_not_disabled(struct ctdb_context *ctdb, void *data)
+{
+	return ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn,
+				  0, NODE_FLAGS_PERMANENTLY_DISABLED) == 0;
+}
 
-	ret = control_ipreallocate(ctdb, argc, argv);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
-		return ret;
-	}
+static int control_enable(struct ctdb_context *ctdb,  int argc, const char **argv)
+{
+	return update_flags_and_ipreallocate(ctdb, NULL,
+						  update_flags_not_disabled,
+						  NODE_FLAGS_PERMANENTLY_DISABLED,
+						  "disabled",
+						  false /* set_flag*/);
+}
 
-	return 0;
+/* Stop a node */
+static bool update_flags_stopped(struct ctdb_context *ctdb, void *data)
+{
+	return ctdb_ctrl_stop_node(ctdb, TIMELIMIT(), options.pnn) == 0;
 }
 
-/*
-  stop a remote node
- */
 static int control_stop(struct ctdb_context *ctdb, int argc, const char **argv)
 {
-	int ret;
-	struct ctdb_node_map *nodemap=NULL;
-
-	do {
-		ret = ctdb_ctrl_stop_node(ctdb, TIMELIMIT(), options.pnn);
-		if (ret != 0) {
-			DEBUG(DEBUG_ERR, ("Unable to stop node %u   try again\n", options.pnn));
-		}
-	
-		sleep(1);
-
-		/* read the nodemap and verify the change took effect */
-		if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
-			DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-		}
-
-	} while (nodemap == NULL || !(nodemap->nodes[options.pnn].flags & NODE_FLAGS_STOPPED));
-	ret = control_ipreallocate(ctdb, argc, argv);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
-		return ret;
-	}
+	return update_flags_and_ipreallocate(ctdb, NULL,
+						  update_flags_stopped,
+						  NODE_FLAGS_STOPPED,
+						  "stopped",
+						  true /* set_flag*/);
+}
 
-	return 0;
+/* Continue a stopped node */
+static bool update_flags_not_stopped(struct ctdb_context *ctdb, void *data)
+{
+	return ctdb_ctrl_continue_node(ctdb, TIMELIMIT(), options.pnn) == 0;
 }
 
-/*
-  restart a stopped remote node
- */
 static int control_continue(struct ctdb_context *ctdb, int argc, const char **argv)
 {
-	int ret;
-
-	struct ctdb_node_map *nodemap=NULL;
-
-	do {
-		ret = ctdb_ctrl_continue_node(ctdb, TIMELIMIT(), options.pnn);
-		if (ret != 0) {
-			DEBUG(DEBUG_ERR, ("Unable to continue node %u\n", options.pnn));
-			return ret;
-		}
-	
-		sleep(1);
-
-		/* read the nodemap and verify the change took effect */
-		if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
-			DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-		}
-
-	} while (nodemap == NULL || nodemap->nodes[options.pnn].flags & NODE_FLAGS_STOPPED);
-	ret = control_ipreallocate(ctdb, argc, argv);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
-		return ret;
-	}
-
-	return 0;
+	return update_flags_and_ipreallocate(ctdb, NULL,
+						  update_flags_not_stopped,
+						  NODE_FLAGS_STOPPED,
+						  "stopped",
+						  false /* set_flag */);
 }
 
 static uint32_t get_generation(struct ctdb_context *ctdb)
@@ -3101,90 +3041,47 @@ static uint32_t get_generation(struct ctdb_context *ctdb)
 	}
 }
 
-/*
-  ban a node from the cluster
- */
+/* Ban a node */
+static bool update_state_banned(struct ctdb_context *ctdb, void *data)
+{
+	struct ctdb_ban_time *bantime = (struct ctdb_ban_time *)data;
+	return ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, bantime) == 0;
+}
+
 static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
 {
-	int ret;
-	struct ctdb_node_map *nodemap=NULL;
 	struct ctdb_ban_time bantime;
 
 	if (argc < 1) {
 		usage();
 	}
 	
-	/* verify the node exists */
-	ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-		return ret;
-	}
-
-	if (nodemap->nodes[options.pnn].flags & NODE_FLAGS_BANNED) {
-		DEBUG(DEBUG_ERR,("Node %u is already banned.\n", options.pnn));
-		return -1;
-	}
-
 	bantime.pnn  = options.pnn;
 	bantime.time = strtoul(argv[0], NULL, 0);
 
-	ret = ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, &bantime);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR,("Banning node %d for %d seconds failed.\n", bantime.pnn, bantime.time));
-		return -1;
-	}	
-
-	ret = control_ipreallocate(ctdb, argc, argv);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
-		return ret;
-	}
-
-	return 0;
+	return update_flags_and_ipreallocate(ctdb, &bantime,
+						  update_state_banned,
+						  NODE_FLAGS_BANNED,
+						  "banned",
+						  true /* set_flag*/);
 }
 
 
-/*
-  unban a node from the cluster
- */
+/* Unban a node */
 static int control_unban(struct ctdb_context *ctdb, int argc, const char **argv)
 {
-	int ret;
-	struct ctdb_node_map *nodemap=NULL;
 	struct ctdb_ban_time bantime;
 
-	/* verify the node exists */
-	ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-		return ret;
-	}
-
-	if (!(nodemap->nodes[options.pnn].flags & NODE_FLAGS_BANNED)) {
-		DEBUG(DEBUG_ERR,("Node %u is not banned.\n", options.pnn));
-		return -1;
-	}
-
 	bantime.pnn  = options.pnn;
 	bantime.time = 0;
 
-	ret = ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, &bantime);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR,("Unbanning node %d failed.\n", bantime.pnn));
-		return -1;
-	}	
-
-	ret = control_ipreallocate(ctdb, argc, argv);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
-		return ret;
-	}
-
-	return 0;
+	return update_flags_and_ipreallocate(ctdb, &bantime,
+						  update_state_banned,
+						  NODE_FLAGS_BANNED,
+						  "banned",
+						  false /* set_flag*/);
 }
 
-
 /*
   show ban information for a node
  */
@@ -3210,7 +3107,8 @@ static int control_showban(struct ctdb_context *ctdb, int argc, const char **arg
 	if (bantime->time == 0) {
 		printf("Node %u is not banned\n", bantime->pnn);
 	} else {
-		printf("Node %u is banned banned for %d seconds\n", bantime->pnn, bantime->time);
+		printf("Node %u is banned, %d seconds remaining\n",
+		       bantime->pnn, bantime->time);
 	}
 
 	return 0;
@@ -4169,15 +4067,9 @@ static int control_getlog(struct ctdb_context *ctdb, int argc, const char **argv
 	TDB_DATA data;
 	struct timeval tv;
 
-	/* Since this can fail, do it first */
-	log_addr.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
-	if (log_addr.pnn == -1) {
-		DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
-		return -1;
-	}
-
 	/* Process options */
 	main_daemon = true;
+	log_addr.pnn = ctdb_get_pnn(ctdb);
 	log_addr.level = DEBUG_NOTICE;
 	for (i = 0; i < argc; i++) {
 		if (strcmp(argv[i], "recoverd") == 0) {
@@ -4302,15 +4194,9 @@ static int reloadips_all(struct ctdb_context *ctdb)
 		}
 	}
 
-
-	rips.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
-	if (rips.pnn == -1) {
-		DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
-		return 1;
-	}
+	rips.pnn = ctdb_get_pnn(ctdb);
 	rips.srvid = getpid();
 
-
 	/* register a message port for receiveing the reply so that we
 	   can receive the reply


-- 
CTDB repository


More information about the samba-cvs mailing list