[SCM] CTDB repository - branch master updated - ctdb-2.3-68-g5cdad2b
Amitay Isaacs
amitay at samba.org
Wed Aug 21 04:22:37 MDT 2013
The branch, master has been updated
via 5cdad2b8ebd71a5e458c301d00eac00a211feeb3 (commit)
via 0fe79662e20e347d9e1cb12a42cd356e33572402 (commit)
via 444521c852749558f39dc6131acce9e47eefd489 (commit)
via 4bf0b1c9d21986eecb7682f935bd6154c65533cc (commit)
via d8eb2e7fdd7645719370dad4f2faa5c3fffa8249 (commit)
from f9556a6f1fe0046308c8b363e6dcaf3f7ce6f2b7 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit 5cdad2b8ebd71a5e458c301d00eac00a211feeb3
Author: Martin Schwenke <martin at meltin.net>
Date: Fri Aug 9 17:00:10 2013 +1000
tools/ctdb: Fix message in showban when node is banned
Signed-off-by: Martin Schwenke <martin at meltin.net>
commit 0fe79662e20e347d9e1cb12a42cd356e33572402
Author: Martin Schwenke <martin at meltin.net>
Date: Fri Aug 9 16:58:42 2013 +1000
tools/ctdb: Reimplement ban/unban using update_flags_wait_and_ipreallocate()
This has the side effect of making these commands more resilient to
control timeouts.
Signed-off-by: Martin Schwenke <martin at meltin.net>
commit 444521c852749558f39dc6131acce9e47eefd489
Author: Martin Schwenke <martin at meltin.net>
Date: Fri Aug 9 16:34:59 2013 +1000
tools/ctdb: Factor out common pattern used in disable/enable/stop/continue
Now we will only have one set of bugs. :-)
Signed-off-by: Martin Schwenke <martin at meltin.net>
Pair-programmed-with: Amitay Isaacs <amitay at gmail.com>
commit 4bf0b1c9d21986eecb7682f935bd6154c65533cc
Author: Martin Schwenke <martin at meltin.net>
Date: Fri Aug 9 15:41:37 2013 +1000
tools/ctdb: Factor, simplify and improve robustness of ipreallocate code
Having other functions call control_ipreallocate() suggests that
it might look at the argc/argv arguments that are passed. This is not
the case. Change the callers so they call the new ipreallocate()
function instead.
Broadcast CTDB_SRVID_TAKEOVER_RUN to all connected nodes. Inactive
nodes will ignore it. This is safe since we only want 1 reply. If we
didn't get a response, we don't actually care if there's no active
recovery master - just fire, wait, retry, ...
Ignore some failures on the basis that they might be transient, so it
is probably worth retrying.
Signed-off-by: Martin Schwenke <martin at meltin.net>
commit d8eb2e7fdd7645719370dad4f2faa5c3fffa8249
Author: Martin Schwenke <martin at meltin.net>
Date: Thu Aug 15 04:38:02 2013 +1000
tools/ctdb: Use ctdb_get_pnn() to get PNN of the current node
This has already been stored at connect time and can't fail.
Signed-off-by: Martin Schwenke <martin at meltin.net>
-----------------------------------------------------------------------
Summary of changes:
tools/ctdb.c | 376 ++++++++++++++++++++--------------------------------------
1 files changed, 127 insertions(+), 249 deletions(-)
Changeset truncated at 500 lines:
diff --git a/tools/ctdb.c b/tools/ctdb.c
index b580779..eeff548 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -1973,75 +1973,53 @@ static void ctdb_every_second(struct event_context *ev, struct timed_event *te,
ctdb_every_second, ctdb);
}
-/*
- ask the recovery daemon on the recovery master to perform a ip reallocation
+/* Send an ipreallocate to the recovery daemon on all nodes. Only the
+ * recovery master will answer.
*/
-static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char **argv)
+static int ipreallocate(struct ctdb_context *ctdb)
{
- int i, ret;
+ int ret;
TDB_DATA data;
struct takeover_run_reply rd;
- struct ctdb_node_map *nodemap=NULL;
- int count;
- struct timeval tv = timeval_current();
+ struct timeval tv;
- /* we need some events to trigger so we can timeout and restart
- the loop
- */
+ /* Time ticks to enable timeouts to be processed */
event_add_timed(ctdb->ev, ctdb,
timeval_current_ofs(1, 0),
ctdb_every_second, ctdb);
- rd.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
- if (rd.pnn == -1) {
- DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
- return -1;
- }
+ rd.pnn = ctdb_get_pnn(ctdb);
rd.srvid = getpid();
- /* register a message port for receiveing the reply so that we
- can receive the reply
- */
+ /* Register message port for reply from recovery master */
ctdb_client_set_message_handler(ctdb, rd.srvid, ip_reallocate_handler, NULL);
data.dptr = (uint8_t *)&rd;
data.dsize = sizeof(rd);
again:
- /* get the number of nodes and node flags */
- if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap) != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
- sleep(1);
- goto again;
- }
-
- ipreallocate_finished = false;
- count = 0;
- for (i=0; i<nodemap->num;i++) {
- if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
- continue;
- } else {
- /* Send to all active nodes. Only recmaster will reply. */
- ret = ctdb_client_send_message(ctdb, i, CTDB_SRVID_TAKEOVER_RUN, data);
- if (ret != 0) {
- DEBUG(DEBUG_ERR,("Failed to send ip takeover run request message to %u\n", options.pnn));
- return -1;
- }
- count++;
- }
- }
- if (count == 0) {
- DEBUG(DEBUG_ERR,("No recmaster available, no need to wait for cluster convergence\n"));
- return 0;
+ /* Send to all connected nodes. Only recmaster replies */
+ ret = ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
+ CTDB_SRVID_TAKEOVER_RUN, data);
+ if (ret != 0) {
+ /* This can only happen if the socket is closed and
+ * there's no way to recover from that, so don't try
+ * again.
+ */
+ DEBUG(DEBUG_WARNING,
+ ("Failed to send IP reallocation request to connected nodes\n"));
+ return -1;
}
tv = timeval_current();
- /* this loop will terminate when we have received the reply */
+ /* This loop terminates the reply is received */
while (timeval_elapsed(&tv) < 5.0 && !ipreallocate_finished) {
event_loop_once(ctdb->ev);
}
if (!ipreallocate_finished) {
+ DEBUG(DEBUG_NOTICE,
+ ("Still waiting for confirmation of IP reallocation\n"));
goto again;
}
@@ -2049,6 +2027,11 @@ again:
}
+static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+ return ipreallocate(ctdb);
+}
+
/*
add a public ip address to a node
*/
@@ -2907,157 +2890,114 @@ static int control_getpid(struct ctdb_context *ctdb, int argc, const char **argv
return 0;
}
-/*
- disable a remote node
- */
-static int control_disable(struct ctdb_context *ctdb, int argc, const char **argv)
+typedef bool update_flags_handler_t(struct ctdb_context *ctdb, void *data);
+
+static int update_flags_and_ipreallocate(struct ctdb_context *ctdb,
+ void *data,
+ update_flags_handler_t handler,
+ uint32_t flag,
+ const char *desc,
+ bool set_flag)
{
- int ret;
- struct ctdb_node_map *nodemap=NULL;
+ struct ctdb_node_map *nodemap = NULL;
+ bool flag_is_set;
- /* check if the node is already disabled */
+ /* Check if the node is already in the desired state */
if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
exit(10);
}
- if (nodemap->nodes[options.pnn].flags & NODE_FLAGS_PERMANENTLY_DISABLED) {
- DEBUG(DEBUG_ERR,("Node %d is already disabled.\n", options.pnn));
+ flag_is_set = nodemap->nodes[options.pnn].flags & flag;
+ if (set_flag == flag_is_set) {
+ DEBUG(DEBUG_NOTICE, ("Node %d is %s %s\n", options.pnn,
+ (set_flag ? "already" : "not"), desc));
return 0;
}
do {
- ret = ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn, NODE_FLAGS_PERMANENTLY_DISABLED, 0);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("Unable to disable node %u\n", options.pnn));
- return ret;
+ if (!handler(ctdb, data)) {
+ DEBUG(DEBUG_WARNING,
+ ("Failed to send control to set state %s on node %u, try again\n",
+ desc, options.pnn));
}
sleep(1);
- /* read the nodemap and verify the change took effect */
- if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
- exit(10);
+ /* Read the nodemap and verify the change took effect.
+ * Even if the above control/hanlder timed out then it
+ * could still have worked!
+ */
+ if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE,
+ ctdb, &nodemap) != 0) {
+ DEBUG(DEBUG_WARNING,
+ ("Unable to get nodemap from local node, try again\n"));
}
+ flag_is_set = nodemap->nodes[options.pnn].flags & flag;
+ } while (nodemap == NULL || (set_flag != flag_is_set));
- } while (!(nodemap->nodes[options.pnn].flags & NODE_FLAGS_PERMANENTLY_DISABLED));
- ret = control_ipreallocate(ctdb, argc, argv);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
- return ret;
- }
-
- return 0;
+ return ipreallocate(ctdb);
}
-/*
- enable a disabled remote node
- */
-static int control_enable(struct ctdb_context *ctdb, int argc, const char **argv)
+/* Administratively disable a node */
+static bool update_flags_disabled(struct ctdb_context *ctdb, void *data)
{
- int ret;
-
- struct ctdb_node_map *nodemap=NULL;
-
-
- /* check if the node is already enabled */
- if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
- exit(10);
- }
- if (!(nodemap->nodes[options.pnn].flags & NODE_FLAGS_PERMANENTLY_DISABLED)) {
- DEBUG(DEBUG_ERR,("Node %d is already enabled.\n", options.pnn));
- return 0;
- }
-
- do {
- ret = ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn, 0, NODE_FLAGS_PERMANENTLY_DISABLED);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("Unable to enable node %u\n", options.pnn));
- return ret;
- }
-
- sleep(1);
+ return ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn,
+ NODE_FLAGS_PERMANENTLY_DISABLED, 0) == 0;
+}
- /* read the nodemap and verify the change took effect */
- if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
- exit(10);
- }
+static int control_disable(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+ return update_flags_and_ipreallocate(ctdb, NULL,
+ update_flags_disabled,
+ NODE_FLAGS_PERMANENTLY_DISABLED,
+ "disabled",
+ true /* set_flag*/);
+}
- } while (nodemap->nodes[options.pnn].flags & NODE_FLAGS_PERMANENTLY_DISABLED);
+/* Administratively re-enable a node */
+static bool update_flags_not_disabled(struct ctdb_context *ctdb, void *data)
+{
+ return ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn,
+ 0, NODE_FLAGS_PERMANENTLY_DISABLED) == 0;
+}
- ret = control_ipreallocate(ctdb, argc, argv);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
- return ret;
- }
+static int control_enable(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+ return update_flags_and_ipreallocate(ctdb, NULL,
+ update_flags_not_disabled,
+ NODE_FLAGS_PERMANENTLY_DISABLED,
+ "disabled",
+ false /* set_flag*/);
+}
- return 0;
+/* Stop a node */
+static bool update_flags_stopped(struct ctdb_context *ctdb, void *data)
+{
+ return ctdb_ctrl_stop_node(ctdb, TIMELIMIT(), options.pnn) == 0;
}
-/*
- stop a remote node
- */
static int control_stop(struct ctdb_context *ctdb, int argc, const char **argv)
{
- int ret;
- struct ctdb_node_map *nodemap=NULL;
-
- do {
- ret = ctdb_ctrl_stop_node(ctdb, TIMELIMIT(), options.pnn);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("Unable to stop node %u try again\n", options.pnn));
- }
-
- sleep(1);
-
- /* read the nodemap and verify the change took effect */
- if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
- }
-
- } while (nodemap == NULL || !(nodemap->nodes[options.pnn].flags & NODE_FLAGS_STOPPED));
- ret = control_ipreallocate(ctdb, argc, argv);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
- return ret;
- }
+ return update_flags_and_ipreallocate(ctdb, NULL,
+ update_flags_stopped,
+ NODE_FLAGS_STOPPED,
+ "stopped",
+ true /* set_flag*/);
+}
- return 0;
+/* Continue a stopped node */
+static bool update_flags_not_stopped(struct ctdb_context *ctdb, void *data)
+{
+ return ctdb_ctrl_continue_node(ctdb, TIMELIMIT(), options.pnn) == 0;
}
-/*
- restart a stopped remote node
- */
static int control_continue(struct ctdb_context *ctdb, int argc, const char **argv)
{
- int ret;
-
- struct ctdb_node_map *nodemap=NULL;
-
- do {
- ret = ctdb_ctrl_continue_node(ctdb, TIMELIMIT(), options.pnn);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("Unable to continue node %u\n", options.pnn));
- return ret;
- }
-
- sleep(1);
-
- /* read the nodemap and verify the change took effect */
- if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap) != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
- }
-
- } while (nodemap == NULL || nodemap->nodes[options.pnn].flags & NODE_FLAGS_STOPPED);
- ret = control_ipreallocate(ctdb, argc, argv);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
- return ret;
- }
-
- return 0;
+ return update_flags_and_ipreallocate(ctdb, NULL,
+ update_flags_not_stopped,
+ NODE_FLAGS_STOPPED,
+ "stopped",
+ false /* set_flag */);
}
static uint32_t get_generation(struct ctdb_context *ctdb)
@@ -3101,90 +3041,47 @@ static uint32_t get_generation(struct ctdb_context *ctdb)
}
}
-/*
- ban a node from the cluster
- */
+/* Ban a node */
+static bool update_state_banned(struct ctdb_context *ctdb, void *data)
+{
+ struct ctdb_ban_time *bantime = (struct ctdb_ban_time *)data;
+ return ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, bantime) == 0;
+}
+
static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
{
- int ret;
- struct ctdb_node_map *nodemap=NULL;
struct ctdb_ban_time bantime;
if (argc < 1) {
usage();
}
- /* verify the node exists */
- ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
- return ret;
- }
-
- if (nodemap->nodes[options.pnn].flags & NODE_FLAGS_BANNED) {
- DEBUG(DEBUG_ERR,("Node %u is already banned.\n", options.pnn));
- return -1;
- }
-
bantime.pnn = options.pnn;
bantime.time = strtoul(argv[0], NULL, 0);
- ret = ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, &bantime);
- if (ret != 0) {
- DEBUG(DEBUG_ERR,("Banning node %d for %d seconds failed.\n", bantime.pnn, bantime.time));
- return -1;
- }
-
- ret = control_ipreallocate(ctdb, argc, argv);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
- return ret;
- }
-
- return 0;
+ return update_flags_and_ipreallocate(ctdb, &bantime,
+ update_state_banned,
+ NODE_FLAGS_BANNED,
+ "banned",
+ true /* set_flag*/);
}
-/*
- unban a node from the cluster
- */
+/* Unban a node */
static int control_unban(struct ctdb_context *ctdb, int argc, const char **argv)
{
- int ret;
- struct ctdb_node_map *nodemap=NULL;
struct ctdb_ban_time bantime;
- /* verify the node exists */
- ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
- return ret;
- }
-
- if (!(nodemap->nodes[options.pnn].flags & NODE_FLAGS_BANNED)) {
- DEBUG(DEBUG_ERR,("Node %u is not banned.\n", options.pnn));
- return -1;
- }
-
bantime.pnn = options.pnn;
bantime.time = 0;
- ret = ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, &bantime);
- if (ret != 0) {
- DEBUG(DEBUG_ERR,("Unbanning node %d failed.\n", bantime.pnn));
- return -1;
- }
-
- ret = control_ipreallocate(ctdb, argc, argv);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
- return ret;
- }
-
- return 0;
+ return update_flags_and_ipreallocate(ctdb, &bantime,
+ update_state_banned,
+ NODE_FLAGS_BANNED,
+ "banned",
+ false /* set_flag*/);
}
-
/*
show ban information for a node
*/
@@ -3210,7 +3107,8 @@ static int control_showban(struct ctdb_context *ctdb, int argc, const char **arg
if (bantime->time == 0) {
printf("Node %u is not banned\n", bantime->pnn);
} else {
- printf("Node %u is banned banned for %d seconds\n", bantime->pnn, bantime->time);
+ printf("Node %u is banned, %d seconds remaining\n",
+ bantime->pnn, bantime->time);
}
return 0;
@@ -4169,15 +4067,9 @@ static int control_getlog(struct ctdb_context *ctdb, int argc, const char **argv
TDB_DATA data;
struct timeval tv;
- /* Since this can fail, do it first */
- log_addr.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
- if (log_addr.pnn == -1) {
- DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
- return -1;
- }
-
/* Process options */
main_daemon = true;
+ log_addr.pnn = ctdb_get_pnn(ctdb);
log_addr.level = DEBUG_NOTICE;
for (i = 0; i < argc; i++) {
if (strcmp(argv[i], "recoverd") == 0) {
@@ -4302,15 +4194,9 @@ static int reloadips_all(struct ctdb_context *ctdb)
}
}
-
- rips.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
- if (rips.pnn == -1) {
- DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
- return 1;
- }
+ rips.pnn = ctdb_get_pnn(ctdb);
rips.srvid = getpid();
-
/* register a message port for receiveing the reply so that we
can receive the reply
--
CTDB repository
More information about the samba-cvs
mailing list