[SCM] CTDB repository - branch master updated -
ctdb-1.0.65-24-g39c7737
Ronnie Sahlberg
sahlberg at samba.org
Tue Dec 9 00:37:59 GMT 2008
The branch, master has been updated
via 39c77371a2f995025a584691fe61af12dc6ed5d7 (commit)
via 4eac0214e732e6c2f867d66ec71d4406680dbb94 (commit)
from 7b41b518c3ffebf1712445a8c6242509dc798003 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit 39c77371a2f995025a584691fe61af12dc6ed5d7
Author: root <root at test1n1.VSOFS1.COM>
Date: Tue Dec 9 12:03:42 2008 +1100
add a helper that waits until the cluster is no longer in recovery mode and returns the generation number.
change the ban/unban logic to wait until we are not in recovery before it bans/unbans the node.
also wait until after the cluster has recovered from the ban/unban before returning so that the cluster is in recovery mode == normal when the command returns. this makes it much easier to script things ...
commit 4eac0214e732e6c2f867d66ec71d4406680dbb94
Author: root <root at test1n1.VSOFS1.COM>
Date: Tue Dec 9 10:45:14 2008 +1100
update to the flags handling
make sure to abort the monitoring and restart if we failed to get the nodemap from a remote node
-----------------------------------------------------------------------
Summary of changes:
server/ctdb_recoverd.c | 29 +++++++++-------
tools/ctdb.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 104 insertions(+), 14 deletions(-)
Changeset truncated at 500 lines:
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 468977c..540749d 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -2278,19 +2278,9 @@ static void async_getnodemap_callback(struct ctdb_context *ctdb, uint32_t node_p
static int get_remote_nodemaps(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
struct ctdb_node_map *nodemap,
- struct ctdb_node_map ***remote_nodemaps)
+ struct ctdb_node_map **remote_nodemaps)
{
uint32_t *nodes;
- int i;
-
- *remote_nodemaps = talloc_array(mem_ctx, struct ctdb_node_map *, nodemap->num);
- if (*remote_nodemaps == NULL) {
- DEBUG(DEBUG_ERR, (__location__ " failed to allocate remote nodemap array\n"));
- return -1;
- }
- for(i=0; i<nodemap->num; i++) {
- (*remote_nodemaps)[i] = NULL;
- }
nodes = list_of_active_nodes(ctdb, nodemap, mem_ctx, true);
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_NODEMAP,
@@ -2298,7 +2288,7 @@ static int get_remote_nodemaps(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
CONTROL_TIMEOUT(), false, tdb_null,
async_getnodemap_callback,
NULL,
- *remote_nodemaps) != 0) {
+ remote_nodemaps) != 0) {
DEBUG(DEBUG_ERR, (__location__ " Unable to pull all remote nodemaps\n"));
return -1;
@@ -2651,7 +2641,15 @@ again:
/* get the nodemap for all active remote nodes
*/
- if (get_remote_nodemaps(ctdb, mem_ctx, nodemap, &remote_nodemaps) != 0) {
+ remote_nodemaps = talloc_array(mem_ctx, struct ctdb_node_map *, nodemap->num);
+ if (remote_nodemaps == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " failed to allocate remote nodemap array\n"));
+ goto again;
+ }
+ for(i=0; i<nodemap->num; i++) {
+ remote_nodemaps[i] = NULL;
+ }
+ if (get_remote_nodemaps(ctdb, mem_ctx, nodemap, remote_nodemaps) != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to read remote nodemaps\n"));
goto again;
}
@@ -2663,6 +2661,11 @@ again:
continue;
}
+ if (remote_nodemaps[j] == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Did not get a remote nodemap for node %d, restarting monitoring\n", j));
+ goto again;
+ }
+
/* if the nodes disagree on how many nodes there are
then this is a good reason to try recovery
*/
diff --git a/tools/ctdb.c b/tools/ctdb.c
index 27fa7de..bb663a6 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -1255,6 +1255,49 @@ static int control_enable(struct ctdb_context *ctdb, int argc, const char **argv
return 0;
}
+static uint32_t get_generation(struct ctdb_context *ctdb)
+{
+ struct ctdb_vnn_map *vnnmap=NULL;
+ int ret;
+
+ /* wait until the recmaster is not in recovery mode */
+ while (1) {
+ uint32_t recmode, recmaster;
+
+ if (vnnmap != NULL) {
+ talloc_free(vnnmap);
+ vnnmap = NULL;
+ }
+
+ /* get the recmaster */
+ ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, &recmaster);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, ("Unable to get recmaster from node %u\n", options.pnn));
+ exit(10);
+ }
+
+ /* get recovery mode */
+ ret = ctdb_ctrl_getrecmode(ctdb, ctdb, TIMELIMIT(), recmaster, &recmode);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, ("Unable to get recmode from node %u\n", options.pnn));
+ exit(10);
+ }
+
+ /* get the current generation number */
+ ret = ctdb_ctrl_getvnnmap(ctdb, TIMELIMIT(), recmaster, ctdb, &vnnmap);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, ("Unable to get vnnmap from recmaster (%u)\n", recmaster));
+ exit(10);
+ }
+
+ if ((recmode == CTDB_RECOVERY_NORMAL)
+ && (vnnmap->generation != 1)){
+ return vnnmap->generation;
+ }
+ sleep(1);
+ }
+}
+
/*
ban a node from the cluster
*/
@@ -1264,10 +1307,27 @@ static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
struct ctdb_ban_info b;
TDB_DATA data;
uint32_t ban_time;
+ struct ctdb_node_map *nodemap=NULL;
+ uint32_t generation, next_generation;
if (argc < 1) {
usage();
}
+
+ /* record the current generation number */
+ generation = get_generation(ctdb);
+
+
+ /* verify the node exists */
+ ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
+ return ret;
+ }
+ if (options.pnn >= nodemap->num) {
+ DEBUG(DEBUG_ERR, ("Node %u does not exist\n", options.pnn));
+ return ret;
+ }
/* verify we can access the node */
ret = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), options.pnn);
@@ -1276,6 +1336,11 @@ static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
return -1;
}
+ if (nodemap->nodes[options.pnn].flags & NODE_FLAGS_BANNED) {
+ DEBUG(DEBUG_ERR,("Node %u is already banned.\n", options.pnn));
+ return -1;
+ }
+
ban_time = strtoul(argv[0], NULL, 0);
b.pnn = options.pnn;
@@ -1289,7 +1354,16 @@ static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
DEBUG(DEBUG_ERR,("Failed to ban node %u\n", options.pnn));
return -1;
}
-
+
+ /* wait until we are in a new generation */
+ while (1) {
+ next_generation = get_generation(ctdb);
+ if (next_generation != generation) {
+ return 0;
+ }
+ sleep(1);
+ }
+
return 0;
}
@@ -1301,6 +1375,10 @@ static int control_unban(struct ctdb_context *ctdb, int argc, const char **argv)
{
int ret;
TDB_DATA data;
+ uint32_t generation, next_generation;
+
+ /* record the current generation number */
+ generation = get_generation(ctdb);
/* verify we can access the node */
ret = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), options.pnn);
@@ -1318,6 +1396,15 @@ static int control_unban(struct ctdb_context *ctdb, int argc, const char **argv)
return -1;
}
+ /* wait until we are in a new generation */
+ while (1) {
+ next_generation = get_generation(ctdb);
+ if (next_generation != generation) {
+ return 0;
+ }
+ sleep(1);
+ }
+
return 0;
}
--
CTDB repository
More information about the samba-cvs
mailing list