[SCM] CTDB repository - branch master updated -
ctdb-1.0.65-1-ga9a1156
Ronnie Sahlberg
sahlberg at samba.org
Thu Nov 20 01:44:09 GMT 2008
The branch, master has been updated
via a9a1156ea4e10483a4bf4265b8e9203f0af033aa (commit)
from 1f25958dc739677a487fa496fbeffcda7a0f2204 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit a9a1156ea4e10483a4bf4265b8e9203f0af033aa
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Wed Nov 19 14:43:46 2008 +1100
reqrite the handling of flag updates across the cluster to eliminate a
race between the ctdb tool and the recovery daemon both at once
trying to push flag changes across the cluster.
-----------------------------------------------------------------------
Summary of changes:
client/ctdb_client.c | 92 ++++++++++++++++++++++++++++++++++++-----
include/ctdb.h | 15 ++++++-
include/ctdb_private.h | 8 ----
server/ctdb_control.c | 2 +-
server/ctdb_daemon.c | 34 ---------------
server/ctdb_monitor.c | 46 ++++++++++-----------
server/ctdb_recoverd.c | 107 ++++++++++++++++++++---------------------------
7 files changed, 163 insertions(+), 141 deletions(-)
Changeset truncated at 500 lines:
diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index fcd10b2..16fc03b 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -2353,23 +2353,59 @@ int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32
{
int ret;
TDB_DATA data;
- struct ctdb_node_modflags m;
- int32_t res;
+ struct ctdb_node_map *nodemap=NULL;
+ struct ctdb_node_flag_change c;
+ TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+ uint32_t recmaster;
+ uint32_t *nodes;
- m.set = set;
- m.clear = clear;
- data.dsize = sizeof(m);
- data.dptr = (unsigned char *)&m;
+ /* find the recovery master */
+ ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
+ talloc_free(tmp_ctx);
+ return ret;
+ }
- ret = ctdb_control(ctdb, destnode, 0,
- CTDB_CONTROL_MODIFY_FLAGS, 0, data,
- NULL, NULL, &res, &timeout, NULL);
- if (ret != 0 || res != 0) {
- DEBUG(DEBUG_ERR,(__location__ " ctdb_control for modflags failed\n"));
+
+ /* read the node flags from the recmaster */
+ ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+ if (destnode >= nodemap->num) {
+ DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode));
+ talloc_free(tmp_ctx);
+ return -1;
+ }
+
+ c.pnn = destnode;
+ c.old_flags = nodemap->nodes[destnode].flags;
+ c.new_flags = c.old_flags;
+ c.new_flags |= set;
+ c.new_flags &= ~clear;
+
+ data.dsize = sizeof(c);
+ data.dptr = (unsigned char *)&c;
+
+ /* send the flags update to all connected nodes */
+ nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
+
+ if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
+ nodes,
+ timeout, false, data,
+ NULL, NULL,
+ NULL) != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " ctdb_control to disable node failed\n"));
+
+ talloc_free(tmp_ctx);
return -1;
}
+ talloc_free(tmp_ctx);
return 0;
}
@@ -2959,6 +2995,40 @@ uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
return nodes;
}
+uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
+ struct ctdb_node_map *node_map,
+ TALLOC_CTX *mem_ctx,
+ bool include_self)
+{
+ int i, j, num_nodes;
+ uint32_t *nodes;
+
+ for (i=num_nodes=0;i<node_map->num;i++) {
+ if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
+ continue;
+ }
+ if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
+ continue;
+ }
+ num_nodes++;
+ }
+
+ nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
+ CTDB_NO_MEMORY_FATAL(ctdb, nodes);
+
+ for (i=j=0;i<node_map->num;i++) {
+ if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
+ continue;
+ }
+ if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
+ continue;
+ }
+ nodes[j++] = node_map->nodes[i].pnn;
+ }
+
+ return nodes;
+}
+
/*
this is used to test if a pnn lock exists and if it exists will return
the number of connections that pnn has reported or -1 if that recovery
diff --git a/include/ctdb.h b/include/ctdb.h
index fd5af20..bf6c4a4 100644
--- a/include/ctdb.h
+++ b/include/ctdb.h
@@ -71,9 +71,9 @@ struct ctdb_call_info {
#define CTDB_SRVID_RELEASE_IP 0xF300000000000000LL
/*
- a message ID meaning that a nodes flags have changed
+ a message ID to set the node flags in the recovery daemon
*/
-#define CTDB_SRVID_NODE_FLAGS_CHANGED 0xF400000000000000LL
+#define CTDB_SRVID_SET_NODE_FLAGS 0xF400000000000000LL
/*
a message ID meaning that a node should be banned
@@ -96,6 +96,12 @@ struct ctdb_call_info {
*/
#define CTDB_SRVID_MEM_DUMP 0xF800000000000000LL
+/*
+ a message ID to get the recovery daemon to push the node flags out
+ */
+#define CTDB_SRVID_PUSH_NODE_FLAGS 0xF900000000000000LL
+
+
/* used on the domain socket, send a pdu to the local daemon */
#define CTDB_CURRENT_NODE 0xF0000001
@@ -532,6 +538,11 @@ int ctdb_ctrl_getreclock(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, const char **reclock);
+
+uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
+ struct ctdb_node_map *node_map,
+ TALLOC_CTX *mem_ctx,
+ bool include_self);
uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
struct ctdb_node_map *node_map,
TALLOC_CTX *mem_ctx,
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index f526f26..cb83f86 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -637,14 +637,6 @@ struct ctdb_node_flag_change {
};
/*
- structure to change flags on a node
- */
-struct ctdb_node_modflags {
- uint32_t set;
- uint32_t clear;
-};
-
-/*
struct for admin setting a ban
*/
struct ctdb_ban_info {
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 5f65547..15f5000 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -326,7 +326,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
return ctdb_control_list_tunables(ctdb, outdata);
case CTDB_CONTROL_MODIFY_FLAGS:
- CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_node_modflags));
+ CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_node_flag_change));
return ctdb_control_modflags(ctdb, indata);
case CTDB_CONTROL_KILL_TCP:
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 8ddda70..0aaab23 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -31,36 +31,6 @@
static void daemon_incoming_packet(void *, struct ctdb_req_header *);
-/*
- handler for when a node changes its flags
-*/
-static void flag_change_handler(struct ctdb_context *ctdb, uint64_t srvid,
- TDB_DATA data, void *private_data)
-{
- struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
-
- if (data.dsize != sizeof(*c) || !ctdb_validate_pnn(ctdb, c->pnn)) {
- DEBUG(DEBUG_CRIT,(__location__ "Invalid data in ctdb_node_flag_change\n"));
- return;
- }
-
- if (!ctdb_validate_pnn(ctdb, c->pnn)) {
- DEBUG(DEBUG_CRIT,("Bad pnn %u in flag_change_handler\n", c->pnn));
- return;
- }
-
- /* don't get the disconnected flag from the other node */
- ctdb->nodes[c->pnn]->flags =
- (ctdb->nodes[c->pnn]->flags&NODE_FLAGS_DISCONNECTED)
- | (c->new_flags & ~NODE_FLAGS_DISCONNECTED);
- DEBUG(DEBUG_DEBUG,("Node flags for node %u are now 0x%x\n", c->pnn, ctdb->nodes[c->pnn]->flags));
-
- /* make sure we don't hold any IPs when we shouldn't */
- if (c->pnn == ctdb->pnn &&
- (ctdb->nodes[c->pnn]->flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_BANNED))) {
- ctdb_release_all_ips(ctdb);
- }
-}
static void print_exit_message(void)
{
@@ -91,10 +61,6 @@ static void ctdb_start_transport(struct ctdb_context *ctdb)
/* Make sure we log something when the daemon terminates */
atexit(print_exit_message);
- /* a handler for when nodes are disabled/enabled */
- ctdb_register_message_handler(ctdb, ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED,
- flag_change_handler, NULL);
-
/* start monitoring for connected/disconnected nodes */
ctdb_start_keepalive(ctdb);
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index 6526397..21f0382 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -81,9 +81,9 @@ static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
data.dptr = (uint8_t *)&c;
data.dsize = sizeof(c);
- /* tell the other nodes that something has changed */
- ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
- CTDB_SRVID_NODE_FLAGS_CHANGED, data);
+ /* ask the recovery daemon to push these changes out to all nodes */
+ ctdb_daemon_send_message(ctdb, ctdb->pnn,
+ CTDB_SRVID_PUSH_NODE_FLAGS, data);
}
@@ -212,35 +212,33 @@ void ctdb_start_monitoring(struct ctdb_context *ctdb)
*/
int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
{
- struct ctdb_node_modflags *m = (struct ctdb_node_modflags *)indata.dptr;
- TDB_DATA data;
- struct ctdb_node_flag_change c;
- struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
- uint32_t old_flags = node->flags;
+ struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)indata.dptr;
+ struct ctdb_node *node;
+
+ if (c->pnn >= ctdb->num_nodes) {
+ DEBUG(DEBUG_ERR,(__location__ " Node %d is invalid, num_nodes :%d\n", c->pnn, ctdb->num_nodes));
+ return -1;
+ }
- node->flags |= m->set;
- node->flags &= ~m->clear;
+ node = ctdb->nodes[c->pnn];
+ c->old_flags = node->flags;
+ node->flags = c->new_flags & ~NODE_FLAGS_DISCONNECTED;
+ node->flags |= (c->old_flags & NODE_FLAGS_DISCONNECTED);
- if (node->flags == old_flags) {
- DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", ctdb->pnn, node->flags));
+ if (node->flags == c->old_flags) {
+ DEBUG(DEBUG_INFO, ("Control modflags on node %u - Unchanged - flags 0x%x\n", c->pnn, node->flags));
return 0;
}
- DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", ctdb->pnn, node->flags));
+ DEBUG(DEBUG_INFO, ("Control modflags on node %u - flags now 0x%x\n", c->pnn, node->flags));
- /* if we have been banned, go into recovery mode */
- c.pnn = ctdb->pnn;
- c.old_flags = old_flags;
- c.new_flags = node->flags;
-
- data.dptr = (uint8_t *)&c;
- data.dsize = sizeof(c);
- /* tell the other nodes that something has changed */
- ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
- CTDB_SRVID_NODE_FLAGS_CHANGED, data);
+ /* tell the recovery daemon something has changed */
+ ctdb_daemon_send_message(ctdb, ctdb->pnn,
+ CTDB_SRVID_SET_NODE_FLAGS, indata);
- if ((node->flags & NODE_FLAGS_BANNED) && !(old_flags & NODE_FLAGS_BANNED)) {
+ /* if we have become banned, we should go into recovery mode */
+ if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED)) {
/* make sure we are frozen */
DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n"));
/* Reset the generation id to 1 to make us ignore any
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 350897a..bef4bb1 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -643,37 +643,26 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node
{
int i;
for (i=0;i<nodemap->num;i++) {
- struct ctdb_node_flag_change c;
- TDB_DATA data;
-
- c.pnn = nodemap->nodes[i].pnn;
- c.old_flags = nodemap->nodes[i].flags;
- c.new_flags = nodemap->nodes[i].flags;
-
- data.dptr = (uint8_t *)&c;
- data.dsize = sizeof(c);
-
- ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
- CTDB_SRVID_NODE_FLAGS_CHANGED, data);
+ int ret;
+ ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].pnn, nodemap->nodes[i].flags, ~nodemap->nodes[i].flags);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+ return -1;
+ }
}
return 0;
}
static int update_our_flags_on_all_nodes(struct ctdb_context *ctdb, uint32_t pnn, struct ctdb_node_map *nodemap)
{
- struct ctdb_node_flag_change c;
- TDB_DATA data;
-
- c.pnn = nodemap->nodes[pnn].pnn;
- c.old_flags = nodemap->nodes[pnn].flags;
- c.new_flags = nodemap->nodes[pnn].flags;
-
- data.dptr = (uint8_t *)&c;
- data.dsize = sizeof(c);
+ int ret;
- ctdb_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
- CTDB_SRVID_NODE_FLAGS_CHANGED, data);
+ ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[pnn].pnn, nodemap->nodes[pnn].flags, ~nodemap->nodes[pnn].flags);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+ return -1;
+ }
return 0;
}
@@ -1038,8 +1027,14 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map *n
return MONITOR_FAILED;
}
if (nodemap->nodes[j].flags != remote_nodemap->nodes[j].flags) {
- struct ctdb_node_flag_change c;
- TDB_DATA data;
+ int ban_changed = (nodemap->nodes[j].flags ^ remote_nodemap->nodes[j].flags) & NODE_FLAGS_BANNED;
+
+ if (ban_changed) {
+ DEBUG(DEBUG_NOTICE,("Remote node %u had different BANNED flags 0x%x, local had 0x%x - trigger a re-election\n",
+ nodemap->nodes[j].pnn,
+ remote_nodemap->nodes[j].flags,
+ nodemap->nodes[j].flags));
+ }
/* We should tell our daemon about this so it
updates its flags or else we will log the same
@@ -1047,16 +1042,11 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map *n
Since we are the recovery master we can just as
well update the flags on all nodes.
*/
- c.pnn = nodemap->nodes[j].pnn;
- c.old_flags = nodemap->nodes[j].flags;
- c.new_flags = remote_nodemap->nodes[j].flags;
-
- data.dptr = (uint8_t *)&c;
- data.dsize = sizeof(c);
-
- ctdb_send_message(ctdb, ctdb->pnn,
- CTDB_SRVID_NODE_FLAGS_CHANGED,
- data);
+ ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, nodemap->nodes[j].flags, ~nodemap->nodes[j].flags);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+ return -1;
+ }
/* Update our local copy of the flags in the recovery
daemon.
@@ -1069,10 +1059,7 @@ static int update_local_flags(struct ctdb_recoverd *rec, struct ctdb_node_map *n
/* If the BANNED flag has changed for the node
this is a good reason to do a new election.
*/
- if ((c.old_flags ^ c.new_flags) & NODE_FLAGS_BANNED) {
- DEBUG(DEBUG_NOTICE,("Remote node %u had different BANNED flags 0x%x, local had 0x%x - trigger a re-election\n",
- nodemap->nodes[j].pnn, c.new_flags,
- c.old_flags));
+ if (ban_changed) {
talloc_free(mem_ctx);
return MONITOR_ELECTION_NEEDED;
}
@@ -1940,15 +1927,6 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
changed_flags = c->old_flags ^ c->new_flags;
- /* Dont let messages from remote nodes change the DISCONNECTED flag.
- This flag is handled locally based on whether the local node
- can communicate with the node or not.
- */
- c->new_flags &= ~NODE_FLAGS_DISCONNECTED;
- if (nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED) {
- c->new_flags |= NODE_FLAGS_DISCONNECTED;
- }
-
if (nodemap->nodes[i].flags != c->new_flags) {
DEBUG(DEBUG_NOTICE,("Node %u has changed flags - now 0x%x was 0x%x\n", c->pnn, c->new_flags, c->old_flags));
}
@@ -1981,6 +1959,20 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
talloc_free(tmp_ctx);
}
+/*
+ handler for when we need to push out flag changes ot all other nodes
+*/
+static void push_flags_handler(struct ctdb_context *ctdb, uint64_t srvid,
+ TDB_DATA data, void *private_data)
+{
+ int ret;
+ struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;
+
+ ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), c->pnn, c->new_flags, ~c->new_flags);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
+ }
+}
struct verify_recmode_normal_data {
@@ -2312,10 +2304,13 @@ static void monitor_cluster(struct ctdb_context *ctdb)
/* register a message port for recovery elections */
ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, rec);
- /* and one for when nodes are disabled/enabled */
- ctdb_set_message_handler(ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, monitor_handler, rec);
+ /* when nodes are disabled/enabled */
+ ctdb_set_message_handler(ctdb, CTDB_SRVID_SET_NODE_FLAGS, monitor_handler, rec);
+
+ /* when we are asked to puch out a flag change */
+ ctdb_set_message_handler(ctdb, CTDB_SRVID_PUSH_NODE_FLAGS, push_flags_handler, rec);
- /* and one for when nodes are banned */
+ /* when nodes are banned */
ctdb_set_message_handler(ctdb, CTDB_SRVID_BAN_NODE, ban_handler, rec);
/* and one for when nodes are unbanned */
@@ -2794,16 +2789,6 @@ again:
}
- DEBUG(DEBUG_DEBUG, (__location__ " Update flags on all nodes\n"));
- /*
- update all nodes to have the same flags that we have
- */
- ret = update_flags_on_all_nodes(ctdb, nodemap);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, (__location__ " Unable to update flags on all nodes\n"));
- goto again;
- }
-
goto again;
}
--
CTDB repository
More information about the samba-cvs
mailing list