[SCM] CTDB repository - branch master updated - ctdb-2.1-97-g05f785b
Amitay Isaacs
amitay at samba.org
Sun May 5 21:36:18 MDT 2013
The branch, master has been updated
via 05f785b51cfd8b22b3ae35bf034127fbc07005be (commit)
via 0b7257642f62ebd83c05b6e2922f0dc2737f175c (commit)
from b5a8791268e938d7e017056e0e2bd2cbec1fa690 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit 05f785b51cfd8b22b3ae35bf034127fbc07005be
Author: Martin Schwenke <martin at meltin.net>
Date: Tue Apr 30 17:22:23 2013 +1000
ctdbd: Avoid freeing non-monitor event callback when monitoring is disabled
When running a non-monitor event, check is made for any active monitor
events. If there is an active monitor event, then the active monitor
event is cancelled. This is done by freeing state->callback which is
allocated from monitor_context.
When CTDB is stopped or shutdown, monitoring is disabled by freeing
monitor_context, which frees callback and then stopped or shutdown event
is run. This creates a new callback structure which is allocated at
the exact same memory location as the monitor callback which was freed.
So in the check for active monitor events, it frees the new callback
for non-monitor event. Since the callback function flags successful
completion of that event, it is never marked complete and CTDB is stuck
in a loop waiting for completion.
Move the monitor cancellation to the top of the function so that this
can't happen.
Follow log snippest highlights the problem.
2013/04/30 16:54:10.673807 [21505]: Received SHUTDOWN command. Stopping CTDB daemon.
2013/04/30 16:54:10.673814 [21505]: Shutting down recovery daemon
2013/04/30 16:54:10.673852 [21505]: server/eventscript.c:696 in remove_callback 0x1c6d5c0
2013/04/30 16:54:10.673858 [21505]: Monitoring has been stopped
2013/04/30 16:54:10.673899 [21505]: server/eventscript.c:594 Sending SIGTERM to child pid:23847
2013/04/30 16:54:10.673913 [21505]: server/eventscript.c:629 searching for callback 0x1c6d5c0
2013/04/30 16:54:10.673932 [21505]: server/eventscript.c:641 running callback
2013/04/30 16:54:10.673939 [21505]: server/eventscript.c:866 in event_script_callback
2013/04/30 16:54:10.673946 [21505]: server/eventscript.c:696 in remove_callback 0x1c6d5c0
Signed-off-by: Martin Schwenke <martin at meltin.net>
Pair-programmed-with: Amitay Isaacs <amitay at gmail.com>
commit 0b7257642f62ebd83c05b6e2922f0dc2737f175c
Author: Martin Schwenke <martin at meltin.net>
Date: Thu Feb 21 10:43:35 2013 +1100
recoverd: Interface reference count changes should not cause takeover runs
At the moment a naive compare of the all the interface data is done.
So, if any IPs move then the reference counts for the the relevant
interfaces change, interfaces appear to have changed and another
takeover run is initiated by each node that took/released IPs.
This change stops the spurious takeover runs by changing the interface
comparison to ignore the reference counts.
Signed-off-by: Martin Schwenke <martin at meltin.net>
-----------------------------------------------------------------------
Summary of changes:
server/ctdb_recoverd.c | 70 ++++++++++++++++++++++++++++++++---------------
server/eventscript.c | 61 ++++++++++++++++++++---------------------
2 files changed, 77 insertions(+), 54 deletions(-)
Changeset truncated at 500 lines:
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 2cfa9b9..c3a1852 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -2855,17 +2855,61 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct
return status;
}
+static bool interfaces_have_changed(struct ctdb_context *ctdb,
+ struct ctdb_recoverd *rec)
+{
+ struct ctdb_control_get_ifaces *ifaces = NULL;
+ TALLOC_CTX *mem_ctx;
+ bool ret = false;
+
+ mem_ctx = talloc_new(NULL);
+
+ /* Read the interfaces from the local node */
+ if (ctdb_ctrl_get_ifaces(ctdb, CONTROL_TIMEOUT(),
+ CTDB_CURRENT_NODE, mem_ctx, &ifaces) != 0) {
+ DEBUG(DEBUG_ERR, ("Unable to get interfaces from local node %u\n", ctdb->pnn));
+ /* We could return an error. However, this will be
+ * rare so we'll decide that the interfaces have
+ * actually changed, just in case.
+ */
+ talloc_free(mem_ctx);
+ return true;
+ }
+
+ if (!rec->ifaces) {
+ /* We haven't been here before so things have changed */
+ ret = true;
+ } else if (rec->ifaces->num != ifaces->num) {
+ /* Number of interfaces has changed */
+ ret = true;
+ } else {
+ /* See if interface names or link states have changed */
+ int i;
+ for (i = 0; i < rec->ifaces->num; i++) {
+ struct ctdb_control_iface_info * iface = &rec->ifaces->ifaces[i];
+ if (strcmp(iface->name, ifaces->ifaces[i].name) != 0 ||
+ iface->link_state != ifaces->ifaces[i].link_state) {
+ ret = true;
+ break;
+ }
+ }
+ }
+
+ talloc_free(rec->ifaces);
+ rec->ifaces = talloc_steal(rec, ifaces);
+
+ talloc_free(mem_ctx);
+ return ret;
+}
/* called to check that the local allocation of public ip addresses is ok.
*/
static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn, struct ctdb_node_map *nodemap)
{
TALLOC_CTX *mem_ctx = talloc_new(NULL);
- struct ctdb_control_get_ifaces *ifaces = NULL;
struct ctdb_uptime *uptime1 = NULL;
struct ctdb_uptime *uptime2 = NULL;
int ret, j;
- bool need_iface_check = false;
bool need_takeover_run = false;
ret = ctdb_ctrl_uptime(ctdb, mem_ctx, CONTROL_TIMEOUT(),
@@ -2876,27 +2920,7 @@ static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_rec
return -1;
}
-
- /* read the interfaces from the local node */
- ret = ctdb_ctrl_get_ifaces(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &ifaces);
- if (ret != 0) {
- DEBUG(DEBUG_ERR, ("Unable to get interfaces from local node %u\n", pnn));
- talloc_free(mem_ctx);
- return -1;
- }
-
- if (!rec->ifaces) {
- need_iface_check = true;
- } else if (rec->ifaces->num != ifaces->num) {
- need_iface_check = true;
- } else if (memcmp(rec->ifaces, ifaces, talloc_get_size(ifaces)) != 0) {
- need_iface_check = true;
- }
-
- talloc_free(rec->ifaces);
- rec->ifaces = talloc_steal(rec, ifaces);
-
- if (need_iface_check) {
+ if (interfaces_have_changed(ctdb, rec)) {
DEBUG(DEBUG_NOTICE, ("The interfaces status has changed on "
"local node %u - force takeover run\n",
pnn));
diff --git a/server/eventscript.c b/server/eventscript.c
index 817cb0c..41bf21d 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -707,36 +707,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
{
struct ctdb_event_script_state *state;
- state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state);
- CTDB_NO_MEMORY(ctdb, state);
-
- /* The callback isn't done if the context is freed. */
- state->callback = talloc(mem_ctx, struct event_script_callback);
- CTDB_NO_MEMORY(ctdb, state->callback);
- DLIST_ADD(ctdb->script_callbacks, state->callback);
- talloc_set_destructor(state->callback, remove_callback);
- state->callback->ctdb = ctdb;
- state->callback->fn = callback;
- state->callback->private_data = private_data;
-
- state->ctdb = ctdb;
- state->from_user = from_user;
- state->call = call;
- state->options = talloc_vasprintf(state, fmt, ap);
- state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
- state->scripts = NULL;
- if (state->options == NULL) {
- DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
- talloc_free(state);
- return -1;
- }
- if (!check_options(state->call, state->options)) {
- DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for %s\n",
- ctdb_eventscript_call_names[state->call], state->options));
- talloc_free(state);
- return -1;
- }
-
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
/* we guarantee that only some specifically allowed event scripts are run
while in recovery */
@@ -755,7 +725,6 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
if (i == ARRAY_SIZE(allowed_calls)) {
DEBUG(DEBUG_ERR,("Refusing to run event scripts call '%s' while in recovery\n",
ctdb_eventscript_call_names[call]));
- talloc_free(state);
return -1;
}
}
@@ -778,6 +747,36 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
ctdb->current_monitor = NULL;
}
+ state = talloc(ctdb->event_script_ctx, struct ctdb_event_script_state);
+ CTDB_NO_MEMORY(ctdb, state);
+
+ /* The callback isn't done if the context is freed. */
+ state->callback = talloc(mem_ctx, struct event_script_callback);
+ CTDB_NO_MEMORY(ctdb, state->callback);
+ DLIST_ADD(ctdb->script_callbacks, state->callback);
+ talloc_set_destructor(state->callback, remove_callback);
+ state->callback->ctdb = ctdb;
+ state->callback->fn = callback;
+ state->callback->private_data = private_data;
+
+ state->ctdb = ctdb;
+ state->from_user = from_user;
+ state->call = call;
+ state->options = talloc_vasprintf(state, fmt, ap);
+ state->timeout = timeval_set(ctdb->tunable.script_timeout, 0);
+ state->scripts = NULL;
+ if (state->options == NULL) {
+ DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
+ talloc_free(state);
+ return -1;
+ }
+ if (!check_options(state->call, state->options)) {
+ DEBUG(DEBUG_ERR, ("Bad eventscript options '%s' for %s\n",
+ ctdb_eventscript_call_names[state->call], state->options));
+ talloc_free(state);
+ return -1;
+ }
+
DEBUG(DEBUG_INFO,(__location__ " Starting eventscript %s %s\n",
ctdb_eventscript_call_names[state->call],
state->options));
--
CTDB repository
More information about the samba-cvs
mailing list