[SCM] CTDB repository - branch status-test updated - ctdb-1.0.104-10-gc5f7981
Ronnie Sahlberg
sahlberg at samba.org
Mon Nov 23 13:42:26 MST 2009
The branch, status-test has been updated
via c5f798116bf3b7954e23c7267b056ee1f5560f45 (commit)
from f1e350f9edb74cc44b6c5be4c062fd93e98ba8c4 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=status-test
- Log -----------------------------------------------------------------
commit c5f798116bf3b7954e23c7267b056ee1f5560f45
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Tue Nov 24 07:40:51 2009 +1100
rework and simplify the eventscript handling
-----------------------------------------------------------------------
Summary of changes:
include/ctdb_private.h | 11 ++-
server/eventscript.c | 251 +++++++++++++++++++----------------------------
2 files changed, 109 insertions(+), 153 deletions(-)
Changeset truncated at 500 lines:
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index ba6a675..bee921d 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -448,12 +448,15 @@ struct ctdb_context {
int start_as_disabled;
int start_as_stopped;
uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
- TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */
uint32_t *recd_ping_count;
TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */
- TALLOC_CTX *script_monitor_ctx; /* a context where we store results while running the monitor event */
- TALLOC_CTX *last_monitor_ctx;
- TALLOC_CTX *event_script_ctx; /* non-monitoring events */
+
+ TALLOC_CTX *monitor_event_script_ctx;
+ TALLOC_CTX *other_event_script_ctx;
+
+ struct ctdb_monitor_script_status_ctx *current_monitor_status_ctx;
+ struct ctdb_monitor_script_status_ctx *last_monitor_status_ctx;
+
TALLOC_CTX *banning_ctx;
};
diff --git a/server/eventscript.c b/server/eventscript.c
index 4c193c8..1e76cd6 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -65,7 +65,6 @@ struct ctdb_event_script_state {
int fd[2];
void *private_data;
const char *options;
- struct timed_event *te;
struct timeval timeout;
};
@@ -81,28 +80,22 @@ struct ctdb_monitor_script_status {
char *output;
};
-struct ctdb_monitor_status {
- struct timeval start;
- struct timeval finished;
- int32_t status;
+struct ctdb_monitor_script_status_ctx {
struct ctdb_monitor_script_status *scripts;
- struct ctdb_event_script_state *state;
};
-
/* called from ctdb_logging when we have received output on STDERR from
* one of the eventscripts
*/
int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t len)
{
- struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
struct ctdb_monitor_script_status *script;
- if (monitoring_status == NULL) {
+ if (ctdb->current_monitor_status_ctx == NULL) {
return -1;
}
- script = monitoring_status->scripts;
+ script = ctdb->current_monitor_status_ctx->scripts;
if (script == NULL) {
return -1;
}
@@ -121,17 +114,13 @@ int ctdb_log_event_script_output(struct ctdb_context *ctdb, char *str, uint16_t
*/
int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb)
{
- struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
-
DEBUG(DEBUG_INFO, ("event script init called\n"));
- if (monitoring_status == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " Init called when context is NULL\n"));
- return 0;
+ if (ctdb->current_monitor_status_ctx == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when initing script\n"));
+ return -1;
}
- monitoring_status->start = timeval_current();
-
return 0;
}
@@ -142,41 +131,26 @@ int32_t ctdb_control_event_script_init(struct ctdb_context *ctdb)
int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA indata)
{
const char *name = (const char *)indata.dptr;
- struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
- struct ctdb_event_script_state *state;
struct ctdb_monitor_script_status *script;
DEBUG(DEBUG_INFO, ("event script start called : %s\n", name));
- if (monitoring_status == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when starting to run script %s\n", name));
+ if (ctdb->current_monitor_status_ctx == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when starting script\n"));
return -1;
}
- script = talloc_zero(monitoring_status, struct ctdb_monitor_script_status);
+ script = talloc_zero(ctdb->current_monitor_status_ctx, struct ctdb_monitor_script_status);
if (script == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Failed to talloc ctdb_monitor_script_status for script %s\n", name));
return -1;
}
- script->next = monitoring_status->scripts;
+ script->next = ctdb->current_monitor_status_ctx->scripts;
script->name = talloc_strdup(script, name);
CTDB_NO_MEMORY(ctdb, script->name);
script->start = timeval_current();
- monitoring_status->scripts = script;
-
- state = monitoring_status->state;
- if (state != NULL) {
- /* reset the timeout for the next eventscript */
- if (!timeval_is_zero(&state->timeout)) {
- if (state->te != NULL) {
- talloc_free(state->te);
- state->te = NULL;
- }
- state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
- }
-
- }
+ ctdb->current_monitor_status_ctx->scripts = script;
return 0;
}
@@ -187,15 +161,14 @@ int32_t ctdb_control_event_script_start(struct ctdb_context *ctdb, TDB_DATA inda
int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indata)
{
int32_t res = *((int32_t *)indata.dptr);
- struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
struct ctdb_monitor_script_status *script;
- if (monitoring_status == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when script finished.\n"));
+ if (ctdb->current_monitor_status_ctx == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when script finished\n"));
return -1;
}
- script = monitoring_status->scripts;
+ script = ctdb->current_monitor_status_ctx->scripts;
if (script == NULL) {
DEBUG(DEBUG_ERR,(__location__ " script is NULL when the script had finished\n"));
return -1;
@@ -214,17 +187,16 @@ int32_t ctdb_control_event_script_stop(struct ctdb_context *ctdb, TDB_DATA indat
int32_t ctdb_control_event_script_disabled(struct ctdb_context *ctdb, TDB_DATA indata)
{
const char *name = (const char *)indata.dptr;
- struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
struct ctdb_monitor_script_status *script;
DEBUG(DEBUG_INFO, ("event script disabed called for script %s\n", name));
- if (monitoring_status == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when script finished.\n"));
+ if (ctdb->current_monitor_status_ctx == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " current_monitor_status_ctx is NULL when script finished\n"));
return -1;
}
- script = monitoring_status->scripts;
+ script = ctdb->current_monitor_status_ctx->scripts;
if (script == NULL) {
DEBUG(DEBUG_ERR,(__location__ " script is NULL when the script had finished\n"));
return -1;
@@ -242,24 +214,19 @@ int32_t ctdb_control_event_script_disabled(struct ctdb_context *ctdb, TDB_DATA i
*/
int32_t ctdb_control_event_script_finished(struct ctdb_context *ctdb)
{
- struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
-
DEBUG(DEBUG_INFO, ("event script finished called\n"));
- if (monitoring_status == NULL) {
+ if (ctdb->current_monitor_status_ctx == NULL) {
DEBUG(DEBUG_ERR,(__location__ " script_status is NULL when monitoring event finished\n"));
return -1;
}
- monitoring_status->finished = timeval_current();
- monitoring_status->status = MONITOR_SCRIPT_OK;
-
- if (ctdb->last_monitor_ctx) {
- talloc_free(ctdb->last_monitor_ctx);
- ctdb->last_monitor_ctx = NULL;
+ if (ctdb->last_monitor_status_ctx) {
+ talloc_free(ctdb->last_monitor_status_ctx);
+ ctdb->last_monitor_status_ctx = NULL;
}
- ctdb->last_monitor_ctx = talloc_steal(ctdb, ctdb->script_monitor_ctx);
- ctdb->script_monitor_ctx = NULL;
+ ctdb->last_monitor_status_ctx = talloc_steal(ctdb, ctdb->current_monitor_status_ctx);
+ ctdb->current_monitor_status_ctx = NULL;
return 0;
}
@@ -303,11 +270,11 @@ static struct ctdb_monitoring_wire *marshall_monitoring_scripts(TALLOC_CTX *mem_
int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA *outdata)
{
- struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->last_monitor_ctx;
+ struct ctdb_monitor_script_status_ctx *script_status = talloc_get_type(ctdb->last_monitor_status_ctx, struct ctdb_monitor_script_status_ctx);
struct ctdb_monitoring_wire *monitoring_scripts;
- if (monitoring_status == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " last_monitor_ctx is NULL when reading status\n"));
+ if (script_status == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " last_monitor_status_ctx is NULL when reading status\n"));
return -1;
}
@@ -318,7 +285,7 @@ int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb, TDB_DATA
}
monitoring_scripts->num_scripts = 0;
- monitoring_scripts = marshall_monitoring_scripts(outdata, monitoring_scripts, monitoring_status->scripts);
+ monitoring_scripts = marshall_monitoring_scripts(outdata, monitoring_scripts, script_status->scripts);
if (monitoring_scripts == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Monitoring scritps is NULL. can not return data to client\n"));
return -1;
@@ -510,7 +477,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
/* we guarantee that only some specifically allowed event scripts are run
while in recovery */
- const char *allowed_scripts[] = {"startrecovery", "shutdown", "releaseip", "stopped" };
+ const char *allowed_scripts[] = {"startrecovery", "shutdown", "releaseip", "stopped" };
int i;
for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) {
if (strncmp(options, allowed_scripts[i], strlen(allowed_scripts[i])) == 0) break;
@@ -547,7 +514,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
status of the event asynchronously.
*/
if ((ctdb->tunable.use_status_events_for_monitoring != 0)
- && (!strcmp(options, "status"))) {
+ && (!strcmp(options, "monitor"))) {
cmdstr = talloc_asprintf(tmp_ctx, "%s/%s %s",
ctdb->event_script_dir,
current->name, "status");
@@ -642,7 +609,7 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
struct ctdb_event_script_state *state =
talloc_get_type(p, struct ctdb_event_script_state);
struct ctdb_context *ctdb = state->ctdb;
- signed char rt = -1;
+ signed char rt = 0;
read(state->fd[0], &rt, sizeof(rt));
@@ -653,9 +620,10 @@ static void ctdb_event_script_handler(struct event_context *ev, struct fd_event
state->callback = NULL;
}
+ ctdb->event_script_timeouts = 0;
+
talloc_set_destructor(state, NULL);
talloc_free(state);
- ctdb->event_script_timeouts = 0;
}
static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
@@ -680,12 +648,9 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
void *private_data = state->private_data;
struct ctdb_context *ctdb = state->ctdb;
- char *options;
- struct ctdb_monitor_status *monitoring_status = (struct ctdb_monitor_status *)ctdb->script_monitor_ctx;
-
- state->te = NULL;
DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u pid : %d\n", state->options, ctdb->event_script_timeouts, state->child));
+
if (kill(state->child, 0) != 0) {
DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));
if (state->callback) {
@@ -697,10 +662,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
return;
}
- options = talloc_strdup(ctdb, state->options);
- CTDB_NO_MEMORY_VOID(ctdb, options);
-
- if (!strcmp(options, "monitor")) {
+ if (!strcmp(state->options, "monitor")) {
/* if it is a monitor event, we allow it to "hang" a few times
before we declare it a failure and ban ourself (and make
ourself unhealthy)
@@ -708,21 +670,12 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
ctdb->event_script_timeouts++;
+
if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
- if (ctdb->tunable.script_unhealthy_on_timeout != 0) {
- DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
- if (state->callback) {
- state->callback(ctdb, -ETIME, private_data);
- state->callback = NULL;
- }
- } else {
- ctdb->event_script_timeouts = 0;
- DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
- ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
- if (state->callback) {
- state->callback(ctdb, -1, private_data);
- state->callback = NULL;
- }
+ DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Making node unhealthy\n", ctdb->tunable.script_ban_count));
+ if (state->callback) {
+ state->callback(ctdb, -ETIME, private_data);
+ state->callback = NULL;
}
} else {
if (state->callback) {
@@ -730,40 +683,48 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
state->callback = NULL;
}
}
- } else if (!strcmp(options, "startup")) {
+ } else if (!strcmp(state->options, "startup")) {
DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
if (state->callback) {
state->callback(ctdb, -1, private_data);
state->callback = NULL;
}
} else {
- /* if it is not a monitor event we ban ourself immediately */
+ /* if it is not a monitor or a startup event we ban ourself
+ immediately
+ */
DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
+
ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
+
if (state->callback) {
state->callback(ctdb, -1, private_data);
state->callback = NULL;
}
}
- if ((!strcmp(options, "monitor")) && (monitoring_status != NULL)) {
+ if (!strcmp(state->options, "monitor") || !strcmp(state->options, "status")) {
struct ctdb_monitor_script_status *script;
- script = monitoring_status->scripts;
+ if (ctdb->current_monitor_status_ctx == NULL) {
+ talloc_free(state);
+ return;
+ }
+
+ script = ctdb->current_monitor_status_ctx->scripts;
if (script != NULL) {
script->timedout = 1;
}
- monitoring_status->status = MONITOR_SCRIPT_TIMEOUT;
- if (ctdb->last_monitor_ctx) {
- talloc_free(ctdb->last_monitor_ctx);
- ctdb->last_monitor_ctx = NULL;
+
+ if (ctdb->last_monitor_status_ctx) {
+ talloc_free(ctdb->last_monitor_status_ctx);
+ ctdb->last_monitor_status_ctx = NULL;
}
- ctdb->last_monitor_ctx = talloc_steal(ctdb, ctdb->script_monitor_ctx);
- ctdb->script_monitor_ctx = NULL;
+ ctdb->last_monitor_status_ctx = talloc_steal(ctdb, ctdb->current_monitor_status_ctx);
+ ctdb->current_monitor_status_ctx = NULL;
}
talloc_free(state);
- talloc_free(options);
}
/*
@@ -795,41 +756,51 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
void *private_data,
const char *fmt, va_list ap)
{
- struct ctdb_monitor_status *monitoring_status;
+ TALLOC_CTX *mem_ctx;
struct ctdb_event_script_state *state;
int ret;
- if (!strcmp(fmt, "monitor")) {
- if (ctdb->script_monitor_ctx != NULL) {
- talloc_free(ctdb->script_monitor_ctx);
- ctdb->script_monitor_ctx = NULL;
+ if (!strcmp(fmt, "monitor") || !strcmp(fmt, "status")) {
+ /* if this was a "monitor" or a status event, we recycle the
+ context to start a new monitor event
+ */
+ if (ctdb->monitor_event_script_ctx != NULL) {
+ talloc_free(ctdb->monitor_event_script_ctx);
+ ctdb->monitor_event_script_ctx = NULL;
}
- monitoring_status = talloc_zero(ctdb, struct ctdb_monitor_status);
- } else {
- if (ctdb->event_script_ctx == NULL) {
- ctdb->event_script_ctx = talloc_zero(ctdb, struct ctdb_monitor_status);
+ ctdb->monitor_event_script_ctx = talloc_new(ctdb);
+ mem_ctx = ctdb->monitor_event_script_ctx;
+
+ if (ctdb->current_monitor_status_ctx != NULL) {
+ talloc_free(ctdb->current_monitor_status_ctx);
+ ctdb->current_monitor_status_ctx = NULL;
}
- monitoring_status = ctdb->event_script_ctx;
- }
- if (monitoring_status == NULL) {
- DEBUG(DEBUG_ERR, (__location__ " ERROR: Failed to talloc script_monitoring context\n"));
- return -1;
+ ctdb->current_monitor_status_ctx = talloc(ctdb->monitor_event_script_ctx, struct ctdb_monitor_script_status_ctx);
+ ctdb->current_monitor_status_ctx->scripts = NULL;
+ } else {
+ /* any other script will first terminate any monitor event */
+ if (ctdb->monitor_event_script_ctx != NULL) {
+ talloc_free(ctdb->monitor_event_script_ctx);
+ ctdb->monitor_event_script_ctx = NULL;
+ }
+ /* and then use a context common for all non-monitor events */
+ if (ctdb->other_event_script_ctx != NULL) {
+ talloc_free(ctdb->other_event_script_ctx);
+ ctdb->other_event_script_ctx = NULL;
+ }
+ ctdb->other_event_script_ctx = talloc_new(ctdb);
+ mem_ctx = ctdb->other_event_script_ctx;
}
- state = talloc(monitoring_status, struct ctdb_event_script_state);
- if (state == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " could not allocate state\n"));
- return -1;
- }
- monitoring_status->state = state;
+ state = talloc(mem_ctx, struct ctdb_event_script_state);
+ CTDB_NO_MEMORY(ctdb, state);
state->ctdb = ctdb;
state->callback = callback;
state->private_data = private_data;
state->options = talloc_vasprintf(state, fmt, ap);
state->timeout = timeout;
- state->te = NULL;
if (state->options == NULL) {
DEBUG(DEBUG_ERR, (__location__ " could not allocate state->options\n"));
talloc_free(state);
@@ -860,22 +831,14 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
set_close_on_exec(state->fd[1]);
rt = ctdb_event_script_v(ctdb, state->options);
- while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
- write(state->fd[1], &rt, sizeof(rt));
- usleep(100000);
- }
+ write(state->fd[1], &rt, sizeof(rt));
+ close(state->fd[1]);
_exit(rt);
}
- talloc_set_destructor(state, event_script_destructor);
- if (!strcmp(fmt, "monitor")) {
- ctdb->script_monitor_ctx = monitoring_status;
- } else {
- ctdb->event_script_ctx = monitoring_status;
- }
-
close(state->fd[1]);
set_close_on_exec(state->fd[0]);
+ talloc_set_destructor(state, event_script_destructor);
DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child eventscript process\n", state->fd[0]));
@@ -883,7 +846,7 @@ static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
ctdb_event_script_handler, state);
if (!timeval_is_zero(&state->timeout)) {
- state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
+ event_add_timed(ctdb->ev, state, timeval_current_ofs(state->timeout.tv_sec, state->timeout.tv_usec), ctdb_event_script_timeout, state);
} else {
DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", state->options));
}
@@ -930,8 +893,8 @@ static void event_script_callback(struct ctdb_context *ctdb, int status, void *p
}
/*
- run the event script, waiting for it to complete. Used when the caller doesn't want to
- continue till the event script has finished.
+ run the event script, waiting for it to complete. Used when the caller
+ doesn't want to continue till the event script has finished.
*/
--
CTDB repository
More information about the samba-cvs
mailing list