[SCM] CTDB repository - branch master updated - 60f3c04bd8b20ecbe937ffed08875cdc6898b422

Ronnie Sahlberg sahlberg at samba.org
Mon Jul 7 10:43:28 GMT 2008


The branch, master has been updated
       via  60f3c04bd8b20ecbe937ffed08875cdc6898b422 (commit)
      from  6043f926f89b361c7fe14fc60d2769fd2ba63dfc (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 60f3c04bd8b20ecbe937ffed08875cdc6898b422
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jul 7 20:38:59 2008 +1000

    Use more liberal handling of event scripts timing out.
    
    If the event script that timed out was for the "monitor" event, then
    even though it timed out we still return SUCCESS to the caller that invoked the event script.
    Consider the event script for "monitor" to have failed with an error
    only if it actually terminated with an error, or if it timed out 5 times in a row and hung.

-----------------------------------------------------------------------

Summary of changes:
 server/ctdb_tunables.c |    2 +-
 server/eventscript.c   |   32 +++++++++++++++++++++++++-------
 2 files changed, 26 insertions(+), 8 deletions(-)


Changeset truncated at 500 lines:

diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index 9b9c79c..d138137 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -38,7 +38,7 @@ static const struct {
 	{ "MonitorInterval",     15,  offsetof(struct ctdb_tunable, monitor_interval) },
 	{ "TickleUpdateInterval",20,  offsetof(struct ctdb_tunable, tickle_update_interval) },
 	{ "EventScriptTimeout",  20,  offsetof(struct ctdb_tunable, script_timeout) },
-	{ "EventScriptBanCount",  3,  offsetof(struct ctdb_tunable, script_ban_count) },
+	{ "EventScriptBanCount",  5,  offsetof(struct ctdb_tunable, script_ban_count) },
 	{ "RecoveryGracePeriod", 60,  offsetof(struct ctdb_tunable, recovery_grace_period) },
 	{ "RecoveryBanPeriod",  300,  offsetof(struct ctdb_tunable, recovery_ban_period) },
 	{ "DatabaseHashSize", 10000,  offsetof(struct ctdb_tunable, database_hash_size) },
diff --git a/server/eventscript.c b/server/eventscript.c
index 0e4af03..54d914b 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -257,15 +257,33 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
 
 	DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
 
-	talloc_free(state);
-	callback(ctdb, -1, private_data);
-
-	ctdb->event_script_timeouts++;
-	if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
-		ctdb->event_script_timeouts = 0;
-		DEBUG(DEBUG_ERR, ("Maximum timeout count reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.recovery_ban_period));
+	if (!strcmp(state->options, "monitor")) {
+		/* if it is a monitor event, we allow it to "hang" a few times
+		   before we declare it a failure and ban ourself (and make
+		   ourself unhealthy)
+		*/
+		DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
+
+		ctdb->event_script_timeouts++;
+		if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
+			ctdb->event_script_timeouts = 0;
+			DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
+			ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
+			callback(ctdb, -1, private_data);
+		} else {
+		  	callback(ctdb, 0, private_data);
+		}
+	} else if (!strcmp(state->options, "startup")) {
+		DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
+		callback(ctdb, -1, private_data);
+	} else {
+		/* if it is not a monitor event we ban ourself immediately */
+		DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
 		ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
+		callback(ctdb, -1, private_data);
 	}
+
+	talloc_free(state);
 }
 
 /*


-- 
CTDB repository


More information about the samba-cvs mailing list