[SCM] CTDB repository - branch master updated - ctdb-1.0.57-26-g475cfad

Ronnie Sahlberg sahlberg at samba.org
Wed Sep 17 04:39:41 GMT 2008


The branch, master has been updated
       via  475cfada33b4c13aaaca773d5485bbe26bffbf46 (commit)
       via  196968c552e6ebcb57389d769a4b25f42fa8bc5d (commit)
      from  5e641ef9d6cca286061138a9680dcf2495736e8b (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 475cfada33b4c13aaaca773d5485bbe26bffbf46
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Sep 17 14:24:12 2008 +1000

    Use the correct tunable: failcount, not timeout.

commit 196968c552e6ebcb57389d769a4b25f42fa8bc5d
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed Sep 17 14:17:41 2008 +1000

    The ctdb daemon keeps track of whether the recovery process is running
    correctly by measuring how long it has been since the last successful
    communication with the recovery daemon was recorded.
    
    After a certain timeout the ctdb daemon would deem the recovery daemon
    as inoperable and shut down.
    
    If the system clock is suddenly changed forward by many (60 or more)
    seconds this could cause the timeout to trigger prematurely/immediately,
    where ctdb would incorrectly think that more than 60 seconds had passed
    since the last successful communication and thus abort.
    
    Instead of checking for one timeout occurring, only deem the recovery
    daemon to be "down" and trigger a shutdown if communications have
    timed out for three intervals in a row.

-----------------------------------------------------------------------

Summary of changes:
 include/ctdb_private.h |    3 ++-
 server/ctdb_recover.c  |   21 ++++++++++++++++-----
 server/ctdb_tunables.c |    3 ++-
 3 files changed, 20 insertions(+), 7 deletions(-)


Changeset truncated at 500 lines:

diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index b2ded31..5b0ba81 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -115,6 +115,7 @@ struct ctdb_tunable {
 	uint32_t no_ip_failback;
 	uint32_t verbose_memory_names;
 	uint32_t recd_ping_timeout;
+	uint32_t recd_ping_failcount;
 };
 
 /*
@@ -418,7 +419,7 @@ struct ctdb_context {
 	int start_as_disabled;
 	uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
 	TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */
-	TALLOC_CTX *recd_ping_ctx;
+	uint32_t *recd_ping_count;
 };
 
 struct ctdb_db_context {
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index 6b207d5..dc6533c 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -974,8 +974,19 @@ int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outda
 static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
 {
 	struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
+	uint32_t *count = talloc_get_type(ctdb->recd_ping_count, uint32_t);
 
-	DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Shutting down ctdb daemon\n"));
+	DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Count : %u\n", *count));
+
+	if (*count < ctdb->tunable.recd_ping_failcount) {
+		(*count)++;
+		event_add_timed(ctdb->ev, ctdb->recd_ping_count, 
+			timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0),
+			ctdb_recd_ping_timeout, ctdb);
+		return;
+	}
+
+	DEBUG(DEBUG_ERR, (__location__ " Final timeout for recovery daemon ping. Shutting down ctdb daemon\n"));
 
 	ctdb_stop_recoverd(ctdb);
 	ctdb_stop_keepalive(ctdb);
@@ -995,13 +1006,13 @@ static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event
 */
 int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb)
 {
-	talloc_free(ctdb->recd_ping_ctx);
+	talloc_free(ctdb->recd_ping_count);
 
-	ctdb->recd_ping_ctx = talloc_new(ctdb);
-	CTDB_NO_MEMORY(ctdb, ctdb->recd_ping_ctx);
+	ctdb->recd_ping_count = talloc_zero(ctdb, uint32_t);
+	CTDB_NO_MEMORY(ctdb, ctdb->recd_ping_count);
 
 	if (ctdb->tunable.recd_ping_timeout != 0) {
-		event_add_timed(ctdb->ev, ctdb->recd_ping_ctx, 
+		event_add_timed(ctdb->ev, ctdb->recd_ping_count, 
 			timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0),
 			ctdb_recd_ping_timeout, ctdb);
 	}
diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index de3e466..cc8e067 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -50,7 +50,8 @@ static const struct {
 	{ "ReclockPingPeriod",   60,  offsetof(struct ctdb_tunable,  reclock_ping_period) },
 	{ "NoIPFailback",         0,  offsetof(struct ctdb_tunable, no_ip_failback) },
 	{ "VerboseMemoryNames",   0,  offsetof(struct ctdb_tunable, verbose_memory_names) },
-	{ "RecdPingTimeout",	 60,  offsetof(struct ctdb_tunable, recd_ping_timeout) },
+	{ "RecdPingTimeout",	 20,  offsetof(struct ctdb_tunable, recd_ping_timeout) },
+	{ "RecdFailCount",	  3,  offsetof(struct ctdb_tunable, recd_ping_failcount) },
 };
 
 /*


-- 
CTDB repository


More information about the samba-cvs mailing list