[SCM] CTDB repository - branch 1.0.69 updated - ctdb-1.0.69-16-g87e674f

Ronnie Sahlberg sahlberg at samba.org
Thu Apr 30 15:18:51 GMT 2009


The branch, 1.0.69 has been updated
       via  87e674f93728fb20209b6473f07ca323530ddbdc (commit)
       via  478f43bc3c970edc7c8db3e34095774261a48056 (commit)
      from  a63c79318678abe99d2a36fe4465e63eafc008b7 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.0.69


- Log -----------------------------------------------------------------
commit 87e674f93728fb20209b6473f07ca323530ddbdc
Author: root <root at rcn1.VSOFS1.COM>
Date:   Fri May 1 01:24:27 2009 +1000

    new version 1.0.69-5

commit 478f43bc3c970edc7c8db3e34095774261a48056
Author: root <root at rcn1.VSOFS1.COM>
Date:   Fri May 1 01:18:27 2009 +1000

    Add a new variable VerifyRecoveryLock which can be used to disable the test that the recovery daemon holds the lock properly when performing a recovery

-----------------------------------------------------------------------

Summary of changes:
 include/ctdb_private.h  |    1 +
 packaging/RPM/ctdb.spec |    8 +++++++-
 server/ctdb_recover.c   |   15 +++++++++++----
 server/ctdb_tunables.c  |    1 +
 4 files changed, 20 insertions(+), 5 deletions(-)


Changeset truncated at 500 lines:

diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index df194bc..5121cd9 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -118,6 +118,7 @@ struct ctdb_tunable {
 	uint32_t recd_ping_failcount;
 	uint32_t log_latency_ms;
 	uint32_t recovery_drop_all_ips;
+	uint32_t verify_recovery_lock;
 };
 
 /*
diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec
index 01d09e0..432a6aa 100644
--- a/packaging/RPM/ctdb.spec
+++ b/packaging/RPM/ctdb.spec
@@ -5,7 +5,7 @@ Vendor: Samba Team
 Packager: Samba Team <samba at samba.org>
 Name: ctdb
 Version: 1.0
-Release: 69_4
+Release: 69_5
 Epoch: 0
 License: GNU GPL version 3
 Group: System Environment/Daemons
@@ -121,6 +121,12 @@ fi
 %{_includedir}/ctdb_private.h
 
 %changelog
+* Fri May 1 2009 : Version 1.0.69-5
+ - Add a new variable VerifyRecoveryLock. When set to 0 this will skip
+   the test inside the main where it verifies that the recovery master does
+   hold the lock to the reclock file while performing a recovery.
+ - Change the timeout for waiting for a reclock child process to terminate to
+   15 seconds and increase the logging of this potentially fatal condition.
 * Sun Apr 26 2009 : Version 1.0.69_4
  - Add TDB_NO_NESTING to the tdb layer to prevent transaction nesting.
  - Make sure that when we start a recovery transaction that this is not a
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index 153f698..c2807b4 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -505,7 +505,7 @@ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_even
 	   caused by the cluster filesystem being very slow to
 	   arbitrate locks immediately after a node failure.	   
 	 */
-	DEBUG(DEBUG_NOTICE,(__location__ " set_recmode timeout - allowing recmode set\n"));
+	DEBUG(DEBUG_ERR,(__location__ " set_recmode child process hung/timedout CFS slow to grant locks? (allowing recmode set anyway)\n"));
 	state->ctdb->recovery_mode = state->recmode;
 	ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL);
 	talloc_free(state);
@@ -628,11 +628,17 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 	state = talloc(ctdb, struct ctdb_set_recmode_state);
 	CTDB_NO_MEMORY(ctdb, state);
 
+
+	if (ctdb->tunable.verify_recovery_lock == 0) {
+		/* dont need to verify the reclock file */
+		ctdb->recovery_mode = recmode;
+		return 0;
+	}
+
 	/* For the rest of what needs to be done, we need to do this in
 	   a child process since 
 	   1, the call to ctdb_recovery_lock() can block if the cluster
 	      filesystem is in the process of recovery.
-	   2, running of the script may take a while.
 	*/
 	ret = pipe(state->fd);
 	if (ret != 0) {
@@ -653,7 +659,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 		char cc = 0;
 		close(state->fd[0]);
 
-		/* we should not be able to get the lock on the nodes list, 
+		/* we should not be able to get the lock on the reclock file, 
 		  as it should  be held by the recovery master 
 		*/
 		if (ctdb_recovery_lock(ctdb, false)) {
@@ -665,6 +671,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 		/* make sure we die when our parent dies */
 		while (kill(parent, 0) == 0 || errno != ESRCH) {
 			sleep(5);
+			write(state->fd[1], &cc, 1);
 		}
 		_exit(0);
 	}
@@ -672,7 +679,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 
 	talloc_set_destructor(state, set_recmode_destructor);
 
-	state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(3, 0),
+	state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(15, 0),
 				    ctdb_set_recmode_timeout, state);
 
 	state->fde = event_add_fd(ctdb->ev, state, state->fd[0],
diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index fe6a4b4..821236f 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -54,6 +54,7 @@ static const struct {
 	{ "RecdFailCount",	  3,  offsetof(struct ctdb_tunable, recd_ping_failcount) },
 	{ "LogLatencyMs",         0,  offsetof(struct ctdb_tunable, log_latency_ms) },
 	{ "RecoveryDropAllIPs",  60,  offsetof(struct ctdb_tunable, recovery_drop_all_ips) },
+	{ "VerifyRecoveryLock",   1,  offsetof(struct ctdb_tunable, verify_recovery_lock) },
 };
 
 /*


-- 
CTDB repository


More information about the samba-cvs mailing list