[SCM] CTDB repository - branch 1.0.69 updated -
ctdb-1.0.69-16-g87e674f
Ronnie Sahlberg
sahlberg at samba.org
Thu Apr 30 15:18:51 GMT 2009
The branch, 1.0.69 has been updated
via 87e674f93728fb20209b6473f07ca323530ddbdc (commit)
via 478f43bc3c970edc7c8db3e34095774261a48056 (commit)
from a63c79318678abe99d2a36fe4465e63eafc008b7 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.0.69
- Log -----------------------------------------------------------------
commit 87e674f93728fb20209b6473f07ca323530ddbdc
Author: root <root at rcn1.VSOFS1.COM>
Date: Fri May 1 01:24:27 2009 +1000
new version 1.0.69-5
commit 478f43bc3c970edc7c8db3e34095774261a48056
Author: root <root at rcn1.VSOFS1.COM>
Date: Fri May 1 01:18:27 2009 +1000
Add a new variable VerifyRecoveryLock which can be used to disable the test that the recovery daemon holds the lock properly when performing a recovery
-----------------------------------------------------------------------
Summary of changes:
include/ctdb_private.h | 1 +
packaging/RPM/ctdb.spec | 8 +++++++-
server/ctdb_recover.c | 15 +++++++++++----
server/ctdb_tunables.c | 1 +
4 files changed, 20 insertions(+), 5 deletions(-)
Changeset truncated at 500 lines:
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index df194bc..5121cd9 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -118,6 +118,7 @@ struct ctdb_tunable {
uint32_t recd_ping_failcount;
uint32_t log_latency_ms;
uint32_t recovery_drop_all_ips;
+ uint32_t verify_recovery_lock;
};
/*
diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec
index 01d09e0..432a6aa 100644
--- a/packaging/RPM/ctdb.spec
+++ b/packaging/RPM/ctdb.spec
@@ -5,7 +5,7 @@ Vendor: Samba Team
Packager: Samba Team <samba at samba.org>
Name: ctdb
Version: 1.0
-Release: 69_4
+Release: 69_5
Epoch: 0
License: GNU GPL version 3
Group: System Environment/Daemons
@@ -121,6 +121,12 @@ fi
%{_includedir}/ctdb_private.h
%changelog
+* Fri May 1 2009 : Version 1.0.69-5
+ - Add a new variable VerifyRecoveryLock. When set to 0 this will skip
+ the test inside the main daemon where it verifies that the recovery master does
+ hold the lock to the reclock file while performing a recovery.
+ - Change the timeout for waiting for a reclock child process to terminate to
+ 15 seconds and increase the logging of this potentially fatal condition.
* Sun Apr 26 2009 : Version 1.0.69_4
- Add TDB_NO_NESTING to the tdb layer to prevent transaction nesting.
- Make sure that when we start a recovery transaction that this is not a
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index 153f698..c2807b4 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -505,7 +505,7 @@ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_even
caused by the cluster filesystem being very slow to
arbitrate locks immediately after a node failure.
*/
- DEBUG(DEBUG_NOTICE,(__location__ " set_recmode timeout - allowing recmode set\n"));
+ DEBUG(DEBUG_ERR,(__location__ " set_recmode child process hung/timedout CFS slow to grant locks? (allowing recmode set anyway)\n"));
state->ctdb->recovery_mode = state->recmode;
ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL);
talloc_free(state);
@@ -628,11 +628,17 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
state = talloc(ctdb, struct ctdb_set_recmode_state);
CTDB_NO_MEMORY(ctdb, state);
+
+ if (ctdb->tunable.verify_recovery_lock == 0) {
+ /* dont need to verify the reclock file */
+ ctdb->recovery_mode = recmode;
+ return 0;
+ }
+
/* For the rest of what needs to be done, we need to do this in
a child process since
1, the call to ctdb_recovery_lock() can block if the cluster
filesystem is in the process of recovery.
- 2, running of the script may take a while.
*/
ret = pipe(state->fd);
if (ret != 0) {
@@ -653,7 +659,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
char cc = 0;
close(state->fd[0]);
- /* we should not be able to get the lock on the nodes list,
+ /* we should not be able to get the lock on the reclock file,
as it should be held by the recovery master
*/
if (ctdb_recovery_lock(ctdb, false)) {
@@ -665,6 +671,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
/* make sure we die when our parent dies */
while (kill(parent, 0) == 0 || errno != ESRCH) {
sleep(5);
+ write(state->fd[1], &cc, 1);
}
_exit(0);
}
@@ -672,7 +679,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
talloc_set_destructor(state, set_recmode_destructor);
- state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(3, 0),
+ state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(15, 0),
ctdb_set_recmode_timeout, state);
state->fde = event_add_fd(ctdb->ev, state, state->fd[0],
diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index fe6a4b4..821236f 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -54,6 +54,7 @@ static const struct {
{ "RecdFailCount", 3, offsetof(struct ctdb_tunable, recd_ping_failcount) },
{ "LogLatencyMs", 0, offsetof(struct ctdb_tunable, log_latency_ms) },
{ "RecoveryDropAllIPs", 60, offsetof(struct ctdb_tunable, recovery_drop_all_ips) },
+ { "VerifyRecoveryLock", 1, offsetof(struct ctdb_tunable, verify_recovery_lock) },
};
/*
--
CTDB repository
More information about the samba-cvs
mailing list