[SCM] CTDB repository - branch master updated - ctdb-1.0.65-8-g6579a6a

Fri Nov 21 00:01:08 GMT 2008

The branch, master has been updated
       via  6579a6a2a7161214adedf0f67dce62f4a4ad1afe (commit)
       via  fe6ddf7992ca3e72a26dbac6666e0f6270da611f (commit)
       via  5403ed6dcfdfc101b05b43f83002e720d81b4e38 (commit)
       via  a5d5aa455c7f7eb93d3fa6f403d5b8e0b795109d (commit)
       via  000018f2f4fb9f2452f56731b027dd6a7beda111 (commit)
      from  0a6f9326cb99f14b5c9edd0d8854d8229df49910 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 6579a6a2a7161214adedf0f67dce62f4a4ad1afe
Author: Andrew Tridgell <tridge at samba.org>
Date:   Fri Nov 21 08:05:59 2008 +1100

    fixed problem with looping ctdb recoveries
    
    After a node failure, GPFS can get into a state where non-blocking
    fcntl() locks can take a long time. This leads to the ctdb set_recmode
    test timing out, which causes a recovery failure, and a new
    recovery. The recovery loop can last a long time.
    
    The fix is to treat an fcntl timeout as a success of this test. The
    test checks that we can't lock the shared reclock file, so a
    timeout is an acceptable way for it to succeed.

commit fe6ddf7992ca3e72a26dbac6666e0f6270da611f
Merge: 5403ed6dcfdfc101b05b43f83002e720d81b4e38 0a6f9326cb99f14b5c9edd0d8854d8229df49910
Author: Andrew Tridgell <tridge at samba.org>
Date:   Thu Nov 20 21:23:26 2008 +1100

    Merge commit 'ronnie/master'

commit 5403ed6dcfdfc101b05b43f83002e720d81b4e38
Merge: a5d5aa455c7f7eb93d3fa6f403d5b8e0b795109d 6fb2f8a36239e5902e27cf10213f85faf216d6f1
Author: Andrew Tridgell <tridge at samba.org>
Date:   Thu Oct 16 12:58:25 2008 +1100

    Merge commit 'ronnie/master'

commit a5d5aa455c7f7eb93d3fa6f403d5b8e0b795109d
Author: Andrew Tridgell <tridge at samba.org>
Date:   Thu Oct 9 18:45:12 2008 +1100

    added some more gpfs commands per-filesystem

commit 000018f2f4fb9f2452f56731b027dd6a7beda111
Author: Andrew Tridgell <tridge at samba.org>
Date:   Tue Sep 30 07:16:17 2008 -0700

    The author of the upstream code asked for this code to be GPLv2+ not GPLv3

-----------------------------------------------------------------------

Summary of changes:
 lib/util/idtree.c      |    2 +-
 server/ctdb_recover.c  |   11 +++++++++--
 tools/ctdb_diagnostics |    8 ++++++++
 3 files changed, 18 insertions(+), 3 deletions(-)


Changeset truncated at 500 lines:

diff --git a/lib/util/idtree.c b/lib/util/idtree.c
index 1b69319..06544e1 100644
--- a/lib/util/idtree.c
+++ b/lib/util/idtree.c
@@ -12,7 +12,7 @@
     
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or
+   the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index c8b0ba0..39b73ac 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -477,7 +477,14 @@ static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_even
 	struct ctdb_set_recmode_state *state = talloc_get_type(private_data, 
 					   struct ctdb_set_recmode_state);
 
-	ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "timeout in ctdb_set_recmode");
+	/* we consider this a success, not a failure, as we failed to
+	   set the recovery lock which is what we wanted.  This can be
+	   caused by the cluster filesystem being very slow to
+	   arbitrate locks immediately after a node failure.	   
+	 */
+	DEBUG(DEBUG_NOTICE,(__location__ " set_recmode timeout - allowing recmode set\n"));
+	state->ctdb->recovery_mode = state->recmode;
+	ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL);
 	talloc_free(state);
 }
 
@@ -643,7 +650,7 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 	talloc_set_destructor(state, set_recmode_destructor);
 
 	state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(3, 0),
-			ctdb_set_recmode_timeout, state);
+				    ctdb_set_recmode_timeout, state);
 
 	state->fde = event_add_fd(ctdb->ev, state, state->fd[0],
 				EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
diff --git a/tools/ctdb_diagnostics b/tools/ctdb_diagnostics
index 4415a90..8304a68 100755
--- a/tools/ctdb_diagnostics
+++ b/tools/ctdb_diagnostics
@@ -180,6 +180,14 @@ EOF
  show_all "/usr/lpp/mmfs/bin/mmlsquota"
  show_all "/usr/lpp/mmfs/bin/mmlscluster"
  show_all "/usr/lpp/mmfs/bin/mmlsmgr"
+ devlist=`mmlsfs all|grep ^File.system.attributes | cut -d/ -f3 | cut -d: -f1`
+ for d in $devlist; do
+     show_all mmdf $d
+     show_all mmlsdisk $d
+     show_all mmlsfileset $d
+     show_all mmlspolicy $d
+     show_all mmlssnapshot $d
+ done
  fslist=`mount|grep type.gpfs|awk '{print $1}'`
  for fs in $fslist; do
      show_all "/usr/lpp/mmfs/bin/mmlssnapshot $fs"


-- 
CTDB repository