[SCM] CTDB repository - branch master updated - ctdb-2.2-6-gcd4358b

Thu Jun 13 23:54:58 MDT 2013

The branch, master has been updated
       via  cd4358b01c6c3d413b431f5760029d2b163b9c03 (commit)
       via  0e2b5a8f89440a53f996482ac0c98b31a4f2cad3 (commit)
       via  ce2ef2be8aa22c0baf868daac8d4cf27246baa14 (commit)
       via  2503245db10d567af708a04edd3a3b488c24f401 (commit)
       via  99b0d8b8ecc36dfc493775b9ebced54539c182d2 (commit)
       via  c429394afbabaee09f9216dc743419adddf523ea (commit)
      from  ac0892d3a57adb0587a37de0f94fa686bed8970f (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit cd4358b01c6c3d413b431f5760029d2b163b9c03
Author: Martin Schwenke <martin at meltin.net>
Date:   Thu Jun 13 16:32:06 2013 +1000

    tests/eventscripts: Unit tests for $CTDB_NFS_DUMP_STUCK_THREADS
    
    Includes minor test infrastructure updates.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 0e2b5a8f89440a53f996482ac0c98b31a4f2cad3
Author: Martin Schwenke <martin at meltin.net>
Date:   Thu Jun 13 16:30:45 2013 +1000

    tests/eventscripts: Fix -X tracing in iterate_test()
    
    ... and delete a bogus comment.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit ce2ef2be8aa22c0baf868daac8d4cf27246baa14
Author: Martin Schwenke <martin at meltin.net>
Date:   Thu Jun 13 15:50:44 2013 +1000

    tests/eventscripts: Add unit tests for $CTDB_MONITOR_NFS_THREAD_COUNT
    
    Includes minor test infrastructure updates.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 2503245db10d567af708a04edd3a3b488c24f401
Author: Martin Schwenke <martin at meltin.net>
Date:   Thu Jun 13 11:56:25 2013 +1000

    eventscripts: New configuration variable $CTDB_NFS_DUMP_STUCK_THREADS
    
    If some nfsd threads are still alive after a shutdown during a restart
    then this variable gives the maximum number of threads for which a
    stack trace should be dumped.  This can be useful for trying to
    determine why nfsd is stuck.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 99b0d8b8ecc36dfc493775b9ebced54539c182d2
Author: Martin Schwenke <martin at meltin.net>
Date:   Thu Jun 13 10:17:20 2013 +1000

    eventscripts: Add new option $CTDB_MONITOR_NFS_THREAD_COUNT
    
    Consider the following example:
    
    1. There are 256 nfsd threads configured.
    2. 200 threads are "stuck" in system calls, perhaps waiting for the
       underlying filesystem when an attempt is made to restart NFS.
    3. 56 threads exit when NFS is stopped.
    4. 56 new threads are started when NFS is started.
    5. 200 "stuck" threads exit leaving only 56 threads running.
    
    Setting this option to "yes" makes the 60.nfs monitor event look for
    this situation and try to correct it.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit c429394afbabaee09f9216dc743419adddf523ea
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri May 31 14:55:07 2013 +1000

    recoverd: Log node that causes takeover run to fail
    
    Extend takeover_fail_callback() to just log (and not do any ban
    processing) when the callback data is NULL.  Always call
    ctdb_takeover_run() with the callback so that useful errors are always
    logged.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Pair-programmed-with: Amitay Isaacs <amitay at gmail.com>

-----------------------------------------------------------------------

Summary of changes:
 config/ctdb.sysconfig                              |   19 +++++++++++++
 config/events.d/60.nfs                             |   25 +++++++++++++++++
 config/functions                                   |   24 +++++++++++++++++
 server/ctdb_recoverd.c                             |   18 ++++++++-----
 tests/eventscripts/60.nfs.monitor.102.sh           |   15 ++++++++++
 tests/eventscripts/60.nfs.monitor.103.sh           |   15 ++++++++++
 tests/eventscripts/60.nfs.monitor.104.sh           |   18 ++++++++++++
 ...60.nfs.monitor.112.sh => 60.nfs.monitor.113.sh} |    7 +++-
 ...60.nfs.monitor.112.sh => 60.nfs.monitor.114.sh} |    7 +++-
 tests/eventscripts/etc-ctdb/rc.local               |    6 ++++
 tests/eventscripts/scripts/local.sh                |   28 +++++++++++++++-----
 tests/eventscripts/stubs/pidof                     |   10 +++++++
 12 files changed, 174 insertions(+), 18 deletions(-)
 create mode 100755 tests/eventscripts/60.nfs.monitor.102.sh
 create mode 100755 tests/eventscripts/60.nfs.monitor.103.sh
 create mode 100755 tests/eventscripts/60.nfs.monitor.104.sh
 copy tests/eventscripts/{60.nfs.monitor.112.sh => 60.nfs.monitor.113.sh} (61%)
 copy tests/eventscripts/{60.nfs.monitor.112.sh => 60.nfs.monitor.114.sh} (60%)
 create mode 100755 tests/eventscripts/stubs/pidof


Changeset truncated at 500 lines:

diff --git a/config/ctdb.sysconfig b/config/ctdb.sysconfig
index 6f58e8f..7e775a2 100644
--- a/config/ctdb.sysconfig
+++ b/config/ctdb.sysconfig
@@ -129,6 +129,25 @@ CTDB_RECOVERY_LOCK="/some/place/on/shared/storage"
 # CTDB_MONITOR_FREE_MEMORY_WARN=100
 # CTDB_MONITOR_FREE_MEMORY=10
 
+# Should the 60.nfs monitor event try to correct the number of nfsd
+# threads?  This works around a limitation in some NFS initscripts
+# where some threads can be stuck in host filesystem calls (perhaps
+# due to slow storage), a restart occurs, some threads don't exit, the
+# start only adds the missing number of threads, the stuck threads
+# exit, and the result is a lower than expected thread count.  Note
+# that you must also set $RPCNFSDCOUNT (RedHat/Debian) or
+# $USE_KERNEL_NFSD_NUMBER (SUSE) in your NFS configuration so the
+# monitoring code knows how many threads there should be - if neither
+# of these is set then this option will be ignored.  The default is
+# to not do this check.
+# CTDB_MONITOR_NFS_THREAD_COUNT="yes"
+
+
+# The number of nfsd threads to dump stack traces for if some are
+# still alive after stopping NFS during a restart.  The default is to
+# dump no stack traces.
+# CTDB_NFS_DUMP_STUCK_THREADS=5
+
 # When set to yes, the CTDB node will start in DISABLED mode and not host
 # any public ip addresses. The administrator needs to explicitely enable
 # the node with "ctdb enable"
diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index eb98ee1..53f78df 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -26,6 +26,29 @@ service_reconfigure ()
     } >/dev/null 2>&1
 }
 
+nfs_check_thread_count ()
+{
+    [ "$CTDB_MONITOR_NFS_THREAD_COUNT" = "yes" ] || return 0
+
+    # If $RPCNFSDCOUNT/$USE_KERNEL_NFSD_NUMBER isn't set then we could
+    # guess the default from the initscript.  However, let's just
+    # assume that those using the default don't care about the number
+    # of threads and that they have switched on this feature in error.
+    _configured_threads="${RPCNFSDCOUNT:-${USE_KERNEL_NFSD_NUMBER}}"
+    [ -n "$_configured_threads" ] || return 0
+
+    # nfsd should be running the configured number of threads.  If
+    # there are a different number of threads then tell nfsd the
+    # correct number.  
+    _running_threads=$(get_proc "fs/nfsd/threads")
+    # Intentionally not arithmetic comparison - avoids extra errors
+    # when get_proc() fails...
+    if [ "$_running_threads" != "$_configured_threads" ] ; then
+	echo "Attempting to correct number of nfsd threads from ${_running_threads} to ${_configured_threads}"
+	set_proc "fs/nfsd/threads" "$_configured_threads"
+    fi
+}
+
 loadconfig
 
 [ "$NFS_SERVER_MODE" != "ganesha" ] || exit 0
@@ -71,6 +94,8 @@ case "$1" in
 
 	nfs_check_rpc_services
 
+	nfs_check_thread_count
+
 	# Every 10 minutes, update the statd state database for which
 	# clients need notifications
 	nfs_statd_update 600
diff --git a/config/functions b/config/functions
index f4707a7..0a806cb 100755
--- a/config/functions
+++ b/config/functions
@@ -779,6 +779,7 @@ startstop_nfs() {
 			set_proc "fs/nfsd/threads" 0
 			service nfsserver stop > /dev/null 2>&1
 			pkill -9 nfsd
+			nfs_dump_some_threads
 			service nfsserver start
 			;;
 		esac
@@ -798,6 +799,7 @@ startstop_nfs() {
 			service nfs stop > /dev/null 2>&1
 			service nfslock stop > /dev/null 2>&1
 			pkill -9 nfsd
+			nfs_dump_some_threads
 			service nfslock start
 			service nfs start
 			;;
@@ -810,6 +812,28 @@ startstop_nfs() {
 	esac
 }
 
+# Dump up to the configured number of nfsd thread backtraces.
+nfs_dump_some_threads ()
+{
+    [ -n "$CTDB_NFS_DUMP_STUCK_THREADS" ] || return 0
+
+    # Optimisation to avoid running an unnecessary pidof
+    [ $CTDB_NFS_DUMP_STUCK_THREADS -gt 0 ] || return 0
+
+    _count=0
+    for _pid in $(pidof nfsd) ; do
+	[ $_count -le $CTDB_NFS_DUMP_STUCK_THREADS ] || break
+
+	# Do this first to avoid racing with thread exit
+	_stack=$(get_proc "${_pid}/stack" 2>/dev/null)
+	if [ -n "$_stack" ] ; then
+	    echo "Stack trace for stuck nfsd thread [${_pid}]:"
+	    echo "$_stack"
+	    _count=$(($_count + 1))
+	fi
+    done
+}
+
 ########################################################
 # start/stop the nfs lockmanager service on different platforms
 ########################################################
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index c3a1852..f18cdf4 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1527,12 +1527,16 @@ static int sync_recovery_lock_file_across_cluster(struct ctdb_recoverd *rec)
  */
 static void takeover_fail_callback(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data)
 {
-	struct ctdb_recoverd *rec = talloc_get_type(callback_data, struct ctdb_recoverd);
+	DEBUG(DEBUG_ERR, ("Node %u failed the takeover run\n", node_pnn));
 
-	DEBUG(DEBUG_ERR, (__location__ " Node %u failed the takeover run. Setting it as recovery fail culprit\n", node_pnn));
+	if (callback_data != NULL) {
+		struct ctdb_recoverd *rec = talloc_get_type(callback_data, struct ctdb_recoverd);
 
-	ctdb_set_culprit(rec, node_pnn);
-	rec->need_takeover_run = true;
+		DEBUG(DEBUG_ERR, ("Setting node %u as recovery fail culprit\n", node_pnn));
+
+		ctdb_set_culprit(rec, node_pnn);
+		rec->need_takeover_run = true;
+	}
 }
 
 
@@ -1825,7 +1829,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
 		return -1;
 	}
 	rec->need_takeover_run = false;
-	ret = ctdb_takeover_run(ctdb, nodemap, NULL, NULL);
+	ret = ctdb_takeover_run(ctdb, nodemap, takeover_fail_callback, NULL);
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
 		rec->need_takeover_run = true;
@@ -2184,7 +2188,7 @@ static void ctdb_rebalance_timeout(struct event_context *ev, struct timed_event
 
 	DEBUG(DEBUG_NOTICE,("Rebalance all nodes that have had ip assignment changes.\n"));
 
-	ret = ctdb_takeover_run(ctdb, rec->nodemap, NULL, NULL);
+	ret = ctdb_takeover_run(ctdb, rec->nodemap, takeover_fail_callback, NULL);
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
 		rec->need_takeover_run = true;
@@ -2410,7 +2414,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
 		rec->need_takeover_run = true;
 	}
 	if (ret == 0) {
-		ret = ctdb_takeover_run(ctdb, rec->nodemap, NULL, NULL);
+		ret = ctdb_takeover_run(ctdb, rec->nodemap, takeover_fail_callback, NULL);
 		if (ret != 0) {
 			DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n"));
 			rec->need_takeover_run = true;
diff --git a/tests/eventscripts/60.nfs.monitor.102.sh b/tests/eventscripts/60.nfs.monitor.102.sh
new file mode 100755
index 0000000..bb988aa
--- /dev/null
+++ b/tests/eventscripts/60.nfs.monitor.102.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all services available, check nfsd thread count, count matches"
+
+setup_nfs
+
+CTDB_MONITOR_NFS_THREAD_COUNT="yes"
+RPCNFSDCOUNT=8
+FAKE_NFSD_THREAD_PIDS="1 2 3 4 5 6 7 8"
+
+ok_null
+
+simple_test
diff --git a/tests/eventscripts/60.nfs.monitor.103.sh b/tests/eventscripts/60.nfs.monitor.103.sh
new file mode 100755
index 0000000..75d7291
--- /dev/null
+++ b/tests/eventscripts/60.nfs.monitor.103.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+define_test "all services available, check nfsd thread count, not enough threads"
+
+setup_nfs
+
+CTDB_MONITOR_NFS_THREAD_COUNT="yes"
+RPCNFSDCOUNT=8
+FAKE_NFSD_THREAD_PIDS="1 2 3 4 5"
+
+ok "Attempting to correct number of nfsd threads from 5 to 8"
+
+simple_test
diff --git a/tests/eventscripts/60.nfs.monitor.104.sh b/tests/eventscripts/60.nfs.monitor.104.sh
new file mode 100755
index 0000000..a052be8
--- /dev/null
+++ b/tests/eventscripts/60.nfs.monitor.104.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+
+. "${TEST_SCRIPTS_DIR}/unit.sh"
+
+# Add this extra test to catch a design change where we only ever
+# increase the number of threads.  That is, this test would need to be
+# consciously removed.
+define_test "all services available, check nfsd thread count, too many threads"
+
+setup_nfs
+
+CTDB_MONITOR_NFS_THREAD_COUNT="yes"
+RPCNFSDCOUNT=4
+FAKE_NFSD_THREAD_PIDS="1 2 3 4 5 6"
+
+ok "Attempting to correct number of nfsd threads from 6 to 4"
+
+simple_test
diff --git a/tests/eventscripts/60.nfs.monitor.112.sh b/tests/eventscripts/60.nfs.monitor.113.sh
similarity index 61%
copy from tests/eventscripts/60.nfs.monitor.112.sh
copy to tests/eventscripts/60.nfs.monitor.113.sh
index c5c39b2..caa4989 100755
--- a/tests/eventscripts/60.nfs.monitor.112.sh
+++ b/tests/eventscripts/60.nfs.monitor.113.sh
@@ -2,13 +2,16 @@
 
 . "${TEST_SCRIPTS_DIR}/unit.sh"
 
-define_test "knfsd down, 6 iterations"
+define_test "knfsd down, 6 iterations, dump 5 threads, none hung"
 
 # knfsd fails and attempts to restart it fail.
-
 setup_nfs
 rpc_services_down "nfs"
 
+# Additionally, any hung threads should have stack traces dumped.
+CTDB_NFS_DUMP_STUCK_THREADS=5
+FAKE_NFSD_THREAD_PIDS=""
+
 iterate_test 6 'ok_null' \
     2 'rpc_set_service_failure_response "nfsd"' \
     4 'rpc_set_service_failure_response "nfsd"' \
diff --git a/tests/eventscripts/60.nfs.monitor.112.sh b/tests/eventscripts/60.nfs.monitor.114.sh
similarity index 60%
copy from tests/eventscripts/60.nfs.monitor.112.sh
copy to tests/eventscripts/60.nfs.monitor.114.sh
index c5c39b2..8279395 100755
--- a/tests/eventscripts/60.nfs.monitor.112.sh
+++ b/tests/eventscripts/60.nfs.monitor.114.sh
@@ -2,13 +2,16 @@
 
 . "${TEST_SCRIPTS_DIR}/unit.sh"
 
-define_test "knfsd down, 6 iterations"
+define_test "knfsd down, 6 iterations, dump 5 threads, 3 hung"
 
 # knfsd fails and attempts to restart it fail.
-
 setup_nfs
 rpc_services_down "nfs"
 
+# Additionally, any hung threads should have stack traces dumped.
+CTDB_NFS_DUMP_STUCK_THREADS=5
+FAKE_NFSD_THREAD_PIDS="1001 1002 1003"
+
 iterate_test 6 'ok_null' \
     2 'rpc_set_service_failure_response "nfsd"' \
     4 'rpc_set_service_failure_response "nfsd"' \
diff --git a/tests/eventscripts/etc-ctdb/rc.local b/tests/eventscripts/etc-ctdb/rc.local
index ae93ae5..9cd4d55 100755
--- a/tests/eventscripts/etc-ctdb/rc.local
+++ b/tests/eventscripts/etc-ctdb/rc.local
@@ -33,6 +33,12 @@ get_proc ()
 	sys/net/ipv4/conf/all/arp_filter)
 	    echo 1
 	    ;;
+	fs/nfsd/threads)
+	    echo "$FAKE_NFSD_THREAD_PIDS" | wc -w
+	    ;;
+	*/stack)
+	    echo "[<ffffffff87654321>] fake_stack_trace_for_pid_${1}+0x0/0xff"
+	    ;;
 	*)
 	    echo "get_proc: \"$1\" not implemented"
 	    exit 1
diff --git a/tests/eventscripts/scripts/local.sh b/tests/eventscripts/scripts/local.sh
index 3f55830..6e2f15c 100644
--- a/tests/eventscripts/scripts/local.sh
+++ b/tests/eventscripts/scripts/local.sh
@@ -555,6 +555,9 @@ setup_nfs ()
 
     export CTDB_NFS_SKIP_SHARE_CHECK="no"
 
+    export CTDB_MONITOR_NFS_THREAD_COUNT RPCNFSDCOUNT FAKE_NFSD_THREAD_PIDS
+    export CTDB_NFS_DUMP_STUCK_THREADS
+
     # Reset the failcounts for nfs services.
     eventscript_call eval rm -f '$ctdb_fail_dir/nfs_*'
 
@@ -673,7 +676,19 @@ program $_pn version $_ver is not available"
 			case "${_progname}${_action#restart}" in
 			    nfsd)
 				_t="\
-Trying to restart NFS service
+Trying to restart NFS service"
+
+				if [ -n "$CTDB_NFS_DUMP_STUCK_THREADS" ] ; then
+				    for _pid in $FAKE_NFSD_THREAD_PIDS ; do
+					_t="\
+$_t
+Stack trace for stuck nfsd thread [${_pid}]:
+[<ffffffff87654321>] fake_stack_trace_for_pid_${_pid}/stack+0x0/0xff"
+				    done
+				fi
+
+				_t="\
+${_t}
 Starting nfslock: OK
 Starting nfs: OK"
 				;;
@@ -790,11 +805,6 @@ EOF
 
 # Any args are passed to the eventscript.
 
-# Eventscript tracing can be done by setting:
-#   EVENTSCRIPTS_TESTS_TRACE="sh -x"
-
-# or similar.  This will almost certainly make a test fail but is
-# useful for debugging.
 simple_test ()
 {
     [ -n "$event" ] || die 'simple_test: $event not set'
@@ -899,7 +909,11 @@ iterate_test ()
 	    shift 2
 	fi
 
-	_out=$($EVENTSCRIPTS_TESTS_TRACE "${CTDB_BASE}/events.d/$script" "$event" $args 2>&1)
+	_trace=""
+	if $TEST_COMMAND_TRACE ; then
+	    _trace="sh -x"
+	fi
+	_out=$($_trace "${CTDB_BASE}/events.d/$script" "$event" $args 2>&1)
 	_rc=$?
 
     if [ -n "$OUT_FILTER" ] ; then
diff --git a/tests/eventscripts/stubs/pidof b/tests/eventscripts/stubs/pidof
new file mode 100755
index 0000000..b6ad6d8
--- /dev/null
+++ b/tests/eventscripts/stubs/pidof
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+case "$1" in
+    nfsd)
+	echo "$FAKE_NFSD_THREAD_PIDS"
+	;;
+    *)
+	echo "pidof: \"$1\" not implemented"
+	exit 1
+esac


-- 
CTDB repository