[SCM] CTDB repository - branch master updated - a4e89f57a8d733ea74df7b0de31eb977d6d37388

Ronnie Sahlberg sahlberg at samba.org
Tue Jul 8 00:03:04 GMT 2008


The branch, master has been updated
       via  a4e89f57a8d733ea74df7b0de31eb977d6d37388 (commit)
      from  bfba5c7249eff8a10a43b53c1b89dd44b625fd10 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit a4e89f57a8d733ea74df7b0de31eb977d6d37388
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Jul 8 09:58:10 2008 +1000

    Update the nfs monitor event to track how many times in a row it has failed
    to "ping" the local nfs daemon.
    
    Once it has failed more than 3 times in a row, the monitor event will attempt to restart the nfs service.

-----------------------------------------------------------------------

Summary of changes:
 config/events.d/60.nfs |   25 +++++++++++++++++++++++++
 1 files changed, 25 insertions(+), 0 deletions(-)


Changeset truncated at 500 lines:

diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index c62eed3..de7e012 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -68,6 +68,27 @@ case $cmd in
 	;;
 
       monitor)
+	# check how many times in a row nfsd has failed to respond;
+	# after more than 3 failures in a row we try to restart the full nfs service
+	NFSD_FAIL_COUNT_FILE=$CTDB_BASE/state/nfs/nfsd_fail_count
+	[ ! -f $NFSD_FAIL_COUNT_FILE ] && {
+		echo 0 > $NFSD_FAIL_COUNT_FILE
+	}
+	NFSD_FAIL_COUNT=`cat $NFSD_FAIL_COUNT_FILE`
+	[ -z "$NFSD_FAIL_COUNT" ] && {
+		echo 0 > $NFSD_FAIL_COUNT_FILE
+		NFSD_FAIL_COUNT=`cat $NFSD_FAIL_COUNT_FILE`
+	}
+	# ok it has failed a few times too many. try restarting it.
+	[ `expr "$NFSD_FAIL_COUNT" ">" "3"` != "0" ] && {
+		echo 60.nfs NFSD: trying to restart NFSD...
+		echo 0 > $NFSD_FAIL_COUNT_FILE
+		service nfs restart
+		exit 0
+	}
+	expr "$NFSD_FAIL_COUNT" "+" "1" > $NFSD_FAIL_COUNT_FILE
+
+
 	# check that statd responds to rpc requests
 	# if statd is not running we try to restart it once and wait
 	# for the next monitoring event to verify if it is running or not
@@ -93,6 +114,7 @@ case $cmd in
 	fi
 
 
+
 	# check that NFS responds to rpc requests
 	ctdb_check_rpc "NFS" 100003 3
 
@@ -114,6 +136,9 @@ case $cmd in
 		rpc.mountd $RPCMOUNTDOPTS &
 		exit 1
 	}
+	
+	# everything was ok with nfs so reset the fail count back to 0
+	echo 0 > $NFSD_FAIL_COUNT_FILE
 	;;
 
 esac


-- 
CTDB repository


More information about the samba-cvs mailing list