[SCM] CTDB repository - branch master updated -
a4e89f57a8d733ea74df7b0de31eb977d6d37388
Ronnie Sahlberg
sahlberg at samba.org
Tue Jul 8 00:03:04 GMT 2008
The branch, master has been updated
via a4e89f57a8d733ea74df7b0de31eb977d6d37388 (commit)
from bfba5c7249eff8a10a43b53c1b89dd44b625fd10 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit a4e89f57a8d733ea74df7b0de31eb977d6d37388
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Tue Jul 8 09:58:10 2008 +1000
Update the monitor event for NFS to track how many times in a row it has failed
to "ping" the local NFS daemon.
Once it has failed more than 3 times in a row, it will attempt to restart the NFS service.
-----------------------------------------------------------------------
Summary of changes:
config/events.d/60.nfs | 25 +++++++++++++++++++++++++
1 files changed, 25 insertions(+), 0 deletions(-)
Changeset truncated at 500 lines:
diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index c62eed3..de7e012 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -68,6 +68,27 @@ case $cmd in
;;
monitor)
+ # check how many times in a row that nfsd has stopped responding
+ # after 3 times in a row we try to restart the full nfs service
+ NFSD_FAIL_COUNT_FILE=$CTDB_BASE/state/nfs/nfsd_fail_count
+ [ ! -f $NFSD_FAIL_COUNT_FILE ] && {
+ echo 0 > $NFSD_FAIL_COUNT_FILE
+ }
+ NFSD_FAIL_COUNT=`cat $NFSD_FAIL_COUNT_FILE`
+ [ -z "$NFSD_FAIL_COUNT" ] && {
+ echo 0 > $NFSD_FAIL_COUNT_FILE
+ NFSD_FAIL_COUNT=`cat $NFSD_FAIL_COUNT_FILE`
+ }
+ # ok it has failed a few times too many. try restarting it.
+ [ `expr "$NFSD_FAIL_COUNT" ">" "3"` != "0" ] && {
+ echo 60.nfs NFSD: trying to restart NFSD...
+ echo 0 > $NFSD_FAIL_COUNT_FILE
+ service nfs restart
+ exit 0
+ }
+ expr "$NFSD_FAIL_COUNT" "+" "1" > $NFSD_FAIL_COUNT_FILE
+
+
# check that statd responds to rpc requests
# if statd is not running we try to restart it once and wait
# for the next monitoring event to verify if it is running or not
@@ -93,6 +114,7 @@ case $cmd in
fi
+
# check that NFS responds to rpc requests
ctdb_check_rpc "NFS" 100003 3
@@ -114,6 +136,9 @@ case $cmd in
rpc.mountd $RPCMOUNTDOPTS &
exit 1
}
+
+ # everything was ok with nfs so reset the fail count back to 0
+ echo 0 > $NFSD_FAIL_COUNT_FILE
;;
esac
--
CTDB repository
More information about the samba-cvs
mailing list