[SCM] CTDB repository - branch 1.13 updated - ctdb-1.42-108-g93b176b
Ronnie Sahlberg
sahlberg at samba.org
Wed May 16 19:52:36 MDT 2012
The branch, 1.13 has been updated
via 93b176b39dddc71011ec7182deda435874474e45 (commit)
via 06041c372525924add825bf23a0758546465a516 (commit)
via 6f30c8d233b150d12f7e37e54769fdd670437b90 (commit)
from 6ebd66a10b3fce30cf686f67349c4372c1042dc9 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.13
- Log -----------------------------------------------------------------
commit 93b176b39dddc71011ec7182deda435874474e45
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Thu May 17 11:16:57 2012 +1000
GANESHA: make the ganesha script executable by default
commit 06041c372525924add825bf23a0758546465a516
Author: Martin Schwenke <martin at meltin.net>
Date: Wed May 16 17:24:21 2012 +1000
Eventscripts: Modernise 60.ganesha to match 60.nfs
Originally from Srikrishan Malik <srikrishan.malik at in.ibm.com> with
some style changes by me.
Signed-off-by: Martin Schwenke <martin at meltin.net>
commit 6f30c8d233b150d12f7e37e54769fdd670437b90
Author: Martin Schwenke <martin at meltin.net>
Date: Wed May 16 13:29:58 2012 +1000
Eventscripts: restart lockd in the background when going unhealthy
Sometimes the restart can hang when there are I/O problems. Then the
eventscript times out and gets killed, so the node is never marked as
unhealthy.
Restarting in the background avoids this.
Signed-off-by: Martin Schwenke <martin at meltin.net>
-----------------------------------------------------------------------
Summary of changes:
Makefile.in | 2 +-
config/events.d/60.ganesha | 207 ++++++++++++++++++++------------------------
config/events.d/60.nfs | 4 +-
config/functions | 31 +++++++
config/statd-callout | 14 ++-
5 files changed, 137 insertions(+), 121 deletions(-)
Changeset truncated at 500 lines:
diff --git a/Makefile.in b/Makefile.in
index cdebbd7..cbb987a 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -343,7 +343,7 @@ install: all $(PMDA_INSTALL)
${INSTALLCMD} -m 755 config/events.d/41.httpd $(DESTDIR)$(etcdir)/ctdb/events.d
${INSTALLCMD} -m 755 config/events.d/50.samba $(DESTDIR)$(etcdir)/ctdb/events.d
${INSTALLCMD} -m 755 config/events.d/60.nfs $(DESTDIR)$(etcdir)/ctdb/events.d
- ${INSTALLCMD} -m 644 config/events.d/60.ganesha $(DESTDIR)$(etcdir)/ctdb/events.d
+ ${INSTALLCMD} -m 755 config/events.d/60.ganesha $(DESTDIR)$(etcdir)/ctdb/events.d
${INSTALLCMD} -m 755 config/events.d/62.cnfs $(DESTDIR)$(etcdir)/ctdb/events.d
${INSTALLCMD} -m 755 config/events.d/70.iscsi $(DESTDIR)$(etcdir)/ctdb/events.d
${INSTALLCMD} -m 755 config/events.d/91.lvs $(DESTDIR)$(etcdir)/ctdb/events.d
diff --git a/config/events.d/60.ganesha b/config/events.d/60.ganesha
index fb3b7c2..cee7792 100755
--- a/config/events.d/60.ganesha
+++ b/config/events.d/60.ganesha
@@ -1,34 +1,58 @@
#!/bin/sh
# script to manage nfs in a clustered environment
-start_nfs() {
- mkdir -p $CTDB_VARDIR/state/nfs
- mkdir -p $CTDB_VARDIR/state/statd/ip
- ctdb_service_stop
- ctdb_service_start
- echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
+. $CTDB_BASE/functions
+
+service_name="nfs-ganesha-gpfs"
+
+
+service_start ()
+{
+ startstop_ganesha stop
+ startstop_ganesha start
+ set_proc "sys/net/ipv4/tcp_tw_recycle" 1
}
-. $CTDB_BASE/functions
+service_stop ()
+{
+ startstop_ganesha stop
+}
+
+service_reconfigure ()
+{
+ # if the ips have been reallocated, we must restart ganesha
+ # across all nodes and ping all statd listeners
+ [ -x $CTDB_BASE/statd-callout ] && {
+ $CTDB_BASE/statd-callout notify &
+ } >/dev/null 2>&1
+}
+loadconfig "nfs"
-loadconfig nfs
+[ "$NFS_SERVER_MODE" == "ganesha" ] || exit 0
-[ "$NFS_SERVER_MODE" = "GANESHA" ] || exit 0
+ctdb_setup_service_state_dir
+
+statd_update_trigger="$service_state_dir/update-trigger"
+# We want this file to always exist. The corner case is when
+# auto-start/stop is switched off, NFS is added as a managed service
+# some time after ctdbd is started and someone else starts the NFS
+# service for us. In this case this file might not otherwise exist
+# when we get to a monitor event.
+touch "$statd_update_trigger"
-service_name="nfs-ganesha-gpfs"
ctdb_start_stop_service
is_ctdb_managed_service || exit 0
+ctdb_service_check_reconfigure
+
case "$1" in
init)
# read statd from persistent database
;;
startup)
ctdb_service_start
- mkdir -p $CTDB_VARDIR/state/statd
- touch $CTDB_VARDIR/state/statd/update-trigger
;;
shutdown)
@@ -44,111 +68,68 @@ case "$1" in
;;
monitor)
- if ctdb_service_needs_reconfigure ; then
- ctdb_service_reconfigure
- exit 0
- fi
update_tickles 2049
-
- # check that statd responds to rpc requests
- # if statd is not running we try to restart it
- if ctdb_check_rpc "STATD" status 1 >/dev/null ; then
- (service_name="nfs_statd"; ctdb_counter_init)
- else
- p="rpc.statd" ; cmd="$p"
- cmd="${cmd}${STATD_HOSTNAME:+ -n }${STATD_HOSTNAME}"
- cmd="${cmd}${STATD_PORT:+ -p }${STATD_PORT}"
- cmd="${cmd}${STATD_OUTGOING_PORT:+ -o }${STATD_OUTGOING_PORT}"
- (
- service_name="nfs_statd"
- ctdb_counter_incr
- ctdb_check_counter_limit 10 quiet >/dev/null
- ) || {
- echo "$ctdb_check_rpc_out"
- echo "Trying to restart STATD [$cmd]"
- }
- $cmd
- fi
-
- # check that NFS responds to rpc requests
- [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" = "yes" ] || {
- if ctdb_check_rpc "NFS" nfs 3 >/dev/null ; then
- (service_name="nfs_knfsd"; ctdb_counter_init)
- else
- (
- service_name="nfs_knfsd"
- ctdb_counter_incr
-
- ctdb_check_counter_equal 10 || {
- echo "Trying to restart NFS service"
- ctdb_service_stop
- ctdb_service_start
- exit 0
- }
-
- ctdb_check_counter_limit 15 quiet >/dev/null
- ) || {
- echo "$ctdb_check_rpc_out"
- echo "Trying to restart NFS service"
- ctdb_service_stop
- ctdb_service_start
- exit 1
- }
- fi
- }
-
- # and that its directories are available
- [ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
- grep Path /etc/ganesha/gpfs.ganesha.exports.conf |
- cut -f2 -d\" | ctdb_check_directories
- } || exit $?
-
- # check that lockd responds to rpc requests
- ctdb_check_rpc "LOCKD" nlockmgr 4 || {
- echo "Trying to restart lock manager service"
- ctdb_service_stop
- ctdb_service_start
- exit 1
- }
-
- # check mounts responds to rpc requests
- ctdb_check_rpc "MOUNTD" mountd 1 >/dev/null || {
- echo "Trying to restart mountd service"
- ctdb_service_stop
- ctdb_service_start
- exit 1
- }
-
- # rquotad needs special handling since it is sometimes not started
- # correctly on RHEL5
- # this is not a critical service so we dont flag the node as unhealthy
- ctdb_check_rpc "RQUOTAD" rquotad 1 || {
- p="rpc.rquotad"
- cmd="${p}${RQUOTAD_PORT:+ -p }${RQUOTAD_PORT}"
- echo "Trying to restart RQUOTAD [${cmd}]"
- killall -q -9 $p
- $cmd &
- }
-
- # once every 60 seconds, update the statd state database for which
- # clients need notifications
- LAST_UPDATE=`stat --printf="%Y" $CTDB_VARDIR/state/statd/update-trigger 2>/dev/null`
- CURRENT_TIME=`date +"%s"`
- [ $CURRENT_TIME -ge $(($LAST_UPDATE + 60)) ] && {
- mkdir -p $CTDB_VARDIR/state/statd
- touch $CTDB_VARDIR/state/statd/update-trigger
- $CTDB_BASE/statd-callout updatelocal &
- $CTDB_BASE/statd-callout updateremote &
- }
- ;;
+ # check that statd responds to rpc requests
+ # if statd is not running we try to restart it
+ # we only do this IF we have a rpc.statd command.
+ # For platforms where rpc.statd does not exist, we skip
+ # the check completely
+ p="rpc.statd"
+ which $p >/dev/null 2>/dev/null && \
+ nfs_check_rpc_service "statd" 1 \
+ -ge 6 "verbose unhealthy" \
+ -eq 4 "verbose restart" \
+ -eq 2 "restart:bs"
+
+ PIDFILE="/var/run/ganesha.pid"
+ RUNNING=0
+ if [ -e $PIDFILE ]
+ then
+ PID=`cat $PIDFILE`
+ GANESHA="/usr/bin/gpfs.ganesha.nfsd"
+ RUNNING=`cat /proc/$PID/cmdline | grep $GANESHA | wc -l`
+ fi
+ if [ $RUNNING != 1 ]
+ then
+ echo "Trying fast restart of NFS service"
+ startstop_ganesha restart
+ fi
+
+ # check that NFS responds to rpc requests
+ if [ "$CTDB_NFS_SKIP_KNFSD_ALIVE_CHECK" != "yes" ] ; then
+ nfs_check_rpc_service "ganesha" \
+ -ge 6 "verbose unhealthy" \
+ -eq 4 "verbose restart" \
+ -eq 2 "restart:bs"
+ fi
+
+
+ # rquotad is sometimes not started correctly on RHEL5
+ # not a critical service so we dont flag the node as unhealthy
+ nfs_check_rpc_service "rquotad" 1\
+ -gt 0 "verbose restart:b"
+
+
+ # Check that directories for shares actually exist.
+ [ "$CTDB_NFS_SKIP_SHARE_CHECK" = "yes" ] || {
+ grep Path /etc/ganesha/gpfs.ganesha.exports.conf |
+ cut -f2 -d\" | ctdb_check_directories
+ } || exit $?
+
+ # once every 60 seconds, update the statd state database for which
+ # clients need notifications
+ LAST_UPDATE=`stat --printf="%Y" "$statd_update_trigger" 2>/dev/null`
+ CURRENT_TIME=`date +"%s"`
+ [ $CURRENT_TIME -ge $(($LAST_UPDATE + 60)) ] && {
+ touch "$statd_update_trigger"
+ $CTDB_BASE/statd-callout updatelocal &
+ $CTDB_BASE/statd-callout updateremote &
+ }
+ ;;
ipreallocated)
- # if the ips have been reallocated, we must restart the lockmanager
- # across all nodes and ping all statd listeners
- [ -x $CTDB_BASE/statd-callout ] && {
- $CTDB_BASE/statd-callout notify &
- } >/dev/null 2>&1
+ ctdb_service_set_reconfigure
;;
*)
ctdb_standard_event_handler "$@"
diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index e8ac61f..ef2c1f7 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -27,7 +27,7 @@ service_reconfigure ()
loadconfig
-[ "$NFS_SERVER_MODE" != "GANESHA" ] || exit 0
+[ "$NFS_SERVER_MODE" != "ganesha" ] || exit 0
ctdb_setup_service_state_dir
@@ -98,7 +98,7 @@ case "$1" in
# check that lockd responds to rpc requests
nfs_check_rpc_service "lockd" \
- -ge 15 "verbose restart unhealthy" \
+ -ge 15 "verbose restart:b unhealthy" \
-eq 10 "restart:bs"
# mountd is sometimes not started correctly on RHEL5
diff --git a/config/functions b/config/functions
index 9c2898c..a07aa8e 100755
--- a/config/functions
+++ b/config/functions
@@ -228,6 +228,12 @@ nfs_check_rpc_service ()
_restart="echo 'Trying to restart NFS service'"
_restart="${_restart}; startstop_nfs restart"
;;
+ ganesha)
+ _rpc_prog=nfs
+ _version=${_v:-3}
+ _restart="echo 'Trying to restart Ganesha NFS service'"
+ _restart="${_restart}; startstop_ganesha restart"
+ ;;
mountd)
_opts="${MOUNTD_PORT:+ -p }${MOUNTD_PORT}"
;;
@@ -678,6 +684,31 @@ tickle_tcp_connections() {
}
########################################################
+# start/stop the Ganesha nfs service
+########################################################
+startstop_ganesha()
+{
+ _ganesha_fsal_list="gpfs"
+ for _fsal in $_ganesha_fsal_list ; do
+ _service_name="nfs-ganesha-${_fsal}"
+ if [ -x /etc/init.d/$_service_name ] ; then
+ break
+ fi
+ done
+ case "$1" in
+ start)
+ service "$_service_name" start
+ ;;
+ stop)
+ service "$_service_name" stop
+ ;;
+ restart)
+ service "$_service_name" restart
+ ;;
+ esac
+}
+
+########################################################
# start/stop the nfs service on different platforms
########################################################
startstop_nfs() {
diff --git a/config/statd-callout b/config/statd-callout
index 3078ef2..63fee09 100755
--- a/config/statd-callout
+++ b/config/statd-callout
@@ -138,11 +138,15 @@ case "$1" in
# will respond "strangely" immediately after restarting it, which
# causes clients to fail to reclaim the locks.
#
- startstop_nfslock stop > /dev/null 2>&1
- sleep 2
-
- # now start lockmanager again with the new state directory.
- startstop_nfslock start > /dev/null 2>&1
+ if [ "$NFS_SERVER_MODE" = "ganesha" ] ; then
+ startstop_ganesha stop >/dev/null 2>&1
+ sleep 2
+ startstop_ganesha start >/dev/null 2>&1
+ else
+ startstop_nfslock stop >/dev/null 2>&1
+ sleep 2
+ startstop_nfslock start >/dev/null 2>&1
+ fi
# we now need to send out additional statd notifications to ensure
# that clients understand that the lockmanager has restarted.
--
CTDB repository
More information about the samba-cvs
mailing list