Rev 478: - added monitoring of rpc ports for nfs, and of Samba ports and directories in http://samba.org/~tridge/ctdb

tridge at samba.org tridge at samba.org
Wed Jun 6 02:08:43 GMT 2007


------------------------------------------------------------
revno: 478
revision-id: tridge at samba.org-20070606020842-dbjg17e73v52anv2
parent: tridge at samba.org-20070606012706-wlzgtpwgr0hfrifu
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge
timestamp: Wed 2007-06-06 12:08:42 +1000
message:
  - added monitoring of rpc ports for nfs, and of Samba ports and directories
  - added monitoring of the ethernet link state
  
  When monitoring detects an error, the node loses its public IP address
modified:
  common/ctdb_tunables.c         ctdb_tunables.c-20070604095258-4m34d7cm1qa7yos9-1
  config/events.d/10.interface   10.interface-20070604050809-s21zslfirn07zjt8-1
  config/events.d/50.samba       samba-20070601105340-vlcvnp6euoj3zdwy-3
  config/events.d/59.nfslock     nfslock-20070601105340-vlcvnp6euoj3zdwy-2
  config/events.d/60.nfs         nfs-20070601141008-hy3h4qgbk1jd2jci-1
  config/functions               functions-20070601105405-gajwirydr5a9zd6x-1
=== modified file 'common/ctdb_tunables.c'
--- a/common/ctdb_tunables.c	2007-06-06 00:25:46 +0000
+++ b/common/ctdb_tunables.c	2007-06-06 02:08:42 +0000
@@ -36,7 +36,7 @@
 	{ "RecoverInterval",   1,  offsetof(struct ctdb_tunable, recover_interval) },
 	{ "ElectionTimeout",   3,  offsetof(struct ctdb_tunable, election_timeout) },
 	{ "TakeoverTimeout",   5,  offsetof(struct ctdb_tunable, takeover_timeout) },
-	{ "MonitorInterval",  60,  offsetof(struct ctdb_tunable, monitor_interval) },
+	{ "MonitorInterval",  15,  offsetof(struct ctdb_tunable, monitor_interval) },
 };
 
 /*

=== modified file 'config/events.d/10.interface'
--- a/config/events.d/10.interface	2007-06-04 05:09:03 +0000
+++ b/config/events.d/10.interface	2007-06-06 02:08:42 +0000
@@ -6,6 +6,9 @@
 # public interface
 
 . /etc/ctdb/functions
+loadconfig ctdb
+
+[ -z "$CTDB_PUBLIC_INTERFACE" ] && exit 0
 
 cmd="$1"
 shift
@@ -73,6 +76,15 @@
      shutdown)
 	;;
 
+     monitor)
+	[ -x /usr/sbin/ethtool ] && {
+	    /usr/sbin/ethtool $CTDB_PUBLIC_INTERFACE | grep 'Link detected: yes' > /dev/null || {
+		echo "`date` ERROR: No link on network interface $CTDB_PUBLIC_INTERFACE"
+		exit 1
+	    }
+	}
+	;;
+
 esac
 
 exit 0

=== modified file 'config/events.d/50.samba'
--- a/config/events.d/50.samba	2007-06-05 07:43:19 +0000
+++ b/config/events.d/50.samba	2007-06-06 02:08:42 +0000
@@ -50,6 +50,15 @@
 	service smb stop
 	service winbind stop
 	;;
+
+     monitor)
+	smb_dirs=`testparm -st 2> /dev/null | egrep '^\s*path = '  | cut -d= -f2`
+	ctdb_check_directories "Samba" $smb_dirs	
+
+	smb_ports=`testparm -stv 2> /dev/null | egrep '\s*smb ports =' | cut -d= -f2`
+	ctdb_check_tcp_ports "Samba" $smb_ports
+	;;
+
 esac
 
 # ignore unknown commands

=== modified file 'config/events.d/59.nfslock'
--- a/config/events.d/59.nfslock	2007-06-05 08:16:45 +0000
+++ b/config/events.d/59.nfslock	2007-06-06 02:08:42 +0000
@@ -51,6 +51,14 @@
 
 	/bin/rm -f /etc/ctdb/state/statd/restart
 	;;
+
+      monitor)
+	# check that lockd responds to rpc requests
+	ctdb_check_rpc "statd" 100024 1
+	ctdb_check_rpc "lockd" 100021 1
+	ctdb_check_directories "statd" $STATD_SHARED_DIRECTORY
+	;;
+
 esac
 
 exit 0

=== modified file 'config/events.d/60.nfs'
--- a/config/events.d/60.nfs	2007-06-05 05:18:37 +0000
+++ b/config/events.d/60.nfs	2007-06-06 02:08:42 +0000
@@ -9,6 +9,8 @@
 cmd="$1"
 shift
 
+PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH
+
 case $cmd in 
      startup)
 	mkdir -p /etc/ctdb/state/nfs
@@ -45,6 +47,16 @@
 	/bin/rm -f /etc/ctdb/state/nfs/restart
 	;;
 
+      monitor)
+	# check that NFS responds to rpc requests
+	ctdb_check_rpc "NFS" 100003 3
+	ctdb_check_rpc "mount" 100005 1
+
+	# and that its directories are available
+	nfs_dirs=`grep -v '^#' < /etc/exports | cut -d' ' -f1`
+	ctdb_check_directories "nfs" $nfs_dirs
+	;;
+
 esac
 
 exit 0

=== modified file 'config/functions'
--- a/config/functions	2007-06-03 12:07:07 +0000
+++ b/config/functions	2007-06-06 02:08:42 +0000
@@ -28,7 +28,7 @@
 
 ######################################################
 # wait for a set of tcp ports
-# usage: ctdb_wait_tcp_ports SERICE_NAME <ports...>
+# usage: ctdb_wait_tcp_ports SERVICE_NAME <ports...>
 ######################################################
 ctdb_wait_tcp_ports() {
   service_name="$1"
@@ -59,9 +59,10 @@
 }
 
 
+
 ######################################################
 # wait for a set of directories
-# usage: ctdb_wait_directories SERICE_NAME <directories...>
+# usage: ctdb_wait_directories SERVICE_NAME <directories...>
 ######################################################
 ctdb_wait_directories() {
   service_name="$1"
@@ -84,3 +85,58 @@
   echo "`/bin/date` Local directories for $service_name are available"
 }
 
+
+######################################################
+# check that a rpc server is registered with portmap
+# and responding to requests
+# usage: ctdb_check_rpc SERVICE_NAME PROGNUM VERSION
+######################################################
+ctdb_check_rpc() {
+    service_name="$1"
+    prognum="$2"
+    version="$3"
+    rpcinfo -u localhost $prognum $version > /dev/null || {
+	    echo "`date` ERROR: $service_name not responding to rpc requests"
+	    exit 1
+    }
+}
+
+######################################################
+# check a set of directories is available
+# usage: ctdb_check_directories SERVICE_NAME <directories...>
+######################################################
+ctdb_check_directories() {
+  service_name="$1"
+  shift
+  wait_dirs="$*"
+  [ -z "$wait_dirs" ] && return;
+  for d in $wait_dirs; do
+      [ -d $d ] || {
+	  echo "`date` ERROR: $service_name directory $d not available"
+	  exit 1
+      }
+  done
+}
+
+######################################################
+# check a set of tcp ports
+# usage: ctdb_check_tcp_ports SERVICE_NAME <ports...>
+######################################################
+ctdb_check_tcp_ports() {
+  service_name="$1"
+  shift
+  wait_ports="$*"
+  [ -z "$wait_ports" ] && return;
+  for p in $wait_ports; do
+      all_ok=1
+      if [ -x /usr/bin/netcat ]; then
+          /usr/bin/netcat -z 127.0.0.1 $p || all_ok=0
+      elif [ -x /usr/bin/nc ]; then
+          /usr/bin/nc -z 127.0.0.1 $p || all_ok=0
+      fi
+      [ $all_ok -eq 1 ] || {
+	  echo "`date` ERROR: $service_name tcp port $p is not responding"
+	  exit 1
+      }
+  done
+}



More information about the samba-cvs mailing list