[SCM] CTDB repository - branch status-test updated - ctdb-1.0.104-7-g364e70b

Fri Nov 13 03:31:17 MST 2009

The branch, status-test has been updated
       via  364e70b763f0ccd7714d15723ad3ea4d7e2968a1 (commit)
      from  1908bac812650ca25151051f5d86815e0b8ed319 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=status-test


- Log -----------------------------------------------------------------
commit 364e70b763f0ccd7714d15723ad3ea4d7e2968a1
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Nov 13 18:28:25 2009 +1100

    Eventscripts: Untested factorisations and introduction of status event.
    
    This is the first stage of an experimental change to eventscripts.
    Ronnie and I did a few hours of factorisation of 40.vsftpd and applied
    many of the changes to 41.httpd.  Other eventscripts were also
    modified.
    
    At this stage this is completely untested.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

-----------------------------------------------------------------------

Summary of changes:
 config/events.d/40.vsftpd |   73 ++++++++---------
 config/events.d/41.httpd  |   65 +++++++--------
 config/events.d/50.samba  |    4 +-
 config/events.d/70.iscsi  |    6 +-
 config/functions          |  200 ++++++++++++++++++++++++++++++++++++---------
 5 files changed, 229 insertions(+), 119 deletions(-)


Changeset truncated at 500 lines:

diff --git a/config/events.d/40.vsftpd b/config/events.d/40.vsftpd
index 315c150..a87b07b 100755
--- a/config/events.d/40.vsftpd
+++ b/config/events.d/40.vsftpd
@@ -1,68 +1,61 @@
 #!/bin/sh
 # event strict to manage vsftpd in a cluster environment
 
+service_name="vsftpd"
+# make sure the service is stopped first
+service_start="service vsftpd stop > /dev/null 2>&1 ; service vsftpd start"
+service_stop="service vsftpd stop"
+service_reconfigure="service $service_name restart"
+service_fail_limit=2
+service_tcp_ports=21
+
 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig vsftpd
 
-[ "$CTDB_MANAGES_VSFTPD" = "yes" ] || exit 0
+loadconfig
 
-cmd="$1"
-shift
+ctdb_start_stop_service
 
-# Count the number of monitor failures.  The cluster only becomes
-# unhealthy after 2 failures.
-VSFTPD_FAILS="fail-count"
-VSFTPD_LIMIT=2
+is_ctdb_managed_service || exit 0
 
 case $cmd in 
      startup)
-	/bin/mkdir -p $CTDB_BASE/state/vsftpd
-
-	# make sure the service is stopped first
-	service vsftpd stop > /dev/null 2>&1
-	service vsftpd start
-
-	ctdb_counter_init "$VSFTPD_FAILS"
+	ctdb_service_start
 	;;
 
      shutdown)
-	service vsftpd stop
-	;;
-
-     takeip)
-	echo "restart" > $CTDB_BASE/state/vsftpd/restart
+	ctdb_service_stop
 	;;
 
-     releaseip)
-	echo "restart" > $CTDB_BASE/state/vsftpd/restart
+     takeip|releaseip)
+	ctdb_service_set_reconfigure
 	;;
 
      recovered)
 	# if we have taken or released any ips we must 
 	# restart vsftpd to ensure that all tcp connections are reset
-	[ -f $CTDB_BASE/state/vsftpd/restart ] && {
-		service vsftpd stop > /dev/null 2>&1
-		service vsftpd start
-		/bin/rm -f $CTDB_BASE/state/vsftpd/restart 2>/dev/null
-		ctdb_counter_init "$VSFTPD_FAILS"
-	} >/dev/null 2>&1
+	if ctdb_service_needs_reconfigure ; then
+	    ctdb_service_reconfigure
+	fi
 	;;
 
      monitor)
-	# Subshell catches the "exit 1"
-	if (ctdb_check_tcp_ports "ftp" 21) ; then
-	    ctdb_counter_init "$VSFTPD_FAILS"
-	else
-	    ctdb_counter_incr "$VSFTPD_FAILS"
-	    if ctdb_counter_limit "$VSFTPD_FAILS" $VSFTPD_LIMIT ; then
-		echo "ERROR: more than $VSFTPD_LIMIT consecutive failures, marking cluster unhealthy"
-		exit 1
+	if ctdb_service_needs_reconfigure ; then
+	    ctdb_service_reconfigure
+	    exit 0
+	fi
+
+	if [ -n "$service_tcp_ports" ] ; then
+	    if ctdb_check_tcp_ports $service_tcp_ports ; then
+		ctdb_counter_init
 	    else
-		echo "WARNING: less than $VSFTPD_LIMIT consecutive failures, not unhealthy yet"
+		ctdb_counter_incr
+		ctdb_check_counter_limit
+		exit 0 # only count 1 failure per monitor event
 	    fi
-		
-	fi
+	fi	
+	;;
+    status)
+	ctdb_checkstatus || exit $?
 	;;
 esac
 
diff --git a/config/events.d/41.httpd b/config/events.d/41.httpd
index d6e7f99..421f42a 100755
--- a/config/events.d/41.httpd
+++ b/config/events.d/41.httpd
@@ -2,64 +2,57 @@
 # event script to manage httpd in a cluster environment
 
 . $CTDB_BASE/functions
-loadconfig ctdb
 
 detect_init_style
 
 case $CTDB_INIT_STYLE in
-	redhat)
-		CTDB_SERVICE_HTTP="httpd"
-		CTDB_CONFIG_HTTP="http"
-		;;
-	suse)
-		CTDB_SERVICE_HTTP="apache2"
-		CTDB_CONFIG_HTTP="apache2"
-		;;
-	debian)
-		CTDB_SERVICE_HTTP="apache2"
-		CTDB_CONFIG_HTTP="apache2"
-		;;
-	*)
-		# should not happen.
-		# for now use red hat style as default
-		CTDB_SERVICE_HTTP="httpd"
-		CTDB_CONFIG_HTTP="http"
-		;;
+    redhat)
+	service_name="httpd"
+	service_config="http"
+	;;
+    suse|debian|*)
+	service_name="apache2"
+	service_config="apache2"
+	;;
 esac
 
-loadconfig "${CTDB_CONFIG_HTTP}"
-
-[ "$CTDB_MANAGES_HTTPD" = "yes" ] || exit 0
-
-cmd="$1"
-shift
-
 # RHEL5 sometimes use a SIGKILL to terminate httpd, which then leaks
 # semaphores.  This is a hack to clean them up.
 cleanup_httpd_semaphore_leak() {
-    killall -q -0 "${CTDB_SERVICE_HTTP}" ||
+    killall -q -0 "$service_name" ||
     for i in $(ipcs -s | awk '$3 == "apache" { print $2 }') ; do
 	ipcrm -s $i
     done
 }
 
+##########
+
+service_start="cleanup_httpd_semaphore_leak; service $service_name start"
+service_stop="service $service_name stop; killall -q -9 $service_name"
+service_reconfigure="service $service_name restart"
+
+loadconfig
+
+ctdb_start_stop_service
+
 case $cmd in
     startup)
-	cleanup_httpd_semaphore_leak
-	service "${CTDB_SERVICE_HTTP}" start
+	ctdb_service_start
 	;;
 
     shutdown)
-	service "${CTDB_SERVICE_HTTP}" stop
-	killall -q -9 "${CTDB_SERVICE_HTTP}"
+	ctdb_service_stop
 	;;
 
-     monitor)
-	( ctdb_check_tcp_ports "http" 80 )
-	if [ $? -ne 0 ] ; then
+    monitor)
+	if ctdb_service_needs_reconfigure ; then
+	    ctdb_service_reconfigure
+	    exit 0
+	fi
+
+	if ! ctdb_check_tcp_ports 80 ; then
 	    echo "HTTPD is not running. Trying to restart HTTPD."
-	    cleanup_httpd_semaphore_leak
-	    service "${CTDB_SERVICE_HTTP}" start
+	    ctdb_service_start
 	    exit 1
 	fi
 	;;
diff --git a/config/events.d/50.samba b/config/events.d/50.samba
index 814fb9a..bd4c5ff 100755
--- a/config/events.d/50.samba
+++ b/config/events.d/50.samba
@@ -3,6 +3,8 @@
 
 PATH=/bin:/usr/bin:$PATH
 
+service_name="samba"
+
 . $CTDB_BASE/functions
 loadconfig ctdb
 loadconfig samba
@@ -245,7 +247,7 @@ case $cmd in
 		[ -z "$smb_ports" ] && {
 			smb_ports=`testparm_cat --parameter-name="smb ports"`
 		}
-		ctdb_check_tcp_ports "Samba" $smb_ports
+		ctdb_check_tcp_ports $smb_ports || exit $?
 	}
 
 	# check winbind is OK
diff --git a/config/events.d/70.iscsi b/config/events.d/70.iscsi
index 426e412..b32494b 100755
--- a/config/events.d/70.iscsi
+++ b/config/events.d/70.iscsi
@@ -1,11 +1,9 @@
 #!/bin/sh
 # ctdb event script for TGTD based iSCSI
 
-PATH=/bin:/usr/bin:$PATH
+service_name="iscsi"
 
 . $CTDB_BASE/functions
-loadconfig ctdb
-loadconfig iscsi
 
 cmd="$1"
 shift
@@ -64,7 +62,7 @@ case $cmd in
 
      monitor)
 	[ -f $CTDB_BASE/state/iscsi/iscsi_active ] && {
-		ctdb_check_tcp_ports "iscsi" 3260
+		ctdb_check_tcp_ports 3260 || exit $?
 	}
 	;;
 esac
diff --git a/config/functions b/config/functions
index 043051b..2b2f6b3 100644
--- a/config/functions
+++ b/config/functions
@@ -1,9 +1,17 @@
 # utility functions for ctdb event scripts
 
+PATH=/bin:/usr/bin:$PATH
+
 #######################################
 # pull in a system config file, if any
 loadconfig() {
     name="$1"
+
+    if [ -n "$name" ] ; then
+	loadconfig ctdb
+	loadconfig "${service_config:-${service_name}}"
+    fi
+
     if [ -f /etc/sysconfig/$name ]; then
 	. /etc/sysconfig/$name
     elif [ -f /etc/default/$name ]; then
@@ -31,37 +39,25 @@ detect_init_style() {
 ######################################################
 # simulate /sbin/service on platforms that don't have it
 service() { 
-  service_name="$1"
-  op="$2"
+  _service_name="$1"
+  _op="$2"
 
   # do nothing, when no service was specified
-  test "x$service_name" = "x" && return
+  test "x$_service_name" = "x" && return
 
   if [ -x /sbin/service ]; then
-      /sbin/service "$service_name" "$op"
-  elif [ -x /etc/init.d/$service_name ]; then
-      /etc/init.d/$service_name "$op"
-  elif [ -x /etc/rc.d/init.d/$service_name ]; then
-      /etc/rc.d/init.d/$service_name "$op"
+      /sbin/service "$_service_name" "$_op"
+  elif [ -x /etc/init.d/$_service_name ]; then
+      /etc/init.d/$_service_name "$_op"
+  elif [ -x /etc/rc.d/init.d/$_service_name ]; then
+      /etc/rc.d/init.d/$_service_name "$_op"
   fi
 }
 
 ######################################################
 # simulate /sbin/service (niced) on platforms that don't have it
 nice_service() { 
-  service_name="$1"
-  op="$2"
-
-  # do nothing, when no service was specified
-  test "x$service_name" = "x" && return
-
-  if [ -x /sbin/service ]; then
-      nice /sbin/service "$service_name" "$op"
-  elif [ -x /etc/init.d/$service_name ]; then
-      nice /etc/init.d/$service_name "$op"
-  elif [ -x /etc/rc.d/init.d/$service_name ]; then
-      nice /etc/rc.d/init.d/$service_name "$op"
-  fi
+    nice service "$@"
 }
 
 ######################################################
@@ -110,13 +106,13 @@ ctdb_wait_tcp_ports() {
 	          (netstat -a -n | egrep "0.0.0.0:$p[[:space:]]*LISTEN" > /dev/null) || all_ok=0
 	      else 
 		  echo "No tool to check tcp ports availabe. can not check in ctdb_wait_tcp_ports"
-		  return
+		  return 127
 	      fi
 	  done
 	  [ $all_ok -eq 1 ] || sleep 1
 	  ctdb status > /dev/null 2>&1 || {
   		echo "ctdb daemon has died. Exiting tcp wait $service_name"
-		exit 1
+		return 1
 	  }
   done
   echo "Local tcp services for $service_name are up"
@@ -202,10 +198,7 @@ ctdb_check_directories() {
 # usage: ctdb_check_tcp_ports SERVICE_NAME <ports...>
 ######################################################
 ctdb_check_tcp_ports() {
-  service_name="$1"
-  shift
-  wait_ports="$*"
-  [ -z "$wait_ports" ] && return;
+  [ -z "$1" ] && return;
 
   # check availability of netcat or netstat first
   NETCAT=""
@@ -224,7 +217,7 @@ ctdb_check_tcp_ports() {
       NETCAT=/bin/nc
   fi
 
-  for p in $wait_ports; do
+  for p ; do
       all_ok=1
 
       if [ "x${NETCAT}" != "x" ]; then
@@ -507,31 +500,161 @@ remove_ip() {
 # ctdb_counter_limit succeeds when count >= <limit>
 ########################################################
 _ctdb_counter_common () {
-    _tag="$1"
     _eventscript="${0##*/}" # basename
-
-    _counter_file="$CTDB_BASE/state/${_eventscript}-${_tag}"
+    _counter_file="$ctdb_fail_dir/${service_name:-${_eventscript}}"
     mkdir -p "${_counter_file%/*}" # dirname
 }
 ctdb_counter_init () {
-    _ctdb_counter_common "$1"
+    _ctdb_counter_common
 
     echo -n > "$_counter_file"
 }
 ctdb_counter_incr () {
-    _ctdb_counter_common "$1"
+    _ctdb_counter_common
 
     # unary counting!
     echo -n 1 >> "$_counter_file"
 }
-ctdb_counter_limit () {
-    _ctdb_counter_common "$1"
-    _limit="$2"
+ctdb_check_counter_limit () {
+    _ctdb_counter_common
 
     # unary counting!
     _size=$(stat -c "%s" "$_counter_file" 2>/dev/null || echo 0)
-    [ $_size -ge $_limit ]
+    if [ $_size -ge $service_fail_limit ] ; then
+	echo "ERROR: more than $service_fail_limit consecutive failures, marking cluster unhealthy"
+	exit 1
+    else
+	echo "WARNING: less than $service_fail_limit consecutive failures, not unhealthy yet"
+    fi
+}
+########################################################
+
+ctdb_spool_dir="/var/spool/ctdb"
+ctdb_status_dir="$ctdb_spool_dir/status"
+ctdb_fail_dir="$ctdb_spool_dir/failcount"
+ctdb_active_dir="$ctdb_spool_dir/active"
+
+ctdb_checkstatus ()
+{
+    if [ -r "$ctdb_status_dir/$service_name/unhealthy" ] ; then
+	log_status_cat "unhealthy" "$ctdb_status_dir/$service_name/unhealthy"
+	return 1
+    elif [ -r "$ctdb_status_dir/$service_name/banned" ] ; then
+	log_status_cat "banned" "$ctdb_status_dir/$service_name/banned"
+	return 2
+    else
+	return 0
+    fi
+}
+
+ctdb_setstatus ()
+{
+    d="$ctdb_status_dir/$service_name"
+    case "$1" in
+	unhealthy|banned)
+	    mkdir -p "$d"
+	    cat "$2" >"$d/$1"
+	    ;;
+	*)
+	    for i in "banned" "unhealthy" ; do
+		rm -f "$d/$i"
+	    done
+	    ;;
+    esac
+}
+
+ctdb_service_needs_reconfigure ()
+{
+    [ -e "$ctdb_status_dir/$service_name/reconfigure" ]
 }
+
+ctdb_service_set_reconfigure ()
+{
+    d="$ctdb_status_dir/$service_name"
+    mkdir -p "$d"
+    >"$d/reconfigure"
+}
+
+ctdb_service_unset_reconfigure ()
+{
+    rm -f "$ctdb_status_dir/$service_name/reconfigure"
+}
+
+ctdb_service_reconfigure ()
+{
+    if [ -n "$service_reconfigure" ] ; then
+	$service_reconfigure
+    else
+	service "$service_name" restart
+    fi
+    ctdb_service_unset_reconfigure
+    ctdb_counter_init
+}
+
+ctdb_compat_managed_service ()
+{
+    if [ "$1" = "yes" ] ; then
+	t="$t $2 "
+    fi
+}
+
+is_ctdb_managed_service ()
+{
+    t=" $CTDB_MANAGED_SERVICES "
+
+    ctdb_compat_managed_service "$CTDB_MANAGES_VSFTPD"   "vsftpd"
+    ctdb_compat_managed_service "$CTDB_MANAGES_SAMBA"    "samba"
+    ctdb_compat_managed_service "$CTDB_MANAGES_SCP"      "scp"
+    ctdb_compat_managed_service "$CTDB_MANAGES_WINDBIND" "windbind"
+    ctdb_compat_managed_service "$CTDB_MANAGES_HTTPD"    "httpd"
+    ctdb_compat_managed_service "$CTDB_MANAGES_ISCSI"    "iscsi"
+    ctdb_compat_managed_service "$CTDB_MANAGES_CLAMD"    "clamd"
+
+    # Returns 0 if "<space>$service_name<space>" appears in $t
+    [ "${t#* ${service_name} }" != "${t}" ]
+}
+
+ctdb_start_stop_service ()
+{
+    _active="$ctdb_active_dir/$service_name"
+
+    if is_ctdb_managed_service ; then
+	if ! [ -e "$_active" ] ; then
+	    echo "Starting service $service_name"
+	    ctdb_service_start || exit $?
+	    mkdir -p "$ctdb_active_dir"
+	    touch "$_active"
+	    exit 0
+	fi
+    elif ! is_ctdb_managed_service ; then
+	if [ -e "$_active" ] ; then
+	    echo "Stopping service $service_name"


-- 
CTDB repository