[SCM] CTDB repository - branch master updated - ctdb-1.0.114-295-g8909d3a

Mon Aug 30 02:24:44 MDT 2010

The branch, master has been updated
       via  8909d3a10362a8e58ffd71bc4cd035c12c584157 (commit)
       via  cc8c851e2e0b46f00b18a6dc61fd2774e97850dd (commit)
       via  51561720d2b4db5b307da3d410661075e2a6c3ca (commit)
       via  48cd8325c070f6942aa13a25269021e4c8ed188f (commit)
       via  c32ffd203e42a39010ce2d6e98253e8e48de515a (commit)
       via  c35d3e6341bc4e288393efa429b68bf6568b9b11 (commit)
       via  4763ccbfeaedd0fd953dbeda17ef9af41386688b (commit)
       via  dca4c4ebf3c35f8db3ae208efb7a83abbf726ed6 (commit)
       via  08b636b500855e38e708e6963d8e63ded97c25ec (commit)
       via  090d9c8443cfa13d45f8c5d2845aea5aa9f7251d (commit)
       via  718ddc2264c28185fcddbc9cb0c7137d198a43a7 (commit)
       via  6a33a7715067175869ea2f3f15b64c3371079a6b (commit)
       via  4cdf3b9adc7edfd80a2901ef8457ae67aab0829a (commit)
       via  c0a916c40c623c0aa8245526283a064dbeea4b57 (commit)
       via  d4189c7c3fceaa833f9f0446a2b06af6fed714ec (commit)
       via  8222fef1e61836b9bfd406205f9ffb9396aa7480 (commit)
       via  a88ee78686bd5aa2b789f5959e0562315a13525d (commit)
       via  5bb6b28ab7b45b7242d100ae8f1483d02e1d0d1d (commit)
      from  230bec8d375b778b20ff3cb7f9864c26323997f3 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 8909d3a10362a8e58ffd71bc4cd035c12c584157
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Aug 30 18:22:28 2010 +1000

    we no longer have a 61.nfstickle script

commit cc8c851e2e0b46f00b18a6dc61fd2774e97850dd
Merge: 230bec8d375b778b20ff3cb7f9864c26323997f3 51561720d2b4db5b307da3d410661075e2a6c3ca
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Aug 30 18:22:05 2010 +1000

    Merge commit 'martins/master'

-----------------------------------------------------------------------

Summary of changes:
 config/events.d/60.nfs                 |    3 ++
 config/events.d/61.nfstickle           |   62 --------------------------------
 config/functions                       |   55 ++++++++++++++++++++++++++++
 packaging/RPM/ctdb.spec.in             |    1 -
 tests/complex/31_nfs_tickle.sh         |   38 +++++++++++++-------
 tests/scripts/ctdb_test_functions.bash |   45 +++++++++++++----------
 tests/simple/16_ctdb_config_add_ip.sh  |    2 +-
 tests/simple/20_ctdb_getmonmode.sh     |   13 ++-----
 8 files changed, 112 insertions(+), 107 deletions(-)
 delete mode 100755 config/events.d/61.nfstickle


Changeset truncated at 500 lines:

diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index 15c2b89..6acc889 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -6,6 +6,7 @@ start_nfs() {
 	/bin/mkdir -p $CTDB_BASE/state/statd/ip
 	startstop_nfs stop
 	startstop_nfs start
+	echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
 }
 
 . $CTDB_BASE/functions
@@ -47,6 +48,8 @@ case "$1" in
 	    exit 0
 	fi
 
+	update_tickles 2049
+
 	# check that statd responds to rpc requests
 	# if statd is not running we try to restart it
 	rpcinfo -u localhost 100024 1 > /dev/null || {
diff --git a/config/events.d/61.nfstickle b/config/events.d/61.nfstickle
deleted file mode 100755
index 3bfef4f..0000000
--- a/config/events.d/61.nfstickle
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/sh
-# ctdb event script for NFS tickle acks
-
-. $CTDB_BASE/functions
-
-service_name="nfs"
-
-loadconfig
-
-service_start="mkdir -p $CTDB_BASE/state/nfstickle;mkdir -p $NFS_TICKLE_SHARED_DIRECTORY/`hostname`;echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle"
-service_reconfigure=$service_start
-
-ctdb_start_stop_service
-
-[ -z "$NFS_TICKLE_SHARED_DIRECTORY" ] && exit 0
-
-case "$1" in 
-     startup)
-	ctdb_service_start
-	;;
-	
-     takeip)
-	iface=$2
-	ip=$3
-	# first send a grat arp, to ensure the client knows the updated
-	# mac address for this IP
-	ctdb gratiousarp $ip $iface
-	# send tickle acks for all the connections the old server had
-	for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do
-		[ -f $f ] && cat $f | while read dest; do
-			# send three, in case of lost packets
-			echo "Sending NFS tickle ack for $ip to $dest"
-			for i in `seq 1 3`; do
-				ctdb tickle $dest $ip:2049
-			done
-		done
-	done
-	;;
-
-     monitor)
-	mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname`
-	rm -f $mydir/*
-	# record our connections to shared storage
-	netstat -tn |
-	awk -v mydir="$mydir" '
-$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ /:2049$/ {
-  destip = gensub(/:2049$/, "", 1, $4);
-  c[destip] = c[destip] (c[destip] ? "\n" : "" ) $5;
-}
-END {
-  for (ip in c) {
-    print c[ip] > mydir "/" ip
-  }
-}'
-	;;
-
-    *)
-	ctdb_standard_event_handler "$@"
-	;;
-esac
-
-exit 0
diff --git a/config/functions b/config/functions
index 35b7db2..a20cadf 100755
--- a/config/functions
+++ b/config/functions
@@ -775,6 +775,61 @@ iptables()
 }
 
 ########################################################
+# tickle handling
+########################################################
+
+# Temporary directory for tickles.
+tickledir="$CTDB_BASE/state/tickles"
+mkdir -p "$tickledir"
+
+update_tickles ()
+{
+	_port="$1"
+
+	mkdir -p "$tickledir" # Just in case
+
+	# Who am I?
+	_pnn=$(ctdb pnn) ; _pnn=${_pnn#PNN:}
+
+	# What public IPs do I hold?
+	_ips=$(ctdb -Y ip | awk -F: -v pnn=$_pnn '$3 == pnn {print $2}')
+
+	# IPs as a regexp choice
+	_ipschoice="($(echo $_ips | sed -e 's/ /|/g' -e 's/\./\\\\./g'))"
+
+	# Record connections to our public IPs in a temporary file
+	_my_connections="${tickledir}/${_port}.connections"
+	rm -f "$_my_connections"
+	netstat -tn |
+	awk -v destpat="^${_ipschoice}:${_port}\$" \
+	  '$1 == "tcp" && $6 == "ESTABLISHED" && $4 ~ destpat {print $5, $4}' |
+	sort >"$_my_connections"
+
+	# Record our current tickles in a temporary file
+	_my_tickles="${tickledir}/${_port}.tickles"
+	rm -f "$_my_tickles"
+	for _i in $_ips ; do
+		ctdb -Y gettickles $_i $_port | 
+		awk -F: 'NR > 1 { printf "%s:%s %s:%s\n", $2, $3, $4, $5 }'
+	done |
+	sort >"$_my_tickles"
+
+	# Add tickles for connections that we haven't already got tickles for
+	comm -23 "$_my_connections" "$_my_tickles" |
+	while read _src _dst ; do
+		ctdb addtickle $_src $_dst
+	done
+
+	# Remove tickles for connections that are no longer there
+	comm -13 "$_my_connections" "$_my_tickles" |
+	while read _src _dst ; do
+		ctdb deltickle $_src $_dst
+	done
+
+	rm -f "$_my_connections" "$_my_tickles" 
+}
+
+########################################################
 # load a site local config file
 ########################################################
 
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index d14db30..df175ae 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -107,7 +107,6 @@ rm -rf $RPM_BUILD_ROOT
 %{_sysconfdir}/ctdb/events.d/41.httpd
 %{_sysconfdir}/ctdb/events.d/50.samba
 %{_sysconfdir}/ctdb/events.d/60.nfs
-%{_sysconfdir}/ctdb/events.d/61.nfstickle
 %{_sysconfdir}/ctdb/events.d/62.cnfs
 %{_sysconfdir}/ctdb/events.d/70.iscsi
 %{_sysconfdir}/ctdb/events.d/91.lvs
diff --git a/tests/complex/31_nfs_tickle.sh b/tests/complex/31_nfs_tickle.sh
index eb6644b..030e34f 100755
--- a/tests/complex/31_nfs_tickle.sh
+++ b/tests/complex/31_nfs_tickle.sh
@@ -57,7 +57,7 @@ ctdb_test_exit_hook_add ctdb_test_eventscript_uninstall
 ctdb_test_eventscript_install
 
 # We need this for later, so we know how long to sleep.
-try_command_on_node 0 $CTDB getvar MonitorInterval
+try_command_on_node any $CTDB getvar MonitorInterval
 monitor_interval="${out#*= }"
 #echo "Monitor interval on node $test_node is $monitor_interval seconds."
 
@@ -77,28 +77,40 @@ echo "Source socket is $src_socket"
 
 wait_for_monitor_event $test_node
 
-echo "Trying to determine NFS_TICKLE_SHARED_DIRECTORY..."
-f="/etc/sysconfig/nfs"
-try_command_on_node -v 0 "[ -r $f ] &&  sed -n -e s@^NFS_TICKLE_SHARED_DIRECTORY=@@p $f" || true
+echo "Sleeping until tickles are synchronised across nodes..."
+try_command_on_node $test_node $CTDB getvar TickleUpdateInterval
+sleep_for "${out#*= }"
 
-nfs_tickle_shared_directory="${out:-/gpfs/.ctdb/nfs-tickles}"
+if try_command_on_node any "test -r /etc/ctdb/events.d/61.nfstickle" ; then
+    echo "Trying to determine NFS_TICKLE_SHARED_DIRECTORY..."
+    f="/etc/sysconfig/nfs"
+    try_command_on_node -v any "[ -r $f ] &&  sed -n -e s@^NFS_TICKLE_SHARED_DIRECTORY=@@p $f" || true
 
-try_command_on_node $test_node hostname
-test_hostname=$out
+    nfs_tickle_shared_directory="${out:-/gpfs/.ctdb/nfs-tickles}"
 
-try_command_on_node -v 0 cat "${nfs_tickle_shared_directory}/$test_hostname/$test_ip"
+    try_command_on_node $test_node hostname
+    test_hostname=$out
+
+    try_command_on_node -v any cat "${nfs_tickle_shared_directory}/$test_hostname/$test_ip"
+else
+    echo "That's OK, we'll use \"ctdb gettickles\", which is newer..."
+    try_command_on_node -v any "ctdb -Y gettickles $test_ip $test_port"
+fi
 
 if [ "${out/${src_socket}/}" != "$out" ] ; then
-    echo "GOOD: NFS connection tracked OK in tickles file."
+    echo "GOOD: NFS connection tracked OK."
 else
-    echo "BAD: Socket not tracked in NFS tickles file:"
+    echo "BAD: Socket not tracked in NFS tickles."
     testfailures=1
 fi
 
 tcptickle_sniff_start $src_socket "${test_ip}:${test_port}"
 
-echo "Disabling node $test_node"
-try_command_on_node 1 $CTDB disable -n $test_node
-wait_until_node_has_status $test_node disabled
+# We need to be nasty to make sure that the node being failed out doesn't
+# get a chance to send any tickles and confuse our sniff.
+echo "Killing ctdbd on ${test_node}..."
+try_command_on_node $test_node killall -9 ctdbd
+
+wait_until_node_has_status $test_node disconnected
 
 tcptickle_sniff_wait_show
diff --git a/tests/scripts/ctdb_test_functions.bash b/tests/scripts/ctdb_test_functions.bash
index 42053c0..1433a46 100644
--- a/tests/scripts/ctdb_test_functions.bash
+++ b/tests/scripts/ctdb_test_functions.bash
@@ -66,7 +66,7 @@ ctdb_test_exit ()
     # now complete.
     set +e
 
-    echo "*** TEST COMPLETE (RC=$status), CLEANING UP..."
+    echo "*** TEST COMPLETED (RC=$status) AT $(date '+%F %T'), CLEANING UP..."
 
     eval "$ctdb_test_exit_hook" || true
     unset ctdb_test_exit_hook
@@ -80,7 +80,7 @@ ctdb_test_exit ()
 	# leave the recovery in restart_ctdb so that future tests that
 	# might do a manual restart mid-test will benefit.
 	echo "Forcing a recovery..."
-	onnode 0 ctdb recover
+	onnode 0 $CTDB recover
     fi
 
     exit $status
@@ -336,13 +336,15 @@ _cluster_is_healthy ()
 {
     local out x count line
 
-    out=$(ctdb -Y status 2>&1) || return 1
+    out=$($CTDB -Y status 2>/dev/null) || return 1
 
     {
         read x
 	count=0
         while read line ; do
-	    count=$(($count + 1))
+	    # We need to see valid lines if we're going to be healthy.
+	    [ "${line#:[0-9]}" != "$line" ] && count=$(($count + 1))
+	    # A line indicating a node is unhealthy causes failure.
 	    [ "${line##:*:*:*1:}" != "$line" ] && return 1
         done
 	[ $count -gt 0 ] && return $?
@@ -357,9 +359,9 @@ cluster_is_healthy ()
     else
 	echo "Cluster is UNHEALTHY"
 	if ! ${ctdb_test_restart_scheduled:-false} ; then
-	    echo "DEBUG:"
+	    echo "DEBUG AT $(date '+%F %T'):"
 	    local i
-	    for i in "onnode -q 0 ctdb status" "onnode -q 0 onnode all ctdb scriptstatus" ; do
+	    for i in "onnode -q 0 $CTDB status" "onnode -q 0 onnode all $CTDB scriptstatus" ; do
 		echo "$i"
 		$i || true
 	    done
@@ -407,7 +409,7 @@ node_has_status ()
     if [ -n "$bits" ] ; then
 	local out x line
 
-	out=$(ctdb -Y status 2>&1) || return 1
+	out=$($CTDB -Y status 2>&1) || return 1
 
 	{
             read x
@@ -420,9 +422,9 @@ node_has_status ()
 	    return 1
 	} <<<"$out" # Yay bash!
     elif [ -n "$fpat" ] ; then
-	ctdb statistics -n "$pnn" | egrep -q "$fpat"
+	$CTDB statistics -n "$pnn" | egrep -q "$fpat"
     elif [ -n "$mpat" ] ; then
-	ctdb getmonmode -n "$pnn" | egrep -q "$mpat"
+	$CTDB getmonmode -n "$pnn" | egrep -q "$mpat"
     else
 	echo 'node_has_status: unknown mode, neither $bits nor $fpat is set'
 	return 1
@@ -437,8 +439,8 @@ wait_until_node_has_status ()
 
     echo "Waiting until node $pnn has status \"$status\"..."
 
-    if ! onnode any $CTDB_TEST_WRAPPER wait_until $timeout node_has_status "$pnn" "$status" ; then
-	for i in "onnode -q any ctdb status" "onnode -q any onnode all ctdb scriptstatus" ; do
+    if ! wait_until $timeout onnode any $CTDB_TEST_WRAPPER node_has_status "$pnn" "$status" ; then
+	for i in "onnode -q any $CTDB status" "onnode -q any onnode all $CTDB scriptstatus" ; do
 	    echo "$i"
 	    $i || true
 	done
@@ -579,9 +581,9 @@ tcpdump_wait ()
 
     echo "Waiting for tcpdump to capture some packets..."
     if ! wait_until 30 tcpdump_check ; then
-	echo "DEBUG:"
+	echo "DEBUG AT $(date '+%F %T'):"
 	local i
-	for i in "onnode -q 0 ctdb status" "netstat -tanp" "tcpdump -n -e -r $tcpdump_filename" ; do
+	for i in "onnode -q 0 $CTDB status" "netstat -tanp" "tcpdump -n -e -r $tcpdump_filename" ; do
 	    echo "$i"
 	    $i || true
 	done
@@ -638,7 +640,7 @@ gratarp_sniff_wait_show ()
 daemons_stop ()
 {
     echo "Attempting to politely shutdown daemons..."
-    onnode 1 ctdb shutdown -n all || true
+    onnode 1 $CTDB shutdown -n all || true
 
     echo "Sleeping for a while..."
     sleep_for 1
@@ -794,16 +796,16 @@ _ctdb_start_post ()
     onnode -q 1  $CTDB_TEST_WRAPPER wait_until_healthy || return 1
 
     echo "Setting RerecoveryTimeout to 1"
-    onnode -pq all "ctdb setvar RerecoveryTimeout 1"
+    onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
 
     # In recent versions of CTDB, forcing a recovery like this blocks
     # until the recovery is complete.  Hopefully this will help the
     # cluster to stabilise before a subsequent test.
     echo "Forcing a recovery..."
-    onnode -q 0 ctdb recover
+    onnode -q 0 $CTDB recover
     sleep_for 1
     echo "Forcing a recovery..."
-    onnode -q 0 ctdb recover
+    onnode -q 0 $CTDB recover
 
     echo "ctdb is ready"
 }
@@ -855,16 +857,16 @@ restart_ctdb ()
     onnode -q 1  $CTDB_TEST_WRAPPER wait_until_healthy || return 1
 
     echo "Setting RerecoveryTimeout to 1"
-    onnode -pq all "ctdb setvar RerecoveryTimeout 1"
+    onnode -pq all "$CTDB setvar RerecoveryTimeout 1"
 
     # In recent versions of CTDB, forcing a recovery like this blocks
     # until the recovery is complete.  Hopefully this will help the
     # cluster to stabilise before a subsequent test.
     echo "Forcing a recovery..."
-    onnode -q 0 ctdb recover
+    onnode -q 0 $CTDB recover
     sleep_for 1
     echo "Forcing a recovery..."
-    onnode -q 0 ctdb recover
+    onnode -q 0 $CTDB recover
 
     echo "ctdb is ready"
 }
@@ -1036,3 +1038,6 @@ wait_for_monitor_event ()
     wait_until 120 ! ctdb_test_eventscript_flag exists $pnn "monitor"
 
 }
+
+# Make sure that $CTDB is set.
+: ${CTDB:=ctdb}
diff --git a/tests/simple/16_ctdb_config_add_ip.sh b/tests/simple/16_ctdb_config_add_ip.sh
index 6fee386..3207661 100755
--- a/tests/simple/16_ctdb_config_add_ip.sh
+++ b/tests/simple/16_ctdb_config_add_ip.sh
@@ -115,6 +115,6 @@ if wait_until 60 ips_are_on_nodeglob $test_node ${add_ip%/*} ; then
     echo "That worked!"
 else
     echo "BAD: IP didn't get added."
-    try_command_on_node $test_node ctdb ip -n all
+    try_command_on_node $test_node $CTDB ip -n all
     exit 1
 fi
diff --git a/tests/simple/20_ctdb_getmonmode.sh b/tests/simple/20_ctdb_getmonmode.sh
index eab3dad..56a38d8 100755
--- a/tests/simple/20_ctdb_getmonmode.sh
+++ b/tests/simple/20_ctdb_getmonmode.sh
@@ -18,7 +18,7 @@ Steps:
 
 1. Verify that the status on all of the ctdb nodes is 'OK'.
 2. Use 'ctdb getmodmode -n <node>' to get the current monitoring mode.
-3. Verify that it shows monitoring as 'active'.
+3. Verify that it looks sane.
 4. Verify that the command prints the output in colon-separated format
    when run with the '-Y' option.
 5. Disable monitoring on the node using 'ctdb disablemonitor'.
@@ -47,19 +47,12 @@ try_command_on_node -v 0 $CTDB getmonmode -n $test_node
 
 sanity_check_output \
     1 \
-    '^Monitoring mode:ACTIVE \(0\)$' \
+    '^Monitoring mode:(ACTIVE \(0\)|DISABLED \(1\))$' \
     "$out"
 
-colons=$(printf ':mode:\n:0:')
-
 try_command_on_node -v 0 $CTDB -Y getmonmode -n $test_node
 
-if [ "$out" = "$colons" ] ; then
-    echo "Looks OK"
-else
-    echo "BAD: -Y output isn't what was expected"
-    testfailures=1
-fi
+sanity_check_output 2 '^(:mode:|:0:|:1:)$' "$out"
 
 try_command_on_node -v 0 $CTDB disablemonitor -n $test_node
 


-- 
CTDB repository