[SCM] CTDB repository - branch 1.0.112 updated - ctdb-1.0.111-44-gf8b3238

Mon Feb 22 22:41:15 MST 2010

The branch, 1.0.112 has been updated
       via  f8b32385257c9526999a229e84020677deb79eaf (commit)
       via  b00e33d98c75297c34560e765ce5604ab4d402ec (commit)
       via  e274b4fde6084031646f0e5541ca5a2bdf9217d0 (commit)
      from  9cb1a3393efc479494de11839cfd3b080b7c8914 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.0.112


- Log -----------------------------------------------------------------
commit f8b32385257c9526999a229e84020677deb79eaf
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Feb 23 16:19:51 2010 +1100

    new version 1.0.112-10
    
    * Tue Feb 23 2010 : Version 1.0.112-10
     - revert the change in 1.0.112-9 and make a new attempt to make the scripts
       behave.
     - make writing the ticklelist in 61.nfstickle a background task to avoid
       having a long cluster fs pause cause a node to become unhealthy
     - critical bugfix. during an error path in the "end recovery" code
       we could release a memory block before we had finished referencing it
       which could lead to a segv.   bz 61068
     - make sure we tear down the natgw configuration when a node becomes stopped
       or else we might end up with a duplicate ip address when a different node
       takes over the natgw role.   bz 61036

commit b00e33d98c75297c34560e765ce5604ab4d402ec
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Feb 23 16:09:09 2010 +1100

    store the nfs tickles for 61.nfstickle in a background shell
    instead of blocking while it finishes.
    
    this avoids having the eventscript hang/timeout if the underlying cluster filesystem hangs and blocks for 30+ seconds.

commit e274b4fde6084031646f0e5541ca5a2bdf9217d0
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Feb 23 16:07:17 2010 +1100

    Revert "Ignore any scripts that timesout for most events, except startup."
    
    This reverts commit 527597ed6d9142c0b47a9c419c828793826ac95e.

-----------------------------------------------------------------------

Summary of changes:
 config/events.d/61.nfstickle |   23 ++++++++++++++---------
 packaging/RPM/ctdb.spec.in   |   12 +++++++++++-
 server/eventscript.c         |   16 +---------------
 3 files changed, 26 insertions(+), 25 deletions(-)


Changeset truncated at 500 lines:

diff --git a/config/events.d/61.nfstickle b/config/events.d/61.nfstickle
index 14794fe..c91d393 100755
--- a/config/events.d/61.nfstickle
+++ b/config/events.d/61.nfstickle
@@ -15,6 +15,19 @@ ctdb_start_stop_service
 
 [ -z "$NFS_TICKLE_SHARED_DIRECTORY" ] && exit 0
 
+store_tickles()
+{
+	mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname`
+	rm -f $mydir/*
+	# record our connections to shared storage
+	netstat -tn |egrep '^tcp[[:space:]]+[0-9]+[[:space:]]+[0-9]+[[:space:]]+[0-9\.]+:2049.*ESTABLISHED' |
+		awk '{print $4" "$5}' | 
+		while read dest src; do
+			ip=${dest%:*}
+			echo $src >> $mydir/$ip
+		done
+}
+
 case "$1" in 
      startup)
 	ctdb_service_start
@@ -39,15 +52,7 @@ case "$1" in
 	;;
 
      monitor)
-	mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname`
-	rm -f $mydir/*
-	# record our connections to shared storage
-	netstat -tn |egrep '^tcp[[:space:]]+[0-9]+[[:space:]]+[0-9]+[[:space:]]+[0-9\.]+:2049.*ESTABLISHED' |
-		awk '{print $4" "$5}' | 
-		while read dest src; do
-			ip=${dest%:*}
-			echo $src >> $mydir/$ip
-		done
+	store_tickles &
 	;;
 
     *)
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index de15224..9a2f10b 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -5,7 +5,7 @@ Vendor: Samba Team
 Packager: Samba Team <samba at samba.org>
 Name: ctdb
 Version: 1.0.112
-Release: 9
+Release: 10
 Epoch: 0
 License: GNU GPL version 3
 Group: System Environment/Daemons
@@ -123,6 +123,16 @@ rm -rf $RPM_BUILD_ROOT
 %{_docdir}/ctdb/tests/bin/ctdb_transaction
 
 %changelog
+* Tue Feb 23 2010 : Version 1.0.112-10
+ - revert the change in 10.0.0.112-9 and make a new attempt to make the scripts behave.
+ - make writing the ticklelist in 61.nfstickle a background task to avoid
+   having a long cluster fs pause cause a node to become unhealthy
+ - critical bugfix. during an error path in the "end recovery" code
+   we could release a memory block before we had finished referencing it
+   which could lead to a segv.   bz 61068
+ - make sure we tear down the natgw configuration when a node become stopped
+   or else we might end up with a duplicate ip address when a different node
+   takes over the natgw role.   bz 61036
 * Tue Feb 16 2010 : Version 1.0.112-9
  - treat all scripts that hang (except for startup) as success even if they hung
 * Mon Feb 15 2010 : Version 1.0.112-8
diff --git a/server/eventscript.c b/server/eventscript.c
index 8cac635..e0908e1 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -516,21 +516,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
 	DEBUG(DEBUG_ERR,("Event script timed out : %s %s %s count : %u  pid : %d\n",
 			 current->name, ctdb_eventscript_call_names[state->call], state->options, ctdb->event_script_timeouts, state->child));
 
-	/* ignore timeouts for these events */
-	switch (state->call) {
-	case CTDB_EVENT_START_RECOVERY:
-	case CTDB_EVENT_RECOVERED:
-	case CTDB_EVENT_TAKE_IP:
-	case CTDB_EVENT_RELEASE_IP:
-	case CTDB_EVENT_STOPPED:
-	case CTDB_EVENT_MONITOR:
-	case CTDB_EVENT_STATUS:
-		state->scripts->scripts[state->current].status = 0;
-		DEBUG(DEBUG_ERR,("Ignoring hung script for %s call %d\n", state->options, state->call));
-		break;
-        default:
-		state->scripts->scripts[state->current].status = -ETIME;
-	}
+	state->scripts->scripts[state->current].status = -ETIME;
 
 	if (kill(state->child, 0) != 0) {
 		DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno));


-- 
CTDB repository