[SCM] CTDB repository - branch 1.0.112 updated - ctdb-1.0.111-154-gf26749e

Ronnie Sahlberg sahlberg at samba.org
Tue Mar 1 03:21:52 MST 2011


The branch, 1.0.112 has been updated
       via  f26749ea3080ccfabc8d6dfe482e8b72495c9c10 (commit)
       via  f003e9131784419f36741bad0b1029713d7f9b77 (commit)
       via  ea0953db764085011358cb0cfa36a4d91fdf0058 (commit)
       via  ccf39ce5191b3d5ebd536bee8c04a86c4f07a58e (commit)
      from  96c29a83f6c937d45785c3d14434f95c48564dd3 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.0.112


- Log -----------------------------------------------------------------
commit f26749ea3080ccfabc8d6dfe482e8b72495c9c10
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Mar 1 19:42:35 2011 +1100

    new version 1.0.112-40

commit f003e9131784419f36741bad0b1029713d7f9b77
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Mar 1 19:41:43 2011 +1100

    Don't log an error if waitpid() fails because there is no child process to wait for

commit ea0953db764085011358cb0cfa36a4d91fdf0058
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Mar 1 12:09:42 2011 +1100

    If/when the recovery daemon terminates unexpectedly, try to restart it from the main daemon instead of just shutting down the main daemon too.
    
    While this does not address the reason for the recovery daemon shutting down, it reduces the impact of such failures and makes the system more robust.

commit ccf39ce5191b3d5ebd536bee8c04a86c4f07a58e
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Mar 1 19:09:52 2011 +1100

    re-sync to 1.0.112-39 srpm

-----------------------------------------------------------------------

Summary of changes:
 config/events.d/11.natgw   |    4 +---
 config/events.d/60.nfs     |    1 +
 packaging/RPM/ctdb.spec.in |    7 ++++++-
 server/ctdb_daemon.c       |    2 +-
 server/ctdb_recoverd.c     |   25 +++++++++++++++----------
 5 files changed, 24 insertions(+), 15 deletions(-)


Changeset truncated at 500 lines:

diff --git a/config/events.d/11.natgw b/config/events.d/11.natgw
index ac34a24..2fc232a 100755
--- a/config/events.d/11.natgw
+++ b/config/events.d/11.natgw
@@ -77,10 +77,8 @@ case "$1" in
 		# We do this so that the ip address will exist on a
 		# non-loopback interface so that samba may send it along in the
 		# KDC requests.
+		ip addr add $CTDB_NATGW_PUBLIC_IP_HOST dev lo scope host
 		ip route add 0.0.0.0/0 via $NATGWIP metric 10
-		# Make sure winbindd does not stay bound to this address
-		# if we are no longer natgwmaster
-		smbcontrol winbindd ip-dropped $CTDB_NATGW_PUBLIC_IP >/dev/null 2>/dev/null
 	fi
 
 	# flush our route cache
diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index 2c48d05..645b3b1 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -96,6 +96,7 @@ case "$1" in
 	(ctdb_check_rpc "lockd" 100021 1)
 	[ $? = "0" ] || {
 	    echo "Trying to restart lock manager service"
+	    startstop_nfs restart
 	    startstop_nfslock restart
 	    exit 1
 	}
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index 0adb693..3ca8a45 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -5,7 +5,7 @@ Vendor: Samba Team
 Packager: Samba Team <samba at samba.org>
 Name: ctdb
 Version: 1.0.112
-Release: 38
+Release: 40
 Epoch: 0
 License: GNU GPL version 3
 Group: System Environment/Daemons
@@ -125,6 +125,11 @@ rm -rf $RPM_BUILD_ROOT
 %{_docdir}/ctdb/tests/bin/ctdb_transaction
 
 %changelog
+* Tue Mar 1 2011 : Version 1.0.112-40
+ - Try restarting the recovery daemon when it fails.
+* Wed Oct 27 2010 : Version 1.0.112-39
+ - When restarting lockmanager, also restart nfs itself in case nfs failed to 
+   start.
 * Wed Oct 14 2010 : Version 1.0.112-38
  - Try to restart LOCKD if "service nfslock start" fails.
 * Wed Oct 13 2010 : Version 1.0.112-37
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 1dbf3e6..3eb832e 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -695,7 +695,7 @@ static void sig_child_handler(struct event_context *ev,
 
 	while (pid != 0) {
 		pid = waitpid(-1, &status, WNOHANG);
-		if (pid == -1) {
+		if (pid == -1 && errno != ECHILD) {
 			DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
 			return;
 		}
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index fbc5eec..fe0f535 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -72,6 +72,7 @@ struct ctdb_recoverd {
 #define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
 #define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
 
+static void ctdb_restart_recd(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data);
 
 /*
   ban a node for a period of time
@@ -3455,18 +3456,12 @@ static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
 	struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
 
 	if (kill(ctdb->recoverd_pid, 0) != 0) {
-		DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Shutting down main daemon\n", (int)ctdb->recoverd_pid));
+		DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Trying to restart recovery daemon.\n", (int)ctdb->recoverd_pid));
 
-		ctdb_stop_recoverd(ctdb);
-		ctdb_stop_keepalive(ctdb);
-		ctdb_stop_monitoring(ctdb);
-		ctdb_release_all_ips(ctdb);
-		if (ctdb->methods != NULL) {
-			ctdb->methods->shutdown(ctdb);
-		}
-		ctdb_event_script(ctdb, CTDB_EVENT_SHUTDOWN);
+		event_add_timed(ctdb->ev, ctdb, timeval_zero(), 
+				ctdb_restart_recd, ctdb);
 
-		exit(10);	
+		return;
 	}
 
 	event_add_timed(ctdb->ev, ctdb, 
@@ -3566,3 +3561,13 @@ void ctdb_stop_recoverd(struct ctdb_context *ctdb)
 	DEBUG(DEBUG_NOTICE,("Shutting down recovery daemon\n"));
 	kill(ctdb->recoverd_pid, SIGTERM);
 }
+
+static void ctdb_restart_recd(struct event_context *ev, struct timed_event *te, 
+		       struct timeval t, void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+
+	DEBUG(DEBUG_ERR,("Restarting recovery daemon\n"));
+	ctdb_stop_recoverd(ctdb);
+	ctdb_start_recoverd(ctdb);
+}


-- 
CTDB repository


More information about the samba-cvs mailing list