[SCM] CTDB repository - branch 2.5 updated - ctdb-2.5.5-35-gb0ac45f

Amitay Isaacs amitay at samba.org
Wed Aug 5 01:18:46 UTC 2015


The branch, 2.5 has been updated
       via  b0ac45fcb7097d9db965a7c3858de872e16387b6 (commit)
       via  714b2189a91f8ced21c2dc1dd8e792a90e769fa0 (commit)
       via  abcf24832161d841ee9840efc40d128e1f9edd3d (commit)
       via  0656c1969693b69a826ee184bb1a8b0e8c7ea9fc (commit)
       via  af7c066630ebaf1398b6970acccb28d3a4bd42e7 (commit)
      from  ed3c1f234ea76d32c35361f5c92a9dc2ead86121 (commit)

https://git.samba.org/?p=ctdb.git;a=shortlog;h=2.5


- Log -----------------------------------------------------------------
commit b0ac45fcb7097d9db965a7c3858de872e16387b6
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Jul 24 15:32:42 2015 +1000

    daemon: Check if updates are in flight when releasing all IPs
    
    Some code involved in releasing IPs is not re-entrant.  Memory
    corruption can occur if, for example, overlapping attempts are made to
    ban a node.  We haven't been able to recreate the corruption but this
    should protect against it.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Reviewed-by: Amitay Isaacs <amitay at gmail.com>
    
    (Imported from commit 952a50485f68b3cffdf57da84aa9bb9fde630b7e)

commit 714b2189a91f8ced21c2dc1dd8e792a90e769fa0
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Mon Jul 27 16:51:08 2015 +1000

    banning: If node is already banned, do not run ctdb_local_node_got_banned()
    
    This calls release_all_ips() only once on the first ban.  If the node gets
    banned again due to an event script timeout while running
    release_all_ips(), then avoid calling release_all_ips() in a re-entrant
    fashion.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>
    
    (Imported from commit 8eb04d09b119e234c88150e1dc35fc5057f9c926)

commit abcf24832161d841ee9840efc40d128e1f9edd3d
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Fri Jul 24 07:39:26 2015 +1000

    client: Return the correct status sent from the daemon
    
    If a control fails and an error message is set, the returned status of
    the control is always set to -1, ignoring the status passed by the daemon.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>
    
    (Imported from commit 1286b02e24a521dafa7061d09fb5c21d1ebb3011)

commit 0656c1969693b69a826ee184bb1a8b0e8c7ea9fc
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Tue Jul 21 16:37:04 2015 +1000

    daemon: Correctly process the exit code from failed eventscripts
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>
    
    Autobuild-User(master): Martin Schwenke <martins at samba.org>
    Autobuild-Date(master): Wed Jul 22 15:03:53 CEST 2015 on sn-devel-104
    
    (Imported from commit 00ec3c477eba50206801b451ae4eb64c12aba5db)

commit af7c066630ebaf1398b6970acccb28d3a4bd42e7
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Mon Jul 20 16:37:58 2015 +1000

    tool: Correctly print timed out event scripts output
    
    The timed out error is ignored for certain events (start_recovery,
    recoverd, takeip, releaseip).  If these events time out, then the debug
    hung script outputs the following:
    
     3 scripts were executed last releaseip cycle
     00.ctdb              Status:OK    Duration:4.381 Thu Jul 16 23:45:24 2015
     01.reclock           Status:OK    Duration:13.422 Thu Jul 16 23:45:28 2015
     10.external          Status:DISABLED
     10.interface         Status:OK    Duration:-1437083142.208 Thu Jul 16 23:45:42 2015
    
    The endtime for timed out scripts is not set.  Since the status is not
    returned as -ETIME for some events, ctdb scriptstatus prints a negative
    duration.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>
    
    (Imported from commit 71b89b2b7a9768de437347e6678370b2682da892)

-----------------------------------------------------------------------

Summary of changes:
 client/ctdb_client.c       |  2 +-
 server/ctdb_banning.c      |  7 ++++++-
 server/ctdb_event_helper.c |  6 +++++-
 server/ctdb_takeover.c     | 18 +++++++++++++++---
 tools/ctdb.c               |  8 ++++++++
 5 files changed, 35 insertions(+), 6 deletions(-)


Changeset truncated at 500 lines:

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 1c33e66..32c9357 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -1138,7 +1138,7 @@ int ctdb_control_recv(struct ctdb_context *ctdb,
 			state->async.fn(state);
 		}
 		talloc_free(tmp_ctx);
-		return -1;
+		return (status == 0 ? -1 : state->status);
 	}
 
 	if (outdata) {
diff --git a/server/ctdb_banning.c b/server/ctdb_banning.c
index a9d1891..d8f7ab1 100644
--- a/server/ctdb_banning.c
+++ b/server/ctdb_banning.c
@@ -80,6 +80,7 @@ void ctdb_local_node_got_banned(struct ctdb_context *ctdb)
 int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
 {
 	struct ctdb_ban_time *bantime = (struct ctdb_ban_time *)indata.dptr;
+	bool already_banned;
 
 	DEBUG(DEBUG_INFO,("SET BAN STATE\n"));
 
@@ -107,9 +108,11 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
 		return 0;
 	}
 
+	already_banned = false;
 	if (ctdb->banning_ctx != NULL) {
 		talloc_free(ctdb->banning_ctx);
 		ctdb->banning_ctx = NULL;
+		already_banned = true;
 	}
 
 	if (bantime->time == 0) {
@@ -136,7 +139,9 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata)
 
 	event_add_timed(ctdb->ev, ctdb->banning_ctx, timeval_current_ofs(bantime->time,0), ctdb_ban_node_event, ctdb);
 
-	ctdb_local_node_got_banned(ctdb);
+	if (!already_banned) {
+		ctdb_local_node_got_banned(ctdb);
+	}
 	return 0;
 }
 
diff --git a/server/ctdb_event_helper.c b/server/ctdb_event_helper.c
index f14e336..a1b5318 100644
--- a/server/ctdb_event_helper.c
+++ b/server/ctdb_event_helper.c
@@ -128,7 +128,11 @@ int main(int argc, char *argv[])
 		exit(1);
 	}
 	if (WIFEXITED(status)) {
-		output = -WEXITSTATUS(status);
+		output = WEXITSTATUS(status);
+		/* Only errors should be returned as -ve values */
+		if (output == ENOENT || output == ENOEXEC) {
+			output = -output;
+		}
 		sys_write(write_fd, &output, sizeof(output));
 		exit(0);
 	}
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c
index 29d54f2..095ae41 100644
--- a/server/ctdb_takeover.c
+++ b/server/ctdb_takeover.c
@@ -3246,9 +3246,6 @@ void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
 }
 
 
-/*
-  release all IPs on shutdown
- */
 void ctdb_release_all_ips(struct ctdb_context *ctdb)
 {
 	struct ctdb_vnn *vnn;
@@ -3263,6 +3260,20 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
 			continue;
 		}
 
+		/* Don't allow multiple releases at once.  Some code,
+		 * particularly ctdb_tickle_sentenced_connections() is
+		 * not re-entrant */
+		if (vnn->update_in_flight) {
+			DEBUG(DEBUG_WARNING,
+			      (__location__
+			       " Not releasing IP %s/%u on interface %s, an update is already in progess\n",
+				    ctdb_addr_to_str(&vnn->public_address),
+				    vnn->public_netmask_bits,
+				    ctdb_vnn_iface_string(vnn)));
+			continue;
+		}
+		vnn->update_in_flight = true;
+
 		DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
 				    ctdb_addr_to_str(&vnn->public_address),
 				    vnn->public_netmask_bits,
@@ -3274,6 +3285,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
 				  vnn->public_netmask_bits);
 		release_kill_clients(ctdb, &vnn->public_address);
 		ctdb_vnn_unassign_iface(ctdb, vnn);
+		vnn->update_in_flight = false;
 		count++;
 	}
 
diff --git a/tools/ctdb.c b/tools/ctdb.c
index ebbe84e..7979657 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -1453,6 +1453,14 @@ static int control_one_scriptstatus(struct ctdb_context *ctdb,
 	for (i=0; i<script_status->num_scripts; i++) {
 		const char *status = NULL;
 
+		/* The ETIME status is ignored for certain events.
+		 * In that case the status is 0, but endtime is not set.
+		 */
+		if (script_status->scripts[i].status == 0 &&
+		    timeval_is_zero(&script_status->scripts[i].finished)) {
+			script_status->scripts[i].status = -ETIME;
+		}
+
 		switch (script_status->scripts[i].status) {
 		case -ETIME:
 			status = "TIMEDOUT";


-- 
CTDB repository



More information about the samba-cvs mailing list