[SCM] CTDB repository - branch 1.2 updated - ctdb-1.9.1-486-gfa72d7d

Ronnie Sahlberg sahlberg at samba.org
Sun Oct 16 22:22:18 MDT 2011


The branch, 1.2 has been updated
       via  fa72d7d50beb7155234a8e15fa5c8443a3c34eaf (commit)
      from  1d48b3f6cb27d84425863f576c7bbd3e1a8f9863 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2


- Log -----------------------------------------------------------------
commit fa72d7d50beb7155234a8e15fa5c8443a3c34eaf
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Oct 17 12:11:54 2011 +1100

    S1031575
    
    When performing addip we don't allow "gratuitous failover", which can, due to timing and depending on the order in which "ctdb addip ..." is called on the nodes, lead to imperfect balancing of the ip addresses when adding several at the same time.
    
    This patch makes sure that once the ip address is added to a node, any node, this ip address is released from the node currently hosting the address, and there will possibly be a failover after a short while when the ip address is rebalanced.
    
    This means that performing "ctdb addip ..." to add an address to a new node could affect/disrupt the i/o on this address on the node currently hosting it, but
    it will mean we do get a more even distribution after the assignment.
    
    This is based on the assumption that it will be more common to "add a completely new ip to a set of nodes" rather than "add an ip address that is already in service to a brand new node".

-----------------------------------------------------------------------

Summary of changes:
 tools/ctdb.c |   92 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 79 insertions(+), 13 deletions(-)


Changeset truncated at 500 lines:

diff --git a/tools/ctdb.c b/tools/ctdb.c
index 6cada94..d49bc8f 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -1325,6 +1325,80 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
 	return 0;
 }
 
+static int rebalance_ip(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
+{
+	struct ctdb_public_ip ip;
+	int ret;
+	uint32_t *nodes;
+	uint32_t disable_time;
+	TDB_DATA data;
+	struct ctdb_node_map *nodemap=NULL;
+	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
+
+	disable_time = 30;
+	data.dptr  = (uint8_t*)&disable_time;
+	data.dsize = sizeof(disable_time);
+	ret = ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED, CTDB_SRVID_DISABLE_IP_CHECK, data);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,("Failed to send message to disable ipcheck\n"));
+		return -1;
+	}
+
+	ip.pnn  = -1;
+	ip.addr = *addr;
+
+	data.dptr  = (uint8_t *)&ip;
+	data.dsize = sizeof(ip);
+
+	ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, tmp_ctx, &nodemap);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn));
+		talloc_free(tmp_ctx);
+		return ret;
+	}
+
+       	nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
+	ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_RELEASE_IP,
+					nodes, 0,
+					LONGTIMELIMIT(),
+					false, data,
+					NULL, NULL,
+					NULL);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,("Failed to release IP on nodes\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	talloc_free(tmp_ctx);
+	return 0;
+}
+
+/*
+  release an ip form all nodes and have it re-assigned by recd
+ */
+static int control_rebalanceip(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+	ctdb_sock_addr addr;
+
+	if (argc < 1) {
+		usage();
+		return -1;
+	}
+
+	if (parse_ip(argv[0], NULL, 0, &addr) == 0) {
+		DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s'\n", argv[0]));
+		return -1;
+	}
+
+	if (rebalance_ip(ctdb, &addr) != 0) {
+		DEBUG(DEBUG_ERR,("Error when trying to reassign ip\n"));
+		return -1;
+	}
+
+	return 0;
+}
+
 void getips_store_callback(void *param, void *data)
 {
 	struct ctdb_public_ip *node_ip = (struct ctdb_public_ip *)data;
@@ -1661,7 +1735,7 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
 
 
 	/* Dont timeout. This command waits for an ip reallocation
-	   which sometimes can take wuite a while if there has
+	   which sometimes can take quite a while if there has
 	   been a recent recovery
 	*/
 	alarm(0);
@@ -1689,18 +1763,9 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
 		return ret;
 	}
 
-	do {
-		ret = control_ipreallocate(ctdb, argc, argv);
-		if (ret != 0) {
-			DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u. Wait 3 seconds and try again.\n", options.pnn));
-			sleep(3);
-			retries++;
-		}
-	} while (retries < 5 && ret != 0);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u. Giving up.\n", options.pnn));
-		talloc_free(tmp_ctx);
-		return ret;
+	if (rebalance_ip(ctdb, &addr) != 0) {
+		DEBUG(DEBUG_ERR,("Error when trying to reassign ip\n"));
+		return -1;
 	}
 
 	talloc_free(tmp_ctx);
@@ -4925,6 +4990,7 @@ static const struct {
 	{ "listnodes",       control_listnodes,		false,	true, "list all nodes in the cluster"},
 	{ "reloadnodes",     control_reload_nodes_file,	false,	false, "reload the nodes file and restart the transport on all nodes"},
 	{ "moveip",          control_moveip,		false,	false, "move/failover an ip address to another node", "<ip> <node>"},
+	{ "rebalanceip",     control_rebalanceip,	false,	false, "release an ip from the node and let recd rebalance it", "<ip>"},
 	{ "addip",           control_addip,		true,	false, "add a ip address to a node", "<ip/mask> <iface>"},
 	{ "delip",           control_delip,		false,	false, "delete an ip address from a node", "<ip>"},
 	{ "eventscript",     control_eventscript,	true,	false, "run the eventscript with the given parameters on a node", "<arguments>"},


-- 
CTDB repository


More information about the samba-cvs mailing list