Rev 497: choose the most connected node first in http://samba.org/~tridge/ctdb

tridge at samba.org tridge at samba.org
Thu Jun 7 09:17:27 GMT 2007


------------------------------------------------------------
revno: 497
revision-id: tridge at samba.org-20070607091727-nyesysrrvcjrbgf3
parent: tridge at samba.org-20070607083937-cr7d8z9lf4cyxckz
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge
timestamp: Thu 2007-06-07 19:17:27 +1000
message:
  choose the most connected node first
modified:
  common/ctdb_recoverd.c         recoverd.c-20070503213540-bvxuyd9jm1f7ig90-1
=== modified file 'common/ctdb_recoverd.c'
--- a/common/ctdb_recoverd.c	2007-06-07 08:39:37 +0000
+++ b/common/ctdb_recoverd.c	2007-06-07 09:17:27 +0000
@@ -790,11 +790,67 @@
 }
 
 
+/*
+  elections are won by first checking the number of connected nodes, then
+  the priority time, then the vnn
+ */
 struct election_message {
+	uint32_t num_connected;
+	struct timeval priority_time;
 	uint32_t vnn;
-	struct timeval priority_time;
 };
 
+/*
+  form this node's election data (vnn, priority time, connected node count)
+ */
+static void ctdb_election_data(struct ctdb_recoverd *rec, struct election_message *em)
+{
+	int ret, i;
+	struct ctdb_node_map *nodemap;
+	struct ctdb_context *ctdb = rec->ctdb;
+
+	ZERO_STRUCTP(em);
+
+	em->vnn = rec->ctdb->vnn;
+	em->priority_time = rec->priority_time;
+
+	ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, rec, &nodemap);
+	if (ret != 0) {
+		return;
+	}
+
+	for (i=0;i<nodemap->num;i++) {
+		if (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
+			em->num_connected++;
+		}
+	}
+	talloc_free(nodemap);
+}
+
+/*
+  return true if this node's own election data beats the received data in em
+ */
+static bool ctdb_election_win(struct ctdb_recoverd *rec, struct election_message *em)
+{
+	struct election_message myem;
+	int cmp;
+
+	ctdb_election_data(rec, &myem);
+
+	/* try to use the most connected node */
+	cmp = (int)myem.num_connected - (int)em->num_connected;
+
+	/* then the longest running node, i.e. the earliest priority_time */
+	if (cmp == 0) {
+		cmp = timeval_compare(&em->priority_time, &myem.priority_time);
+	}
+	/* finally break ties on the vnn, the higher vnn winning */
+	if (cmp == 0) {
+		cmp = (int)myem.vnn - (int)em->vnn;
+	}
+
+	return cmp > 0;
+}
 
 /*
   send out an election request
@@ -809,8 +865,7 @@
 	
 	srvid = CTDB_SRVID_RECOVERY;
 
-	emsg.vnn = vnn;
-	emsg.priority_time = rec->priority_time;
+	ctdb_election_data(rec, &emsg);
 
 	election_data.dsize = sizeof(struct election_message);
 	election_data.dptr  = (unsigned char *)&emsg;
@@ -821,7 +876,7 @@
 	 */
 	ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), vnn, vnn);
 	if (ret != 0) {
-		DEBUG(0, (__location__ " failed to send recmaster election request"));
+		DEBUG(0, (__location__ " failed to send recmaster election request\n"));
 		return -1;
 	}
 
@@ -843,16 +898,14 @@
 	int ret;
 	struct election_message *em = (struct election_message *)data.dptr;
 	TALLOC_CTX *mem_ctx;
-	int cmp;
 
 	mem_ctx = talloc_new(ctdb);
-		
+
 	/* someone called an election. check their election data
 	   and if we disagree and we would rather be the elected node, 
 	   send a new election message to all other nodes
 	 */
-	cmp = timeval_compare(&em->priority_time, &rec->priority_time);
-	if (cmp > 0 || (cmp == 0 && em->vnn > ctdb->vnn)) {
+	if (ctdb_election_win(rec, em)) {
 		ret = send_election_request(rec, mem_ctx, ctdb_get_vnn(ctdb));
 		if (ret!=0) {
 			DEBUG(0, (__location__ " failed to initiate recmaster election"));



More information about the samba-cvs mailing list