Rev 678: make election handling much more scalable in
http://samba.org/~tridge/ctdb
tridge at samba.org
tridge at samba.org
Mon Nov 12 23:27:45 GMT 2007
------------------------------------------------------------
revno: 678
revision-id:tridge at samba.org-20071112232744-kq8gyv0g8w1qoksv
parent: tridge at samba.org-20071112021015-2vs75yqqbllm60db
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge.stable
timestamp: Tue 2007-11-13 10:27:44 +1100
message:
make election handling much more scalable
modified:
server/ctdb_recoverd.c recoverd.c-20070503213540-bvxuyd9jm1f7ig90-1
=== modified file 'server/ctdb_recoverd.c'
--- a/server/ctdb_recoverd.c 2007-10-22 02:34:08 +0000
+++ b/server/ctdb_recoverd.c 2007-11-12 23:27:44 +0000
@@ -47,6 +47,8 @@
bool need_takeover_run;
bool need_recovery;
uint32_t node_flags;
+ struct timed_event *send_election_te;
+ struct timed_event *election_timeout;
};
#define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
@@ -714,6 +716,29 @@
}
}
+/*
+ called when an election times out (ends)
+ */
+static void ctdb_election_timeout(struct event_context *ev, struct timed_event *te,
+ struct timeval t, void *p)
+{
+ struct ctdb_recoverd *rec = talloc_get_type(p, struct ctdb_recoverd);
+ rec->election_timeout = NULL;
+}
+
+
+/*
+ wait for an election to finish. It finished election_timeout seconds after
+ the last election packet is received
+ */
+static void ctdb_wait_election(struct ctdb_recoverd *rec)
+{
+ struct ctdb_context *ctdb = rec->ctdb;
+ while (rec->election_timeout) {
+ event_loop_once(ctdb->ev);
+ }
+}
+
/*
update our local flags from all remote connected nodes.
@@ -1099,7 +1124,7 @@
/*
send out an election request
*/
-static int send_election_request(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx, uint32_t pnn)
+static int send_election_request(struct ctdb_recoverd *rec, uint32_t pnn)
{
int ret;
TDB_DATA election_data;
@@ -1156,6 +1181,24 @@
talloc_free(tmp_ctx);
}
+
+/*
+ we think we are winning the election - send a broadcast election request
+ */
+static void election_send_request(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
+{
+ struct ctdb_recoverd *rec = talloc_get_type(p, struct ctdb_recoverd);
+ int ret;
+
+ ret = send_election_request(rec, ctdb_get_pnn(rec->ctdb));
+ if (ret != 0) {
+ DEBUG(0,("Failed to send election request!\n"));
+ }
+
+ talloc_free(rec->send_election_te);
+ rec->send_election_te = NULL;
+}
+
/*
handler for recovery master elections
*/
@@ -1167,6 +1210,12 @@
struct election_message *em = (struct election_message *)data.dptr;
TALLOC_CTX *mem_ctx;
+ /* we got an election packet - update the timeout for the election */
+ talloc_free(rec->election_timeout);
+ rec->election_timeout = event_add_timed(ctdb->ev, ctdb,
+ timeval_current_ofs(ctdb->tunable.election_timeout, 0),
+ ctdb_election_timeout, rec);
+
mem_ctx = talloc_new(ctdb);
/* someone called an election. check their election data
@@ -1174,14 +1223,19 @@
send a new election message to all other nodes
*/
if (ctdb_election_win(rec, em)) {
- ret = send_election_request(rec, mem_ctx, ctdb_get_pnn(ctdb));
- if (ret!=0) {
- DEBUG(0, (__location__ " failed to initiate recmaster election"));
+ if (!rec->send_election_te) {
+ rec->send_election_te = event_add_timed(ctdb->ev, rec,
+ timeval_current_ofs(0, 500000),
+ election_send_request, rec);
}
talloc_free(mem_ctx);
/*unban_all_nodes(ctdb);*/
return;
}
+
+ /* we didn't win */
+ talloc_free(rec->send_election_te);
+ rec->send_election_te = NULL;
/* release the recmaster lock */
if (em->pnn != ctdb->pnn &&
@@ -1225,15 +1279,20 @@
DEBUG(0, (__location__ " Unable to set recovery mode to active on cluster\n"));
return;
}
-
- ret = send_election_request(rec, mem_ctx, pnn);
+
+ talloc_free(rec->election_timeout);
+ rec->election_timeout = event_add_timed(ctdb->ev, ctdb,
+ timeval_current_ofs(ctdb->tunable.election_timeout, 0),
+ ctdb_election_timeout, rec);
+
+ ret = send_election_request(rec, pnn);
if (ret!=0) {
DEBUG(0, (__location__ " failed to initiate recmaster election"));
return;
}
/* wait for a few seconds to collect all responses */
- ctdb_wait_timeout(ctdb, ctdb->tunable.election_timeout);
+ ctdb_wait_election(rec);
}
@@ -1551,6 +1610,11 @@
/* we only check for recovery once every second */
ctdb_wait_timeout(ctdb, ctdb->tunable.recover_interval);
+ if (rec->election_timeout) {
+ /* an election is in progress */
+ goto again;
+ }
+
/* get relevant tunables */
ret = ctdb_ctrl_get_all_tunables(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &ctdb->tunable);
if (ret != 0) {
More information about the samba-cvs
mailing list