[SCM] CTDB repository - branch master updated - 08e6c100971e1d72edd050f7f502145363f3c1ab

Andrew Tridgell tridge at samba.org
Tue May 5 06:13:18 GMT 2009


The branch, master has been updated
       via  08e6c100971e1d72edd050f7f502145363f3c1ab (commit)
       via  8ae93da7f6341f1cddb15786df67644f29648b9b (commit)
       via  b06d4c6ee7da97aaa810aa4c95952b6505cf720f (commit)
       via  d26d522fd0a1df7f06be056e7458ed5349a00658 (commit)
       via  d02c0e9371981bc6830f792d65e9f08832656cf0 (commit)
       via  87e674f93728fb20209b6473f07ca323530ddbdc (commit)
       via  478f43bc3c970edc7c8db3e34095774261a48056 (commit)
       via  a63c79318678abe99d2a36fe4465e63eafc008b7 (commit)
       via  8628402d4cad4b9ef580151b85e36b6a5909e56c (commit)
       via  c61c4f7a7628232e9094d463275ba6abf7b79941 (commit)
       via  460e6b2402fc9bca9e9835e3a203278ac0bcaf8f (commit)
       via  fb582515c674b76e06aed28d04f24ae3a849cfe2 (commit)
       via  b5ef99f14fb02deb128b4b0956508fc980886407 (commit)
       via  ac5557659e667da5f3a33cc612e06a21396fce2d (commit)
       via  968739ea9af2aaba90e6bb4569ae4d9fb60b79aa (commit)
       via  2031fd8b83bd832d1104128c3faddf209e6bb23e (commit)
       via  0563b3c6d9787164475538fbc1342f8c6be62eac (commit)
       via  3aba067b7fa7d77836901f82deee441a07b3a15d (commit)
       via  0a7dbe56d1b4545e9f45c29ea719d24b3b3ada3e (commit)
       via  767e92f64b63a6bcaa36efe633e8a3e55a803e79 (commit)
       via  7b6bada79b4e0534b261440438b4256bc446191f (commit)
      from  c62e71fa8b6170245dbb66057b441b94c10e2c5d (commit)

http://gitweb.samba.org/?p=tridge/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 08e6c100971e1d72edd050f7f502145363f3c1ab
Merge: c62e71fa8b6170245dbb66057b441b94c10e2c5d 8ae93da7f6341f1cddb15786df67644f29648b9b
Author: Andrew Tridgell <tridge at samba.org>
Date:   Tue May 5 16:12:04 2009 +1000

    merged 1.0.69 into head

-----------------------------------------------------------------------

Summary of changes:
 client/ctdb_client.c    |    4 +-
 include/ctdb_private.h  |   16 ++++--
 packaging/RPM/ctdb.spec |   28 +++++++++++
 server/ctdb_control.c   |    6 ++-
 server/ctdb_traverse.c  |  124 ++++++++++++++++++++++++++++++++++++++--------
 5 files changed, 147 insertions(+), 31 deletions(-)


Changeset truncated at 500 lines:

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 3bdb4b2..8c3bb74 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -2399,10 +2399,10 @@ int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32
 					timeout, false, data,
 					NULL, NULL,
 					NULL) != 0) {
-		DEBUG(DEBUG_ERR, (__location__ " ctdb_control to disable node failed\n"));
+		DEBUG(DEBUG_ERR, (__location__ " ctdb_control to update nodeflags failed\n"));
 
 		talloc_free(tmp_ctx);
-		return -1;
+		return 0;
 	}
 
 	talloc_free(tmp_ctx);
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index eac27f7..9ba4cc0 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -430,6 +430,7 @@ struct ctdb_db_context {
 	struct ctdb_registered_call *calls; /* list of registered calls */
 	uint32_t seqnum;
 	struct timed_event *te;
+	struct ctdb_traverse_local_handle *traverse;
 };
 
 
@@ -555,11 +556,12 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
 		    CTDB_CONTROL_TAKEOVER_IP             = 89,
 		    CTDB_CONTROL_GET_PUBLIC_IPS          = 90,
 		    CTDB_CONTROL_GET_NODEMAP             = 91,
-		    CTDB_CONTROL_EVENT_SCRIPT_INIT       = 92,
-		    CTDB_CONTROL_EVENT_SCRIPT_START      = 93,
-		    CTDB_CONTROL_EVENT_SCRIPT_STOP       = 94,
-		    CTDB_CONTROL_EVENT_SCRIPT_FINISHED   = 95,
-		    CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS = 96,
+		    CTDB_CONTROL_TRAVERSE_KILL		 = 92,
+		    CTDB_CONTROL_EVENT_SCRIPT_INIT       = 93,
+		    CTDB_CONTROL_EVENT_SCRIPT_START      = 94,
+		    CTDB_CONTROL_EVENT_SCRIPT_STOP       = 95,
+		    CTDB_CONTROL_EVENT_SCRIPT_FINISHED   = 96,
+		    CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS = 97,
 };	
 
 /*
@@ -1118,9 +1120,11 @@ struct ctdb_client_call_state {
 
 
 int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA indata, 
-				    TDB_DATA *outdata, uint32_t srcnode);
+				    TDB_DATA *outdata, uint32_t srcnode, uint32_t client_id);
 int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata);
 int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata);
+int32_t ctdb_control_traverse_kill(struct ctdb_context *ctdb, TDB_DATA indata, 
+				    TDB_DATA *outdata, uint32_t srcnode);
 
 int ctdb_dispatch_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data);
 
diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec
index bbd467b..ac4f908 100644
--- a/packaging/RPM/ctdb.spec
+++ b/packaging/RPM/ctdb.spec
@@ -131,6 +131,13 @@ fi
 %{_libdir}/pkgconfig/ctdb.pc
 
 %changelog
+<<<<<<< HEAD:packaging/RPM/ctdb.spec
+* Tue May 5 2009 : Version 1.0.69-6
+ - Try to add back the routes in 99.routing both for the takeip and also the
+   releaseip event, since during releaseip too many addresses may have been
+   deleted, causing routes to be lost.
+   See 10.interfaces for the workaround for "ip addr del" deleting too
+   many addresses.
 * Fri May 1 2009 : Version 1.0.80
  - change init shutdown level to 01 for ctdb so it stops before any of the other services
  - if we can not pull a database from a remote node during recovery, mark that node as a culprit so it becomes banned
@@ -139,6 +146,24 @@ fi
  - dont unconditionally kill off ctdb and restrat it on "service ctdb start". Fail "service ctdb start" with an error if ctdb is already running.
  - Add a new tunable "VerifyRecoveryLock" that can be set to 0 to prevent the main ctdb daemon to verify that the recovery master has locked the reclock file correctly before allowing it to set the recovery mode to active.
  - fix a cosmetic bug with ctdb statistics where certain counters could become negative.
+* Fri May 1 2009 : Version 1.0.69-5
+ - Add a new variable VerifyRecoveryLock. When set to 0 this will skip
+   the test inside the main where it verifies that the recovery master does
+   hold the lock to the reclock file while performing a recovery.
+ - Change the timeout for waiting for a reclock child process to terminate to
+   15 seconds and increase the logging of this potentially fatal condition.
+* Sun Apr 26 2009 : Version 1.0.69_4
+ - Add TDB_NO_NESTING to the tdb layer to prevent transaction nesting.
+ - Make sure that when we start a recovery transaction that this is not a
+   nested transaction.
+ - Add a tuneable RecoveryDropAllIPs that specifies after how long being in
+   recovery mode a node will elect to drop all its public addresses.
+* Fri Apr 24 2009 : Version 1.0.69_3
+ - Make sure that if during recovery a node is stuck and does not reply to
+   pull_db requests that we eventually ban this node from the recovery master.
+* Thu Apr 23 2009 : Version 1.0.69_2
+ - In the recovery daemon we dont need to check the nodemap status
+   of banned nodes.
 * Wed Apr 8 2009 : Version 1.0.79
  - From Mathieu Parent: add a ctdb pkgconfig file
  - Fix bug 6250
@@ -184,6 +209,9 @@ fi
    using "1.0.70" as a release and "-1" as the revision instead of as
    previously using "1.0" as release and ".70" as the revision.
    By Michael Adams.
+* Wed Feb 5 2009 : Version 1.0.69_1
+ - Dont check the result of the modflags control, to allow compatibility
+   with earlier versions of ctdb
 * Thu Dec 18 2008 : Version 1.0.69
  - Various fixes to scripts by M Adam
  - Dont call ctdb_fatal() when the transport is down during shutdown
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index ac77696..d5d4766 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -217,7 +217,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 
 	case CTDB_CONTROL_TRAVERSE_START:
 		CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_traverse_start));
-		return ctdb_control_traverse_start(ctdb, indata, outdata, srcnode);
+		return ctdb_control_traverse_start(ctdb, indata, outdata, srcnode, client_id);
 
 	case CTDB_CONTROL_TRAVERSE_ALL:
 		return ctdb_control_traverse_all(ctdb, indata, outdata);
@@ -225,6 +225,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	case CTDB_CONTROL_TRAVERSE_DATA:
 		return ctdb_control_traverse_data(ctdb, indata, outdata);
 
+	case CTDB_CONTROL_TRAVERSE_KILL:
+		CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_traverse_start));
+		return ctdb_control_traverse_kill(ctdb, indata, outdata, srcnode);
+
 	case CTDB_CONTROL_REGISTER_SRVID:
 		return daemon_register_message_handler(ctdb, client_id, srvid);
 
diff --git a/server/ctdb_traverse.c b/server/ctdb_traverse.c
index 35ae318..60a8e90 100644
--- a/server/ctdb_traverse.c
+++ b/server/ctdb_traverse.c
@@ -24,6 +24,7 @@
 #include "db_wrap.h"
 #include "lib/tdb/include/tdb.h"
 #include "../include/ctdb_private.h"
+#include "lib/util/dlinklist.h"
 
 typedef void (*ctdb_traverse_fn_t)(void *private_data, TDB_DATA key, TDB_DATA data);
 
@@ -32,9 +33,12 @@ typedef void (*ctdb_traverse_fn_t)(void *private_data, TDB_DATA key, TDB_DATA da
   terminate the traverse
  */
 struct ctdb_traverse_local_handle {
+	struct ctdb_traverse_local_handle *next, *prev;
 	struct ctdb_db_context *ctdb_db;
 	int fd[2];
 	pid_t child;
+	uint64_t srvid;
+	uint32_t client_reqid;
 	void *private_data;
 	ctdb_traverse_fn_t callback;
 	struct timeval start_time;
@@ -73,6 +77,7 @@ static void ctdb_traverse_local_handler(uint8_t *rawdata, size_t length, void *p
  */
 static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
 {
+	DLIST_REMOVE(h->ctdb_db->traverse, h);
 	kill(h->child, SIGKILL);
 	return 0;
 }
@@ -114,6 +119,14 @@ static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DAT
 	return 0;
 }
 
+struct traverse_all_state {
+	struct ctdb_context *ctdb;
+	struct ctdb_traverse_local_handle *h;
+	uint32_t reqid;
+	uint32_t srcnode;
+	uint32_t client_reqid;
+	uint64_t srvid;
+};
 
 /*
   setup a non-blocking traverse of a local ltdb. The callback function
@@ -124,12 +137,12 @@ static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DAT
  */
 static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_context *ctdb_db,
 							      ctdb_traverse_fn_t callback,
-							      void *private_data)
+							      struct traverse_all_state *all_state)
 {
 	struct ctdb_traverse_local_handle *h;
 	int ret;
 
-	h = talloc_zero(ctdb_db, struct ctdb_traverse_local_handle);
+	h = talloc_zero(all_state, struct ctdb_traverse_local_handle);
 	if (h == NULL) {
 		return NULL;
 	}
@@ -151,8 +164,10 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 	}
 
 	h->callback = callback;
-	h->private_data = private_data;
+	h->private_data = all_state;
 	h->ctdb_db = ctdb_db;
+	h->client_reqid = all_state->client_reqid;
+	h->srvid = all_state->srvid;
 
 	if (h->child == 0) {
 		/* start the traverse in the child */
@@ -164,6 +179,8 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 	close(h->fd[1]);
 	talloc_set_destructor(h, traverse_local_destructor);
 
+	DLIST_ADD(ctdb_db->traverse, h);
+
 	/*
 	  setup a packet queue between the child and the parent. This
 	  copes with all the async and packet boundary issues
@@ -202,6 +219,8 @@ struct ctdb_traverse_all {
 	uint32_t db_id;
 	uint32_t reqid;
 	uint32_t pnn;
+	uint32_t client_reqid;
+	uint64_t srvid;
 };
 
 /* called when a traverse times out */
@@ -216,6 +235,17 @@ static void ctdb_traverse_all_timeout(struct event_context *ev, struct timed_eve
 	talloc_free(state);
 }
 
+
+struct traverse_start_state {
+	struct ctdb_context *ctdb;
+	struct ctdb_traverse_all_handle *h;
+	uint32_t srcnode;
+	uint32_t reqid;
+	uint32_t db_id;
+	uint64_t srvid;
+};
+
+
 /*
   setup a cluster-wide non-blocking traverse of a ctdb. The
   callback function will be called on every record in the local
@@ -226,7 +256,7 @@ static void ctdb_traverse_all_timeout(struct event_context *ev, struct timed_eve
  */
 static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_context *ctdb_db,
 								 ctdb_traverse_fn_t callback,
-								 void *private_data)
+								 struct traverse_start_state *start_state)
 {
 	struct ctdb_traverse_all_handle *state;
 	struct ctdb_context *ctdb = ctdb_db->ctdb;
@@ -244,7 +274,7 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_
 	state->ctdb_db      = ctdb_db;
 	state->reqid        = ctdb_reqid_new(ctdb_db->ctdb, state);
 	state->callback     = callback;
-	state->private_data = private_data;
+	state->private_data = start_state;
 	state->null_count   = 0;
 	
 	talloc_set_destructor(state, ctdb_traverse_all_destructor);
@@ -252,6 +282,8 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_
 	r.db_id = ctdb_db->db_id;
 	r.reqid = state->reqid;
 	r.pnn   = ctdb->pnn;
+	r.client_reqid = start_state->reqid;
+	r.srvid = start_state->srvid;
 
 	data.dptr = (uint8_t *)&r;
 	data.dsize = sizeof(r);
@@ -300,13 +332,6 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_
 	return state;
 }
 
-struct traverse_all_state {
-	struct ctdb_context *ctdb;
-	struct ctdb_traverse_local_handle *h;
-	uint32_t reqid;
-	uint32_t srcnode;
-};
-
 /*
   called for each record during a traverse all 
  */
@@ -351,7 +376,7 @@ int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb, TDB_DATA data, TDB_
 	struct ctdb_db_context *ctdb_db;
 
 	if (data.dsize != sizeof(struct ctdb_traverse_all)) {
-		DEBUG(DEBUG_ERR,("Invalid size in ctdb_control_traverse_all\n"));
+		DEBUG(DEBUG_ERR,(__location__ " Invalid size in ctdb_control_traverse_all\n"));
 		return -1;
 	}
 
@@ -368,6 +393,8 @@ int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb, TDB_DATA data, TDB_
 	state->reqid = c->reqid;
 	state->srcnode = c->pnn;
 	state->ctdb = ctdb;
+	state->client_reqid = c->client_reqid;
+	state->srvid = c->srvid;
 
 	state->h = ctdb_traverse_local(ctdb_db, traverse_all_callback, state);
 	if (state->h == NULL) {
@@ -431,13 +458,55 @@ int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb, TDB_DATA data, TDB
 	return 0;
 }	
 
-struct traverse_start_state {
-	struct ctdb_context *ctdb;
-	struct ctdb_traverse_all_handle *h;
-	uint32_t srcnode;
-	uint32_t reqid;
-	uint64_t srvid;
-};
+/*
+  kill an in-progress traverse, used when a client disconnects
+ */
+int32_t ctdb_control_traverse_kill(struct ctdb_context *ctdb, TDB_DATA data, 
+				   TDB_DATA *outdata, uint32_t srcnode)
+{
+	struct ctdb_traverse_start *d = (struct ctdb_traverse_start *)data.dptr;
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_traverse_local_handle *t;
+
+	ctdb_db = find_ctdb_db(ctdb, d->db_id);
+	if (ctdb_db == NULL) {
+		return -1;
+	}
+
+	for (t=ctdb_db->traverse; t; t=t->next) {
+		if (t->client_reqid == d->reqid &&
+		    t->srvid == d->srvid) {
+			talloc_free(t);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+  this is called when a client disconnects during a traverse
+  we need to notify all the nodes taking part in the search that they
+  should kill their traverse children
+ */
+static int ctdb_traverse_start_destructor(struct traverse_start_state *state)
+{
+	struct ctdb_traverse_start r;
+	TDB_DATA data;
+
+	r.db_id = state->db_id;
+	r.reqid = state->reqid;
+	r.srvid = state->srvid;
+
+	data.dptr = (uint8_t *)&r;
+	data.dsize = sizeof(r);
+
+	ctdb_daemon_send_control(state->ctdb, CTDB_BROADCAST_CONNECTED, 0, 
+				 CTDB_CONTROL_TRAVERSE_KILL, 
+				 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
+	return 0;
+}
 
 /*
   callback which sends records as messages to the client
@@ -461,19 +530,27 @@ static void traverse_start_callback(void *p, TDB_DATA key, TDB_DATA data)
 	ctdb_dispatch_message(state->ctdb, state->srvid, cdata);
 	if (key.dsize == 0 && data.dsize == 0) {
 		/* end of traverse */
+		talloc_set_destructor(state, NULL);
 		talloc_free(state);
 	}
 }
 
+
 /*
   start a traverse_all - called as a control from a client
  */
 int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA data, 
-				    TDB_DATA *outdata, uint32_t srcnode)
+				    TDB_DATA *outdata, uint32_t srcnode, uint32_t client_id)
 {
 	struct ctdb_traverse_start *d = (struct ctdb_traverse_start *)data.dptr;
 	struct traverse_start_state *state;
 	struct ctdb_db_context *ctdb_db;
+	struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
+
+	if (client == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " No client found\n"));
+		return -1;		
+	}
 
 	if (data.dsize != sizeof(*d)) {
 		DEBUG(DEBUG_ERR,("Bad record size in ctdb_control_traverse_start\n"));
@@ -485,7 +562,7 @@ int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA data,
 		return -1;
 	}
 
-	state = talloc(ctdb_db, struct traverse_start_state);
+	state = talloc(client, struct traverse_start_state);
 	if (state == NULL) {
 		return -1;
 	}
@@ -493,6 +570,7 @@ int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA data,
 	state->srcnode = srcnode;
 	state->reqid = d->reqid;
 	state->srvid = d->srvid;
+	state->db_id = d->db_id;
 	state->ctdb = ctdb;
 
 	state->h = ctdb_daemon_traverse_all(ctdb_db, traverse_start_callback, state);
@@ -501,5 +579,7 @@ int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA data,
 		return -1;
 	}
 
+	talloc_set_destructor(state, ctdb_traverse_start_destructor);
+
 	return 0;
 }


-- 
CTDB repository


More information about the samba-cvs mailing list