[SCM] CTDB repository - branch 1.0.69 updated - ctdb-1.0.69-21-g9a00035

Ronnie Sahlberg sahlberg at samba.org
Tue May 5 06:33:02 GMT 2009


The branch, 1.0.69, has been updated
       via  9a00035c9c815736d0a5909dfc4de7f4c3ac41d4 (commit)
       via  8ae93da7f6341f1cddb15786df67644f29648b9b (commit)
       via  b06d4c6ee7da97aaa810aa4c95952b6505cf720f (commit)
      from  d26d522fd0a1df7f06be056e7458ed5349a00658 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.0.69


- Log -----------------------------------------------------------------
commit 9a00035c9c815736d0a5909dfc4de7f4c3ac41d4
Author: root <root at rcn1.VSOFS1.COM>
Date:   Tue May 5 16:33:21 2009 +1000

    change the talloc hierarchy for the main traverse_start context and the individual traverse_all handles

commit 8ae93da7f6341f1cddb15786df67644f29648b9b
Author: Andrew Tridgell <tridge at samba.org>
Date:   Tue May 5 12:17:21 2009 +1000

    fixed an error with newer autotools versions

commit b06d4c6ee7da97aaa810aa4c95952b6505cf720f
Author: Andrew Tridgell <tridge at samba.org>
Date:   Tue May 5 12:17:07 2009 +1000

    fixed a problem with clients disconnecting during a traverse
    
    When a client (such as smbstatus) is killed, it may have outstanding
    traverse children on remote nodes. We need to catch the client
    disconnect in ctdbd and send a control to all nodes telling them to
    kill those outstanding traverse children.

-----------------------------------------------------------------------

Summary of changes:
 include/ctdb_private.h       |    6 ++-
 lib/replace/autoconf-2.60.m4 |    2 +
 server/ctdb_control.c        |    6 ++-
 server/ctdb_traverse.c       |  135 ++++++++++++++++++++++++++++++++---------
 4 files changed, 117 insertions(+), 32 deletions(-)


Changeset truncated at 500 lines:

diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 5121cd9..e14a272 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -439,6 +439,7 @@ struct ctdb_db_context {
 	struct ctdb_registered_call *calls; /* list of registered calls */
 	uint32_t seqnum;
 	struct timed_event *te;
+	struct ctdb_traverse_local_handle *traverse;
 };
 
 
@@ -564,6 +565,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
 		    CTDB_CONTROL_TAKEOVER_IP             = 89,
 		    CTDB_CONTROL_GET_PUBLIC_IPS          = 90,
 		    CTDB_CONTROL_GET_NODEMAP             = 91,
+		    CTDB_CONTROL_TRAVERSE_KILL		 = 92,
 };	
 
 /*
@@ -1131,9 +1133,11 @@ struct ctdb_client_call_state {
 
 
 int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA indata, 
-				    TDB_DATA *outdata, uint32_t srcnode);
+				    TDB_DATA *outdata, uint32_t srcnode, uint32_t client_id);
 int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata);
 int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata);
+int32_t ctdb_control_traverse_kill(struct ctdb_context *ctdb, TDB_DATA indata, 
+				    TDB_DATA *outdata, uint32_t srcnode);
 
 int ctdb_dispatch_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data);
 
diff --git a/lib/replace/autoconf-2.60.m4 b/lib/replace/autoconf-2.60.m4
index acdcd38..2d5dbc1 100644
--- a/lib/replace/autoconf-2.60.m4
+++ b/lib/replace/autoconf-2.60.m4
@@ -179,6 +179,7 @@ AC_DEFUN([AC_PROG_CC_C99],
 # ------------------------
 # Enable extensions on systems that normally disable them,
 # typically due to standards-conformance issues.
+m4_ifndef([AC_USE_SYSTEM_EXTENSIONS],[
 AC_DEFUN([AC_USE_SYSTEM_EXTENSIONS],
 [
   AC_BEFORE([$0], [AC_COMPILE_IFELSE])
@@ -208,3 +209,4 @@ AC_DEFUN([AC_USE_SYSTEM_EXTENSIONS],
     AC_DEFINE([__EXTENSIONS__])
   AC_DEFINE([_POSIX_PTHREAD_SEMANTICS])
 ])
+])
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 15f5000..73d5d46 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -217,7 +217,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 
 	case CTDB_CONTROL_TRAVERSE_START:
 		CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_traverse_start));
-		return ctdb_control_traverse_start(ctdb, indata, outdata, srcnode);
+		return ctdb_control_traverse_start(ctdb, indata, outdata, srcnode, client_id);
 
 	case CTDB_CONTROL_TRAVERSE_ALL:
 		return ctdb_control_traverse_all(ctdb, indata, outdata);
@@ -225,6 +225,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	case CTDB_CONTROL_TRAVERSE_DATA:
 		return ctdb_control_traverse_data(ctdb, indata, outdata);
 
+	case CTDB_CONTROL_TRAVERSE_KILL:
+		CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_traverse_start));
+		return ctdb_control_traverse_kill(ctdb, indata, outdata, srcnode);
+
 	case CTDB_CONTROL_REGISTER_SRVID:
 		return daemon_register_message_handler(ctdb, client_id, srvid);
 
diff --git a/server/ctdb_traverse.c b/server/ctdb_traverse.c
index 35ae318..dbb4b4f 100644
--- a/server/ctdb_traverse.c
+++ b/server/ctdb_traverse.c
@@ -24,6 +24,7 @@
 #include "db_wrap.h"
 #include "lib/tdb/include/tdb.h"
 #include "../include/ctdb_private.h"
+#include "lib/util/dlinklist.h"
 
 typedef void (*ctdb_traverse_fn_t)(void *private_data, TDB_DATA key, TDB_DATA data);
 
@@ -32,9 +33,12 @@ typedef void (*ctdb_traverse_fn_t)(void *private_data, TDB_DATA key, TDB_DATA da
   terminate the traverse
  */
 struct ctdb_traverse_local_handle {
+	struct ctdb_traverse_local_handle *next, *prev;
 	struct ctdb_db_context *ctdb_db;
 	int fd[2];
 	pid_t child;
+	uint64_t srvid;
+	uint32_t client_reqid;
 	void *private_data;
 	ctdb_traverse_fn_t callback;
 	struct timeval start_time;
@@ -73,6 +77,7 @@ static void ctdb_traverse_local_handler(uint8_t *rawdata, size_t length, void *p
  */
 static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
 {
+	DLIST_REMOVE(h->ctdb_db->traverse, h);
 	kill(h->child, SIGKILL);
 	return 0;
 }
@@ -87,7 +92,6 @@ static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DAT
 	struct ctdb_rec_data *d;
 	struct ctdb_ltdb_header *hdr;
 
-	
 	hdr = (struct ctdb_ltdb_header *)data.dptr;
 
 	if (h->ctdb_db->persistent == 0) {
@@ -114,6 +118,14 @@ static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DAT
 	return 0;
 }
 
+struct traverse_all_state {
+	struct ctdb_context *ctdb;
+	struct ctdb_traverse_local_handle *h;
+	uint32_t reqid;
+	uint32_t srcnode;
+	uint32_t client_reqid;
+	uint64_t srvid;
+};
 
 /*
   setup a non-blocking traverse of a local ltdb. The callback function
@@ -124,12 +136,12 @@ static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DAT
  */
 static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_context *ctdb_db,
 							      ctdb_traverse_fn_t callback,
-							      void *private_data)
+							      struct traverse_all_state *all_state)
 {
 	struct ctdb_traverse_local_handle *h;
 	int ret;
 
-	h = talloc_zero(ctdb_db, struct ctdb_traverse_local_handle);
+	h = talloc_zero(all_state, struct ctdb_traverse_local_handle);
 	if (h == NULL) {
 		return NULL;
 	}
@@ -151,8 +163,10 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 	}
 
 	h->callback = callback;
-	h->private_data = private_data;
+	h->private_data = all_state;
 	h->ctdb_db = ctdb_db;
+	h->client_reqid = all_state->client_reqid;
+	h->srvid = all_state->srvid;
 
 	if (h->child == 0) {
 		/* start the traverse in the child */
@@ -164,6 +178,8 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 	close(h->fd[1]);
 	talloc_set_destructor(h, traverse_local_destructor);
 
+	DLIST_ADD(ctdb_db->traverse, h);
+
 	/*
 	  setup a packet queue between the child and the parent. This
 	  copes with all the async and packet boundary issues
@@ -202,6 +218,8 @@ struct ctdb_traverse_all {
 	uint32_t db_id;
 	uint32_t reqid;
 	uint32_t pnn;
+	uint32_t client_reqid;
+	uint64_t srvid;
 };
 
 /* called when a traverse times out */
@@ -210,12 +228,23 @@ static void ctdb_traverse_all_timeout(struct event_context *ev, struct timed_eve
 {
 	struct ctdb_traverse_all_handle *state = talloc_get_type(private_data, struct ctdb_traverse_all_handle);
 
+	DEBUG(DEBUG_ERR,(__location__ " Traverse all timeout on database:%s\n", state->ctdb_db->db_name));
 	state->ctdb->statistics.timeouts.traverse++;
 
 	state->callback(state->private_data, tdb_null, tdb_null);
-	talloc_free(state);
 }
 
+
+struct traverse_start_state {
+	struct ctdb_context *ctdb;
+	struct ctdb_traverse_all_handle *h;
+	uint32_t srcnode;
+	uint32_t reqid;
+	uint32_t db_id;
+	uint64_t srvid;
+};
+
+
 /*
   setup a cluster-wide non-blocking traverse of a ctdb. The
   callback function will be called on every record in the local
@@ -226,7 +255,7 @@ static void ctdb_traverse_all_timeout(struct event_context *ev, struct timed_eve
  */
 static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_context *ctdb_db,
 								 ctdb_traverse_fn_t callback,
-								 void *private_data)
+								 struct traverse_start_state *start_state)
 {
 	struct ctdb_traverse_all_handle *state;
 	struct ctdb_context *ctdb = ctdb_db->ctdb;
@@ -235,7 +264,7 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_
 	struct ctdb_traverse_all r;
 	uint32_t destination;
 
-	state = talloc(ctdb_db, struct ctdb_traverse_all_handle);
+	state = talloc(start_state, struct ctdb_traverse_all_handle);
 	if (state == NULL) {
 		return NULL;
 	}
@@ -244,7 +273,7 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_
 	state->ctdb_db      = ctdb_db;
 	state->reqid        = ctdb_reqid_new(ctdb_db->ctdb, state);
 	state->callback     = callback;
-	state->private_data = private_data;
+	state->private_data = start_state;
 	state->null_count   = 0;
 	
 	talloc_set_destructor(state, ctdb_traverse_all_destructor);
@@ -252,6 +281,8 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_
 	r.db_id = ctdb_db->db_id;
 	r.reqid = state->reqid;
 	r.pnn   = ctdb->pnn;
+	r.client_reqid = start_state->reqid;
+	r.srvid = start_state->srvid;
 
 	data.dptr = (uint8_t *)&r;
 	data.dsize = sizeof(r);
@@ -300,13 +331,6 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_
 	return state;
 }
 
-struct traverse_all_state {
-	struct ctdb_context *ctdb;
-	struct ctdb_traverse_local_handle *h;
-	uint32_t reqid;
-	uint32_t srcnode;
-};
-
 /*
   called for each record during a traverse all 
  */
@@ -351,7 +375,7 @@ int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb, TDB_DATA data, TDB_
 	struct ctdb_db_context *ctdb_db;
 
 	if (data.dsize != sizeof(struct ctdb_traverse_all)) {
-		DEBUG(DEBUG_ERR,("Invalid size in ctdb_control_traverse_all\n"));
+		DEBUG(DEBUG_ERR,(__location__ " Invalid size in ctdb_control_traverse_all\n"));
 		return -1;
 	}
 
@@ -368,6 +392,8 @@ int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb, TDB_DATA data, TDB_
 	state->reqid = c->reqid;
 	state->srcnode = c->pnn;
 	state->ctdb = ctdb;
+	state->client_reqid = c->client_reqid;
+	state->srvid = c->srvid;
 
 	state->h = ctdb_traverse_local(ctdb_db, traverse_all_callback, state);
 	if (state->h == NULL) {
@@ -423,21 +449,59 @@ int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb, TDB_DATA data, TDB
 	private_data = state->private_data;
 
 	callback(private_data, key, data);
-	if (key.dsize == 0 && data.dsize == 0) {
-		/* we've received all of the null replies, so all
-		   nodes are finished */
-		talloc_free(state);
-	}
 	return 0;
 }	
 
-struct traverse_start_state {
-	struct ctdb_context *ctdb;
-	struct ctdb_traverse_all_handle *h;
-	uint32_t srcnode;
-	uint32_t reqid;
-	uint64_t srvid;
-};
+/*
+  kill an in-progress traverse, used when a client disconnects
+ */
+int32_t ctdb_control_traverse_kill(struct ctdb_context *ctdb, TDB_DATA data, 
+				   TDB_DATA *outdata, uint32_t srcnode)
+{
+	struct ctdb_traverse_start *d = (struct ctdb_traverse_start *)data.dptr;
+	struct ctdb_db_context *ctdb_db;
+	struct ctdb_traverse_local_handle *t;
+
+	ctdb_db = find_ctdb_db(ctdb, d->db_id);
+	if (ctdb_db == NULL) {
+		return -1;
+	}
+
+	for (t=ctdb_db->traverse; t; t=t->next) {
+		if (t->client_reqid == d->reqid &&
+		    t->srvid == d->srvid) {
+			talloc_free(t);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+  this is called when a client disconnects during a traverse
+  we need to notify all the nodes taking part in the search that they
+  should kill their traverse children
+ */
+static int ctdb_traverse_start_destructor(struct traverse_start_state *state)
+{
+	struct ctdb_traverse_start r;
+	TDB_DATA data;
+
+	DEBUG(DEBUG_ERR,(__location__ " Traverse cancelled by client disconnect for database:0x%08x\n", state->db_id));
+	r.db_id = state->db_id;
+	r.reqid = state->reqid;
+	r.srvid = state->srvid;
+
+	data.dptr = (uint8_t *)&r;
+	data.dsize = sizeof(r);
+
+	ctdb_daemon_send_control(state->ctdb, CTDB_BROADCAST_CONNECTED, 0, 
+				 CTDB_CONTROL_TRAVERSE_KILL, 
+				 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
+	return 0;
+}
 
 /*
   callback which sends records as messages to the client
@@ -461,19 +525,27 @@ static void traverse_start_callback(void *p, TDB_DATA key, TDB_DATA data)
 	ctdb_dispatch_message(state->ctdb, state->srvid, cdata);
 	if (key.dsize == 0 && data.dsize == 0) {
 		/* end of traverse */
+		talloc_set_destructor(state, NULL);
 		talloc_free(state);
 	}
 }
 
+
 /*
   start a traverse_all - called as a control from a client
  */
 int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA data, 
-				    TDB_DATA *outdata, uint32_t srcnode)
+				    TDB_DATA *outdata, uint32_t srcnode, uint32_t client_id)
 {
 	struct ctdb_traverse_start *d = (struct ctdb_traverse_start *)data.dptr;
 	struct traverse_start_state *state;
 	struct ctdb_db_context *ctdb_db;
+	struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
+
+	if (client == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " No client found\n"));
+		return -1;		
+	}
 
 	if (data.dsize != sizeof(*d)) {
 		DEBUG(DEBUG_ERR,("Bad record size in ctdb_control_traverse_start\n"));
@@ -485,7 +557,7 @@ int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA data,
 		return -1;
 	}
 
-	state = talloc(ctdb_db, struct traverse_start_state);
+	state = talloc(client, struct traverse_start_state);
 	if (state == NULL) {
 		return -1;
 	}
@@ -493,6 +565,7 @@ int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA data,
 	state->srcnode = srcnode;
 	state->reqid = d->reqid;
 	state->srvid = d->srvid;
+	state->db_id = d->db_id;
 	state->ctdb = ctdb;
 
 	state->h = ctdb_daemon_traverse_all(ctdb_db, traverse_start_callback, state);
@@ -501,5 +574,7 @@ int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA data,
 		return -1;
 	}
 
+	talloc_set_destructor(state, ctdb_traverse_start_destructor);
+
 	return 0;
 }


-- 
CTDB repository


More information about the samba-cvs mailing list