Rev 169: - fixed a problem with packets to ourselves. The packets were being [...] in http://samba.org/~tridge/ctdb

tridge at samba.org tridge at samba.org
Fri Apr 20 07:58:38 GMT 2007


------------------------------------------------------------
revno: 169
revision-id: tridge at samba.org-20070420075837-29a520d0d03b3196
parent: tridge at samba.org-20070419215034-109370d61422c5cb
committer: Andrew Tridgell <tridge at samba.org>
branch nick: tridge
timestamp: Fri 2007-04-20 17:58:37 +1000
message:
  - fixed a problem with packets to ourselves. The packets were being
    processed immediately, but the input routines indirectly assumed
    they were being called as a new event (for example, a calling
    routine might queue the packet, then afterwards modify the ltdb
    record). The solution was to make self packets queue via a zero
    timeout.
  
  - fixed unlinking of the socket in an exit in the lockwait code. Needed
    _exit() instead of exit() so atexit() handlers don't trigger
  
  - print latency of lockwait delays
modified:
  common/ctdb.c                  ctdb.c-20061127094323-t50f58d65iaao5of-2
  common/ctdb_call.c             ctdb_call.c-20061128065342-to93h6eejj5kon81-1
  common/ctdb_lockwait.c         ctdb_lockwait.c-20070416214118-n1aeonljj3vpdd9q-1
=== modified file 'common/ctdb.c'
--- a/common/ctdb.c	2007-04-19 06:27:56 +0000
+++ b/common/ctdb.c	2007-04-20 07:58:37 +0000
@@ -331,6 +331,44 @@
 	DEBUG(3,("ctdb_connect_wait: got all %d nodes\n", expected));
 }
 
+struct queue_next {
+	struct ctdb_context *ctdb;
+	struct ctdb_req_header *hdr;
+};
+
+
+/*
+  triggered when a deferred packet is due
+ */
+static void queue_next_trigger(struct event_context *ev, struct timed_event *te, 
+			       struct timeval t, void *private_data)
+{
+	struct queue_next *q = talloc_get_type(private_data, struct queue_next);
+	ctdb_recv_pkt(q->ctdb, (uint8_t *)q->hdr, q->hdr->length);
+	talloc_free(q);
+}	
+
+/*
+  defer a packet, so it is processed on the next event loop
+  this is used for sending packets to ourselves
+ */
+static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
+{
+	struct queue_next *q;
+	q = talloc(ctdb, struct queue_next);
+	if (q == NULL) {
+		DEBUG(0,(__location__ " Failed to allocate deferred packet\n"));
+		return;
+	}
+	q->ctdb = ctdb;
+	q->hdr = talloc_memdup(ctdb, hdr, hdr->length);
+	if (q->hdr == NULL) {
+		DEBUG(0,("Error copying deferred packet to self\n"));
+		return;
+	}
+	event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
+}
+
 /*
   queue a packet or die
 */
@@ -338,7 +376,9 @@
 {
 	struct ctdb_node *node;
 	node = ctdb->nodes[hdr->destnode];
-	if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
+	if (hdr->destnode == ctdb->vnn && !(ctdb->flags & CTDB_FLAG_SELF_CONNECT)) {
+		ctdb_defer_packet(ctdb, hdr);
+	} else if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
 		ctdb_fatal(ctdb, "Unable to queue packet\n");
 	}
 }

=== modified file 'common/ctdb_call.c'
--- a/common/ctdb_call.c	2007-04-19 07:43:27 +0000
+++ b/common/ctdb_call.c	2007-04-20 07:58:37 +0000
@@ -222,17 +222,12 @@
 	memcpy(&r->data[0], key->dptr, key->dsize);
 	memcpy(&r->data[key->dsize], data->dptr, data->dsize);
 
-	if (r->hdr.destnode == ctdb->vnn) {
-		/* we are the lmaster - don't send to ourselves */
-		ctdb_recv_pkt(ctdb, (uint8_t *)&r->hdr, r->hdr.length);
-		return;
-	} else {
-		ctdb_queue_packet(ctdb, &r->hdr);
-
-		/* update the ltdb to record the new dmaster */
-		header->dmaster = r->hdr.destnode;
-		ctdb_ltdb_store(ctdb_db, *key, header, *data);
-	}
+	/* XXX - probably not necessary when lmaster==dmaster
+	   update the ltdb to record the new dmaster */
+	header->dmaster = r->hdr.destnode;
+	ctdb_ltdb_store(ctdb_db, *key, header, *data);
+	
+	ctdb_queue_packet(ctdb, &r->hdr);
 
 	talloc_free(r);
 }
@@ -280,7 +275,8 @@
 	}
 	
 	/* its a protocol error if the sending node is not the current dmaster */
-	if (header.dmaster != hdr->srcnode) {
+	if (header.dmaster != hdr->srcnode && 
+	    hdr->srcnode != ctdb_lmaster(ctdb_db->ctdb, &key)) {
 		ctdb_fatal(ctdb, "dmaster request from non-master");
 		return;
 	}
@@ -313,13 +309,7 @@
 	r->datalen       = data.dsize;
 	memcpy(&r->data[0], data.dptr, data.dsize);
 
-	if (r->hdr.destnode == r->hdr.srcnode) {
-		/* inject the packet back into the input queue */
-		talloc_steal(ctdb, r);
-		ctdb_recv_pkt(ctdb, (uint8_t *)&r->hdr, r->hdr.length);
-	} else {
-		ctdb_queue_packet(ctdb, &r->hdr);
-	}
+	ctdb_queue_packet(ctdb, &r->hdr);
 
 	talloc_free(tmp_ctx);
 }

=== modified file 'common/ctdb_lockwait.c'
--- a/common/ctdb_lockwait.c	2007-04-17 01:26:59 +0000
+++ b/common/ctdb_lockwait.c	2007-04-20 07:58:37 +0000
@@ -34,6 +34,7 @@
 	pid_t child;
 	void *private_data;
 	void (*callback)(void *);
+	struct timeval t;
 };
 
 static void lockwait_handler(struct event_context *ev, struct fd_event *fde, 
@@ -46,6 +47,7 @@
 	pid_t child = h->child;
 	talloc_set_destructor(h, NULL);
 	close(h->fd[0]);
+	DEBUG(3,(__location__ " lockwait took %.6f seconds\n", timeval_elapsed(&h->t)));
 	talloc_free(h);	
 	callback(p);
 	waitpid(child, NULL, 0);
@@ -106,7 +108,7 @@
 		 * Do we need a tdb_reopen here?
 		 */
 		tdb_chainlock(ctdb_db->ltdb->tdb, key);
-		exit(0);
+		_exit(0);
 	}
 
 	close(result->fd[1]);
@@ -120,5 +122,7 @@
 		return NULL;
 	}
 
+	result->t = timeval_current();
+
 	return result;
 }



More information about the samba-cvs mailing list