[SCM] CTDB repository - branch master updated - ctdb-1.11-108-gbda24b7

Ronnie Sahlberg sahlberg at samba.org
Mon Nov 7 22:48:23 MST 2011


The branch, master has been updated
       via  bda24b7f313289404b68ce8b9177fbd6b6a05dd7 (commit)
       via  82d6946ad8b3348e8b9d3d971f24925ade02d1be (commit)
      from  7c02d242af552aa732f5c70ea4eeefbc8a8542e2 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit bda24b7f313289404b68ce8b9177fbd6b6a05dd7
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Nov 8 16:20:09 2011 +1100

    New version 1.12
    
    * Tue Nov 8 2011 : Version 1.12
     - Add new tunable AllowClientDBAttach, which can be used to stop
       client db access during maintenance operations
     - Updated logging for interfaces that are missing or don't exist but are
       configured to be used.
     - Add timeout argument to ctdb_cmdline_client
     - PDMA support
     - Initial support for 'readonly' delegations for ctdb databases.
       Once finished, this will greatly improve performance for contended hot
       records that are used for read access only.
     - New 'ctdb cattdb' command
     - Massive updates to tests and eventscripts
     - LCP2 IP allocation algorithm
     - Record fetch collapse. Collapse multiple fetch-lock requests from clients
       into a single network fetch and defer the other concurrent requests until
       the initial fetch completes, then service the deferred calls locally.
       This greatly improves performance for contended hot records
       where clients request write-locks.

commit 82d6946ad8b3348e8b9d3d971f24925ade02d1be
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Nov 8 06:55:46 2011 +1100

    Record Fetch Collapse: Collapse multiple fetch requests into a single request.

    When multiple clients fetch the same record concurrently, send only a single
    fetch across the network and defer all other fetches locally.
    This improves performance for hot records and reduces CPU load on ctdb.

-----------------------------------------------------------------------
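
As a rough, stand-alone illustration of the collapse pattern described in the
"Record Fetch Collapse" commit above: the sketch below is plain C, uses none of
ctdb's tdb/talloc/tevent infrastructure, and every name in it is invented for
the example rather than taken from the patch. The first fetch for a key creates
a pending entry and issues the one "network" fetch; later fetches for the same
key are parked behind that entry; when the fetch completes, the parked requests
are serviced locally, in arrival order.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One request parked behind an in-flight fetch for the same key. */
struct deferred_req {
	struct deferred_req *next;
	int client_id;
};

/* One in-flight "network" fetch, keyed by record key. */
struct pending_fetch {
	struct pending_fetch *next;
	char *key;
	struct deferred_req *deferred;
};

static struct pending_fetch *pending_list;

static struct pending_fetch *find_pending(const char *key)
{
	struct pending_fetch *p;
	for (p = pending_list; p != NULL; p = p->next) {
		if (strcmp(p->key, key) == 0) {
			return p;
		}
	}
	return NULL;
}

/* A client asks to fetch 'key'.  Only the first caller triggers the
   "network" fetch; every later caller is parked behind it, in arrival
   order.  (Allocation error handling omitted for brevity.) */
static void fetch_request(int client_id, const char *key)
{
	struct pending_fetch *p = find_pending(key);

	if (p != NULL) {
		struct deferred_req *d = calloc(1, sizeof(*d));
		struct deferred_req **tail = &p->deferred;

		d->client_id = client_id;
		while (*tail != NULL) {
			tail = &(*tail)->next;
		}
		*tail = d;
		printf("client %d: deferred behind in-flight fetch of '%s'\n",
		       client_id, key);
		return;
	}

	p = calloc(1, sizeof(*p));
	p->key = strdup(key);
	p->next = pending_list;
	pending_list = p;
	printf("client %d: sending the single network fetch for '%s'\n",
	       client_id, key);
}

/* The "network" fetch finished and we are now dmaster for the record,
   so answer all parked requests locally and drop the pending entry. */
static void fetch_complete(const char *key)
{
	struct pending_fetch **pp = &pending_list;
	struct pending_fetch *p;

	while (*pp != NULL && strcmp((*pp)->key, key) != 0) {
		pp = &(*pp)->next;
	}
	p = *pp;
	if (p == NULL) {
		return;
	}
	*pp = p->next;

	while (p->deferred != NULL) {
		struct deferred_req *d = p->deferred;
		p->deferred = d->next;
		printf("client %d: serviced locally for '%s'\n",
		       d->client_id, key);
		free(d);
	}
	free(p->key);
	free(p);
}

int main(void)
{
	fetch_request(1, "locking.tdb/some-hot-record");
	fetch_request(2, "locking.tdb/some-hot-record");  /* collapsed */
	fetch_request(3, "locking.tdb/some-hot-record");  /* collapsed */
	fetch_complete("locking.tdb/some-hot-record");
	return 0;
}

In the actual patch below, the pending entries live in a per-database rb tree
(ctdb_db->deferred_fetch), the parked requests are struct
ctdb_deferred_fetch_call entries hanging off a struct ctdb_deferred_fetch_queue,
requeueing happens from that queue's talloc destructor by pushing each deferred
call back onto the event loop with a zero timeout, and a 30 second timer tears
the queue down if the original fetch never completes.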

Summary of changes:
 include/ctdb_private.h     |    4 +
 packaging/RPM/ctdb.spec.in |   18 ++++
 server/ctdb_daemon.c       |  207 ++++++++++++++++++++++++++++++++++++++++++++
 server/ctdb_ltdb_server.c  |   11 +++
 4 files changed, 240 insertions(+), 0 deletions(-)


Changeset truncated at 500 lines:

diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 7e59473..f1818b9 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -527,6 +527,10 @@ struct ctdb_db_context {
 				  struct ctdb_ltdb_header *header,
 				  TDB_DATA data);
 
+	/* used to track which records we are currently fetching
+	   so we can avoid sending duplicate fetch requests
+	*/
+	struct trbt_tree *deferred_fetch;
 };
 
 
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index 4b2d98b..969a01b 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -146,6 +146,24 @@ development libraries for ctdb
 %{_libdir}/libctdb.a
 
 %changelog
+* Tue Nov 8 2011 : Version 1.12
+ - Add new tunable AllowClientDBAttach, which can be used to stop
+   client db access during maintenance operations
+ - Updated logging for interfaces that are missing or don't exist but are
+   configured to be used.
+ - Add timeout argument to ctdb_cmdline_client
+ - PDMA support
+ - Initial support for 'readonly' delegations for ctdb databases.
+   Once finished, this will greatly improve performance for contended hot
+   records that are used for read access only.
+ - New 'ctdb cattdb' command
+ - Massive updates to tests and eventscripts
+ - LCP2 IP allocation algorithm
+ - Record fetch collapse. Collapse multiple fetch-lock requests from clients
+   into a single network fetch and defer the other concurrent requests until
+   the initial fetch completes, then service the deferred calls locally.
+   This greatly improves performance for contended hot records
+   where clients request write-locks.
 * Thu Sep 1 2011 : Version 1.11
  - Major updates.
 * Tue May 25 2010 : Version 1.10
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 88d1210..69fb6fb 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -27,6 +27,7 @@
 #include "system/wait.h"
 #include "../include/ctdb_client.h"
 #include "../include/ctdb_private.h"
+#include "../common/rb_tree.h"
 #include <sys/socket.h>
 
 struct ctdb_client_pid_list {
@@ -359,6 +360,190 @@ static void daemon_incoming_packet_wrap(void *p, struct ctdb_req_header *hdr)
 	daemon_incoming_packet(client, hdr);	
 }
 
+struct ctdb_deferred_fetch_call {
+	struct ctdb_deferred_fetch_call *next, *prev;
+	struct ctdb_req_call *c;
+	struct ctdb_daemon_packet_wrap *w;
+};
+
+struct ctdb_deferred_fetch_queue {
+	struct ctdb_deferred_fetch_call *deferred_calls;
+};
+
+struct ctdb_deferred_requeue {
+	struct ctdb_deferred_fetch_call *dfc;
+	struct ctdb_client *client;
+};
+
+/* called from a timer event and starts reprocessing the deferred call.*/
+static void reprocess_deferred_call(struct event_context *ev, struct timed_event *te, 
+				       struct timeval t, void *private_data)
+{
+	struct ctdb_deferred_requeue *dfr = (struct ctdb_deferred_requeue *)private_data;
+	struct ctdb_client *client = dfr->client;
+
+	talloc_steal(client, dfr->dfc->c);
+	daemon_incoming_packet(client, (struct ctdb_req_header *)dfr->dfc->c);
+	talloc_free(dfr);
+}
+
+/* The deferral context is destroyed either after a timeout or when the
+   initial fetch-lock has finished.
+   At that point, immediately start reprocessing the queued up deferred
+   calls so they get serviced right away (and since we are dmaster at
+   this stage, this triggers the waiting smbd processes to pick up and
+   acquire the record straight away).
+*/
+static int deferred_fetch_queue_destructor(struct ctdb_deferred_fetch_queue *dfq)
+{
+
+	/* We need to reprocess the packets from the queue explicitly instead
+	   of just using a normal destructor, since we need to call the
+	   clients in the same order in which the requests were queued.
+	*/
+	while (dfq->deferred_calls != NULL) {
+		struct ctdb_client *client;
+		struct ctdb_deferred_fetch_call *dfc = dfq->deferred_calls;
+		struct ctdb_deferred_requeue *dfr;
+
+		DLIST_REMOVE(dfq->deferred_calls, dfc);
+
+		client = ctdb_reqid_find(dfc->w->ctdb, dfc->w->client_id, struct ctdb_client);
+		if (client == NULL) {
+			DEBUG(DEBUG_ERR,(__location__ " Packet for disconnected client %u\n",
+				 dfc->w->client_id));
+			continue;
+		}
+
+		/* process it by pushing it back onto the eventloop */
+		dfr = talloc(client, struct ctdb_deferred_requeue);
+		if (dfr == NULL) {
+			DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch requeue structure\n"));
+			continue;
+		}
+
+		dfr->dfc    = talloc_steal(dfr, dfc);
+		dfr->client = client;
+
+		event_add_timed(dfc->w->ctdb->ev, client, timeval_zero(), reprocess_deferred_call, dfr);
+	}
+
+	return 0;
+}
+
+/* Insert the new deferral context into the rb tree.
+   There should never be a pre-existing context here, but check for it;
+   warn and destroy the previous context if there is already a deferral
+   context for this key.
+*/
+static void *insert_dfq_callback(void *parm, void *data)
+{
+        if (data) {
+		DEBUG(DEBUG_ERR,("Already have DFQ registered. Free old %p and create new %p\n", data, parm));
+                talloc_free(data);
+        }
+        return parm;
+}
+
+/* If the original fetch-lock did not complete within a reasonable time,
+   free the context, which causes all deferred requests to be
+   re-inserted into the event system.
+*/
+static void dfq_timeout(struct event_context *ev, struct timed_event *te, 
+				  struct timeval t, void *private_data)
+{
+	talloc_free(private_data);
+}
+
+/* This function is used in the local daemon to register a KEY in a database
+   as being "fetched".
+   While the remote fetch is in flight, any further attempts to re-fetch the
+   same record will be deferred until the fetch completes.
+*/
+static int setup_deferred_fetch_locks(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
+{
+	uint32_t *k;
+	struct ctdb_deferred_fetch_queue *dfq;
+
+	k = talloc_zero_size(call, ((call->key.dsize + 3) & 0xfffffffc) + 4);
+	if (k == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch\n"));
+		return -1;
+	}
+
+	k[0] = (call->key.dsize + 3) / 4 + 1;
+	memcpy(&k[1], call->key.dptr, call->key.dsize);
+
+	dfq  = talloc(call, struct ctdb_deferred_fetch_queue);
+	if (dfq == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch queue structure\n"));
+		talloc_free(k);
+		return -1;
+	}
+	dfq->deferred_calls = NULL;
+
+	trbt_insertarray32_callback(ctdb_db->deferred_fetch, k[0], &k[0], insert_dfq_callback, dfq);
+
+	talloc_set_destructor(dfq, deferred_fetch_queue_destructor);
+
+	/* If the fetch hasn't completed in 30 seconds, just tear it all down
+	   and let it try again as the events are reissued */
+	event_add_timed(ctdb_db->ctdb->ev, dfq, timeval_current_ofs(30, 0), dfq_timeout, dfq);
+
+	talloc_free(k);
+	return 0;
+}
+
+/* Check if this is a duplicate request for a fetch already in flight;
+   if it is, defer this call to be reprocessed later when
+   the in-flight fetch completes.
+*/
+static int requeue_duplicate_fetch(struct ctdb_db_context *ctdb_db, struct ctdb_client *client, TDB_DATA key, struct ctdb_req_call *c)
+{
+	uint32_t *k;
+	struct ctdb_deferred_fetch_queue *dfq;
+	struct ctdb_deferred_fetch_call *dfc;
+
+	k = talloc_zero_size(c, ((key.dsize + 3) & 0xfffffffc) + 4);
+	if (k == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch\n"));
+		return -1;
+	}
+
+	k[0] = (key.dsize + 3) / 4 + 1;
+	memcpy(&k[1], key.dptr, key.dsize);
+
+	dfq = trbt_lookuparray32(ctdb_db->deferred_fetch, k[0], &k[0]);
+	if (dfq == NULL) {
+		talloc_free(k);
+		return -1;
+	}
+
+
+	talloc_free(k);
+
+	dfc = talloc(dfq, struct ctdb_deferred_fetch_call);
+	if (dfc == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to allocate deferred fetch call structure\n"));
+		return -1;
+	}
+
+	dfc->w = talloc(dfc, struct ctdb_daemon_packet_wrap);
+	if (dfc->w == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch daemon packet wrap structure\n"));
+		talloc_free(dfc);
+		return -1;
+	}
+
+	dfc->c = talloc_steal(dfc, c);
+	dfc->w->ctdb = ctdb_db->ctdb;
+	dfc->w->client_id = client->client_id;
+
+	DLIST_ADD_END(dfq->deferred_calls, dfc, NULL);
+
+	return 0;
+}
+
 
 /*
   this is called when the ctdb daemon received a ctdb request call
@@ -424,6 +609,20 @@ static void daemon_request_call_from_client(struct ctdb_client *client,
 		return;
 	}
 
+	if (c->flags & CTDB_IMMEDIATE_MIGRATION) {
+		/* Check if this fetch-lock request is a duplicate of a
+		   request we already have in flight. If so, defer it until
+		   the first request completes.
+		 */
+		if (requeue_duplicate_fetch(ctdb_db, client, key, c) == 0) {
+			ret = ctdb_ltdb_unlock(ctdb_db, key);
+			if (ret != 0) {
+				DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret));
+			}
+			return;
+		}
+	}
+
 	/* Dont do READONLY if we dont have a tracking database */
 	if ((c->flags & CTDB_WANT_READONLY) && !ctdb_db->readonly) {
 		c->flags &= ~CTDB_WANT_READONLY;
@@ -513,6 +712,14 @@ static void daemon_request_call_from_client(struct ctdb_client *client,
 		state = ctdb_call_local_send(ctdb_db, call, &header, &data);
 	} else {
 		state = ctdb_daemon_call_send_remote(ctdb_db, call, &header);
+		if (call->flags & CTDB_IMMEDIATE_MIGRATION) {
+			/* This request triggered a remote fetch-lock.
+			   Set up a deferral for this key so any additional
+			   fetch-locks are deferred until the current one
+			   finishes.
+			 */
+			setup_deferred_fetch_locks(ctdb_db, call);
+		}
 	}
 
 	ret = ctdb_ltdb_unlock(ctdb_db, key);
diff --git a/server/ctdb_ltdb_server.c b/server/ctdb_ltdb_server.c
index 745b3d2..713606d 100644
--- a/server/ctdb_ltdb_server.c
+++ b/server/ctdb_ltdb_server.c
@@ -962,6 +962,17 @@ again:
 		}
 	}
 
+	/* Set up an rb tree we can use to track which records we have a
+	   fetch-lock in flight for, so we can defer any additional calls
+	   for the same record.
+	 */
+	ctdb_db->deferred_fetch = trbt_create(ctdb_db, 0);
+	if (ctdb_db->deferred_fetch == NULL) {
+		DEBUG(DEBUG_ERR,("Failed to create deferred fetch rb tree for ctdb database\n"));
+		talloc_free(ctdb_db);
+		return -1;
+	}
+
 	DLIST_ADD(ctdb->db_list, ctdb_db);
 
 	/* setting this can help some high churn databases */

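A side note on the key handling in setup_deferred_fetch_locks() and
requeue_duplicate_fetch() above: the TDB key is copied into a zero-padded
uint32_t array whose first element holds the total word count (including that
length word itself), so the key can be passed to
trbt_insertarray32_callback()/trbt_lookuparray32(). Below is a small
stand-alone sketch of just that packing; pack_key32 is a made-up helper name,
not something the patch adds (the patch open-codes this in both functions).

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical helper: pack a key of 'dsize' bytes into a zero-padded
   uint32_t array.  Element [0] holds the total word count, including
   the length word itself; the key bytes follow, padded with zeros up
   to a 4-byte boundary.  The caller frees the result and would pass
   k[0] and k to the trbt_*array32() calls. */
static uint32_t *pack_key32(const uint8_t *dptr, size_t dsize)
{
	size_t padded = (dsize + 3) & ~(size_t)3;   /* round up to 4 bytes */
	uint32_t *k = calloc(1, padded + sizeof(uint32_t));

	if (k == NULL) {
		return NULL;
	}
	k[0] = (uint32_t)((dsize + 3) / 4 + 1);     /* word count incl. k[0] */
	memcpy(&k[1], dptr, dsize);
	return k;
}

int main(void)
{
	uint32_t *k = pack_key32((const uint8_t *)"foo", 3);

	if (k != NULL) {
		/* A 3-byte key pads to one 32-bit word plus the length word. */
		printf("k[0] = %u words\n", k[0]);  /* prints: k[0] = 2 words */
		free(k);
	}
	return 0;
}

Rounding the key up to whole 32-bit words appears to be what lets the generic
array-of-uint32 rb tree interface handle arbitrary-length TDB keys.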

-- 
CTDB repository

