[SCM] CTDB repository - branch master updated - ctdb-1.10-80-g3e9317a

Sun Jan 23 19:08:07 MST 2011

The branch, master has been updated
       via  3e9317a2e1f687b04bf51575d47fcd4faa6e6515 (commit)
       via  a81da1e67cd11734839c3fa7ae1ddaaf3459416d (commit)
       via  5e614e8cfd1e9a4b13035a0e400b7a60a745b510 (commit)
       via  1b3c5278aa1bf712606e2ec138e6be7b2e8a6ad1 (commit)
      from  52ee2b3ce822344d0f55ac040fe25f6ec5c0d7c2 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 3e9317a2e1f687b04bf51575d47fcd4faa6e6515
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 24 11:42:50 2011 +1100

    LockWait congestion.
    
    Add a dlist to track all active lockwait child processes.
    Every time a new lockwait handle is created, check if there is already an
    active lockwait process for this database/key and, if so,
    send the new request straight to the overflow queue.
    
    This means we will only have one active lockwait child process for a certain key,
    even if there were thousands of fetch-lock requests for this key.
    
    When the lockwait processing finishes for the original request, the processing in do_overflow() will automagically process all remaining keys as well.
    
    Add back a --nosetsched argument to make it easier to run under gdb

commit a81da1e67cd11734839c3fa7ae1ddaaf3459416d
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 24 09:43:45 2011 +1100

    Compile fix

commit 5e614e8cfd1e9a4b13035a0e400b7a60a745b510
Author: Rusty Russell <rusty at rustcorp.com.au>
Date:   Fri Jan 21 21:17:02 2011 +1030

    ctdb_lockwait: create overflow queue.
    
    Once we have more than 200 children waiting on a particular db, don't create
    any more.  Just put them on an overflow queue, and when a child gets a lock
    search that queue to see if others were after the same lock (they probably
    were).

commit 1b3c5278aa1bf712606e2ec138e6be7b2e8a6ad1
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 24 07:39:33 2011 +1100

    Add a new test tool that fetch locks a record and then blocks until it receives
    user input to unlock the record again.

-----------------------------------------------------------------------

Summary of changes:
 Makefile.in                                        |    6 +-
 include/ctdb_private.h                             |    3 +
 server/ctdb_lockwait.c                             |   94 +++++++++++++++++---
 server/ctdbd.c                                     |    8 ++-
 .../{ctdb_fetch_one.c => ctdb_fetch_lock_once.c}   |   60 ++++--------
 5 files changed, 115 insertions(+), 56 deletions(-)
 copy tests/src/{ctdb_fetch_one.c => ctdb_fetch_lock_once.c} (69%)


Changeset truncated at 500 lines:

diff --git a/Makefile.in b/Makefile.in
index e5ef81f..6bbf616 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -67,7 +67,7 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
 	$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
 
 TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_fetch_one \
-	tests/bin/ctdb_store \
+	tests/bin/ctdb_fetch_lock_once tests/bin/ctdb_store \
 	tests/bin/ctdb_randrec tests/bin/ctdb_persistent \
 	tests/bin/ctdb_traverse tests/bin/rb_test tests/bin/ctdb_transaction \
 	@INFINIBAND_BINS@
@@ -162,6 +162,10 @@ tests/bin/ctdb_fetch_one: $(CTDB_CLIENT_OBJ) tests/src/ctdb_fetch_one.o
 	@echo Linking $@
 	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_one.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
+tests/bin/ctdb_fetch_lock_once: $(CTDB_CLIENT_OBJ) tests/src/ctdb_fetch_lock_once.o 
+	@echo Linking $@
+	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_lock_once.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+
 tests/bin/ctdb_store: $(CTDB_CLIENT_OBJ) tests/src/ctdb_store.o 
 	@echo Linking $@
 	@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_store.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 31da0d5..c189a5f 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -507,6 +507,9 @@ struct ctdb_db_context {
 	bool transaction_active;
 	struct ctdb_vacuum_handle *vacuum_handle;
 	char *unhealthy_reason;
+	int pending_requests;
+	struct lockwait_handle *lockwait_active;
+	struct lockwait_handle *lockwait_overflow;
 };
 
 
diff --git a/server/ctdb_lockwait.c b/server/ctdb_lockwait.c
index 1d3a597..9860692 100644
--- a/server/ctdb_lockwait.c
+++ b/server/ctdb_lockwait.c
@@ -23,10 +23,12 @@
 #include "system/wait.h"
 #include "db_wrap.h"
 #include "lib/tdb/include/tdb.h"
+#include "lib/util/dlinklist.h"
 #include "../include/ctdb_private.h"
 
 
 struct lockwait_handle {
+	struct lockwait_handle *next, *prev;
 	struct ctdb_context *ctdb;
 	struct ctdb_db_context *ctdb_db;
 	struct fd_event *fde;
@@ -38,6 +40,45 @@ struct lockwait_handle {
 	struct timeval start_time;
 };
 
+/* If we managed to obtain a lock, find any overflow records which wanted the
+ * same one and do all the callbacks at once. */
+static void do_overflow(struct ctdb_db_context *ctdb_db,
+			TDB_DATA key)
+{
+	struct lockwait_handle *i, *next;
+	TALLOC_CTX *tmp_ctx = talloc_new(ctdb_db);
+
+	for (i = ctdb_db->lockwait_overflow; i; i = next) {
+		/* Careful: destructor removes it from list! */
+		next = i->next;
+		if (key.dsize == i->key.dsize
+		    && memcmp(key.dptr, i->key.dptr, key.dsize) == 0) {
+			/* Callback might free them, so reparent. */
+			talloc_steal(tmp_ctx, i);
+			i->callback(i->private_data);
+		}
+	}
+
+	/* This will free them if callback didn't. */
+	talloc_free(tmp_ctx);
+
+	/* Remove one from the overflow queue if there is one. */
+	if (ctdb_db->lockwait_overflow) {
+		i = ctdb_db->lockwait_overflow;
+		ctdb_lockwait(ctdb_db, i->key, i->callback, i->private_data);
+		talloc_free(i);
+	}
+}
+
+static int lockwait_destructor(struct lockwait_handle *h)
+{
+	CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
+	kill(h->child, SIGKILL);
+	h->ctdb_db->pending_requests--;
+	DLIST_REMOVE(h->ctdb_db->lockwait_active, h);
+	return 0;
+}
+
 static void lockwait_handler(struct event_context *ev, struct fd_event *fde, 
 			     uint16_t flags, void *private_data)
 {
@@ -45,16 +86,14 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
 						     struct lockwait_handle);
 	void (*callback)(void *) = h->callback;
 	void *p = h->private_data;
-	pid_t child = h->child;
 	TDB_DATA key = h->key;
 	struct tdb_context *tdb = h->ctdb_db->ltdb->tdb;
 	TALLOC_CTX *tmp_ctx = talloc_new(ev);
 
 	key.dptr = talloc_memdup(tmp_ctx, key.dptr, key.dsize);
+	h->ctdb_db->pending_requests--;
 
-	talloc_set_destructor(h, NULL);
 	CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "lockwait", lockwait_latency, h->start_time);
-	CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
 
 	/* the handle needs to go away when the context is gone - when
 	   the handle goes away this implicitly closes the pipe, which
@@ -69,16 +108,19 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
 
 	tdb_chainlock_mark(tdb, key);
 	callback(p);
+	if (h->ctdb_db->lockwait_overflow) {
+		do_overflow(h->ctdb_db, key);
+	}
 	tdb_chainlock_unmark(tdb, key);
 
-	kill(child, SIGKILL);
 	talloc_free(tmp_ctx);
 }
 
-static int lockwait_destructor(struct lockwait_handle *h)
+
+static int overflow_lockwait_destructor(struct lockwait_handle *h)
 {
 	CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
-	kill(h->child, SIGKILL);
+	DLIST_REMOVE(h->ctdb_db->lockwait_overflow, h);
 	return 0;
 }
 
@@ -97,7 +139,7 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
 				      void (*callback)(void *private_data),
 				      void *private_data)
 {
-	struct lockwait_handle *result;
+	struct lockwait_handle *result, *i;
 	int ret;
 	pid_t parent = getpid();
 
@@ -109,6 +151,33 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
 		return NULL;
 	}
 
+	result->callback = callback;
+	result->private_data = private_data;
+	result->ctdb = ctdb_db->ctdb;
+	result->ctdb_db = ctdb_db;
+	result->key = key;
+
+	/* If we already have a lockwait child for this request, then put this
+	   request on the overflow queue straight away
+	 */
+	for (i = ctdb_db->lockwait_active; i; i = i->next) {
+		if (key.dsize == i->key.dsize
+		    && memcmp(key.dptr, i->key.dptr, key.dsize) == 0) {
+			DLIST_ADD_END(ctdb_db->lockwait_overflow, result, NULL);
+			talloc_set_destructor(result, overflow_lockwait_destructor);
+			return result;
+		}
+	}
+
+	/* Don't fire off too many children at once! */
+	if (ctdb_db->pending_requests > 200) {
+		DLIST_ADD_END(ctdb_db->lockwait_overflow, result, NULL);
+		talloc_set_destructor(result, overflow_lockwait_destructor);
+		DEBUG(DEBUG_DEBUG, (__location__ " Created overflow for %s\n",
+				    ctdb_db->db_name));
+		return result;
+	}
+
 	ret = pipe(result->fd);
 
 	if (ret != 0) {
@@ -127,12 +196,6 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
 		return NULL;
 	}
 
-	result->callback = callback;
-	result->private_data = private_data;
-	result->ctdb = ctdb_db->ctdb;
-	result->ctdb_db = ctdb_db;
-	result->key = key;
-
 	if (result->child == 0) {
 		char c = 0;
 		close(result->fd[0]);
@@ -149,8 +212,12 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
 	close(result->fd[1]);
 	set_close_on_exec(result->fd[0]);
 
+	/* This is an active lockwait child process */
+	DLIST_ADD_END(ctdb_db->lockwait_active, result, NULL);
+
 	DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child lockwait process\n", result->fd[0]));
 
+	ctdb_db->pending_requests++;
 	talloc_set_destructor(result, lockwait_destructor);
 
 	result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
@@ -164,6 +231,5 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
 	tevent_fd_set_auto_close(result->fde);
 
 	result->start_time = timeval_current();
-
 	return result;
 }
diff --git a/server/ctdbd.c b/server/ctdbd.c
index bddd658..9eaba1d 100644
--- a/server/ctdbd.c
+++ b/server/ctdbd.c
@@ -43,6 +43,7 @@ static struct {
 	const char *single_public_ip;
 	const char *node_ip;
 	int         valgrinding;
+	int         nosetsched;
 	int         use_syslog;
 	int         start_as_disabled;
 	int         start_as_stopped;
@@ -133,6 +134,7 @@ int main(int argc, const char *argv[])
 		{ "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
 		{ "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
 		{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
+		{ "nosetsched", 0, POPT_ARG_NONE, &options.nosetsched, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
 		{ "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
 		{ "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
 		{ "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
@@ -315,7 +317,11 @@ int main(int argc, const char *argv[])
 	}
 
 	ctdb->valgrinding = options.valgrinding;
-	ctdb->do_setsched = !ctdb->valgrinding;
+	if (options.valgrinding || options.nosetsched) {
+		ctdb->do_setsched = 0;
+	} else {
+		ctdb->do_setsched = 1;
+	}
 
 	if (options.max_persistent_check_errors < 0) {
 		ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
diff --git a/tests/src/ctdb_fetch_one.c b/tests/src/ctdb_fetch_lock_once.c
similarity index 69%
copy from tests/src/ctdb_fetch_one.c
copy to tests/src/ctdb_fetch_lock_once.c
index 15be3ca..ff131b8 100644
--- a/tests/src/ctdb_fetch_one.c
+++ b/tests/src/ctdb_fetch_lock_once.c
@@ -1,6 +1,6 @@
 /* 
-   simple ctdb benchmark
-   This test just fetch_locks a record and releases it in a loop.
+   simple ctdb test tool
+   This test just fetch_locks a record and releases it once.
 
    Copyright (C) Ronnie Sahlberg 2009
 
@@ -24,55 +24,39 @@
 #include "popt.h"
 #include "cmdline.h"
 
-#include <sys/time.h>
-#include <time.h>
-
-static int timelimit = 10;
-static int lock_count = 0;
-
 static struct ctdb_db_context *ctdb_db;
 
 #define TESTKEY "testkey"
 
 
-static void alarm_handler(int sig)
-{
-	printf("Locks:%d\n", lock_count);
-	lock_count=0;
-
-	timelimit--;
-	if (timelimit <= 0) {
-		exit(0);
-	}
-	alarm(1);
-}
-
 /*
-	Just try locking/unlocking the same record over and over
+	Just try locking/unlocking a single record once
 */
-static void bench_fetch_one_loop(struct ctdb_context *ctdb, struct event_context *ev)
+static void fetch_lock_once(struct ctdb_context *ctdb, struct event_context *ev)
 {
+	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
 	TDB_DATA key, data;
+	struct ctdb_record_handle *h;
 
 	key.dptr = discard_const(TESTKEY);
 	key.dsize = strlen(TESTKEY);
 
+	printf("Trying to fetch lock the record ...\n");
 
-	while (1) {
-		TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
-		struct ctdb_record_handle *h;
-
-		h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
-		if (h == NULL) {
-			printf("Failed to fetch record '%s' on node %d\n", 
-		       		(const char *)key.dptr, ctdb_get_pnn(ctdb));
-			talloc_free(tmp_ctx);
-			continue;
-		}
-
+	h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
+	if (h == NULL) {
+		printf("Failed to fetch record '%s' on node %d\n", 
+	       		(const char *)key.dptr, ctdb_get_pnn(ctdb));
 		talloc_free(tmp_ctx);
-		lock_count++;
+		exit(10);
 	}
+
+	printf("Record fetchlocked.\n");
+	printf("Press enter to release the record ...\n");
+	(void)getchar();
+
+	talloc_free(tmp_ctx);
+	printf("Record released.\n");
 }
 
 /*
@@ -85,7 +69,6 @@ int main(int argc, const char *argv[])
 	struct poptOption popt_options[] = {
 		POPT_AUTOHELP
 		POPT_CTDB_CMDLINE
-		{ "timelimit", 't', POPT_ARG_INT, &timelimit, 0, "timelimit", "integer" },
 		POPT_TABLEEND
 	};
 	int opt;
@@ -131,10 +114,7 @@ int main(int argc, const char *argv[])
 		event_loop_once(ev);
 	}
 
-	signal(SIGALRM, alarm_handler);
-	alarm(1);
-
-	bench_fetch_one_loop(ctdb, ev);
+	fetch_lock_once(ctdb, ev);
 
 	return 0;
 }


-- 
CTDB repository