[SCM] CTDB repository - branch master updated - ctdb-1.10-80-g3e9317a
Ronnie Sahlberg
sahlberg at samba.org
Sun Jan 23 19:08:07 MST 2011
The branch, master has been updated
via 3e9317a2e1f687b04bf51575d47fcd4faa6e6515 (commit)
via a81da1e67cd11734839c3fa7ae1ddaaf3459416d (commit)
via 5e614e8cfd1e9a4b13035a0e400b7a60a745b510 (commit)
via 1b3c5278aa1bf712606e2ec138e6be7b2e8a6ad1 (commit)
from 52ee2b3ce822344d0f55ac040fe25f6ec5c0d7c2 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit 3e9317a2e1f687b04bf51575d47fcd4faa6e6515
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Mon Jan 24 11:42:50 2011 +1100
LockWait congestion.
Add a dlist to track all active lockwait child processes.
Every time a new lockwait handle is created, check if there is already an
active lockwait process for this database/key and, if so,
send the new request straight to the overflow queue.
This means we will only have one active lockwait child process for any given key,
even if there were thousands of fetch-lock requests for this key.
When the lockwait processing finishes for the original request, the processing in do_overflow() will automagically process all remaining requests for that key as well.
Add back a --nosetsched argument to make it easier to run under gdb
commit a81da1e67cd11734839c3fa7ae1ddaaf3459416d
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Mon Jan 24 09:43:45 2011 +1100
Compile fix
commit 5e614e8cfd1e9a4b13035a0e400b7a60a745b510
Author: Rusty Russell <rusty at rustcorp.com.au>
Date: Fri Jan 21 21:17:02 2011 +1030
ctdb_lockwait: create overflow queue.
Once we have more than 200 children waiting on a particular db, don't create
any more. Just put them on an overflow queue, and when a child gets a lock
search that queue to see if others were after the same lock (they probably
were).
commit 1b3c5278aa1bf712606e2ec138e6be7b2e8a6ad1
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Mon Jan 24 07:39:33 2011 +1100
Add a new test tool that fetch locks a record and then blocks until it receives
user input to unlock the record again.
-----------------------------------------------------------------------
Summary of changes:
Makefile.in | 6 +-
include/ctdb_private.h | 3 +
server/ctdb_lockwait.c | 94 +++++++++++++++++---
server/ctdbd.c | 8 ++-
.../{ctdb_fetch_one.c => ctdb_fetch_lock_once.c} | 60 ++++--------
5 files changed, 115 insertions(+), 56 deletions(-)
copy tests/src/{ctdb_fetch_one.c => ctdb_fetch_lock_once.c} (69%)
Changeset truncated at 500 lines:
diff --git a/Makefile.in b/Makefile.in
index e5ef81f..6bbf616 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -67,7 +67,7 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_fetch_one \
- tests/bin/ctdb_store \
+ tests/bin/ctdb_fetch_lock_once tests/bin/ctdb_store \
tests/bin/ctdb_randrec tests/bin/ctdb_persistent \
tests/bin/ctdb_traverse tests/bin/rb_test tests/bin/ctdb_transaction \
@INFINIBAND_BINS@
@@ -162,6 +162,10 @@ tests/bin/ctdb_fetch_one: $(CTDB_CLIENT_OBJ) tests/src/ctdb_fetch_one.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_one.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+tests/bin/ctdb_fetch_lock_once: $(CTDB_CLIENT_OBJ) tests/src/ctdb_fetch_lock_once.o
+ @echo Linking $@
+ @$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_lock_once.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+
tests/bin/ctdb_store: $(CTDB_CLIENT_OBJ) tests/src/ctdb_store.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_store.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 31da0d5..c189a5f 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -507,6 +507,9 @@ struct ctdb_db_context {
bool transaction_active;
struct ctdb_vacuum_handle *vacuum_handle;
char *unhealthy_reason;
+ int pending_requests;
+ struct lockwait_handle *lockwait_active;
+ struct lockwait_handle *lockwait_overflow;
};
diff --git a/server/ctdb_lockwait.c b/server/ctdb_lockwait.c
index 1d3a597..9860692 100644
--- a/server/ctdb_lockwait.c
+++ b/server/ctdb_lockwait.c
@@ -23,10 +23,12 @@
#include "system/wait.h"
#include "db_wrap.h"
#include "lib/tdb/include/tdb.h"
+#include "lib/util/dlinklist.h"
#include "../include/ctdb_private.h"
struct lockwait_handle {
+ struct lockwait_handle *next, *prev;
struct ctdb_context *ctdb;
struct ctdb_db_context *ctdb_db;
struct fd_event *fde;
@@ -38,6 +40,45 @@ struct lockwait_handle {
struct timeval start_time;
};
+/* If we managed to obtain a lock, find any overflow records which wanted the
+ * same one and do all the callbacks at once. */
+static void do_overflow(struct ctdb_db_context *ctdb_db,
+ TDB_DATA key)
+{
+ struct lockwait_handle *i, *next;
+ TALLOC_CTX *tmp_ctx = talloc_new(ctdb_db);
+
+ for (i = ctdb_db->lockwait_overflow; i; i = next) {
+ /* Careful: destructor removes it from list! */
+ next = i->next;
+ if (key.dsize == i->key.dsize
+ && memcmp(key.dptr, i->key.dptr, key.dsize) == 0) {
+ /* Callback might free them, so reparent. */
+ talloc_steal(tmp_ctx, i);
+ i->callback(i->private_data);
+ }
+ }
+
+ /* This will free them if callback didn't. */
+ talloc_free(tmp_ctx);
+
+ /* Remove one from the overflow queue if there is one. */
+ if (ctdb_db->lockwait_overflow) {
+ i = ctdb_db->lockwait_overflow;
+ ctdb_lockwait(ctdb_db, i->key, i->callback, i->private_data);
+ talloc_free(i);
+ }
+}
+
+static int lockwait_destructor(struct lockwait_handle *h)
+{
+ CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
+ kill(h->child, SIGKILL);
+ h->ctdb_db->pending_requests--;
+ DLIST_REMOVE(h->ctdb_db->lockwait_active, h);
+ return 0;
+}
+
static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
uint16_t flags, void *private_data)
{
@@ -45,16 +86,14 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
struct lockwait_handle);
void (*callback)(void *) = h->callback;
void *p = h->private_data;
- pid_t child = h->child;
TDB_DATA key = h->key;
struct tdb_context *tdb = h->ctdb_db->ltdb->tdb;
TALLOC_CTX *tmp_ctx = talloc_new(ev);
key.dptr = talloc_memdup(tmp_ctx, key.dptr, key.dsize);
+ h->ctdb_db->pending_requests--;
- talloc_set_destructor(h, NULL);
CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "lockwait", lockwait_latency, h->start_time);
- CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
/* the handle needs to go away when the context is gone - when
the handle goes away this implicitly closes the pipe, which
@@ -69,16 +108,19 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
tdb_chainlock_mark(tdb, key);
callback(p);
+ if (h->ctdb_db->lockwait_overflow) {
+ do_overflow(h->ctdb_db, key);
+ }
tdb_chainlock_unmark(tdb, key);
- kill(child, SIGKILL);
talloc_free(tmp_ctx);
}
-static int lockwait_destructor(struct lockwait_handle *h)
+
+static int overflow_lockwait_destructor(struct lockwait_handle *h)
{
CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
- kill(h->child, SIGKILL);
+ DLIST_REMOVE(h->ctdb_db->lockwait_overflow, h);
return 0;
}
@@ -97,7 +139,7 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
void (*callback)(void *private_data),
void *private_data)
{
- struct lockwait_handle *result;
+ struct lockwait_handle *result, *i;
int ret;
pid_t parent = getpid();
@@ -109,6 +151,33 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
return NULL;
}
+ result->callback = callback;
+ result->private_data = private_data;
+ result->ctdb = ctdb_db->ctdb;
+ result->ctdb_db = ctdb_db;
+ result->key = key;
+
+ /* If we already have a lockwait child for this request, then put this
+ request on the overflow queue straight away
+ */
+ for (i = ctdb_db->lockwait_active; i; i = i->next) {
+ if (key.dsize == i->key.dsize
+ && memcmp(key.dptr, i->key.dptr, key.dsize) == 0) {
+ DLIST_ADD_END(ctdb_db->lockwait_overflow, result, NULL);
+ talloc_set_destructor(result, overflow_lockwait_destructor);
+ return result;
+ }
+ }
+
+ /* Don't fire off too many children at once! */
+ if (ctdb_db->pending_requests > 200) {
+ DLIST_ADD_END(ctdb_db->lockwait_overflow, result, NULL);
+ talloc_set_destructor(result, overflow_lockwait_destructor);
+ DEBUG(DEBUG_DEBUG, (__location__ " Created overflow for %s\n",
+ ctdb_db->db_name));
+ return result;
+ }
+
ret = pipe(result->fd);
if (ret != 0) {
@@ -127,12 +196,6 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
return NULL;
}
- result->callback = callback;
- result->private_data = private_data;
- result->ctdb = ctdb_db->ctdb;
- result->ctdb_db = ctdb_db;
- result->key = key;
-
if (result->child == 0) {
char c = 0;
close(result->fd[0]);
@@ -149,8 +212,12 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
close(result->fd[1]);
set_close_on_exec(result->fd[0]);
+ /* This is an active lockwait child process */
+ DLIST_ADD_END(ctdb_db->lockwait_active, result, NULL);
+
DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child lockwait process\n", result->fd[0]));
+ ctdb_db->pending_requests++;
talloc_set_destructor(result, lockwait_destructor);
result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
@@ -164,6 +231,5 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
tevent_fd_set_auto_close(result->fde);
result->start_time = timeval_current();
-
return result;
}
diff --git a/server/ctdbd.c b/server/ctdbd.c
index bddd658..9eaba1d 100644
--- a/server/ctdbd.c
+++ b/server/ctdbd.c
@@ -43,6 +43,7 @@ static struct {
const char *single_public_ip;
const char *node_ip;
int valgrinding;
+ int nosetsched;
int use_syslog;
int start_as_disabled;
int start_as_stopped;
@@ -133,6 +134,7 @@ int main(int argc, const char *argv[])
{ "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
{ "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
+ { "nosetsched", 0, POPT_ARG_NONE, &options.nosetsched, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
{ "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
{ "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
{ "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
@@ -315,7 +317,11 @@ int main(int argc, const char *argv[])
}
ctdb->valgrinding = options.valgrinding;
- ctdb->do_setsched = !ctdb->valgrinding;
+ if (options.valgrinding || options.nosetsched) {
+ ctdb->do_setsched = 0;
+ } else {
+ ctdb->do_setsched = 1;
+ }
if (options.max_persistent_check_errors < 0) {
ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
diff --git a/tests/src/ctdb_fetch_one.c b/tests/src/ctdb_fetch_lock_once.c
similarity index 69%
copy from tests/src/ctdb_fetch_one.c
copy to tests/src/ctdb_fetch_lock_once.c
index 15be3ca..ff131b8 100644
--- a/tests/src/ctdb_fetch_one.c
+++ b/tests/src/ctdb_fetch_lock_once.c
@@ -1,6 +1,6 @@
/*
- simple ctdb benchmark
- This test just fetch_locks a record and releases it in a loop.
+ simple ctdb test tool
+ This test just fetch_locks a record and releases it once.
Copyright (C) Ronnie Sahlberg 2009
@@ -24,55 +24,39 @@
#include "popt.h"
#include "cmdline.h"
-#include <sys/time.h>
-#include <time.h>
-
-static int timelimit = 10;
-static int lock_count = 0;
-
static struct ctdb_db_context *ctdb_db;
#define TESTKEY "testkey"
-static void alarm_handler(int sig)
-{
- printf("Locks:%d\n", lock_count);
- lock_count=0;
-
- timelimit--;
- if (timelimit <= 0) {
- exit(0);
- }
- alarm(1);
-}
-
/*
- Just try locking/unlocking the same record over and over
+ Just try locking/unlocking a single record once
*/
-static void bench_fetch_one_loop(struct ctdb_context *ctdb, struct event_context *ev)
+static void fetch_lock_once(struct ctdb_context *ctdb, struct event_context *ev)
{
+ TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
TDB_DATA key, data;
+ struct ctdb_record_handle *h;
key.dptr = discard_const(TESTKEY);
key.dsize = strlen(TESTKEY);
+ printf("Trying to fetch lock the record ...\n");
- while (1) {
- TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
- struct ctdb_record_handle *h;
-
- h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
- if (h == NULL) {
- printf("Failed to fetch record '%s' on node %d\n",
- (const char *)key.dptr, ctdb_get_pnn(ctdb));
- talloc_free(tmp_ctx);
- continue;
- }
-
+ h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
+ if (h == NULL) {
+ printf("Failed to fetch record '%s' on node %d\n",
+ (const char *)key.dptr, ctdb_get_pnn(ctdb));
talloc_free(tmp_ctx);
- lock_count++;
+ exit(10);
}
+
+ printf("Record fetchlocked.\n");
+ printf("Press enter to release the record ...\n");
+ (void)getchar();
+
+ talloc_free(tmp_ctx);
+ printf("Record released.\n");
}
/*
@@ -85,7 +69,6 @@ int main(int argc, const char *argv[])
struct poptOption popt_options[] = {
POPT_AUTOHELP
POPT_CTDB_CMDLINE
- { "timelimit", 't', POPT_ARG_INT, &timelimit, 0, "timelimit", "integer" },
POPT_TABLEEND
};
int opt;
@@ -131,10 +114,7 @@ int main(int argc, const char *argv[])
event_loop_once(ev);
}
- signal(SIGALRM, alarm_handler);
- alarm(1);
-
- bench_fetch_one_loop(ctdb, ev);
+ fetch_lock_once(ctdb, ev);
return 0;
}
--
CTDB repository
More information about the samba-cvs
mailing list