[SCM] CTDB repository - branch 1.2 updated - ctdb-1.9.1-305-g8a143a9
Ronnie Sahlberg
sahlberg at samba.org
Sun Jan 23 19:08:06 MST 2011
The branch, 1.2 has been updated
via 8a143a97a313a2e50fb409f6382c759dbd14cdcd (commit)
via 68064f7431d1f25e83dc835adeac0b3322fc819e (commit)
via 8f26219bf65c1ed564f98696833c1aed6d8de988 (commit)
via 04944a2d296f8ae2359f6bda493d7eff1da3944c (commit)
via 310fcd19b0b6621a42476b489fc5fb8432ccc764 (commit)
via 8743509378f9ff014251288740068879cc680c3d (commit)
from 3bdb29692460693dfd0f5ffeea0a28eb3eb419fb (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.2
- Log -----------------------------------------------------------------
commit 8a143a97a313a2e50fb409f6382c759dbd14cdcd
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Mon Jan 24 11:42:50 2011 +1100
LockWait congestion.
Add a dlist to track all active lockwait child processes.
Every time a new lockwait handle is created, check if there is already an
active lockwait process for this database/key and if so,
send the new request straight to the overflow queue.
This means we will only have one active lockwait child process for a certain key,
even if there were thousands of fetch-lock requests for this key.
When the lockwait processing finishes for the original request, the processing in do_overflow() will automagically process all remaining keys as well.
Add back a --nosetsched argument to make it easier to run under gdb
commit 68064f7431d1f25e83dc835adeac0b3322fc819e
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Mon Jan 24 09:43:45 2011 +1100
Compile fix
commit 8f26219bf65c1ed564f98696833c1aed6d8de988
Author: Rusty Russell <rusty at rustcorp.com.au>
Date: Fri Jan 21 21:17:02 2011 +1030
ctdb_lockwait: create overflow queue.
Once we have more than 200 children waiting on a particular db, don't create
any more. Just put them on an overflow queue, and when a child gets a lock
search that queue to see if others were after the same lock (they probably
were).
commit 04944a2d296f8ae2359f6bda493d7eff1da3944c
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Mon Jan 24 07:39:33 2011 +1100
Add a new test tool that fetch locks a record and then blocks until it receives
user input to unlock the record again.
commit 310fcd19b0b6621a42476b489fc5fb8432ccc764
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Jan 21 10:56:56 2011 +1100
60.nfs
Don't update the statd settings that often.
When we have very many nodes and very many ips, this would generate
a lot of unnecessary load on the system.
commit 8743509378f9ff014251288740068879cc680c3d
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Wed Jan 19 08:00:36 2011 +1100
TDB : Fix for a deadlock with transaction lock and lockall/lockallmark
causing ctdbd hangs
-----------------------------------------------------------------------
Summary of changes:
Makefile.in | 6 +-
config/events.d/60.nfs | 4 +-
include/ctdb_private.h | 3 +
packaging/RPM/ctdb.spec.in | 5 +-
server/ctdb_lockwait.c | 94 +++++++++++++++++---
server/ctdbd.c | 8 ++-
.../{ctdb_fetch_one.c => ctdb_fetch_lock_once.c} | 60 ++++--------
7 files changed, 121 insertions(+), 59 deletions(-)
copy tests/src/{ctdb_fetch_one.c => ctdb_fetch_lock_once.c} (69%)
Changeset truncated at 500 lines:
diff --git a/Makefile.in b/Makefile.in
index e5ef81f..6bbf616 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -67,7 +67,7 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_fetch_one \
- tests/bin/ctdb_store \
+ tests/bin/ctdb_fetch_lock_once tests/bin/ctdb_store \
tests/bin/ctdb_randrec tests/bin/ctdb_persistent \
tests/bin/ctdb_traverse tests/bin/rb_test tests/bin/ctdb_transaction \
@INFINIBAND_BINS@
@@ -162,6 +162,10 @@ tests/bin/ctdb_fetch_one: $(CTDB_CLIENT_OBJ) tests/src/ctdb_fetch_one.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_one.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+tests/bin/ctdb_fetch_lock_once: $(CTDB_CLIENT_OBJ) tests/src/ctdb_fetch_lock_once.o
+ @echo Linking $@
+ @$(CC) $(CFLAGS) -o $@ tests/src/ctdb_fetch_lock_once.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+
tests/bin/ctdb_store: $(CTDB_CLIENT_OBJ) tests/src/ctdb_store.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_store.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
diff --git a/config/events.d/60.nfs b/config/events.d/60.nfs
index 79a071b..0cea531 100755
--- a/config/events.d/60.nfs
+++ b/config/events.d/60.nfs
@@ -179,11 +179,11 @@ case "$1" in
$cmd &
}
- # once every 60 seconds, update the statd state database for which
+ # once every 600 seconds, update the statd state database for which
# clients need notifications
LAST_UPDATE=`stat --printf="%Y" $CTDB_VARDIR/state/statd/update-trigger 2>/dev/null`
CURRENT_TIME=`date +"%s"`
- [ $CURRENT_TIME -ge $(($LAST_UPDATE + 60)) ] && {
+ [ $CURRENT_TIME -ge $(($LAST_UPDATE + 600)) ] && {
mkdir -p $CTDB_VARDIR/state/statd
touch $CTDB_VARDIR/state/statd/update-trigger
$CTDB_BASE/statd-callout updatelocal &
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index cb2b066..4dcf9a5 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -506,6 +506,9 @@ struct ctdb_db_context {
bool transaction_active;
struct ctdb_vacuum_handle *vacuum_handle;
char *unhealthy_reason;
+ int pending_requests;
+ struct lockwait_handle *lockwait_active;
+ struct lockwait_handle *lockwait_overflow;
};
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index 4fe9ae0..ba26b15 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -3,7 +3,7 @@ Name: ctdb
Summary: Clustered TDB
Vendor: Samba Team
Packager: Samba Team <samba at samba.org>
-Version: 1.2.17
+Version: 1.2.18
Release: 1GITHASH
Epoch: 0
License: GNU GPL version 3
@@ -143,6 +143,9 @@ development libraries for ctdb
%{_libdir}/libctdb.a
%changelog
+* Wed Jan 18 2011 : Version 1.2.18
+ - Fix for a deadlock in the TDB library, causing "Time Jumped" error messages
+ and hung CTDBD process.
* Mon Jan 17 2011 : Version 1.2.17
- Updates to the event logging from Christian.
- Libctdb : traverse support
diff --git a/server/ctdb_lockwait.c b/server/ctdb_lockwait.c
index 1d3a597..9860692 100644
--- a/server/ctdb_lockwait.c
+++ b/server/ctdb_lockwait.c
@@ -23,10 +23,12 @@
#include "system/wait.h"
#include "db_wrap.h"
#include "lib/tdb/include/tdb.h"
+#include "lib/util/dlinklist.h"
#include "../include/ctdb_private.h"
struct lockwait_handle {
+ struct lockwait_handle *next, *prev;
struct ctdb_context *ctdb;
struct ctdb_db_context *ctdb_db;
struct fd_event *fde;
@@ -38,6 +40,45 @@ struct lockwait_handle {
struct timeval start_time;
};
+/* If we managed to obtain a lock, find any overflow records which wanted the
+ * same one and do all the callbacks at once. */
+static void do_overflow(struct ctdb_db_context *ctdb_db,
+ TDB_DATA key)
+{
+ struct lockwait_handle *i, *next;
+ TALLOC_CTX *tmp_ctx = talloc_new(ctdb_db);
+
+ for (i = ctdb_db->lockwait_overflow; i; i = next) {
+ /* Careful: destructor removes it from list! */
+ next = i->next;
+ if (key.dsize == i->key.dsize
+ && memcmp(key.dptr, i->key.dptr, key.dsize) == 0) {
+ /* Callback might free them, so reparent. */
+ talloc_steal(tmp_ctx, i);
+ i->callback(i->private_data);
+ }
+ }
+
+ /* This will free them if callback didn't. */
+ talloc_free(tmp_ctx);
+
+ /* Remove one from the overflow queue if there is one. */
+ if (ctdb_db->lockwait_overflow) {
+ i = ctdb_db->lockwait_overflow;
+ ctdb_lockwait(ctdb_db, i->key, i->callback, i->private_data);
+ talloc_free(i);
+ }
+}
+
+static int lockwait_destructor(struct lockwait_handle *h)
+{
+ CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
+ kill(h->child, SIGKILL);
+ h->ctdb_db->pending_requests--;
+ DLIST_REMOVE(h->ctdb_db->lockwait_active, h);
+ return 0;
+}
+
static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
uint16_t flags, void *private_data)
{
@@ -45,16 +86,14 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
struct lockwait_handle);
void (*callback)(void *) = h->callback;
void *p = h->private_data;
- pid_t child = h->child;
TDB_DATA key = h->key;
struct tdb_context *tdb = h->ctdb_db->ltdb->tdb;
TALLOC_CTX *tmp_ctx = talloc_new(ev);
key.dptr = talloc_memdup(tmp_ctx, key.dptr, key.dsize);
+ h->ctdb_db->pending_requests--;
- talloc_set_destructor(h, NULL);
CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "lockwait", lockwait_latency, h->start_time);
- CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
/* the handle needs to go away when the context is gone - when
the handle goes away this implicitly closes the pipe, which
@@ -69,16 +108,19 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
tdb_chainlock_mark(tdb, key);
callback(p);
+ if (h->ctdb_db->lockwait_overflow) {
+ do_overflow(h->ctdb_db, key);
+ }
tdb_chainlock_unmark(tdb, key);
- kill(child, SIGKILL);
talloc_free(tmp_ctx);
}
-static int lockwait_destructor(struct lockwait_handle *h)
+
+static int overflow_lockwait_destructor(struct lockwait_handle *h)
{
CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
- kill(h->child, SIGKILL);
+ DLIST_REMOVE(h->ctdb_db->lockwait_overflow, h);
return 0;
}
@@ -97,7 +139,7 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
void (*callback)(void *private_data),
void *private_data)
{
- struct lockwait_handle *result;
+ struct lockwait_handle *result, *i;
int ret;
pid_t parent = getpid();
@@ -109,6 +151,33 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
return NULL;
}
+ result->callback = callback;
+ result->private_data = private_data;
+ result->ctdb = ctdb_db->ctdb;
+ result->ctdb_db = ctdb_db;
+ result->key = key;
+
+ /* If we already have a lockwait child for this request, then put this
+ request on the overflow queue straight away
+ */
+ for (i = ctdb_db->lockwait_active; i; i = i->next) {
+ if (key.dsize == i->key.dsize
+ && memcmp(key.dptr, i->key.dptr, key.dsize) == 0) {
+ DLIST_ADD_END(ctdb_db->lockwait_overflow, result, NULL);
+ talloc_set_destructor(result, overflow_lockwait_destructor);
+ return result;
+ }
+ }
+
+ /* Don't fire off too many children at once! */
+ if (ctdb_db->pending_requests > 200) {
+ DLIST_ADD_END(ctdb_db->lockwait_overflow, result, NULL);
+ talloc_set_destructor(result, overflow_lockwait_destructor);
+ DEBUG(DEBUG_DEBUG, (__location__ " Created overflow for %s\n",
+ ctdb_db->db_name));
+ return result;
+ }
+
ret = pipe(result->fd);
if (ret != 0) {
@@ -127,12 +196,6 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
return NULL;
}
- result->callback = callback;
- result->private_data = private_data;
- result->ctdb = ctdb_db->ctdb;
- result->ctdb_db = ctdb_db;
- result->key = key;
-
if (result->child == 0) {
char c = 0;
close(result->fd[0]);
@@ -149,8 +212,12 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
close(result->fd[1]);
set_close_on_exec(result->fd[0]);
+ /* This is an active lockwait child process */
+ DLIST_ADD_END(ctdb_db->lockwait_active, result, NULL);
+
DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child lockwait process\n", result->fd[0]));
+ ctdb_db->pending_requests++;
talloc_set_destructor(result, lockwait_destructor);
result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
@@ -164,6 +231,5 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
tevent_fd_set_auto_close(result->fde);
result->start_time = timeval_current();
-
return result;
}
diff --git a/server/ctdbd.c b/server/ctdbd.c
index bddd658..9eaba1d 100644
--- a/server/ctdbd.c
+++ b/server/ctdbd.c
@@ -43,6 +43,7 @@ static struct {
const char *single_public_ip;
const char *node_ip;
int valgrinding;
+ int nosetsched;
int use_syslog;
int start_as_disabled;
int start_as_stopped;
@@ -133,6 +134,7 @@ int main(int argc, const char *argv[])
{ "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
{ "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
+ { "nosetsched", 0, POPT_ARG_NONE, &options.nosetsched, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
{ "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
{ "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
{ "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
@@ -315,7 +317,11 @@ int main(int argc, const char *argv[])
}
ctdb->valgrinding = options.valgrinding;
- ctdb->do_setsched = !ctdb->valgrinding;
+ if (options.valgrinding || options.nosetsched) {
+ ctdb->do_setsched = 0;
+ } else {
+ ctdb->do_setsched = 1;
+ }
if (options.max_persistent_check_errors < 0) {
ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
diff --git a/tests/src/ctdb_fetch_one.c b/tests/src/ctdb_fetch_lock_once.c
similarity index 69%
copy from tests/src/ctdb_fetch_one.c
copy to tests/src/ctdb_fetch_lock_once.c
index 15be3ca..ff131b8 100644
--- a/tests/src/ctdb_fetch_one.c
+++ b/tests/src/ctdb_fetch_lock_once.c
@@ -1,6 +1,6 @@
/*
- simple ctdb benchmark
- This test just fetch_locks a record and releases it in a loop.
+ simple ctdb test tool
+ This test just fetch_locks a record and releases it once.
Copyright (C) Ronnie Sahlberg 2009
@@ -24,55 +24,39 @@
#include "popt.h"
#include "cmdline.h"
-#include <sys/time.h>
-#include <time.h>
-
-static int timelimit = 10;
-static int lock_count = 0;
-
static struct ctdb_db_context *ctdb_db;
#define TESTKEY "testkey"
-static void alarm_handler(int sig)
-{
- printf("Locks:%d\n", lock_count);
- lock_count=0;
-
- timelimit--;
- if (timelimit <= 0) {
- exit(0);
- }
- alarm(1);
-}
-
/*
- Just try locking/unlocking the same record over and over
+ Just try locking/unlocking a single record once
*/
-static void bench_fetch_one_loop(struct ctdb_context *ctdb, struct event_context *ev)
+static void fetch_lock_once(struct ctdb_context *ctdb, struct event_context *ev)
{
+ TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
TDB_DATA key, data;
+ struct ctdb_record_handle *h;
key.dptr = discard_const(TESTKEY);
key.dsize = strlen(TESTKEY);
+ printf("Trying to fetch lock the record ...\n");
- while (1) {
- TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
- struct ctdb_record_handle *h;
-
- h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
- if (h == NULL) {
- printf("Failed to fetch record '%s' on node %d\n",
- (const char *)key.dptr, ctdb_get_pnn(ctdb));
- talloc_free(tmp_ctx);
- continue;
- }
-
+ h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
+ if (h == NULL) {
+ printf("Failed to fetch record '%s' on node %d\n",
+ (const char *)key.dptr, ctdb_get_pnn(ctdb));
talloc_free(tmp_ctx);
- lock_count++;
+ exit(10);
}
+
+ printf("Record fetchlocked.\n");
+ printf("Press enter to release the record ...\n");
+ (void)getchar();
+
+ talloc_free(tmp_ctx);
+ printf("Record released.\n");
}
/*
@@ -85,7 +69,6 @@ int main(int argc, const char *argv[])
struct poptOption popt_options[] = {
POPT_AUTOHELP
POPT_CTDB_CMDLINE
- { "timelimit", 't', POPT_ARG_INT, &timelimit, 0, "timelimit", "integer" },
POPT_TABLEEND
};
int opt;
@@ -131,10 +114,7 @@ int main(int argc, const char *argv[])
event_loop_once(ev);
}
- signal(SIGALRM, alarm_handler);
- alarm(1);
-
- bench_fetch_one_loop(ctdb, ev);
+ fetch_lock_once(ctdb, ev);
return 0;
}
--
CTDB repository
More information about the samba-cvs
mailing list