[SCM] CTDB repository - branch 1.2.40 updated - ctdb-1.2.58-12-g1520ec0
Amitay Isaacs
amitay at samba.org
Wed Mar 6 00:15:14 MST 2013
The branch, 1.2.40 has been updated
via 1520ec0262385894c086740f8486b18f29e3fb80 (commit)
via c9a55cacc0c41ba95360849a13b987d96cd03731 (commit)
via 59b9d58655319b515bc20de2529bfec127b4c3ab (commit)
via 28602eeb613030c49d32156f57614c059b136105 (commit)
via 8909931f7dd067e5319a0db15fcb6cb0c335f903 (commit)
via 865fec8f04a6b8f7416903c4d02c188bd39b9683 (commit)
from d788857b1c96f78b0ffa4d410a935ec59bef9148 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2.40
- Log -----------------------------------------------------------------
commit 1520ec0262385894c086740f8486b18f29e3fb80
Author: Amitay Isaacs <amitay at gmail.com>
Date: Wed Mar 6 17:48:44 2013 +1100
New Version 1.2.59
Signed-off-by: Amitay Isaacs <amitay at gmail.com>
commit c9a55cacc0c41ba95360849a13b987d96cd03731
Author: Amitay Isaacs <amitay at gmail.com>
Date: Mon Feb 18 18:05:28 2013 +1100
ctdbd: Exec lockwait helper for locking a record
Signed-off-by: Amitay Isaacs <amitay at gmail.com>
commit 59b9d58655319b515bc20de2529bfec127b4c3ab
Author: Amitay Isaacs <amitay at gmail.com>
Date: Mon Feb 18 18:04:07 2013 +1100
ctdbd: Create a standalone helper for record locking
Signed-off-by: Amitay Isaacs <amitay at gmail.com>
commit 28602eeb613030c49d32156f57614c059b136105
Author: Stefan Metzmacher <metze at samba.org>
Date: Fri Feb 22 12:45:39 2013 +0100
tevent: optimize adding new timer events
There are two cases:
1. Adding a timer with a zero timestamp.
Such events were used before we had immediate events.
It's likely that there are a lot of these events
and we need to add new ones in fifo order.
2. Adding a timer with a real timestamp.
As these timestamps typically get higher :-)
it's better to traverse the existing list from
the tail.
This is not completely optimal, but it should be better
than before.
Signed-off-by: Stefan Metzmacher <metze at samba.org>
commit 8909931f7dd067e5319a0db15fcb6cb0c335f903
Author: Amitay Isaacs <amitay at gmail.com>
Date: Fri Feb 22 12:59:39 2013 +1100
common/io: For scheduling immediate events use tevent_schedule_immediate
tevent_schedule_immediate() is much more efficient at handling events that need
to be processed immediately rather than creating timed events with
timeval_zero().
Signed-off-by: Amitay Isaacs <amitay at gmail.com>
Cherry-pick-from: 11734be353a1e246163eda631d35dfe55d1d6fb1
commit 865fec8f04a6b8f7416903c4d02c188bd39b9683
Author: Amitay Isaacs <amitay at gmail.com>
Date: Thu Feb 21 13:16:15 2013 +1100
ctdbd: Add an index db for message list for faster searches
When CTDB is busy with lots of smbd, CTDB was spending too much time in
daemon_check_srvids() which searches a list of srvids in the registered
message handlers. Using a hash based index significantly improves the
performance of search in a linked list.
Signed-off-by: Amitay Isaacs <amitay at gmail.com>
Cherry-pick-from: 3e09f25d419635f6dd679b48fa65370f7860be7d
-----------------------------------------------------------------------
Summary of changes:
Makefile.in | 9 ++-
common/ctdb_io.c | 26 +++--
common/ctdb_message.c | 205 ++++++++++++++++++++++++++++++++++++++---
include/ctdb_private.h | 13 ++-
lib/tevent/tevent.c | 1 +
lib/tevent/tevent_internal.h | 1 +
lib/tevent/tevent_timed.c | 62 +++++++++++--
packaging/RPM/ctdb.spec.in | 10 ++-
server/ctdb_daemon.c | 8 +--
server/ctdb_lockwait.c | 105 +++++++++++++++++++---
server/ctdb_lockwait_helper.c | 177 +++++++++++++++++++++++++++++++++++
11 files changed, 559 insertions(+), 58 deletions(-)
create mode 100644 server/ctdb_lockwait_helper.c
Changeset truncated at 500 lines:
diff --git a/Makefile.in b/Makefile.in
index 849abd7..9c4f555 100755
--- a/Makefile.in
+++ b/Makefile.in
@@ -32,7 +32,7 @@ POPT_OBJ = @POPT_OBJ@
CFLAGS=-g -I$(srcdir)/include -Iinclude -Ilib -Ilib/util -I$(srcdir) \
-I@tallocdir@ -I@tdbdir@/include -I@libreplacedir@ \
-DVARDIR=\"$(localstatedir)\" -DETCDIR=\"$(etcdir)\" \
- -DLOGDIR=\"$(logdir)\" \
+ -DLOGDIR=\"$(logdir)\" -DBINDIR=\"$(bindir)\" \
-DUSE_MMAP=1 @CFLAGS@ $(POPT_CFLAGS)
LIB_FLAGS=@LDFLAGS@ -Llib @LIBS@ $(POPT_LIBS) @INFINIBAND_LIBS@ @CTDB_PCAP_LDFLAGS@
@@ -75,7 +75,7 @@ TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_fetch_one \
tests/bin/ctdb_takeover_tests tests/bin/ctdb_update_record \
@INFINIBAND_BINS@
-BINS = bin/ctdb @CTDB_SCSI_IO@ bin/smnotify bin/ping_pong bin/ltdbtool
+BINS = bin/ctdb @CTDB_SCSI_IO@ bin/smnotify bin/ping_pong bin/ltdbtool bin/ctdb_lockwait_helper
SBINS = bin/ctdbd
DIRS = lib bin tests/bin
@@ -112,6 +112,10 @@ bin/ctdbd: $(CTDB_SERVER_OBJ)
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ $(CTDB_SERVER_OBJ) $(LIB_FLAGS)
+bin/ctdb_lockwait_helper: server/ctdb_lockwait_helper.o @TDB_OBJ@
+ @echo Linking $@
+ @$(CC) $(CFLAGS) -o $@ $^ $(LIB_FLAGS)
+
libctdb/libctdb.a: $(CTDB_LIB_OBJ)
@echo Linking $@
-rm -f libctdb.a
@@ -255,6 +259,7 @@ install: all
${INSTALLCMD} -m 755 bin/smnotify $(DESTDIR)$(bindir)
$(INSTALLCMD) -m 755 bin/ping_pong $(DESTDIR)$(bindir)
$(INSTALLCMD) -m 755 bin/ltdbtool $(DESTDIR)$(bindir)
+ $(INSTALLCMD) -m 755 bin/ctdb_lockwait_helper $(DESTDIR)$(bindir)
$(INSTALLCMD) -m 755 libctdb/libctdb.a $(DESTDIR)$(libdir)
${INSTALLCMD} -m 644 include/ctdb.h $(DESTDIR)$(includedir)
${INSTALLCMD} -m 644 include/ctdb_client.h $(DESTDIR)$(includedir)
diff --git a/common/ctdb_io.c b/common/ctdb_io.c
index 2a12a18..4e164d9 100644
--- a/common/ctdb_io.c
+++ b/common/ctdb_io.c
@@ -46,6 +46,7 @@ struct ctdb_queue_pkt {
struct ctdb_queue {
struct ctdb_context *ctdb;
+ struct tevent_immediate *im;
struct ctdb_buffer buffer; /* input buffer */
struct ctdb_queue_pkt *out_queue, *out_queue_tail;
uint32_t out_queue_length;
@@ -82,8 +83,8 @@ static void dump_packet(unsigned char *data, size_t len)
static void queue_process(struct ctdb_queue *queue);
-static void queue_process_event(struct event_context *ev, struct timed_event *te,
- struct timeval t, void *private_data)
+static void queue_process_event(struct tevent_context *ev, struct tevent_immediate *im,
+ void *private_data)
{
struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);
@@ -134,9 +135,9 @@ static void queue_process(struct ctdb_queue *queue)
queue->buffer.length -= pkt_size;
if (queue->buffer.length > 0) {
- /* There is more data to be processed, setup timed event */
- event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
- queue_process_event, queue);
+ /* There is more data to be processed, schedule an event */
+ tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+ queue_process_event, queue);
}
/* It is the responsibility of the callback to free 'data' */
@@ -202,8 +203,8 @@ failed:
/* used when an event triggers a dead queue */
-static void queue_dead(struct event_context *ev, struct timed_event *te,
- struct timeval t, void *private_data)
+static void queue_dead(struct event_context *ev, struct tevent_immediate *im,
+ void *private_data)
{
struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue);
queue->callback(NULL, 0, queue->private_data);
@@ -234,8 +235,8 @@ static void queue_io_write(struct ctdb_queue *queue)
talloc_free(queue->fde);
queue->fde = NULL;
queue->fd = -1;
- event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
- queue_dead, queue);
+ tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+ queue_dead, queue);
return;
}
if (n <= 0) return;
@@ -301,8 +302,8 @@ int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
talloc_free(queue->fde);
queue->fde = NULL;
queue->fd = -1;
- event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
- queue_dead, queue);
+ tevent_schedule_immediate(queue->im, queue->ctdb->ev,
+ queue_dead, queue);
/* yes, we report success, as the dead node is
handled via a separate event */
return 0;
@@ -412,6 +413,9 @@ struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
va_end(ap);
CTDB_NO_MEMORY_NULL(ctdb, queue->name);
+ queue->im= tevent_create_immediate(queue);
+ CTDB_NO_MEMORY_NULL(ctdb, queue->im);
+
queue->ctdb = ctdb;
queue->fd = fd;
queue->alignment = alignment;
diff --git a/common/ctdb_message.c b/common/ctdb_message.c
index 03a4b55..c6506f4 100644
--- a/common/ctdb_message.c
+++ b/common/ctdb_message.c
@@ -2,6 +2,7 @@
ctdb_message protocol code
Copyright (C) Andrew Tridgell 2007
+ Copyright (C) Amitay Isaacs 2013
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -28,16 +29,103 @@
#include "../include/ctdb_private.h"
#include "lib/util/dlinklist.h"
+static int message_list_db_init(struct ctdb_context *ctdb)
+{
+ ctdb->message_list_indexdb = tdb_open("messagedb", 8192,
+ TDB_INTERNAL|TDB_DISALLOW_NESTING,
+ O_RDWR|O_CREAT, 0);
+ if (ctdb->message_list_indexdb == NULL) {
+ DEBUG(DEBUG_ERR, ("Failed to create message list indexdb\n"));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int message_list_db_add(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data)
+{
+ int ret;
+
+ if (ctdb->message_list_indexdb == NULL) {
+ ret = message_list_db_init(ctdb);
+ if (ret < 0) {
+ return -1;
+ }
+ }
+
+ ret = tdb_store(ctdb->message_list_indexdb, key, data, TDB_INSERT);
+ if (ret < 0) {
+ DEBUG(DEBUG_ERR, ("Failed to add message list handler (%s)\n",
+ tdb_errorstr(ctdb->message_list_indexdb)));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int message_list_db_delete(struct ctdb_context *ctdb, TDB_DATA key)
+{
+ int ret;
+
+ if (ctdb->message_list_indexdb == NULL) {
+ return -1;
+ }
+
+ ret = tdb_delete(ctdb->message_list_indexdb, key);
+ if (ret < 0) {
+ DEBUG(DEBUG_ERR, ("Failed to delete message list handler (%s)\n",
+ tdb_errorstr(ctdb->message_list_indexdb)));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int message_list_db_fetch(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA *data)
+{
+ if (ctdb->message_list_indexdb == NULL) {
+ return -1;
+ }
+
+ *data = tdb_fetch(ctdb->message_list_indexdb, key);
+ if (data->dsize == 0) {
+ return -1;
+ }
+ return 0;
+}
+
/*
this dispatches the messages to the registered ctdb message handler
*/
int ctdb_dispatch_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data)
{
- struct ctdb_message_list *ml;
+ struct ctdb_message_list_header *h;
+ struct ctdb_message_list *m;
+ TDB_DATA key, hdata;
+ uint64_t srvid_all = CTDB_SRVID_ALL;
+ int ret;
+
+ key.dptr = (uint8_t *)&srvid;
+ key.dsize = sizeof(uint64_t);
+
+ ret = message_list_db_fetch(ctdb, key, &hdata);
+ if (ret == 0) {
+ h = *(struct ctdb_message_list_header **)hdata.dptr;
- for (ml=ctdb->message_list;ml;ml=ml->next) {
- if (ml->srvid == srvid || ml->srvid == CTDB_SRVID_ALL) {
- ml->message_handler(ctdb, srvid, data, ml->message_private);
+ for (m=h->m; m; m=m->next) {
+ m->message_handler(ctdb, srvid, data, m->message_private);
+ }
+ }
+
+ key.dptr = (uint8_t *)&srvid_all;
+ key.dsize = sizeof(uint64_t);
+
+ ret = message_list_db_fetch(ctdb, key, &hdata);
+ if (ret == 0) {
+ h = *(struct ctdb_message_list_header **)hdata.dptr;
+
+ for(m=h->m; m; m=m->next) {
+ m->message_handler(ctdb, srvid, data, m->message_private);
}
}
@@ -58,13 +146,37 @@ void ctdb_request_message(struct ctdb_context *ctdb, struct ctdb_req_header *hdr
ctdb_dispatch_message(ctdb, c->srvid, data);
}
+/*
+ * When header is freed, remove all the srvid handlers
+ */
+static int message_header_destructor(struct ctdb_message_list_header *h)
+{
+ struct ctdb_message_list *m;
+ TDB_DATA key;
+
+ while (h->m != NULL) {
+ m = h->m;
+ DLIST_REMOVE(h->m, m);
+ TALLOC_FREE(m);
+ }
+
+ key.dptr = (uint8_t *)&h->srvid;
+ key.dsize = sizeof(uint64_t);
+
+ message_list_db_delete(h->ctdb, key);
+ DLIST_REMOVE(h->ctdb->message_list_header, h);
+
+ return 0;
+}
/*
when a client goes away, we need to remove its srvid handler from the list
*/
static int message_handler_destructor(struct ctdb_message_list *m)
{
- DLIST_REMOVE(m->ctdb->message_list, m);
+ struct ctdb_message_list_header *h = m->h;
+
+ DLIST_REMOVE(h->m, m);
return 0;
}
@@ -77,20 +189,47 @@ int ctdb_register_message_handler(struct ctdb_context *ctdb,
ctdb_msg_fn_t handler,
void *private_data)
{
+ struct ctdb_message_list_header *h;
struct ctdb_message_list *m;
+ TDB_DATA key, data;
+ int ret;
- m = talloc(mem_ctx, struct ctdb_message_list);
+ m = talloc_zero(mem_ctx, struct ctdb_message_list);
CTDB_NO_MEMORY(ctdb, m);
- m->ctdb = ctdb;
- m->srvid = srvid;
m->message_handler = handler;
m->message_private = private_data;
-
- DLIST_ADD(ctdb->message_list, m);
- talloc_set_destructor(m, message_handler_destructor);
+ key.dptr = (uint8_t *)&srvid;
+ key.dsize = sizeof(uint64_t);
+
+ ret = message_list_db_fetch(ctdb, key, &data);
+ if (ret < 0) {
+ /* srvid not registered yet */
+ h = talloc_zero(ctdb, struct ctdb_message_list_header);
+ CTDB_NO_MEMORY(ctdb, h);
+
+ h->ctdb = ctdb;
+ h->srvid = srvid;
+
+ data.dptr = (uint8_t *)&h;
+ data.dsize = sizeof(struct ctdb_message_list_header *);
+ ret = message_list_db_add(ctdb, key, data);
+ if (ret < 0) {
+ talloc_free(m);
+ talloc_free(h);
+ return -1;
+ }
+ DLIST_ADD(ctdb->message_list_header, h);
+ talloc_set_destructor(h, message_header_destructor);
+ } else {
+ h = *(struct ctdb_message_list_header **)data.dptr;
+ }
+
+ m->h = h;
+ DLIST_ADD(h->m, m);
+ talloc_set_destructor(m, message_handler_destructor);
return 0;
}
@@ -100,13 +239,53 @@ int ctdb_register_message_handler(struct ctdb_context *ctdb,
*/
int ctdb_deregister_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
{
+ struct ctdb_message_list_header *h;
struct ctdb_message_list *m;
+ TDB_DATA key, data;
+ int ret;
+
+ key.dptr = (uint8_t *)&srvid;
+ key.dsize = sizeof(uint64_t);
+
+ ret = message_list_db_fetch(ctdb, key, &data);
+ if (ret < 0) {
+ return -1;
+ }
- for (m=ctdb->message_list;m;m=m->next) {
- if (m->srvid == srvid && m->message_private == private_data) {
+ h = *(struct ctdb_message_list_header **)data.dptr;
+ for (m=h->m; m; m=m->next) {
+ if (m->message_private == private_data) {
talloc_free(m);
+ if (h->m == NULL) {
+ talloc_free(h);
+ }
return 0;
}
}
+
return -1;
}
+
+
+/*
+ * check if the given srvid exists
+ */
+bool ctdb_check_message_handler(struct ctdb_context *ctdb, uint64_t srvid)
+{
+ struct ctdb_message_list_header *h;
+ TDB_DATA key, data;
+
+ key.dptr = (uint8_t *)&srvid;
+ key.dsize = sizeof(uint64_t);
+
+ if (message_list_db_fetch(ctdb, key, &data) < 0) {
+ return false;
+ }
+
+ h = *(struct ctdb_message_list_header **)data.dptr;
+ if (h->m == NULL) {
+ return false;
+ }
+
+ return true;
+}
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 6f97702..0eef0e3 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -270,10 +270,15 @@ struct ctdb_upcalls {
/* list of message handlers - needs to be changed to a more efficient data
structure so we can find a message handler given a srvid quickly */
-struct ctdb_message_list {
+struct ctdb_message_list_header {
+ struct ctdb_message_list_header *next, *prev;
struct ctdb_context *ctdb;
- struct ctdb_message_list *next, *prev;
uint64_t srvid;
+ struct ctdb_message_list *m;
+};
+struct ctdb_message_list {
+ struct ctdb_message_list *next, *prev;
+ struct ctdb_message_list_header *h;
ctdb_msg_fn_t message_handler;
void *message_private;
};
@@ -451,7 +456,8 @@ struct ctdb_context {
const struct ctdb_upcalls *upcalls; /* transport upcalls */
void *private_data; /* private to transport */
struct ctdb_db_context *db_list;
- struct ctdb_message_list *message_list;
+ struct ctdb_message_list_header *message_list_header;
+ struct tdb_context *message_list_indexdb;
struct ctdb_daemon_data daemon;
struct ctdb_statistics statistics;
struct ctdb_statistics statistics_current;
@@ -971,6 +977,7 @@ int32_t ctdb_control_traverse_kill(struct ctdb_context *ctdb, TDB_DATA indata,
TDB_DATA *outdata, uint32_t srcnode);
int ctdb_dispatch_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data);
+bool ctdb_check_message_handler(struct ctdb_context *ctdb, uint64_t srvid);
int daemon_register_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid);
int ctdb_deregister_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data);
diff --git a/lib/tevent/tevent.c b/lib/tevent/tevent.c
index 5eec5cc..2d9e8d8 100644
--- a/lib/tevent/tevent.c
+++ b/lib/tevent/tevent.c
@@ -162,6 +162,7 @@ int tevent_common_context_destructor(struct tevent_context *ev)
DLIST_REMOVE(ev->fd_events, fd);
}
+ ev->last_zero_timer = NULL;
for (te = ev->timer_events; te; te = tn) {
tn = te->next;
te->event_ctx = NULL;
diff --git a/lib/tevent/tevent_internal.h b/lib/tevent/tevent_internal.h
index ba03bc5..38e627a 100644
--- a/lib/tevent/tevent_internal.h
+++ b/lib/tevent/tevent_internal.h
@@ -228,6 +228,7 @@ struct tevent_context {
/* list of timed events - used by common code */
struct tevent_timer *timer_events;
+ struct tevent_timer *last_zero_timer;
/* list of immediate events - used by common code */
struct tevent_immediate *immediate_events;
diff --git a/lib/tevent/tevent_timed.c b/lib/tevent/tevent_timed.c
index 457ef1c..075d6dc 100644
--- a/lib/tevent/tevent_timed.c
+++ b/lib/tevent/tevent_timed.c
@@ -133,13 +133,18 @@ struct timeval tevent_timeval_current_ofs(uint32_t secs, uint32_t usecs)
*/
static int tevent_common_timed_destructor(struct tevent_timer *te)
{
+ if (te->event_ctx == NULL) {
+ return 0;
+ }
+
tevent_debug(te->event_ctx, TEVENT_DEBUG_TRACE,
"Destroying timer event %p \"%s\"\n",
te, te->handler_name);
- if (te->event_ctx) {
- DLIST_REMOVE(te->event_ctx->timer_events, te);
+ if (te->event_ctx->last_zero_timer == te) {
+ te->event_ctx->last_zero_timer = DLIST_PREV(te);
}
+ DLIST_REMOVE(te->event_ctx->timer_events, te);
return 0;
}
@@ -160,7 +165,8 @@ struct tevent_timer *tevent_common_add_timer(struct tevent_context *ev, TALLOC_C
const char *handler_name,
const char *location)
{
- struct tevent_timer *te, *last_te, *cur_te;
+ struct tevent_timer *te;
+ struct tevent_timer *prev_te = NULL;
te = talloc(mem_ctx?mem_ctx:ev, struct tevent_timer);
if (te == NULL) return NULL;
@@ -173,18 +179,53 @@ struct tevent_timer *tevent_common_add_timer(struct tevent_context *ev, TALLOC_C
te->location = location;
te->additional_data = NULL;
+ if (ev->timer_events == NULL) {
+ ev->last_zero_timer = NULL;
+ }
+
/* keep the list ordered */
- last_te = NULL;
- for (cur_te = ev->timer_events; cur_te; cur_te = cur_te->next) {
- /* if the new event comes before the current one break */
--
CTDB repository
More information about the samba-cvs
mailing list