[SCM] CTDB repository - branch master updated -
1d7c3eb454e33cd17c74606c4ea011fd79959c80
Ronnie Sahlberg
sahlberg at samba.org
Wed May 28 03:45:29 GMT 2008
The branch, master has been updated
via 1d7c3eb454e33cd17c74606c4ea011fd79959c80 (commit)
via 70085523f4c35a20786023c489325554e2a6f9c1 (commit)
via a53db1ec3f29f4418ff51e0f452026c12470bf93 (commit)
via 2da3d1f876f5d654f849af8a3e588f5a61300c3d (commit)
from 9e1adfdd4cc606f4134fdeba8d3539e29e7e5056 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit 1d7c3eb454e33cd17c74606c4ea011fd79959c80
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Wed May 28 13:40:12 2008 +1000
don't bother casting to a void* private_data pointer,
just pass it as the 'state' structure
commit 70085523f4c35a20786023c489325554e2a6f9c1
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Wed May 28 13:31:58 2008 +1000
remove another field we don't need in the childwrite_handle structure
commit a53db1ec3f29f4418ff51e0f452026c12470bf93
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Wed May 28 13:30:22 2008 +1000
remove a comment that is no longer relevant
remove a field in the childwrite_handle structure we don't need
commit 2da3d1f876f5d654f849af8a3e588f5a61300c3d
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Wed May 28 13:04:25 2008 +1000
do persistent writes in a child process
-----------------------------------------------------------------------
Summary of changes:
common/ctdb_ltdb.c | 4 +-
include/ctdb_private.h | 3 +
server/ctdb_persistent.c | 195 ++++++++++++++++++++++++++++++++++++----------
tools/ctdb.c | 3 +
4 files changed, 162 insertions(+), 43 deletions(-)
Changeset truncated at 500 lines:
diff --git a/common/ctdb_ltdb.c b/common/ctdb_ltdb.c
index 5272927..a6bf268 100644
--- a/common/ctdb_ltdb.c
+++ b/common/ctdb_ltdb.c
@@ -160,7 +160,7 @@ int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
/*
write a record to a persistent database
- at this stage the the record is locked by a lockwait child.
+ this is done by a child process
*/
int ctdb_ltdb_persistent_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
struct ctdb_ltdb_header *header, TDB_DATA data)
@@ -189,7 +189,7 @@ int ctdb_ltdb_persistent_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
/* if this is a persistent database without NOSYNC then we
will do this via a transaction */
- if (0 && !(ctdb_db->client_tdb_flags & TDB_NOSYNC)) {
+ if (!(ctdb_db->client_tdb_flags & TDB_NOSYNC)) {
ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
if (ret != 0) {
DEBUG(DEBUG_ERR, (__location__ " Failed to start local transaction\n"));
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 758b506..2d595ff 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -306,11 +306,14 @@ struct ctdb_statistics {
uint32_t pending_calls;
uint32_t lockwait_calls;
uint32_t pending_lockwait_calls;
+ uint32_t childwrite_calls;
+ uint32_t pending_childwrite_calls;
uint32_t memory_used;
uint32_t __last_counter; /* hack for control_statistics_all */
uint32_t max_hop_count;
double max_call_latency;
double max_lockwait_latency;
+ double max_childwrite_latency;
};
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index 6a7a3eb..3ba961e 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -136,7 +136,7 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb,
}
-struct ctdb_persistent_lock_state {
+struct ctdb_persistent_write_state {
struct ctdb_db_context *ctdb_db;
TDB_DATA key;
TDB_DATA data;
@@ -147,9 +147,9 @@ struct ctdb_persistent_lock_state {
/*
- called with a lock held by a lockwait child
+ called from a child process to write the data
*/
-static int ctdb_persistent_store(struct ctdb_persistent_lock_state *state)
+static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
{
struct ctdb_ltdb_header oldheader;
int ret;
@@ -181,25 +181,16 @@ static int ctdb_persistent_store(struct ctdb_persistent_lock_state *state)
/*
- called when we get the lock on the given record
- at this point the lockwait child holds a lock on our behalf
+ called when we the child has completed the persistent write
+ on our behalf
*/
-static void ctdb_persistent_lock_callback(void *private_data)
+static void ctdb_persistent_write_callback(int status, void *private_data)
{
- struct ctdb_persistent_lock_state *state = talloc_get_type(private_data,
- struct ctdb_persistent_lock_state);
- int ret;
+ struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
+ struct ctdb_persistent_write_state);
- ret = tdb_chainlock_mark(state->tdb, state->key);
- if (ret != 0) {
- DEBUG(DEBUG_ERR,("Failed to mark lock in ctdb_persistent_lock_callback\n"));
- ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, ret, NULL);
- return;
- }
- ret = ctdb_persistent_store(state);
- ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, ret, NULL);
- tdb_chainlock_unmark(state->tdb, state->key);
+ ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, status, NULL);
talloc_free(state);
}
@@ -210,12 +201,147 @@ static void ctdb_persistent_lock_callback(void *private_data)
static void ctdb_persistent_lock_timeout(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data)
{
- struct ctdb_persistent_lock_state *state = talloc_get_type(private_data,
- struct ctdb_persistent_lock_state);
+ struct ctdb_persistent_write_state *state = talloc_get_type(private_data,
+ struct ctdb_persistent_write_state);
ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_lock");
talloc_free(state);
}
+struct childwrite_handle {
+ struct ctdb_context *ctdb;
+ struct ctdb_db_context *ctdb_db;
+ struct fd_event *fde;
+ int fd[2];
+ pid_t child;
+ void *private_data;
+ void (*callback)(int, void *);
+ struct timeval start_time;
+};
+
+static int childwrite_destructor(struct childwrite_handle *h)
+{
+ h->ctdb->statistics.pending_childwrite_calls--;
+ kill(h->child, SIGKILL);
+ waitpid(h->child, NULL, 0);
+ return 0;
+}
+
+/* called when the child process has finished writing the record to the
+ database
+*/
+static void childwrite_handler(struct event_context *ev, struct fd_event *fde,
+ uint16_t flags, void *private_data)
+{
+ struct childwrite_handle *h = talloc_get_type(private_data,
+ struct childwrite_handle);
+ void *p = h->private_data;
+ void (*callback)(int, void *) = h->callback;
+ pid_t child = h->child;
+ TALLOC_CTX *tmp_ctx = talloc_new(ev);
+ int ret;
+ char c;
+
+ ctdb_latency(&h->ctdb->statistics.max_childwrite_latency, h->start_time);
+ h->ctdb->statistics.pending_childwrite_calls--;
+
+ /* the handle needs to go away when the context is gone - when
+ the handle goes away this implicitly closes the pipe, which
+ kills the child */
+ talloc_steal(tmp_ctx, h);
+
+ talloc_set_destructor(h, NULL);
+
+ ret = read(h->fd[0], &c, 1);
+ if (ret < 1) {
+ DEBUG(DEBUG_ERR, (__location__ " Read returned %d. Childwrite failed\n", ret));
+ c = 1;
+ }
+
+ callback(c, p);
+
+ kill(child, SIGKILL);
+ waitpid(child, NULL, 0);
+ talloc_free(tmp_ctx);
+}
+
+/* this creates a child process which will take out a tdb transaction
+ and write the record to the database.
+*/
+struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
+ void (*callback)(int, void *private_data),
+ struct ctdb_persistent_write_state *state)
+{
+ struct childwrite_handle *result;
+ int ret;
+ pid_t parent = getpid();
+
+ ctdb_db->ctdb->statistics.childwrite_calls++;
+ ctdb_db->ctdb->statistics.pending_childwrite_calls++;
+
+ if (!(result = talloc_zero(state, struct childwrite_handle))) {
+ ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+ return NULL;
+ }
+
+ ret = pipe(result->fd);
+
+ if (ret != 0) {
+ talloc_free(result);
+ ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+ return NULL;
+ }
+
+ result->child = fork();
+
+ if (result->child == (pid_t)-1) {
+ close(result->fd[0]);
+ close(result->fd[1]);
+ talloc_free(result);
+ ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+ return NULL;
+ }
+
+ result->callback = callback;
+ result->private_data = state;
+ result->ctdb = ctdb_db->ctdb;
+ result->ctdb_db = ctdb_db;
+
+ if (result->child == 0) {
+ char c = 0;
+
+ close(result->fd[0]);
+ ret = ctdb_persistent_store(state);
+ if (ret != 0) {
+ DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
+ c = 1;
+ }
+
+ write(result->fd[1], &c, 1);
+
+ /* make sure we die when our parent dies */
+ while (kill(parent, 0) == 0 || errno != ESRCH) {
+ sleep(5);
+ }
+ _exit(0);
+ }
+
+ close(result->fd[1]);
+ talloc_set_destructor(result, childwrite_destructor);
+
+ result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
+ EVENT_FD_READ|EVENT_FD_AUTOCLOSE, childwrite_handler,
+ (void *)result);
+ if (result->fde == NULL) {
+ talloc_free(result);
+ ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+ return NULL;
+ }
+
+ result->start_time = timeval_current();
+
+ return result;
+}
+
/*
update a record on this node if the new record has a higher rsn than the
current record
@@ -227,8 +353,8 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
struct ctdb_rec_data *rec = (struct ctdb_rec_data *)&recdata.dptr[0];
struct ctdb_db_context *ctdb_db;
uint32_t db_id = rec->reqid;
- struct ctdb_persistent_lock_state *state;
- struct lockwait_handle *handle;
+ struct ctdb_persistent_write_state *state;
+ struct childwrite_handle *handle;
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_update_record when recovery active\n"));
@@ -241,7 +367,7 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
return -1;
}
- state = talloc(ctdb, struct ctdb_persistent_lock_state);
+ state = talloc(ctdb, struct ctdb_persistent_write_state);
CTDB_NO_MEMORY(ctdb, state);
state->ctdb_db = ctdb_db;
@@ -263,26 +389,13 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
state->data.dptr += sizeof(struct ctdb_ltdb_header);
state->data.dsize -= sizeof(struct ctdb_ltdb_header);
-#if 0
- /* We can not take out a lock here ourself since if this persistent
- database needs safe transaction writes we can not be holding
- a lock on the database.
- Therefore we always create a lock wait child to take out and hold
- the lock for us.
- */
- ret = tdb_chainlock_nonblock(state->tdb, state->key);
- if (ret == 0) {
- ret = ctdb_persistent_store(state);
- tdb_chainunlock(state->tdb, state->key);
- talloc_free(state);
- return ret;
- }
-#endif
- /* wait until we have a lock on this record */
- handle = ctdb_lockwait(ctdb_db, state->key, ctdb_persistent_lock_callback, state);
+ /* create a child process to take out a transaction and
+ write the data.
+ */
+ handle = ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state);
if (handle == NULL) {
- DEBUG(DEBUG_ERR,("Failed to setup lockwait handler in ctdb_control_update_record\n"));
+ DEBUG(DEBUG_ERR,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
talloc_free(state);
return -1;
}
diff --git a/tools/ctdb.c b/tools/ctdb.c
index a9839d9..151179a 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -117,6 +117,8 @@ static void show_statistics(struct ctdb_statistics *s)
STATISTICS_FIELD(pending_calls),
STATISTICS_FIELD(lockwait_calls),
STATISTICS_FIELD(pending_lockwait_calls),
+ STATISTICS_FIELD(childwrite_calls),
+ STATISTICS_FIELD(pending_childwrite_calls),
STATISTICS_FIELD(memory_used),
STATISTICS_FIELD(max_hop_count),
};
@@ -139,6 +141,7 @@ static void show_statistics(struct ctdb_statistics *s)
}
printf(" %-30s %.6f sec\n", "max_call_latency", s->max_call_latency);
printf(" %-30s %.6f sec\n", "max_lockwait_latency", s->max_lockwait_latency);
+ printf(" %-30s %.6f sec\n", "max_childwrite_latency", s->max_childwrite_latency);
talloc_free(tmp_ctx);
}
--
CTDB repository
More information about the samba-cvs
mailing list