[SCM] CTDB repository - branch master updated - 1d7c3eb454e33cd17c74606c4ea011fd79959c80

Ronnie Sahlberg sahlberg at samba.org
Wed May 28 03:45:29 GMT 2008


The branch, master has been updated
       via  1d7c3eb454e33cd17c74606c4ea011fd79959c80 (commit)
       via  70085523f4c35a20786023c489325554e2a6f9c1 (commit)
       via  a53db1ec3f29f4418ff51e0f452026c12470bf93 (commit)
       via  2da3d1f876f5d654f849af8a3e588f5a61300c3d (commit)
      from  9e1adfdd4cc606f4134fdeba8d3539e29e7e5056 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 1d7c3eb454e33cd17c74606c4ea011fd79959c80
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed May 28 13:40:12 2008 +1000

    don't bother casting to a void* private_data pointer,
    just pass it as 'state' structure

commit 70085523f4c35a20786023c489325554e2a6f9c1
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed May 28 13:31:58 2008 +1000

    remove another field we don't need in the childwrite_handle structure

commit a53db1ec3f29f4418ff51e0f452026c12470bf93
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed May 28 13:30:22 2008 +1000

    remove a comment that is no longer relevant
    
    remove a field in the childwrite_handle structure we don't need

commit 2da3d1f876f5d654f849af8a3e588f5a61300c3d
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Wed May 28 13:04:25 2008 +1000

    do persistent writes in a child process

-----------------------------------------------------------------------

Summary of changes:
 common/ctdb_ltdb.c       |    4 +-
 include/ctdb_private.h   |    3 +
 server/ctdb_persistent.c |  195 ++++++++++++++++++++++++++++++++++++----------
 tools/ctdb.c             |    3 +
 4 files changed, 162 insertions(+), 43 deletions(-)


Changeset truncated at 500 lines:

diff --git a/common/ctdb_ltdb.c b/common/ctdb_ltdb.c
index 5272927..a6bf268 100644
--- a/common/ctdb_ltdb.c
+++ b/common/ctdb_ltdb.c
@@ -160,7 +160,7 @@ int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
 
 /*
   write a record to a persistent database
-  at this stage the the record is locked by a lockwait child.
+  this is done by a child process
 */
 int ctdb_ltdb_persistent_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, 
 		    struct ctdb_ltdb_header *header, TDB_DATA data)
@@ -189,7 +189,7 @@ int ctdb_ltdb_persistent_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
 
 	/* if this is a persistent database without NOSYNC then we
 	   will do this via a transaction */
-	if (0 && !(ctdb_db->client_tdb_flags & TDB_NOSYNC)) {
+	if (!(ctdb_db->client_tdb_flags & TDB_NOSYNC)) {
 		ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
 		if (ret != 0) {
 			DEBUG(DEBUG_ERR, (__location__ " Failed to start local transaction\n"));
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 758b506..2d595ff 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -306,11 +306,14 @@ struct ctdb_statistics {
 	uint32_t pending_calls;
 	uint32_t lockwait_calls;
 	uint32_t pending_lockwait_calls;
+	uint32_t childwrite_calls;
+	uint32_t pending_childwrite_calls;
 	uint32_t memory_used;
 	uint32_t __last_counter; /* hack for control_statistics_all */
 	uint32_t max_hop_count;
 	double max_call_latency;
 	double max_lockwait_latency;
+	double max_childwrite_latency;
 };
 
 
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index 6a7a3eb..3ba961e 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -136,7 +136,7 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb,
 }
 
 
-struct ctdb_persistent_lock_state {
+struct ctdb_persistent_write_state {
 	struct ctdb_db_context *ctdb_db;
 	TDB_DATA key;
 	TDB_DATA data;
@@ -147,9 +147,9 @@ struct ctdb_persistent_lock_state {
 
 
 /*
-  called with a lock held by a lockwait child
+  called from a child process to write the data
  */
-static int ctdb_persistent_store(struct ctdb_persistent_lock_state *state)
+static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
 {
 	struct ctdb_ltdb_header oldheader;
 	int ret;
@@ -181,25 +181,16 @@ static int ctdb_persistent_store(struct ctdb_persistent_lock_state *state)
 
 
 /*
-  called when we get the lock on the given record
-  at this point the lockwait child holds a lock on our behalf
+  called when the child has completed the persistent write
+  on our behalf
  */
-static void ctdb_persistent_lock_callback(void *private_data)
+static void ctdb_persistent_write_callback(int status, void *private_data)
 {
-	struct ctdb_persistent_lock_state *state = talloc_get_type(private_data, 
-								   struct ctdb_persistent_lock_state);
-	int ret;
+	struct ctdb_persistent_write_state *state = talloc_get_type(private_data, 
+								   struct ctdb_persistent_write_state);
 
-	ret = tdb_chainlock_mark(state->tdb, state->key);
-	if (ret != 0) {
-		DEBUG(DEBUG_ERR,("Failed to mark lock in ctdb_persistent_lock_callback\n"));
-		ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, ret, NULL);
-		return;
-	}
 
-	ret = ctdb_persistent_store(state);
-	ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, ret, NULL);
-	tdb_chainlock_unmark(state->tdb, state->key);
+	ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, status, NULL);
 
 	talloc_free(state);
 }
@@ -210,12 +201,147 @@ static void ctdb_persistent_lock_callback(void *private_data)
 static void ctdb_persistent_lock_timeout(struct event_context *ev, struct timed_event *te, 
 					 struct timeval t, void *private_data)
 {
-	struct ctdb_persistent_lock_state *state = talloc_get_type(private_data, 
-								   struct ctdb_persistent_lock_state);
+	struct ctdb_persistent_write_state *state = talloc_get_type(private_data, 
+								   struct ctdb_persistent_write_state);
 	ctdb_request_control_reply(state->ctdb_db->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_lock");
 	talloc_free(state);
 }
 
+struct childwrite_handle {
+	struct ctdb_context *ctdb;
+	struct ctdb_db_context *ctdb_db;
+	struct fd_event *fde;
+	int fd[2];
+	pid_t child;
+	void *private_data;
+	void (*callback)(int, void *);
+	struct timeval start_time;
+};
+
+static int childwrite_destructor(struct childwrite_handle *h)
+{
+	h->ctdb->statistics.pending_childwrite_calls--;
+	kill(h->child, SIGKILL);
+	waitpid(h->child, NULL, 0);
+	return 0;
+}
+
+/* called when the child process has finished writing the record to the
+   database
+*/
+static void childwrite_handler(struct event_context *ev, struct fd_event *fde, 
+			     uint16_t flags, void *private_data)
+{
+	struct childwrite_handle *h = talloc_get_type(private_data, 
+						     struct childwrite_handle);
+	void *p = h->private_data;
+	void (*callback)(int, void *) = h->callback;
+	pid_t child = h->child;
+	TALLOC_CTX *tmp_ctx = talloc_new(ev);
+	int ret;
+	char c;
+
+	ctdb_latency(&h->ctdb->statistics.max_childwrite_latency, h->start_time);
+	h->ctdb->statistics.pending_childwrite_calls--;
+
+	/* the handle needs to go away when the context is gone - when
+	   the handle goes away this implicitly closes the pipe, which
+	   kills the child */
+	talloc_steal(tmp_ctx, h);
+
+	talloc_set_destructor(h, NULL);
+
+	ret = read(h->fd[0], &c, 1);
+	if (ret < 1) {
+		DEBUG(DEBUG_ERR, (__location__ " Read returned %d. Childwrite failed\n", ret));
+		c = 1;
+	}
+
+	callback(c, p);
+
+	kill(child, SIGKILL);
+	waitpid(child, NULL, 0);
+	talloc_free(tmp_ctx);
+}
+
+/* this creates a child process which will take out a tdb transaction
+   and write the record to the database.
+*/
+struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
+				void (*callback)(int, void *private_data),
+				struct ctdb_persistent_write_state *state)
+{
+	struct childwrite_handle *result;
+	int ret;
+	pid_t parent = getpid();
+
+	ctdb_db->ctdb->statistics.childwrite_calls++;
+	ctdb_db->ctdb->statistics.pending_childwrite_calls++;
+
+	if (!(result = talloc_zero(state, struct childwrite_handle))) {
+		ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+		return NULL;
+	}
+
+	ret = pipe(result->fd);
+
+	if (ret != 0) {
+		talloc_free(result);
+		ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+		return NULL;
+	}
+
+	result->child = fork();
+
+	if (result->child == (pid_t)-1) {
+		close(result->fd[0]);
+		close(result->fd[1]);
+		talloc_free(result);
+		ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+		return NULL;
+	}
+
+	result->callback = callback;
+	result->private_data = state;
+	result->ctdb = ctdb_db->ctdb;
+	result->ctdb_db = ctdb_db;
+
+	if (result->child == 0) {
+		char c = 0;
+
+		close(result->fd[0]);
+		ret = ctdb_persistent_store(state);
+		if (ret != 0) {
+			DEBUG(DEBUG_ERR, (__location__ " Failed to write persistent data\n"));
+			c = 1;
+		}
+
+		write(result->fd[1], &c, 1);
+
+		/* make sure we die when our parent dies */
+		while (kill(parent, 0) == 0 || errno != ESRCH) {
+			sleep(5);
+		}
+		_exit(0);
+	}
+
+	close(result->fd[1]);
+	talloc_set_destructor(result, childwrite_destructor);
+
+	result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
+				   EVENT_FD_READ|EVENT_FD_AUTOCLOSE, childwrite_handler,
+				   (void *)result);
+	if (result->fde == NULL) {
+		talloc_free(result);
+		ctdb_db->ctdb->statistics.pending_childwrite_calls--;
+		return NULL;
+	}
+
+	result->start_time = timeval_current();
+
+	return result;
+}
+
 /* 
    update a record on this node if the new record has a higher rsn than the
    current record
@@ -227,8 +353,8 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
 	struct ctdb_rec_data *rec = (struct ctdb_rec_data *)&recdata.dptr[0];
 	struct ctdb_db_context *ctdb_db;
 	uint32_t db_id = rec->reqid;
-	struct ctdb_persistent_lock_state *state;
-	struct lockwait_handle *handle;
+	struct ctdb_persistent_write_state *state;
+	struct childwrite_handle *handle;
 
 	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
 		DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_update_record when recovery active\n"));
@@ -241,7 +367,7 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
 		return -1;
 	}
 
-	state = talloc(ctdb, struct ctdb_persistent_lock_state);
+	state = talloc(ctdb, struct ctdb_persistent_write_state);
 	CTDB_NO_MEMORY(ctdb, state);
 
 	state->ctdb_db = ctdb_db;
@@ -263,26 +389,13 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
 	state->data.dptr  += sizeof(struct ctdb_ltdb_header);
 	state->data.dsize -= sizeof(struct ctdb_ltdb_header);
 
-#if 0
-	/* We can not take out a lock here ourself since if this persistent
-	   database needs safe transaction writes we can not be holding
-	   a lock on the database.
-	   Therefore we always create a lock wait child to take out and hold
-	   the lock for us.
-	*/
-	ret = tdb_chainlock_nonblock(state->tdb, state->key);
-	if (ret == 0) {
-		ret = ctdb_persistent_store(state);
-		tdb_chainunlock(state->tdb, state->key);
-		talloc_free(state);
-		return ret;
-	}
-#endif
 
-	/* wait until we have a lock on this record */
-	handle = ctdb_lockwait(ctdb_db, state->key, ctdb_persistent_lock_callback, state);
+	/* create a child process to take out a transaction and 
+	   write the data.
+	*/
+	handle = ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state);
 	if (handle == NULL) {
-		DEBUG(DEBUG_ERR,("Failed to setup lockwait handler in ctdb_control_update_record\n"));
+		DEBUG(DEBUG_ERR,("Failed to setup childwrite handler in ctdb_control_update_record\n"));
 		talloc_free(state);
 		return -1;
 	}
diff --git a/tools/ctdb.c b/tools/ctdb.c
index a9839d9..151179a 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -117,6 +117,8 @@ static void show_statistics(struct ctdb_statistics *s)
 		STATISTICS_FIELD(pending_calls),
 		STATISTICS_FIELD(lockwait_calls),
 		STATISTICS_FIELD(pending_lockwait_calls),
+		STATISTICS_FIELD(childwrite_calls),
+		STATISTICS_FIELD(pending_childwrite_calls),
 		STATISTICS_FIELD(memory_used),
 		STATISTICS_FIELD(max_hop_count),
 	};
@@ -139,6 +141,7 @@ static void show_statistics(struct ctdb_statistics *s)
 	}
 	printf(" %-30s     %.6f sec\n", "max_call_latency", s->max_call_latency);
 	printf(" %-30s     %.6f sec\n", "max_lockwait_latency", s->max_lockwait_latency);
+	printf(" %-30s     %.6f sec\n", "max_childwrite_latency", s->max_childwrite_latency);
 	talloc_free(tmp_ctx);
 }
 


-- 
CTDB repository


More information about the samba-cvs mailing list