[SCM] CTDB repository - branch master updated -
3059ab5f3f21e331b80728773c36a74620e46677
Ronnie Sahlberg
sahlberg at samba.org
Fri Aug 8 03:34:44 GMT 2008
The branch, master has been updated
via 3059ab5f3f21e331b80728773c36a74620e46677 (commit)
via b3b9707dd8244758ff1080401a9e03e74766e1ab (commit)
via 7f29c50ccbc7789bfbc20bcb4b65758af9ebe6c5 (commit)
via 7c6b621f7307dc39ffcd7d965ac613642af201b8 (commit)
via e75cc3a030a8ccb43961cf80ff10d41ec81a24b0 (commit)
via 2426b9010ef45f5e96ffc12b8a69a3b0566b4f98 (commit)
via a4814aa8b0b165b9d6c4c55fc5aee33cd1a570bd (commit)
via 7ed5fbe7fa3bc3cb729d9b516d2a73d52e28d22d (commit)
via 6915661a460cd589b441ac7cd8695f35c4e83113 (commit)
from 58e6dc722ad1e2415b71baf1d471885169dde14d (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit 3059ab5f3f21e331b80728773c36a74620e46677
Merge: e75cc3a030a8ccb43961cf80ff10d41ec81a24b0 b3b9707dd8244758ff1080401a9e03e74766e1ab
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Aug 8 13:11:07 2008 +1000
Merge git://git.samba.org/tridge/ctdb
commit b3b9707dd8244758ff1080401a9e03e74766e1ab
Author: Andrew Tridgell <tridge at samba.org>
Date: Fri Aug 8 13:11:41 2008 +1000
added retry handling in client
commit 7f29c50ccbc7789bfbc20bcb4b65758af9ebe6c5
Author: Andrew Tridgell <tridge at samba.org>
Date: Fri Aug 8 13:11:28 2008 +1000
added a new control CTDB_CONTROL_TRANS2_COMMIT_RETRY so we can tell
the difference between an initial commit attempt and a retry, which
allows us to get the persistent updates counter right for retries
commit 7c6b621f7307dc39ffcd7d965ac613642af201b8
Author: Andrew Tridgell <tridge at samba.org>
Date: Fri Aug 8 11:04:21 2008 +1000
imported failure handling from dbwrap_ctdb.c
commit e75cc3a030a8ccb43961cf80ff10d41ec81a24b0
Merge: a4814aa8b0b165b9d6c4c55fc5aee33cd1a570bd 7c6b621f7307dc39ffcd7d965ac613642af201b8
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Aug 8 10:59:40 2008 +1000
Merge git://git.samba.org/tridge/ctdb
commit 2426b9010ef45f5e96ffc12b8a69a3b0566b4f98
Author: Andrew Tridgell <tridge at samba.org>
Date: Fri Aug 8 10:15:23 2008 +1000
save writing the same data twice
commit a4814aa8b0b165b9d6c4c55fc5aee33cd1a570bd
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date: Fri Aug 8 10:01:20 2008 +1000
new version 1.0.54
commit 7ed5fbe7fa3bc3cb729d9b516d2a73d52e28d22d
Author: Andrew Tridgell <tridge at samba.org>
Date: Fri Aug 8 10:00:33 2008 +1000
up release number
commit 6915661a460cd589b441ac7cd8695f35c4e83113
Author: Andrew Tridgell <tridge at samba.org>
Date: Fri Aug 8 09:58:49 2008 +1000
return a more detailed error code from a trans2 commit error
-----------------------------------------------------------------------
Summary of changes:
client/ctdb_client.c | 78 +++++++++++++++++++++++++++++++++++----------
include/ctdb_private.h | 11 ++++++
packaging/RPM/ctdb.spec | 6 +++-
server/ctdb_control.c | 1 +
server/ctdb_persistent.c | 35 +++++++++++++++++---
5 files changed, 107 insertions(+), 24 deletions(-)
Changeset truncated at 500 lines:
diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 2b31d81..0d85374 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -3068,12 +3068,13 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h,
{
TALLOC_CTX *tmp_ctx = talloc_new(h);
struct ctdb_ltdb_header header;
+ TDB_DATA olddata;
int ret;
ZERO_STRUCT(header);
/* we need the header so we can update the RSN */
- ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, NULL);
+ ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata);
if (ret == -1 && header.dmaster == (uint32_t)-1) {
/* the record doesn't exist - create one with us as dmaster.
This is only safe because we are in a transaction and this
@@ -3086,6 +3087,13 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h,
return ret;
}
+ if (data.dsize == olddata.dsize &&
+ memcmp(data.dptr, olddata.dptr, data.dsize) == 0) {
+ /* save writing the same data */
+ talloc_free(tmp_ctx);
+ return 0;
+ }
+
header.rsn++;
if (!h->in_replay) {
@@ -3095,13 +3103,13 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h,
talloc_free(tmp_ctx);
return -1;
}
-
- h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
- if (h->m_write == NULL) {
- DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
- talloc_free(tmp_ctx);
- return -1;
- }
+ }
+
+ h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
+ if (h->m_write == NULL) {
+ DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
+ talloc_free(tmp_ctx);
+ return -1;
}
ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data);
@@ -3120,6 +3128,8 @@ static int ctdb_replay_transaction(struct ctdb_transaction_handle *h)
struct ctdb_rec_data *rec = NULL;
h->in_replay = true;
+ talloc_free(h->m_write);
+ h->m_write = NULL;
ret = ctdb_transaction_fetch_start(h);
if (ret != 0) {
@@ -3171,16 +3181,11 @@ failed:
*/
int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
{
- int ret;
+ int ret, retries=0;
int32_t status;
struct ctdb_context *ctdb = h->ctdb_db->ctdb;
struct timeval timeout;
-
- if (h->m_write == NULL) {
- /* no changes were made */
- talloc_free(h);
- return 0;
- }
+ enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR;
talloc_set_destructor(h, NULL);
@@ -3200,24 +3205,61 @@ int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
*/
again:
+ if (h->m_write == NULL) {
+ /* no changes were made */
+ tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
+ talloc_free(h);
+ return 0;
+ }
+
/* tell ctdbd to commit to the other nodes */
timeout = timeval_current_ofs(1, 0);
ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
- CTDB_CONTROL_TRANS2_COMMIT, 0,
+ retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, 0,
ctdb_marshall_finish(h->m_write), NULL, NULL, &status,
&timeout, NULL);
if (ret != 0 || status != 0) {
tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
sleep(1);
+
+ if (ret != 0) {
+ failure_control = CTDB_CONTROL_TRANS2_ERROR;
+ } else {
+ /* work out what error code we will give if we
+ have to fail the operation */
+ switch ((enum ctdb_trans2_commit_error)status) {
+ case CTDB_TRANS2_COMMIT_SUCCESS:
+ case CTDB_TRANS2_COMMIT_SOMEFAIL:
+ case CTDB_TRANS2_COMMIT_TIMEOUT:
+ failure_control = CTDB_CONTROL_TRANS2_ERROR;
+ break;
+ case CTDB_TRANS2_COMMIT_ALLFAIL:
+ failure_control = CTDB_CONTROL_TRANS2_FINISHED;
+ break;
+ }
+ }
+
+ if (++retries == 10) {
+ DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n",
+ h->ctdb_db->db_id, retries, (unsigned)failure_control));
+ ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
+ failure_control, CTDB_CTRL_FLAG_NOREPLY,
+ tdb_null, NULL, NULL, NULL, NULL, NULL);
+ talloc_free(h);
+ return -1;
+ }
+
if (ctdb_replay_transaction(h) != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to replay transaction\n"));
ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
- CTDB_CONTROL_TRANS2_ERROR, CTDB_CTRL_FLAG_NOREPLY,
+ failure_control, CTDB_CTRL_FLAG_NOREPLY,
tdb_null, NULL, NULL, NULL, NULL, NULL);
talloc_free(h);
return -1;
}
goto again;
+ } else {
+ failure_control = CTDB_CONTROL_TRANS2_ERROR;
}
/* do the real commit locally */
@@ -3225,7 +3267,7 @@ again:
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction\n"));
ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
- CTDB_CONTROL_TRANS2_ERROR, CTDB_CTRL_FLAG_NOREPLY,
+ failure_control, CTDB_CTRL_FLAG_NOREPLY,
tdb_null, NULL, NULL, NULL, NULL, NULL);
talloc_free(h);
return ret;
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index ff4d271..f44a940 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -549,6 +549,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
CTDB_CONTROL_TRANS2_COMMIT = 83,
CTDB_CONTROL_TRANS2_FINISHED = 84,
CTDB_CONTROL_TRANS2_ERROR = 85,
+ CTDB_CONTROL_TRANS2_COMMIT_RETRY = 86,
};
/*
@@ -789,6 +790,16 @@ struct ctdb_req_keepalive {
struct ctdb_req_header hdr;
};
+
+/* types of failures possible from TRANS2_COMMIT */
+enum ctdb_trans2_commit_error {
+ CTDB_TRANS2_COMMIT_SUCCESS=0, /* all nodes committed successfully */
+ CTDB_TRANS2_COMMIT_TIMEOUT=1, /* at least one node timed out */
+ CTDB_TRANS2_COMMIT_ALLFAIL=2, /* all nodes failed the commit */
+ CTDB_TRANS2_COMMIT_SOMEFAIL=3 /* some nodes failed the commit, some allowed it */
+};
+
+
/* internal prototypes */
void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
void ctdb_fatal(struct ctdb_context *ctdb, const char *msg);
diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec
index fcaf2ee..ea9ffbd 100644
--- a/packaging/RPM/ctdb.spec
+++ b/packaging/RPM/ctdb.spec
@@ -5,7 +5,7 @@ Vendor: Samba Team
Packager: Samba Team <samba at samba.org>
Name: ctdb
Version: 1.0
-Release: 53
+Release: 54
Epoch: 0
License: GNU GPL version 3
Group: System Environment/Daemons
@@ -118,6 +118,10 @@ fi
%{_includedir}/ctdb_private.h
%changelog
+* Fri Aug 8 2008 : Version 1.0.54
+ - fix a looping error in the transaction code
+ - provide a more detailed error code for persistent store errors
+ so clients can make more intelligent choices on how to try to recover
* Thu Aug 7 2008 : Version 1.0.53
- Remove the reclock.pnn file it can cause gpfs to fail to umount
- New transaction code
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 59b0657..edfe344 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -397,6 +397,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
return ctdb_control_cancel_persistent_update(ctdb, c, indata);
case CTDB_CONTROL_TRANS2_COMMIT:
+ case CTDB_CONTROL_TRANS2_COMMIT_RETRY:
return ctdb_control_trans2_commit(ctdb, c, indata, async_reply);
case CTDB_CONTROL_TRANS2_ERROR:
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index 5b88b4b..42b148c 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -32,9 +32,17 @@ struct ctdb_persistent_state {
const char *errormsg;
uint32_t num_pending;
int32_t status;
+ uint32_t num_failed, num_sent;
};
/*
+ 1) all nodes fail, and all nodes reply
+ 2) some nodes fail, all nodes reply
+ 3) some nodes timeout
+ 4) all nodes succeed
+ */
+
+/*
called when a node has acknowledged a ctdb_control_update_record call
*/
static void ctdb_persistent_callback(struct ctdb_context *ctdb,
@@ -50,10 +58,19 @@ static void ctdb_persistent_callback(struct ctdb_context *ctdb,
status, errormsg));
state->status = status;
state->errormsg = errormsg;
+ state->num_failed++;
}
state->num_pending--;
if (state->num_pending == 0) {
- ctdb_request_control_reply(state->ctdb, state->c, NULL, state->status, state->errormsg);
+ enum ctdb_trans2_commit_error etype;
+ if (state->num_failed == state->num_sent) {
+ etype = CTDB_TRANS2_COMMIT_ALLFAIL;
+ } else if (state->num_failed != 0) {
+ etype = CTDB_TRANS2_COMMIT_SOMEFAIL;
+ } else {
+ etype = CTDB_TRANS2_COMMIT_SUCCESS;
+ }
+ ctdb_request_control_reply(state->ctdb, state->c, NULL, etype, state->errormsg);
talloc_free(state);
}
}
@@ -66,7 +83,8 @@ static void ctdb_persistent_store_timeout(struct event_context *ev, struct timed
{
struct ctdb_persistent_state *state = talloc_get_type(private_data, struct ctdb_persistent_state);
- ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_state");
+ ctdb_request_control_reply(state->ctdb, state->c, NULL, CTDB_TRANS2_COMMIT_TIMEOUT,
+ "timeout in ctdb_persistent_state");
talloc_free(state);
}
@@ -103,12 +121,18 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
then have it decremented in ctdb_control_trans2_error
or ctdb_control_trans2_finished
*/
- if (c->opcode == CTDB_CONTROL_PERSISTENT_STORE) {
+ switch (c->opcode) {
+ case CTDB_CONTROL_PERSISTENT_STORE:
if (client->num_persistent_updates > 0) {
client->num_persistent_updates--;
- }
- } else {
+ }
+ break;
+ case CTDB_CONTROL_TRANS2_COMMIT:
client->num_persistent_updates++;
+ break;
+ case CTDB_CONTROL_TRANS2_COMMIT_RETRY:
+ /* already updated from the first commit */
+ break;
}
state = talloc_zero(ctdb, struct ctdb_persistent_state);
@@ -141,6 +165,7 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
}
state->num_pending++;
+ state->num_sent++;
}
if (state->num_pending == 0) {
--
CTDB repository
More information about the samba-cvs
mailing list