[SCM] CTDB repository - branch master updated - ctdb-1.0.101-4-g9560f8b

Ronnie Sahlberg sahlberg at samba.org
Wed Oct 28 17:15:39 MDT 2009


The branch, master has been updated
       via  9560f8b7fe0f7ee0386a87c2653333071050fe4b (commit)
       via  02ee9dfd3c6b09f5c5172a7e38738c20b7f0aecd (commit)
       via  813cfd7c625ac8af4ef169cc92fb6d69f66004c9 (commit)
       via  8d430ae6968dfe566614379436fc3c56003fcd88 (commit)
      from  47b67077bdfa64938bb0fa6d1ca8f56fbd5c960e (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 9560f8b7fe0f7ee0386a87c2653333071050fe4b
Author: Michael Adam <obnox at samba.org>
Date:   Thu Oct 22 14:42:05 2009 +0200

    ctdb_client: reformat a comment slightly to enhance clearness.
    
    Michael

commit 02ee9dfd3c6b09f5c5172a7e38738c20b7f0aecd
Author: Michael Adam <obnox at samba.org>
Date:   Wed Oct 28 22:55:44 2009 +0100

    client: fix race condition with concurrent transactions on the same node.
    
    In ctdb_transaction_commit(), when the trans2_commit control fails, there
    is a race condition in the 1 second sleep between the local transaction_cancel
    and the call to ctdb_replay_transaction(): The database is not locked, and
    neither is the transaction_lock record. So another client can start and possibly
    complete a new transaction in this gap, but only on the same node: The locking
    of the transaction_lock record on a different node which involves migration of
    the record to the other node has been disabled by introduction of the
    transaction_active flag on the db which closes precisely this gap from the start
    of the commit until the call to TRANS2_FINISH or TRANS2_ERROR.
    But this mechanism does not cover the case where a process on the same node
    tries to start a transaction: There is no obstacle to locking the transaction_lock
    record because the record does not need to be migrated.
    
    This commit closes this race condition in ctdb_transaction_fetch_start()
    by using the new ctdb_ctrl_transaction_active() call to ask the local
    ctdb daemon whether it has a transaction running on the database.
    If so, the check is repeated until the running transaction is done.
    
    This does introduce an additional call to the local ctdbd when starting
    transactions, but it does close the (hopefully) last race condition.
    
    Michael

commit 813cfd7c625ac8af4ef169cc92fb6d69f66004c9
Author: Michael Adam <obnox at samba.org>
Date:   Wed Oct 28 22:54:49 2009 +0100

    client: add ctdb_ctrl_transaction_active() which calls out to CTDB_TRANS2_ACTIVE
    
    Michael

commit 8d430ae6968dfe566614379436fc3c56003fcd88
Author: Michael Adam <obnox at samba.org>
Date:   Wed Oct 28 22:50:05 2009 +0100

    server: add a new ctdb control CTDB_TRANS2_ACTIVE
    
    This aske the daemon wheter a transaction is currently active on a
    given DB on that node. More precisely this asks for the transaction_active
    flag in the ctdb_db_context that is set in the CTDB_TRANS2_COMMIT
    control and cleared in the CTDB_TRANS2_ERROR or CTDB_TRANS2_FINISHED controls.
    
    This will be useful for fixing race conditions in the transaction code.
    
    Michael

-----------------------------------------------------------------------

Summary of changes:
 client/ctdb_client.c     |   48 +++++++++++++++++++++++++++++++++++++++++++--
 include/ctdb_private.h   |    3 ++
 server/ctdb_control.c    |    4 +++
 server/ctdb_persistent.c |   20 +++++++++++++++++++
 4 files changed, 72 insertions(+), 3 deletions(-)


Changeset truncated at 500 lines:

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index d4130cd..d47f771 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -3141,12 +3141,42 @@ int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout,
 	return ret;
 }
 
+/**
+ * check whether a transaction is active on a given db on a given node
+ */
+static int32_t ctdb_ctrl_transaction_active(struct ctdb_context *ctdb,
+					    uint32_t destnode,
+					    uint32_t db_id)
+{
+	int32_t status;
+	int ret;
+	TDB_DATA indata;
+
+	indata.dptr = (uint8_t *)&db_id;
+	indata.dsize = sizeof(db_id);
+
+	ret = ctdb_control(ctdb, destnode, 0,
+			   CTDB_CONTROL_TRANS2_ACTIVE,
+			   0, indata, NULL, NULL, &status,
+			   NULL, NULL);
+
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " ctdb control for transaction_active failed\n"));
+		return -1;
+	}
+
+	return status;
+}
+
+
 struct ctdb_transaction_handle {
 	struct ctdb_db_context *ctdb_db;
 	bool in_replay;
-	/* we store the reads and writes done under a transaction one
-	   list stores both reads and writes, the other just writes
-	*/
+	/*
+	 * we store the reads and writes done under a transaction:
+	 * - one list stores both reads and writes (m_all),
+	 * - the other just writes (m_write)
+	 */
 	struct ctdb_marshall_buffer *m_all;
 	struct ctdb_marshall_buffer *m_write;
 };
@@ -3170,6 +3200,7 @@ static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
 	int ret;
 	struct ctdb_db_context *ctdb_db = h->ctdb_db;
 	pid_t pid;
+	int32_t status;
 
 	key.dptr = discard_const(keyname);
 	key.dsize = strlen(keyname);
@@ -3180,6 +3211,17 @@ static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
 	}
 
 again:
+	status = ctdb_ctrl_transaction_active(ctdb_db->ctdb,
+					      CTDB_CURRENT_NODE,
+					      ctdb_db->db_id);
+	if (status == 1) {
+		DEBUG(DEBUG_NOTICE, (__location__ " transaction is active "
+				     "on db_id[%u]. waiting for 1 second\n",
+				     ctdb_db->db_id));
+		sleep(1);
+		goto again;
+	}
+
 	tmp_ctx = talloc_new(h);
 
 	rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL);
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 5791df0..2fabfea 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -620,6 +620,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
 		    CTDB_CONTROL_TRANSACTION_CANCEL      = 113,
 		    CTDB_CONTROL_REGISTER_NOTIFY         = 114,
 		    CTDB_CONTROL_DEREGISTER_NOTIFY       = 115,
+		    CTDB_CONTROL_TRANS2_ACTIVE           = 116,
 };	
 
 /*
@@ -1469,6 +1470,8 @@ int32_t ctdb_control_trans2_finished(struct ctdb_context *ctdb,
 				     struct ctdb_req_control *c);
 int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb, 
 				  struct ctdb_req_control *c);
+int32_t ctdb_control_trans2_active(struct ctdb_context *ctdb,
+				   uint32_t db_id);
 
 char *ctdb_addr_to_str(ctdb_sock_addr *addr);
 unsigned ctdb_addr_to_port(ctdb_sock_addr *addr);
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 7e84078..b6bad1c 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -424,6 +424,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	case CTDB_CONTROL_TRANS2_FINISHED:
 		return ctdb_control_trans2_finished(ctdb, c);
 
+	case CTDB_CONTROL_TRANS2_ACTIVE:
+		CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
+		return ctdb_control_trans2_active(ctdb, *(uint32_t *)indata.dptr);
+
 	case CTDB_CONTROL_RECD_PING:
 		CHECK_CONTROL_DATA_SIZE(0);
 		return ctdb_control_recd_ping(ctdb);
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index cb77bf0..3c51742 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -604,6 +604,26 @@ int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb,
 	return 0;
 }
 
+/**
+ * Tell whether a transaction is active on this node on the give DB.
+ */
+int32_t ctdb_control_trans2_active(struct ctdb_context *ctdb,
+				   uint32_t db_id)
+{
+	struct ctdb_db_context *ctdb_db;
+
+	ctdb_db = find_ctdb_db(ctdb, db_id);
+	if (!ctdb_db) {
+		DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", db_id));
+		return -1;
+	}
+
+	if (ctdb_db->transaction_active) {
+		return 1;
+	} else {
+		return 0;
+	}
+}
 
 /*
   backwards compatibility:


-- 
CTDB repository


More information about the samba-cvs mailing list