[SCM] CTDB repository - branch origin updated - db6bc3745a56cc12e60e727190a098a6527690d6

Ronnie Sahlberg sahlberg at samba.org
Mon Aug 11 00:39:23 GMT 2008


The branch, origin has been updated
       via  db6bc3745a56cc12e60e727190a098a6527690d6 (commit)
       via  3059ab5f3f21e331b80728773c36a74620e46677 (commit)
       via  b3b9707dd8244758ff1080401a9e03e74766e1ab (commit)
       via  7f29c50ccbc7789bfbc20bcb4b65758af9ebe6c5 (commit)
       via  7c6b621f7307dc39ffcd7d965ac613642af201b8 (commit)
       via  e75cc3a030a8ccb43961cf80ff10d41ec81a24b0 (commit)
       via  2426b9010ef45f5e96ffc12b8a69a3b0566b4f98 (commit)
       via  a4814aa8b0b165b9d6c4c55fc5aee33cd1a570bd (commit)
       via  7ed5fbe7fa3bc3cb729d9b516d2a73d52e28d22d (commit)
       via  6915661a460cd589b441ac7cd8695f35c4e83113 (commit)
       via  58e6dc722ad1e2415b71baf1d471885169dde14d (commit)
       via  0592ba2a4fbd1b3b7a6bd0780eadbd6d449baaad (commit)
       via  bcbac6724840cdabe55e38a4c62779f853bd09ba (commit)
       via  66c61137a5c01afcbae329ffbe121e78ae087399 (commit)
       via  c76c214be401cb116265ed17ffe6c77c979ded82 (commit)
       via  84236e03e40bcf46fa634d106903277c149a734f (commit)
       via  14f2f719e6ddc266aafde4d4bf80ed3a01e145fe (commit)
       via  a9bce1ac9794f108825190948f404c864a533435 (commit)
       via  d1d48f8661d83b01de6c552ee70021acdc6384cb (commit)
       via  09aa91224fe7b835dc0a2c58868cce28ce54809f (commit)
       via  b6d9a0396fb4b325778d3810dc656f719f31b9f1 (commit)
       via  f7a70a5f9043b1d7293a515abf5b5228365693da (commit)
       via  aefcb1f817581ac8cd67712d07159fc802f96623 (commit)
       via  fe55bfc8fb6dc628f72f220843e829a251d09936 (commit)
       via  3a71844cfdb9fe69208030432ddb547b0e215726 (commit)
       via  306af4ccef132ea023f1f01e11f877a3a742ee4c (commit)
       via  bad53b2d342bb9760497e6f4a61e64ca50d6e771 (commit)
       via  9806d18b93218c216d538e28f9ed495269f0a938 (commit)
       via  71d9d24abae62f70acbd7c1ded8af0b817607c2a (commit)
       via  12087e7d751a8756076662cd8db5dcf35316c0c5 (commit)
       via  2bc7f3aef4668bd1680db87ef215c349280a84f2 (commit)
       via  aab710f1c6bcdfd8ff2992f8adc15933276dc39e (commit)
       via  9ff3380099fe6f4d39de126db0826971a10ee692 (commit)
       via  d5dcb46e182466e4b51c106f2491178c23babd8d (commit)
       via  4d205476d286570a6e1f52b59af42858ce051106 (commit)
       via  008533d971aec9c28c6e4750ef4677dd943633ff (commit)
       via  d3f5d75665a78ae0081fda57e58384b27a6ae396 (commit)
       via  22f737be0e70fc043affaa4f953f60d852b7999a (commit)
       via  ded1a974cdd86b436c6e5cba27069d1a3796dbe9 (commit)
       via  795c190b004d404b84dda053593139ed51d345e5 (commit)
       via  b1fed105ad780e89a128a611ef0bd659818eeebf (commit)
       via  8fed021d11160b137f4140ea02947347250e2959 (commit)
       via  e8ef9891aa31c374921b23cc74e1eda1f8218bf0 (commit)
       via  0de79352c9b36c118e36905f08ebbe38ecbb957e (commit)
       via  b08a988fbdad0da850c9b79791c1a8970555147f (commit)
       via  eca73bcaa33f88c683b79d57d85b590659018ad8 (commit)
       via  e24152fbd06ba4c2b6cfd473751c7f00a676b9ae (commit)
       via  c5035657606283d2e35bea40992505e84ca8e7be (commit)
       via  60e2cb175c449ae65793a3e1ffb60cf030a3a0d5 (commit)
       via  3d58f9b524a40c7b43a2a855212db090e9becefa (commit)
       via  554dcf16d37c8b9e4704df11d21fb272f30f5cec (commit)
       via  52716d26eb84104d65828bed38e69f214a5fa824 (commit)
       via  52a38487f981fd5981c02a7a063ad2c598591c10 (commit)
       via  af38c8d4cc03e1b5a314ea2338346c5f8c80aa95 (commit)
       via  ccf9334bd20b1398623dd649987aa15119dac14e (commit)
       via  1ffccb3e0b3b5bd376c5302304029af393709518 (commit)
       via  8140825e1d06053a900fd0adf0a150622c0fc146 (commit)
       via  05918bcb58acd2add7e13c028de09641a7519cd1 (commit)
       via  d7e9c0010a40f9335d28cf108e8f5cf411970a03 (commit)
       via  9478852f2b4f530994b2211fff45413d1da82dd9 (commit)
       via  2b0bd6c302545f2533a7a67dfc6bb5f9f60799f7 (commit)
       via  9043913a54fe707083697f0587c6ffde86ca5a69 (commit)
       via  2fe52c7979ecd28250ec4ac195d3c3999916e573 (commit)
       via  495a6293c284a1e74b9c5e0c112e6ed5feead107 (commit)
       via  c26afe26cc5c1f9cd9eef74166b5fc39dde591d3 (commit)
       via  f8c5f6ff1cae747aae917c454b49dc5db227e140 (commit)
       via  1de62d1ad71fa784d5e93f76da8f872cad9b9f42 (commit)
       via  15bc66ae801b0c69a65a7a2acf5df151e76edc2a (commit)
       via  f4a6dd98c86f2028c00b62313a071a94cafc95f9 (commit)
       via  b18a1b59ecd8913e03a59b1a105002f2c9221324 (commit)
       via  f7bdf96843a7e4ad61ad378786922d6281de9d93 (commit)
       via  dfaf2c1581e547df831b3171ad47acd27b4ca2af (commit)
       via  c47acc0eb2e1275a6c6fc05829bd1131d999f8fd (commit)
       via  2cc9aba3d7e608eccc29c897f710b69f30653bbf (commit)
       via  919af5aadb797cfdd10473b6a1269f4ae2a9dbda (commit)
       via  d94e76bfb3e464c0540331caf282efc13ff60e42 (commit)
       via  563cdf2bd9cca358d234c797d8e1d9c8838c6ca6 (commit)
       via  b2ccb891b81b041e2186e038b67bb4354b7892aa (commit)
       via  00025eef662b867293829228c681df491cd6f371 (commit)
       via  172d01fb34f032e098b1c77a7b0f17bf11301640 (commit)
       via  dd900d4ed8f07003c4f1db2d441cfc2ef2c89ef5 (commit)
       via  90ab48bb8e17f59fcb27ddbff51de546c4447b64 (commit)
       via  3c3d3ac5f7dec258589aaaf0633cab3b3af65cf3 (commit)
       via  77458b2b6b51b2970c12b0e5b097088d3fb9d358 (commit)
       via  02ed800f3c09e2a3d56de5ae226708d0953dd4a1 (commit)
       via  cb67382603ffabda7f7f958b494c79b4a3c5ebe2 (commit)
       via  a77850d8baebcf0b216d7c1391da9cce1c43ce2e (commit)
       via  8814997c1b9623397058088dd0e1775cecfe371b (commit)
       via  bd7b254b81dda4d9d62516abf32f93f2503eb9bb (commit)
       via  36be210bbc5e0af75c5fd6e57863272bfa0e942e (commit)
       via  becce6a9a759730d0c675a56516bb0a32a54c9f3 (commit)
       via  8f6cd88e74de24af8dde2b6cabb2348c4f914b99 (commit)
       via  77255bb5523b8d132770a0a7d4ba29ec9e5043cc (commit)
       via  fd7bb21c4f9289fc34a57f9d8cb7c13a02d06096 (commit)
       via  702ced6c2fe569c01fe96c60d0f35a7e61506a96 (commit)
       via  2030e9ff2ca044181b72c3b87d513bf27057b5a2 (commit)
       via  b1f1e80d3ad50280a300f2ed021513cf0a6f3a76 (commit)
       via  b4b2408ba1bdce22abb3fb19d398b72e96da6505 (commit)
       via  a4e89f57a8d733ea74df7b0de31eb977d6d37388 (commit)
       via  bfba5c7249eff8a10a43b53c1b89dd44b625fd10 (commit)
       via  60f3c04bd8b20ecbe937ffed08875cdc6898b422 (commit)
       via  6043f926f89b361c7fe14fc60d2769fd2ba63dfc (commit)
       via  80e249512f93bca2445d40590db38d31be2aafd7 (commit)
       via  f4929e164be1703f74fc332e740b85cfe1ae3e73 (commit)
       via  ff2985aaef999d180277db4cf644fee0ea79c14d (commit)
       via  45b6ff64f6ddf037b810c4e5f8b9f04d71067b98 (commit)
      from  5168e9fa138995581fe5805f99ae569ce3c127f7 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=origin


- Log -----------------------------------------------------------------
-----------------------------------------------------------------------

Summary of changes:
 Makefile.in                                     |   18 +-
 client/ctdb_client.c                            |  383 +++++++++++++++++++++--
 common/ctdb_ltdb.c                              |   59 ----
 common/ctdb_util.c                              |  102 ++++++
 common/system_aix.c                             |   82 ++++--
 config/ctdb.init                                |    3 +
 config/ctdb.sysconfig                           |   12 +
 config/events.d/10.interface                    |    1 +
 config/events.d/50.samba                        |   97 ++++++-
 config/events.d/60.nfs                          |    3 +-
 config/events.d/61.nfstickle                    |    3 +
 config/events.d/91.lvs                          |   13 +-
 config/functions                                |   44 ++-
 doc/ctdb.1                                      |   45 +++-
 doc/ctdb.1.html                                 |  132 ++++++---
 doc/ctdb.1.xml                                  |   78 +++++
 doc/ctdbd.1                                     |   93 ++++++-
 doc/ctdbd.1.html                                |  155 ++++++++--
 doc/ctdbd.1.xml                                 |  179 +++++++++++
 doc/onnode.1                                    |  347 +++++++++++++++++++--
 doc/onnode.1.html                               |   69 +++--
 doc/onnode.1.xml                                |  126 ++++++--
 include/ctdb.h                                  |   21 ++
 include/ctdb_private.h                          |   50 +++-
 lib/events/events_signal.c                      |   10 +-
 packaging/RPM/ctdb.spec                         |  117 +++++++-
 server/ctdb_control.c                           |   20 +-
 server/ctdb_daemon.c                            |   40 +++-
 server/ctdb_freeze.c                            |   38 ++-
 server/ctdb_lockwait.c                          |    2 -
 server/ctdb_logging.c                           |   39 ++--
 server/ctdb_ltdb_server.c                       |    4 +-
 server/ctdb_persistent.c                        |  299 +++++++++++++++---
 server/ctdb_recover.c                           |   39 +--
 server/ctdb_recoverd.c                          |  254 ++++-----------
 server/ctdb_takeover.c                          |    7 +-
 server/ctdb_traverse.c                          |   72 ++++-
 server/ctdb_tunables.c                          |    2 +-
 server/ctdbd.c                                  |    5 +
 server/eventscript.c                            |   62 +++-
 tcp/tcp_connect.c                               |    3 -
 tests/ctdb_persistent.c                         |   28 +-
 tests/{ctdb_persistent.c => ctdb_transaction.c} |   50 ++--
 tests/ctdb_traverse.c                           |    2 +-
 tests/fetch.sh                                  |    2 +
 tests/persistent.sh                             |    8 +-
 tests/rb_test.c                                 |   40 ++--
 tests/run_tests.sh                              |    2 +
 tests/transaction.sh                            |   28 ++
 tools/ctdb.c                                    |  219 ++++++++++----
 tools/ctdb_vacuum.c                             |   18 +-
 tools/onnode                                    |  203 ++++++++++++
 tools/onnode.rsh                                |   44 ---
 tools/onnode.ssh                                |   44 ---
 54 files changed, 2972 insertions(+), 844 deletions(-)
 mode change 100644 => 100755 Makefile.in
 copy tests/{ctdb_persistent.c => ctdb_transaction.c} (84%)
 create mode 100755 tests/transaction.sh
 create mode 100755 tools/onnode
 delete mode 100644 tools/onnode.rsh
 delete mode 100755 tools/onnode.ssh


Changeset truncated at 500 lines:

diff --git a/Makefile.in b/Makefile.in
old mode 100644
new mode 100755
index 161e2e9..cf1240b
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,10 +1,12 @@
 #!gmake
 
+
 CC = @CC@
 prefix = @prefix@
 exec_prefix = @exec_prefix@
 datarootdir = @datarootdir@
 includedir = @includedir@
+docdir = /usr/share/doc
 libdir = @libdir@
 bindir = @bindir@
 sbindir = @sbindir@
@@ -54,7 +56,8 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
 	server/ctdb_keepalive.o server/ctdb_logging.o server/ctdb_uptime.c \
 	$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
 
-TEST_BINS=bin/ctdb_bench bin/ctdb_fetch bin/ctdb_store bin/ctdb_randrec bin/ctdb_persistent bin/ctdb_traverse bin/rb_test \
+TEST_BINS=bin/ctdb_bench bin/ctdb_fetch bin/ctdb_store bin/ctdb_randrec bin/ctdb_persistent \
+	bin/ctdb_traverse bin/rb_test bin/ctdb_transaction \
 	@INFINIBAND_BINS@
 
 BINS = bin/ctdb @CTDB_SCSI_IO@ bin/ctdb_ipmux bin/smnotify
@@ -139,6 +142,10 @@ bin/ctdb_persistent: $(CTDB_CLIENT_OBJ) tests/ctdb_persistent.o
 	@echo Linking $@
 	@$(CC) $(CFLAGS) -o $@ tests/ctdb_persistent.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
 
+bin/ctdb_transaction: $(CTDB_CLIENT_OBJ) tests/ctdb_transaction.o 
+	@echo Linking $@
+	@$(CC) $(CFLAGS) -o $@ tests/ctdb_transaction.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
+
 bin/ibwrapper_test: $(CTDB_CLIENT_OBJ) ib/ibwrapper_test.o
 	@echo Linking $@
 	@$(CC) $(CFLAGS) -o $@ ib/ibwrapper_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
@@ -170,15 +177,16 @@ install: all
 	mkdir -p $(DESTDIR)$(includedir)
 	mkdir -p $(DESTDIR)$(etcdir)/ctdb
 	mkdir -p $(DESTDIR)$(etcdir)/ctdb/events.d
+	mkdir -p $(DESTDIR)$(docdir)/ctdb
 	${INSTALLCMD} -m 755 bin/ctdb $(DESTDIR)$(bindir)
 	${INSTALLCMD} -m 755 bin/ctdb_ipmux $(DESTDIR)$(bindir)
 	${INSTALLCMD} -m 755 bin/ctdbd $(DESTDIR)$(sbindir)
 	${INSTALLCMD} -m 755 bin/smnotify $(DESTDIR)$(bindir)
 	${INSTALLCMD} -m 644 include/ctdb.h $(DESTDIR)$(includedir)
 	${INSTALLCMD} -m 644 include/ctdb_private.h $(DESTDIR)$(includedir) # for samba3
-	${INSTALLCMD} -m 755 config/functions $(DESTDIR)$(etcdir)/ctdb
+	${INSTALLCMD} -m 644 config/functions $(DESTDIR)$(etcdir)/ctdb
 	${INSTALLCMD} -m 755 config/statd-callout $(DESTDIR)$(etcdir)/ctdb
-	${INSTALLCMD} -m 644 config/events.d/README $(DESTDIR)$(etcdir)/ctdb/events.d
+	${INSTALLCMD} -m 644 config/events.d/README $(DESTDIR)/$(docdir)/ctdb/README.eventscripts
 	${INSTALLCMD} -m 755 config/events.d/00.ctdb $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/10.interface $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/40.vsftpd $(DESTDIR)$(etcdir)/ctdb/events.d
@@ -190,13 +198,11 @@ install: all
 	${INSTALLCMD} -m 755 config/events.d/90.ipmux $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 config/events.d/91.lvs $(DESTDIR)$(etcdir)/ctdb/events.d
 	${INSTALLCMD} -m 755 tools/ctdb_diagnostics $(DESTDIR)$(bindir)
-	${INSTALLCMD} -m 755 tools/onnode.ssh $(DESTDIR)$(bindir)
-	${INSTALLCMD} -m 755 tools/onnode.rsh $(DESTDIR)$(bindir)
+	${INSTALLCMD} -m 755 tools/onnode $(DESTDIR)$(bindir)
 	if [ -f doc/ctdb.1 ];then ${INSTALLCMD} -d $(DESTDIR)$(mandir)/man1; fi
 	if [ -f doc/ctdb.1 ];then ${INSTALLCMD} -m 644 doc/ctdb.1 $(DESTDIR)$(mandir)/man1; fi
 	if [ -f doc/ctdbd.1 ];then ${INSTALLCMD} -m 644 doc/ctdbd.1 $(DESTDIR)$(mandir)/man1; fi
 	if [ -f doc/onnode.1 ];then ${INSTALLCMD} -m 644 doc/onnode.1 $(DESTDIR)$(mandir)/man1; fi
-	cd $(DESTDIR)$(bindir) && ln -sf onnode.ssh onnode
 
 test: all
 	tests/run_tests.sh
diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index ed999f2..0d85374 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -1232,29 +1232,6 @@ int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32
 }
 
 /*
-  get the reclock filename
- */
-int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, 
-		       TALLOC_CTX *mem_ctx, const char **reclock)
-{
-	int ret;
-	TDB_DATA outdata;
-	int32_t res;
-
-	ret = ctdb_control(ctdb, destnode, 0, 
-			   CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null, 
-			   mem_ctx, &outdata, &res, &timeout, NULL);
-	if (ret != 0 || res != 0) {
-		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getreclock failed\n"));
-		return -1;
-	}
-
-	*reclock = (const char *)talloc_steal(mem_ctx, outdata.dptr);
-
-	return 0;
-}
-
-/*
   get a list of nodes (vnn and flags ) from a remote node
  */
 int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, 
@@ -1787,6 +1764,11 @@ static void traverse_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA
 		return;
 	}
 
+	if (data.dsize == sizeof(struct ctdb_ltdb_header)) {
+		/* empty records are deleted records in ctdb */
+		return;
+	}
+
 	if (state->fn(ctdb, key, data, state->private_data) != 0) {
 		state->done = True;
 	}
@@ -1847,6 +1829,7 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *
 	return state.count;
 }
 
+#define ISASCII(x) ((x>31)&&(x<128))
 /*
   called on each key during a catdb
  */
@@ -1861,7 +1844,7 @@ static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, voi
 
 	fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
 	for (i=0;i<key.dsize;i++) {
-		if (isascii(key.dptr[i])) {
+		if (ISASCII(key.dptr[i])) {
 			fprintf(f, "%c", key.dptr[i]);
 		} else {
 			fprintf(f, "\\%02X", key.dptr[i]);
@@ -1871,7 +1854,7 @@ static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, voi
 
 	fprintf(f, "data(%u) = \"", (unsigned)data.dsize);
 	for (i=sizeof(*h);i<data.dsize;i++) {
-		if (isascii(data.dptr[i])) {
+		if (ISASCII(data.dptr[i])) {
 			fprintf(f, "%c", data.dptr[i]);
 		} else {
 			fprintf(f, "\\%02X", data.dptr[i]);
@@ -2947,3 +2930,353 @@ int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout,
 	talloc_free(tmp_ctx);
 	return ret;
 }
+
+struct ctdb_transaction_handle {
+	struct ctdb_db_context *ctdb_db;
+	bool in_replay;
+	/* we store the reads and writes done under a transaction: one
+	   list stores both reads and writes, the other just writes
+	*/
+	struct ctdb_marshall_buffer *m_all;
+	struct ctdb_marshall_buffer *m_write;
+};
+
+/* talloc destructor: cancel the tdb transaction when the handle is freed */
+static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h)
+{
+	tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
+	return 0;
+}
+
+/* fetch-lock the transaction lock key, then start the local tdb transaction */
+static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
+{
+	struct ctdb_record_handle *rh;
+	TDB_DATA key;
+	struct ctdb_ltdb_header header;
+	TALLOC_CTX *tmp_ctx;
+	const char *keyname = CTDB_TRANSACTION_LOCK_KEY;
+	int ret;
+	struct ctdb_db_context *ctdb_db = h->ctdb_db;
+
+	key.dptr = discard_const(keyname);
+	key.dsize = strlen(keyname);
+
+	if (!ctdb_db->persistent) {
+		DEBUG(DEBUG_ERR,(__location__ " Attempted transaction on non-persistent database\n"));
+		return -1;
+	}
+
+again:
+	tmp_ctx = talloc_new(h);
+
+	rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL);
+	if (rh == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n"));		
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+	talloc_free(rh);
+
+	ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to start tdb transaction\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, NULL);
+	if (ret != 0 || header.dmaster != ctdb_db->ctdb->pnn) {
+		tdb_transaction_cancel(ctdb_db->ltdb->tdb);
+		talloc_free(tmp_ctx);
+		goto again;
+	}
+
+	talloc_free(tmp_ctx);
+
+	return 0;
+}
+
+
+/* start a transaction on a database */
+struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
+						       TALLOC_CTX *mem_ctx)
+{
+	struct ctdb_transaction_handle *h;
+	int ret;
+
+	h = talloc_zero(mem_ctx, struct ctdb_transaction_handle);
+	if (h == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " oom for transaction handle\n"));		
+		return NULL;
+	}
+
+	h->ctdb_db = ctdb_db;
+
+	ret = ctdb_transaction_fetch_start(h);
+	if (ret != 0) {
+		talloc_free(h);
+		return NULL;
+	}
+
+	talloc_set_destructor(h, ctdb_transaction_destructor);
+
+	return h;
+}
+
+
+
+/*
+  fetch a record inside a transaction
+ */
+int ctdb_transaction_fetch(struct ctdb_transaction_handle *h, 
+			   TALLOC_CTX *mem_ctx, 
+			   TDB_DATA key, TDB_DATA *data)
+{
+	struct ctdb_ltdb_header header;
+	int ret;
+
+	ZERO_STRUCT(header);
+
+	ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, mem_ctx, data);
+	if (ret == -1 && header.dmaster == (uint32_t)-1) {
+		/* record doesn't exist yet */
+		*data = tdb_null;
+		ret = 0;
+	}
+	
+	if (ret != 0) {
+		return ret;
+	}
+
+	if (!h->in_replay) {
+		h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 1, key, NULL, *data);
+		if (h->m_all == NULL) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+  stores a record inside a transaction
+ */
+int ctdb_transaction_store(struct ctdb_transaction_handle *h, 
+			   TDB_DATA key, TDB_DATA data)
+{
+	TALLOC_CTX *tmp_ctx = talloc_new(h);
+	struct ctdb_ltdb_header header;
+	TDB_DATA olddata;
+	int ret;
+
+	ZERO_STRUCT(header);
+
+	/* we need the header so we can update the RSN */
+	ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata);
+	if (ret == -1 && header.dmaster == (uint32_t)-1) {
+		/* the record doesn't exist - create one with us as dmaster.
+		   This is only safe because we are in a transaction and this
+		   is a persistent database */
+		header.dmaster = h->ctdb_db->ctdb->pnn;
+		header.rsn = 0;
+	} else if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n"));
+		talloc_free(tmp_ctx);
+		return ret;
+	}
+
+	if (data.dsize == olddata.dsize &&
+	    memcmp(data.dptr, olddata.dptr, data.dsize) == 0) {
+		/* save writing the same data */
+		talloc_free(tmp_ctx);
+		return 0;
+	}
+
+	header.rsn++;
+
+	if (!h->in_replay) {
+		h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 0, key, NULL, data);
+		if (h->m_all == NULL) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
+			talloc_free(tmp_ctx);
+			return -1;
+		}
+	}		
+
+	h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
+	if (h->m_write == NULL) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+	
+	ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data);
+
+	talloc_free(tmp_ctx);
+	
+	return ret;
+}
+
+/*
+  replay a transaction
+ */
+static int ctdb_replay_transaction(struct ctdb_transaction_handle *h)
+{
+	int ret, i;
+	struct ctdb_rec_data *rec = NULL;
+
+	h->in_replay = true;
+	talloc_free(h->m_write);
+	h->m_write = NULL;
+
+	ret = ctdb_transaction_fetch_start(h);
+	if (ret != 0) {
+		return ret;
+	}
+
+	for (i=0;i<h->m_all->count;i++) {
+		TDB_DATA key, data;
+
+		rec = ctdb_marshall_loop_next(h->m_all, rec, NULL, NULL, &key, &data);
+		if (rec == NULL) {
+			DEBUG(DEBUG_ERR, (__location__ " Out of records in ctdb_replay_transaction?\n"));
+			goto failed;
+		}
+
+		if (rec->reqid == 0) {
+			/* it's a store */
+			if (ctdb_transaction_store(h, key, data) != 0) {
+				goto failed;
+			}
+		} else {
+			TDB_DATA data2;
+			TALLOC_CTX *tmp_ctx = talloc_new(h);
+
+			if (ctdb_transaction_fetch(h, tmp_ctx, key, &data2) != 0) {
+				talloc_free(tmp_ctx);
+				goto failed;
+			}
+			if (data2.dsize != data.dsize ||
+			    memcmp(data2.dptr, data.dptr, data.dsize) != 0) {
+				/* the record has changed on us - we have to give up */
+				talloc_free(tmp_ctx);
+				goto failed;
+			}
+			talloc_free(tmp_ctx);
+		}
+	}
+	
+	return 0;
+
+failed:
+	tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
+	return -1;
+}
+
+
+/*
+  commit a transaction
+ */
+int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
+{
+	int ret, retries=0;
+	int32_t status;
+	struct ctdb_context *ctdb = h->ctdb_db->ctdb;
+	struct timeval timeout;
+	enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR;
+
+	talloc_set_destructor(h, NULL);
+
+	/* our commit strategy is quite complex.
+
+	   - we first try to commit the changes to all other nodes
+
+	   - if that works, then we commit locally and we are done
+
+	   - if a commit on another node fails, then we need to cancel
+	     the transaction, then restart the transaction (thus
+	     opening a window of time for a pending recovery to
+	     complete), then replay the transaction, checking all the
+	     reads and writes (checking that reads give the same data,
+	     and writes succeed). Then we retry the transaction to the
+	     other nodes
+	*/
+
+again:
+	if (h->m_write == NULL) {
+		/* no changes were made */
+		tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
+		talloc_free(h);
+		return 0;
+	}
+
+	/* tell ctdbd to commit to the other nodes */
+	timeout = timeval_current_ofs(1, 0);
+	ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, 
+			   retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, 0, 
+			   ctdb_marshall_finish(h->m_write), NULL, NULL, &status, 
+			   &timeout, NULL);
+	if (ret != 0 || status != 0) {
+		tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
+		sleep(1);
+
+		if (ret != 0) {
+			failure_control = CTDB_CONTROL_TRANS2_ERROR;
+		} else {
+			/* work out what error code we will give if we 
+			   have to fail the operation */
+			switch ((enum ctdb_trans2_commit_error)status) {
+			case CTDB_TRANS2_COMMIT_SUCCESS:
+			case CTDB_TRANS2_COMMIT_SOMEFAIL:
+			case CTDB_TRANS2_COMMIT_TIMEOUT:
+				failure_control = CTDB_CONTROL_TRANS2_ERROR;
+				break;
+			case CTDB_TRANS2_COMMIT_ALLFAIL:
+				failure_control = CTDB_CONTROL_TRANS2_FINISHED;
+				break;
+			}
+		}
+
+		if (++retries == 10) {
+			DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n", 
+					 h->ctdb_db->db_id, retries, (unsigned)failure_control));
+			ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, 
+				     failure_control, CTDB_CTRL_FLAG_NOREPLY, 
+				     tdb_null, NULL, NULL, NULL, NULL, NULL);		
+			talloc_free(h);
+			return -1;
+		}		
+
+		if (ctdb_replay_transaction(h) != 0) {
+			DEBUG(DEBUG_ERR,(__location__ " Failed to replay transaction\n"));
+			ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, 
+				     failure_control, CTDB_CTRL_FLAG_NOREPLY, 
+				     tdb_null, NULL, NULL, NULL, NULL, NULL);		
+			talloc_free(h);
+			return -1;
+		}
+		goto again;
+	} else {
+		failure_control = CTDB_CONTROL_TRANS2_ERROR;
+	}
+
+	/* do the real commit locally */
+	ret = tdb_transaction_commit(h->ctdb_db->ltdb->tdb);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction\n"));
+		ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, 
+			     failure_control, CTDB_CTRL_FLAG_NOREPLY, 
+			     tdb_null, NULL, NULL, NULL, NULL, NULL);		
+		talloc_free(h);
+		return ret;
+	}
+
+	/* tell ctdbd that we are finished with our local commit */
+	ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, 
+		     CTDB_CONTROL_TRANS2_FINISHED, CTDB_CTRL_FLAG_NOREPLY, 
+		     tdb_null, NULL, NULL, NULL, NULL, NULL);
+	talloc_free(h);
+	return 0;
+}


-- 
CTDB repository


More information about the samba-cvs mailing list