[SCM] CTDB repository - branch 1.2 updated - ctdb-1.9.1-496-gc77008e

Ronnie Sahlberg sahlberg at samba.org
Tue Nov 29 16:55:59 MST 2011


The branch, 1.2 has been updated
       via  c77008ea28b0237be6f78bb26fc62df6b65f3144 (commit)
       via  79eb40c7ea594267cae8ad45e8641e23075b9791 (commit)
       via  2b9b2c874e4f038bd7aefbc5e4e730fcef30c48a (commit)
       via  3ec9b4254e2c73ee9cbfe484dccc6661fce6736c (commit)
      from  85206ee924d8245091264984669abfe99f6e82b9 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2


- Log -----------------------------------------------------------------
commit c77008ea28b0237be6f78bb26fc62df6b65f3144
Author: Volker Lendecke <vl at samba.org>
Date:   Mon Oct 31 13:29:13 2011 +0100

    Add CTDB_CONTROL_CHECK_SRVIDS

commit 79eb40c7ea594267cae8ad45e8641e23075b9791
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Nov 28 13:56:30 2011 +1100

    Recover Persistent database DB by DB and not record by record
    
    Add a new tunable that changes how persistent databases are recovered:
    RecoverPDBBySeqNum
    
    When set to 1, persistent databases will be recovered in whole from the node which
    has the highest "__db_sequence_number__" record.
    This record is managed by samba for those databases where we do persistent writes and have
    inter-record relations.
    For these databases we do not want the usual "blend records from all nodes based
    on individual record RSN" but instead a mode where we pick one instance of the persistent database.
    
    If no node was found with a "__db_sequence_number__" record at all, we fall back to the original "recover records independently based on record RSN".
    Some persistent databases do not contain record interrelations and as such do not
    contain this special record at all.

commit 2b9b2c874e4f038bd7aefbc5e4e730fcef30c48a
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Nov 28 10:57:39 2011 +1100

    LibCTDB: add get persistent db seqnum control

commit 3ec9b4254e2c73ee9cbfe484dccc6661fce6736c
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Nov 28 10:41:17 2011 +1100

    DB Seqnum: must provide a ctdb_ltdb_header when calling ctdb_ltdb_fetch()

-----------------------------------------------------------------------

Summary of changes:
 include/ctdb.h           |   52 +++++++++++++++++++
 include/ctdb_private.h   |    3 +
 include/ctdb_protocol.h  |    1 +
 libctdb/control.c        |   40 +++++++++++++++
 libctdb/sync.c           |   18 +++++++
 server/ctdb_control.c    |    3 +
 server/ctdb_daemon.c     |   36 +++++++++++++
 server/ctdb_persistent.c |    3 +-
 server/ctdb_recoverd.c   |  126 ++++++++++++++++++++++++++++++++++++++++++++-
 server/ctdb_tunables.c   |    3 +-
 tools/ctdb.c             |   27 ++++++++++
 11 files changed, 307 insertions(+), 5 deletions(-)


Changeset truncated at 500 lines:

diff --git a/include/ctdb.h b/include/ctdb.h
index c95c2e1..f6b5f9f 100644
--- a/include/ctdb.h
+++ b/include/ctdb.h
@@ -453,6 +453,35 @@ bool ctdb_getpnn_recv(struct ctdb_connection *ctdb,
 
 
 /**
+ * ctdb_getdbseqnum_send - read the sequence number off a db
+ * @ctdb: the ctdb_connection from ctdb_connect.
+ * @destnode: the destination node (see below)
+ * @dbid: database id
+ * @callback: the callback when ctdb replies to our message (typesafe)
+ * @cbdata: the argument to callback()
+ *
+ * There are several special values for destnode, detailed in
+ * ctdb_protocol.h, particularly CTDB_CURRENT_NODE which means the
+ * local ctdbd.
+ */
+struct ctdb_request *
+ctdb_getdbseqnum_send(struct ctdb_connection *ctdb,
+		 uint32_t destnode,
+		 uint32_t dbid,
+		 ctdb_callback_t callback,
+		 void *cbdata);
+/**
+ * ctdb_getdbseqnum_recv - read the sequence number off a database
+ * @ctdb: the ctdb_connection from ctdb_connect.
+ * @req: the completed request.
+ * @seqnum: a pointer to the seqnum to fill in
+ *
+ * This returns false if something went wrong, or otherwise fills in seqnum.
+ */
+bool ctdb_getdbseqnum_recv(struct ctdb_connection *ctdb,
+		      struct ctdb_request *req, uint64_t *seqnum);
+
+/**
  * ctdb_getnodemap_send - read the nodemap number from a node.
  * @ctdb: the ctdb_connection from ctdb_connect.
  * @destnode: the destination node (see below)
@@ -652,6 +681,25 @@ bool ctdb_getpnn(struct ctdb_connection *ctdb,
 		 uint32_t *pnn);
 
 /**
+ * ctdb_getdbseqnum - read the seqnum of a database
+ * @ctdb: the ctdb_connection from ctdb_connect.
+ * @destnode: the destination node (see below)
+ * @dbid: database id
+ * @seqnum: sequence number for the database
+ *
+ * There are several special values for destnode, detailed in
+ * ctdb_protocol.h, particularly CTDB_CURRENT_NODE which means the
+ * local ctdbd.
+ *
+ * Returns true and fills in *seqnum on success.
+ */
+bool
+ctdb_getdbseqnum(struct ctdb_connection *ctdb,
+		 uint32_t destnode,
+		 uint32_t dbid,
+		 uint64_t *seqnum);
+
+/**
  * ctdb_getrecmaster - read the recovery master of a node (synchronous)
  * @ctdb: the ctdb_connection from ctdb_connect.
  * @destnode: the destination node (see below)
@@ -783,4 +831,8 @@ void ctdb_free_publicips(struct ctdb_all_public_ips *ips);
 	ctdb_getpublicips_send((ctdb), (destnode),			\
 			 ctdb_sendcb((cb), (cbdata)), (cbdata))
 
+#define ctdb_getdbseqnum_send(ctdb, destnode, dbid, cb, cbdata)		\
+	ctdb_getdbseqnum_send((ctdb), (destnode), (dbid),		\
+			 ctdb_sendcb((cb), (cbdata)), (cbdata))
+
 #endif
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index f0050fb..b877035 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -122,6 +122,7 @@ struct ctdb_tunable {
 	uint32_t vacuum_fast_path_count;
 	uint32_t lcp2_public_ip_assignment;
 	uint32_t allow_client_db_attach;
+	uint32_t recover_pdb_by_seqnum;
 };
 
 /*
@@ -958,6 +959,8 @@ int ctdb_dispatch_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA da
 int daemon_register_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid);
 int ctdb_deregister_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data);
 int daemon_deregister_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid);
+int daemon_check_srvids(struct ctdb_context *ctdb, TDB_DATA indata,
+			TDB_DATA *outdata);
 
 int32_t ctdb_ltdb_enable_seqnum(struct ctdb_context *ctdb, uint32_t db_id);
 int32_t ctdb_ltdb_update_seqnum(struct ctdb_context *ctdb, uint32_t db_id, uint32_t srcnode);
diff --git a/include/ctdb_protocol.h b/include/ctdb_protocol.h
index 0422afe..d0a9537 100644
--- a/include/ctdb_protocol.h
+++ b/include/ctdb_protocol.h
@@ -363,6 +363,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
 		    CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE  = 126,
 		    CTDB_CONTROL_GET_STAT_HISTORY	 = 127,
 		    CTDB_CONTROL_SCHEDULE_FOR_DELETION   = 128,
+		    CTDB_CONTROL_CHECK_SRVIDS		 = 130,
 };
 
 /*
diff --git a/libctdb/control.c b/libctdb/control.c
index 07185db..18ec1fb 100644
--- a/libctdb/control.c
+++ b/libctdb/control.c
@@ -26,6 +26,7 @@
 #undef ctdb_getpnn_send
 #undef ctdb_getnodemap_send
 #undef ctdb_getpublicips_send
+#undef ctdb_getdbseqnum_send
 
 bool ctdb_getrecmaster_recv(struct ctdb_connection *ctdb,
 			   struct ctdb_request *req, uint32_t *recmaster)
@@ -172,3 +173,42 @@ void ctdb_free_publicips(struct ctdb_all_public_ips *ips)
 	}
 	free(ips);
 }
+
+bool ctdb_getdbseqnum_recv(struct ctdb_connection *ctdb,
+			   struct ctdb_request *req, uint64_t *seqnum)
+{
+	struct ctdb_reply_control *reply;
+
+	reply = unpack_reply_control(ctdb, req, CTDB_CONTROL_GET_DB_SEQNUM);
+	if (!reply) {
+		return false;
+	}
+	if (reply->status == -1) {
+		DEBUG(ctdb, LOG_ERR, "ctdb_getdbseqnum_recv: status -1");
+		return false;
+	}
+
+	if (reply->datalen != sizeof(uint64_t)) {
+		DEBUG(ctdb, LOG_ERR, "ctdb_getdbseqnum wrong size of data was %d but expected %d bytes", reply->datalen, (int)sizeof(uint64_t));
+		return false;
+	}
+
+	*seqnum = *((uint64_t *)reply->data);
+
+	return true;
+}
+
+struct ctdb_request *ctdb_getdbseqnum_send(struct ctdb_connection *ctdb,
+					    uint32_t destnode,
+					    uint32_t dbid,
+					    ctdb_callback_t callback,
+					    void *private_data)
+{
+	uint64_t indata;
+
+	*((uint32_t *)&indata) = dbid;
+
+	return new_ctdb_control_request(ctdb, CTDB_CONTROL_GET_DB_SEQNUM,
+					destnode, &indata, sizeof(uint64_t),
+					callback, private_data);
+}
diff --git a/libctdb/sync.c b/libctdb/sync.c
index a1be3be..26fae56 100644
--- a/libctdb/sync.c
+++ b/libctdb/sync.c
@@ -227,3 +227,21 @@ struct ctdb_lock *ctdb_readrecordlock(struct ctdb_connection *ctdb,
 	}
 	return rrl.lock;
 }
+
+bool ctdb_getdbseqnum(struct ctdb_connection *ctdb,
+		      uint32_t destnode, uint32_t dbid,
+		      uint64_t *seqnum)
+{
+	struct ctdb_request *req;
+	bool done = false;
+	bool ret = false;
+
+	req = synchronous(ctdb,
+			  ctdb_getdbseqnum_send(ctdb, destnode, dbid, set, &done),
+			  &done);
+	if (req != NULL) {
+		ret = ctdb_getdbseqnum_recv(ctdb, req, seqnum);
+		ctdb_request_free(ctdb, req);
+	}
+	return ret;
+}
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 748907f..83e1bd1 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -253,6 +253,9 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	case CTDB_CONTROL_DEREGISTER_SRVID:
 		return daemon_deregister_message_handler(ctdb, client_id, srvid);
 
+	case CTDB_CONTROL_CHECK_SRVIDS:
+		return daemon_check_srvids(ctdb, indata, outdata);
+
 	case CTDB_CONTROL_ENABLE_SEQNUM:
 		CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
 		return ctdb_ltdb_enable_seqnum(ctdb, *(uint32_t *)indata.dptr);
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index b2a03aa..00dc2cc 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -205,6 +205,42 @@ int daemon_deregister_message_handler(struct ctdb_context *ctdb, uint32_t client
 	return ctdb_deregister_message_handler(ctdb, srvid, client);
 }
 
+int daemon_check_srvids(struct ctdb_context *ctdb, TDB_DATA indata,
+			TDB_DATA *outdata)
+{
+	uint64_t *ids;
+	int i, num_ids;
+	uint8_t *results;
+
+	if ((indata.dsize % sizeof(uint64_t)) != 0) {
+		DEBUG(DEBUG_ERR, ("Bad indata in daemon_check_srvids, "
+				  "size=%d\n", (int)indata.dsize));
+		return -1;
+	}
+
+	ids = (uint64_t *)indata.dptr;
+	num_ids = indata.dsize / 8;
+
+	results = talloc_zero_array(outdata, uint8_t, (num_ids+7)/8);
+	if (results == NULL) {
+		DEBUG(DEBUG_ERR, ("talloc failed in daemon_check_srvids\n"));
+		return -1;
+	}
+	for (i=0; i<num_ids; i++) {
+		struct ctdb_message_list *ml;
+		for (ml=ctdb->message_list; ml; ml=ml->next) {
+			if (ml->srvid == ids[i]) {
+				break;
+			}
+		}
+		if (ml != NULL) {
+			results[i/8] |= (1 << (i%8));
+		}
+	}
+	outdata->dptr = (uint8_t *)results;
+	outdata->dsize = talloc_get_size(results);
+	return 0;
+}
 
 /*
   destroy a ctdb_client
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index b95f456..dd8d479 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -938,6 +938,7 @@ static int32_t ctdb_get_db_seqnum(struct ctdb_context *ctdb,
 	TDB_DATA key;
 	TDB_DATA data;
 	TALLOC_CTX *mem_ctx = talloc_new(ctdb);
+	struct ctdb_ltdb_header header;
 
 	ctdb_db = find_ctdb_db(ctdb, db_id);
 	if (!ctdb_db) {
@@ -949,7 +950,7 @@ static int32_t ctdb_get_db_seqnum(struct ctdb_context *ctdb,
 	key.dptr = (uint8_t *)discard_const(keyname);
 	key.dsize = strlen(keyname) + 1;
 
-	ret = (int32_t)ctdb_ltdb_fetch(ctdb_db, key, NULL, mem_ctx, &data);
+	ret = (int32_t)ctdb_ltdb_fetch(ctdb_db, key, &header, mem_ctx, &data);
 	if (ret != 0) {
 		goto done;
 	}
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 4963c3f..3f80232 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -523,8 +523,7 @@ static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb
   pull the remote database contents from one node into the recdb
  */
 static int pull_one_remote_database(struct ctdb_context *ctdb, uint32_t srcnode, 
-				    struct tdb_wrap *recdb, uint32_t dbid,
-				    bool persistent)
+				    struct tdb_wrap *recdb, uint32_t dbid)
 {
 	int ret;
 	TDB_DATA outdata;
@@ -603,6 +602,119 @@ static int pull_one_remote_database(struct ctdb_context *ctdb, uint32_t srcnode,
 	return 0;
 }
 
+
+struct pull_seqnum_cbdata {
+	int failed;
+	uint32_t pnn;
+	uint64_t seqnum;
+};
+
+static void pull_seqnum_cb(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data)
+{
+	struct pull_seqnum_cbdata *cb_data = talloc_get_type(callback_data, struct pull_seqnum_cbdata);
+	uint64_t seqnum;
+
+	if (cb_data->failed != 0) {
+		DEBUG(DEBUG_ERR, ("Got seqnum from node %d but we have already failed the entire operation\n", node_pnn));
+		return;
+	}
+
+	if (res != 0) {
+		DEBUG(DEBUG_ERR, ("Error when pulling seqnum from node %d\n", node_pnn));
+		cb_data->failed = 1;
+		return;
+	}
+
+	if (outdata.dsize != sizeof(uint64_t)) {
+		DEBUG(DEBUG_ERR, ("Error when reading pull seqnum from node %d, got %d bytes but expected %d\n", node_pnn, (int)outdata.dsize, (int)sizeof(uint64_t)));
+		cb_data->failed = -1;
+		return;
+	}
+
+	seqnum = *((uint64_t *)outdata.dptr);
+
+	if (seqnum > cb_data->seqnum) {
+		cb_data->seqnum = seqnum;
+		cb_data->pnn = node_pnn;
+	}
+}
+
+static void pull_seqnum_fail_cb(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data)
+{
+	struct pull_seqnum_cbdata *cb_data = talloc_get_type(callback_data, struct pull_seqnum_cbdata);
+
+	DEBUG(DEBUG_ERR, ("Failed to pull db seqnum from node %d\n", node_pnn));
+	cb_data->failed = 1;
+}
+
+static int pull_highest_seqnum_pdb(struct ctdb_context *ctdb,
+				struct ctdb_recoverd *rec, 
+				struct ctdb_node_map *nodemap, 
+				struct tdb_wrap *recdb, uint32_t dbid)
+{
+	TALLOC_CTX *tmp_ctx = talloc_new(NULL);
+	uint32_t *nodes;
+	TDB_DATA data;
+	uint32_t outdata[2];
+	struct pull_seqnum_cbdata *cb_data;
+
+	DEBUG(DEBUG_NOTICE, ("Scan for highest seqnum pdb for db:0x%08x\n", dbid));
+
+	outdata[0] = dbid;
+	outdata[1] = 0;
+
+	data.dsize = sizeof(outdata);
+	data.dptr  = (uint8_t *)&outdata[0];
+
+	cb_data = talloc(tmp_ctx, struct pull_seqnum_cbdata);
+	if (cb_data == NULL) {
+		DEBUG(DEBUG_ERR, ("Failed to allocate pull highest seqnum cb_data structure\n"));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	cb_data->failed = 0;
+	cb_data->pnn    = -1;
+	cb_data->seqnum = 0;
+	
+	nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
+	if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_DB_SEQNUM,
+					nodes, 0,
+					CONTROL_TIMEOUT(), false, data,
+					pull_seqnum_cb,
+					pull_seqnum_fail_cb,
+					cb_data) != 0) {
+		DEBUG(DEBUG_ERR, (__location__ " Failed to run async GET_DB_SEQNUM\n"));
+
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	if (cb_data->failed != 0) {
+		DEBUG(DEBUG_NOTICE, ("Failed to pull sequence numbers for DB 0x%08x\n", dbid));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	if (cb_data->seqnum == 0 || cb_data->pnn == -1) {
+		DEBUG(DEBUG_NOTICE, ("Failed to find a node with highest sequence numbers for DB 0x%08x\n", dbid));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	DEBUG(DEBUG_NOTICE, ("Pull persistent db:0x%08x from node %d with highest seqnum:%lld\n", dbid, cb_data->pnn, (long long)cb_data->seqnum)); 
+
+	if (pull_one_remote_database(ctdb, cb_data->pnn, recdb, dbid) != 0) {
+		DEBUG(DEBUG_ERR, ("Failed to pull higest seqnum database 0x%08x from node %d\n", dbid, cb_data->pnn));
+		talloc_free(tmp_ctx);
+		return -1;
+	}
+
+	talloc_free(tmp_ctx);
+	return 0;
+}
+
+
 /*
   pull all the remote database contents into the recdb
  */
@@ -614,6 +726,14 @@ static int pull_remote_database(struct ctdb_context *ctdb,
 {
 	int j;
 
+	if (persistent && ctdb->tunable.recover_pdb_by_seqnum != 0) {
+		int ret;
+		ret = pull_highest_seqnum_pdb(ctdb, rec, nodemap, recdb, dbid);
+		if (ret == 0) {
+			return 0;
+		}
+	}
+
 	/* pull all records from all other nodes across onto this node
 	   (this merges based on rsn)
 	*/
@@ -622,7 +742,7 @@ static int pull_remote_database(struct ctdb_context *ctdb,
 		if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
 			continue;
 		}
-		if (pull_one_remote_database(ctdb, nodemap->nodes[j].pnn, recdb, dbid, persistent) != 0) {
+		if (pull_one_remote_database(ctdb, nodemap->nodes[j].pnn, recdb, dbid) != 0) {
 			DEBUG(DEBUG_ERR,(__location__ " Failed to pull remote database from node %u\n", 
 				 nodemap->nodes[j].pnn));
 			ctdb_set_culprit_count(rec, nodemap->nodes[j].pnn, nodemap->num);
diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index ef86051..cfc80e5 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -69,7 +69,8 @@ static const struct {
 	{ "AllowUnhealthyDBRead", 0,  offsetof(struct ctdb_tunable, allow_unhealthy_db_read) },
 	{ "StatHistoryInterval",  1,  offsetof(struct ctdb_tunable, stat_history_interval) },
 	{ "DeferredAttachTO",  120,  offsetof(struct ctdb_tunable, deferred_attach_timeout) },
-	{ "AllowClientDBAttach", 1, offsetof(struct ctdb_tunable, allow_client_db_attach) }
+	{ "AllowClientDBAttach", 1, offsetof(struct ctdb_tunable, allow_client_db_attach) },
+	{ "RecoverPDBBySeqNum",  0, offsetof(struct ctdb_tunable, recover_pdb_by_seqnum) }
 };
 
 /*
diff --git a/tools/ctdb.c b/tools/ctdb.c
index d49bc8f..62dc54a 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -4021,6 +4021,32 @@ static int control_getdbprio(struct ctdb_context *ctdb, int argc, const char **a
 }
 
 /*
+  get db seqnum
+ */
+static int control_getdbseqnum(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+	bool ret;
+	uint32_t db_id;
+	uint64_t seqnum;
+
+	if (argc < 1) {
+		usage();
+	}
+
+	db_id = strtoul(argv[0], NULL, 0);
+
+	ret = ctdb_getdbseqnum(ctdb_connection, options.pnn, db_id, &seqnum);
+	if (!ret) {
+		DEBUG(DEBUG_ERR, ("Unable to get seqnum from node."));
+		return -1;
+	}
+
+	printf("Sequence number:%lld\n", (long long)seqnum);
+
+	return 0;
+}
+
+/*
   run an eventscript on a node
  */
 static int control_eventscript(struct ctdb_context *ctdb, int argc, const char **argv)
@@ -5020,6 +5046,7 @@ static const struct {
 	{ "readkey", 	     control_readkey,      	true,	false,  "read the content off a database key", "<tdb-file> <key>" },
 	{ "writekey", 	     control_writekey,      	true,	false,  "write to a database key", "<tdb-file> <key> <value>" },
 	{ "checktcpport",    control_chktcpport,      	false,	true,  "check if a service is bound to a specific tcp port or not", "<port>" },
+	{ "getdbseqnum",     control_getdbseqnum,       false,	false, "get the sequence number off a database", "<dbid>" },
 };
 
 /*


-- 
CTDB repository


More information about the samba-cvs mailing list