[SCM] CTDB repository - branch 1.0.112 updated - ctdb-1.0.111-158-g5ecf734

Ronnie Sahlberg sahlberg at samba.org
Sun Mar 6 19:42:55 MST 2011


The branch, 1.0.112 has been updated
       via  5ecf7341e21506ff1ba75946736e537a1ab97e1e (commit)
      from  8d0bd2f75e29ca8f54bd7cda1e3619e5b6b5d9ac (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.0.112


- Log -----------------------------------------------------------------
commit 5ecf7341e21506ff1ba75946736e537a1ab97e1e
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Mar 7 12:50:14 2011 +1100

    VACUUMING lmaster check is wrong when nodes are disconnected from the cluster
    
    During vacuuming we incorrectly used the size of the VNN map as the range of possible/allowed lmaster values.
    This is incorrect if for example one or more of the nodes in the cluster are permanently disconnected and taken out of service.
    When this happens, there will be a slow and steady buildup of un-vacuumable records which belong to the last n nodes in the cluster.
    Un vacuumnable since the vnn map is smaller than the niumber of nodes and any records belonging to a node with pnn id higher than the size of the vnn map size would always be ignored.
    
    Eventually the cluster would be full of un-vacuumable dead records which would lead to ctdb outages.
    
    Fix the vacuuming logic to use ctdb->num_nodes as the possible range for lmaster values. Fix the other references in the vacuuming to keep ctdb->num_nodes lists of records and not vnnmap->size.
    
    To reproduce, start a 3 node cluster and shutdown node #1 so only node #0 and node #2 are running.
    To not have to create unrealistically large number of records, we need to tweak the tunables for vacuuming so vacuuming also triggers for small number of rectords :
    onnode all ctdb setvar VacuumLimit 10; onnode all ctdb setvar RepackLimit 10
    
    Use smbtorture to create a large number of dead records in sessionid.tdb :
    smbtorture //127.0.0.1/data -U<user>%<password> --num-ops=3000 BASE-BENCH-HOLDCON
    
    Without the patch, many/most records would eventually be vacuumed, but the database would never become completely empty, there would always be a number of records that would hash to node#2 which would never be removed.
    
    With this patch, all records will eventually become vacuumed and the database would become empty.
    
    alternatively, by starting node#1,  the database would also become empty.
    
    The command 'tdbdump /var/ctdb/sessionid.tdb.* | grep data\(24\) | wc -l'
    can be used to count how many dead records exist in the database.
    
    CQ S1022203

-----------------------------------------------------------------------

Summary of changes:
 server/ctdb_vacuum.c |   26 +++++++++++++++-----------
 1 files changed, 15 insertions(+), 11 deletions(-)


Changeset truncated at 500 lines:

diff --git a/server/ctdb_vacuum.c b/server/ctdb_vacuum.c
index 59713e6..382828a 100644
--- a/server/ctdb_vacuum.c
+++ b/server/ctdb_vacuum.c
@@ -122,7 +122,8 @@ static int vacuum_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
 	}
 	       
 	lmaster = ctdb_lmaster(ctdb, &key);
-	if (lmaster >= ctdb->vnn_map->size) {
+	if (lmaster >= ctdb->num_nodes) {
+		DEBUG(DEBUG_ERR,("VACUUM TRAVERSE lmaster for record is out of bounds : %d but max allowed is %d\n", lmaster, ctdb->vnn_map->size));
 		return 0;
 	}
 
@@ -251,12 +252,12 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *v
 
 	ctdb->pnn = pnn;
 	/* the list needs to be of length num_nodes */
-	vdata->list = talloc_array(vdata, struct ctdb_marshall_buffer *, ctdb->vnn_map->size);
+	vdata->list = talloc_array(vdata, struct ctdb_marshall_buffer *, ctdb->num_nodes);
 	if (vdata->list == NULL) {
 		DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
 		return -1;
 	}
-	for (i = 0; i < ctdb->vnn_map->size; i++) {
+	for (i = 0; i < ctdb->num_nodes; i++) {
 		vdata->list[i] = (struct ctdb_marshall_buffer *)
 			talloc_zero_size(vdata->list, 
 							 offsetof(struct ctdb_marshall_buffer, data));
@@ -277,22 +278,22 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *v
 		DEBUG(DEBUG_INFO,("Traverse aborted vacuuming '%s'\n", name));
 		return -1;
 	}
-	for ( i = 0; i < ctdb->vnn_map->size; i++) {
+	for (i = 0; i < ctdb->num_nodes; i++) {
 		if (vdata->list[i]->count == 0) {
 			continue;
 		}
 
 		/* for records where we are not the lmaster, tell the lmaster to fetch the record */
-		if (ctdb->vnn_map->map[i] != ctdb->pnn) {
+		if (ctdb->nodes[i]->pnn != ctdb->pnn) {
 			TDB_DATA data;
 			DEBUG(DEBUG_INFO,("Found %u records for lmaster %u in '%s'\n", 
 								vdata->list[i]->count, i, name));
 
 			data.dsize = talloc_get_size(vdata->list[i]);
 			data.dptr  = (void *)vdata->list[i];
-			if (ctdb_send_message(ctdb, ctdb->vnn_map->map[i], CTDB_SRVID_VACUUM_FETCH, data) != 0) {
+			if (ctdb_send_message(ctdb, ctdb->nodes[i]->pnn, CTDB_SRVID_VACUUM_FETCH, data) != 0) {
 				DEBUG(DEBUG_ERR,(__location__ " Failed to send vacuum fetch message to %u\n",
-					 ctdb->vnn_map->map[i]));
+					 ctdb->nodes[i]->pnn));
 				return -1;		
 			}
 			continue;
@@ -332,12 +333,15 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *v
 		 * now tell all the other nodes to delete all these records
 		 * (if possible)
 		 */
-		for (i = 0; i < ctdb->vnn_map->size; i++) {
+		for (i = 0; i < ctdb->num_nodes; i++) {
 			struct ctdb_marshall_buffer *records;
 			struct ctdb_rec_data *rec;
 			char c;
 
-			if (ctdb->vnn_map->map[i] == ctdb->pnn) {
+			if (ctdb->nodes[i]->flags & NODE_FLAGS_INACTIVE) {
+				continue;
+			}
+			if (ctdb->nodes[i]->pnn == ctdb->pnn) {
 				/* we dont delete the records on the local node just yet */
 				continue;
 			}
@@ -348,12 +352,12 @@ static int ctdb_vacuum_db(struct ctdb_db_context *ctdb_db, struct vacuum_data *v
 				return -1;
 			}
 
-			ret = ctdb_control(ctdb, ctdb->vnn_map->map[i], 0,
+			ret = ctdb_control(ctdb, ctdb->nodes[i]->pnn, 0,
 					CTDB_CONTROL_TRY_DELETE_RECORDS, 0,
 					indata, recs, &outdata, &res,
 					NULL, NULL);
 			if (ret != 0 || res != 0) {
-				DEBUG(DEBUG_ERR,("Failed to delete records on node %u\n", ctdb->vnn_map->map[i]));
+				DEBUG(DEBUG_ERR,("Failed to delete records on node %u\n", ctdb->nodes[i]->pnn));
 				return -1;
 			}
 


-- 
CTDB repository


More information about the samba-cvs mailing list