[SCM] CTDB repository - branch master updated - ctdb-2.0-11-g4f42d17

Amitay Isaacs amitay at samba.org
Tue Nov 20 22:53:41 MST 2012


The branch, master has been updated
       via  4f42d17b74ce891691eee1cead498959cc8e4837 (commit)
       via  6860c79aea416f56cfd7a6af790bbdf495dbc54e (commit)
       via  909269a4a3690e1245117ca1af935401455785e6 (commit)
       via  bab744e3c49efef2e05dc09e8ea9bd3e3fa58716 (commit)
      from  d8f010355b715e49709836e057a5d0f110919897 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 4f42d17b74ce891691eee1cead498959cc8e4837
Author: Michael Adam <obnox at samba.org>
Date:   Tue Nov 6 01:26:05 2012 +0100

    utils:ping_pong: add a -c switch to check the lock before reading/writing
    
    This is to verify that the fcntl F_GETLK call reports F_UNLCK if called
    from a process already holding a lock. This is for example used by samba's
    strict locking code in combination with "posix locking = true".
    
    Signed-off-by: Michael Adam <obnox at samba.org>

commit 6860c79aea416f56cfd7a6af790bbdf495dbc54e
Author: Michael Adam <obnox at samba.org>
Date:   Mon Nov 19 17:28:03 2012 +0100

    recovery: data corruption of persistent DBs after recoveries: don't delete empty records
    
    The record-by-record mode of recovery deletes empty records.
    For persistent databases, this can lead to data corruption
    by deleting records that should be there:
    
    - Assume the cluster has been running for a while.
    
    - A record R in a persistent database D has been created and
      deleted a couple of times, the last operation being deletion,
      leaving an empty record with a high RSN, say 10.
    
    - Now a node N is turned off.
    
    - This leaves the local database copy of D on N with the empty
      copy of R and RSN 10. On all other nodes, the recovery has deleted
      the copy of record R.
    
    - Now the record is created again while node N is turned off.
      This creates R with RSN = 1 on all nodes except for N.
    
    - Now node N is turned on again. The following recovery will choose
      the older empty copy of R due to RSN 10 > RSN 1.
    
    ==> Hence the record is gone after the recovery.
    
    On databases like Samba's registry, this can damage the higher-level
    data structures built from the various tdb-level records.
    
    This patch fixes that problem by not deleting empty records in recoveries
    for persistent databases.
    
    Signed-off-by: Michael Adam <obnox at samba.org>

commit 909269a4a3690e1245117ca1af935401455785e6
Author: Michael Adam <obnox at samba.org>
Date:   Mon Nov 19 17:20:11 2012 +0100

    recoverd: fix a comment typo
    
    Signed-off-by: Michael Adam <obnox at samba.org>

commit bab744e3c49efef2e05dc09e8ea9bd3e3fa58716
Author: Michael Adam <obnox at samba.org>
Date:   Fri Nov 16 14:33:41 2012 +0100

    vacuum: fix a comment typo
    
    Pair-Programmed-With: Volker Lendecke <vl at samba.org>
    Signed-off-by: Michael Adam <obnox at samba.org>

-----------------------------------------------------------------------

Summary of changes:
 server/ctdb_recoverd.c      |   35 ++++++++++++++++++++++++++++++++---
 server/ctdb_vacuum.c        |    2 +-
 utils/ping_pong/ping_pong.c |   42 ++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 73 insertions(+), 6 deletions(-)


Changeset truncated at 500 lines:

diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 6d0dbc4..d50e84e 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1185,7 +1185,7 @@ static struct tdb_wrap *create_recdb(struct ctdb_context *ctdb, TALLOC_CTX *mem_
 
 
 /* 
-   a traverse function for pulling all relevent records from recdb
+   a traverse function for pulling all relevant records from recdb
  */
 struct recdb_data {
 	struct ctdb_context *ctdb;
@@ -1202,8 +1202,37 @@ static int traverse_recdb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
 	struct ctdb_rec_data *rec;
 	struct ctdb_ltdb_header *hdr;
 
-	/* skip empty records */
-	if (data.dsize <= sizeof(struct ctdb_ltdb_header)) {
+	/*
+	 * skip empty records - but NOT for persistent databases:
+	 *
+	 * The record-by-record mode of recovery deletes empty records.
+	 * For persistent databases, this can lead to data corruption
+	 * by deleting records that should be there:
+	 *
+	 * - Assume the cluster has been running for a while.
+	 *
+	 * - A record R in a persistent database has been created and
+	 *   deleted a couple of times, the last operation being deletion,
+	 *   leaving an empty record with a high RSN, say 10.
+	 *
+	 * - Now a node N is turned off.
+	 *
+	 * - This leaves the local database copy of D on N with the empty
+	 *   copy of R and RSN 10. On all other nodes, the recovery has deleted
+	 *   the copy of record R.
+	 *
+	 * - Now the record is created again while node N is turned off.
+	 *   This creates R with RSN = 1 on all nodes except for N.
+	 *
+	 * - Now node N is turned on again. The following recovery will chose
+	 *   the older empty copy of R due to RSN 10 > RSN 1.
+	 *
+	 * ==> Hence the record is gone after the recovery.
+	 *
+	 * On databases like Samba's registry, this can damage the higher-level
+	 * data structures built from the various tdb-level records.
+	 */
+	if (!params->persistent && data.dsize <= sizeof(struct ctdb_ltdb_header)) {
 		return 0;
 	}
 
diff --git a/server/ctdb_vacuum.c b/server/ctdb_vacuum.c
index 0ca485d..7f6a8f5 100644
--- a/server/ctdb_vacuum.c
+++ b/server/ctdb_vacuum.c
@@ -679,7 +679,7 @@ static int ctdb_process_vacuum_fetch_lists(struct ctdb_db_context *ctdb_db,
 }
 
 /**
- * Proces the delete list:
+ * Process the delete list:
  * Send the records to delete to all other nodes with the
  * try_delete_records control.
  */
diff --git a/utils/ping_pong/ping_pong.c b/utils/ping_pong/ping_pong.c
index 098dacd..0a49d66 100644
--- a/utils/ping_pong/ping_pong.c
+++ b/utils/ping_pong/ping_pong.c
@@ -2,6 +2,7 @@
    A ping-pong fcntl byte range lock test
 
    Copyright (C) Andrew Tridgell 2002
+   Copyright (C) Michael Adam 2012
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -41,7 +42,7 @@
 
 static struct timeval tp1,tp2;
 
-static int do_reads, do_writes, use_mmap;
+static int do_reads, do_writes, use_mmap, do_check;
 
 static void start_timer(void)
 {
@@ -69,6 +70,36 @@ static int lock_range(int fd, int offset, int len)
 	return fcntl(fd,F_SETLKW,&lock);
 }
 
+/* check whether we could place a lock */
+int check_lock(int fd, int offset, int len)
+{
+	struct flock lock;
+	int ret;
+
+	lock.l_type = F_WRLCK;
+	lock.l_whence = SEEK_SET;
+	lock.l_start = offset;
+	lock.l_len = len;
+	lock.l_pid = 0;
+
+	ret = fcntl(fd, F_GETLK, &lock);
+	if (ret != 0) {
+		printf("error calling fcntl F_GETLCK: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (lock.l_type == F_UNLCK) {
+		/* we would be able to place the lock */
+		return 0;
+	}
+
+	/* we would not be able to place lock */
+	printf("check_lock failed: lock held: "
+	       "pid='%d', type='%d', start='%d', len='%d'\n",
+	       (int)lock.l_pid, (int)lock.l_type, (int)lock.l_start, (int)lock.l_len);
+	return 1;
+}
+
 /* unlock a byte range in a open file */
 static int unlock_range(int fd, int offset, int len)
 {
@@ -123,6 +154,9 @@ static void ping_pong(int fd, int num_locks)
 			printf("lock at %d failed! - %s\n",
 			       (i+1) % num_locks, strerror(errno));
 		}
+		if (do_check) {
+			ret = check_lock(fd, i, 1);
+		}
 		if (do_reads) {
 			unsigned char c;
 			if (use_mmap) {
@@ -169,7 +203,7 @@ int main(int argc, char *argv[])
 	int fd, num_locks;
 	int c;
 
-	while ((c = getopt(argc, argv, "rwm")) != -1) {
+	while ((c = getopt(argc, argv, "rwmc")) != -1) {
 		switch (c){
 		case 'w':
 			do_writes = 1;
@@ -180,6 +214,9 @@ int main(int argc, char *argv[])
 		case 'm':
 			use_mmap = 1;
 			break;
+		case 'c':
+			do_check = 1;
+			break;
 		default:
 			fprintf(stderr, "Unknown option '%c'\n", c);
 			exit(1);
@@ -194,6 +231,7 @@ int main(int argc, char *argv[])
 		printf("           -r    do reads\n");
 		printf("           -w    do writes\n");
 		printf("           -m    use mmap\n");
+		printf("           -c    check locks\n");
 		exit(1);
 	}
 


-- 
CTDB repository


More information about the samba-cvs mailing list