[SCM] CTDB repository - branch master updated - ctdb-2.0-11-g4f42d17
Amitay Isaacs
amitay at samba.org
Tue Nov 20 22:53:41 MST 2012
The branch, master has been updated
via 4f42d17b74ce891691eee1cead498959cc8e4837 (commit)
via 6860c79aea416f56cfd7a6af790bbdf495dbc54e (commit)
via 909269a4a3690e1245117ca1af935401455785e6 (commit)
via bab744e3c49efef2e05dc09e8ea9bd3e3fa58716 (commit)
from d8f010355b715e49709836e057a5d0f110919897 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit 4f42d17b74ce891691eee1cead498959cc8e4837
Author: Michael Adam <obnox at samba.org>
Date: Tue Nov 6 01:26:05 2012 +0100
utils:ping_pong: add a -c switch to check the lock before reading/writing
This is to verify that the fcntl F_GETLK call reports F_UNLCK if called
from a process already holding a lock. This is for example used by samba's
strict locking code in combination with "posix locking = true".
Signed-off-by: Michael Adam <obnox at samba.org>
commit 6860c79aea416f56cfd7a6af790bbdf495dbc54e
Author: Michael Adam <obnox at samba.org>
Date: Mon Nov 19 17:28:03 2012 +0100
recovery: data corruption of persistent DBs after recoveries: don't delete empty records
The record-by-record mode of recovery deletes empty records.
For persistent databases, this can lead to data corruption
by deleting records that should be there:
- Assume the cluster has been running for a while.
- A record R in a persistent database has been created and
deleted a couple of times, the last operation being deletion,
leaving an empty record with a high RSN, say 10.
- Now a node N is turned off.
- This leaves N's local copy of that persistent database with the empty
copy of R and RSN 10. On all other nodes, the recovery has deleted
the copy of record R.
- Now the record is created again while node N is turned off.
This creates R with RSN = 1 on all nodes except for N.
- Now node N is turned on again. The following recovery will choose
the older empty copy of R due to RSN 10 > RSN 1.
==> Hence the record is gone after the recovery.
On databases like Samba's registry, this can damage the higher-level
data structures built from the various tdb-level records.
This patch fixes that problem by not deleting empty records in recoveries
for persistent databases.
Signed-off-by: Michael Adam <obnox at samba.org>
commit 909269a4a3690e1245117ca1af935401455785e6
Author: Michael Adam <obnox at samba.org>
Date: Mon Nov 19 17:20:11 2012 +0100
recoverd: fix a comment typo
Signed-off-by: Michael Adam <obnox at samba.org>
commit bab744e3c49efef2e05dc09e8ea9bd3e3fa58716
Author: Michael Adam <obnox at samba.org>
Date: Fri Nov 16 14:33:41 2012 +0100
vacuum: fix a comment typo
Pair-Programmed-With: Volker Lendecke <vl at samba.org>
Signed-off-by: Michael Adam <obnox at samba.org>
-----------------------------------------------------------------------
Summary of changes:
server/ctdb_recoverd.c | 35 ++++++++++++++++++++++++++++++++---
server/ctdb_vacuum.c | 2 +-
utils/ping_pong/ping_pong.c | 42 ++++++++++++++++++++++++++++++++++++++++--
3 files changed, 73 insertions(+), 6 deletions(-)
Changeset truncated at 500 lines:
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 6d0dbc4..d50e84e 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1185,7 +1185,7 @@ static struct tdb_wrap *create_recdb(struct ctdb_context *ctdb, TALLOC_CTX *mem_
/*
- a traverse function for pulling all relevent records from recdb
+ a traverse function for pulling all relevant records from recdb
*/
struct recdb_data {
struct ctdb_context *ctdb;
@@ -1202,8 +1202,37 @@ static int traverse_recdb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
struct ctdb_rec_data *rec;
struct ctdb_ltdb_header *hdr;
- /* skip empty records */
- if (data.dsize <= sizeof(struct ctdb_ltdb_header)) {
+ /*
+ * skip empty records - but NOT for persistent databases:
+ *
+ * The record-by-record mode of recovery deletes empty records.
+ * For persistent databases, this can lead to data corruption
+ * by deleting records that should be there:
+ *
+ * - Assume the cluster has been running for a while.
+ *
+ * - A record R in a persistent database has been created and
+ * deleted a couple of times, the last operation being deletion,
+ * leaving an empty record with a high RSN, say 10.
+ *
+ * - Now a node N is turned off.
+ *
+ * - This leaves the local database copy of D on N with the empty
+ * copy of R and RSN 10. On all other nodes, the recovery has deleted
+ * the copy of record R.
+ *
+ * - Now the record is created again while node N is turned off.
+ * This creates R with RSN = 1 on all nodes except for N.
+ *
+ * - Now node N is turned on again. The following recovery will chose
+ * the older empty copy of R due to RSN 10 > RSN 1.
+ *
+ * ==> Hence the record is gone after the recovery.
+ *
+ * On databases like Samba's registry, this can damage the higher-level
+ * data structures built from the various tdb-level records.
+ */
+ if (!params->persistent && data.dsize <= sizeof(struct ctdb_ltdb_header)) {
return 0;
}
diff --git a/server/ctdb_vacuum.c b/server/ctdb_vacuum.c
index 0ca485d..7f6a8f5 100644
--- a/server/ctdb_vacuum.c
+++ b/server/ctdb_vacuum.c
@@ -679,7 +679,7 @@ static int ctdb_process_vacuum_fetch_lists(struct ctdb_db_context *ctdb_db,
}
/**
- * Proces the delete list:
+ * Process the delete list:
* Send the records to delete to all other nodes with the
* try_delete_records control.
*/
diff --git a/utils/ping_pong/ping_pong.c b/utils/ping_pong/ping_pong.c
index 098dacd..0a49d66 100644
--- a/utils/ping_pong/ping_pong.c
+++ b/utils/ping_pong/ping_pong.c
@@ -2,6 +2,7 @@
A ping-pong fcntl byte range lock test
Copyright (C) Andrew Tridgell 2002
+ Copyright (C) Michael Adam 2012
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -41,7 +42,7 @@
static struct timeval tp1,tp2;
-static int do_reads, do_writes, use_mmap;
+static int do_reads, do_writes, use_mmap, do_check;
static void start_timer(void)
{
@@ -69,6 +70,36 @@ static int lock_range(int fd, int offset, int len)
return fcntl(fd,F_SETLKW,&lock);
}
+/* check whether we could place a lock */
+int check_lock(int fd, int offset, int len)
+{
+ struct flock lock;
+ int ret;
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = offset;
+ lock.l_len = len;
+ lock.l_pid = 0;
+
+ ret = fcntl(fd, F_GETLK, &lock);
+ if (ret != 0) {
+ printf("error calling fcntl F_GETLCK: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (lock.l_type == F_UNLCK) {
+ /* we would be able to place the lock */
+ return 0;
+ }
+
+ /* we would not be able to place lock */
+ printf("check_lock failed: lock held: "
+ "pid='%d', type='%d', start='%d', len='%d'\n",
+ (int)lock.l_pid, (int)lock.l_type, (int)lock.l_start, (int)lock.l_len);
+ return 1;
+}
+
/* unlock a byte range in a open file */
static int unlock_range(int fd, int offset, int len)
{
@@ -123,6 +154,9 @@ static void ping_pong(int fd, int num_locks)
printf("lock at %d failed! - %s\n",
(i+1) % num_locks, strerror(errno));
}
+ if (do_check) {
+ ret = check_lock(fd, i, 1);
+ }
if (do_reads) {
unsigned char c;
if (use_mmap) {
@@ -169,7 +203,7 @@ int main(int argc, char *argv[])
int fd, num_locks;
int c;
- while ((c = getopt(argc, argv, "rwm")) != -1) {
+ while ((c = getopt(argc, argv, "rwmc")) != -1) {
switch (c){
case 'w':
do_writes = 1;
@@ -180,6 +214,9 @@ int main(int argc, char *argv[])
case 'm':
use_mmap = 1;
break;
+ case 'c':
+ do_check = 1;
+ break;
default:
fprintf(stderr, "Unknown option '%c'\n", c);
exit(1);
@@ -194,6 +231,7 @@ int main(int argc, char *argv[])
printf(" -r do reads\n");
printf(" -w do writes\n");
printf(" -m use mmap\n");
+ printf(" -c check locks\n");
exit(1);
}
--
CTDB repository
More information about the samba-cvs
mailing list