[SCM] Samba Shared Repository - branch v3-6-test updated
Karolin Seeger
kseeger at samba.org
Tue Oct 9 01:28:29 MDT 2012
The branch, v3-6-test has been updated
via 493f3a0 s3: Add two tests a CLEAR_IF_FIRST crash
via 15a3dfb tdb: Make tdb robust against improper CLEAR_IF_FIRST restart
via e7e86fc tdb: Make robust against shrinking tdbs
from ac7d976 When setting a non-default ACL, don't forget to apply masks to SMB_ACL_USER and SMB_ACL_GROUP entries. (cherry picked from commit 6575d1d34fee45c7a965c7c9641cc52b566a9e7f)
http://gitweb.samba.org/?p=samba.git;a=shortlog;h=v3-6-test
- Log -----------------------------------------------------------------
commit 493f3a0e84a4bbeb8bac68dc28988c54b1619317
Author: Volker Lendecke <vl at samba.org>
Date: Mon Oct 8 12:25:49 2012 -0700
s3: Add two tests a CLEAR_IF_FIRST crash
The last 3 patches address bug #9268 - Make tdb robust against improper
CLEAR_IF_FIRST restart.
commit 15a3dfbc15de1e214e9aee57d7d83de60fe747bd
Author: Volker Lendecke <vl at samba.org>
Date: Mon Oct 8 12:02:43 2012 -0700
tdb: Make tdb robust against improper CLEAR_IF_FIRST restart
When winbind is restarted, there is a potential crash in tdb. Following
situation: We are in a cluster with ctdb. A winbind child hangs
in a request to the DC. Cluster monitoring decides the node has a
problem. Cluster monitoring decides to kill ctdbd. winbind child
still hangs in a RPC request. winbind parent figures that ctdb is
dead and immediately commits suicide. winbind parent is restarted by
cluster management, overwriting gencache.tdb with CLEAR_IF_FIRST. The
CLEAR_IF_FIRST logic as implemented now will not see that a child still
has the tdb open, only the parent holds the ACTIVE_LOCK due to performance
reasons. During the CLEAR_IF_FIRST logic is done, there is a very small
window where we ftruncate(tfd, 0) the file and re-write a proper header
without a lock. When during this small window the winbind child comes
back, wanting to store something into gencache.tdb, that winbind child
will crash with a SIGBUS.
Sounds unlikely? See:
[2012/09/29 07:02:31.871607, 0] lib/util.c:1183(smb_panic)
PANIC (pid 1814517): internal error
[2012/09/29 07:02:31.877596, 0] lib/util.c:1287(log_stack_trace)
BACKTRACE: 35 stack frames:
#0 winbindd(log_stack_trace+0x1a) [0x7feb7d4ca18a]
#1 winbindd(smb_panic+0x2b) [0x7feb7d4ca25b]
#2 winbindd(+0x1a3cc4) [0x7feb7d4bacc4]
#3 /lib64/libc.so.6(+0x32900) [0x7feb7a929900]
#4 /lib64/libc.so.6(memcpy+0x35) [0x7feb7a97f355]
#5 /usr/lib64/libtdb.so.1(+0x6e76) [0x7feb7b0b0e76]
#6 /usr/lib64/libtdb.so.1(+0x3d37) [0x7feb7b0add37]
#7 /usr/lib64/libtdb.so.1(+0x863d) [0x7feb7b0b263d]
#8 /usr/lib64/libtdb.so.1(+0x8700) [0x7feb7b0b2700]
#9 /usr/lib64/libtdb.so.1(+0x2505) [0x7feb7b0ac505]
#10 /usr/lib64/libtdb.so.1(+0x25b7) [0x7feb7b0ac5b7]
#11 /usr/lib64/libtdb.so.1(tdb_fetch+0x13) [0x7feb7b0ac633]
#12 winbindd(gencache_set_data_blob+0x259) [0x7feb7d4d8449]
#13 winbindd(gencache_set+0x53) [0x7feb7d4d85b3]
#14 winbindd(gencache_del+0x5e) [0x7feb7d4d879e]
#15 winbindd(saf_delete+0x93) [0x7feb7d54b693]
#16 winbindd(+0xe507e) [0x7feb7d3fc07e]
#17 winbindd(+0xe85e5) [0x7feb7d3ff5e5]
#18 winbindd(+0xe65be) [0x7feb7d3fd5be]
#19 winbindd(+0xe7562) [0x7feb7d3fe562]
#20 winbindd(init_dc_connection+0x2e) [0x7feb7d3fe5be]
#21 winbindd(+0xe75d9) [0x7feb7d3fe5d9]
#22 winbindd(cm_connect_netlogon+0x58) [0x7feb7d3fe658]
#23 winbindd(_wbint_PingDc+0x61) [0x7feb7d410991]
#24 winbindd(+0x103175) [0x7feb7d41a175]
#25 winbindd(winbindd_dual_ndrcmd+0xb7) [0x7feb7d4107d7]
#26 winbindd(+0xf8609) [0x7feb7d40f609]
#27 winbindd(+0xf9075) [0x7feb7d410075]
#28 winbindd(tevent_common_loop_immediate+0xe8) [0x7feb7d4db198]
#29 winbindd(run_events_poll+0x3c) [0x7feb7d4d93fc]
#30 winbindd(+0x1c2b52) [0x7feb7d4d9b52]
#31 winbindd(_tevent_loop_once+0x90) [0x7feb7d4d9f60]
#32 winbindd(main+0x7b3) [0x7feb7d3e7aa3]
#33 /lib64/libc.so.6(__libc_start_main+0xfd) [0x7feb7a915cdd]
#34 winbindd(+0xce2a9) [0x7feb7d3e52a9]
This is in a winbind child, logfiles surrounding indicate the parent
was restarted.
This patch takes all chain locks around the CLEAR_IF_FIRST introduced
tdb_new_database.
commit e7e86fcb929e7b8e7d879349d5f7f9422126a3a2
Author: Rusty Russell <rusty at rustcorp.com.au>
Date: Mon Oct 8 11:56:47 2012 -0700
tdb: Make robust against shrinking tdbs
When probing for a size change (eg. just before tdb_expand, tdb_check,
tdb_rescue) we call tdb_oob(tdb, tdb->map_size, 1, 1). Unfortunately
this does nothing if the tdb has actually shrunk, which as Volker
demonstrated, can actually happen if a "longlived" parent crashes.
So move the map/update size/remap before the limit check.
-----------------------------------------------------------------------
Summary of changes:
lib/tdb/common/io.c | 15 ++++++-----
lib/tdb/common/open.c | 34 +++++++++++++++++++++++----
source3/torture/torture.c | 56 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 93 insertions(+), 12 deletions(-)
Changeset truncated at 500 lines:
diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c
index 78bbf2e..649b70f 100644
--- a/lib/tdb/common/io.c
+++ b/lib/tdb/common/io.c
@@ -53,6 +53,14 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
return -1;
}
+ /* Unmap, update size, remap */
+ if (tdb_munmap(tdb) == -1) {
+ tdb->ecode = TDB_ERR_IO;
+ return -1;
+ }
+ tdb->map_size = st.st_size;
+ tdb_mmap(tdb);
+
if (st.st_size < (size_t)len) {
if (!probe) {
/* Ensure ecode is set for log fn. */
@@ -63,13 +71,6 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
return -1;
}
- /* Unmap, update size, remap */
- if (tdb_munmap(tdb) == -1) {
- tdb->ecode = TDB_ERR_IO;
- return -1;
- }
- tdb->map_size = st.st_size;
- tdb_mmap(tdb);
return 0;
}
diff --git a/lib/tdb/common/open.c b/lib/tdb/common/open.c
index ec45689..eeea8c7 100644
--- a/lib/tdb/common/open.c
+++ b/lib/tdb/common/open.c
@@ -313,12 +313,36 @@ _PUBLIC_ struct tdb_context *tdb_open_ex(const char *name, int hash_size, int td
if ((tdb_flags & TDB_CLEAR_IF_FIRST) &&
(!tdb->read_only) &&
(locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) {
- open_flags |= O_CREAT;
- if (ftruncate(tdb->fd, 0) == -1) {
+ int ret;
+ ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0,
+ TDB_LOCK_WAIT);
+ if (ret == -1) {
TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
- "failed to truncate %s: %s\n",
- name, strerror(errno)));
- goto fail; /* errno set by ftruncate */
+ "tdb_brlock failed for %s: %s\n",
+ name, strerror(errno)));
+ goto fail;
+ }
+ ret = tdb_new_database(tdb, hash_size);
+ if (ret == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
+ "tdb_new_database failed for %s: %s\n",
+ name, strerror(errno)));
+ tdb_unlockall(tdb);
+ goto fail;
+ }
+ ret = tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0);
+ if (ret == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
+ "tdb_unlockall failed for %s: %s\n",
+ name, strerror(errno)));
+ goto fail;
+ }
+ ret = lseek(tdb->fd, 0, SEEK_SET);
+ if (ret == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: "
+ "lseek failed for %s: %s\n",
+ name, strerror(errno)));
+ goto fail;
}
}
diff --git a/source3/torture/torture.c b/source3/torture/torture.c
index 658224a..d37d83c 100644
--- a/source3/torture/torture.c
+++ b/source3/torture/torture.c
@@ -8102,6 +8102,60 @@ fail:
return result;
}
+static bool run_local_tdb_opener(int dummy)
+{
+ TDB_CONTEXT *t;
+ unsigned v = 0;
+
+ while (1) {
+ t = tdb_open("test.tdb", 1000, TDB_CLEAR_IF_FIRST,
+ O_RDWR|O_CREAT, 0755);
+ if (t == NULL) {
+ perror("tdb_open failed");
+ return false;
+ }
+ tdb_close(t);
+
+ v += 1;
+ printf("\r%u", v);
+ }
+ return true;
+}
+
+static bool run_local_tdb_writer(int dummy)
+{
+ TDB_CONTEXT *t;
+ unsigned v = 0;
+ TDB_DATA val;
+
+ t = tdb_open("test.tdb", 1000, 0, O_RDWR|O_CREAT, 0755);
+ if (t == 0) {
+ perror("tdb_open failed");
+ return 1;
+ }
+
+ val.dptr = (uint8_t *)&v;
+ val.dsize = sizeof(v);
+
+ while (1) {
+ TDB_DATA data;
+ int ret;
+
+ ret = tdb_store(t, val, val, 0);
+ if (ret != 0) {
+ printf("%s\n", tdb_errorstr(t));
+ }
+ v += 1;
+ printf("\r%u", v);
+
+ data = tdb_fetch(t, val);
+ if (data.dptr != NULL) {
+ SAFE_FREE(data.dptr);
+ }
+ }
+ return true;
+}
+
static double create_procs(bool (*fn)(int), bool *result)
{
int i, status;
@@ -8290,6 +8344,8 @@ static struct {
{ "LOCAL-binary_to_sid", run_local_binary_to_sid, 0},
{ "LOCAL-DBTRANS", run_local_dbtrans, 0},
{ "LOCAL-TEVENT-SELECT", run_local_tevent_select, 0},
+ { "LOCAL-TDB-OPENER", run_local_tdb_opener, 0 },
+ { "LOCAL-TDB-WRITER", run_local_tdb_writer, 0 },
{NULL, NULL, 0}};
--
Samba Shared Repository
More information about the samba-cvs
mailing list