[PATCH] Improve tdb for ENOSPC
Volker Lendecke
Volker.Lendecke at SerNet.DE
Wed Aug 23 14:24:41 UTC 2017
Hi!
Review appreciated!
Thanks, Volker
--
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt at sernet.de
-------------- next part --------------
From a07cbb8ea4671486f126acdc2c11ff7ab8b8d08d Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Tue, 22 Aug 2017 14:51:18 +0200
Subject: [PATCH 1/6] tdb: Fix a typo
Signed-off-by: Volker Lendecke <vl at samba.org>
---
lib/tdb/common/transaction.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/tdb/common/transaction.c b/lib/tdb/common/transaction.c
index 8ec00256924..7d281fc75ff 100644
--- a/lib/tdb/common/transaction.c
+++ b/lib/tdb/common/transaction.c
@@ -43,7 +43,7 @@
tdb_free() the old record to place it on the normal tdb freelist
before allocating the new record
- - during transactions, keep a linked list of writes all that have
+ - during transactions, keep a linked list of all writes that have
been performed by intercepting all tdb_write() calls. The hooked
transaction versions of tdb_read() and tdb_write() check this
linked list and try to use the elements of the list in preference
--
2.11.0
From 58fd6a8a270ff0f7ecd2ff2c06d3316d03c97522 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Tue, 22 Aug 2017 17:09:01 +0200
Subject: [PATCH 2/6] configure: Centralize check for posix_fallocate
This checks for posix_fallocate unless we are sitting on an ancient glibc.
With this we don't need HAVE_BROKEN_POSIX_FALLOCATE anymore,
HAVE_POSIX_FALLOCATE will only be defined if we have a valid [g]libc.
./configure tested on Debian, FreeBSD (which does have posix_fallocate) and
OpenBSD (which does not have posix_fallocate). Also tested with changing the
not have an old-enough glibc around. All did the right thing.
Signed-off-by: Volker Lendecke <vl at samba.org>
---
lib/replace/wscript | 12 ++++++++++++
source3/lib/system.c | 2 +-
source3/wscript | 17 ++---------------
3 files changed, 15 insertions(+), 16 deletions(-)
diff --git a/lib/replace/wscript b/lib/replace/wscript
index 7c50e1daf85..6972f2d6e40 100644
--- a/lib/replace/wscript
+++ b/lib/replace/wscript
@@ -249,6 +249,18 @@ def configure(conf):
if conf.CONFIG_SET('HAVE_MEMALIGN'):
conf.CHECK_DECLS('memalign', headers='malloc.h')
+ # glibc up to 2.3.6 had dangerously broken posix_fallocate(). DON'T USE IT.
+ if conf.CHECK_CODE('''
+#define _XOPEN_SOURCE 600
+#include <stdlib.h>
+#if defined(__GLIBC__) && ((__GLIBC__ < 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 4))
+#error probably broken posix_fallocate
+#endif
+''',
+ '_POSIX_FALLOCATE_CAPABLE_LIBC',
+ msg='Checking for posix_fallocate-capable libc'):
+ conf.CHECK_FUNCS('posix_fallocate')
+
conf.CHECK_FUNCS('prctl dirname basename')
# libbsd on some platforms provides strlcpy and strlcat
diff --git a/source3/lib/system.c b/source3/lib/system.c
index 99462b631c7..70ddf6a4dea 100644
--- a/source3/lib/system.c
+++ b/source3/lib/system.c
@@ -438,7 +438,7 @@ int sys_lstat(const char *fname,SMB_STRUCT_STAT *sbuf,
********************************************************************/
int sys_posix_fallocate(int fd, off_t offset, off_t len)
{
-#if defined(HAVE_POSIX_FALLOCATE) && !defined(HAVE_BROKEN_POSIX_FALLOCATE)
+#if defined(HAVE_POSIX_FALLOCATE)
return posix_fallocate(fd, offset, len);
#elif defined(F_RESVSP64)
/* this handles XFS on IRIX */
diff --git a/source3/wscript b/source3/wscript
index 534ff440a40..62edb3e1cd6 100644
--- a/source3/wscript
+++ b/source3/wscript
@@ -106,7 +106,7 @@ def configure(conf):
conf.CHECK_FUNCS('innetgr')
conf.CHECK_FUNCS('initgroups select poll rdchk getgrnam getgrent pathconf')
conf.CHECK_FUNCS('setpriv setgidx setuidx setgroups syscall sysconf')
- conf.CHECK_FUNCS('atexit grantpt posix_openpt fallocate posix_fallocate')
+ conf.CHECK_FUNCS('atexit grantpt posix_openpt fallocate')
conf.CHECK_FUNCS('fseeko setluid')
conf.CHECK_FUNCS('getpwnam', headers='sys/types.h pwd.h')
conf.CHECK_FUNCS('fdopendir')
@@ -397,7 +397,7 @@ llseek _llseek __llseek _lseek __lseek
_lstat __lstat lutimes
__lxstat memalign mknod mlock mlockall munlock munlockall
_open __open _opendir __opendir
-pathconf poll posix_fallocate
+pathconf poll
posix_memalign pread _pread __pread
pwrite _pwrite __pwrite
rdchk _read __read _readdir __readdir
@@ -1154,19 +1154,6 @@ err:
execute=True,
msg="Checking whether fcntl lock supports open file description locks")
-# glibc up to 2.3.6 had dangerously broken posix_fallocate(). DON'T USE IT.
- if not conf.CHECK_CODE('''
-#define _XOPEN_SOURCE 600
-#include <stdlib.h>
-#if defined(__GLIBC__) && ((__GLIBC__ < 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 4))
-#error probably broken posix_fallocate
-#endif
-''',
- '_HAVE_UNBROKEN_POSIX_FALLOCATE',
- msg='Checking for broken posix_fallocate'):
- conf.DEFINE('HAVE_BROKEN_POSIX_FALLOCATE', '1')
-
-
conf.CHECK_STRUCTURE_MEMBER('struct stat', 'st_mtim.tv_nsec',
define='HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC') # Linux, Solaris
conf.CHECK_STRUCTURE_MEMBER('struct stat', 'st_mtimensec',
--
2.11.0
From 32e1647ee59d2785d57df09deddc81e9cab840f4 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 23 Aug 2017 12:00:00 +0200
Subject: [PATCH 3/6] tdb: Protect against EINTR
Signed-off-by: Volker Lendecke <vl at samba.org>
---
lib/tdb/common/io.c | 27 ++++++++++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)
diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c
index fe47d18a5a4..1b422affdca 100644
--- a/lib/tdb/common/io.c
+++ b/lib/tdb/common/io.c
@@ -52,27 +52,48 @@ static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off)
static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf,
size_t count, off_t offset)
{
+ ssize_t ret;
+
if (!tdb_adjust_offset(tdb, &offset)) {
return -1;
}
- return pwrite(tdb->fd, buf, count, offset);
+
+ do {
+ ret = pwrite(tdb->fd, buf, count, offset);
+ } while ((ret == -1) && (errno == EINTR));
+
+ return ret;
}
static ssize_t tdb_pread(struct tdb_context *tdb, void *buf,
size_t count, off_t offset)
{
+ ssize_t ret;
+
if (!tdb_adjust_offset(tdb, &offset)) {
return -1;
}
- return pread(tdb->fd, buf, count, offset);
+
+ do {
+ ret = pread(tdb->fd, buf, count, offset);
+ } while ((ret == -1) && (errno == EINTR));
+
+ return ret;
}
static int tdb_ftruncate(struct tdb_context *tdb, off_t length)
{
+ ssize_t ret;
+
if (!tdb_adjust_offset(tdb, &length)) {
return -1;
}
- return ftruncate(tdb->fd, length);
+
+ do {
+ ret = ftruncate(tdb->fd, length);
+ } while ((ret == -1) && (errno == EINTR));
+
+ return ret;
}
static int tdb_fstat(struct tdb_context *tdb, struct stat *buf)
--
2.11.0
From 7e2b66182a43b5d25d38fea6a01631698acb96f8 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 23 Aug 2017 12:48:03 +0200
Subject: [PATCH 4/6] tdb: Truncate the file after expand failure
Without this it's very easy to create virtually huge files: ftruncate expands a
file, the pwrites fail with ENOSPC, thus the write fails. The next writer runs
into the same situation, and ftruncate-expands the file even further. tdb_check
will then spend ages reading the 4GB of zeros byte by byte.
Here we hold the freelist lock or are inside a transaction, so it is safe to
cut the file again. Nobody can have used the space that we have tried to
allocate, so we can't have any stray pointers corrupting the database.
Signed-off-by: Volker Lendecke <vl at samba.org>
---
lib/tdb/common/io.c | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)
diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c
index 1b422affdca..f7a12c34dc9 100644
--- a/lib/tdb/common/io.c
+++ b/lib/tdb/common/io.c
@@ -430,14 +430,14 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
"returned 0 twice: giving up!\n"));
errno = ENOSPC;
- return -1;
+ goto fail;
}
if (written == -1) {
tdb->ecode = TDB_ERR_OOM;
TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
"%u bytes failed (%s)\n", (int)n,
strerror(errno)));
- return -1;
+ goto fail;
}
if (written != n) {
TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
@@ -448,6 +448,29 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
size += written;
}
return 0;
+
+fail:
+ {
+ int err = errno;
+ int ret;
+
+ /*
+ * We're holding the freelist lock or are inside a
+ * transaction. Cutting the file is safe, the space we
+ * tried to allocate can't have been used anywhere in
+ * the meantime.
+ */
+
+ ret = tdb_ftruncate(tdb, size);
+ if (ret == -1) {
+ TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: "
+ "retruncate to %ju failed\n",
+ (uintmax_t)size));
+ }
+ errno = err;
+ }
+
+ return -1;
}
--
2.11.0
From 473a95e4c788740668e15308bce7e623d7a002e0 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 23 Aug 2017 12:59:19 +0200
Subject: [PATCH 5/6] tdb: Add an intermediate variable
More README.Coding, but I need "ret" in the next commit as well :-)
Signed-off-by: Volker Lendecke <vl at samba.org>
---
lib/tdb/common/io.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c
index f7a12c34dc9..ff3f2197ed2 100644
--- a/lib/tdb/common/io.c
+++ b/lib/tdb/common/io.c
@@ -379,6 +379,7 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
{
char buf[8192];
tdb_off_t new_size;
+ int ret;
if (tdb->read_only || tdb->traverse_read) {
tdb->ecode = TDB_ERR_RDONLY;
@@ -394,7 +395,8 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
return -1;
}
- if (tdb_ftruncate(tdb, new_size) == -1) {
+ ret = tdb_ftruncate(tdb, new_size);
+ if (ret == -1) {
char b = 0;
ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1);
if (written == 0) {
@@ -452,7 +454,6 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
fail:
{
int err = errno;
- int ret;
/*
* We're holding the freelist lock or are inside a
--
2.11.0
From eb70b4873825888f7915428382db0bf537d55e66 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl at samba.org>
Date: Wed, 23 Aug 2017 13:02:57 +0200
Subject: [PATCH 6/6] tdb: Use posix_fallocate
This should be significantly faster than pwriting.
openbsd doesn't have posix_fallocate, so we do need the fallback. Also, it
might have weird failure modes, so we keep the old code in place except for
posix_fallocate returning success or ENOSPC.
Signed-off-by: Volker Lendecke <vl at samba.org>
---
lib/tdb/common/io.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 46 insertions(+)
diff --git a/lib/tdb/common/io.c b/lib/tdb/common/io.c
index ff3f2197ed2..15ba5b7d497 100644
--- a/lib/tdb/common/io.c
+++ b/lib/tdb/common/io.c
@@ -96,6 +96,24 @@ static int tdb_ftruncate(struct tdb_context *tdb, off_t length)
return ret;
}
+#if HAVE_POSIX_FALLOCATE
+static int tdb_posix_fallocate(struct tdb_context *tdb, off_t offset,
+ off_t len)
+{
+ ssize_t ret;
+
+ if (!tdb_adjust_offset(tdb, &offset)) {
+ return -1;
+ }
+
+ do {
+ ret = posix_fallocate(tdb->fd, offset, len);
+ } while ((ret == -1) && (errno == EINTR));
+
+ return ret;
+}
+#endif
+
static int tdb_fstat(struct tdb_context *tdb, struct stat *buf)
{
int ret;
@@ -395,6 +413,34 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t ad
return -1;
}
+#if HAVE_POSIX_FALLOCATE
+ ret = tdb_posix_fallocate(tdb, size, addition);
+ if (ret == 0) {
+ return 0;
+ }
+ if (ret == ENOSPC) {
+ /*
+ * The Linux glibc (at least as of 2.24) fallback if
+ * the file system does not support fallocate does not
+ * reset the file size back to where it was. Also, to
+ * me it is unclear from the posix spec of
+ * posix_fallocate whether this is allowed or
+ * not. Better be safe than sorry and "goto fail" but
+ * "return -1" here, leaving the EOF pointer too
+ * large.
+ */
+ goto fail;
+ }
+
+ /*
+ * Retry the "old" way. Possibly unnecessary, but looking at
+ * our configure script there seem to be weird failure modes
+ * for posix_fallocate. See commit 3264a98ff16de, which
+ * probably refers to
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=1083.
+ */
+#endif
+
ret = tdb_ftruncate(tdb, new_size);
if (ret == -1) {
char b = 0;
--
2.11.0
More information about the samba-technical
mailing list