Data Corruption bug with Samba's vfs_iouring and Linux 5.6.7/5.7rc3

Stefan Metzmacher metze at samba.org
Fri May 8 22:04:55 UTC 2020


Am 08.05.20 um 23:50 schrieb Jeremy Allison:
> On Fri, May 08, 2020 at 11:40:30PM +0200, Stefan Metzmacher wrote:
>> Am 08.05.20 um 22:51 schrieb Jeremy Allison:
>>> On Fri, May 08, 2020 at 01:47:09PM -0700, Jeremy Allison via samba-technical wrote:
>>>> On Fri, May 08, 2020 at 09:35:31PM +0200, Stefan Metzmacher via samba-technical wrote:
>>>>>
>>>>> Thanks very much Jeremy! I didn't noticed that.
>>>>>
>>>>> I guess the attached patch should be able to fix the recursion.
>>>>
>>>> Oh Metze that's *really* ugly :-). I thought about
>>>> doing that in my code and decided it was in too bad
>>>> taste to live :-).
>>>>
>>>> Is there a cleaner way than putting "busy" and "retry"
>>>> variables in the config struct ?
>>>
>>> And a "Goto again" as well :-(. Bleegh.
>>
>> This version would actually work and looks a bit similar to
>> your version.
>>
>> Can you life with that version?
> 
> Yes I can live with that :-). It at least moves the horror
> to the wrapper function where you can at least concentrate
> all your attention as to why it's doing what it's doing :-).
> 
> RB+ from me if you add a comment header above the function
> as well as in the commit so it explains what it's doing.
> 
> Feel free to crib my ascii art to explain in the header
> comment too :-).
> 
> Thanks for the cleanup !

I'll do that on Monday.

Here's the current set I tested with
io_uring:nowait_pread = yes
on the Ubuntu 5.3 Kernel
using the
echo 1 > /proc/sys/vm/drop_caches
head -c 1 /data/ff11.dat | hexdump -C
tricks

Note for the explicit RWF_NOWAIT I need to catch the EAGAIN status to
retry (for testing).

Feel free to upload this to bugzilla if it's online again.

Do you have some comments on the offset < 0 on write case?

metze

-------------- next part --------------
From 101810b6bebe03c7f7c358ccd05b1fbca93ca1e7 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 13:06:54 +0200
Subject: [PATCH 01/29] lib: util: Add sys_valid_io_range()

This implements the contraints of
[MS-FSA] 2.1.5.2 Server Requests a Read.

The special handling of [MS-FSA] 2.1.5.3 Server Requests a Write
with offset < 0, should be handled by higher layers!
Which means the check can also be used for writes.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/util/sys_rw.c | 24 ++++++++++++++++++++++++
 lib/util/sys_rw.h |  1 +
 2 files changed, 25 insertions(+)

diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c
index 9a6cdcaa6063..6fa7ca573654 100644
--- a/lib/util/sys_rw.c
+++ b/lib/util/sys_rw.c
@@ -24,6 +24,30 @@
 #include "system/filesys.h"
 #include "lib/util/sys_rw.h"
 
+bool sys_valid_io_range(off_t offset, size_t length)
+{
+	uint64_t last_byte_ofs;
+
+	if (offset < 0) {
+		return false;
+	}
+
+	if (offset > INT64_MAX) {
+		return false;
+	}
+
+	if (length > UINT32_MAX) {
+		return false;
+	}
+
+	last_byte_ofs = (uint64_t)offset + (uint64_t)length;
+	if (last_byte_ofs > INT64_MAX) {
+		return false;
+	}
+
+	return true;
+}
+
 /*******************************************************************
 A read wrapper that will deal with EINTR/EWOULDBLOCK
 ********************************************************************/
diff --git a/lib/util/sys_rw.h b/lib/util/sys_rw.h
index ab456d87b22e..70864cb2b74a 100644
--- a/lib/util/sys_rw.h
+++ b/lib/util/sys_rw.h
@@ -27,6 +27,7 @@
 
 struct iovec;
 
+bool sys_valid_io_range(off_t offset, size_t length);
 ssize_t sys_read(int fd, void *buf, size_t count);
 void sys_read_v(int fd, void *buf, size_t count);
 ssize_t sys_write(int fd, const void *buf, size_t count);
-- 
2.17.1


From 7e27f2c3c8ecf0b027410fe6f56bba383a46b3b9 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 12:32:48 -0700
Subject: [PATCH 02/29] lib: util: Add sys_pread_full().

A pread wrapper that will deal with EINTR and never return a short
read unless pread returns zero meaning EOF.

Thread-safe so may be used as a replacement for pread
inside pread_do() thread functions.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Jeremy Allison <jra at samba.org>
---
 lib/util/sys_rw.c | 35 +++++++++++++++++++++++++++++++++++
 lib/util/sys_rw.h |  1 +
 2 files changed, 36 insertions(+)

diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c
index 6fa7ca573654..87efbfd367cb 100644
--- a/lib/util/sys_rw.c
+++ b/lib/util/sys_rw.c
@@ -143,6 +143,41 @@ ssize_t sys_pread(int fd, void *buf, size_t count, off_t off)
 	return ret;
 }
 
+/*******************************************************************
+ A pread wrapper that will deal with EINTR and never return a short
+ read unless pread returns zero meaning EOF.
+********************************************************************/
+
+ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off)
+{
+	ssize_t total_read = 0;
+	uint8_t *curr_buf = (uint8_t *)buf;
+	size_t curr_count = count;
+	off_t curr_off = off;
+
+	while (curr_count != 0) {
+		ssize_t ret = sys_pread(fd,
+					curr_buf,
+					curr_count,
+					curr_off);
+
+		if (ret == -1) {
+			return -1;
+		}
+		if (ret == 0) {
+			/* EOF */
+			break;
+		}
+
+		curr_buf += ret;
+		curr_count -= ret;
+		curr_off += ret;
+
+		total_read += ret;
+	}
+	return total_read;
+}
+
 /*******************************************************************
 A write wrapper that will deal with EINTR
 ********************************************************************/
diff --git a/lib/util/sys_rw.h b/lib/util/sys_rw.h
index 70864cb2b74a..1e0dd3730a60 100644
--- a/lib/util/sys_rw.h
+++ b/lib/util/sys_rw.h
@@ -34,6 +34,7 @@ ssize_t sys_write(int fd, const void *buf, size_t count);
 void sys_write_v(int fd, const void *buf, size_t count);
 ssize_t sys_writev(int fd, const struct iovec *iov, int iovcnt);
 ssize_t sys_pread(int fd, void *buf, size_t count, off_t off);
+ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off);
 ssize_t sys_pwrite(int fd, const void *buf, size_t count, off_t off);
 
 #endif
-- 
2.17.1


From 52287ae30323e44f7da4418eec598cfd92fa2ab7 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 13:09:50 +0200
Subject: [PATCH 03/29] sq pread_full

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/util/sys_rw.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c
index 87efbfd367cb..bfeb2e6b4661 100644
--- a/lib/util/sys_rw.c
+++ b/lib/util/sys_rw.c
@@ -154,6 +154,13 @@ ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off)
 	uint8_t *curr_buf = (uint8_t *)buf;
 	size_t curr_count = count;
 	off_t curr_off = off;
+	bool ok;
+
+	ok = sys_valid_io_range(off, count);
+	if (!ok) {
+		errno = EINVAL;
+		return -1;
+	}
 
 	while (curr_count != 0) {
 		ssize_t ret = sys_pread(fd,
@@ -169,12 +176,18 @@ ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off)
 			break;
 		}
 
+		if (ret > curr_count) {
+			errno = EIO;
+			return -1;
+		}
+
 		curr_buf += ret;
 		curr_count -= ret;
 		curr_off += ret;
 
 		total_read += ret;
 	}
+
 	return total_read;
 }
 
-- 
2.17.1


From c66cc1396eafa4747524b64b31af7f9f36e360f0 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 12:34:32 -0700
Subject: [PATCH 04/29] lib: util: Add sys_pwrite_full().

A pwrite wrapper that will deal with EINTR and never return a short
write unless the file system returns an error. Copes with the
unspecified edge condition of pwrite returning zero by changing
the return to -1, errno = ENOSPC.

Thread-safe so may be used as a replacement for pwrite
inside pwrite_do() thread functions.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Jeremy Allison <jra at samba.org>
---
 lib/util/sys_rw.c | 35 +++++++++++++++++++++++++++++++++++
 lib/util/sys_rw.h |  1 +
 2 files changed, 36 insertions(+)

diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c
index bfeb2e6b4661..6d39bd3afb06 100644
--- a/lib/util/sys_rw.c
+++ b/lib/util/sys_rw.c
@@ -204,3 +204,38 @@ ssize_t sys_pwrite(int fd, const void *buf, size_t count, off_t off)
 	} while (ret == -1 && errno == EINTR);
 	return ret;
 }
+
+/*******************************************************************
+ A pwrite wrapper that will deal with EINTR and never allow a short
+ write unless the file system returns an error.
+********************************************************************/
+
+ssize_t sys_pwrite_full(int fd, const void *buf, size_t count, off_t off)
+{
+	ssize_t total_written = 0;
+	const uint8_t *curr_buf = (const uint8_t *)buf;
+	size_t curr_count = count;
+	off_t curr_off = off;
+
+	while (curr_count != 0) {
+		ssize_t ret = sys_pwrite(fd,
+					 curr_buf,
+					 curr_count,
+					 curr_off);
+
+		if (ret == -1) {
+			return -1;
+		}
+		if (ret == 0) {
+			/* Ensure we can never spin. */
+			errno = ENOSPC;
+			return -1;
+		}
+		curr_buf += ret;
+		curr_count -= ret;
+		curr_off += ret;
+
+		total_written += ret;
+	}
+	return total_written;
+}
diff --git a/lib/util/sys_rw.h b/lib/util/sys_rw.h
index 1e0dd3730a60..b224ecb30ac7 100644
--- a/lib/util/sys_rw.h
+++ b/lib/util/sys_rw.h
@@ -36,5 +36,6 @@ ssize_t sys_writev(int fd, const struct iovec *iov, int iovcnt);
 ssize_t sys_pread(int fd, void *buf, size_t count, off_t off);
 ssize_t sys_pread_full(int fd, void *buf, size_t count, off_t off);
 ssize_t sys_pwrite(int fd, const void *buf, size_t count, off_t off);
+ssize_t sys_pwrite_full(int fd, const void *buf, size_t count, off_t off);
 
 #endif
-- 
2.17.1


From a9f41b41ffe4943dab5052123aa476c1d54f0ea8 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 13:10:14 +0200
Subject: [PATCH 05/29] sq pwrite_full

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 lib/util/sys_rw.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/util/sys_rw.c b/lib/util/sys_rw.c
index 6d39bd3afb06..d74395fc4091 100644
--- a/lib/util/sys_rw.c
+++ b/lib/util/sys_rw.c
@@ -216,6 +216,13 @@ ssize_t sys_pwrite_full(int fd, const void *buf, size_t count, off_t off)
 	const uint8_t *curr_buf = (const uint8_t *)buf;
 	size_t curr_count = count;
 	off_t curr_off = off;
+	bool ok;
+
+	ok = sys_valid_io_range(off, count);
+	if (!ok) {
+		errno = EINVAL;
+		return -1;
+	}
 
 	while (curr_count != 0) {
 		ssize_t ret = sys_pwrite(fd,
@@ -231,11 +238,18 @@ ssize_t sys_pwrite_full(int fd, const void *buf, size_t count, off_t off)
 			errno = ENOSPC;
 			return -1;
 		}
+
+		if (ret > curr_count) {
+			errno = EIO;
+			return -1;
+		}
+
 		curr_buf += ret;
 		curr_count -= ret;
 		curr_off += ret;
 
 		total_written += ret;
 	}
+
 	return total_written;
 }
-- 
2.17.1


From 1edc5112e61f6b079e43f39cd54f9a17ee91d1c5 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 12:42:10 -0700
Subject: [PATCH 06/29] s3: VFS: aio_fork: Change sys_pread() ->
 sys_pread_full() to protect against short reads.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Jeremy Allison <jra at samba.org>
---
 source3/modules/vfs_aio_fork.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source3/modules/vfs_aio_fork.c b/source3/modules/vfs_aio_fork.c
index a1fed5c06557..285b331ff9ca 100644
--- a/source3/modules/vfs_aio_fork.c
+++ b/source3/modules/vfs_aio_fork.c
@@ -342,7 +342,7 @@ static void aio_child_loop(int sockfd, struct mmap_area *map)
 
 		switch (cmd_struct.cmd) {
 		case READ_CMD:
-			ret_struct.size = sys_pread(
+			ret_struct.size = sys_pread_full(
 				fd, discard_const(map->ptr), cmd_struct.n,
 				cmd_struct.offset);
 #if 0
-- 
2.17.1


From 4eec2c666c304ebe1236ee37f5ca4cfee39fba9d Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 12:42:53 -0700
Subject: [PATCH 07/29] TODO s3: VFS: aio_fork: Change sys_pwrite() ->
 sys_pwrite_full() to protect against short writes.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Jeremy Allison <jra at samba.org>

TODO: check how clients behave...
TODO: where do we handle offset < 0 ???, see MS-FSA 2.1.5.3 Server Requests a Write
---
 source3/modules/vfs_aio_fork.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source3/modules/vfs_aio_fork.c b/source3/modules/vfs_aio_fork.c
index 285b331ff9ca..7c6f4b00fd04 100644
--- a/source3/modules/vfs_aio_fork.c
+++ b/source3/modules/vfs_aio_fork.c
@@ -353,7 +353,7 @@ static void aio_child_loop(int sockfd, struct mmap_area *map)
 #endif
 			break;
 		case WRITE_CMD:
-			ret_struct.size = sys_pwrite(
+			ret_struct.size = sys_pwrite_full(
 				fd, discard_const(map->ptr), cmd_struct.n,
 				cmd_struct.offset);
 			break;
-- 
2.17.1


From 485ed9258844bf4cb116096db26607d4ec9601a9 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 12:43:34 -0700
Subject: [PATCH 08/29] s3: VFS: default. Change sys_pread() ->
 sys_pread_full() in SMB_VFS_PREAD() to protect against short reads.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Jeremy Allison <jra at samba.org>
---
 source3/modules/vfs_default.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c
index c7f2020a9ead..26db45dccd05 100644
--- a/source3/modules/vfs_default.c
+++ b/source3/modules/vfs_default.c
@@ -734,7 +734,7 @@ static ssize_t vfswrap_pread(vfs_handle_struct *handle, files_struct *fsp, void
 
 #if defined(HAVE_PREAD) || defined(HAVE_PREAD64)
 	START_PROFILE_BYTES(syscall_pread, n);
-	result = sys_pread(fsp->fh->fd, data, n, offset);
+	result = sys_pread_full(fsp->fh->fd, data, n, offset);
 	END_PROFILE_BYTES(syscall_pread);
 
 	if (result == -1 && errno == ESPIPE) {
-- 
2.17.1


From 86a1d32d49448b3c6321d55365f3615d818092ab Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 12:44:26 -0700
Subject: [PATCH 09/29] TODO: s3: VFS: default. Change sys_pwrite() ->
 sys_pwrite_full() in SMB_VFS_PWRITE() to protect against short writes.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Jeremy Allison <jra at samba.org>

TODO: check how clients behave...
TODO: where do we handle offset < 0 ???, see MS-FSA 2.1.5.3 Server Requests a Write
---
 source3/modules/vfs_default.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c
index 26db45dccd05..7f49e4f26c3b 100644
--- a/source3/modules/vfs_default.c
+++ b/source3/modules/vfs_default.c
@@ -758,7 +758,7 @@ static ssize_t vfswrap_pwrite(vfs_handle_struct *handle, files_struct *fsp, cons
 
 #if defined(HAVE_PWRITE) || defined(HAVE_PRWITE64)
 	START_PROFILE_BYTES(syscall_pwrite, n);
-	result = sys_pwrite(fsp->fh->fd, data, n, offset);
+	result = sys_pwrite_full(fsp->fh->fd, data, n, offset);
 	END_PROFILE_BYTES(syscall_pwrite);
 
 	if (result == -1 && errno == ESPIPE) {
-- 
2.17.1


From 6872fb5986bb6ae99f81e1fb830e3c6cb67a5696 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 12:45:10 -0700
Subject: [PATCH 10/29] s3: VFS: default. Change pread() -> sys_pread_full() in
 SMB_VFS_PREAD_SEND() to protect against short reads.

Note that as sys_pread_full() deals with the EINTR case
we can remove the do {} while loop here.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Jeremy Allison <jra at samba.org>
---
 source3/modules/vfs_default.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c
index 7f49e4f26c3b..a17eb0ce75c2 100644
--- a/source3/modules/vfs_default.c
+++ b/source3/modules/vfs_default.c
@@ -838,10 +838,10 @@ static void vfs_pread_do(void *private_data)
 
 	PROFILE_TIMESTAMP(&start_time);
 
-	do {
-		state->ret = pread(state->fd, state->buf, state->count,
-				   state->offset);
-	} while ((state->ret == -1) && (errno == EINTR));
+	state->ret = sys_pread_full(state->fd,
+				    state->buf,
+				    state->count,
+				    state->offset);
 
 	if (state->ret == -1) {
 		state->vfs_aio_state.error = errno;
-- 
2.17.1


From c35600726358f2c7057839561a69a0f0214f542b Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 12:48:49 -0700
Subject: [PATCH 11/29] TODO: s3: VFS: default. Change pwrite() ->
 sys_pwrite_full() in SMB_VFS_PWRITE_SEND() to protect against short writes.

Note that as sys_pwrite_full() deals with the EINTR case
we can remove the do {} while loop here.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Jeremy Allison <jra at samba.org>

TODO: check how clients behave...
TODO: where do we handle offset < 0 ???, see MS-FSA 2.1.5.3 Server Requests a Write
---
 source3/modules/vfs_default.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c
index a17eb0ce75c2..522ea03260c1 100644
--- a/source3/modules/vfs_default.c
+++ b/source3/modules/vfs_default.c
@@ -966,10 +966,10 @@ static void vfs_pwrite_do(void *private_data)
 
 	PROFILE_TIMESTAMP(&start_time);
 
-	do {
-		state->ret = pwrite(state->fd, state->buf, state->count,
-				   state->offset);
-	} while ((state->ret == -1) && (errno == EINTR));
+	state->ret = sys_pwrite_full(state->fd,
+				     state->buf,
+				     state->count,
+				     state->offset);
 
 	if (state->ret == -1) {
 		state->vfs_aio_state.error = errno;
-- 
2.17.1


From 2a9932a48a5c7ff1a1a223c9f18a40fc8b8f0dd7 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 6 May 2020 03:05:47 -0700
Subject: [PATCH 12/29] vfs_io_uring: fix the prefix for parametric options
 from 'vfs_io_uring' to 'io_uring'

This is what the manpage describes.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 378e48d112f6..b409d0753379 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -172,13 +172,13 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service,
 	}
 
 	num_entries = lp_parm_ulong(SNUM(handle->conn),
-				    "vfs_io_uring",
+				    "io_uring",
 				    "num_entries",
 				    128);
 	num_entries = MAX(num_entries, 1);
 
 	sqpoll = lp_parm_bool(SNUM(handle->conn),
-			     "vfs_io_uring",
+			     "io_uring",
 			     "sqpoll",
 			     false);
 	if (sqpoll) {
-- 
2.17.1


From 6bcfdb8aff42a465d09374625ca134d76c03060b Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 10:39:52 +0200
Subject: [PATCH 13/29] vfs_io_uring: replace vfs_io_uring_request->state with
 _tevent_req_data()

We don't need a direct pointer to the state...

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index b409d0753379..988b309da525 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -42,7 +42,6 @@ struct vfs_io_uring_request {
 	struct vfs_io_uring_request **list_head;
 	struct vfs_io_uring_config *config;
 	struct tevent_req *req;
-	void *state;
 	struct io_uring_sqe sqe;
 	struct io_uring_cqe cqe;
 	struct timespec start_time;
@@ -58,8 +57,9 @@ static void vfs_io_uring_finish_req(struct vfs_io_uring_request *cur,
 	struct tevent_req *req =
 		talloc_get_type_abort(cur->req,
 		struct tevent_req);
+	void *state = _tevent_req_data(req);
 
-	talloc_set_destructor(cur->state, NULL);
+	talloc_set_destructor(state, NULL);
 	if (cur->list_head != NULL) {
 		DLIST_REMOVE((*cur->list_head), cur);
 		cur->list_head = NULL;
@@ -238,6 +238,7 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
 
 	for (cur = config->queue; cur != NULL; cur = next) {
 		struct io_uring_sqe *sqe = NULL;
+		void *state = _tevent_req_data(cur->req);
 
 		next = cur->next;
 
@@ -246,7 +247,7 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
 			break;
 		}
 
-		talloc_set_destructor(cur->state,
+		talloc_set_destructor(state,
 			vfs_io_uring_request_state_deny_destructor);
 		DLIST_REMOVE(config->queue, cur);
 		*sqe = cur->sqe;
@@ -318,7 +319,6 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 	}
 	state->ur.config = config;
 	state->ur.req = req;
-	state->ur.state = state;
 
 	SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pread, profile_p,
 				     state->ur.profile_bytes, n);
@@ -398,7 +398,6 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
 	}
 	state->ur.config = config;
 	state->ur.req = req;
-	state->ur.state = state;
 
 	SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pwrite, profile_p,
 				     state->ur.profile_bytes, n);
@@ -475,7 +474,6 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand
 	}
 	state->ur.config = config;
 	state->ur.req = req;
-	state->ur.state = state;
 
 	SMBPROFILE_BYTES_ASYNC_START(syscall_asys_fsync, profile_p,
 				     state->ur.profile_bytes, 0);
-- 
2.17.1


From f0496aa32470c52869d7f29e5cd411694d3339b4 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 10:42:59 +0200
Subject: [PATCH 14/29] vfs_io_uring: introduce
 vfs_io_uring_request->completion_fn()

We'll need to add more logic than a simple _tevent_req_done()

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 49 +++++++++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 988b309da525..abdd4d16e9f8 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -44,6 +44,8 @@ struct vfs_io_uring_request {
 	struct tevent_req *req;
 	struct io_uring_sqe sqe;
 	struct io_uring_cqe cqe;
+	void (*completion_fn)(struct vfs_io_uring_request *cur,
+			      const char *location);
 	struct timespec start_time;
 	struct timespec end_time;
 	SMBPROFILE_BYTES_ASYNC_STATE(profile_bytes);
@@ -74,7 +76,7 @@ static void vfs_io_uring_finish_req(struct vfs_io_uring_request *cur,
 	 * or tevent_req_defer_callback() being called
 	 * already.
 	 */
-	_tevent_req_done(req, location);
+	cur->completion_fn(cur, location);
 }
 
 static void vfs_io_uring_config_destroy(struct vfs_io_uring_config *config,
@@ -297,6 +299,9 @@ struct vfs_io_uring_pread_state {
 	struct iovec iov;
 };
 
+static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
+					  const char *location);
+
 static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *handle,
 					     TALLOC_CTX *mem_ctx,
 					     struct tevent_context *ev,
@@ -319,6 +324,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 	}
 	state->ur.config = config;
 	state->ur.req = req;
+	state->ur.completion_fn = vfs_io_uring_pread_completion;
 
 	SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pread, profile_p,
 				     state->ur.profile_bytes, n);
@@ -344,6 +350,17 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 	return req;
 }
 
+static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
+					  const char *location)
+{
+	/*
+	 * We rely on being inside the _send() function
+	 * or tevent_req_defer_callback() being called
+	 * already.
+	 */
+	_tevent_req_done(cur->req, location);
+}
+
 static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
 				  struct vfs_aio_state *vfs_aio_state)
 {
@@ -376,6 +393,9 @@ struct vfs_io_uring_pwrite_state {
 	struct iovec iov;
 };
 
+static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
+					   const char *location);
+
 static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *handle,
 					      TALLOC_CTX *mem_ctx,
 					      struct tevent_context *ev,
@@ -398,6 +418,7 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
 	}
 	state->ur.config = config;
 	state->ur.req = req;
+	state->ur.completion_fn = vfs_io_uring_pwrite_completion;
 
 	SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pwrite, profile_p,
 				     state->ur.profile_bytes, n);
@@ -423,6 +444,17 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
 	return req;
 }
 
+static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
+					   const char *location)
+{
+	/*
+	 * We rely on being inside the _send() function
+	 * or tevent_req_defer_callback() being called
+	 * already.
+	 */
+	_tevent_req_done(cur->req, location);
+}
+
 static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req,
 				   struct vfs_aio_state *vfs_aio_state)
 {
@@ -454,6 +486,9 @@ struct vfs_io_uring_fsync_state {
 	struct vfs_io_uring_request ur;
 };
 
+static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur,
+					  const char *location);
+
 static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *handle,
 					     TALLOC_CTX *mem_ctx,
 					     struct tevent_context *ev,
@@ -474,6 +509,7 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand
 	}
 	state->ur.config = config;
 	state->ur.req = req;
+	state->ur.completion_fn = vfs_io_uring_fsync_completion;
 
 	SMBPROFILE_BYTES_ASYNC_START(syscall_asys_fsync, profile_p,
 				     state->ur.profile_bytes, 0);
@@ -496,6 +532,17 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand
 	return req;
 }
 
+static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur,
+					  const char *location)
+{
+	/*
+	 * We rely on being inside the _send() function
+	 * or tevent_req_defer_callback() being called
+	 * already.
+	 */
+	_tevent_req_done(cur->req, location);
+}
+
 static int vfs_io_uring_fsync_recv(struct tevent_req *req,
 			      struct vfs_aio_state *vfs_aio_state)
 {
-- 
2.17.1


From 001c033ea39987a3c754f9960e89d155661e6c89 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 10:52:52 +0200
Subject: [PATCH 15/29] vfs_io_uring: move error handling out of
 vfs_io_uring_pread_recv()

We should do that as early as possible and that's in
vfs_io_uring_pread_completion().

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index abdd4d16e9f8..0d8e18330092 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -297,6 +297,7 @@ static void vfs_io_uring_fd_handler(struct tevent_context *ev,
 struct vfs_io_uring_pread_state {
 	struct vfs_io_uring_request ur;
 	struct iovec iov;
+	size_t nread;
 };
 
 static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
@@ -353,12 +354,23 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
 					  const char *location)
 {
+	struct vfs_io_uring_pread_state *state = tevent_req_data(
+		cur->req, struct vfs_io_uring_pread_state);
+
 	/*
 	 * We rely on being inside the _send() function
 	 * or tevent_req_defer_callback() being called
 	 * already.
 	 */
-	_tevent_req_done(cur->req, location);
+
+	if (cur->cqe.res < 0) {
+		int err = -cur->cqe.res;
+		_tevent_req_error(cur->req, err, location);
+		return;
+	}
+
+	state->nread = state->ur.cqe.res;
+	tevent_req_done(cur->req);
 }
 
 static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
@@ -366,23 +378,19 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
 {
 	struct vfs_io_uring_pread_state *state = tevent_req_data(
 		req, struct vfs_io_uring_pread_state);
-	int ret;
+	ssize_t ret;
 
 	SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes);
 	vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time,
 						 &state->ur.start_time);
 
 	if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
+		tevent_req_received(req);
 		return -1;
 	}
 
-	if (state->ur.cqe.res < 0) {
-		vfs_aio_state->error = -state->ur.cqe.res;
-		ret = -1;
-	} else {
-		vfs_aio_state->error = 0;
-		ret = state->ur.cqe.res;
-	}
+	vfs_aio_state->error = 0;
+	ret = state->nread;
 
 	tevent_req_received(req);
 	return ret;
-- 
2.17.1


From eab8f17efb67e4b672c5f510344fc54c3c175e43 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 10:52:52 +0200
Subject: [PATCH 16/29] vfs_io_uring: move error handling out of
 vfs_io_uring_pwrite_recv()

We should do that as early as possible and that's in
vfs_io_uring_pwrite_completion().

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 0d8e18330092..a8da341e7b7c 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -399,6 +399,7 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
 struct vfs_io_uring_pwrite_state {
 	struct vfs_io_uring_request ur;
 	struct iovec iov;
+	size_t nwritten;
 };
 
 static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
@@ -455,12 +456,23 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
 static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
 					   const char *location)
 {
+	struct vfs_io_uring_pwrite_state *state = tevent_req_data(
+		cur->req, struct vfs_io_uring_pwrite_state);
+
 	/*
 	 * We rely on being inside the _send() function
 	 * or tevent_req_defer_callback() being called
 	 * already.
 	 */
-	_tevent_req_done(cur->req, location);
+
+	if (cur->cqe.res < 0) {
+		int err = -cur->cqe.res;
+		_tevent_req_error(cur->req, err, location);
+		return;
+	}
+
+	state->nwritten = state->ur.cqe.res;
+	tevent_req_done(cur->req);
 }
 
 static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req,
@@ -468,23 +480,19 @@ static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req,
 {
 	struct vfs_io_uring_pwrite_state *state = tevent_req_data(
 		req, struct vfs_io_uring_pwrite_state);
-	int ret;
+	ssize_t ret;
 
 	SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes);
 	vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time,
 						 &state->ur.start_time);
 
 	if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
+		tevent_req_received(req);
 		return -1;
 	}
 
-	if (state->ur.cqe.res < 0) {
-		vfs_aio_state->error = -state->ur.cqe.res;
-		ret = -1;
-	} else {
-		vfs_aio_state->error = 0;
-		ret = state->ur.cqe.res;
-	}
+	vfs_aio_state->error = 0;
+	ret = state->nwritten;
 
 	tevent_req_received(req);
 	return ret;
-- 
2.17.1


From 5268026076eba48df8c8bde7f532684205c65874 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 10:52:52 +0200
Subject: [PATCH 17/29] vfs_io_uring: move error handling out of
 vfs_io_uring_fsync_recv()

We should do that as early as possible and that's in
vfs_io_uring_fsync_completion().

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index a8da341e7b7c..0f560c95b67d 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -556,7 +556,14 @@ static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur,
 	 * or tevent_req_defer_callback() being called
 	 * already.
 	 */
-	_tevent_req_done(cur->req, location);
+
+	if (cur->cqe.res < 0) {
+		int err = -cur->cqe.res;
+		_tevent_req_error(cur->req, err, location);
+		return;
+	}
+
+	tevent_req_done(cur->req);
 }
 
 static int vfs_io_uring_fsync_recv(struct tevent_req *req,
@@ -564,26 +571,20 @@ static int vfs_io_uring_fsync_recv(struct tevent_req *req,
 {
 	struct vfs_io_uring_fsync_state *state = tevent_req_data(
 		req, struct vfs_io_uring_fsync_state);
-	int ret;
 
 	SMBPROFILE_BYTES_ASYNC_END(state->ur.profile_bytes);
 	vfs_aio_state->duration = nsec_time_diff(&state->ur.end_time,
 						 &state->ur.start_time);
 
 	if (tevent_req_is_unix_error(req, &vfs_aio_state->error)) {
+		tevent_req_received(req);
 		return -1;
 	}
 
-	if (state->ur.cqe.res < 0) {
-		vfs_aio_state->error = -state->ur.cqe.res;
-		ret = -1;
-	} else {
-		vfs_aio_state->error = 0;
-		ret = state->ur.cqe.res;
-	}
+	vfs_aio_state->error = 0;
 
 	tevent_req_received(req);
-	return ret;
+	return 0;
 }
 
 static struct vfs_fn_pointers vfs_io_uring_fns = {
-- 
2.17.1


From 3106a52406108408492b063ba5714af5fe8a49fe Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 13:17:05 +0200
Subject: [PATCH 18/29] vfs_io_uring: make use of sys_valid_io_range() in
 vfs_io_uring_pread_send()

This makes the follow up commits easier as we don't have to care
about overflows.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 0f560c95b67d..c7565b8c39de 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -25,6 +25,7 @@
 #include "smbd/smbd.h"
 #include "smbd/globals.h"
 #include "lib/util/tevent_unix.h"
+#include "lib/util/sys_rw.h"
 #include "smbprofile.h"
 #include <liburing.h>
 
@@ -313,6 +314,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 	struct tevent_req *req = NULL;
 	struct vfs_io_uring_pread_state *state = NULL;
 	struct vfs_io_uring_config *config = NULL;
+	bool ok;
 
 	SMB_VFS_HANDLE_GET_DATA(handle, config,
 				struct vfs_io_uring_config,
@@ -331,6 +333,12 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 				     state->ur.profile_bytes, n);
 	SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->ur.profile_bytes);
 
+	ok = sys_valid_io_range(offset, n);
+	if (!ok) {
+		tevent_req_error(req, EINVAL);
+		return tevent_req_post(req, ev);
+	}
+
 	state->iov.iov_base = (void *)data;
 	state->iov.iov_len = n;
 	io_uring_prep_readv(&state->ur.sqe,
-- 
2.17.1


From f261ac0cd2cbf9fdf07919be792f858ad593ae31 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 13:17:05 +0200
Subject: [PATCH 19/29] vfs_io_uring: make use of sys_valid_io_range() in
 vfs_io_uring_pwrite_send()

This makes the follow up commits easier as we don't have to care
about overflows.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index c7565b8c39de..ee23449c63c6 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -423,6 +423,7 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
 	struct tevent_req *req = NULL;
 	struct vfs_io_uring_pwrite_state *state = NULL;
 	struct vfs_io_uring_config *config = NULL;
+	bool ok;
 
 	SMB_VFS_HANDLE_GET_DATA(handle, config,
 				struct vfs_io_uring_config,
@@ -441,6 +442,12 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
 				     state->ur.profile_bytes, n);
 	SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->ur.profile_bytes);
 
+	ok = sys_valid_io_range(offset, n);
+	if (!ok) {
+		tevent_req_error(req, EINVAL);
+		return tevent_req_post(req, ev);
+	}
+
 	state->iov.iov_base = discard_const(data);
 	state->iov.iov_len = n;
 	io_uring_prep_writev(&state->ur.sqe,
-- 
2.17.1


From ce467400f4098d0edbee9534a5667c0b92422e8f Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 21:29:53 +0200
Subject: [PATCH 20/29] vfs_io_uring: avoid stack recursion of
 vfs_io_uring_queue_run()

Instead we remember if recursion was triggered and jump to
the start of the function again from the end.

This should make it safe to be called from the completion_fn().

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index ee23449c63c6..df41c74a7953 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -34,6 +34,8 @@ struct vfs_io_uring_request;
 struct vfs_io_uring_config {
 	struct io_uring uring;
 	struct tevent_fd *fde;
+	bool busy;
+	bool need_retry;
 	struct vfs_io_uring_request *queue;
 	struct vfs_io_uring_request *pending;
 };
@@ -222,7 +224,7 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service,
 	return 0;
 }
 
-static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
+static void _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
 {
 	struct vfs_io_uring_request *cur = NULL, *next = NULL;
 	struct io_uring_cqe *cqe = NULL;
@@ -280,6 +282,22 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
 	io_uring_cq_advance(&config->uring, nr);
 }
 
+static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
+{
+	if (config->busy) {
+		config->need_retry = true;
+		return;
+	}
+	config->busy = true;
+
+	do {
+		config->need_retry = false;
+		_vfs_io_uring_queue_run(config);
+	} while (config->need_retry);
+
+	config->busy = false;
+}
+
 static void vfs_io_uring_fd_handler(struct tevent_context *ev,
 				    struct tevent_fd *fde,
 				    uint16_t flags,
-- 
2.17.1


From abfb6a5b0e9a9e87f0328de7978c11663744400a Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 11:17:51 +0200
Subject: [PATCH 21/29] vfs_io_uring: split out a vfs_io_uring_request_submit()
 function

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index df41c74a7953..7db32760d976 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -298,6 +298,17 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
 	config->busy = false;
 }
 
+static void vfs_io_uring_request_submit(struct vfs_io_uring_request *cur)
+{
+	struct vfs_io_uring_config *config = cur->config;
+
+	io_uring_sqe_set_data(&cur->sqe, cur);
+	DLIST_ADD_END(config->queue, cur);
+	cur->list_head = &config->queue;
+
+	vfs_io_uring_queue_run(config);
+}
+
 static void vfs_io_uring_fd_handler(struct tevent_context *ev,
 				    struct tevent_fd *fde,
 				    uint16_t flags,
@@ -363,11 +374,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 			    fsp->fh->fd,
 			    &state->iov, 1,
 			    offset);
-	io_uring_sqe_set_data(&state->ur.sqe, &state->ur);
-	DLIST_ADD_END(config->queue, &state->ur);
-	state->ur.list_head = &config->queue;
-
-	vfs_io_uring_queue_run(config);
+	vfs_io_uring_request_submit(&state->ur);
 
 	if (!tevent_req_is_in_progress(req)) {
 		return tevent_req_post(req, ev);
@@ -472,11 +479,7 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
 			     fsp->fh->fd,
 			     &state->iov, 1,
 			     offset);
-	io_uring_sqe_set_data(&state->ur.sqe, &state->ur);
-	DLIST_ADD_END(config->queue, &state->ur);
-	state->ur.list_head = &config->queue;
-
-	vfs_io_uring_queue_run(config);
+	vfs_io_uring_request_submit(&state->ur);
 
 	if (!tevent_req_is_in_progress(req)) {
 		return tevent_req_post(req, ev);
@@ -567,11 +570,7 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand
 	io_uring_prep_fsync(&state->ur.sqe,
 			    fsp->fh->fd,
 			    0); /* fsync_flags */
-	io_uring_sqe_set_data(&state->ur.sqe, &state->ur);
-	DLIST_ADD_END(config->queue, &state->ur);
-	state->ur.list_head = &config->queue;
-
-	vfs_io_uring_queue_run(config);
+	vfs_io_uring_request_submit(&state->ur);
 
 	if (!tevent_req_is_in_progress(req)) {
 		return tevent_req_post(req, ev);
-- 
2.17.1


From 427b02bcd7769b6d088b043c4f917bddb35c1ba2 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 11:17:51 +0200
Subject: [PATCH 22/29] vfs_io_uring: split out a vfs_io_uring_pread_submit()
 function

This can be reused when we add handling for short reads.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 7db32760d976..1f724fbc588e 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -326,10 +326,13 @@ static void vfs_io_uring_fd_handler(struct tevent_context *ev,
 
 struct vfs_io_uring_pread_state {
 	struct vfs_io_uring_request ur;
+	struct files_struct *fsp;
+	off_t offset;
 	struct iovec iov;
 	size_t nread;
 };
 
+static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state);
 static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
 					  const char *location);
 
@@ -368,13 +371,11 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 		return tevent_req_post(req, ev);
 	}
 
+	state->fsp = fsp;
+	state->offset = offset;
 	state->iov.iov_base = (void *)data;
 	state->iov.iov_len = n;
-	io_uring_prep_readv(&state->ur.sqe,
-			    fsp->fh->fd,
-			    &state->iov, 1,
-			    offset);
-	vfs_io_uring_request_submit(&state->ur);
+	vfs_io_uring_pread_submit(state);
 
 	if (!tevent_req_is_in_progress(req)) {
 		return tevent_req_post(req, ev);
@@ -384,6 +385,15 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 	return req;
 }
 
+static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state)
+{
+	io_uring_prep_readv(&state->ur.sqe,
+			    state->fsp->fh->fd,
+			    &state->iov, 1,
+			    state->offset);
+	vfs_io_uring_request_submit(&state->ur);
+}
+
 static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
 					  const char *location)
 {
-- 
2.17.1


From c01dacbdc953d9565f3464527299b63236bc7af2 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 11:17:51 +0200
Subject: [PATCH 23/29] vfs_io_uring: split out a vfs_io_uring_pwrite_submit()
 function

This can be reused when we add handling for short writes.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 1f724fbc588e..416681333634 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -441,10 +441,13 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
 
 struct vfs_io_uring_pwrite_state {
 	struct vfs_io_uring_request ur;
+	struct files_struct *fsp;
+	off_t offset;
 	struct iovec iov;
 	size_t nwritten;
 };
 
+static void vfs_io_uring_pwrite_submit(struct vfs_io_uring_pwrite_state *state);
 static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
 					   const char *location);
 
@@ -483,13 +486,11 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
 		return tevent_req_post(req, ev);
 	}
 
+	state->fsp = fsp;
+	state->offset = offset;
 	state->iov.iov_base = discard_const(data);
 	state->iov.iov_len = n;
-	io_uring_prep_writev(&state->ur.sqe,
-			     fsp->fh->fd,
-			     &state->iov, 1,
-			     offset);
-	vfs_io_uring_request_submit(&state->ur);
+	vfs_io_uring_pwrite_submit(state);
 
 	if (!tevent_req_is_in_progress(req)) {
 		return tevent_req_post(req, ev);
@@ -499,6 +500,15 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
 	return req;
 }
 
+static void vfs_io_uring_pwrite_submit(struct vfs_io_uring_pwrite_state *state)
+{
+	io_uring_prep_writev(&state->ur.sqe,
+			     state->fsp->fh->fd,
+			     &state->iov, 1,
+			     state->offset);
+	vfs_io_uring_request_submit(&state->ur);
+}
+
 static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
 					   const char *location)
 {
-- 
2.17.1


From f10689c35088ca0d6cbc0d06a0321921b1c4efe9 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 11:38:56 +0200
Subject: [PATCH 24/29] vfs_io_uring: protect vfs_io_uring_pread_completion()
 against invalid results

We should never get back more than we asked for.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 416681333634..a9b2db8d3cd3 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -26,6 +26,7 @@
 #include "smbd/globals.h"
 #include "lib/util/tevent_unix.h"
 #include "lib/util/sys_rw.h"
+#include "lib/util/iov_buf.h"
 #include "smbprofile.h"
 #include <liburing.h>
 
@@ -399,6 +400,9 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
 {
 	struct vfs_io_uring_pread_state *state = tevent_req_data(
 		cur->req, struct vfs_io_uring_pread_state);
+	struct iovec *iov = &state->iov;
+	int num_iov = 1;
+	bool ok;
 
 	/*
 	 * We rely on being inside the _send() function
@@ -412,6 +416,16 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
 		return;
 	}
 
+	ok = iov_advance(&iov, &num_iov, cur->cqe.res);
+	if (!ok) {
+		/* This is not expected! */
+		DBG_ERR("iov_advance() failed cur->cqe.res=%d > iov_len=%d\n",
+			(int)cur->cqe.res,
+			(int)state->iov.iov_len);
+		tevent_req_error(cur->req, EIO);
+		return;
+	}
+
 	state->nread = state->ur.cqe.res;
 	tevent_req_done(cur->req);
 }
-- 
2.17.1


From 78bbd4bc884259284e7368d10367296534f8b32d Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 11:38:56 +0200
Subject: [PATCH 25/29] vfs_io_uring: protect vfs_io_uring_pwrite_completion()
 against invalid results

We should never get more acked than we asked for.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index a9b2db8d3cd3..7994ef17d675 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -528,6 +528,9 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
 {
 	struct vfs_io_uring_pwrite_state *state = tevent_req_data(
 		cur->req, struct vfs_io_uring_pwrite_state);
+	struct iovec *iov = &state->iov;
+	int num_iov = 1;
+	bool ok;
 
 	/*
 	 * We rely on being inside the _send() function
@@ -541,6 +544,16 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
 		return;
 	}
 
+	ok = iov_advance(&iov, &num_iov, cur->cqe.res);
+	if (!ok) {
+		/* This is not expected! */
+		DBG_ERR("iov_advance() failed cur->cqe.res=%d > iov_len=%d\n",
+			(int)cur->cqe.res,
+			(int)state->iov.iov_len);
+		tevent_req_error(cur->req, EIO);
+		return;
+	}
+
 	state->nwritten = state->ur.cqe.res;
 	tevent_req_done(cur->req);
 }
-- 
2.17.1


From 9918820e2bc511ce2d69ff7a7c3e557b5ed10244 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 11:38:56 +0200
Subject: [PATCH 26/29] vfs_io_uring: protect vfs_io_uring_fsync_completion()
 against invalid results

We should never get back a value > 0.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 7994ef17d675..2b5706665cf3 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -642,6 +642,13 @@ static void vfs_io_uring_fsync_completion(struct vfs_io_uring_request *cur,
 		return;
 	}
 
+	if (cur->cqe.res > 0) {
+		/* This is not expected! */
+		DBG_ERR("got cur->cqe.res=%d\n", (int)cur->cqe.res);
+		tevent_req_error(cur->req, EIO);
+		return;
+	}
+
 	tevent_req_done(cur->req);
 }
 
-- 
2.17.1


From cfb4fcabf2851fbcb0283289d9ef9dd0a28be5e1 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 13:30:17 +0200
Subject: [PATCH 27/29] vfs_io_uring: retry after a short read in
 vfs_io_uring_pread_completion()

We need to be prepared for short reads from the kernel depending on
the state of the page cache. Windows and Mac clients don't
expect short reads for files, so we need to retry ourself.

For the future we may be able to play with some io_uring flags
in order to avoid the retries in userspace, but for now we just fix
the data corruption bug...

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_io_uring.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 2b5706665cf3..965f8798ff2a 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -416,6 +416,14 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
 		return;
 	}
 
+	if (cur->cqe.res == 0) {
+		/*
+		 * We reached EOF, we're done
+		 */
+		tevent_req_done(cur->req);
+		return;
+	}
+
 	ok = iov_advance(&iov, &num_iov, cur->cqe.res);
 	if (!ok) {
 		/* This is not expected! */
@@ -426,8 +434,20 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
 		return;
 	}
 
-	state->nread = state->ur.cqe.res;
-	tevent_req_done(cur->req);
+	/* sys_valid_io_range() already checked the boundaries */
+	state->nread += state->ur.cqe.res;
+	if (num_iov == 0) {
+		/* We're done */
+		tevent_req_done(cur->req);
+		return;
+	}
+
+	/*
+	 * sys_valid_io_range() already checked the boundaries
+	 * now try to get the rest.
+	 */
+	state->offset += state->ur.cqe.res;
+	vfs_io_uring_pread_submit(state);
 }
 
 static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
-- 
2.17.1


From af17d2d211ca5f0f1b2ffaaaf46d76fe50691533 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Fri, 8 May 2020 13:30:17 +0200
Subject: [PATCH 28/29] TODO: vfs_io_uring: retry after a short writes in
 vfs_io_uring_pwrite_completion()

We need to be prepared for short writes from the kernel depending on
the state of the page cache.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361

TODO: check how clients handle short writes...
TODO: where do we handle offset < 0 ???, see MS-FSA 2.1.5.3 Server Requests a Write
---
 source3/modules/vfs_io_uring.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 965f8798ff2a..19ffbde8e40a 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -564,6 +564,14 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
 		return;
 	}
 
+	if (cur->cqe.res == 0) {
+		/*
+		 * Ensure we can never spin.
+		 */
+		tevent_req_error(cur->req, ENOSPC);
+		return;
+	}
+
 	ok = iov_advance(&iov, &num_iov, cur->cqe.res);
 	if (!ok) {
 		/* This is not expected! */
@@ -574,8 +582,20 @@ static void vfs_io_uring_pwrite_completion(struct vfs_io_uring_request *cur,
 		return;
 	}
 
-	state->nwritten = state->ur.cqe.res;
-	tevent_req_done(cur->req);
+	/* sys_valid_io_range() already checked the boundaries */
+	state->nwritten += state->ur.cqe.res;
+	if (num_iov == 0) {
+		/* We're done */
+		tevent_req_done(cur->req);
+		return;
+	}
+
+	/*
+	 * sys_valid_io_range() already checked the boundaries
+	 * now try to write the rest.
+	 */
+	state->offset += state->ur.cqe.res;
+	vfs_io_uring_pwrite_submit(state);
 }
 
 static ssize_t vfs_io_uring_pwrite_recv(struct tevent_req *req,
-- 
2.17.1


From d8bdf1f8f28a225a049556ef665968ed69512dd1 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Wed, 6 May 2020 03:12:24 -0700
Subject: [PATCH 29/29] HACK vfs_io_uring: add debugging for bug 14361

---
 source3/modules/vfs_io_uring.c | 105 ++++++++++++++++++++++++++++++++-
 1 file changed, 104 insertions(+), 1 deletion(-)

diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 19ffbde8e40a..59f2a54d608e 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -39,6 +39,10 @@ struct vfs_io_uring_config {
 	bool need_retry;
 	struct vfs_io_uring_request *queue;
 	struct vfs_io_uring_request *pending;
+	bool nowait_pread;
+	bool force_async_pread;
+	bool force_async_pread_retry;
+	size_t truncate_pread;
 };
 
 struct vfs_io_uring_request {
@@ -94,6 +98,7 @@ static void vfs_io_uring_config_destroy(struct vfs_io_uring_config *config,
 		.res = ret,
 	};
 
+DBG_ERR("ret=%d %s\n", ret, location);
 	PROFILE_TIMESTAMP(&start_time);
 
 	if (config->uring.ring_fd != -1) {
@@ -194,6 +199,8 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service,
 	ret = io_uring_queue_init(num_entries, &config->uring, flags);
 	if (ret < 0) {
 		SMB_VFS_NEXT_DISCONNECT(handle);
+		DBG_ERR("io_uring_queue_init(flags=0x%x) failed - %d %s\n",
+			flags, ret, strerror(-ret));
 		errno = -ret;
 		return -1;
 	}
@@ -204,6 +211,8 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service,
 	ret = io_uring_ring_dontfork(&config->uring);
 	if (ret < 0) {
 		SMB_VFS_NEXT_DISCONNECT(handle);
+		DBG_ERR("io_uring_ring_dontfork() failed - %d %s\n",
+			ret, strerror(-ret));
 		errno = -ret;
 		return -1;
 	}
@@ -222,6 +231,22 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service,
 		return -1;
 	}
 
+	config->nowait_pread = lp_parm_bool(SNUM(handle->conn),
+			     "io_uring",
+			     "nowait_pread",
+			     false);
+	config->force_async_pread = lp_parm_bool(SNUM(handle->conn),
+			     "io_uring",
+			     "force_async_pread",
+			     false);
+	config->force_async_pread_retry = lp_parm_bool(SNUM(handle->conn),
+			     "io_uring",
+			     "force_async_pread_retry",
+			     false);
+	config->truncate_pread = lp_parm_ulonglong(SNUM(handle->conn),
+				    "io_uring",
+				    "truncate_pread",
+				    UINT32_MAX);
 	return 0;
 }
 
@@ -268,6 +293,8 @@ static void _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
 	if (ret == -EAGAIN || ret == -EBUSY) {
 		/* We just retry later */
 	} else if (ret < 0) {
+		DBG_ERR("io_uring_submit() failed - %d %s\n",
+			ret, strerror(-ret));
 		vfs_io_uring_config_destroy(config, ret, __location__);
 		return;
 	}
@@ -330,7 +357,9 @@ struct vfs_io_uring_pread_state {
 	struct files_struct *fsp;
 	off_t offset;
 	struct iovec iov;
+	struct iovec tmp_iov;
 	size_t nread;
+	bool nowait_pread;
 };
 
 static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state);
@@ -376,6 +405,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 	state->offset = offset;
 	state->iov.iov_base = (void *)data;
 	state->iov.iov_len = n;
+	state->nowait_pread = config->nowait_pread;
 	vfs_io_uring_pread_submit(state);
 
 	if (!tevent_req_is_in_progress(req)) {
@@ -388,10 +418,27 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
 
 static void vfs_io_uring_pread_submit(struct vfs_io_uring_pread_state *state)
 {
+	struct vfs_io_uring_config *config = state->ur.config;
+	unsigned flags = 0;
+
+	state->tmp_iov = state->iov;
+	state->tmp_iov.iov_len = MIN(state->tmp_iov.iov_len,
+				     config->truncate_pread);
+
 	io_uring_prep_readv(&state->ur.sqe,
 			    state->fsp->fh->fd,
-			    &state->iov, 1,
+			    &state->tmp_iov, 1,
 			    state->offset);
+	if (state->nowait_pread) {
+		state->ur.sqe.rw_flags |= RWF_NOWAIT;
+	}
+	if (config->force_async_pread) {
+		flags |= IOSQE_ASYNC;
+	}
+	if (state->nread > 0 && config->force_async_pread_retry) {
+		flags |= IOSQE_ASYNC;
+	}
+	io_uring_sqe_set_flags(&state->ur.sqe, flags);
 	vfs_io_uring_request_submit(&state->ur);
 }
 
@@ -412,10 +459,66 @@ static void vfs_io_uring_pread_completion(struct vfs_io_uring_request *cur,
 
 	if (cur->cqe.res < 0) {
 		int err = -cur->cqe.res;
+		DBG_ERR("%p: Invalid last_read=%zu (0x%zx) ofs=%zu (0x%zx) len=%zu (0x%zx) err=%d %s eof=%zu (0x%zx) blks=%zu blocks=%zu %s %s\n",
+			state,
+			(size_t)state->nread,
+			(size_t)state->nread,
+			(size_t)state->offset,
+			(size_t)state->offset,
+			(size_t)state->iov.iov_len,
+			(size_t)state->iov.iov_len,
+			err, strerror(err),
+			(size_t)state->fsp->fsp_name->st.st_ex_size,
+			(size_t)state->fsp->fsp_name->st.st_ex_size,
+			(size_t)state->fsp->fsp_name->st.st_ex_blksize,
+			(size_t)state->fsp->fsp_name->st.st_ex_blocks,
+			fsp_str_dbg(state->fsp),
+			fsp_fnum_dbg(state->fsp));
+		if (err == EAGAIN && state->nowait_pread) {
+			state->nowait_pread = false;
+			vfs_io_uring_pread_submit(state);
+			return;
+		}
 		_tevent_req_error(cur->req, err, location);
 		return;
 	}
 
+	if ((size_t)cur->cqe.res != (size_t)state->iov.iov_len) {
+		DBG_ERR("%p: Invalid last_read=%zu (0x%zx) ofs=%zu (0x%zx) len=%zu (0x%zx) nread=%zu (0x%zx) eof=%zu (0x%zx) blks=%zu blocks=%zu %s %s\n",
+			state,
+			(size_t)state->nread,
+			(size_t)state->nread,
+			(size_t)state->offset,
+			(size_t)state->offset,
+			(size_t)state->iov.iov_len,
+			(size_t)state->iov.iov_len,
+			(size_t)cur->cqe.res,
+			(size_t)cur->cqe.res,
+			(size_t)state->fsp->fsp_name->st.st_ex_size,
+			(size_t)state->fsp->fsp_name->st.st_ex_size,
+			(size_t)state->fsp->fsp_name->st.st_ex_blksize,
+			(size_t)state->fsp->fsp_name->st.st_ex_blocks,
+			fsp_str_dbg(state->fsp),
+			fsp_fnum_dbg(state->fsp));
+	} else {
+		DBG_WARNING("%p: last_read=%zu (0x%zx) ofs=%zu (0x%zx) len=%zu (0x%zx) nread=%zu (0x%zx) eof=%zu (0x%zx) blks=%zu blocks=%zu %s %s\n",
+			state,
+			(size_t)state->nread,
+			(size_t)state->nread,
+			(size_t)state->offset,
+			(size_t)state->offset,
+			(size_t)state->iov.iov_len,
+			(size_t)state->iov.iov_len,
+			(size_t)cur->cqe.res,
+			(size_t)cur->cqe.res,
+			(size_t)state->fsp->fsp_name->st.st_ex_size,
+			(size_t)state->fsp->fsp_name->st.st_ex_size,
+			(size_t)state->fsp->fsp_name->st.st_ex_blksize,
+			(size_t)state->fsp->fsp_name->st.st_ex_blocks,
+			fsp_str_dbg(state->fsp),
+			fsp_fnum_dbg(state->fsp));
+	}
+
 	if (cur->cqe.res == 0) {
 		/*
 		 * We reached EOF, we're done
-- 
2.17.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <http://lists.samba.org/pipermail/samba-technical/attachments/20200509/f3fee3df/signature.sig>


More information about the samba-technical mailing list