Data Corruption bug with Samba's vfs_iouring and Linux 5.6.7/5.7rc3
Jeremy Allison
jra at samba.org
Fri May 8 06:27:25 UTC 2020
On Fri, May 08, 2020 at 10:39:38AM +0530, Anoop C S wrote:
>
> Your patch fixes mismatch in SHA256 checksum of 100 bin files copied
> using Windows explorer in my environment.
>
> > If so I'll fix it up to be production-ready,
> > (for example I think I can get rid of the
> > immediate event useage) fix the pwrite case
> > and then cut it onto bite-sized reviewable chunks.
OK, here's the "production ready" version.
Gets rid of the crappy immediate event.
Still doesn't fix the io_uring pwrite issue
but if can confirm that this version also fixes
the problem (and it's a much cleaner patchset :-)
then I'll add the pwrite fixes next and I'll
try running in CI.
Then we should have a fix for io_uring in 4.12 !
Cheers,
Jeremy.
-------------- next part --------------
From f3c7199d6f7513c549e6acfaef2e49e555089ae9 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 22:39:13 -0700
Subject: [PATCH 1/7] s3: VFS: io_uring. Add opcode field to struct
vfs_io_uring_request.
Fill it in in the callers. The callback is going to need to
know what kind of operation just completed in order to cope
with short IO read/writes.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361
Signed-off-by: Jeremy Allison <jra at samba.org>
---
source3/modules/vfs_io_uring.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 378e48d112f..124704fc7bf 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -37,10 +37,17 @@ struct vfs_io_uring_config {
struct vfs_io_uring_request *pending;
};
+enum vfs_io_uring_opcode {
+ SMB_VFS_IO_URING_PREAD,
+ SMB_VFS_IO_URING_PWRITE,
+ SMB_VFS_IO_URING_FSYNC
+};
+
struct vfs_io_uring_request {
struct vfs_io_uring_request *prev, *next;
struct vfs_io_uring_request **list_head;
struct vfs_io_uring_config *config;
+ enum vfs_io_uring_opcode opcode;
struct tevent_req *req;
void *state;
struct io_uring_sqe sqe;
@@ -319,6 +326,7 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
state->ur.config = config;
state->ur.req = req;
state->ur.state = state;
+ state->ur.opcode = SMB_VFS_IO_URING_PREAD;
SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pread, profile_p,
state->ur.profile_bytes, n);
@@ -399,6 +407,7 @@ static struct tevent_req *vfs_io_uring_pwrite_send(struct vfs_handle_struct *han
state->ur.config = config;
state->ur.req = req;
state->ur.state = state;
+ state->ur.opcode = SMB_VFS_IO_URING_PWRITE;
SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pwrite, profile_p,
state->ur.profile_bytes, n);
@@ -476,6 +485,7 @@ static struct tevent_req *vfs_io_uring_fsync_send(struct vfs_handle_struct *hand
state->ur.config = config;
state->ur.req = req;
state->ur.state = state;
+ state->ur.opcode = SMB_VFS_IO_URING_FSYNC;
SMBPROFILE_BYTES_ASYNC_START(syscall_asys_fsync, profile_p,
state->ur.profile_bytes, 0);
--
2.20.1
From 4503a5fb6dae2065e27596dd725460953bbc8038 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 22:42:57 -0700
Subject: [PATCH 2/7] s3: VFS: Rename vfs_io_uring_queue_run() ->
_vfs_io_uring_queue_run().
Call from a wrapper function. We'll need this as
eventually we'll need to call this in a loop for
short IO retries.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361
Signed-off-by: Jeremy Allison <jra at samba.org>
---
source3/modules/vfs_io_uring.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 124704fc7bf..047b534b384 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -226,7 +226,7 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service,
return 0;
}
-static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
+static void _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
{
struct vfs_io_uring_request *cur = NULL, *next = NULL;
struct io_uring_cqe *cqe = NULL;
@@ -283,6 +283,11 @@ static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
io_uring_cq_advance(&config->uring, nr);
}
+static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
+{
+ _vfs_io_uring_queue_run(config);
+}
+
static void vfs_io_uring_fd_handler(struct tevent_context *ev,
struct tevent_fd *fde,
uint16_t flags,
--
2.20.1
From 0af4ecfa3d49987c3626ea9cab3ec70135e873f4 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 22:46:11 -0700
Subject: [PATCH 3/7] s3: VFS: io_uring. Change _vfs_io_uring_queue_run()
funtion to return a bool.
If true, it means retry the submit calls. Currently
always returns false.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361
Signed-off-by: Jeremy Allison <jra at samba.org>
---
source3/modules/vfs_io_uring.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 047b534b384..d1a5af6c7a7 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -226,7 +226,7 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service,
return 0;
}
-static void _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
+static bool _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
{
struct vfs_io_uring_request *cur = NULL, *next = NULL;
struct io_uring_cqe *cqe = NULL;
@@ -235,12 +235,13 @@ static void _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
struct timespec start_time;
struct timespec end_time;
int ret;
+ bool need_retry = false;
PROFILE_TIMESTAMP(&start_time);
if (config->uring.ring_fd == -1) {
vfs_io_uring_config_destroy(config, -ESTALE, __location__);
- return;
+ return false;
}
for (cur = config->queue; cur != NULL; cur = next) {
@@ -269,7 +270,7 @@ static void _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
/* We just retry later */
} else if (ret < 0) {
vfs_io_uring_config_destroy(config, ret, __location__);
- return;
+ return false;
}
PROFILE_TIMESTAMP(&end_time);
@@ -281,11 +282,16 @@ static void _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
}
io_uring_cq_advance(&config->uring, nr);
+ return need_retry;
}
static void vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
{
- _vfs_io_uring_queue_run(config);
+ bool need_retry;
+
+ do {
+ need_retry = _vfs_io_uring_queue_run(config);
+ } while (need_retry);
}
static void vfs_io_uring_fd_handler(struct tevent_context *ev,
--
2.20.1
From 8a4eebb653279a8458a95cf2d99f7262203c1ffa Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 22:50:07 -0700
Subject: [PATCH 4/7] s3: VFS: Extend struct vfs_io_uring_pread_state to store
the original request values.
We'll need these later when we decide if a read was short or not.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361
Signed-off-by: Jeremy Allison <jra at samba.org>
---
source3/modules/vfs_io_uring.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index d1a5af6c7a7..18dee5e59d5 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -312,6 +312,12 @@ static void vfs_io_uring_fd_handler(struct tevent_context *ev,
struct vfs_io_uring_pread_state {
struct vfs_io_uring_request ur;
struct iovec iov;
+ /* We have to remember the original values in case of short read. */
+ struct files_struct *fsp;
+ void *data;
+ size_t total_toread;
+ ssize_t total_read;
+ off_t offset;
};
static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *handle,
@@ -338,6 +344,10 @@ static struct tevent_req *vfs_io_uring_pread_send(struct vfs_handle_struct *hand
state->ur.req = req;
state->ur.state = state;
state->ur.opcode = SMB_VFS_IO_URING_PREAD;
+ state->fsp = fsp;
+ state->data = data;
+ state->total_toread = n;
+ state->offset = offset;
SMBPROFILE_BYTES_ASYNC_START(syscall_asys_pread, profile_p,
state->ur.profile_bytes, n);
--
2.20.1
From dc14398334d7db8c9e88db49a9600ea9aa4f1422 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 23:00:42 -0700
Subject: [PATCH 5/7] s3: VFS: io_uring. Add is_io_uring_pread_complete()
funtion to check for short reads.
Currently commented out, not called.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361
Signed-off-by: Jeremy Allison <jra at samba.org>
---
source3/modules/vfs_io_uring.c | 56 ++++++++++++++++++++++++++++++++++
1 file changed, 56 insertions(+)
diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 18dee5e59d5..0afd8c0ed60 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -226,6 +226,25 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service,
return 0;
}
+#if 0
+static bool is_io_uring_pread_complete(struct vfs_io_uring_request *cur,
+ const struct io_uring_cqe *cqe);
+
+/*
+ * Return true if it was a pread and the bytes transferred
+ * are less than requested.
+ */
+
+static bool vfs_io_uring_complete(struct vfs_io_uring_request *cur,
+ const struct io_uring_cqe *cqe)
+{
+ if (cur->opcode == SMB_VFS_IO_URING_PREAD) {
+ return is_io_uring_pread_complete(cur, cqe);
+ }
+ return true;
+}
+#endif
+
static bool _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
{
struct vfs_io_uring_request *cur = NULL, *next = NULL;
@@ -400,6 +419,43 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
return ret;
}
+#if 0
+/* Returns false if more to read. Updates the total_read count. */
+
+static bool is_io_uring_pread_complete(struct vfs_io_uring_request *cur,
+ const struct io_uring_cqe *cqe)
+{
+ struct tevent_req *req = talloc_get_type_abort(cur->req,
+ struct tevent_req);
+ struct vfs_io_uring_pread_state *state = tevent_req_data(
+ req, struct vfs_io_uring_pread_state);
+
+ if (cqe->res < 0) {
+ /* Error. Deal with it as normal. */
+ return true;
+ }
+
+ if (cqe->res == 0) {
+ /* EOF. Deal with it as normal. */
+ return true;
+ }
+
+ state->total_read += cqe->res;
+
+ if (state->total_read < state->total_toread ) {
+ /* More needed. */
+ DBG_DEBUG("file %s short read, wanted 0x%"PRIx64" at "
+ "offset 0x%"PRIx64" got 0x%"PRIx64"\n",
+ fsp_str_dbg(state->fsp),
+ (uint64_t)state->total_toread,
+ (uint64_t)state->offset,
+ (uint64_t)state->total_read);
+ return false;
+ }
+ return true;
+}
+#endif
+
struct vfs_io_uring_pwrite_state {
struct vfs_io_uring_request ur;
struct iovec iov;
--
2.20.1
From cb869ac6da3c00727559cee93942015b65bd7ba9 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 23:07:47 -0700
Subject: [PATCH 6/7] s3: VFS: io_uring. Add vfs_io_uring_pread_op_reschedule()
Reschedules short IO to completion. Not yet called
so commented out.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361
Signed-off-by: Jeremy Allison <jra at samba.org>
---
source3/modules/vfs_io_uring.c | 45 ++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)
diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 0afd8c0ed60..87a8f838247 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -243,6 +243,17 @@ static bool vfs_io_uring_complete(struct vfs_io_uring_request *cur,
}
return true;
}
+
+static void vfs_io_uring_pread_op_reschedule(struct vfs_io_uring_config *config,
+ struct vfs_io_uring_request *cur);
+
+static void vfs_io_uring_op_reschedule(struct vfs_io_uring_config *config,
+ struct vfs_io_uring_request *cur)
+{
+ if (cur->opcode == SMB_VFS_IO_URING_PREAD) {
+ vfs_io_uring_pread_op_reschedule(config, cur);
+ }
+}
#endif
static bool _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
@@ -454,6 +465,40 @@ static bool is_io_uring_pread_complete(struct vfs_io_uring_request *cur,
}
return true;
}
+
+/*
+ * Reschedule short read. Adjust the buffer, length
+ * and offsets and resubmit.
+ */
+
+static void vfs_io_uring_pread_op_reschedule(struct vfs_io_uring_config *config,
+ struct vfs_io_uring_request *cur)
+{
+ struct tevent_req *req = talloc_get_type_abort(cur->req,
+ struct tevent_req);
+ struct vfs_io_uring_pread_state *state = tevent_req_data(
+ req, struct vfs_io_uring_pread_state);
+ off_t offset;
+ uint8_t *new_base = (uint8_t *)state->data + state->total_read;
+
+ /* Set up the parameters for the remaining IO. */
+ state->iov.iov_base = (void *)new_base;
+ state->iov.iov_len = state->total_toread - state->total_read;
+ offset = state->offset + state->total_read;
+
+ DBG_DEBUG("reschedule read on file %s. Get 0x%"PRIx64" at "
+ "offset 0x%"PRIx64"\n",
+ fsp_str_dbg(state->fsp),
+ (uint64_t)state->iov.iov_len,
+ (uint64_t)offset);
+
+ io_uring_prep_readv(&cur->sqe,
+ state->fsp->fh->fd,
+ &state->iov,
+ 1,
+ offset);
+ io_uring_sqe_set_data(&cur->sqe, cur);
+}
#endif
struct vfs_io_uring_pwrite_state {
--
2.20.1
From 6f58fd4d442bf043f83dcc0c160a1943aea31743 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra at samba.org>
Date: Thu, 7 May 2020 23:12:53 -0700
Subject: [PATCH 7/7] s3: VFS: io_uring. On cqe completion, check for short
reads.
If any occurred, call the resheduling functions and cause
_vfs_io_uring_queue_run() to be retried.
BUG: https://bugzilla.samba.org/show_bug.cgi?id=14361
Signed-off-by: Jeremy Allison <jra at samba.org>
---
source3/modules/vfs_io_uring.c | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/source3/modules/vfs_io_uring.c b/source3/modules/vfs_io_uring.c
index 87a8f838247..80ef1ac0161 100644
--- a/source3/modules/vfs_io_uring.c
+++ b/source3/modules/vfs_io_uring.c
@@ -226,7 +226,6 @@ static int vfs_io_uring_connect(vfs_handle_struct *handle, const char *service,
return 0;
}
-#if 0
static bool is_io_uring_pread_complete(struct vfs_io_uring_request *cur,
const struct io_uring_cqe *cqe);
@@ -254,7 +253,6 @@ static void vfs_io_uring_op_reschedule(struct vfs_io_uring_config *config,
vfs_io_uring_pread_op_reschedule(config, cur);
}
}
-#endif
static bool _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
{
@@ -307,7 +305,24 @@ static bool _vfs_io_uring_queue_run(struct vfs_io_uring_config *config)
io_uring_for_each_cqe(&config->uring, cqhead, cqe) {
cur = (struct vfs_io_uring_request *)io_uring_cqe_get_data(cqe);
- vfs_io_uring_finish_req(cur, cqe, end_time, __location__);
+ if (vfs_io_uring_complete(cur, cqe)) {
+ /* Full IO completed. We're done. */
+ vfs_io_uring_finish_req(cur, cqe, end_time, __location__);
+ } else {
+ /* Short read or write. We must reschedule. */
+ /* Take us off the pending list. */
+ DLIST_REMOVE(config->pending, cur);
+ cur->list_head = NULL;
+
+ vfs_io_uring_op_reschedule(config, cur);
+
+ /* Put us back on the queued list. */
+ DLIST_ADD_END(config->queue, cur);
+ cur->list_head = &config->queue;
+
+ /* Cause the caller to call us again. */
+ need_retry = true;
+ }
nr++;
}
@@ -423,14 +438,13 @@ static ssize_t vfs_io_uring_pread_recv(struct tevent_req *req,
ret = -1;
} else {
vfs_aio_state->error = 0;
- ret = state->ur.cqe.res;
+ ret = state->total_read;
}
tevent_req_received(req);
return ret;
}
-#if 0
/* Returns false if more to read. Updates the total_read count. */
static bool is_io_uring_pread_complete(struct vfs_io_uring_request *cur,
@@ -499,7 +513,6 @@ static void vfs_io_uring_pread_op_reschedule(struct vfs_io_uring_config *config,
offset);
io_uring_sqe_set_data(&cur->sqe, cur);
}
-#endif
struct vfs_io_uring_pwrite_state {
struct vfs_io_uring_request ur;
--
2.20.1
More information about the samba-technical
mailing list