[PATCH] Make server-side copy (copy-chunk) async

Ralph Böhme slow at samba.org
Fri Mar 24 12:10:41 UTC 2017


On Thu, Mar 23, 2017 at 02:09:25PM -0700, Jeremy Allison wrote:
> On Thu, Mar 23, 2017 at 07:16:18PM +0100, Ralph Böhme wrote:
> > On Thu, Mar 23, 2017 at 06:39:46PM +0100, Ralph Böhme wrote:
> > > On Thu, Mar 23, 2017 at 06:11:59AM -0700, Jeremy Allison wrote:
> > > > On Thu, Mar 23, 2017 at 10:39:24AM +0100, Stefan Metzmacher wrote:
> > > > > 
> > > > > The fact that the cancel function passed
> > > > > to tevent_req_set_cancel_fn() only gets the
> > > > > tevent_req *req pointer, makes it impossible to write a generic
> > > > > helper function on top of tevent.
> > > > 
> > > > So is it time for a tevent_req_set_cancel_fn_ex() that
> > > > allows a private pointer to be passed as well ?
> > > > 
> > > > Might that be a better/more general solution ?
> > > 
> > > That can already be done by putting the private pointer into the state of the
> > > request, so I don't see a real benefit here.
> > > 
> > > > 
> > > > > I think tevent_req_set_cancel_forwarding() is easy to use for
> > > > > all trivial cases where we do non parallel requests, which
> > > > > is the majority of all use cases.
> > > > > 
> > > > > If someone has a more complex situation, it's only solvable
> > > > > via a custom cancel function.
> > > > 
> > > > OK, it's just a little ugly IMHO. As I said, I
> > > > can live with it :-).
> > > 
> > > Thanks!
> > > 
> > > metze pointed out that the patch doesn't handle cancellation semantics
> > > correctly: from an MS-SMB2/MS-FSA/Windows behaviour perspective, cancellation
> > > means removing a pending request that is not yet in progress. As soon as a
> > > request is in-progress it must not be cancelled.
> > 
> > metze, I may be missing something, but from looking at cancel_smb2_aio() we make
> > the exact same mistake for async reads/writes that I was about to introduce for
> > async copy-chunk.
> > 
> > Ie cancel_smb2_aio() returns true, meaning operation was cancelled (implying it
> > was not yet running), but behaviour is to just detach it and leave it running. As
> > we can't currently cancel correctly, I'd say we must not attempt to cancel
> > reads/writes with the current code.
> 
> So cancel_smb2_aio() should just return 'false' ? Yeah ?
> 
> Until you put your cancellation code in - in which case
> we can remove threads that haven't started yet...

Ok, so I've come to the point where I believe we can make better progress by
just splitting the async copy-chunk from the cancellation business.

The current sync copy-chunk IO can't be cancelled, so it's not a regression that
the new async code can't be cancelled as well. It would have been nice, but as
it turned out, dragons be there.

Attached is a stripped down version of the async copy-chunk code without the
cancellation stuff. Please review & push if happy. Thanks!

I'll continue to work on the cancellation issues (SMB2 frontend cancel code is
broken, pthreadpool_tevent backend lacks cancellation support) and will post
patches once I have something.

Cheerio!
-slow
-------------- next part --------------
From 2baed35792407b1d757686a503c6c19738ac6063 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Wed, 8 Mar 2017 15:07:06 +0100
Subject: [PATCH 1/7] s3/smbd: move copychunk ioctl limits to IDL

This will be needed in the next commit in vfs_default.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 librpc/idl/ioctl.idl                 | 4 ++++
 source3/smbd/smb2_ioctl_network_fs.c | 3 ---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/librpc/idl/ioctl.idl b/librpc/idl/ioctl.idl
index eb2dc8a..f72f9d2 100644
--- a/librpc/idl/ioctl.idl
+++ b/librpc/idl/ioctl.idl
@@ -11,6 +11,10 @@ interface copychunk
 		uint8 context[4];
 	} req_resume_key_rsp;
 
+	const uint32 COPYCHUNK_MAX_CHUNKS = 256; /* 2k8r2 & win8 = 256 */
+	const uint32 COPYCHUNK_MAX_CHUNK_LEN = 1048576; /* 2k8r2 & win8 = 1048576 */
+	const uint32 COPYCHUNK_MAX_TOTAL_LEN = 16777216; /* 2k8r2 & win8 = 16777216 */
+
 	typedef struct {
 		hyper source_off;
 		hyper target_off;
diff --git a/source3/smbd/smb2_ioctl_network_fs.c b/source3/smbd/smb2_ioctl_network_fs.c
index c2b889b..c279182 100644
--- a/source3/smbd/smb2_ioctl_network_fs.c
+++ b/source3/smbd/smb2_ioctl_network_fs.c
@@ -31,9 +31,6 @@
 #include "smb2_ioctl_private.h"
 #include "../lib/tsocket/tsocket.h"
 
-#define COPYCHUNK_MAX_CHUNKS	256		/* 2k8r2 & win8 = 256 */
-#define COPYCHUNK_MAX_CHUNK_LEN	1048576		/* 2k8r2 & win8 = 1048576 */
-#define COPYCHUNK_MAX_TOTAL_LEN	16777216	/* 2k8r2 & win8 = 16777216 */
 static void copychunk_pack_limits(struct srv_copychunk_rsp *cc_rsp)
 {
 	cc_rsp->chunks_written = COPYCHUNK_MAX_CHUNKS;
-- 
2.9.3


From edd0b5487c01a8a86f57acf498c5c45b0e10e585 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sun, 12 Mar 2017 17:18:39 +0100
Subject: [PATCH 2/7] vfs_default: let copy_chunk_send use const from IDL

This also increases the buffer size from 8 MB to the current value of
COPYCHUNK_MAX_TOTAL_LEN which is 16 MB.

For the typical case when vfswrap_copy_chunk_send is called from the SMB
layer for a copy_chunk ioctl() the parameter "num" is guaranteed to be
at most 1 MB though.

It will only be larger for special callers like vfs_fruit for their
special implementation of copyfile where num will be the size of a file
to copy.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/modules/vfs_default.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c
index d4610f7..054ff64 100644
--- a/source3/modules/vfs_default.c
+++ b/source3/modules/vfs_default.c
@@ -33,6 +33,7 @@
 #include "lib/util/tevent_ntstatus.h"
 #include "lib/util/sys_rw.h"
 #include "lib/pthreadpool/pthreadpool_tevent.h"
+#include "librpc/gen_ndr/ndr_ioctl.h"
 
 #undef DBGC_CLASS
 #define DBGC_CLASS DBGC_VFS
@@ -1617,7 +1618,7 @@ static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *hand
 	}
 
 	vfs_cc_state->buf = talloc_array(vfs_cc_state, uint8_t,
-					 MIN(num, 8*1024*1024));
+					 MIN(num, COPYCHUNK_MAX_TOTAL_LEN));
 	if (tevent_req_nomem(vfs_cc_state->buf, req)) {
 		return tevent_req_post(req, ev);
 	}
-- 
2.9.3


From 0436e43cdc538ab0498faf64d637b9e61105141f Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Tue, 21 Mar 2017 08:26:37 +0100
Subject: [PATCH 3/7] s3/smbd: move cc_copy into fsctl_srv_copychunk_state

No change in behaviour, just preparatory stuff to unroll the core
copy loop.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/smbd/smb2_ioctl_network_fs.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/source3/smbd/smb2_ioctl_network_fs.c b/source3/smbd/smb2_ioctl_network_fs.c
index c279182..efb6fe7 100644
--- a/source3/smbd/smb2_ioctl_network_fs.c
+++ b/source3/smbd/smb2_ioctl_network_fs.c
@@ -77,6 +77,7 @@ static NTSTATUS copychunk_check_limits(struct srv_copychunk_copy *cc_copy)
 
 struct fsctl_srv_copychunk_state {
 	struct connection_struct *conn;
+	struct srv_copychunk_copy cc_copy;
 	uint32_t dispatch_count;
 	uint32_t recv_count;
 	uint32_t bad_recv_count;
@@ -164,7 +165,6 @@ static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 						   struct smbd_smb2_request *smb2req)
 {
 	struct tevent_req *req;
-	struct srv_copychunk_copy cc_copy;
 	enum ndr_err_code ndr_ret;
 	uint64_t src_persistent_h;
 	uint64_t src_volatile_h;
@@ -192,7 +192,7 @@ static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 		return tevent_req_post(req, ev);
 	}
 
-	ndr_ret = ndr_pull_struct_blob(in_input, mem_ctx, &cc_copy,
+	ndr_ret = ndr_pull_struct_blob(in_input, mem_ctx, &state->cc_copy,
 			(ndr_pull_flags_fn_t)ndr_pull_srv_copychunk_copy);
 	if (ndr_ret != NDR_ERR_SUCCESS) {
 		DEBUG(0, ("failed to unmarshall copy chunk req\n"));
@@ -202,8 +202,8 @@ static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 	}
 
 	/* persistent/volatile keys sent as the resume key */
-	src_persistent_h = BVAL(cc_copy.source_key, 0);
-	src_volatile_h = BVAL(cc_copy.source_key, 8);
+	src_persistent_h = BVAL(state->cc_copy.source_key, 0);
+	src_volatile_h = BVAL(state->cc_copy.source_key, 8);
 	state->src_fsp = file_fsp_get(smb2req, src_persistent_h, src_volatile_h);
 	if (state->src_fsp == NULL) {
 		DEBUG(3, ("invalid resume key in copy chunk req\n"));
@@ -223,7 +223,7 @@ static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 		return tevent_req_post(req, ev);
 	}
 
-	state->status = copychunk_check_limits(&cc_copy);
+	state->status = copychunk_check_limits(&state->cc_copy);
 	if (!NT_STATUS_IS_OK(state->status)) {
 		DEBUG(3, ("copy chunk req exceeds limits\n"));
 		state->out_data = COPYCHUNK_OUT_LIMITS;
@@ -234,7 +234,7 @@ static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 	/* any errors from here onwards should carry copychunk response data */
 	state->out_data = COPYCHUNK_OUT_RSP;
 
-	if (cc_copy.chunk_count == 0) {
+	if (state->cc_copy.chunk_count == 0) {
 		struct tevent_req *vfs_subreq;
 		/*
 		 * Process as OS X copyfile request. This is currently
@@ -266,9 +266,9 @@ static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 		return req;
 	}
 
-	for (i = 0; i < cc_copy.chunk_count; i++) {
+	for (i = 0; i < state->cc_copy.chunk_count; i++) {
 		struct tevent_req *vfs_subreq;
-		chunk = &cc_copy.chunks[i];
+		chunk = &state->cc_copy.chunks[i];
 		vfs_subreq = SMB_VFS_COPY_CHUNK_SEND(dst_fsp->conn,
 						     state, ev,
 						     state->src_fsp,
-- 
2.9.3


From ae8e5d0d3781c2b2d8798441ebab6855233d52c0 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Tue, 21 Mar 2017 09:17:03 +0100
Subject: [PATCH 4/7] s3/smbd: implement a serializing async copy-chunk loop

Later commits will make the low level copy-chunk implementation async
using a thread pool. That means the individual chunks may be scheduled
and copied out-of-order at the low level.

According to conversation with MS Dochelp, a server implementation
must process individual chunks in order.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/smbd/smb2_ioctl_network_fs.c | 188 +++++++++++++++++++----------------
 1 file changed, 102 insertions(+), 86 deletions(-)

diff --git a/source3/smbd/smb2_ioctl_network_fs.c b/source3/smbd/smb2_ioctl_network_fs.c
index efb6fe7..42d9857 100644
--- a/source3/smbd/smb2_ioctl_network_fs.c
+++ b/source3/smbd/smb2_ioctl_network_fs.c
@@ -76,11 +76,11 @@ static NTSTATUS copychunk_check_limits(struct srv_copychunk_copy *cc_copy)
 }
 
 struct fsctl_srv_copychunk_state {
+	struct tevent_context *ev;
 	struct connection_struct *conn;
 	struct srv_copychunk_copy cc_copy;
-	uint32_t dispatch_count;
-	uint32_t recv_count;
-	uint32_t bad_recv_count;
+	uint32_t current_chunk;
+	uint32_t next_chunk;
 	NTSTATUS status;
 	off_t total_written;
 	struct files_struct *src_fsp;
@@ -156,6 +156,8 @@ static NTSTATUS copychunk_check_handles(uint32_t ctl_code,
 	return NT_STATUS_OK;
 }
 
+static NTSTATUS fsctl_srv_copychunk_loop(struct tevent_req *req);
+
 static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 						   struct tevent_context *ev,
 						   uint32_t ctl_code,
@@ -164,13 +166,12 @@ static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 						   size_t in_max_output,
 						   struct smbd_smb2_request *smb2req)
 {
-	struct tevent_req *req;
-	enum ndr_err_code ndr_ret;
+	struct tevent_req *req = NULL;
+	struct fsctl_srv_copychunk_state *state = NULL;
 	uint64_t src_persistent_h;
 	uint64_t src_volatile_h;
-	int i;
-	struct srv_copychunk *chunk;
-	struct fsctl_srv_copychunk_state *state;
+	enum ndr_err_code ndr_ret;
+	NTSTATUS status;
 
 	/* handler for both copy-chunk variants */
 	SMB_ASSERT((ctl_code == FSCTL_SRV_COPYCHUNK)
@@ -181,7 +182,10 @@ static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 	if (req == NULL) {
 		return NULL;
 	}
-	state->conn = dst_fsp->conn;
+	*state = (struct fsctl_srv_copychunk_state) {
+		.conn = dst_fsp->conn,
+		.ev = ev,
+	};
 
 	if (in_max_output < sizeof(struct srv_copychunk_rsp)) {
 		DEBUG(3, ("max output %d not large enough to hold copy chunk "
@@ -234,109 +238,123 @@ static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 	/* any errors from here onwards should carry copychunk response data */
 	state->out_data = COPYCHUNK_OUT_RSP;
 
+	status = fsctl_srv_copychunk_loop(req);
+	if (tevent_req_nterror(req, status)) {
+		return tevent_req_post(req, ev);
+	}
+
+	return req;
+}
+
+static NTSTATUS fsctl_srv_copychunk_loop(struct tevent_req *req)
+{
+	struct fsctl_srv_copychunk_state *state = tevent_req_data(
+		req, struct fsctl_srv_copychunk_state);
+	struct tevent_req *subreq = NULL;
+	struct srv_copychunk *chunk = NULL;
+
+	if (state->next_chunk > state->cc_copy.chunk_count) {
+		DBG_ERR("Copy-chunk loop next_chunk [%d] chunk_count [%d]\n",
+			state->next_chunk, state->cc_copy.chunk_count);
+		return NT_STATUS_INTERNAL_ERROR;
+	}
+
 	if (state->cc_copy.chunk_count == 0) {
-		struct tevent_req *vfs_subreq;
 		/*
 		 * Process as OS X copyfile request. This is currently
 		 * the only copychunk request with a chunk count of 0
 		 * we will process.
 		 */
-		if (!state->src_fsp->aapl_copyfile_supported) {
-			tevent_req_nterror(req, NT_STATUS_INVALID_PARAMETER);
-			return tevent_req_post(req, ev);
-		}
-		if (!state->dst_fsp->aapl_copyfile_supported) {
-			tevent_req_nterror(req, NT_STATUS_INVALID_PARAMETER);
-			return tevent_req_post(req, ev);
+		if (!state->src_fsp->aapl_copyfile_supported ||
+		    !state->dst_fsp->aapl_copyfile_supported)
+		{
+			/*
+			 * This must not return an error but just a chunk count
+			 * of 0 in the response.
+			 */
+			tevent_req_done(req);
+			tevent_req_post(req, state->ev);
+			return NT_STATUS_OK;
 		}
 		state->aapl_copyfile = true;
-		vfs_subreq = SMB_VFS_COPY_CHUNK_SEND(dst_fsp->conn,
-						     state, ev,
-						     state->src_fsp,
-						     0,
-						     state->dst_fsp,
-						     0,
-						     0);
-		if (tevent_req_nomem(vfs_subreq, req)) {
-			return tevent_req_post(req, ev);
+
+		subreq = SMB_VFS_COPY_CHUNK_SEND(state->dst_fsp->conn,
+						 state,
+						 state->ev,
+						 state->src_fsp,
+						 0,
+						 state->dst_fsp,
+						 0,
+						 0);
+		if (subreq == NULL) {
+			return NT_STATUS_NO_MEMORY;
 		}
-		tevent_req_set_callback(vfs_subreq,
+		tevent_req_set_callback(subreq,
 					fsctl_srv_copychunk_vfs_done, req);
-		state->dispatch_count++;
-		return req;
+		return NT_STATUS_OK;
 	}
 
-	for (i = 0; i < state->cc_copy.chunk_count; i++) {
-		struct tevent_req *vfs_subreq;
-		chunk = &state->cc_copy.chunks[i];
-		vfs_subreq = SMB_VFS_COPY_CHUNK_SEND(dst_fsp->conn,
-						     state, ev,
-						     state->src_fsp,
-						     chunk->source_off,
-						     state->dst_fsp,
-						     chunk->target_off,
-						     chunk->length);
-		if (vfs_subreq == NULL) {
-			DEBUG(0, ("VFS copy chunk send failed\n"));
-			state->status = NT_STATUS_NO_MEMORY;
-			if (state->dispatch_count == 0) {
-				/* nothing dispatched, return immediately */
-				tevent_req_nterror(req, state->status);
-				return tevent_req_post(req, ev);
-			} else {
-				/*
-				 * wait for dispatched to complete before
-				 * returning error.
-				 */
-				break;
-			}
-		}
-		tevent_req_set_callback(vfs_subreq,
-					fsctl_srv_copychunk_vfs_done, req);
-		state->dispatch_count++;
+	chunk = &state->cc_copy.chunks[state->current_chunk];
+
+	/*
+	 * Doing the increment and calculation for the next chunk here and not
+	 * in the done function, as a later commit will make this a more
+	 * sophisticated logic.
+	 */
+	state->next_chunk++;
+
+	subreq = SMB_VFS_COPY_CHUNK_SEND(state->dst_fsp->conn,
+					 state,
+					 state->ev,
+					 state->src_fsp,
+					 chunk->source_off,
+					 state->dst_fsp,
+					 chunk->target_off,
+					 chunk->length);
+	if (tevent_req_nomem(subreq, req)) {
+		return NT_STATUS_NO_MEMORY;
 	}
+	tevent_req_set_callback(subreq,	fsctl_srv_copychunk_vfs_done, req);
 
-	return req;
+	return NT_STATUS_OK;
 }
 
 static void fsctl_srv_copychunk_vfs_done(struct tevent_req *subreq)
 {
 	struct tevent_req *req = tevent_req_callback_data(
 		subreq, struct tevent_req);
-	struct fsctl_srv_copychunk_state *state = tevent_req_data(req,
-					struct fsctl_srv_copychunk_state);
+	struct fsctl_srv_copychunk_state *state = tevent_req_data(
+		req, struct fsctl_srv_copychunk_state);
 	off_t chunk_nwritten;
 	NTSTATUS status;
 
-	state->recv_count++;
 	status = SMB_VFS_COPY_CHUNK_RECV(state->conn, subreq,
 					 &chunk_nwritten);
 	TALLOC_FREE(subreq);
-	if (NT_STATUS_IS_OK(status)) {
-		DEBUG(10, ("good copy chunk recv %u of %u\n",
-			   (unsigned int)state->recv_count,
-			   (unsigned int)state->dispatch_count));
-		state->total_written += chunk_nwritten;
-	} else {
-		DEBUG(0, ("bad status in copy chunk recv %u of %u: %s\n",
-			  (unsigned int)state->recv_count,
-			  (unsigned int)state->dispatch_count,
-			  nt_errstr(status)));
-		state->bad_recv_count++;
-		/* may overwrite previous failed status */
-		state->status = status;
-	}
-
-	if (state->recv_count != state->dispatch_count) {
-		/*
-		 * Wait for all VFS copy_chunk requests to complete, even
-		 * if an error is received for a specific chunk.
-		 */
+	if (!NT_STATUS_IS_OK(status)) {
+		DBG_ERR("copy chunk failed [%s] chunk [%u] of [%u]\n",
+			nt_errstr(status),
+			(unsigned int)state->next_chunk,
+			(unsigned int)state->cc_copy.chunk_count);
+		tevent_req_nterror(req, status);
 		return;
 	}
 
-	if (!tevent_req_nterror(req, state->status)) {
+	DBG_DEBUG("good copy chunk [%u] of [%u]\n",
+		  (unsigned int)state->next_chunk,
+		  (unsigned int)state->cc_copy.chunk_count);
+	state->total_written += chunk_nwritten;
+
+	state->current_chunk = state->next_chunk;
+
+	if (state->next_chunk == state->cc_copy.chunk_count) {
 		tevent_req_done(req);
+		return;
+	}
+
+	status = fsctl_srv_copychunk_loop(req);
+	if (tevent_req_nterror(req, status)) {
+		return;
 	}
 }
 
@@ -361,7 +379,7 @@ static NTSTATUS fsctl_srv_copychunk_recv(struct tevent_req *req,
 		if (state->aapl_copyfile == true) {
 			cc_rsp->chunks_written = 0;
 		} else {
-			cc_rsp->chunks_written = state->recv_count - state->bad_recv_count;
+			cc_rsp->chunks_written = state->current_chunk;
 		}
 		cc_rsp->chunk_bytes_written = 0;
 		cc_rsp->total_bytes_written = state->total_written;
@@ -371,9 +389,7 @@ static NTSTATUS fsctl_srv_copychunk_recv(struct tevent_req *req,
 		assert(1);
 		break;
 	}
-	status = state->status;
-	tevent_req_received(req);
-
+	status = tevent_req_simple_recv_ntstatus(req);
 	return status;
 }
 
-- 
2.9.3


From 9d2848e1b36decdc2b02f1935517207e760fc590 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Tue, 21 Mar 2017 18:34:22 +0100
Subject: [PATCH 5/7] s3/smbd: optimize copy-chunk by merging chunks if
 possible

Merge chunks with adjacent ranges. This results in fewer IO requests for
the typical server-side file copy usecase: just one 16 MB copy instead
of sixteen 1 MB.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/smbd/smb2_ioctl_network_fs.c | 79 ++++++++++++++++++++++++++++++------
 1 file changed, 67 insertions(+), 12 deletions(-)

diff --git a/source3/smbd/smb2_ioctl_network_fs.c b/source3/smbd/smb2_ioctl_network_fs.c
index 42d9857..5b869ec 100644
--- a/source3/smbd/smb2_ioctl_network_fs.c
+++ b/source3/smbd/smb2_ioctl_network_fs.c
@@ -246,12 +246,74 @@ static struct tevent_req *fsctl_srv_copychunk_send(TALLOC_CTX *mem_ctx,
 	return req;
 }
 
+/*
+ * Work out the next IO request from the remaining chunks starting with
+ * state->current_chunk. Chunks with left to right adjacent ranges can be merged
+ * into a single IO request.
+ */
+static uint32_t next_merged_io(struct fsctl_srv_copychunk_state *state)
+{
+	struct srv_copychunk *chunk = NULL;
+	uint32_t length;
+	off_t next_src_offset;
+	off_t next_dst_offset;
+
+	/*
+	 * We're expected to process at least one chunk and return it's length,
+	 * so let's do this first.
+	 */
+
+	chunk = &state->cc_copy.chunks[state->current_chunk];
+	length = chunk->length;
+	state->next_chunk++;
+
+	/*
+	 * Now process subsequent chunks merging chunks as long as ranges are
+	 * adjacent and the source file size is not exceeded (this is needed
+	 * for error reporting).
+	 */
+
+	next_src_offset = chunk->source_off + chunk->length;
+	next_dst_offset = chunk->target_off + chunk->length;
+
+	while (state->next_chunk < state->cc_copy.chunk_count) {
+		chunk = &state->cc_copy.chunks[state->next_chunk];
+
+		if ((chunk->source_off != next_src_offset) ||
+		    (chunk->target_off != next_dst_offset))
+		{
+			/* Not adjacent, stop merging */
+			break;
+		}
+
+		next_src_offset += chunk->length;
+		next_dst_offset += chunk->length;
+
+		if (next_src_offset > state->src_fsp->fsp_name->st.st_ex_size) {
+			/* Source filesize exceeded, stop merging */
+			break;
+		}
+
+		/*
+		 * Found a mergable chunk, merge it and continue searching.
+		 * Note: this can't wrap, limits were already checked in
+		 * copychunk_check_limits().
+		 */
+		length += chunk->length;
+
+		state->next_chunk++;
+	}
+
+	return length;
+}
+
 static NTSTATUS fsctl_srv_copychunk_loop(struct tevent_req *req)
 {
 	struct fsctl_srv_copychunk_state *state = tevent_req_data(
 		req, struct fsctl_srv_copychunk_state);
 	struct tevent_req *subreq = NULL;
 	struct srv_copychunk *chunk = NULL;
+	uint32_t length;
 
 	if (state->next_chunk > state->cc_copy.chunk_count) {
 		DBG_ERR("Copy-chunk loop next_chunk [%d] chunk_count [%d]\n",
@@ -269,8 +331,8 @@ static NTSTATUS fsctl_srv_copychunk_loop(struct tevent_req *req)
 		    !state->dst_fsp->aapl_copyfile_supported)
 		{
 			/*
-			 * This must not return an error but just a chunk count
-			 * of 0 in the response.
+			 * This must not produce an error but just return a
+			 * chunk count of 0 in the response.
 			 */
 			tevent_req_done(req);
 			tevent_req_post(req, state->ev);
@@ -295,13 +357,7 @@ static NTSTATUS fsctl_srv_copychunk_loop(struct tevent_req *req)
 	}
 
 	chunk = &state->cc_copy.chunks[state->current_chunk];
-
-	/*
-	 * Doing the increment and calculation for the next chunk here and not
-	 * in the done function, as a later commit will make this a more
-	 * sophisticated logic.
-	 */
-	state->next_chunk++;
+	length = next_merged_io(state);
 
 	subreq = SMB_VFS_COPY_CHUNK_SEND(state->dst_fsp->conn,
 					 state,
@@ -310,7 +366,7 @@ static NTSTATUS fsctl_srv_copychunk_loop(struct tevent_req *req)
 					 chunk->source_off,
 					 state->dst_fsp,
 					 chunk->target_off,
-					 chunk->length);
+					 length);
 	if (tevent_req_nomem(subreq, req)) {
 		return NT_STATUS_NO_MEMORY;
 	}
@@ -346,8 +402,7 @@ static void fsctl_srv_copychunk_vfs_done(struct tevent_req *subreq)
 	state->total_written += chunk_nwritten;
 
 	state->current_chunk = state->next_chunk;
-
-	if (state->next_chunk == state->cc_copy.chunk_count) {
+	if (state->current_chunk == state->cc_copy.chunk_count) {
 		tevent_req_done(req);
 		return;
 	}
-- 
2.9.3


From 2a43667d6ef9d49e612b6be2c34d36714d41132c Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sun, 12 Mar 2017 17:23:09 +0100
Subject: [PATCH 6/7] vfs_default: move check for fsp->op validity

Move the check whether fsp->of is valid out of the copy loop in
vfswrap_copy_chunk_send().

It's sufficient to check src_fsp->op and dest_fsp->op once before the
copy loop. fsp->op can only be NULL for internal opens (cf file_new()),
it's not expected to become NULL behind our backs.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/modules/vfs_default.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c
index 054ff64..545e21a 100644
--- a/source3/modules/vfs_default.c
+++ b/source3/modules/vfs_default.c
@@ -1642,6 +1642,16 @@ static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *hand
 		return tevent_req_post(req, ev);
 	}
 
+	if (src_fsp->op == NULL) {
+		tevent_req_nterror(req, NT_STATUS_INTERNAL_ERROR);
+		return tevent_req_post(req, ev);
+	}
+
+	if (dest_fsp->op == NULL) {
+		tevent_req_nterror(req, NT_STATUS_INTERNAL_ERROR);
+		return tevent_req_post(req, ev);
+	}
+
 	/* could use 2.6.33+ sendfile here to do this in kernel */
 	while (vfs_cc_state->copied < num) {
 		ssize_t ret;
@@ -1651,10 +1661,6 @@ static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *hand
 		off_t this_num = MIN(talloc_array_length(vfs_cc_state->buf),
 				     num - vfs_cc_state->copied);
 
-		if (src_fsp->op == NULL) {
-			tevent_req_nterror(req, NT_STATUS_INTERNAL_ERROR);
-			return tevent_req_post(req, ev);
-		}
 		init_strict_lock_struct(src_fsp,
 					src_fsp->op->global->open_persistent_id,
 					src_off,
@@ -1688,11 +1694,6 @@ static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *hand
 
 		src_off += ret;
 
-		if (dest_fsp->op == NULL) {
-			tevent_req_nterror(req, NT_STATUS_INTERNAL_ERROR);
-			return tevent_req_post(req, ev);
-		}
-
 		init_strict_lock_struct(dest_fsp,
 					dest_fsp->op->global->open_persistent_id,
 					dest_off,
-- 
2.9.3


From 23eda7b9668af1f26f10f12ccdd17b99a7cad966 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sun, 12 Mar 2017 18:13:48 +0100
Subject: [PATCH 7/7] s3/smbd: make copy chunk asynchronous

Just use SMB_VFS_PREAD_SEND/RECV and SMB_VFS_PWRITE_SEND/RECV in a
sensible loop.

Signed-off-by: Ralph Boehme <slow at samba.org>
---
 source3/modules/vfs_default.c | 253 +++++++++++++++++++++++++++++-------------
 1 file changed, 173 insertions(+), 80 deletions(-)

diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c
index 545e21a..73af4f7 100644
--- a/source3/modules/vfs_default.c
+++ b/source3/modules/vfs_default.c
@@ -1592,10 +1592,23 @@ static NTSTATUS vfswrap_fset_dos_attributes(struct vfs_handle_struct *handle,
 }
 
 struct vfs_cc_state {
-	off_t copied;
+	struct tevent_context *ev;
 	uint8_t *buf;
+	bool read_lck_locked;
+	struct lock_struct read_lck;
+	bool write_lck_locked;
+	struct lock_struct write_lck;
+	struct files_struct *src_fsp;
+	off_t src_off;
+	struct files_struct *dst_fsp;
+	off_t dst_off;
+	off_t to_copy;
+	off_t remaining;
+	size_t next_io_size;
 };
 
+static NTSTATUS copy_chunk_loop(struct tevent_req *req);
+
 static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *handle,
 						  TALLOC_CTX *mem_ctx,
 						  struct tevent_context *ev,
@@ -1603,23 +1616,31 @@ static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *hand
 						  off_t src_off,
 						  struct files_struct *dest_fsp,
 						  off_t dest_off,
-						  off_t num)
+						  off_t to_copy)
 {
 	struct tevent_req *req;
-	struct vfs_cc_state *vfs_cc_state;
+	struct vfs_cc_state *state = NULL;
+	size_t num = MIN(to_copy, COPYCHUNK_MAX_TOTAL_LEN);
 	NTSTATUS status;
 
-	DEBUG(10, ("performing server side copy chunk of length %lu\n",
-		   (unsigned long)num));
+	DBG_DEBUG("server side copy chunk of length %" PRIu64 "\n", to_copy);
 
-	req = tevent_req_create(mem_ctx, &vfs_cc_state, struct vfs_cc_state);
+	req = tevent_req_create(mem_ctx, &state, struct vfs_cc_state);
 	if (req == NULL) {
 		return NULL;
 	}
 
-	vfs_cc_state->buf = talloc_array(vfs_cc_state, uint8_t,
-					 MIN(num, COPYCHUNK_MAX_TOTAL_LEN));
-	if (tevent_req_nomem(vfs_cc_state->buf, req)) {
+	*state = (struct vfs_cc_state) {
+		.ev = ev,
+		.src_fsp = src_fsp,
+		.src_off = src_off,
+		.dst_fsp = dest_fsp,
+		.dst_off = dest_off,
+		.to_copy = to_copy,
+		.remaining = to_copy,
+	};
+	state->buf = talloc_array(state, uint8_t, num);
+	if (tevent_req_nomem(state->buf, req)) {
 		return tevent_req_post(req, ev);
 	}
 
@@ -1652,106 +1673,178 @@ static struct tevent_req *vfswrap_copy_chunk_send(struct vfs_handle_struct *hand
 		return tevent_req_post(req, ev);
 	}
 
-	/* could use 2.6.33+ sendfile here to do this in kernel */
-	while (vfs_cc_state->copied < num) {
-		ssize_t ret;
-		struct lock_struct lck;
-		int saved_errno;
+	status = copy_chunk_loop(req);
+	if (!NT_STATUS_IS_OK(status)) {
+		tevent_req_nterror(req, status);
+		return tevent_req_post(req, ev);
+	}
 
-		off_t this_num = MIN(talloc_array_length(vfs_cc_state->buf),
-				     num - vfs_cc_state->copied);
+	return req;
+}
 
-		init_strict_lock_struct(src_fsp,
-					src_fsp->op->global->open_persistent_id,
-					src_off,
-					this_num,
-					READ_LOCK,
-					&lck);
+static void vfswrap_copy_chunk_read_done(struct tevent_req *subreq);
 
-		if (!SMB_VFS_STRICT_LOCK(src_fsp->conn, src_fsp, &lck)) {
-			tevent_req_nterror(req, NT_STATUS_FILE_LOCK_CONFLICT);
-			return tevent_req_post(req, ev);
-		}
+static NTSTATUS copy_chunk_loop(struct tevent_req *req)
+{
+	struct vfs_cc_state *state = tevent_req_data(req, struct vfs_cc_state);
+	struct tevent_req *subreq = NULL;
+	bool ok;
 
-		ret = SMB_VFS_PREAD(src_fsp, vfs_cc_state->buf,
-				    this_num, src_off);
-		if (ret == -1) {
-			saved_errno = errno;
-		}
+	state->next_io_size = MIN(state->remaining, talloc_array_length(state->buf));
 
-		SMB_VFS_STRICT_UNLOCK(src_fsp->conn, src_fsp, &lck);
+	init_strict_lock_struct(state->src_fsp,
+				state->src_fsp->op->global->open_persistent_id,
+				state->src_off,
+				state->next_io_size,
+				READ_LOCK,
+				&state->read_lck);
 
-		if (ret == -1) {
-			errno = saved_errno;
-			tevent_req_nterror(req, map_nt_error_from_unix(errno));
-			return tevent_req_post(req, ev);
-		}
-		if (ret != this_num) {
-			/* zero tolerance for short reads */
-			tevent_req_nterror(req, NT_STATUS_IO_DEVICE_ERROR);
-			return tevent_req_post(req, ev);
-		}
+	ok = SMB_VFS_STRICT_LOCK(state->src_fsp->conn,
+				 state->src_fsp,
+				 &state->read_lck);
+	if (!ok) {
+		return NT_STATUS_FILE_LOCK_CONFLICT;
+	}
 
-		src_off += ret;
+	subreq = SMB_VFS_PREAD_SEND(state,
+				    state->src_fsp->conn->sconn->ev_ctx,
+				    state->src_fsp,
+				    state->buf,
+				    state->next_io_size,
+				    state->src_off);
+	if (subreq == NULL) {
+		return NT_STATUS_NO_MEMORY;
+	}
+	tevent_req_set_callback(subreq, vfswrap_copy_chunk_read_done, req);
 
-		init_strict_lock_struct(dest_fsp,
-					dest_fsp->op->global->open_persistent_id,
-					dest_off,
-					this_num,
-					WRITE_LOCK,
-					&lck);
+	return NT_STATUS_OK;
+}
 
-		if (!SMB_VFS_STRICT_LOCK(dest_fsp->conn, dest_fsp, &lck)) {
-			tevent_req_nterror(req, NT_STATUS_FILE_LOCK_CONFLICT);
-			return tevent_req_post(req, ev);
-		}
+static void vfswrap_copy_chunk_write_done(struct tevent_req *subreq);
 
-		ret = SMB_VFS_PWRITE(dest_fsp, vfs_cc_state->buf,
-				     this_num, dest_off);
-		if (ret == -1) {
-			saved_errno = errno;
-		}
+static void vfswrap_copy_chunk_read_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct vfs_cc_state *state = tevent_req_data(req, struct vfs_cc_state);
+	struct vfs_aio_state aio_state;
+	ssize_t nread;
+	bool ok;
 
-		SMB_VFS_STRICT_UNLOCK(dest_fsp->conn, dest_fsp, &lck);
+	SMB_VFS_STRICT_UNLOCK(state->src_fsp->conn,
+			      state->src_fsp,
+			      &state->read_lck);
+	ZERO_STRUCT(state->read_lck);
 
-		if (ret == -1) {
-			errno = saved_errno;
-			tevent_req_nterror(req, map_nt_error_from_unix(errno));
-			return tevent_req_post(req, ev);
-		}
-		if (ret != this_num) {
-			/* zero tolerance for short writes */
-			tevent_req_nterror(req, NT_STATUS_IO_DEVICE_ERROR);
-			return tevent_req_post(req, ev);
-		}
-		dest_off += ret;
+	nread = SMB_VFS_PREAD_RECV(subreq, &aio_state);
+	TALLOC_FREE(subreq);
+	if (nread == -1) {
+		DBG_ERR("read failed: %s\n", strerror(errno));
+		tevent_req_nterror(req, map_nt_error_from_unix(aio_state.error));
+		return;
+	}
+	if (nread != state->next_io_size) {
+		DBG_ERR("Short read, only %zd of %zu\n",
+			nread, state->next_io_size);
+		tevent_req_nterror(req, NT_STATUS_IO_DEVICE_ERROR);
+		return;
+	}
+
+	state->src_off += nread;
 
-		vfs_cc_state->copied += this_num;
+	init_strict_lock_struct(state->dst_fsp,
+				state->dst_fsp->op->global->open_persistent_id,
+				state->dst_off,
+				state->next_io_size,
+				WRITE_LOCK,
+				&state->write_lck);
+
+	ok = SMB_VFS_STRICT_LOCK(state->dst_fsp->conn,
+				 state->dst_fsp,
+				 &state->write_lck);
+	if (!ok) {
+		tevent_req_nterror(req, NT_STATUS_FILE_LOCK_CONFLICT);
+		return;
 	}
 
-	tevent_req_done(req);
-	return tevent_req_post(req, ev);
+	subreq = SMB_VFS_PWRITE_SEND(state,
+				     state->ev,
+				     state->dst_fsp,
+				     state->buf,
+				     state->next_io_size,
+				     state->dst_off);
+	if (subreq == NULL) {
+		tevent_req_nterror(req, NT_STATUS_NO_MEMORY);
+		return;
+	}
+	tevent_req_set_callback(subreq, vfswrap_copy_chunk_write_done, req);
+}
+
+static void vfswrap_copy_chunk_write_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct vfs_cc_state *state = tevent_req_data(req, struct vfs_cc_state);
+	struct vfs_aio_state aio_state;
+	ssize_t nwritten;
+	NTSTATUS status;
+
+	SMB_VFS_STRICT_UNLOCK(state->dst_fsp->conn,
+			      state->dst_fsp,
+			      &state->write_lck);
+	ZERO_STRUCT(state->write_lck);
+
+	nwritten = SMB_VFS_PWRITE_RECV(subreq, &aio_state);
+	TALLOC_FREE(subreq);
+	if (nwritten == -1) {
+		DBG_ERR("write failed: %s\n", strerror(errno));
+		tevent_req_nterror(req, map_nt_error_from_unix(aio_state.error));
+		return;
+	}
+	if (nwritten != state->next_io_size) {
+		DBG_ERR("Short write, only %zd of %zu\n", nwritten, state->next_io_size);
+		tevent_req_nterror(req, NT_STATUS_IO_DEVICE_ERROR);
+		return;
+	}
+
+	state->dst_off += nwritten;
+
+	if (state->remaining < nwritten) {
+		/* Paranoia check */
+		tevent_req_nterror(req, NT_STATUS_INTERNAL_ERROR);
+		return;
+	}
+	state->remaining -= nwritten;
+	if (state->remaining == 0) {
+		tevent_req_done(req);
+		return;
+	}
+
+	status = copy_chunk_loop(req);
+	if (!NT_STATUS_IS_OK(status)) {
+		tevent_req_nterror(req, status);
+		return;
+	}
+
+	return;
 }
 
 static NTSTATUS vfswrap_copy_chunk_recv(struct vfs_handle_struct *handle,
 					struct tevent_req *req,
 					off_t *copied)
 {
-	struct vfs_cc_state *vfs_cc_state = tevent_req_data(req,
-							struct vfs_cc_state);
+	struct vfs_cc_state *state = tevent_req_data(req, struct vfs_cc_state);
 	NTSTATUS status;
 
 	if (tevent_req_is_nterror(req, &status)) {
-		DEBUG(2, ("server side copy chunk failed: %s\n",
-			  nt_errstr(status)));
+		DBG_DEBUG("copy chunk failed: %s\n", nt_errstr(status));
 		*copied = 0;
 		tevent_req_received(req);
 		return status;
 	}
 
-	*copied = vfs_cc_state->copied;
-	DEBUG(10, ("server side copy chunk copied %lu\n",
-		   (unsigned long)*copied));
+	*copied = state->to_copy;
+	DBG_DEBUG("copy chunk copied %lu\n", (unsigned long)*copied);
 	tevent_req_received(req);
 
 	return NT_STATUS_OK;
-- 
2.9.3



More information about the samba-technical mailing list