[PATCH v2 041/127] smb: smbdirect: introduce smbdirect_connection_send_iter() and related functions

Stefan Metzmacher metze at samba.org
Wed Oct 29 13:20:19 UTC 2025


This is basically a copy of the following functions in the server:

smb_direct_post_send       => smbdirect_connection_post_send_wr
smb_direct_send_ctx_init   => smbdirect_connection_send_batch_init
smb_direct_flush_send_list => smbdirect_connection_send_batch_flush
wait_for_send_lcredit      => smbdirect_connection_wait_for_send_lcredit
wait_for_send_credits      => smbdirect_connection_wait_for_send_credits
post_sendmsg               => smbdirect_connection_post_send_io
smb_direct_post_send_data  => smbdirect_connection_send_single_iter
smb_direct_send_iter       => smbdirect_connection_send_iter

They will replace the server functions soon and will also be used in
the client as smbdirect_connection_send_single_iter() is very similar to
smbd_post_send_iter().

There's also a smbdirect_connection_send_wait_zero_pending()
helper that can be used together with
smbdirect_connection_send_single_iter() in the client
until it can use smbdirect_connection_send_iter(),
which can happen with David's refactoring.

Cc: Steve French <smfrench at gmail.com>
Cc: Tom Talpey <tom at talpey.com>
Cc: Long Li <longli at microsoft.com>
Cc: Namjae Jeon <linkinjeon at kernel.org>
Cc: David Howells <dhowells at redhat.com>
Cc: linux-cifs at vger.kernel.org
Cc: samba-technical at lists.samba.org
Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 .../common/smbdirect/smbdirect_connection.c   | 404 +++++++++++++++++-
 1 file changed, 403 insertions(+), 1 deletion(-)

diff --git a/fs/smb/common/smbdirect/smbdirect_connection.c b/fs/smb/common/smbdirect/smbdirect_connection.c
index e2eb3c6cf0f1..e834fcfe05af 100644
--- a/fs/smb/common/smbdirect/smbdirect_connection.c
+++ b/fs/smb/common/smbdirect/smbdirect_connection.c
@@ -1076,7 +1076,410 @@ static bool smbdirect_connection_request_keep_alive(struct smbdirect_socket *sc)
 	return false;
 }
 
+/*
+ * Post an already constructed send work request (or WR chain) on the QP.
+ *
+ * send_io.pending.count is incremented before ib_post_send() and rolled
+ * back if posting fails, so the counter only reflects WRs actually owned
+ * by the HCA; the send completion path is assumed to decrement it
+ * (not visible in this hunk - confirm).
+ *
+ * A posting failure is fatal for the connection and schedules a
+ * disconnect with the posting error.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int smbdirect_connection_post_send_wr(struct smbdirect_socket *sc,
+					     struct ib_send_wr *wr)
+{
+	int ret;
+
+	/* Fail fast if the connection already hit a fatal error. */
+	if (unlikely(sc->first_error))
+		return sc->first_error;
+
+	atomic_inc(&sc->send_io.pending.count);
+	ret = ib_post_send(sc->ib.qp, wr, NULL);
+	if (ret) {
+		atomic_dec(&sc->send_io.pending.count);
+		smbdirect_log_rdma_send(sc, SMBDIRECT_LOG_ERR,
+			"ib_post_send() failed %1pe\n",
+			SMBDIRECT_DEBUG_ERR_PTR(ret));
+		smbdirect_connection_schedule_disconnect(sc, ret);
+	}
+
+	return ret;
+}
+
+/*
+ * Initialize a send batch: an empty list of send_ios that will be
+ * chained into a single WR list and posted by
+ * smbdirect_connection_send_batch_flush().
+ *
+ * If @need_invalidate_rkey is set, the first WR of the flushed batch
+ * becomes IB_WR_SEND_WITH_INV invalidating @remote_key.
+ */
+static void smbdirect_connection_send_batch_init(struct smbdirect_send_batch *batch,
+						 bool need_invalidate_rkey,
+						 unsigned int remote_key)
+{
+	INIT_LIST_HEAD(&batch->msg_list);
+	batch->wr_cnt = 0;
+	batch->need_invalidate_rkey = need_invalidate_rkey;
+	batch->remote_key = remote_key;
+}
+
+/*
+ * Chain all queued send_ios of @batch into a single WR list and post it.
+ *
+ * Only the last WR is signaled; its completion covers the whole chain.
+ * If the batch was created with need_invalidate_rkey, the first WR is
+ * converted to IB_WR_SEND_WITH_INV exactly once per batch.
+ *
+ * On posting failure all send_ios of the batch are freed here; the
+ * connection was already scheduled for disconnect by
+ * smbdirect_connection_post_send_wr().
+ *
+ * NOTE(review): @is_last is currently unused in this function -
+ * presumably reserved for final-flush semantics; confirm against
+ * later patches in the series.
+ *
+ * Returns 0 (also for an empty batch) or a negative errno.
+ */
+static int smbdirect_connection_send_batch_flush(struct smbdirect_socket *sc,
+						 struct smbdirect_send_batch *batch,
+						 bool is_last)
+{
+	struct smbdirect_send_io *first, *last;
+	int ret;
+
+	if (list_empty(&batch->msg_list))
+		return 0;
+
+	first = list_first_entry(&batch->msg_list,
+				 struct smbdirect_send_io,
+				 sibling_list);
+	last = list_last_entry(&batch->msg_list,
+			       struct smbdirect_send_io,
+			       sibling_list);
+
+	if (batch->need_invalidate_rkey) {
+		first->wr.opcode = IB_WR_SEND_WITH_INV;
+		first->wr.ex.invalidate_rkey = batch->remote_key;
+		batch->need_invalidate_rkey = false;
+		batch->remote_key = 0;
+	}
+
+	/* Only the final WR in the chain generates a completion. */
+	last->wr.send_flags = IB_SEND_SIGNALED;
+	last->wr.wr_cqe = &last->cqe;
+
+	/*
+	 * Remove last from send_ctx->msg_list
+	 * and splice the rest of send_ctx->msg_list
+	 * to last->sibling_list.
+	 *
+	 * send_ctx->msg_list is a valid empty list
+	 * at the end.
+	 */
+	list_del_init(&last->sibling_list);
+	list_splice_tail_init(&batch->msg_list, &last->sibling_list);
+	batch->wr_cnt = 0;
+
+	ret = smbdirect_connection_post_send_wr(sc, &first->wr);
+	if (ret) {
+		struct smbdirect_send_io *sibling, *next;
+
+		/* Free every send_io of the failed chain, last included. */
+		list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) {
+			list_del_init(&sibling->sibling_list);
+			smbdirect_connection_free_send_io(sibling);
+		}
+		smbdirect_connection_free_send_io(last);
+	}
+
+	return ret;
+}
+
+/*
+ * Wait for one local send credit (a free send_io slot).
+ *
+ * If a batch is being built and we are about to consume the last local
+ * credit, flush the batch first - presumably so the queued-but-unposted
+ * WRs can complete and return their credits while we sleep; confirm
+ * against the server's wait_for_send_lcredit() this was copied from.
+ *
+ * Returns 0 with one local credit taken, or a negative errno.
+ */
+static int smbdirect_connection_wait_for_send_lcredit(struct smbdirect_socket *sc,
+						      struct smbdirect_send_batch *batch)
+{
+	if (batch && atomic_read(&sc->send_io.lcredits.count) <= 1) {
+		int ret;
+
+		ret = smbdirect_connection_send_batch_flush(sc, batch, false);
+		if (ret)
+			return ret;
+	}
+
+	return smbdirect_connection_wait_for_credits(sc,
+						     &sc->send_io.lcredits.wait_queue,
+						     &sc->send_io.lcredits.count,
+						     1);
+}
+
+static int smbdirect_connection_wait_for_send_credits(struct smbdirect_socket *sc,
+						      struct smbdirect_send_batch *batch)
+{
+	if (batch && (batch->wr_cnt >= 16 || atomic_read(&sc->send_io.credits.count) <= 1)) {
+		int ret;
+
+		ret = smbdirect_connection_send_batch_flush(sc, batch, false);
+		if (ret)
+			return ret;
+	}
+
+	return smbdirect_connection_wait_for_credits(sc,
+						     &sc->send_io.credits.wait_queue,
+						     &sc->send_io.credits.count,
+						     1);
+}
+
+static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc);
+
+/*
+ * Finalize @msg as an IB_WR_SEND work request.
+ *
+ * All SGEs (header plus payload) are DMA-synced for the device first.
+ *
+ * With a @batch, the WR is chained unsignaled onto the previous WR in
+ * the batch and posting is deferred to the batch flush (which signals
+ * only the last WR). Without a batch it is posted immediately as a
+ * single signaled WR.
+ *
+ * Returns 0 on success (batched WRs always "succeed" here; errors can
+ * only surface at flush time) or a negative errno.
+ */
+static int smbdirect_connection_post_send_io(struct smbdirect_socket *sc,
+					     struct smbdirect_send_batch *batch,
+					     struct smbdirect_send_io *msg)
+{
+	int i;
+
+	for (i = 0; i < msg->num_sge; i++)
+		ib_dma_sync_single_for_device(sc->ib.dev,
+					      msg->sge[i].addr, msg->sge[i].length,
+					      DMA_TO_DEVICE);
+
+	msg->cqe.done = smbdirect_connection_send_io_done;
+	msg->wr.wr_cqe = &msg->cqe;
+	msg->wr.opcode = IB_WR_SEND;
+	msg->wr.sg_list = &msg->sge[0];
+	msg->wr.num_sge = msg->num_sge;
+	msg->wr.next = NULL;
+
+	if (batch) {
+		/* Unsignaled; the flush signals the last WR of the chain. */
+		msg->wr.send_flags = 0;
+		if (!list_empty(&batch->msg_list)) {
+			struct smbdirect_send_io *last;
+
+			last = list_last_entry(&batch->msg_list,
+					       struct smbdirect_send_io,
+					       sibling_list);
+			last->wr.next = &msg->wr;
+		}
+		list_add_tail(&msg->sibling_list, &batch->msg_list);
+		batch->wr_cnt++;
+		return 0;
+	}
+
+	msg->wr.send_flags = IB_SEND_SIGNALED;
+	return smbdirect_connection_post_send_wr(sc, &msg->wr);
+}
+
+/*
+ * Build and send (or queue into @batch) one SMB-Direct data transfer
+ * packet whose payload is drawn from @iter.
+ *
+ * @iter may be NULL for a packet without payload (credit grant /
+ * keep-alive); @remaining_data_length must then be 0 and the header is
+ * sent without the padding field. @flags must be 0 for now.
+ *
+ * One local credit and one remote send credit are taken; both are
+ * given back on the error paths below, whose labels unwind in reverse
+ * order of acquisition. On success the credits are consumed and
+ * returned via the send completion path.
+ *
+ * Returns the number of payload bytes consumed from @iter (0 for a
+ * header-only packet) or a negative errno.
+ */
+static int smbdirect_connection_send_single_iter(struct smbdirect_socket *sc,
+						 struct smbdirect_send_batch *batch,
+						 struct iov_iter *iter,
+						 unsigned int flags,
+						 u32 remaining_data_length)
+{
+	const struct smbdirect_socket_parameters *sp = &sc->parameters;
+	struct smbdirect_send_io *msg;
+	struct smbdirect_data_transfer *packet;
+	size_t header_length;
+	u16 new_credits = 0;
+	u32 data_length = 0;
+	int ret;
+
+	if (WARN_ON_ONCE(flags))
+		return -EINVAL; /* no flags support for now */
+
+	if (iter) {
+		if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE))
+			return -EINVAL; /* It's a bug in upper layer to get there */
+
+		header_length = sizeof(struct smbdirect_data_transfer);
+		if (WARN_ON_ONCE(remaining_data_length == 0 ||
+				 iov_iter_count(iter) > remaining_data_length))
+			return -EINVAL;
+	} else {
+		/* If this is a packet without payload, don't send padding */
+		header_length = offsetof(struct smbdirect_data_transfer, padding);
+		if (WARN_ON_ONCE(remaining_data_length))
+			return -EINVAL;
+	}
+
+	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
+		smbdirect_log_write(sc, SMBDIRECT_LOG_ERR,
+			"status=%s first_error=%1pe => %s\n",
+			smbdirect_socket_status_string(sc->status),
+			SMBDIRECT_DEBUG_ERR_PTR(sc->first_error),
+			errname(-ENOTCONN));
+		return -ENOTCONN;
+	}
+
+	ret = smbdirect_connection_wait_for_send_lcredit(sc, batch);
+	if (ret)
+		goto lcredit_failed;
+
+	ret = smbdirect_connection_wait_for_send_credits(sc, batch);
+	if (ret)
+		goto credit_failed;
+
+	msg = smbdirect_connection_alloc_send_io(sc);
+	if (IS_ERR(msg)) {
+		ret = PTR_ERR(msg);
+		goto alloc_failed;
+	}
+
+	/* Map the packet to DMA */
+	msg->sge[0].addr = ib_dma_map_single(sc->ib.dev,
+					     msg->packet,
+					     header_length,
+					     DMA_TO_DEVICE);
+	ret = ib_dma_mapping_error(sc->ib.dev, msg->sge[0].addr);
+	if (ret)
+		goto err;
+
+	msg->sge[0].length = header_length;
+	msg->sge[0].lkey = sc->ib.pd->local_dma_lkey;
+	msg->num_sge = 1;
+
+	if (iter) {
+		/* Map as much payload as fits after the header into sge[1..]. */
+		struct smbdirect_map_sges extract = {
+			.num_sge	= msg->num_sge,
+			.max_sge	= ARRAY_SIZE(msg->sge),
+			.sge		= msg->sge,
+			.device		= sc->ib.dev,
+			.local_dma_lkey	= sc->ib.pd->local_dma_lkey,
+			.direction	= DMA_TO_DEVICE,
+		};
+		size_t payload_len = umin(iov_iter_count(iter),
+					  sp->max_send_size - sizeof(*packet));
+
+		ret = smbdirect_map_sges_from_iter(iter, payload_len, &extract);
+		if (ret < 0)
+			goto err;
+		data_length = ret;
+		remaining_data_length -= data_length;
+		msg->num_sge = extract.num_sge;
+	}
+
+	/* Fill in the packet header */
+	packet = (struct smbdirect_data_transfer *)msg->packet;
+	packet->credits_requested = cpu_to_le16(sp->send_credit_target);
+	new_credits = smbdirect_connection_grant_recv_credits(sc);
+	packet->credits_granted = cpu_to_le16(new_credits);
+
+	packet->flags = 0;
+	if (smbdirect_connection_request_keep_alive(sc))
+		packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED);
+
+	packet->reserved = 0;
+	/* payload starts at offset 24, right after the padded header */
+	if (!data_length)
+		packet->data_offset = 0;
+	else
+		packet->data_offset = cpu_to_le32(24);
+	packet->data_length = cpu_to_le32(data_length);
+	packet->remaining_data_length = cpu_to_le32(remaining_data_length);
+	packet->padding = 0;
+
+	smbdirect_log_outgoing(sc, SMBDIRECT_LOG_INFO,
+		"DataOut: %s=%u, %s=%u, %s=0x%x, %s=%u, %s=%u, %s=%u\n",
+		"CreditsRequested",
+		le16_to_cpu(packet->credits_requested),
+		"CreditsGranted",
+		le16_to_cpu(packet->credits_granted),
+		"Flags",
+		le16_to_cpu(packet->flags),
+		"RemainingDataLength",
+		le32_to_cpu(packet->remaining_data_length),
+		"DataOffset",
+		le32_to_cpu(packet->data_offset),
+		"DataLength",
+		le32_to_cpu(packet->data_length));
+
+	ret = smbdirect_connection_post_send_io(sc, batch, msg);
+	if (ret)
+		goto err;
+	return data_length;
+	/*
+	 * Error unwind, in reverse order of acquisition: free the
+	 * send_io (assumed to undo the DMA mappings - confirm), then
+	 * give back the remote send credit and the local credit.
+	 */
+err:
+	smbdirect_connection_free_send_io(msg);
+alloc_failed:
+	atomic_inc(&sc->send_io.credits.count);
+credit_failed:
+	atomic_inc(&sc->send_io.lcredits.count);
+lcredit_failed:
+	return ret;
+}
+
 __maybe_unused /* this is temporary while this file is included in orders */
+/*
+ * Wait until every posted send completed (send_io.pending.count == 0)
+ * or the connection left the CONNECTED state.
+ *
+ * Returns 0 if the socket is still connected after the wait,
+ * -ENOTCONN otherwise (even if the pending count also reached zero).
+ */
+static int smbdirect_connection_send_wait_zero_pending(struct smbdirect_socket *sc)
+{
+	/*
+	 * As an optimization, we don't wait for individual I/O to finish
+	 * before sending the next one.
+	 * Send them all and wait for pending send count to get to 0
+	 * that means all the I/Os have been out and we are good to return
+	 */
+
+	wait_event(sc->send_io.pending.zero_wait_queue,
+		   atomic_read(&sc->send_io.pending.count) == 0 ||
+		   sc->status != SMBDIRECT_SOCKET_CONNECTED);
+	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
+		smbdirect_log_write(sc, SMBDIRECT_LOG_ERR,
+			"status=%s first_error=%1pe => %s\n",
+			smbdirect_socket_status_string(sc->status),
+			SMBDIRECT_DEBUG_ERR_PTR(sc->first_error),
+			errname(-ENOTCONN));
+		return -ENOTCONN;
+	}
+
+	return 0;
+}
+
+__maybe_unused /* this is temporary while this file is included in orders */
+/*
+ * Send a complete SMB-Direct fragmented message from @iter.
+ *
+ * The iterator must contain the whole message including a 4-byte
+ * big-endian length header, which is consumed and validated here but
+ * not transmitted. The remainder is split into chunks by
+ * smbdirect_connection_send_single_iter() via a send batch, the batch
+ * is flushed, and then we wait for all pending sends to complete.
+ *
+ * @flags must be 0; @need_invalidate/@remote_key request
+ * IB_WR_SEND_WITH_INV on the first WR of the batch.
+ *
+ * Returns the original total byte count (including the 4-byte header)
+ * on success or a negative errno.
+ */
+static int smbdirect_connection_send_iter(struct smbdirect_socket *sc,
+					  struct iov_iter *iter,
+					  unsigned int flags,
+					  bool need_invalidate,
+					  unsigned int remote_key)
+{
+	struct smbdirect_socket_parameters *sp = &sc->parameters;
+	struct smbdirect_send_batch batch;
+	int total_count = iov_iter_count(iter);
+	int ret;
+	int error = 0;
+	__be32 hdr;
+
+	if (WARN_ON_ONCE(flags))
+		return -EINVAL; /* no flags support for now */
+
+	if (WARN_ON_ONCE(iov_iter_rw(iter) != ITER_SOURCE))
+		return -EINVAL; /* It's a bug in upper layer to get there */
+
+	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
+		smbdirect_log_write(sc, SMBDIRECT_LOG_ERR,
+			"status=%s first_error=%1pe => %s\n",
+			smbdirect_socket_status_string(sc->status),
+			SMBDIRECT_DEBUG_ERR_PTR(sc->first_error),
+			errname(-ENOTCONN));
+		return -ENOTCONN;
+	}
+
+	/*
+	 * For now we expect the iter to have the full
+	 * message, including a 4 byte length header.
+	 */
+	if (iov_iter_count(iter) <= 4)
+		return -EINVAL;
+	if (!copy_from_iter_full(&hdr, sizeof(hdr), iter))
+		return -EFAULT;
+	if (iov_iter_count(iter) != be32_to_cpu(hdr))
+		return -EINVAL;
+
+	/*
+	 * The size must fit into the negotiated
+	 * fragmented send size.
+	 */
+	if (iov_iter_count(iter) > sp->max_fragmented_send_size)
+		return -EMSGSIZE;
+
+	smbdirect_log_write(sc, SMBDIRECT_LOG_INFO,
+		"Sending (RDMA): length=%zu\n",
+		iov_iter_count(iter));
+
+	smbdirect_connection_send_batch_init(&batch, need_invalidate, remote_key);
+	/*
+	 * Each iteration consumes up to max_send_size worth of payload;
+	 * the current iter count is passed as remaining_data_length.
+	 */
+	while (iov_iter_count(iter)) {
+		ret = smbdirect_connection_send_single_iter(sc,
+							    &batch,
+							    iter,
+							    flags,
+							    iov_iter_count(iter));
+		if (unlikely(ret < 0)) {
+			error = ret;
+			break;
+		}
+	}
+
+	/* Flush even after an error; the first error wins. */
+	ret = smbdirect_connection_send_batch_flush(sc, &batch, true);
+	if (unlikely(ret && !error))
+		error = ret;
+
+	/*
+	 * As an optimization, we don't wait for individual I/O to finish
+	 * before sending the next one.
+	 * Send them all and wait for pending send count to get to 0
+	 * that means all the I/Os have been out and we are good to return
+	 */
+
+	ret = smbdirect_connection_send_wait_zero_pending(sc);
+	if (unlikely(ret && !error))
+		error = ret;
+
+	if (unlikely(error))
+		return error;
+
+	return total_count;
+}
+
 static void smbdirect_connection_send_io_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct smbdirect_send_io *msg =
@@ -1788,7 +2191,6 @@ static ssize_t smbdirect_map_sges_from_folioq(struct iov_iter *iter,
  * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some
  * way.
  */
-__maybe_unused /* this is temporary while this file is included in orders */
 static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len,
 					    struct smbdirect_map_sges *state)
 {
-- 
2.43.0




More information about the samba-technical mailing list