[Patch v8 14/16] CIFS: SMBD: Upper layer performs SMB read via RDMA write through memory registration

Steve French smfrench at gmail.com
Wed Jan 3 21:31:28 UTC 2018


Fixed minor typo ("transferred") before merging

On Wed, Nov 22, 2017 at 6:38 PM, Long Li <longli at exchange.microsoft.com> wrote:
> From: Long Li <longli at microsoft.com>
>
> If I/O size is larger than rdma_readwrite_threshold, use RDMA write for
> SMB read by specifying channel SMB2_CHANNEL_RDMA_V1 or
> SMB2_CHANNEL_RDMA_V1_INVALIDATE in the SMB packet, depending on SMB dialect
> used. Append a smbd_buffer_descriptor_v1 to the end of the SMB packet and fill
> in other values to indicate this SMB read uses RDMA write.
>
> There is no need to read from the transport for incoming payload. At the time
> SMB read response comes back, the data is already transferred and placed in the
> pages by RDMA hardware.
>
> When SMB read is finished, deregister the memory regions if RDMA write is used
> for this SMB read. smbd_deregister_mr may need to do local invalidation and
> sleep, if server remote invalidation is not used.
>
> There are situations where the MID may not be created on I/O failure, in
> which case the memory region is deregistered when the read data context is
> released.
>
> Signed-off-by: Long Li <longli at microsoft.com>
> ---
>  fs/cifs/file.c    | 17 +++++++++++++++--
>  fs/cifs/smb2pdu.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 59 insertions(+), 3 deletions(-)
>
> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
> index df9f682..93259a16 100644
> --- a/fs/cifs/file.c
> +++ b/fs/cifs/file.c
> @@ -42,7 +42,7 @@
>  #include "cifs_debug.h"
>  #include "cifs_fs_sb.h"
>  #include "fscache.h"
> -
> +#include "smbdirect.h"
>
>  static inline int cifs_convert_flags(unsigned int flags)
>  {
> @@ -2902,7 +2902,12 @@ cifs_readdata_release(struct kref *refcount)
>  {
>         struct cifs_readdata *rdata = container_of(refcount,
>                                         struct cifs_readdata, refcount);
> -
> +#ifdef CONFIG_CIFS_SMB_DIRECT
> +       if (rdata->mr) {
> +               smbd_deregister_mr(rdata->mr);
> +               rdata->mr = NULL;
> +       }
> +#endif
>         if (rdata->cfile)
>                 cifsFileInfo_put(rdata->cfile);
>
> @@ -3031,6 +3036,10 @@ uncached_fill_pages(struct TCP_Server_Info *server,
>                 }
>                 if (iter)
>                         result = copy_page_from_iter(page, 0, n, iter);
> +#ifdef CONFIG_CIFS_SMB_DIRECT
> +               else if (rdata->mr)
> +                       result = n;
> +#endif
>                 else
>                         result = cifs_read_page_from_socket(server, page, n);
>                 if (result < 0)
> @@ -3600,6 +3609,10 @@ readpages_fill_pages(struct TCP_Server_Info *server,
>
>                 if (iter)
>                         result = copy_page_from_iter(page, 0, n, iter);
> +#ifdef CONFIG_CIFS_SMB_DIRECT
> +               else if (rdata->mr)
> +                       result = n;
> +#endif
>                 else
>                         result = cifs_read_page_from_socket(server, page, n);
>                 if (result < 0)
> diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
> index 908d777..bee0871d 100644
> --- a/fs/cifs/smb2pdu.c
> +++ b/fs/cifs/smb2pdu.c
> @@ -2458,7 +2458,40 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
>         req->MinimumCount = 0;
>         req->Length = cpu_to_le32(io_parms->length);
>         req->Offset = cpu_to_le64(io_parms->offset);
> +#ifdef CONFIG_CIFS_SMB_DIRECT
> +       /*
> +        * If we want to do a RDMA write, fill in and append
> +        * smbd_buffer_descriptor_v1 to the end of read request
> +        */
> +       if (server->rdma && rdata &&
> +               rdata->bytes >= server->smbd_conn->rdma_readwrite_threshold) {
> +
> +               struct smbd_buffer_descriptor_v1 *v1;
> +               bool need_invalidate =
> +                       io_parms->tcon->ses->server->dialect == SMB30_PROT_ID;
> +
> +               rdata->mr = smbd_register_mr(
> +                               server->smbd_conn, rdata->pages,
> +                               rdata->nr_pages, rdata->tailsz,
> +                               true, need_invalidate);
> +               if (!rdata->mr)
> +                       return -ENOBUFS;
> +
> +               req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE;
> +               if (need_invalidate)
> +                       req->Channel = SMB2_CHANNEL_RDMA_V1;
> +               req->ReadChannelInfoOffset =
> +                       offsetof(struct smb2_read_plain_req, Buffer);
> +               req->ReadChannelInfoLength =
> +                       sizeof(struct smbd_buffer_descriptor_v1);
> +               v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0];
> +               v1->offset = rdata->mr->mr->iova;
> +               v1->token = rdata->mr->mr->rkey;
> +               v1->length = rdata->mr->mr->length;
>
> +               *total_len += sizeof(*v1) - 1;
> +       }
> +#endif
>         if (request_type & CHAINED_REQUEST) {
>                 if (!(request_type & END_OF_CHAIN)) {
>                         /* next 8-byte aligned request */
> @@ -2537,7 +2570,17 @@ smb2_readv_callback(struct mid_q_entry *mid)
>                 if (rdata->result != -ENODATA)
>                         rdata->result = -EIO;
>         }
> -
> +#ifdef CONFIG_CIFS_SMB_DIRECT
> +       /*
> +        * If this rdata has a memory region registered, the MR can be freed
> +        * MR needs to be freed as soon as I/O finishes to prevent deadlock
> +        * because they have limited number and are used for future I/Os
> +        */
> +       if (rdata->mr) {
> +               smbd_deregister_mr(rdata->mr);
> +               rdata->mr = NULL;
> +       }
> +#endif
>         if (rdata->result)
>                 cifs_stats_fail_inc(tcon, SMB2_READ_HE);
>
> --
> 2.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Thanks,

Steve



More information about the samba-technical mailing list