[PATCH v2 01/12] smb: smbdirect: add smbdirect_pdu.h with protocol definitions
Stefan Metzmacher
metze at samba.org
Tue Jun 3 09:06:09 UTC 2025
Am 03.06.25 um 08:20 schrieb Namjae Jeon:
> On Tue, Jun 3, 2025 at 7:03 AM Stefan Metzmacher <metze at samba.org> wrote:
>>
>> Am 02.06.25 um 04:19 schrieb Namjae Jeon:
>>> On Mon, Jun 2, 2025 at 10:57 AM Steve French <smfrench at gmail.com> wrote:
>>>>
>>>>> Can you explain why he has split it into smbdirect_socket.h?
>>>>
>>>> The three header names seem plausible, but would be useful to have
>>>> Metze's clarification/explanation:
>>>> - the "protocol" related header info for smbdirect goes in
>>>> smb/common/smbdirect/smbdirect_pdu.h (we use similar name smb2pdu.h
>>>> for the smb2/smb3 protocol related wire definitions)
>>>> - smbdirect.h for internal smbdirect structure definitions
>>>> - smbdirect_socket.h for things related to exporting it as a socket
>>>> (since one of the goals is to make smbdirect useable by Samba
>>>> userspace tools)
>>> There is no need to do things in advance that are not yet concrete and
>>> may change later.
>>
>> The current idea is to merge transport_tcp and transport_rdma into
>> transport_sock, see
>> https://git.samba.org/?p=metze/linux/wip.git;a=blob;f=fs/smb/server/transport_sock.c;hb=66714b6c0fc1eacbeb5b85d07524caa722fc19cf
I've attached it to this mail, but remember this is just a demo; I need to redo it
based on the current transport_tcp.c and transport_rdma.c
>> Which uses this interface:
>> https://git.samba.org/?p=metze/linux/wip.git;a=blob;f=fs/smb/common/smbdirect/smbdirect.h;hb=66714b6c0fc1eacbeb5b85d07524caa722fc19cf
> Hm.. I can not access these links.. Is it just me?
Here's a trimmed down version with only the in kernel related stuff of smbdirect.h:
/*
* userspace:
*
* int socket(int family, int type, int protocol);
*
* kernel:
*
* int sock_create_kern(struct net *net,
* int family, int type, int protocol,
* struct socket **res);
*
* family: PF_SMBDIRECT
* type: SOCK_STREAM, together with SOCK_CLOEXEC SOCK_NONBLOCK
* protocol: address family + protocol flavor
*
* address family: AF_INET or AF_INET6
* protocol flavor: SMBDIRECT_FLAVOR_AUTO
*/
/*
* Bit layout of the 'protocol' argument: the low 8 bits carry the
* address family.
* AF_INET, AF_INET6 and AF_IB are below 0xff
*/
#define SMBDIRECT_FAMILY_MASK 0x000000ff
/*
* Bits 12-15 carry the protocol flavor.
* For now just auto use iWARP and RoCE
*/
#define SMBDIRECT_FLAVOR_MASK 0x0000f000
#define SMBDIRECT_FLAVOR_AUTO 0x00000000
/*
* Additional hints/flags (bits 16-30)
*/
#define SMBDIRECT_FLAGS_MASK 0x7fff0000
/* No flag bit is listed here yet: the expression evaluates to 0. */
#define SMBDIRECT_FLAGS_VALID ( \
0)
/*
* Complement of the three masks above. With a 32-bit unsigned this is
* 0x80000f00, i.e. bits 8-11 and bit 31.
*/
#define SMBDIRECT_PROTOCOL_INVALID_MASK ~((unsigned)( \
SMBDIRECT_FAMILY_MASK | \
SMBDIRECT_FLAVOR_MASK | \
SMBDIRECT_FLAGS_MASK | \
0))
/*
* Numeric identifiers for smbdirect operations. In this excerpt only
* __SMBDIRECT_BUFFER_REMOTE_INVALIDATE is consumed, as the number passed
* to _IOW() in SMBDIRECT_BUFFER_REMOTE_INVALIDATE_CMSG_TYPE; how the
* other values are used is not shown here.
* NOTE(review): the values look grouped by hundreds (100: connection,
* 200: local buffer registration, 300: remote buffer operations) - confirm.
*/
enum {
__SMBDIRECT_CONNECTION_PARAMETERS = 100,
__SMBDIRECT_BUFFER_UNREGISTER = 200,
__SMBDIRECT_BUFFER_REGISTER_READ = 201,
__SMBDIRECT_BUFFER_REGISTER_WRITE = 202,
__SMBDIRECT_BUFFER_REMOTE_INVALIDATE = 300,
__SMBDIRECT_BUFFER_REMOTE_READ = 301,
__SMBDIRECT_BUFFER_REMOTE_WRITE = 302,
};
/*
* Parameters of an smbdirect connection.
*
* The struct is packed, so it is 2 + 2 + 6 * 4 = 28 bytes without padding.
* All members use host-endian __u16/__u32 types.
* NOTE(review): the units of the size fields and of keepalive_interval are
* not visible in this excerpt - confirm (bytes / seconds?).
*/
struct smbdirect_connection_parameters {
__u16 recv_credit_max;
__u16 send_credit_target;
__u32 max_send_size;
__u32 max_fragmented_send_size;
__u32 max_recv_size;
__u32 max_fragmented_recv_size;
__u32 max_read_write_size;
__u32 keepalive_interval;
} __packed;
/*
* Declaration only; the implementation is not part of this excerpt.
* NOTE(review): presumably fills *params with the parameters of the
* connection behind 'sock' and returns 0 or a negative errno - confirm.
*/
int smbdirect_kern_connection_get_parameters(struct socket *sock,
struct smbdirect_connection_parameters *params);
/*
* A single buffer descriptor: a 64-bit offset, a 32-bit token and a 32-bit
* length, all declared little-endian (__le64/__le32). Packed: 16 bytes.
* NOTE(review): this looks like the Buffer Descriptor V1 wire layout from
* MS-SMBD - confirm.
*/
struct smbdirect_buffer_descriptor_v1 {
__le64 offset;
__le32 token;
__le32 length;
} __packed;
/*
* Header of a variable-length list of struct smbdirect_buffer_descriptor_v1
* entries, which live in the trailing flexible array. Packed: the header
* itself is 4 + 2 + 2 = 8 bytes.
*/
struct smbdirect_buffer_descriptors_v1 {
/* Bits for 'flags'; SMBDIRECT_BUFFER_DEFINED_FLAGS is their union (0xf). */
#define SMBDIRECT_BUFFER_ALLOW_READ 0x00000001
#define SMBDIRECT_BUFFER_ALLOW_WRITE 0x00000002
#define SMBDIRECT_BUFFER_INVALIDATE 0x00000004
#define SMBDIRECT_BUFFER_REMOTE 0x00000008
#define SMBDIRECT_BUFFER_DEFINED_FLAGS ( \
SMBDIRECT_BUFFER_ALLOW_READ | \
SMBDIRECT_BUFFER_ALLOW_WRITE | \
SMBDIRECT_BUFFER_INVALIDATE | \
SMBDIRECT_BUFFER_REMOTE | \
0)
__u32 flags;
/*
* Original remark:
* (UINT16_MAX / sizeof(struct smbdirect_buffer_descriptor_v1)) = 4096
* NOTE(review): with a 16 byte descriptor the integer division yields
* 4095 (4096 * 16 = 65536 = UINT16_MAX + 1) - confirm which limit is
* intended.
*/
#define _SMBDIRECT_BUFFER_COUNT_MAX 4096
/*
* SMBDIRECT_BUFFER_DESCRIPTORS_V1_FAST_INIT sets this to the size of the
* inline array; presumably the capacity of array[] - confirm.
*/
__u16 max_count;
/* NOTE(review): presumably the number of array[] entries in use - confirm. */
__u16 count;
struct smbdirect_buffer_descriptor_v1 array[];
} __packed;
/*
* A struct smbdirect_buffer_descriptors_v1 header immediately followed by
* a fixed array of SMBDIRECT_BUFFER_COUNT_FAST descriptors (32 unless the
* macro was defined before this point). Because both structs are packed,
* __fast_array starts at the same offset as the flexible hdr.array[].
*/
struct smbdirect_buffer_descriptors_v1_fast {
struct smbdirect_buffer_descriptors_v1 hdr;
#ifndef SMBDIRECT_BUFFER_COUNT_FAST
#define SMBDIRECT_BUFFER_COUNT_FAST 32
#endif
struct smbdirect_buffer_descriptor_v1 __fast_array[SMBDIRECT_BUFFER_COUNT_FAST];
} __packed;
/*
* Initializer for struct smbdirect_buffer_descriptors_v1_fast: sets
* hdr.max_count to SMBDIRECT_BUFFER_COUNT_FAST; every other member is
* zero-initialized.
*/
#define SMBDIRECT_BUFFER_DESCRIPTORS_V1_FAST_INIT { \
.hdr = { \
.max_count = SMBDIRECT_BUFFER_COUNT_FAST, \
}, \
}
/*
 * Size in bytes of a struct smbdirect_buffer_descriptors_v1 header followed
 * by 'count' descriptors.
 *
 * The argument is parenthesized so that an expression such as 'n + 1' is
 * multiplied as a whole instead of only its first operand.
 */
#define SMBDIRECT_BUFFER_DESCRIPTORS_V1_SIZE(count) \
	(sizeof(struct smbdirect_buffer_descriptors_v1) + \
	 sizeof(struct smbdirect_buffer_descriptor_v1) * (count))
/*
* Storage for one control message with up to 24 bytes of payload
* (CMSG_SPACE(24)). The only payload visible in this excerpt,
* struct smbdirect_buffer_remote_invalidate_args, is 16 bytes.
*/
struct smbdirect_cmsg_buffer {
uint8_t msg_control[CMSG_SPACE(24)];
};
/*
 * Fill 'cbuffer' with a single SOL_SMBDIRECT control message of type
 * 'cmsg_type' that carries a copy of 'payload', and attach it to 'msg'.
 *
 * The padded message has to fit into cbuffer->msg_control; this is asserted
 * with both BUILD_BUG_ON() and BUG_ON().
 */
static inline void __smbdirect_cmsg_prepare(struct msghdr *msg,
					    struct smbdirect_cmsg_buffer *cbuffer,
					    int cmsg_type,
					    const void *payload,
					    size_t payloadlen)
{
	const size_t space = CMSG_SPACE(payloadlen);
	struct cmsghdr *hdr;

	BUILD_BUG_ON(space > sizeof(cbuffer->msg_control));
	BUG_ON(space > sizeof(cbuffer->msg_control));

	memset(cbuffer, 0, sizeof(*cbuffer));

	/* CMSG_FIRSTHDR() looks at msg_control/msg_controllen, set them first */
	msg->msg_control = cbuffer->msg_control;
	msg->msg_controllen = space;

	hdr = CMSG_FIRSTHDR(msg);
	hdr->cmsg_level = SOL_SMBDIRECT;
	hdr->cmsg_type = cmsg_type;
	hdr->cmsg_len = CMSG_LEN(payloadlen);
	memcpy(CMSG_DATA(hdr), payload, payloadlen);

	/* the final control length is CMSG_LEN(), not the padded CMSG_SPACE() */
	msg->msg_controllen = hdr->cmsg_len;
}
/*
* Payload of the remote-invalidate control message: exactly one
* struct smbdirect_buffer_descriptor_v1 (packed, 16 bytes).
*/
struct smbdirect_buffer_remote_invalidate_args {
struct smbdirect_buffer_descriptor_v1 first_desc;
} __packed;
/*
* cmsg_type value for the remote-invalidate control message, built with
* _IOW() from type character 'S', number
* __SMBDIRECT_BUFFER_REMOTE_INVALIDATE and the size of the args struct.
*/
#define SMBDIRECT_BUFFER_REMOTE_INVALIDATE_CMSG_TYPE \
_IOW('S', __SMBDIRECT_BUFFER_REMOTE_INVALIDATE, \
struct smbdirect_buffer_remote_invalidate_args)
static inline void smbdirect_buffer_remote_invalidate_cmsg_prepare(struct msghdr *msg,
struct smbdirect_cmsg_buffer *cbuffer,
const struct smbdirect_buffer_descriptor_v1 *desc)
{
struct smbdirect_buffer_remote_invalidate_args args = {
.first_desc = *desc,
};
__smbdirect_cmsg_prepare(msg, cbuffer,
SMBDIRECT_BUFFER_REMOTE_INVALIDATE_CMSG_TYPE,
&args, sizeof(args));
}
/*
* Declaration only; the implementation is not part of this excerpt.
* NOTE(review): presumably transfers up to 'size' bytes from 'iter' into
* the peer buffers described by 'remote' and returns the number of bytes
* or a negative errno - confirm.
*/
ssize_t smbdirect_kern_rdma_v1_writev(struct socket *sock,
const struct smbdirect_buffer_descriptors_v1 *remote,
size_t size,
struct iov_iter *iter);
/*
* Declaration only; the implementation is not part of this excerpt.
* NOTE(review): presumably transfers up to 'size' bytes from the peer
* buffers described by 'remote' into 'iter' and returns the number of
* bytes or a negative errno - confirm.
*/
ssize_t smbdirect_kern_rdma_v1_readv(struct socket *sock,
const struct smbdirect_buffer_descriptors_v1 *remote,
size_t size,
struct iov_iter *iter);
>> But note that is just the direction where it goes; the current code has a lot of resolved merge conflicts,
>> which may not work at all currently.
>>
>> Instead of putting my current code I try to take the existing client and server
>> code and merge it, so that we don't have a flag day commit that switches to
>> completely new code. Instead I try to do tiny steps in that direction
>> and may end with an interface that is similar but might be a bit different in
>> some parts.
> Okay.
>
>>
>>> He can just put these changes in his own queue and work on them.
>>> I am pointing out why he is trying to put unfinished things in the public queue.
>>
>> Because I want to base the next steps on something that is already accepted.
>>
>> I really don't want to work on it for weeks and then some review will void
>> that work completely and I can start again.
> It was too tiny a step and unclear.
> i.e. the patch description should not have comments like "It will be
> used in the next commits..."
What should it say if something is introduced but not yet used?
I mean I could explain in more detail how it will be used in
the next commits?
>>> If You want to apply it, Please do it only on cifs.ko. When it is
>>> properly implemented, I want to apply it to ksmbd.
>>
>> I can keep the ksmbd patches rebased on top and send them again
>> each time to get more feedback.
>>
>> Would that work for you?
> Okay, Please re-include the ksmbd patches in the next patch-set and I
> will check them.
>>
>> The key for me is to discuss patches first and have them reviewed early
>> so that the following work can rely on them. And the tiny steps should
>> make it possible to do easy review and make it possible to test each
>> tiny step.
> Okay. I agreed. But It should not be too tiny.
> As I said above, please don't send it in pieces that I can understand
> by looking at the next commits.
I'll try to keep them tiny; they can always be squashed later,
but splitting them again would be a pain.
You can apply them and do a diff over multiple patches
and tell me which commits I should squash.
Thanks!
metze
-------------- next part --------------
A non-text attachment was scrubbed...
Name: transport_sock.c
Type: text/x-csrc
Size: 20708 bytes
Desc: not available
URL: <http://lists.samba.org/pipermail/samba-technical/attachments/20250603/dd78ff85/transport_sock.c>
More information about the samba-technical
mailing list