Using sendfile to improve performance

Ephi Dror edror at panasas.com
Fri Aug 3 22:33:10 GMT 2001


Hi All,

In FreeBSD, and most likely in other UNIX versions, the kernel implementation of
sendfile() is zero-copy, meaning that data can be sent from an fd to a
socket without the need to copy the data to user land (read op) and then write
it back to the kernel (write op).

I have modified reply.c code to use it for all smb read commands and am
trying to see if it makes any performance improvement. (See readX  example
below)

Has anyone out there tried it, or does anyone have any thoughts about it?

All comments are highly appreciated.

Cheers,
Ephi.

Example of one of the changes I made:
=============================

/***************************************************************************
*
  reply to a read and X
****************************************************************************
/
int reply_read_and_X(connection_struct *conn, char *inbuf,char *outbuf,int
length,int bufsize)
{

  /*added to support sendfile start */
  int outsize;
  off_t sbytes;
  struct sf_hdtr	smb_sf_hdtr;
  struct iovec smb_hdr[1];
  int smb_com2;
 /*added to support sendfile start */

  files_struct *fsp = file_fsp(inbuf,smb_vwv2);
  SMB_OFF_T startpos = IVAL(inbuf,smb_vwv3);
  size_t smb_maxcnt = SVAL(inbuf,smb_vwv5);
  size_t smb_mincnt = SVAL(inbuf,smb_vwv6);
  ssize_t nread = -1;
  char *data;
  START_PROFILE(SMBreadX);

  DEBUG(4,("Ephi Debug: In reply_read_and_X  case\n")); /*@@@*/

  /* If it's an IPC, pass off the pipe handler. */
  if (IS_IPC(conn)) {
    END_PROFILE(SMBreadX);
    return reply_pipe_read_and_X(inbuf,outbuf,length,bufsize);
  }

  CHECK_FSP(fsp,conn);
  CHECK_READ(fsp);
  CHECK_ERROR(fsp);

  set_message(outbuf,12,0,True);
  data = smb_buf(outbuf);

  if(CVAL(inbuf,smb_wct) == 12) {
#ifdef LARGE_SMB_OFF_T
    /*
     * This is a large offset (64 bit) read.
     */
    startpos |= (((SMB_OFF_T)IVAL(inbuf,smb_vwv10)) << 32);

#else /* !LARGE_SMB_OFF_T */

    /*
     * Ensure we haven't been sent a >32 bit offset.
     */

    if(IVAL(inbuf,smb_vwv10) != 0) {
      DEBUG(0,("reply_read_and_X - large offset (%x << 32) used and we don't
support \
64 bit offsets.\n", (unsigned int)IVAL(inbuf,smb_vwv10) ));
      END_PROFILE(SMBreadX);
      return(ERROR(ERRDOS,ERRbadaccess));
    }

#endif /* LARGE_SMB_OFF_T */

  }

  if (is_locked(fsp,conn,(SMB_BIG_UINT)smb_maxcnt,(SMB_BIG_UINT)startpos,
READ_LOCK)) {
    END_PROFILE(SMBreadX);
    return(ERROR(ERRDOS,ERRlock));
  }

  /*  added sendfile support */
 /*+++++++++++++++++++*/
  smb_com2 = CVAL(inbuf,smb_vwv0);
  if (lp_use_sendfile()  && (smb_com2 == 0xFF))
  {
     /* here we use sendfile and we know we don't need to chain */
     CVAL(outbuf, smb_vwv0) = 0xFF;
    SSVAL(outbuf,smb_vwv5,smb_maxcnt);
    SSVAL(outbuf,smb_vwv6,smb_offset(data,outbuf));
    SSVAL(smb_buf(outbuf),-2,smb_maxcnt);
    outsize = chain_reply(inbuf,outbuf,length,bufsize);
    smb_setlen(outbuf, outsize - 4);

     smb_hdr[0].iov_base = outbuf;
     smb_hdr[0].iov_len = data - outbuf;

     smb_sf_hdtr.headers =  smb_hdr;
     smb_sf_hdtr.hdr_cnt  = 1;
	 smb_sf_hdtr.trailers = 0;
     smb_sf_hdtr.trl_cnt  = 0;

     if (sendfile(fsp->fd, smbd_server_fd(), startpos, smb_maxcnt +
smb_hdr[0].iov_len ,
	     &smb_sf_hdtr, &sbytes, 0) < 0)
    {
	DEBUG(4,("sendfile:  sent %d bytes, errno=%d\n",(int)sbytes, errno));
	DEBUG(4, ( "readX fnum=%d min=%d max=%d outsize=%d\n",
	 fsp->fnum, (int)smb_mincnt, (int)smb_maxcnt, (int)outsize ) );
    }

    END_PROFILE(SMBreadX);
    return -1;

  } else
  {
             /* normal case as before */
	  nread = read_file(fsp,data,startpos,smb_maxcnt);

	  if (nread < 0) {
		END_PROFILE(SMBreadX);
		return(UNIXERROR(ERRDOS,ERRnoaccess));
	  }

	  SSVAL(outbuf,smb_vwv5,nread);
	  SSVAL(outbuf,smb_vwv6,smb_offset(data,outbuf));
	  SSVAL(smb_buf(outbuf),-2,nread);

	  DEBUG( 3, ( "readX fnum=%d min=%d max=%d nread=%d\n",
			  fsp->fnum, (int)smb_mincnt, (int)smb_maxcnt, (int)nread ) );

	  END_PROFILE(SMBreadX);

	  return chain_reply(inbuf,outbuf,length,bufsize);
  }

}





More information about the samba-technical mailing list