[linux-cifs-client] [patch] cifs_readpages() asynchronous support

Kirill Kuvaldin kuvkir at epsmu.com
Sat Jun 23 13:19:42 GMT 2007


From: Kirill Kuvaldin <kirill.kuvaldin at gmail.com>

Add asynchronous support for cifs_readpages().
It works by sending multiple SMB Read Requests simultaneously and waiting
for multiple responses.

This implementation uses the generic read_cache_pages() routine
(from mm/readahead.c), which enables us to avoid working directly with the
LRU cache. It also introduces several new helper routines for managing
readpages private data.

Please note that this is a work in progress and shouldn't be used in production ;)
The patch applies against the v2.6.22-rc5 Linux kernel source tree.

Signed-off-by: Kirill Kuvaldin <kirill.kuvaldin at gmail.com>

---
 fs/cifs/cifsglob.h  |   25 +++++++
 fs/cifs/cifsproto.h |    5 +-
 fs/cifs/cifssmb.c   |   17 +++++-
 fs/cifs/connect.c   |    2 +-
 fs/cifs/file.c      |  186 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/cifs/inode.c     |    2 +-
 fs/cifs/transport.c |   77 +++++++++++++++++++++-
 7 files changed, 306 insertions(+), 8 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 23655de..803d047 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -387,6 +387,19 @@ static inline void cifs_stats_bytes_read(struct cifsTconInfo *tcon,
 
 #endif
 
+/* Debug tracing; change "#if 0" to "#if 1" to enable. FIXME remove eventually */
+#if 0
+#define dprintk(fmt, args...)	\
+	printk("DBG at %s:%d: "fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define dprintk(fmt, args...)	do { } while (0)
+#endif
+
+#define CIFS_NEW_READPAGES	/* build the new async ->readpages() in file.c */
+
+#define CIFS_READPAGES_MAX	32 /* max pages per read request; with 4k pages
+				      32*4k = 128k (max for CIFSMaxBufSize) */
+
 /* one of these for every pending CIFS request to the server */
 struct mid_q_entry {
 	struct list_head qhead;	/* mids waiting on reply from this server */
@@ -406,6 +419,18 @@ struct mid_q_entry {
 	unsigned largeBuf:1;    /* if valid response, is pointer to large buf */
 	unsigned multiRsp:1;   /* multiple trans2 responses for one request  */
 	unsigned multiEnd:1; /* both received */
+	struct page *pages[CIFS_READPAGES_MAX]; /* pages for ->readpages() */
+	unsigned nr_pages;	/* number of pages in the request */
+	struct list_head midq_entry; /* mids linked into midq_list */
+};
+
+/* ->readpages() private data: the batch being built plus the mids in flight */
+struct cifs_readpages_data {
+	struct file *file;	/* file being read */
+	struct page *pages[CIFS_READPAGES_MAX]; /* pages of the current batch */
+	unsigned nr_pages;	/* number of valid entries in pages[] */
+	int xid;		/* xid obtained in cifs_readpages() */
+	struct list_head midq_list; /* mids sent by Send(), not yet reaped */
 };
 
 struct oplock_q_entry {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 5d163e2..16d4acf 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -48,6 +48,8 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
 			struct smb_hdr * /* input */ ,
 			struct smb_hdr * /* out */ ,
 			int * /* bytes returned */ , const int long_op);
+extern int Send(const unsigned int xid, struct cifsSesInfo *ses, 
+	     struct kvec *iov, int n_vec, struct cifs_readpages_data *data);
 extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
 			struct kvec *, int /* nvec to send */, 
 			int * /* type of buf returned */ , const int long_op);
@@ -76,6 +78,7 @@ extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
 			     const int stage, 
 			     const struct nls_table *nls_cp);
 extern __u16 GetNextMid(struct TCP_Server_Info *server);
+extern void DeleteMidQEntry(struct mid_q_entry *);
 extern struct oplock_q_entry * AllocOplockQEntry(struct inode *, u16, 
 						 struct cifsTconInfo *);
 extern void DeleteOplockQEntry(struct oplock_q_entry *);
@@ -255,7 +258,7 @@ extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon,
 extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
                         const int netfid, unsigned int count,
                         const __u64 lseek, unsigned int *nbytes, char **buf,
-			int * return_buf_type);
+			int * return_buf_type, int async_flag);
 extern int CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
 			const int netfid, const unsigned int count,
 			const __u64 lseek, unsigned int *nbytes,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 57419a1..39afdab 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1288,7 +1288,7 @@ int
 CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
             const int netfid, const unsigned int count,
             const __u64 lseek, unsigned int *nbytes, char **buf,
-	    int * pbuf_type)
+	    int * pbuf_type, int async_flag)
 {
 	int rc = -EACCES;
 	READ_REQ *pSMB = NULL;
@@ -1335,6 +1335,21 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
 
 	iov[0].iov_base = (char *)pSMB;
 	iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
+
+	if (async_flag) {
+		/* FIXME
+		 * We treat the buf argument as a cifs_readpages data there.
+		 * Yeah, it may seem confusing, but I haven't come up with
+		 * anything better yet.
+		 */
+		struct cifs_readpages_data *data = 
+			(struct cifs_readpages_data *) *buf;
+		rc = Send(xid, tcon->ses, iov, 1, data);
+		cifs_stats_inc(&tcon->num_reads);
+		/* return immediately */
+		return rc;
+	}
+
 	rc = SendReceive2(xid, tcon->ses, iov, 
 			  1 /* num iovecs */,
 			  &resp_buf_type, 0); 
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index f4e9266..b589938 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -452,7 +452,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 		with the most common, zero, as regular data */
 		temp = *((char *) smb_buffer);
 
-		/* Note that FC 1001 length is big endian on the wire, 
+		/* Note that RFC 1001 length is big endian on the wire, 
 		but we convert it here so it is always manipulated
 		as host byte order */
 		pdu_length = ntohl(smb_buffer->smb_buf_length);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 94d5b49..4f4d3d1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1524,7 +1524,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 					 open_file->netfid,
 					 current_read_size, *poffset,
 					 &bytes_read, &smb_read_data,
-					 &buf_type);
+					 &buf_type, 0);
 			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
 			if (smb_read_data) {
 				if (copy_to_user(current_offset,
@@ -1610,7 +1610,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 					 open_file->netfid,
 					 current_read_size, *poffset,
 					 &bytes_read, &current_offset,
-					 &buf_type);
+					 &buf_type, 0);
 		}
 		if (rc || (bytes_read == 0)) {
 			if (total_read) {
@@ -1645,6 +1645,184 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	return rc;
 }
 
+#ifdef CIFS_NEW_READPAGES
+
+/*
+ * Send one asynchronous SMB read covering the pages batched in @data.
+ */
+static int cifs_send_readpages(struct cifs_readpages_data *data)
+{
+	unsigned int bogus_nbytes = 0; /* CIFSSMBRead wants non-NULL nbytes */
+	struct cifsFileInfo *open_file = data->file->private_data;
+	struct cifs_sb_info *cifs_sb;
+	unsigned int read_size;
+	loff_t offset;
+
+	cifs_sb = CIFS_SB(data->file->f_path.dentry->d_sb);
+	read_size = data->nr_pages * PAGE_CACHE_SIZE;
+	/* widen before shifting so offsets past 4GB survive on 32-bit */
+	offset = (loff_t)data->pages[0]->index << PAGE_CACHE_SHIFT;
+
+	/* with the async flag set, the buf argument carries @data itself */
+	return CIFSSMBRead(data->xid, cifs_sb->tcon, open_file->netfid,
+			read_size, offset, &bogus_nbytes, (char **)&data,
+			NULL, 1 /* async flag */);
+}
+
+/**
+ * is_response_received - find a queued mid that is no longer pending
+ *
+ * Scans data->midq_list backwards for an entry whose state has moved on from
+ * MID_REQUEST_SUBMITTED.  Returns 1 and stores that entry in *midQ_recv when
+ * one is found.
+ * Returns 0 and leaves *midQ_recv untouched otherwise.
+ */
+static inline int is_response_received(struct cifs_readpages_data *data,
+		struct mid_q_entry **midQ_recv)
+{
+	struct mid_q_entry *entry;
+
+	list_for_each_entry_reverse(entry, &data->midq_list, midq_entry) {
+		dprintk("INFO: MIDQ mid=%d, state=%d\n",
+				entry->mid, entry->midState);
+		if (entry->midState == MID_REQUEST_SUBMITTED)
+			continue;	/* still waiting for this one */
+		*midQ_recv = entry;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Reap the reply to every read on data->midq_list and copy each payload into
+ * that mid's pages.  Pages left without data (short read, bad reply, timeout)
+ * are unlocked but not marked uptodate.  Returns 0, or -EIO after a timeout.
+ * NOTE(review): the SMB status of a reply is not checked before parsing it.
+ */
+static int wait_for_multiple_responses(struct cifsSesInfo *ses,
+		struct cifs_readpages_data *data)
+{
+	int timeout = 30 * HZ; /* FIXME: is that value good enough? */
+	int rc = 0, bytes_read;
+	struct mid_q_entry *midQ;
+	READ_RSP *pSMBr;
+	char *dst, *src;
+	unsigned int i;
+
+	dprintk("NOTICE: wait_for_multiple_responses: in\n");
+
+	while (!list_empty(&data->midq_list)) {
+		midQ = NULL;	/* never reuse the entry freed last round */
+		i = 0;
+		if (!rc && !wait_event_timeout(ses->server->response_q,
+				is_response_received(data, &midQ), timeout)) {
+			dprintk("WARNING: wait_for_multiple_responses: "
+				"timeout occurred\n");
+			rc = -EIO;
+		}
+		/* after a timeout just retire whatever is still queued */
+		if (midQ == NULL)
+			midQ = list_entry(data->midq_list.next,
+					struct mid_q_entry, midq_entry);
+		if (midQ->resp_buf == NULL ||
+				midQ->midState != MID_RESPONSE_RECEIVED) {
+			dprintk("WARNING: bad mid state\n");
+			goto retire;
+		}
+		dprintk("NOTICE: GOT MIDQ: mid=%d\n", midQ->mid);
+		pSMBr = (READ_RSP *)midQ->resp_buf;
+		bytes_read = (le16_to_cpu(pSMBr->DataLengthHigh) << 16) +
+			le16_to_cpu(pSMBr->DataLength);
+		if (bytes_read > midQ->nr_pages * PAGE_CACHE_SIZE) {
+			dprintk("WARNING: response too big, bytes_read = %d\n",
+					bytes_read);
+			goto retire;
+		}
+		src = (char *)pSMBr + 4 + le16_to_cpu(pSMBr->DataOffset);
+		for (; bytes_read > 0; i++, src += PAGE_CACHE_SIZE) {
+			struct page *page = midQ->pages[i];
+			int chunk = min_t(int, bytes_read, PAGE_CACHE_SIZE);
+
+			dst = kmap_atomic(page, KM_USER0);
+			memcpy(dst, src, chunk);
+			/* zero the tail end of a partial page */
+			memset(dst + chunk, 0, PAGE_CACHE_SIZE - chunk);
+			kunmap_atomic(dst, KM_USER0);
+			flush_dcache_page(page);
+			SetPageUptodate(page);
+			unlock_page(page);
+			bytes_read -= chunk;
+		}
+retire:
+		/* pages that got no data must still be unlocked */
+		for (; i < midQ->nr_pages; i++)
+			unlock_page(midQ->pages[i]);
+		list_del(&midQ->midq_entry);
+		DeleteMidQEntry(midQ);
+		atomic_dec(&ses->server->inFlight);
+		wake_up(&ses->server->request_q);
+	}
+	return rc;
+}
+
+/* read_cache_pages() callback: batch @page, sending the batch when needed */
+static int cifs_readpages_filler(void *_data, struct page *page)
+{
+	struct cifs_readpages_data *batch = _data;
+	struct cifs_sb_info *sb_info =
+		CIFS_SB(batch->file->f_path.dentry->d_sb);
+	unsigned n = batch->nr_pages;
+	int err;
+
+	/* send first when full, over rsize, or @page does not follow on */
+	if (n > 0 && (n == CIFS_READPAGES_MAX ||
+			(n + 1) * PAGE_CACHE_SIZE > sb_info->rsize ||
+			batch->pages[n - 1]->index + 1 != page->index)) {
+		err = cifs_send_readpages(batch);
+		batch->nr_pages = 0;
+		if (err)
+			return err;
+	}
+	batch->pages[batch->nr_pages++] = page;
+	return 0;
+}
+
+/* ->readpages(): send async SMB reads for @pages and wait for the replies */
+static int cifs_readpages(struct file *file, struct address_space *mapping,
+	struct list_head *pages, unsigned nr_pages)
+{
+	int rc, wait_rc;
+	int xid;
+	struct cifs_readpages_data data;
+	struct cifs_sb_info *cifs_sb;
+
+	dprintk("cifs_readpages: file=%p, nr_pages = %d\n", file, nr_pages);
+	xid = GetXid();
+	if (file->private_data == NULL) {
+		rc = -EBADF;
+		goto out;
+	}
+
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	INIT_LIST_HEAD(&data.midq_list);
+	data.xid = xid;
+	data.file = file;
+	data.nr_pages = 0;
+
+	rc = read_cache_pages(mapping, pages, cifs_readpages_filler, &data);
+	if (!rc && data.nr_pages > 0)
+		rc = cifs_send_readpages(&data); /* flush the last batch */
+
+	/* always reap what was sent, but keep the first error for the caller */
+	wait_rc = wait_for_multiple_responses(cifs_sb->tcon->ses, &data);
+	if (!rc)
+		rc = wait_rc;
+out:
+	FreeXid(xid);
+	return rc;
+}
+
+#else
 
 static void cifs_copy_cache_pages(struct address_space *mapping, 
 	struct list_head *pages, int bytes_read, char *data,
@@ -1767,7 +1945,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 					 open_file->netfid,
 					 read_size, offset,
 					 &bytes_read, &smb_read_data,
-					 &buf_type);
+					 &buf_type, 0);
 			/* BB more RC checks ? */
 			if (rc== -EAGAIN) {
 				if (smb_read_data) {
@@ -1835,6 +2013,8 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 	return rc;
 }
 
+#endif
+
 static int cifs_readpage_worker(struct file *file, struct page *page,
 	loff_t *poffset)
 {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f0ff12b..2d57243 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -247,7 +247,7 @@ static int decode_sfu_inode(struct inode * inode, __u64 size,
 		rc = CIFSSMBRead(xid, pTcon,
 			         netfid,
 				 24 /* length */, 0 /* offset */,
-				 &bytes_read, &pbuf, &buf_type);
+				 &bytes_read, &pbuf, &buf_type, 0);
 		if ((rc == 0) && (bytes_read >= 8)) {
 			if (memcmp("IntxBLK", pbuf, 8) == 0) {
 				cFYI(1,("Block device"));
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 5f46845..44f9bd5 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -75,7 +75,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
 	return temp;
 }
 
-static void
+void
 DeleteMidQEntry(struct mid_q_entry *midEntry)
 {
 #ifdef CONFIG_CIFS_STATS2
@@ -417,6 +417,81 @@ static int wait_for_response(struct cifsSesInfo *ses,
 }
 
 int
+Send(const unsigned int xid, struct cifsSesInfo *ses, 
+	     struct kvec *iov, int n_vec, struct cifs_readpages_data *data)
+{
+	int rc = 0;
+	struct mid_q_entry *midQ;
+	struct smb_hdr *in_buf = iov[0].iov_base;
+	int i;
+	/* Sends the request and returns; in_buf is released on every path. */
+	if ((ses == NULL) || (ses->server == NULL)) {
+		cifs_small_buf_release(in_buf);
+		cERROR(1,("Null session"));
+		return -EIO;
+	}
+
+	if(ses->server->tcpStatus == CifsExiting) {
+		cifs_small_buf_release(in_buf);
+		return -ENOENT;
+	}
+
+	/* Ensure that we do not send more than 50 overlapping requests
+	   to the same server. We may make this configurable later or
+	   use ses->maxReq */
+
+	rc = wait_for_free_request(ses, 0);
+	if (rc) {
+		cifs_small_buf_release(in_buf);
+		return rc;
+	}
+
+	/* make sure that we sign in the same order that we send on this socket
+	   and avoid races inside tcp sendmsg code that could cause corruption
+	   of smb data */
+
+	down(&ses->server->tcpSem); 
+
+	rc = allocate_mid(ses, in_buf, &midQ);
+	if (rc) {
+		up(&ses->server->tcpSem);
+		cifs_small_buf_release(in_buf);
+		/* Update # of requests on wire to server */
+		atomic_dec(&ses->server->inFlight); 
+		wake_up(&ses->server->request_q);
+		return rc;
+	}
+	/* remember the target pages and queue the mid on the caller's list */
+	for (i = 0; i < data->nr_pages; i++) {
+		midQ->pages[i] = data->pages[i];
+	}
+	midQ->nr_pages = data->nr_pages;
+	list_add_tail(&midQ->midq_entry, &data->midq_list);
+	/* NOTE(review): this rc is overwritten by the smb_send2() rc below */
+ 	rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
+
+	midQ->midState = MID_REQUEST_SUBMITTED;
+#ifdef CONFIG_CIFS_STATS2
+	atomic_inc(&ses->server->inSend);
+#endif
+	rc = smb_send2(ses->server->ssocket, iov, n_vec,
+		      (struct sockaddr *) &(ses->server->addr.sockAddr));
+#ifdef CONFIG_CIFS_STATS2
+	atomic_dec(&ses->server->inSend);
+	midQ->when_sent = jiffies;
+#endif
+	/* NOTE(review): if the send failed the mid stays on data->midq_list */
+	up(&ses->server->tcpSem);
+	cifs_small_buf_release(in_buf);
+
+	dprintk("NOTICE: MIDQ SENT: mid=%d, nr_pages=%d, inFlight=%d\n",
+			midQ->mid, midQ->nr_pages,
+			atomic_read(&ses->server->inFlight));
+
+	return rc;
+}
+
+int
 SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, 
 	     struct kvec *iov, int n_vec, int * pRespBufType /* ret */, 
 	     const int long_op)


More information about the linux-cifs-client mailing list