[linux-cifs-client] Async io support.

James Roper u3205097 at alumni.anu.edu.au
Sat Jun 11 03:49:27 GMT 2005


Jeremy,

I'll test my Linux cifs vfs async read pages patch with it.  
Unfortunately I don't actually have two linux boxes at the moment, so I 
can only connect to a samba server on localhost, but I may consider 
installing linux on my laptop again to test it.  Anyway, attached is my 
latest patch, which is diffed against the cifs vfs svn head at 
samba.org, if anyone else wants to test it.  To turn async routines on 
(should be on by default) echo 1 > /proc/fs/cifs/AsyncRoutinesEnabled.  
You can also set the maximum number of pages that the kernel will 
request at once, default is 32, in /proc/fs/cifs/ReadaheadMaxPages, I 
haven't noticed it to make a huge difference (increases performance by a 
percent if you set it at 64).  Also, this patch only requests 16KB per 
read, increasing this will change things.  I'm yet to integrate it with 
Steve's work on large writes.

James

Jeremy Allison wrote:

>Steve, Conrad & Eric,
>
>	I just finished adding async io read/write support into Samba 3.x HEAD
>for Volker (who has a specific need for it). However, it turns out
>that for everyday use with Windows clients it hurts, not helps :-(.
>
>This is because it seems they don't asynchronously send reads/writes but
>wait for each reply before sending another. Bummer :-(. I suppose it might
>help with terminal server clients - so long as they do async read/write
>calls (hmmm. not sure if they do). Another reason is that the aio calls on
>Linux are emulated with posix threads :-(.
>
>However I was wondering if this would help the CIFSFS, HPUX or MacOS X clients ?
>
>Do you currently do async read/writes ? If so, can you test against HEAD
>to see if it has any effect on performance ?
>
>Jeremy.
>_______________________________________________
>linux-cifs-client mailing list
>linux-cifs-client at lists.samba.org
>https://lists.samba.org/mailman/listinfo/linux-cifs-client
>  
>

-------------- next part --------------
Index: cifssmb.c
===================================================================
--- cifssmb.c	(revision 27)
+++ cifssmb.c	(working copy)
@@ -791,6 +791,172 @@
 	return rc;
 }
 
+int
+CIFSSMBReadAsync(const int xid, struct cifsTconInfo *tcon,
+	    const int netfid, struct list_head *async_q)
+{
+	int rc = -EACCES;
+	READ_REQ *pSMB = NULL;
+	READ_RSP *pSMBr = NULL;
+	int sent=0, received=0;
+	int response_obtained, request_obtained; 
+	/* Whether this iteration of the loop has sent/received a response */
+	struct async_rw_q_entry *response;
+	struct async_rw_q_entry *request;
+	int timeout = 0;
+
+	request = list_entry(async_q->next, struct async_rw_q_entry, qhead);
+	/* Loop until every request has been sent and received */
+	while (&request->qhead != async_q || received != sent) {
+		request_obtained = 0;
+		/* If we haven't sent everything, send a request.  Sending up to the
+		 * maximum number of requests allowed is ok for samba, as requests
+		 * just sit in the tcp buffer, in fact is probably most efficient
+		 * (according to tridge), and windows is probably is similar.  We do
+		 * however want to be friendly to other processes, so we won't have
+		 * more than one request waiting in the request_q */
+		if (&request->qhead != async_q) {	
+		/* Join request_q. We only want to block if this is the only request
+		 * from us in flight, otherwise if we want to handle responses */
+			spin_lock(&GlobalMid_Lock);
+			while(1) {
+				if(atomic_read(&tcon->ses->server->inFlight) >= CIFS_MAX_REQ) {
+					spin_unlock(&GlobalMid_Lock);
+					if (sent == received) {
+						wait_event(tcon->ses->server->request_q,
+							atomic_read(&tcon->ses->server->inFlight) 
+							< CIFS_MAX_REQ);
+						spin_lock(&GlobalMid_Lock);
+					} else {
+						break;
+					}	
+				} else {
+					if(tcon->ses->server->tcpStatus == CifsExiting) {
+						spin_unlock(&GlobalMid_Lock);
+						rc = -ENOENT;
+						goto cifs_read_async_cleanup;
+					}
+
+					/* update # of requests on the wire to server */
+					atomic_inc(&tcon->ses->server->inFlight);
+					spin_unlock(&GlobalMid_Lock);
+					request_obtained = 1;
+					break;
+				}
+			}
+		}
+		if (request_obtained)
+		{
+			request->state = ASYNC_IN_FLIGHT;
+			request->nbytes = 0;
+			rc = smb_init(SMB_COM_READ_ANDX, 12, tcon, (void **) &pSMB,
+				(void **) &pSMBr);
+			if (rc)
+				goto cifs_read_async_cleanup;
+			request->buf = (char *) pSMBr;
+			/* tcon and ses pointer are checked in smb_init */
+			if (tcon->ses->server == NULL) {
+				rc = -ECONNABORTED;
+				goto cifs_read_async_cleanup;
+			}
+			pSMB->AndXCommand = 0xFF;	/* none */
+			pSMB->Fid = netfid;
+			pSMB->OffsetLow = cpu_to_le32(request->lseek & 0xFFFFFFFF);
+			pSMB->OffsetHigh = cpu_to_le32(request->lseek >> 32);
+			pSMB->Remaining = 0;
+			pSMB->MaxCount = cpu_to_le16(request->count);
+			pSMB->MaxCountHigh = 0;
+			pSMB->ByteCount = 0;  /* no need to do le conversion since it is 0 */
+
+			rc = SendMsg(xid, tcon->ses, (struct smb_hdr *) pSMB, 
+					&request->midQ, 0);
+			if (rc) {
+				cERROR(1, ("Send error in read = %d", rc));
+				request->state = ASYNC_ALLOCATED;
+				goto cifs_read_async_cleanup;
+			}
+			request->state = ASYNC_SENT;
+			sent++;
+			/* Iterate to next request in async_q */
+			request = list_entry(request->qhead.next, struct async_rw_q_entry, qhead);
+				
+		}
+		response_obtained = 0;
+		/* Iterate through request_q, handling any receieved responses */
+		list_for_each_entry(response, async_q, qhead) {
+			if (!(response->state & ASYNC_SENT)) continue;
+			if (!(response->midQ->midState & MID_RESPONSE_RECEIVED)) continue;
+			response_obtained++;
+			pSMBr = (READ_RSP *) response->buf;
+			rc = ReceiveMsg(xid, tcon->ses, (struct smb_hdr *) pSMBr, 
+					&response->nbytes, response->midQ);
+			if (rc)
+				goto cifs_read_async_cleanup;
+			
+			/* Delete midQ and decrement number of requests on wire */
+			DeleteMidQEntry(response->midQ);
+			atomic_dec(&tcon->ses->server->inFlight);
+			wake_up(&tcon->ses->server->request_q);
+			
+			received++;
+			response->state = ASYNC_HANDLED;
+			
+			pSMBr->DataLength = le16_to_cpu(pSMBr->DataLength);
+			response->nbytes = pSMBr->DataLength;
+			/*check that DataLength would not go beyond end of SMB */
+			if ((response->nbytes > CIFS_MAX_MSGSIZE) 
+					|| (response->nbytes > response->count)) {
+				cFYI(1,("bad length %d for count %d",
+						response->nbytes,response->count));
+				rc = -EIO;
+				response->nbytes = 0;
+				goto cifs_read_async_cleanup;
+			}
+		}
+		/* Timing out is done like so:
+		 * The timeout is 15 seconds.  Everytime we either recieve a response
+		 * or send a request, we reset timeout to 15 seconds.  If we don't
+		 * send or receive, then we want to timeout, using schedule_timeout,
+		 * which will decrement timeout.  Once timeout reaches 0, we cleanup
+		 * and exit with an error */
+		if (response_obtained == 0 && 
+				request_obtained == 0 && !signal_pending(current)) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			timeout = schedule_timeout(timeout);
+			if (!timeout) {
+				/* If we reach 0 timeout without receiving a signal, cleanup */
+				if (tcon->ses->server->tcpStatus == CifsExiting)
+					rc = -EHOSTDOWN;
+				else if (tcon->ses->server->tcpStatus == CifsNeedReconnect)
+					rc = -EAGAIN;
+				else rc = -EIO;
+				goto cifs_read_async_cleanup;
+			} 
+		} else {
+			/* Reset timeout to 15 seconds */
+			timeout = 15 * HZ;
+		}
+	}
+
+	return rc;
+	
+	
+cifs_read_async_cleanup:
+	list_for_each_entry(request, async_q, qhead) {
+		if (request->state & ASYNC_SENT)
+			/* means there's a midQ entry */
+			DeleteMidQEntry(request->midQ);
+		if (request->state & (ASYNC_SENT | ASYNC_IN_FLIGHT)) {
+			/* Get out of request_q */
+			atomic_dec(&tcon->ses->server->inFlight);
+			wake_up(&tcon->ses->server->request_q);
+		}
+		/* Reset state */
+		request->state = ASYNC_ALLOCATED;
+	}
+	return rc;
+}
+
 /* If no buffer passed in, then caller wants to do the copy
 	as in the case of readpages so the SMB buffer must be
 	freed by the caller */
Index: cifs_debug.c
===================================================================
--- cifs_debug.c	(revision 27)
+++ cifs_debug.c	(working copy)
@@ -230,6 +230,11 @@
 			midCount.counter);
 	length += item_length;
 	buf += item_length;
+	item_length = 
+		sprintf(buf,"Peak Simultaneous Operations: %d\n",
+			peakMids.counter);
+	length += item_length;
+	buf += item_length;
 	item_length = sprintf(buf,
 		"\n%d session %d share reconnects\n",
 		tcpSesReconnectCount.counter,tconInfoReconnectCount.counter);
@@ -326,6 +331,10 @@
 static write_proc_t quotaEnabled_write;
 static read_proc_t linuxExtensionsEnabled_read;
 static write_proc_t linuxExtensionsEnabled_write;
+static read_proc_t async_routines_enabled_read;
+static write_proc_t async_routines_enabled_write;
+static read_proc_t readahead_max_pages_read;
+static write_proc_t readahead_max_pages_write;
 
 void
 cifs_proc_init(void)
@@ -399,6 +408,17 @@
 				packet_signing_enabled_read, NULL);
 	if (pde)
 		pde->write_proc = packet_signing_enabled_write;
+
+	pde =
+		create_proc_read_entry("AsyncRoutinesEnabled", 0, proc_fs_cifs,
+				async_routines_enabled_read, NULL);
+	if (pde)
+		pde->write_proc = async_routines_enabled_write;
+	pde =
+		create_proc_read_entry("ReadaheadMaxPages", 0, proc_fs_cifs,
+				readahead_max_pages_read, NULL);
+	if (pde)
+		pde->write_proc = readahead_max_pages_write;
 }
 
 void
@@ -421,6 +441,8 @@
 	remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs);
 	remove_proc_entry("ReenableOldCifsReaddirCode",proc_fs_cifs);
 	remove_proc_entry("LookupCacheEnabled",proc_fs_cifs);
+	remove_proc_entry("AsyncRoutinesEnabled",proc_fs_cifs);
+	remove_proc_entry("ReadaheadMaxPages",proc_fs_cifs);
 	remove_proc_entry("cifs", proc_root_fs);
 }
 
@@ -818,5 +840,82 @@
 	return count;
 }
 
+static int
+async_routines_enabled_read(char *page, char **start, off_t off,
+		       int count, int *eof, void *data)
+{
+	int len;
 
+	len = sprintf(page, "%d\n", async_routines_enabled);
+
+	len -= off;
+	*start = page + off;
+
+	if (len > count)
+		len = count;
+	else
+		*eof = 1;
+
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+static int
+async_routines_enabled_write(struct file *file, const char __user *buffer,
+			unsigned long count, void *data)
+{
+	char c;
+	int rc;
+
+	rc = get_user(c, buffer);
+	if (rc)
+		return rc;
+	if (c == '0' || c == 'n' || c == 'N')
+		async_routines_enabled = 0;
+	else if (c == '1' || c == 'y' || c == 'Y')
+		async_routines_enabled = 1;
+
+	return count;
+}
+
+static int
+readahead_max_pages_read(char *page, char **start, off_t off,
+		       int count, int *eof, void *data)
+{
+	int len;
+
+	len = sprintf(page, "%d\n", readahead_max_pages);
+
+	len -= off;
+	*start = page + off;
+
+	if (len > count)
+		len = count;
+	else
+		*eof = 1;
+
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+static int
+readahead_max_pages_write(struct file *file, const char __user *buffer,
+			unsigned long count, void *data)
+{
+	char str[4];
+	int rc;
+	int i;
+
+	rc = copy_from_user(str, buffer, 4);
+	if (rc < 0)
+		return rc;
+	rc = sscanf(str, "%d", &i); 
+	
+	if (rc)
+		readahead_max_pages = i;
+	
+	return count;
+}
 #endif
Index: inode.c
===================================================================
--- inode.c	(revision 27)
+++ inode.c	(working copy)
@@ -167,6 +167,7 @@
 			else
 				inode->i_fop = &cifs_file_ops;
 			inode->i_data.a_ops = &cifs_addr_ops;
+			inode->i_data.backing_dev_info = &cifs_sb->backing_dev_info;
 		} else if (S_ISDIR(inode->i_mode)) {
 			cFYI(1, (" Directory inode"));
 			inode->i_op = &cifs_dir_inode_ops;
Index: cifs_fs_sb.h
===================================================================
--- cifs_fs_sb.h	(revision 27)
+++ cifs_fs_sb.h	(working copy)
@@ -25,6 +25,8 @@
 #define CIFS_MOUNT_NO_XATTR  0x10 /* if set - disable xattr support */
 #define CIFS_MOUNT_MAP_SPECIAL_CHR 0x20 /* remap illegal chars in filenames */
 
+#include <linux/backing-dev.h>
+
 struct cifs_sb_info {
 	struct cifsTconInfo *tcon;	/* primary mount */
 	struct list_head nested_tcon_q;
@@ -36,5 +38,6 @@
 	mode_t	mnt_file_mode;
 	mode_t	mnt_dir_mode;
 	int     mnt_cifs_flags;
+    struct backing_dev_info backing_dev_info;
 };
 #endif				/* _CIFS_FS_SB_H */
Index: cifsproto.h
===================================================================
--- cifsproto.h	(revision 27)
+++ cifsproto.h	(working copy)
@@ -47,6 +47,14 @@
 			struct smb_hdr * /* input */ ,
 			struct smb_hdr * /* out */ ,
 			int * /* bytes returned */ , const int long_op);
+extern int ReceiveMsg(const unsigned int /* xid */, struct cifsSesInfo *,
+            struct smb_hdr * /* output */ , int * /*bytes returned */ ,
+            struct mid_q_entry *);
+extern int SendMsg(const unsigned int /* xid */, struct cifsSesInfo *,
+            struct smb_hdr * /* input */ , struct mid_q_entry **,
+            const int long_op);
+extern void DeleteMidQEntry(struct mid_q_entry *);
+
 extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid);
 extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length);
 extern int is_valid_oplock_break(struct smb_hdr *smb);
@@ -210,6 +218,8 @@
 extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon,
 			const int smb_file_id);
 
+extern int CIFSSMBReadAsync(const int xid, struct cifsTconInfo *tcon,
+            const int netfid, struct list_head *async_q);
 extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
 			const int netfid, unsigned int count,
 			const __u64 lseek, unsigned int *nbytes, char **buf);
Index: cifsfs.c
===================================================================
--- cifsfs.c	(revision 27)
+++ cifsfs.c	(working copy)
@@ -57,6 +57,8 @@
 unsigned int extended_security = 0;
 unsigned int ntlmv2_support = 0;
 unsigned int sign_CIFS_PDUs = 1;
+unsigned int async_routines_enabled = 1;
+unsigned int readahead_max_pages = 32;
 extern struct task_struct * oplockThread; /* remove sparse warning */
 struct task_struct * oplockThread = NULL;
 unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
@@ -232,6 +234,7 @@
 static kmem_cache_t *cifs_mid_cachep;
 kmem_cache_t *cifs_oplock_cachep;
 static kmem_cache_t *cifs_sm_req_cachep;
+kmem_cache_t *cifs_async_cachep;
 mempool_t *cifs_sm_req_poolp;
 mempool_t *cifs_req_poolp;
 mempool_t *cifs_mid_poolp;
@@ -751,7 +754,15 @@
 		mempool_destroy(cifs_mid_poolp);
 		return -ENOMEM;
 	}
-
+	cifs_async_cachep = kmem_cache_create("cifs_async_seq_ids",
+			    sizeof (struct async_rw_q_entry), 0,
+				SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (cifs_async_cachep == NULL) {
+		kmem_cache_destroy(cifs_mid_cachep);
+		kmem_cache_destroy(cifs_oplock_cachep);
+	    mempool_destroy(cifs_mid_poolp);
+		return -ENOMEM;
+		}
 	return 0;
 }
 
@@ -766,6 +777,10 @@
 	if (kmem_cache_destroy(cifs_oplock_cachep))
 		printk(KERN_WARNING
 		       "error not all oplock structures were freed\n");
+	if (kmem_cache_destroy(cifs_async_cachep))
+		printk(KERN_WARNING
+				"cifs_destroy_mids: error not all asyncs were freed\n");
+		
 }
 
 static int cifs_oplock_thread(void * dummyarg)
Index: file.c
===================================================================
--- file.c	(revision 27)
+++ file.c	(working copy)
@@ -35,6 +35,8 @@
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
 
+extern kmem_cache_t *cifs_async_cachep;
+
 static inline struct cifsFileInfo *cifs_init_private(
 	struct cifsFileInfo *private_data, struct inode *inode,
 	struct file *file, __u16 netfid)
@@ -1378,21 +1380,22 @@
 	return;
 }
 
-static int cifs_readpages(struct file *file, struct address_space *mapping,
-	struct list_head *page_list, unsigned num_pages)
+static inline int
+cifs_readpages_sync(struct file *file, struct address_space *mapping,
+		struct list_head *page_list, unsigned num_pages)
 {
 	int rc = -EACCES;
 	int xid;
 	loff_t offset;
-	struct page *page;
+	struct page * page;
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *pTcon;
 	int bytes_read = 0;
 	unsigned int read_size,i;
-	char *smb_read_data = NULL;
-	struct smb_com_read_rsp *pSMBr;
+	char * smb_read_data = NULL;
+	struct smb_com_read_rsp * pSMBr;
 	struct pagevec lru_pvec;
-	struct cifsFileInfo *open_file;
+	struct cifsFileInfo * open_file;
 
 	xid = GetXid();
 	if (file->private_data == NULL) {
@@ -1405,46 +1408,44 @@
 
 	pagevec_init(&lru_pvec, 0);
 
-	for (i = 0; i < num_pages; ) {
+	for(i = 0;i<num_pages;) {
 		unsigned contig_pages;
-		struct page *tmp_page;
+		struct page * tmp_page;
 		unsigned long expected_index;
 
-		if (list_empty(page_list))
+		if(list_empty(page_list)) {
 			break;
-
+		}
 		page = list_entry(page_list->prev, struct page, lru);
 		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
 
 		/* count adjacent pages that we will read into */
 		contig_pages = 0;
-		expected_index = 
-			list_entry(page_list->prev, struct page, lru)->index;
+		expected_index = list_entry(page_list->prev,struct page,lru)->index;
 		list_for_each_entry_reverse(tmp_page,page_list,lru) {
-			if (tmp_page->index == expected_index) {
+			if(tmp_page->index == expected_index) {
 				contig_pages++;
 				expected_index++;
-			} else
+			} else {
 				break; 
+			}
 		}
-		if (contig_pages + i >  num_pages)
+		if(contig_pages + i >  num_pages) {
 			contig_pages = num_pages - i;
+		}
 
-		/* for reads over a certain size could initiate async
-		   read ahead */
+		/* for reads over a certain size could initiate async read ahead */
 
 		read_size = contig_pages * PAGE_CACHE_SIZE;
 		/* Read size needs to be in multiples of one page */
-		read_size = min_t(const unsigned int, read_size,
-				  cifs_sb->rsize & PAGE_CACHE_MASK);
+		read_size = min_t(const unsigned int,read_size,cifs_sb->rsize & PAGE_CACHE_MASK);
 
 		rc = -EAGAIN;
-		while (rc == -EAGAIN) {
-			if ((open_file->invalidHandle) && 
-			    (!open_file->closePend)) {
+		while(rc == -EAGAIN) {
+			if ((open_file->invalidHandle) && (!open_file->closePend)) {
 				rc = cifs_reopen_file(file->f_dentry->d_inode,
 					file, TRUE);
-				if (rc != 0)
+				if(rc != 0)
 					break;
 			}
 
@@ -1453,19 +1454,18 @@
 				read_size, offset,
 				&bytes_read, &smb_read_data);
 			/* BB need to check return code here */
-			if (rc== -EAGAIN) {
-				if (smb_read_data) {
+			if(rc== -EAGAIN) {
+				if(smb_read_data) {
 					cifs_buf_release(smb_read_data);
 					smb_read_data = NULL;
 				}
 			}
 		}
 		if ((rc < 0) || (smb_read_data == NULL)) {
-			cFYI(1, ("Read error in readpages: %d", rc));
+			cFYI(1,("Read error in readpages: %d",rc));
 			/* clean up remaing pages off list */
 			while (!list_empty(page_list) && (i < num_pages)) {
-				page = list_entry(page_list->prev, struct page,
-						  lru);
+				page = list_entry(page_list->prev, struct page, lru);
 				list_del(&page->lru);
 				page_cache_release(page);
 			}
@@ -1475,65 +1475,257 @@
 			cifs_copy_cache_pages(mapping, page_list, bytes_read,
 				smb_read_data + 4 /* RFC1001 hdr */ +
 				le16_to_cpu(pSMBr->DataOffset), &lru_pvec);
-
-			i +=  bytes_read >> PAGE_CACHE_SHIFT;
+           i +=  bytes_read >> PAGE_CACHE_SHIFT;
 #ifdef CONFIG_CIFS_STATS
-			atomic_inc(&pTcon->num_reads);
-			spin_lock(&pTcon->stat_lock);
-			pTcon->bytes_read += bytes_read;
-			spin_unlock(&pTcon->stat_lock);
+            atomic_inc(&pTcon->num_reads);
+            spin_lock(&pTcon->stat_lock);
+            pTcon->bytes_read += bytes_read;
+            spin_unlock(&pTcon->stat_lock);
 #endif
-			if ((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) {
-				i++; /* account for partial page */
+            if((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) {
+                cFYI(1,("Partial page %d of %d read to cache",i++,num_pages));
 
-				/* server copy of file can have smaller size 
-				   than client */
-				/* BB do we need to verify this common case ? 
-				   this case is ok - if we are at server EOF 
-				   we will hit it on next read */
+                i++; /* account for partial page */
 
-			/* while (!list_empty(page_list) && (i < num_pages)) {
-					page = list_entry(page_list->prev, 
-							  struct page, list);
-					list_del(&page->list);
-					page_cache_release(page);
-				}
-				break; */
-			}
-		} else {
-			cFYI(1, ("No bytes read (%d) at offset %lld . "
-				 "Cleaning remaining pages from readahead list",
-				 bytes_read, offset));
-			/* BB turn off caching and do new lookup on 
-			   file size at server? */
-			while (!list_empty(page_list) && (i < num_pages)) {
-				page = list_entry(page_list->prev, struct page,
-						  lru);
-				list_del(&page->lru);
+                /* server copy of file can have smaller size than client */
+                /* BB do we need to verify this common case ? this case is ok -
+                if we are at server EOF we will hit it on next read */
 
-				/* BB removeme - replace with zero of page? */
-				page_cache_release(page);
-			}
-			break;
-		}
-		if (smb_read_data) {
-			cifs_buf_release(smb_read_data);
-			smb_read_data = NULL;
-		}
-		bytes_read = 0;
-	}
+            /* while(!list_empty(page_list) && (i < num_pages)) {
+                    page = list_entry(page_list->prev,struct page, list);
+                    list_del(&page->list);
+                    page_cache_release(page);
+                }
+                break; */
+            }
+        } else {
+            cFYI(1,("No bytes read (%d) at offset %lld . Cleaning remaining pages from readahead list",bytes_read,offset));
+            /* BB turn off caching and do new lookup on file size at server? */
+            while (!list_empty(page_list) && (i < num_pages)) {
+                page = list_entry(page_list->prev, struct page, lru);
+                list_del(&page->lru);
+                page_cache_release(page); /* BB removeme - replace with zero ofpage? */
+            }
+            break;
+        }
+        if(smb_read_data) {
+            cifs_buf_release(smb_read_data);
+            smb_read_data = NULL;
+        }
+        bytes_read = 0;
+    }
 
-	pagevec_lru_add(&lru_pvec);
+    pagevec_lru_add(&lru_pvec);
 
 /* need to free smb_read_data buf before exit */
-	if (smb_read_data) {
-		cifs_buf_release(smb_read_data);
-		smb_read_data = NULL;
-	} 
+    if(smb_read_data) {
+        cifs_buf_release(smb_read_data);
+        smb_read_data = NULL;
+    }
 
-	FreeXid(xid);
+    FreeXid(xid);
+    return rc;
+}
+
+static inline int
+cifs_readpages_async(struct file *file, struct address_space *mapping,
+        struct list_head *page_list, unsigned num_pages)
+{
+
+	int rc = -EACCES;
+	int xid;
+	struct page * page;
+   struct cifs_sb_info *cifs_sb;
+    struct cifsTconInfo *pTcon;
+    unsigned int i, max_pages;
+    struct smb_com_read_rsp * pSMBr;
+    struct pagevec lru_pvec;
+    struct cifsFileInfo * open_file;
+    struct list_head async_rw_q;
+    struct async_rw_q_entry *temp;
+    struct list_head *tmp_page_list;
+
+    xid = GetXid();
+    if (file->private_data == NULL) {
+        FreeXid(xid);
+        return -EBADF;
+    }
+    open_file = (struct cifsFileInfo *)file->private_data;
+    cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+    pTcon = cifs_sb->tcon;
+
+    pagevec_init(&lru_pvec, 0);
+    /* tmp_page_list is used to iterate through page_list without modifying
+     * page_list.  Could be a better way of doing this... */
+    tmp_page_list = page_list;
+
+    /* The maximum number of pages that we will request */
+    max_pages = (cifs_sb->rsize & PAGE_CACHE_MASK) >> PAGE_CACHE_SHIFT;
+
+    INIT_LIST_HEAD(&async_rw_q);
+
+    for(i = 0;i<num_pages;) {
+        unsigned contig_pages;
+        unsigned long expected_index;
+
+        if(list_empty(page_list)) {
+            break;
+        }
+
+        /* Allocate memory for async request */
+        temp = (struct async_rw_q_entry *) kmem_cache_alloc(cifs_async_cachep,
+                SLAB_KERNEL);
+
+        if (temp == NULL) {
+            cFYI(1, ("Unable to obtain async_q_entry"));
+            while (!list_empty(page_list) && (i < num_pages)) {
+                page = list_entry(page_list->prev, struct page, lru);
+                list_del(&page->lru);
+                page_cache_release(page);
+            }
+            FreeXid(xid);
+            return rc;
+        }
+        list_add_tail(&temp->qhead, &async_rw_q);
+
+        temp->buf = NULL;
+        temp->midQ = NULL;
+        temp->nbytes = 0;
+        temp->state = ASYNC_ALLOCATED;
+        page = list_entry(tmp_page_list->prev, struct page, lru);
+        temp->lseek = (loff_t)page->index << PAGE_CACHE_SHIFT;
+
+
+        /* count adjacent pages that we will read into */
+        contig_pages = 0;
+        expected_index =
+            list_entry(tmp_page_list->prev,struct page,lru)->index;
+        while (list_entry(tmp_page_list->prev, struct page, lru)->index
+                == expected_index && contig_pages < max_pages) {
+            tmp_page_list = tmp_page_list->prev;
+            contig_pages++;
+            expected_index++;
+        }
+        if(contig_pages + i >  num_pages) {
+            contig_pages = num_pages - i;
+        }
+
+        i += contig_pages;
+
+        temp->count = contig_pages * PAGE_CACHE_SIZE;
+    }
+
+    rc = -EAGAIN;
+    while(rc == -EAGAIN) {
+        if ((open_file->invalidHandle) && (!open_file->closePend)) {
+            rc = cifs_reopen_file(file->f_dentry->d_inode,
+                file, TRUE);
+            if(rc != 0)
+                break;
+        }
+        rc = CIFSSMBReadAsync(xid, pTcon,
+                open_file->netfid, &async_rw_q);
+        if(rc== -EAGAIN) {
+            list_for_each_entry(temp, &async_rw_q, qhead) {
+                /* There may be some buffers, release them */
+                if (temp->buf != NULL) {
+                    cifs_buf_release(temp->buf);
+                    temp->buf = NULL;
+                }
+            }
+        }
+    }
+
+    i = 0;
+    /* Handle each response */
+    list_for_each_entry(temp, &async_rw_q, qhead) {
+        if(list_empty(page_list)) {
+            break;
+        }
+        if ((rc < 0) || (temp->buf == NULL)) {
+            cFYI(1,("Read error in readpages: %d",rc));
+            /* clean up remaing pages off list */
+            while (!list_empty(page_list) && (i < num_pages)) {
+                page = list_entry(page_list->prev, struct page, lru);
+                list_del(&page->lru);
+                page_cache_release(page);
+            }
+            break;
+        } else if (temp->nbytes > 0) {
+            pSMBr = (struct smb_com_read_rsp *)temp->buf;
+            cifs_copy_cache_pages(mapping, page_list, temp->nbytes,
+                temp->buf + 4 /* RFC1001 hdr */ +
+                le16_to_cpu(pSMBr->DataOffset), &lru_pvec);
+
+            i += temp->nbytes >> PAGE_CACHE_SHIFT;
+#ifdef CONFIG_CIFS_STATS
+            atomic_inc(&pTcon->num_reads);
+            spin_lock(&pTcon->stat_lock);
+            pTcon->bytes_read += temp->nbytes;
+            spin_unlock(&pTcon->stat_lock);
+#endif
+            if((int)(temp->nbytes & PAGE_CACHE_MASK) != temp->nbytes) {
+                cFYI(1,("Partial page %d of %d read to cache",++i,num_pages));
+
+                i++; /* account for partial page */
+
+            }
+        } else {
+            cFYI(1,("No bytes read (%d) at offset %lld . Cleaning remaining pages from readahead list",temp->nbytes,temp->lseek));
+            /* BB turn off caching and do new lookup on file size at server? */
+            while (!list_empty(page_list) && (i < num_pages)) {
+                page = list_entry(page_list->prev, struct page, lru);
+                list_del(&page->lru);
+                page_cache_release(page); /* BB removeme - replace with zero of
+page? */
+            }
+            break;
+        }
+        if(temp->buf) {
+            cifs_buf_release(temp->buf);
+            /* Important, if there is no allocated memory, it MUST be NULL,
+             * otherwise we will end up trying to free it twice */
+            temp->buf = NULL;
+        }
+    }
+
+    pagevec_lru_add(&lru_pvec);
+
+/* need to free smb buffers before exit */
+
+    while (!list_empty(&async_rw_q)) {
+        temp = list_entry(async_rw_q.prev, struct async_rw_q_entry, qhead);
+        if(temp->buf) {
+            cifs_buf_release(temp->buf);
+            temp->buf = NULL;
+        }
+        list_del(&temp->qhead);
+        kmem_cache_free(cifs_async_cachep, temp);
+    }
+    /* Delete the rest of the pages */
+    while (!list_empty(page_list) && (i < num_pages)) {
+        page = list_entry(page_list->prev, struct page, lru);
+        list_del(&page->lru);
+        page_cache_release(page);
+    }
+    FreeXid(xid);
+    return rc;
+}
+
+static int
+cifs_readpages(struct file *file, struct address_space *mapping,
+        struct list_head *page_list, unsigned num_pages)
+{
+	int rc;
+	/* Call sync or async based on async_routines_enabled */
+	if (async_routines_enabled) {
+		rc = cifs_readpages_async(file, mapping, page_list, num_pages);
+	} else {
+		rc = cifs_readpages_sync(file, mapping, page_list, num_pages);
+	}
 	return rc;
 }
+		
 
 static int cifs_readpage_worker(struct file *file, struct page *page,
 	loff_t *poffset)
Index: connect.c
===================================================================
--- connect.c	(revision 27)
+++ connect.c	(working copy)
@@ -1024,6 +1024,10 @@
 			}
 		} else if (strnicmp(data, "noac", 4) == 0) {
 			printk(KERN_WARNING "CIFS: Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n");
+		} else if (strnicmp(data, "async", 5) == 0) {
+			async_routines_enabled = 1;
+		} else if (strnicmp(data, "noasync", 7) == 0) {
+			async_routines_enabled = 0;
 		} else
 			printk(KERN_WARNING "CIFS: Unknown mount option %s\n",data);
 	}
@@ -1629,6 +1633,9 @@
 			cifs_sb->rsize = PAGE_CACHE_SIZE;
 			cERROR(1,("Attempt to set readsize for mount to less than one page (4096)"));
 		}
+		/* Set the maximum read ahead pages */
+		cifs_sb->backing_dev_info.ra_pages = readahead_max_pages;
+			
 		cifs_sb->mnt_uid = volume_info.linux_uid;
 		cifs_sb->mnt_gid = volume_info.linux_gid;
 		cifs_sb->mnt_file_mode = volume_info.file_mode;
Index: transport.c
===================================================================
--- transport.c	(revision 27)
+++ transport.c	(working copy)
@@ -66,12 +66,18 @@
 	spin_lock(&GlobalMid_Lock);
 	list_add_tail(&temp->qhead, &ses->server->pending_mid_q);
 	atomic_inc(&midCount);
+#ifdef CONFIG_CIFS_STATS
+	/* FIXME: Is this thread safe?  Do we really need atomic_set, atomic_read
+	 * etc? */
+	if (atomic_read(&midCount) > atomic_read(&peakMids))
+		atomic_set(&peakMids, midCount.counter);
+#endif
 	temp->midState = MID_REQUEST_ALLOCATED;
 	spin_unlock(&GlobalMid_Lock);
 	return temp;
 }
 
-static void
+void
 DeleteMidQEntry(struct mid_q_entry *midEntry)
 {
 	spin_lock(&GlobalMid_Lock);
@@ -382,59 +388,18 @@
 
 #endif /* CIFS_EXPERIMENTAL */
 
-int
-SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
-	    struct smb_hdr *in_buf, struct smb_hdr *out_buf,
-	    int *pbytes_returned, const int long_op)
+/**
+ * Send a message.
+ * @param xid ??
+ * @param ses Session information
+ * @param in_buf Message buffer containing message to be sent
+ * */
+int SendMsg(const unsigned int xid, struct cifsSesInfo *ses,
+                struct smb_hdr *in_buf, struct mid_q_entry **midQout,
+				const int long_op)
 {
-	int rc = 0;
-	unsigned int receive_len;
-	unsigned long timeout;
-	struct mid_q_entry *midQ;
-
-	if (ses == NULL) {
-		cERROR(1,("Null smb session"));
-		return -EIO;
-	}
-	if(ses->server == NULL) {
-		cERROR(1,("Null tcp session"));
-		return -EIO;
-	}
-
-	/* Ensure that we do not send more than 50 overlapping requests 
-	   to the same server. We may make this configurable later or
-	   use ses->maxReq */
-	if(long_op == -1) {
-		/* oplock breaks must not be held up */
-		atomic_inc(&ses->server->inFlight);
-	} else {
-		spin_lock(&GlobalMid_Lock); 
-		while(1) {        
-			if(atomic_read(&ses->server->inFlight) >= 
-					cifs_max_pending){
-				spin_unlock(&GlobalMid_Lock);
-				wait_event(ses->server->request_q,
-					atomic_read(&ses->server->inFlight)
-					 < cifs_max_pending);
-				spin_lock(&GlobalMid_Lock);
-			} else {
-				if(ses->server->tcpStatus == CifsExiting) {
-					spin_unlock(&GlobalMid_Lock);
-					return -ENOENT;
-				}
-
-			/* can not count locking commands against total since
-			   they are allowed to block on server */
-					
-				if(long_op < 3) {
-				/* update # of requests on the wire to server */
-					atomic_inc(&ses->server->inFlight);
-				}
-				spin_unlock(&GlobalMid_Lock);
-				break;
-			}
-		}
-	}
+    int rc = 0;
+    struct mid_q_entry *midQ;
 	/* make sure that we sign in the same order that we send on this socket 
 	   and avoid races inside tcp sendmsg code that could cause corruption
 	   of smb data */
@@ -488,49 +453,37 @@
 		      (struct sockaddr *) &(ses->server->addr.sockAddr));
 	if(rc < 0) {
 		DeleteMidQEntry(midQ);
+		goto out_unlock;
+	} else {
 		up(&ses->server->tcpSem);
-		/* If not lock req, update # of requests on wire to server */
-		if(long_op < 3) {
-			atomic_dec(&ses->server->inFlight); 
-			wake_up(&ses->server->request_q);
-		}
+		*midQout = midQ;
 		return rc;
-	} else
-		up(&ses->server->tcpSem);
-	if (long_op == -1)
-		goto cifs_no_response_exit;
-	else if (long_op == 2) /* writes past end of file can take loong time */
-		timeout = 300 * HZ;
-	else if (long_op == 1)
-		timeout = 45 * HZ; /* should be greater than 
-			servers oplock break timeout (about 43 seconds) */
-	else if (long_op > 2) {
-		timeout = MAX_SCHEDULE_TIMEOUT;
-	} else
-		timeout = 15 * HZ;
-	/* wait for 15 seconds or until woken up due to response arriving or 
-	   due to last connection to this server being unmounted */
-	if (signal_pending(current)) {
-		/* if signal pending do not hold up user for full smb timeout
-		but we still give response a change to complete */
-		timeout = 2 * HZ;
-	}   
-
-	/* No user interrupts in wait - wreaks havoc with performance */
-	if(timeout != MAX_SCHEDULE_TIMEOUT) {
-		timeout += jiffies;
-		wait_event(ses->server->response_q,
-			(!(midQ->midState & MID_REQUEST_SUBMITTED)) || 
-			time_after(jiffies, timeout) || 
-			((ses->server->tcpStatus != CifsGood) &&
-			 (ses->server->tcpStatus != CifsNew)));
-	} else {
-		wait_event(ses->server->response_q,
-			(!(midQ->midState & MID_REQUEST_SUBMITTED)) || 
-			((ses->server->tcpStatus != CifsGood) &&
-			 (ses->server->tcpStatus != CifsNew)));
 	}
+out_unlock:
+	up(&ses->server->tcpSem);
+	/* If not lock req, update # of requests on wire to server */
+	if(long_op < 3) {
+		atomic_dec(&ses->server->inFlight); 
+		wake_up(&ses->server->request_q);
+	}
 
+	return rc;
+}
+
+/**
+ * Receive a message.
+ * @param xid transaction id (used for debug tracing of this request)
+ * @param ses Session information
+ * @param out_buf Message buffer to return received message
+ * @param pbytes_returned number of bytes returned in the buffer
+ */
+int ReceiveMsg(const unsigned int xid, struct cifsSesInfo *ses,
+        struct smb_hdr *out_buf, int *pbytes_returned,
+        struct mid_q_entry *midQ)
+{
+	int rc = 0;
+	unsigned int receive_len;
+
 	spin_lock(&GlobalMid_Lock);
 	if (midQ->resp_buf) {
 		spin_unlock(&GlobalMid_Lock);
@@ -555,17 +508,10 @@
 			}
 		}
 		spin_unlock(&GlobalMid_Lock);
-		DeleteMidQEntry(midQ);
-		/* If not lock req, update # of requests on wire to server */
-		if(long_op < 3) {
-			atomic_dec(&ses->server->inFlight); 
-			wake_up(&ses->server->request_q);
-		}
 		return rc;
 	}
-  
-	if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
-		cERROR(1, ("Frame too large received.  Length: %d  Xid: %d",
+	if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
+		cERROR(1, ("Frame too large received.  Length: %d  Xid: %d",
 			receive_len, xid));
 		rc = -EIO;
 	} else {		/* rcvd frame is ok */
@@ -586,8 +532,8 @@
 						ses->server->mac_signing_key,
 						midQ->sequence_number+1);
 				if(rc) {
-					cERROR(1,("Unexpected SMB signature"));
-					/* BB FIXME add code to kill session */
+					cERROR(1,("Unexpected packet signature received from server"));
+					/* BB FIXME - add code to kill session here */
 				}
 			}
 
@@ -607,19 +553,105 @@
 			cFYI(1,("Bad MID state? "));
 		}
 	}
-cifs_no_response_exit:
-	DeleteMidQEntry(midQ);
+	return rc;
+}
 
-	if(long_op < 3) {
-		atomic_dec(&ses->server->inFlight); 
-		wake_up(&ses->server->request_q);
+int
+SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
+	    struct smb_hdr *in_buf, struct smb_hdr *out_buf,
+	    int *pbytes_returned, const int long_op)
+{
+	int rc = 0;
+	unsigned long timeout;
+	struct mid_q_entry *midQ;
+
+	if (ses == NULL) {
+		cERROR(1,("Null smb session"));
+		return -EIO;
 	}
+	if(ses->server == NULL) {
+		cERROR(1,("Null tcp session"));
+		return -EIO;
+	}
 
-	return rc;
+	/* Ensure that we do not send more than 50 overlapping requests 
+	   to the same server. We may make this configurable later or
+	   use ses->maxReq */
+	if(long_op == -1) {
+		/* oplock breaks must not be held up */
+		atomic_inc(&ses->server->inFlight);
+	} else {
+		spin_lock(&GlobalMid_Lock); 
+		while(1) {        
+			if(atomic_read(&ses->server->inFlight) >= cifs_max_pending){
+				spin_unlock(&GlobalMid_Lock);
+				wait_event(ses->server->request_q,
+					atomic_read(&ses->server->inFlight)
+					 < cifs_max_pending);
+				spin_lock(&GlobalMid_Lock);
+			} else {
+				if(ses->server->tcpStatus == CifsExiting) {
+					spin_unlock(&GlobalMid_Lock);
+					return -ENOENT;
+				}
 
-out_unlock:
-	up(&ses->server->tcpSem);
-	/* If not lock req, update # of requests on wire to server */
+			/* can not count locking commands against total since
+			   they are allowed to block on server */
+					
+				if(long_op < 3) {
+				/* update # of requests on the wire to server */
+					atomic_inc(&ses->server->inFlight);
+				}
+				spin_unlock(&GlobalMid_Lock);
+				break;
+			}
+		}
+	}
+
+	rc = SendMsg(xid, ses, in_buf, &midQ, long_op);
+	if (rc)
+		return rc;
+
+	if (long_op == -1)
+		goto cifs_no_response_exit;
+	else if (long_op == 2) /* writes past end of file can take looooong time */
+		timeout = 300 * HZ;
+	else if (long_op == 1)
+		timeout = 45 * HZ; /* should be greater than 
+			servers oplock break timeout (about 43 seconds) */
+	else if (long_op > 2) {
+		timeout = MAX_SCHEDULE_TIMEOUT;
+	} else
+		timeout = 15 * HZ;
+	/* wait for 15 seconds or until woken up due to response arriving or 
+	   due to last connection to this server being unmounted */
+	if (signal_pending(current)) {
+		/* if signal pending do not hold up user for full smb timeout
+		but we still give response a chance to complete */
+		timeout = 2 * HZ;
+		
+	}   
+
+	/* No user interrupts in wait - wreaks havoc with performance */
+	if(timeout != MAX_SCHEDULE_TIMEOUT) {
+		timeout += jiffies;
+		wait_event(ses->server->response_q,
+			(midQ->midState & MID_RESPONSE_RECEIVED) || 
+			time_after(jiffies, timeout) || 
+			((ses->server->tcpStatus != CifsGood) &&
+			 (ses->server->tcpStatus != CifsNew)));
+	} else {
+		wait_event(ses->server->response_q,
+			(midQ->midState & MID_RESPONSE_RECEIVED) || 
+			((ses->server->tcpStatus != CifsGood) &&
+			 (ses->server->tcpStatus != CifsNew)));
+	}
+
+	rc = ReceiveMsg(xid, ses, out_buf, pbytes_returned, midQ);
+
+cifs_no_response_exit:
+	DeleteMidQEntry(midQ);
+
 	if(long_op < 3) {
 		atomic_dec(&ses->server->inFlight); 
 		wake_up(&ses->server->request_q);
Index: cifsglob.h
===================================================================
--- cifsglob.h	(revision 27)
+++ cifsglob.h	(working copy)
@@ -313,17 +313,31 @@
 	__u16 mid;		/* multiplex id */
 	__u16 pid;		/* process id */
 	__u32 sequence_number;  /* for CIFS signing */
+	__u16 command;		/* smb command code */
 	struct timeval when_sent;	/* time when smb sent */
 	struct cifsSesInfo *ses;	/* smb was sent to this server */
 	struct task_struct *tsk;	/* task waiting for response */
 	struct smb_hdr *resp_buf;	/* response buffer */
 	int midState;	/* wish this were enum but can not pass to wait_event */
-	__u8 command;	/* smb command code */
-	unsigned multiPart:1;	/* multiple responses to one SMB request */
-	unsigned largeBuf:1;    /* if valid response, is pointer to large buf */
-	unsigned multiResp:1;   /* multiple trans2 responses for one request  */
 };
 
+/* one of these for every read request sent by a read pages
+ * routine */
+struct async_rw_q_entry {
+	struct list_head qhead;	/* other entries from this command */
+	struct mid_q_entry *midQ;	/* the mid for this request */
+	char *buf;	/* response buffer */
+	unsigned int count;	/* number of bytes to read */
+	__u64 lseek;	/* file offset to read from */
+	unsigned int nbytes;	/* number of bytes read */
+	int state;	/* ASYNC_* flags tracking request progress (see below) */
+};
+
+#define ASYNC_ALLOCATED 1
+#define ASYNC_SENT  2
+#define ASYNC_HANDLED  4
+#define ASYNC_IN_FLIGHT 8
+
 struct oplock_q_entry {
 	struct list_head qhead;
 	struct inode * pinode;
@@ -422,6 +436,7 @@
 GLOBAL_EXTERN atomic_t bufAllocCount;
 GLOBAL_EXTERN atomic_t smBufAllocCount;      
 GLOBAL_EXTERN atomic_t midCount;
+GLOBAL_EXTERN atomic_t peakMids;
 
 /* Misc globals */
 GLOBAL_EXTERN unsigned int multiuser_mount;	/* if enabled allows new sessions
@@ -436,6 +451,9 @@
 GLOBAL_EXTERN unsigned int ntlmv2_support;  /* better optional password hash */
 GLOBAL_EXTERN unsigned int sign_CIFS_PDUs;  /* enable smb packet signing */
 GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
+GLOBAL_EXTERN unsigned int async_routines_enabled; /* enable reading/writing
+                                                      asynchronously */
+GLOBAL_EXTERN unsigned int readahead_max_pages; /* max pages to read at once */
 GLOBAL_EXTERN unsigned int CIFSMaxBufSize;  /* max size not including hdr */
 GLOBAL_EXTERN unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
 GLOBAL_EXTERN unsigned int cifs_min_small;  /* min size of small buf pool */


More information about the linux-cifs-client mailing list