[linux-cifs-client] Re: [PATCH] do not attempt to close cifs files which are already closed due to session reconnect

Jeff Layton jlayton at redhat.com
Thu Nov 20 13:02:41 GMT 2008


On Wed, 19 Nov 2008 23:24:47 -0600
"Steve French" <smfrench at gmail.com> wrote:

> On Wed, Nov 19, 2008 at 6:04 AM, Jeff Layton <jlayton at redhat.com> wrote:
> > On Tue, 18 Nov 2008 21:46:59 -0600
> > "Steve French" <smfrench at gmail.com> wrote:
> >
> >> In hunting down why we could get EBADF returned on close in some cases
> >> after reconnect, I found out that cifs_close was checking whether the
> >> share (mounted server export) was valid (i.e. did not need reconnect
> >> due to session crash/timeout), but we weren't checking whether the
> >> handle was valid (i.e. the share was reconnected, but the file handle
> >> had not been reopened yet).  The patch also adds some locking around
> >> the updates/checks of the cifs_file->invalidHandle flag.
> >>
> 
> >
> > Do we need a lock around this check for invalidHandle? Could this race
> > with mark_open_files_invalid()?
> The attached patch may reduce the window of opportunity for the
> race you describe.  Do you think we need another flag? (one to keep
> requests other than a write retry from using this handle, and one to
> prevent reopen when the handle is about to be closed after we have
> given up on write retries getting through?)
> 


So that I make sure I understand the problem...

We have a file that is getting ready to be closed (closePend is set),
but the tcon has been reconnected and the filehandle is now invalid.
You want to reopen the file only to flush data out of the cache, and
only if there are actually dirty pages to be flushed.

If closePend is set, then the userspace filehandle is already dead and
no further pages can be dirtied, right?

Rather than a new flag, I suggest checking whether there are dirty
pages attached to the inode. If so, we'll want to reopen the file and
flush it before finally closing it.

Or am I missing something here?
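
Concretely, something like this untested sketch is what I have in
mind. It assumes pSMBFile->pInode points at the VFS inode (as it does
elsewhere in fs/cifs) and uses mapping_tagged() to ask the pagecache
whether anything is dirty; cifs_has_dirty_pages() is a hypothetical
helper, not anything in the posted patch:

	/* sketch only -- would live in fs/cifs/file.c */
	static bool cifs_has_dirty_pages(struct cifsFileInfo *pSMBFile)
	{
		struct address_space *mapping = pSMBFile->pInode->i_mapping;

		/* dirty-tagged pages mean there is data worth flushing,
		   so reopening the invalidated handle buys us something */
		return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY);
	}

Then cifs_close() could skip the reopen-and-flush dance entirely when
that returns false.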

> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
> index 6449e1a..6ccb9c7 100644
> --- a/fs/cifs/file.c
> +++ b/fs/cifs/file.c
> @@ -489,14 +489,13 @@ int cifs_close(struct inode *inode, struct file *file)
>  	if (pSMBFile) {
>  		struct cifsLockInfo *li, *tmp;
> 
> +		write_lock(&GlobalSMBSeslock);
>  		pSMBFile->closePend = true;
> +		write_unlock(&GlobalSMBSeslock);
>  		if (pTcon) {
> -			/* no sense reconnecting to close a file that is
> -			   already closed */
> -			if (!pTcon->need_reconnect) {
> -				timeout = 2;
> -				while ((atomic_read(&pSMBFile->wrtPending) != 0)
> -					&& (timeout <= 2048)) {
> +			timeout = 2;
> +			while ((atomic_read(&pSMBFile->wrtPending) != 0)
> +				&& (timeout <= 2048)) {
>  					/* Give write a better chance to get to
>  					server ahead of the close.  We do not
>  					want to add a wait_q here as it would
> @@ -504,19 +503,28 @@ int cifs_close(struct inode *inode, struct file *file)
>  					the struct would be in each open file,
>  					but this should give enough time to
>  					clear the socket */
> -					cFYI(DBG2,
> -						("close delay, write pending"));
> -					msleep(timeout);
> -					timeout *= 4;
> -				}
> -				if (atomic_read(&pSMBFile->wrtPending))
> -					cERROR(1,
> -						("close with pending writes"));
> -				rc = CIFSSMBClose(xid, pTcon,
> -						  pSMBFile->netfid);
> +				cFYI(DBG2, ("close delay, write pending"));
> +				msleep(timeout);
> +				timeout *= 4;
>  			}
> +			if (atomic_read(&pSMBFile->wrtPending))
> +				cERROR(1, ("close with pending writes"));
> +
> +			/* no sense reconnecting to close a file that is
> +			   already closed */
> +			write_lock(&GlobalSMBSeslock);
> +			if (!pTcon->need_reconnect &&
> +			    !pSMBFile->invalidHandle) {
> +				write_unlock(&GlobalSMBSeslock);
> +				rc = CIFSSMBClose(xid, pTcon, pSMBFile->netfid);
> +			} else
> +				write_unlock(&GlobalSMBSeslock);
>  		}
> 
> +		write_lock(&GlobalSMBSeslock);
> +		list_del(&pSMBFile->flist);
> +		list_del(&pSMBFile->tlist);
> +		write_unlock(&GlobalSMBSeslock);
>  		/* Delete any outstanding lock records.
>  		   We'll lose them when the file is closed anyway. */
>  		mutex_lock(&pSMBFile->lock_mutex);
> @@ -525,11 +533,6 @@ int cifs_close(struct inode *inode, struct file *file)
>  			kfree(li);
>  		}
>  		mutex_unlock(&pSMBFile->lock_mutex);
> -
> -		write_lock(&GlobalSMBSeslock);
> -		list_del(&pSMBFile->flist);
> -		list_del(&pSMBFile->tlist);
> -		write_unlock(&GlobalSMBSeslock);
>  		timeout = 10;
>  		/* We waited above to give the SMBWrite a chance to issue
>  		   on the wire (so we do not get SMBWrite returning EBADF
> @@ -587,15 +590,18 @@ int cifs_closedir(struct inode *inode, struct file *file)
>  		pTcon = cifs_sb->tcon;
> 
>  		cFYI(1, ("Freeing private data in close dir"));
> +		write_lock(&GlobalSMBSeslock);
>  		if (!pCFileStruct->srch_inf.endOfSearch &&
>  		    !pCFileStruct->invalidHandle) {
>  			pCFileStruct->invalidHandle = true;
> +			write_unlock(&GlobalSMBSeslock);
>  			rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
>  			cFYI(1, ("Closing uncompleted readdir with rc %d",
>  				 rc));
>  			/* not much we can do if it fails anyway, ignore rc */
>  			rc = 0;
> -		}
> +		} else
> +			write_unlock(&GlobalSMBSeslock);
>  		ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
>  		if (ptmp) {
>  			cFYI(1, ("closedir free smb buf in srch struct"));
> diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
> index addd1dc..9ee3f68 100644
> --- a/fs/cifs/misc.c
> +++ b/fs/cifs/misc.c
> @@ -555,12 +555,14 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
>  				continue;
> 
>  			cifs_stats_inc(&tcon->num_oplock_brks);
> +			write_lock(&GlobalSMBSeslock);
>  			list_for_each(tmp2, &tcon->openFileList) {
>  				netfile = list_entry(tmp2, struct cifsFileInfo,
>  						     tlist);
>  				if (pSMB->Fid != netfile->netfid)
>  					continue;
> 
> +				write_unlock(&GlobalSMBSeslock);
>  				read_unlock(&cifs_tcp_ses_lock);
>  				cFYI(1, ("file id match, oplock break"));
>  				pCifsInode = CIFS_I(netfile->pInode);
> @@ -576,6 +578,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
> 
>  				return true;
>  			}
> +			write_unlock(&GlobalSMBSeslock);
>  			read_unlock(&cifs_tcp_ses_lock);
>  			cFYI(1, ("No matching file for oplock break"));
>  			return true;
> diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
> index 58d5729..9f51f9b 100644
> --- a/fs/cifs/readdir.c
> +++ b/fs/cifs/readdir.c
> @@ -741,11 +741,14 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
>  	   (index_to_find < first_entry_in_buffer)) {
>  		/* close and restart search */
>  		cFYI(1, ("search backing up - close and restart search"));
> +		write_lock(&GlobalSMBSeslock);
>  		if (!cifsFile->srch_inf.endOfSearch &&
>  		    !cifsFile->invalidHandle) {
>  			cifsFile->invalidHandle = true;
> +			write_unlock(&GlobalSMBSeslock);
>  			CIFSFindClose(xid, pTcon, cifsFile->netfid);
> -		}
> +		} else
> +			write_unlock(&GlobalSMBSeslock);
>  		if (cifsFile->srch_inf.ntwrk_buf_start) {
>  			cFYI(1, ("freeing SMB ff cache buf on search rewind"));
>  			if (cifsFile->srch_inf.smallBuf)
> 
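
The locking pattern in the hunks above looks sane to me, FWIW -- take
GlobalSMBSeslock, test-and-set invalidHandle, and drop the lock before
going on the wire, since the SMB round trip can sleep while the rwlock
cannot be held across it. Boiled down to a sketch (cfile being
shorthand for the cifsFileInfo in question):

	write_lock(&GlobalSMBSeslock);
	if (!cfile->invalidHandle) {
		cfile->invalidHandle = true;
		/* drop the spinning lock before the network call,
		   which can sleep waiting on the server */
		write_unlock(&GlobalSMBSeslock);
		CIFSFindClose(xid, pTcon, cfile->netfid);
	} else
		write_unlock(&GlobalSMBSeslock);

That also means mark_open_files_invalid() can no longer race with the
test, which was my original concern.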
> > Also, initiate_cifs_search() allocates a cifsFileInfo struct and then
> > sets invalidHandle to true. Is there a possible race between those
> > operations? It may be safe, but it might be nice to comment why that
> > is. In hindsight it might have been better to invert this flag (i.e.
> > validHandle) so that it would be false immediately after kzalloc()
> > until it is set true...
> Reversing the flag might be clearer, but in the case you describe
> setting invalidHandle shouldn't race, because the only place it
> is checked in the readdir-related path is on close of the
> directory, and a race between close and a readdir search rewind
> is unlikely to be an issue.
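
Fair enough. FWIW, the reason I like the inverted flag is that
kzalloc() would then hand back the safe state for free. A hypothetical
shape (validHandle being the renamed field, not anything that exists
today):

	struct cifsFileInfo *cfile = kzalloc(sizeof(*cfile), GFP_KERNEL);
	if (cfile == NULL)
		return -ENOMEM;
	/* cfile->validHandle is already false here, so nothing can
	   mistake the handle for usable before the open completes */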
> 
> 
> -- 
> Thanks,
> 
> Steve


-- 
Jeff Layton <jlayton at redhat.com>

