>From 4cff08e44b97e0a5ac103ce91cba45e0265bf367 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 19 Jun 2012 10:58:11 -0700 Subject: [PATCH 01/15] Add uint64_t mid field to the files_struct. Ensure it is initialized so we know what mid created this file. --- source3/include/vfs.h | 1 + source3/smbd/files.c | 1 + 2 files changed, 2 insertions(+) diff --git a/source3/include/vfs.h b/source3/include/vfs.h index e6a9ef4..b0d7393 100644 --- a/source3/include/vfs.h +++ b/source3/include/vfs.h @@ -197,6 +197,7 @@ typedef struct files_struct { struct timeval open_time; uint32 access_mask; /* NTCreateX access bits (FILE_READ_DATA etc.) */ uint32 share_access; /* NTCreateX share constants (FILE_SHARE_READ|FILE_SHARE_WRITE|FILE_SHARE_DELETE). */ + uint64_t mid; /* The mid of the operation that created us. */ bool update_write_time_triggered; struct timed_event *update_write_time_event; diff --git a/source3/smbd/files.c b/source3/smbd/files.c index bec157b..29229e3 100644 --- a/source3/smbd/files.c +++ b/source3/smbd/files.c @@ -174,6 +174,7 @@ NTSTATUS file_new(struct smb_request *req, connection_struct *conn, i, fsp_fnum_dbg(fsp), (unsigned int)sconn->num_files)); if (req != NULL) { + fsp->mid = req->mid; req->chain_fsp = fsp; } -- 1.7.9.5 >From 8c3985d1a8887d0f7cfe379301c79c7f92910665 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 12 Jun 2012 13:43:48 -0700 Subject: [PATCH 02/15] Fix defer_open() fuction in the open code path to cope with a NULL lck parameter. 
--- source3/smbd/open.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/source3/smbd/open.c b/source3/smbd/open.c index 26d6971..5389425 100644 --- a/source3/smbd/open.c +++ b/source3/smbd/open.c @@ -1355,23 +1355,24 @@ static void defer_open(struct share_mode_lock *lck, struct deferred_open_record *state) { struct server_id self = messaging_server_id(req->sconn->msg_ctx); - int i; /* Paranoia check */ - - for (i=0; i<lck->data->num_share_modes; i++) { - struct share_mode_entry *e = &lck->data->share_modes[i]; - - if (is_deferred_open_entry(e) && - procid_equal(&self, &e->pid) && - (e->op_mid == req->mid)) { - DEBUG(0, ("Trying to defer an already deferred " - "request: mid=%llu, exiting\n", - (unsigned long long)req->mid)); - exit_server("attempt to defer a deferred request"); + if (lck) { + int i; + + for (i=0; i<lck->data->num_share_modes; i++) { + struct share_mode_entry *e = &lck->data->share_modes[i]; + + if (is_deferred_open_entry(e) && + procid_equal(&self, &e->pid) && + (e->op_mid == req->mid)) { + DEBUG(0, ("Trying to defer an already deferred " + "request: mid=%llu, exiting\n", + (unsigned long long)req->mid)); + exit_server("attempt to defer a deferred request"); + } } } - /* End paranoia check */ DEBUG(10,("defer_open_sharing_error: time [%u.%06u] adding deferred " @@ -1384,7 +1385,9 @@ static void defer_open(struct share_mode_lock *lck, state->id, (char *)state, sizeof(*state))) { exit_server("push_deferred_open_message_smb failed"); } - add_deferred_open(lck, req->mid, request_time, self, state->id); + if (lck) { + add_deferred_open(lck, req->mid, request_time, self, state->id); + } } -- 1.7.9.5 >From 54a462e7fa2fdedd374681f0f20805e9e07963e2 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 12 Jun 2012 13:46:45 -0700 Subject: [PATCH 03/15] Ensure the 'struct deferred_open_record' is always correctly initialized to zeros before use. 
--- source3/smbd/open.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source3/smbd/open.c b/source3/smbd/open.c index 5389425..2ab828b 100644 --- a/source3/smbd/open.c +++ b/source3/smbd/open.c @@ -1529,6 +1529,7 @@ static void schedule_defer_open(struct share_mode_lock *lck, between a 30 second delay due to oplock break, and a 1 second delay for share mode conflicts. */ + ZERO_STRUCT(state); state.delayed_for_oplocks = True; state.id = lck->data->id; @@ -2163,6 +2164,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn, between a 30 second delay due to oplock break, and a 1 second delay for share mode conflicts. */ + ZERO_STRUCT(state); state.delayed_for_oplocks = False; state.id = id; @@ -2304,6 +2306,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn, if (!NT_STATUS_IS_OK(status)) { struct deferred_open_record state; + ZERO_STRUCT(state); state.delayed_for_oplocks = False; state.id = id; -- 1.7.9.5 >From 374ab21b498d48ca4c38abe5039a7b8e37322064 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 12 Jun 2012 13:54:08 -0700 Subject: [PATCH 04/15] Add new bool field async_open to struct deferred_open_record. Not used yet. 
--- source3/smbd/open.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source3/smbd/open.c b/source3/smbd/open.c index 2ab828b..e3deaf5 100644 --- a/source3/smbd/open.c +++ b/source3/smbd/open.c @@ -35,6 +35,7 @@ extern const struct generic_mapping file_generic_mapping; struct deferred_open_record { bool delayed_for_oplocks; + bool async_open; struct file_id id; }; @@ -1531,6 +1532,7 @@ static void schedule_defer_open(struct share_mode_lock *lck, ZERO_STRUCT(state); state.delayed_for_oplocks = True; + state.async_open = false; state.id = lck->data->id; if (!request_timed_out(request_time, timeout)) { @@ -2166,6 +2168,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn, ZERO_STRUCT(state); state.delayed_for_oplocks = False; + state.async_open = false; state.id = id; if ((req != NULL) @@ -2308,6 +2311,7 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn, ZERO_STRUCT(state); state.delayed_for_oplocks = False; + state.async_open = false; state.id = id; /* Do it all over again immediately. In the second -- 1.7.9.5 >From c8d14d42013f18214e5c120d856fb7564618081f Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 12 Jun 2012 14:10:13 -0700 Subject: [PATCH 05/15] The core of the open path changes to allow for async opens. If the SMB_VFS_OPEN() function returns -1, EINTR -> NT_STATUS_RETRY, then queue the open up to be completed when the async open completes. --- source3/smbd/open.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/source3/smbd/open.c b/source3/smbd/open.c index e3deaf5..2b1146b 100644 --- a/source3/smbd/open.c +++ b/source3/smbd/open.c @@ -1541,6 +1541,27 @@ static void schedule_defer_open(struct share_mode_lock *lck, } /**************************************************************************** + Reschedule an open call that went asynchronous. 
+****************************************************************************/ + +static void schedule_async_open(struct timeval request_time, + struct smb_request *req) +{ + struct deferred_open_record state; + struct timeval timeout; + + timeout = timeval_set(20, 0); + + ZERO_STRUCT(state); + state.delayed_for_oplocks = false; + state.async_open = true; + + if (!request_timed_out(request_time, timeout)) { + defer_open(NULL, request_time, timeout, req, &state); + } +} + +/**************************************************************************** Work out what access_mask to use from what the client sent us. ****************************************************************************/ @@ -1772,10 +1793,17 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn, request with this mid. We'll use it later to see if this has timed out. */ - /* Remove the deferred open entry under lock. */ - remove_deferred_open_entry( - state->id, req->mid, - messaging_server_id(req->sconn->msg_ctx)); + /* If it was an async create retry, the file + didn't exist. */ + if (state->async_open) { + SET_STAT_INVALID(smb_fname->st); + file_existed = false; + } else { + /* Remove the deferred open entry under lock. */ + remove_deferred_open_entry( + state->id, req->mid, + messaging_server_id(req->sconn->msg_ctx)); + } /* Ensure we don't reprocess this message. */ remove_deferred_open_message_smb(req->sconn, req->mid); @@ -2223,6 +2251,9 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn, open_access_mask); if (!NT_STATUS_IS_OK(fsp_open)) { + if (NT_STATUS_EQUAL(fsp_open, NT_STATUS_RETRY)) { + schedule_async_open(request_time, req); + } TALLOC_FREE(lck); return fsp_open; } -- 1.7.9.5 >From 76a782818141967205eaee9f80862c2af3e7474e Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 12 Jun 2012 14:15:14 -0700 Subject: [PATCH 06/15] Make schedule_deferred_open_message_smb2() return an indication of success. 
--- source3/smbd/globals.h | 2 +- source3/smbd/smb2_create.c | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/source3/smbd/globals.h b/source3/smbd/globals.h index 8113b8e..7c1d796 100644 --- a/source3/smbd/globals.h +++ b/source3/smbd/globals.h @@ -311,7 +311,7 @@ bool open_was_deferred_smb2(struct smbd_server_connection *sconn, uint64_t mid); void remove_deferred_open_message_smb2( struct smbd_server_connection *sconn, uint64_t mid); -void schedule_deferred_open_message_smb2( +bool schedule_deferred_open_message_smb2( struct smbd_server_connection *sconn, uint64_t mid); bool push_deferred_open_message_smb2(struct smbd_smb2_request *smb2req, struct timeval request_time, diff --git a/source3/smbd/smb2_create.c b/source3/smbd/smb2_create.c index 9881ed2..d3075cb 100644 --- a/source3/smbd/smb2_create.c +++ b/source3/smbd/smb2_create.c @@ -1033,7 +1033,11 @@ static void smbd_smb2_create_request_dispatch_immediate(struct tevent_context *c } } -void schedule_deferred_open_message_smb2( +/***************************************************************** + Schedule a deferred open. Returns true if success, false on fail. +*****************************************************************/ + +bool schedule_deferred_open_message_smb2( struct smbd_server_connection *sconn, uint64_t mid) { struct smbd_smb2_create_state *state = NULL; @@ -1045,18 +1049,18 @@ void schedule_deferred_open_message_smb2( DEBUG(10,("schedule_deferred_open_message_smb2: " "can't find mid %llu\n", (unsigned long long)mid )); - return; + return false; } if (!smb2req->subreq) { - return; + return false; } if (!tevent_req_is_in_progress(smb2req->subreq)) { - return; + return false; } state = tevent_req_data(smb2req->subreq, struct smbd_smb2_create_state); if (!state) { - return; + return false; } /* Ensure we don't have any outstanding timer event. 
*/ @@ -1077,7 +1081,7 @@ void schedule_deferred_open_message_smb2( if (!state->im) { smbd_server_connection_terminate(smb2req->sconn, nt_errstr(NT_STATUS_NO_MEMORY)); - return; + return false; } DEBUG(10,("schedule_deferred_open_message_smb2: " @@ -1088,6 +1092,8 @@ void schedule_deferred_open_message_smb2( smb2req->sconn->ev_ctx, smbd_smb2_create_request_dispatch_immediate, smb2req); + + return true; } /********************************************************* -- 1.7.9.5 >From 691f8d37c9de1e8f432467b7ad9daa12210bf0a0 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 12 Jun 2012 14:21:12 -0700 Subject: [PATCH 07/15] Return a bool indication of success or failure for schedule_deferred_open_message_smb(). --- source3/smbd/process.c | 9 +++++---- source3/smbd/proto.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/source3/smbd/process.c b/source3/smbd/process.c index 9e1abb2..c87aa21 100644 --- a/source3/smbd/process.c +++ b/source3/smbd/process.c @@ -709,15 +709,14 @@ void remove_deferred_open_message_smb(struct smbd_server_connection *sconn, schedule it for immediate processing. 
****************************************************************************/ -void schedule_deferred_open_message_smb(struct smbd_server_connection *sconn, +bool schedule_deferred_open_message_smb(struct smbd_server_connection *sconn, uint64_t mid) { struct pending_message_list *pml; int i = 0; if (sconn->using_smb2) { - schedule_deferred_open_message_smb2(sconn, mid); - return; + return schedule_deferred_open_message_smb2(sconn, mid); } for (pml = sconn->deferred_open_queue; pml; pml = pml->next) { @@ -759,13 +758,15 @@ void schedule_deferred_open_message_smb(struct smbd_server_connection *sconn, TALLOC_FREE(pml->te); pml->te = te; DLIST_PROMOTE(sconn->deferred_open_queue, pml); - return; + return true; } } DEBUG(10,("schedule_deferred_open_message_smb: failed to " "find message mid %llu\n", (unsigned long long)mid )); + + return false; } /**************************************************************************** diff --git a/source3/smbd/proto.h b/source3/smbd/proto.h index 8196e69..ffdae8a 100644 --- a/source3/smbd/proto.h +++ b/source3/smbd/proto.h @@ -766,7 +766,7 @@ int srv_set_message(char *buf, bool zero); void remove_deferred_open_message_smb(struct smbd_server_connection *sconn, uint64_t mid); -void schedule_deferred_open_message_smb(struct smbd_server_connection *sconn, +bool schedule_deferred_open_message_smb(struct smbd_server_connection *sconn, uint64_t mid); bool open_was_deferred(struct smbd_server_connection *sconn, uint64_t mid); bool get_deferred_open_message_state(struct smb_request *smbreq, -- 1.7.9.5 >From 525180a294719cd9c9fba945e0e01239748668f6 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 19 Jun 2012 13:11:34 -0700 Subject: [PATCH 08/15] Don't allow asynchronous creates to be canceled in SMB2. 
--- source3/smbd/open.c | 17 ++++++++++++++--- source3/smbd/proto.h | 1 + source3/smbd/smb2_create.c | 5 +++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/source3/smbd/open.c b/source3/smbd/open.c index 2b1146b..daf48cc 100644 --- a/source3/smbd/open.c +++ b/source3/smbd/open.c @@ -1684,6 +1684,17 @@ void remove_deferred_open_entry(struct file_id id, uint64_t mid, } /**************************************************************************** + Return true if this is a state pointer to an asynchronous create. +****************************************************************************/ + +bool is_deferred_open_async(const void *ptr) +{ + const struct deferred_open_record *state = (const struct deferred_open_record *)ptr; + + return state->async_open; +} + +/**************************************************************************** Open a file with a share mode. Passed in an already created files_struct *. ****************************************************************************/ @@ -1787,18 +1798,18 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn, if (get_deferred_open_message_state(req, &request_time, &ptr)) { - - struct deferred_open_record *state = (struct deferred_open_record *)ptr; /* Remember the absolute time of the original request with this mid. We'll use it later to see if this has timed out. */ /* If it was an async create retry, the file didn't exist. */ - if (state->async_open) { + + if (is_deferred_open_async(ptr)) { SET_STAT_INVALID(smb_fname->st); file_existed = false; } else { + struct deferred_open_record *state = (struct deferred_open_record *)ptr; /* Remove the deferred open entry under lock. 
*/ remove_deferred_open_entry( state->id, req->mid, diff --git a/source3/smbd/proto.h b/source3/smbd/proto.h index ffdae8a..3527df5 100644 --- a/source3/smbd/proto.h +++ b/source3/smbd/proto.h @@ -621,6 +621,7 @@ bool open_match_attributes(connection_struct *conn, mode_t *returned_unx_mode); void remove_deferred_open_entry(struct file_id id, uint64_t mid, struct server_id pid); +bool is_deferred_open_async(const void *ptr); NTSTATUS open_file_fchmod(connection_struct *conn, struct smb_filename *smb_fname, files_struct **result); diff --git a/source3/smbd/smb2_create.c b/source3/smbd/smb2_create.c index d3075cb..8b92198 100644 --- a/source3/smbd/smb2_create.c +++ b/source3/smbd/smb2_create.c @@ -1163,6 +1163,11 @@ static bool smbd_smb2_create_cancel(struct tevent_req *req) smb2req = state->smb2req; mid = get_mid_from_smb2req(smb2req); + if (is_deferred_open_async(state->private_data.data)) { + /* Can't cancel an async create. */ + return false; + } + remove_deferred_open_entry(state->id, mid, messaging_server_id(smb2req->sconn->msg_ctx)); remove_deferred_open_message_smb2_internal(smb2req, mid); -- 1.7.9.5 >From 1eefc0ff3dbb82b2883efdea97c664f68b0a9122 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Fri, 8 Jun 2012 15:40:35 -0700 Subject: [PATCH 09/15] Fix init_aio_threadpool() so it can be setup for different threadpool handles with different completion functions. --- source3/modules/vfs_aio_pthread.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/source3/modules/vfs_aio_pthread.c b/source3/modules/vfs_aio_pthread.c index 36ce9ab..9b676d6 100644 --- a/source3/modules/vfs_aio_pthread.c +++ b/source3/modules/vfs_aio_pthread.c @@ -50,34 +50,37 @@ static void aio_pthread_handle_completion(struct event_context *event_ctx, uint16 flags, void *p); - /************************************************************************ Ensure thread pool is initialized. 
***********************************************************************/ -static bool init_aio_threadpool(struct vfs_handle_struct *handle) +static bool init_aio_threadpool(struct pthreadpool **pp_pool, + void (*completion_fn)(struct event_context *, + struct fd_event *, + uint16, + void *)) { struct fd_event *sock_event = NULL; int ret = 0; - if (pool) { + if (*pp_pool) { return true; } - ret = pthreadpool_init(aio_pending_size, &pool); + ret = pthreadpool_init(aio_pending_size, pp_pool); if (ret) { errno = ret; return false; } sock_event = tevent_add_fd(server_event_context(), NULL, - pthreadpool_signal_fd(pool), + pthreadpool_signal_fd(*pp_pool), TEVENT_FD_READ, - aio_pthread_handle_completion, + completion_fn, NULL); if (sock_event == NULL) { - pthreadpool_destroy(pool); - pool = NULL; + pthreadpool_destroy(*pp_pool); + *pp_pool = NULL; return false; } @@ -87,7 +90,6 @@ static bool init_aio_threadpool(struct vfs_handle_struct *handle) return true; } - /************************************************************************ Worker function - core of the pthread aio engine. This is the function that actually does the IO. @@ -172,7 +174,7 @@ static int aio_pthread_read(struct vfs_handle_struct *handle, struct aio_private_data *pd = NULL; int ret; - if (!init_aio_threadpool(handle)) { + if (!init_aio_threadpool(&pool, aio_pthread_handle_completion)) { return -1; } @@ -209,7 +211,7 @@ static int aio_pthread_write(struct vfs_handle_struct *handle, struct aio_private_data *pd = NULL; int ret; - if (!init_aio_threadpool(handle)) { + if (!init_aio_threadpool(&pool, aio_pthread_handle_completion)) { return -1; } -- 1.7.9.5 >From dc3562220bae5762bb8e80428a41ee79c29e50e8 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 19 Jun 2012 13:59:07 -0700 Subject: [PATCH 10/15] The async open engine. Fix for open race condition from an idea by Volker. 
--- source3/modules/vfs_aio_pthread.c | 422 +++++++++++++++++++++++++++++++++++++ 1 file changed, 422 insertions(+) diff --git a/source3/modules/vfs_aio_pthread.c b/source3/modules/vfs_aio_pthread.c index 9b676d6..790ab1d 100644 --- a/source3/modules/vfs_aio_pthread.c +++ b/source3/modules/vfs_aio_pthread.c @@ -583,6 +583,423 @@ static int aio_pthread_suspend(struct vfs_handle_struct *handle, return ret; } +/* NB. This threadpool is shared over all + instances of this VFS module, as is the + current jobid. */ + +static struct pthreadpool *open_pool; +static int aio_pthread_open_jobid; + +struct aio_open_private_data { + struct aio_open_private_data *prev, *next; + int jobid; + int dir_fd; + int flags; + mode_t mode; + uint64_t mid; + bool in_progress; + struct smb_filename *smb_fname; + struct smbd_server_connection *sconn; + uid_t uid; + gid_t gid; + /* Returns. */ + int ret_fd; + int ret_errno; +}; + +/* List of outstanding requests we have. */ +static struct aio_open_private_data *open_pd_list; + +/************************************************************************ + Find the open private data by jobid. +***********************************************************************/ + +static struct aio_open_private_data *find_open_private_data_by_jobid(int jobid) +{ + struct aio_open_private_data *opd; + + for (opd = open_pd_list; opd != NULL; opd = opd->next) { + if (opd->jobid == jobid) { + return opd; + } + } + + return NULL; +} + +/************************************************************************ + Find the open private data by mid. 
+***********************************************************************/ + +static struct aio_open_private_data *find_open_private_data_by_mid(uint64_t mid) +{ + struct aio_open_private_data *opd; + + for (opd = open_pd_list; opd != NULL; opd = opd->next) { + if (opd->mid == mid) { + return opd; + } + } + + return NULL; +} + +/************************************************************************ + Callback when an open completes. +***********************************************************************/ + +static void aio_open_handle_completion(struct event_context *event_ctx, + struct fd_event *event, + uint16 flags, + void *p) +{ + struct aio_open_private_data *opd = NULL; + int jobid = 0; + int ret; + + DEBUG(10, ("aio_open_handle_completion called with flags=%d\n", + (int)flags)); + + if ((flags & EVENT_FD_READ) == 0) { + return; + } + + ret = pthreadpool_finished_job(open_pool, &jobid); + if (ret) { + smb_panic("aio_open_handle_completion"); + return; + } + + opd = find_open_private_data_by_jobid(jobid); + if (opd == NULL) { + DEBUG(1, ("aio_open_handle_completion cannot find jobid %d\n", + jobid)); + smb_panic("aio_open_handle_completion - no jobid"); + return; + } + + DEBUG(10,("aio_open_handle_completion: jobid %d completed\n", + jobid )); + + opd->in_progress = false; + + if (opd->dir_fd != -1) { + /* We are done with the + dir_fd and can close it + early. */ + close(opd->dir_fd); + opd->dir_fd = -1; + } + + /* Find outstanding event and reschedule. */ + if (!schedule_deferred_open_message_smb(opd->sconn, opd->mid)) { + /* Outstanding event didn't exist or was + cancelled. Free up the fd and throw + away the result. */ + if (opd->ret_fd != -1) { + close(opd->ret_fd); + opd->ret_fd = -1; + } + TALLOC_FREE(opd); + } +} + +/***************************************************************** + The core of the async open code - the worker function. Note we + use the new openat() system call to avoid any problems with + current working directory changes. 
+*****************************************************************/ + +static void aio_open_worker(void *private_data) +{ + struct aio_open_private_data *opd = + (struct aio_open_private_data *)private_data; + + /* Force O_EXCL to deal with race condition. */ + opd->ret_fd = openat(opd->dir_fd, + opd->smb_fname->base_name, + opd->flags | O_EXCL, + opd->mode); + + if (opd->ret_fd == -1) { + opd->ret_errno = errno; + } else { + /* Create was successful. */ + opd->ret_errno = 0; + } +} + +/************************************************************************ + Open private data destructor. +***********************************************************************/ + +static int opd_destructor(struct aio_open_private_data *opd) +{ + if (opd->dir_fd != -1) { + close(opd->dir_fd); + } + DLIST_REMOVE(open_pd_list, opd); + return 0; +} + +/************************************************************************ + Create and initialize a private data struct for async open. +***********************************************************************/ + +static struct aio_open_private_data *create_private_open_data(const files_struct *fsp, + int flags, + mode_t mode) +{ + NTSTATUS status; + struct aio_open_private_data *opd = talloc_zero(NULL, + struct aio_open_private_data); + if (!opd) { + return NULL; + } + + opd->jobid = aio_pthread_open_jobid++; + opd->dir_fd = -1; + opd->ret_fd = -1; + opd->ret_errno = EINPROGRESS; + opd->flags = flags; + opd->mode = mode; + opd->mid = fsp->mid; + opd->in_progress = true; + opd->uid = geteuid(); + opd->gid = getegid(); + opd->sconn = fsp->conn->sconn; + + status = copy_smb_filename(opd, fsp->fsp_name, &opd->smb_fname); + if (!NT_STATUS_IS_OK(status)) { + TALLOC_FREE(opd); + return NULL; + } + +#if defined(O_DIRECTORY) + opd->dir_fd = open(".", O_RDONLY|O_DIRECTORY); +#else + opd->dir_fd = open(".", O_RDONLY); +#endif + if (opd->dir_fd == -1) { + TALLOC_FREE(opd); + return NULL; + } + + talloc_set_destructor(opd, opd_destructor); + 
DLIST_ADD_END(open_pd_list, opd, struct aio_open_private_data *); + return opd; +} + +/***************************************************************** + Setup an async open. +*****************************************************************/ + +static int open_async(const files_struct *fsp, + int flags, + mode_t mode) +{ + struct aio_open_private_data *opd = NULL; + int ret; + + if (!init_aio_threadpool(&open_pool, + aio_open_handle_completion)) { + return -1; + } + + opd = create_private_open_data(fsp, flags, mode); + if (opd == NULL) { + DEBUG(10, ("open_async: Could not create private data.\n")); + return -1; + } + + DEBUG(5,("open_async: file %s\n", + smb_fname_str_dbg(opd->smb_fname))); + + ret = pthreadpool_add_job(open_pool, + opd->jobid, + aio_open_worker, + (void *)opd); + if (ret) { + errno = ret; + return -1; + } + + /* Cause the calling code to reschedule us. */ + errno = EINTR; /* Maps to NT_STATUS_RETRY. */ + return -1; +} + +/***************************************************************** + If the parent directory has the SETGID bit set, ensure the opd + gid is that of the parent directory, not the creating process. +*****************************************************************/ + +static void parent_dir_setguid_adjust(struct aio_open_private_data *opd) +{ + struct stat st; + char *dname = NULL; + + if (!parent_dirname(talloc_tos(), + opd->smb_fname->base_name, + &dname, + NULL)) { + return; + } + if (stat(dname, &st) == -1) { + TALLOC_FREE(dname); + return; + } + if (st.st_mode & S_ISGID) { + opd->gid = st.st_gid; + } + TALLOC_FREE(dname); +} + +/***************************************************************** + We know the file didn't exist, and now it does. If it doesn't + have the right ownership we hit a process uid change and must fix + it up. 
+*****************************************************************/ + +static void fix_file_ownership(struct aio_open_private_data *opd) +{ + int ret; + struct stat st; + + if (opd->ret_fd == -1) { + return; + } + + ret = fstat(opd->ret_fd, &st); + if (ret == -1) { + return; + } + + if (st.st_uid == opd->uid && st.st_gid == opd->gid) { + /* The common case. */ + return; + } + + /* We know we created this (see O_EXCL above). + So make sure it has our uid and the correct + gid. NB. POSIX ACL inheritance doesn't affect + us if we have the correct uid and gid set + on the file. */ + + parent_dir_setguid_adjust(opd); + + become_root(); + ret = fchown(opd->ret_fd, opd->uid, opd->gid); + unbecome_root(); + + DEBUG(2,("fix_file_ownership: fchown on file %s " + "to uid %u gid %u returned %d\n", + smb_fname_str_dbg(opd->smb_fname), + opd->uid, + opd->gid, + ret)); +} + +/***************************************************************** + Look for a matching SMB2 mid. If we find it we're rescheduled, + just return the completed open. +*****************************************************************/ + +static bool find_completed_open(files_struct *fsp, + int *p_fd, + int *p_errno) +{ + struct aio_open_private_data *opd; + + opd = find_open_private_data_by_mid(fsp->mid); + if (!opd) { + return false; + } + + DEBUG(5,("find_completed_open: Found existing open for " + "file %s\n", + smb_fname_str_dbg(fsp->fsp_name))); + + if (opd->in_progress) { + /* Disaster ! This is an open + timeout. Just panic. */ + smb_panic("find_completed_open - in_progress\n"); + } + + if (opd->ret_fd == -1) { + /* Was it EEXIST and we didn't have O_EXCL in the + passed in flags ? If so, then just re-open + synchronously. 
*/ + if (!(opd->flags & O_EXCL) && + opd->ret_errno == EEXIST) { + opd->ret_fd = open(opd->smb_fname->base_name, + opd->flags, + opd->mode); + if (opd->ret_fd == -1) { + opd->ret_errno = errno; + } else { + opd->ret_errno = 0; + } + } + } else { + /* Fix up the ownership if we have to. */ + fix_file_ownership(opd); + } + + *p_fd = opd->ret_fd; + *p_errno = opd->ret_errno; + + /* Now we can free the opd. */ + TALLOC_FREE(opd); + return true; +} + +static int aio_open_fn(vfs_handle_struct *handle, + struct smb_filename *smb_fname, + files_struct *fsp, + int flags, + mode_t mode) +{ + int my_errno = 0; + int fd = -1; + bool aio_allow_open = lp_parm_bool( + SNUM(handle->conn), "aio_pthread", "aio open", false); + + if (!aio_allow_open) { + /* aio opens turned off. */ + return open(smb_fname->base_name, flags, mode); + } + + if (!(flags & O_CREAT)) { + /* Only creates matter. */ + return open(smb_fname->base_name, flags, mode); + } + + if (VALID_STAT(fsp->fsp_name->st)) { + /* Only creates on a non-existent file matter. */ + return open(smb_fname->base_name, flags, mode); + } + + if (smb_fname->stream_name) { + /* Don't handle stream opens. */ + return open(smb_fname->base_name, flags, mode); + } + + /* See if this is a reentrant call - i.e. is this a + restart of an existing open that just completed. + */ + + if (find_completed_open(fsp, + &fd, + &my_errno)) { + errno = my_errno; + return fd; + } + + /* Ok, it's a create call on a new file - pass it to a thread helper. */ + return open_async(fsp, flags, mode); +} + static int aio_pthread_connect(vfs_handle_struct *handle, const char *service, const char *user) { @@ -595,6 +1012,10 @@ static int aio_pthread_connect(vfs_handle_struct *handle, const char *service, * requests and Windows clients don't use this anyway. * Essentially we want this to be unlimited unless smb.conf * says different. + * + * NB. 
Also note - this is the maximum number of threads + * used in a threadpool across *ALL* instances of this module + * loaded into an smbd. *********************************************************************/ aio_pending_size = lp_parm_int( SNUM(handle->conn), "aio_pthread", "aio num threads", 100); @@ -603,6 +1024,7 @@ static int aio_pthread_connect(vfs_handle_struct *handle, const char *service, static struct vfs_fn_pointers vfs_aio_pthread_fns = { .connect_fn = aio_pthread_connect, + .open_fn = aio_open_fn, .aio_read_fn = aio_pthread_read, .aio_write_fn = aio_pthread_write, .aio_return_fn = aio_pthread_return_fn, -- 1.7.9.5 >From 6e1c7861ea8cd58d2e7e224b52cb29375903ce8b Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 19 Jun 2012 14:06:06 -0700 Subject: [PATCH 11/15] Fix the build for aio_pthread now we also use the openat() call. --- source3/configure.in | 21 ++++++++++++++++++++- source3/wscript | 6 +++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/source3/configure.in b/source3/configure.in index 6e324a1..0b9e964 100644 --- a/source3/configure.in +++ b/source3/configure.in @@ -5716,6 +5716,25 @@ if test x"$samba_cv_HAVE_POSIX_FADVISE" = x"yes"; then fi ############################################ +# See if we have the openat syscall. + +AC_CACHE_CHECK([for openat], + samba_cv_HAVE_OPENAT,[ + AC_TRY_LINK([ +#if defined(HAVE_UNISTD_H) +#include <unistd.h> +#endif +#include <fcntl.h>], + [int fd = openat(AT_FDCWD, ".", O_RDONLY);], + samba_cv_HAVE_OPENAT=yes, + samba_cv_HAVE_OPENAT=no)]) + +if test x"$samba_cv_HAVE_OPENAT" = x"yes"; then + AC_DEFINE(HAVE_OPENAT,1, + [Whether openat is available]) +fi + +############################################ # See if we have the Linux splice syscall. 
case "$host_os" in @@ -6173,7 +6192,7 @@ if test x"$enable_pthreadpool" = x"yes" -a x"$samba_cv_HAVE_PTHREAD" = x"yes"; t LIBS="$LIBS $PTHREAD_LDFLAGS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" AC_SUBST(PTHREADPOOL_OBJ, "lib/pthreadpool/pthreadpool.o") - if test x"$samba_cv_HAVE_AIO" = x"yes"; then + if test x"$samba_cv_HAVE_AIO" = x"yes" -a x"$samba_cv_HAVE_OPENAT" = x"yes" ; then default_shared_modules="$default_shared_modules vfs_aio_pthread" fi else diff --git a/source3/wscript b/source3/wscript index 5b21c3b..f8fbf2a 100755 --- a/source3/wscript +++ b/source3/wscript @@ -352,6 +352,10 @@ return acl_get_perm_np(permset_d, perm); msg="Checking whether Linux readahead is available", headers='unistd.h fcntl.h') conf.CHECK_DECLS('readahead', headers='fcntl.h', always=True) + conf.CHECK_CODE('int fd = openat(AT_FDCWD, ".", O_RDONLY);', + 'HAVE_OPENAT', + msg='Checking for openat', + headers='fcntl.h') if Options.options.with_aio_support: conf.CHECK_FUNCS_IN('aio_read', 'aio') @@ -1394,7 +1398,7 @@ main() { if conf.CONFIG_SET('HAVE_AIO') and (conf.CONFIG_SET('HAVE_MSGHDR_MSG_CONTROL') or conf.CONFIG_SET('HAVE_MSGHDR_MSG_ACCTRIGHTS')): default_shared_modules.extend(TO_LIST('vfs_aio_fork')) - if conf.CONFIG_SET('HAVE_AIO') and Options.options.with_pthreadpool: + if conf.CONFIG_SET('HAVE_AIO') and conf.CONFIG_SET('HAVE_OPENAT') and Options.options.with_pthreadpool: default_shared_modules.extend(TO_LIST('vfs_aio_pthread')) if conf.CONFIG_SET('HAVE_AIO') and conf.CONFIG_SET('HAVE_LINUX_KERNEL_AIO'): -- 1.7.9.5 >From 24dfad6751fd66963e77071f6908b7b08a4bb694 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 19 Jun 2012 14:36:32 -0700 Subject: [PATCH 12/15] Add documentation for aio_pthread:aio open = true. 
--- docs-xml/manpages-3/vfs_aio_pthread.8.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs-xml/manpages-3/vfs_aio_pthread.8.xml b/docs-xml/manpages-3/vfs_aio_pthread.8.xml index 3e41ee9..db4385d 100644 --- a/docs-xml/manpages-3/vfs_aio_pthread.8.xml +++ b/docs-xml/manpages-3/vfs_aio_pthread.8.xml @@ -54,6 +54,14 @@ terminate themselves if idle for one second. + aio_pthread can also use a pthread pool to process + file creation for both SMB1 and SMB2 requests, if the smb.conf + parameter aio_pthread:aio open = true + is set. This is a per-share parameter. Allowing + asynchronous file creation can greatly help performance + for applications that create many files. + + Note that the smb.conf parameters aio read size and aio write size must also be set appropriately -- 1.7.9.5 >From 12f4c2e04fb83011144fa2afdb4087796e0b5d81 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Fri, 8 Jun 2012 15:09:17 -0700 Subject: [PATCH 13/15] Make get_windows_lock_ref_count() public. The async close code needs it. --- source3/locking/posix.c | 2 +- source3/locking/proto.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/source3/locking/posix.c b/source3/locking/posix.c index 02d9b6d..3575ab8 100644 --- a/source3/locking/posix.c +++ b/source3/locking/posix.c @@ -495,7 +495,7 @@ static void decrement_windows_lock_ref_count(files_struct *fsp) Fetch the lock ref count. 
****************************************************************************/ -static int get_windows_lock_ref_count(files_struct *fsp) +int get_windows_lock_ref_count(files_struct *fsp) { struct lock_ref_count_key tmp; TDB_DATA dbuf; diff --git a/source3/locking/proto.h b/source3/locking/proto.h index f6a6f2e..e9bfa2a 100644 --- a/source3/locking/proto.h +++ b/source3/locking/proto.h @@ -208,6 +208,7 @@ bool is_posix_locked(files_struct *fsp, bool posix_locking_init(bool read_only); bool posix_locking_end(void); void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount); +int get_windows_lock_ref_count(files_struct *fsp); int fd_close_posix(struct files_struct *fsp); bool set_posix_lock_windows_flavour(files_struct *fsp, uint64_t u_offset, -- 1.7.9.5 >From 1e3f5865bfae8aa8d8bb5b2a51f18107eee39f3f Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 19 Jun 2012 14:42:09 -0700 Subject: [PATCH 14/15] From an idea by Volker, add asynchronous close as well. --- source3/modules/vfs_aio_pthread.c | 188 ++++++++++++++++++++++++++++++++++++- 1 file changed, 187 insertions(+), 1 deletion(-) diff --git a/source3/modules/vfs_aio_pthread.c b/source3/modules/vfs_aio_pthread.c index 790ab1d..8408104 100644 --- a/source3/modules/vfs_aio_pthread.c +++ b/source3/modules/vfs_aio_pthread.c @@ -1000,6 +1000,191 @@ static int aio_open_fn(vfs_handle_struct *handle, return open_async(fsp, flags, mode); } +/* NB. This threadpool is shared over all + instances of this VFS module, as is the + current jobid. */ + +static struct pthreadpool *close_pool; +static int aio_pthread_close_jobid; + +struct aio_close_private_data { + struct aio_close_private_data *prev, *next; + int jobid; + int fd; + int ret; + int ret_errno; +}; + +/* List of outstanding requests we have. */ +static struct aio_close_private_data *close_pd_list; + +/************************************************************************ + Find the close private data by jobid.
+***********************************************************************/ + +static struct aio_close_private_data *find_close_private_data_by_jobid(int jobid) +{ + struct aio_close_private_data *cpd; + + for (cpd = close_pd_list; cpd != NULL; cpd = cpd->next) { + if (cpd->jobid == jobid) { + return cpd; + } + } + + return NULL; +} + +/************************************************************************ + Callback when a close completes. +***********************************************************************/ + +static void aio_close_handle_completion(struct event_context *event_ctx, + struct fd_event *event, + uint16 flags, + void *p) +{ + struct aio_close_private_data *cpd = NULL; + int jobid = 0; + int ret; + + DEBUG(10, ("aio_close_handle_completion called with flags=%d\n", + (int)flags)); + + if ((flags & EVENT_FD_READ) == 0) { + return; + } + + ret = pthreadpool_finished_job(close_pool, &jobid); + if (ret) { + smb_panic("aio_close_handle_completion"); + return; + } + + cpd = find_close_private_data_by_jobid(jobid); + if (cpd == NULL) { + DEBUG(1, ("aio_close_handle_completion cannot find jobid %d\n", + jobid)); + smb_panic("aio_close_handle_completion - no jobid"); + return; + } + + DEBUG(10,("aio_close_handle_completion: jobid %d completed\n", + jobid )); + + /* We're done. Nothing to report back. */ + cpd->fd = -1; + TALLOC_FREE(cpd); +} + +/***************************************************************** + The core of the async close code - the worker function. +*****************************************************************/ + +static void aio_close_worker(void *private_data) +{ + struct aio_close_private_data *cpd = + (struct aio_close_private_data *)private_data; + + /* We could report an error if we wanted to.. */ + cpd->ret = close(cpd->fd); + + if (cpd->ret == -1) { + cpd->ret_errno = errno; + } else { + /* Close was successful. 
*/ + cpd->ret_errno = 0; + } +} + +/************************************************************************ + Close private data destructor. +***********************************************************************/ + +static int cpd_destructor(struct aio_close_private_data *cpd) +{ + DLIST_REMOVE(close_pd_list, cpd); + return 0; +} + +/************************************************************************ + Create and initialize a private data struct for async close. +***********************************************************************/ + +static struct aio_close_private_data *create_private_close_data(const files_struct *fsp) +{ + struct aio_close_private_data *cpd = talloc_zero(NULL, + struct aio_close_private_data); + if (!cpd) { + return NULL; + } + + cpd->jobid = aio_pthread_close_jobid++; + cpd->fd = fsp->fh->fd; + cpd->ret = -1; + cpd->ret_errno = EINPROGRESS; + + talloc_set_destructor(cpd, cpd_destructor); + DLIST_ADD_END(close_pd_list, cpd, struct aio_close_private_data *); + return cpd; +} + +/***************************************************************** + Setup an async close. +*****************************************************************/ + +static int close_async(const files_struct *fsp) +{ + struct aio_close_private_data *cpd = NULL; + int ret; + + if (!init_aio_threadpool(&close_pool, + aio_close_handle_completion)) { + return -1; + } + + cpd = create_private_close_data(fsp); + if (cpd == NULL) { + DEBUG(10, ("close_async: Could not create private data.\n")); + return -1; + } + + DEBUG(5,("close_async: file %s\n", + smb_fname_str_dbg(fsp->fsp_name))); + + ret = pthreadpool_add_job(close_pool, + cpd->jobid, + aio_close_worker, + (void *)cpd); + if (ret) { + errno = ret; + return -1; + } + + return 0; +} + +static int aio_close_fn(vfs_handle_struct *handle, files_struct *fsp) +{ + bool aio_allow_close = lp_parm_bool(SNUM(handle->conn), + "aio_pthread", "aio close", false); + if (!aio_allow_close) { + /* aio close turned off. 
*/ + return fd_close_posix(fsp); + } + + if (!lp_locking(fsp->conn->params) || + !lp_posix_locking(fsp->conn->params)) { + return close_async(fsp); + } + + if (get_windows_lock_ref_count(fsp)) { + return fd_close_posix(fsp); + } + + return close_async(fsp); +} + static int aio_pthread_connect(vfs_handle_struct *handle, const char *service, const char *user) { @@ -1014,7 +1199,7 @@ static int aio_pthread_connect(vfs_handle_struct *handle, const char *service, * says different. * * NB. Also note - this is the maximum number of threads - * used in a threadpool across *ALL* instances of this module + * used in each threadpool across *ALL* instances of this module * loaded into an smbd. *********************************************************************/ aio_pending_size = lp_parm_int( @@ -1025,6 +1210,7 @@ static int aio_pthread_connect(vfs_handle_struct *handle, const char *service, static struct vfs_fn_pointers vfs_aio_pthread_fns = { .connect_fn = aio_pthread_connect, .open_fn = aio_open_fn, + .close_fn = aio_close_fn, .aio_read_fn = aio_pthread_read, .aio_write_fn = aio_pthread_write, .aio_return_fn = aio_pthread_return_fn, -- 1.7.9.5 >From 97000cfeea2490b0a9cba3438e033bb5efb30707 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Tue, 19 Jun 2012 14:43:26 -0700 Subject: [PATCH 15/15] Document aio_pthread:aio close = true. --- docs-xml/manpages-3/vfs_aio_pthread.8.xml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs-xml/manpages-3/vfs_aio_pthread.8.xml b/docs-xml/manpages-3/vfs_aio_pthread.8.xml index db4385d..ec2bcc9 100644 --- a/docs-xml/manpages-3/vfs_aio_pthread.8.xml +++ b/docs-xml/manpages-3/vfs_aio_pthread.8.xml @@ -55,10 +55,11 @@ aio_pthread can also use a pthread pool to process - file creation for both SMB1 and SMB2 requests, if the smb.conf - parameter aio_pthread:aio open = true - is set. This is a per-share parameter. 
Allowing - asynchronous file creation can greatly help performance + file creation and file close for both SMB1 and SMB2 requests, + if the smb.conf parameters aio_pthread:aio open = true + and aio_pthread:aio close = true + are set. These are per-share parameters. Allowing + asynchronous file creation and close can greatly help performance for applications that create many files. -- 1.7.9.5