impersonation part7 (smb_vfs_ev_glue)

Stefan Metzmacher metze at samba.org
Tue Jul 24 19:25:14 UTC 2018


Hi,

>>> Here goes part 6, the kitchen sink before the fancy VFS stuff comes next.

Here's the magic glue to make the impersonation usable in the SMB_VFS layer.

Here's a pipeline with the current state:
https://gitlab.com/samba-team/devel/samba/pipelines/26422478

Here're the verbose comments from source3/smbd/vfs.c:

/*
 * Design of the smb_vfs_ev_glue infrastructure:
 *
 * smb_vfs_ev_glue makes it possible to pass
 * down a tevent_context and pthreadpool_tevent
 * used for impersonation through the SMB_VFS stack.
 *
 * tevent_req based functions take a tevent_context as
 * their 2nd argument, e.g.:
 *
 *   struct tevent_req *something_send(TALLOC_CTX *mem_ctx,
 *                                     struct tevent_context *ev,
 *                                     ...);
 *
 * For the SMB_VFS stack we'll use the following:
 *
 *   struct tevent_req *SMB_VFS_SOMETHING_SEND(TALLOC_CTX *mem_ctx,
 *                                             struct smb_vfs_ev_glue *evg,
 *                                             ...);
 *
 * Typically the 'evg' is just passed through the stack down
 * to vfs_default.c. In order to do real work a
 * tevent_context and pthreadpool_tevent are required
 * to call a 'something()' syscall in an async fashion.
 * Therefore it will use the following to get the pointers
 * back out of evg:
 *
 *   ev = smb_vfs_ev_glue_ev_ctx(evg);
 *   tp = smb_vfs_ev_glue_tp_chdir_safe(evg);
 *
 * If some function in the stack is sure it needs to run as root
 * to get some information (after careful checks!), it used
 * to frame that work into become_root()/unbecome_root().
 * This can't work when using async functions!
 * Now it's possible to use something like this (simplified!):
 *
 *   ev = smb_vfs_ev_glue_ev_ctx(evg);
 *   root_evg = smb_vfs_ev_glue_get_root_glue(evg);
 *   subreq = SMB_VFS_SOMETHING_NEXT_SEND(state, root_evg, ...);
 *   if (tevent_req_nomem(subreq, req)) {
 *        return tevent_req_post(req, ev);
 *   }
 *   tevent_req_set_callback(subreq, module_something_done, req);
 *
 *   return req;
 *
 *   static void module_something_done(struct tevent_req *subreq)
 *   {
 *      ...
 *
 *      status = SMB_VFS_SOMETHING_NEXT_RECV(subreq, &state->aio_state);
 *      TALLOC_FREE(subreq);
 *
 *      tevent_req_done(req);
 *   }
 *
 * In the code above the something_send_fn() function of the next
 * module in the stack will be called as root.
 * The smb_vfs_call_something_*() glue code, which is the magic
 * behind the SMB_VFS_SOMETHING[_NEXT]_{SEND,RECV}() macros,
 * will look like this:
 *
 *   struct smb_vfs_call_something_state {
 *       ssize_t (*recv_fn)(struct tevent_req *req,
 *                          struct vfs_aio_state *aio_state,
 *                          ...);
 *       ssize_t retval;
 *       struct vfs_aio_state vfs_aio_state;
 *       ...
 *   };
 *
 *   static void smb_vfs_call_something_done(struct tevent_req *subreq);
 *
 *   struct tevent_req *smb_vfs_call_something_send(
 *                  TALLOC_CTX *mem_ctx,
 *                  const struct smb_vfs_ev_glue *evg,
 *                  struct vfs_handle_struct *handle,
 *                  ...)
 *   {
 *       struct tevent_req *req = NULL;
 *       struct smb_vfs_call_something_state *state = NULL;
 *       struct tevent_req *subreq = NULL;
 *       bool ok;
 *
 *       req = tevent_req_create(mem_ctx, &state,
 *                               struct smb_vfs_call_something_state);
 *       if (req == NULL) {
 *           return NULL;
 *       }
 *
 *       VFS_FIND(something_send);
 *       state->recv_fn = handle->fns->something_recv_fn;
 *
 *       ok = smb_vfs_ev_glue_push_use(evg, req);
 *       if (!ok) {
 *           tevent_req_error(req, EIO);
 *           return tevent_req_post(req, evg->return_ev);
 *       }
 *
 *       subreq = handle->fns->something_send_fn(mem_ctx,
 *                                               evg->next_glue,
 *                                               handle,
 *                                               ...);
 *       smb_vfs_ev_glue_pop_use(evg);
 *
 *       if (tevent_req_nomem(subreq, req)) {
 *           return tevent_req_post(req, evg->return_ev);
 *       }
 *       tevent_req_set_callback(subreq, smb_vfs_call_something_done, req);
 *
 *       return req;
 *   }
 *
 *   static void smb_vfs_call_something_done(struct tevent_req *subreq)
 *   {
 *       struct tevent_req *req =
 *           tevent_req_callback_data(subreq,
 *           struct tevent_req);
 *       struct smb_vfs_call_something_state *state =
 *           tevent_req_data(req,
 *           struct smb_vfs_call_something_state);
 *
 *       state->retval = state->recv_fn(subreq,
 *                                      &state->vfs_aio_state,
 *                                      ....);
 *       TALLOC_FREE(subreq);
 *
 *       if (state->retval == -1) {
 *           tevent_req_error(req, state->vfs_aio_state.error);
 *           return;
 *       }
 *       tevent_req_done(req);
 *   }
 *
 *   ssize_t smb_vfs_call_something_recv(struct tevent_req *req,
 *                                        struct vfs_aio_state *aio_state,
 *                                        ....)
 *   {
 *       struct smb_vfs_call_something_state *state =
 *           tevent_req_data(req,
 *           struct smb_vfs_call_something_state);
 *       ssize_t retval = state->retval;
 *
 *       if (tevent_req_is_unix_error(req, &aio_state->error)) {
 *           tevent_req_received(req);
 *           return -1;
 *       }
 *
 *       *aio_state = state->vfs_aio_state;
 *       ...
 *
 *       tevent_req_received(req);
 *       return retval;
 *   }
 *
 * The most important details are these:
 *
 * 1. smb_vfs_ev_glue_push_use(evg, req):
 *    - is a no-op if evg->run_ev and evg->return_ev are the same,
 *      it means that we're already at the correct impersonation
 *      and don't need any additional work to be done.
 *    - Otherwise it will call
 *      tevent_req_defer_callback(req, evg->return_ev)
 *      This means that tevent_req_error() and tevent_req_done()
 *      will just trigger an immediate event on evg->return_ev.
 *      Therefore the callers callback function will be called
 *      in the impersonation of evg->return_ev! This is important
 *      in order to get the impersonation correct on the way back
 *      through the stack.
 *    - It will call tevent_context_push_use(evg->run_ev),
 *      which will start the impersonation to run_ev.
 *      So the following code runs in the correct context.
 * 2. handle->fns->something_send_fn(..., evg->next_glue, ...):
 *    - We're passing evg->next_glue to the next module.
 *    - Typically evg->next_glue points to evg again,
 *      but that is not the case if smb_vfs_ev_glue_create_switch()
 *      is used.
 *    - In case evg->run_ev and evg->return_ev are not the same,
 *      next_glue will have run_ev and return_ev pointing to evg->run_ev.
 *      So that the switch from evg->run_ev to evg->return_ev
 *      happens on the correct boundary.
 * 3. smb_vfs_ev_glue_pop_use(evg):
 *    - is a no-op if evg->run_ev and evg->return_ev are the same,
 *      it means that we're already at the correct impersonation
 *      and don't need any additional work to be done.
 *    - It will call tevent_context_pop_use(evg->run_ev),
 *      which will revert the impersonation done in
 *      smb_vfs_ev_glue_push_use().
 * 4. smb_vfs_call_something_send():
 *    - This is called in the environment of evg->return_ev.
 *    - So it needs to use tevent_req_post(req, evg->return_ev)
 * 5. smb_vfs_call_something_done():
 *    - This is called in the environment of evg->run_ev
 * 6. smb_vfs_call_something_recv():
 *    - This is called in the environment of evg->return_ev again.
 *
 *
 * Here are some more complex examples:
 *
 * Example 1: only user_evg without switch to root
 *
 * SMBD: already impersonated user_evg
 *  evg'1 = smb2_req->user_evg
 *  r'1 = SMB_VFS_*_SEND(evg'1); # smb_vfs_call_*_send()
 *  |
 *  | smb_vfs_ev_glue_push_use(evg'1, r'1);
 *  | |
 *  | | # no-op run_ev == return_ev
 *  | |
 *  | evg'2 = evg'1->next_glue;
 *  | r'2 = module1_*_send(evg'2);
 *  | |
 *  | | evg'3 = evg'2
 *  | | r'3 = SMB_VFS_*_NEXT_SEND(evg'3); # smb_vfs_call_*_send()
 *  | | |
 *  | | | smb_vfs_ev_glue_push_use(evg'3, r'3);
 *  | | | |
 *  | | | | # no-op run_ev == return_ev
 *  | | | |
 *  | | | evg'4 = evg'3->next_glue;
 *  | | | r'4 = module2_*_send(evg'4);
 *  | | | |
 *  | | | | evg'5 = evg'4
 *  | | | | r'5 = SMB_VFS_*_NEXT_SEND(evg'5); # smb_vfs_call_*_send()
 *  | | | | |
 *  | | | | | smb_vfs_ev_glue_push_use(evg'5, r'5);
 *  | | | | | |
 *  | | | | | | # no-op run_ev == return_ev
 *  | | | | | |
 *  | | | | | evg'6 = evg'5->next_glue;
 *  | | | | | r'6 = default_*_send(evg'6);
 *  | | | | | |
 *  | | | | | | ev'6 = smb_vfs_ev_glue_ev_ctx(evg'6)
 *  | | | | | | tp'6 = smb_vfs_ev_glue_tp_chdir_safe(evg'6)
 *  | | | | | | r'7 = pthreadpool_tevent_send(ev'6, tp'6);
 *  | | | | | | |
 *  | | | | | | | pthread_create...
 *  | | | | | | |
 *  | | | | | | tevent_req_set_callback(r'7, default_*_done, r'6);
 *  | | | | | |
 *  | | | | | smb_vfs_ev_glue_pop_use(evg'5);
 *  | | | | | |
 *  | | | | | | # no-op run_ev == return_ev
 *  | | | | | |
 *  | | | | | tevent_req_set_callback(r'6, smb_vfs_call_*_done, r'5);
 *  | | | | |
 *  | | | | tevent_req_set_callback(r'5, module2_*_done, r'4);
 *  | | | |
 *  | | | smb_vfs_ev_glue_pop_use(evg'3);
 *  | | | |
 *  | | | | # no-op run_ev == return_ev
 *  | | | |
 *  | | | tevent_req_set_callback(r'4, smb_vfs_call_*_done, r'3);
 *  | | |
 *  | | tevent_req_set_callback(r'3, module1_*_done, r'2);
 *  | |
 *  | smb_vfs_ev_glue_pop_use(evg'1);
 *  | |
 *  | | # no-op run_ev == return_ev
 *  | |
 *  | tevent_req_set_callback(r'2, smb_vfs_call_*_done, r'1);
 *  |
 *  tevent_req_set_callback(r'1, smbd_*_done, smb2_req);
 *
 *  Worker thread finished, just one event handler processes
 *  everything as there's no impersonation change.
 *
 *  tevent_common_invoke_immediate_handler:
 *  |
 *  | before_immediate_handler(ev'6);
 *  | |
 *  | | change_to_user()
 *  | |
 *  | pthreadpool_tevent_job_done(r'7);
 *  | |
 *  | | default_*_done(r'7);
 *  | | |
 *  | | | pthreadpool_tevent_recv(r'7);
 *  | | | TALLOC_FREE(r'7);
 *  | | | tevent_req_done(r'6);
 *  | | | |
 *  | | | | smb_vfs_call_*_done(r'6);
 *  | | | | |
 *  | | | | | default_*_recv(r'6);
 *  | | | | | TALLOC_FREE(r'6)
 *  | | | | | tevent_req_done(r'5);
 *  | | | | | |
 *  | | | | | | module2_*_done(r'5):
 *  | | | | | | |
 *  | | | | | | | SMB_VFS_*_recv(r'5); # smb_vfs_call_*_recv()
 *  | | | | | | | TALLOC_FREE(r'5)
 *  | | | | | | | tevent_req_done(r'4);
 *  | | | | | | | |
 *  | | | | | | | | smb_vfs_call_*_done(r'4);
 *  | | | | | | | | |
 *  | | | | | | | | | module2_*_recv(r'4);
 *  | | | | | | | | | TALLOC_FREE(r'4)
 *  | | | | | | | | | tevent_req_done(r'3);
 *  | | | | | | | | | |
 *  | | | | | | | | | | module1_*_done(r'3):
 *  | | | | | | | | | | |
 *  | | | | | | | | | | | SMB_VFS_*_recv(r'3); # smb_vfs_call_*_recv()
 *  | | | | | | | | | | | TALLOC_FREE(r'3)
 *  | | | | | | | | | | | tevent_req_done(r'2);
 *  | | | | | | | | | | | |
 *  | | | | | | | | | | | | smb_vfs_call_*_done(r'2);
 *  | | | | | | | | | | | | |
 *  | | | | | | | | | | | | | module1_*_recv(r'2);
 *  | | | | | | | | | | | | | TALLOC_FREE(r'2)
 *  | | | | | | | | | | | | | tevent_req_done(r'1);
 *  | | | | | | | | | | | | | |
 *  | | | | | | | | | | | | | | smbd_*_done(r'1);
 *  | | | | | | | | | | | | | | |
 *  | | | | | | | | | | | | | | | SMB_VFS_*_recv(r'1); # smb_vfs_call_*_recv()
 *  | | | | | | | | | | | | | | | TALLOC_FREE(r'1)
 *  | | | | | | | | | | | | | | | smbd_response_to_client()
 *  | | | | | | | | | | | | | | | return
 *  | | | | | | | | | | | | | | |
 *  | | | | | | | | | | | | | | return
 *  | | | | | | | | | | | | | |
 *  | | | | | | | | | | | | | return
 *  | | | | | | | | | | | | |
 *  | | | | | | | | | | | | return
 *  | | | | | | | | | | | |
 *  | | | | | | | | | | | return
 *  | | | | | | | | | | |
 *  | | | | | | | | | | return
 *  | | | | | | | | | |
 *  | | | | | | | | | return
 *  | | | | | | | | |
 *  | | | | | | | | return
 *  | | | | | | | |
 *  | | | | | | | return
 *  | | | | | | |
 *  | | | | | | return
 *  | | | | | |
 *  | | | | | return
 *  | | | | |
 *  | | | | return
 *  | | | |
 *  | | | return
 *  | | |
 *  | | return
 *  | |
 *  | after_immediate_handler(ev'6);
 *  | |
 *  | | # lazy no change_to_user()
 *  | |
 *  | return
 *
 *
 * Example 2: start with user_evg and let module1 switch to root
 *
 * SMBD: already impersonated user_evg
 *  evg'1 = smb2_req->user_evg
 *  r'1 = SMB_VFS_*_SEND(evg'1); # smb_vfs_call_*_send()
 *  |
 *  | smb_vfs_ev_glue_push_use(evg'1, r'1);
 *  | |
 *  | | # no-op run_ev == return_ev
 *  | |
 *  | evg'2 = evg'1->next_glue;
 *  | r'2 = module1_*_send(evg'2);
 *  | |
 *  | | evg'3 = smb_vfs_ev_glue_get_root_glue(evg'2)
 *  | | r'3 = SMB_VFS_*_NEXT_SEND(evg'3); # smb_vfs_call_*_send()
 *  | | |
 *  | | | smb_vfs_ev_glue_push_use(evg'3, r'3);
 *  | | | |
 *  | | | | tevent_req_defer_callback(r'3, evg'3->return_ev);
 *  | | | | tevent_context_push_use(evg'3->run_ev)
 *  | | | | |
 *  | | | | | become_root()
 *  | | | | |
 *  | | | |
 *  | | | evg'4 = evg'3->next_glue;
 *  | | | r'4 = module2_*_send(evg'4);
 *  | | | |
 *  | | | | evg'5 = smb_vfs_ev_glue_get_root_glue(evg'4)
 *  | | | | r'5 = SMB_VFS_*_NEXT_SEND(evg'5); # smb_vfs_call_*_send()
 *  | | | | |
 *  | | | | | smb_vfs_ev_glue_push_use(evg'5, r'5);
 *  | | | | | |
 *  | | | | | | # no-op run_ev == return_ev, already root
 *  | | | | | |
 *  | | | | | evg'6 = evg'5->next_glue;
 *  | | | | | r'6 = default_*_send(evg'6);
 *  | | | | | |
 *  | | | | | | ev'6 = smb_vfs_ev_glue_ev_ctx(evg'6)
 *  | | | | | | tp'6 = smb_vfs_ev_glue_tp_chdir_safe(evg'6)
 *  | | | | | | r'7 = pthreadpool_tevent_send(ev'6, tp'6);
 *  | | | | | | |
 *  | | | | | | | pthread_create...
 *  | | | | | | |
 *  | | | | | | tevent_req_set_callback(r'7, default_*_done, r'6);
 *  | | | | | |
 *  | | | | | smb_vfs_ev_glue_pop_use(evg'5);
 *  | | | | | |
 *  | | | | | | # no-op run_ev == return_ev, still stay as root
 *  | | | | | |
 *  | | | | | tevent_req_set_callback(r'6, smb_vfs_*_done, r'5);
 *  | | | | |
 *  | | | | tevent_req_set_callback(r'5, module2_*_done, r'4);
 *  | | | |
 *  | | | smb_vfs_ev_glue_pop_use(evg'3);
 *  | | | |
 *  | | | | tevent_context_pop_use(evg'3->run_ev)
 *  | | | | |
 *  | | | | | unbecome_root()
 *  | | | |
 *  | | | tevent_req_set_callback(r'4, smb_vfs_*_done, r'3);
 *  | | |
 *  | | tevent_req_set_callback(r'3, module1_*_done, r'2);
 *  | |
 *  | smb_vfs_ev_glue_pop_use(evg'1);
 *  | |
 *  | | # no-op run_ev == return_ev
 *  | |
 *  | tevent_req_set_callback(r'2, smb_vfs_*_done, r'1);
 *  |
 *  tevent_req_set_callback(r'1, smbd_*_done, smb2_req);
 *
 *  Worker thread finished, two event handlers are involved:
 *  the first one runs as root up to tevent_req_done(r'3),
 *  the second one runs the deferred callback of r'3 as the user.
 *
 *  tevent_common_invoke_immediate_handler:
 *  |
 *  | before_immediate_handler(ev'6);
 *  | |
 *  | | become_root()
 *  | |
 *  | pthreadpool_tevent_job_done(r'7);
 *  | |
 *  | | default_*_done(r'7);
 *  | | |
 *  | | | pthreadpool_tevent_recv(r'7);
 *  | | | TALLOC_FREE(r'7);
 *  | | | tevent_req_done(r'6);
 *  | | | |
 *  | | | | smb_vfs_*_done(r'6);
 *  | | | | |
 *  | | | | | default_*_recv(r'6);
 *  | | | | | TALLOC_FREE(r'6)
 *  | | | | | tevent_req_done(r'5);
 *  | | | | | |
 *  | | | | | | module2_*_done(r'5):
 *  | | | | | | |
 *  | | | | | | | SMB_VFS_*_recv(r'5);
 *  | | | | | | | TALLOC_FREE(r'5)
 *  | | | | | | | tevent_req_done(r'4);
 *  | | | | | | | |
 *  | | | | | | | | smb_vfs_*_done(r'4);
 *  | | | | | | | | |
 *  | | | | | | | | | module2_*_recv(r'4);
 *  | | | | | | | | | TALLOC_FREE(r'4)
 *  | | | | | | | | | tevent_req_done(r'3);
 *  | | | | | | | | | | return
 *  | | | | | | | | | |
 *  | | | | | | | | | return
 *  | | | | | | | | |
 *  | | | | | | | | return
 *  | | | | | | | |
 *  | | | | | | | return
 *  | | | | | | |
 *  | | | | | | return
 *  | | | | | |
 *  | | | | | return
 *  | | | | |
 *  | | | | return
 *  | | | |
 *  | | | return
 *  | | |
 *  | | return
 *  | |
 *  | |
 *  | after_immediate_handler(ev'6);
 *  | |
 *  | | unbecome_root()
 *  | |
 *  | return
 *  |
 *  tevent_common_invoke_immediate_handler:
 *  |
 *  | before_immediate_handler(ev'6);
 *  | |
 *  | | change_to_user()
 *  | |
 *  | tevent_req_trigger();
 *  | ...
 *  | _tevent_req_notify_callback(r'3)
 *  | |
 *  | | module1_*_done(r'3):
 *  | | |
 *  | | | SMB_VFS_*_recv(r'3);
 *  | | | TALLOC_FREE(r'3)
 *  | | | tevent_req_done(r'2);
 *  | | | |
 *  | | | | smb_vfs_*_done(r'2);
 *  | | | | |
 *  | | | | | module1_*_recv(r'2);
 *  | | | | | TALLOC_FREE(r'2)
 *  | | | | | tevent_req_done(r'1);
 *  | | | | | |
 *  | | | | | | smbd_*_done(r'1);
 *  | | | | | | |
 *  | | | | | | | SMB_VFS_*_recv(r'1);
 *  | | | | | | | TALLOC_FREE(r'1)
 *  | | | | | | | smbd_response_to_client()
 *  | | | | | | | return
 *  | | | | | | |
 *  | | | | | | return
 *  | | | | | |
 *  | | | | | return
 *  | | | | |
 *  | | | | return
 *  | | | |
 *  | | | return
 *  | | |
 *  | | return
 *  | |
 *  | after_immediate_handler(ev'6);
 *  | |
 *  | | # lazy no change_to_user()
 *  | |
 *  | return
 *
 */
/*
 * Bundles the event contexts and threadpools that are passed
 * down the SMB_VFS stack, together with the glue objects used
 * for sub requests and for running as root.
 */
struct smb_vfs_ev_glue {
	/*
	 * The event context that should be used
	 * to report the result back.
	 *
	 * This is basically the caller's context.
	 */
	struct tevent_context *return_ev;

	/*
	 * The event context and threadpool wrappers
	 * the current context should use.
	 *
	 * tp_fd_safe only allows fd based functions
	 * which don't require impersonation, this
	 * is basically the raw threadpool.
	 *
	 * tp_path_safe allows path based functions
	 * to be called under the correct impersonation.
	 * But chdir/fchdir is not allowed!
	 * Typically calls like openat() or other *at()
	 * syscalls.
	 *
	 * tp_chdir_safe is like path_safe, but also
	 * allows chdir/fchdir to be called, the job
	 * can safely return with a changed directory,
	 * the threadpool wrapper takes care of
	 * a cleanup if required.
	 * This is needed if *at() syscalls need
	 * to be simulated by fchdir();$syscall(),
	 * e.g. getxattr().
	 *
	 * The distinction between these threadpools
	 * is required because of OS limitations
	 * (as of 2018):
	 * - only Linux supports per thread
	 *   credentials (seteuid....)
	 * - only Linux supports a per thread
	 *   current working directory,
	 *   using unshare(CLONE_FS). But
	 *   in some constrained container
	 *   environments even that is not available
	 *   on Linux.
	 *
	 * tp_fd_safe is typically the raw threadpool
	 * without a wrapper.
	 *
	 * On Linux tp_path_safe and tp_chdir_safe
	 * are typically the same (if unshare(CLONE_FS) is available),
	 * they're implemented as wrappers of the raw threadpool.
	 *
	 * On other OSes tp_path_safe is a wrapper
	 * around a sync threadpool (without real threads, just blocking
	 * the main thread), but hidden behind the pthreadpool_tevent
	 * api in order to make the restriction transparent.
	 *
	 * On other OSes tp_chdir_safe is a wrapper
	 * around a sync threadpool (without real threads, just blocking
	 * the main thread), but hidden behind the pthreadpool_tevent
	 * api in order to make the restriction transparent.
	 * It just remembers/restores the current working directory,
	 * typically using open(".", O_RDONLY | O_DIRECTORY) and fchdir().
	 */
	struct tevent_context *run_ev;
	struct pthreadpool_tevent *run_tp_fd_safe;
	struct pthreadpool_tevent *run_tp_path_safe;
	struct pthreadpool_tevent *run_tp_chdir_safe;

	/*
	 * The glue that should be passed down
	 * to sub requests in the stack.
	 *
	 * Typically this points to itself.
	 *
	 * But smb_vfs_ev_glue_create_switch() allows
	 * creating a context that can switch
	 * between two user glues.
	 */
	const struct smb_vfs_ev_glue *next_glue;

	/*
	 * Used if some code path wants to run
	 * some constrained code as root,
	 * basically an async version of become_root()
	 * and unbecome_root().
	 *
	 * The caller can call smb_vfs_ev_glue_get_root_glue()
	 * to get a root glue that can be passed
	 * to the SMB_VFS_*_SEND() function that
	 * should run as root.
	 *
	 * Note that the callback (registered with
	 * tevent_req_set_callback()) won't run as
	 * root anymore!
	 */
	const struct smb_vfs_ev_glue *root_glue;
};

I may add a few more comments tomorrow, but this should be
enough to get the basic design.

Please review :-)

metze
-------------- next part --------------
From 326c96adb03012433cf63ae66a5194be90d5c02a Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Fri, 13 Jul 2018 17:17:50 +0200
Subject: [PATCH 1/5] smbd: rename sconn->pool to sconn->raw_thread_pool

This should in future not be used directly, we'll provide
wrapper pools, which will provide impersonation for
path based async calls.

Signed-off-by: Ralph Boehme <slow at samba.org>
Reviewed-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_aio_pthread.c | 2 +-
 source3/modules/vfs_default.c     | 7 ++++---
 source3/smbd/globals.h            | 2 +-
 source3/smbd/process.c            | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/source3/modules/vfs_aio_pthread.c b/source3/modules/vfs_aio_pthread.c
index c1d1a7d518a6..da1ca534907f 100644
--- a/source3/modules/vfs_aio_pthread.c
+++ b/source3/modules/vfs_aio_pthread.c
@@ -277,7 +277,7 @@ static int open_async(const files_struct *fsp,
 
 	subreq = pthreadpool_tevent_job_send(opd,
 					     fsp->conn->user_ev_ctx,
-					     fsp->conn->sconn->pool,
+					     fsp->conn->sconn->raw_thread_pool,
 					     aio_open_worker, opd);
 	if (subreq == NULL) {
 		return -1;
diff --git a/source3/modules/vfs_default.c b/source3/modules/vfs_default.c
index f078cef9422a..72dbd7567359 100644
--- a/source3/modules/vfs_default.c
+++ b/source3/modules/vfs_default.c
@@ -686,7 +686,7 @@ static struct tevent_req *vfswrap_pread_send(struct vfs_handle_struct *handle,
 	SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->profile_bytes);
 
 	subreq = pthreadpool_tevent_job_send(
-		state, ev, handle->conn->sconn->pool,
+		state, ev, handle->conn->sconn->raw_thread_pool,
 		vfs_pread_do, state);
 	if (tevent_req_nomem(subreq, req)) {
 		return tevent_req_post(req, ev);
@@ -804,7 +804,7 @@ static struct tevent_req *vfswrap_pwrite_send(struct vfs_handle_struct *handle,
 	SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->profile_bytes);
 
 	subreq = pthreadpool_tevent_job_send(
-		state, ev, handle->conn->sconn->pool,
+		state, ev, handle->conn->sconn->raw_thread_pool,
 		vfs_pwrite_do, state);
 	if (tevent_req_nomem(subreq, req)) {
 		return tevent_req_post(req, ev);
@@ -914,7 +914,8 @@ static struct tevent_req *vfswrap_fsync_send(struct vfs_handle_struct *handle,
 	SMBPROFILE_BYTES_ASYNC_SET_IDLE(state->profile_bytes);
 
 	subreq = pthreadpool_tevent_job_send(
-		state, ev, handle->conn->sconn->pool, vfs_fsync_do, state);
+		state, ev, handle->conn->sconn->raw_thread_pool,
+		vfs_fsync_do, state);
 	if (tevent_req_nomem(subreq, req)) {
 		return tevent_req_post(req, ev);
 	}
diff --git a/source3/smbd/globals.h b/source3/smbd/globals.h
index 77e8f5c0dd6b..e75ec5408f33 100644
--- a/source3/smbd/globals.h
+++ b/source3/smbd/globals.h
@@ -939,7 +939,7 @@ struct smbd_server_connection {
 		} locks;
 	} smb2;
 
-	struct pthreadpool_tevent *pool;
+	struct pthreadpool_tevent *raw_thread_pool;
 
 	struct smbXsrv_client *client;
 };
diff --git a/source3/smbd/process.c b/source3/smbd/process.c
index 0a4106257f58..8e1fceab0aaf 100644
--- a/source3/smbd/process.c
+++ b/source3/smbd/process.c
@@ -3950,7 +3950,7 @@ void smbd_process(struct tevent_context *ev_ctx,
 	sconn->msg_ctx = msg_ctx;
 
 	ret = pthreadpool_tevent_init(sconn, lp_aio_max_threads(),
-				      &sconn->pool);
+				      &sconn->raw_thread_pool);
 	if (ret != 0) {
 		exit_server("pthreadpool_tevent_init() failed.");
 	}
-- 
2.17.1


From fd43d535115caf0ef7e80cc583ccc7871997c2a0 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze at samba.org>
Date: Tue, 24 Jul 2018 10:56:34 +0200
Subject: [PATCH 2/5] smbd: introduce sconn->sync_thread_pool

This just simulates a threadpool, but executes the
job functions inline (blocking) in the main thread.

This will be used to work around some OS limitations,
e.g. if per thread credentials or per thread working directory
are not supported.

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/smbd/globals.h |  1 +
 source3/smbd/msdfs.c   | 12 ++++++++++++
 source3/smbd/process.c | 16 +++++++++++++++-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/source3/smbd/globals.h b/source3/smbd/globals.h
index e75ec5408f33..19a130e64f64 100644
--- a/source3/smbd/globals.h
+++ b/source3/smbd/globals.h
@@ -939,6 +939,7 @@ struct smbd_server_connection {
 		} locks;
 	} smb2;
 
+	struct pthreadpool_tevent *sync_thread_pool;
 	struct pthreadpool_tevent *raw_thread_pool;
 
 	struct smbXsrv_client *client;
diff --git a/source3/smbd/msdfs.c b/source3/smbd/msdfs.c
index c74de7acf880..bf9b3abee4a7 100644
--- a/source3/smbd/msdfs.c
+++ b/source3/smbd/msdfs.c
@@ -32,6 +32,7 @@
 #include "libcli/security/security.h"
 #include "librpc/gen_ndr/ndr_dfsblobs.h"
 #include "lib/tsocket/tsocket.h"
+#include "lib/pthreadpool/pthreadpool_tevent.h"
 
 /**********************************************************************
  Parse a DFS pathname of the form \hostname\service\reqpath
@@ -251,6 +252,7 @@ static NTSTATUS create_conn_struct_as_root(TALLOC_CTX *ctx,
 	const char *vfs_user;
 	struct smbd_server_connection *sconn;
 	const char *servicename = lp_const_servicename(snum);
+	int ret;
 
 	sconn = talloc_zero(ctx, struct smbd_server_connection);
 	if (sconn == NULL) {
@@ -274,6 +276,16 @@ static NTSTATUS create_conn_struct_as_root(TALLOC_CTX *ctx,
 		return NT_STATUS_NO_MEMORY;
 	}
 
+	/*
+	 * We only provide sync threadpools.
+	 */
+	ret = pthreadpool_tevent_init(sconn, 0, &sconn->sync_thread_pool);
+	if (ret != 0) {
+		TALLOC_FREE(sconn);
+		return NT_STATUS_NO_MEMORY;
+	}
+	sconn->raw_thread_pool = sconn->sync_thread_pool;
+
 	sconn->msg_ctx = msg;
 
 	conn = conn_new(sconn);
diff --git a/source3/smbd/process.c b/source3/smbd/process.c
index 8e1fceab0aaf..35b5f4df385d 100644
--- a/source3/smbd/process.c
+++ b/source3/smbd/process.c
@@ -3907,6 +3907,7 @@ void smbd_process(struct tevent_context *ev_ctx,
 	const char *locaddr = NULL;
 	const char *remaddr = NULL;
 	int ret;
+	size_t max_threads;
 	NTSTATUS status;
 	struct timeval tv = timeval_current();
 	NTTIME now = timeval_to_nttime(&tv);
@@ -3952,7 +3953,20 @@ void smbd_process(struct tevent_context *ev_ctx,
 	ret = pthreadpool_tevent_init(sconn, lp_aio_max_threads(),
 				      &sconn->raw_thread_pool);
 	if (ret != 0) {
-		exit_server("pthreadpool_tevent_init() failed.");
+		exit_server("pthreadpool_tevent_init(raw) failed.");
+	}
+
+	max_threads = pthreadpool_tevent_max_threads(sconn->raw_thread_pool);
+	if (max_threads == 0) {
+		/*
+		 * We only have a sync pool, no need to create a 2nd one.
+		 */
+		sconn->sync_thread_pool = sconn->raw_thread_pool;
+	} else {
+		ret = pthreadpool_tevent_init(sconn, 0, &sconn->sync_thread_pool);
+		if (ret != 0) {
+			exit_server("pthreadpool_tevent_init(sync) failed.");
+		}
 	}
 
 	if (lp_server_max_protocol() >= PROTOCOL_SMB2_02) {
-- 
2.17.1


From 3e3a7236052f2b4b6e6c58404bc2e21a41e80770 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Sun, 8 Jul 2018 16:28:02 +0200
Subject: [PATCH 3/5] s3: vfs: add smb_vfs_ev_glue

This adds VFS helper functions that work on a struct smb_vfs_ev_glue
object which bundles two event contexts and a few threadpools.

This will be used to streamline the use of impersonating wrappers
in the SMB_VFS.

Notice the verbose comments in source3/smbd/vfs.c.

This will allow us to introduce path based async operations
to the SMB_VFS layer.

Pair-Programmed-With: Stefan Metzmacher <metze at samba.org>

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/include/vfs.h |  29 ++
 source3/smbd/vfs.c    | 823 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 852 insertions(+)

diff --git a/source3/include/vfs.h b/source3/include/vfs.h
index 90d39acd3fc9..c663e2a22a79 100644
--- a/source3/include/vfs.h
+++ b/source3/include/vfs.h
@@ -257,6 +257,7 @@
 /* Version 39 - Remove struct dfree_cached_info pointer from
 		connection struct */
 /* Bump to version 40, Samba 4.10 will ship with that */
+/* Version 40 - Introduce smb_vfs_ev_glue infrastructure. */
 
 #define SMB_VFS_INTERFACE_VERSION 40
 
@@ -279,6 +280,9 @@ struct smb_file_time;
 struct blocking_lock_record;
 struct smb_filename;
 struct dfs_GetDFSReferral;
+struct tevent_context;
+struct pthreadpool_tevent;
+struct smb_vfs_ev_glue;
 
 typedef union unid_t {
 	uid_t uid;
@@ -1500,4 +1504,29 @@ void *vfs_fetch_fsp_extension(vfs_handle_struct *handle, files_struct *fsp);
 void smb_vfs_assert_all_fns(const struct vfs_fn_pointers* fns,
 			    const char *module);
 
+/*
+ * Notice the "Design of the smb_vfs_ev_glue infrastructure"
+ * comment in source3/smbd/vfs.c!
+ *
+ * This explains smb_vfs_ev_glue infrastructure in detail.
+ */
+struct tevent_context *smb_vfs_ev_glue_ev_ctx(const struct smb_vfs_ev_glue *evg);
+struct pthreadpool_tevent *smb_vfs_ev_glue_tp_fd_safe(const struct smb_vfs_ev_glue *evg);
+struct pthreadpool_tevent *smb_vfs_ev_glue_tp_path_safe(const struct smb_vfs_ev_glue *evg);
+struct pthreadpool_tevent *smb_vfs_ev_glue_tp_chdir_safe(const struct smb_vfs_ev_glue *evg);
+const struct smb_vfs_ev_glue *smb_vfs_ev_glue_get_root_glue(const struct smb_vfs_ev_glue *evg);
+struct smb_vfs_ev_glue *smb_vfs_ev_glue_create(TALLOC_CTX *mem_ctx,
+				struct tevent_context *user_ev,
+				struct pthreadpool_tevent *user_tp_fd_safe,
+				struct pthreadpool_tevent *user_tp_path_safe,
+				struct pthreadpool_tevent *user_tp_chdir_safe,
+				struct tevent_context *root_ev,
+				struct pthreadpool_tevent *root_tp_fd_safe,
+				struct pthreadpool_tevent *root_tp_path_safe,
+				struct pthreadpool_tevent *root_tp_chdir_safe);
+struct smb_vfs_ev_glue *smb_vfs_ev_glue_create_switch(
+			TALLOC_CTX *mem_ctx,
+			const struct smb_vfs_ev_glue *run_evg,
+			const struct smb_vfs_ev_glue *return_evg);
+
 #endif /* _VFS_H */
diff --git a/source3/smbd/vfs.c b/source3/smbd/vfs.c
index a6f428020655..37249ab511d8 100644
--- a/source3/smbd/vfs.c
+++ b/source3/smbd/vfs.c
@@ -1455,6 +1455,829 @@ struct file_id vfs_file_id_from_sbuf(connection_struct *conn, const SMB_STRUCT_S
 	return SMB_VFS_FILE_ID_CREATE(conn, sbuf);
 }
 
+/*
+ * Design of the smb_vfs_ev_glue infrastructure:
+ *
+ * smb_vfs_ev_glue makes it possible to pass
+ * down a tevent_context and pthreadpool_tevent
+ * used for impersonation through the SMB_VFS stack.
+ *
+ * tevent_req based functions take a tevent_context as
+ * their 2nd argument, e.g.:
+ *
+ *   struct tevent_req *something_send(TALLOC_CTX *mem_ctx,
+ *                                     struct tevent_context *ev,
+ *                                     ...);
+ *
+ * For the SMB_VFS stack we'll use the following:
+ *
+ *   struct tevent_req *SMB_VFS_SOMETHING_SEND(TALLOC_CTX *mem_ctx,
+ *                                             struct smb_vfs_ev_glue *evg,
+ *                                             ...);
+ *
+ * Typically the 'evg' is just passed through the stack down
+ * to vfs_default.c. In order to do real work a
+ * tevent_context and pthreadpool_tevent are required
+ * to call a 'something()' syscall in an async fashion.
+ * Therefore it will use the following to get the pointers
+ * back out of evg:
+ *
+ *   ev = smb_vfs_ev_glue_ev_ctx(evg);
+ *   tp = smb_vfs_ev_glue_tp_chdir_safe(evg);
+ *
+ * If some function in the stack is sure it needs to run as root
+ * to get some information (after careful checks!), it used
+ * to frame that work into become_root()/unbecome_root().
+ * This can't work when using async functions!
+ * Now it's possible to use something like this (simplified!):
+ *
+ *   ev = smb_vfs_ev_glue_ev_ctx(evg);
+ *   root_evg = smb_vfs_ev_glue_get_root_glue(evg);
+ *   subreq = SMB_VFS_SOMETHING_NEXT_SEND(state, root_evg, ...);
+ *   if (tevent_req_nomem(subreq, req)) {
+ *        return tevent_req_post(req, ev);
+ *   }
+ *   tevent_req_set_callback(subreq, module_something_done, req);
+ *
+ *   return req;
+ *
+ *   static void module_something_done(struct tevent_req *subreq)
+ *   {
+ *      ...
+ *
+ *      status = SMB_VFS_SOMETHING_NEXT_RECV(subreq, &state->aio_state);
+ *      TALLOC_FREE(subreq);
+ *
+ *      tevent_req_done(req);
+ *   }
+ *
+ * In the code above the something_send_fn() function of the next
+ * module in the stack will be called as root.
+ * The smb_vfs_call_something_*() glue code, which is the magic
+ * behind the SMB_VFS_SOMETHING[_NEXT]_{SEND,RECV}() macros,
+ * will look like this:
+ *
+ *   struct smb_vfs_call_something_state {
+ *       ssize_t (*recv_fn)(struct tevent_req *req,
+ *                          struct vfs_aio_state *aio_state,
+ *                          ...);
+ *       ssize_t retval;
+ *       struct vfs_aio_state vfs_aio_state;
+ *       ...
+ *   };
+ *
+ *   static void smb_vfs_call_something_done(struct tevent_req *subreq);
+ *
+ *   struct tevent_req *smb_vfs_call_something_send(
+ *                  TALLOC_CTX *mem_ctx,
+ *                  const struct smb_vfs_ev_glue *evg,
+ *                  struct vfs_handle_struct *handle,
+ *                  ...)
+ *   {
+ *       struct tevent_req *req = NULL;
+ *       struct smb_vfs_call_something_state *state = NULL;
+ *       struct tevent_req *subreq = NULL;
+ *       bool ok;
+ *
+ *       req = tevent_req_create(mem_ctx, &state,
+ *                               struct smb_vfs_call_something_state);
+ *       if (req == NULL) {
+ *           return NULL;
+ *       }
+ *
+ *       VFS_FIND(something_send);
+ *       state->recv_fn = handle->fns->something_recv_fn;
+ *
+ *       ok = smb_vfs_ev_glue_push_use(evg, req);
+ *       if (!ok) {
+ *           tevent_req_error(req, EIO);
+ *           return tevent_req_post(req, evg->return_ev);
+ *       }
+ *
+ *       subreq = handle->fns->something_send_fn(mem_ctx,
+ *                                               evg->next_glue,
+ *                                               handle,
+ *                                               ...);
+ *       smb_vfs_ev_glue_pop_use(evg);
+ *
+ *       if (tevent_req_nomem(subreq, req)) {
+ *           return tevent_req_post(req, evg->return_ev);
+ *       }
+ *       tevent_req_set_callback(subreq, smb_vfs_call_something_done, req);
+ *
+ *       return req;
+ *   }
+ *
+ *   static void smb_vfs_call_something_done(struct tevent_req *subreq)
+ *   {
+ *       struct tevent_req *req =
+ *           tevent_req_callback_data(subreq,
+ *           struct tevent_req);
+ *       struct smb_vfs_call_something_state *state =
+ *           tevent_req_data(req,
+ *           struct smb_vfs_call_something_state);
+ *
+ *       state->retval = state->recv_fn(subreq,
+ *                                      &state->vfs_aio_state,
+ *                                      ....);
+ *       TALLOC_FREE(subreq);
+ *
+ *       if (state->retval == -1) {
+ *           tevent_req_error(req, state->vfs_aio_state.error);
+ *           return;
+ *       }
+ *       tevent_req_done(req);
+ *   }
+ *
+ *   ssize_t smb_vfs_call_something_recv(struct tevent_req *req,
+ *                                        struct vfs_aio_state *aio_state,
+ *                                        ....)
+ *   {
+ *       struct smb_vfs_call_something_state *state =
+ *           tevent_req_data(req,
+ *           struct smb_vfs_call_something_state);
+ *       ssize_t retval = state->retval;
+ *
+ *       if (tevent_req_is_unix_error(req, &aio_state->error)) {
+ *           tevent_req_received(req);
+ *           return -1;
+ *       }
+ *
+ *       *aio_state = state->vfs_aio_state;
+ *       ...
+ *
+ *       tevent_req_received(req);
+ *       return retval;
+ *   }
+ *
+ * The most important details are these:
+ *
+ * 1. smb_vfs_ev_glue_push_use(evg, req):
+ *    - is a no-op if evg->run_ev and evg->return_ev are the same,
+ *      it means that we're already at the correct impersonation
+ *      and don't need any additional work to be done.
+ *    - Otherwise it will call tevent_req_defer_callback(req, evg->return_ev)
+ *      This means that tevent_req_error() and tevent_req_done()
+ *      will just trigger an immediate event on evg->return_ev.
+ *      Therefore the callers callback function will be called
+ *      in the impersonation of evg->return_ev! This is important
+ *      in order to get the impersonation correct on the way back
+ *      through the stack.
+ *    - It will call tevent_context_push_use(evg->run_ev),
+ *      which will start the impersonation to run_ev.
+ *      So the following code runs in the correct context.
+ * 2. handle->fns->something_send_fn(..., evg->next_glue, ...):
+ *    - We're passing evg->next_glue to the next module.
+ *    - Typically evg->next_glue points to evg again,
+ *      but that is not the case if smb_vfs_ev_glue_create_switch()
+ *      is used.
+ *    - In case evg->run_ev and evg->return_ev are not the same,
+ *      next_glue will have run_ev and return_ev pointing to evg->run_ev.
+ *      So that the switch from evg->run_ev to evg->return_ev
+ *      happens on the correct boundary.
+ * 3. smb_vfs_ev_glue_pop_use(evg):
+ *    - is a no-op if evg->run_ev and evg->return_ev are the same,
+ *      it means that we're already at the correct impersonation
+ *      and don't need any additional work to be done.
+ *    - It will call tevent_context_pop_use(evg->run_ev),
+ *      which will revert the impersonation done in
+ *      smb_vfs_ev_glue_push_use().
+ * 4. smb_vfs_call_something_send():
+ *    - This is called in the environment of evg->return_ev.
+ *    - So it needs to use tevent_req_post(req, evg->return_ev)
+ * 5. smb_vfs_call_something_done():
+ *    - This is called in the environment of evg->run_ev
+ * 6. smb_vfs_call_something_recv():
+ *    - This is called in the environment of evg->return_ev again.
+ *
+ *
+ * Here are some more complex examples:
+ *
+ * Example 1: only user_evg without switch to root
+ *
+ * SMBD: already impersonated user_evg
+ *  evg'1 = smb2_req->user_evg
+ *  r'1 = SMB_VFS_*_SEND(evg'1); # smb_vfs_call_*_send()
+ *  |
+ *  | smb_vfs_ev_glue_push_use(evg'1, r'1);
+ *  | |
+ *  | | # no-op run_ev == return_ev
+ *  | |
+ *  | evg'2 = evg'1->next_glue;
+ *  | r'2 = module1_*_send(evg'2);
+ *  | |
+ *  | | evg'3 = evg'2
+ *  | | r'3 = SMB_VFS_*_NEXT_SEND(evg'3); # smb_vfs_call_*_send()
+ *  | | |
+ *  | | | smb_vfs_ev_glue_push_use(evg'3, r'3);
+ *  | | | |
+ *  | | | | # no-op run_ev == return_ev
+ *  | | | |
+ *  | | | evg'4 = evg'3->next_glue;
+ *  | | | r'4 = module2_*_send(evg'4);
+ *  | | | |
+ *  | | | | evg'5 = evg'4
+ *  | | | | r'5 = SMB_VFS_*_NEXT_SEND(evg'5); # smb_vfs_call_*_send()
+ *  | | | | |
+ *  | | | | | smb_vfs_ev_glue_push_use(evg'5, r'5);
+ *  | | | | | |
+ *  | | | | | | # no-op run_ev == return_ev
+ *  | | | | | |
+ *  | | | | | evg'6 = evg'5->next_glue;
+ *  | | | | | r'6 = default_*_send(evg'6);
+ *  | | | | | |
+ *  | | | | | | ev'6 = smb_vfs_ev_glue_ev_ctx(evg'6)
+ *  | | | | | | tp'6 = smb_vfs_ev_glue_tp_chdir_safe(evg'6)
+ *  | | | | | | r'7 = pthreadpool_tevent_send(ev'6, tp'6);
+ *  | | | | | | |
+ *  | | | | | | | pthread_create...
+ *  | | | | | | |
+ *  | | | | | | tevent_req_set_callback(r'7, default_*_done, r'6);
+ *  | | | | | |
+ *  | | | | | smb_vfs_ev_glue_pop_use(evg'5);
+ *  | | | | | |
+ *  | | | | | | # no-op run_ev == return_ev
+ *  | | | | | |
+ *  | | | | | tevent_req_set_callback(r'6, smb_vfs_call_*_done, r'5);
+ *  | | | | |
+ *  | | | | tevent_req_set_callback(r'5, module2_*_done, r'4);
+ *  | | | |
+ *  | | | smb_vfs_ev_glue_pop_use(evg'3);
+ *  | | | |
+ *  | | | | # no-op run_ev == return_ev
+ *  | | | |
+ *  | | | tevent_req_set_callback(r'4, smb_vfs_call_*_done, r'3);
+ *  | | |
+ *  | | tevent_req_set_callback(r'3, module1_*_done, r'2);
+ *  | |
+ *  | smb_vfs_ev_glue_pop_use(evg'1);
+ *  | |
+ *  | | # no-op run_ev == return_ev
+ *  | |
+ *  | tevent_req_set_callback(r'2, smb_vfs_call_*_done, r'1);
+ *  |
+ *  tevent_req_set_callback(r'1, smbd_*_done, smb2_req);
+ *
+ *  Worker thread finished, just one event handler processes
+ *  everything as there's no impersonation change.
+ *
+ *  tevent_common_invoke_immediate_handler:
+ *  |
+ *  | before_immediate_handler(ev'6);
+ *  | |
+ *  | | change_to_user()
+ *  | |
+ *  | pthreadpool_tevent_job_done(r'7);
+ *  | |
+ *  | | default_*_done(r'7);
+ *  | | |
+ *  | | | pthreadpool_tevent_recv(r'7);
+ *  | | | TALLOC_FREE(r'7);
+ *  | | | tevent_req_done(r'6);
+ *  | | | |
+ *  | | | | smb_vfs_call_*_done(r'6);
+ *  | | | | |
+ *  | | | | | default_*_recv(r'6);
+ *  | | | | | TALLOC_FREE(r'6)
+ *  | | | | | tevent_req_done(r'5);
+ *  | | | | | |
+ *  | | | | | | module2_*_done(r'5):
+ *  | | | | | | |
+ *  | | | | | | | SMB_VFS_*_recv(r'5); # smb_vfs_call_*_recv()
+ *  | | | | | | | TALLOC_FREE(r'5)
+ *  | | | | | | | tevent_req_done(r'4);
+ *  | | | | | | | |
+ *  | | | | | | | | smb_vfs_call_*_done(r'4);
+ *  | | | | | | | | |
+ *  | | | | | | | | | module2_*_recv(r'4);
+ *  | | | | | | | | | TALLOC_FREE(r'4)
+ *  | | | | | | | | | tevent_req_done(r'3);
+ *  | | | | | | | | | |
+ *  | | | | | | | | | | module1_*_done(r'3):
+ *  | | | | | | | | | | |
+ *  | | | | | | | | | | | SMB_VFS_*_recv(r'3); # smb_vfs_call_*_recv()
+ *  | | | | | | | | | | | TALLOC_FREE(r'3)
+ *  | | | | | | | | | | | tevent_req_done(r'2);
+ *  | | | | | | | | | | | |
+ *  | | | | | | | | | | | | smb_vfs_*_done(r'2);
+ *  | | | | | | | | | | | | |
+ *  | | | | | | | | | | | | | module1_*_recv(r'2);
+ *  | | | | | | | | | | | | | TALLOC_FREE(r'2)
+ *  | | | | | | | | | | | | | tevent_req_done(r'1);
+ *  | | | | | | | | | | | | | |
+ *  | | | | | | | | | | | | | | smbd_*_done(r'1);
+ *  | | | | | | | | | | | | | | |
+ *  | | | | | | | | | | | | | | | SMB_VFS_*_recv(r'1); # smb_vfs_call_*_recv()
+ *  | | | | | | | | | | | | | | | TALLOC_FREE(r'1)
+ *  | | | | | | | | | | | | | | | smbd_response_to_client()
+ *  | | | | | | | | | | | | | | | return
+ *  | | | | | | | | | | | | | | |
+ *  | | | | | | | | | | | | | | return
+ *  | | | | | | | | | | | | | |
+ *  | | | | | | | | | | | | | return
+ *  | | | | | | | | | | | | |
+ *  | | | | | | | | | | | | return
+ *  | | | | | | | | | | | |
+ *  | | | | | | | | | | | return
+ *  | | | | | | | | | | |
+ *  | | | | | | | | | | return
+ *  | | | | | | | | | |
+ *  | | | | | | | | | return
+ *  | | | | | | | | |
+ *  | | | | | | | | return
+ *  | | | | | | | |
+ *  | | | | | | | return
+ *  | | | | | | |
+ *  | | | | | | return
+ *  | | | | | |
+ *  | | | | | return
+ *  | | | | |
+ *  | | | | return
+ *  | | | |
+ *  | | | return
+ *  | | |
+ *  | | return
+ *  | |
+ *  | after_immediate_handler(ev'6);
+ *  | |
+ *  | | # lazy no change_to_user()
+ *  | |
+ *  | return
+ *
+ *
+ * Example 2: start with user_evg and let module1 switch to root
+ *
+ * SMBD: already impersonated user_evg
+ *  evg'1 = smb2_req->user_evg
+ *  r'1 = SMB_VFS_*_SEND(evg'1); # smb_vfs_call_*_send()
+ *  |
+ *  | smb_vfs_ev_glue_push_use(evg'1, r'1);
+ *  | |
+ *  | | # no-op run_ev == return_ev
+ *  | |
+ *  | evg'2 = evg'1->next_glue;
+ *  | r'2 = module1_*_send(evg'2);
+ *  | |
+ *  | | evg'3 = smb_vfs_ev_glue_get_root_glue(evg'2)
+ *  | | r'3 = SMB_VFS_*_NEXT_SEND(evg'3); # smb_vfs_call_*_send()
+ *  | | |
+ *  | | | smb_vfs_ev_glue_push_use(evg'3, r'3);
+ *  | | | |
+ *  | | | | tevent_req_defer_callback(r'3, evg'3->return_ev);
+ *  | | | | tevent_context_push_use(evg'3->run_ev)
+ *  | | | | |
+ *  | | | | | become_root()
+ *  | | | | |
+ *  | | | |
+ *  | | | evg'4 = evg'3->next_glue;
+ *  | | | r'4 = module2_*_send(evg'4);
+ *  | | | |
+ *  | | | | evg'5 = smb_vfs_ev_glue_get_root_glue(evg'4)
+ *  | | | | r'5 = SMB_VFS_*_NEXT_SEND(evg'5); # smb_vfs_call_*_send()
+ *  | | | | |
+ *  | | | | | smb_vfs_ev_glue_push_use(evg'5, r'5);
+ *  | | | | | |
+ *  | | | | | | # no-op run_ev == return_ev, already root
+ *  | | | | | |
+ *  | | | | | evg'6 = evg'5->next_glue;
+ *  | | | | | r'6 = default_*_send(evg'6);
+ *  | | | | | |
+ *  | | | | | | ev'6 = smb_vfs_ev_glue_ev_ctx(evg'6)
+ *  | | | | | | tp'6 = smb_vfs_ev_glue_tp_chdir_safe(evg'6)
+ *  | | | | | | r'7 = pthreadpool_tevent_send(ev'6, tp'6);
+ *  | | | | | | |
+ *  | | | | | | | pthread_create...
+ *  | | | | | | |
+ *  | | | | | | tevent_req_set_callback(r'7, default_*_done, r'6);
+ *  | | | | | |
+ *  | | | | | smb_vfs_ev_glue_pop_use(evg'5);
+ *  | | | | | |
+ *  | | | | | | # no-op run_ev == return_ev, still stay as root
+ *  | | | | | |
+ *  | | | | | tevent_req_set_callback(r'6, smb_vfs_*_done, r'5);
+ *  | | | | |
+ *  | | | | tevent_req_set_callback(r'5, module2_*_done, r'4);
+ *  | | | |
+ *  | | | smb_vfs_ev_glue_pop_use(evg'3);
+ *  | | | |
+ *  | | | | tevent_context_pop_use(evg'3->run_ev)
+ *  | | | | |
+ *  | | | | | unbecome_root()
+ *  | | | |
+ *  | | | tevent_req_set_callback(r'4, smb_vfs_*_done, r'3);
+ *  | | |
+ *  | | tevent_req_set_callback(r'3, module1_*_done, r'2);
+ *  | |
+ *  | smb_vfs_ev_glue_pop_use(evg'1);
+ *  | |
+ *  | | # no-op run_ev == return_ev
+ *  | |
+ *  | tevent_req_set_callback(r'2, smb_vfs_*_done, r'1);
+ *  |
+ *  tevent_req_set_callback(r'1, smbd_*_done, smb2_req);
+ *
+ *  Worker thread finished, the first event handler runs as root until
+ *  r'3 is done; a second handler (below) then continues as the user.
+ *
+ *  tevent_common_invoke_immediate_handler:
+ *  |
+ *  | before_immediate_handler(ev'6);
+ *  | |
+ *  | | become_root()
+ *  | |
+ *  | pthreadpool_tevent_job_done(r'7);
+ *  | |
+ *  | | default_*_done(r'7);
+ *  | | |
+ *  | | | pthreadpool_tevent_recv(r'7);
+ *  | | | TALLOC_FREE(r'7);
+ *  | | | tevent_req_done(r'6);
+ *  | | | |
+ *  | | | | smb_vfs_*_done(r'6);
+ *  | | | | |
+ *  | | | | | default_*_recv(r'6);
+ *  | | | | | TALLOC_FREE(r'6)
+ *  | | | | | tevent_req_done(r'5);
+ *  | | | | | |
+ *  | | | | | | module2_*_done(r'5):
+ *  | | | | | | |
+ *  | | | | | | | SMB_VFS_*_recv(r'5);
+ *  | | | | | | | TALLOC_FREE(r'5)
+ *  | | | | | | | tevent_req_done(r'4);
+ *  | | | | | | | |
+ *  | | | | | | | | smb_vfs_*_done(r'4);
+ *  | | | | | | | | |
+ *  | | | | | | | | | module2_*_recv(r'4);
+ *  | | | | | | | | | TALLOC_FREE(r'4)
+ *  | | | | | | | | | tevent_req_done(r'3);
+ *  | | | | | | | | | | return
+ *  | | | | | | | | | |
+ *  | | | | | | | | | return
+ *  | | | | | | | | |
+ *  | | | | | | | | return
+ *  | | | | | | | |
+ *  | | | | | | | return
+ *  | | | | | | |
+ *  | | | | | | return
+ *  | | | | | |
+ *  | | | | | return
+ *  | | | | |
+ *  | | | | return
+ *  | | | |
+ *  | | | return
+ *  | | |
+ *  | | return
+ *  | |
+ *  | |
+ *  | after_immediate_handler(ev'6);
+ *  | |
+ *  | | unbecome_root()
+ *  | |
+ *  | return
+ *  |
+ *  tevent_common_invoke_immediate_handler:
+ *  |
+ *  | before_immediate_handler(ev'6);
+ *  | |
+ *  | | change_to_user()
+ *  | |
+ *  | tevent_req_trigger();
+ *  | ...
+ *  | _tevent_req_notify_callback(r'3)
+ *  | |
+ *  | | module1_*_done(r'3):
+ *  | | |
+ *  | | | SMB_VFS_*_recv(r'3);
+ *  | | | TALLOC_FREE(r'3)
+ *  | | | tevent_req_done(r'2);
+ *  | | | |
+ *  | | | | smb_vfs_*_done(r'2);
+ *  | | | | |
+ *  | | | | | module1_*_recv(r'2);
+ *  | | | | | TALLOC_FREE(r'2)
+ *  | | | | | tevent_req_done(r'1);
+ *  | | | | | |
+ *  | | | | | | smbd_*_done(r'1);
+ *  | | | | | | |
+ *  | | | | | | | SMB_VFS_*_recv(r'1);
+ *  | | | | | | | TALLOC_FREE(r'1)
+ *  | | | | | | | smbd_response_to_client()
+ *  | | | | | | | return
+ *  | | | | | | |
+ *  | | | | | | return
+ *  | | | | | |
+ *  | | | | | return
+ *  | | | | |
+ *  | | | | return
+ *  | | | |
+ *  | | | return
+ *  | | |
+ *  | | return
+ *  | |
+ *  | after_immediate_handler(ev'6);
+ *  | |
+ *  | | # lazy no change_to_user()
+ *  | |
+ *  | return
+ *
+ */
+struct smb_vfs_ev_glue {
+	/*
+	 * The event context that should be used
+	 * to report the result back.
+	 *
+	 * This is basically the caller's context.
+	 */
+	struct tevent_context *return_ev;
+
+	/*
+	 * The event context and threadpool wrappers
+	 * the current context should use.
+	 *
+	 * tp_fd_safe only allows fd based functions
+	 * which don't require impersonation, this
+	 * is basically the raw threadpool.
+	 *
+	 * tp_path_safe allows path based functions
+	 * to be called under the correct impersonation.
+	 * But chdir/fchdir is not allowed!
+	 * Typically calls like openat() or other *at()
+	 * syscalls.
+	 *
+	 * tp_chdir_safe is like path_safe, but also
+	 * allows chdir/fchdir to be called, the job
+	 * can safely return with a changed directory,
+	 * the threadpool wrapper takes care of
+	 * a cleanup if required.
+	 * This is needed if *at() syscalls need
+	 * to be simulated by fchdir();$syscall(),
+	 * e.g. getxattr().
+	 *
+	 * The distinction between these threadpools
+	 * is required because of OS limitations
+	 * (as of 2018):
+	 * - only Linux supports per thread
+	 *   credentials (seteuid....)
+	 * - only Linux supports a per thread
+	 *   current working directory,
+	 *   using unshare(CLONE_FS). But
+	 *   in some constrained container
+	 *   environments even that is not available
+	 *   on Linux.
+	 *
+	 * tp_fd_safe is typically the raw threadpool
+	 * without a wrapper.
+	 *
+	 * On Linux tp_path_safe and tp_chdir_safe
+	 * are typically the same (if unshare(CLONE_FS) is available),
+	 * they're implemented as wrappers of the raw threadpool.
+	 *
+	 * On other OSes tp_path_safe is a wrapper
+	 * around a sync threadpool (without real threads, just blocking
+	 * the main thread), but hidden behind the pthreadpool_tevent
+	 * api in order to make the restriction transparent.
+	 *
+	 * On other OSes tp_chdir_safe is a wrapper
+	 * around a sync threadpool (without real threads, just blocking
+	 * the main thread), but hidden behind the pthreadpool_tevent
+	 * api in order to make the restriction transparent.
+	 * It just remembers/restores the current working directory,
+	 * typically using open(".", O_RDONLY | O_DIRECTORY) and fchdir().
+	 */
+	struct tevent_context *run_ev;
+	struct pthreadpool_tevent *run_tp_fd_safe;
+	struct pthreadpool_tevent *run_tp_path_safe;
+	struct pthreadpool_tevent *run_tp_chdir_safe;
+
+	/*
+	 * The glue that should be passed down
+	 * to sub requests in the stack.
+	 *
+	 * Typically this points to itself.
+	 *
+	 * But smb_vfs_ev_glue_create_switch() allows
+	 * to create a context that can switch
+	 * between two user glues.
+	 */
+	const struct smb_vfs_ev_glue *next_glue;
+
+	/*
+	 * Used if some code path wants to run
+	 * some constrained code as root,
+	 * basically an async version of become_root()
+	 * and unbecome_root().
+	 *
+	 * The caller can call smb_vfs_ev_glue_get_root_glue()
+	 * to get a root glue that can be passed
+	 * to the SMB_VFS_*_SEND() function that
+	 * should run as root.
+	 *
+	 * Note that the callback (registered with
+	 * tevent_req_set_callback()) won't run as
+	 * root anymore!
+	 */
+	const struct smb_vfs_ev_glue *root_glue;
+};
+
+static struct smb_vfs_ev_glue *smb_vfs_ev_glue_create_internal(
+	TALLOC_CTX *mem_ctx,
+	struct tevent_context *return_ev,
+	struct tevent_context *run_ev,
+	struct pthreadpool_tevent *run_tp_fd_safe,
+	struct pthreadpool_tevent *run_tp_path_safe,
+	struct pthreadpool_tevent *run_tp_chdir_safe)
+{
+	struct smb_vfs_ev_glue *evg = NULL;
+
+	evg = talloc_zero(mem_ctx, struct smb_vfs_ev_glue); /* child of mem_ctx */
+	if (evg == NULL) {
+		return NULL; /* allocation failed */
+	}
+	*evg = (struct smb_vfs_ev_glue) { /* root_glue is not set here, stays NULL */
+		.return_ev = return_ev,
+		.run_ev = run_ev,
+		.run_tp_fd_safe = run_tp_fd_safe,
+		.run_tp_path_safe = run_tp_path_safe,
+		.run_tp_chdir_safe = run_tp_chdir_safe,
+		.next_glue = evg, /* by default sub requests get this glue again */
+	};
+
+	return evg;
+}
+
+struct tevent_context *smb_vfs_ev_glue_ev_ctx(const struct smb_vfs_ev_glue *evg)
+{
+	return evg->run_ev; /* the event context this glue runs on */
+}
+
+struct pthreadpool_tevent *smb_vfs_ev_glue_tp_fd_safe(const struct smb_vfs_ev_glue *evg)
+{
+	return evg->run_tp_fd_safe; /* threadpool for fd based jobs */
+}
+
+struct pthreadpool_tevent *smb_vfs_ev_glue_tp_path_safe(const struct smb_vfs_ev_glue *evg)
+{
+	return evg->run_tp_path_safe; /* threadpool for path based jobs */
+}
+
+struct pthreadpool_tevent *smb_vfs_ev_glue_tp_chdir_safe(const struct smb_vfs_ev_glue *evg)
+{
+	return evg->run_tp_chdir_safe; /* threadpool for jobs that may chdir */
+}
+
+const struct smb_vfs_ev_glue *smb_vfs_ev_glue_get_root_glue(const struct smb_vfs_ev_glue *evg)
+{
+	return evg->root_glue; /* the glue to pass down for running as root */
+}
+
+struct smb_vfs_ev_glue *smb_vfs_ev_glue_create(TALLOC_CTX *mem_ctx,
+				struct tevent_context *user_ev,
+				struct pthreadpool_tevent *user_tp_fd_safe,
+				struct pthreadpool_tevent *user_tp_path_safe,
+				struct pthreadpool_tevent *user_tp_chdir_safe,
+				struct tevent_context *root_ev,
+				struct pthreadpool_tevent *root_tp_fd_safe,
+				struct pthreadpool_tevent *root_tp_path_safe,
+				struct pthreadpool_tevent *root_tp_chdir_safe)
+{
+	struct smb_vfs_ev_glue *evg_uu = NULL;
+	struct smb_vfs_ev_glue *evg_ru = NULL;
+	struct smb_vfs_ev_glue *evg_rr = NULL;
+
+	/*
+	 * The top level glue (directly returned from this function).
+	 *
+	 * It uses user_ev and user_tp_* only.
+	 */
+	evg_uu = smb_vfs_ev_glue_create_internal(mem_ctx,
+						 user_ev, /* return_ev */
+						 user_ev, /* run_ev */
+						 user_tp_fd_safe,
+						 user_tp_path_safe,
+						 user_tp_chdir_safe);
+	if (evg_uu == NULL) {
+		return NULL;
+	}
+
+	/*
+	 * The first root glue (returned by smb_vfs_ev_glue_get_root_glue()).
+	 *
+	 * It uses root_ev and root_tp_*, but user_ev as return_ev,
+	 * which means that the caller's callback (registered with
+	 * tevent_req_set_callback()) will run as user_ev.
+	 */
+	evg_ru = smb_vfs_ev_glue_create_internal(evg_uu,
+						 user_ev, /* return_ev */
+						 root_ev, /* run_ev */
+						 root_tp_fd_safe,
+						 root_tp_path_safe,
+						 root_tp_chdir_safe);
+	if (evg_ru == NULL) {
+		TALLOC_FREE(evg_uu);
+		return NULL;
+	}
+
+	/*
+	 * The second root glue (returned by smb_vfs_ev_glue_get_root_glue() on
+	 * the root glue itself). This means code can always call
+	 * smb_vfs_ev_glue_get_root_glue() and doesn't have to care if the
+	 * passed glue is already a root glue.
+	 *
+	 * This will then recursively point to its own root_glue pointer.
+	 *
+	 * It only uses root_ev and root_tp_*.
+	 */
+	evg_rr = smb_vfs_ev_glue_create_internal(evg_ru,
+						 root_ev, /* return_ev */
+						 root_ev, /* run_ev */
+						 root_tp_fd_safe,
+						 root_tp_path_safe,
+						 root_tp_chdir_safe);
+	if (evg_rr == NULL) {
+		TALLOC_FREE(evg_uu); /* evg_ru is a talloc child, freed too */
+		return NULL;
+	}
+
+	/*
+	 * We now setup the glue hierarchy.
+	 *
+	 * Search for "Design of the smb_vfs_ev_glue infrastructure" above
+	 * for a detailed description how the chain works.
+	 *
+	 * "Example 2: start with user_evg and let module1 switch to root"
+	 * explains it for the root_glue chaining.
+	 */
+	evg_rr->root_glue = evg_rr;
+	evg_ru->root_glue = evg_rr;
+	evg_uu->root_glue = evg_ru;
+
+	return evg_uu;
+}
+
+struct smb_vfs_ev_glue *smb_vfs_ev_glue_create_switch(
+			TALLOC_CTX *mem_ctx,
+			const struct smb_vfs_ev_glue *run_evg,
+			const struct smb_vfs_ev_glue *return_evg)
+{
+	struct smb_vfs_ev_glue *evg = NULL;
+
+	evg = smb_vfs_ev_glue_create_internal(mem_ctx,
+					  return_evg->return_ev,
+					  run_evg->run_ev,
+					  run_evg->run_tp_fd_safe,
+					  run_evg->run_tp_path_safe,
+					  run_evg->run_tp_chdir_safe);
+	if (evg == NULL) {
+		return NULL;
+	}
+	evg->next_glue = run_evg; /* sub requests are handed run_evg */
+	evg->root_glue = run_evg->root_glue; /* root glue is taken over from run_evg */
+
+	return evg;
+}
+
+_UNUSED_
+static bool smb_vfs_ev_glue_push_use(const struct smb_vfs_ev_glue *evg,
+				     struct tevent_req *req)
+{
+	if (evg->run_ev == evg->return_ev) {
+		/*
+		 * We're already in the correct
+		 * impersonation environment.
+		 */
+		return true;
+	}
+
+	/*
+	 * Make sure that our caller's callback function
+	 * will be called in the return_ev environment.
+	 */
+	tevent_req_defer_callback(req, evg->return_ev);
+
+	/*
+	 * Let the event context wrapper do the required
+	 * impersonation, its result is returned to the caller.
+	 */
+	return tevent_context_push_use(evg->run_ev);
+}
+
+_UNUSED_
+static void smb_vfs_ev_glue_pop_use(const struct smb_vfs_ev_glue *evg)
+{
+	if (evg->run_ev == evg->return_ev) {
+		/*
+		 * smb_vfs_ev_glue_push_use() didn't
+		 * change the impersonation environment.
+		 */
+		return;
+	}
+
+	/*
+	 * Undo the impersonation done on run_ev.
+	 */
+	tevent_context_pop_use(evg->run_ev);
+}
+
 int smb_vfs_call_connect(struct vfs_handle_struct *handle,
 			 const char *service, const char *user)
 {
-- 
2.17.1


From 24dca099372b3698d5ac059a66a91f93073bd748 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Thu, 5 Jul 2018 13:09:53 +0200
Subject: [PATCH 4/5] s3: vfs: add user_vfs_evg to connection_struct

This will be used in order to pass down the
impersonation magic from the SMB layer through
the SMB_VFS layer.

This includes the following options:

smbd:force sync user path safe threadpool
smbd:force sync user chdir safe threadpool
smbd:force sync root path safe threadpool
smbd:force sync root chdir safe threadpool

They can be used in order to test the non linux code
path on linux, once we get code that makes full use
of the new infrastructure.

Pair-Programmed-With: Stefan Metzmacher <metze at samba.org>

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/include/vfs.h          |   3 +-
 source3/modules/vfs_readonly.c |   2 +-
 source3/smbd/conn.c            |   8 +-
 source3/smbd/msdfs.c           |  67 ++-
 source3/smbd/proto.h           |  13 +
 source3/smbd/uid.c             | 754 ++++++++++++++++++++++++++++++++-
 6 files changed, 818 insertions(+), 29 deletions(-)

diff --git a/source3/include/vfs.h b/source3/include/vfs.h
index c663e2a22a79..96fcba827391 100644
--- a/source3/include/vfs.h
+++ b/source3/include/vfs.h
@@ -404,7 +404,7 @@ typedef struct files_struct {
 
 struct vuid_cache_entry {
 	struct auth_session_info *session_info;
-	struct tevent_context *user_ev_ctx;
+	struct smb_vfs_ev_glue *user_vfs_evg;
 	uint64_t vuid; /* SMB2 compat */
 	bool read_only;
 	uint32_t share_access;
@@ -453,6 +453,7 @@ typedef struct connection_struct {
 	 */
 	struct auth_session_info *session_info;
 	struct tevent_context *user_ev_ctx;
+	struct smb_vfs_ev_glue *user_vfs_evg;
 
 	/*
 	 * If the "force group" parameter is set, this is the primary gid that
diff --git a/source3/modules/vfs_readonly.c b/source3/modules/vfs_readonly.c
index e7e12747a222..37a9e806a156 100644
--- a/source3/modules/vfs_readonly.c
+++ b/source3/modules/vfs_readonly.c
@@ -84,7 +84,7 @@ static int readonly_connect(vfs_handle_struct *handle,
       for (i=0; i< VUID_CACHE_SIZE; i++) {
         struct vuid_cache_entry *ent = &conn->vuid_cache->array[i];
         ent->vuid = UID_FIELD_INVALID;
-        TALLOC_FREE(ent->user_ev_ctx);
+        TALLOC_FREE(ent->user_vfs_evg);
         TALLOC_FREE(ent->session_info);
         ent->read_only = false;
         ent->share_access = 0;
diff --git a/source3/smbd/conn.c b/source3/smbd/conn.c
index cfff6404608f..d8dc1c27d427 100644
--- a/source3/smbd/conn.c
+++ b/source3/smbd/conn.c
@@ -95,10 +95,12 @@ static void conn_clear_vuid_cache(connection_struct *conn, uint64_t vuid)
 		if (ent->vuid == vuid) {
 			ent->vuid = UID_FIELD_INVALID;
 
-			if (conn->user_ev_ctx == ent->user_ev_ctx) {
-				conn->user_ev_ctx = NULL;
+			conn->user_ev_ctx = NULL;
+
+			if (conn->user_vfs_evg == ent->user_vfs_evg) {
+				conn->user_vfs_evg = NULL;
 			}
-			TALLOC_FREE(ent->user_ev_ctx);
+			TALLOC_FREE(ent->user_vfs_evg);
 
 			/*
 			 * We need to keep conn->session_info around
diff --git a/source3/smbd/msdfs.c b/source3/smbd/msdfs.c
index bf9b3abee4a7..00ecb8eae269 100644
--- a/source3/smbd/msdfs.c
+++ b/source3/smbd/msdfs.c
@@ -252,6 +252,10 @@ static NTSTATUS create_conn_struct_as_root(TALLOC_CTX *ctx,
 	const char *vfs_user;
 	struct smbd_server_connection *sconn;
 	const char *servicename = lp_const_servicename(snum);
+	const struct security_unix_token *unix_token = NULL;
+	struct tevent_context *user_ev_ctx = NULL;
+	struct pthreadpool_tevent *user_tp_chdir_safe = NULL;
+	struct pthreadpool_tevent *root_tp_chdir_safe = NULL;
 	int ret;
 
 	sconn = talloc_zero(ctx, struct smbd_server_connection);
@@ -328,6 +332,7 @@ static NTSTATUS create_conn_struct_as_root(TALLOC_CTX *ctx,
 			TALLOC_FREE(conn);
 			return NT_STATUS_NO_MEMORY;
 		}
+		unix_token = conn->session_info->unix_token;
 		/* unix_info could be NULL in session_info */
 		if (conn->session_info->unix_info != NULL) {
 			vfs_user = conn->session_info->unix_info->unix_name;
@@ -339,6 +344,10 @@ static NTSTATUS create_conn_struct_as_root(TALLOC_CTX *ctx,
 		vfs_user = get_current_username();
 	}
 
+	if (unix_token == NULL) {
+		unix_token = get_current_utok(conn);
+	}
+
 	/*
 	 * The impersonation has to be done by the caller
 	 * of create_conn_struct_tos[_cwd]().
@@ -352,14 +361,64 @@ static NTSTATUS create_conn_struct_as_root(TALLOC_CTX *ctx,
 	 * to avoid crashes because TALLOC_FREE(conn->user_ev_ctx)
 	 * would also remove sconn->raw_ev_ctx.
 	 */
-	conn->user_ev_ctx = smbd_impersonate_debug_create(sconn->raw_ev_ctx,
-							  "FAKE impersonation",
-							  DBGLVL_DEBUG);
-	if (conn->user_ev_ctx == NULL) {
+	user_ev_ctx = smbd_impersonate_debug_create(sconn->raw_ev_ctx,
+						    "FAKE impersonation",
+						    DBGLVL_DEBUG);
+	if (user_ev_ctx == NULL) {
+		TALLOC_FREE(conn);
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	user_tp_chdir_safe = smbd_impersonate_tp_current_create(conn,
+						conn->sconn->sync_thread_pool,
+						conn,
+						conn->vuid,
+						true, /* chdir_safe */
+						unix_token);
+	if (user_tp_chdir_safe == NULL) {
+		TALLOC_FREE(conn);
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	root_tp_chdir_safe = smbd_impersonate_tp_become_create(conn,
+						conn->sconn->sync_thread_pool,
+						true, /* chdir_safe */
+						become_root,
+						unbecome_root);
+	if (root_tp_chdir_safe == NULL) {
 		TALLOC_FREE(conn);
 		return NT_STATUS_NO_MEMORY;
 	}
 
+	/*
+	 * We only use the chdir_safe wrappers
+	 * for everything in order to keep
+	 * it simple.
+	 */
+	conn->user_vfs_evg = smb_vfs_ev_glue_create(conn,
+						    user_ev_ctx,
+						    user_tp_chdir_safe,
+						    user_tp_chdir_safe,
+						    user_tp_chdir_safe,
+						    conn->sconn->root_ev_ctx,
+						    root_tp_chdir_safe,
+						    root_tp_chdir_safe,
+						    root_tp_chdir_safe);
+	if (conn->user_vfs_evg == NULL) {
+		TALLOC_FREE(conn);
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	SMB_ASSERT(talloc_reparent(conn, conn->user_vfs_evg, user_ev_ctx));
+	SMB_ASSERT(talloc_reparent(conn, conn->user_vfs_evg, user_tp_chdir_safe));
+	SMB_ASSERT(talloc_reparent(conn, conn->user_vfs_evg, root_tp_chdir_safe));
+
+	conn->user_ev_ctx = smb_vfs_ev_glue_ev_ctx(conn->user_vfs_evg);
+	if (conn->user_ev_ctx == NULL) {
+		TALLOC_FREE(conn);
+		return NT_STATUS_INTERNAL_ERROR;
+	}
+
 	set_conn_connectpath(conn, connpath);
 
 	/*
diff --git a/source3/smbd/proto.h b/source3/smbd/proto.h
index 29121d5c4961..1b4d5366f452 100644
--- a/source3/smbd/proto.h
+++ b/source3/smbd/proto.h
@@ -1233,6 +1233,19 @@ struct tevent_context *smbd_impersonate_conn_sess_create(
 struct tevent_context *smbd_impersonate_root_create(struct tevent_context *main_ev);
 struct tevent_context *smbd_impersonate_guest_create(struct tevent_context *main_ev);
 
+struct pthreadpool_tevent *smbd_impersonate_tp_current_create(
+				TALLOC_CTX *mem_ctx,
+				struct pthreadpool_tevent *sync_tp,
+				struct connection_struct *conn,
+				uint64_t vuid, bool chdir_safe,
+				const struct security_unix_token *unix_token);
+struct pthreadpool_tevent *smbd_impersonate_tp_become_create(
+					TALLOC_CTX *mem_ctx,
+					struct pthreadpool_tevent *sync_tp,
+					bool chdir_safe,
+					void (*become_fn)(void),
+					void (*unbecome_fn)(void));
+
 /* The following definitions come from smbd/utmp.c  */
 
 void sys_utmp_claim(const char *username, const char *hostname,
diff --git a/source3/smbd/uid.c b/source3/smbd/uid.c
index fcc4d51a698c..41bb66e2df1d 100644
--- a/source3/smbd/uid.c
+++ b/source3/smbd/uid.c
@@ -18,6 +18,7 @@
 */
 
 #include "includes.h"
+#include "system/filesys.h"
 #include "system/passwd.h"
 #include "smbd/smbd.h"
 #include "smbd/globals.h"
@@ -26,6 +27,12 @@
 #include "passdb/lookup_sid.h"
 #include "auth.h"
 #include "lib/util/time_basic.h"
+#include "lib/pthreadpool/pthreadpool_tevent.h"
+
+static struct smb_vfs_ev_glue *smbd_impersonate_user_ev_glue_create(
+				struct connection_struct *conn,
+				uint64_t vuid,
+				struct auth_session_info *session_info);
 
 struct smbd_impersonate_debug_state {
 	int dbg_lvl;
@@ -306,7 +313,8 @@ static void free_conn_session_info_if_unused(connection_struct *conn)
 		}
 	}
 	/* Not used, safe to free. */
-	TALLOC_FREE(conn->user_ev_ctx);
+	conn->user_ev_ctx = NULL;
+	TALLOC_FREE(conn->user_vfs_evg);
 	TALLOC_FREE(conn->session_info);
 }
 
@@ -432,10 +440,13 @@ static bool check_user_ok(connection_struct *conn,
 			}
 			free_conn_session_info_if_unused(conn);
 			conn->session_info = ent->session_info;
-			conn->user_ev_ctx = ent->user_ev_ctx;
+			conn->user_vfs_evg = ent->user_vfs_evg;
 			conn->read_only = ent->read_only;
 			conn->share_access = ent->share_access;
 			conn->vuid = ent->vuid;
+			conn->user_ev_ctx = smb_vfs_ev_glue_ev_ctx(
+						conn->user_vfs_evg);
+			SMB_ASSERT(conn->user_ev_ctx != NULL);
 			return(True);
 		}
 	}
@@ -481,22 +492,12 @@ static bool check_user_ok(connection_struct *conn,
 		ent->session_info->unix_token->uid = sec_initial_uid();
 	}
 
-	if (vuid == UID_FIELD_INVALID) {
-		ent->user_ev_ctx = smbd_impersonate_conn_sess_create(
-			conn->sconn->raw_ev_ctx, conn, ent->session_info);
-		if (ent->user_ev_ctx == NULL) {
-			TALLOC_FREE(ent->session_info);
-			ent->vuid = UID_FIELD_INVALID;
-			return false;
-		}
-	} else {
-		ent->user_ev_ctx = smbd_impersonate_conn_vuid_create(
-			conn->sconn->raw_ev_ctx, conn, vuid);
-		if (ent->user_ev_ctx == NULL) {
-			TALLOC_FREE(ent->session_info);
-			ent->vuid = UID_FIELD_INVALID;
-			return false;
-		}
+	ent->user_vfs_evg = smbd_impersonate_user_ev_glue_create(conn,
+							vuid, ent->session_info);
+	if (ent->user_vfs_evg == NULL) {
+		TALLOC_FREE(ent->session_info);
+		ent->vuid = UID_FIELD_INVALID;
+		return false;
 	}
 
 	/*
@@ -511,7 +512,10 @@ static bool check_user_ok(connection_struct *conn,
 	free_conn_session_info_if_unused(conn);
 	conn->session_info = ent->session_info;
 	conn->vuid = ent->vuid;
-	conn->user_ev_ctx = ent->user_ev_ctx;
+	conn->user_vfs_evg = ent->user_vfs_evg;
+	conn->user_ev_ctx = smb_vfs_ev_glue_ev_ctx(conn->user_vfs_evg);
+	SMB_ASSERT(conn->user_ev_ctx != NULL);
+
 	if (vuid == UID_FIELD_INVALID) {
 		/*
 		 * Not strictly needed, just make it really
@@ -520,7 +524,7 @@ static bool check_user_ok(connection_struct *conn,
 		ent->read_only = false;
 		ent->share_access = 0;
 		ent->session_info = NULL;
-		ent->user_ev_ctx = NULL;
+		ent->user_vfs_evg = NULL;
 	}
 
 	conn->read_only = readonly_share;
@@ -1932,3 +1936,713 @@ struct tevent_context *smbd_impersonate_guest_create(struct tevent_context *main
 
 	return ev;
 }
+
+struct smbd_impersonate_tp_current_state {
+	const void *conn_ptr;
+	uint64_t vuid; /* SMB2 compat */
+	struct security_unix_token partial_ut;
+	bool chdir_safe;
+	int saved_cwd_fd;
+};
+
+static int smbd_impersonate_tp_current_state_destructor(
+		struct smbd_impersonate_tp_current_state *state)
+{
+	if (state->saved_cwd_fd != -1) {
+		smb_panic(__location__);
+	}
+
+	return 0;
+}
+
+static bool smbd_impersonate_tp_current_before_job(struct pthreadpool_tevent *wrap,
+						   void *private_data,
+						   struct pthreadpool_tevent *main,
+						   const char *location)
+{
+	struct smbd_impersonate_tp_current_state *state =
+		talloc_get_type_abort(private_data,
+		struct smbd_impersonate_tp_current_state);
+
+	if (state->conn_ptr != current_user.conn) {
+		smb_panic(__location__);
+	}
+
+	if (state->vuid != current_user.vuid) {
+		smb_panic(__location__);
+	}
+
+	if (state->partial_ut.uid != current_user.ut.uid) {
+		smb_panic(__location__);
+	}
+
+	if (state->partial_ut.gid != current_user.ut.gid) {
+		smb_panic(__location__);
+	}
+
+	if (state->partial_ut.ngroups != current_user.ut.ngroups) {
+		smb_panic(__location__);
+	}
+
+	/*
+	 * We don't verify the group list, we should have hit
+	 * an assert before. We only want to catch programmer
+	 * errors here!
+	 *
+	 * We just have a sync pool and want to make sure
+	 * we're already in the correct state.
+	 *
+	 * So we don't do any active impersonation.
+	 */
+
+	/*
+	 * we may need to remember the current working directory
+	 * and later restore it in the after_job hook.
+	 */
+	if (state->chdir_safe) {
+		int open_flags = O_RDONLY;
+		bool ok;
+
+#ifdef O_DIRECTORY
+		open_flags |= O_DIRECTORY;
+#endif
+#ifdef O_CLOEXEC
+		open_flags |= O_CLOEXEC;
+#endif
+
+		state->saved_cwd_fd = open(".", open_flags);
+		if (state->saved_cwd_fd == -1) {
+			DBG_ERR("unable to open '.' with open_flags[0x%x] - %s\n",
+				open_flags, strerror(errno));
+			smb_panic("smbd_impersonate_tp_current_before_job: "
+				  "unable to open cwd '.'");
+			return false;
+		}
+		ok = smb_set_close_on_exec(state->saved_cwd_fd);
+		SMB_ASSERT(ok);
+	}
+
+	return true;
+}
+
+static bool smbd_impersonate_tp_current_after_job(struct pthreadpool_tevent *wrap,
+						  void *private_data,
+						  struct pthreadpool_tevent *main,
+						  const char *location)
+{
+	struct smbd_impersonate_tp_current_state *state =
+		talloc_get_type_abort(private_data,
+		struct smbd_impersonate_tp_current_state);
+	int ret;
+
+	/*
+	 * There's no impersonation to revert.
+	 *
+	 * But we may need to reset the current working directory.
+	 */
+	if (state->saved_cwd_fd == -1) {
+		return true;
+	}
+
+	ret = fchdir(state->saved_cwd_fd);
+	if (ret != 0) {
+		DBG_ERR("unable to fchdir to the original directory - %s\n",
+			strerror(errno));
+		smb_panic("smbd_impersonate_tp_current_after_job: "
+			  "unable restore cwd with fchdir.");
+		return false;
+	}
+
+	close(state->saved_cwd_fd);
+	state->saved_cwd_fd = -1;
+
+	return true;
+}
+
+static const struct pthreadpool_tevent_wrapper_ops smbd_impersonate_tp_current_ops = {
+	.name		= "smbd_impersonate_tp_current",
+	.before_job	= smbd_impersonate_tp_current_before_job,
+	.after_job	= smbd_impersonate_tp_current_after_job,
+};
+
+struct pthreadpool_tevent *smbd_impersonate_tp_current_create(
+				TALLOC_CTX *mem_ctx,
+				struct pthreadpool_tevent *sync_tp,
+				struct connection_struct *conn,
+				uint64_t vuid, bool chdir_safe,
+				const struct security_unix_token *unix_token)
+{
+	struct pthreadpool_tevent *wrap_tp = NULL;
+	struct smbd_impersonate_tp_current_state *state = NULL;
+	size_t max_threads;
+
+	max_threads = pthreadpool_tevent_max_threads(sync_tp);
+	SMB_ASSERT(max_threads == 0);
+
+	/*
+	 * We have a fake threadpool without real threads.
+	 * So we just provide a wrapper that asserts that
+	 * we are already in the required impersonation state.
+	 */
+
+	wrap_tp = pthreadpool_tevent_wrapper_create(sync_tp,
+					mem_ctx,
+					&smbd_impersonate_tp_current_ops,
+					&state,
+					struct smbd_impersonate_tp_current_state);
+	if (wrap_tp == NULL) {
+		return NULL;
+	}
+
+	state->conn_ptr = conn;
+	state->vuid = vuid;
+	state->partial_ut = *unix_token;
+	state->partial_ut.groups = NULL;
+	state->chdir_safe = chdir_safe;
+	state->saved_cwd_fd = -1;
+
+	if (chdir_safe) {
+		pthreadpool_tevent_force_per_thread_cwd(wrap_tp, state);
+	}
+
+	talloc_set_destructor(state, smbd_impersonate_tp_current_state_destructor);
+
+	return wrap_tp;
+}
+
+struct smbd_impersonate_tp_sess_state {
+	const struct security_unix_token *unix_token;
+};
+
+static bool smbd_impersonate_tp_sess_before_job(struct pthreadpool_tevent *wrap,
+						void *private_data,
+						struct pthreadpool_tevent *main,
+						const char *location)
+{
+	struct smbd_impersonate_tp_sess_state *state =
+		talloc_get_type_abort(private_data,
+		struct smbd_impersonate_tp_sess_state);
+	int ret;
+
+	/* Become the correct credential on this thread. */
+	ret = set_thread_credentials(state->unix_token->uid,
+				     state->unix_token->gid,
+				     (size_t)state->unix_token->ngroups,
+				     state->unix_token->groups);
+	if (ret != 0) {
+		return false;
+	}
+
+	return true;
+}
+
+static bool smbd_impersonate_tp_sess_after_job(struct pthreadpool_tevent *wrap,
+					       void *private_data,
+					       struct pthreadpool_tevent *main,
+					       const char *location)
+{
+	/*
+	 * We skip the 'unbecome' here, if the following
+	 * job cares, it already called set_thread_credentials() again.
+	 *
+	 * fd based jobs on the raw pool, don't really care...
+	 */
+	return true;
+}
+
+static const struct pthreadpool_tevent_wrapper_ops smbd_impersonate_tp_sess_ops = {
+	.name		= "smbd_impersonate_tp_sess",
+	.before_job	= smbd_impersonate_tp_sess_before_job,
+	.after_job	= smbd_impersonate_tp_sess_after_job,
+};
+
+static struct pthreadpool_tevent *smbd_impersonate_tp_sess_create(
+				TALLOC_CTX *mem_ctx,
+				struct pthreadpool_tevent *main_tp,
+				struct auth_session_info *session_info)
+{
+	struct pthreadpool_tevent *wrap_tp = NULL;
+	struct smbd_impersonate_tp_sess_state *state = NULL;
+	size_t max_threads;
+
+	max_threads = pthreadpool_tevent_max_threads(main_tp);
+	SMB_ASSERT(max_threads > 0);
+
+	wrap_tp = pthreadpool_tevent_wrapper_create(main_tp,
+					mem_ctx,
+					&smbd_impersonate_tp_sess_ops,
+					&state,
+					struct smbd_impersonate_tp_sess_state);
+	if (wrap_tp == NULL) {
+		return NULL;
+	}
+
+	state->unix_token = copy_unix_token(state, session_info->unix_token);
+	if (state->unix_token == NULL) {
+		int saved_errno = errno;
+		TALLOC_FREE(wrap_tp);
+		errno = saved_errno;
+		return NULL;
+	}
+
+	return wrap_tp;
+}
+
+struct smbd_impersonate_tp_become_state {
+	void (*become_fn)(void);
+	void (*unbecome_fn)(void);
+	bool chdir_safe;
+	int saved_cwd_fd;
+};
+
+static int smbd_impersonate_tp_become_state_destructor(
+		struct smbd_impersonate_tp_become_state *state)
+{
+	if (state->saved_cwd_fd != -1) {
+		smb_panic(__location__);
+	}
+
+	return 0;
+}
+
+
+static bool smbd_impersonate_tp_become_before_job(struct pthreadpool_tevent *wrap,
+						   void *private_data,
+						   struct pthreadpool_tevent *main,
+						   const char *location)
+{
+	struct smbd_impersonate_tp_become_state *state =
+		talloc_get_type_abort(private_data,
+		struct smbd_impersonate_tp_become_state);
+
+	/*
+	 * Remember the current working directory before become_fn()
+	 * runs, the after_job hook restores it from saved_cwd_fd.
+	 */
+	if (state->chdir_safe) {
+		int open_flags = O_RDONLY;
+		bool ok;
+
+#ifdef O_DIRECTORY
+		open_flags |= O_DIRECTORY;
+#endif
+#ifdef O_CLOEXEC
+		open_flags |= O_CLOEXEC;
+#endif
+
+		state->saved_cwd_fd = open(".", open_flags);
+		if (state->saved_cwd_fd == -1) {
+			DBG_ERR("unable to open '.' with open_flags[0x%x] - %s\n",
+				open_flags, strerror(errno));
+			smb_panic("smbd_impersonate_tp_become_before_job: "
+				  "unable to open cwd '.'");
+			return false;
+		}
+		ok = smb_set_close_on_exec(state->saved_cwd_fd);
+		SMB_ASSERT(ok);
+	}
+
+	/*
+	 * become_fn() returns void, it should abort on error...
+	 */
+	state->become_fn();
+
+	return true;
+}
+
+static bool smbd_impersonate_tp_become_after_job(struct pthreadpool_tevent *wrap,
+						  void *private_data,
+						  struct pthreadpool_tevent *main,
+						  const char *location)
+{
+	struct smbd_impersonate_tp_become_state *state =
+		talloc_get_type_abort(private_data,
+		struct smbd_impersonate_tp_become_state);
+	int ret;
+
+	/*
+	 * unbecome_fn() returns void, it should abort on error...
+	 */
+	state->unbecome_fn();
+
+	/*
+	 * The impersonation is reverted now.
+	 *
+	 * But we may need to reset the current working directory.
+	 */
+	if (state->saved_cwd_fd == -1) {
+		return true;
+	}
+
+	ret = fchdir(state->saved_cwd_fd);
+	if (ret != 0) {
+		DBG_ERR("unable to fchdir to the original directory - %s\n",
+			strerror(errno));
+		smb_panic("smbd_impersonate_tp_become_after_job: "
+			  "unable restore cwd with fchdir.");
+		return false;
+	}
+
+	close(state->saved_cwd_fd);
+	state->saved_cwd_fd = -1;
+
+	return true;
+}
+
+static const struct pthreadpool_tevent_wrapper_ops smbd_impersonate_tp_become_ops = {
+	.name		= "smbd_impersonate_tp_become",
+	.before_job	= smbd_impersonate_tp_become_before_job,
+	.after_job	= smbd_impersonate_tp_become_after_job,
+};
+
+struct pthreadpool_tevent *smbd_impersonate_tp_become_create(
+					TALLOC_CTX *mem_ctx,
+					struct pthreadpool_tevent *sync_tp,
+					bool chdir_safe,
+					void (*become_fn)(void),
+					void (*unbecome_fn)(void))
+{
+	struct pthreadpool_tevent *wrap_tp = NULL;
+	struct smbd_impersonate_tp_become_state *state = NULL;
+	size_t max_threads;
+
+	max_threads = pthreadpool_tevent_max_threads(sync_tp);
+	SMB_ASSERT(max_threads == 0);
+
+	/*
+	 * We have a fake threadpool without real threads.
+	 * So we just provide a wrapper that frames each
+	 * job in become_fn()/unbecome_fn().
+	 */
+
+	wrap_tp = pthreadpool_tevent_wrapper_create(sync_tp,
+					mem_ctx,
+					&smbd_impersonate_tp_become_ops,
+					&state,
+					struct smbd_impersonate_tp_become_state);
+	if (wrap_tp == NULL) {
+		return NULL;
+	}
+
+	state->become_fn = become_fn;
+	state->unbecome_fn = unbecome_fn;
+	state->chdir_safe = chdir_safe;
+	state->saved_cwd_fd = -1;
+
+	if (chdir_safe) {
+		pthreadpool_tevent_force_per_thread_cwd(wrap_tp, state);
+	}
+
+	talloc_set_destructor(state, smbd_impersonate_tp_become_state_destructor);
+
+	return wrap_tp;
+}
+
+struct smbd_impersonate_tp_root_state {
+	const struct security_unix_token *fallback_token;
+};
+
+static bool smbd_impersonate_tp_root_before_job(struct pthreadpool_tevent *wrap,
+						void *private_data,
+						struct pthreadpool_tevent *main,
+						const char *location)
+{
+	int ret;
+
+	/*
+	 * Become root in this thread.
+	 */
+	ret = set_thread_credentials(0, 0, 0, NULL);
+	if (ret != 0) {
+		return false;
+	}
+
+	return true;
+}
+
+static bool smbd_impersonate_tp_root_after_job(struct pthreadpool_tevent *wrap,
+					       void *private_data,
+					       struct pthreadpool_tevent *main,
+					       const char *location)
+{
+	struct smbd_impersonate_tp_root_state *state =
+		talloc_get_type_abort(private_data,
+		struct smbd_impersonate_tp_root_state);
+	int ret;
+
+	/*
+	 * Move to a non root token again.
+	 * We just use the one of the user_ev_ctx.
+	 *
+	 * The main goal is that we don't leave
+	 * a thread around with a root token.
+	 */
+	ret = set_thread_credentials(state->fallback_token->uid,
+				     state->fallback_token->gid,
+				     (size_t)state->fallback_token->ngroups,
+				     state->fallback_token->groups);
+	if (ret != 0) {
+		return false;
+	}
+
+	return true;
+}
+
+static const struct pthreadpool_tevent_wrapper_ops smbd_impersonate_tp_root_ops = {
+	.name		= "smbd_impersonate_tp_root",
+	.before_job	= smbd_impersonate_tp_root_before_job,
+	.after_job	= smbd_impersonate_tp_root_after_job,
+};
+
+static struct pthreadpool_tevent *smbd_impersonate_tp_root_create(
+				TALLOC_CTX *mem_ctx,
+				struct pthreadpool_tevent *main_tp,
+				int snum,
+				const struct security_unix_token *fallback_token)
+{
+	struct pthreadpool_tevent *wrap_tp = NULL;
+	struct smbd_impersonate_tp_root_state *state = NULL;
+	size_t max_threads;
+
+	max_threads = pthreadpool_tevent_max_threads(main_tp);
+	SMB_ASSERT(max_threads > 0);
+
+	wrap_tp = pthreadpool_tevent_wrapper_create(main_tp,
+				mem_ctx,
+				&smbd_impersonate_tp_root_ops,
+				&state,
+				struct smbd_impersonate_tp_root_state);
+	if (wrap_tp == NULL) {
+		return NULL;
+	}
+
+	state->fallback_token = copy_unix_token(state, fallback_token);
+	if (state->fallback_token == NULL) {
+		int saved_errno = errno;
+		TALLOC_FREE(wrap_tp);
+		errno = saved_errno;
+		return NULL;
+	}
+
+	return wrap_tp;
+}
+
+static struct smb_vfs_ev_glue *smbd_impersonate_user_ev_glue_create(
+				struct connection_struct *conn,
+				uint64_t vuid,
+				struct auth_session_info *session_info)
+{
+	TALLOC_CTX *frame = talloc_stackframe();
+	struct smb_vfs_ev_glue *user_vfs_evg = NULL;
+	struct tevent_context *user_ev_ctx = NULL;
+	struct pthreadpool_tevent *user_tp_fd_safe = NULL;
+	struct pthreadpool_tevent *user_tp_path_safe = NULL;
+	bool user_tp_path_sync = true;
+	struct pthreadpool_tevent *user_tp_chdir_safe = NULL;
+	bool user_tp_chdir_sync = true;
+	struct pthreadpool_tevent *root_tp_fd_safe = NULL;
+	struct pthreadpool_tevent *root_tp_path_safe = NULL;
+	bool root_tp_path_sync = true;
+	struct pthreadpool_tevent *root_tp_chdir_safe = NULL;
+	bool root_tp_chdir_sync = true;
+	size_t max_threads;
+
+	if (vuid == UID_FIELD_INVALID) {
+		user_ev_ctx = smbd_impersonate_conn_sess_create(
+			conn->sconn->raw_ev_ctx, conn, session_info);
+		if (user_ev_ctx == NULL) {
+			TALLOC_FREE(frame);
+			return NULL;
+		}
+	} else {
+		user_ev_ctx = smbd_impersonate_conn_vuid_create(
+			conn->sconn->raw_ev_ctx, conn, vuid);
+		if (user_ev_ctx == NULL) {
+			TALLOC_FREE(frame);
+			return NULL;
+		}
+	}
+	SMB_ASSERT(talloc_reparent(conn, frame, user_ev_ctx));
+
+#ifdef HAVE_LINUX_THREAD_CREDENTIALS
+	user_tp_path_sync = lp_parm_bool(SNUM(conn),
+					 "smbd",
+					 "force sync user path safe threadpool",
+					 false);
+	user_tp_chdir_sync = lp_parm_bool(SNUM(conn),
+					  "smbd",
+					  "force sync user chdir safe threadpool",
+					  false);
+	root_tp_path_sync = lp_parm_bool(SNUM(conn),
+					 "smbd",
+					 "force sync root path safe threadpool",
+					 false);
+	root_tp_chdir_sync = lp_parm_bool(SNUM(conn),
+					  "smbd",
+					  "force sync root chdir safe threadpool",
+					  false);
+#endif
+
+	max_threads = pthreadpool_tevent_max_threads(conn->sconn->raw_thread_pool);
+	if (max_threads == 0) {
+		/*
+		 * We don't have real threads, so we need to force
+		 * the sync versions...
+		 */
+		user_tp_path_sync = true;
+		user_tp_chdir_sync = true;
+		root_tp_path_sync = true;
+		root_tp_chdir_sync = true;
+	}
+
+	/*
+	 * fd_safe is easy :-)
+	 */
+	user_tp_fd_safe = conn->sconn->raw_thread_pool;
+	root_tp_fd_safe = conn->sconn->raw_thread_pool;
+
+	if (user_tp_path_sync) {
+		/*
+		 * We don't have support for per thread credentials,
+		 * so we just provide a sync thread pool with a wrapper
+		 * that asserts that we are already in the required
+		 * impersonation state.
+		 */
+		user_tp_path_safe = smbd_impersonate_tp_current_create(conn,
+						conn->sconn->sync_thread_pool,
+						conn,
+						vuid,
+						false, /* chdir_safe */
+						session_info->unix_token);
+		if (user_tp_path_safe == NULL) {
+			TALLOC_FREE(frame);
+			return NULL;
+		}
+	} else {
+		user_tp_path_safe = smbd_impersonate_tp_sess_create(conn,
+						conn->sconn->raw_thread_pool,
+						session_info);
+		if (user_tp_path_safe == NULL) {
+			TALLOC_FREE(frame);
+			return NULL;
+		}
+	}
+	SMB_ASSERT(talloc_reparent(conn, frame, user_tp_path_safe));
+
+	if (pthreadpool_tevent_per_thread_cwd(user_tp_path_safe)) {
+		user_tp_chdir_safe = user_tp_path_safe;
+	} else {
+		user_tp_chdir_sync = true;
+	}
+
+	if (user_tp_chdir_sync) {
+		/*
+		 * We don't have support for per thread credentials,
+		 * so we just provide a sync thread pool with a wrapper
+		 * that asserts that we are already in the required
+		 * impersonation state.
+		 *
+		 * And it needs to cleanup after [f]chdir() within
+		 * the job...
+		 */
+		user_tp_chdir_safe = smbd_impersonate_tp_current_create(conn,
+						conn->sconn->sync_thread_pool,
+						conn,
+						vuid,
+						true, /* chdir_safe */
+						session_info->unix_token);
+		if (user_tp_chdir_safe == NULL) {
+			TALLOC_FREE(frame);
+			return NULL;
+		}
+		SMB_ASSERT(talloc_reparent(conn, frame, user_tp_chdir_safe));
+	} else {
+		SMB_ASSERT(user_tp_chdir_safe != NULL);
+	}
+
+	if (root_tp_path_sync) {
+		/*
+		 * We don't have support for per thread credentials,
+		 * so we just provide a sync thread pool with a wrapper
+		 * that wraps the job in become_root()/unbecome_root().
+		 */
+		root_tp_path_safe = smbd_impersonate_tp_become_create(conn,
+						conn->sconn->sync_thread_pool,
+						false, /* chdir_safe */
+						become_root,
+						unbecome_root);
+		if (root_tp_path_safe == NULL) {
+			TALLOC_FREE(frame);
+			return NULL;
+		}
+	} else {
+		root_tp_path_safe = smbd_impersonate_tp_root_create(conn,
+						conn->sconn->raw_thread_pool,
+						SNUM(conn),
+						session_info->unix_token);
+		if (root_tp_path_safe == NULL) {
+			TALLOC_FREE(frame);
+			return NULL;
+		}
+	}
+	SMB_ASSERT(talloc_reparent(conn, frame, root_tp_path_safe));
+
+	if (pthreadpool_tevent_per_thread_cwd(root_tp_path_safe)) {
+		root_tp_chdir_safe = root_tp_path_safe;
+	} else {
+		root_tp_chdir_sync = true;
+	}
+
+	if (root_tp_chdir_sync) {
+		/*
+		 * We don't have support for per thread credentials,
+		 * so we just provide a sync thread pool with a wrapper
+		 * that wraps the job in become_root()/unbecome_root().
+		 *
+		 * And it needs to cleanup after [f]chdir() within
+		 * the job...
+		 */
+		root_tp_chdir_safe = smbd_impersonate_tp_become_create(conn,
+						conn->sconn->sync_thread_pool,
+						true, /* chdir_safe */
+						become_root,
+						unbecome_root);
+		if (root_tp_chdir_safe == NULL) {
+			TALLOC_FREE(frame);
+			return NULL;
+		}
+		SMB_ASSERT(talloc_reparent(conn, frame, root_tp_chdir_safe));
+	} else {
+		SMB_ASSERT(root_tp_chdir_safe != NULL);
+	}
+
+	user_vfs_evg = smb_vfs_ev_glue_create(conn,
+					      user_ev_ctx,
+					      user_tp_fd_safe,
+					      user_tp_path_safe,
+					      user_tp_chdir_safe,
+					      conn->sconn->root_ev_ctx,
+					      root_tp_fd_safe,
+					      root_tp_path_safe,
+					      root_tp_chdir_safe);
+	if (user_vfs_evg == NULL) {
+		TALLOC_FREE(frame);
+		return NULL;
+	}
+
+	/*
+	 * Make sure everything is a talloc child of user_vfs_evg
+	 */
+	SMB_ASSERT(talloc_reparent(frame, user_vfs_evg, user_ev_ctx));
+	SMB_ASSERT(talloc_reparent(frame, user_vfs_evg, user_tp_path_safe));
+	if (user_tp_path_safe != user_tp_chdir_safe) {
+		SMB_ASSERT(talloc_reparent(frame, user_vfs_evg, user_tp_chdir_safe));
+	}
+	SMB_ASSERT(talloc_reparent(frame, user_vfs_evg, root_tp_path_safe));
+	if (root_tp_path_safe != root_tp_chdir_safe) {
+		SMB_ASSERT(talloc_reparent(frame, user_vfs_evg, root_tp_chdir_safe));
+	}
+
+	TALLOC_FREE(frame);
+	return user_vfs_evg;
+}
-- 
2.17.1


From ecb0d04370d6ab0176b5d3ca6158c066861e53a6 Mon Sep 17 00:00:00 2001
From: Ralph Boehme <slow at samba.org>
Date: Fri, 13 Jul 2018 16:48:19 +0200
Subject: [PATCH 5/5] vfs_aio_pthread: use event context and threadpool from
 user_vfs_evg

Or the root glue in case we're already root.

Pair-Programmed-With: Stefan Metzmacher <metze at samba.org>

Signed-off-by: Stefan Metzmacher <metze at samba.org>
---
 source3/modules/vfs_aio_pthread.c | 40 ++++++++++++++-----------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/source3/modules/vfs_aio_pthread.c b/source3/modules/vfs_aio_pthread.c
index da1ca534907f..cf5b7f61d5b0 100644
--- a/source3/modules/vfs_aio_pthread.c
+++ b/source3/modules/vfs_aio_pthread.c
@@ -51,7 +51,6 @@ struct aio_open_private_data {
 	const char *fname;
 	char *dname;
 	struct smbd_server_connection *sconn;
-	const struct security_unix_token *ux_tok;
 	uint64_t initial_allocation_size;
 	/* Returns. */
 	int ret_fd;
@@ -140,16 +139,6 @@ static void aio_open_worker(void *private_data)
 	struct aio_open_private_data *opd =
 		(struct aio_open_private_data *)private_data;
 
-	/* Become the correct credential on this thread. */
-	if (set_thread_credentials(opd->ux_tok->uid,
-				opd->ux_tok->gid,
-				(size_t)opd->ux_tok->ngroups,
-				opd->ux_tok->groups) != 0) {
-		opd->ret_fd = -1;
-		opd->ret_errno = errno;
-		return;
-	}
-
 	opd->ret_fd = openat(opd->dir_fd,
 			opd->fname,
 			opd->flags,
@@ -219,13 +208,6 @@ static struct aio_open_private_data *create_private_open_data(const files_struct
 	opd->sconn = fsp->conn->sconn;
 	opd->initial_allocation_size = fsp->initial_allocation_size;
 
-	/* Copy our current credentials. */
-	opd->ux_tok = copy_unix_token(opd, get_current_utok(fsp->conn));
-	if (opd->ux_tok == NULL) {
-		TALLOC_FREE(opd);
-		return NULL;
-	}
-
 	/*
 	 * Copy the parent directory name and the
 	 * relative path within it.
@@ -268,6 +250,10 @@ static int open_async(const files_struct *fsp,
 {
 	struct aio_open_private_data *opd = NULL;
 	struct tevent_req *subreq = NULL;
+	const struct smb_vfs_ev_glue *evg = NULL;
+	struct tevent_context *ev = NULL;
+	struct pthreadpool_tevent *tp = NULL;
+	uid_t uid = -1;
 
 	opd = create_private_open_data(fsp, flags, mode);
 	if (opd == NULL) {
@@ -275,10 +261,20 @@ static int open_async(const files_struct *fsp,
 		return -1;
 	}
 
-	subreq = pthreadpool_tevent_job_send(opd,
-					     fsp->conn->user_ev_ctx,
-					     fsp->conn->sconn->raw_thread_pool,
-					     aio_open_worker, opd);
+	evg = fsp->conn->user_vfs_evg;
+
+	uid = get_current_uid(fsp->conn);
+	if (uid == 0) {
+		/*
+		 * We're already running as root, so use the
+		 * root glue instead of the user's one.
+		 */
+		evg = smb_vfs_ev_glue_get_root_glue(evg);
+	}
+	ev = smb_vfs_ev_glue_ev_ctx(evg);
+	tp = smb_vfs_ev_glue_tp_path_safe(evg);
+
+	subreq = pthreadpool_tevent_job_send(opd, ev, tp, aio_open_worker, opd);
 	if (subreq == NULL) {
 		return -1;
 	}
-- 
2.17.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <http://lists.samba.org/pipermail/samba-technical/attachments/20180724/7defa1ac/signature.sig>


More information about the samba-technical mailing list